from typing import *

from perrepeat import *
from pertoken import *


# Attribute letters / kinds used by the PER grammar (they mirror the
# *GROUP / *FLD / *MASK token families in pertoken.py).
T32GroupAttr = Literal['h', 'r', 's', 'w']
T32FieldAttr = Literal['a', 'r', 'hex', 'event', 'setclr']
T32MaskAttr = Literal['dec', 'hex', 'rhex']
T32TreeVisibility = Literal['open', 'closed']


class T32Field(NamedTuple):
    """A single bitfield of a register line (BITFLD/HEXFLD/... tokens)."""
    attr: T32FieldAttr
    qual: PerTokenQual
    thing: int
    bitrange: Tuple[int, int]  # inclusive on both ends
    name: str
    description: str
    values: Sequence[str]


class T32Mask(NamedTuple):
    """A masked sub-value of a register line (hexmask-style tokens)."""
    attr: T32MaskAttr
    qual: Tuple[PerTokenQual, PerTokenQual]
    thing: int
    bitrange: Tuple[int, int]  # inclusive on both ends
    mask: int
    name: str
    description: str
    values: Sequence[str]


class T32Line(NamedTuple):
    """One register line of a group, plus the fields/masks defined on it."""
    hidden: bool
    qual: PerTokenQual
    offset: int
    name: str
    description: str
    # FIX(review): original read "field:s Sequence[...]" — a syntax error;
    # "fields" is the only plausible intent.
    fields: Sequence[Union[T32Field, T32Mask]]


class T32Group(NamedTuple):
    """A register group ((h|r|s|w)group tokens) at some address (range)."""
    # NOTE(review): name is plural but the type holds a single attribute
    # letter — presumably one attr per group; confirm against the parser.
    attrs: T32GroupAttr
    qual: PerTokenQual
    address: Union[T32Address, T32AddressRange]
    description: str
    regs: Sequence[T32Line]
    copy: bool = False  # True when produced by a 'copy' token


class T32GroupRepeat(NamedTuple):
    """A run of groups expanded from a repeat ... repeat.end block."""
    repeat: T32Repeat
    width: int
    groups: Sequence[T32Group]


class T32GroupRepeatReplay(NamedTuple):
    """Marker for 'repeat.replay': re-run the previous T32GroupRepeat."""
    pass


class T32Tree(NamedTuple):
    """A tree node (tree/tree.open/tree.close) holding groups and subtrees."""
    base: T32Address
    width: int
    visibility: T32TreeVisibility
    groups: Sequence[Union[T32Group, T32GroupRepeat, T32GroupRepeatReplay]]
    # FIX(review): T32TreeRepeat is defined below and the reference is
    # mutually recursive — quoted forward reference avoids a NameError at
    # import time (NamedTuple annotations are evaluated eagerly).
    subtrees: Sequence['Union[T32TreeRepeat, T32Tree]']


class T32TreeRepeat(NamedTuple):
    """A repeat block at tree level (self-referential; see T32Tree)."""
    repeat: T32Repeat
    # FIX(review): quoted for the same forward/self-reference reason as above.
    subtrees: Sequence['Union[T32TreeRepeat, T32Tree]']


class T32Per(NamedTuple):
    """A fully parsed .per file: header metadata plus the tree structure."""
    title: str
    props: str
    author: str
    changelog: Sequence[str]
    manufacturer: str
    doc: str
    core: str
    chip: Sequence[str]
    copyright: str
    description: str
    keywords: str
    date: str
    id: str
    config: Tuple[int, int]
    trees: Union[T32Tree, T32TreeRepeat]
from typing import *


class T32SpacedAddress(NamedTuple):
    """An address qualified by a TRACE32 address space, e.g. "D:0x100"."""
    addrspace: str
    address: int

    def __add__(self, other):
        """Offset by an int, or add another address in the SAME space."""
        if isinstance(other, int):
            return T32SpacedAddress(self.addrspace, self.address + other)
        if isinstance(other, T32SpacedAddress):
            assert self.addrspace == other.addrspace
            return T32SpacedAddress(self.addrspace, self.address + other.address)
        raise TypeError("cannot add %s to T32SpacedAddress" % type(other))

    def __repr__(self):
        return "T32SpacedAddress(%s, %s)" % (repr(self.addrspace), hex(self.address))

    def __str__(self):
        return "%s:%s" % (self.addrspace, hex(self.address))


T32Address = Union[str, int, T32SpacedAddress]
T32RepeatVarType = Literal['strings', 'list', 'increment']


class T32RepeatVar(NamedTuple):
    """One repeat variable: its kind and its per-repetition values/params."""
    type: T32RepeatVarType
    args: Sequence[T32Address]


class T32AddressRange(NamedTuple):
    """A half-open address range: [base, base+size)."""
    base: T32Address
    size: int


class T32Repeat(NamedTuple):
    """A repeat block: 'count' repetitions over a set of variables."""
    # NOTE: field name 'vars' shadows the builtin but is kept — it is part
    # of the public NamedTuple interface.
    count: int
    vars: Sequence[T32RepeatVar]

    def get_value(self, repind: int, varind: int):
        """Return the value of variable *varind* for repetition *repind*."""
        assert 0 <= repind < self.count, "%d vs %s" % (repind, self)
        assert 0 <= varind < len(self.vars), "%d vs %s" % (varind, self)
        v = self.vars[varind]
        if v.type == 'increment':
            # args = (start, step); 'strings'/'list' index args directly.
            return v.args[0] + v.args[1] * repind
        return v.args[repind]

    def eval(self, repind: int, s: str) -> str:
        """Substitute "$1".."$N" placeholders in *s* for repetition *repind*.

        FIX(review): the original iterated range(self.count) — the repeat
        COUNT — instead of over the declared variables, so with more
        variables than repetitions the surplus placeholders were never
        substituted, and with more repetitions than variables a present
        "$n" tripped the get_value assertion.  We also substitute the
        highest-numbered placeholder first so "$1" cannot corrupt "$10".
        """
        if not self.vars or "$" not in s:
            return s
        for i in reversed(range(len(self.vars))):
            t = "$%d" % (i + 1)
            if t in s:
                s = s.replace(t, str(self.get_value(repind, i)))
        return s


def parse_spacedAddress(s: str) -> T32SpacedAddress:
    """Parse "space:expr" into a T32SpacedAddress."""
    sp = s.split(':')
    assert len(sp) == 2
    return T32SpacedAddress(sp[0], parse_int(sp[1], False))


def parse_int(s: str, spaced: bool = True):
    """Parse a PER-style integer literal.

    Accepted forms: "space:expr" (only when *spaced* is true), trailing-dot
    decimal ("123."), "0x"/"0X" hex, and anything else via int(s, 0).
    Returns an int, or a T32SpacedAddress for the spaced form.

    FIX(review): the original accepted *spaced* but never consulted it, so
    parse_int(x, False) still produced addresses; now a colon form with
    spaced=False fails loudly in int() instead.
    """
    assert s is not None and len(s) > 0, "'%s'" % s
    if spaced and ':' in s:
        return parse_spacedAddress(s)
    if s[-1] == '.':
        return int(s[:-1])
    if s[:2].lower() == '0x':
        return int(s[2:], 16)
    return int(s, 0)  # base-0 guess for anything else


def parse_repeatvar(s: str) -> T32RepeatVar:
    """Parse one parenthesized repeat-variable spec.

    Forms: (strings "a" "b" ...), (list a b ...), (increment start step).
    """
    assert s[0] == '(' and s[-1] == ')', "badly formatted repeatvar: %s" % s
    s = s[1:-1]
    sl = s.lower()

    if sl.startswith('strings '):
        typ = 'strings'
        rest = []
        start = len(typ) + 1
        instr = False
        # Scan quoted strings; case of the quoted text is preserved.
        for i in range(start, len(s)):
            if s[i] == ' ' and not instr:
                continue
            if s[i] == '"':
                if not instr:
                    instr = True
                    start = i + 1
                else:
                    rest.append(s[start:i])
                    instr = False
    elif sl.startswith('list '):
        typ = 'list'
        # NOTE(review): args are taken from the lowercased copy, so case is
        # lost — presumably intentional for case-insensitive tokens; verify.
        rest = sl[len(typ) + 1:].strip().split()
    elif sl.startswith('increment '):
        typ = 'increment'
        rest = sl[len(typ) + 1:].strip().split()
        assert len(rest) == 2, "bad increment repeatvar args: %s" % s
        rest = [parse_int(x) for x in rest]
    else:
        assert False, "bad repeatvar type: %s" % s

    return T32RepeatVar(typ, rest)


def parse_repeat(args: Sequence[str]) -> T32Repeat:
    """Parse a repeat token's arguments: count followed by variable specs.

    Non-increment variables must supply exactly *count* values; increment
    variables are (start, step) and are exempt from the length check.
    """
    assert len(args) >= 1
    count = parse_int(args[0], False)
    vars = tuple(parse_repeatvar(x) for x in args[1:])
    for v in vars:
        if v.type != 'increment':
            assert len(v.args) == count
    return T32Repeat(count, vars)
WIDTH = "width" # what is this - SAVEINDEX = "saveindex" # what is this + #SAVEINDEX = "saveindex" # what is this TREE_OPEN = "tree.open" - TREE_CLOSE = "tree.close" # like tree.open + TREE_CLOSE = "tree.close" # like tree.open, determines visibility TREE_END = "tree.end" TREE = "tree" @@ -51,7 +53,7 @@ class PerTokenName(Enum): RBITFLD = "rbitfld" HEXFLD = "hexfld" #RHEXFLD? WHEXFLD?? - EVENTFLD = "eventfld" + EVENTFLD = "eventfld" # == read-clear? SETCLRFLD = "setclrfld" # masks in registers? # hexmask.qual IDK start--end MASK "CODE,description" @@ -71,8 +73,14 @@ class PerTokenName(Enum): # IDK: entry, read, wait, saveindex, saveout, set, getx, register # EH: include, endian.{be,le} (-> conditional endianness) # TODO: copy: copy from previous group? (->derivedFrom whee) + # copy: copy all from previous group + # (tree+group define peripheral, can be multiple groups per tree tho. 'base' part of tree, optional) # TODO: repeat{,.end}: repeat macro stuff (cf. dimIndexElement) + # repeat <...> + # stuff -> $1,$2 are variables + # repeat.end # TODO: repeat.replay: copy+replay + # repeat.replay: copy previous repeat PER_TOKEN_IGNORE = { 'assert','autoindent.on','autoindent.off','button','elif','else','endif', @@ -122,49 +130,110 @@ class PerToken(NamedTuple): args: List[str] = [] -# TODO: tokenize into data stream with useful arguments # TODO: data stream -> tree structure -def find_tok(l: str, hdr: bool) -> PerTokenName: +def find_tok(l: str, hdr: bool) -> Union[PerTokenName, Sequence[PerTokenQual]]: ll=l.lower() if hdr else l for h in (PER_TOKEN_HEADER if hdr else PER_TOKEN_BODY): #print("ll='%s', h='%s'"%(ll,h.value)) if ll.startswith(h.value.lower() if hdr else h.value): - return h + if hdr: return h + rest = ll[len(h.value):] + if len(rest) == 0 or rest[0] == ' ': + return (h, None) + elif rest[0] == '.': + # quals follow + quals = [] + cqualstart = 1 + for i in range(1, len(rest)): + if rest[i] == ' ': + quals.append(rest[cqualstart:i].upper()) + break + elif 
rest[i] == '.': + if i != cqualstart + 1: + quals.append(rest[cqualstart:i].upper()) + cqualstart = i+1 + qs = tuple(PerTokenQual._member_map_[k] for k in quals) + return (h, qs[0] if len(qs) == 1 else qs) + else: + continue # not a match assert not hdr or l.split()[0] in PER_TOKEN_IGNORE, "Unknown token on line: %s"%l return None +def split_str(s: str) -> List[str]: + r = [] + start = 0 + instr = False + inparen = 0 + for i in range(len(s)): + if s[i] == '"': + if not instr and i > 0 and s[i-1] == '"' and start != i-1: + # sigh... + ss = s[start:i].strip() + if len(ss) > 0: r.append(ss) + start = i + instr = not instr + elif s[i] == '(' and not instr: + if inparen == 0 and start != i-1: + ss = s[start:i].strip() + if len(ss) > 0: r.append(ss) + start = i + inparen += 1 + elif s[i] == ')' and not instr: + assert inparen > 0 + inparen -= 1 + elif s[i] == ' ' and start != i-1 and not instr and inparen == 0: + ss = s[start:i].strip() + if len(ss) > 0: r.append(ss) + start = i + if start < len(s): + ss = s[start:].strip() + if len(ss) > 0: r.append(ss) + return r + def tokenize_body(f, l=None): prevtell = -1 + emptycount = 0 + #prevl = None while True: if l is None: l = f.readline().strip() if len(l) == 0: tell = f.tell() - if tell == prevtell: break # EOF + #print("empty, prevl:",prevl) + if tell == prevtell: + emptycount += 1 + if emptycount == 3: + break # EOF + else: + emptycount = 0 prevtell = tell + l = None continue + #prevl = l ll = l.lower() - sp = l.split() + sp = split_str(l)#l.split() # regular token t = find_tok(ll, False) #print("t",t) if t is not None: - yield PerToken(t, args=sp[1:]) + #print("regular token", t) + tt, qq = t + yield PerToken(tt, qual=qq, args=sp[1:]) l = None continue - ht = sp[0].lower().split('.') - assert len(ht) > 0 or sp[0].lower() in PER_TOKEN_IGNORE, "Unknown token on line: %s"%l - t = find_tok(ht[0], False) - if t is not None: - assert len(ht) in {2,3},"bad qual %s in line %s"%(repr(ht),l) - quals = [PerTokenQual.__members__[k] 
for k in ht[1:]] - if len(quals) == 1: quals = quals[0] - else: quals = tuple(quals) - yield PerToken(t, qual=quals, args=sp[1:]) - l = None - continue + #ht = sp[0].lower().split('.') + #assert len(ht) > 0 or sp[0].lower() in PER_TOKEN_IGNORE, "Unknown token on line: %s"%l + #t = find_tok(ht[0], False) + #if t is not None: + # assert len(ht) in {2,3},"bad qual %s in line %s"%(repr(ht),l) + # quals = [PerTokenQual.__members__[k] for k in ht[1:]] + # if len(quals) == 1: quals = quals[0] + # else: quals = tuple(quals) + # yield PerToken(t, qual=quals, args=sp[1:]) + # l = None + # continue l = None @@ -182,11 +251,12 @@ def tokenize(f): prevtell = tell continue if l[0] != ';': + yield PerToken(curtok, args=curlines) yield from tokenize_body(f,l) break if l.startswith('; -------'): continue - if l.startswith('; @') and l[3] != ' ': + if (l.startswith('; @') or l.startswith('; $')) and l[3] != ' ': # new token! # flush old one if curtok is not None: @@ -198,21 +268,100 @@ def tokenize(f): curlines.append(l[3:].strip()) +def repeat_apply(r: T32Repeat, l: List[PerToken]): + for i in range(r.count): + for tok in l: + #print("==>",tok,r) + if tok.args is None or len(tok.args) == 0 or len(r.vars) == 0 or \ + all("$" not in a for a in tok.args): + yield tok + else: + yield PerToken(tok.name, tok.qual, [r.eval(i,x) for x in tok.args]) + + # flatten copy and repeat statements -def tokenize_flatten(itor): - yield from itor # TODO +# actually no let's not, we can turn this into derivedFrom statements, which is probably better +# eh, we can leave it in for easier processing earlier on +def tokenize_flatten_repeat(itor: Iterable[PerToken]): + currep = None + currvar = None + lastrvar = None + for tok in itor: + if tok.name == PerTokenName.REPEAT: + assert currvar is None + currvar = parse_repeat(tok.args) + currep = [] + elif tok.name == PerTokenName.REPEAT_END: + yield from repeat_apply(currvar, currep) + lastrvar = currvar + currvar = None + elif tok.name == 
PerTokenName.REPEAT_REPLAY: + assert lastrvar is not None + yield from repeat_apply(lastrvar, currep) + elif currvar is not None: + currep.append(tok) + else: + yield tok +def tokenize_flatten_copy(itor: Iterable[PerToken]): + curgrp = None + curgcmd = None + lastgrp = None + lastgcmd = None + in_grp = False + + for tok in itor: + if tok.name in {PerTokenName.GROUP,PerTokenName.HGROUP, + PerTokenName.RGROUP,PerTokenName.SGROUP, + PerTokenName.WGROUP}: + lastgrp = curgrp + lastgcmd = curgcmd + curgrp = [] + curgcmd = tok + in_grp = True + yield tok + elif tok.name == PerTokenName.TREE_END: + # group must end at tree end + in_grp = False + yield tok + elif tok.name == PerTokenName.COPY: + assert lastgrp is not None + yield from lastgrp + # be able to do multiple copies + in_grp = False + curgrp = lastgrp + elif in_grp: + curgrp.append(tok) + else: + yield tok + + + +def tokenize_flatten(itor: Iterable[PerToken]): + yield from tokenize_flatten_repeat(tokenize_flatten_copy(itor)) if __name__ == '__main__': - import glob - for p in glob.glob('t/*/*.per'): - print(p) - with open(p,'r') as f: - for x in tokenize(f): pass + if False: + import glob + for p in glob.glob('t/*/*.per'): + print(p) + with open(p,'r') as f: + for x in tokenize(f): pass - with open('t/arm/peram65xx.per','r') as f: - for x in tokenize(f): print(x) - with open('t/arm/perfm0p.per','r') as f: - for x in tokenize(f): print(x) + #with open('t/arm/perfm0p.per','r') as f: # nothing special + # for x in tokenize(f): print(x) + #with open('t/arm/peram65xx.per','r') as f: # general, also has repeat cmd + # for x in tokenize_flatten(tokenize(f)): print(x) + with open('t/arm/perpxa.per','r') as f: # copy cmd + for x in tokenize_flatten(tokenize(f)): print(x) + #with open('t/arm/perpsoc4000.per','r') as f: # weird paren grammar + # for x in tokenize(f): print(x) + # weird string literal grammar: + #with open('t/arm/perrzt1.per','r') as f: # at VLAN_PRIORITY0 + # for x in tokenize(f): print(x) + # 
arm/pertegrax1.per arm/pers32g2.per arm/perkinetisw.per arm/perrzt1.per arm/perstm32f7x.per arm/perxavier.per arm/pertegrax2.per + # counterexample/-test: arm/percortexa73a53.per + #with open('t/arm/percortexa73a53.per','r') as f: + # for x in tokenize(f): print(x)