#!/usr/bin/env python3

from enum import Enum
from typing import *

from perrepeat import *


class PerTokenName(Enum):
    # header tokens
    Title = "; @Title"
    Props = "; @Props"
    Author = "; @Author"
    Changelog = "; @Changelog"
    Manufacturer = "; @Manufacturer"
    Doc = "; @Doc"
    Core = "; @Core"
    Chip = "; @Chip"
    Chiplist = "; @Chiplist"
    Copyright = "; @Copyright"
    Description = "; @Description"
    Keywords = "; @Keywords"
    Date = "; @Date"
    #HeaderSep = "; -------"
    Id = "; $Id"

    # body tokens
    CONFIG = "config"  # what is this
    # base address: base addrspace:expr
    # expr can be an int or (d.l(addrspace:off)) -> ???
    BASE = "base"
    WIDTH = "width"  # what is this
    #SAVEINDEX = "saveindex"  # what is this

    TREE_OPEN = "tree.open"
    TREE_CLOSE = "tree.close"  # like tree.open, determines visibility
    TREE_END = "tree.end"
    TREE = "tree"

    # group of stuff (sometimes a single register?)
    # syntax: group (addrspace:)start--end
    # addrspaces seen so far: c15, c14, ad(=what?), EE (AVR EEPROM),
    #   D (AVR data?), d (MSP430), CSR (RISC-V), NAR (Xtensa), SPR (Xtensa),
    #   "e:comp.base('name',-1)" (also Xtensa)
    # also seen: group iospace() (TeakLite)
    GROUP = "group"
    HGROUP = "hgroup"
    RGROUP = "rgroup"
    SGROUP = "sgroup"
    WGROUP = "wgroup"

    # a register (sometimes hidden)
    # syntax: line.qual offset "CODENAME,description"
    HIDE = "hide"
    LINE = "line"

    # bitfields and the like inside registers
    # syntax: bitfld.qual IDK start(--end) "CODE,description" ("if 0","if 1",...)
    BITFLD = "bitfld"
    ABITFLD = "abitfld"
    RBITFLD = "rbitfld"
    HEXFLD = "hexfld"
    #RHEXFLD? WHEXFLD??
    EVENTFLD = "eventfld"  # == read-clear?
    SETCLRFLD = "setclrfld"

    # masks in registers?
    # syntax: hexmask.qual IDK start--end MASK "CODE,description"
    DECMASK = "decmask"
    #RDECMASK? WDECMASK??
    HEXMASK = "hexmask"
    RHEXMASK = "rhexmask"
    #WHEXMASK??

    COPY = "copy"
    REPEAT_END = "repeat.end"
    REPEAT_REPLAY = "repeat.replay"
    REPEAT = "repeat"

    # to ignore: assert autoindent.{on,off} button elif else endif if in
    #   newline sif textline textfld x
    # IDK: entry, read, wait, saveindex, saveout, set, getx, register
    # EH: include, endian.{be,le} (-> conditional endianness)

    # TODO: copy: copy from previous group? (-> derivedFrom, whee)
    #   copy: copy all from previous group
    #   (tree+group define a peripheral, but there can be multiple groups per
    #   tree. 'base' is part of the tree, optional)
    # TODO: repeat{,.end}: repeat macro stuff (cf. dimIndexElement)
    #   repeat <...>
    #     stuff -> $1,$2 are variables
    #   repeat.end
    # TODO: repeat.replay: copy and replay the previous repeat block


PER_TOKEN_IGNORE = {
    'assert','autoindent.on','autoindent.off','button','elif','else','endif',
    'if','in','newline','sif','textline','textfld','x',
    'entry','read','wait','saveindex','saveout','set','getx','register',
    'include','endian.be','endian.le'
}
PER_TOKEN_HEADER = [PerTokenName.__members__[x] for x in (
    'Title','Props','Author','Changelog','Manufacturer','Doc','Core','Chiplist',
    'Copyright','Id','Chip','Description','Keywords','Date',#'HeaderSep',
)]
PER_TOKEN_BODY = [v for k, v in PerTokenName.__members__.items()
                  if v not in PER_TOKEN_HEADER]
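
# Illustrative only: a synthetic .per-style snippet (invented for testing, not
# copied from any real TRACE32 file, so the exact argument shapes are guesses)
# showing the rough structure this tokenizer expects: "; @"-prefixed header
# lines followed by body commands. Used by _demo_tokenize() at the bottom.
_EXAMPLE_PER = '''\
; @Title: example peripheral file
; @Author: nobody
tree.open "EXAMPLE"
group 0x40000000--0x40000FFF
line.long 0x000 "CR,control register"
bitfld.long.byte 0x000 0. "EN,enable" "off" "on"
tree.end
'''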
BYTE = "byte" WORD = "word" LONG = "long" QUAD = "quad" SHORT = "short" SBYTE = "sbyte" TBYTE = "tbyte" # used for mask stuff """ LONG_TBYTE = "long.tbyte" LONG_BYTE = "long.byte" LONG_WORD = "long.word" LONG_LONG = "long.long" WORD_BYTE = "word.byte" WORD_WORD = "word.word" BYTE_BYTE = "byte.byte" QUAD_BYTE = "quad.byte" QUAD_SBYTE = "quad.sbyte" QUAD_TBYTE = "quad.tbyte" QUAD_WORD = "quad.word" QUAD_SHORT = "quad.short" QUAD_LONG = "quad.long" QUAD_QUAD = "quad.quad" TBYTE_BYTE = "tbyte.byte" TBYTE_WORD = "tbyte.word" """ class PerToken(NamedTuple): name: PerTokenName qual: Union[PerTokenQual, Tuple[PerTokenQual, PerTokenQual]] = None args: List[str] = [] # TODO: data stream -> tree structure def find_tok(l: str, hdr: bool) -> Union[PerTokenName, Sequence[PerTokenQual]]: ll=l.lower() if hdr else l for h in (PER_TOKEN_HEADER if hdr else PER_TOKEN_BODY): #print("ll='%s', h='%s'"%(ll,h.value)) if ll.startswith(h.value.lower() if hdr else h.value): if hdr: return h rest = ll[len(h.value):] if len(rest) == 0 or rest[0] == ' ': return (h, None) elif rest[0] == '.': # quals follow quals = [] cqualstart = 1 for i in range(1, len(rest)): if rest[i] == ' ': quals.append(rest[cqualstart:i].upper()) break elif rest[i] == '.': if i != cqualstart + 1: quals.append(rest[cqualstart:i].upper()) cqualstart = i+1 qs = tuple(PerTokenQual._member_map_[k] for k in quals) return (h, qs[0] if len(qs) == 1 else qs) else: continue # not a match assert not hdr or l.split()[0] in PER_TOKEN_IGNORE, "Unknown token on line: %s"%l return None def split_str(s: str) -> List[str]: r = [] start = 0 instr = False inparen = 0 for i in range(len(s)): if s[i] == '"': if not instr and i > 0 and s[i-1] == '"' and start != i-1: # sigh... ss = s[start:i].strip() if len(ss) > 0: r.append(ss) start = i instr = not instr elif s[i] == '(' and not instr: if inparen == 0 and start != i-1: ss = s[start:i].strip() if len(ss) > 0: r.append(ss) start = i inparen += 1 elif s[i] == ')' and not instr: assert inparen > 0 inparen -= 1 elif s[i] == ' ' and start != i-1 and not instr and inparen == 0: ss = s[start:i].strip() if len(ss) > 0: r.append(ss) start = i if start < len(s): ss = s[start:].strip() if len(ss) > 0: r.append(ss) return r def tokenize_body(f, l=None): prevtell = -1 emptycount = 0 #prevl = None while True: if l is None: l = f.readline().strip() if len(l) == 0: tell = f.tell() #print("empty, prevl:",prevl) if tell == prevtell: emptycount += 1 if emptycount == 3: break # EOF else: emptycount = 0 prevtell = tell l = None continue #prevl = l ll = l.lower() sp = split_str(l)#l.split() # regular token t = find_tok(ll, False) #print("t",t) if t is not None: #print("regular token", t) tt, qq = t yield PerToken(tt, qual=qq, args=sp[1:]) l = None continue #ht = sp[0].lower().split('.') #assert len(ht) > 0 or sp[0].lower() in PER_TOKEN_IGNORE, "Unknown token on line: %s"%l #t = find_tok(ht[0], False) #if t is not None: # assert len(ht) in {2,3},"bad qual %s in line %s"%(repr(ht),l) # quals = [PerTokenQual.__members__[k] for k in ht[1:]] # if len(quals) == 1: quals = quals[0] # else: quals = tuple(quals) # yield PerToken(t, qual=quals, args=sp[1:]) # l = None # continue l = None def tokenize(f): curtok = None curlines = [] prevtell = -1 while True: l = f.readline().strip() if len(l) == 0: tell = f.tell() if tell == prevtell: break # EOF prevtell = tell continue if l[0] != ';': yield PerToken(curtok, args=curlines) yield from tokenize_body(f,l) break if l.startswith('; -------'): continue if (l.startswith('; @') or l.startswith('; $')) and 
def tokenize(f):
    curtok = None
    curlines = []
    prevtell = -1
    while True:
        l = f.readline().strip()
        if len(l) == 0:
            tell = f.tell()
            if tell == prevtell: break  # EOF
            prevtell = tell
            continue
        if l[0] != ';':
            # end of the header: flush the last header token, then hand the
            # rest of the file (including this line) to the body tokenizer
            if curtok is not None:
                yield PerToken(curtok, args=curlines)
            yield from tokenize_body(f, l)
            break
        if l.startswith('; -------'):
            continue
        if (l.startswith('; @') or l.startswith('; $')) and l[3] != ' ':
            # new token! flush the old one
            if curtok is not None:
                yield PerToken(curtok, args=curlines)
            # start a new one
            curtok = find_tok(l, True)
            curlines = [l[len(curtok.value)+1:].strip()]
        else:
            curlines.append(l[3:].strip())


# expand one repeat block: replay the buffered tokens r.count times,
# substituting the $-variables in each argument via r.eval
def repeat_apply(r: T32Repeat, l: List[PerToken]):
    for i in range(r.count):
        for tok in l:
            #print("==>",tok,r)
            if tok.args is None or len(tok.args) == 0 or len(r.vars) == 0 or \
               all("$" not in a for a in tok.args):
                yield tok
            else:
                yield PerToken(tok.name, tok.qual, [r.eval(i, x) for x in tok.args])


# flatten copy and repeat statements
# actually no, let's not: we could turn these into derivedFrom statements,
# which is probably better
# eh, we can leave it in for easier processing earlier on
def tokenize_flatten_repeat(itor: Iterable[PerToken]):
    currep = None
    currvar = None
    lastrvar = None
    for tok in itor:
        if tok.name == PerTokenName.REPEAT:
            assert currvar is None
            currvar = parse_repeat(tok.args)
            currep = []
        elif tok.name == PerTokenName.REPEAT_END:
            yield from repeat_apply(currvar, currep)
            lastrvar = currvar
            currvar = None
        elif tok.name == PerTokenName.REPEAT_REPLAY:
            assert lastrvar is not None
            yield from repeat_apply(lastrvar, currep)
        elif currvar is not None:
            currep.append(tok)
        else:
            yield tok


def tokenize_flatten_copy(itor: Iterable[PerToken]):
    curgrp = None
    curgcmd = None  # group command itself (currently unused)
    lastgrp = None
    lastgcmd = None
    in_grp = False
    for tok in itor:
        if tok.name in {PerTokenName.GROUP, PerTokenName.HGROUP,
                        PerTokenName.RGROUP, PerTokenName.SGROUP,
                        PerTokenName.WGROUP}:
            lastgrp = curgrp
            lastgcmd = curgcmd
            curgrp = []
            curgcmd = tok
            in_grp = True
            yield tok
        elif tok.name == PerTokenName.TREE_END:
            # a group must end at tree end
            in_grp = False
            yield tok
        elif tok.name == PerTokenName.COPY:
            assert lastgrp is not None
            yield from lastgrp
            # allow multiple copies of the same group
            in_grp = False
            curgrp = lastgrp
        elif in_grp:
            # pass the token through, and also record it so a later 'copy'
            # can replay it
            curgrp.append(tok)
            yield tok
        else:
            yield tok


def tokenize_flatten(itor: Iterable[PerToken]):
    yield from tokenize_flatten_repeat(tokenize_flatten_copy(itor))


if __name__ == '__main__':
    if False:
        import glob
        for p in glob.glob('t/*/*.per'):
            print(p)
            with open(p, 'r') as f:
                for x in tokenize(f): pass

    #with open('t/arm/perfm0p.per','r') as f:  # nothing special
    #    for x in tokenize(f): print(x)
    #with open('t/arm/peram65xx.per','r') as f:  # general, also has repeat cmd
    #    for x in tokenize_flatten(tokenize(f)): print(x)
    with open('t/arm/perpxa.per', 'r') as f:  # copy cmd
        for x in tokenize_flatten(tokenize(f)): print(x)
    #with open('t/arm/perpsoc4000.per','r') as f:  # weird paren grammar
    #    for x in tokenize(f): print(x)
    # weird string literal grammar:
    #with open('t/arm/perrzt1.per','r') as f:  # at VLAN_PRIORITY0
    #    for x in tokenize(f): print(x)
    # arm/pertegrax1.per arm/pers32g2.per arm/perkinetisw.per arm/perrzt1.per
    # arm/perstm32f7x.per arm/perxavier.per arm/pertegrax2.per
    # counterexample/-test: arm/percortexa73a53.per
    #with open('t/arm/percortexa73a53.per','r') as f:
    #    for x in tokenize(f): print(x)
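
# Illustrative only: drive the tokenizer end to end over the synthetic
# _EXAMPLE_PER snippet defined near the top, so no .per file is needed.
# Not wired into the __main__ block above; call it by hand (e.g. in a REPL).
def _demo_tokenize():
    import io
    for tok in tokenize_flatten(tokenize(io.StringIO(_EXAMPLE_PER))):
        print(tok)
    # expected, roughly: the Title and Author header tokens, then TREE_OPEN,
    # GROUP, LINE (qual LONG), BITFLD (quals (LONG, BYTE)), TREE_END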