From 07ae409fdf6cc203b064ef93d8a74ca674410f1e Mon Sep 17 00:00:00 2001
From: Triss
Date: Thu, 2 Jun 2022 03:37:38 +0200
Subject: [PATCH] PER: tokenizer

---
 t32per/pertoken.py | 218 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 218 insertions(+)
 create mode 100755 t32per/pertoken.py

diff --git a/t32per/pertoken.py b/t32per/pertoken.py
new file mode 100755
index 0000000..26654d0
--- /dev/null
+++ b/t32per/pertoken.py
@@ -0,0 +1,218 @@
+#!/usr/bin/env python3
+
+from enum import Enum
+from typing import *
+
+class PerTokenName(Enum):
+    Title = "; @Title"
+    Props = "; @Props"
+    Author = "; @Author"
+    Changelog = "; @Changelog"
+    Manufacturer = "; @Manufacturer"
+    Doc = "; @Doc"
+    Core = "; @Core"
+    Chip = "; @Chip"
+    Chiplist = "; @Chiplist"
+    Copyright = "; @Copyright"
+    Description = "; @Description"
+    Keywords = "; @Keywords"
+    Date = "; @Date"
+    #HeaderSep = "; -------"
+    Id = "; @Id"
+
+    CONFIG = "config" # what is this
+    BASE = "base" # base address, base addrspace:expr
+    # expr can be int or (d.l(addrspace:off)) -> ???
+    WIDTH = "width" # what is this
+    SAVEINDEX = "saveindex" # what is this
+
+    TREE_OPEN = "tree.open"
+    TREE_CLOSE = "tree.close" # like tree.open
+    TREE_END = "tree.end"
+    TREE = "tree"
+
+    # group of stuff (sometimes one reg?)
+    # group (addrspace:)start--end
+    # addrspaces: c15 c14 ad(=what?) EE(avr eeprom) D(avr data?) d(msp430) CSR(riscv) NAR(xtensa shit), "e:comp.base('name',-1)"(also xtensa) SPR(xtensa)
+    # also seen: group iospace() (teaklite)
+    GROUP = "group"
+    HGROUP = "hgroup"
+    RGROUP = "rgroup"
+    SGROUP = "sgroup"
+    WGROUP = "wgroup"
+    # a register (sometimes hidden)
+    # line.qual offset "CODENAME,description"
+    HIDE = "hide"
+    LINE = "line"
+    # bitfields and stuff in registers
+    # bitfld.qual IDK start(--end) "CODE,description" ("if 0","if 1",...)
+    BITFLD = "bitfld"
+    ABITFLD = "abitfld"
+    RBITFLD = "rbitfld"
+    HEXFLD = "hexfld"
+    #RHEXFLD? WHEXFLD??
+    EVENTFLD = "eventfld"
+    SETCLRFLD = "setclrfld"
+    # masks in registers?
+    # hexmask.qual IDK start--end MASK "CODE,description"
+    DECMASK = "decmask"
+    #RDECMASK? WDECMASK??
+    HEXMASK = "hexmask"
+    RHEXMASK = "rhexmask"
+    #WHEXMASK??
+
+    COPY = "copy"
+    REPEAT_END = "repeat.end"
+    REPEAT_REPLAY = "repeat.replay"
+    REPEAT = "repeat"
+
+    # to ignore: assert autoindent.{on,off} button elif else endif if in
+    # newline sif textline textfld x
+    # IDK: entry, read, wait, saveindex, saveout, set, getx, register
+    # EH: include, endian.{be,le} (-> conditional endianness)
+    # TODO: copy: copy from previous group? (->derivedFrom whee)
+    # TODO: repeat{,.end}: repeat macro stuff (cf. dimIndexElement)
+    # TODO: repeat.replay: copy+replay
+
+PER_TOKEN_IGNORE = {
+    'assert','autoindent.on','autoindent.off','button','elif','else','endif',
+    'if','in','newline','sif','textline','textfld','x',
+    'entry','read','wait','saveindex','saveout','set','getx','register',
+    'include','endian.be','endian.le'
+}
+PER_TOKEN_HEADER = [PerTokenName.__members__[x] for x in (
+    'Title','Props','Author','Changelog','Manufacturer','Doc','Core','Chiplist',
+    'Copyright','Id','Chip','Description','Keywords','Date',#'HeaderSep',
+)]
+PER_TOKEN_BODY = [v for k, v in PerTokenName.__members__.items() if v not in PER_TOKEN_HEADER]
+
+class PerTokenQual(Enum):
+    # also .?
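+    # A hypothetical example of where a qual would come from (these lines are
+    # illustrative, not copied from a real .per file): "line.long 0x00 ..."
+    # would carry the single qual LONG, while a mask directive written as
+    # "hexmask.long.byte 0x04 0xff ..." would carry the pair (LONG, BYTE).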
+    BYTE = "byte"
+    WORD = "word"
+    LONG = "long"
+    QUAD = "quad"
+    SHORT = "short"
+    SBYTE = "sbyte"
+    TBYTE = "tbyte"
+    # used for mask stuff
+    """
+    LONG_TBYTE = "long.tbyte"
+    LONG_BYTE = "long.byte"
+    LONG_WORD = "long.word"
+    LONG_LONG = "long.long"
+    WORD_BYTE = "word.byte"
+    WORD_WORD = "word.word"
+    BYTE_BYTE = "byte.byte"
+    QUAD_BYTE = "quad.byte"
+    QUAD_SBYTE = "quad.sbyte"
+    QUAD_TBYTE = "quad.tbyte"
+    QUAD_WORD = "quad.word"
+    QUAD_SHORT = "quad.short"
+    QUAD_LONG = "quad.long"
+    QUAD_QUAD = "quad.quad"
+    TBYTE_BYTE = "tbyte.byte"
+    TBYTE_WORD = "tbyte.word"
+    """
+
+
+class PerToken(NamedTuple):
+    name: PerTokenName
+    qual: Optional[Union[PerTokenQual, Tuple[PerTokenQual, PerTokenQual]]] = None
+    args: List[str] = []
+
+
+# TODO: tokenize into data stream with useful arguments
+# TODO: data stream -> tree structure
+
+def find_tok(l: str, hdr: bool) -> Optional[PerTokenName]:
+    # header tags ("; @Title" etc.) match as a case-insensitive prefix of the
+    # whole line; body keywords must match the first word exactly, otherwise
+    # qualified forms such as "line.long" would be matched as a bare "line"
+    # and their qualifier lost
+    ll = l.lower() if hdr else l.split()[0]
+    for h in (PER_TOKEN_HEADER if hdr else PER_TOKEN_BODY):
+        #print("ll='%s', h='%s'"%(ll,h.value))
+        if (ll.startswith(h.value.lower()) if hdr else ll == h.value):
+            return h
+    assert not hdr or l.split()[0] in PER_TOKEN_IGNORE, "Unknown token on line: %s"%l
+    return None
+
+
+def tokenize_body(f, l=None):
+    # tokenize the body of a PER file: tree/group/line/bitfld/... directives
+    prevtell = -1
+    while True:
+        if l is None: l = f.readline().strip()
+        if len(l) == 0:
+            tell = f.tell()
+            if tell == prevtell: break # EOF
+            prevtell = tell
+            continue
+        ll = l.lower()
+        sp = l.split()
+        # unqualified token
+        t = find_tok(ll, False)
+        #print("t",t)
+        if t is not None:
+            yield PerToken(t, args=sp[1:])
+            l = None
+            continue
+
+        # qualified token, e.g. "line.long" or "hexmask.long.byte"
+        ht = sp[0].lower().split('.')
+        # a bare unknown keyword must at least be on the ignore list
+        assert len(ht) > 1 or sp[0].lower() in PER_TOKEN_IGNORE, "Unknown token on line: %s"%l
+        t = find_tok(ht[0], False)
+        if t is not None:
+            assert len(ht) in {2,3}, "bad qual %s in line %s"%(repr(ht),l)
+            quals = [PerTokenQual(k) for k in ht[1:]] # look up by value ("long", "byte", ...)
+            if len(quals) == 1: quals = quals[0]
+            else: quals = tuple(quals)
+            yield PerToken(t, qual=quals, args=sp[1:])
+            l = None
+            continue
+
+        # ignored or not-yet-handled directive
+        l = None
+
+
+def tokenize(f):
+    # tokenize a whole PER file: "; @Tag" header block first, then the body
+    curtok = None
+    curlines = []
+
+    prevtell = -1
+    while True:
+        l = f.readline().strip()
+        if len(l) == 0:
+            tell = f.tell()
+            if tell == prevtell: break # EOF
+            prevtell = tell
+            continue
+        if l[0] != ';':
+            # body starts: flush the pending header token before handing off
+            if curtok is not None:
+                yield PerToken(curtok, args=curlines)
+                curtok = None
+            yield from tokenize_body(f,l)
+            break
+        if l.startswith('; -------'): continue # header separator
+
+        if l.startswith('; @') and l[3] != ' ':
+            # new token!
+            # flush old one
+            if curtok is not None:
+                yield PerToken(curtok, args=curlines)
+            # start new
+            curtok = find_tok(l, True)
+            curlines = [l[len(curtok.value)+1:].strip()]
+        else:
+            curlines.append(l[3:].strip())
+    # header-only file: flush the last header token at EOF
+    if curtok is not None:
+        yield PerToken(curtok, args=curlines)
+
+
+# flatten copy and repeat statements
+def tokenize_flatten(itor):
+    yield from itor # TODO
+
+
+if __name__ == '__main__':
+    import glob
+    for p in glob.glob('t/*/*.per'):
+        print(p)
+        with open(p,'r') as f:
+            for x in tokenize(f): pass
+
+    with open('t/arm/peram65xx.per','r') as f:
+        for x in tokenize(f): print(x)
+    with open('t/arm/perfm0p.per','r') as f:
+        for x in tokenize(f): print(x)
+
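
A minimal usage sketch, assuming the module above is importable as `pertoken` and fed through io.StringIO; the PER fragment is made up for illustration (its directives follow the notes in PerTokenName, not any real .per file):

import io
from pertoken import tokenize

SAMPLE = """\
; @Title: example peripheral
; @Author: nobody
tree.open
group 0x40001000--0x400010ff
line.long 0x00 "CTRL,control register"
bitfld.long 0 "EN,peripheral enable" "disabled","enabled"
tree.close
"""

# Each yielded item is a PerToken carrying the raw whitespace-split args;
# turning those into structured fields is left to the later data-stream/tree
# passes mentioned in the TODOs.
for tok in tokenize(io.StringIO(SAMPLE)):
    print(tok)

tokenize() consumes the "; @Tag" header block itself and hands the first non-comment line over to tokenize_body(), so a single call covers both halves of the file.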