#!/usr/bin/env python3
from enum import Enum
from typing import *
from perrepeat import *

class PerTokenName(Enum):
    Title = "; @Title"
    Props = "; @Props"
    Author = "; @Author"
    Changelog = "; @Changelog"
    Manufacturer = "; @Manufacturer"
    Doc = "; @Doc"
    Core = "; @Core"
    Chip = "; @Chip"
    Chiplist = "; @Chiplist"
    Copyright = "; @Copyright"
    Description = "; @Description"
    Keywords = "; @Keywords"
    Date = "; @Date"
    #HeaderSep = "; -------"
    Id = "; $Id"

    CONFIG = "config" # what is this
    BASE = "base" # base address: base addrspace:expr
    # expr can be an int or (d.l(addrspace:off)) -> ???
    WIDTH = "width" # what is this
    #SAVEINDEX = "saveindex" # what is this

    # NOTE: member order matters: find_tok() matches by prefix, so the longer
    # "tree.*" / "repeat.*" names must come before plain "tree" / "repeat".
    TREE_OPEN = "tree.open"
    TREE_CLOSE = "tree.close" # like tree.open, determines visibility
    TREE_END = "tree.end"
    TREE = "tree"

    # group of stuff (sometimes one reg?)
    # group (addrspace:)start--end
    # addrspaces seen: c15 c14 ad(=what?) <none> EE(avr eeprom) D(avr data?)
    #   d(msp430) CSR(riscv) NAR(xtensa) SPR(xtensa)
    #   "e:comp.base('name',-1)" (also xtensa)
    # also seen: group iospace() (teaklite)
    GROUP = "group"
    HGROUP = "hgroup"
    RGROUP = "rgroup"
    SGROUP = "sgroup"
    WGROUP = "wgroup"

    # a register (sometimes hidden)
    # line.qual offset "CODENAME,description"
    HIDE = "hide"
    LINE = "line"

    # bitfields and stuff in registers
    # bitfld.qual IDK start(--end) "CODE,description" ("if 0","if 1",...)
    BITFLD = "bitfld"
    ABITFLD = "abitfld"
    RBITFLD = "rbitfld"
    HEXFLD = "hexfld"
    #RHEXFLD? WHEXFLD??
    EVENTFLD = "eventfld" # == read-clear?
    SETCLRFLD = "setclrfld"

    # masks in registers?
    # hexmask.qual IDK start--end MASK "CODE,description"
    DECMASK = "decmask"
    #RDECMASK? WDECMASK??
    HEXMASK = "hexmask"
    RHEXMASK = "rhexmask"
    #WHEXMASK??

    COPY = "copy"
    REPEAT_END = "repeat.end"
    REPEAT_REPLAY = "repeat.replay"
    REPEAT = "repeat"

# to ignore: assert autoindent.{on,off} button elif else endif if in
#            newline sif textline textfld x
# IDK: entry, read, wait, saveindex, saveout, set, getx, register
# EH: include, endian.{be,le} (-> conditional endianness)
# TODO: copy: copy from previous group? (-> derivedFrom, whee)
#   copy: copies everything from the previous group
# (tree+group define a peripheral, though there can be multiple groups per
#  tree; 'base' is part of the tree, and optional)
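
# To make the notes above concrete, a made-up .per fragment pieced together
# from the grammar templates in the comments (NOT from a real file; the
# names, offsets and addrspace are invented, and the "?" marks the field the
# notes above call IDK):
#
#   tree.open "MYPERIPH"
#   base d:0x40001000
#   group 0x00--0x0f
#   line.long 0x00 "CR,control register"
#   bitfld.long ? 0 "EN,enable" ("disabled","enabled")
#   tree.end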

# TODO: repeat{,.end}: repeat macro stuff (cf. dimIndexElement)
#   repeat <repno> <list1> <list2> <...>
#   stuff in between -> $1,$2,... are the variables
#   repeat.end
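
# A guessed expansion example (syntax inferred from the notes above and from
# parse_repeat()/T32Repeat in perrepeat; not verified against a real .per
# file):
#
#   repeat 3. (0x00 0x10 0x20) (A B C)
#   line.long $1 "REG$2,register $2"
#   repeat.end
#
# would yield three line.long tokens with ($1,$2) bound to (0x00,A),
# (0x10,B), (0x20,C) in turn (see repeat_apply() below).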

# TODO: repeat.replay: copy+replay
#   repeat.replay: replays the previous repeat block

PER_TOKEN_IGNORE = {
    'assert','autoindent.on','autoindent.off','button','elif','else','endif',
    'if','in','newline','sif','textline','textfld','x',
    'entry','read','wait','saveindex','saveout','set','getx','register',
    'include','endian.be','endian.le'
}

PER_TOKEN_HEADER = [PerTokenName.__members__[x] for x in (
    'Title','Props','Author','Changelog','Manufacturer','Doc','Core','Chiplist',
    'Copyright','Id','Chip','Description','Keywords','Date',#'HeaderSep',
)]
PER_TOKEN_BODY = [v for k, v in PerTokenName.__members__.items() if v not in PER_TOKEN_HEADER]

class PerTokenQual(Enum):
    # also .<hex>?
    BYTE = "byte"
    WORD = "word"
    LONG = "long"
    QUAD = "quad"
    SHORT = "short"
    SBYTE = "sbyte"
    TBYTE = "tbyte"
    # combined quals like these are used for mask stuff; find_tok() returns
    # them as a tuple of two PerTokenQual values instead
    """
    LONG_TBYTE = "long.tbyte"
    LONG_BYTE = "long.byte"
    LONG_WORD = "long.word"
    LONG_LONG = "long.long"
    WORD_BYTE = "word.byte"
    WORD_WORD = "word.word"
    BYTE_BYTE = "byte.byte"
    QUAD_BYTE = "quad.byte"
    QUAD_SBYTE = "quad.sbyte"
    QUAD_TBYTE = "quad.tbyte"
    QUAD_WORD = "quad.word"
    QUAD_SHORT = "quad.short"
    QUAD_LONG = "quad.long"
    QUAD_QUAD = "quad.quad"
    TBYTE_BYTE = "tbyte.byte"
    TBYTE_WORD = "tbyte.word"
    """

class PerToken(NamedTuple):
    name: PerTokenName
    qual: Optional[Union[PerTokenQual, Tuple[PerTokenQual, ...]]] = None
    args: List[str] = []

# TODO: data stream -> tree structure
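
# e.g. a body line like
#   line.long 0x10 "CR,control register"
# should come out of tokenize_body() as (sketch):
#   PerToken(PerTokenName.LINE, qual=PerTokenQual.LONG,
#            args=['0x10', '"CR,control register"'])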

def find_tok(l: str, hdr: bool) -> Optional[Union[PerTokenName,
        Tuple[PerTokenName, Union[PerTokenQual, Tuple[PerTokenQual, ...], None]]]]:
    ll = l.lower() if hdr else l
    for h in (PER_TOKEN_HEADER if hdr else PER_TOKEN_BODY):
        #print("ll='%s', h='%s'"%(ll,h.value))
        if ll.startswith(h.value.lower() if hdr else h.value):
            if hdr: return h
            rest = ll[len(h.value):]
            if len(rest) == 0 or rest[0] == ' ':
                return (h, None)
            elif rest[0] == '.':
                # quals follow
                quals = []
                cqualstart = 1
                for i in range(1, len(rest)):
                    if rest[i] == ' ':
                        quals.append(rest[cqualstart:i].upper())
                        break
                    elif rest[i] == '.':
                        if i != cqualstart + 1:
                            quals.append(rest[cqualstart:i].upper())
                        cqualstart = i+1
                else:
                    # no space after the last qual: it runs to end-of-line
                    if cqualstart < len(rest):
                        quals.append(rest[cqualstart:].upper())
                qs = tuple(PerTokenQual[k] for k in quals)
                return (h, qs[0] if len(qs) == 1 else qs)
            else:
                continue # not a match
    # in header mode an unmatched line is always an error (';' is never in
    # the ignore set); in body mode, unknown tokens are skipped by the caller
    assert not hdr or l.split()[0] in PER_TOKEN_IGNORE, "Unknown token on line: %s"%l
    return None
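
# find_tok sketches (header matching is case-insensitive, body matching is not):
#   find_tok('; @Title: foo', True)  -> PerTokenName.Title
#   find_tok('line.long 0x10 "CR,..."', False)
#       -> (PerTokenName.LINE, PerTokenQual.LONG)
#   find_tok('hexmask.quad.byte 0--7 0xff "M,..."', False)
#       -> (PerTokenName.HEXMASK, (PerTokenQual.QUAD, PerTokenQual.BYTE))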

def split_str(s: str) -> List[str]:
    r = []
    start = 0
    instr = False
    inparen = 0
    for i in range(len(s)):
        if s[i] == '"':
            if not instr and i > 0 and s[i-1] == '"' and start != i-1:
                # sigh... two quoted strings back to back
                ss = s[start:i].strip()
                if len(ss) > 0: r.append(ss)
                start = i
            instr = not instr
        elif s[i] == '(' and not instr:
            if inparen == 0 and start != i-1:
                ss = s[start:i].strip()
                if len(ss) > 0: r.append(ss)
                start = i
            inparen += 1
        elif s[i] == ')' and not instr:
            assert inparen > 0
            inparen -= 1
        elif s[i] == ' ' and start != i-1 and not instr and inparen == 0:
            ss = s[start:i].strip()
            if len(ss) > 0: r.append(ss)
            start = i
    if start < len(s):
        ss = s[start:].strip()
        if len(ss) > 0: r.append(ss)
    return r
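
# quick sketch of split_str behavior: quoted strings and parenthesized
# expressions are kept together as single items:
#   split_str('line.long 0x10 "CR,control reg" (d.l(ASD:0x4))')
#     -> ['line.long', '0x10', '"CR,control reg"', '(d.l(ASD:0x4))']
# (ASD: is an invented addrspace, cf. the BASE notes above)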

def tokenize_body(f, l=None):
    prevtell = -1
    emptycount = 0
    #prevl = None
    while True:
        if l is None: l = f.readline().strip()
        if len(l) == 0:
            tell = f.tell()
            #print("empty, prevl:",prevl)
            if tell == prevtell:
                emptycount += 1
                if emptycount == 3:
                    break # EOF
            else:
                emptycount = 0
            prevtell = tell
            l = None
            continue
        #prevl = l

        ll = l.lower()
        sp = split_str(l)#l.split()

        # regular token
        t = find_tok(ll, False)
        #print("t",t)
        if t is not None:
            #print("regular token", t)
            tt, qq = t
            yield PerToken(tt, qual=qq, args=sp[1:])
            l = None
            continue

        #ht = sp[0].lower().split('.')
        #assert len(ht) > 0 or sp[0].lower() in PER_TOKEN_IGNORE, "Unknown token on line: %s"%l
        #t = find_tok(ht[0], False)
        #if t is not None:
        #    assert len(ht) in {2,3},"bad qual %s in line %s"%(repr(ht),l)
        #    quals = [PerTokenQual.__members__[k] for k in ht[1:]]
        #    if len(quals) == 1: quals = quals[0]
        #    else: quals = tuple(quals)
        #    yield PerToken(t, qual=quals, args=sp[1:])
        #    l = None
        #    continue

        # unknown/ignored token: just skip the line
        l = None

def tokenize(f):
    curtok = None
    curlines = []
    prevtell = -1
    while True:
        l = f.readline().strip()
        if len(l) == 0:
            tell = f.tell()
            if tell == prevtell: break # EOF
            prevtell = tell
            continue
        if l[0] != ';':
            # header is done; flush the pending header token (if any) and
            # hand the rest of the file (including this line) to tokenize_body
            if curtok is not None:
                yield PerToken(curtok, args=curlines)
            yield from tokenize_body(f, l)
            break
        if l.startswith('; -------'): continue
        if (l.startswith('; @') or l.startswith('; $')) and l[3] != ' ':
            # new token!
            # flush old one
            if curtok is not None:
                yield PerToken(curtok, args=curlines)
            # start new
            curtok = find_tok(l, True)
            curlines = [l[len(curtok.value)+1:].strip()]
        else:
            curlines.append(l[3:].strip())
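
# sketch of the header phase, assuming headers look like the PerTokenName
# values above (pieced from the parsing code, not from a real .per file):
#   ; @Title: some peripheral set
#   ; @Author: someone
#   ;   a continuation line for the Author field
# becomes PerToken(PerTokenName.Title, args=['some peripheral set']) and
# PerToken(PerTokenName.Author, args=['someone',
# 'a continuation line for the Author field']), after which parsing
# switches to tokenize_body() for the rest of the file.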

def repeat_apply(r: T32Repeat, l: List[PerToken]):
    for i in range(r.count):
        for tok in l:
            #print("==>",tok,r)
            if tok.args is None or len(tok.args) == 0 or len(r.vars) == 0 or \
                    all("$" not in a for a in tok.args):
                yield tok
            else:
                yield PerToken(tok.name, tok.qual, [r.eval(i, x) for x in tok.args])

# flatten copy and repeat statements
# actually, no: we could turn these into derivedFrom statements instead,
# which is probably better. eh, leave the flattening in anyway, it makes
# early processing easier.
def tokenize_flatten_repeat(itor: Iterable[PerToken]):
    currep = None
    currvar = None
    lastrvar = None
    for tok in itor:
        if tok.name == PerTokenName.REPEAT:
            assert currvar is None
            currvar = parse_repeat(tok.args)
            currep = []
        elif tok.name == PerTokenName.REPEAT_END:
            yield from repeat_apply(currvar, currep)
            lastrvar = currvar
            currvar = None
        elif tok.name == PerTokenName.REPEAT_REPLAY:
            assert lastrvar is not None
            yield from repeat_apply(lastrvar, currep)
        elif currvar is not None:
            currep.append(tok)
        else:
            yield tok
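
# sketch of the expansion, in token terms: a stream like
#   REPEAT LINE($1 ...) REPEAT_END LINE(other) REPEAT_REPLAY
# yields LINE($1 ...) once per iteration with $1 substituted (via
# T32Repeat.eval), then LINE(other) untouched, then the whole expansion
# again for the replay; note currep is deliberately not cleared at
# repeat.end so that repeat.replay can reuse it.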

def tokenize_flatten_copy(itor: Iterable[PerToken]):
    curgrp = None
    curgcmd = None
    lastgrp = None
    lastgcmd = None
    in_grp = False
    for tok in itor:
        if tok.name in {PerTokenName.GROUP, PerTokenName.HGROUP,
                        PerTokenName.RGROUP, PerTokenName.SGROUP,
                        PerTokenName.WGROUP}:
            lastgrp = curgrp
            lastgcmd = curgcmd
            curgrp = []
            curgcmd = tok
            in_grp = True
            yield tok
        elif tok.name == PerTokenName.TREE_END:
            # group must end at tree end
            in_grp = False
            yield tok
        elif tok.name == PerTokenName.COPY:
            assert lastgrp is not None
            yield from lastgrp
            # be able to do multiple copies
            in_grp = False
            curgrp = lastgrp
        elif in_grp:
            # inside a group: record the token for a later copy, and pass it
            # through unchanged
            curgrp.append(tok)
            yield tok
        else:
            yield tok
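
# sketch in token terms: GROUP(a) LINE(x) LINE(y) GROUP(b) COPY
# comes out as GROUP(a) LINE(x) LINE(y) GROUP(b) LINE(x) LINE(y);
# the copy token itself is swallowed and replaced by a replay of the
# previous group's recorded contents.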

def tokenize_flatten(itor: Iterable[PerToken]):
    yield from tokenize_flatten_repeat(tokenize_flatten_copy(itor))

if __name__ == '__main__':
    if False:
        import glob
        for p in glob.glob('t/*/*.per'):
            print(p)
            with open(p,'r') as f:
                for x in tokenize(f): pass

    #with open('t/arm/perfm0p.per','r') as f: # nothing special
    #    for x in tokenize(f): print(x)
    #with open('t/arm/peram65xx.per','r') as f: # general, also has repeat cmd
    #    for x in tokenize_flatten(tokenize(f)): print(x)
    with open('t/arm/perpxa.per','r') as f: # copy cmd
        for x in tokenize_flatten(tokenize(f)): print(x)
    #with open('t/arm/perpsoc4000.per','r') as f: # weird paren grammar
    #    for x in tokenize(f): print(x)

    # weird string literal grammar:
    #with open('t/arm/perrzt1.per','r') as f: # at VLAN_PRIORITY0
    #    for x in tokenize(f): print(x)
    # arm/pertegrax1.per arm/pers32g2.per arm/perkinetisw.per arm/perrzt1.per
    # arm/perstm32f7x.per arm/perxavier.per arm/pertegrax2.per
    # counterexample/-test: arm/percortexa73a53.per
    #with open('t/arm/percortexa73a53.per','r') as f:
    #    for x in tokenize(f): print(x)