#!/usr/bin/env python3
from enum import Enum
from typing import *
from perrepeat import *

class PerTokenName(Enum):
    Title = "; @Title"
    Props = "; @Props"
    Author = "; @Author"
    Changelog = "; @Changelog"
    Manufacturer = "; @Manufacturer"
    Doc = "; @Doc"
    Core = "; @Core"
    Chip = "; @Chip"
    Chiplist = "; @Chiplist"
    Copyright = "; @Copyright"
    Description = "; @Description"
    Keywords = "; @Keywords"
    Date = "; @Date"
    #HeaderSep = "; -------"
    Id = "; $Id"

    CONFIG = "config" # what is this
    BASE = "base" # base address: base addrspace:expr
    # expr can be an int or (d.l(addrspace:off)) -> ???
    WIDTH = "width" # what is this
    #SAVEINDEX = "saveindex" # what is this

    # NOTE: member order matters: find_tok() matches by prefix, so the longer
    # "tree.*" / "repeat.*" names must come before plain "tree" / "repeat".
    TREE_OPEN = "tree.open"
    TREE_CLOSE = "tree.close" # like tree.open, determines visibility
    TREE_END = "tree.end"
    TREE = "tree"

    # group of stuff (sometimes one reg?)
    # group (addrspace:)start--end
    # addrspaces seen: c15 c14 ad(=what?) <none> EE(avr eeprom) D(avr data?)
    #   d(msp430) CSR(riscv) NAR(xtensa) SPR(xtensa)
    #   "e:comp.base('name',-1)" (also xtensa)
    # also seen: group iospace() (teaklite)
    GROUP = "group"
    HGROUP = "hgroup"
    RGROUP = "rgroup"
    SGROUP = "sgroup"
    WGROUP = "wgroup"

    # a register (sometimes hidden)
    # line.qual offset "CODENAME,description"
    HIDE = "hide"
    LINE = "line"

    # bitfields and stuff in registers
    # bitfld.qual IDK start(--end) "CODE,description" ("if 0","if 1",...)
    BITFLD = "bitfld"
    ABITFLD = "abitfld"
    RBITFLD = "rbitfld"
    HEXFLD = "hexfld"
    #RHEXFLD? WHEXFLD??
    EVENTFLD = "eventfld" # == read-clear?
    SETCLRFLD = "setclrfld"

    # masks in registers?
    # hexmask.qual IDK start--end MASK "CODE,description"
    DECMASK = "decmask"
    #RDECMASK? WDECMASK??
    HEXMASK = "hexmask"
    RHEXMASK = "rhexmask"
    #WHEXMASK??

    COPY = "copy"
    REPEAT_END = "repeat.end"
    REPEAT_REPLAY = "repeat.replay"
    REPEAT = "repeat"

# to ignore: assert autoindent.{on,off} button elif else endif if in
#            newline sif textline textfld x
# IDK: entry, read, wait, saveindex, saveout, set, getx, register
# EH: include, endian.{be,le} (-> conditional endianness)
# TODO: copy: copy from previous group? (-> derivedFrom, whee)
#   copy: copies everything from the previous group
# (tree+group define a peripheral, though there can be multiple groups per
#  tree; 'base' is part of the tree, and optional)
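
# To make the notes above concrete, a made-up .per fragment pieced together
# from the grammar templates in the comments (NOT from a real file; the
# names, offsets and addrspace are invented, and the "?" marks the field the
# notes above call IDK):
#
#   tree.open "MYPERIPH"
#   base d:0x40001000
#   group 0x00--0x0f
#   line.long 0x00 "CR,control register"
#   bitfld.long ? 0 "EN,enable" ("disabled","enabled")
#   tree.end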

# TODO: repeat{,.end}: repeat macro stuff (cf. dimIndexElement)
#   repeat <repno> <list1> <list2> <...>
#   stuff in between -> $1,$2,... are the variables
#   repeat.end
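
# A guessed expansion example (syntax inferred from the notes above and from
# parse_repeat()/T32Repeat in perrepeat; not verified against a real .per
# file):
#
#   repeat 3. (0x00 0x10 0x20) (A B C)
#   line.long $1 "REG$2,register $2"
#   repeat.end
#
# would yield three line.long tokens with ($1,$2) bound to (0x00,A),
# (0x10,B), (0x20,C) in turn (see repeat_apply() below).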

# TODO: repeat.replay: copy+replay
#   repeat.replay: replays the previous repeat block

PER_TOKEN_IGNORE = {
    'assert','autoindent.on','autoindent.off','button','elif','else','endif',
    'if','in','newline','sif','textline','textfld','x',
    'entry','read','wait','saveindex','saveout','set','getx','register',
    'include','endian.be','endian.le'
}

PER_TOKEN_HEADER = [PerTokenName.__members__[x] for x in (
    'Title','Props','Author','Changelog','Manufacturer','Doc','Core','Chiplist',
    'Copyright','Id','Chip','Description','Keywords','Date',#'HeaderSep',
)]
PER_TOKEN_BODY = [v for k, v in PerTokenName.__members__.items() if v not in PER_TOKEN_HEADER]

class PerTokenQual(Enum):
    # also .<hex>?
    BYTE = "byte"
    WORD = "word"
    LONG = "long"
    QUAD = "quad"
    SHORT = "short"
    SBYTE = "sbyte"
    TBYTE = "tbyte"
    # combined quals like these are used for mask stuff; find_tok() returns
    # them as a tuple of two PerTokenQual values instead
    """
    LONG_TBYTE = "long.tbyte"
    LONG_BYTE = "long.byte"
    LONG_WORD = "long.word"
    LONG_LONG = "long.long"
    WORD_BYTE = "word.byte"
    WORD_WORD = "word.word"
    BYTE_BYTE = "byte.byte"
    QUAD_BYTE = "quad.byte"
    QUAD_SBYTE = "quad.sbyte"
    QUAD_TBYTE = "quad.tbyte"
    QUAD_WORD = "quad.word"
    QUAD_SHORT = "quad.short"
    QUAD_LONG = "quad.long"
    QUAD_QUAD = "quad.quad"
    TBYTE_BYTE = "tbyte.byte"
    TBYTE_WORD = "tbyte.word"
    """

class PerToken(NamedTuple):
    name: PerTokenName
    qual: Optional[Union[PerTokenQual, Tuple[PerTokenQual, ...]]] = None
    args: List[str] = []

# TODO: data stream -> tree structure
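
# e.g. a body line like
#   line.long 0x10 "CR,control register"
# should come out of tokenize_body() as (sketch):
#   PerToken(PerTokenName.LINE, qual=PerTokenQual.LONG,
#            args=['0x10', '"CR,control register"'])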

def find_tok(l: str, hdr: bool) -> Optional[Union[PerTokenName,
        Tuple[PerTokenName, Union[PerTokenQual, Tuple[PerTokenQual, ...], None]]]]:
    ll = l.lower() if hdr else l
    for h in (PER_TOKEN_HEADER if hdr else PER_TOKEN_BODY):
        #print("ll='%s', h='%s'"%(ll,h.value))
        if ll.startswith(h.value.lower() if hdr else h.value):
            if hdr: return h
            rest = ll[len(h.value):]
            if len(rest) == 0 or rest[0] == ' ':
                return (h, None)
            elif rest[0] == '.':
                # quals follow
                quals = []
                cqualstart = 1
                for i in range(1, len(rest)):
                    if rest[i] == ' ':
                        quals.append(rest[cqualstart:i].upper())
                        break
                    elif rest[i] == '.':
                        if i != cqualstart + 1:
                            quals.append(rest[cqualstart:i].upper())
                        cqualstart = i+1
                else:
                    # no space after the last qual: it runs to end-of-line
                    if cqualstart < len(rest):
                        quals.append(rest[cqualstart:].upper())
                qs = tuple(PerTokenQual[k] for k in quals)
                return (h, qs[0] if len(qs) == 1 else qs)
            else:
                continue # not a match
    # in header mode an unmatched line is always an error (';' is never in
    # the ignore set); in body mode, unknown tokens are skipped by the caller
    assert not hdr or l.split()[0] in PER_TOKEN_IGNORE, "Unknown token on line: %s"%l
    return None
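
# find_tok sketches (header matching is case-insensitive, body matching is not):
#   find_tok('; @Title: foo', True)  -> PerTokenName.Title
#   find_tok('line.long 0x10 "CR,..."', False)
#       -> (PerTokenName.LINE, PerTokenQual.LONG)
#   find_tok('hexmask.quad.byte 0--7 0xff "M,..."', False)
#       -> (PerTokenName.HEXMASK, (PerTokenQual.QUAD, PerTokenQual.BYTE))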

def split_str(s: str) -> List[str]:
    r = []
    start = 0
    instr = False
    inparen = 0
    for i in range(len(s)):
        if s[i] == '"':
            if not instr and i > 0 and s[i-1] == '"' and start != i-1:
                # sigh... two quoted strings back to back
                ss = s[start:i].strip()
                if len(ss) > 0: r.append(ss)
                start = i
            instr = not instr
        elif s[i] == '(' and not instr:
            if inparen == 0 and start != i-1:
                ss = s[start:i].strip()
                if len(ss) > 0: r.append(ss)
                start = i
            inparen += 1
        elif s[i] == ')' and not instr:
            assert inparen > 0
            inparen -= 1
        elif s[i] == ' ' and start != i-1 and not instr and inparen == 0:
            ss = s[start:i].strip()
            if len(ss) > 0: r.append(ss)
            start = i
    if start < len(s):
        ss = s[start:].strip()
        if len(ss) > 0: r.append(ss)
    return r
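
# quick sketch of split_str behavior: quoted strings and parenthesized
# expressions are kept together as single items:
#   split_str('line.long 0x10 "CR,control reg" (d.l(ASD:0x4))')
#     -> ['line.long', '0x10', '"CR,control reg"', '(d.l(ASD:0x4))']
# (ASD: is an invented addrspace, cf. the BASE notes above)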

def tokenize_body(f, l=None):
    prevtell = -1
    emptycount = 0
    #prevl = None
    while True:
        if l is None: l = f.readline().strip()
        if len(l) == 0:
            tell = f.tell()
            #print("empty, prevl:",prevl)
            if tell == prevtell:
                emptycount += 1
                if emptycount == 3:
                    break # EOF
            else:
                emptycount = 0
            prevtell = tell
            l = None
            continue
        #prevl = l

        ll = l.lower()
        sp = split_str(l)#l.split()

        # regular token
        t = find_tok(ll, False)
        #print("t",t)
        if t is not None:
            #print("regular token", t)
            tt, qq = t
            yield PerToken(tt, qual=qq, args=sp[1:])
            l = None
            continue

        #ht = sp[0].lower().split('.')
        #assert len(ht) > 0 or sp[0].lower() in PER_TOKEN_IGNORE, "Unknown token on line: %s"%l
        #t = find_tok(ht[0], False)
        #if t is not None:
        #    assert len(ht) in {2,3},"bad qual %s in line %s"%(repr(ht),l)
        #    quals = [PerTokenQual.__members__[k] for k in ht[1:]]
        #    if len(quals) == 1: quals = quals[0]
        #    else: quals = tuple(quals)
        #    yield PerToken(t, qual=quals, args=sp[1:])
        #    l = None
        #    continue

        # unknown/ignored token: just skip the line
        l = None

def tokenize(f):
    curtok = None
    curlines = []
    prevtell = -1
    while True:
        l = f.readline().strip()
        if len(l) == 0:
            tell = f.tell()
            if tell == prevtell: break # EOF
            prevtell = tell
            continue
        if l[0] != ';':
            # header is done; flush the pending header token (if any) and
            # hand the rest of the file (including this line) to tokenize_body
            if curtok is not None:
                yield PerToken(curtok, args=curlines)
            yield from tokenize_body(f, l)
            break
        if l.startswith('; -------'): continue
        if (l.startswith('; @') or l.startswith('; $')) and l[3] != ' ':
            # new token!
            # flush old one
            if curtok is not None:
                yield PerToken(curtok, args=curlines)
            # start new
            curtok = find_tok(l, True)
            curlines = [l[len(curtok.value)+1:].strip()]
        else:
            curlines.append(l[3:].strip())
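
# sketch of the header phase, assuming headers look like the PerTokenName
# values above (pieced from the parsing code, not from a real .per file):
#   ; @Title: some peripheral set
#   ; @Author: someone
#   ;   a continuation line for the Author field
# becomes PerToken(PerTokenName.Title, args=['some peripheral set']) and
# PerToken(PerTokenName.Author, args=['someone',
# 'a continuation line for the Author field']), after which parsing
# switches to tokenize_body() for the rest of the file.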

def repeat_apply(r: T32Repeat, l: List[PerToken]):
    for i in range(r.count):
        for tok in l:
            #print("==>",tok,r)
            if tok.args is None or len(tok.args) == 0 or len(r.vars) == 0 or \
                    all("$" not in a for a in tok.args):
                yield tok
            else:
                yield PerToken(tok.name, tok.qual, [r.eval(i, x) for x in tok.args])

# flatten copy and repeat statements
# actually, no: we could turn these into derivedFrom statements instead,
# which is probably better. eh, leave the flattening in anyway, it makes
# early processing easier.
def tokenize_flatten_repeat(itor: Iterable[PerToken]):
    currep = None
    currvar = None
    lastrvar = None
    for tok in itor:
        if tok.name == PerTokenName.REPEAT:
            assert currvar is None
            currvar = parse_repeat(tok.args)
            currep = []
        elif tok.name == PerTokenName.REPEAT_END:
            yield from repeat_apply(currvar, currep)
            lastrvar = currvar
            currvar = None
        elif tok.name == PerTokenName.REPEAT_REPLAY:
            assert lastrvar is not None
            yield from repeat_apply(lastrvar, currep)
        elif currvar is not None:
            currep.append(tok)
        else:
            yield tok
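
# sketch of the expansion, in token terms: a stream like
#   REPEAT LINE($1 ...) REPEAT_END LINE(other) REPEAT_REPLAY
# yields LINE($1 ...) once per iteration with $1 substituted (via
# T32Repeat.eval), then LINE(other) untouched, then the whole expansion
# again for the replay; note currep is deliberately not cleared at
# repeat.end so that repeat.replay can reuse it.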

def tokenize_flatten_copy(itor: Iterable[PerToken]):
    curgrp = None
    curgcmd = None
    lastgrp = None
    lastgcmd = None
    in_grp = False
    for tok in itor:
        if tok.name in {PerTokenName.GROUP, PerTokenName.HGROUP,
                        PerTokenName.RGROUP, PerTokenName.SGROUP,
                        PerTokenName.WGROUP}:
            lastgrp = curgrp
            lastgcmd = curgcmd
            curgrp = []
            curgcmd = tok
            in_grp = True
            yield tok
        elif tok.name == PerTokenName.TREE_END:
            # group must end at tree end
            in_grp = False
            yield tok
        elif tok.name == PerTokenName.COPY:
            assert lastgrp is not None
            yield from lastgrp
            # be able to do multiple copies
            in_grp = False
            curgrp = lastgrp
        elif in_grp:
            # inside a group: record the token for a later copy, and pass it
            # through unchanged
            curgrp.append(tok)
            yield tok
        else:
            yield tok
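
# sketch in token terms: GROUP(a) LINE(x) LINE(y) GROUP(b) COPY
# comes out as GROUP(a) LINE(x) LINE(y) GROUP(b) LINE(x) LINE(y);
# the copy token itself is swallowed and replaced by a replay of the
# previous group's recorded contents.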

def tokenize_flatten(itor: Iterable[PerToken]):
    yield from tokenize_flatten_repeat(tokenize_flatten_copy(itor))

if __name__ == '__main__':
    if False:
        import glob
        for p in glob.glob('t/*/*.per'):
            print(p)
            with open(p,'r') as f:
                for x in tokenize(f): pass

    #with open('t/arm/perfm0p.per','r') as f: # nothing special
    #    for x in tokenize(f): print(x)
    #with open('t/arm/peram65xx.per','r') as f: # general, also has repeat cmd
    #    for x in tokenize_flatten(tokenize(f)): print(x)
    with open('t/arm/perpxa.per','r') as f: # copy cmd
        for x in tokenize_flatten(tokenize(f)): print(x)
    #with open('t/arm/perpsoc4000.per','r') as f: # weird paren grammar
    #    for x in tokenize(f): print(x)

    # weird string literal grammar:
    #with open('t/arm/perrzt1.per','r') as f: # at VLAN_PRIORITY0
    #    for x in tokenize(f): print(x)
    # arm/pertegrax1.per arm/pers32g2.per arm/perkinetisw.per arm/perrzt1.per
    # arm/perstm32f7x.per arm/perxavier.per arm/pertegrax2.per
    # counterexample/-test: arm/percortexa73a53.per
    #with open('t/arm/percortexa73a53.per','r') as f:
    #    for x in tokenize(f): print(x)