#!/usr/bin/env python3
|
||||
|
||||
from enum import Enum
|
||||
from typing import *
|
||||
|
||||
class PerTokenName(Enum):
    """Token names recognized in .per peripheral-description files.

    The value of each member is the literal text the token starts with
    on a source line.  The ``; @Name`` members are header tokens found in
    the file's leading comment block; the lowercase members are body
    statements.

    NOTE: declaration order matters.  PER_TOKEN_BODY preserves this
    order and find_tok() matches with str.startswith, so a longer name
    must be declared before any name that is its prefix
    (e.g. ``tree.open`` before ``tree``, ``Chiplist`` before ``Chip``).
    """
    # --- header tokens: "; @Name" lines in the file's comment header ---
    Title = "; @Title"
    Props = "; @Props"
    Author = "; @Author"
    Changelog = "; @Changelog"
    Manufacturer = "; @Manufacturer"
    Doc = "; @Doc"
    Core = "; @Core"
    Chip = "; @Chip"
    Chiplist = "; @Chiplist"
    Copyright = "; @Copyright"
    Description = "; @Description"
    Keywords = "; @Keywords"
    Date = "; @Date"
    #HeaderSep = "; -------"
    Id = "; @Id"

    # --- body tokens ---
    CONFIG = "config"  # what is this
    BASE = "base"  # base address: "base addrspace:expr"
    # expr can be int or (d.l(addrspace:off)) -> ???
    WIDTH = "width"  # what is this
    SAVEINDEX = "saveindex"  # what is this

    TREE_OPEN = "tree.open"
    TREE_CLOSE = "tree.close"  # like tree.open
    TREE_END = "tree.end"
    TREE = "tree"

    # group of stuff (sometimes a single register?)
    # syntax: group (addrspace:)start--end
    # addrspaces seen so far: c15 c14 ad(=what?) <none> EE(avr eeprom)
    # D(avr data?) d(msp430) CSR(riscv) NAR(xtensa)
    # "e:comp.base('name',-1)"(also xtensa) SPR(xtensa)
    # also seen: group iospace() (teaklite)
    GROUP = "group"
    HGROUP = "hgroup"
    RGROUP = "rgroup"
    SGROUP = "sgroup"
    WGROUP = "wgroup"

    # a register (sometimes hidden)
    # syntax: line.qual offset "CODENAME,description"
    HIDE = "hide"
    LINE = "line"

    # bitfields and stuff inside registers
    # syntax: bitfld.qual IDK start(--end) "CODE,description" ("if 0","if 1",...)
    BITFLD = "bitfld"
    ABITFLD = "abitfld"
    RBITFLD = "rbitfld"
    HEXFLD = "hexfld"
    #RHEXFLD? WHEXFLD??
    EVENTFLD = "eventfld"
    SETCLRFLD = "setclrfld"

    # masks in registers?
    # syntax: hexmask.qual IDK start--end MASK "CODE,description"
    DECMASK = "decmask"
    #RDECMASK? WDECMASK??
    HEXMASK = "hexmask"
    RHEXMASK = "rhexmask"
    #WHEXMASK??

    COPY = "copy"
    REPEAT_END = "repeat.end"
    REPEAT_REPLAY = "repeat.replay"
    REPEAT = "repeat"


# to ignore: assert autoindent.{on,off} button elif else endif if in
# newline sif textline textfld x
# IDK: entry, read, wait, saveindex, saveout, set, getx, register
# EH: include, endian.{be,le} (-> conditional endianness)
# TODO: copy: copy from previous group? (->derivedFrom whee)
# TODO: repeat{,.end}: repeat macro stuff (cf. dimIndexElement)
# TODO: repeat.replay: copy+replay
|
||||
|
||||
# Body statement keywords we recognize but deliberately skip.
PER_TOKEN_IGNORE = set(
    "assert autoindent.on autoindent.off button elif else endif "
    "if in newline sif textline textfld x "
    "entry read wait saveindex saveout set getx register "
    "include endian.be endian.le".split()
)

# Header tokens in match order: a longer name must precede any name that
# is its prefix ('Chiplist' before 'Chip') because find_tok() matches
# with str.startswith.
PER_TOKEN_HEADER = [
    PerTokenName[name]
    for name in (
        'Title','Props','Author','Changelog','Manufacturer','Doc','Core','Chiplist',
        'Copyright','Id','Chip','Description','Keywords','Date',#'HeaderSep',
    )
]

# Everything that is not a header token is a body token
# (enum declaration order preserved).
PER_TOKEN_BODY = [m for m in PerTokenName.__members__.values() if m not in PER_TOKEN_HEADER]
|
||||
|
||||
class PerTokenQual(Enum):
    """Size qualifier appended to a body token after a dot, e.g. ``line.long``.

    Mask-style tokens carry two qualifiers (``hexmask.long.byte``); those
    pairs are stored as a tuple of two members rather than as the
    combined names listed in the disabled block below.
    """
    # also .<hex>?
    BYTE = "byte"
    WORD = "word"
    LONG = "long"
    QUAD = "quad"
    SHORT = "short"
    SBYTE = "sbyte"
    TBYTE = "tbyte"
    # used for mask stuff
    # (deliberately disabled: pairs are represented as tuples instead)
    """
    LONG_TBYTE = "long.tbyte"
    LONG_BYTE = "long.byte"
    LONG_WORD = "long.word"
    LONG_LONG = "long.long"
    WORD_BYTE = "word.byte"
    WORD_WORD = "word.word"
    BYTE_BYTE = "byte.byte"
    QUAD_BYTE = "quad.byte"
    QUAD_SBYTE = "quad.sbyte"
    QUAD_TBYTE = "quad.tbyte"
    QUAD_WORD = "quad.word"
    QUAD_SHORT = "quad.short"
    QUAD_LONG = "quad.long"
    QUAD_QUAD = "quad.quad"
    TBYTE_BYTE = "tbyte.byte"
    TBYTE_WORD = "tbyte.word"
    """
|
||||
|
||||
|
||||
class PerToken(NamedTuple):
    """One tokenized statement of a .per file."""
    # which token this line is
    name: PerTokenName
    # size qualifier(s): None, a single qualifier, or an (outer, inner)
    # pair for mask-style tokens
    qual: Optional[Union[PerTokenQual, Tuple[PerTokenQual, PerTokenQual]]] = None
    # remaining whitespace-separated words of the line (for header
    # tokens: the accumulated text lines of the entry)
    # NOTE(review): this mutable default [] is shared by every instance
    # constructed without args — safe only while callers never mutate
    # tok.args in place.
    args: List[str] = []
|
||||
|
||||
|
||||
# TODO: tokenize into data stream with useful arguments
|
||||
# TODO: data stream -> tree structure
|
||||
|
||||
def find_tok(l: str, hdr: bool) -> PerTokenName:
    """Return the PerTokenName whose value is a prefix of line *l*, or None.

    With hdr=True the search is case-insensitive over the header tokens;
    otherwise body tokens are matched verbatim.  An unmatched header line
    whose first word is not in PER_TOKEN_IGNORE trips the assertion;
    unmatched body lines simply yield None.
    """
    haystack = l.lower() if hdr else l
    candidates = PER_TOKEN_HEADER if hdr else PER_TOKEN_BODY
    for candidate in candidates:
        prefix = candidate.value.lower() if hdr else candidate.value
        if haystack.startswith(prefix):
            return candidate
    assert not hdr or l.split()[0] in PER_TOKEN_IGNORE, "Unknown token on line: %s"%l
    return None
|
||||
|
||||
|
||||
def tokenize_body(f, l=None):
    """Yield PerToken objects for the body section of a .per file.

    f: open text file positioned at the body.
    l: optional already-read (stripped) first line to process before
       reading further lines from f.

    Lines that match no token are skipped silently.
    """
    prevtell = -1
    while True:
        if l is None: l = f.readline().strip()
        if len(l) == 0:
            # A stripped line is empty on a blank line or at EOF;
            # distinguish via the file position: no progress means EOF.
            tell = f.tell()
            if tell == prevtell: break # EOF
            prevtell = tell
            continue
        ll = l.lower()
        sp = l.split()
        # unqualified token: case-insensitive prefix match on the line
        t = find_tok(ll, False)
        if t is not None:
            yield PerToken(t, args=sp[1:])
            l = None
            continue

        # qualified token, e.g. "line.long" or "hexmask.long.byte":
        # split the first word at dots and match its base name
        ht = sp[0].lower().split('.')
        # NOTE(review): len(ht) > 0 is always true (str.split never
        # returns an empty list), so this assert can never fire and
        # unknown tokens fall through to be dropped silently below —
        # presumably it was meant to be len(ht) > 1; confirm before fixing.
        assert len(ht) > 0 or sp[0].lower() in PER_TOKEN_IGNORE, "Unknown token on line: %s"%l
        t = find_tok(ht[0], False)
        if t is not None:
            # base name plus one or two qualifiers
            assert len(ht) in {2,3},"bad qual %s in line %s"%(repr(ht),l)
            quals = [PerTokenQual.__members__[k] for k in ht[1:]]
            # a single qualifier is stored bare, a pair as a tuple
            if len(quals) == 1: quals = quals[0]
            else: quals = tuple(quals)
            yield PerToken(t, qual=quals, args=sp[1:])
            l = None
            continue

        # unrecognized line: ignore it and read the next one
        l = None
|
||||
|
||||
|
||||
def tokenize(f):
    """Yield PerToken objects for a whole .per file.

    First parses the leading ``; @Name`` comment header, accumulating a
    token's continuation lines until the next header token starts, then
    hands off to tokenize_body() for the rest of the file.
    """
    curtok = None   # header token currently being accumulated
    curlines = []   # its text lines collected so far

    prevtell = -1
    while True:
        l = f.readline().strip()
        if len(l) == 0:
            # blank line or EOF; it is EOF iff the position stopped advancing
            tell = f.tell()
            if tell == prevtell: break # EOF
            prevtell = tell
            continue
        if l[0] != ';':
            # End of the comment header.  Flush the pending header token
            # before streaming body tokens (previously it was dropped
            # here, losing the last header entry, e.g. Date).
            if curtok is not None:
                yield PerToken(curtok, args=curlines)
                curtok = None
            yield from tokenize_body(f, l)
            break
        if l.startswith('; -------'): continue

        # "; @X..." with a non-space X starts a new header token
        # (len guard: a bare "; @" line would otherwise IndexError)
        if l.startswith('; @') and len(l) > 3 and l[3] != ' ':
            # flush the previous token first
            if curtok is not None:
                yield PerToken(curtok, args=curlines)
            curtok = find_tok(l, True)
            curlines = [l[len(curtok.value)+1:].strip()]
        else:
            # continuation line: drop the "; " prefix
            curlines.append(l[3:].strip())

    # Header-only file (EOF before any body line): flush the final token.
    if curtok is not None:
        yield PerToken(curtok, args=curlines)
|
||||
|
||||
|
||||
def tokenize_flatten(itor):
    """Expand copy/repeat statements in a token stream.

    TODO: currently just a passthrough of the input tokens.
    """
    for tok in itor:
        yield tok
|
||||
|
||||
|
||||
if __name__ == '__main__':
    import glob

    # Smoke test: tokenize every sample file, only checking nothing raises.
    for path in glob.glob('t/*/*.per'):
        print(path)
        with open(path, 'r') as fh:
            for _ in tokenize(fh):
                pass

    # Dump the token streams of two representative files for inspection.
    for sample in ('t/arm/peram65xx.per', 't/arm/perfm0p.per'):
        with open(sample, 'r') as fh:
            for tok in tokenize(fh):
                print(tok)
|
||||
|
||||
|