PER: tokenizer

#!/usr/bin/env python3
from enum import Enum
from typing import List, NamedTuple, Optional, Tuple, Union


class PerTokenName(Enum):
    Title = "; @Title"
    Props = "; @Props"
    Author = "; @Author"
    Changelog = "; @Changelog"
    Manufacturer = "; @Manufacturer"
    Doc = "; @Doc"
    Core = "; @Core"
    Chip = "; @Chip"
    Chiplist = "; @Chiplist"
    Copyright = "; @Copyright"
    Description = "; @Description"
    Keywords = "; @Keywords"
    Date = "; @Date"
    #HeaderSep = "; -------"
    Id = "; @Id"

    CONFIG = "config" # what is this
    BASE = "base" # base address, base addrspace:expr
    # expr can be int or (d.l(addrspace:off)) -> ???
    WIDTH = "width" # what is this
    SAVEINDEX = "saveindex" # what is this

    TREE_OPEN = "tree.open"
    TREE_CLOSE = "tree.close" # like tree.open
    TREE_END = "tree.end"
    TREE = "tree"

    # group of stuff (sometimes one reg?)
    # group (addrspace:)start--end
    # addrspaces: c15 c14 ad(=what?) <none> EE(avr eeprom) D(avr data?) d(msp430) CSR(riscv) NAR(xtensa) "e:comp.base('name',-1)"(also xtensa) SPR(xtensa)
    # also seen: group iospace() (teaklite)
    GROUP = "group"
    HGROUP = "hgroup"
    RGROUP = "rgroup"
    SGROUP = "sgroup"
    WGROUP = "wgroup"

    # a register (sometimes hidden)
    # line.qual offset "CODENAME,description"
    HIDE = "hide"
    LINE = "line"

    # bitfields and stuff in registers
    # bitfld.qual IDK start(--end) "CODE,description" ("if 0","if 1",...)
    BITFLD = "bitfld"
    ABITFLD = "abitfld"
    RBITFLD = "rbitfld"
    HEXFLD = "hexfld"
    #RHEXFLD? WHEXFLD??
    EVENTFLD = "eventfld"
    SETCLRFLD = "setclrfld"

    # masks in registers?
    # hexmask.qual IDK start--end MASK "CODE,description"
    DECMASK = "decmask"
    #RDECMASK? WDECMASK??
    HEXMASK = "hexmask"
    RHEXMASK = "rhexmask"
    #WHEXMASK??

    COPY = "copy"
    REPEAT_END = "repeat.end"
    REPEAT_REPLAY = "repeat.replay"
    REPEAT = "repeat"


# to ignore: assert autoindent.{on,off} button elif else endif if in
# newline sif textline textfld x
# IDK: entry, read, wait, saveindex, saveout, set, getx, register
# EH: include, endian.{be,le} (-> conditional endianness)
# TODO: copy: copy from previous group? (->derivedFrom whee)
# TODO: repeat{,.end}: repeat macro stuff (cf. dimIndexElement)
# TODO: repeat.replay: copy+replay
PER_TOKEN_IGNORE = {
    'assert','autoindent.on','autoindent.off','button','elif','else','endif',
    'if','in','newline','sif','textline','textfld','x',
    'entry','read','wait','saveindex','saveout','set','getx','register',
    'include','endian.be','endian.le'
}
PER_TOKEN_HEADER = [PerTokenName.__members__[x] for x in (
    'Title','Props','Author','Changelog','Manufacturer','Doc','Core','Chiplist',
    'Copyright','Id','Chip','Description','Keywords','Date', #'HeaderSep',
)]
PER_TOKEN_BODY = [v for k, v in PerTokenName.__members__.items() if v not in PER_TOKEN_HEADER]
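# NB: PER_TOKEN_BODY keeps enum declaration order, and find_tok() below matches
# by prefix, so dotted names ("tree.open", "repeat.end") are declared before the
# bare "tree"/"repeat" tokens to win the prefix match.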


class PerTokenQual(Enum):
    # also .<hex>?
    BYTE = "byte"
    WORD = "word"
    LONG = "long"
    QUAD = "quad"
    SHORT = "short"
    SBYTE = "sbyte"
    TBYTE = "tbyte"
    # used for mask stuff
    """
    LONG_TBYTE = "long.tbyte"
    LONG_BYTE = "long.byte"
    LONG_WORD = "long.word"
    LONG_LONG = "long.long"
    WORD_BYTE = "word.byte"
    WORD_WORD = "word.word"
    BYTE_BYTE = "byte.byte"
    QUAD_BYTE = "quad.byte"
    QUAD_SBYTE = "quad.sbyte"
    QUAD_TBYTE = "quad.tbyte"
    QUAD_WORD = "quad.word"
    QUAD_SHORT = "quad.short"
    QUAD_LONG = "quad.long"
    QUAD_QUAD = "quad.quad"
    TBYTE_BYTE = "tbyte.byte"
    TBYTE_WORD = "tbyte.word"
    """


class PerToken(NamedTuple):
    name: PerTokenName
    qual: Optional[Union[PerTokenQual, Tuple[PerTokenQual, PerTokenQual]]] = None
    # NB: the default list is shared across instances; callers always pass a fresh one
    args: List[str] = []
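
# A rough sketch (hypothetical PER input) of what a parsed body line maps to --
# note args is a plain whitespace split, so quoted strings stay fragmented:
#   line.long 0x10 "CTRL,control register"
#   -> PerToken(name=PerTokenName.LINE, qual=PerTokenQual.LONG,
#               args=['0x10', '"CTRL,control', 'register"'])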


# TODO: tokenize into data stream with useful arguments
# TODO: data stream -> tree structure


def find_tok(l: str, hdr: bool) -> Optional[PerTokenName]:
    ll = l.lower() if hdr else l
    for h in (PER_TOKEN_HEADER if hdr else PER_TOKEN_BODY):
        #print("ll='%s', h='%s'"%(ll,h.value))
        if ll.startswith(h.value.lower() if hdr else h.value):
            return h
    # header lookups must always succeed; body lookups may return None so the
    # caller can try qualifier splitting or the ignore list
    assert not hdr, "Unknown token on line: %s"%l
    return None
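
# e.g. find_tok('; @title foo', True) -> PerTokenName.Title (case-insensitive
# prefix match on the header list); find_tok('tree.open "X"', False)
# -> PerTokenName.TREE_OPEN; unknown body tokens return None.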


def tokenize_body(f, l=None):
    prevtell = -1
    while True:
        if l is None: l = f.readline().strip()
        if len(l) == 0:
            tell = f.tell()
            if tell == prevtell: break # EOF
            prevtell = tell
            continue
        ll = l.lower()
        sp = l.split()

        # plain token: require an exact match on the first field so qualified
        # forms like "line.long" are not swallowed by the bare "line" prefix
        # (the exact match also catches dotted token names like "tree.open")
        t = find_tok(ll, False)
        #print("t",t)
        if t is not None and sp[0].lower() == t.value:
            yield PerToken(t, args=sp[1:])
            l = None
            continue

        # token with one or two dot-separated qualifiers, e.g. "line.long"
        ht = sp[0].lower().split('.')
        if len(ht) > 1:
            tq = find_tok(ht[0], False)
            if tq is not None and ht[0] == tq.value:
                assert len(ht) in {2,3}, "bad qual %s in line %s"%(repr(ht),l)
                try:
                    # qualifier strings are enum *values* ("long"), not names
                    quals = [PerTokenQual(k) for k in ht[1:]]
                    quals = quals[0] if len(quals) == 1 else tuple(quals)
                except ValueError:
                    quals = None # e.g. the ".<hex>" qualifiers noted above
                yield PerToken(tq, qual=quals, args=sp[1:])
                l = None
                continue

        # fall back to the loose prefix match, else the line must be a comment
        # or one of the statements we deliberately ignore
        if t is not None:
            yield PerToken(t, args=sp[1:])
        else:
            assert l[0] == ';' or sp[0].lower() in PER_TOKEN_IGNORE, "Unknown token on line: %s"%l
        l = None


def tokenize(f):
    curtok = None
    curlines = []

    prevtell = -1
    while True:
        l = f.readline().strip()
        if len(l) == 0:
            tell = f.tell()
            if tell == prevtell: break # EOF
            prevtell = tell
            continue
        if l[0] != ';':
            # header done: flush the pending header token so the last header
            # field is not dropped, then hand the first body line off
            if curtok is not None:
                yield PerToken(curtok, args=curlines)
                curtok = None
            yield from tokenize_body(f, l)
            break
        if l.startswith('; -------'): continue

        if l.startswith('; @') and l[3] != ' ':
            # new header token: flush the old one
            if curtok is not None:
                yield PerToken(curtok, args=curlines)
            # start the new one
            curtok = find_tok(l, True)
            curlines = [l[len(curtok.value)+1:].strip()]
        else:
            # continuation line of the current header token
            curlines.append(l[3:].strip())

    # EOF before any body: flush whatever header token is still pending
    if curtok is not None:
        yield PerToken(curtok, args=curlines)
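
# A minimal smoke-test sketch (made-up PER snippet, not from a real file):
#
#   import io
#   src = ('; @Title example\n; @Author nobody\n'
#          'tree.open "TOP"\nline.long 0x0 "R0,reg"\ntree.end\n')
#   for tok in tokenize(io.StringIO(src)):
#       print(tok)
#
# which should yield the Title and Author header tokens followed by TREE_OPEN,
# LINE (qual=LONG) and TREE_END body tokens.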


# flatten copy and repeat statements
def tokenize_flatten(itor):
    yield from itor # TODO


if __name__ == '__main__':
    import glob
    for p in glob.glob('t/*/*.per'):
        print(p)
        with open(p,'r') as f:
            for x in tokenize(f): pass

    with open('t/arm/peram65xx.per','r') as f:
        for x in tokenize(f): print(x)
    with open('t/arm/perfm0p.per','r') as f:
        for x in tokenize(f): print(x)