140 lines
4.8 KiB
Racket
140 lines
4.8 KiB
Racket
#lang racket/base
|
|
|
|
(require racket/function racket/list racket/match racket/set
|
|
parser-tools/lex
|
|
(prefix-in : parser-tools/lex-sre)
|
|
parser-tools/yacc)
|
|
|
|
(define-tokens kaitai-expr [boolean number string identifier])
|
|
(define-empty-tokens kaitai-sym [eof + - * / % < <= > >= == != << >> & pipe ^ not and or ? : ~
|
|
lparen rparen lbracket comma dot rbracket])
|
|
|
|
(define (kaitai-numstr->number str)
|
|
(match (regexp-replace* #px"_" str "")
|
|
[(pregexp #px"^0x(.*?)$" (list _ num)) (string->number num 16)]
|
|
[(pregexp #px"^0b(.*?)$" (list _ num)) (string->number num 2)]
|
|
[(pregexp #px"^0o(.*?)$" (list _ num)) (string->number num 8)]
|
|
[num (string->number num)]))
|
|
|
|
(define (kaitai-subst-string str)
|
|
;; racket's built in string translation works fine
|
|
(read (open-input-string
|
|
(string-append "\"" (substring str 1 (sub1 (string-length str))) "\""))))
|
|
|
|
(define kaitai-lexer
|
|
(lexer
|
|
[(:or "true" "false") (token-boolean (string=? "true" lexeme))]
|
|
[(:or (:: "0x" (:+ (:or numeric (char-set "abcdefABCDEF_"))))
|
|
(:: "0b" (:+ (char-set "01_")))
|
|
(:: "0o" (:+ (char-set "01234567_")))
|
|
(:: (:? (char-set "+-"))
|
|
(:+ (:or numeric "_"))
|
|
(:? (:: "." (:* (:or numeric "_"))))
|
|
(:? (:: (char-set "eE") (:? (char-set "+-")) (:+ (:or numeric "_"))))))
|
|
(token-number (kaitai-numstr->number lexeme))]
|
|
[(:or (:: "'" (:* (:or (:- any-char "'") "\\'")) "'")
|
|
(:: "\"" (:* (:or (:- any-char "\"") "\\\"")) "\""))
|
|
(token-string (kaitai-subst-string lexeme))]
|
|
[whitespace (kaitai-lexer input-port)]
|
|
["+" (token-+)]
|
|
["-" (token--)]
|
|
["*" (token-*)]
|
|
["/" (token-/)]
|
|
["%" (token-%)]
|
|
["<" (token-<)]
|
|
["<=" (token-<=)]
|
|
[">" (token->)]
|
|
[">=" (token->=)]
|
|
["==" (token-==)]
|
|
["!=" (token-!=)]
|
|
[">>" (token->>)]
|
|
["<<" (token-<<)]
|
|
["&" (token-&)]
|
|
["|" (token-pipe)]
|
|
["^" (token-^)]
|
|
["~" (token-~)]
|
|
["not" (token-not)]
|
|
["and" (token-and)]
|
|
["or" (token-or)]
|
|
["?" (token-?)]
|
|
[":" (token-:)]
|
|
["(" (token-lparen)]
|
|
[")" (token-rparen)]
|
|
["[" (token-lbracket)]
|
|
["," (token-comma)]
|
|
["." (token-dot)]
|
|
["]" (token-rbracket)]
|
|
[(:: (:+ (:or alphabetic "_"))
|
|
(:* (:or alphabetic numeric "_" "::")))
|
|
(token-identifier (string->symbol lexeme))]
|
|
[(eof) (token-eof)]))
|
|
|
|
(define kaitai-parser
|
|
(parser
|
|
[start exp]
|
|
[end eof]
|
|
[error (lambda (tok-ok? tok-name tok-value)
|
|
(error "parser error" tok-ok? tok-name tok-value))]
|
|
[tokens kaitai-expr kaitai-sym]
|
|
[precs (left comma)
|
|
(right ? :)
|
|
(left and or)
|
|
(left & pipe ^)
|
|
(left < <= > >= != ==)
|
|
(left << >>)
|
|
(left + - ~)
|
|
(left * / %)
|
|
(right not)
|
|
(left dot lparen rparen lbracket rbracket)]
|
|
[grammar
|
|
(exp [(number) $1]
|
|
[(string) $1]
|
|
[(boolean) $1]
|
|
[(identifier) $1]
|
|
[(lbracket apply-args rbracket) (cons 'array $2)]
|
|
[(exp dot identifier) (list 'get $1 $3)]
|
|
[(exp lparen apply-args rparen) `(apply ,$1 ,@$3)]
|
|
[(exp * exp) (list '* $1 $3)]
|
|
[(exp / exp) (list '/ $1 $3)]
|
|
[(exp % exp) (list '% $1 $3)]
|
|
[(exp + exp) (list '+ $1 $3)]
|
|
[(exp - exp) (list '- $1 $3)]
|
|
[(exp < exp) (list '< $1 $3)]
|
|
[(exp <= exp) (list '<= $1 $3)]
|
|
[(exp > exp) (list '> $1 $3)]
|
|
[(exp >= exp) (list '>= $1 $3)]
|
|
[(exp != exp) (list '!= $1 $3)]
|
|
[(exp == exp) (list '== $1 $3)]
|
|
[(exp >> exp) (list '>> $1 $3)]
|
|
[(exp << exp) (list '<< $1 $3)]
|
|
[(exp & exp) (list '& $1 $3)]
|
|
[(exp pipe exp) (list 'pipe $1 $3)]
|
|
[(exp ^ exp) (list '^ $1 $3)]
|
|
[(exp and exp) (list 'and $1 $3)]
|
|
[(exp or exp) (list 'or $1 $3)]
|
|
[(not exp) (list 'not $2)]
|
|
[(+ exp) (list '+ $2)]
|
|
[(- exp) (list '- $2)]
|
|
[(~ exp) (list '~ $2)]
|
|
[(exp ? exp : exp) (list 'if $1 $3 $5)]
|
|
[(lparen exp rparen) $2])
|
|
(apply-args
|
|
[(exp) (list $1)]
|
|
[(exp comma apply-args) (cons $1 $3)])]))
|
|
|
|
;; kaitai expr AST
|
|
;; it's like racket mostly. builtins are
|
|
;; - all the builtin operators
|
|
;; - (apply func args ...): applies a function
|
|
;; - (array args ...): constructs array
|
|
;; - (get obj attr): references an attribute on an object
|
|
;; primitive types: boolean, number, string, symbol (identifier)
|
|
|
|
(define test2 "true and 'a' != 'b' ? -1 + ~bits : ('hello' + 'world').substring(2, 3)")
|
|
;; => '(if (and #t (!= "a" "b"))
|
|
;; (+ -1 (~ bits))
|
|
;; (apply (get (+ "hello" "world") substring) 2 3))
|
|
|
|
(let ([input (open-input-string test2)])
|
|
(kaitai-parser (lambda () (kaitai-lexer input))))
|