#lang racket/base (require racket/function racket/list racket/match racket/set parser-tools/lex (prefix-in : parser-tools/lex-sre) parser-tools/yacc) (define-tokens kaitai-expr [boolean number string identifier]) (define-empty-tokens kaitai-sym [eof + - * / % < <= > >= == != << >> & pipe ^ not and or ? : ~ lparen rparen lbracket comma dot rbracket]) (define (kaitai-numstr->number str) (match (regexp-replace* #px"_" str "") [(pregexp #px"^0x(.*?)$" (list _ num)) (string->number num 16)] [(pregexp #px"^0b(.*?)$" (list _ num)) (string->number num 2)] [(pregexp #px"^0o(.*?)$" (list _ num)) (string->number num 8)] [num (string->number num)])) (define (kaitai-subst-string str) ;; racket's built in string translation works fine (read (open-input-string (string-append "\"" (substring str 1 (sub1 (string-length str))) "\"")))) (define kaitai-lexer (lexer [(:or "true" "false") (token-boolean (string=? "true" lexeme))] [(:or (:: "0x" (:+ (:or numeric (char-set "abcdefABCDEF_")))) (:: "0b" (:+ (char-set "01_"))) (:: "0o" (:+ (char-set "01234567_"))) (:: (:? (char-set "+-")) (:+ (:or numeric "_")) (:? (:: "." (:* (:or numeric "_")))) (:? (:: (char-set "eE") (:? (char-set "+-")) (:+ (:or numeric "_")))))) (token-number (kaitai-numstr->number lexeme))] [(:or (:: "'" (:* (:or (:- any-char "'") "\\'")) "'") (:: "\"" (:* (:or (:- any-char "\"") "\\\"")) "\"")) (token-string (kaitai-subst-string lexeme))] [whitespace (kaitai-lexer input-port)] ["+" (token-+)] ["-" (token--)] ["*" (token-*)] ["/" (token-/)] ["%" (token-%)] ["<" (token-<)] ["<=" (token-<=)] [">" (token->)] [">=" (token->=)] ["==" (token-==)] ["!=" (token-!=)] [">>" (token->>)] ["<<" (token-<<)] ["&" (token-&)] ["|" (token-pipe)] ["^" (token-^)] ["~" (token-~)] ["not" (token-not)] ["and" (token-and)] ["or" (token-or)] ["?" (token-?)] [":" (token-:)] ["(" (token-lparen)] [")" (token-rparen)] ["[" (token-lbracket)] ["," (token-comma)] ["." (token-dot)] ["]" (token-rbracket)] [(:: (:+ (:or alphabetic "_")) (:* (:or alphabetic numeric "_" "::"))) (token-identifier (string->symbol lexeme))] [(eof) (token-eof)])) (define kaitai-parser (parser [start exp] [end eof] [error (lambda (tok-ok? tok-name tok-value) (error "parser error" tok-ok? tok-name tok-value))] [tokens kaitai-expr kaitai-sym] [precs (left comma) (right ? :) (left and or) (left & pipe ^) (left < <= > >= != ==) (left << >>) (left + - ~) (left * / %) (right not) (left dot lparen rparen lbracket rbracket)] [grammar (exp [(number) $1] [(string) $1] [(boolean) $1] [(identifier) $1] [(lbracket apply-args rbracket) (cons 'array $2)] [(exp dot identifier) (list 'get $1 $3)] [(exp lparen apply-args rparen) `(apply ,$1 ,@$3)] [(exp * exp) (list '* $1 $3)] [(exp / exp) (list '/ $1 $3)] [(exp % exp) (list '% $1 $3)] [(exp + exp) (list '+ $1 $3)] [(exp - exp) (list '- $1 $3)] [(exp < exp) (list '< $1 $3)] [(exp <= exp) (list '<= $1 $3)] [(exp > exp) (list '> $1 $3)] [(exp >= exp) (list '>= $1 $3)] [(exp != exp) (list '!= $1 $3)] [(exp == exp) (list '== $1 $3)] [(exp >> exp) (list '>> $1 $3)] [(exp << exp) (list '<< $1 $3)] [(exp & exp) (list '& $1 $3)] [(exp pipe exp) (list 'pipe $1 $3)] [(exp ^ exp) (list '^ $1 $3)] [(exp and exp) (list 'and $1 $3)] [(exp or exp) (list 'or $1 $3)] [(not exp) (list 'not $2)] [(+ exp) (list '+ $2)] [(- exp) (list '- $2)] [(~ exp) (list '~ $2)] [(exp ? exp : exp) (list 'if $1 $3 $5)] [(lparen exp rparen) $2]) (apply-args [(exp) (list $1)] [(exp comma apply-args) (cons $1 $3)])])) ;; kaitai expr AST ;; it's like racket mostly. builtins are ;; - all the builtin operators ;; - (apply func args ...): applies a function ;; - (array args ...): constructs array ;; - (get obj attr): references an attribute on an object ;; primitive types: boolean, number, string, symbol (identifier) (define test2 "true and 'a' != 'b' ? -1 + ~bits : ('hello' + 'world').substring(2, 3)") ;; => '(if (and #t (!= "a" "b")) ;; (+ -1 (~ bits)) ;; (apply (get (+ "hello" "world") substring) 2 3)) (let ([input (open-input-string test2)]) (kaitai-parser (lambda () (kaitai-lexer input))))