#lang racket/base

;; Lexer for the Kaitai Struct expression language, built on parser-tools/lex.
;; Defines the token sets, two literal-decoding helpers, the lexer itself, and
;; a small demo that prints the tokens of a sample expression at module load.

(require racket/function
         racket/match
         racket/set
         parser-tools/lex
         (prefix-in : parser-tools/lex-sre)
         parser-tools/yacc)

;; Tokens that carry a value (the decoded literal or the identifier text).
(define-tokens kaitai-expr [boolean number string identifier])

;; Value-less tokens: operators, punctuation and end of input.
(define-empty-tokens kaitai-empty
  [eof + - * / % < <= > >= == != << >> & pipe ^ not and or ? :
   lparen rparen lbracket comma dot rbracket])

;; kaitai-numstr->number : string -> (or/c number #f)
;; Converts the text of a numeric literal to a number.  Underscore digit
;; separators are stripped first; the prefixes 0x / 0b / 0o select base
;; 16 / 2 / 8, anything else is handed to `string->number` in base 10.
;; Returns #f when the remaining text is not a valid number.
(define (kaitai-numstr->number str)
  (match (regexp-replace* #px"_" str "")
    [(pregexp #px"^0x(.*?)$" (list _ num)) (string->number num 16)]
    [(pregexp #px"^0b(.*?)$" (list _ num)) (string->number num 2)]
    [(pregexp #px"^0o(.*?)$" (list _ num)) (string->number num 8)]
    [num (string->number num)]))

;; kaitai-subst-string : string -> string
;; Decodes a quoted string literal (including its surrounding ' or " quotes)
;; into the string it denotes.  Escape sequences are interpreted by Racket's
;; own reader, so the set of accepted escapes is Racket's; an escape the
;; reader does not know raises a read error.
(define (kaitai-subst-string str)
  (define body (substring str 1 (sub1 (string-length str))))
  ;; The body is re-wrapped in double quotes for `read`, so a bare `"` inside a
  ;; single-quoted literal would end the string early.  Walk the body in units
  ;; of "backslash + char" or a lone `"`, escaping only the lone quotes.
  (define escaped
    (regexp-replace* #px"\\\\.|\"" body
                     (lambda (m) (if (string=? m "\"") "\\\"" m))))
  (read (open-input-string (string-append "\"" escaped "\""))))

;; Digit classes are spelled out as ASCII ranges: the built-in `numeric` class
;; also matches non-ASCII digits, which `string->number` cannot parse.
(define-lex-abbrevs
  [dec-digit (:/ #\0 #\9)]
  [hex-digit (:/ #\0 #\9 #\a #\f #\A #\F)]
  [oct-digit (:/ #\0 #\7)]
  [bin-digit (:/ #\0 #\1)]
  ;; A decimal run must START with a digit; otherwise `_` and `_1` would tie
  ;; with the identifier rule and be lexed as (broken) numbers.
  [dec-run (:: dec-digit (:* (:or dec-digit "_")))])

;; kaitai-lexer : input-port -> token
;; Reads one token from the port.  Whitespace is skipped; end of input yields
;; the `eof` token.  The longest match wins and ties go to the earlier rule,
;; which is why keywords and literals are listed before identifiers.
(define kaitai-lexer
  (lexer
   [(:or "true" "false") (token-boolean (string=? "true" lexeme))]

   ;; Numeric literals.  Prefixed forms require at least one real digit after
   ;; the prefix (underscores alone are not a number).
   ;; NOTE(review): the optional sign is part of the literal, so `a-1` lexes as
   ;; `a` followed by the number -1, and a trailing "." is swallowed, so `4.to_s`
   ;; lexes as 4.0 followed by `to_s`.  Kept as-is because the parser is not
   ;; visible here -- confirm against the grammar.
   [(:or (:: "0x" (:* "_") hex-digit (:* (:or hex-digit "_")))
         (:: "0b" (:* "_") bin-digit (:* (:or bin-digit "_")))
         (:: "0o" (:* "_") oct-digit (:* (:or oct-digit "_")))
         (:: (:? (char-set "+-"))
             dec-run
             (:? (:: "." (:* (:or dec-digit "_"))))
             (:? (:: (char-set "eE") (:? (char-set "+-")) dec-run))))
    (token-number (kaitai-numstr->number lexeme))]

   ;; String literals: a quote, then any mix of backslash-escape pairs and
   ;; characters that are neither the quote nor a backslash, then the quote.
   [(:or (:: "'" (:* (:or (:: "\\" any-char) (:~ #\' #\\))) "'")
         (:: "\"" (:* (:or (:: "\\" any-char) (:~ #\" #\\))) "\""))
    (token-string (kaitai-subst-string lexeme))]

   ;; Skip whitespace by lexing the next token instead.
   [whitespace (kaitai-lexer input-port)]

   ["+" (token-+)]
   ["-" (token--)]
   ["*" (token-*)]
   ["/" (token-/)]
   ["%" (token-%)]
   ["<" (token-<)]
   ["<=" (token-<=)]
   [">" (token->)]
   [">=" (token->=)]
   ["==" (token-==)]
   ["!=" (token-!=)]
   [">>" (token->>)]
   ["<<" (token-<<)]
   ["&" (token-&)]
   ["|" (token-pipe)]
   ["^" (token-^)]
   ["not" (token-not)]
   ["and" (token-and)]
   ["or" (token-or)]
   ["?" (token-?)]
   [":" (token-:)]
   ["(" (token-lparen)]
   [")" (token-rparen)]
   ["[" (token-lbracket)]
   ["," (token-comma)]
   ["." (token-dot)]
   ["]" (token-rbracket)]

   ;; Identifiers may contain "::" so that `enum_name::value` is one token.
   [(:: (:+ (:or alphabetic "_"))
        (:* (:or alphabetic numeric "_" "::")))
    (token-identifier lexeme)]

   [(eof) (token-eof)]))

;; Demo: print every token of a sample expression, stopping at end of input.
(let ([input (open-input-string "_root._io.size.to_i & 0x08000000 != 0 + code == block_type::int32 ? 4 : 8")])
  (let loop ()
    (match (kaitai-lexer input)
      [(? (curry equal? (token-eof))) (void)]
      [x (printf "~v\n" x) (loop)])))