implement expr lexer

This commit is contained in:
xenia 2020-09-09 03:35:17 -04:00
parent ac1ea37c54
commit 347c22d8b2
3 changed files with 81 additions and 12 deletions

76
kaitai/expr.rkt Normal file
View File

@ -0,0 +1,76 @@
#lang racket/base
(require racket/function racket/match racket/set
parser-tools/lex
(prefix-in : parser-tools/lex-sre)
parser-tools/yacc)
;; Tokens that carry a payload (the value is supplied by the lexer action).
(define-tokens kaitai-expr
  [boolean
   number
   string
   identifier])

;; Payload-free tokens: end of input, operators, keywords and punctuation.
(define-empty-tokens kaitai-empty
  [eof
   ;; arithmetic
   + - * / %
   ;; comparison
   < <= > >= == !=
   ;; shifts and bitwise operators (`pipe` is the token emitted for "|")
   << >> & pipe ^
   ;; boolean keywords
   not and or
   ;; ternary
   ? :
   ;; grouping, indexing, argument separation and member access
   lparen rparen lbracket comma dot rbracket])
;; Converts the text of a numeric literal into a Racket number.
;; Underscores are dropped first; a "0x", "0b" or "0o" prefix selects radix
;; 16, 2 or 8 respectively, and anything else is handed to `string->number`
;; unchanged. Returns #f when the remaining text is not a valid number.
(define (kaitai-numstr->number str)
  (define digits-only (regexp-replace* #px"_" str ""))
  ;; Returns the text following the prefix matched by `rx`, or #f if no match.
  (define (after-prefix rx)
    (define m (regexp-match rx digits-only))
    (and m (cadr m)))
  (cond
    [(after-prefix #px"^0x(.*?)$") => (lambda (body) (string->number body 16))]
    [(after-prefix #px"^0b(.*?)$") => (lambda (body) (string->number body 2))]
    [(after-prefix #px"^0o(.*?)$") => (lambda (body) (string->number body 8))]
    [else (string->number digits-only)]))
;; Converts the text of a quoted string literal (including its surrounding
;; quote characters) into the string value it denotes.
;;
;; The text between the delimiters is re-wrapped in double quotes and handed
;; to Racket's reader, so escape sequences are interpreted with Racket's
;; string syntax. For single-quoted literals any bare double quote in the body
;; is escaped first; otherwise the reader would stop at it and silently
;; truncate the result.
;;
;; NOTE(review): the Kaitai Struct user guide describes single-quoted strings
;; as fully literal (no escape processing) -- confirm whether interpreting
;; escapes inside them is intended.
(define (kaitai-subst-string str)
  (define body (substring str 1 (sub1 (string-length str))))
  (define safe-body
    (if (char=? (string-ref str 0) #\')
        ;; Walk escape pairs and bare double quotes left to right, so that an
        ;; already-escaped character is never escaped a second time.
        (regexp-replace* #px"\\\\.|\""
                         body
                         (lambda (m) (if (string=? m "\"") "\\\"" m)))
        body))
  (read (open-input-string (string-append "\"" safe-body "\""))))
;; Lexer for Kaitai Struct expressions.
;; Reads from an input port and returns the next token: a value-carrying
;; token (boolean, number, string, identifier), one of the empty operator /
;; keyword / punctuation tokens, or the eof token. Whitespace is skipped.
;;
;; When several rules match, the longest match wins and ties go to the
;; earliest rule, which is why the keyword rules precede the identifier rule.
(define kaitai-lexer
  (lexer
   ;; ---- literals ----
   [(:or "true" "false") (token-boolean (string=? "true" lexeme))]
   [(:or (:: "0x" (:+ (:or numeric (char-set "abcdefABCDEF_"))))
         (:: "0b" (:+ (char-set "01_")))
         (:: "0o" (:+ (char-set "01234567_")))
         ;; Decimal / float. It must start with a digit: a leading sign is
         ;; left to the "+" / "-" rules so that "a-1" still yields a minus
         ;; token, and a leading "_" is left to the identifier rule so that
         ;; "_" and "_1" are not turned into numbers.
         (:: numeric
             (:* (:or numeric "_"))
             (:? (:: "." (:* (:or numeric "_"))))
             (:? (:: (char-set "eE") (:? (char-set "+-")) (:+ (:or numeric "_"))))))
    (token-number (kaitai-numstr->number lexeme))]
   ;; String bodies are a repetition of EITHER a non-delimiter character OR a
   ;; backslash-escaped delimiter. The `:or` is required because `(:* a b)`
   ;; repeats the sequence "a then b".
   [(:or (:: "'" (:* (:or (:- any-char "'") "\\'")) "'")
         (:: "\"" (:* (:or (:- any-char "\"") "\\\"")) "\""))
    (token-string (kaitai-subst-string lexeme))]
   ;; ---- whitespace: skip and lex the next token ----
   [whitespace (kaitai-lexer input-port)]
   ;; ---- arithmetic ----
   ["+" (token-+)]
   ["-" (token--)]
   ["*" (token-*)]
   ["/" (token-/)]
   ["%" (token-%)]
   ;; ---- comparison ----
   ["<" (token-<)]
   ["<=" (token-<=)]
   [">" (token->)]
   [">=" (token->=)]
   ["==" (token-==)]
   ["!=" (token-!=)]
   ;; ---- shifts and bitwise ----
   [">>" (token->>)]
   ["<<" (token-<<)]
   ["&" (token-&)]
   ["|" (token-pipe)]
   ["^" (token-^)]
   ;; ---- boolean keywords ----
   ["not" (token-not)]
   ["and" (token-and)]
   ["or" (token-or)]
   ;; ---- ternary ----
   ["?" (token-?)]
   [":" (token-:)]
   ;; ---- punctuation ----
   ["(" (token-lparen)]
   [")" (token-rparen)]
   ["[" (token-lbracket)]
   ["," (token-comma)]
   ["." (token-dot)]
   ["]" (token-rbracket)]
   ;; ---- identifiers; "::" may appear after the first character ----
   [(:: (:+ (:or alphabetic "_"))
        (:* (:or alphabetic numeric "_" "::")))
    (token-identifier lexeme)]
   [(eof) (token-eof)]))
;; Manual smoke test: lexes a sample expression and prints each token.
;; Kept in a `main` submodule so it runs only when this file is executed
;; directly, not every time the module is required by other code.
(module+ main
  (define input
    (open-input-string
     "_root._io.size.to_i & 0x08000000 != 0 + code == block_type::int32 ? 4 : 8"))
  (let loop ()
    (define tok (kaitai-lexer input))
    ;; Stop at end of input; the eof token itself is not printed.
    (unless (equal? tok (token-eof))
      (printf "~v\n" tok)
      (loop))))

View File

@ -2,6 +2,8 @@
(define version "0.0.1")
(define collection "kaitai")
(define deps '("base" "yaml" "binaryio-lib"))
(define deps '("base"
"binaryio-lib"
"yaml"))
(define test-omit-paths '("info.rkt"))
; (define scribblings '(("kaitai.scrbl"))')

View File

@ -1,6 +1,6 @@
#lang racket/base
(require (for-syntax racket/base racket/list))
(require (for-syntax racket/base racket/list racket/match))
(provide (rename-out [kaitai:module-begin #%module-begin])
(except-out (all-from-out racket/base) #%module-begin))
@ -42,13 +42,6 @@
[#,@(map (lambda (a) (kaitai-str->sym (hash-ref a "id" (make-rse "seq element has no id"))))
seq)] #:transparent)))
;; generates parser function
(define-for-syntax (gen-parser top [given-id #f])
(define-values [id-sym seq] (get-id-seq top given-id))
#`((define (,(sym+ "parse-" id-sym) input)
(void))
(provide ,(sym+ "parse-" id-sym))))
;; runtime lib
(module runtime racket/base
@ -66,10 +59,8 @@
(define-syntax (kaitai:module-begin stx)
(define body (second (syntax-e stx)))
(define top-struct (gen-struct (syntax->datum body)))
; (displayln top-struct)
(displayln (syntax->datum body))
#`(#%module-begin
#,@top-struct
(module+ main
(kaitai:uwu))))