forked from haskal/shark-compiler
🦈 initial commit
This commit is contained in:
commit
02e439fd57
|
@ -0,0 +1,3 @@
|
||||||
|
*.zo
|
||||||
|
*.rktd
|
||||||
|
*.rkt~
|
|
@ -0,0 +1,7 @@
|
||||||
|
# shark compiler
|
||||||
|
|
||||||
|
i wanted to make a framework for extremely retargetable esolang compilation so i made one
|
||||||
|
|
||||||
|
currently this defines a low-level IR and compilation framework for targeting. in the future, there
|
||||||
|
will be a more usable language that compiles to the LLIR. and polyfills in case targets don't
|
||||||
|
implement operations, etc
|
|
@ -0,0 +1,408 @@
|
||||||
|
#!/usr/bin/env racket
|
||||||
|
#lang racket
|
||||||
|
|
||||||
|
;; LLIR
|
||||||
|
;; defs
|
||||||
|
;; (global x)
|
||||||
|
;; (const x) ;; TODO
|
||||||
|
;; (set! x immediate)
|
||||||
|
;;
|
||||||
|
;; the only thing that accepts immediates is set!
|
||||||
|
;; all other ops need globals or consts
|
||||||
|
;; all globals, constants, and labels share a single namespace.
|
||||||
|
;; for example, there cannot be a global named helloworld and a label also named helloworld
|
||||||
|
;;
|
||||||
|
;; branching
|
||||||
|
;; (label x)
|
||||||
|
;; (goto x)
|
||||||
|
;; (ifeq a b x y)
|
||||||
|
;; note: implement ifne by swapping x,y
|
||||||
|
;; (iflt a b x y)
|
||||||
|
;; note: implement ifgt by swapping a,b
|
||||||
|
;; ifle by swapping a,b and x,y
|
||||||
|
;; ifge by swapping x,y
|
||||||
|
;; (terminate)
|
||||||
|
;;
|
||||||
|
;; arithmetic
|
||||||
|
;; (add! a b) a += b
|
||||||
|
;; (sub! a b) a -= b
|
||||||
|
;; (mul! a b) etc
|
||||||
|
;; (div! a b)
|
||||||
|
;; (mod! a b)
|
||||||
|
;; bitwise
|
||||||
|
;; (and! a b)
|
||||||
|
;; (or! a b)
|
||||||
|
;; (xor! a b)
|
||||||
|
;; (invert! a)
|
||||||
|
;;
|
||||||
|
;; target language
|
||||||
|
;; passes all args to the target assembler
|
||||||
|
;; (asm! ...)
|
||||||
|
;; (asmgoto! [x y] ...) ;; impl-defined branching operation
|
||||||
|
;; (prelude) implementation-defined prelude
|
||||||
|
|
||||||
|
;; predicates for opcodes
|
||||||
|
|
||||||
|
(define binary-branch-ops (set 'ifeq 'iflt))
|
||||||
|
(define (binary-branch-op? x)
|
||||||
|
(and (symbol? x) (set-member? binary-branch-ops x)))
|
||||||
|
|
||||||
|
(define binary-arith-ops
|
||||||
|
(set 'add! 'sub! 'mul! 'div! 'mod! 'and! 'or! 'xor!))
|
||||||
|
(define (binary-arith-op? x)
|
||||||
|
(and (symbol? x) (set-member? binary-arith-ops x)))
|
||||||
|
|
||||||
|
(define unary-arith-ops (set 'invert!))
|
||||||
|
(define (unary-arith-op? x)
|
||||||
|
(and (symbol? x) (set-member? unary-arith-ops x)))
|
||||||
|
|
||||||
|
|
||||||
|
;; low-level representation of a memory location
|
||||||
|
(struct mem-loc [value] #:transparent)
|
||||||
|
|
||||||
|
;; basic block node
|
||||||
|
;; contents:
|
||||||
|
;; - LLIR list with LLIR branch instruction
|
||||||
|
;; - target contents with struct branch
|
||||||
|
;; - target contents with list of branch labels
|
||||||
|
;; - target contents with list of branch numbers
|
||||||
|
(struct bbnode [contents branch] #:transparent)
|
||||||
|
|
||||||
|
;; low-level branch representation, with a target node and optional cleanup sequence
|
||||||
|
;; the setup and cleanup will get converted into basic blocks and merged such that each bb ends
|
||||||
|
;; directly with a branch
|
||||||
|
(struct branch [setup targets cleanups] #:transparent)
|
||||||
|
|
||||||
|
;; compiler context
|
||||||
|
(struct cctx [num-globals] #:transparent)
|
||||||
|
(define current-cctx (make-parameter #f))
|
||||||
|
|
||||||
|
|
||||||
|
;; validates LLIR for consistency
|
||||||
|
;; check every used symbol is a defined global or const
|
||||||
|
;; check every branch points to a valid label
|
||||||
|
;; validate args on each op
|
||||||
|
;; check that main label is present, start label is not present
|
||||||
|
(define (validate src)
|
||||||
|
(error "TODO")) ;; lmao
|
||||||
|
|
||||||
|
|
||||||
|
;; LLIR -> LLIR Int
|
||||||
|
;; assigns each global a number and returns total number of globals
|
||||||
|
(define (convert-globals src)
|
||||||
|
;; create src without the global declarations
|
||||||
|
(define new-src (filter (lambda (x) (not (symbol=? (first x) 'global))) src))
|
||||||
|
;; map globals to memory locations
|
||||||
|
(define globals (for/list ([x (in-list src)] #:when (symbol=? (first x) 'global)) (second x)))
|
||||||
|
(define global-map
|
||||||
|
(for/hash ([i (in-naturals)] [glob (in-list globals)])
|
||||||
|
(values glob (mem-loc i))))
|
||||||
|
|
||||||
|
(define (translate x) (hash-ref global-map x))
|
||||||
|
|
||||||
|
(define (update-arg x)
|
||||||
|
(if (hash-has-key? global-map x) (translate x) x))
|
||||||
|
|
||||||
|
(define (update-op op)
|
||||||
|
(match op
|
||||||
|
[(list 'set! target value)
|
||||||
|
(list 'set! (translate target)
|
||||||
|
(if (symbol? value) (translate value) value))]
|
||||||
|
[(list (? binary-branch-op? op) a b x y)
|
||||||
|
(list op (translate a) (translate b) x y)]
|
||||||
|
[(list (? binary-arith-op? op) a b)
|
||||||
|
(list op (translate a) (translate b))]
|
||||||
|
[(list (? unary-arith-op? op) a)
|
||||||
|
(list op (translate a))]
|
||||||
|
[(list 'asm! args ...)
|
||||||
|
(cons 'asm! (map update-arg args))]
|
||||||
|
[(list 'asmgoto! branches args ...)
|
||||||
|
(cons 'asmgoto! (cons branches (map update-arg args)))]
|
||||||
|
[any any]))
|
||||||
|
|
||||||
|
(values (map update-op new-src) (length globals)))
|
||||||
|
|
||||||
|
|
||||||
|
;; builds graph of basic blocks
|
||||||
|
(define (convert-bb src)
|
||||||
|
(define graph (make-hash))
|
||||||
|
|
||||||
|
(define (bb-commit! lab blk branch)
|
||||||
|
(hash-set! graph lab (bbnode blk branch)))
|
||||||
|
|
||||||
|
(define (convert-step this-label this-block src)
|
||||||
|
(match src
|
||||||
|
['() (bb-commit! this-label this-block (list 'terminate))]
|
||||||
|
[(cons (list 'label lab) rst)
|
||||||
|
(bb-commit! this-label this-block (list 'goto lab))
|
||||||
|
(convert-step lab '() rst)]
|
||||||
|
[(cons (list 'goto next) rst)
|
||||||
|
(bb-commit! this-label this-block (list 'goto next))
|
||||||
|
(convert-step (gensym) '() rst)]
|
||||||
|
[(cons (list (? binary-branch-op? op) a b x y) rst)
|
||||||
|
(bb-commit! this-label this-block (list op a b x y))
|
||||||
|
(convert-step (gensym) '() rst)]
|
||||||
|
[(cons (cons 'asmgoto! args) rst)
|
||||||
|
(bb-commit! this-label this-block (cons 'asmgoto! args))
|
||||||
|
(convert-step (gensym) '() rst)]
|
||||||
|
[(cons (list 'terminate) rst)
|
||||||
|
(bb-commit! this-label this-block (list 'terminate))
|
||||||
|
(convert-step (gensym) '() rst)]
|
||||||
|
[(cons fst rst)
|
||||||
|
(convert-step this-label (append this-block (list fst)) rst)]))
|
||||||
|
|
||||||
|
(define (graph-prune g)
|
||||||
|
(define (dfs! g seen label)
|
||||||
|
(unless (set-member? seen label)
|
||||||
|
(set-add! seen label)
|
||||||
|
(match (bbnode-branch (hash-ref g label))
|
||||||
|
[(list 'terminate) (void)]
|
||||||
|
[(list 'goto next) (dfs! g seen next)]
|
||||||
|
[(list (? binary-branch-op? op) _ _ x y)
|
||||||
|
(dfs! g seen x)
|
||||||
|
(dfs! g seen y)]
|
||||||
|
[(list 'asmgoto! branches args)
|
||||||
|
(for ([next (in-list branches)])
|
||||||
|
(dfs! g seen next))])))
|
||||||
|
(define marked (mutable-set))
|
||||||
|
(dfs! g marked 'start)
|
||||||
|
(for/hash ([(k v) (in-hash g)]
|
||||||
|
#:when (set-member? marked k))
|
||||||
|
(values k v)))
|
||||||
|
|
||||||
|
;; generate graph of bb nodes
|
||||||
|
;; insert prelude
|
||||||
|
(convert-step 'start '((prelude)) (cons '(goto main) src))
|
||||||
|
|
||||||
|
;; prune unreachable blocks
|
||||||
|
(graph-prune graph))
|
||||||
|
|
||||||
|
|
||||||
|
;; expands and simplifies target-compiled basic blocks
|
||||||
|
;; numbers each label and returns a vector of numbered blocks
|
||||||
|
(define (convert-flat graph)
|
||||||
|
(define new-graph (make-hash))
|
||||||
|
|
||||||
|
(for ([(k v) (in-hash graph)])
|
||||||
|
(match-define (bbnode contents (branch setup targets cleanups)) v)
|
||||||
|
(define cleanups-labels (map (lambda (_) (gensym)) cleanups))
|
||||||
|
(for ([target (in-list targets)] [cleanup (in-list cleanups)] [label (in-list cleanups-labels)])
|
||||||
|
(hash-set! new-graph label (bbnode cleanup (list target))))
|
||||||
|
(hash-set! new-graph k (merge-bb
|
||||||
|
(bbnode contents '())
|
||||||
|
(bbnode setup cleanups-labels))))
|
||||||
|
|
||||||
|
;; simplifies the bb graph to a smaller number of blocks, if possible
|
||||||
|
(define (simplify-graph!)
|
||||||
|
;; generate reverse edges map
|
||||||
|
(define graph-rev (make-hash))
|
||||||
|
(define (update-rev! label called-by)
|
||||||
|
(hash-update! graph-rev label (lambda (v) (set-add v called-by)) set))
|
||||||
|
|
||||||
|
(for ([(k v) (in-hash new-graph)])
|
||||||
|
(match-define (bbnode _ branches) v)
|
||||||
|
(for ([to (in-list branches)])
|
||||||
|
(update-rev! to k)))
|
||||||
|
|
||||||
|
;; for each edge A->B
|
||||||
|
;; if this is the only edge out of A and the only edge into B, then merge A and B
|
||||||
|
(define keys (hash-keys new-graph))
|
||||||
|
(for ([key (in-list keys)])
|
||||||
|
(define rev (hash-ref graph-rev key set))
|
||||||
|
(when (= 1 (set-count rev))
|
||||||
|
(define from-key (set-first rev))
|
||||||
|
(define from-node (hash-ref new-graph from-key))
|
||||||
|
(when (= 1 (length (bbnode-branch from-node)))
|
||||||
|
;; call target merge function
|
||||||
|
(define new-node (merge-bb from-node (hash-ref new-graph key)))
|
||||||
|
;; update graphs for merge
|
||||||
|
(hash-set! new-graph from-key new-node)
|
||||||
|
(hash-remove! new-graph key)
|
||||||
|
(hash-remove! graph-rev key)
|
||||||
|
(for ([next (in-list (bbnode-branch new-node))])
|
||||||
|
(hash-update! graph-rev next
|
||||||
|
(lambda (v) (set-map v (lambda (x) (if (equal? x key) from-key x))))
|
||||||
|
set))
|
||||||
|
(void)))))
|
||||||
|
|
||||||
|
(simplify-graph!)
|
||||||
|
|
||||||
|
;; create a vector of keys, ensure that start is at the front
|
||||||
|
(define new-keys
|
||||||
|
(list->vector
|
||||||
|
(let ([tmp (hash-keys new-graph)])
|
||||||
|
(cons 'start (filter (lambda (x) (not (symbol=? x 'start))) tmp)))))
|
||||||
|
(define keys-map (for/hash ([i (in-naturals)] [e (in-vector new-keys)]) (values e i)))
|
||||||
|
|
||||||
|
;; replace labels with numbers. now we have a low level representation of the target code ready
|
||||||
|
;; to pass to link-bb
|
||||||
|
(for/vector #:length (vector-length new-keys) ([e (in-vector new-keys)])
|
||||||
|
(match-define (bbnode contents bl) (hash-ref new-graph e))
|
||||||
|
(bbnode contents (map (lambda (x) (hash-ref keys-map x)) bl))))
|
||||||
|
|
||||||
|
|
||||||
|
;; steps:
|
||||||
|
;; 1. transform globals
|
||||||
|
;; 2. build bb graph
|
||||||
|
;; 3. assemble bb contents
|
||||||
|
;; 4. expand, merge, and flatten graph
|
||||||
|
;; 5. link
|
||||||
|
|
||||||
|
(define (compile prog)
|
||||||
|
(define-values (prog-t1 nglobals) (convert-globals prog))
|
||||||
|
(parameterize ([current-cctx (cctx nglobals)])
|
||||||
|
(define prog-t2 (convert-bb prog-t1))
|
||||||
|
(define prog-t3 (for/hash ([(k v) (in-hash prog-t2)]) (values k (assemble-bb v))))
|
||||||
|
(define prog-t4 (convert-flat prog-t3))
|
||||||
|
(define prog-final (link-bb prog-t4))
|
||||||
|
prog-final))
|
||||||
|
|
||||||
|
|
||||||
|
;;;;;;;; targeting
|
||||||
|
;; target implementation must define
|
||||||
|
;; - assemble-bb: assembles the contents of one basic block
|
||||||
|
;; - merge-bb: merges two compiled basic blocks
|
||||||
|
;; - link-bb: links basic blocks into final output
|
||||||
|
|
||||||
|
;; this targets an apparently befunge-inspired lang called laser
|
||||||
|
;; (https://github.com/Quintec/LaserLang)
|
||||||
|
|
||||||
|
(define (chr* c n)
|
||||||
|
(apply string (make-list n c)))
|
||||||
|
|
||||||
|
(define (mem->scratch loc)
|
||||||
|
(string-append
|
||||||
|
(chr* #\u (mem-loc-value loc))
|
||||||
|
"rs"
|
||||||
|
(chr* #\d (mem-loc-value loc))))
|
||||||
|
|
||||||
|
(define (scratch->mem loc)
|
||||||
|
(string-append
|
||||||
|
(chr* #\u (mem-loc-value loc))
|
||||||
|
"pUwD"
|
||||||
|
(chr* #\d (mem-loc-value loc))))
|
||||||
|
|
||||||
|
(define (basic-binary-op op)
|
||||||
|
(lambda (a b)
|
||||||
|
(string-append "UU" (mem->scratch a) (mem->scratch b) "U" op "D" (scratch->mem a) "DD")))
|
||||||
|
|
||||||
|
(define (basic-branch-op op)
|
||||||
|
(lambda (a b x y)
|
||||||
|
(branch
|
||||||
|
(string-append
|
||||||
|
"UU" (mem->scratch a) (mem->scratch b) "U" op "⌝")
|
||||||
|
(list y x)
|
||||||
|
(list "pDDD" "pDDD"))))
|
||||||
|
|
||||||
|
(define assemblers
|
||||||
|
(hash
|
||||||
|
'prelude (lambda ()
|
||||||
|
(string-append "UU" (chr* #\0 (cctx-num-globals (current-cctx))) "DD"))
|
||||||
|
'set! (lambda (target value)
|
||||||
|
(if (mem-loc? value)
|
||||||
|
(string-append "UU" (mem->scratch value) (scratch->mem target) "DD")
|
||||||
|
(string-append "UUU'" (number->string value) "'D" (scratch->mem target) "DD")))
|
||||||
|
'add! (basic-binary-op "+")
|
||||||
|
'sub! (basic-binary-op "-")
|
||||||
|
'mul! (basic-binary-op "×")
|
||||||
|
'div! (basic-binary-op "÷")
|
||||||
|
'mod! (basic-binary-op "%")
|
||||||
|
'and! (basic-binary-op "&")
|
||||||
|
'or! (basic-binary-op "|")
|
||||||
|
'xor! (lambda (a b) (error "unimplemented"))
|
||||||
|
'invert! (lambda (a)
|
||||||
|
(string-append "UU"
|
||||||
|
(chr* #\u (mem-loc-value a))
|
||||||
|
"~"
|
||||||
|
(chr* #\u (mem-loc-value a))
|
||||||
|
"DD"))
|
||||||
|
'asm! (lambda args
|
||||||
|
(define op (first args))
|
||||||
|
(string-append
|
||||||
|
"UU"
|
||||||
|
(apply string-append (map mem->scratch (rest args)))
|
||||||
|
"U" op "DDD"))
|
||||||
|
'asmgoto! (lambda (branches . args) (error "unimplemented"))
|
||||||
|
'ifeq (basic-branch-op "=")
|
||||||
|
'iflt (basic-branch-op "l")
|
||||||
|
'goto (lambda (a) (branch "" (list a) (list "")))
|
||||||
|
'terminate (lambda () (branch "#" '() '()))))
|
||||||
|
|
||||||
|
;; export
|
||||||
|
|
||||||
|
(define (assemble-bb bb)
|
||||||
|
;; algorithm: maintain the following stacks: input, output, memory, scratch
|
||||||
|
;; LLIR globals get allocated into memory
|
||||||
|
;; scratch is used to implement LLIR ops
|
||||||
|
(define (assemble-op op)
|
||||||
|
(match (hash-ref assemblers (first op) #f)
|
||||||
|
[#f (error "target doesn't implement" op)]
|
||||||
|
[func (apply func (rest op))]))
|
||||||
|
(match-define (bbnode contents branch) bb)
|
||||||
|
(bbnode (string-join (map assemble-op contents) "") (assemble-op branch)))
|
||||||
|
|
||||||
|
|
||||||
|
(define (merge-bb bb1 bb2)
|
||||||
|
(bbnode (string-append (bbnode-contents bb1) (bbnode-contents bb2)) (bbnode-branch bb2)))
|
||||||
|
|
||||||
|
|
||||||
|
; takes a vector of basic blocks and renders the result
|
||||||
|
(define (link-bb bbvec)
|
||||||
|
;; represent as sparse 2d matrix
|
||||||
|
(define render (make-hash))
|
||||||
|
;; algorithm
|
||||||
|
;; - each bb gets 3 lines, the first line is the bb, line 2 is normal return, line 3 is
|
||||||
|
;; branch return (if any)
|
||||||
|
;; - the left side is a switching matrix that links return lines to the next bb
|
||||||
|
(for ([i (in-naturals)] [bb (in-vector bbvec)])
|
||||||
|
(define line
|
||||||
|
(string-append
|
||||||
|
(chr* #\space i)
|
||||||
|
">"
|
||||||
|
(chr* #\space (- (vector-length bbvec) i 1))
|
||||||
|
(bbnode-contents bb)
|
||||||
|
"v"))
|
||||||
|
;; render line
|
||||||
|
(for ([j (in-naturals)] [c (in-string line)])
|
||||||
|
(hash-set! render (cons (* i 3) j) c))
|
||||||
|
;; insert returns
|
||||||
|
(hash-set! render (cons (+ (* i 3) 1) (- (string-length line) 1)) #\<)
|
||||||
|
(hash-set! render (cons (+ (* i 3) 2) (- (string-length line) 2)) #\<)
|
||||||
|
(for ([j (in-naturals)] [target (in-list (bbnode-branch bb))])
|
||||||
|
(hash-set! render
|
||||||
|
(cons (+ (* i 3) 1 j)
|
||||||
|
target)
|
||||||
|
(if (> target i) #\v #\^)))
|
||||||
|
(void))
|
||||||
|
|
||||||
|
;; convert matrix to string
|
||||||
|
(define w (add1 (apply max (map cdr (hash-keys render)))))
|
||||||
|
(define h (add1 (apply max (map car (hash-keys render)))))
|
||||||
|
(string-join
|
||||||
|
(for/list ([y (in-range h)])
|
||||||
|
(list->string
|
||||||
|
(for/list ([x (in-range w)])
|
||||||
|
(hash-ref render (cons y x) #\space))))
|
||||||
|
"\n"))
|
||||||
|
|
||||||
|
|
||||||
|
;;;;;;;; demo
|
||||||
|
|
||||||
|
(define prog
|
||||||
|
'((global i)
|
||||||
|
(global inc)
|
||||||
|
(global ten)
|
||||||
|
(label main)
|
||||||
|
(set! i 0)
|
||||||
|
(set! ten 10)
|
||||||
|
(set! inc 1)
|
||||||
|
(label loop)
|
||||||
|
(add! i inc)
|
||||||
|
(asm! "o" i)
|
||||||
|
(asm! "o" ten)
|
||||||
|
(ifeq i ten done loop)
|
||||||
|
(label done)
|
||||||
|
(asm! "\"hello world\"o")
|
||||||
|
(terminate)))
|
||||||
|
|
||||||
|
(displayln (compile prog))
|
Loading…
Reference in New Issue