shark-compiler/compiler.rkt

403 lines
13 KiB
Racket
Raw Normal View History

2020-07-20 06:47:58 +00:00
#lang racket
;; LLIR
;; defs
;; (global x)
;; (const x) ;; TODO
;; (set! x immediate)
;;
;; the only thing that accepts immediates is set!
;; all other ops need globals or consts
;; all globals, constants, and labels share a single namespace.
;; for example, there cannot be a global named helloworld and a label also named helloworld
;;
;; branching
;; (label x)
;; (goto x)
;; (ifeq a b x y)
;; note: implement ifne by swapping x,y
;; (iflt a b x y)
;; note: implement ifgt by swapping a,b
;; ifle by swapping a,b and x,y
;; ifge by swapping x,y
;; (terminate)
;;
;; arithmetic
;; (add! a b) a += b
;; (sub! a b) a -= b
;; (mul! a b) etc
;; (div! a b)
;; (mod! a b)
;; bitwise
;; (and! a b)
;; (or! a b)
;; (xor! a b)
;; (invert! a)
;;
;; target language
;; passes all args to the target assembler
;; (asm! ...)
;; (asmgoto! [x y] ...) ;; impl-defined branching operation
;; (prelude) implementation-defined prelude
;; predicates for opcodes
(define binary-branch-ops (set 'ifeq 'iflt))
(define (binary-branch-op? x)
(and (symbol? x) (set-member? binary-branch-ops x)))
(define binary-arith-ops
(set 'add! 'sub! 'mul! 'div! 'mod! 'and! 'or! 'xor!))
(define (binary-arith-op? x)
(and (symbol? x) (set-member? binary-arith-ops x)))
(define unary-arith-ops (set 'invert!))
(define (unary-arith-op? x)
(and (symbol? x) (set-member? unary-arith-ops x)))
;; low-level representation of a memory location
(struct mem-loc [value] #:transparent)
;; basic block node
;; contents:
;; - LLIR list with LLIR branch instruction
;; - target contents with struct branch
;; - target contents with list of branch labels
;; - target contents with list of branch numbers
(struct bbnode [contents branch] #:transparent)
;; low-level branch representation, with a target node and optional cleanup sequence
;; the setup and cleanup will get converted into basic blocks and merged such that each bb ends
;; directly with a branch
(struct branch [setup targets cleanups] #:transparent)
;; compiler context
(struct cctx [num-globals] #:transparent)
(define current-cctx (make-parameter #f))
;; validates LLIR for consistency
;; check every used symbol is a defined global or const
;; check every branch points to a valid label
;; validate args on each op
;; check that main label is present, start label is not present
(define (validate src)
(error "TODO")) ;; lmao
;; LLIR -> LLIR Int
;; assigns each global a number and returns total number of globals
(define (convert-globals src)
;; create src without the global declarations
(define new-src (filter (lambda (x) (not (symbol=? (first x) 'global))) src))
;; map globals to memory locations
(define globals (for/list ([x (in-list src)] #:when (symbol=? (first x) 'global)) (second x)))
(define global-map
(for/hash ([i (in-naturals)] [glob (in-list globals)])
(values glob (mem-loc i))))
(define (translate x) (hash-ref global-map x))
(define (update-arg x)
(if (hash-has-key? global-map x) (translate x) x))
(define (update-op op)
(match op
[(list 'set! target value)
(list 'set! (translate target)
(if (symbol? value) (translate value) value))]
[(list (? binary-branch-op? op) a b x y)
(list op (translate a) (translate b) x y)]
[(list (? binary-arith-op? op) a b)
(list op (translate a) (translate b))]
[(list (? unary-arith-op? op) a)
(list op (translate a))]
[(list 'asm! args ...)
(cons 'asm! (map update-arg args))]
[(list 'asmgoto! branches args ...)
(cons 'asmgoto! (cons branches (map update-arg args)))]
[any any]))
(values (map update-op new-src) (length globals)))
;; builds graph of basic blocks
(define (convert-bb src)
(define graph (make-hash))
(define (bb-commit! lab blk branch)
(hash-set! graph lab (bbnode blk branch)))
(define (convert-step this-label this-block src)
(match src
['() (bb-commit! this-label this-block (list 'terminate))]
[(cons (list 'label lab) rst)
(bb-commit! this-label this-block (list 'goto lab))
(convert-step lab '() rst)]
[(cons (list 'goto next) rst)
(bb-commit! this-label this-block (list 'goto next))
(convert-step (gensym) '() rst)]
[(cons (list (? binary-branch-op? op) a b x y) rst)
(bb-commit! this-label this-block (list op a b x y))
(convert-step (gensym) '() rst)]
[(cons (cons 'asmgoto! args) rst)
(bb-commit! this-label this-block (cons 'asmgoto! args))
(convert-step (gensym) '() rst)]
[(cons (list 'terminate) rst)
(bb-commit! this-label this-block (list 'terminate))
(convert-step (gensym) '() rst)]
[(cons fst rst)
(convert-step this-label (append this-block (list fst)) rst)]))
(define (graph-prune g)
(define (dfs! g seen label)
(unless (set-member? seen label)
(set-add! seen label)
(match (bbnode-branch (hash-ref g label))
[(list 'terminate) (void)]
[(list 'goto next) (dfs! g seen next)]
[(list (? binary-branch-op? op) _ _ x y)
(dfs! g seen x)
(dfs! g seen y)]
[(list 'asmgoto! branches args)
(for ([next (in-list branches)])
(dfs! g seen next))])))
(define marked (mutable-set))
(dfs! g marked 'start)
(for/hash ([(k v) (in-hash g)]
#:when (set-member? marked k))
(values k v)))
;; generate graph of bb nodes
;; insert prelude
(convert-step 'start '((prelude)) (cons '(goto main) src))
;; prune unreachable blocks
(graph-prune graph))
;; expands and simplifies target-compiled basic blocks
;; numbers each label and returns a vector of numbered blocks
(define (convert-flat graph)
(define new-graph (make-hash))
(for ([(k v) (in-hash graph)])
(match-define (bbnode contents (branch setup targets cleanups)) v)
(define cleanups-labels (map (lambda (_) (gensym)) cleanups))
(for ([target (in-list targets)] [cleanup (in-list cleanups)] [label (in-list cleanups-labels)])
(hash-set! new-graph label (bbnode cleanup (list target))))
(hash-set! new-graph k (merge-bb
(bbnode contents '())
(bbnode setup cleanups-labels))))
;; simplifies the bb graph to a smaller number of blocks, if possible
(define (simplify-graph!)
;; generate reverse edges map
(define graph-rev (make-hash))
(define (update-rev! label called-by)
(hash-update! graph-rev label (lambda (v) (set-add v called-by)) set))
(for ([(k v) (in-hash new-graph)])
(match-define (bbnode _ branches) v)
(for ([to (in-list branches)])
(update-rev! to k)))
;; for each edge A->B
;; if this is the only edge out of A and the only edge into B, then merge A and B
(define keys (hash-keys new-graph))
(for ([key (in-list keys)])
(define rev (hash-ref graph-rev key set))
(when (= 1 (set-count rev))
(define from-key (set-first rev))
(define from-node (hash-ref new-graph from-key))
(when (= 1 (length (bbnode-branch from-node)))
;; call target merge function
(define new-node (merge-bb from-node (hash-ref new-graph key)))
;; update graphs for merge
(hash-set! new-graph from-key new-node)
(hash-remove! new-graph key)
(hash-remove! graph-rev key)
(for ([next (in-list (bbnode-branch new-node))])
(hash-update! graph-rev next
(lambda (v) (set-map v (lambda (x) (if (equal? x key) from-key x))))
set))
(void)))))
(simplify-graph!)
;; create a vector of keys, ensure that start is at the front
(define new-keys
(list->vector
(let ([tmp (hash-keys new-graph)])
(cons 'start (filter (lambda (x) (not (symbol=? x 'start))) tmp)))))
(define keys-map (for/hash ([i (in-naturals)] [e (in-vector new-keys)]) (values e i)))
;; replace labels with numbers. now we have a low level representation of the target code ready
;; to pass to link-bb
(for/vector #:length (vector-length new-keys) ([e (in-vector new-keys)])
(match-define (bbnode contents bl) (hash-ref new-graph e))
(bbnode contents (map (lambda (x) (hash-ref keys-map x)) bl))))
;; steps:
;; 1. transform globals
;; 2. build bb graph
;; 3. assemble bb contents
;; 4. expand, merge, and flatten graph
;; 5. link
(define (compile prog)
(define-values (prog-t1 nglobals) (convert-globals prog))
(parameterize ([current-cctx (cctx nglobals)])
(define prog-t2 (convert-bb prog-t1))
(define prog-t3 (for/hash ([(k v) (in-hash prog-t2)]) (values k (assemble-bb v))))
(define prog-t4 (convert-flat prog-t3))
(define prog-final (link-bb prog-t4))
prog-final))
;;;;;;;; targeting
;; target implementation must define
;; - assemble-bb: assembles the contents of one basic block
;; - merge-bb: merges two compiled basic blocks
;; - link-bb: links basic blocks into final output
;; this targets an apparently befunge-inspired lang called laser
;; (https://github.com/Quintec/LaserLang)
(define (chr* c n)
(apply string (make-list n c)))
(define (mem->scratch loc)
(string-append
(chr* #\u (mem-loc-value loc))
"rs"
(chr* #\d (mem-loc-value loc))))
(define (scratch->mem loc)
(string-append
(chr* #\u (mem-loc-value loc))
"pUwD"
(chr* #\d (mem-loc-value loc))))
(define (basic-binary-op op)
(lambda (a b)
(string-append "UU" (mem->scratch a) (mem->scratch b) "U" op "D" (scratch->mem a) "DD")))
(define (basic-branch-op op)
(lambda (a b x y)
(branch
(string-append
"UU" (mem->scratch a) (mem->scratch b) "U" op "")
(list x y)
2020-07-20 06:47:58 +00:00
(list "pDDD" "pDDD"))))
(define assemblers
(hash
'prelude (lambda ()
(string-append "IUU" (chr* #\0 (cctx-num-globals (current-cctx))) "DD"))
2020-07-20 06:47:58 +00:00
'set! (lambda (target value)
(if (mem-loc? value)
(string-append "UU" (mem->scratch value) (scratch->mem target) "DD")
(string-append "UUU'" (number->string value) "'D" (scratch->mem target) "DD")))
'add! (basic-binary-op "+")
'sub! (basic-binary-op "-")
'mul! (basic-binary-op "×")
'div! (basic-binary-op "÷")
'mod! (basic-binary-op "%")
'and! (basic-binary-op "&")
'or! (basic-binary-op "|")
'xor! (lambda (a b) (error "unimplemented"))
'invert! (lambda (a)
(string-append "UU"
(chr* #\u (mem-loc-value a))
"~"
(chr* #\u (mem-loc-value a))
"DD"))
'asm! (lambda args
(match args
;; support input and output vars
[(list asm "r" r-vars ... "w" w-var)
(string-append
"UU"
(if r-vars
(apply string-append (map mem->scratch r-vars))
"")
"U" asm "D"
(if w-var
(scratch->mem w-var)
"")
"DD")]
[_ (error "invalid asm form")]))
2020-07-20 06:47:58 +00:00
'asmgoto! (lambda (branches . args) (error "unimplemented"))
'ifeq (basic-branch-op "=")
'iflt (basic-branch-op "l")
'goto (lambda (a) (branch "" (list a) (list "")))
'terminate (lambda () (branch "U#" '() '()))))
2020-07-20 06:47:58 +00:00
;; export
(define (assemble-bb bb)
;; algorithm: maintain the following stacks: input, output, memory, scratch
;; LLIR globals get allocated into memory
;; scratch is used to implement LLIR ops
(define (assemble-op op)
(match (hash-ref assemblers (first op) #f)
[#f (error "target doesn't implement" op)]
[func (apply func (rest op))]))
(match-define (bbnode contents branch) bb)
(bbnode (string-join (map assemble-op contents) "") (assemble-op branch)))
(define (merge-bb bb1 bb2)
(bbnode (string-append (bbnode-contents bb1) (bbnode-contents bb2)) (bbnode-branch bb2)))
; takes a vector of basic blocks and renders the result
(define (link-bb bbvec)
;; represent as sparse 2d matrix
(define render (make-hash))
;; algorithm
;; - each bb gets 3 lines, the first line is the bb, line 2 is normal return, line 3 is
;; branch return (if any)
;; - the left side is a switching matrix that links return lines to the next bb
(for ([i (in-naturals)] [bb (in-vector bbvec)])
(define line
(string-append
(chr* #\space i)
">"
(chr* #\space (- (vector-length bbvec) i 1))
(bbnode-contents bb)
"v"))
;; render line
(for ([j (in-naturals)] [c (in-string line)])
(hash-set! render (cons (* i 3) j) c))
;; insert returns
(hash-set! render (cons (+ (* i 3) 1) (- (string-length line) 1)) #\<)
(hash-set! render (cons (+ (* i 3) 2) (- (string-length line) 2)) #\<)
(for ([j (in-naturals)] [target (in-list (bbnode-branch bb))])
(hash-set! render
(cons (+ (* i 3) 1 j)
target)
(if (> target i) #\v #\^)))
(void))
;; convert matrix to string
(define w (add1 (apply max (map cdr (hash-keys render)))))
(define h (add1 (apply max (map car (hash-keys render)))))
(string-join
(for/list ([y (in-range h)])
(list->string
(for/list ([x (in-range w)])
(hash-ref render (cons y x) #\space))))
"\n"))
2020-07-21 13:57:48 +00:00
;;;;;;;; #lang
(define-syntax-rule (llir-module-begin EXPR ...)
(#%module-begin
(displayln (compile `(EXPR ...)))))
2020-07-20 06:47:58 +00:00
2020-07-21 13:57:48 +00:00
(provide (except-out (all-from-out racket) #%module-begin) ; probably overkill, w/e
(rename-out [llir-module-begin #%module-begin]))