shark-compiler/compiler.rkt

403 lines
13 KiB
Racket
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#lang racket
;; LLIR
;; defs
;; (global x)
;; (const x) ;; TODO
;; (set! x immediate)
;;
;; the only thing that accepts immediates is set!
;; all other ops need globals or consts
;; all globals, constants, and labels share a single namespace.
;; for example, there cannot be a global named helloworld and a label also named helloworld
;;
;; branching
;; (label x)
;; (goto x)
;; (ifeq a b x y)
;; note: implement ifne by swapping x,y
;; (iflt a b x y)
;; note: implement ifgt by swapping a,b
;; ifle by swapping a,b and x,y
;; ifge by swapping x,y
;; (terminate)
;;
;; arithmetic
;; (add! a b) a += b
;; (sub! a b) a -= b
;; (mul! a b) etc
;; (div! a b)
;; (mod! a b)
;; bitwise
;; (and! a b)
;; (or! a b)
;; (xor! a b)
;; (invert! a)
;;
;; target language
;; passes all args to the target assembler
;; (asm! ...)
;; (asmgoto! [x y] ...) ;; impl-defined branching operation
;; (prelude) implementation-defined prelude
;; predicates for opcodes
(define binary-branch-ops (set 'ifeq 'iflt))
(define (binary-branch-op? x)
(and (symbol? x) (set-member? binary-branch-ops x)))
(define binary-arith-ops
(set 'add! 'sub! 'mul! 'div! 'mod! 'and! 'or! 'xor!))
(define (binary-arith-op? x)
(and (symbol? x) (set-member? binary-arith-ops x)))
(define unary-arith-ops (set 'invert!))
(define (unary-arith-op? x)
(and (symbol? x) (set-member? unary-arith-ops x)))
;; low-level representation of a memory location
(struct mem-loc [value] #:transparent)
;; basic block node
;; contents:
;; - LLIR list with LLIR branch instruction
;; - target contents with struct branch
;; - target contents with list of branch labels
;; - target contents with list of branch numbers
(struct bbnode [contents branch] #:transparent)
;; low-level branch representation, with a target node and optional cleanup sequence
;; the setup and cleanup will get converted into basic blocks and merged such that each bb ends
;; directly with a branch
(struct branch [setup targets cleanups] #:transparent)
;; compiler context
(struct cctx [num-globals] #:transparent)
(define current-cctx (make-parameter #f))
;; validates LLIR for consistency
;; check every used symbol is a defined global or const
;; check every branch points to a valid label
;; validate args on each op
;; check that main label is present, start label is not present
(define (validate src)
(error "TODO")) ;; lmao
;; LLIR -> LLIR Int
;; assigns each global a number and returns total number of globals
(define (convert-globals src)
;; create src without the global declarations
(define new-src (filter (lambda (x) (not (symbol=? (first x) 'global))) src))
;; map globals to memory locations
(define globals (for/list ([x (in-list src)] #:when (symbol=? (first x) 'global)) (second x)))
(define global-map
(for/hash ([i (in-naturals)] [glob (in-list globals)])
(values glob (mem-loc i))))
(define (translate x) (hash-ref global-map x))
(define (update-arg x)
(if (hash-has-key? global-map x) (translate x) x))
(define (update-op op)
(match op
[(list 'set! target value)
(list 'set! (translate target)
(if (symbol? value) (translate value) value))]
[(list (? binary-branch-op? op) a b x y)
(list op (translate a) (translate b) x y)]
[(list (? binary-arith-op? op) a b)
(list op (translate a) (translate b))]
[(list (? unary-arith-op? op) a)
(list op (translate a))]
[(list 'asm! args ...)
(cons 'asm! (map update-arg args))]
[(list 'asmgoto! branches args ...)
(cons 'asmgoto! (cons branches (map update-arg args)))]
[any any]))
(values (map update-op new-src) (length globals)))
;; builds graph of basic blocks
(define (convert-bb src)
(define graph (make-hash))
(define (bb-commit! lab blk branch)
(hash-set! graph lab (bbnode blk branch)))
(define (convert-step this-label this-block src)
(match src
['() (bb-commit! this-label this-block (list 'terminate))]
[(cons (list 'label lab) rst)
(bb-commit! this-label this-block (list 'goto lab))
(convert-step lab '() rst)]
[(cons (list 'goto next) rst)
(bb-commit! this-label this-block (list 'goto next))
(convert-step (gensym) '() rst)]
[(cons (list (? binary-branch-op? op) a b x y) rst)
(bb-commit! this-label this-block (list op a b x y))
(convert-step (gensym) '() rst)]
[(cons (cons 'asmgoto! args) rst)
(bb-commit! this-label this-block (cons 'asmgoto! args))
(convert-step (gensym) '() rst)]
[(cons (list 'terminate) rst)
(bb-commit! this-label this-block (list 'terminate))
(convert-step (gensym) '() rst)]
[(cons fst rst)
(convert-step this-label (append this-block (list fst)) rst)]))
(define (graph-prune g)
(define (dfs! g seen label)
(unless (set-member? seen label)
(set-add! seen label)
(match (bbnode-branch (hash-ref g label))
[(list 'terminate) (void)]
[(list 'goto next) (dfs! g seen next)]
[(list (? binary-branch-op? op) _ _ x y)
(dfs! g seen x)
(dfs! g seen y)]
[(list 'asmgoto! branches args)
(for ([next (in-list branches)])
(dfs! g seen next))])))
(define marked (mutable-set))
(dfs! g marked 'start)
(for/hash ([(k v) (in-hash g)]
#:when (set-member? marked k))
(values k v)))
;; generate graph of bb nodes
;; insert prelude
(convert-step 'start '((prelude)) (cons '(goto main) src))
;; prune unreachable blocks
(graph-prune graph))
;; expands and simplifies target-compiled basic blocks
;; numbers each label and returns a vector of numbered blocks
(define (convert-flat graph)
(define new-graph (make-hash))
(for ([(k v) (in-hash graph)])
(match-define (bbnode contents (branch setup targets cleanups)) v)
(define cleanups-labels (map (lambda (_) (gensym)) cleanups))
(for ([target (in-list targets)] [cleanup (in-list cleanups)] [label (in-list cleanups-labels)])
(hash-set! new-graph label (bbnode cleanup (list target))))
(hash-set! new-graph k (merge-bb
(bbnode contents '())
(bbnode setup cleanups-labels))))
;; simplifies the bb graph to a smaller number of blocks, if possible
(define (simplify-graph!)
;; generate reverse edges map
(define graph-rev (make-hash))
(define (update-rev! label called-by)
(hash-update! graph-rev label (lambda (v) (set-add v called-by)) set))
(for ([(k v) (in-hash new-graph)])
(match-define (bbnode _ branches) v)
(for ([to (in-list branches)])
(update-rev! to k)))
;; for each edge A->B
;; if this is the only edge out of A and the only edge into B, then merge A and B
(define keys (hash-keys new-graph))
(for ([key (in-list keys)])
(define rev (hash-ref graph-rev key set))
(when (= 1 (set-count rev))
(define from-key (set-first rev))
(define from-node (hash-ref new-graph from-key))
(when (= 1 (length (bbnode-branch from-node)))
;; call target merge function
(define new-node (merge-bb from-node (hash-ref new-graph key)))
;; update graphs for merge
(hash-set! new-graph from-key new-node)
(hash-remove! new-graph key)
(hash-remove! graph-rev key)
(for ([next (in-list (bbnode-branch new-node))])
(hash-update! graph-rev next
(lambda (v) (set-map v (lambda (x) (if (equal? x key) from-key x))))
set))
(void)))))
(simplify-graph!)
;; create a vector of keys, ensure that start is at the front
(define new-keys
(list->vector
(let ([tmp (hash-keys new-graph)])
(cons 'start (filter (lambda (x) (not (symbol=? x 'start))) tmp)))))
(define keys-map (for/hash ([i (in-naturals)] [e (in-vector new-keys)]) (values e i)))
;; replace labels with numbers. now we have a low level representation of the target code ready
;; to pass to link-bb
(for/vector #:length (vector-length new-keys) ([e (in-vector new-keys)])
(match-define (bbnode contents bl) (hash-ref new-graph e))
(bbnode contents (map (lambda (x) (hash-ref keys-map x)) bl))))
;; steps:
;; 1. transform globals
;; 2. build bb graph
;; 3. assemble bb contents
;; 4. expand, merge, and flatten graph
;; 5. link
(define (compile prog)
(define-values (prog-t1 nglobals) (convert-globals prog))
(parameterize ([current-cctx (cctx nglobals)])
(define prog-t2 (convert-bb prog-t1))
(define prog-t3 (for/hash ([(k v) (in-hash prog-t2)]) (values k (assemble-bb v))))
(define prog-t4 (convert-flat prog-t3))
(define prog-final (link-bb prog-t4))
prog-final))
;;;;;;;; targeting
;; target implementation must define
;; - assemble-bb: assembles the contents of one basic block
;; - merge-bb: merges two compiled basic blocks
;; - link-bb: links basic blocks into final output
;; this targets an apparently befunge-inspired lang called laser
;; (https://github.com/Quintec/LaserLang)
(define (chr* c n)
(apply string (make-list n c)))
(define (mem->scratch loc)
(string-append
(chr* #\u (mem-loc-value loc))
"rs"
(chr* #\d (mem-loc-value loc))))
(define (scratch->mem loc)
(string-append
(chr* #\u (mem-loc-value loc))
"pUwD"
(chr* #\d (mem-loc-value loc))))
(define (basic-binary-op op)
(lambda (a b)
(string-append "UU" (mem->scratch a) (mem->scratch b) "U" op "D" (scratch->mem a) "DD")))
(define (basic-branch-op op)
(lambda (a b x y)
(branch
(string-append
"UU" (mem->scratch a) (mem->scratch b) "U" op "")
(list x y)
(list "pDDD" "pDDD"))))
(define assemblers
(hash
'prelude (lambda ()
(string-append "IUU" (chr* #\0 (cctx-num-globals (current-cctx))) "DD"))
'set! (lambda (target value)
(if (mem-loc? value)
(string-append "UU" (mem->scratch value) (scratch->mem target) "DD")
(string-append "UUU'" (number->string value) "'D" (scratch->mem target) "DD")))
'add! (basic-binary-op "+")
'sub! (basic-binary-op "-")
'mul! (basic-binary-op "×")
'div! (basic-binary-op "÷")
'mod! (basic-binary-op "%")
'and! (basic-binary-op "&")
'or! (basic-binary-op "|")
'xor! (lambda (a b) (error "unimplemented"))
'invert! (lambda (a)
(string-append "UU"
(chr* #\u (mem-loc-value a))
"~"
(chr* #\u (mem-loc-value a))
"DD"))
'asm! (lambda args
(match args
;; support input and output vars
[(list asm "r" r-vars ... "w" w-var)
(string-append
"UU"
(if r-vars
(apply string-append (map mem->scratch r-vars))
"")
"U" asm "D"
(if w-var
(scratch->mem w-var)
"")
"DD")]
[_ (error "invalid asm form")]))
'asmgoto! (lambda (branches . args) (error "unimplemented"))
'ifeq (basic-branch-op "=")
'iflt (basic-branch-op "l")
'goto (lambda (a) (branch "" (list a) (list "")))
'terminate (lambda () (branch "U#" '() '()))))
;; export
(define (assemble-bb bb)
;; algorithm: maintain the following stacks: input, output, memory, scratch
;; LLIR globals get allocated into memory
;; scratch is used to implement LLIR ops
(define (assemble-op op)
(match (hash-ref assemblers (first op) #f)
[#f (error "target doesn't implement" op)]
[func (apply func (rest op))]))
(match-define (bbnode contents branch) bb)
(bbnode (string-join (map assemble-op contents) "") (assemble-op branch)))
(define (merge-bb bb1 bb2)
(bbnode (string-append (bbnode-contents bb1) (bbnode-contents bb2)) (bbnode-branch bb2)))
; takes a vector of basic blocks and renders the result
(define (link-bb bbvec)
;; represent as sparse 2d matrix
(define render (make-hash))
;; algorithm
;; - each bb gets 3 lines, the first line is the bb, line 2 is normal return, line 3 is
;; branch return (if any)
;; - the left side is a switching matrix that links return lines to the next bb
(for ([i (in-naturals)] [bb (in-vector bbvec)])
(define line
(string-append
(chr* #\space i)
">"
(chr* #\space (- (vector-length bbvec) i 1))
(bbnode-contents bb)
"v"))
;; render line
(for ([j (in-naturals)] [c (in-string line)])
(hash-set! render (cons (* i 3) j) c))
;; insert returns
(hash-set! render (cons (+ (* i 3) 1) (- (string-length line) 1)) #\<)
(hash-set! render (cons (+ (* i 3) 2) (- (string-length line) 2)) #\<)
(for ([j (in-naturals)] [target (in-list (bbnode-branch bb))])
(hash-set! render
(cons (+ (* i 3) 1 j)
target)
(if (> target i) #\v #\^)))
(void))
;; convert matrix to string
(define w (add1 (apply max (map cdr (hash-keys render)))))
(define h (add1 (apply max (map car (hash-keys render)))))
(string-join
(for/list ([y (in-range h)])
(list->string
(for/list ([x (in-range w)])
(hash-ref render (cons y x) #\space))))
"\n"))
;;;;;;;; #lang
(define-syntax-rule (llir-module-begin EXPR ...)
(#%module-begin
(displayln (compile `(EXPR ...)))))
(provide (except-out (all-from-out racket) #%module-begin) ; probably overkill, w/e
(rename-out [llir-module-begin #%module-begin]))