#!/usr/bin/env racket #lang racket ;; LLIR ;; defs ;; (global x) ;; (const x) ;; TODO ;; (set! x immediate) ;; ;; the only thing that accepts immediates is set! ;; all other ops need globals or consts ;; all globals, constants, and labels share a single namespace. ;; for example, there cannot be a global named helloworld and a label also named helloworld ;; ;; branching ;; (label x) ;; (goto x) ;; (ifeq a b x y) ;; note: implement ifne by swapping x,y ;; (iflt a b x y) ;; note: implement ifgt by swapping a,b ;; ifle by swapping a,b and x,y ;; ifge by swapping x,y ;; (terminate) ;; ;; arithmetic ;; (add! a b) a += b ;; (sub! a b) a -= b ;; (mul! a b) etc ;; (div! a b) ;; (mod! a b) ;; bitwise ;; (and! a b) ;; (or! a b) ;; (xor! a b) ;; (invert! a) ;; ;; target language ;; passes all args to the target assembler ;; (asm! ...) ;; (asmgoto! [x y] ...) ;; impl-defined branching operation ;; (prelude) implementation-defined prelude ;; predicates for opcodes (define binary-branch-ops (set 'ifeq 'iflt)) (define (binary-branch-op? x) (and (symbol? x) (set-member? binary-branch-ops x))) (define binary-arith-ops (set 'add! 'sub! 'mul! 'div! 'mod! 'and! 'or! 'xor!)) (define (binary-arith-op? x) (and (symbol? x) (set-member? binary-arith-ops x))) (define unary-arith-ops (set 'invert!)) (define (unary-arith-op? x) (and (symbol? x) (set-member? unary-arith-ops x))) ;; low-level representation of a memory location (struct mem-loc [value] #:transparent) ;; basic block node ;; contents: ;; - LLIR list with LLIR branch instruction ;; - target contents with struct branch ;; - target contents with list of branch labels ;; - target contents with list of branch numbers (struct bbnode [contents branch] #:transparent) ;; low-level branch representation, with a target node and optional cleanup sequence ;; the setup and cleanup will get converted into basic blocks and merged such that each bb ends ;; directly with a branch (struct branch [setup targets cleanups] #:transparent) ;; compiler context (struct cctx [num-globals] #:transparent) (define current-cctx (make-parameter #f)) ;; validates LLIR for consistency ;; check every used symbol is a defined global or const ;; check every branch points to a valid label ;; validate args on each op ;; check that main label is present, start label is not present (define (validate src) (error "TODO")) ;; lmao ;; LLIR -> LLIR Int ;; assigns each global a number and returns total number of globals (define (convert-globals src) ;; create src without the global declarations (define new-src (filter (lambda (x) (not (symbol=? (first x) 'global))) src)) ;; map globals to memory locations (define globals (for/list ([x (in-list src)] #:when (symbol=? (first x) 'global)) (second x))) (define global-map (for/hash ([i (in-naturals)] [glob (in-list globals)]) (values glob (mem-loc i)))) (define (translate x) (hash-ref global-map x)) (define (update-arg x) (if (hash-has-key? global-map x) (translate x) x)) (define (update-op op) (match op [(list 'set! target value) (list 'set! (translate target) (if (symbol? value) (translate value) value))] [(list (? binary-branch-op? op) a b x y) (list op (translate a) (translate b) x y)] [(list (? binary-arith-op? op) a b) (list op (translate a) (translate b))] [(list (? unary-arith-op? op) a) (list op (translate a))] [(list 'asm! args ...) (cons 'asm! (map update-arg args))] [(list 'asmgoto! branches args ...) (cons 'asmgoto! (cons branches (map update-arg args)))] [any any])) (values (map update-op new-src) (length globals))) ;; builds graph of basic blocks (define (convert-bb src) (define graph (make-hash)) (define (bb-commit! lab blk branch) (hash-set! graph lab (bbnode blk branch))) (define (convert-step this-label this-block src) (match src ['() (bb-commit! this-label this-block (list 'terminate))] [(cons (list 'label lab) rst) (bb-commit! this-label this-block (list 'goto lab)) (convert-step lab '() rst)] [(cons (list 'goto next) rst) (bb-commit! this-label this-block (list 'goto next)) (convert-step (gensym) '() rst)] [(cons (list (? binary-branch-op? op) a b x y) rst) (bb-commit! this-label this-block (list op a b x y)) (convert-step (gensym) '() rst)] [(cons (cons 'asmgoto! args) rst) (bb-commit! this-label this-block (cons 'asmgoto! args)) (convert-step (gensym) '() rst)] [(cons (list 'terminate) rst) (bb-commit! this-label this-block (list 'terminate)) (convert-step (gensym) '() rst)] [(cons fst rst) (convert-step this-label (append this-block (list fst)) rst)])) (define (graph-prune g) (define (dfs! g seen label) (unless (set-member? seen label) (set-add! seen label) (match (bbnode-branch (hash-ref g label)) [(list 'terminate) (void)] [(list 'goto next) (dfs! g seen next)] [(list (? binary-branch-op? op) _ _ x y) (dfs! g seen x) (dfs! g seen y)] [(list 'asmgoto! branches args) (for ([next (in-list branches)]) (dfs! g seen next))]))) (define marked (mutable-set)) (dfs! g marked 'start) (for/hash ([(k v) (in-hash g)] #:when (set-member? marked k)) (values k v))) ;; generate graph of bb nodes ;; insert prelude (convert-step 'start '((prelude)) (cons '(goto main) src)) ;; prune unreachable blocks (graph-prune graph)) ;; expands and simplifies target-compiled basic blocks ;; numbers each label and returns a vector of numbered blocks (define (convert-flat graph) (define new-graph (make-hash)) (for ([(k v) (in-hash graph)]) (match-define (bbnode contents (branch setup targets cleanups)) v) (define cleanups-labels (map (lambda (_) (gensym)) cleanups)) (for ([target (in-list targets)] [cleanup (in-list cleanups)] [label (in-list cleanups-labels)]) (hash-set! new-graph label (bbnode cleanup (list target)))) (hash-set! new-graph k (merge-bb (bbnode contents '()) (bbnode setup cleanups-labels)))) ;; simplifies the bb graph to a smaller number of blocks, if possible (define (simplify-graph!) ;; generate reverse edges map (define graph-rev (make-hash)) (define (update-rev! label called-by) (hash-update! graph-rev label (lambda (v) (set-add v called-by)) set)) (for ([(k v) (in-hash new-graph)]) (match-define (bbnode _ branches) v) (for ([to (in-list branches)]) (update-rev! to k))) ;; for each edge A->B ;; if this is the only edge out of A and the only edge into B, then merge A and B (define keys (hash-keys new-graph)) (for ([key (in-list keys)]) (define rev (hash-ref graph-rev key set)) (when (= 1 (set-count rev)) (define from-key (set-first rev)) (define from-node (hash-ref new-graph from-key)) (when (= 1 (length (bbnode-branch from-node))) ;; call target merge function (define new-node (merge-bb from-node (hash-ref new-graph key))) ;; update graphs for merge (hash-set! new-graph from-key new-node) (hash-remove! new-graph key) (hash-remove! graph-rev key) (for ([next (in-list (bbnode-branch new-node))]) (hash-update! graph-rev next (lambda (v) (set-map v (lambda (x) (if (equal? x key) from-key x)))) set)) (void))))) (simplify-graph!) ;; create a vector of keys, ensure that start is at the front (define new-keys (list->vector (let ([tmp (hash-keys new-graph)]) (cons 'start (filter (lambda (x) (not (symbol=? x 'start))) tmp))))) (define keys-map (for/hash ([i (in-naturals)] [e (in-vector new-keys)]) (values e i))) ;; replace labels with numbers. now we have a low level representation of the target code ready ;; to pass to link-bb (for/vector #:length (vector-length new-keys) ([e (in-vector new-keys)]) (match-define (bbnode contents bl) (hash-ref new-graph e)) (bbnode contents (map (lambda (x) (hash-ref keys-map x)) bl)))) ;; steps: ;; 1. transform globals ;; 2. build bb graph ;; 3. assemble bb contents ;; 4. expand, merge, and flatten graph ;; 5. link (define (compile prog) (define-values (prog-t1 nglobals) (convert-globals prog)) (parameterize ([current-cctx (cctx nglobals)]) (define prog-t2 (convert-bb prog-t1)) (define prog-t3 (for/hash ([(k v) (in-hash prog-t2)]) (values k (assemble-bb v)))) (define prog-t4 (convert-flat prog-t3)) (define prog-final (link-bb prog-t4)) prog-final)) ;;;;;;;; targeting ;; target implementation must define ;; - assemble-bb: assembles the contents of one basic block ;; - merge-bb: merges two compiled basic blocks ;; - link-bb: links basic blocks into final output ;; this targets an apparently befunge-inspired lang called laser ;; (https://github.com/Quintec/LaserLang) (define (chr* c n) (apply string (make-list n c))) (define (mem->scratch loc) (string-append (chr* #\u (mem-loc-value loc)) "rs" (chr* #\d (mem-loc-value loc)))) (define (scratch->mem loc) (string-append (chr* #\u (mem-loc-value loc)) "pUwD" (chr* #\d (mem-loc-value loc)))) (define (basic-binary-op op) (lambda (a b) (string-append "UU" (mem->scratch a) (mem->scratch b) "U" op "D" (scratch->mem a) "DD"))) (define (basic-branch-op op) (lambda (a b x y) (branch (string-append "UU" (mem->scratch a) (mem->scratch b) "U" op "⌝") (list y x) (list "pDDD" "pDDD")))) (define assemblers (hash 'prelude (lambda () (string-append "UU" (chr* #\0 (cctx-num-globals (current-cctx))) "DD")) 'set! (lambda (target value) (if (mem-loc? value) (string-append "UU" (mem->scratch value) (scratch->mem target) "DD") (string-append "UUU'" (number->string value) "'D" (scratch->mem target) "DD"))) 'add! (basic-binary-op "+") 'sub! (basic-binary-op "-") 'mul! (basic-binary-op "×") 'div! (basic-binary-op "÷") 'mod! (basic-binary-op "%") 'and! (basic-binary-op "&") 'or! (basic-binary-op "|") 'xor! (lambda (a b) (error "unimplemented")) 'invert! (lambda (a) (string-append "UU" (chr* #\u (mem-loc-value a)) "~" (chr* #\u (mem-loc-value a)) "DD")) 'asm! (lambda args (define op (first args)) (string-append "UU" (apply string-append (map mem->scratch (rest args))) "U" op "DDD")) 'asmgoto! (lambda (branches . args) (error "unimplemented")) 'ifeq (basic-branch-op "=") 'iflt (basic-branch-op "l") 'goto (lambda (a) (branch "" (list a) (list ""))) 'terminate (lambda () (branch "#" '() '())))) ;; export (define (assemble-bb bb) ;; algorithm: maintain the following stacks: input, output, memory, scratch ;; LLIR globals get allocated into memory ;; scratch is used to implement LLIR ops (define (assemble-op op) (match (hash-ref assemblers (first op) #f) [#f (error "target doesn't implement" op)] [func (apply func (rest op))])) (match-define (bbnode contents branch) bb) (bbnode (string-join (map assemble-op contents) "") (assemble-op branch))) (define (merge-bb bb1 bb2) (bbnode (string-append (bbnode-contents bb1) (bbnode-contents bb2)) (bbnode-branch bb2))) ; takes a vector of basic blocks and renders the result (define (link-bb bbvec) ;; represent as sparse 2d matrix (define render (make-hash)) ;; algorithm ;; - each bb gets 3 lines, the first line is the bb, line 2 is normal return, line 3 is ;; branch return (if any) ;; - the left side is a switching matrix that links return lines to the next bb (for ([i (in-naturals)] [bb (in-vector bbvec)]) (define line (string-append (chr* #\space i) ">" (chr* #\space (- (vector-length bbvec) i 1)) (bbnode-contents bb) "v")) ;; render line (for ([j (in-naturals)] [c (in-string line)]) (hash-set! render (cons (* i 3) j) c)) ;; insert returns (hash-set! render (cons (+ (* i 3) 1) (- (string-length line) 1)) #\<) (hash-set! render (cons (+ (* i 3) 2) (- (string-length line) 2)) #\<) (for ([j (in-naturals)] [target (in-list (bbnode-branch bb))]) (hash-set! render (cons (+ (* i 3) 1 j) target) (if (> target i) #\v #\^))) (void)) ;; convert matrix to string (define w (add1 (apply max (map cdr (hash-keys render))))) (define h (add1 (apply max (map car (hash-keys render))))) (string-join (for/list ([y (in-range h)]) (list->string (for/list ([x (in-range w)]) (hash-ref render (cons y x) #\space)))) "\n")) ;;;;;;;; demo (define prog '((global i) (global inc) (global ten) (label main) (set! i 0) (set! ten 10) (set! inc 1) (label loop) (add! i inc) (asm! "o" i) (asm! "o" ten) (ifeq i ten done loop) (label done) (asm! "\"hello world\"o") (terminate))) (displayln (compile prog))