From 02e439fd574b153bb5f58ceebcbbed941a5405be Mon Sep 17 00:00:00 2001 From: haskal Date: Mon, 20 Jul 2020 02:47:58 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=A6=88=20initial=20commit?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 3 + README.md | 7 + compiler.rkt | 408 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 418 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100755 compiler.rkt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0af6c41 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +*.zo +*.rktd +*.rkt~ diff --git a/README.md b/README.md new file mode 100644 index 0000000..dee6e4b --- /dev/null +++ b/README.md @@ -0,0 +1,7 @@ +# shark compiler + +i wanted to make a framework for extremely retargetable esolang compilation so i made one + +currently this defines a low-level IR and compilation framework for targeting. in the future, there +will be a more usable language that compiles to the LLIR. and polyfills in case targets don't +implement operations, etc diff --git a/compiler.rkt b/compiler.rkt new file mode 100755 index 0000000..e01dcb0 --- /dev/null +++ b/compiler.rkt @@ -0,0 +1,408 @@ +#!/usr/bin/env racket +#lang racket + +;; LLIR +;; defs +;; (global x) +;; (const x) ;; TODO +;; (set! x immediate) +;; +;; the only thing that accepts immediates is set! +;; all other ops need globals or consts +;; all globals, constants, and labels share a single namespace. +;; for example, there cannot be a global named helloworld and a label also named helloworld +;; +;; branching +;; (label x) +;; (goto x) +;; (ifeq a b x y) +;; note: implement ifne by swapping x,y +;; (iflt a b x y) +;; note: implement ifgt by swapping a,b +;; ifle by swapping a,b and x,y +;; ifge by swapping x,y +;; (terminate) +;; +;; arithmetic +;; (add! a b) a += b +;; (sub! a b) a -= b +;; (mul! a b) etc +;; (div! a b) +;; (mod! a b) +;; bitwise +;; (and! a b) +;; (or! a b) +;; (xor! a b) +;; (invert! a) +;; +;; target language +;; passes all args to the target assembler +;; (asm! ...) +;; (asmgoto! [x y] ...) ;; impl-defined branching operation +;; (prelude) implementation-defined prelude + +;; predicates for opcodes + +(define binary-branch-ops (set 'ifeq 'iflt)) +(define (binary-branch-op? x) + (and (symbol? x) (set-member? binary-branch-ops x))) + +(define binary-arith-ops + (set 'add! 'sub! 'mul! 'div! 'mod! 'and! 'or! 'xor!)) +(define (binary-arith-op? x) + (and (symbol? x) (set-member? binary-arith-ops x))) + +(define unary-arith-ops (set 'invert!)) +(define (unary-arith-op? x) + (and (symbol? x) (set-member? unary-arith-ops x))) + + +;; low-level representation of a memory location +(struct mem-loc [value] #:transparent) + +;; basic block node +;; contents: +;; - LLIR list with LLIR branch instruction +;; - target contents with struct branch +;; - target contents with list of branch labels +;; - target contents with list of branch numbers +(struct bbnode [contents branch] #:transparent) + +;; low-level branch representation, with a target node and optional cleanup sequence +;; the setup and cleanup will get converted into basic blocks and merged such that each bb ends +;; directly with a branch +(struct branch [setup targets cleanups] #:transparent) + +;; compiler context +(struct cctx [num-globals] #:transparent) +(define current-cctx (make-parameter #f)) + + +;; validates LLIR for consistency +;; check every used symbol is a defined global or const +;; check every branch points to a valid label +;; validate args on each op +;; check that main label is present, start label is not present +(define (validate src) + (error "TODO")) ;; lmao + + +;; LLIR -> LLIR Int +;; assigns each global a number and returns total number of globals +(define (convert-globals src) + ;; create src without the global declarations + (define new-src (filter (lambda (x) (not (symbol=? (first x) 'global))) src)) + ;; map globals to memory locations + (define globals (for/list ([x (in-list src)] #:when (symbol=? (first x) 'global)) (second x))) + (define global-map + (for/hash ([i (in-naturals)] [glob (in-list globals)]) + (values glob (mem-loc i)))) + + (define (translate x) (hash-ref global-map x)) + + (define (update-arg x) + (if (hash-has-key? global-map x) (translate x) x)) + + (define (update-op op) + (match op + [(list 'set! target value) + (list 'set! (translate target) + (if (symbol? value) (translate value) value))] + [(list (? binary-branch-op? op) a b x y) + (list op (translate a) (translate b) x y)] + [(list (? binary-arith-op? op) a b) + (list op (translate a) (translate b))] + [(list (? unary-arith-op? op) a) + (list op (translate a))] + [(list 'asm! args ...) + (cons 'asm! (map update-arg args))] + [(list 'asmgoto! branches args ...) + (cons 'asmgoto! (cons branches (map update-arg args)))] + [any any])) + + (values (map update-op new-src) (length globals))) + + +;; builds graph of basic blocks +(define (convert-bb src) + (define graph (make-hash)) + + (define (bb-commit! lab blk branch) + (hash-set! graph lab (bbnode blk branch))) + + (define (convert-step this-label this-block src) + (match src + ['() (bb-commit! this-label this-block (list 'terminate))] + [(cons (list 'label lab) rst) + (bb-commit! this-label this-block (list 'goto lab)) + (convert-step lab '() rst)] + [(cons (list 'goto next) rst) + (bb-commit! this-label this-block (list 'goto next)) + (convert-step (gensym) '() rst)] + [(cons (list (? binary-branch-op? op) a b x y) rst) + (bb-commit! this-label this-block (list op a b x y)) + (convert-step (gensym) '() rst)] + [(cons (cons 'asmgoto! args) rst) + (bb-commit! this-label this-block (cons 'asmgoto! args)) + (convert-step (gensym) '() rst)] + [(cons (list 'terminate) rst) + (bb-commit! this-label this-block (list 'terminate)) + (convert-step (gensym) '() rst)] + [(cons fst rst) + (convert-step this-label (append this-block (list fst)) rst)])) + + (define (graph-prune g) + (define (dfs! g seen label) + (unless (set-member? seen label) + (set-add! seen label) + (match (bbnode-branch (hash-ref g label)) + [(list 'terminate) (void)] + [(list 'goto next) (dfs! g seen next)] + [(list (? binary-branch-op? op) _ _ x y) + (dfs! g seen x) + (dfs! g seen y)] + [(list 'asmgoto! branches args) + (for ([next (in-list branches)]) + (dfs! g seen next))]))) + (define marked (mutable-set)) + (dfs! g marked 'start) + (for/hash ([(k v) (in-hash g)] + #:when (set-member? marked k)) + (values k v))) + + ;; generate graph of bb nodes + ;; insert prelude + (convert-step 'start '((prelude)) (cons '(goto main) src)) + + ;; prune unreachable blocks + (graph-prune graph)) + + +;; expands and simplifies target-compiled basic blocks +;; numbers each label and returns a vector of numbered blocks +(define (convert-flat graph) + (define new-graph (make-hash)) + + (for ([(k v) (in-hash graph)]) + (match-define (bbnode contents (branch setup targets cleanups)) v) + (define cleanups-labels (map (lambda (_) (gensym)) cleanups)) + (for ([target (in-list targets)] [cleanup (in-list cleanups)] [label (in-list cleanups-labels)]) + (hash-set! new-graph label (bbnode cleanup (list target)))) + (hash-set! new-graph k (merge-bb + (bbnode contents '()) + (bbnode setup cleanups-labels)))) + + ;; simplifies the bb graph to a smaller number of blocks, if possible + (define (simplify-graph!) + ;; generate reverse edges map + (define graph-rev (make-hash)) + (define (update-rev! label called-by) + (hash-update! graph-rev label (lambda (v) (set-add v called-by)) set)) + + (for ([(k v) (in-hash new-graph)]) + (match-define (bbnode _ branches) v) + (for ([to (in-list branches)]) + (update-rev! to k))) + + ;; for each edge A->B + ;; if this is the only edge out of A and the only edge into B, then merge A and B + (define keys (hash-keys new-graph)) + (for ([key (in-list keys)]) + (define rev (hash-ref graph-rev key set)) + (when (= 1 (set-count rev)) + (define from-key (set-first rev)) + (define from-node (hash-ref new-graph from-key)) + (when (= 1 (length (bbnode-branch from-node))) + ;; call target merge function + (define new-node (merge-bb from-node (hash-ref new-graph key))) + ;; update graphs for merge + (hash-set! new-graph from-key new-node) + (hash-remove! new-graph key) + (hash-remove! graph-rev key) + (for ([next (in-list (bbnode-branch new-node))]) + (hash-update! graph-rev next + (lambda (v) (set-map v (lambda (x) (if (equal? x key) from-key x)))) + set)) + (void))))) + + (simplify-graph!) + + ;; create a vector of keys, ensure that start is at the front + (define new-keys + (list->vector + (let ([tmp (hash-keys new-graph)]) + (cons 'start (filter (lambda (x) (not (symbol=? x 'start))) tmp))))) + (define keys-map (for/hash ([i (in-naturals)] [e (in-vector new-keys)]) (values e i))) + + ;; replace labels with numbers. now we have a low level representation of the target code ready + ;; to pass to link-bb + (for/vector #:length (vector-length new-keys) ([e (in-vector new-keys)]) + (match-define (bbnode contents bl) (hash-ref new-graph e)) + (bbnode contents (map (lambda (x) (hash-ref keys-map x)) bl)))) + + +;; steps: +;; 1. transform globals +;; 2. build bb graph +;; 3. assemble bb contents +;; 4. expand, merge, and flatten graph +;; 5. link + +(define (compile prog) + (define-values (prog-t1 nglobals) (convert-globals prog)) + (parameterize ([current-cctx (cctx nglobals)]) + (define prog-t2 (convert-bb prog-t1)) + (define prog-t3 (for/hash ([(k v) (in-hash prog-t2)]) (values k (assemble-bb v)))) + (define prog-t4 (convert-flat prog-t3)) + (define prog-final (link-bb prog-t4)) + prog-final)) + + +;;;;;;;; targeting +;; target implementation must define +;; - assemble-bb: assembles the contents of one basic block +;; - merge-bb: merges two compiled basic blocks +;; - link-bb: links basic blocks into final output + +;; this targets an apparently befunge-inspired lang called laser +;; (https://github.com/Quintec/LaserLang) + +(define (chr* c n) + (apply string (make-list n c))) + +(define (mem->scratch loc) + (string-append + (chr* #\u (mem-loc-value loc)) + "rs" + (chr* #\d (mem-loc-value loc)))) + +(define (scratch->mem loc) + (string-append + (chr* #\u (mem-loc-value loc)) + "pUwD" + (chr* #\d (mem-loc-value loc)))) + +(define (basic-binary-op op) + (lambda (a b) + (string-append "UU" (mem->scratch a) (mem->scratch b) "U" op "D" (scratch->mem a) "DD"))) + +(define (basic-branch-op op) + (lambda (a b x y) + (branch + (string-append + "UU" (mem->scratch a) (mem->scratch b) "U" op "⌝") + (list y x) + (list "pDDD" "pDDD")))) + +(define assemblers + (hash + 'prelude (lambda () + (string-append "UU" (chr* #\0 (cctx-num-globals (current-cctx))) "DD")) + 'set! (lambda (target value) + (if (mem-loc? value) + (string-append "UU" (mem->scratch value) (scratch->mem target) "DD") + (string-append "UUU'" (number->string value) "'D" (scratch->mem target) "DD"))) + 'add! (basic-binary-op "+") + 'sub! (basic-binary-op "-") + 'mul! (basic-binary-op "×") + 'div! (basic-binary-op "÷") + 'mod! (basic-binary-op "%") + 'and! (basic-binary-op "&") + 'or! (basic-binary-op "|") + 'xor! (lambda (a b) (error "unimplemented")) + 'invert! (lambda (a) + (string-append "UU" + (chr* #\u (mem-loc-value a)) + "~" + (chr* #\u (mem-loc-value a)) + "DD")) + 'asm! (lambda args + (define op (first args)) + (string-append + "UU" + (apply string-append (map mem->scratch (rest args))) + "U" op "DDD")) + 'asmgoto! (lambda (branches . args) (error "unimplemented")) + 'ifeq (basic-branch-op "=") + 'iflt (basic-branch-op "l") + 'goto (lambda (a) (branch "" (list a) (list ""))) + 'terminate (lambda () (branch "#" '() '())))) + +;; export + +(define (assemble-bb bb) + ;; algorithm: maintain the following stacks: input, output, memory, scratch + ;; LLIR globals get allocated into memory + ;; scratch is used to implement LLIR ops + (define (assemble-op op) + (match (hash-ref assemblers (first op) #f) + [#f (error "target doesn't implement" op)] + [func (apply func (rest op))])) + (match-define (bbnode contents branch) bb) + (bbnode (string-join (map assemble-op contents) "") (assemble-op branch))) + + +(define (merge-bb bb1 bb2) + (bbnode (string-append (bbnode-contents bb1) (bbnode-contents bb2)) (bbnode-branch bb2))) + + +; takes a vector of basic blocks and renders the result +(define (link-bb bbvec) + ;; represent as sparse 2d matrix + (define render (make-hash)) + ;; algorithm + ;; - each bb gets 3 lines, the first line is the bb, line 2 is normal return, line 3 is + ;; branch return (if any) + ;; - the left side is a switching matrix that links return lines to the next bb + (for ([i (in-naturals)] [bb (in-vector bbvec)]) + (define line + (string-append + (chr* #\space i) + ">" + (chr* #\space (- (vector-length bbvec) i 1)) + (bbnode-contents bb) + "v")) + ;; render line + (for ([j (in-naturals)] [c (in-string line)]) + (hash-set! render (cons (* i 3) j) c)) + ;; insert returns + (hash-set! render (cons (+ (* i 3) 1) (- (string-length line) 1)) #\<) + (hash-set! render (cons (+ (* i 3) 2) (- (string-length line) 2)) #\<) + (for ([j (in-naturals)] [target (in-list (bbnode-branch bb))]) + (hash-set! render + (cons (+ (* i 3) 1 j) + target) + (if (> target i) #\v #\^))) + (void)) + + ;; convert matrix to string + (define w (add1 (apply max (map cdr (hash-keys render))))) + (define h (add1 (apply max (map car (hash-keys render))))) + (string-join + (for/list ([y (in-range h)]) + (list->string + (for/list ([x (in-range w)]) + (hash-ref render (cons y x) #\space)))) + "\n")) + + +;;;;;;;; demo + +(define prog + '((global i) + (global inc) + (global ten) + (label main) + (set! i 0) + (set! ten 10) + (set! inc 1) + (label loop) + (add! i inc) + (asm! "o" i) + (asm! "o" ten) + (ifeq i ten done loop) + (label done) + (asm! "\"hello world\"o") + (terminate))) + +(displayln (compile prog))