reimplement bc compiler to utilize a simpler intermediate repr

This commit is contained in:
tali 2023-12-13 16:40:44 -05:00
parent 898cf7380c
commit dd27dc04d2
6 changed files with 450 additions and 230 deletions

View File

@ -5,10 +5,10 @@ let () =
Logs.set_level (Some Logs.Debug); Logs.set_level (Some Logs.Debug);
try try
let ast = parse "val x = 1 fun f() g() + x fun g() 5 println(f())" in let ast = parse "fun f() 3 val x = f() + 1" in
Logs.debug (fun m -> m "%a" Ast.pp_modl ast); Logs.debug (fun m -> m "%a" Ast.pp_modl ast);
let prog = compile ast in let prog = compile ast in
Logs.debug (fun m -> m "%a" Code.pp_program prog); Logs.debug (fun m -> m "%a" Code.pp_funct prog.main);
let modl = run prog in let modl = run prog in
Logs.debug (fun m -> m "%a" Value.pp modl) Logs.debug (fun m -> m "%a" Value.pp modl)
with Error msg -> Logs.err (fun m -> m "%s" msg) with Error msg -> Logs.err (fun m -> m "%s" msg)

View File

@ -10,186 +10,139 @@ let compile_error f =
let off (Code.R i) k = Code.R (i + k) let off (Code.R i) k = Code.R (i + k)
let suc r = off r 1 let suc r = off r 1
module Env = struct let undef_method =
type t = Value.Native_function
| Empty (* TODO: remove me *) (fun _ -> failwith "BUG: method undefined")
| Cons of t * t
| Obj of { self : Code.reg;
elems : (string, Value.elem) Hashtbl.t }
let rec find name = function let rec compile_lambda (lam : Ir.lambda) =
| Empty -> raise Not_found let entrypoint = Code.make_block () in
| Cons (e1, e2) -> let currb = ref entrypoint in
begin
try find name e2
with Not_found -> find name e1
end
| Obj { self; elems } ->
self, Hashtbl.find elems name
end
let compile modl lib =
let ep = Code.make_block () in
let currb = ref ep in
let emit i = Code.extend !currb i in let emit i = Code.extend !currb i in
let enter b = currb := b in let enter b = currb := b in
let rec compile_exp env rd = function let reg_of_id = Hashtbl.create 128 in
| Ast.Literal (Int n) -> emit (LDI (rd, Int n)) let set_reg id r =
| Ast.Literal True -> emit (LDI (rd, True)) if Hashtbl.mem reg_of_id id then failwith "BUG: id reassigned";
| Ast.Literal False -> emit (LDI (rd, False)) Hashtbl.add reg_of_id id r
| Ast.Literal Nil -> emit (LDI (rd, Nil)) in
let get_reg id =
try Hashtbl.find reg_of_id id with
Not_found -> failwith "BUG: id unassigned"
in
| Ast.Path path -> let rec emit_exp sp = function
let obj, loc = compile_path env rd path in | Ir.Var id ->
emit (GET (rd, obj, loc)) get_reg id
| Ast.Binop (op, e1, e2) -> | Ir.Let (id, rhs, bdy) ->
let r1 = rd in emit_exp_s sp rhs;
let r2 = suc rd in set_reg id sp;
compile_exp env r1 e1; emit_exp (suc sp) bdy
compile_exp env r2 e2;
begin match op with
| Ast.Add -> emit (ADD (rd, r1, r2))
| Ast.Sub -> emit (SUB (rd, r1, r2))
| Ast.Mul -> emit (MUL (rd, r1, r2))
| Ast.Div | Ast.Mod -> failwith "Bcc.compile_exp: TODO(Div,Mod)"
| Ast.Eql -> emit (EQL (rd, r1, r2))
| Ast.Grt -> emit (GRT (rd, r1, r2))
| Ast.Lst -> emit (LST (rd, r1, r2))
| Ast.Not_eql -> emit (EQL (r1, r1, r2)); emit (NOT (rd, r1))
| Ast.Lst_eql -> emit (GRT (r1, r1, r2)); emit (NOT (rd, r1))
| Ast.Grt_eql -> emit (LST (r1, r1, r2)); emit (NOT (rd, r1))
end
| Ast.Call (fn, args) -> | Ir.Seq (e1, e2) ->
let obj, mth = compile_path env rd fn in emit_exp sp e1 |> ignore;
emit_exp sp e2
| ir ->
emit_exp_s sp ir;
sp
and emit_exp_s sp = function
| Ir.Lit im ->
emit (LDI (sp, im))
| Ir.Get path ->
let obj, loc = emit_path sp path in
emit (GET (sp, obj, loc))
| Ir.Set (path, rhs) ->
let obj, loc = emit_path sp path in
let rv = emit_exp (suc sp) rhs in
emit (SET (rv, obj, loc))
| Ir.Seq (e1, e2) ->
emit_exp sp e1 |> ignore;
emit_exp_s sp e2
| Ir.If (e0, e1, e2) ->
let b1 = Code.make_block () in
let b2 = Code.make_block () in
let b3 = Code.make_block () in
let c = emit_exp sp e0 in
emit (CBR (c, b1, b2));
enter b1; emit_exp_s sp e1; emit (JMP b3);
enter b2; emit_exp_s sp e2; emit (JMP b3);
enter b3
| Ir.Uop (op, e1) ->
let r1 = emit_exp sp e1 in
emit (match op with
| Not -> NOT (sp, r1))
| Ir.Bop (op, e1, e2) ->
let r1 = emit_exp_s sp e1; sp in
let r2 = emit_exp sp e2 in
emit (match op with
| Add -> ADD (sp, r1, r2)
| Sub -> ADD (sp, r1, r2)
| Mul -> ADD (sp, r1, r2)
| Div -> failwith "Bcc.compile_exp: TODO(Bop(Div))"
| Mod -> failwith "Bcc.compile_exp: TODO(Bop(Mod))"
| Eql -> EQL (sp, r1, r2)
| Grt -> GRT (sp, r1, r2)
| Lst -> LST (sp, r1, r2))
| Ir.Call (fn, args) ->
let obj, mth = emit_path sp fn in
let args = let args =
List.mapi List.mapi
(fun i arg -> (fun i arg ->
let ri = off mth (i + 1) in let rv = off mth (i + 1) in
compile_exp env ri arg; emit_exp_s rv arg; rv)
ri)
args args
in in
emit (CAL (rd, obj, mth, args)) emit (CAL (sp, obj, mth, args))
| Ast.If (e0, e1, e2) -> | Ir.Obj { vals; funs } ->
let r0 = rd in let n_slots = List.length vals in
let b1 = Code.make_block () in let elems = Hashtbl.create (List.length vals + List.length funs) in
let b2 = Code.make_block () in let mthds = Array.make (List.length funs) undef_method in
compile_exp env r0 e0;
emit (CBR (r0, b1, b2));
let jp = Code.make_block () in
enter b1; compile_exp env rd e1; emit (JMP jp);
enter b2; compile_exp env rd e2; emit (JMP jp);
enter jp
| Ast.Fun (_, _) -> List.iteri
failwith "Bcc.compile_exp: TODO(Fun)" (fun i name ->
Hashtbl.add elems name (Value.Field i))
vals;
| Ast.Obj items -> List.iteri
ignore (compile_block env rd items) (fun i (name, lambda) ->
Hashtbl.add elems name (Value.Method i);
mthds.(i) <- Code.Function (compile_lambda lambda))
funs;
| Ast.Scope items -> emit (CON (sp, { n_slots; elems; mthds }))
begin match compile_block env rd items with
| Some r -> emit (LDR (rd, r))
| None -> compile_error "scope does not end with an expression"
end
and compile_path env rd path = | ir ->
match path with let rv = emit_exp sp ir in
| Ast.Var name -> if rv <> sp then emit (LDR (sp, rv))
let obj, ele =
try Env.find name env
with Not_found ->
compile_error "unbound variable %S" name
in
let loc = rd in
emit (LDI (loc, Value.of_elem ele));
obj, loc
| Ast.Ele (lhs, name) ->
let obj = rd in
let loc = suc rd in
compile_exp env obj lhs;
emit (LOC (loc, obj, name));
obj, loc
and compile_block env rd items = and emit_path sp (obj, fld) =
let elems = Hashtbl.create 100 in let obj = get_reg obj in
let n_vals, _, funs_rev = let loc = sp in
List.fold_left emit (LOC (loc, obj, fld));
(fun (nv, nf, fns) -> function obj, loc
| Ast.Item_exp _ -> nv, nf, fns
| Ast.Item_val (name, _)
| Ast.Item_obj (name, _) ->
Hashtbl.add elems name (Value.Field nv);
nv + 1, nf, fns
| Ast.Item_fun (name, params, body) ->
Hashtbl.add elems name (Value.Method nf);
nv, nf + 1, (name, params, body) :: fns)
(0, 0, [])
items
in
let prevb = !currb in
let mthds =
let clo = Code.R 0 in
let env = Env.Obj { self = clo; elems } in
List.rev_map
(fun (_, params, body) ->
if params <> [] then
failwith "Bcc.compile_block: TODO(params)";
let ep = Code.make_block () in
enter ep;
let rv = Code.R 1 in
compile_exp env rv body;
emit (RET rv);
Code.Method { n_args = 0; body = { Code.entry = ep } })
funs_rev
|> Array.of_list
in
enter prevb;
emit (CON (rd, { n_slots = n_vals; elems; mthds }));
let r0 = suc rd in
let r1 = suc r0 in
let env = Env.Cons (env, Env.Obj { self = rd; elems }) in
List.fold_left
(fun _ -> function
| Ast.Item_exp exp ->
compile_exp env r0 exp;
Some r0
| Ast.Item_val (name, exp) ->
let el = Hashtbl.find elems name in
emit (LDI (r0, Value.of_elem el));
compile_exp env r1 exp;
emit (SET (r1, rd, r0));
None
| Ast.Item_obj (name, body) ->
(* TODO: it would be ideal if we could CONstruct the empty versions of obj's in
a sort of "pre-init" phase, before assigning field values. but for now, obj
items are identical to val's where the rhs is an obj expression. *)
let el = Hashtbl.find elems name in
emit (LDI (r0, Value.of_elem el));
compile_block env r1 body |> ignore;
emit (SET (r1, rd, r0));
None
| Ast.Item_fun (_, _, _) ->
(* already handled previously *)
None)
None
items
in in
let init_env = set_reg lam.self (Code.R 0);
let elems = Hashtbl.create 100 in if lam.args <> [] then
List.iteri (fun i (name, _) -> Hashtbl.add elems name (Value.Method i)) lib; failwith "Bcc.compile: TODO(lambda.args)";
Env.Obj { self = R 0; elems } (* if lam.clos <> [] then *)
in (* failwith "Bcc.compile: TODO(lambda.clos)"; *)
let rv = Code.R 1 in
compile_block init_env rv modl.Ast.items |> ignore; let sp = Code.R 1 in
let rv = emit_exp sp lam.body in
emit (RET rv); emit (RET rv);
{ Code.entry = ep } Code.make_funct
(List.length lam.args)
entrypoint

239
lib/compile/ir.ml Normal file
View File

@ -0,0 +1,239 @@
module Ast = Spice_syntax.Ast
module Value = Spice_runtime.Value
exception Error of string
let compile_error f =
Fmt.kstr (fun msg -> raise (Error msg)) f
type imm = Value.t
type id = Id of int [@@unboxed]
type uop =
| Not
and bop =
| Add
| Sub
| Mul
| Div
| Mod
| Eql
| Grt
| Lst
type exp =
| Lit of Value.t
| Var of id
| Get of path
| Set of path * exp
| Let of id * exp * exp
| Seq of exp * exp
| If of exp * exp * exp
| Uop of uop * exp
| Bop of bop * exp * exp
| Call of path * exp list
| Obj of obj
and path = id * string
and obj = {
vals : string list;
funs : (string * lambda) list;
(* clos : id list; *)
}
and lambda = {
self : id;
args : id list;
body : exp;
}
(* lower *)
let make_id_dispenser () =
let i = ref (-1) in fun () -> (incr i; Id !i)
module Env = struct
type t =
| Empty
| Cons of t * t
| Args of (string * id) list
| Obj of {
self : id;
elems : string list;
}
let rec find name = function
| Empty -> raise Not_found
| Args args ->
List.assoc name args, None
| Cons (e1, e2) ->
begin
try find name e1 with
Not_found -> find name e2
end
| Obj { self; elems } ->
if List.mem name elems then
self, Some name
else
raise Not_found
end
let seq_r a b = Seq (b, a)
let lower ~lib (modl : Ast.modl) =
let new_id = make_id_dispenser () in
let rec lower_exp env = function
| Ast.Literal (Int n) -> Lit (Int n)
| Ast.Literal True -> Lit True
| Ast.Literal False -> Lit False
| Ast.Literal Nil -> Lit Nil
| Ast.Path path ->
lower_path env path
(function
| `Var id -> Var id
| `Get (obj, fld) -> Get (obj, fld))
| Ast.Binop (op, e1, e2) ->
let not e = Uop (Not, e) in
let bop, uop = match op with
| Ast.Add -> Add, Fun.id
| Ast.Sub -> Sub, Fun.id
| Ast.Mul -> Mul, Fun.id
| Ast.Div -> Div, Fun.id
| Ast.Mod -> Mod, Fun.id
| Ast.Eql -> Eql, Fun.id
| Ast.Grt -> Grt, Fun.id
| Ast.Lst -> Lst, Fun.id
| Ast.Not_eql -> Eql, not
| Ast.Grt_eql -> Lst, not
| Ast.Lst_eql -> Grt, not
in
uop (Bop (bop, lower_exp env e1, lower_exp env e2))
| Ast.Call (fn, args) ->
lower_path env fn
(fun fn ->
let fn_path = match fn with
| `Var _ -> failwith "Ir.lower_exp: TODO(fcf calls)"
| `Get (obj, mth) -> (obj, mth)
in
let args = List.map (lower_exp env) args in
Call (fn_path, args))
| Ast.If (e1, e2, e3) ->
If (lower_exp env e1, lower_exp env e2, lower_exp env e3)
| Ast.Obj items ->
lower_block env items
| Ast.Scope items ->
lower_block env items ~is_scope:true
| Ast.Fun (_, _) ->
failwith "Ir.lower_exp: TODO(Fun)"
and lower_path env path k =
match path with
| Ast.Ele (obj, fld) ->
let rhs = lower_exp env obj in
let lhs = new_id () in
Let (lhs, rhs, k (`Get (lhs, fld)))
| Ast.Var name ->
match Env.find name env with
| id, None -> k (`Var id)
| obj, Some fld -> k (`Get (obj, fld))
| exception Not_found ->
compile_error "unbound variable %S" name
and lower_block ?(is_scope = false) env items =
(* collect names of bindings to form the new environment; also check if a scope ends
with an expression, if not then it is an error *)
let elems, ends_with_exp =
List.fold_left
(fun (elems, _) -> function
| Ast.Item_exp _ -> elems, true
| Ast.Item_val (name, _)
| Ast.Item_obj (name, _)
| Ast.Item_fun (name, _, _) -> name :: elems, false)
([], false)
items
in
if is_scope && not ends_with_exp then
compile_error "scope does not end in expression";
(* build environment for field initializers; NOT for lambda capture *)
let self = new_id () in
let env_in = Env.Cons (Obj { self; elems }, env) in
let funs_r, vals_r, inits_r =
List.fold_left
(fun (fns, vls, ins) -> function
| Ast.Item_exp exp ->
let init = lower_exp env_in exp in
fns, vls, init :: ins
| Ast.Item_val (name, exp) ->
let init = Set ((self, name), lower_exp env_in exp) in
fns, name :: vls, init :: ins
| Ast.Item_obj (name, items) ->
(* TODO: it would be ideal if we could construct the empty versions of obj's
in a sort of "pre-init" phase, before assigning field values. but for now,
obj items are identical to val's where the rhs is an obj expression. *)
let init = Set ((self, name), lower_block env_in items) in
fns, name :: vls, init :: ins
| Ast.Item_fun (name, args, body) ->
let fn = name, compile_lambda env args body in
fn :: fns, vls, ins)
([], [], [])
items
in
(* TODO: closure conversion *)
(* if [is_scope], return the last expr, otherwise return the object itself *)
let ret, inits_r = match is_scope, inits_r with
| true, init :: inits -> init, inits
| _, inits -> Var self, inits
in
(* reverse order of inits and decls since they are cons'ed backwards *)
Let (
self,
Obj {
funs = List.rev funs_r;
vals = List.rev vals_r;
},
List.fold_left
(fun a b -> Seq (b, a))
ret
inits_r
)
and compile_lambda env args body =
let self = new_id () in
if args <> [] then
failwith "Ir.compile_lambda: TODO(args non-empty)";
(* FIXME: capture environment *)
let env = ignore env; Env.Empty in
let args = [] in
let body = lower_exp env body in
{ self; args; body }
in
let self = new_id () in
let env =
(* TODO: lib entries *)
let _ = lib in
Env.Empty
in
let args = [] in
let body = lower_block env modl.items in
{ self; args; body }

View File

@ -32,67 +32,82 @@ type ins =
and block = and block =
{ mutable ins_list_rev : ins list } { mutable ins_list_rev : ins list }
let registers = function
| JMP _ -> []
| RET r
| LDI (r, _)
| CON (r, _)
| CBR (r, _, _) -> [r]
| LDR (r1, r2)
| NOT (r1, r2)
| LOC (r1, r2, _) -> [r1; r2]
| ADD (r1, r2, r3)
| SUB (r1, r2, r3)
| MUL (r1, r2, r3)
| LST (r1, r2, r3)
| GRT (r1, r2, r3)
| EQL (r1, r2, r3)
| GET (r1, r2, r3)
| SET (r1, r2, r3) -> [r1; r2; r3]
| CAL (r1, r2, r3, rs) -> r1::r2::r3::rs
let make_block () = let make_block () =
{ ins_list_rev = [] } { ins_list_rev = [] }
let extend t ins = let extend b ins =
t.ins_list_rev <- ins :: t.ins_list_rev b.ins_list_rev <- ins :: b.ins_list_rev
let instructions t = let instructions b =
List.rev t.ins_list_rev List.rev b.ins_list_rev
let iter_blocks_df f b0 =
type prog = let stack = ref [ b0 ] in
{ entry : block } let visited = ref !stack in
type Value.mthd +=
| Method of { n_args : int; body : prog }
let frame_size t =
let queue = ref [ t.entry ] in
let visited = ref !queue in
let enqueue b = let enqueue b =
if not (List.memq b !visited) then ( if not (List.memq b !visited) then (
queue := b :: !queue; stack := b :: !stack;
visited := b :: !visited) visited := b :: !visited)
in in
let meas (R i) fs = max fs (i + 1) in let visit b =
let meas_ins fs = function f b;
| RET r (* NOTE: only [List.hd b.ins_list_rev] should be a branching instruction, so iterating
| LDI (r, _) the whole list is pointless. but just to be safe ... *)
| CON (r, _) List.iter
-> meas r fs (function
| LDR (r1, r2) | JMP b1 -> enqueue b1
| NOT (r1, r2) | CBR (_, b1, b2) -> enqueue b1; enqueue b2
| LOC (r1, r2, _) | _ -> ())
-> meas r1 (meas r2 fs) b.ins_list_rev
| ADD (r1, r2, r3)
| SUB (r1, r2, r3)
| MUL (r1, r2, r3)
| LST (r1, r2, r3)
| GRT (r1, r2, r3)
| EQL (r1, r2, r3)
| GET (r1, r2, r3)
| SET (r1, r2, r3)
-> meas r1 (meas r2 (meas r3 fs))
| CAL (r1, r2, r3, rs) ->
List.fold_right meas (r1::r2::r3::rs) fs
| JMP b ->
enqueue b;
fs
| CBR (r, b1, b2) ->
enqueue b1;
enqueue b2;
meas r fs
in in
let rec loop fs = while !stack <> [] do
match !queue with visit (List.hd !stack);
| [] -> fs stack := List.tl !stack;
| bl :: rest -> done
queue := rest;
loop (List.fold_left meas_ins fs bl.ins_list_rev)
type funct =
{ n_args : int;
frame_size : int;
entry : block }
type Value.mthd +=
| Function of funct
let make_funct n_args entry =
let frame_size =
let fsize = ref (n_args + 1) in
iter_blocks_df
(fun b ->
fsize :=
List.rev_map registers b.ins_list_rev
|> List.flatten
|> List.fold_left (fun fs (R i) -> max fs (i + 1))
!fsize)
entry;
!fsize
in in
loop 1 { n_args; frame_size; entry }
(* pretty printing *) (* pretty printing *)
@ -135,9 +150,9 @@ let pp_ins ~label ppf = function
let l2 = label b2 in let l2 = label b2 in
Fmt.pf ppf "cbr %a, %s, %s" pp_reg a l1 l2 Fmt.pf ppf "cbr %a, %s, %s" pp_reg a l1 l2
let pp_program ppf prog = let pp_funct ppf { entry; _ } =
let basic_blocks = ref [ prog.entry, "START" ] in let basic_blocks = ref [ entry, "START" ] in
let work_list = ref [ prog.entry ] in let work_list = ref [ entry ] in
let label bb = let label bb =
try List.assq bb !basic_blocks try List.assq bb !basic_blocks
with Not_found -> with Not_found ->

View File

@ -92,13 +92,8 @@ let rec exec ({ r; _ } as fr) = function
and call mthd self args = and call mthd self args =
match mthd with match mthd with
| Code.Method { n_args; body } -> | Code.Function fn -> run fn self args
if List.length args <> n_args then | _ -> Value.call mthd self args
runtime_error "wrong number of arguments, expected %d" n_args;
run body self (* args *)
| _ ->
Value.call mthd self args
and step fr = and step fr =
match fr.pc with match fr.pc with
@ -108,11 +103,18 @@ and step fr =
exec fr i; exec fr i;
step fr step fr
and run prog self (* args *) = and run fn self args =
let r = Array.make (Code.frame_size prog) Value.Nil in let Code.{ n_args; frame_size; entry } = fn in
let fr = { r; pc = []; rv = Nil } in if List.length args <> n_args then
runtime_error "wrong number of arguments, expected %d, got %d"
n_args (List.length args);
let r = Array.make frame_size Value.Nil in
r.(0) <- self; r.(0) <- self;
jmp fr prog.entry; List.iteri (fun i v -> r.(i + 1) <- v) args;
let fr = { r; pc = []; rv = Nil } in
jmp fr entry;
step fr; step fr;
fr.rv fr.rv

View File

@ -18,12 +18,23 @@ let parse input =
| Parser.Error -> failf "syntax error" | Parser.Error -> failf "syntax error"
| Lexer.Error msg -> failf "syntax error: %s" msg | Lexer.Error msg -> failf "syntax error: %s" msg
type program =
{ main : Code.funct }
let compile ast = let compile ast =
try Bcc.compile ast Std.lib try
with Bcc.Error msg -> {
main =
Ir.lower ast ~lib:Std.lib
|> Bcc.compile_lambda
}
with Ir.Error msg ->
failf "compile error: %s" msg failf "compile error: %s" msg
let run prog = let run { main } =
try Interp.run prog (Value.native_lib Std.lib) try
let lib = Value.native_lib Std.lib in
let args = [] in
Interp.run main lib args
with Interp.Runtime_error msg -> with Interp.Runtime_error msg ->
failf "runtime error: %s" msg failf "runtime error: %s" msg