From 466b224e653b2237af3bc4d649688c26529e3f83 Mon Sep 17 00:00:00 2001 From: tali Date: Thu, 23 Nov 2023 23:06:13 -0500 Subject: [PATCH] spice lang parser and lexer --- .gitignore | 1 + bin/dune | 4 ++ bin/main.ml | 6 +++ dune-project | 12 ++++++ lib/dune | 19 +++++++++ lib/error.ml | 8 ++++ lib/lexer.mll | 71 ++++++++++++++++++++++++++++++ lib/parser.mly | 72 +++++++++++++++++++++++++++++++ lib/spice.ml | 7 +++ lib/syn.ml | 114 +++++++++++++++++++++++++++++++++++++++++++++++++ spice.opam | 24 +++++++++++ 11 files changed, 338 insertions(+) create mode 100644 .gitignore create mode 100644 bin/dune create mode 100644 bin/main.ml create mode 100644 dune-project create mode 100644 lib/dune create mode 100644 lib/error.ml create mode 100644 lib/lexer.mll create mode 100644 lib/parser.mly create mode 100644 lib/spice.ml create mode 100644 lib/syn.ml create mode 100644 spice.opam diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9c5f578 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +_build \ No newline at end of file diff --git a/bin/dune b/bin/dune new file mode 100644 index 0000000..4d891a5 --- /dev/null +++ b/bin/dune @@ -0,0 +1,4 @@ +(executable + (public_name spicec) + (name main) + (libraries spice fmt logs)) diff --git a/bin/main.ml b/bin/main.ml new file mode 100644 index 0000000..e3a6f67 --- /dev/null +++ b/bin/main.ml @@ -0,0 +1,6 @@ +let () = + try + let modl = Spice.parse "fun main(x) { val y = x + 2 IO.print(y * x) }" in + Format.fprintf Format.std_formatter "%a\n" Spice.Syn.pp_modl modl + with Spice.Error.Error err -> + Format.fprintf Format.err_formatter "error: %a\n" Spice.Error.pp err diff --git a/dune-project b/dune-project new file mode 100644 index 0000000..d220f71 --- /dev/null +++ b/dune-project @@ -0,0 +1,12 @@ +(lang dune 3.8) +(using menhir 2.0) +(name spice) + +(generate_opam_files true) + +(package + (name spice) + (synopsis "Simple language") + (description "Compiler and interpeter for the spice language") + (depends ocaml dune) + (tags (topics "interpreter" "compiler"))) diff --git a/lib/dune b/lib/dune new file mode 100644 index 0000000..dd2f8c1 --- /dev/null +++ b/lib/dune @@ -0,0 +1,19 @@ +(library + (name spice)) + +(menhir + (modules parser) + (flags --unused-tokens)) + +(ocamllex + (modules lexer)) + +; (menhir +; (modules parser)) + +; (ocamllex lexer) + +; (library +; (name json_parser) +; (modules parser lexer json) +; (libraries core)) diff --git a/lib/error.ml b/lib/error.ml new file mode 100644 index 0000000..8e5153a --- /dev/null +++ b/lib/error.ml @@ -0,0 +1,8 @@ +type t = Syntax of string + +exception Error of t + +let raise_syntax_error msg = raise (Error (Syntax msg)) + +let pp ppf = function + | Syntax why -> Format.fprintf ppf "syntax error: %s" why diff --git a/lib/lexer.mll b/lib/lexer.mll new file mode 100644 index 0000000..175a417 --- /dev/null +++ b/lib/lexer.mll @@ -0,0 +1,71 @@ +{ +open Parser + +let keywords = Hashtbl.create 17 +let _ = begin + Hashtbl.add keywords "true" Kw_true; + Hashtbl.add keywords "false" Kw_false; + Hashtbl.add keywords "nil" Kw_nil; + Hashtbl.add keywords "val" Kw_val; + Hashtbl.add keywords "fun" Kw_fun; + Hashtbl.add keywords "obj" Kw_obj; +end + +let is_digit ch = ch >= '0' && ch <= '9' +let is_digit_or_sep ch = ch = '_' || is_digit ch + +let is_integer str = + String.exists is_digit str && + String.for_all is_digit_or_sep str + +let parse_integer str = + let buffer = Buffer.create (String.length str) in + String.iter (fun ch -> + if is_digit ch then + Buffer.add_char buffer ch) + str; + if Buffer.length buffer > 18 then + Error.raise_syntax_error "integer literal is too long"; + Int64.of_string (Buffer.contents buffer) + +let identifier str = + match Hashtbl.find keywords str with + | kw -> kw + | exception Not_found -> + if is_integer str then + Int (parse_integer str) + else + Name str +} + +let int = ['0'-'9'] ['0'-'9']* +let white = [' ' '\t']+ +let newline = '\n' +let id = ['a'-'z' 'A'-'Z' '0'-'9' '_']+ + +rule read = + parse + | white { read lexbuf } + | newline { Lexing.new_line lexbuf; read lexbuf } + | id { identifier (Lexing.lexeme lexbuf) } + | "(" { LP } + | ")" { RP } + | "{" { LC } + | "}" { RC } + | "+" { Binop Add } + | "-" { Binop Sub } + | "*" { Binop Mul } + | "/" { Binop Div } + | "%" { Binop Mod } + | "==" { Binop Eql } + | "=" { Eq } + | ">" { Binop Grt } + | "<=" { Binop Lst_eql } + | "<" { Binop Lst } + | ">=" { Binop Grt_eql } + | "!=" { Binop Not_eql } + | "." { Dot } + | "," { Com } + | _ { Error.raise_syntax_error "unrecognized character" } + | eof { EOF } + diff --git a/lib/parser.mly b/lib/parser.mly new file mode 100644 index 0000000..cffc122 --- /dev/null +++ b/lib/parser.mly @@ -0,0 +1,72 @@ +%token Int +%token Name +%token Binop + +%token LP "(" RP ")" +%token LC "{" RC "}" +%token Eq "=" +%token Dot "." +%token Com "," + +%token Kw_true "true" +%token Kw_false "false" +%token Kw_nil "nil" +%token Kw_val "val" +%token Kw_fun "fun" +%token Kw_obj "obj" + +%token EOF +%start modl + +%{ + open Syn + + let maybe_call prefix = function + | None -> Path prefix + | Some args -> Call (prefix, args) + + let infix_base e0 = (e0, []) + let infix_app (e0, ops) op e2 = (e0, (op, e2) :: ops) + + let resolve_fixity (e0, ops) = + List.fold_right + (fun (op, e2) e1 -> Binop (op, e1, e2)) + ops e0 +%} +%% + +modl: + | items = list(item); EOF { { items } } + +item: + | e = exp { Item_exp e } + | "val"; n = Name; "="; e = exp { Item_val (n, e) } + | "fun"; n = Name; p = params; e = exp { Item_fun (n, p, e) } + | "obj"; n = Name; b = block { Item_obj (n, b) } + +exp: + | e = infixexp { resolve_fixity e } + | "fun"; p = params; e = exp { Fun (p, e) } + | "obj"; b = block { Obj b } + +infixexp: + | e0 = singleexp { infix_base e0 } + | e1 = infixexp; op = Binop; e2 = singleexp { infix_app e1 op e2 } + +singleexp: + | i = Int { Literal (Int i) } + | p = path; a = option(args) { maybe_call p a } + | b = block { Scope b } + +path: + | x = Name { Var x } + | e = singleexp; "."; x = Name { Ele (e, x) } + +params: "("; list = trailing_list(Name); ")" { list } +args: "("; list = trailing_list(exp); ")" { list } +block: "{"; list = list(item); "}" { list } + +trailing_list(X): + | { [] } + | x = X { [x] } + | x = X; ","; xs = trailing_list(X) { x :: xs } diff --git a/lib/spice.ml b/lib/spice.ml new file mode 100644 index 0000000..3414e8a --- /dev/null +++ b/lib/spice.ml @@ -0,0 +1,7 @@ +module Error = Error +module Syn = Syn + +let parse input = + let lexbuf = Lexing.from_string input ~with_positions:true in + try Parser.modl Lexer.read lexbuf + with Parser.Error -> Error.raise_syntax_error "bad syntax" diff --git a/lib/syn.ml b/lib/syn.ml new file mode 100644 index 0000000..8aced94 --- /dev/null +++ b/lib/syn.ml @@ -0,0 +1,114 @@ +type name = string + +type literal = + | Int of int64 + | True + | False + | Nil + +type binop = + | Add + | Sub + | Mul + | Div + | Mod + | Eql + | Grt + | Lst + | Not_eql + | Grt_eql + | Lst_eql + +type modl = { items : item list } + +and item = + | Item_exp of exp + | Item_val of name * exp + | Item_fun of name * params * exp + | Item_obj of name * block + +and params = name list + +and exp = + | Literal of literal + | Path of path + | Call of path * exp list + | Binop of binop * exp * exp + | Fun of params * exp + | Obj of block + | Scope of block + +and path = + | Var of name + | Ele of exp * name + +and block = item list + +(* pretty printer *) + +let string_of_literal = function + | Int n -> Int64.to_string n + | True -> "true" + | False -> "false" + | Nil -> "nil" + +let string_of_binop = function + | Add -> "+" + | Sub -> "-" + | Mul -> "*" + | Div -> "/" + | Mod -> "%" + | Eql -> "==" + | Grt -> ">" + | Lst -> "<" + | Not_eql -> "!=" + | Grt_eql -> ">=" + | Lst_eql -> "<=" + +let pf = Format.fprintf +let pp_str ppf str = pf ppf "%S" str + +let pp_list pp_ele ppf list = + pf ppf "["; + List.iteri + (fun i ele -> + if i > 0 then pf ppf ","; + pp_ele ppf ele) + list; + pf ppf "]" + +let rec pp_exp ppf = function + | Literal (Int n) -> pf ppf "{\"int\":%s}" (Int64.to_string n) + | Literal l -> pf ppf "%s" (string_of_literal l) + | Path (Var x) -> pf ppf "{\"var\":%S}" x + | Path (Ele (e, x)) -> pf ppf "{\"ele\":%a,\"field\":%S}" pp_exp e x + | Call (fn, args) -> pf ppf "{\"call\":%a}" (pp_list pp_exp) (Path fn :: args) + | Binop (op, e1, e2) -> + pf + ppf + "{\"binop\":%S,\"lhs\":%a,\"rhs\":%a}" + (string_of_binop op) + pp_exp + e1 + pp_exp + e2 + | Fun (params, body) -> + pf ppf "{\"fun\":%a,\"body\":%a}" (pp_list pp_str) params pp_exp body + | Obj body -> pf ppf "{\"obj\":%a}" (pp_list pp_item) body + | Scope body -> pf ppf "{\"scope\":%a}" (pp_list pp_item) body + +and pp_item ppf = function + | Item_exp e -> pf ppf "{\"exp\":%a}" pp_exp e + | Item_val (name, rhs) -> pf ppf "{\"val\":%S,\"rhs\":%a}" name pp_exp rhs + | Item_fun (name, params, body) -> + pf + ppf + "{\"fun\":%S,\"params\":%a,\"body\":%a}" + name + (pp_list pp_str) + params + pp_exp + body + | Item_obj (name, body) -> pf ppf "{\"obj\":%S,\"body\":%a}" name (pp_list pp_item) body + +let pp_modl ppf m = pp_list pp_item ppf m.items diff --git a/spice.opam b/spice.opam new file mode 100644 index 0000000..c1a8e35 --- /dev/null +++ b/spice.opam @@ -0,0 +1,24 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +synopsis: "A short synopsis" +description: "A longer description" +tags: ["topics" "to describe" "your" "project"] +depends: [ + "ocaml" + "dune" {>= "3.8"} + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +]