spice lang parser and lexer

This commit is contained in:
tali 2023-11-23 23:06:13 -05:00
commit 466b224e65
11 changed files with 338 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
_build

4
bin/dune Normal file
View File

@ -0,0 +1,4 @@
(executable
(public_name spicec)
(name main)
(libraries spice fmt logs))

6
bin/main.ml Normal file
View File

@ -0,0 +1,6 @@
let () =
try
let modl = Spice.parse "fun main(x) { val y = x + 2 IO.print(y * x) }" in
Format.fprintf Format.std_formatter "%a\n" Spice.Syn.pp_modl modl
with Spice.Error.Error err ->
Format.fprintf Format.err_formatter "error: %a\n" Spice.Error.pp err

12
dune-project Normal file
View File

@ -0,0 +1,12 @@
(lang dune 3.8)
(using menhir 2.0)
(name spice)
(generate_opam_files true)
(package
(name spice)
(synopsis "Simple language")
(description "Compiler and interpeter for the spice language")
(depends ocaml dune)
(tags (topics "interpreter" "compiler")))

19
lib/dune Normal file
View File

@ -0,0 +1,19 @@
(library
(name spice))
(menhir
(modules parser)
(flags --unused-tokens))
(ocamllex
(modules lexer))
; (menhir
; (modules parser))
; (ocamllex lexer)
; (library
; (name json_parser)
; (modules parser lexer json)
; (libraries core))

8
lib/error.ml Normal file
View File

@ -0,0 +1,8 @@
type t = Syntax of string
exception Error of t
let raise_syntax_error msg = raise (Error (Syntax msg))
let pp ppf = function
| Syntax why -> Format.fprintf ppf "syntax error: %s" why

71
lib/lexer.mll Normal file
View File

@ -0,0 +1,71 @@
{
open Parser
let keywords = Hashtbl.create 17
let _ = begin
Hashtbl.add keywords "true" Kw_true;
Hashtbl.add keywords "false" Kw_false;
Hashtbl.add keywords "nil" Kw_nil;
Hashtbl.add keywords "val" Kw_val;
Hashtbl.add keywords "fun" Kw_fun;
Hashtbl.add keywords "obj" Kw_obj;
end
let is_digit ch = ch >= '0' && ch <= '9'
let is_digit_or_sep ch = ch = '_' || is_digit ch
let is_integer str =
String.exists is_digit str &&
String.for_all is_digit_or_sep str
let parse_integer str =
let buffer = Buffer.create (String.length str) in
String.iter (fun ch ->
if is_digit ch then
Buffer.add_char buffer ch)
str;
if Buffer.length buffer > 18 then
Error.raise_syntax_error "integer literal is too long";
Int64.of_string (Buffer.contents buffer)
let identifier str =
match Hashtbl.find keywords str with
| kw -> kw
| exception Not_found ->
if is_integer str then
Int (parse_integer str)
else
Name str
}
let int = ['0'-'9'] ['0'-'9']*
let white = [' ' '\t']+
let newline = '\n'
let id = ['a'-'z' 'A'-'Z' '0'-'9' '_']+
rule read =
parse
| white { read lexbuf }
| newline { Lexing.new_line lexbuf; read lexbuf }
| id { identifier (Lexing.lexeme lexbuf) }
| "(" { LP }
| ")" { RP }
| "{" { LC }
| "}" { RC }
| "+" { Binop Add }
| "-" { Binop Sub }
| "*" { Binop Mul }
| "/" { Binop Div }
| "%" { Binop Mod }
| "==" { Binop Eql }
| "=" { Eq }
| ">" { Binop Grt }
| "<=" { Binop Lst_eql }
| "<" { Binop Lst }
| ">=" { Binop Grt_eql }
| "!=" { Binop Not_eql }
| "." { Dot }
| "," { Com }
| _ { Error.raise_syntax_error "unrecognized character" }
| eof { EOF }

72
lib/parser.mly Normal file
View File

@ -0,0 +1,72 @@
%token <int64> Int
%token <Syn.name> Name
%token <Syn.binop> Binop
%token LP "(" RP ")"
%token LC "{" RC "}"
%token Eq "="
%token Dot "."
%token Com ","
%token Kw_true "true"
%token Kw_false "false"
%token Kw_nil "nil"
%token Kw_val "val"
%token Kw_fun "fun"
%token Kw_obj "obj"
%token EOF
%start <Syn.modl> modl
%{
open Syn
let maybe_call prefix = function
| None -> Path prefix
| Some args -> Call (prefix, args)
let infix_base e0 = (e0, [])
let infix_app (e0, ops) op e2 = (e0, (op, e2) :: ops)
let resolve_fixity (e0, ops) =
List.fold_right
(fun (op, e2) e1 -> Binop (op, e1, e2))
ops e0
%}
%%
modl:
| items = list(item); EOF { { items } }
item:
| e = exp { Item_exp e }
| "val"; n = Name; "="; e = exp { Item_val (n, e) }
| "fun"; n = Name; p = params; e = exp { Item_fun (n, p, e) }
| "obj"; n = Name; b = block { Item_obj (n, b) }
exp:
| e = infixexp { resolve_fixity e }
| "fun"; p = params; e = exp { Fun (p, e) }
| "obj"; b = block { Obj b }
infixexp:
| e0 = singleexp { infix_base e0 }
| e1 = infixexp; op = Binop; e2 = singleexp { infix_app e1 op e2 }
singleexp:
| i = Int { Literal (Int i) }
| p = path; a = option(args) { maybe_call p a }
| b = block { Scope b }
path:
| x = Name { Var x }
| e = singleexp; "."; x = Name { Ele (e, x) }
params: "("; list = trailing_list(Name); ")" { list }
args: "("; list = trailing_list(exp); ")" { list }
block: "{"; list = list(item); "}" { list }
trailing_list(X):
| { [] }
| x = X { [x] }
| x = X; ","; xs = trailing_list(X) { x :: xs }

7
lib/spice.ml Normal file
View File

@ -0,0 +1,7 @@
module Error = Error
module Syn = Syn
let parse input =
let lexbuf = Lexing.from_string input ~with_positions:true in
try Parser.modl Lexer.read lexbuf
with Parser.Error -> Error.raise_syntax_error "bad syntax"

114
lib/syn.ml Normal file
View File

@ -0,0 +1,114 @@
type name = string
type literal =
| Int of int64
| True
| False
| Nil
type binop =
| Add
| Sub
| Mul
| Div
| Mod
| Eql
| Grt
| Lst
| Not_eql
| Grt_eql
| Lst_eql
type modl = { items : item list }
and item =
| Item_exp of exp
| Item_val of name * exp
| Item_fun of name * params * exp
| Item_obj of name * block
and params = name list
and exp =
| Literal of literal
| Path of path
| Call of path * exp list
| Binop of binop * exp * exp
| Fun of params * exp
| Obj of block
| Scope of block
and path =
| Var of name
| Ele of exp * name
and block = item list
(* pretty printer *)
let string_of_literal = function
| Int n -> Int64.to_string n
| True -> "true"
| False -> "false"
| Nil -> "nil"
let string_of_binop = function
| Add -> "+"
| Sub -> "-"
| Mul -> "*"
| Div -> "/"
| Mod -> "%"
| Eql -> "=="
| Grt -> ">"
| Lst -> "<"
| Not_eql -> "!="
| Grt_eql -> ">="
| Lst_eql -> "<="
let pf = Format.fprintf
let pp_str ppf str = pf ppf "%S" str
let pp_list pp_ele ppf list =
pf ppf "[";
List.iteri
(fun i ele ->
if i > 0 then pf ppf ",";
pp_ele ppf ele)
list;
pf ppf "]"
let rec pp_exp ppf = function
| Literal (Int n) -> pf ppf "{\"int\":%s}" (Int64.to_string n)
| Literal l -> pf ppf "%s" (string_of_literal l)
| Path (Var x) -> pf ppf "{\"var\":%S}" x
| Path (Ele (e, x)) -> pf ppf "{\"ele\":%a,\"field\":%S}" pp_exp e x
| Call (fn, args) -> pf ppf "{\"call\":%a}" (pp_list pp_exp) (Path fn :: args)
| Binop (op, e1, e2) ->
pf
ppf
"{\"binop\":%S,\"lhs\":%a,\"rhs\":%a}"
(string_of_binop op)
pp_exp
e1
pp_exp
e2
| Fun (params, body) ->
pf ppf "{\"fun\":%a,\"body\":%a}" (pp_list pp_str) params pp_exp body
| Obj body -> pf ppf "{\"obj\":%a}" (pp_list pp_item) body
| Scope body -> pf ppf "{\"scope\":%a}" (pp_list pp_item) body
and pp_item ppf = function
| Item_exp e -> pf ppf "{\"exp\":%a}" pp_exp e
| Item_val (name, rhs) -> pf ppf "{\"val\":%S,\"rhs\":%a}" name pp_exp rhs
| Item_fun (name, params, body) ->
pf
ppf
"{\"fun\":%S,\"params\":%a,\"body\":%a}"
name
(pp_list pp_str)
params
pp_exp
body
| Item_obj (name, body) -> pf ppf "{\"obj\":%S,\"body\":%a}" name (pp_list pp_item) body
let pp_modl ppf m = pp_list pp_item ppf m.items

24
spice.opam Normal file
View File

@ -0,0 +1,24 @@
# This file is generated by dune, edit dune-project instead
opam-version: "2.0"
synopsis: "A short synopsis"
description: "A longer description"
tags: ["topics" "to describe" "your" "project"]
depends: [
"ocaml"
"dune" {>= "3.8"}
"odoc" {with-doc}
]
build: [
["dune" "subst"] {dev}
[
"dune"
"build"
"-p"
name
"-j"
jobs
"@install"
"@runtest" {with-test}
"@doc" {with-doc}
]
]