Compare commits

...

6 Commits

Author SHA1 Message Date
tali 12a519cfd7 completely redo bytecode format and decoder 2023-12-21 14:00:57 -05:00
tali 18cc66697e rust decoder POC 2023-12-17 16:21:44 -05:00
tali 80f5ab654b ocaml encoder POC 2023-12-17 16:21:44 -05:00
tali 509ccc132d rust opcodes 2023-12-17 16:21:44 -05:00
tali d65bef661e wip bytecode format specification in rust 2023-12-16 15:01:30 -05:00
tali 5ebb404276 comments 2023-12-14 16:13:34 -05:00
11 changed files with 599 additions and 2 deletions

1
.cargo/config.toml Normal file
View File

@ -0,0 +1 @@
build.target-dir = "_build/native"

65
Cargo.lock generated Normal file
View File

@ -0,0 +1,65 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "proc-macro2"
version = "1.0.70"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
dependencies = [
"proc-macro2",
]
[[package]]
name = "spicei"
version = "0.1.0"
dependencies = [
"thiserror",
]
[[package]]
name = "syn"
version = "2.0.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44c8b28c477cc3bf0e7966561e3460130e1255f7a1cf71931075f1c5e7a7e269"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "thiserror"
version = "1.0.51"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f11c217e1416d6f036b870f14e0413d480dbf28edbee1f877abaf0206af43bb7"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.51"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01742297787513b79cf8e29d1056ede1313e2420b7b3b15d0a768b4921f549df"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"

3
Cargo.toml Normal file
View File

@ -0,0 +1,3 @@
[workspace]
members = ["native"]
resolver = "2"

127
bin/bctest.ml Normal file
View File

@ -0,0 +1,127 @@
(* Minimal bytecode writer used by this test program.

   Instructions are appended to an in-memory buffer and written to the
   output channel by [flush]. Operands are polymorphic variants:
   [`R n] is a register, [`K n] a constant, [`O n] a signed offset and
   [`L (r, i)] a location made of a register plus a register or offset. *)
module Bc = struct
  (* Silence unused-value warnings: not every mnemonic below is used. *)
  [@@@warning "-32"]

  (* Writer state: pending bytes and the channel they are flushed to. *)
  type t = {
    buf : Buffer.t;
    out : out_channel;
  }

  let make out =
    { buf = Buffer.create 256; out }

  (* Write the pending bytes to [t.out] and empty the buffer. *)
  let flush t =
    output_string t.out (Buffer.contents t.buf);
    Buffer.clear t.buf

  (* Raw writers; each appends a list of values, multi-byte ones little-endian. *)
  let wr_u8 t xs =
    List.iter (Buffer.add_uint8 t.buf) xs
  let wr_u16 t xs =
    List.iter (Buffer.add_uint16_le t.buf) xs
  let wr_i32 t xs =
    List.iter (Buffer.add_int32_le t.buf)
      (List.map Int32.of_int xs)

  type r = [`R of int]
  type v = [r | `K of int]
  type i = [r | `O of int]
  type l = [`L of r * i]

  (* Register and constant indices must fit in one unsigned byte. *)
  let reg v =
    match v with
    | `R i -> assert (0 <= i && i <= 0xff); i
  let cst i =
    assert (0 <= i && i <= 0xff); i

  (* Encode a value operand: returns the first byte (bit 0x80 set for a
     constant) and the operand byte. *)
  let arg v b0 =
    match v with
    | #r as v -> b0, reg v
    | `K i -> (b0 lor 0x80), cst i

  (* Encode a signed 8-bit offset as an unsigned byte. *)
  let ofs v =
    assert ((-0x80) <= v && v <= 0x7f);
    v land 0xff

  (* Encode a location: returns the first byte (bit 0x40 set for the offset
     form) and a 16-bit value holding the register in the low byte and the
     index register or offset in the high byte. *)
  let loc v b0 =
    match v with
    | `L (r, (#r as i)) -> b0, reg r lor (reg i lsl 8)
    | `L (r, `O i) -> (b0 lor 0x40), reg r lor (ofs i lsl 8)

  (* Instruction layouts. The suffix names the operand kinds in order. *)
  let wr_ins_v b0 t (a : v) =
    let b0, b1 = arg a b0 in
    wr_u8 t [b0; b1]
  let wr_ins_rv b0 b1 t (a : r) (b : v) =
    let b2 = reg a in
    let b0, b3 = arg b b0 in
    wr_u8 t [b0; b1; b2; b3]
  let wr_ins_rl b0 t (a : r) (b : l) =
    let b1 = reg a in
    let b0, s2 = loc b b0 in
    wr_u8 t [b0; b1]; wr_u16 t [s2]
  let wr_ins_lv b0 t (b : l) (a : v) =
    let b0, b1 = arg a b0 in
    let b0, s2 = loc b b0 in
    wr_u8 t [b0; b1]; wr_u16 t [s2]

  (* Operation and conditional-branch instructions share the register/value
     layout; the second byte selects the operation or condition. *)
  let wr_op o t a b =
    assert (o >= 0x0 && o <= 0xf);
    wr_ins_rv 0x07 o t a b
  let wr_cb c t a b =
    assert (c >= 0xa && c <= 0xf);
    wr_ins_rv 0x17 c t a b

  let mov t a b =
    match a, b with
    | (#r as a), (#v as b) -> wr_ins_rv 0x01 0x00 t a b
    | (#r as a), (#l as b) -> wr_ins_rl 0x03 t a b
    | (#l as a), (#v as b) -> wr_ins_lv 0x05 t a b
    | #l, #l -> failwith "'mov L, L' mode unsupported"

  (* Jump: one 32-bit little-endian word with 0xff in the low byte and the
     signed displacement in the upper 24 bits. *)
  let jmp t d =
    (* The displacement only has 24 bits; reject values that would be
       silently truncated by the conversion to a 32-bit word. *)
    assert ((-0x800000) <= d && d <= 0x7fffff);
    let b0 = 0xff in
    let w0 = b0 lor (d lsl 8) in
    wr_i32 t [w0]

  let ret = wr_ins_v 0x00
  let btr = wr_ins_v 0x04
  let bfl = wr_ins_v 0x14
  let con = wr_op 0x0
  let not_= wr_op 0x1
  let neg = wr_op 0x2
  let add = wr_op 0x3
  let sub = wr_op 0x4
  let mul = wr_op 0x5
  let div = wr_op 0x6
  let mod_= wr_op 0x7
  let ceq = wr_op 0xa let beq = wr_cb 0xa
  let cne = wr_op 0xb let bne = wr_cb 0xb
  let clt = wr_op 0xc let blt = wr_cb 0xc
  let cge = wr_op 0xd let bge = wr_cb 0xd
  let cgt = wr_op 0xe let bgt = wr_cb 0xe
  let cle = wr_op 0xf let ble = wr_cb 0xf
end
(* Test program: assembles a short hand-written instruction sequence and
   writes it to standard output.

   The numeric comments on the right appear to be instruction offsets counted
   in 16-bit units: the four-byte instructions advance the count by 2 and the
   two-byte ret by 1. Each jmp operand is then the distance from the offset of
   that jmp to its labelled target, for example 6 + 9 = 15 = L3 and
   17 - 7 = 10 = L1. *)
let bc = Bc.make stdout
let () =
begin
Bc.mov bc (`R 0) (`K 0); (* 0 *)
Bc.mov bc (`R 1) (`K 1); (* 2 *)
Bc.bgt bc (`R 0) (`R 1); (* 4 *)
Bc.jmp bc (+9 (* L3 *)); (* 6 *)
Bc.jmp bc (+3 (* L2 *)); (* 8 *)
Bc.ret bc (`R 1); (* L1: 10 *)
Bc.add bc (`R 1) (`K 2); (* L2: 11 *)
Bc.jmp bc (-3 (* L1 *)); (* 13 *)
Bc.mul bc (`R 1) (`R 1); (* L3: 15 *)
Bc.jmp bc (-7 (* L1 *)); (* 17 *)
end
(* Emit the assembled bytes. *)
let () =
Bc.flush bc

View File

@ -1,4 +1,11 @@
; The `spicec` executable, built from the `main` module.
(executable
(public_name spicec)
(name main)
(modules main)
(libraries spice fmt logs))
; The `spice_bctest` executable, built from the `bctest` module (bytecode
; writer test program).
(executable
(public_name spice_bctest)
(name bctest)
(modules bctest)
(libraries fmt logs))

View File

@ -61,7 +61,7 @@ and lambda = {
}
(* lower *)
(* env *)
module Env = struct
type t =
@ -98,12 +98,16 @@ module Env = struct
end
end
(* utils *)
let seq_r a b = Seq (b, a)
let union xs ys =
List.sort_uniq compare
List.sort_uniq (compare : id -> id -> int)
(List.rev_append ys xs)
(* lower *)
let lower ~lib (modl : Ast.modl) =
let new_id = make_id_dispenser () in

7
native/Cargo.toml Normal file
View File

@ -0,0 +1,7 @@
[package]
name = "spicei"
version = "0.1.0"
edition = "2021"
[dependencies]
thiserror = "1.0"

181
native/src/decode.rs Normal file
View File

@ -0,0 +1,181 @@
use thiserror::Error;
use super::opcodes::*;
/// Errors produced while decoding bytecode.
#[derive(Debug, Error)]
pub enum DecodeError {
/// The input ended before the instruction was complete. The payload is the
/// minimum input length (in bytes) that would have been required: relative
/// to the start of the instruction when returned by `decode_one`, and offset
/// by the stream position when returned by the `Decode` iterator.
#[error("not enough data (len < {0})")]
Trunc(usize),
/// The first byte of the instruction did not match any known instruction.
#[error("invalid instruction: {0:x}")]
Ins(u8),
/// The operation or condition selector byte was not a known value.
#[error("invalid operation: {0:x}")]
Opr(u8),
}
/// Decode a single instruction from the start of `bs`, returning the instruction and its
/// size (in bytes).
///
/// The low six bits of the first byte select the instruction, except for `CAL`, which is
/// recognised by a low nibble of `0xd` and carries a count in its high nibble. Bit `0x80`
/// of the first byte marks a constant operand and bit `0x40` an offset-style location
/// (see `arg` and `loc`). The size is 4 bytes when bit 0 of the first byte is set and
/// 2 bytes otherwise.
///
/// # Errors
///
/// * [`DecodeError::Trunc`] if `bs` is shorter than the instruction requires.
/// * [`DecodeError::Ins`] if the first byte does not match any instruction.
/// * [`DecodeError::Opr`] if an operation or condition selector byte is invalid.
pub fn decode_one(bs: &[u8]) -> Result<(Ins, usize), DecodeError> {
    let byte0 = byte(bs, 0)?;
    let len = (byte0 & 1) as usize * 2 + 2;
    let ins = match byte0 & 0x3f {
        0x00 => {
            let a = arg(byte(bs, 1)?, byte0);
            Ins::RET(a)
        }
        0x04 => {
            let a = arg(byte(bs, 1)?, byte0);
            Ins::Br(InsBr::BTR(a))
        }
        0x14 => {
            let a = arg(byte(bs, 1)?, byte0);
            Ins::Br(InsBr::BFL(a))
        }
        0x01 => {
            let a = Reg(byte(bs, 2)?);
            let b = arg(byte(bs, 3)?, byte0);
            Ins::MOV(InsMov::RV(a, b))
        }
        0x03 => {
            let a = Reg(byte(bs, 1)?);
            let b = loc(byte(bs, 2)?, byte(bs, 3)?, byte0);
            Ins::MOV(InsMov::RL(a, b))
        }
        0x05 => {
            let a = arg(byte(bs, 1)?, byte0);
            let b = loc(byte(bs, 2)?, byte(bs, 3)?, byte0);
            Ins::MOV(InsMov::LV(b, a))
        }
        0x07 => {
            let opr = opr(byte(bs, 1)?)?;
            let a = Reg(byte(bs, 2)?);
            let b = arg(byte(bs, 3)?, byte0);
            Ins::MOV(InsMov::Op(opr, a, b))
        }
        0x17 => {
            let cnd = cnd(byte(bs, 1)?)?;
            let a = Reg(byte(bs, 2)?);
            let b = arg(byte(bs, 3)?, byte0);
            Ins::Br(InsBr::BCnd(cnd, a, b))
        }
        // `LOC` has first byte 0x89. The scrutinee is masked to six bits, so a literal
        // `0x89` pattern could never match; compare the masked opcode (0x09) and check
        // the two flag bits separately so that exactly `byte0 == 0x89` is accepted.
        0x09 if byte0 & 0xc0 == 0x80 => {
            let a = Reg(byte(bs, 1)?);
            let b = Reg(byte(bs, 2)?);
            let c = Cst(byte(bs, 3)?);
            Ins::LOC(InsLoc(a, b, c))
        }
        0x3f => {
            // The displacement is the upper 24 bits of the little-endian word;
            // the arithmetic shift keeps its sign.
            let d = i32_le(bs)? >> 8;
            Ins::JMP(InsJmp(d))
        }
        _ if (byte0 & 0xf == 0xd) => {
            let a = Reg(byte(bs, 1)?);
            // The high nibble holds the count, so the location flag bit is not
            // available here: a fixed first byte selects the register form.
            let b = loc(byte(bs, 2)?, byte(bs, 3)?, 0x0d);
            let n = byte0 >> 4;
            Ins::CAL(InsCal(a, b, n))
        }
        _ => return Err(DecodeError::Ins(byte0)),
    };
    Ok((ins, len))
}
/// Read the byte at index `i`, or report that at least `i + 1` bytes were needed.
#[inline]
fn byte(bs: &[u8], i: usize) -> Result<u8, DecodeError> {
    match bs.get(i) {
        Some(&value) => Ok(value),
        None => Err(DecodeError::Trunc(i + 1)),
    }
}
#[inline]
fn i32_le(bs: &[u8]) -> Result<i32, DecodeError> {
let slice = bs.get(..4).ok_or(DecodeError::Trunc(4))?;
Ok(i32::from_le_bytes(slice.try_into().unwrap()))
}
/// Interpret operand byte `b` as a constant when bit `0x80` of `byte0` is set,
/// and as a register otherwise.
fn arg(b: u8, byte0: u8) -> Arg {
    const CONSTANT_FLAG: u8 = 0x80;
    match byte0 & CONSTANT_FLAG {
        0 => Arg::R(Reg(b)),
        _ => Arg::K(Cst(b)),
    }
}
/// Reinterpret an offset byte as a signed (two's complement) 8-bit value.
fn ofs(b: u8) -> i8 {
    i8::from_ne_bytes([b])
}
/// Build a location from base register byte `b` and index byte `c`.
///
/// When bit `0x40` of `byte0` is set, `c` is a signed offset; otherwise it is a
/// second register.
fn loc(b: u8, c: u8, byte0: u8) -> Loc {
    let base = Reg(b);
    let offset_form = byte0 & 0x40 != 0;
    if offset_form {
        Loc::O(base, ofs(c))
    } else {
        Loc::R(base, Reg(c))
    }
}
/// Decode an operation selector byte.
///
/// Values `0x0..=0x7` name the non-comparison operations; every other value is
/// handed to `cnd`, so the comparison conditions map to `Opr::Cmp` and anything
/// else (including `0x8` and `0x9`) is rejected with `DecodeError::Opr`.
fn opr(b: u8) -> Result<Opr, DecodeError> {
    // Indexed by selector value.
    const BY_SELECTOR: [Opr; 8] = [
        Opr::CON,
        Opr::NOT,
        Opr::NEG,
        Opr::ADD,
        Opr::SUB,
        Opr::MUL,
        Opr::DIV,
        Opr::MOD,
    ];
    match BY_SELECTOR.get(usize::from(b)) {
        Some(&operation) => Ok(operation),
        None => cnd(b).map(Opr::Cmp),
    }
}
fn cnd(b: u8) -> Result<Cnd, DecodeError> {
match b {
0xa => Ok(Cnd::EQ),
0xb => Ok(Cnd::NE),
0xc => Ok(Cnd::LT),
0xd => Ok(Cnd::GE),
0xe => Ok(Cnd::GT),
0xf => Ok(Cnd::LE),
_ => Err(DecodeError::Opr(b)),
}
}
/// Iterator adapter for [`decode_one`].
///
/// Yields one item per instruction, starting at the beginning of the data. The
/// iterator ends when the data is exhausted or after the first error has been
/// yielded; in the latter case [`Decode::position`] keeps pointing at the
/// instruction that failed to decode.
#[derive(Clone)]
pub struct Decode<T: AsRef<[u8]>> {
    /// The bytecode being decoded.
    bytes: T,
    /// Byte offset of the next instruction to decode.
    pos: usize,
    /// Set once an error has been yielded; no further items are produced.
    failed: bool,
}

/// Decode all instructions from bytecode data.
pub fn decode<T: AsRef<[u8]>>(bs: T) -> Decode<T> {
    Decode {
        bytes: bs,
        pos: 0,
        failed: false,
    }
}

impl<T: AsRef<[u8]>> Decode<T> {
    /// Give back the underlying bytecode data.
    pub fn into_inner(self) -> T {
        self.bytes
    }

    /// Byte offset of the next instruction to decode, or of the instruction
    /// that failed if an error has been yielded.
    pub fn position(&self) -> usize {
        self.pos
    }
}

impl<T: AsRef<[u8]>> Iterator for Decode<T> {
    type Item = Result<Ins, DecodeError>;

    fn next(&mut self) -> Option<Self::Item> {
        // Without this guard a failed decode would be retried at the same
        // position on every call, yielding the same error forever.
        if self.failed {
            return None;
        }
        let rest = self
            .bytes
            .as_ref()
            .get(self.pos..)
            .filter(|rest| !rest.is_empty())?;
        match decode_one(rest) {
            Ok((ins, len)) => {
                self.pos += len;
                Some(Ok(ins))
            }
            Err(err) => {
                self.failed = true;
                // Report the required length relative to the whole input
                // rather than to the start of this instruction.
                Some(Err(match err {
                    DecodeError::Trunc(needed) => DecodeError::Trunc(needed + self.pos),
                    other => other,
                }))
            }
        }
    }
}

impl<T: AsRef<[u8]>> std::iter::FusedIterator for Decode<T> {}

4
native/src/lib.rs Normal file
View File

@ -0,0 +1,4 @@
pub mod opcodes;
pub mod decode;
pub use decode::{decode, DecodeError};

39
native/src/main.rs Normal file
View File

@ -0,0 +1,39 @@
use std::io::{BufReader, Read};
use thiserror::Error;
use spicei::*;
/// Errors reported by this command-line program.
#[derive(Debug, Error)]
enum Error {
/// An I/O failure (raised while reading standard input in `entry`).
#[error(transparent)]
Io(#[from] std::io::Error),
/// A decoding failure, together with the decoder position at the time the
/// error was returned.
#[error("decode error at position {1}")]
Decode(#[source] DecodeError, usize),
}
fn entry() -> Result<(), Error> {
let mut bytes = vec![];
BufReader::new(std::io::stdin()).read_to_end(&mut bytes)?;
let mut decode = decode(&bytes);
while let Some(ins) = decode.next() {
let ins = ins.map_err(|err| Error::Decode(err, decode.position()))?;
println!("{ins}");
}
Ok(())
}
/// Run `entry`; on failure print the error and its chain of sources to standard
/// error and exit with a non-zero status.
fn main() {
    // Needed for `source()`; shadows the local `Error` enum inside this function only.
    use std::error::Error;
    if let Err(err) = entry() {
        eprintln!("error: {err}");
        let mut src = err.source();
        while let Some(cause) = src {
            // Diagnostics belong on stderr so they never mix with the
            // instruction listing written to stdout.
            eprintln!("from: {cause}");
            src = cause.source();
        }
        std::process::exit(1);
    }
}

159
native/src/opcodes.rs Normal file
View File

@ -0,0 +1,159 @@
use std::fmt;
/// A register operand, identified by its 8-bit index.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[repr(transparent)]
pub struct Reg(pub u8);

/// Formats as `R` followed by the decimal index.
impl fmt::Display for Reg {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(index) = self;
        write!(f, "R{index}")
    }
}
/// A constant operand, identified by its 8-bit index.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[repr(transparent)]
pub struct Cst(pub u8);

/// Formats as `K` followed by the decimal index.
impl fmt::Display for Cst {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(index) = self;
        write!(f, "K{index}")
    }
}
/// A value operand: either a register or a constant.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
pub enum Arg {
    R(Reg),
    K(Cst),
}

/// Formats exactly like the wrapped register or constant.
impl fmt::Display for Arg {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Call through the trait path: only the `fmt` module is imported in this
        // file, so the `Display` trait itself is not in scope for method-call
        // syntax (and the operands implement `Debug` as well).
        match *self {
            Self::R(r) => fmt::Display::fmt(&r, f),
            Self::K(k) => fmt::Display::fmt(&k, f),
        }
    }
}
/// A location operand: a base register combined with either a second register
/// or a signed 8-bit offset.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
pub enum Loc {
    R(Reg, Reg),
    O(Reg, i8),
}

/// Formats as the base register followed by the register or offset in square brackets.
impl fmt::Display for Loc {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::R(base, index) => write!(f, "{base}[{index}]"),
            Self::O(base, offset) => write!(f, "{base}[{offset}]"),
        }
    }
}
/// A decoded instruction.
#[derive(Clone, Eq, PartialEq, Debug)]
pub enum Ins {
/// Move or operation instruction; see [`InsMov`].
MOV(InsMov),
/// `loc` instruction; see [`InsLoc`].
LOC(InsLoc),
/// `cal` instruction; see [`InsCal`].
CAL(InsCal),
/// `jmp` instruction; see [`InsJmp`].
JMP(InsJmp),
/// Branch instruction; see [`InsBr`].
Br(InsBr),
/// `ret` instruction with its value operand.
RET(Arg),
}
/// Formats the instruction as assembly-style text: a lowercase mnemonic followed
/// by its operands.
impl fmt::Display for Ins {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        use Ins::*;
        use InsBr::*;
        use InsMov::*;
        match *self {
            MOV(Op(opr, a, b)) => write!(f, "{opr} {a}, {b}"),
            MOV(RV(a, b)) => write!(f, "mov {a}, {b}"),
            MOV(RL(a, b)) => write!(f, "mov {a}, {b}"),
            MOV(LV(a, b)) => write!(f, "mov {a}, {b}"),
            LOC(InsLoc(a, b, k)) => write!(f, "loc {a}, {b}, {}", Arg::K(k)),
            CAL(InsCal(a, b, n)) => write!(f, "cal {a}, {b}({a},...{n})"),
            JMP(InsJmp(d)) => write!(f, "jmp {d:+}"),
            RET(v) => write!(f, "ret {v}"),
            Br(BTR(v)) => write!(f, "btr {v}"),
            // `BFL` has its own mnemonic; printing it as `btr` made the two
            // branch kinds indistinguishable in the listing.
            Br(BFL(v)) => write!(f, "bfl {v}"),
            Br(BCnd(cnd, a, b)) => write!(f, "b{cnd} {a}, {b}"),
        }
    }
}
/// Payload of [`Ins::MOV`]: the operation form and the three move forms, which
/// differ only in the kinds of their operands. Operands are stored in the order
/// in which they are displayed.
#[derive(Clone, Eq, PartialEq, Debug)]
pub enum InsMov {
/// Operation applied to a register and a value operand.
Op(Opr, Reg, Arg),
/// Move with a register and a value operand.
RV(Reg, Arg),
/// Move with a register and a location operand.
RL(Reg, Loc),
/// Move with a location and a value operand.
LV(Loc, Arg),
}
/// Payload of [`Ins::Br`]: the branch instructions.
#[derive(Clone, Eq, PartialEq, Debug)]
pub enum InsBr {
/// `btr` with a single value operand (presumably "branch if true" - confirm).
BTR(Arg),
/// `bfl` with a single value operand (presumably "branch if false" - confirm).
BFL(Arg),
/// Conditional branch comparing a register with a value operand under `Cnd`.
BCnd(Cnd, Reg, Arg),
}
/// Payload of [`Ins::LOC`]: two registers and a constant, in the order in which
/// they are displayed.
#[derive(Clone, Eq, PartialEq, Debug)]
pub struct InsLoc(pub Reg, pub Reg, pub Cst);
/// Payload of [`Ins::CAL`]: a register, a location and a small count. The
/// decoder takes the count from the high nibble of the first instruction byte
/// (presumably the number of arguments - confirm).
#[derive(Clone, Eq, PartialEq, Debug)]
pub struct InsCal(pub Reg, pub Loc, pub u8);
/// Payload of [`Ins::JMP`]: a signed displacement. The decoder obtains it from
/// the upper 24 bits of the instruction word, and it is displayed with an
/// explicit sign.
#[derive(Clone, Eq, PartialEq, Debug)]
pub struct InsJmp(pub i32);
/// Operation selector of an operation instruction: one of the fixed operations
/// or a comparison under a given condition.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
pub enum Opr {
    CON,
    NOT,
    NEG,
    ADD,
    SUB,
    MUL,
    DIV,
    MOD,
    Cmp(Cnd),
}

/// Formats as the lowercase mnemonic; comparisons are `c` followed by the
/// condition mnemonic.
impl fmt::Display for Opr {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mnemonic = match self {
            Self::Cmp(condition) => return write!(f, "c{condition}"),
            Self::CON => "con",
            Self::NOT => "not",
            Self::NEG => "neg",
            Self::ADD => "add",
            Self::SUB => "sub",
            Self::MUL => "mul",
            Self::DIV => "div",
            Self::MOD => "mod",
        };
        f.write_str(mnemonic)
    }
}
/// Comparison condition used by comparison operations and conditional branches.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
pub enum Cnd {
    EQ,
    NE,
    LT,
    GT,
    LE,
    GE,
}

/// Formats as the two-letter lowercase mnemonic.
impl fmt::Display for Cnd {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            Self::EQ => "eq",
            Self::NE => "ne",
            Self::LT => "lt",
            Self::GT => "gt",
            Self::LE => "le",
            Self::GE => "ge",
        })
    }
}