import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.Channels;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * Dumps the contents of an OCaml bytecode executable.
 *
 * <p>The file is read from its end: a 12-byte trailer magic, a 32-bit section
 * count, and a table of (4-character name, 32-bit length) entries; section
 * payloads sit directly before that table in table order. The SYMB, CRCS and
 * DATA sections are decoded as marshalled values, PRIM as a list of
 * NUL-terminated primitive names, and CODE is disassembled to standard output.
 */
public class Jocaml {
    /** Magic string expected in the last 12 bytes of the file. */
    private static final String TRAILER_MAGIC = "Caml1999X028";

    /** Marshal header magic for the 20-byte ("small") header, the only one supported. */
    private static final int MARSHAL_MAGIC_SMALL = 0x8495A6BE;

    /** Marshal header magic for the "big" header; recognised only to give a clearer error. */
    private static final int MARSHAL_MAGIC_BIG = 0x8495A6BF;

    /** Size in bytes of the small marshal header (magic, data length, object count, two sizes). */
    private static final int MARSHAL_HEADER_SIZE = 20;

    /**
     * Bytecode instructions, declared in opcode order.
     *
     * <p>{@code numArgs} is the number of 32-bit operand words that follow the
     * opcode. C_CALL*, CLOSUREREC and SWITCH are decoded specially by
     * {@link Section#disassemble(List)}; for the variable-length CLOSUREREC and
     * SWITCH, {@code numArgs} only counts the fixed leading words.
     */
    private static enum CamlInst {
        ACC0(0, 0), ACC1(1, 0), ACC2(2, 0), ACC3(3, 0),
        ACC4(4, 0), ACC5(5, 0), ACC6(6, 0), ACC7(7, 0),
        ACC(8, 1),
        PUSH(9, 0),
        PUSHACC0(10, 0), PUSHACC1(11, 0), PUSHACC2(12, 0), PUSHACC3(13, 0),
        PUSHACC4(14, 0), PUSHACC5(15, 0), PUSHACC6(16, 0), PUSHACC7(17, 0),
        PUSHACC(18, 1),
        POP(19, 1),
        ASSIGN(20, 1),
        ENVACC1(21, 0), ENVACC2(22, 0), ENVACC3(23, 0), ENVACC4(24, 0),
        ENVACC(25, 1),
        PUSHENVACC1(26, 0), PUSHENVACC2(27, 0), PUSHENVACC3(28, 0), PUSHENVACC4(29, 0),
        PUSHENVACC(30, 1),
        PUSH_RETADDR(31, 1),
        APPLY(32, 1), APPLY1(33, 0), APPLY2(34, 0), APPLY3(35, 0),
        APPTERM(36, 2), APPTERM1(37, 1), APPTERM2(38, 1), APPTERM3(39, 1),
        RETURN(40, 1),
        RESTART(41, 0),
        GRAB(42, 1),
        CLOSURE(43, 2),
        CLOSUREREC(44, 2),
        OFFSETCLOSUREM2(45, 0), OFFSETCLOSURE0(46, 0), OFFSETCLOSURE2(47, 0),
        OFFSETCLOSURE(48, 1),
        PUSHOFFSETCLOSUREM2(49, 0), PUSHOFFSETCLOSURE0(50, 0), PUSHOFFSETCLOSURE2(51, 0),
        PUSHOFFSETCLOSURE(52, 1),
        GETGLOBAL(53, 1), PUSHGETGLOBAL(54, 1),
        GETGLOBALFIELD(55, 2), PUSHGETGLOBALFIELD(56, 2),
        SETGLOBAL(57, 1),
        ATOM0(58, 0), ATOM(59, 1), PUSHATOM0(60, 0), PUSHATOM(61, 1),
        MAKEBLOCK(62, 2), MAKEBLOCK1(63, 1), MAKEBLOCK2(64, 1), MAKEBLOCK3(65, 1),
        MAKEFLOATBLOCK(66, 1),
        GETFIELD0(67, 0), GETFIELD1(68, 0), GETFIELD2(69, 0), GETFIELD3(70, 0),
        GETFIELD(71, 1), GETFLOATFIELD(72, 1),
        SETFIELD0(73, 0), SETFIELD1(74, 0), SETFIELD2(75, 0), SETFIELD3(76, 0),
        SETFIELD(77, 1), SETFLOATFIELD(78, 1),
        VECTLENGTH(79, 0), GETVECTITEM(80, 0), SETVECTITEM(81, 0),
        GETBYTESCHAR(82, 0), SETBYTESCHAR(83, 0),
        BRANCH(84, 1), BRANCHIF(85, 1), BRANCHIFNOT(86, 1),
        // One fixed "sizes" word; the jump table that follows is read by disassemble().
        SWITCH(87, 1),
        BOOLNOT(88, 0),
        PUSHTRAP(89, 1), POPTRAP(90, 0), RAISE(91, 0),
        CHECK_SIGNALS(92, 0),
        C_CALL1(93, 1), C_CALL2(94, 1), C_CALL3(95, 1), C_CALL4(96, 1), C_CALL5(97, 1),
        C_CALLN(98, 2),
        CONST0(99, 0), CONST1(100, 0), CONST2(101, 0), CONST3(102, 0),
        CONSTINT(103, 1),
        PUSHCONST0(104, 0), PUSHCONST1(105, 0), PUSHCONST2(106, 0), PUSHCONST3(107, 0),
        PUSHCONSTINT(108, 1),
        NEGINT(109, 0), ADDINT(110, 0), SUBINT(111, 0), MULINT(112, 0),
        DIVINT(113, 0), MODINT(114, 0),
        ANDINT(115, 0), ORINT(116, 0), XORINT(117, 0),
        LSLINT(118, 0), LSRINT(119, 0), ASRINT(120, 0),
        EQ(121, 0), NEQ(122, 0), LTINT(123, 0), LEINT(124, 0), GTINT(125, 0), GEINT(126, 0),
        OFFSETINT(127, 1), OFFSETREF(128, 1),
        ISINT(129, 0),
        GETMETHOD(130, 0),
        BEQ(131, 2), BNEQ(132, 2), BLTINT(133, 2), BLEINT(134, 2),
        BGTINT(135, 2), BGEINT(136, 2),
        ULTINT(137, 0), UGEINT(138, 0),
        BULTINT(139, 2), BUGEINT(140, 2),
        GETPUBMET(141, 2), GETDYNMET(142, 0),
        STOP(143, 0), EVENT(144, 0), BREAK(145, 0),
        RERAISE(146, 0),
        // NOTE(review): the original author marked the entries below with "???".
        RAISE_NOTRACE(147, 0),
        GETSTRINGCHAR(148, 0);

        public final int code;
        public final int numArgs;

        CamlInst(int code, int numArgs) {
            this.code = code;
            this.numArgs = numArgs;
        }

        // Sanity check: disassemble() indexes values() by opcode, so every
        // constant's code must equal its position in the declaration order.
        static {
            CamlInst[] all = CamlInst.values();
            for (int i = 0; i < all.length; i++) {
                if (all[i].code != i) {
                    throw new RuntimeException("what");
                }
            }
        }
    }

    /** Value wrapper for an offset into the code section (currently unused by this file). */
    private static class CodeOffset {
        public final int offset;

        public CodeOffset(int offset) {
            this.offset = offset;
        }

        @Override
        public String toString() {
            return "CodeOffset<" + offset + ">";
        }

        @Override
        public boolean equals(Object other) {
            return other instanceof CodeOffset && ((CodeOffset) other).offset == this.offset;
        }

        @Override
        public int hashCode() {
            return offset * 13;
        }
    }

    /** A decoded marshalled block: a tag plus its (possibly empty) list of fields. */
    private static class Block {
        public final int tag;
        public final List<Object> contents;

        public Block(int tag, List<Object> contents) {
            this.tag = tag;
            this.contents = contents;
        }

        @Override
        public String toString() {
            return "Block<" + tag + ": " + contents.toString() + ">";
        }
    }

    /** One named section of the bytecode file together with its raw payload. */
    private static class Section {
        final String name;
        final ByteBuffer data;

        public Section(String name, ByteBuffer data) {
            this.name = name;
            this.data = data;
        }

        @Override
        public String toString() {
            return "Section";
        }

        /**
         * Splits the payload into its NUL-terminated strings.
         *
         * <p>Bytes are mapped one-to-one to chars (ISO-8859-1). A final string
         * that lacks its terminating NUL is still returned instead of running
         * past the end of the buffer.
         *
         * @return the strings in payload order
         */
        public List<String> unstring() {
            List<String> strings = new ArrayList<>();
            int end = this.data.limit();
            int i = 0;
            while (i < end) {
                StringBuilder current = new StringBuilder();
                while (i < end && this.data.get(i) != 0) {
                    // Mask so bytes >= 0x80 do not sign-extend into 0xFFxx chars.
                    current.append((char) (this.data.get(i) & 0xff));
                    i += 1;
                }
                strings.add(current.toString());
                i += 1; // skip the terminating NUL
            }
            return strings;
        }

        /**
         * Resolves a back-reference to an already decoded object.
         *
         * <p>The OCaml runtime resolves a shared reference as
         * {@code table[objectCount - offset]}, so an offset of 1 denotes the most
         * recently recorded object.
         *
         * @param shared objects recorded so far, in decoding order
         * @param offset unsigned distance read from the stream
         * @return the referenced object
         * @throws IOException if the offset does not point into {@code shared}
         */
        private static Object sharedAt(List<Object> shared, long offset) throws IOException {
            if (offset < 1 || offset > shared.size()) {
                throw new IOException("invalid shared reference offset " + offset
                        + " (only " + shared.size() + " objects recorded)");
            }
            return shared.get(shared.size() - (int) offset);
        }

        /**
         * Reads {@code len} raw bytes as a string and records it as shareable.
         *
         * @param len    unsigned byte length of the string
         * @param shared table of shareable objects; the new string is appended
         * @return the decoded string (ISO-8859-1, so every byte is preserved)
         * @throws IOException if fewer than {@code len} bytes remain
         */
        private String readString(long len, List<Object> shared) throws IOException {
            if (len > this.data.remaining()) {
                throw new IOException("string length " + len + " exceeds remaining data");
            }
            byte[] raw = new byte[(int) len];
            this.data.get(raw);
            String s = new String(raw, StandardCharsets.ISO_8859_1);
            shared.add(s);
            return s;
        }

        /**
         * Reads the {@code size} fields of a block.
         *
         * <p>A non-empty block is recorded in {@code shared} before its fields
         * are decoded; empty blocks are not recorded at all.
         */
        private Block readBlock(int tag, long size, List<Object> shared) throws IOException {
            List<Object> elems = new ArrayList<>();
            Block block = new Block(tag, elems);
            if (size > 0) {
                shared.add(block);
                for (long i = 0; i < size; i++) {
                    elems.add(parseNextObject(shared));
                }
            }
            return block;
        }

        /**
         * Decodes the next marshalled value at the buffer's current position.
         *
         * <p>The buffer is expected to be in big-endian order, as set by
         * {@link #unmarshal()}.
         *
         * @param shared table of shareable objects decoded so far; updated in place
         * @return a {@link Long}, {@link Double}, {@link String} or {@link Block}
         * @throws IOException on an unsupported or invalid tag, or a bad back-reference
         */
        private Object parseNextObject(List<Object> shared) throws IOException {
            int nextTag = this.data.get() & 0xff;
            switch (nextTag) {
                // integers (signed, 8/16/32/64 bits)
                case 0x00:
                    return Long.valueOf(this.data.get());
                case 0x01:
                    return Long.valueOf(this.data.getShort());
                case 0x02:
                    return Long.valueOf(this.data.getInt());
                case 0x03:
                    return Long.valueOf(this.data.getLong());

                // shared elements (unsigned 8/16/32-bit back-reference)
                case 0x04:
                    return sharedAt(shared, this.data.get() & 0xffL);
                case 0x05:
                    return sharedAt(shared, this.data.getShort() & 0xffffL);
                case 0x06:
                    return sharedAt(shared, this.data.getInt() & 0xffffffffL);

                // blocks: 32-bit header, tag in the low 8 bits, size above bit 10
                case 0x08: {
                    long header = this.data.getInt() & 0xffffffffL;
                    int tag = (int) (header & 0xff);
                    long size = header >>> 10;
                    return readBlock(tag, size, shared);
                }
                case 0x13:
                    throw new IOException("long block not supported");

                // strings with an 8-bit or 32-bit length prefix
                case 0x09:
                    return readString(this.data.get() & 0xffL, shared);
                case 0x0a:
                    return readString(this.data.getInt() & 0xffffffffL, shared);

                // floats: 0x0b is stored big-endian, 0x0c little-endian
                case 0x0b: {
                    Double obj = Double.valueOf(this.data.getDouble());
                    shared.add(obj);
                    return obj;
                }
                case 0x0c: {
                    long bits = Long.reverseBytes(this.data.getLong());
                    Double obj = Double.valueOf(Double.longBitsToDouble(bits));
                    shared.add(obj);
                    return obj;
                }
                case 0x0D:
                case 0x0E:
                    throw new IOException("double array not supported");
                case 0x07:
                case 0x0f:
                    throw new IOException("double array not supported");

                // misc
                case 0x10:
                    throw new IOException("code pointer with checksum");
                case 0x11:
                    throw new IOException("code pointer with closure");

                // custom blocks: NUL-terminated identifier followed by its payload
                case 0x12:
                case 0x18:
                case 0x19: {
                    StringBuilder ident = new StringBuilder();
                    while (true) {
                        int next = this.data.get() & 0xff;
                        if (next == 0) {
                            break;
                        }
                        ident.append((char) next);
                    }
                    String name = ident.toString();
                    switch (name) {
                        // int64
                        case "_j": {
                            Long v = Long.valueOf(this.data.getLong());
                            shared.add(v);
                            return v;
                        }
                        default:
                            throw new IOException("unknown custom op: " + name);
                    }
                }

                default: {
                    if (nextTag < 0x20) {
                        throw new IOException("invalid tag " + nextTag);
                    } else if (nextTag < 0x40) {
                        // small string: length in the low 5 bits
                        return readString(nextTag & 0x1f, shared);
                    } else if (nextTag < 0x80) {
                        // small integer: value in the low 6 bits
                        return Long.valueOf(nextTag & 0x3f);
                    } else {
                        // small block: tag in the low 4 bits, size in the next 3
                        long size = (nextTag >> 4) & 0x07;
                        int tag = nextTag & 0x0f;
                        return readBlock(tag, size, shared);
                    }
                }
            }
        }

        /**
         * Decodes the section as a marshalled value stream.
         *
         * <p>Only the 20-byte small header is supported. The object count from
         * the header and the number of shareable objects actually decoded are
         * printed to standard output as diagnostics.
         *
         * @return every top-level value found after the header, in order
         * @throws IOException if the header is truncated or has the wrong magic,
         *                     or if the payload cannot be decoded
         */
        public List<Object> unmarshal() throws IOException {
            this.data.order(ByteOrder.BIG_ENDIAN);
            if (this.data.limit() < MARSHAL_HEADER_SIZE) {
                throw new IOException("truncated marshal header in section " + this.name);
            }
            int magic = this.data.getInt(0);
            if (magic == MARSHAL_MAGIC_BIG) {
                throw new IOException("bad marshal magic (big header not supported)");
            }
            if (magic != MARSHAL_MAGIC_SMALL) {
                throw new IOException("bad marshal magic");
            }
            // Header layout: magic @0, data length @4, object count @8, two size words @12/@16.
            int numObjs = this.data.getInt(8);
            System.out.println(numObjs);

            List<Object> shared = new ArrayList<>();
            List<Object> objs = new ArrayList<>();
            this.data.position(MARSHAL_HEADER_SIZE);
            while (this.data.hasRemaining()) {
                objs.add(parseNextObject(shared));
            }
            System.out.println(shared.size());
            return objs;
        }

        /** Returns the primitive name at {@code index}, or a placeholder when out of range. */
        private static String primitiveName(List<String> prim, int index) {
            if (index < 0 || index >= prim.size()) {
                return "<unknown primitive " + index + ">";
            }
            return prim.get(index);
        }

        /**
         * Prints one line per instruction in the form {@code "%05d: MNEMONIC operands"},
         * where the number is the instruction's index in 32-bit words.
         *
         * <p>Operand words are printed as raw signed integers, except primitive
         * indices, which are replaced by the primitive's name.
         *
         * @param prim primitive names, indexed by the operand of the C_CALL instructions
         * @throws IOException declared for callers; decoding errors currently surface
         *                     as unchecked buffer exceptions
         */
        public void disassemble(List<String> prim) throws IOException {
            this.data.order(ByteOrder.LITTLE_ENDIAN);
            this.data.position(0);
            CamlInst[] table = CamlInst.values();
            while (this.data.hasRemaining()) {
                int pc = this.data.position() / 4;
                int op = this.data.getInt();
                StringBuilder rep = new StringBuilder();
                if (op < 0 || op >= table.length) {
                    rep.append("<unknown opcode ").append(op).append('>');
                } else {
                    CamlInst inst = table[op];
                    rep.append(inst.name());
                    switch (inst) {
                        case C_CALL1:
                        case C_CALL2:
                        case C_CALL3:
                        case C_CALL4:
                        case C_CALL5: {
                            int p = this.data.getInt();
                            rep.append(' ').append(primitiveName(prim, p));
                            break;
                        }
                        case C_CALLN: {
                            // The argument count is stored before the primitive index.
                            int n = this.data.getInt();
                            int p = this.data.getInt();
                            rep.append(' ').append(primitiveName(prim, p));
                            rep.append(' ').append(n);
                            break;
                        }
                        case CLOSUREREC: {
                            int nfuncs = this.data.getInt();
                            int nargs = this.data.getInt();
                            rep.append(' ').append(nfuncs).append(' ').append(nargs);
                            for (int i = 0; i < nfuncs; i++) {
                                rep.append(' ').append(this.data.getInt());
                            }
                            break;
                        }
                        case SWITCH: {
                            // Low 16 bits: constant cases; high 16 bits: block cases.
                            // One displacement word follows for each case.
                            int sizes = this.data.getInt();
                            int nconsts = sizes & 0xffff;
                            int nblocks = sizes >>> 16;
                            rep.append(' ').append(nconsts).append(' ').append(nblocks);
                            for (int i = 0; i < nconsts + nblocks; i++) {
                                rep.append(' ').append(this.data.getInt());
                            }
                            break;
                        }
                        default: {
                            for (int i = 0; i < inst.numArgs; i++) {
                                rep.append(' ').append(this.data.getInt());
                            }
                            break;
                        }
                    }
                }
                System.out.printf("%05d: ", pc);
                System.out.println(rep);
            }
        }
    }

    /**
     * Looks up a section by name.
     *
     * @param sections sections to search
     * @param name     four-character section name, e.g. {@code "CODE"}
     * @return the first section with that name, or {@code null} if there is none
     */
    public static Section findSection(Section[] sections, String name) {
        for (Section section : sections) {
            if (section.name.equals(name)) {
                return section;
            }
        }
        return null;
    }

    /** Like {@link #findSection} but fails with a descriptive error instead of returning null. */
    private static Section requireSection(Section[] sections, String name) throws IOException {
        Section section = findSection(sections, name);
        if (section == null) {
            throw new IOException("missing section: " + name);
        }
        return section;
    }

    /**
     * Reads the bytecode file named by {@code args[0]} and prints its SYMB, CRCS
     * and DATA contents followed by a disassembly of CODE.
     *
     * @param args command-line arguments; {@code args[0]} is the file path
     * @throws IllegalArgumentException if no file path is given
     * @throws IOException if the file cannot be read or is malformed
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            throw new IllegalArgumentException("usage: java Jocaml <bytecode-file>");
        }
        byte[] raw = Files.readAllBytes(Paths.get(args[0]));
        ByteBuffer buf = ByteBuffer.wrap(raw); // big-endian by default
        int len = raw.length;

        int magicLen = TRAILER_MAGIC.length();
        // The trailer is the 32-bit section count followed by the magic string.
        if (len < magicLen + 4) {
            throw new IOException("invalid trailer");
        }
        for (int i = 0; i < magicLen; i++) {
            if (raw[len - magicLen + i] != (byte) TRAILER_MAGIC.charAt(i)) {
                throw new IOException("invalid trailer");
            }
        }

        int tableEnd = len - magicLen - 4;
        int numSections = buf.getInt(tableEnd);
        long tableStart = (long) tableEnd - (long) numSections * 8;
        if (numSections < 0 || tableStart < 0) {
            throw new IOException("invalid section count " + numSections);
        }

        // Payloads are stored back-to-back immediately before the table, so walk
        // the table from its last entry and peel each payload off the end.
        Section[] sections = new Section[numSections];
        int sectionBase = (int) tableStart;
        for (int i = numSections - 1; i >= 0; i--) {
            int entry = (int) tableStart + (i * 8);
            String name = new String(raw, entry, 4, StandardCharsets.ISO_8859_1);
            int sectionLength = buf.getInt(entry + 4);
            if (sectionLength < 0 || sectionLength > sectionBase) {
                throw new IOException("invalid length " + sectionLength + " for section " + name);
            }
            sectionBase -= sectionLength;
            byte[] data = Arrays.copyOfRange(raw, sectionBase, sectionBase + sectionLength);
            sections[i] = new Section(name, ByteBuffer.wrap(data));
        }
        System.out.println(Arrays.toString(sections));

        List<Object> symb = requireSection(sections, "SYMB").unmarshal();
        System.out.println(symb);
        List<Object> crcs = requireSection(sections, "CRCS").unmarshal();
        System.out.println(crcs);
        List<Object> data = requireSection(sections, "DATA").unmarshal();
        System.out.println(data);

        List<String> prim = requireSection(sections, "PRIM").unstring();
        requireSection(sections, "CODE").disassemble(prim);
    }
}