From 7eb2c9bfc37c507f683149786724661057811ec5 Mon Sep 17 00:00:00 2001 From: x1phosura Date: Wed, 21 Apr 2021 00:08:19 -0400 Subject: [PATCH] Init commit, basic project working --- .gitignore | 9 + Makefile | 43 +++ README.md | 29 ++ bin/ass.sh | 91 +++++++ challenge-description.md | 10 + doc/LESSONS-LEARNED.md | 10 + doc/architecture-description.md | 25 ++ doc/challenge-design-process.md | 39 +++ doc/flag-memory-layout.txt | 16 ++ doc/flag-obfuscation.md | 43 +++ flag.txt | 3 + src/disass.c | 99 +++++++ src/main.c | 51 ++++ src/rom.asm | 453 ++++++++++++++++++++++++++++++++ src/rom.h | 97 +++++++ src/vm.h | 432 ++++++++++++++++++++++++++++++ src/zeropage.incbin | 16 ++ 17 files changed, 1466 insertions(+) create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 README.md create mode 100755 bin/ass.sh create mode 100644 challenge-description.md create mode 100644 doc/LESSONS-LEARNED.md create mode 100644 doc/architecture-description.md create mode 100644 doc/challenge-design-process.md create mode 100644 doc/flag-memory-layout.txt create mode 100644 doc/flag-obfuscation.md create mode 100644 flag.txt create mode 100644 src/disass.c create mode 100644 src/main.c create mode 100644 src/rom.asm create mode 100644 src/rom.h create mode 100644 src/vm.h create mode 100644 src/zeropage.incbin diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..57cfed3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ + +# don't commit object files or anything in bin +*.bin +*.o +bin/ + +# ...except for ass.sh, we want to include that +!bin/ass.sh + diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..7acb90e --- /dev/null +++ b/Makefile @@ -0,0 +1,43 @@ +# TODO: enforce c99 +# eventually add -Wextra + +# Personal makefile notes: +# '$@' gets replaced with the target name +# '$^' gets replaced with the target's dependencies (to right of target name) +# For more, check out https://youtu.be/G5dNorAoeCM + +# Note: I understand this 
makefile is not as optimal as it could/should be + +CC = gcc +CFLAGS ?= -Wall +DEBUG = -g -ggdb -D DEBUG -lreadline +#HEADERS = vm.h +SRCS = main.c +ROM = rom.bin +OUT = hard + +all: $(OUT) + +# Useful for some projects +#%.o: %.c %.h +# $(CC) $(CFLAGS) -c $^ + +$(OUT): ./src/$(SRCS) + ./bin/ass.sh ./src/rom.asm ./src/zeropage.incbin + xxd -i ./src/$(ROM) > ./src/rom.h + $(CC) $(CFLAGS) -o ./bin/$@ $^ + strip ./bin/$@ + +debug: ./src/$(SRCS) + ./bin/ass.sh ./src/rom.asm ./src/zeropage.incbin + xxd -i ./src/$(ROM) > ./src/rom.h + $(CC) $(CFLAGS) $(DEBUG) -o ./bin/$(OUT)-$@ $^ + +disass: ./src/disass.c + $(CC) $(CFLAGS) $(DEBUG) -o ./bin/$@ $^ + +.PHONY: clean +clean: + rm -f ./bin/$(OUT)* + rm -f ./bin/disass + diff --git a/README.md b/README.md new file mode 100644 index 0000000..ccf30d2 --- /dev/null +++ b/README.md @@ -0,0 +1,29 @@ + +# README + +------------------------------------------------------- +### Documentation +See files in `/doc` for my design process, VM specs, lessons learned, and other stuff. + +------------------------------------------------------- +### Build +Build competition/prod image (default): +`$ make` + +Build debug image (includes useful debugging capabilities): +`$ make debug` + +Build disassembler: +`$ make disass` + +The assembler, `ass.sh`, is already good to go (as good as a bad shell-scripted assembler can be at least) + +------------------------------------------------------- +### Run +`$ ./bin/hard` +or +`$ ./bin/hard-debug` + +depending on which version you wish to run. + + diff --git a/bin/ass.sh b/bin/ass.sh new file mode 100755 index 0000000..bb8229f --- /dev/null +++ b/bin/ass.sh @@ -0,0 +1,91 @@ +#!/bin/sh + +# ass.sh: a bad assembler for my VM instruction set +# +# WARNING: this assembler is ASS! That is why I named it that (in addition to +# being short for 'ass'embler). This was purely a quick hack so that I could +# get working on writing my imaginary VM code ASAP. I would be embarrassed if I +# had any shame left. 
+# +# Assumes GNU sed, will need to be modified if using BSD sed + +die() { + printf "$1\n" + exit $2 +} + + +# bad_assemble: "assembles" the temporary source file provided by $1 (converts +# instructions to opcodes, pretty simple) +# Forgive me for how this function is implemented... +bad_assemble() { + sed -i '/^[[:blank:]]*#/d;s/#.*//' "$1" # remove comments starting with '#' + sed -i 's/0X//gi' "$1" # remove hex prefix '0x' + sed -i 's/HALT[[:blank:]]*/00 00 00/gi' "$1" + sed -i 's/PUSH[[:blank:]]/01/gi' "$1" + sed -i 's/POP[[:blank:]]/02/gi' "$1" + sed -i 's/PUSHI[[:blank:]]/03/gi' "$1" + sed -i 's/LDLR[[:blank:]]/04/gi' "$1" + sed -i 's/STLR[[:blank:]]/05/gi' "$1" + sed -i 's/SETI[[:blank:]]/06/gi' "$1" + sed -i 's/DUP[[:blank:]]*/07 00 00/gi' "$1" + sed -i 's/ADD[[:blank:]]*/08 00 00/gi' "$1" + sed -i 's/SUB[[:blank:]]*/09 00 00/gi' "$1" + sed -i 's/XOR[[:blank:]]*/0a 00 00/gi' "$1" + sed -i 's/CALL[[:blank:]]/0b/gi' "$1" + sed -i 's/RET[[:blank:]]*/0c 00 00/gi' "$1" + sed -i 's/JMP[[:blank:]]/0d/gi' "$1" + sed -i 's/BEQ[[:blank:]]/0e/gi' "$1" + sed -i 's/BNQ[[:blank:]]/0f/gi' "$1" + sed -i 's/NOP[[:blank:]]*/ff 00 00/gi' "$1" + + sed -i 's/H//gi' "$1" # remove hex suffix 'h' after only hex digits remain + sed -i 's/ //g' "$1" + #sed -i 's/\n//g' "$1" +} + + +# link_asm_zp: "links" assembled code to zero page file by simply concatenating +# them, with the total result stored in $1. 
$1 is the temporary assembled file, +# and $2 is the zero page hexdump file +link_asm_zp() { + cat "$2" "$1" > "$1"-new # cat to temporary file + mv "$1"-new "$1" + rm -f "$1"-new +} + + +# write_binary: takes path to an "assembled" file in $1 (contains hex), writes +# it to a binary file, path provided by $2 +write_binary() { + sed -i 's/ //g' "$1" + tr -d '\n' < "$1" > "$1"mod; mv "$1"mod "$1" # in-place remove newlines + xxd -p -r "$1" > "$2" # writes raw binary +} + +usage_msg="Usage: $(basename $0) [vm-source-code] [optional-zero-page-hexdump]" +[ "$#" -ne 1 ] && [ "$#" -ne 2 ] && die "$usage_msg" 69 + +vm_src="$1" +zp_file="$2" + +! [ -f "$vm_src" ] && die "[ERROR]: file $vm_src does not exist" 65 +if [ "$#" = 2 ]; then + ! [ -f "$zp_file" ] && die "[ERROR]: file $zp_file does not exist" 65 +fi + +vm_out=$(printf "$vm_src" | sed 's/.asm$/.bin/') # in file.asm, out file.bin +temp_src="${vm_src}-TEMP$(date --iso-8601=ns)" # create temporary file +cp -v "$vm_src" "$temp_src" # copy to temp file + +bad_assemble "$temp_src" +if [ "$zp_file" != "" ]; then + link_asm_zp "$temp_src" "$zp_file" + write_binary "$temp_src" "$vm_out" +else + write_binary "$temp_src" "$vm_out" +fi + +rm -vf $temp_src # clean up +echo "Assembling $vm_src done. Results in $vm_out" + diff --git a/challenge-description.md b/challenge-description.md new file mode 100644 index 0000000..e5ff65d --- /dev/null +++ b/challenge-description.md @@ -0,0 +1,10 @@ +# Challenge description + +---------------------------------------------------- + +### Un-machined Aerial System + +We've recovered the rom of the crashed drone, and we need to extract a secret value (you'll know it when you see it). However, the rom is a bit _wierd_, and if you're tools can't understand it, you might have to write your own... + +Do the best you can! 
+ diff --git a/doc/LESSONS-LEARNED.md b/doc/LESSONS-LEARNED.md new file mode 100644 index 0000000..82dae95 --- /dev/null +++ b/doc/LESSONS-LEARNED.md @@ -0,0 +1,10 @@ +# Lessons learned + +---------------------------------------------------------------- + +### Organization of tasks when writing a custom VM-packed challenge +// + +### // +// + diff --git a/doc/architecture-description.md b/doc/architecture-description.md new file mode 100644 index 0000000..1b890c3 --- /dev/null +++ b/doc/architecture-description.md @@ -0,0 +1,25 @@ + +Temporary documentation, definitely incorrect as of now + +VM specifications + +Memory: +- 64k +- endianness: whatever's the architecture this baby runs on (little?) + +Registers: +- pc +- lr +- sp + +Operations: fixed width architecture where each instruction is 3-bytes long because I'm wasteful, it's easier to en/decode, and also fuck you +- st1, st2 (from ToS) +- push1, push2 +- pop1, pop2 +- pushi, popi (push/pop immediate value) +- add, sub, and, or, not, (maybe xor) +- jmp +- beq, bne, blt (ToS ==/!=/< mem[operands]) + +use assert()s to make sure pc, sp, etc... are valid + diff --git a/doc/challenge-design-process.md b/doc/challenge-design-process.md new file mode 100644 index 0000000..db5bf1d --- /dev/null +++ b/doc/challenge-design-process.md @@ -0,0 +1,39 @@ +# How I designed this challenge + +---------------------------------------------------------------- + +1. Start with the main _trick_ of the challenge +* The point is that this challenge is both going to suck and also be fun because it uses a VM-based protection scheme, which is the _trick_ or point of the challenge. + +2. Figure out how challenge will work at a VERY high-level +* User types in flag as string, program reads flag and outputs if flag is correct or not +* Crackme-style challenge kinda, need to reverse the algorithm that takes a flag as input and checks if it is correct or not (in this case, the algorithm needs to be reversible) + +3. 
Design algorithm that obfuscates flag input +* Should take a plaintext flag, "scramble" it, then compare the scrambled version with the stored scrambled flag + +4. Implement above algorithm in a "real" assembly language +* I used a limited subset of x86 so that I could more directly translate it to my imaginary architecture +* Realize this is actually a major pain in the ass, and purposefully limiting myself in a language that is already more powerful was _no fun_. +* Decide to heck with step 4, let's just get to writing the emulator! + +5. Design the imaginary architecture +* Stack- or register-machine? How many registers? Instruction encoding? Memory? +* Realize this wasn't going to happen either; decided to design ISA as I needed it +* However, as I started writing the ISA, I realized that stack machine instructions were "less work" in some ways to implement than a register machine (bc needing to handle many different possible registers instead of just one stack; future fun idea: implement a stack machine that supports TWO stacks and use different instructions to manipulate/switch between them). + +6. Write the base of the VM for the imaginary architecture +* Needs to be able to fetch, decode, and execute instructions, it would be helpful if the fetch/decode/execute/repeat cycle were easy/simple, so I decided on a fixed-width encoding using 8 bits for the instruction itself just to make things even easier. However, it had to at least be a little weird, so I used 3-byte instructions (okay, so not weird if you're from the 1960s, but weird for 2021). +* Don't actually need to implement full ISA just yet; just literally 2-3 test instructions (ex. PUSH, POP, HALT) + +7. 
Write tooling for the imaginary architecture +* Tooling consists of a base for an assembler, disassembler, and debugger (base is made easier to get started since it only has to support 2-3 instructions at first; should be trivial or semi-easy to add new instructions) +* Should be able to display the assembly language representation given a raw machine code file (raw binary) +* Should be able to take a basic assembly language and translate it to a raw binary +* Should be able to step through instructions and print out some VM statistics +* Optional: debugger should support breakpoints (I ended up implementing this; BEST DECISION I EVER MADE FOR DEVELOPMENT!!!) + +8. Start coding and implementing/designing ISA +* My strategy: I won't know what I need until I need it, so I'll add it later. I stuck to using the instructions I had, and if I needed a new instruction either because I was missing a necessary capability or I wanted to increase architecture usability, I would just simply implement the new instruction in the switch-case with all the others and add it to my tooling +* I must be a masochist deep down because I could have actually written far more easy-to-implement instructions that would have made bare virtual assembly FAR less painful, but I didn't. Lesson learned: it costs almost nothing to add a new instruction if you're coding on modern computers, so when in doubt, add the instruction (it's just a CTF challenge anyway). 
+ diff --git a/doc/flag-memory-layout.txt b/doc/flag-memory-layout.txt new file mode 100644 index 0000000..66169f4 --- /dev/null +++ b/doc/flag-memory-layout.txt @@ -0,0 +1,16 @@ +Flag memory layout information (I used this for final-stage debugging): + +Given flag input of aaaabbbbccccddddeee + 0x8d0: 6f 2a 2a 54 + 0x9a0: 6c 29 29 95 + 0xa70: 6d 28 28 95 + 0xb40: 62 27 27 89 + 0xc10: 63 26 26 89 + +Given flag input of B4bys_1st_VMPr0tect + pm 0x8d0 4 : 0f 59 48 f4 + pm 0x9a0 4 : 70 15 57 c9 + pm 0xa70 4 : 1c 3b 44 4e + pm 0xb40 4 : 1d 16 44 75 + pm 0xc10 4 : 62 47 19 c0 + diff --git a/doc/flag-obfuscation.md b/doc/flag-obfuscation.md new file mode 100644 index 0000000..d994fe3 --- /dev/null +++ b/doc/flag-obfuscation.md @@ -0,0 +1,43 @@ +# Flag obfuscation algorithm + +--------------------------------------------------------------------- + +Flag: `RS{B4bys_1st_VMPr0tect}` # 23-bytes long +Stored part of flag: `B4bys_1st_VMPr0tect` # 19-bytes long + +### reverse the string + + +... + +Note: program will print 'RS{' and '}' for you; you have to reverse the contents + +Type in flag: 'b4bys_1st...' +Flag passed to virtual machine verbatim + +### Step 0 +Read the string into the virtualized memory space of the VM +* String written to VM memory at constant address (at the time of the competition, this address was 0x0080) +* Note: my entire VM assembly program assumes a 20-byte string. If the user enters fewer, the remaining characters will simply be NULLs (I should have changed this to pad with random chars to make reversing more difficult). The actual flag is 19-characters, so there was a guaranteed NULL terminator in this case. 
+ +### Step 1 +Reverse the string: `tcet0rPMV_ts1_syb4B` + +### Step 2 +Originally, I was going to convert the string to a custom character encoding +* \#\#\# 0-9 is 0-9, 'A-Z' is 10-35, 'a-z' is 36-61 + +Instead, due to time limitations, I just subtracted each character byte by 27 +* Reason: writing the bare virtual assembly was starting to get so painful that I switched to the simple subtraction despite it being slightly lamer + +* Note: the memory assigned to the string is of such a size that if the string is not divisible by 4, it will automatically be NULL-padded (the magic of memset) +* For each 4-byte unit: + * byte 0: xor byte by value 69 + * byte 1: switch with 2 + * byte 2: switch with 1 + * byte 3: add byte 0 to this (after xor by 69) + +THAT is the stored value! +* Note: this value is stored split up into 4-byte words in different places in the program's memory + + diff --git a/flag.txt b/flag.txt new file mode 100644 index 0000000..b849c2e --- /dev/null +++ b/flag.txt @@ -0,0 +1,3 @@ +RS{B4bys_1st_VMPr0tect} # 23 bytes +B4bys_1st_VMPr0tect # 19 bytes + diff --git a/src/disass.c b/src/disass.c new file mode 100644 index 0000000..c9c8fe5 --- /dev/null +++ b/src/disass.c @@ -0,0 +1,99 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "vm.h" + + +char *usage = "Usage: %s [-z] [-s [start_addr]] -i [vmcodefile]\n" + "-z (optional): don't display addresses next to disassembly\n" + "-s (optional): start disassembly at 'start_addr' in file\n" + "-i (required): path to file to disassemble\n"; + +/* TODO: eventually reimplement assembler in C (going to be annoying) */ + +/* disassemble_file: read fp three bytes at a time (seeking to start_addr first when it is non-zero) and hand each 3-byte instruction to print_op_decoded(), prefixed by its offset when show_addr is set + */ +void disassemble_file(FILE *fp, bool show_addr, uint16_t start_addr) +{ + uint8_t instr[3]; + uint16_t start = start_addr; + + if (start_addr) + fseek(fp, (long)start, SEEK_SET); + + while(!feof(fp)) { + if (fread(instr, 1, 3, fp) != 3) // stop on EOF or a truncated trailing instruction + break; + + if(show_addr) { + printf("0x%04x: ", start); + start += 3; + } + print_op_decoded(instr, 
true); + } + + /*// keep in case I ever want to read instructions from a static buffer + for (uint16_t i = 0; i < code_length / 3; i += 3) { + instr[0] = rom[i]; instr[1] = rom[i+1]; instr[2] = rom[i+2]; + print_decoded(instr); + }*/ +} + +int main(int argc, char *argv[]) +{ + int opt; + bool show_addr = true; // show address of instructions + int start_addr = 0x00; // by default, assume code starts here + char infile[4096] = ""; // zero-filled: stays terminated after strncpy, empty if -i is missing + + if (argc < 2) { // expect at least one argument + fprintf(stderr, usage, argv[0]); + return EXIT_FAILURE; + } + + while ((opt = getopt(argc, argv, "zs:i:")) != -1) { + switch (opt) { + case 'z': + show_addr = false; + break; + case 's': + if (sscanf(optarg, "%i", &start_addr) != 1) { // 0 = no conversion, EOF = empty input + fprintf(stderr, usage, argv[0]); + return EXIT_FAILURE; + } + break; + case 'i': + strncpy(infile, optarg, 4096-1); + break; + case '?': + default: + fprintf(stderr, usage, argv[0]); + return EXIT_FAILURE; + } + } + + if (optind > argc) { + fprintf(stderr, "Expected arguments after option -i\n"); + return EXIT_FAILURE; + } + + FILE *fp = fopen(infile, "rb"); + if (fp == NULL) { + perror("fopen() failed..."); + return EXIT_FAILURE; + } + + // TODO: maybe write assembler in C also, but I'll probably write it in + // another lang for now + printf("start_addr: %d\n", start_addr); + disassemble_file(fp, show_addr, (uint16_t)start_addr); + + fclose(fp); + return EXIT_SUCCESS; +} + diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..60750e9 --- /dev/null +++ b/src/main.c @@ -0,0 +1,51 @@ +#include +#include +#include +#include +#include +#include + +#include "vm.h" + +#include "rom.h" // automatically generated from rom.bin by `make` + +char *embedded_msg = "Strings won't save you here...\n"; + + +int main() +{ + char flag_input[21]; + uint8_t *memory = NULL; + struct CPU cpu; + + // believe me, I _really_ tried to put the below calloc() in vm_init() + memory = calloc(RAM_SIZE, sizeof(uint8_t)); + if (memory 
== NULL) + perror("Failed to allocate memory. Aborting..."); + + // __src_rom_bin* comes from "rom.h", generated by make from rom.bin + vm_init(&cpu, memory, __src_rom_bin, __src_rom_bin_len); + printf("Fill in the rest of the flag: RS{"); + fgets(flag_input, 20, stdin); + flag_input[strcspn(flag_input, "\n")] = 0; // trim trailing newline + + printf("The inputted flag was RS{%s}\n\n", flag_input); + for (uint8_t i = 0; i < strlen(flag_input); ++i) + memory[128+i] = (uint8_t)flag_input[i]; + + #ifdef DEBUG + printf("[DEBUG MODE]: Running interpreter...\n"); // DEBUG + is_being_traced = 1; + #endif + vm_run(&cpu, memory); + + uint8_t win = memory[0x30]; // memory test to see if + if (win == 7) // flag was correct + printf("YAY, you got the flag!\n"); + else + printf("Sorry, that's not the flag. Try again!\n"); + + free(memory); + return EXIT_SUCCESS; +} + diff --git a/src/rom.asm b/src/rom.asm new file mode 100644 index 0000000..9cda500 --- /dev/null +++ b/src/rom.asm @@ -0,0 +1,453 @@ +# rom.asm + +# Flag for this challenge: +# B4bys_1st_VMPr0tect + +# Note: as you go deeper in the file, it just gets worse and worse. You can +# see my mental endurance draining to the point of not wanting to think despite +# being forced to because I'm writing really tedious, hardcoded assembly. +# MAJOR lesson learned: if I ever have to write another assembler, I WILL +# add support for labels, as hardcoding EVERY address for every jump/reference +# ended up being even worse than I originally thought. + +# This was the result of 50-60+ hours of hard work (mostly Wednesday-Saturday), +# two all-nighters, and a complete and total rejection of my schoolwork. +# On the schoolwork: let the record show that I learned FAR more writing this +# in a week than I have so far learned in all of my "technical" degree-related +# classes this semester, which I unfortunately suspect will not have changed by +# the time I graduate. 
+ +# program start: 0x0100 +jmp 0x8d01 # jump to main-like function + +##################################################################### +# This subroutine implements the world's sketchiest string reversal +# (using my favorite: self-modifying code!) + +# 0x0103: +push 0x8000 # do push instruction (to be modified) +push 0x0401 # push 1st operand of previous instruction to stack +pushi 0x0100 # push 0x01 to stack +add # add items to stack +dup # duplicate sum +pop 0x0401 # pop one of the sums to overwrite 1st operand of first 'push' +pushi 0x9400 # push 0x94 to top of stack (Note: because of self-modifying + # code, this 94 actually also determines the size of the buffer + # to be copied) +beq 0x2401 # jump ahead to 'pushed' if stack vals are equal +pop 0xffff # dispose at high memory +pop 0xffff # dispose at high memory +jmp 0x0301 # jump back + + +# pushed: (0x0124) +pop 0xffff # dispose at high memory +pop 0xffff # dispose at high memory + + +# current position: 0x012a +pop 0xa010 # do pop instruction (to be modified) +push 0x2b01 # push 1st operand of previous instruction to stack +pushi 0x0100 # push 0x01 to stack +add # add items to stack +dup # duplicate sum +pop 0x2b01 # pop one of the sums to overwrite 1st operand of first 'push' +pushi 0xb400 # push 0xb4 to top of stack (Note: because of self-modifying + # code, this b4 actually also determines the size of the buffer + # to be copied) +# 0x013f +beq 0x4b01 # jump ahead to 'pushed' if stack vals are equal +pop 0xffff # dispose at high memory +pop 0xffff # dispose at high memory +# 0x0148 +jmp 0x2a01 # jump back + +# popped: (0x014b) +pop 0xffff # dispose at high memory +pop 0xffff # dispose at high memory +ret +##################################################################### + + +##################################################################### +# This subroutine simply subtracts 27 from each of the bytes in the +# previously reversed string + +# 0x0154 +push 0xa010 +pushi 0x1b00 # 0x1b = 27dec 
+sub +pop 0xa010 # pop subtracted value back into zero page +push 0x5501 # push 1st operand of instruction at 0x154 to stack +pushi 0x0100 +add +dup +dup +pop 0x5501 # pop incremented 1st operand back to code +pop 0x5e01 # pop incremented 1st operand back to code +pushi 0xb400 # push 'pointer' (0xb4) to byte after last byte in string +beq 0x8401 # leave the loop (jump to 0x0184) if stack vals are equal +pop 0xffff +pop 0xffff +jmp 0x5401 +pop 0xffff +pop 0xffff +ret + +# By this point, the reversed, subtracted flag looks like the following: +# e5 59 48 4a 59 15 57 35 32 3b 44 59 58 16 44 58 5e 47 19 27 +# ... and it is located at address 0x10a0 + +##################################################################### + + +##################################################################### +##################################################################### +# main-ish: this pretty much calls everything else +0x41 0x41 0x41 +0x42 0x42 0x42 +0x43 0x43 0x43 +0x44 0x44 0x44 +0x45 0x45 0x45 +0x46 0x46 0x46 +0x47 0x47 0x47 +0x48 0x48 0x48 +0x49 0x49 0x49 +# 0x01a8 +nop +# 0x01ab +call 0x0301 +# 0x01ae +call 0x5401 +# 0x01b1 +call 0xba01 +0x61 0x62 0x63 # TODO: replace with call to flag_cmp +halt +# end main-ish +##################################################################### +##################################################################### + + +##################################################################### +# block_setup: +# +# Note: a "block" is a 4-byte section of the overall 20-byte flag (so +# there are 5 blocks in total) +# blocks get placed at 0x08d0, 0x09a0, 0x0a70, 0x0b40, 0x0c10 + +# 0x01ba +stlr 0x1000 # store link-register at address 0x10 (16) +# 0x01bd +pushi 0x0800 +pushi 0xd000 # stack has 0x08d0 (->buffer) in little-endian (idk) +# 0x01c3 +pop 0x3c02 # write next block address to scramble_block scratchpad +pop 0x3b02 # write next block address to scramble_block scratchpad +push 0xa010 # push block[0] (character) +push 0xa110 # push block[1] (character) +push 0xa210 # push block[2] (character) +push 
0xa310 # push block[3] (character) +# 0x01d5 +call 0x3802 # scramble_block: Note: <- need discard 4 pushed vals off stack + +# 0x01d8 +push 0xca01 # modify 1st push instruction +pushi 0x0400 +add +pop 0xca01 + +# 0x01e4 +push 0xcd01 # modify 2nd push instruction +pushi 0x0400 +add +pop 0xcd01 + +# 0x01f0 +push 0xd001 # modify 3rd push instruction +pushi 0x0400 +add +pop 0xd001 + +# 0x01fc +push 0xd301 # modify 4th push instruction +pushi 0x0400 +add +pop 0xd301 + +# 0x0208 +push 0xca01 # get 1st operand of 1st push instruction +pushi 0xb400 # push value to compare it to (1st operand + 20) +beq 0x2602 # if value matches limit, branch, else +pop 0xffff # discard stack byte +pop 0xffff # discard stack byte + +# 0x0217 +push 0xbe01 # get old pointer to flag buffer block +pushi 0x0100 # add new offset +add +pop 0xbe01 # update pointer to flag buffer block (self-modifying code) +jmp 0xba01 # loop back to scramble next block + +# 0x0226 +pop 0xffff # discard stack byte +pop 0xffff # discard stack byte + +# 0x022c +0x69 0x69 0x69 +0x69 0x69 0x69 +ldlr 0x1000 # restore link-register from address 0x10 + # (literally fails, and I'm too lazy to debug it rn) +jmp 0xc802 +##################################################################### + + + +##################################################################### +# scramble_block: takes a 4-byte block on the stack, write it to scratchpad, +# scrambles it somewhat, write scratchpad to place in memory, erase scratchpad +# return +# + +# 0x0238 +jmp 0x4402 # skip ahead (past function scratchpad) + +# 0x023b # scratchpad: +42 42 42 # first two bytes set to pointer to block location in mem +# 0x023e +43 43 43 # first 3 bytes of block +# 0x0241 +44 44 44 # 4th byte of block is first here + +# 0x0244 (I think) +pushi 0x4500 # push 69dec to stack (Nice) +xor +# 0x024a +pop 0x3e02 # written to by block_setup, points to block location in mem +0x77 0x77 0x77 # junk + +# 0x0250 +pop 0x4002 # write "2nd" byte to 3rd block position +pop 
0x3f02 # write "3rd" byte to 2nd block position +push 0x3e02 # read 69-XORed value to stack +add +pop 0x4102 # add 69-XORed value to 4th value, write to block + +# 0x025f +jmp 0x7102 # GOTO set_lsb + +# 0x0262: write_block_to_mem +pop 0x0000 # write 1st byte of block +pop 0x0100 # write 2nd byte of block +pop 0x0200 # write 3rd byte of block +pop 0x0300 # write 4th byte of block +# 0x026e +jmp 0xc502 # GOTO END (ret) + +# 0x0271: set_lsb: +0x2f 0x2f 0x2f # junk instruction +# 0x0274 +push 0x3b02 # push MSB of pointer to block location in memory +seti 0x6402 # write MSB to code block that will eventually write to it +seti 0x6702 # write MSB to code block that will eventually write to it +seti 0x6a02 # write MSB to code block that will eventually write to it +seti 0x6d02 # write MSB to code block that will eventually write to it + +# 0x0283 +pop 0xffff + +# 0x0286 +push 0x6302 +push 0x3c02 +add +pop 0x6302 # add offset to block pointer LSB to make new LSB +# 0x0292 +push 0x6602 +push 0x3c02 +add +pop 0x6602 # add offset to block pointer LSB to make new LSB +# 0x029e +push 0x6902 +push 0x3c02 +add +pop 0x6902 # add offset to block pointer LSB to make new LSB +# 0x02aa +push 0x6c02 +push 0x3c02 +add +pop 0x6c02 # add offset to block pointer LSB to make new LSB + +# 0x02b6 +push 0x4102 # push 4th block value to stack +push 0x4002 # push 3rd block value to stack +push 0x3f02 # push 2nd block value to stack +push 0x3e02 # push 1st block value to stack + +# 0x02c2 +jmp 0x6202 # GOTO write_block_to_mem + +# 0x02c5: END +jmp 0xd801 + + + # Oh, the joy of working 80+ hours on writing a difficult CTF + # challenge and staring at bad assembly and tons of hex for + # a third of that time. At this point, it is just "getting it + # to werk!" + +# 0x02c8 +jmp 0xda02 +# 0x02cb +pushi 0x0700 # IMPORTANT!!! +seti 0x3000 # set address 0x30 to the number 7 to indicate correct flag. 
+# 0x02d1 +0x30 0x30 0x30 # junk NOP +0xee 0x07 0x30 # junk NOP +halt + +push 0xa009 # check second block +pushi 0x7000 +push 0xa109 +pushi 0x1500 +0x30 0x30 0x30 # junk NOP +push 0xa209 +pushi 0x5700 +push 0xa309 +pushi 0xc900 +0x30 0x30 0x30 # junk NOP + +# +bnq 0xd102 # Fail if any two bytes ever not equal +pop 0xffff +pop 0xffff +bnq 0xd102 # Fail if any two bytes ever not equal +pop 0xffff +0x30 0x30 0x30 # junk NOP +0xee 0x07 0x30 # junk NOP +pop 0xffff +bnq 0xd102 # Fail if any two bytes ever not equal +pop 0xffff +pop 0xffff +bnq 0xd102 # Fail if any two bytes ever not equal +pop 0xffff +pop 0xffff + +0xee 0x07 0x30 # junk NOP +push 0x700a # check third block +pushi 0x1c00 +push 0x710a +pushi 0x3b00 +push 0x720a +pushi 0x4400 +push 0x730a +pushi 0x4e00 + +# +bnq 0xd102 # Fail if any two bytes ever not equal +pop 0xffff +0x30 0x30 0x30 # junk NOP +0xee 0x07 0x30 # junk NOP +pop 0xffff +bnq 0xd102 # Fail if any two bytes ever not equal +pop 0xffff +pop 0xffff +bnq 0xd102 # Fail if any two bytes ever not equal +pop 0xffff +pop 0xffff +bnq 0xd102 # Fail if any two bytes ever not equal +pop 0xffff +pop 0xffff + +push 0xd008 # check first block +pushi 0x0f00 +0xee 0x07 0x30 # junk NOP +push 0xd108 +0x30 0x30 0x30 # junk NOP +pushi 0x5900 +push 0xd208 +pushi 0x4800 +push 0xd308 +pushi 0xf400 + +bnq 0xd102 # Fail if any two bytes ever not equal +pop 0xffff +pop 0xffff +0xee 0x07 0x30 # junk NOP +bnq 0xd102 # Fail if any two bytes ever not equal +pop 0xffff +pop 0xffff +0x30 0x30 0x30 # junk NOP +bnq 0xd102 # Fail if any two bytes ever not equal +pop 0xffff +pop 0xffff +0x30 0x30 0x30 # junk NOP +bnq 0xd102 # Fail if any two bytes ever not equal +pop 0xffff +pop 0xffff + + + +push 0x100c # check fifth block +pushi 0x6200 +push 0x110c +pushi 0x4700 +0xee 0x07 0x30 # junk NOP +push 0x120c +pushi 0x1900 +0x30 0x30 0x30 # junk NOP +push 0x130c +pushi 0xc000 + +# +bnq 0xd102 # Fail if any two bytes ever not equal +pop 0xffff +pop 0xffff +bnq 0xd102 # Fail if any two 
bytes ever not equal +0x30 0x30 0x30 # junk NOP +0x30 0x30 0x30 # junk NOP +0x30 0x30 0x30 # junk NOP +pop 0xffff +pop 0xffff +bnq 0xd102 # Fail if any two bytes ever not equal +pop 0xffff +0xee 0x07 0x30 # junk NOP +pop 0xffff +bnq 0xd102 # Fail if any two bytes ever not equal +pop 0xffff +pop 0xffff + +push 0x400b # check fourth block +pushi 0x1d00 +push 0x410b +pushi 0x1600 +0xee 0x07 0x30 # junk NOP +0x30 0x30 0x30 # junk NOP +push 0x420b +pushi 0x4400 +push 0x430b +pushi 0x7500 + +# +bnq 0xd102 # Fail if any two bytes ever not equal +pop 0xffff +pop 0xffff +0xee 0x07 0x30 # junk NOP +0xee 0x07 0x30 # junk NOP +bnq 0xd102 # Fail if any two bytes ever not equal +pop 0xffff +0xee 0x07 0x30 # junk NOP +pop 0xffff +bnq 0xd102 # Fail if any two bytes ever not equal +pop 0xffff +pop 0xffff +0xee 0x07 0x30 # junk NOP +bnq 0xd102 # Fail if any two bytes ever not equal +pop 0xffff +0x30 0x30 0x30 # junk NOP +pop 0xffff + +jmp 0xcb02 +push 0xcb02 +0x40 0x77 0x12 # junk NOP +halt +0x30 0x30 0x30 # junk NOP + diff --git a/src/rom.h b/src/rom.h new file mode 100644 index 0000000..1616f17 --- /dev/null +++ b/src/rom.h @@ -0,0 +1,97 @@ +unsigned char __src_rom_bin[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0d, 0x8d, 0x01, 0x01, 0x80, 0x00, 0x01, 0x04, + 0x01, 0x03, 0x01, 0x00, 0x08, 0x00, 0x00, 0x07, 0x00, 0x00, 0x02, 0x04, + 0x01, 0x03, 0x94, 0x00, 0x0e, 0x24, 0x01, 0x02, 0xff, 0xff, 0x02, 0xff, + 0xff, 0x0d, 0x03, 0x01, 0x02, 0xff, 0xff, 0x02, 0xff, 0xff, 0x02, 0xa0, + 0x10, 0x01, 0x2b, 0x01, 0x03, 0x01, 0x00, 0x08, 0x00, 0x00, 0x07, 0x00, + 0x00, 0x02, 0x2b, 0x01, 0x03, 0xb4, 0x00, 0x0e, 0x4b, 0x01, 0x02, 0xff, + 0xff, 0x02, 0xff, 0xff, 0x0d, 0x2a, 0x01, 0x02, 0xff, 0xff, 0x02, 0xff, + 0xff, 0x0c, 0x00, 0x00, 0x01, 0xa0, 0x10, 0x03, 0x1b, 0x00, 0x09, 0x00, + 0x00, 0x02, 0xa0, 0x10, 0x01, 0x55, 0x01, 0x03, 0x01, 0x00, 0x08, 0x00, + 0x00, 0x07, 0x00, 0x00, 0x07, 0x00, 0x00, 0x02, 0x55, 0x01, 0x02, 0x5e, + 0x01, 0x03, 0xb4, 0x00, 0x0e, 0x84, 0x01, 0x02, 0xff, 0xff, 0x02, 0xff, + 0xff, 0x0d, 0x54, 0x01, 0x02, 0xff, 0xff, 0x02, 0xff, 0xff, 0x0c, 0x00, + 0x00, 0x41, 0x41, 0x41, 0x42, 0x42, 0x42, 0x43, 0x43, 0x43, 0x44, 0x44, + 0x44, 0x45, 0x45, 0x45, 0x46, 0x46, 0x46, 0x47, 0x47, 0x47, 0x48, 0x48, + 0x48, 0x49, 0x49, 0x49, 0xff, 0x00, 0x00, 0x0b, 0x03, 0x01, 0x0b, 0x54, + 0x01, 0x0b, 0xba, 0x01, 0x61, 0x62, 0x63, 0x00, 0x00, 0x00, 0x05, 0x10, + 0x00, 0x03, 0x08, 0x00, 0x03, 0xd0, 0x00, 0x02, 0x3c, 0x02, 0x02, 0x3b, + 0x02, 
0x01, 0xa0, 0x10, 0x01, 0xa1, 0x10, 0x01, 0xa2, 0x10, 0x01, 0xa3, + 0x10, 0x0b, 0x38, 0x02, 0x01, 0xca, 0x01, 0x03, 0x04, 0x00, 0x08, 0x00, + 0x00, 0x02, 0xca, 0x01, 0x01, 0xcd, 0x01, 0x03, 0x04, 0x00, 0x08, 0x00, + 0x00, 0x02, 0xcd, 0x01, 0x01, 0xd0, 0x01, 0x03, 0x04, 0x00, 0x08, 0x00, + 0x00, 0x02, 0xd0, 0x01, 0x01, 0xd3, 0x01, 0x03, 0x04, 0x00, 0x08, 0x00, + 0x00, 0x02, 0xd3, 0x01, 0x01, 0xca, 0x01, 0x03, 0xb4, 0x00, 0x0e, 0x26, + 0x02, 0x02, 0xff, 0xff, 0x02, 0xff, 0xff, 0x01, 0xbe, 0x01, 0x03, 0x01, + 0x00, 0x08, 0x00, 0x00, 0x02, 0xbe, 0x01, 0x0d, 0xba, 0x01, 0x02, 0xff, + 0xff, 0x02, 0xff, 0xff, 0x69, 0x69, 0x69, 0x69, 0x69, 0x69, 0x04, 0x10, + 0x00, 0x0d, 0xc8, 0x02, 0x0d, 0x44, 0x02, 0x42, 0x42, 0x42, 0x43, 0x43, + 0x43, 0x44, 0x44, 0x44, 0x03, 0x45, 0x00, 0x0a, 0x00, 0x00, 0x02, 0x3e, + 0x02, 0x77, 0x77, 0x77, 0x02, 0x40, 0x02, 0x02, 0x3f, 0x02, 0x01, 0x3e, + 0x02, 0x08, 0x00, 0x00, 0x02, 0x41, 0x02, 0x0d, 0x71, 0x02, 0x02, 0x00, + 0x00, 0x02, 0x01, 0x00, 0x02, 0x02, 0x00, 0x02, 0x03, 0x00, 0x0d, 0xc5, + 0x02, 0x2f, 0x2f, 0x2f, 0x01, 0x3b, 0x02, 0x06, 0x64, 0x02, 0x06, 0x67, + 0x02, 0x06, 0x6a, 0x02, 0x06, 0x6d, 0x02, 0x02, 0xff, 0xff, 0x01, 0x63, + 0x02, 0x01, 0x3c, 0x02, 0x08, 0x00, 0x00, 0x02, 0x63, 0x02, 0x01, 0x66, + 0x02, 0x01, 0x3c, 0x02, 0x08, 0x00, 0x00, 0x02, 0x66, 0x02, 0x01, 0x69, + 0x02, 0x01, 0x3c, 0x02, 0x08, 0x00, 0x00, 0x02, 0x69, 0x02, 0x01, 0x6c, + 0x02, 0x01, 0x3c, 0x02, 0x08, 0x00, 0x00, 0x02, 0x6c, 0x02, 0x01, 0x41, + 0x02, 0x01, 0x40, 0x02, 0x01, 0x3f, 0x02, 0x01, 0x3e, 0x02, 0x0d, 0x62, + 0x02, 0x0d, 0xd8, 0x01, 0x0d, 0xda, 0x02, 0x03, 0x07, 0x00, 0x06, 0x30, + 0x00, 0x30, 0x30, 0x30, 0xee, 0x07, 0x30, 0x00, 0x00, 0x00, 0x01, 0xa0, + 0x09, 0x03, 0x70, 0x00, 0x01, 0xa1, 0x09, 0x03, 0x15, 0x00, 0x30, 0x30, + 0x30, 0x01, 0xa2, 0x09, 0x03, 0x57, 0x00, 0x01, 0xa3, 0x09, 0x03, 0xc9, + 0x00, 0x30, 0x30, 0x30, 0x0f, 0xd1, 0x02, 0x02, 0xff, 0xff, 0x02, 0xff, + 0xff, 0x0f, 0xd1, 0x02, 0x02, 0xff, 0xff, 0x30, 0x30, 0x30, 0xee, 0x07, + 0x30, 
0x02, 0xff, 0xff, 0x0f, 0xd1, 0x02, 0x02, 0xff, 0xff, 0x02, 0xff, + 0xff, 0x0f, 0xd1, 0x02, 0x02, 0xff, 0xff, 0x02, 0xff, 0xff, 0xee, 0x07, + 0x30, 0x01, 0x70, 0x0a, 0x03, 0x1c, 0x00, 0x01, 0x71, 0x0a, 0x03, 0x3b, + 0x00, 0x01, 0x72, 0x0a, 0x03, 0x44, 0x00, 0x01, 0x73, 0x0a, 0x03, 0x4e, + 0x00, 0x0f, 0xd1, 0x02, 0x02, 0xff, 0xff, 0x30, 0x30, 0x30, 0xee, 0x07, + 0x30, 0x02, 0xff, 0xff, 0x0f, 0xd1, 0x02, 0x02, 0xff, 0xff, 0x02, 0xff, + 0xff, 0x0f, 0xd1, 0x02, 0x02, 0xff, 0xff, 0x02, 0xff, 0xff, 0x0f, 0xd1, + 0x02, 0x02, 0xff, 0xff, 0x02, 0xff, 0xff, 0x01, 0xd0, 0x08, 0x03, 0x0f, + 0x00, 0xee, 0x07, 0x30, 0x01, 0xd1, 0x08, 0x30, 0x30, 0x30, 0x03, 0x59, + 0x00, 0x01, 0xd2, 0x08, 0x03, 0x48, 0x00, 0x01, 0xd3, 0x08, 0x03, 0xf4, + 0x00, 0x0f, 0xd1, 0x02, 0x02, 0xff, 0xff, 0x02, 0xff, 0xff, 0xee, 0x07, + 0x30, 0x0f, 0xd1, 0x02, 0x02, 0xff, 0xff, 0x02, 0xff, 0xff, 0x30, 0x30, + 0x30, 0x0f, 0xd1, 0x02, 0x02, 0xff, 0xff, 0x02, 0xff, 0xff, 0x30, 0x30, + 0x30, 0x0f, 0xd1, 0x02, 0x02, 0xff, 0xff, 0x02, 0xff, 0xff, 0x01, 0x10, + 0x0c, 0x03, 0x62, 0x00, 0x01, 0x11, 0x0c, 0x03, 0x47, 0x00, 0xee, 0x07, + 0x30, 0x01, 0x12, 0x0c, 0x03, 0x19, 0x00, 0x30, 0x30, 0x30, 0x01, 0x13, + 0x0c, 0x03, 0xc0, 0x00, 0x0f, 0xd1, 0x02, 0x02, 0xff, 0xff, 0x02, 0xff, + 0xff, 0x0f, 0xd1, 0x02, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, + 0x30, 0x02, 0xff, 0xff, 0x02, 0xff, 0xff, 0x0f, 0xd1, 0x02, 0x02, 0xff, + 0xff, 0xee, 0x07, 0x30, 0x02, 0xff, 0xff, 0x0f, 0xd1, 0x02, 0x02, 0xff, + 0xff, 0x02, 0xff, 0xff, 0x01, 0x40, 0x0b, 0x03, 0x1d, 0x00, 0x01, 0x41, + 0x0b, 0x03, 0x16, 0x00, 0xee, 0x07, 0x30, 0x30, 0x30, 0x30, 0x01, 0x42, + 0x0b, 0x03, 0x44, 0x00, 0x01, 0x43, 0x0b, 0x03, 0x75, 0x00, 0x0f, 0xd1, + 0x02, 0x02, 0xff, 0xff, 0x02, 0xff, 0xff, 0xee, 0x07, 0x30, 0xee, 0x07, + 0x30, 0x0f, 0xd1, 0x02, 0x02, 0xff, 0xff, 0xee, 0x07, 0x30, 0x02, 0xff, + 0xff, 0x0f, 0xd1, 0x02, 0x02, 0xff, 0xff, 0x02, 0xff, 0xff, 0xee, 0x07, + 0x30, 0x0f, 0xd1, 0x02, 0x02, 0xff, 0xff, 0x30, 0x30, 0x30, 0x02, 0xff, + 0xff, 
/*
3ByteBadVM: a bad virtual machine using 3-byte instructions
x1phosura 2021

VM specifications (as implemented below)

- Every instruction is exactly 3 bytes: 1 opcode byte + 2 operand bytes.
  When the operand is an address it is little-endian (low byte first);
  PUSHI only uses the first operand byte as an 8-bit immediate.
- Registers: pc (program counter), lr (link register written by CALL and
  read by RET), sp (number of bytes currently on the stack).
- The data stack holds STACK_LIM 8-bit values and lives inside struct CPU,
  not in VM memory.
- VM memory is RAM_SIZE bytes. Operands can name addresses up to 0xffff;
  reads outside RAM yield 0 and writes outside RAM are discarded (the ROM
  uses "pop 0xffff" as its idiom for dropping the top of the stack).
- Opcodes without a case below behave like NOP.

NOTE: this header defines objects and non-static functions, so it is meant
to be included from exactly one translation unit per binary.
*/

#pragma once // WAAAAAY better than a dumb header guard ;)

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef DEBUG
	// readline is L I T
	#include <readline/readline.h>
	#include <readline/history.h>
#endif

// 16 KiB of VM memory. 16-bit operands can still name addresses up to
// 0xffff, which is why every memory access below is bounds-checked.
#define RAM_SIZE 16384
#define STACK_LIM 64

bool is_being_traced = false;

enum instructions_t {
	HALT  = 0,
	PUSH  = 1,
	POP   = 2,
	PUSHI = 3,
	LDLR  = 4,
	STLR  = 5,
	SETI  = 6,
	DUP   = 7,
	ADD   = 8,
	SUB   = 9,
	XOR   = 10,
	CALL  = 11,
	RET   = 12,
	JMP   = 13,
	BEQ   = 14,
	BNQ   = 15,
	// every opcode 0x10-0xfe basically acts like a valid NOP just to be annoying
	NOP   = 0xff
} instructions;


struct CPU {
	uint16_t pc;              // address of the instruction being executed
	uint16_t lr;              // return address saved by CALL
	uint16_t sp;              // number of used stack slots (0 == empty)
	uint8_t stack[STACK_LIM];
	enum state_t {STOPPED, RUNNING, PAUSED} state;
};

#ifdef DEBUG // TRACE_STRUCT_DECODE
// Breakpoint value meaning "no breakpoint": an instruction can never be
// fetched from 0xffff because it lies outside RAM.
#define VM_NO_BREAKPOINT 0xffff

struct TRACE_T {
	uint16_t breakpoint;
	enum tmode_t {STEP, CONT} mode;
} trace_state;

char *trace_bad_cmd = "Unrecognized command.\n";
char *trace_cmd_help = "Supported commands are:\n"
	"s: step single instruction\n"
	"c: continue code execution until breakpoint or end\n"
	"b <addr>: set breakpoint at given memory address\n"
	"db: delete the current breakpoint\n"
	"pb: print current breakpoints\n"
	"pr: print register contents\n"
	"ps: print stack contents\n"
	"pm <addr> <n>: print <n> bytes of memory starting at <addr>\n"
	"pz: print zero page (1st 256 bytes)\n"
	"h: print VM debugger command help message\n"
	"q: quit VM debugger\n";


/* print_op_decoded: print one instruction as a mnemonic followed by a newline.
 * i:     the 3 raw instruction bytes (opcode, operand byte 0, operand byte 1)
 * pargs: when true, also print both operand bytes for opcodes that use them
 * Opcodes without a mnemonic are printed as three raw hex bytes.
 */
void print_op_decoded(uint8_t i[3], bool pargs)
{
	const char *name = NULL;
	bool has_args = false;

	switch (i[0]) {
	case HALT:  name = "HALT";  break;
	case PUSH:  name = "PUSH";  has_args = true; break;
	case POP:   name = "POP";   has_args = true; break;
	case PUSHI: name = "PUSHI"; has_args = true; break;
	case LDLR:  name = "LDLR";  has_args = true; break;
	case STLR:  name = "STLR";  has_args = true; break;
	case SETI:  name = "SETI";  has_args = true; break;
	case DUP:   name = "DUP";   break;
	case ADD:   name = "ADD";   break;
	case SUB:   name = "SUB";   break;
	case XOR:   name = "XOR";   break;
	case CALL:  name = "CALL";  has_args = true; break;
	case RET:   name = "RET";   break;
	case JMP:   name = "JMP";   has_args = true; break;
	case BEQ:   name = "BEQ";   has_args = true; break;
	case BNQ:   name = "BNQ";   has_args = true; break;
	case NOP:   name = "NOP";   break;
	default:    break;
	}

	if (name == NULL) {
		printf("0x%02x 0x%02x 0x%02x", i[0], i[1], i[2]);
	} else {
		printf("%s", name);
		if (pargs && has_args)
			printf(" 0x%02x 0x%02x", i[1], i[2]);
	}

	putchar('\n');
}
#endif // TRACE_STRUCT_DECODE


/* vm_mem_read: read one byte of VM memory; addresses outside RAM read as 0.
 * addr is wider than 16 bits so callers can pass "operand + 1" without it
 * wrapping back to address 0.
 */
static inline uint8_t vm_mem_read(const uint8_t *mem, uint32_t addr)
{
	return (addr < RAM_SIZE) ? mem[addr] : 0;
}


/* vm_mem_write: write one byte of VM memory; writes outside RAM are dropped. */
static inline void vm_mem_write(uint8_t *mem, uint32_t addr, uint8_t val)
{
	if (addr < RAM_SIZE)
		mem[addr] = val;
}


/* vm_stack_peek: value on top of the stack, or 0 when the stack is empty
 * (the same value pop() reports for an empty stack).
 */
static inline uint8_t vm_stack_peek(const struct CPU *cpu)
{
	return (cpu->sp > 0) ? cpu->stack[cpu->sp - 1] : 0;
}


/* push: push val onto the stack. A push onto a full stack is silently
 * dropped; all STACK_LIM slots are usable.
 */
static inline void push(struct CPU *cpu, uint8_t val)
{
	if (cpu->sp < STACK_LIM)
		cpu->stack[cpu->sp++] = val;
}


/* pop: remove and return the top of the stack; returns 0 when it is empty. */
static inline uint8_t pop(struct CPU *cpu)
{
	if (cpu->sp == 0)
		return 0;
	return cpu->stack[--cpu->sp];
}


/* vm_do_instruction: execute a single, already fetched instruction.
 * cpu:   CPU state; cpu->pc must be the address instr was fetched from
 * mem:   VM memory, assumed to be at least RAM_SIZE bytes
 * instr: opcode followed by two operand bytes
 * Returns the address of the next instruction. cpu->pc itself is NOT
 * updated here; the caller stores the returned value. HALT returns the
 * unchanged pc and sets cpu->state to STOPPED.
 */
uint16_t vm_do_instruction(struct CPU *cpu, uint8_t *mem, uint8_t instr[3])
{
	const uint16_t next_pc = (uint16_t)(cpu->pc + 3);
	const uint8_t opcode = instr[0];
	const uint8_t imm = instr[1];
	// operands treated as a single little-endian 16-bit value
	const uint16_t operand = (uint16_t)(((uint16_t)instr[2] << 8) | instr[1]);
	uint8_t top = 0;
	uint8_t below = 0;

	#ifdef DEBUG
	printf("0x%04x: ", (unsigned)cpu->pc);
	print_op_decoded(instr, true);
	#endif

	switch (opcode) {
	case HALT:
		cpu->state = STOPPED;
		return cpu->pc;
	case PUSH:
		push(cpu, vm_mem_read(mem, operand));
		break;
	case POP:
		vm_mem_write(mem, operand, pop(cpu));
		break;
	case PUSHI:
		push(cpu, imm);
		break;
	case LDLR:
		// read little-endian; mirrors STLR below
		// NOTE(review): the original carried a "this is broken" TODO
		// here without saying what was wrong - confirm with the author.
		cpu->lr = (uint16_t)((vm_mem_read(mem, (uint32_t)operand + 1) << 8)
				| vm_mem_read(mem, operand));
		break;
	case STLR:
		// write little-endian
		vm_mem_write(mem, operand, (uint8_t)(cpu->lr & 0x00ff));
		vm_mem_write(mem, (uint32_t)operand + 1,
				(uint8_t)((cpu->lr >> 8) & 0x00ff));
		break;
	case SETI: // peek ToS, write to memory
		vm_mem_write(mem, operand, vm_stack_peek(cpu));
		break;
	case DUP:
		push(cpu, vm_stack_peek(cpu));
		break;
	case ADD:
		top = pop(cpu);
		below = pop(cpu);
		push(cpu, (uint8_t)(below + top));
		break;
	case SUB: // subtract top of stack from 2nd from top (mod 256)
		top = pop(cpu);
		below = pop(cpu);
		push(cpu, (uint8_t)(below - top));
		break;
	case XOR:
		top = pop(cpu);
		below = pop(cpu);
		push(cpu, (uint8_t)(below ^ top));
		break;
	case CALL:
		cpu->lr = next_pc;
		return operand;
	case RET:
		return cpu->lr;
	case JMP:
		return operand;
	case BEQ: // branch if the top two stack bytes are equal; nothing is popped
		if (cpu->sp >= 2 &&
		    cpu->stack[cpu->sp - 1] == cpu->stack[cpu->sp - 2])
			return operand;
		break;
	case BNQ: // branch if the top two stack bytes differ; nothing is popped
		if (cpu->sp >= 2 &&
		    cpu->stack[cpu->sp - 1] != cpu->stack[cpu->sp - 2])
			return operand;
		break;
	case NOP:
	default:
		/* EVIL idea: have illegal instructions act as NOPs! */
		break;
	}

	return next_pc;
}


/* vm_fetch_instruction: copy the 3 instruction bytes at pc into instr.
 * pc must leave room for a whole instruction inside RAM. This is asserted;
 * if assertions are compiled out, bytes outside RAM read as 0, so a runaway
 * pc decodes as HALT instead of reading out of bounds.
 */
static inline void vm_fetch_instruction(uint16_t pc, uint8_t *mem,
                                        uint8_t instr[3])
{
	assert(pc <= RAM_SIZE - 3);
	instr[0] = vm_mem_read(mem, pc);
	instr[1] = vm_mem_read(mem, (uint32_t)pc + 1);
	instr[2] = vm_mem_read(mem, (uint32_t)pc + 2);
}


#ifdef DEBUG // TRACE_FUNCS
/* print_vm_registers: print pc, lr and sp on one line. */
void print_vm_registers(struct CPU *cpu)
{
	printf("pc = %#x, lr = %#x, sp = %#x\n",
	       (unsigned)cpu->pc, (unsigned)cpu->lr, (unsigned)cpu->sp);
}


/* print_vm_stack: dump the used part of the stack, 16 bytes per row,
 * followed by a caret line marking the next free slot.
 */
void print_vm_stack(struct CPU *cpu)
{
	const unsigned sp = cpu->sp;
	// number of 3-character cells printed on the last row
	const unsigned cells = (sp == 0) ? 0 : ((sp - 1) % 16) + 1;

	for (unsigned i = 0; i < sp; ++i) {
		if (i % 16 == 0)
			putchar('\n');
		printf("%02x ", cpu->stack[i]);
	}
	putchar('\n');
	printf("%*s", (int)(cells * 3), "");
	printf("^sp = 0x%02x\n", sp);
}


/* print_vm_memory: hex-dump num_bytes bytes of VM memory starting at
 * start_addr, 16 per row. The dump stops at the end of RAM.
 */
void print_vm_memory(uint8_t *mem, uint16_t start_addr, uint16_t num_bytes)
{
	for (uint32_t i = 0; i < num_bytes; ++i) {
		const uint32_t addr = (uint32_t)start_addr + i;

		if (addr >= RAM_SIZE)
			break;
		if (i % 16 == 0)
			printf("\n0x%04x: ", (unsigned)addr);
		printf("%02x ", mem[addr]);
	}
	putchar('\n');
}
#endif // TRACE_FUNCS



#ifdef DEBUG // VMTRACE
/* vm_trace: interactive debugger prompt, run before each instruction.
 * Switches to single-step mode when cpu->pc hits the breakpoint, then reads
 * commands until one of them resumes execution ('s' runs one instruction,
 * 'c' continues until the breakpoint). Returns immediately in CONT mode.
 * 'q' and end-of-input terminate the whole process.
 */
void vm_trace(struct CPU *cpu, uint8_t *mem, struct TRACE_T *tstate)
{
	rl_bind_key('\t', rl_insert); // make readline insert tabs literally

	if (cpu->pc == tstate->breakpoint) {
		printf("Breakpoint reached!\n");
		tstate->mode = STEP;
	}

	while (tstate->mode == STEP) {
		bool step = false;
		char *command = readline("trace> ");

		if (command == NULL) {
			printf("Error: readline returned NULL. Aborting...\n");
			exit(0);
		}
		// add_history() stores its own copy, so command can be freed
		// at the end of this iteration
		if (command[0] != '\0')
			add_history(command);

		// yeah, yeah, I KNOW a switch-case would be better for this
		if (command[0] == 's') {
			step = true;
		} else if (command[0] == 'b') {
			int addr = 0;

			// sscanf returns the number of converted items, so
			// anything other than 1 means no address was given
			if (sscanf(command+1, "%i", &addr) != 1 ||
			    addr < 0 || addr > 0xffff) {
				printf("%s\n%s", trace_bad_cmd, trace_cmd_help);
			} else {
				tstate->breakpoint = (uint16_t)addr;
				printf("Set breakpoint at address 0x%2x\n", addr);
			}
		} else if (command[0] == 'p') {
			int mem_start = 0;
			int num_bytes = 0;

			switch (command[1]) {
			case 'b':
				printf("Current breakpoint at 0x%2x\n",
				       tstate->breakpoint);
				break;
			case 'r':
				print_vm_registers(cpu);
				break;
			case 's':
				print_vm_stack(cpu);
				break;
			case 'z':
				print_vm_memory(mem, 0, 0x100);
				break;
			case 'm':
				if (sscanf(command+2, "%i %i", &mem_start,
				           &num_bytes) != 2 ||
				    mem_start < 0 || mem_start > 0xffff ||
				    num_bytes < 0 || num_bytes > 0xffff) {
					printf("%s\n%s", trace_bad_cmd,
					       trace_cmd_help);
					break;
				}
				printf("Dumping %d bytes of memory starting "
				       "at 0x%04x\n", num_bytes, mem_start);
				print_vm_memory(mem, (uint16_t)mem_start,
				                (uint16_t)num_bytes);
				break;
			default:
				printf("%s\n%s", trace_bad_cmd, trace_cmd_help);
			}
		} else if (command[0] == 'd' && command[1] == 'b') {
			printf("Deleted breakpoint at 0x%x\n",
			       tstate->breakpoint);
			// same "no breakpoint" value vm_init() starts with
			tstate->breakpoint = VM_NO_BREAKPOINT;
		} else if (command[0] == 'c') {
			tstate->mode = CONT;
		} else if (command[0] == 'h') {
			printf("%s", trace_cmd_help);
		} else if (command[0] == 'q') {
			printf("Exiting debugger and virtual machine...\n");
			free(command);
			exit(0);
		} else {
			printf("%s\n%s", trace_bad_cmd, trace_cmd_help);
		}

		free(command);
		if (step)
			break;
	}
	// Idea: maybe return value to exit debugger?
}
#endif // VMTRACE


/* vm_run: fetch/execute loop. Starts at cpu->pc and runs until an
 * instruction moves cpu->state away from RUNNING (i.e. HALT).
 * In DEBUG builds the debugger starts in single-step mode and is consulted
 * before every instruction while is_being_traced is set.
 */
void vm_run(struct CPU *cpu, uint8_t *mem)
{
	uint8_t curr_instr[3]; // current instruction

	cpu->state = RUNNING;
	#ifdef DEBUG
	trace_state.mode = STEP;
	#endif

	while (cpu->state == RUNNING) {
		#ifdef DEBUG
		if (is_being_traced) {
			vm_trace(cpu, mem, &trace_state);
		}
		#endif

		vm_fetch_instruction(cpu->pc, mem, curr_instr);
		cpu->pc = vm_do_instruction(cpu, mem, curr_instr);
	}

	// eventually, make it return some kind of CPU status code
}


/* vm_init(): initialize CPU/memory starting states
 * cpu:      CPU to reset (pc = 0x100, lr = sp = 0, empty stack, STOPPED)
 * mem:      VM memory, assumed to be at least RAM_SIZE bytes
 * memimage: image copied to the start of mem; NULL selects a small built-in
 *           placeholder image
 * imgsize:  size of memimage in bytes; anything beyond RAM_SIZE is ignored
 * Memory past the end of the image is left untouched.
 */
void vm_init(struct CPU *cpu, uint8_t *mem, uint8_t *memimage, uint16_t imgsize)
{
	static const uint8_t placeholder_memimage[] =
		{65, 66, 66, 67, 68, 68, 69, 70, 70, 0};
	const uint8_t *image = memimage;
	size_t size = imgsize;

	cpu->pc = 0x100; // default: start code execution after zero page
	cpu->lr = 0;
	cpu->sp = 0;
	cpu->state = STOPPED;
	memset(cpu->stack, 0, sizeof cpu->stack);

	if (image == NULL) {
		image = placeholder_memimage;
		size = sizeof placeholder_memimage;
	}
	if (size > RAM_SIZE)
		size = RAM_SIZE; // never copy past the end of VM memory
	memmove(mem, image, size);

	#ifdef DEBUG
	trace_state.breakpoint = VM_NO_BREAKPOINT;
	trace_state.mode = CONT;
	#endif
}
00 00 00 00 00 00 00 +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00