#if !defined(__arm__) || (__SIZE_WIDTH__ != 32) #error "This code is 32-bit ARM only!" #endif #if defined(__ARM_ARCH_6M__) || defined(__ARM_ARCH_7EM__) || defined(__ARM_ARCH_8M_MAIN__) \ || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_8R__) #error "This code is only available on ARM7, ARM9, ARM11 and Cortex-A devices, not on Cortex-M or Cortex-R." #endif #include #include #include #if defined(__linux__) && !defined(__KERNEL__) // FIXME: (on musl) seems to crash upon entry before reaching main() #include #include #include #include #define iprintf(fmt, ...) printf((fmt), ##__VA_ARGS__) #elif defined(__KERNEL__) #include #include #define iprintf(fmt, ...) printk(KERN_ERR " " fmt, ##__VA_ARGS__) // TODO: is a memcpy impl needed? #elif defined(ZYNQ) /* zynq baremetal */ #include #include #include "xil_printf.h" #include "zynq_printf.h" #define iprintf(fmt, ...) printf((fmt), ##__VA_ARGS__) #else // newlib assumed #include #include #include #endif // platform // known ID table: // Chip name | ARM core | ARM CPUID | JTAG IDCODE | Jazelle ID // ------------+------------+------------+-------------+----------- Jazelle DBX: // ??? TODO | ARM7EJ-S | ??? | ??? | ??? // Cypress FX3 | ARM926EJ-S | 0x41069265 | 0x07926069 | 0x64100004 // TI Nspire | ARM926EJ-S?| 0x41069265?| ??? | 0x64100004? // TODO // Wii Starlet | ARM926EJ-S?| ??? | ??? | ??? // TODO // RPi v1.2 B+ |ARM1176JZF-S| 0x410FB767 | 0x07B7617F | 0x74100064 // FIXME: insn enumeration broken due to caching or something // Ninty .3DS | ARM11MPCore| 0x410FB025?| ??? | 0x74100064? // TODO // ??? TODO | Cortex-A8 | ??? | ??? | ??? // Xilinx Zynq7| Cortex-A9MP| 0x413FC090 | 0x4BA00477 | 0xF4100168 // ------------+------------+------------+-------------+----------- Jazelle RCT: // Xilinx Zynq7| Cortex-A9MP| 0x413FC090 | 0x4BA00477 | 0xF4100168 // ------------+------------+------------+-------------+----------- Nothing: // Qcom MSM8255| S2 Scorpion| 0x517100F2 | ??? | 0x00000000 // TODO: immediate next steps: // * check if we can override jazelle insn execution with custom handlers // * check what the control registers actually do // * enumerate what all insns do /* Cypress FX3: bytecode IDs that use a handler: 0x0f, 0x12, 0x13, 0x14, 0x53, 0x62, 0x63, 0x66, 0x67, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x77, 0x79, 0x7b, 0x7d, 0x86, 0x87, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x95, 0x96, 0x97, 0x98, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe 0xff is hardwired to bkpt #0 Raspberry Pi v1.2 B+, **Linux userspace**: bytecode IDs that use a handler: 0x0f, 0x12, 0x13, 0x14, 0x62, 0x63, 0x66, 0x67, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x77, 0x79, 0x7b, 0x7d, 0x86, 0x87, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x95, 0x96, 0x97, 0x98, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe seems to be the same as FX3 except for 0x53 (aastore). NOTE: baremetal is different, it is able to execute 0x6c (idiv) without problem! */ #ifdef __linux__ static bool getstr(char** ap, const char* name, uint32_t* out) { char* a = *ap, *aa = a; a = strstr(a, name); if (!a) { /*printf("no name\n");*/ return false; } a = strstr(a, ": "); if (!a) { /*printf("no colon\n");*/ return false; } a = a + strlen(": "); *out = strtoul(a, &aa, 0); if (a == aa) { /*printf("no int: %s\n", a);*/ return false; } *ap = aa; return true; } #endif #if !defined(__linux__) || defined(__KERNEL__) inline #endif static uint32_t arm_get_id(void) { #if defined(__linux__) && !defined(__KERNEL__) // NOTE: better to read out /sys/devices/system/cpu/cpu*/regs/identification/midr_el1 , // but this one doesn't seem to be available on my rpi running 5.4.x, // so, /proc/cpuinfo it is uint32_t midr = 0; FILE* f = fopen("/proc/cpuinfo", "rb"); if (!f) return 0; void* dest = malloc(4096); // should be enough size_t rrr = fread(dest, 1, 4096, f); fclose(f); if (rrr == 0) goto EXIT; char* a = (char*)dest; uint32_t impl, arch, var, part, rev; if (!getstr(&a, "CPU implementer", &impl)) { printf("no impl\n"); goto EXIT; } if (!getstr(&a, "CPU architecture", &arch)) { printf("no arch\n"); goto EXIT; } if (!getstr(&a, "CPU variant", &var)) { printf("no var\n"); goto EXIT; } if (!getstr(&a, "CPU part", &part)) { printf("no part\n"); goto EXIT; } if (!getstr(&a, "CPU revision", &rev)) { printf("no rev\n"); goto EXIT; } midr = (impl << 24) | (arch << 20) | (var << 16) | (part << 4) | (rev << 0); do { // try to read more values, may be useful in eg. a big.LITTLE setup if (!getstr(&a, "CPU implementer", &impl)) break; if (!getstr(&a, "CPU architecture", &arch)) break; if (!getstr(&a, "CPU variant", &var)) break; if (!getstr(&a, "CPU part", &part)) break; if (!getstr(&a, "CPU revision", &rev)) break; uint32_t m2 = (impl << 24) | (arch << 20) | (var << 16) | (part << 4) | (rev << 0); if (m2 != midr) { printf("found different MIDR on other core: 0x%08lx\n", m2); } } while (a); EXIT: free(dest); return midr; #else // NOTE: requires kernel mode execution level to read uint32_t res; asm volatile("mrc p15, 0, %0, c0, c0, 0\n":"=r"(res)); return res; #endif } inline static uint32_t jazelle_get_id(void) { uint32_t res; asm volatile("mrc p14, 7, %0, c0, c0, 0\n":"=r"(res)); return res; } #if defined(__linux__) && !defined(__KERNEL__) // TODO: better implementation lmao static void DC_FlushAll(void) { const size_t size = 65536*4; // shrug void* v = (void*)malloc(size); if (!v) return; // a for (size_t i = 0; i < size; i += 4) { volatile uint32_t* p = (volatile uint32_t*)v; p[i >> 2] = 0; asm volatile("":::"memory"); } free(v); } __attribute__((__unused__)) static void DC_InvalidateAll(void) { DC_FlushAll(); } __attribute__((__naked__, __no_inline__)) void IC_InvalidateAll(void) { asm volatile( ".fill 65536, 4, 0\n" // 64k nop instructions because eh "bx lr\n" : : :"memory" ); } #elif (__ARM_ARCH == 5 /* ARM9 */) || (__ARM_ARCH == 6 && defined(__ARM_ARCH_6KZ__)) __attribute__((__naked__)) static void DC_FlushAll(void) { // lifted from libnds asm volatile( #if __ARM_ARCH == 5 "mov r1, #0\n" "outer_loop:\n" "mov r0, #0\n" "inner_loop:\n" "orr r2, r1, r0 @ generate segment and line address\n" "mcr p15, 0, r2, c7, c14, 2 @ clean and flush the line\n" "add r0, r0, #32\n" // FIXME hardcoded cache line size "cmp r0, #0x1000/4\n" // FIXME hardcoded cache size "bne inner_loop\n" "add r1, r1, #0x40000000\n" "cmp r1, #0\n" "bne outer_loop\n" "mov r0, #0\n" #elif __ARM_ARCH == 6 && defined(__ARM_ARCH_6KZ__) // FIXME: ARM11 code no worky "mov r0, #0\n" "mcr p15, 0, r0, c7, c14, 0\n" // Clean and Invalidate Entire Data Cache #else #error "wut" #endif "mcr p15, 0, r0, c7, c10, 4 @ drain write buffer\n" // ARM9, ARM11 ok "bx lr\n" : : :"memory" ); } __attribute__((__unused__)) static inline void DC_InvalidateAll(void) { asm volatile("mcr p15, 0, %0, c7, c6, 0" : : "r"(0)); // ARM9, ARM11 ok #if __ARM_ARCH == 6 // FIXME: ARM11 code no worky asm volatile("mcr p15, 0, %0, c7, c7, 0" : : "r"(0)); // ARM9, ARM11 ok asm volatile("mcr p15, 0, %0, c8, c7, 0" : : "r"(0)); // invalidate tlb asm volatile("mcr p15, 0, %0, c7, c10, 4" : : "r"(0)); // dsb #endif } static inline void IC_InvalidateAll(void) { asm volatile("mcr p15, 0, %0, c7, c5, 0" : : "r"(0)); // ARM9, ARM11 ok #if __ARM_ARCH == 6 // FIXME: ARM11 code no worky asm volatile("mcr p15, 0, %0, c7, c7, 0" : : "r"(0)); // ARM9, ARM11 ok asm volatile("mcr p15, 0, %0, c8, c7, 0" : : "r"(0)); // invalidate tlb asm volatile("mcr p15, 0, %0, c7, c10, 4" : : "r"(0)); // dsb asm volatile("mcr p15, 0, %0, c7, c5, 4" : : "r"(0)); // flush prefetch buffer #endif } #elif __ARM_ARCH == 7 && defined(__ARM_ARCH_7A__) #include "cache_cortexa.c" /* go sue me */ #else #error "TODO: define data and instruction cache functions for your target!" #endif __attribute__((__aligned__(1024))) static struct { void* handlers[512]; uint8_t stack[256]; uint8_t locals[256]; } jazelle_block; /* * c0: Jazelle Identity register (read-only) * Bits 0-11: Subarchitecture-defined bits (reads as 4, meaning unknown) * Bits 12-19: Subarchitecture (reads as 0, Jazelle V1 according to documentation) * Bits 20-27: Implementor (reads as 0x41, ARM Limited according to documentation) * Bits 28-31: Architecture (reads as 6, ARMv5TEJ according to documentation) * c1: Operating System Control register * Bit 0: Configuration Disabled (CD) (documented) * Bit 1: Configuration Valid (CV) (documented) * c2: Main Configuration register * Bit 0: Jazelle Enable (JE) (documented) * Bits 26-28: Unknown * Bit 29: If set, array object contains its elements directly, otherwise it contains a pointer to its elements * Bit 31: Disable array instructions if set? * c3: Array object layout register * Bits 0-7: Unknown * Bits 8-11: Offset (in words) within array object of first element or of pointer to first element * Bits 12-15: Offset (in words) within array object of length * Bit 16: If set, offset to length is subtracted, otherwise added * Bits 17-19: Array length shift value (number of elements = stored length >> this) * Bits 20-21: Disable array instructions if set? */ static uint32_t jazelle_exit_save; __attribute__((__naked__)) static int jazelle_exec_native(const void* bytecode, const void* block) { // inline asm parameters seems to be borking in GCC sooooo lets do it this way (void)bytecode; (void)block; (void)&jazelle_exit_save; asm volatile( "push {r4-r12,lr}\n" "mov lr, r0\n" // init handler table pointer and stack pointer "mov r5, r1\n" "add r6, r5, #0x800\n" "add r7, r5, #0x900\n" // "r8: Pointer to constant pool? (haven't checked this yet)" -Hackspire // libjz contradicts this... // set configuration valid & jazelle enable bits #if !defined(__linux__) || defined(__KERNEL__) "mov r0, #2\n" "mcr p14, 7, r0, c1, c0, 0\n" #endif "mov r0, #1\n" "mcr p14, 7, r0, c2, c0, 0\n" // apparently there's no good way to find the exit point from jazelle, // so we're going to hack that into the stuff now "ldr r12, =jazelle_exit_save\n" "adr r0, .Ljazend\n" "str r0, [r12]\n" // switch to jazelle mode "adr r12, .Lno_jazelle\n" "bxj r12\n" ".Ljazend:\n" "mov r0, #0\n" "b .Lend\n" ".Lno_jazelle:\n" "mov r0, #1\n" ".Lend:\n" "mov r5, #0\n" #ifndef __linux__ "mcr p14, 7, %r5, c1, c0, 0\n" "mcr p14, 7, %r5, c2, c0, 0\n" #endif "pop {r4-r12,lr}\n" "bx lr\n" ".pool\n" ); } extern uint32_t backup_r5; uint32_t backup_r5 = 0; __attribute__((__naked__)) static void handler_0xfe(void) { (void)&backup_r5; asm volatile( "ldr r0, =backup_r5\n" "str r5, [r0]\n" // return to jazelle (yes lr has to be incremented otherwise the // current instruction keeps getting executed in a loop) "add lr, #1\n" "bxj r12\n" // FIXME: r12 can be modified by jazelle so it should be restored to something //: //:[br5]"m"(backup_r5) ".pool\n" ); } __attribute__((__naked__)) static void handler_idiv(void) { // r0 = 3 // r1 = 4 // r2 = 2 // editing the above has no effect (not on ARM9, nor on ARM11) // NOTE: these depend on the stack content, i.e. it's not a moving register // window. pushes happen in the following order: r0,r1,r2,r3 // TODO: when the 'register stack cache' is full, what happens? does // it loop or does it act in a FIFO way, moving r0<-r1<-r2<-r3? // how can the fillrate be known??? (libjz says r5 & 3 but i // dont see anything like that, maybe its an ARM11 thing?) // TODO: Hackspire also says something about the low bits of r5, not // seeing this on ARM9 nor ARM11... // "r4: Copy of local variable 0. Only valid when local variable 0 is a // single word (i.e. int, float, or Object; not long or double)" // -Hackspire // // [r6-4] is stack top (2) // [r6-8] is 4 // etc // use the above to manipulate the stack, eg. "iadd" is implemented as: // - add r1, r2 // or equivalently, read from stack i guess // - str r1, [r6, #-8] // store to the place where it will be read // - sub r6, #4 // pop off & discard stack top element // NOTE: this input usage (with r1 and r2) is NOT robust at all, use memory // reads instead! asm volatile( // FIXME: read out stack contents in a better way "add r1, r2\n" "str r1, [r6,#-8]\n" "sub r6, #4\n" // return to jazelle (yes lr has to be incremented otherwise the // current instruction keeps getting executed in a loop) "add lr, #1\n" "bxj r12\n" // FIXME: r12 can be modified by jazelle so it should be restored to something ); } __attribute__((__naked__)) static void handler_ireturn(void) { int result; asm volatile( "ldr %[res], [r6, #-4]!\n" :[res]"=r"(result) ); iprintf("result=%d\r\n", result); // FIXME: save & restore r0-r3 if ret implemented properly // get back to original code // TODO: later stage: get back to previous bytecode stuff asm volatile( "ldr r12, %[exsav]\n" "bx r12\n" : :[exsav]"m"(jazelle_exit_save) :"r12" ); } static int was_exec = -1; __attribute__((__naked__)) static void handler_wasexec() { asm volatile( // set was_exec flag "mov r0, #1\n" "str r0, %[we]\n" // get back to original stuff "ldr r12, %[exsav]\n" "bx r12" : :[exsav]"m"(jazelle_exit_save) ,[we]"m"(was_exec) :"r0","r12" ); } __attribute__((__naked__)) static void handler_noexec() { asm volatile( // get back to original stuff "ldr r12, %[exsav]\n" "bx r12" : :[exsav]"m"(jazelle_exit_save) :"r0","r12" ); } static uint8_t bytecode_testh_base[] = { 0x06, 0x06, 0x07, 0x07, 0x05, 0x05, 0x04, 0x04, 0x6C, // +8 0x00, 0x00, 0x00, 0x00, // up to 4 nops of argument bytes eg for invokeXYZ 0x00, 0x00, 0x00, 0x00, // also needs to be modified to make some insns work 0xBA, // +17 // invokedynamic, complex enough to never be implemented in hw 0x00, 0x00, 0x00, 0x00, // up to 4 nops of argument bytes eg for invokeXYZ }; static void jazelle_test_handlers(uint8_t hflags[256/8]) { #ifdef __linux__ #ifdef __KERNEL__ #error "TODO: kernel: alloc rwx page" #else uint8_t* bytecode_testh = mmap(NULL, 4096, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (!bytecode_testh || !~(uint32_t)bytecode_testh) { printf("Can't map a page RWX, need it for instruction enumeration.\n"); return; } memcpy(bytecode_testh, bytecode_testh_base, sizeof bytecode_testh_base); #endif #else #define bytecode_testh bytecode_testh_base #endif memset(hflags, 0, 256/8); for (int i = 0x00; i < 0xff /* bytecode 0xff is hardwired to bkpt #0 */; ++i) { if (i == 0xba) { hflags[i>>3]|=(1<<(i&7)); continue; // yeah } #ifdef __linux__ if (i == 0x2e) continue; // iaload, will do a random data load. avoid this in non baremetal contexts if (i >= 0x2f && i <= 0x35) continue; // laload, faload, daload, aaload, baload, caload, saload if (i >= 0x4f && i <= 0x56 #if __ARM_ARCH >= 6 && i != 0x53 #endif ) continue; // [ilfdabcs]astore if (i == 0xbe) continue; // arraylength #endif bytecode_testh[8] = i; memset(&bytecode_testh[9], 0, 8); switch (i) { // need offset fixups for some instructions case 0xa7: // goto case 0xa8: // jsr bytecode_testh[10] = 3; break; case 0xa9: // ret bytecode_testh[9] = 1; break; case 0xc8: // goto_w case 0xc9: // jsr_w bytecode_testh[12] = 3; break; default: break; } memset(&jazelle_block, 0, sizeof jazelle_block); // initialize local 1 for a return address for the 'ret' opcode uint32_t retval = (uint32_t)&bytecode_testh[17]; jazelle_block.locals[4] = retval >> 0; jazelle_block.locals[5] = retval >> 8; jazelle_block.locals[6] = retval >>16; jazelle_block.locals[7] = retval >>24; DC_FlushAll(); //DC_InvalidateAll(); IC_InvalidateAll(); jazelle_block.handlers[i] = handler_wasexec; jazelle_block.handlers[0xba] = handler_noexec; //iprintf("bc 0x%02x\r\n", i); was_exec = 0; jazelle_exec_native(bytecode_testh, &jazelle_block); if (was_exec == 1) { hflags[i>>3]|=(1<<(i&7)); //iprintf("bytecode 0x%02x: uses handler\r\n", i); } else { //iprintf("bytecode 0x%02x: hw\r\n", i); } jazelle_block.handlers[i] = NULL; } #ifdef __linux__ #ifdef __KERNEL__ #error "TODO: kernel: alloc rwx page" #else munmap(bytecode_testh, 4096); #endif #else #undef bytecode_testh #endif } // https://github.com/SonoSooS/libjz/wiki/Java-instruction-set __attribute__((__section__(".text"), __align__(4))) static uint8_t bytecode_test1[] = { 0x06, // iconst_3 0x07, // iconst_4 0x05, // iconst_2 // FIXME: if we put 0xFE here instead, the end result is 11 on RPi. why? 0x6C, // idiv 0xFE, // ??? (r5 readout) 0x04, // iconst_1 0x60, // iadd 0x60, // iadd 0xAC, // ireturn 0x00, 0x00, 0x00 // alignment to keep gcc happy }; // returns 6 if idiv implemented natively, else 10 static int jazelle_exec(const uint8_t* bytecode) { jazelle_block.handlers[0x6C] = handler_idiv; jazelle_block.handlers[0xAC] = handler_ireturn; jazelle_block.handlers[0xFE] = handler_0xfe; /* * +000-3FF: Unhandled bytecodes * The stack is flushed to memory before calling any of these handlers, so * they may modify r0-r3 freely * +400: Null pointer exception * +404: Array index out of bounds exception * +40C: Jazelle mode entered with JE = 0 * +410: Configuration invalid (Jazelle mode entered with CV = 0) * CV is automatically set to 1 on entering this handler * +414: Prefetch abort occurred in middle of instruction * * -Hackspire */ const void* block = &jazelle_block; return jazelle_exec_native(bytecode, block); } void jazelle_main(void) { uint32_t aid = arm_get_id(); uint32_t jid = jazelle_get_id(); iprintf("hello world! ARM coreID=0x%lx jazelle ID=0x%lx\r\n", aid, jid); if (jid == 0) return; int r = jazelle_exec(bytecode_test1); iprintf("retcode=%d; r5 was 0x%08lx\r\n", r, backup_r5); if (r == 0) { static uint8_t hflags[256/8]; jazelle_test_handlers(hflags); iprintf("bytecode IDs that use a handler:\r\n\t"); for (int i = 0, has = 0; i < 0x100; ++i) { if (hflags[i>>3]&(1<<(i&7))) { iprintf("%s0x%02x", ((has&15) ? ", " : ""), i); ++has; if ((has & 15) == 0) iprintf("\r\n\t"); } } printf("\r\n"); } }