From cb7386bc6f9a4e1505c28573fed5faa262429d98 Mon Sep 17 00:00:00 2001 From: sys64738 Date: Sat, 9 Apr 2022 21:55:52 +0200 Subject: [PATCH] dumper code that works! --- README.md | 56 ++++++++++++++++++++++----- logtracer.py | 19 +++++++-- src/main.c | 107 +++++++++++++++++++++++++++++++++++++++++---------- 3 files changed, 149 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index 3fc3bb5..fc10833 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,9 @@ Tools to try to dump the MSP430FR BSL, mainly targetting the [MSP430FR5994 ](https://www.ti.com/product/MSP430FR5994) (on an MSP-EXP430FR5994 devboard). -## How +## The idea -Mhe MSP430FR bootloader ('BSL') resides at 0x1000, this memory cannot be read, +The MSP430FR bootloader ('BSL') resides at 0x1000, this memory cannot be read, and user code can only jump to 0x1000 or 0x1002 to run certian functions of the BSL. Though, it is very likely that when the CPU is running from inside this memory region, it can access this memory as data, because that's often needed @@ -52,14 +52,52 @@ The "use DMA to get ROP" trick comes from [here ](https://hexkyz.blogspot.com/2021/11/je-ne-sais-quoi-falcons-over-horizon.html), described near the end, the article is quite large. -## What has been found +## What has been implemented correctly -1. `0x1000` is a jump to `0x1014`. -1. `0x1002` is a jump to `0x1028`. -1. The other code in the Z-Area are infinite loops (to itself, instruction `ff 3f`). +1. Memory in the BSL region cannot be read using data accesses from user code. + Reads come back as `3f ff`, which decodes as an infinite loop. +1. Arbitrary code in the BSL region cannot be jumped to from user code, the + CPU execution path has to go through the Z-area. Doing this will cause an + infinite loop or a reset. -### 0x1014 (BSL_main()) +## Vulnerabilities of the BSL against a readout attack -1. `0x1014` sets `sp` to `0x3c00`. -1. It then calls `0x16fa` which fills a lot of RAM +1. 
When the CPU is executing the BSL, it can perform data accesses to other BSL + areas. Thus, if an arbitrary read gadget is found, it can be used to dump + the entire BSL region. +1. The routine at `0x1002` provides such a gadget, *as indicated in SLAU550AA*. +1. The BSL execution is allowed to be interrupted, thus the instruction flow + can be traced by dumping CPU register values throughout the BSL execution. + This allows for finding arbitrary read gadgets. +1. Interrupts can also be used to change any register value while the BSL is + executing, even at a specific point in time. This can be used to skip over + certain instructions during analysis, for example. + +## Vulnerabilities of the BSL against use as a source of ROP gadgets + +1. The routine at `0x1002` returns quickly, *as indicated in SLAU550AA*. + Therefore, it can be used as an easy ROP entrypoint. This bypasses the "only + call code from the Z-area" limitation. +1. Interrupts can be used to change return addresses etc., to jump to arbitrary + locations inside the BSL. +1. Potentially, DMA transfers can also be used to change the stack contents, + including return addresses, while the BSL is executing. + +## Inaccuracies of the datasheets + +1. The BSL clears all RAM from `0x1C00` to `0x3FC7`, not just `0x1C00` to + `0x1FFF`. +1. The BSL also clears Tiny RAM and some "reserved" low addresses, from `6` to + `0x1F`. +1. The BSL sets up Timer A, while the datasheet only mentions Timer B usage in + *other* BSLs, and nothing about this one. + +## What has not been checked + +1. Pipelining: can code running at `0x0FFE` (or a similar address) access the + BSL memory, (mis)using the possibility that the effective value of `pc` + might differ from the executed address due to pipelining effects? (cf. + MerryMage's GBA BIOS dump) +1. DMA: can a DMA transfer be used to change the stack contents during BSL + execution? (Most likely, just like interrupts can, I simply haven't checked.) 
diff --git a/logtracer.py b/logtracer.py index 412a47f..01af7b4 100755 --- a/logtracer.py +++ b/logtracer.py @@ -7,6 +7,7 @@ from typing import * class Rec(NamedTuple): cycle: int + delta: int pc: int sp: int sr: int @@ -14,7 +15,7 @@ class Rec(NamedTuple): stack: List[int] def __str__(self): - a = "cycle: %d" % self.cycle + a = "cycle: %d (%d)" % (self.cycle, self.delta) b = "pc = %05x sp = %05x sr = %03x" % (self.pc, self.sp, self.sr) gp1 = " ".join(("%sr%d: %05x" % ((" " if d < 6 else ""), d+4, self.gp[d])) for d in range(6)) gp2 = " ".join(("%sr%d: %05x" % ((" " if d < 6 else ""), d+4, self.gp[d])) for d in range(6,12)) @@ -39,7 +40,8 @@ class Rec(NamedTuple): return False -with serial.Serial(sys.argv[1] if len(sys.argv) > 1 else "/dev/ttyACM1", 9600, timeout=120) as ser: +TIMEOUT = 60*60*8 +with serial.Serial(sys.argv[1] if len(sys.argv) > 1 else "/dev/ttyACM1", 9600, timeout=TIMEOUT) as ser: with open("mspbsl.log", "w") as log: recs = [] @@ -69,11 +71,20 @@ with serial.Serial(sys.argv[1] if len(sys.argv) > 1 else "/dev/ttyACM1", 9600, t gp = [int(r4to15[i],16) for i in range(12)] stack = [int(x,16) for x in stack] - rec = Rec(ticks, pc, sp, sr, gp, stack) + delta = -1 + if len(recs) > 1: + rec_ = Rec(ticks, delta, pc, sp, sr, gp, stack) + for i in range(2, min(15, len(recs))): + if recs[-i] == recs[-1]: + continue + delta = ticks - recs[-i].cycle - 1 + break + + rec = Rec(ticks, delta, pc, sp, sr, gp, stack) if len(recs) == 0 or rec != recs[-1]: lstr = '-----\n%s' % str(rec) - log.write(lstr) + log.write(lstr+'\n') print(lstr) recs.append(rec) diff --git a/src/main.c b/src/main.c index 309778d..6cf707d 100644 --- a/src/main.c +++ b/src/main.c @@ -34,14 +34,31 @@ static void setup_io(void) { // --- __attribute__((__persistent__)) // put in FRAM (BSL clears RAM) -static uint32_t regbak[16]; +static uint32_t regbak[16]={0}; __attribute__((__persistent__)) -static uint8_t stackbak[16]; +static uint8_t stackbak[16]={0}; __attribute__((__persistent__)) static 
uint16_t curticks = 1; + +extern uint16_t curaddr; +__attribute__((__persistent__)) +uint16_t curaddr = 0x1000; + +extern uint16_t traceaddr; +__attribute__((__persistent__)) +uint16_t traceaddr = 0x1000; // changeme + typedef void (*bsl_fn)(void); +#define START_HARD 1/*36000*/ +/*#define START_SOFT 36990*/ +#define END_CYC 0xffffu + +// DUMP_MODE == 0 => insn trace mode +#define DUMP_MODE 1 +#define DUMP_CYC_OFF 14 + void do_trace(void); __attribute__((__no_inline__)) void do_trace(void) { // TODO: chain 2 timers for 32 bit tick number @@ -50,27 +67,38 @@ __attribute__((__no_inline__)) void do_trace(void) { // init timer TA0 __bic_SR_register(GIE); +#ifdef DUMP_MODE + traceaddr = 0x1002; + TA1CCR0 = DUMP_CYC_OFF; +#else TA1CCR0 = curticks; +#endif TA1CCTL0 = CCIE; // exec bsl asm volatile( "mov.a #(__stack-8), sp\n" - "mov.w #0x0000, r4\n" - "mov.w #0x0000, r5\n" - "mov.w #0x0000, r6\n" - "mov.w #0x0000, r7\n" - "mov.w #0x0000, r8\n" - "mov.w #0x0000, r9\n" - "mov.w #0x0000, r10\n" - "mov.w #0x0000, r11\n" - "mov.w #0x0000, r12\n" - "mov.w #0x0000, r13\n" - "mov.w #0x0000, r14\n" - "mov.w #0x0000, r15\n" + "mov.w #0xaaaa, r4\n" + "mov.w #0xaaaa, r5\n" + "mov.w #0xaaaa, r6\n" + "mov.w #0xaaaa, r7\n" + "mov.w #0xaaaa, r8\n" + "mov.w #0xaaaa, r9\n" + "mov.w #0xaaaa, r10\n" + "mov.w #0xaaaa, r11\n" + "mov.w #0x0002, r12\n" + "mov.w #0xdead, r13\n" + "mov.w #0xbeef, r14\n" + "mov.w #0xaaaa, r15\n" + +#ifdef DUMP_MODE + // extra 0x1002 magic + "mov.w curaddr, sp\n" +#endif + //TA1CTL = TASSEL__SMCLK | ID__1 | MC__UP | TACLR | TAIE; "mov.w #0x0216, TA1CTL\n" "eint\n" - "call #0x1000\n" + "call #0x1002\n" // CHANGEME (address to trace insn flow of) ); //while (1) ; __builtin_unreachable(); @@ -81,7 +109,24 @@ __attribute__((__used__, __no_inline__)) void do_collect(uint16_t* sp) { TA1CTL &= ~(uint16_t)(TAIE|MC__UP); - //if (curticks < 0x2380) goto next_iter; +#if DUMP_MODE + uint16_t v1 = sp[2*(12-4)]; + uint16_t v2 = sp[2*(13-4)]; + + if (!(curaddr & 0xf)) { + 
iprintf("%04x: ", curaddr); + } + iprintf("%02x %02x %02x %02x ", + v1 & 0xff, (v1 >> 8) & 0xff, + v2 & 0xff, (v2 >> 8) & 0xff + ); + if ((curaddr & 0xf) == 0xc) { + iprintf("\r\n"); + } +#else +#ifdef START_SOFT + if (curticks < START_SOFT) goto next_iter; +#endif // general purpose registers for (int i = 0; i < 12; ++i) { @@ -115,20 +160,29 @@ __attribute__((__used__, __no_inline__)) stackbak[8], stackbak[9], stackbak[10], stackbak[11], stackbak[12], stackbak[13], stackbak[14], stackbak[15]); //fflush(stdout); +#endif next_iter: - if (curticks == 0xffff)while(1); +#if DUMP_MODE + if (curaddr == 0x1800) while(1); // start of info mem + curaddr += 4; +#else + if (curticks == END_CYC) while(1); ++curticks; - //while (1) ; +#endif do_trace(); __builtin_unreachable(); } __attribute__((__interrupt__(TIMER1_A0_VECTOR), __naked__)) -void Timer_A0_ISR(void) { +void Timer_A1_ISR(void) { asm volatile( ".extern do_collect\n" + //"bis #0x3, P1OUT\n" +#if DUMP_MODE + "mov.a #(__stack-8), sp\n" +#endif "pushm.a #12, r15\n" "mov.a sp, r12\n" "call #do_collect\n" @@ -145,7 +199,20 @@ int main(void) { __bis_SR_register(GIE); // enable irq - curticks = 1; +#if DUMP_MODE + curticks = DUMP_CYC_OFF; + curaddr = 0x1000; +#else + curticks = START_HARD; +#endif + + /*uint8_t zarea[8]; + memcpy(zarea, (void*)0x1000, 8); + iprintf("zarea: %02x %02x %02x %02x %02x %02x %02x %02x\r\n", + zarea[0], zarea[1], zarea[2], zarea[3], + zarea[4], zarea[5], zarea[6], zarea[7] + );*/ + puts("hello world!\r\n"); do_trace();