From 1fe6850ed79bca76e1f8a3ba5e59de56ab2e979e Mon Sep 17 00:00:00 2001 From: sys64738 Date: Sun, 10 Apr 2022 19:11:45 +0200 Subject: [PATCH] working pc/sr modifier, but that doesn't work in the bsl, uuugh --- README.md | 12 +++--- logtracer.py | 96 +++++++++++++++++++++++++++------------------- src/main.c | 105 +++++++++++++++++++++++++++++++++++++++++---------- 3 files changed, 148 insertions(+), 65 deletions(-) diff --git a/README.md b/README.md index 2d93d3a..37b4795 100644 --- a/README.md +++ b/README.md @@ -70,6 +70,9 @@ described near the end, the article is quite large. 1. Arbitrary code in the BSL region cannot be jumped to from user code, the CPU execution path has to go through the Z-area. Doing this will cause an infinite loop or a reset. +1. Even when returning from an interrupt serviced during BSL execution, it is + not possible to return from this interrupt directly back to BSL code, as + this counts as a jump-to-arbitrary-BSL-location. ## Vulnerabilities of the BSL against a readout attack @@ -81,17 +84,12 @@ described near the end, the article is quite large. 1. The BSL execution is allowed to be interrutped, thus the instruction flow can be traced by dumping CPU register values throughout the BSL execution. This allows for finding arbitrary read gadgets. -1. Interrupts can also be used to change any register value while the BSL is - executing, even at a specific point in time. This can be used to skip over - certain instructions during analysis, for example. ## Vulnerabilities of the BSL against use as a source of ROP gadgets 1. The routine at `0x1002` returns quickly, *as indicatd in SLAU550AA*. Therefore, it can be used as an easy ROP entrypoint. This bypasses the "only call code from the Z-area" limitation. -1. Interrutps can be used to change return addresses etc., to jump to arbitrary - locations inside the BSL. 1. Potentially, DMA transfers can also be used to change the stack contents, including return addresses, while the BSL is executing. 
@@ -112,7 +110,9 @@ described near the end, the article is quite large. own Z-area (also at the beginning, also 8 bytes in size). It has three entrypoints, the fourth is an infinite loop. (`0x3c00..0x3fff` looks like the same type of execute-only memory at first, but actually contains nothing, - at least not according to the techniques used here.) + at least not according to the techniques used here.) The first, documented + BSL region cannot access the second region directly, it must also go through + the corresponding Z-area. ## What has not been checked diff --git a/logtracer.py b/logtracer.py index 1754bc3..d5b772c 100755 --- a/logtracer.py +++ b/logtracer.py @@ -42,52 +42,70 @@ class Rec(NamedTuple): TIMEOUT = 60*60*8 with serial.Serial(sys.argv[1] if len(sys.argv) > 1 else "/dev/ttyACM1", 9600, timeout=TIMEOUT) as ser: - with open("mspbsl.log", "w") as log: + with open("mspbsl.log", "w", buffering=1) as log: recs = [] while True: - l = ser.readline().strip() - if b"hello world!" in l: - break - - while True: - l = [] - while len(l) == 0: - l = ser.readline().strip().decode('utf-8') - if len(l) == 0: continue - - ticks = int(l[2:]) - - pcspsr = ser.readline().strip().decode('utf-8').split() - assert len(pcspsr) == 3 - r4to15 = ser.readline().strip().decode('utf-8').split() - assert len(r4to15) == 12 - stack = ser.readline().strip().decode('utf-8').split() - assert len(stack) == 16 - - pc = int(pcspsr[0], 16) - sp = int(pcspsr[1], 16) - sr = int(pcspsr[2], 16) - gp = [int(r4to15[i],16) for i in range(12)] - stack = [int(x,16) for x in stack] - - delta = -1 - if len(recs) > 0: - rec_ = Rec(ticks, delta, pc, sp, sr, gp, stack) - for i in range(2, min(15, len(recs))): - if recs[-i] == recs[-1]: - continue - delta = ticks - recs[-i].cycle - 1 + # wait for "hello world" + while True: + l = ser.readline().strip() + if b"hello world!" 
in l: break - rec = Rec(ticks, delta, pc, sp, sr, gp, stack) + # start tracing + while True: + try: + l = [] + while len(l) == 0: + l = ser.readline().strip().decode('utf-8') + if len(l) == 0: continue - if len(recs) == 0 or rec != recs[-1]: - lstr = '-----\n%s' % str(rec) - log.write(lstr+'\n') - print(lstr) + if "hello world" in l: + print("no!") + break # start over + #if l[0:1] != "- ": + # print("no") + # #assert False, l + # break + ticks = int(l[2:]) - recs.append(rec) + pcspsr = ser.readline().strip().decode('utf-8').split() + if pcspsr == ["hello", "world!"]: continue + assert len(pcspsr) == 3, pcspsr + r4to15 = ser.readline().strip().decode('utf-8').split() + if r4to15 == ["hello", "world!"]: continue # banner arrived in place of the register line + assert len(r4to15) == 12, r4to15 + stack = ser.readline().strip().decode('utf-8').split() + if stack == ["hello", "world!"]: continue # banner arrived in place of the stack line + assert len(stack) == 16, stack + + pc = int(pcspsr[0], 16) + sp = int(pcspsr[1], 16) + sr = int(pcspsr[2], 16) + gp = [int(r4to15[i],16) for i in range(12)] + stack = [int(x,16) for x in stack] + + delta = -1 + if len(recs) > 0: + rec_ = Rec(ticks, delta, pc, sp, sr, gp, stack) + for i in range(2, min(15, len(recs))): + if recs[-i] == recs[-1]: + continue + delta = ticks - recs[-i].cycle - 1 + break + + rec = Rec(ticks, delta, pc, sp, sr, gp, stack) + + if len(recs) == 0 or rec != recs[-1]: + lstr = '-----\n%s' % str(rec) + log.write(lstr+'\n') + log.flush() + print(lstr) + + recs.append(rec) + except UnicodeDecodeError: pass # just continue from the next one + + print("\n"*4) """ ticks: 399 diff --git a/src/main.c b/src/main.c index 675d399..721277d 100644 --- a/src/main.c +++ b/src/main.c @@ -37,8 +37,12 @@ __attribute__((__persistent__)) // put in FRAM (BSL clears RAM) static uint32_t regbak[16]={0}; __attribute__((__persistent__)) static uint8_t stackbak[16]={0}; -__attribute__((__persistent__)) -static uint16_t curticks = 1; + +extern uint16_t curticks, curticks_; +__attribute__((__persistent__, __used__)) +uint16_t 
curticks = 1; +__attribute__((__persistent__, __used__)) +uint16_t curticks_ = 1; extern uint16_t done_irq; __attribute__((__persistent__, __used__)) @@ -46,22 +50,25 @@ uint16_t done_irq = 0; extern uint16_t curaddr; -__attribute__((__persistent__)) +__attribute__((__persistent__, __used__)) uint16_t curaddr = 0x1000; -extern uint16_t traceaddr; +/*extern uint16_t traceaddr; __attribute__((__persistent__)) -uint16_t traceaddr = 0x1000; // changeme +uint16_t traceaddr = 0x1000; // changeme*/ typedef void (*bsl_fn)(void); #define START_HARD 1/*36000*/ /*#define START_SOFT 36990*/ #define END_CYC 0xffffu +/*#define SKIP_CYC_OFF 5*/ // DUMP_MODE == 0 => insn trace mode -#define DUMP_MODE 0 +#define DUMP_MODE 1 #define DUMP_CYC_OFF 14 +#define DUMP_ADDR_START 0x1000 +#define DUMP_ADDR_END 0x1800 void do_trace(void); __attribute__((__no_inline__)) void do_trace(void) { @@ -72,12 +79,23 @@ __attribute__((__no_inline__)) void do_trace(void) { // init timer TA0 __bic_SR_register(GIE); #if DUMP_MODE - traceaddr = 0x1002; + //traceaddr = 0x1002; TA1CCR0 = DUMP_CYC_OFF; #else - done_irq = 0; - TA1CCR0 = curticks; +#ifdef SKIP_CYC_OFF + if (curticks > SKIP_CYC_OFF) { + done_irq = 0; + TA1CCR0 = SKIP_CYC_OFF; + curticks_ = curticks - SKIP_CYC_OFF /*+ 2*/; + } else #endif + { + done_irq = 1; + TA1CCR0 = curticks; + } +#endif + //TA1CCTL0 |= CCIFG; + //TA1CCTL0 &= ~(CCIE|CCIFG); TA1CCTL0 = CCIE; // exec bsl asm volatile( @@ -104,6 +122,35 @@ __attribute__((__no_inline__)) void do_trace(void) { "mov.w #0x0216, TA1CTL\n" "eint\n" "call #0x1002\n" // CHANGEME (address to trace insn flow of) + /*"nop\n" + "nop\n" + "nop\n" + "nop\n" + "nop\n" + "nop\n" + //"mov.w #0x1337, r8\n" + "dint\nnop\n" + "add.w #-1, r4\n" + "add.w #1, r5\n" + "add.w #2, r6\n" + "add.w #4, r7\n" + "add.w #8, r8\n" + "add.w #-1, r4\n" + "add.w #1, r5\n" + "add.w #2, r6\n" + "add.w #4, r7\n" + "add.w #8, r8\n" + "add.w #-1, r4\n" + "add.w #1, r5\n" + "add.w #2, r6\n" + "add.w #4, r7\n" + "add.w #8, r8\n" + 
"add.w #-1, r4\n" + "add.w #1, r5\n" + "add.w #2, r6\n" + "add.w #4, r7\n" + "add.w #8, r8\n" + "1: jmp 1b\n"*/ ); //while (1) ; __builtin_unreachable(); @@ -112,6 +159,7 @@ __attribute__((__no_inline__)) void do_trace(void) { void do_collect(uint16_t* sp); __attribute__((__used__, __no_inline__)) void do_collect(uint16_t* sp) { + //P1OUT=0; TA1CTL &= ~(uint16_t)(TAIE|MC__UP); // 0x1bc2/4 and 0x1bd6/8 contain a bic #GIE, sr instruction! these should be 2 bytes in size @@ -177,7 +225,7 @@ void do_collect(uint16_t* sp) { next_iter: #if DUMP_MODE - if (curaddr == 0x1800) while(1); // start of info mem + if (curaddr == DUMP_ADDR_END) while(1); // start of info mem curaddr += 4; #else if (curticks == END_CYC) while(1); @@ -201,26 +249,33 @@ void Timer_A1_ISR(void) { "mov.a #(__stack-8), sp\n" #endif -/*#if !DUMP_MODE +#if !DUMP_MODE && defined(SKIP_CYC_OFF) // do some hackery - "bit.w #0, done_irq\n" - "jnz .Lregular\n" + "cmp.w #0, done_irq\n" + "jne .Lregular\n" + + //"add.w #4, P1OUT\n" // skip pc forward by 4 "add.w #4, 2(sp)\n" - // set GIE in sr + // set/force GIE in sr "bis.w #8, 0(sp)\n" - // set timer counter for next IRQ for trace - "mov.w #1, TA1CCR0\n" - // restart timer - "bis.w #4, TA1CTL\n" // done it now "mov.w #1, done_irq\n" + // set timer counter for next IRQ for trace + "mov.w curticks_, TA1CCR0\n" + // clear TAIE, TAIFG, MC + "bic.w #0x0033, TA1CTL\n" + // clear irq flag, enable compare irq + "mov.w #16, TA1CCTL0\n" + // restart timer + "mov.w #0x0216, TA1CTL\n" // continue as usual "reti\n" -#endif*/ +#endif ".Lregular:\n" + //"add.w #8, P1OUT\n" "pushm.a #12, r15\n" "mov.a sp, r12\n" "call #do_collect\n" @@ -228,10 +283,20 @@ void Timer_A1_ISR(void) { "reti\n" ); } + +__attribute__((__interrupt__(UNMI_VECTOR))) +void NMI_ISR(void) { + SFRIFG1 &= ~NMIIE; + SYSUNIV = 0; + ++P1OUT; +} + int main(void) { setup_io(); setup_clocks(); stdio_msp_init(); + SFRIE1 = NMIIE; + SFRRPCR = SYSRSTRE__ENABLE | SYSRSTUP__PULLUP | SYSNMIIES__FALLING | SYSNMI__NMI; 
memset(regbak, 0, sizeof regbak); @@ -239,7 +304,7 @@ int main(void) { #if DUMP_MODE curticks = DUMP_CYC_OFF; - curaddr = 0x1000; + curaddr = DUMP_ADDR_START; #else curticks = START_HARD; #endif