dumper code that works!

This commit is contained in:
Triss 2022-04-09 21:55:52 +02:00
parent b43120968d
commit cb7386bc6f
3 changed files with 149 additions and 33 deletions

View File

@ -3,9 +3,9 @@
Tools to try to dump the MSP430FR BSL, mainly targeting the [MSP430FR5994
](https://www.ti.com/product/MSP430FR5994) (on an MSP-EXP430FR5994 devboard).
## How
## The idea
Mhe MSP430FR bootloader ('BSL') resides at 0x1000, this memory cannot be read,
The MSP430FR bootloader ('BSL') resides at 0x1000; this memory cannot be read,
and user code can only jump to 0x1000 or 0x1002 to run certain functions of the
BSL. Though, it is very likely that when the CPU is running from inside this
memory region, it can access this memory as data, because that's often needed
@ -52,14 +52,52 @@ The "use DMA to get ROP" trick comes from [here
](https://hexkyz.blogspot.com/2021/11/je-ne-sais-quoi-falcons-over-horizon.html),
described near the end, the article is quite large.
## What has been found
## What has been implemented correctly
1. `0x1000` is a jump to `0x1014`.
1. `0x1002` is a jump to `0x1028`.
1. The rest of the code in the Z-area consists of infinite loops (each jumping to itself, instruction `ff 3f`).
1. Memory in the BSL region cannot be read using data accesses from user code.
Reads come back as `3f ff`, which decodes as an infinite loop.
1. Arbitrary code in the BSL region cannot be jumped to from user code; the
CPU execution path has to go through the Z-area. Attempting a direct jump
causes an infinite loop or a reset.
### 0x1014 (BSL_main())
## Vulnerabilities of the BSL against a readout attack
1. `0x1014` sets `sp` to `0x3c00`.
1. It then calls `0x16fa` which fills a lot of RAM
1. When the CPU is executing the BSL, it can perform data accesses to other BSL
areas. Thus, if an arbitrary read gadget is found, it can be used to dump
the entire BSL region.
1. The routine at `0x1002` provides such a gadget, *as indicated in SLAU550AA*.
1. The BSL execution is allowed to be interrupted, thus the instruction flow
can be traced by dumping CPU register values throughout the BSL execution.
This allows for finding arbitrary read gadgets.
1. Interrupts can also be used to change any register value while the BSL is
executing, even at a specific point in time. This can be used to skip over
certain instructions during analysis, for example.
## Vulnerabilities of the BSL against use as a source of ROP gadgets
1. The routine at `0x1002` returns quickly, *as indicated in SLAU550AA*.
Therefore, it can be used as an easy ROP entrypoint. This bypasses the "only
call code from the Z-area" limitation.
1. Interrupts can be used to change return addresses etc., to jump to arbitrary
locations inside the BSL.
1. Potentially, DMA transfers can also be used to change the stack contents,
including return addresses, while the BSL is executing.
## Inaccuracies of the datasheets
1. The BSL clears all RAM from `0x1C00` to `0x3FC7`, not just `0x1C00` to
`0x1FFF`.
1. The BSL also clears Tiny RAM and some "reserved" low addresses, from `6` to
`0x1F`.
1. The BSL sets up Timer A, while the datasheet only mentions Timer B usage in
*other* BSLs, and nothing about this one.
## What has not been checked
1. Pipelining: can code running at `0x0FFE` (or a similar address) access the
BSL memory, (mis)using the possibility that the effective value of `pc`
might differ from the executed address due to pipelining effects? (cf.
MerryMage's GBA BIOS dump)
1. DMA: can a DMA transfer be used to change the stack contents during BSL
execution? (Most likely, just like interrupts can, I simply haven't checked.)

View File

@ -7,6 +7,7 @@ from typing import *
class Rec(NamedTuple):
cycle: int
delta: int
pc: int
sp: int
sr: int
@ -14,7 +15,7 @@ class Rec(NamedTuple):
stack: List[int]
def __str__(self):
a = "cycle: %d" % self.cycle
a = "cycle: %d (%d)" % (self.cycle, self.delta)
b = "pc = %05x sp = %05x sr = %03x" % (self.pc, self.sp, self.sr)
gp1 = " ".join(("%sr%d: %05x" % ((" " if d < 6 else ""), d+4, self.gp[d])) for d in range(6))
gp2 = " ".join(("%sr%d: %05x" % ((" " if d < 6 else ""), d+4, self.gp[d])) for d in range(6,12))
@ -39,7 +40,8 @@ class Rec(NamedTuple):
return False
with serial.Serial(sys.argv[1] if len(sys.argv) > 1 else "/dev/ttyACM1", 9600, timeout=120) as ser:
TIMEOUT = 60*60*8
with serial.Serial(sys.argv[1] if len(sys.argv) > 1 else "/dev/ttyACM1", 9600, timeout=TIMEOUT) as ser:
with open("mspbsl.log", "w") as log:
recs = []
@ -69,11 +71,20 @@ with serial.Serial(sys.argv[1] if len(sys.argv) > 1 else "/dev/ttyACM1", 9600, t
gp = [int(r4to15[i],16) for i in range(12)]
stack = [int(x,16) for x in stack]
rec = Rec(ticks, pc, sp, sr, gp, stack)
delta = -1
if len(recs) > 1:
rec_ = Rec(ticks, delta, pc, sp, sr, gp, stack)
for i in range(2, min(15, len(recs))):
if recs[-i] == recs[-1]:
continue
delta = ticks - recs[-i].cycle - 1
break
rec = Rec(ticks, delta, pc, sp, sr, gp, stack)
if len(recs) == 0 or rec != recs[-1]:
lstr = '-----\n%s' % str(rec)
log.write(lstr)
log.write(lstr+'\n')
print(lstr)
recs.append(rec)

View File

@ -34,14 +34,31 @@ static void setup_io(void) {
// ---
__attribute__((__persistent__)) // put in FRAM (BSL clears RAM)
static uint32_t regbak[16];
static uint32_t regbak[16]={0};
__attribute__((__persistent__))
static uint8_t stackbak[16];
static uint8_t stackbak[16]={0};
__attribute__((__persistent__))
static uint16_t curticks = 1;
extern uint16_t curaddr;
__attribute__((__persistent__))
uint16_t curaddr = 0x1000;
extern uint16_t traceaddr;
__attribute__((__persistent__))
uint16_t traceaddr = 0x1000; // changeme
typedef void (*bsl_fn)(void);
#define START_HARD 1/*36000*/
/*#define START_SOFT 36990*/
#define END_CYC 0xffffu
// DUMP_MODE == 0 => insn trace mode
#define DUMP_MODE 1
#define DUMP_CYC_OFF 14
void do_trace(void);
__attribute__((__no_inline__)) void do_trace(void) {
// TODO: chain 2 timers for 32 bit tick number
@ -50,27 +67,38 @@ __attribute__((__no_inline__)) void do_trace(void) {
// init timer TA1
__bic_SR_register(GIE);
#ifdef DUMP_MODE
traceaddr = 0x1002;
TA1CCR0 = DUMP_CYC_OFF;
#else
TA1CCR0 = curticks;
#endif
TA1CCTL0 = CCIE;
// exec bsl
asm volatile(
"mov.a #(__stack-8), sp\n"
"mov.w #0x0000, r4\n"
"mov.w #0x0000, r5\n"
"mov.w #0x0000, r6\n"
"mov.w #0x0000, r7\n"
"mov.w #0x0000, r8\n"
"mov.w #0x0000, r9\n"
"mov.w #0x0000, r10\n"
"mov.w #0x0000, r11\n"
"mov.w #0x0000, r12\n"
"mov.w #0x0000, r13\n"
"mov.w #0x0000, r14\n"
"mov.w #0x0000, r15\n"
"mov.w #0xaaaa, r4\n"
"mov.w #0xaaaa, r5\n"
"mov.w #0xaaaa, r6\n"
"mov.w #0xaaaa, r7\n"
"mov.w #0xaaaa, r8\n"
"mov.w #0xaaaa, r9\n"
"mov.w #0xaaaa, r10\n"
"mov.w #0xaaaa, r11\n"
"mov.w #0x0002, r12\n"
"mov.w #0xdead, r13\n"
"mov.w #0xbeef, r14\n"
"mov.w #0xaaaa, r15\n"
#ifdef DUMP_MODE
// extra 0x1002 magic
"mov.w curaddr, sp\n"
#endif
//TA1CTL = TASSEL__SMCLK | ID__1 | MC__UP | TACLR | TAIE;
"mov.w #0x0216, TA1CTL\n"
"eint\n"
"call #0x1000\n"
"call #0x1002\n" // CHANGEME (address to trace insn flow of)
);
//while (1) ;
__builtin_unreachable();
@ -81,7 +109,24 @@ __attribute__((__used__, __no_inline__))
void do_collect(uint16_t* sp) {
TA1CTL &= ~(uint16_t)(TAIE|MC__UP);
//if (curticks < 0x2380) goto next_iter;
#if DUMP_MODE
uint16_t v1 = sp[2*(12-4)];
uint16_t v2 = sp[2*(13-4)];
if (!(curaddr & 0xf)) {
iprintf("%04x: ", curaddr);
}
iprintf("%02x %02x %02x %02x ",
v1 & 0xff, (v1 >> 8) & 0xff,
v2 & 0xff, (v2 >> 8) & 0xff
);
if ((curaddr & 0xf) == 0xc) {
iprintf("\r\n");
}
#else
#ifdef START_SOFT
if (curticks < START_SOFT) goto next_iter;
#endif
// general purpose registers
for (int i = 0; i < 12; ++i) {
@ -115,20 +160,29 @@ __attribute__((__used__, __no_inline__))
stackbak[8], stackbak[9], stackbak[10], stackbak[11],
stackbak[12], stackbak[13], stackbak[14], stackbak[15]);
//fflush(stdout);
#endif
next_iter:
if (curticks == 0xffff)while(1);
#if DUMP_MODE
if (curaddr == 0x1800) while(1); // start of info mem
curaddr += 4;
#else
if (curticks == END_CYC) while(1);
++curticks;
//while (1) ;
#endif
do_trace();
__builtin_unreachable();
}
__attribute__((__interrupt__(TIMER1_A0_VECTOR), __naked__))
void Timer_A0_ISR(void) {
void Timer_A1_ISR(void) {
asm volatile(
".extern do_collect\n"
//"bis #0x3, P1OUT\n"
#if DUMP_MODE
"mov.a #(__stack-8), sp\n"
#endif
"pushm.a #12, r15\n"
"mov.a sp, r12\n"
"call #do_collect\n"
@ -145,7 +199,20 @@ int main(void) {
__bis_SR_register(GIE); // enable irq
curticks = 1;
#if DUMP_MODE
curticks = DUMP_CYC_OFF;
curaddr = 0x1000;
#else
curticks = START_HARD;
#endif
/*uint8_t zarea[8];
memcpy(zarea, (void*)0x1000, 8);
iprintf("zarea: %02x %02x %02x %02x %02x %02x %02x %02x\r\n",
zarea[0], zarea[1], zarea[2], zarea[3],
zarea[4], zarea[5], zarea[6], zarea[7]
);*/
puts("hello world!\r\n");
do_trace();