working pc/sr modifier, but that doesn't work in the bsl, uuugh

This commit is contained in:
Triss 2022-04-10 19:11:45 +02:00
parent 59fa779864
commit 1fe6850ed7
3 changed files with 148 additions and 65 deletions

View File

@ -70,6 +70,9 @@ described near the end, the article is quite large.
1. Arbitrary code in the BSL region cannot be jumped to from user code, the
CPU execution path has to go through the Z-area. Doing this will cause an
infinite loop or a reset.
1. Even when returning from an interrupt serviced during BSL execution, it is
not possible to return from this interrupt directly back to BSL code, as
this counts as a jump-to-arbitrary-BSL-location.
## Vulnerabilities of the BSL against a readout attack
@ -81,17 +84,12 @@ described near the end, the article is quite large.
1. The BSL execution is allowed to be interrupted, thus the instruction flow
can be traced by dumping CPU register values throughout the BSL execution.
This allows for finding arbitrary read gadgets.
1. Interrupts can also be used to change any register value while the BSL is
executing, even at a specific point in time. This can be used to skip over
certain instructions during analysis, for example.
## Vulnerabilities of the BSL against use as a source of ROP gadgets
1. The routine at `0x1002` returns quickly, *as indicated in SLAU550AA*.
Therefore, it can be used as an easy ROP entrypoint. This bypasses the "only
call code from the Z-area" limitation.
1. Interrupts can be used to change return addresses etc., to jump to arbitrary
locations inside the BSL.
1. Potentially, DMA transfers can also be used to change the stack contents,
including return addresses, while the BSL is executing.
@ -112,7 +110,9 @@ described near the end, the article is quite large.
own Z-area (also at the beginning, also 8 bytes in size). It has three
entrypoints, the fourth is an infinite loop. (`0x3c00..0x3fff` looks like
the same type of execute-only memory at first, but actually contains nothing,
at least not according to the techniques used here.)
at least not according to the techniques used here.) The first, documented
BSL region cannot access the second region directly; it must also go through
the corresponding Z-area.
## What has not been checked

View File

@ -42,28 +42,42 @@ class Rec(NamedTuple):
TIMEOUT = 60*60*8
with serial.Serial(sys.argv[1] if len(sys.argv) > 1 else "/dev/ttyACM1", 9600, timeout=TIMEOUT) as ser:
with open("mspbsl.log", "w") as log:
with open("mspbsl.log", "w", buffering=1) as log:
recs = []
while True:
# wait for "hello world"
while True:
l = ser.readline().strip()
if b"hello world!" in l:
break
# start tracing
while True:
try:
l = []
while len(l) == 0:
l = ser.readline().strip().decode('utf-8')
if len(l) == 0: continue
if "hello world" in l:
print("no!")
break # start over
#if l[0:1] != "- ":
# print("no")
# #assert False, l
# break
ticks = int(l[2:])
pcspsr = ser.readline().strip().decode('utf-8').split()
assert len(pcspsr) == 3
if pcspsr == ["hello", "world!"]: continue
assert len(pcspsr) == 3, pcspsr
r4to15 = ser.readline().strip().decode('utf-8').split()
assert len(r4to15) == 12
if pcspsr == ["hello", "world!"]: continue
assert len(r4to15) == 12, r4to15
stack = ser.readline().strip().decode('utf-8').split()
assert len(stack) == 16
if pcspsr == ["hello", "world!"]: continue
assert len(stack) == 16, stack
pc = int(pcspsr[0], 16)
sp = int(pcspsr[1], 16)
@ -85,9 +99,13 @@ with serial.Serial(sys.argv[1] if len(sys.argv) > 1 else "/dev/ttyACM1", 9600, t
if len(recs) == 0 or rec != recs[-1]:
lstr = '-----\n%s' % str(rec)
log.write(lstr+'\n')
log.flush()
print(lstr)
recs.append(rec)
except UnicodeDecodeError: pass # just continue from the next one
print("\n"*4)
"""
ticks: 399

View File

@ -37,8 +37,12 @@ __attribute__((__persistent__)) // put in FRAM (BSL clears RAM)
static uint32_t regbak[16]={0};
__attribute__((__persistent__))
static uint8_t stackbak[16]={0};
__attribute__((__persistent__))
static uint16_t curticks = 1;
extern uint16_t curticks, curticks_;
__attribute__((__persistent__, __used__))
uint16_t curticks = 1;
__attribute__((__persistent__, __used__))
uint16_t curticks_ = 1;
extern uint16_t done_irq;
__attribute__((__persistent__, __used__))
@ -46,22 +50,25 @@ uint16_t done_irq = 0;
extern uint16_t curaddr;
__attribute__((__persistent__))
__attribute__((__persistent__, __used__))
uint16_t curaddr = 0x1000;
extern uint16_t traceaddr;
/*extern uint16_t traceaddr;
__attribute__((__persistent__))
uint16_t traceaddr = 0x1000; // changeme
uint16_t traceaddr = 0x1000; // changeme*/
typedef void (*bsl_fn)(void);
#define START_HARD 1/*36000*/
/*#define START_SOFT 36990*/
#define END_CYC 0xffffu
/*#define SKIP_CYC_OFF 5*/
// DUMP_MODE == 0 => insn trace mode
#define DUMP_MODE 0
#define DUMP_MODE 1
#define DUMP_CYC_OFF 14
#define DUMP_ADDR_START 0x1000
#define DUMP_ADDR_END 0x1800
void do_trace(void);
__attribute__((__no_inline__)) void do_trace(void) {
@ -72,12 +79,23 @@ __attribute__((__no_inline__)) void do_trace(void) {
// init timer TA0
__bic_SR_register(GIE);
#if DUMP_MODE
traceaddr = 0x1002;
//traceaddr = 0x1002;
TA1CCR0 = DUMP_CYC_OFF;
#else
#ifdef SKIP_CYC_OFF
if (curticks > SKIP_CYC_OFF) {
done_irq = 0;
TA1CCR0 = curticks;
TA1CCR0 = SKIP_CYC_OFF;
curticks_ = curticks - SKIP_CYC_OFF /*+ 2*/;
} else
#endif
{
done_irq = 1;
TA1CCR0 = curticks;
}
#endif
//TA1CCTL0 |= CCIFG;
//TA1CCTL0 &= ~(CCIE|CCIFG);
TA1CCTL0 = CCIE;
// exec bsl
asm volatile(
@ -104,6 +122,35 @@ __attribute__((__no_inline__)) void do_trace(void) {
"mov.w #0x0216, TA1CTL\n"
"eint\n"
"call #0x1002\n" // CHANGEME (address to trace insn flow of)
/*"nop\n"
"nop\n"
"nop\n"
"nop\n"
"nop\n"
"nop\n"
//"mov.w #0x1337, r8\n"
"dint\nnop\n"
"add.w #-1, r4\n"
"add.w #1, r5\n"
"add.w #2, r6\n"
"add.w #4, r7\n"
"add.w #8, r8\n"
"add.w #-1, r4\n"
"add.w #1, r5\n"
"add.w #2, r6\n"
"add.w #4, r7\n"
"add.w #8, r8\n"
"add.w #-1, r4\n"
"add.w #1, r5\n"
"add.w #2, r6\n"
"add.w #4, r7\n"
"add.w #8, r8\n"
"add.w #-1, r4\n"
"add.w #1, r5\n"
"add.w #2, r6\n"
"add.w #4, r7\n"
"add.w #8, r8\n"
"1: jmp 1b\n"*/
);
//while (1) ;
__builtin_unreachable();
@ -112,6 +159,7 @@ __attribute__((__no_inline__)) void do_trace(void) {
void do_collect(uint16_t* sp);
__attribute__((__used__, __no_inline__))
void do_collect(uint16_t* sp) {
//P1OUT=0;
TA1CTL &= ~(uint16_t)(TAIE|MC__UP);
// 0x1bc2/4 and 0x1bd6/8 contain a bic #GIE, sr instruction! these should be 2 bytes in size
@ -177,7 +225,7 @@ void do_collect(uint16_t* sp) {
next_iter:
#if DUMP_MODE
if (curaddr == 0x1800) while(1); // start of info mem
if (curaddr == DUMP_ADDR_END) while(1); // start of info mem
curaddr += 4;
#else
if (curticks == END_CYC) while(1);
@ -201,26 +249,33 @@ void Timer_A1_ISR(void) {
"mov.a #(__stack-8), sp\n"
#endif
/*#if !DUMP_MODE
#if !DUMP_MODE && defined(SKIP_CYC_OFF)
// do some hackery
"bit.w #0, done_irq\n"
"jnz .Lregular\n"
"cmp.w #0, done_irq\n"
"jne .Lregular\n"
//"add.w #4, P1OUT\n"
// skip pc forward by 4
"add.w #4, 2(sp)\n"
// set GIE in sr
// set/force GIE in sr
"bis.w #8, 0(sp)\n"
// set timer counter for next IRQ for trace
"mov.w #1, TA1CCR0\n"
// restart timer
"bis.w #4, TA1CTL\n"
// done it now
"mov.w #1, done_irq\n"
// set timer counter for next IRQ for trace
"mov.w curticks_, TA1CCR0\n"
// clear TAIE, TAIFG, MC
"bic.w #0x0033, TA1CTL\n"
// clear irq flag, enable compare irq
"mov.w #16, TA1CCTL0\n"
// restart timer
"mov.w #0x0216, TA1CTL\n"
// continue as usual
"reti\n"
#endif*/
#endif
".Lregular:\n"
//"add.w #8, P1OUT\n"
"pushm.a #12, r15\n"
"mov.a sp, r12\n"
"call #do_collect\n"
@ -228,10 +283,20 @@ void Timer_A1_ISR(void) {
"reti\n"
);
}
// Interrupt handler registered on UNMI_VECTOR.
// Clears one bit in SFRIFG1, writes 0 to SYSUNIV, and bumps P1OUT by one so
// that each invocation changes the port output value.
__attribute__((__interrupt__(UNMI_VECTOR)))
void NMI_ISR(void) {
// NOTE(review): the mask is named NMIIE (the same name main() writes to the
// enable register SFRIE1) but is applied to the flag register SFRIFG1 here;
// presumably NMIIFG was intended and both share a bit position -- confirm.
SFRIFG1 &= ~NMIIE;
// presumably resets the pending user-NMI vector value -- TODO confirm
SYSUNIV = 0;
// increment the port output register on every entry to this handler
++P1OUT;
}
int main(void) {
setup_io();
setup_clocks();
stdio_msp_init();
SFRIE1 = NMIIE;
SFRRPCR = SYSRSTRE__ENABLE | SYSRSTUP__PULLUP | SYSNMIIES__FALLING | SYSNMI__NMI;
memset(regbak, 0, sizeof regbak);
@ -239,7 +304,7 @@ int main(void) {
#if DUMP_MODE
curticks = DUMP_CYC_OFF;
curaddr = 0x1000;
curaddr = DUMP_ADDR_START;
#else
curticks = START_HARD;
#endif