working pc/sr modifier, but that doesn't work in the bsl, uuugh

This commit is contained in:
Triss 2022-04-10 19:11:45 +02:00
parent 59fa779864
commit 1fe6850ed7
3 changed files with 148 additions and 65 deletions

View File

@ -70,6 +70,9 @@ described near the end, the article is quite large.
1. Arbitrary code in the BSL region cannot be jumped to from user code, the 1. Arbitrary code in the BSL region cannot be jumped to from user code, the
CPU execution path has to go through the Z-area. Doing this will cause an CPU execution path has to go through the Z-area. Doing this will cause an
infinite loop or a reset. infinite loop or a reset.
1. Even when returning from an interrupt serviced during BSL execution, it is
not possible to return from this interrupt directly back to BSL code, as
this counts as a jump-to-arbitrary-BSL-location.
## Vulnerabilities of the BSL against a readout attack ## Vulnerabilities of the BSL against a readout attack
@ -81,17 +84,12 @@ described near the end, the article is quite large.
1. The BSL execution is allowed to be interrupted, thus the instruction flow 1. The BSL execution is allowed to be interrupted, thus the instruction flow
can be traced by dumping CPU register values throughout the BSL execution. can be traced by dumping CPU register values throughout the BSL execution.
This allows for finding arbitrary read gadgets. This allows for finding arbitrary read gadgets.
1. Interrupts can also be used to change any register value while the BSL is
executing, even at a specific point in time. This can be used to skip over
certain instructions during analysis, for example.
## Vulnerabilities of the BSL against use as a source of ROP gadgets ## Vulnerabilities of the BSL against use as a source of ROP gadgets
1. The routine at `0x1002` returns quickly, *as indicated in SLAU550AA*. 1. The routine at `0x1002` returns quickly, *as indicated in SLAU550AA*.
Therefore, it can be used as an easy ROP entrypoint. This bypasses the "only Therefore, it can be used as an easy ROP entrypoint. This bypasses the "only
call code from the Z-area" limitation. call code from the Z-area" limitation.
1. Interrupts can be used to change return addresses etc., to jump to arbitrary
locations inside the BSL.
1. Potentially, DMA transfers can also be used to change the stack contents, 1. Potentially, DMA transfers can also be used to change the stack contents,
including return addresses, while the BSL is executing. including return addresses, while the BSL is executing.
@ -112,7 +110,9 @@ described near the end, the article is quite large.
own Z-area (also at the beginning, also 8 bytes in size). It has three own Z-area (also at the beginning, also 8 bytes in size). It has three
entrypoints, the fourth is an infinite loop. (`0x3c00..0x3fff` looks like entrypoints, the fourth is an infinite loop. (`0x3c00..0x3fff` looks like
the same type of execute-only memory at first, but actually contains nothing, the same type of execute-only memory at first, but actually contains nothing,
at least not according to the techniques used here.) at least not according to the techniques used here.) The first, documented
BSL region cannot access the second region directly, it must also go through
the corresponding Z-area.
## What has not been checked ## What has not been checked

View File

@ -42,52 +42,70 @@ class Rec(NamedTuple):
TIMEOUT = 60*60*8 TIMEOUT = 60*60*8
with serial.Serial(sys.argv[1] if len(sys.argv) > 1 else "/dev/ttyACM1", 9600, timeout=TIMEOUT) as ser: with serial.Serial(sys.argv[1] if len(sys.argv) > 1 else "/dev/ttyACM1", 9600, timeout=TIMEOUT) as ser:
with open("mspbsl.log", "w") as log: with open("mspbsl.log", "w", buffering=1) as log:
recs = [] recs = []
while True: while True:
l = ser.readline().strip() # wait for "hello world"
if b"hello world!" in l: while True:
break l = ser.readline().strip()
if b"hello world!" in l:
while True:
l = []
while len(l) == 0:
l = ser.readline().strip().decode('utf-8')
if len(l) == 0: continue
ticks = int(l[2:])
pcspsr = ser.readline().strip().decode('utf-8').split()
assert len(pcspsr) == 3
r4to15 = ser.readline().strip().decode('utf-8').split()
assert len(r4to15) == 12
stack = ser.readline().strip().decode('utf-8').split()
assert len(stack) == 16
pc = int(pcspsr[0], 16)
sp = int(pcspsr[1], 16)
sr = int(pcspsr[2], 16)
gp = [int(r4to15[i],16) for i in range(12)]
stack = [int(x,16) for x in stack]
delta = -1
if len(recs) > 0:
rec_ = Rec(ticks, delta, pc, sp, sr, gp, stack)
for i in range(2, min(15, len(recs))):
if recs[-i] == recs[-1]:
continue
delta = ticks - recs[-i].cycle - 1
break break
rec = Rec(ticks, delta, pc, sp, sr, gp, stack) # start tracing
while True:
try:
l = []
while len(l) == 0:
l = ser.readline().strip().decode('utf-8')
if len(l) == 0: continue
if len(recs) == 0 or rec != recs[-1]: if "hello world" in l:
lstr = '-----\n%s' % str(rec) print("no!")
log.write(lstr+'\n') break # start over
print(lstr) #if l[0:1] != "- ":
# print("no")
# #assert False, l
# break
ticks = int(l[2:])
recs.append(rec) pcspsr = ser.readline().strip().decode('utf-8').split()
if pcspsr == ["hello", "world!"]: continue
assert len(pcspsr) == 3, pcspsr
r4to15 = ser.readline().strip().decode('utf-8').split()
if pcspsr == ["hello", "world!"]: continue
assert len(r4to15) == 12, r4to15
stack = ser.readline().strip().decode('utf-8').split()
if pcspsr == ["hello", "world!"]: continue
assert len(stack) == 16, stack
pc = int(pcspsr[0], 16)
sp = int(pcspsr[1], 16)
sr = int(pcspsr[2], 16)
gp = [int(r4to15[i],16) for i in range(12)]
stack = [int(x,16) for x in stack]
delta = -1
if len(recs) > 0:
rec_ = Rec(ticks, delta, pc, sp, sr, gp, stack)
for i in range(2, min(15, len(recs))):
if recs[-i] == recs[-1]:
continue
delta = ticks - recs[-i].cycle - 1
break
rec = Rec(ticks, delta, pc, sp, sr, gp, stack)
if len(recs) == 0 or rec != recs[-1]:
lstr = '-----\n%s' % str(rec)
log.write(lstr+'\n')
log.flush()
print(lstr)
recs.append(rec)
except UnicodeDecodeError: pass # just continue from the next one
print("\n"*4)
""" """
ticks: 399 ticks: 399

View File

@ -37,8 +37,12 @@ __attribute__((__persistent__)) // put in FRAM (BSL clears RAM)
static uint32_t regbak[16]={0}; static uint32_t regbak[16]={0};
__attribute__((__persistent__)) __attribute__((__persistent__))
static uint8_t stackbak[16]={0}; static uint8_t stackbak[16]={0};
__attribute__((__persistent__))
static uint16_t curticks = 1; extern uint16_t curticks, curticks_;
__attribute__((__persistent__, __used__))
uint16_t curticks = 1;
__attribute__((__persistent__, __used__))
uint16_t curticks_ = 1;
extern uint16_t done_irq; extern uint16_t done_irq;
__attribute__((__persistent__, __used__)) __attribute__((__persistent__, __used__))
@ -46,22 +50,25 @@ uint16_t done_irq = 0;
extern uint16_t curaddr; extern uint16_t curaddr;
__attribute__((__persistent__)) __attribute__((__persistent__, __used__))
uint16_t curaddr = 0x1000; uint16_t curaddr = 0x1000;
extern uint16_t traceaddr; /*extern uint16_t traceaddr;
__attribute__((__persistent__)) __attribute__((__persistent__))
uint16_t traceaddr = 0x1000; // changeme uint16_t traceaddr = 0x1000; // changeme*/
typedef void (*bsl_fn)(void); typedef void (*bsl_fn)(void);
#define START_HARD 1/*36000*/ #define START_HARD 1/*36000*/
/*#define START_SOFT 36990*/ /*#define START_SOFT 36990*/
#define END_CYC 0xffffu #define END_CYC 0xffffu
/*#define SKIP_CYC_OFF 5*/
// DUMP_MODE == 0 => insn trace mode // DUMP_MODE == 0 => insn trace mode
#define DUMP_MODE 0 #define DUMP_MODE 1
#define DUMP_CYC_OFF 14 #define DUMP_CYC_OFF 14
#define DUMP_ADDR_START 0x1000
#define DUMP_ADDR_END 0x1800
void do_trace(void); void do_trace(void);
__attribute__((__no_inline__)) void do_trace(void) { __attribute__((__no_inline__)) void do_trace(void) {
@ -72,12 +79,23 @@ __attribute__((__no_inline__)) void do_trace(void) {
// init timer TA0 // init timer TA0
__bic_SR_register(GIE); __bic_SR_register(GIE);
#if DUMP_MODE #if DUMP_MODE
traceaddr = 0x1002; //traceaddr = 0x1002;
TA1CCR0 = DUMP_CYC_OFF; TA1CCR0 = DUMP_CYC_OFF;
#else #else
done_irq = 0; #ifdef SKIP_CYC_OFF
TA1CCR0 = curticks; if (curticks > SKIP_CYC_OFF) {
done_irq = 0;
TA1CCR0 = SKIP_CYC_OFF;
curticks_ = curticks - SKIP_CYC_OFF /*+ 2*/;
} else
#endif #endif
{
done_irq = 1;
TA1CCR0 = curticks;
}
#endif
//TA1CCTL0 |= CCIFG;
//TA1CCTL0 &= ~(CCIE|CCIFG);
TA1CCTL0 = CCIE; TA1CCTL0 = CCIE;
// exec bsl // exec bsl
asm volatile( asm volatile(
@ -104,6 +122,35 @@ __attribute__((__no_inline__)) void do_trace(void) {
"mov.w #0x0216, TA1CTL\n" "mov.w #0x0216, TA1CTL\n"
"eint\n" "eint\n"
"call #0x1002\n" // CHANGEME (address to trace insn flow of) "call #0x1002\n" // CHANGEME (address to trace insn flow of)
/*"nop\n"
"nop\n"
"nop\n"
"nop\n"
"nop\n"
"nop\n"
//"mov.w #0x1337, r8\n"
"dint\nnop\n"
"add.w #-1, r4\n"
"add.w #1, r5\n"
"add.w #2, r6\n"
"add.w #4, r7\n"
"add.w #8, r8\n"
"add.w #-1, r4\n"
"add.w #1, r5\n"
"add.w #2, r6\n"
"add.w #4, r7\n"
"add.w #8, r8\n"
"add.w #-1, r4\n"
"add.w #1, r5\n"
"add.w #2, r6\n"
"add.w #4, r7\n"
"add.w #8, r8\n"
"add.w #-1, r4\n"
"add.w #1, r5\n"
"add.w #2, r6\n"
"add.w #4, r7\n"
"add.w #8, r8\n"
"1: jmp 1b\n"*/
); );
//while (1) ; //while (1) ;
__builtin_unreachable(); __builtin_unreachable();
@ -112,6 +159,7 @@ __attribute__((__no_inline__)) void do_trace(void) {
void do_collect(uint16_t* sp); void do_collect(uint16_t* sp);
__attribute__((__used__, __no_inline__)) __attribute__((__used__, __no_inline__))
void do_collect(uint16_t* sp) { void do_collect(uint16_t* sp) {
//P1OUT=0;
TA1CTL &= ~(uint16_t)(TAIE|MC__UP); TA1CTL &= ~(uint16_t)(TAIE|MC__UP);
// 0x1bc2/4 and 0x1bd6/8 contain a bic #GIE, sr instruction! these should be 2 bytes in size // 0x1bc2/4 and 0x1bd6/8 contain a bic #GIE, sr instruction! these should be 2 bytes in size
@ -177,7 +225,7 @@ void do_collect(uint16_t* sp) {
next_iter: next_iter:
#if DUMP_MODE #if DUMP_MODE
if (curaddr == 0x1800) while(1); // start of info mem if (curaddr == DUMP_ADDR_END) while(1); // start of info mem
curaddr += 4; curaddr += 4;
#else #else
if (curticks == END_CYC) while(1); if (curticks == END_CYC) while(1);
@ -201,26 +249,33 @@ void Timer_A1_ISR(void) {
"mov.a #(__stack-8), sp\n" "mov.a #(__stack-8), sp\n"
#endif #endif
/*#if !DUMP_MODE #if !DUMP_MODE && defined(SKIP_CYC_OFF)
// do some hackery // do some hackery
"bit.w #0, done_irq\n" "cmp.w #0, done_irq\n"
"jnz .Lregular\n" "jne .Lregular\n"
//"add.w #4, P1OUT\n"
// skip pc forward by 4 // skip pc forward by 4
"add.w #4, 2(sp)\n" "add.w #4, 2(sp)\n"
// set GIE in sr // set/force GIE in sr
"bis.w #8, 0(sp)\n" "bis.w #8, 0(sp)\n"
// set timer counter for next IRQ for trace
"mov.w #1, TA1CCR0\n"
// restart timer
"bis.w #4, TA1CTL\n"
// done it now // done it now
"mov.w #1, done_irq\n" "mov.w #1, done_irq\n"
// set timer counter for next IRQ for trace
"mov.w curticks_, TA1CCR0\n"
// clear TAIE, TAIFG, MC
"bic.w #0x0033, TA1CTL\n"
// clear irq flag, enable compare irq
"mov.w #16, TA1CCTL0\n"
// restart timer
"mov.w #0x0216, TA1CTL\n"
// continue as usual // continue as usual
"reti\n" "reti\n"
#endif*/ #endif
".Lregular:\n" ".Lregular:\n"
//"add.w #8, P1OUT\n"
"pushm.a #12, r15\n" "pushm.a #12, r15\n"
"mov.a sp, r12\n" "mov.a sp, r12\n"
"call #do_collect\n" "call #do_collect\n"
@ -228,10 +283,20 @@ void Timer_A1_ISR(void) {
"reti\n" "reti\n"
); );
} }
// Interrupt handler registered for UNMI_VECTOR: clears one bit in SFRIFG1,
// writes zero to SYSUNIV and increments P1OUT on every invocation.
__attribute__((__interrupt__(UNMI_VECTOR)))
void NMI_ISR(void) {
// NOTE(review): NMIIE is the mask that main() writes to the enable register
// SFRIE1; here it is applied to the flag register SFRIFG1. Confirm the flag
// bit shares this mask, or use the flag's own name.
SFRIFG1 &= ~NMIIE;
// presumably resets the pending user-NMI vector state — verify against the
// device family user's guide
SYSUNIV = 0;
// bump the port 1 output register (looks like a debug indicator that counts
// NMI entries — TODO confirm)
++P1OUT;
}
int main(void) { int main(void) {
setup_io(); setup_io();
setup_clocks(); setup_clocks();
stdio_msp_init(); stdio_msp_init();
SFRIE1 = NMIIE;
SFRRPCR = SYSRSTRE__ENABLE | SYSRSTUP__PULLUP | SYSNMIIES__FALLING | SYSNMI__NMI;
memset(regbak, 0, sizeof regbak); memset(regbak, 0, sizeof regbak);
@ -239,7 +304,7 @@ int main(void) {
#if DUMP_MODE #if DUMP_MODE
curticks = DUMP_CYC_OFF; curticks = DUMP_CYC_OFF;
curaddr = 0x1000; curaddr = DUMP_ADDR_START;
#else #else
curticks = START_HARD; curticks = START_HARD;
#endif #endif