diff --git a/typetapper/data.py b/typetapper/data.py index c8e56e9..243baa1 100644 --- a/typetapper/data.py +++ b/typetapper/data.py @@ -7,18 +7,18 @@ import copy import networkx -@dataclass(frozen=True) +@dataclass(frozen=True, slots=True) class CodeLoc: bbl_addr: int stmt_idx: int ins_addr: int -@dataclass(frozen=True) +@dataclass(frozen=True, slots=True) class Atom: loc: CodeLoc size: int -@dataclass(frozen=True) +@dataclass(frozen=True, slots=True) class RegisterAtom(Atom): name: str slot_name: str @@ -26,21 +26,21 @@ class RegisterAtom(Atom): def __repr__(self): return f'{self.name} @ {self.loc.ins_addr:#x}' -@dataclass(frozen=True) +@dataclass(frozen=True, slots=True) class MemoryAtom(Atom): endness: str def __repr__(self): return f'MEM @ {self.loc.ins_addr:#x}' -@dataclass(frozen=True) +@dataclass(frozen=True, slots=True) class TmpAtom(Atom): tmp: int def __repr__(self): return f'TMP#{self.tmp} @ {self.loc.ins_addr:#x}' -@dataclass(frozen=True) +@dataclass(frozen=True, slots=True) class ConstAtom(Atom): value: int @@ -48,12 +48,12 @@ class ConstAtom(Atom): return f'CONST#{self.value:#x} @ {self.loc.ins_addr:#x}' -@dataclass(frozen=True) +@dataclass(frozen=True, slots=True) class Op: def invert(self) -> 'Op': raise NotImplementedError -@dataclass(frozen=True) +@dataclass(frozen=True, slots=True) class ConstOffsetOp(Op): const: int @@ -68,12 +68,19 @@ class ConstOffsetOp(Op): const -= 2**64 object.__setattr__(self, 'const', const) -@dataclass(frozen=True) +@dataclass(frozen=True, slots=True) +class StrideOffsetOp(Op): + stride: int + + def invert(self): + return self + +@dataclass(frozen=True, slots=True) class NegOp(Op): def invert(self): return self -@dataclass(frozen=True) +@dataclass(frozen=True, slots=True) class VarOffsetOp(Op): var: Any @@ -81,26 +88,26 @@ class VarOffsetOp(Op): # TODO ???? 
return self -@dataclass(frozen=True) +@dataclass(frozen=True, slots=True) class DerefOp(Op): size: int def invert(self): return RefOp(self.size) -@dataclass(frozen=True) +@dataclass(frozen=True, slots=True) class RefOp(Op): size: int def invert(self): return DerefOp(self.size) -#@dataclass(frozen=True) +#@dataclass(frozen=True, slots=True) #class OtherOp(Op): # def invert(self) -> 'Op': # return self -@dataclass(frozen=True) +@dataclass(frozen=True, slots=True) class OpSequence: ops: Tuple[Op, ...] = () @@ -161,6 +168,11 @@ def simplify_op_sequence(seq: List[Op]): if i > 0: i -= 1 continue + if isinstance(cur, StrideOffsetOp) and isinstance(nex, StrideOffsetOp) and cur.stride == nex.stride: + seq.pop(i) + if i > 0: + i -= 1 + continue i += 1 @@ -172,7 +184,7 @@ class DataKind(IntEnum): Float = auto() Pointer = auto() -@dataclass +@dataclass(slots=True) class Prop: self_data: Counter[DataKind] = field(default_factory=Counter) struct_data: defaultdict[int, defaultdict[int, Counter[DataKind]]] = field(default_factory=lambda: defaultdict(lambda: defaultdict(Counter))) @@ -230,6 +242,8 @@ class Prop: if saved: result.self_data[DataKind.Pointer] = saved result.unifications = Counter((x - op.const, y - op.const) for x, y in result.unifications) + elif isinstance(op, StrideOffsetOp): + result.self_data.clear() elif isinstance(op, VarOffsetOp): saved = result.self_data.get(DataKind.Pointer, None) result = Prop() @@ -239,36 +253,51 @@ class Prop: result = Prop() return result -@dataclass(frozen=True) +@dataclass(frozen=True, slots=True) class LiveData: """ The in-flight data representation for the analysis. All sizes are in bytes """ - sources: List[Tuple[Atom, OpSequence]] - const: Optional[int] + loc: CodeLoc + sources: Tuple[Tuple[Atom, OpSequence], ...] size: int + # if this is non-empty it means the data is characterized SOLELY by the sum of a0*x + a1*y + a2*z + ... + strides: Tuple[Tuple[Optional['LiveData'], int], ...] 
+ + @property + def const(self): + if len(self.strides) == 1 and self.strides[0][0] is None: + return self.strides[0][1] + return None @classmethod - def new_null(cls, size: int): - return cls([], None, size) + def new_null(cls, loc: CodeLoc, size: int, strides: Tuple[Tuple[Optional['LiveData'], int], ...]=()): + return cls(loc, (), size, strides) @classmethod - def new_atom(cls, atom: Atom) -> 'LiveData': - return cls([(atom, OpSequence())], None, atom.size) + def new_atom(cls, loc: CodeLoc, atom: Atom) -> 'LiveData': + return cls(loc, ((atom, OpSequence()),), atom.size, ()) @classmethod - def new_const(cls, value: int, size: int, codeloc: CodeLoc) -> 'LiveData': - return cls([(ConstAtom(codeloc, size, value), OpSequence())], value, size) + def new_const(cls, loc: CodeLoc, value: int, size: int) -> 'LiveData': + return cls(loc, ((ConstAtom(loc, size, value), OpSequence()),), size, ((None, value),)) - def appended(self, op: Op, size: int, const: Optional[int]=None) -> 'LiveData': + def appended(self, loc: CodeLoc, op: Op, size: int, strides: Optional[Tuple[Tuple[Optional['LiveData'], int], ...]]=None) -> 'LiveData': return LiveData( - [(atom, seq.appended(op)) for atom, seq in self.sources], - self.const if const is None else const, - size + loc, + tuple((atom, seq.appended(op)) for atom, seq in self.sources), + size, + self.strides if strides is None else strides, ) - def unioned(self, other: 'LiveData', size: int, const: Optional[int]=None) -> 'LiveData': - return LiveData(self.sources + other.sources, const, size) + def unioned( + self, + loc: CodeLoc, + other: 'LiveData', + size: int, + strides: Tuple[Tuple[Optional['LiveData'], int], ...]=(), + ) -> 'LiveData': + return LiveData(loc, self.sources + other.sources, size, strides) def commit(self, target: Atom, graph: networkx.DiGraph): for src, seq in self.sources: @@ -297,7 +326,7 @@ class LiveData: prop.unifications[(offset1, offset2)] += 1 self.prop(prop, graph) -@dataclass(frozen=True) 
+@dataclass(frozen=True, slots=True) class RegisterInputInfo: atom: RegisterAtom callsites: Tuple[int, ...] @@ -328,20 +357,20 @@ class RegisterInputInfo: graph.add_edge(source, self.atom, ops=OpSequence(), cf=actions) -@dataclass(frozen=True) +@dataclass(frozen=True, slots=True) class ControlFlowAction: pass -@dataclass(frozen=True) +@dataclass(frozen=True, slots=True) class ControlFlowActionPush(ControlFlowAction): callsite: int -@dataclass(frozen=True) +@dataclass(frozen=True, slots=True) class ControlFlowActionPop(ControlFlowAction): callsite: int -@dataclass +@dataclass(slots=True) class BlockInfo: outputs: Dict[str, RegisterAtom] = field(default_factory=dict) # slot names inputs: Dict[str, RegisterAtom] = field(default_factory=dict) # alias names diff --git a/typetapper/engine.py b/typetapper/engine.py index 6ba82fc..e745e02 100644 --- a/typetapper/engine.py +++ b/typetapper/engine.py @@ -1,5 +1,6 @@ from typing import Union import logging +from collections import Counter import angr import pyvex @@ -42,6 +43,15 @@ class TypeTapperEngine(angr.engines.vex.VEXMixin): return self.kp.block_info[self._force_addr] return self.kp.block_info[self.irsb.addr] + def new_null(self, size: int, strides: Tuple[Tuple[Optional['LiveData'], int], ...]=()): + return LiveData.new_null(self.codeloc, size, strides) + + def new_atom(self, atom: Atom) -> 'LiveData': + return LiveData.new_atom(self.codeloc, atom) + + def new_const(self, value: int, size: int) -> 'LiveData': + return LiveData.new_const(self.codeloc, value, size) + def handle_vex_block(self, irsb): self._force_addr = None super().handle_vex_block(irsb) @@ -50,20 +60,20 @@ class TypeTapperEngine(angr.engines.vex.VEXMixin): return self.const(const.value, get_type_size_bytes(const.type)) def const(self, val, size) -> LiveData: - atom = LiveData.new_const(val, size, self.codeloc) + atom = self.new_const(val, size) self.blockinfo.atoms.append(atom.sources[0][0]) return atom def _perform_vex_expr_RdTmp(self, tmp): if 
self.tmp_atoms: - return LiveData.new_atom(self.tmps[tmp]) + return self.new_atom(self.tmps[tmp]) else: return self.tmps[tmp] def _perform_vex_expr_Get(self, offset: LiveData, ty, **kwargs): size = get_type_size_bytes(ty) if type(offset.const) is not int: - return LiveData.new_null(size) + return self.new_null(size) name = self.project.arch.register_size_names[(offset.const, size)] # unsafe return self.get(name, offset.const, size) @@ -73,7 +83,7 @@ class TypeTapperEngine(angr.engines.vex.VEXMixin): slot_info = self.project.arch.get_base_register(offset, size) if slot_info is None: l.error("??????? (%s, %s)", offset, size) - return LiveData.new_null(size) + return self.new_null(size) slot_name = self.project.arch.register_size_names[slot_info] reg_atom = RegisterAtom(self.codeloc, size, name, slot_name) self.blockinfo.atoms.append(reg_atom) @@ -85,12 +95,12 @@ class TypeTapperEngine(angr.engines.vex.VEXMixin): else: pass # alias mismatch elif name in self.blockinfo.inputs: - return LiveData.new_atom(self.blockinfo.inputs[name]) + return self.new_atom(self.blockinfo.inputs[name]) else: self.blockinfo.inputs[name] = reg_atom self.blockinfo.ready_inputs.add(name) - return LiveData.new_atom(reg_atom) + return self.new_atom(reg_atom) def _perform_vex_expr_Load(self, addr: LiveData, ty, endness, **kwargs): size = get_type_size_bytes(ty) @@ -104,15 +114,15 @@ class TypeTapperEngine(angr.engines.vex.VEXMixin): mem_atom = MemoryAtom(self.codeloc, size, endness) self.blockinfo.atoms.append(mem_atom) - addr.appended(DerefOp(size), size).commit(mem_atom, self.graph) - return LiveData.new_atom(mem_atom) + addr.appended(self.codeloc, DerefOp(size), size).commit(mem_atom, self.graph) + return self.new_atom(mem_atom) def _perform_vex_expr_CCall(self, func_name, ty, args, func=None): - return LiveData.new_null(get_type_size_bytes(ty)) + return self.new_null(get_type_size_bytes(ty)) def _perform_vex_expr_ITE(self, cond, ifTrue: LiveData, ifFalse: LiveData): assert ifTrue.size == 
ifFalse.size - return ifTrue.unioned(ifFalse, ifTrue.size) + return ifTrue.unioned(self.codeloc, ifFalse, ifTrue.size) def _perform_vex_expr_Op(self, op, args: List[LiveData]): return self.op(op, args) @@ -126,39 +136,96 @@ class TypeTapperEngine(angr.engines.vex.VEXMixin): arg.prop_self(DataKind.Int, self.graph) size = get_type_size_bytes(ret_ty) + sign = None + mul0, mul1 = None, None + const0, const1 = None, None if op in ('Iop_Add8', 'Iop_Add16', 'Iop_Add32', 'Iop_Add64'): sign = 1 elif op in ('Iop_Sub8', 'Iop_Sub16', 'Iop_Sub32', 'Iop_Sub64'): sign = -1 - else: - sign = None + elif op in ('Iop_Mul8', 'Iop_Mul16', 'Iop_Mul32', 'Iop_Mul64', + 'Iop_MullS8', 'Iop_MullS16', 'Iop_MullS32', 'Iop_MullS64', + 'Iop_MullU8', 'Iop_MullU16', 'Iop_MullU32', 'Iop_MullU64'): + mul0 = args[0].strides + mul1 = args[1].strides + const0 = args[0].const + const1 = args[1].const + elif op in ('Iop_Shl8', 'Iop_Shl16', 'Iop_Shl32', 'Iop_Shl64'): + if args[1].const is not None and args[1].const >= 0: + const0 = args[0].const + const1 = 2**args[1].const + mul0 = args[0].strides + mul1 = ((None, const1),) + if sign is not None: assert size == args[0].size == args[1].size - addend0 = args[0].const - addend1 = args[1].const - if addend0 is not None and addend1 is not None: - const = addend0 + addend1 * sign + stride0 = args[0].strides + stride1 = args[1].strides + strideC = Counter() + if stride0: + for key, n in stride0: + strideC[key] += n else: - const = None + strideC[args[0]] += 1 + if stride1: + for key, n in stride1: + strideC[key] += n * sign + else: + strideC[args[1]] += sign + + neg1 = args[1] if sign == -1: - neg1 = neg1.appended(NegOp(), neg1.size, -addend1 if addend1 is not None else None) - input0 = args[0].appended(ConstOffsetOp(addend1 * sign) if addend1 is not None else VarOffsetOp(neg1), size) - input1 = neg1.appended(ConstOffsetOp(addend0) if addend0 is not None else VarOffsetOp(args[0]), size) - result = input0.unioned(input1, size, const) + neg1 = 
neg1.appended(self.codeloc, NegOp(), neg1.size, tuple((k, -n) for k, n in neg1.strides)) + + input0 = args[0] + if stride1: + for (key, stride) in stride1: + if key is None: + input0 = input0.appended(self.codeloc, ConstOffsetOp(stride * sign), size) + else: + input0 = input0.appended(self.codeloc, StrideOffsetOp(abs(stride)), size) + else: + input0 = input0.appended(self.codeloc, VarOffsetOp(neg1), size) + + input1 = neg1 + if stride0: + for (key, stride) in stride0: + if key is None: + input1 = input1.appended(self.codeloc, ConstOffsetOp(stride), size) + else: + input1 = input1.appended(self.codeloc, StrideOffsetOp(abs(stride)), size) + else: + input1 = input1.appended(self.codeloc, VarOffsetOp(args[0]), size) + + result = input0.unioned(self.codeloc, input1, size, tuple((key, n) for key, n in strideC.items() if n != 0)) + + elif mul0 is not None and mul1 is not None: + if const0 is not None and const1 is not None: + result = self.new_null(size, strides=((None, const0 * const1),)) + elif const1 is not None and len(mul0) != 0: + result = self.new_null(size, strides=tuple((key, v * const1) for key, v in mul0)) + elif const0 is not None and len(mul1) != 0: + result = self.new_null(size, strides=tuple((key, v * const0) for key, v in mul1)) + elif const0 is not None: + result = self.new_null(size, strides=((args[1], const0),)) + elif const1 is not None: + result = self.new_null(size, strides=((args[0], const1),)) + else: + result = self.new_null(size) else: - result = LiveData.new_null(size) + result = self.new_null(size) return result def _handle_vex_expr_GSPTR(self, expr: pyvex.expr.GSPTR): - return LiveData.new_null(get_type_size_bytes(expr.result_type(self.irsb.tyenv))) + return self.new_null(get_type_size_bytes(expr.result_type(self.irsb.tyenv))) def _handle_vex_expr_VECRET(self, expr: pyvex.expr.VECRET): - return LiveData.new_null(get_type_size_bytes(expr.result_type(self.irsb.tyenv))) + return self.new_null(get_type_size_bytes(expr.result_type(self.irsb.tyenv))) 
def _handle_vex_expr_Binder(self, expr: pyvex.expr.Binder): - return LiveData.new_null(get_type_size_bytes(expr.result_type(self.irsb.tyenv))) + return self.new_null(get_type_size_bytes(expr.result_type(self.irsb.tyenv))) def _handle_vex_stmt_IMark(self, stmt: pyvex.stmt.IMark): @@ -166,7 +233,7 @@ class TypeTapperEngine(angr.engines.vex.VEXMixin): def _perform_vex_stmt_Put(self, offset: LiveData, data: LiveData, **kwargs): if type(offset.const) is not int: - return LiveData.new_null(data.size) + return self.new_null(data.size) name = self.project.arch.register_size_names[(offset.const, data.size)] # unsafe return self.put(name, data, offset.const) @@ -177,7 +244,7 @@ class TypeTapperEngine(angr.engines.vex.VEXMixin): slot_info = self.project.arch.get_base_register(offset, data.size) if slot_info is None: l.error("??????? (%s, %s)", offset, data.size) - return LiveData.new_null(data.size) + return self.new_null(data.size) slot_name = self.project.arch.register_size_names[slot_info] reg_atom = RegisterAtom(self.codeloc, data.size, name, slot_name) self.blockinfo.atoms.append(reg_atom) @@ -204,10 +271,10 @@ class TypeTapperEngine(angr.engines.vex.VEXMixin): mem_atom = MemoryAtom(self.codeloc, data.size, endness) self.blockinfo.atoms.append(mem_atom) - addr.appended(DerefOp(data.size), data.size).commit(mem_atom, self.graph) + addr.appended(self.codeloc, DerefOp(data.size), data.size).commit(mem_atom, self.graph) data.commit(mem_atom, self.graph) def _perform_vex_stmt_Dirty_call(self, func_name, ty, args, func=None): if ty is None: return None - return LiveData.new_null(get_type_size_bytes(ty)) + return self.new_null(get_type_size_bytes(ty))