Add StrideOffsetOp
This commit is contained in:
parent
2b1bc63ef2
commit
5aab2b2a80
|
@ -7,18 +7,18 @@ import copy
|
|||
import networkx
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class CodeLoc:
|
||||
bbl_addr: int
|
||||
stmt_idx: int
|
||||
ins_addr: int
|
||||
|
||||
@dataclass(frozen=True, slots=True)
class Atom:
    """Immutable base record for all atom kinds: a code location plus a size."""
    loc: CodeLoc
    size: int
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class RegisterAtom(Atom):
|
||||
name: str
|
||||
slot_name: str
|
||||
|
@ -26,21 +26,21 @@ class RegisterAtom(Atom):
|
|||
def __repr__(self):
|
||||
return f'{self.name} @ {self.loc.ins_addr:#x}'
|
||||
|
||||
@dataclass(frozen=True, slots=True)
class MemoryAtom(Atom):
    """Atom for a memory access; extends the base fields with an endness string."""
    endness: str

    def __repr__(self):
        # Only the instruction address is shown; size and endness are omitted.
        return f'MEM @ {self.loc.ins_addr:#x}'
|
||||
|
||||
@dataclass(frozen=True, slots=True)
class TmpAtom(Atom):
    """Atom for a numbered temporary; extends the base fields with the tmp index."""
    tmp: int

    def __repr__(self):
        # Shows the tmp number and the instruction address in hex.
        return f'TMP#{self.tmp} @ {self.loc.ins_addr:#x}'
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class ConstAtom(Atom):
|
||||
value: int
|
||||
|
||||
|
@ -48,12 +48,12 @@ class ConstAtom(Atom):
|
|||
return f'CONST#{self.value:#x} @ {self.loc.ins_addr:#x}'
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class Op:
|
||||
def invert(self) -> 'Op':
|
||||
raise NotImplementedError
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class ConstOffsetOp(Op):
|
||||
const: int
|
||||
|
||||
|
@ -68,12 +68,19 @@ class ConstOffsetOp(Op):
|
|||
const -= 2**64
|
||||
object.__setattr__(self, 'const', const)
|
||||
|
||||
@dataclass(frozen=True, slots=True)
class StrideOffsetOp(Op):
    """Op parameterised by an integer stride.

    NOTE(review): presumably models an offset by an unknown multiple of
    ``stride`` -- confirm against the code that appends it.
    """
    stride: int

    def invert(self):
        """Return ``self``: this op is treated as its own inverse."""
        return self
|
||||
|
||||
@dataclass(frozen=True, slots=True)
class NegOp(Op):
    """Field-less op whose inverse is itself."""

    def invert(self):
        """Return ``self``."""
        return self
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class VarOffsetOp(Op):
|
||||
var: Any
|
||||
|
||||
|
@ -81,26 +88,26 @@ class VarOffsetOp(Op):
|
|||
# TODO ????
|
||||
return self
|
||||
|
||||
@dataclass(frozen=True, slots=True)
class DerefOp(Op):
    """Op carrying a size; its inverse is a ``RefOp`` of the same size."""
    size: int

    def invert(self):
        """Return ``RefOp(self.size)``."""
        return RefOp(self.size)
|
||||
|
||||
@dataclass(frozen=True, slots=True)
class RefOp(Op):
    """Op carrying a size; its inverse is a ``DerefOp`` of the same size."""
    size: int

    def invert(self):
        """Return ``DerefOp(self.size)``."""
        return DerefOp(self.size)
|
||||
|
||||
#@dataclass(frozen=True)
|
||||
#@dataclass(frozen=True, slots=True)
|
||||
#class OtherOp(Op):
|
||||
# def invert(self) -> 'Op':
|
||||
# return self
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class OpSequence:
|
||||
ops: Tuple[Op, ...] = ()
|
||||
|
||||
|
@ -161,6 +168,11 @@ def simplify_op_sequence(seq: List[Op]):
|
|||
if i > 0:
|
||||
i -= 1
|
||||
continue
|
||||
if isinstance(cur, StrideOffsetOp) and isinstance(nex, StrideOffsetOp) and cur.stride == nex.stride:
|
||||
seq.pop(i)
|
||||
if i > 0:
|
||||
i -= 1
|
||||
continue
|
||||
|
||||
i += 1
|
||||
|
||||
|
@ -172,7 +184,7 @@ class DataKind(IntEnum):
|
|||
Float = auto()
|
||||
Pointer = auto()
|
||||
|
||||
@dataclass
|
||||
@dataclass(slots=True)
|
||||
class Prop:
|
||||
self_data: Counter[DataKind] = field(default_factory=Counter)
|
||||
struct_data: defaultdict[int, defaultdict[int, Counter[DataKind]]] = field(default_factory=lambda: defaultdict(lambda: defaultdict(Counter)))
|
||||
|
@ -230,6 +242,8 @@ class Prop:
|
|||
if saved:
|
||||
result.self_data[DataKind.Pointer] = saved
|
||||
result.unifications = Counter((x - op.const, y - op.const) for x, y in result.unifications)
|
||||
elif isinstance(op, StrideOffsetOp):
|
||||
result.self_data.clear()
|
||||
elif isinstance(op, VarOffsetOp):
|
||||
saved = result.self_data.get(DataKind.Pointer, None)
|
||||
result = Prop()
|
||||
|
@ -239,36 +253,51 @@ class Prop:
|
|||
result = Prop()
|
||||
return result
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class LiveData:
|
||||
"""
|
||||
The in-flight data representation for the analysis. All sizes are in bytes
|
||||
"""
|
||||
sources: List[Tuple[Atom, OpSequence]]
|
||||
const: Optional[int]
|
||||
loc: CodeLoc
|
||||
sources: Tuple[Tuple[Atom, OpSequence], ...]
|
||||
size: int
|
||||
# if this is non-empty it means the data is characterized SOLELY by the sum of a0*x + a1*y + a2*z + ...
|
||||
strides: Tuple[Tuple[Optional['LiveData'], int], ...]
|
||||
|
||||
@property
def const(self):
    """Return the constant value of this data, or ``None`` if it has none.

    A value counts as constant only when ``strides`` holds exactly one entry
    and that entry's key is ``None``; the entry's coefficient is returned.
    """
    if len(self.strides) == 1 and self.strides[0][0] is None:
        return self.strides[0][1]
    return None
|
||||
|
||||
@classmethod
def new_null(cls, loc: CodeLoc, size: int, strides: Tuple[Tuple[Optional['LiveData'], int], ...]=()):
    """Create an instance at ``loc`` with no sources, the given size and strides.

    ``strides`` defaults to the empty tuple.
    """
    return cls(loc, (), size, strides)
|
||||
|
||||
@classmethod
def new_atom(cls, loc: CodeLoc, atom: Atom) -> 'LiveData':
    """Create an instance at ``loc`` whose single source is ``atom``.

    The source is paired with an empty ``OpSequence``; the size is taken from
    ``atom.size`` and the strides are empty.
    """
    return cls(loc, ((atom, OpSequence()),), atom.size, ())
|
||||
|
||||
@classmethod
def new_const(cls, loc: CodeLoc, value: int, size: int) -> 'LiveData':
    """Create an instance at ``loc`` representing the constant ``value``.

    The single source is a fresh ``ConstAtom`` paired with an empty
    ``OpSequence``. The strides are ``((None, value),)``, which is the shape
    the ``const`` property recognises as a constant.
    """
    return cls(loc, ((ConstAtom(loc, size, value), OpSequence()),), size, ((None, value),))
|
||||
|
||||
def appended(self, loc: CodeLoc, op: Op, size: int, strides: Optional[Tuple[Tuple[Optional['LiveData'], int], ...]]=None) -> 'LiveData':
    """Return a new ``LiveData`` with ``op`` appended to every source's sequence.

    The result is located at ``loc`` and has the given ``size``. When
    ``strides`` is ``None`` the current strides are carried over unchanged;
    otherwise the supplied strides replace them.
    """
    return LiveData(
        loc,
        tuple((atom, seq.appended(op)) for atom, seq in self.sources),
        size,
        self.strides if strides is None else strides,
    )
|
||||
|
||||
def unioned(
    self,
    loc: CodeLoc,
    other: 'LiveData',
    size: int,
    strides: Tuple[Tuple[Optional['LiveData'], int], ...]=(),
) -> 'LiveData':
    """Return a new ``LiveData`` whose sources are ours followed by ``other``'s.

    Neither operand's strides are inherited: the result uses the ``strides``
    argument, which defaults to the empty tuple.
    """
    return LiveData(loc, self.sources + other.sources, size, strides)
|
||||
|
||||
def commit(self, target: Atom, graph: networkx.DiGraph):
|
||||
for src, seq in self.sources:
|
||||
|
@ -297,7 +326,7 @@ class LiveData:
|
|||
prop.unifications[(offset1, offset2)] += 1
|
||||
self.prop(prop, graph)
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class RegisterInputInfo:
|
||||
atom: RegisterAtom
|
||||
callsites: Tuple[int, ...]
|
||||
|
@ -328,20 +357,20 @@ class RegisterInputInfo:
|
|||
graph.add_edge(source, self.atom, ops=OpSequence(), cf=actions)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class ControlFlowAction:
|
||||
pass
|
||||
|
||||
@dataclass(frozen=True, slots=True)
class ControlFlowActionPush(ControlFlowAction):
    """Control-flow action tagged with an integer callsite.

    NOTE(review): presumably records entering a call at ``callsite`` -- confirm.
    """
    callsite: int
|
||||
|
||||
@dataclass(frozen=True, slots=True)
class ControlFlowActionPop(ControlFlowAction):
    """Control-flow action tagged with an integer callsite.

    NOTE(review): presumably records returning from a call at ``callsite`` -- confirm.
    """
    callsite: int
|
||||
|
||||
|
||||
@dataclass
|
||||
@dataclass(slots=True)
|
||||
class BlockInfo:
|
||||
outputs: Dict[str, RegisterAtom] = field(default_factory=dict) # slot names
|
||||
inputs: Dict[str, RegisterAtom] = field(default_factory=dict) # alias names
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
from typing import Union
|
||||
import logging
|
||||
from collections import Counter
|
||||
|
||||
import angr
|
||||
import pyvex
|
||||
|
@ -42,6 +43,15 @@ class TypeTapperEngine(angr.engines.vex.VEXMixin):
|
|||
return self.kp.block_info[self._force_addr]
|
||||
return self.kp.block_info[self.irsb.addr]
|
||||
|
||||
def new_null(self, size: int, strides: Tuple[Tuple[Optional['LiveData'], int], ...]=()):
    """Shorthand for ``LiveData.new_null`` located at the engine's current ``codeloc``."""
    return LiveData.new_null(self.codeloc, size, strides)
|
||||
|
||||
def new_atom(self, atom: Atom) -> 'LiveData':
    """Shorthand for ``LiveData.new_atom`` located at the engine's current ``codeloc``."""
    return LiveData.new_atom(self.codeloc, atom)
|
||||
|
||||
def new_const(self, value: int, size: int) -> 'LiveData':
    """Shorthand for ``LiveData.new_const`` located at the engine's current ``codeloc``."""
    return LiveData.new_const(self.codeloc, value, size)
|
||||
|
||||
def handle_vex_block(self, irsb):
|
||||
self._force_addr = None
|
||||
super().handle_vex_block(irsb)
|
||||
|
@ -50,20 +60,20 @@ class TypeTapperEngine(angr.engines.vex.VEXMixin):
|
|||
return self.const(const.value, get_type_size_bytes(const.type))
|
||||
|
||||
def const(self, val, size) -> LiveData:
    """Build constant live data for ``val`` and register its atom on the block.

    The data comes from ``self.new_const``. The atom of its first source
    entry is appended to ``self.blockinfo.atoms`` before the data is returned.
    """
    data = self.new_const(val, size)
    self.blockinfo.atoms.append(data.sources[0][0])
    return data
|
||||
|
||||
def _perform_vex_expr_RdTmp(self, tmp):
    """Read temporary ``tmp``.

    When ``self.tmp_atoms`` is truthy, the stored entry is wrapped via
    ``self.new_atom``; otherwise the stored entry is returned as-is.
    """
    if self.tmp_atoms:
        return self.new_atom(self.tmps[tmp])
    else:
        return self.tmps[tmp]
|
||||
|
||||
def _perform_vex_expr_Get(self, offset: LiveData, ty, **kwargs):
    """Handle a register read at ``offset`` of type ``ty``.

    If ``offset`` has no plain-``int`` constant value, the read cannot be
    resolved to a register and null data of the right size is returned.
    Otherwise the register name is looked up and the read is delegated to
    ``self.get``.
    """
    size = get_type_size_bytes(ty)
    if type(offset.const) is not int:
        return self.new_null(size)
    # unsafe: the (offset, size) pair is assumed to be present in the table
    name = self.project.arch.register_size_names[(offset.const, size)]
    return self.get(name, offset.const, size)
|
||||
|
||||
|
@ -73,7 +83,7 @@ class TypeTapperEngine(angr.engines.vex.VEXMixin):
|
|||
slot_info = self.project.arch.get_base_register(offset, size)
|
||||
if slot_info is None:
|
||||
l.error("??????? (%s, %s)", offset, size)
|
||||
return LiveData.new_null(size)
|
||||
return self.new_null(size)
|
||||
slot_name = self.project.arch.register_size_names[slot_info]
|
||||
reg_atom = RegisterAtom(self.codeloc, size, name, slot_name)
|
||||
self.blockinfo.atoms.append(reg_atom)
|
||||
|
@ -85,12 +95,12 @@ class TypeTapperEngine(angr.engines.vex.VEXMixin):
|
|||
else:
|
||||
pass # alias mismatch
|
||||
elif name in self.blockinfo.inputs:
|
||||
return LiveData.new_atom(self.blockinfo.inputs[name])
|
||||
return self.new_atom(self.blockinfo.inputs[name])
|
||||
else:
|
||||
self.blockinfo.inputs[name] = reg_atom
|
||||
self.blockinfo.ready_inputs.add(name)
|
||||
|
||||
return LiveData.new_atom(reg_atom)
|
||||
return self.new_atom(reg_atom)
|
||||
|
||||
def _perform_vex_expr_Load(self, addr: LiveData, ty, endness, **kwargs):
|
||||
size = get_type_size_bytes(ty)
|
||||
|
@ -104,15 +114,15 @@ class TypeTapperEngine(angr.engines.vex.VEXMixin):
|
|||
|
||||
mem_atom = MemoryAtom(self.codeloc, size, endness)
|
||||
self.blockinfo.atoms.append(mem_atom)
|
||||
addr.appended(DerefOp(size), size).commit(mem_atom, self.graph)
|
||||
return LiveData.new_atom(mem_atom)
|
||||
addr.appended(self.codeloc, DerefOp(size), size).commit(mem_atom, self.graph)
|
||||
return self.new_atom(mem_atom)
|
||||
|
||||
def _perform_vex_expr_CCall(self, func_name, ty, args, func=None):
    """Return null data sized for ``ty``; the call's name and arguments are not inspected."""
    return self.new_null(get_type_size_bytes(ty))
|
||||
|
||||
def _perform_vex_expr_ITE(self, cond, ifTrue: LiveData, ifFalse: LiveData):
    """Handle if-then-else by unioning both branches at the current ``codeloc``.

    ``cond`` is not used. Both branches must have the same size.
    """
    assert ifTrue.size == ifFalse.size
    return ifTrue.unioned(self.codeloc, ifFalse, ifTrue.size)
|
||||
|
||||
def _perform_vex_expr_Op(self, op, args: List[LiveData]):
    """Delegate an operation expression to ``self.op``."""
    return self.op(op, args)
|
||||
|
@ -126,39 +136,96 @@ class TypeTapperEngine(angr.engines.vex.VEXMixin):
|
|||
arg.prop_self(DataKind.Int, self.graph)
|
||||
|
||||
size = get_type_size_bytes(ret_ty)
|
||||
sign = None
|
||||
mul0, mul1 = None, None
|
||||
const0, const1 = None, None
|
||||
if op in ('Iop_Add8', 'Iop_Add16', 'Iop_Add32', 'Iop_Add64'):
|
||||
sign = 1
|
||||
elif op in ('Iop_Sub8', 'Iop_Sub16', 'Iop_Sub32', 'Iop_Sub64'):
|
||||
sign = -1
|
||||
else:
|
||||
sign = None
|
||||
elif op in ('Iop_Mul8', 'Iop_Mul16', 'Iop_Mul32', 'Iop_Mul64',
|
||||
'Iop_MullS8', 'Iop_MullS16', 'Iop_MullS32', 'Iop_MullS64',
|
||||
'Iop_MullU8', 'Iop_MullU16', 'Iop_MullU32', 'Iop_MullU64'):
|
||||
mul0 = args[0].strides
|
||||
mul1 = args[1].strides
|
||||
const0 = args[0].const
|
||||
const1 = args[1].const
|
||||
elif op in ('Iop_Shl8', 'Iop_Shl16', 'Iop_Shl32', 'Iop_Shl64'):
|
||||
if args[1].const is not None and args[1].const >= 0:
|
||||
const0 = args[0].const
|
||||
const1 = 2**args[1].const
|
||||
mul0 = args[0].strides
|
||||
mul1 = ((None, const1))
|
||||
|
||||
if sign is not None:
|
||||
assert size == args[0].size == args[1].size
|
||||
addend0 = args[0].const
|
||||
addend1 = args[1].const
|
||||
if addend0 is not None and addend1 is not None:
|
||||
const = addend0 + addend1 * sign
|
||||
stride0 = args[0].strides
|
||||
stride1 = args[1].strides
|
||||
strideC = Counter()
|
||||
if stride0:
|
||||
for key, n in stride0:
|
||||
strideC[key] += n
|
||||
else:
|
||||
const = None
|
||||
strideC[args[0]] += 1
|
||||
if stride1:
|
||||
for key, n in stride1:
|
||||
strideC[key] += n * sign
|
||||
else:
|
||||
strideC[args[1]] += sign
|
||||
|
||||
|
||||
neg1 = args[1]
|
||||
if sign == -1:
|
||||
neg1 = neg1.appended(NegOp(), neg1.size, -addend1 if addend1 is not None else None)
|
||||
input0 = args[0].appended(ConstOffsetOp(addend1 * sign) if addend1 is not None else VarOffsetOp(neg1), size)
|
||||
input1 = neg1.appended(ConstOffsetOp(addend0) if addend0 is not None else VarOffsetOp(args[0]), size)
|
||||
result = input0.unioned(input1, size, const)
|
||||
neg1 = neg1.appended(self.codeloc, NegOp(), neg1.size, tuple((k, -n) for k, n in neg1.strides))
|
||||
|
||||
input0 = args[0]
|
||||
if stride1:
|
||||
for (key, stride) in stride1:
|
||||
if key is None:
|
||||
input0 = input0.appended(self.codeloc, ConstOffsetOp(stride), size)
|
||||
else:
|
||||
input0 = input0.appended(self.codeloc, StrideOffsetOp(abs(stride)), size)
|
||||
else:
|
||||
input0 = input0.appended(self.codeloc, VarOffsetOp(neg1), size)
|
||||
|
||||
input1 = args[1]
|
||||
if stride0:
|
||||
for (key, stride) in stride0:
|
||||
if key is None:
|
||||
input1 = input1.appended(self.codeloc, ConstOffsetOp(stride), size)
|
||||
else:
|
||||
input1 = input1.appended(self.codeloc, StrideOffsetOp(abs(stride)), size)
|
||||
else:
|
||||
input1 = input1.appended(self.codeloc, VarOffsetOp(args[0]), size)
|
||||
|
||||
result = input0.unioned(self.codeloc, input1, size, tuple((key, n) for key, n in strideC.items() if n != 0))
|
||||
|
||||
elif mul0 is not None and mul1 is not None:
|
||||
if const0 is not None and const1 is not None:
|
||||
result = self.new_null(size, strides=((None, const0 * const1),))
|
||||
elif const1 is not None and len(mul0) != 0:
|
||||
result = self.new_null(size, strides=tuple((key, v * const1) for key, v in mul0))
|
||||
elif const0 is not None and len(mul1) != 0:
|
||||
result = self.new_null(size, strides=tuple((key, v * const0) for key, v in mul1))
|
||||
elif const0 is not None:
|
||||
result = self.new_null(size, strides=((args[1], const0),))
|
||||
elif const1 is not None:
|
||||
result = self.new_null(size, strides=((args[0], const1),))
|
||||
else:
|
||||
result = self.new_null(size)
|
||||
else:
|
||||
result = LiveData.new_null(size)
|
||||
result = self.new_null(size)
|
||||
|
||||
return result
|
||||
|
||||
def _handle_vex_expr_GSPTR(self, expr: pyvex.expr.GSPTR):
    """Return null data sized for the expression's result type in the current IRSB."""
    return self.new_null(get_type_size_bytes(expr.result_type(self.irsb.tyenv)))
|
||||
|
||||
def _handle_vex_expr_VECRET(self, expr: pyvex.expr.VECRET):
    """Return null data sized for the expression's result type in the current IRSB."""
    return self.new_null(get_type_size_bytes(expr.result_type(self.irsb.tyenv)))
|
||||
|
||||
def _handle_vex_expr_Binder(self, expr: pyvex.expr.Binder):
    """Return null data sized for the expression's result type in the current IRSB."""
    return self.new_null(get_type_size_bytes(expr.result_type(self.irsb.tyenv)))
|
||||
|
||||
|
||||
def _handle_vex_stmt_IMark(self, stmt: pyvex.stmt.IMark):
|
||||
|
@ -166,7 +233,7 @@ class TypeTapperEngine(angr.engines.vex.VEXMixin):
|
|||
|
||||
def _perform_vex_stmt_Put(self, offset: LiveData, data: LiveData, **kwargs):
    """Handle a register write of ``data`` at ``offset``.

    If ``offset`` has no plain-``int`` constant value, the write cannot be
    resolved to a register and null data of ``data``'s size is returned.
    Otherwise the register name is looked up and the write is delegated to
    ``self.put``.
    """
    if type(offset.const) is not int:
        return self.new_null(data.size)
    # unsafe: the (offset, size) pair is assumed to be present in the table
    name = self.project.arch.register_size_names[(offset.const, data.size)]
    return self.put(name, data, offset.const)
|
||||
|
||||
|
@ -177,7 +244,7 @@ class TypeTapperEngine(angr.engines.vex.VEXMixin):
|
|||
slot_info = self.project.arch.get_base_register(offset, data.size)
|
||||
if slot_info is None:
|
||||
l.error("??????? (%s, %s)", offset, data.size)
|
||||
return LiveData.new_null(data.size)
|
||||
return self.new_null(data.size)
|
||||
slot_name = self.project.arch.register_size_names[slot_info]
|
||||
reg_atom = RegisterAtom(self.codeloc, data.size, name, slot_name)
|
||||
self.blockinfo.atoms.append(reg_atom)
|
||||
|
@ -204,10 +271,10 @@ class TypeTapperEngine(angr.engines.vex.VEXMixin):
|
|||
|
||||
mem_atom = MemoryAtom(self.codeloc, data.size, endness)
|
||||
self.blockinfo.atoms.append(mem_atom)
|
||||
addr.appended(DerefOp(data.size), data.size).commit(mem_atom, self.graph)
|
||||
addr.appended(self.codeloc, DerefOp(data.size), data.size).commit(mem_atom, self.graph)
|
||||
data.commit(mem_atom, self.graph)
|
||||
|
||||
def _perform_vex_stmt_Dirty_call(self, func_name, ty, args, func=None):
    """Return null data sized for ``ty``, or ``None`` when ``ty`` is ``None``.

    The call's name and arguments are not inspected.
    """
    if ty is None:
        return None
    return self.new_null(get_type_size_bytes(ty))
|
||||
|
|
Loading…
Reference in New Issue