# from https://github.com/porocyon/smol , which is WTFPL-2.0

# stolen from the contrib folder in https://github.com/blackle/LZMA-Vizualizer
# (i.e. I'm stealing it from myself)

# custom elf parser because a standard one wouldn't be trustable because the
# ELFs we're parsing will be a bit wonky anyway

from struct import unpack
from typing import *

# Value of the class byte (index 4 of the ELF identification bytes).
ELFCLASS32 = 1
ELFCLASS64 = 2

# Machine identifiers.
EM_386 = 3
EM_X86_64 = 62

# Program header types (compared against Phdr.ptype).
PT_NULL = 0
PT_LOAD = 1
PT_DYNAMIC = 2
PT_INTERP = 3

# Dynamic section tags (compared against Dyn.tag).
DT_NULL = 0
DT_NEEDED = 1
DT_PLTGOT = 3
DT_STRTAB = 5
DT_SYMTAB = 6
DT_RELA = 7
DT_RELASZ = 8
DT_RELAENT = 9
DT_STRSZ = 10
DT_SYMENT = 11
DT_SONAME = 14
DT_REL = 17
DT_RELSZ = 18
DT_RELENT = 19
DT_PLTREL = 20
DT_DEBUG = 21
DT_TEXTREL = 22
DT_JMPREL = 23
DT_BIND_NOW = 24

# Section header types (compared against Shdr.type).
SHT_NULL = 0
SHT_PROGBITS = 1
SHT_SYMTAB = 2
SHT_STRTAB = 3
SHT_RELA = 4
SHT_DYNAMIC = 6
SHT_NOBITS = 8
SHT_REL = 9
SHT_DYNSYM = 11

# Section header flag bits (bit masks for Shdr.flags).
SHF_WRITE = 1 << 0
SHF_ALLOC = 1 << 1
SHF_EXECINSTR = 1 << 2
SHF_MERGE = 1 << 4
SHF_STRINGS = 1 << 5
SHF_INFO_LINK = 1 << 6

# Symbol bindings.
STB_LOCAL = 0
STB_GLOBAL = 1
STB_WEAK = 2

# Symbol types.
STT_NOTYPE = 0
STT_OBJECT = 1
STT_FUNC = 2
STT_SECTION = 3
STT_FILE = 4
STT_COMMON = 5
STT_TLS = 6
STT_GNU_IFUNC = 10

# Symbol visibilities.
STV_DEFAULT = 0
STV_INTERNAL = 1
STV_HIDDEN = 2
STV_PROTECTED = 3


class Phdr(NamedTuple):
    """One program header entry, with the same field order for 32- and 64-bit files."""

    ptype: int   # compared against the PT_* constants
    off: int     # file offset of the segment; the dyn parsers start reading here
    vaddr: int
    paddr: int
    filesz: int
    memsz: int
    flags: int
    align: int


class Dyn(NamedTuple):
    """One (tag, value) pair read from the PT_DYNAMIC segment."""

    tag: int  # compared against the DT_* constants
    val: int


class Shdr(NamedTuple):
    """One section header entry."""

    # NOTE(review): presumably the raw string-table offset until the name has
    # been resolved to text -- the resolving code is not visible; confirm.
    name: Union[int, str]
    type: int    # compared against the SHT_* constants
    flags: int
    addr: int
    offset: int  # file offset of the section contents
    size: int    # size of the section contents in bytes
    link: int
    info: int
    addralign: int
    entsize: int  # size of one table entry; used as the stride over symbol/reloc tables


class Sym(NamedTuple):
    """One symbol table entry with its name already decoded to text."""

    name: str
    value: int
    size: int
    type: int
    binding: int
    visibility: int
    shndx: int


class Rel(NamedTuple):
    """A relocation without an addend; `symbol` is the referenced Sym itself."""

    offset: int
    symbol: Sym
    type: int


class Rela(NamedTuple):
    """A relocation with an explicit addend; `symbol` is the referenced Sym itself."""

    offset: int
    symbol: Sym
    type: int
    addend: int


Reloc = Union[Rel, Rela]


class ELF(NamedTuple):
    """Everything the parser extracts from one ELF image."""

    data: bytes     # the complete input the parser was given
    ident: bytes    # first 16 bytes of `data`
    eclass: int     # byte 4 of `data`
    mach: int
    entry: int
    phdrs: Sequence[Phdr]
    dyn: Sequence[Dyn]
    shdrs: Sequence[Shdr]
    symtab: Sequence[Sym]
    dynsym: Sequence[Sym]
    relocs: Sequence[Reloc]
    is32bit: bool   # True when built by parse_32, False when built by parse_64


# NOTE(review): in the damaged source this physical line ended with the bare
# keyword "def", which belongs to readstr() on the following line.  Re-attach
# it there when the rest of the file is restored.
readstr(data: bytes, off: int) -> str: strb = bytearray() while data[off] != 0 and off < len(data): strb.append(data[off]) off = off + 1 return strb.decode('utf-8') # yeah, there's some code duplication here # idgaf def parse_phdr32(data: bytes, phoff:int, phentsz:int, phnum:int) -> Sequence[Phdr]: ps = [] for off in range(phoff, phoff+phentsz*phnum, phentsz): ptype, off, vaddr, paddr, filesz, memsz, flags, align = \ unpack(' Dyn: ds = [] off = dynp.off while True: tag, val = unpack(' Reloc: rr=[] for off in range(reloff, reloff+entsz*nrel, entsz): off, inf, add = unpack('> 8] type = inf & 0xff rr.append(Rela(off, sym, type, add) if rela else Rel(off, sym, type)) return rr def parse_shdr32(data: bytes, shoff: int, shentsz: int, shnum: int, shstrndx: int) -> Sequence[Shdr]: if shnum*shentsz+shoff > len(data) or shentsz==0 or shnum==0 or shoff==0: print("snum*shentsz+shoff",shnum*shentsz+shoff) print("len(data)",len(data)) print("shentsz",shentsz) print("shnum",shnum) print("shoff",shoff) return [] ss = [] for off in range(shoff, shoff+shentsz*shnum, shentsz): noff, typ, flags, addr, off, size, link, info, align, entsz = \ unpack(' Sequence[Sym]: ss = [] for off in range(sym.offset, sym.offset+sym.size, sym.entsize): noff, val, sz, info, other, shndx = \ unpack('> 4), other, shndx) ss.append(s) return ss#sorted(ss, key=lambda x:x.value) def parse_32(data: bytes) -> ELF: ident = data[:16] eclass = data[4] mach = unpack(' 0 else [] if len(dynsymsh) and len(dynstrsh): dynsym = parse_sym32(data, symtabsh[0], strtabsh[0]) \ if len(shdrs) > 0 else [] relocs = [] # TODO: use sh.link to use the correct symbol table for sh in relash: relocs += parse_reloc32(data, sh.offset, sh.size//sh.entsize, sh.entsize, symtab, True) for sh in relsh: relocs += parse_reloc32(data, sh.offset, sh.size//sh.entsize, sh.entsize, symtab, False) # TODO: relocs from DT_RELA, DT_REL return ELF(data, ident, eclass, mach, entry, phdrs, dyn, shdrs, symtab, dynsym, relocs, True) def parse_phdr64(data: 
bytes, phoff:int, phentsz:int, phnum:int) -> Sequence[Phdr]: ps = [] for off in range(phoff, phoff+phentsz*phnum, phentsz): # TODO # what is TODO exactly?? ptype, flags, off, vaddr, paddr, filesz, memsz, align = \ unpack(' Dyn: ds = [] off = dynp.off while True: tag, val = unpack(' Reloc: rr=[] for off in range(reloff, reloff+entsz*nrel, entsz): off, inf, add = unpack('> 32] type = inf & 0xffffffff rr.append(Rela(off, sym, type, add) if rela else Rel(off, sym, type)) return rr def parse_shdr64(data: bytes, shoff: int, shentsz: int, shnum: int, shstrndx: int) -> Sequence[Shdr]: if shnum*shentsz+shoff > len(data) or shentsz==0 or shnum==0 or shoff==0: return [] ss = [] for off in range(shoff, shoff+shentsz*shnum, shentsz): noff, typ, flags, addr, off, size, link, info, align, entsz = \ unpack(' Sequence[Sym]: ss = [] for off in range(sym.offset, sym.offset+sym.size, sym.entsize): noff, info, other, shndx, value, sz = \ unpack('> 4), other, shndx) ss.append(s) return ss#sorted(ss, key=lambda x:x.value) def parse_64(data: bytes) -> ELF: ident = data[:16] eclass = data[4] mach = unpack(' 0 else [] if len(dynsymsh) and len(dynstrsh): dynsym = parse_sym64(data, symtabsh[0], strtabsh[0]) \ if len(shdrs) > 0 else [] relocs = [] # TODO: use sh.link to use the correct symbol table for sh in relash: relocs += parse_reloc32(data, sh.offset, sh.size//sh.entsize, sh.entsize, symtab, True) for sh in relsh: relocs += parse_reloc32(data, sh.offset, sh.size//sh.entsize, sh.entsize, symtab, False) # TODO: relocs from DT_RELA, DT_REL return ELF(data, ident, eclass, mach, entry, phdrs, dyn, shdrs, symtab, dynsym, relocs, False) def parse(data: bytes) -> ELF: assert data[:4] == b'\x7FELF', "Not a valid ELF file" # good enough ecls = data[4] if ecls == ELFCLASS32: return parse_32(data) elif ecls == ELFCLASS64: return parse_64(data) else: emch = unpack('