stuff
This commit is contained in:
commit
a532133a5d
|
@ -0,0 +1,3 @@
|
||||||
|
hello.glibc*
|
||||||
|
hello.musl*
|
||||||
|
manip-exe
|
|
@ -0,0 +1,26 @@
|
||||||
|
|
||||||
|
.PHONY: default all clean
|
||||||
|
|
||||||
|
GLIBCC ?= $(CC)
|
||||||
|
MUSLCC ?= x86_64-linux-musl-gcc
|
||||||
|
|
||||||
|
default: all
|
||||||
|
|
||||||
|
manip-exe: manip-exe.c manip-exe.h
|
||||||
|
$(CC) -o "$@" $< -Og -g
|
||||||
|
|
||||||
|
# 'export-dynamic' so that functions defined in the exe actually end up in .dynsym
|
||||||
|
hello.glibc: hello.c
|
||||||
|
$(GLIBCC) -o "$@" $< -Wl,--export-dynamic -Og -g
|
||||||
|
hello.musl: hello.c
|
||||||
|
$(MUSLCC) -o "$@" $< -Wl,--export-dynamic -Og -g
|
||||||
|
|
||||||
|
hello.%.manip: ./manip-exe hello.%
|
||||||
|
./$^ "$@" main2
|
||||||
|
@chmod +x "$@"
|
||||||
|
|
||||||
|
all: hello.glibc.manip hello.musl.manip
|
||||||
|
|
||||||
|
clean:
|
||||||
|
@$(RM) -v hello.glibc hello.glibc.manip manip-exe hello.musl hello.musl.manip
|
||||||
|
|
|
@ -0,0 +1,49 @@
|
||||||
|
# relocmain
|
||||||
|
|
||||||
|
Swap out an ELF executable's `main` function with another function from its
|
||||||
|
symbol table, without touching its code at all.
|
||||||
|
|
||||||
|
## How it works
|
||||||
|
|
||||||
|
The typical ELF entrypoint isn't `main` directly, but rather `_start`, an
|
||||||
|
assembly stub that first does some runtime initialization before calling
|
||||||
|
`main`. But instead of directly calling the latter function, it is often
|
||||||
|
passed as a parameter to `__libc_start_main` (which in turn calls `main` after
|
||||||
|
doing *more* initialization stuff).
|
||||||
|
|
||||||
|
'Relocations' are instructions for an ELF linker or loader on how to patch a
|
||||||
|
binary when moving it around in memory or when resolving functions. Some
|
||||||
|
examples are "this value here is an absolute address, so when you move me
|
||||||
|
around, please keep it updated" (`R_<arch>_RELATIVE`), or "I'm using this
|
||||||
|
external function, and I'm accessing it through a PLT, so when you resolve
|
||||||
|
the symbol, please put it in the PLT" (`R_<arch>_JUMP_SLOT`).
|
||||||
|
|
||||||
|
This program adds another relocation entry that patches the code that loads
|
||||||
|
the address of `main` (to pass it to `__libc_start_main`) with the address of a
|
||||||
|
different symbol. Due to how relocations work, it is restricted to replacing
|
||||||
|
it with another symbol that is either imported from another library, or
|
||||||
|
exported by the executable itself.
|
||||||
|
|
||||||
|
Currently, only x86_64 is supported (but it's not too hard to add support for
|
||||||
|
other instruction sets, 32-bit ELF support is a bit harder). The code looks for
|
||||||
|
a `lea rdi, [rel <pcrel32>]` instruction near `_start`, this is often the
|
||||||
|
instruction that loads `main` to pass it to `__libc_start_main` (at least on
|
||||||
|
glibc and musl it is). This then gets overwritten *at runtime* by `ld.so` due
|
||||||
|
to a relocation of type `R_X86_64_PC32` targeting the `pcrel32` operand of
|
||||||
|
that instruction --- all without touching any code of the executable.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
```sh
|
||||||
|
# build only the tool
|
||||||
|
make manip-exe
|
||||||
|
# usage: ./manip-exe <input> <output> <symbol>
|
||||||
|
|
||||||
|
# also show some example stuff
|
||||||
|
make all
|
||||||
|
```
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
See [LICENSE](/LICENSE).
|
||||||
|
|
|
@ -0,0 +1,10 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
// Regular entry point of the demo program: prints a greeting and exits with
// status 42, which makes it easy to tell apart from main2 (status 69).
int main(int argc, char* argv[]) {
    // neither argument is used; the casts only silence unused-parameter warnings
    (void)argc;
    (void)argv;

    printf("hello, world\n");
    return 42;
}
|
||||||
|
|
||||||
|
// Replacement entry point used by the demo. It has external linkage so that,
// when the executable is linked with --export-dynamic, it ends up in the
// dynamic symbol table where a relocation can refer to it.
extern int main2(int argc, char* argv[]);

// Takes the same (argc, argv) parameters as main, prints a different message
// and returns 69 so the swap is visible in the exit status.
int main2(int argc, char* argv[]) {
    // neither argument is used; the casts only silence unused-parameter warnings
    (void)argc;
    (void)argv;

    printf("be gay do crimes\n");
    return 69;
}
|
||||||
|
|
|
@ -0,0 +1,66 @@
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
// TODO: allow for non-native-bitness ELF parsing
|
||||||
|
#include "manip-exe.h"
|
||||||
|
|
||||||
|
// Command-line driver: manip-exe <input> <output> <symbol>
//
// Reads the whole input file into memory, lets manip_elf() patch the image in
// place so that <symbol> takes the place of `main`, then writes the patched
// image to <output>.
//
// Returns 0 on success and 1 on any failure (bad usage, I/O error, allocation
// failure, input too small to hold an ELF header, or manip_elf() reporting an
// error).
int main(int argc, char* argv[]) {
    if (argc < 4) {
        fprintf(stderr,"usage: %s <input> <output> <symbol>\n", argv[0]);
        return 1; // a usage error is a failure, don't report success
    }
    const char* symname = argv[3];

    int rc = 1;        // pessimistic default, set to 0 only at the very end
    void* blob = NULL; // in-memory copy of the input file
    size_t size = 0;
    long s = -1;

    FILE* f = fopen(argv[1], "rb");
    if (f == NULL) {
        fprintf(stderr, "%s: can't open input file '%s'\n", argv[0], argv[1]);
        return 1;
    }

    // determine the file size by seeking to the end; ftell() returns -1 on error
    if (fseek(f, 0, SEEK_END) == 0) s = ftell(f);
    if (s < 0 || fseek(f, 0, SEEK_SET) != 0) {
        fprintf(stderr, "%s: can't determine size of input file '%s'\n", argv[0], argv[1]);
        goto out;
    }
    size = (size_t)s;
    fprintf(stderr, "s=%zu\n", size);

    // manip_elf() reads the ELF header unconditionally, so make sure the
    // buffer is at least that large
    if (size < sizeof(ElfW(Ehdr))) {
        fprintf(stderr, "%s: input file '%s' is too small to be an ELF file\n", argv[0], argv[1]);
        goto out;
    }

    blob = calloc(1, size);
    if (blob == NULL) {
        fprintf(stderr, "%s: out of memory\n", argv[0]);
        goto out;
    }

    if (fread(blob, 1, size, f) != size) {
        fprintf(stderr, "%s: can't read input file '%s'\n", argv[0], argv[1]);
        goto out;
    }
    fclose(f);
    f = NULL;

    if (manip_elf(blob, symname)) {
        fprintf(stderr, "%s: ELF patching failed\n", argv[0]);
        goto out;
    }

    f = fopen(argv[2], "wb");
    if (f == NULL) {
        fprintf(stderr, "%s: can't open output file '%s'\n", argv[0], argv[2]);
        goto out;
    }

    if (fwrite(blob, 1, size, f) != size) {
        fprintf(stderr, "%s: can't write output file '%s'\n", argv[0], argv[2]);
        goto out;
    }

    // fclose() flushes buffered data, so a failure here means a short write
    if (fclose(f) != 0) {
        f = NULL;
        fprintf(stderr, "%s: can't write output file '%s'\n", argv[0], argv[2]);
        goto out;
    }
    f = NULL;

    rc = 0;

out:
    if (f != NULL) fclose(f);
    free(blob);
    return rc;
}
|
||||||
|
|
|
@ -0,0 +1,196 @@
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include <elf.h>
|
||||||
|
#include <link.h>
|
||||||
|
|
||||||
|
// The code below assumes the native ElfW() types are the 64-bit ones.
// sizeof cannot be evaluated by the preprocessor (#if), so this is checked
// by the compiler proper instead.
_Static_assert(sizeof(ElfW(Ehdr)) == sizeof(Elf64_Ehdr),
               "Need 64-bit ELF for now, sorry");
|
||||||
|
|
||||||
|
// Patch the in-memory ELF image `elf` so that, at load time, the dynamic
// linker rewrites the operand of the first `lea rdi, [rel <pcrel32>]` found at
// or after the entry point to reference the dynamic symbol `symname`.
//
// What gets modified in the image:
//   * every executable PT_LOAD segment is additionally marked writable
//   * one R_X86_64_PC32 entry is appended to the DT_RELA table; DT_RELASZ,
//     the first program header containing the table and the SHT_RELA section
//     header at the same offset grow by one entry, and every other SHT_RELA
//     section is shifted down by one entry to make room
//
// The size of the image is not known here, so offsets read from the file are
// not bounds-checked, and the new entry is written just past the current end
// of the table. DT_* pointers are used as file offsets, i.e. the tables are
// assumed to sit at a file offset equal to their virtual address.
//
// elf:     start of the complete file image, modified in place
// symname: NUL-terminated name to look up in the dynamic symbol table
//
// Returns false on success and true on failure.
static bool manip_elf(ElfW(Ehdr)* elf, const char* symname) {
    const size_t base = (size_t)elf;

    if (elf->e_ident[0] != ELFMAG0 || elf->e_ident[1] != ELFMAG1
            || elf->e_ident[2] != ELFMAG2 || elf->e_ident[3] != ELFMAG3) {
        printf("bad elf magic\n");
        return true;
    }

    // TODO: port to other architectures
    // what would be needed for a port:
    // * fix code looking for 'lea rdi, [rel main]'
    // * fix relocation type
    // * thats it i think??? (well also fix this validation stuff)
    if (elf->e_ident[EI_CLASS] != ELFCLASS64 || elf->e_ident[EI_DATA] != ELFDATA2LSB) {
        printf("not a 64-bit little-endian elf\n");
        return true;
    }
    if (elf->e_type != ET_DYN || elf->e_machine != EM_X86_64) {
        printf("need dynamic x86_64 elf\n");
        return true;
    }

    // pass 1 over the program headers: find the executable segment and the
    // dynamic section
    ElfW(Phdr)* phdr = (ElfW(Phdr)*)(base + elf->e_phoff);
    ElfW(Phdr)* ph_loadexec = NULL;
    ElfW(Dyn)* dyn = NULL;
    for (size_t i = 0; i < elf->e_phnum;
            ++i, phdr = (ElfW(Phdr)*)((size_t)phdr + elf->e_phentsize)) {
        if (phdr->p_type == PT_LOAD && (phdr->p_flags & PF_X)) {
            // need this at runtime for the relocs to work (yeah, sorry)
            ph_loadexec = phdr;
            phdr->p_flags |= PF_W;
        }

        if (phdr->p_type == PT_DYNAMIC) {
            dyn = (ElfW(Dyn)*)(base + phdr->p_offset);
            printf("dyn=%zu\n", (size_t)phdr->p_offset);
        }
    }

    if (dyn == NULL) {
        fprintf(stderr, "No DYNAMIC table! bailing out...\n");
        return true;
    }
    if (ph_loadexec == NULL) {
        // without this check the entry point lookup below would dereference
        // an unset pointer
        fprintf(stderr, "No executable LOAD segment! bailing out...\n");
        return true;
    }

    // relasz/relaent/jmprel point INTO the dynamic table so the values can
    // be updated in place later on
    ElfW(Addr)* relasz = NULL;
    ElfW(Addr)* relaent = NULL;
    ElfW(Addr)* jmprel = NULL;
    ElfW(Rela)* relatab = NULL;
    ElfW(Sym)* dynsym = NULL;
    const char* dynstr = NULL;
    size_t strsz = 0, syment = 0;

    ElfW(Addr) relaoff = 0;

    for (; dyn->d_tag != DT_NULL; ++dyn) {
        switch (dyn->d_tag) {
        case DT_RELA:
            relatab = (ElfW(Rela)*)(base + dyn->d_un.d_ptr);
            relaoff = dyn->d_un.d_ptr;
            break;
        case DT_RELASZ:  relasz  = (ElfW(Addr)*)(&dyn->d_un.d_ptr); break;
        case DT_RELAENT: relaent = (ElfW(Addr)*)(&dyn->d_un.d_ptr); break;
        case DT_JMPREL:  jmprel  = (ElfW(Addr)*)(&dyn->d_un.d_ptr); break;
        case DT_REL:
            printf("WARN: REL table present in DYN\n");
            break;
        case DT_SYMTAB: dynsym = (ElfW(Sym)*)(base + dyn->d_un.d_ptr); break;
        case DT_STRTAB: dynstr = (const char*)(base + dyn->d_un.d_ptr); break;
        case DT_SYMENT: syment = dyn->d_un.d_val; break;
        case DT_STRSZ:  strsz  = dyn->d_un.d_val; break;
        default: break;
        }
    }

    // a zero RELAENT is rejected too: it is used as a divisor further down
    if (!relasz || !relaent || !relatab || *relaent == 0) {
        fprintf(stderr, "No RELA/RELASZ/RELAENT in DYN!\n");
        return true;
    }
    if (!dynsym || !dynstr || !strsz || !syment) {
        fprintf(stderr, "no SYMTAB/STRTAB/STRSZ/SYMENT in DYN!\n");
        return true;
    }

    // now we have everything to start patching stuff

    static const uint8_t magic_code[] = {0x48,0x8d,0x3d}; // lea rdi, [rel <pcrel32>]
    const size_t magic_len = sizeof(magic_code)/sizeof(*magic_code);

    // assumption: the first matching instruction loads 'main' into rdi
    // both glibc and musl load main this way, they could do it in a different
    // way, but apparently it's not the case
    ElfW(Addr) reloctarget = 0;
    ElfW(Addr) entry_vma = elf->e_entry;
    ElfW(Addr) entry_lma = elf->e_entry - ph_loadexec->p_vaddr + ph_loadexec->p_offset;
    printf("entry_vma=0x%zx, entry_lma=0x%zx\n", (size_t)entry_vma, (size_t)entry_lma);

    const uint8_t* entry_mem = (const uint8_t*)(base + entry_lma);
    // position of the entry point relative to the start of the segment
    const size_t seg_pos = (size_t)(entry_lma - ph_loadexec->p_offset);
    const size_t seg_len = (size_t)ph_loadexec->p_filesz;

    // only scan positions where the whole opcode still lies inside the
    // segment's file contents
    if (seg_pos <= seg_len && seg_len - seg_pos >= magic_len) {
        const size_t last = seg_len - seg_pos - magic_len;
        for (size_t i = 0; i <= last; ++i) {
            if (memcmp(entry_mem + i, magic_code, magic_len) != 0) continue;

            // found it! the relocation targets the pcrel32 operand that
            // directly follows the opcode bytes
            reloctarget = entry_vma + i + magic_len;
            printf("target = 0x%zx\n", (size_t)reloctarget);
            break;
        }
    }
    if (!reloctarget) {
        printf("couldn't find relocation target...\n");
        return true;
    }

    // look up the replacement symbol; index 0 is skipped
    // ASSUMPTION: the string table directly follows the symbol table, its
    // start is used as the end marker of the symbol table
    ElfW(Word) sym = 0;

    for (size_t i = 0; (size_t)dynsym < (size_t)dynstr;
            ++i, dynsym = (ElfW(Sym)*)((size_t)dynsym + syment)) {
        if (i && dynsym->st_name != 0 && dynsym->st_name < strsz
                && !strcmp(symname, dynstr + dynsym->st_name)) {
            sym = (ElfW(Word))i;
            break;
        }
    }

    if (!sym) {
        printf("replacement symbol '%s' not found!\n", symname);
        return true;
    }
    printf("symbol '%s' -> index %zu\n", symname, (size_t)sym);

    ElfW(Rela) extrarel;
    extrarel.r_offset = reloctarget;
    extrarel.r_info = ELF64_R_INFO(sym, R_X86_64_PC32); // TODO: make portable
    extrarel.r_addend = -4; // TODO: make portable

    printf("extrarel: *0x%016zx = 0x%016zx + %ld\n", (size_t)extrarel.r_offset,
            (size_t)extrarel.r_info, (long)extrarel.r_addend);

    // resize the phdr containing the relocations: the first header whose
    // file range [p_offset, p_offset + p_filesz) contains the RELA table
    phdr = (ElfW(Phdr)*)(base + elf->e_phoff);
    for (size_t i = 0; i < elf->e_phnum;
            ++i, phdr = (ElfW(Phdr)*)((size_t)phdr + elf->e_phentsize)) {
        if (phdr->p_offset <= relaoff && relaoff < phdr->p_offset + phdr->p_filesz) {
            printf("rela phdr index = %zu\n", i);
            phdr->p_filesz += *relaent;
            phdr->p_memsz += *relaent;

            break;
        }
    }

    // resize the shdr containing the relocations
    // FIXME: ASSUMPTION: the relocation tables end up at the very end of this
    // phdr, first .rela.dyn and then .rela.plt, and no others
    // things will break BADLY if this isn't true, and it's hard to detect
    // if this will happen, so watch out!
    ElfW(Shdr)* shdr = (ElfW(Shdr)*)(base + elf->e_shoff);
    for (size_t i = 0; i < elf->e_shnum;
            ++i, shdr = (ElfW(Shdr)*)((size_t)shdr + elf->e_shentsize)) {
        if (shdr->sh_type == SHT_REL) {
            printf("WARN: REL table present\n");
            continue;
        }
        if (shdr->sh_type != SHT_RELA) continue;

        if (shdr->sh_offset == relaoff) { // .rela.dyn
            shdr->sh_size += shdr->sh_entsize; // one extra entry
        } else { // .rela.plt
            if (jmprel && *jmprel != shdr->sh_offset) {
                printf("WARN: RELA tables look fishy\n");
            }

            // move one entry below to make room
            memmove((void*)(base + shdr->sh_offset + shdr->sh_entsize),
                    (void*)(base + shdr->sh_offset),
                    shdr->sh_size);
            shdr->sh_offset += shdr->sh_entsize;
            if (jmprel) *jmprel += shdr->sh_entsize;
        }
    }

    // FINALLY write back the actually relevant stuff
    printf("relasz = %zu, index = %zu\n", (size_t)*relasz, (size_t)(*relasz / *relaent));
    relatab[*relasz / *relaent] = extrarel;
    *relasz += *relaent;

    printf("relasz now %zu\n", (size_t)*relasz);

    return false;
}
|
||||||
|
|
Loading…
Reference in New Issue