stuff
This commit is contained in:
commit
a532133a5d
|
@ -0,0 +1,3 @@
|
|||
hello.glibc*
|
||||
hello.musl*
|
||||
manip-exe
|
|
@ -0,0 +1,26 @@
|
|||
|
||||
# Build script for the relocmain demo: builds the manip-exe tool, two variants
# of the hello test program, and a patched copy of each variant.
.PHONY: default all clean
|
||||
|
||||
# Compiler used for the hello.glibc build; defaults to $(CC).
GLIBCC ?= $(CC)
|
||||
# Compiler used for the hello.musl build.
MUSLCC ?= x86_64-linux-musl-gcc
|
||||
|
||||
# First target in the file, so a bare `make` builds `all`.
default: all
|
||||
|
||||
# The patching tool. manip-exe.h is listed only as a dependency; `$<` passes
# just manip-exe.c to the compiler.
manip-exe: manip-exe.c manip-exe.h
|
||||
$(CC) -o "$@" $< -Og -g
|
||||
|
||||
# 'export-dynamic' so that functions defined in the exe actually end up in .dynsym
|
||||
hello.glibc: hello.c
|
||||
$(GLIBCC) -o "$@" $< -Wl,--export-dynamic -Og -g
|
||||
hello.musl: hello.c
|
||||
$(MUSLCC) -o "$@" $< -Wl,--export-dynamic -Og -g
|
||||
|
||||
# Patch hello.<variant> into hello.<variant>.manip. `$^` expands to the two
# prerequisites in order (the tool, then the input file), so the recipe runs
# the tool with <input> <output> main2. The output is then marked executable.
hello.%.manip: ./manip-exe hello.%
|
||||
./$^ "$@" main2
|
||||
@chmod +x "$@"
|
||||
|
||||
# Everything: both patched example binaries (and, transitively, their inputs).
all: hello.glibc.manip hello.musl.manip
|
||||
|
||||
# Remove every file this Makefile builds.
clean:
|
||||
@$(RM) -v hello.glibc hello.glibc.manip manip-exe hello.musl hello.musl.manip
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
# relocmain
|
||||
|
||||
Swap out an ELF executable's `main` function with another function from its
|
||||
symbol table, without touching its code at all.
|
||||
|
||||
## How it works
|
||||
|
||||
The typical ELF entrypoint isn't `main` directly, but rather `_start`, an
|
||||
assembly stub that first does some runtime initialization before calling
|
||||
`main`. But instead of directly calling the latter function, it is often
|
||||
passed as a parameter to `__libc_start_main` (which in turn calls `main` after
|
||||
doing *more* initialization stuff).
|
||||
|
||||
'Relocations' are instructions for an ELF linker or loader on how to patch a
|
||||
binary when moving it around in memory or when resolving functions. Some
|
||||
examples are "this value here is an absolute address, so when you move me
|
||||
around, please keep it updated" (`R_<arch>_RELATIVE`), or "I'm using this
|
||||
external function, and I'm accessing it through a PLT, so when you resolve
|
||||
the symbol, please put it in the PLT" (`R_<arch>_JUMP_SLOT`).
|
||||
|
||||
This program adds another relocation entry that replaces the code that loads
|
||||
the address of `main` (to pass it to `__libc_start_main`) with the address of a
|
||||
different symbol. Due to how relocations work, it is restricted to replacing
|
||||
it with another symbol that is either imported from another library, or
|
||||
exported by the executable itself.
|
||||
|
||||
Currently, only x86_64 is supported (but it's not too hard to add support for
|
||||
other instruction sets; 32-bit ELF support is a bit harder). The code looks for
|
||||
a `lea rdi, [rel <pcrel32>]` instruction near `_start`; this is often the
|
||||
instruction that loads `main` to pass it to `__libc_start_main` (at least on
|
||||
glibc and musl it is). This then gets overwritten *at runtime* by `ld.so` due
|
||||
to a relocation of type `R_X86_64_PC32` targeting the `pcrel32` operand of
|
||||
that instruction --- all without touching any code of the executable.
|
||||
|
||||
## Usage
|
||||
|
||||
```sh
|
||||
# build only the tool
|
||||
make manip-exe
|
||||
# usage: ./manip-exe <input> <output> <symbol>
|
||||
|
||||
# also show some example stuff
|
||||
make all
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
See [LICENSE](/LICENSE).
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
#include <stdio.h>
|
||||
|
||||
// Regular entry point of the demo program: prints a greeting and exits with
// status 42. The arguments are unused.
//
// The second parameter was previously declared as `char** argv[]`, which is
// `char***` and not a valid signature for main; it is now `char* argv[]`.
int main(int argc, char* argv[]) {
    (void)argc;
    (void)argv;
    printf("hello, world\n");
    return 42;
}
|
||||
|
||||
// Alternative entry point with the same shape as main: prints a different
// message and returns 69. The arguments are unused.
//
// Declared `extern` so the symbol has external linkage. The parameter type
// was previously `char** argv[]` (i.e. `char***`); it now matches main's
// `char* argv[]`.
extern int main2(int argc, char* argv[]);
int main2(int argc, char* argv[]) {
    (void)argc;
    (void)argv;
    printf("be gay do crimes\n");
    return 69;
}
|
||||
|
|
@ -0,0 +1,66 @@
|
|||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
|
||||
// TODO: allow for non-native-bitness ELF parsing
|
||||
#include "manip-exe.h"
|
||||
|
||||
// Command-line driver: manip-exe <input> <output> <symbol>
//
// Reads the whole input file into memory, lets manip_elf() patch the image in
// place using <symbol> as the replacement symbol, and writes the same number
// of bytes to the output file.
//
// Returns 0 on success and 1 on any failure: wrong usage, I/O error,
// allocation failure, input too small to hold an ELF header, or manip_elf()
// reporting an error. All resources are released on every path.
int main(int argc, char* argv[]) {
    if (argc < 4) {
        fprintf(stderr,"usage: %s <input> <output> <symbol>\n", argv[0]);
        return 1; // wrong usage is an error, not a success
    }
    const char* symname = argv[3];

    int rc = 1;        // pessimistic default; set to 0 only at the very end
    void* blob = NULL; // in-memory copy of the input file
    long s = -1;       // size of the input file in bytes

    FILE* f = fopen(argv[1], "rb");
    if (f == NULL) {
        fprintf(stderr, "%s: can't open input file '%s'\n", argv[0], argv[1]);
        return 1;
    }

    // determine the file size by seeking to the end
    if (fseek(f, 0, SEEK_END) == 0) {
        s = ftell(f);
    }
    if (s < 0 || fseek(f, 0, SEEK_SET) != 0) {
        fprintf(stderr, "%s: can't determine size of input file '%s'\n", argv[0], argv[1]);
        goto out;
    }
    fprintf(stderr, "s=%ld\n", s);

    // manip_elf() reads the ELF header unconditionally, so make sure it exists
    if ((size_t)s < sizeof(ElfW(Ehdr))) {
        fprintf(stderr, "%s: input file '%s' is too small to be an ELF file\n", argv[0], argv[1]);
        goto out;
    }

    blob = calloc(1, (size_t)s);
    if (blob == NULL) {
        fprintf(stderr, "%s: out of memory\n", argv[0]);
        goto out;
    }

    if (fread(blob, 1, (size_t)s, f) != (size_t)s) {
        fprintf(stderr, "%s: can't read input file '%s'\n", argv[0], argv[1]);
        goto out;
    }
    fclose(f);
    f = NULL;

    if (manip_elf(blob, symname)) {
        fprintf(stderr, "%s: ELF patching failed\n", argv[0]);
        goto out;
    }

    f = fopen(argv[2], "wb");
    if (f == NULL) {
        fprintf(stderr, "%s: can't open output file '%s'\n", argv[0], argv[2]);
        goto out;
    }

    if (fwrite(blob, 1, (size_t)s, f) != (size_t)s) {
        fprintf(stderr, "%s: can't write output file '%s'\n", argv[0], argv[2]);
        goto out;
    }

    // fclose flushes buffered data, so a failure here means a short write
    if (fclose(f) != 0) {
        f = NULL;
        fprintf(stderr, "%s: can't write output file '%s'\n", argv[0], argv[2]);
        goto out;
    }
    f = NULL;

    rc = 0;

out:
    if (f != NULL) {
        fclose(f);
    }
    free(blob);
    return rc;
}
|
||||
|
|
@ -0,0 +1,196 @@
|
|||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <elf.h>
|
||||
#include <link.h>
|
||||
|
||||
// The code below only handles the 64-bit ELF structures. `sizeof` cannot be
// evaluated by the preprocessor, so this has to be a compile-time assertion
// rather than an `#if`.
_Static_assert(sizeof(ElfW(Ehdr)) == sizeof(Elf64_Ehdr),
               "Need 64-bit ELF for now, sorry");
|
||||
|
||||
// Patch an in-memory ELF image so that a relocation overwrites the operand of
// the first `lea rdi, [rel <pcrel32>]` found at or after the entry point with
// the address of the dynamic symbol `symname`.
//
// elf:     start of the complete file image; it is modified in place.
// symname: name to look up in the dynamic symbol table.
//
// Returns false on success and true on failure (note the inverted sense).
//
// What gets modified:
//   * every executable PT_LOAD segment is additionally marked writable
//   * one R_X86_64_PC32 entry is appended to the DT_RELA table and DT_RELASZ
//     grows by one entry
//   * every other SHT_RELA section is moved down by one entry to make room,
//     and DT_JMPREL is adjusted accordingly
//   * the program header containing the RELA table and the matching section
//     header grow by one entry
//
// Limitations:
//   * d_ptr values from the dynamic table are used as file offsets
//   * the size of the image is not known here, so no offset taken from the
//     file is bounds-checked; the image must have room after the relocation
//     tables for the extra entry (see the FIXME below)
static bool manip_elf(ElfW(Ehdr)* elf, const char* symname) {
    uint8_t* base = (uint8_t*)elf;

    if (elf->e_ident[0] != ELFMAG0 || elf->e_ident[1] != ELFMAG1
            || elf->e_ident[2] != ELFMAG2 || elf->e_ident[3] != ELFMAG3) {
        printf("bad elf magic\n");
        return true;
    }

    // TODO: port to other architectures
    // what would be needed for a port:
    // * fix code looking for 'lea rdi, [rel main]'
    // * fix relocation type
    // * thats it i think??? (well also fix this validation stuff)
    if (elf->e_ident[EI_CLASS] != ELFCLASS64 || elf->e_ident[EI_DATA] != ELFDATA2LSB) {
        printf("not a 64-bit little-endian elf\n");
        return true;
    }
    if (elf->e_type != ET_DYN || elf->e_machine != EM_X86_64) {
        printf("need dynamic x86_64 elf\n");
        return true;
    }

    // walk the program headers: remember the (last) executable PT_LOAD and
    // locate the dynamic table
    ElfW(Phdr)* phdr = (ElfW(Phdr)*)(base + elf->e_phoff);
    ElfW(Phdr)* ph_loadexec = NULL;
    ElfW(Dyn)* dyn = NULL;
    for (size_t i = 0; i < elf->e_phnum;
            ++i, phdr = (ElfW(Phdr)*)((uint8_t*)phdr + elf->e_phentsize)) {
        if (phdr->p_type == PT_LOAD) {
            // need this at runtime for the relocs to work (yeah, sorry)
            if (phdr->p_flags & PF_X) {
                ph_loadexec = phdr;
                phdr->p_flags |= PF_W;
            }
        }

        if (phdr->p_type == PT_DYNAMIC) {
            dyn = (ElfW(Dyn)*)(base + phdr->p_offset);
            printf("dyn=%zu\n", (size_t)phdr->p_offset);
        }
    }

    if (dyn == NULL) {
        fprintf(stderr, "No DYNAMIC table! bailing out...\n");
        return true;
    }
    // without this check ph_loadexec would be dereferenced uninitialised below
    if (ph_loadexec == NULL) {
        fprintf(stderr, "No executable LOAD segment! bailing out...\n");
        return true;
    }

    // relasz/relaent/jmprel point INTO the dynamic table so they can be
    // updated in place later on
    ElfW(Addr)* relasz = NULL, *relaent = NULL, *jmprel = NULL;
    ElfW(Rela)* relatab = NULL;
    ElfW(Sym)* dynsym = NULL;
    const char* dynstr = NULL;
    size_t strsz = 0, syment = 0;

    ElfW(Addr) relaoff = 0;

    for (; dyn->d_tag != DT_NULL; ++dyn) {
        if (dyn->d_tag == DT_RELA) {
            relatab = (ElfW(Rela)*)(base + dyn->d_un.d_ptr);
            relaoff = dyn->d_un.d_ptr;
        }
        if (dyn->d_tag == DT_RELASZ) relasz = (ElfW(Addr)*)(&dyn->d_un.d_ptr);
        if (dyn->d_tag == DT_RELAENT) relaent = (ElfW(Addr)*)(&dyn->d_un.d_ptr);
        if (dyn->d_tag == DT_JMPREL) jmprel = (ElfW(Addr)*)(&dyn->d_un.d_ptr);

        if (dyn->d_tag == DT_REL) {
            printf("WARN: REL table present in DYN\n");
        }

        if (dyn->d_tag == DT_SYMTAB) dynsym = (ElfW(Sym)*)(base + dyn->d_un.d_ptr);
        if (dyn->d_tag == DT_STRTAB) dynstr = (const char*)(base + dyn->d_un.d_ptr);
        if (dyn->d_tag == DT_SYMENT) syment = dyn->d_un.d_val;
        if (dyn->d_tag == DT_STRSZ ) strsz = dyn->d_un.d_val;
    }

    if (!relasz || !relaent || !relatab) {
        fprintf(stderr, "No RELA/RELASZ/RELAENT in DYN!\n");
        return true;
    }
    // *relaent is used as a divisor when computing the new entry's index
    if (*relaent == 0) {
        fprintf(stderr, "RELAENT in DYN is zero!\n");
        return true;
    }
    if (!dynsym || !dynstr || !strsz || !syment) {
        fprintf(stderr, "no SYMTAB/STRTAB/STRSZ/SYMENT in DYN!\n");
        return true;
    }

    // now we have everything to start patching stuff

    static const uint8_t magic_code[] = {0x48,0x8d,0x3d}; // lea rdi, [rel <pcrel32>]
    // full instruction = the three bytes above plus the 32-bit operand
    const size_t insn_len = sizeof(magic_code) + sizeof(uint32_t);

    // assumption: the first matching instruction loads 'main' into rdi
    // both glibc and musl load main this way, they could do it in a different
    // way, but apparently it's not the case
    ElfW(Addr) reloctarget = 0;
    ElfW(Addr) entry_vma = elf->e_entry;
    ElfW(Addr) entry_lma = elf->e_entry - ph_loadexec->p_vaddr + ph_loadexec->p_offset;
    printf("entry_vma=0x%zx, entry_lma=0x%zx\n", (size_t)entry_vma, (size_t)entry_lma);

    // number of file-backed bytes from the entry point to the end of the
    // executable segment; 0 if the entry point is not inside that segment
    size_t avail = 0;
    if (entry_vma >= ph_loadexec->p_vaddr
            && entry_vma - ph_loadexec->p_vaddr < ph_loadexec->p_filesz) {
        avail = ph_loadexec->p_filesz - (entry_vma - ph_loadexec->p_vaddr);
    }

    const uint8_t* entry_mem = base + entry_lma;
    // only consider positions where the whole instruction fits in the segment
    for (size_t i = 0; i + insn_len <= avail; ++i) {
        if (memcmp(entry_mem + i, magic_code, sizeof(magic_code)) == 0) {
            // found it! the relocation targets the operand, right after the opcode
            reloctarget = entry_vma + i + sizeof(magic_code);
            printf("target = 0x%zx\n", (size_t)reloctarget);
            break;
        }
    }
    if (!reloctarget) {
        printf("couldn't find relocation target...\n");
        return true;
    }

    // look up the replacement symbol; index 0 is skipped, so sym == 0 doubles
    // as "not found".
    // NOTE: the loop bound assumes the string table directly follows the
    // symbol table in the file.
    ElfW(Word) sym = 0;

    for (size_t i = 0; (size_t)dynsym < (size_t)dynstr;
            ++i, dynsym = (ElfW(Sym)*)((uint8_t*)dynsym + syment)) {
        if (i && dynsym->st_name != 0 && dynsym->st_name < strsz
                && !strcmp(symname, dynstr + dynsym->st_name)) {
            sym = (ElfW(Word))i;
            break;
        }
    }

    if (!sym) {
        printf("replacement symbol '%s' not found!\n", symname);
        return true;
    }
    printf("symbol '%s' -> index %zu\n", symname, (size_t)sym);

    ElfW(Rela) extrarel;
    extrarel.r_offset = reloctarget;
    extrarel.r_info = ELF64_R_INFO(sym, R_X86_64_PC32); // TODO: make portable
    extrarel.r_addend = -4; // TODO: make portable

    printf("extrarel: *0x%016zx = 0x%016zx + %d\n",
           (size_t)extrarel.r_offset, (size_t)extrarel.r_info, (int)extrarel.r_addend);

    // resize the phdr containing the relocations
    // (the previous condition was inverted and could never match)
    phdr = (ElfW(Phdr)*)(base + elf->e_phoff);
    for (size_t i = 0; i < elf->e_phnum;
            ++i, phdr = (ElfW(Phdr)*)((uint8_t*)phdr + elf->e_phentsize)) {
        if (phdr->p_offset <= relaoff && relaoff < phdr->p_offset + phdr->p_filesz) {
            printf("rela phdr index = %zu\n", i);
            phdr->p_filesz += *relaent;
            phdr->p_memsz += *relaent;

            break;
        }
    }
    // resize the shdr containing the relocations
    // FIXME: ASSUMPTION: the relocation tables end up at the very end of this
    //        phdr, first .rela.dyn and then .rela.plt, and no others
    //        things will break BADLY if this isn't true, and it's hard to detect
    //        if this will happen, so watch out!
    ElfW(Shdr)* shdr = (ElfW(Shdr)*)(base + elf->e_shoff);
    for (size_t i = 0; i < elf->e_shnum;
            ++i, shdr = (ElfW(Shdr)*)((uint8_t*)shdr + elf->e_shentsize)) {
        if (shdr->sh_type == SHT_REL) {
            printf("WARN: REL table present\n");
        } else if (shdr->sh_type == SHT_RELA) {
            if (shdr->sh_offset == relaoff) { // .rela.dyn
                shdr->sh_size += shdr->sh_entsize; // one extra entry
            } else { // .rela.plt
                if (jmprel && *jmprel != shdr->sh_offset) {
                    printf("WARN: RELA tables look fishy\n");
                }

                // move one entry below to make room
                memmove(base + shdr->sh_offset + shdr->sh_entsize,
                        base + shdr->sh_offset,
                        shdr->sh_size);
                shdr->sh_offset += shdr->sh_entsize;
                if (jmprel) *jmprel += shdr->sh_entsize;
            }
        }
    }

    // FINALLY write back the actually relevant stuff: the new entry goes
    // right behind the last existing one
    printf("relasz = %zu, index = %zu\n", (size_t)*relasz, (size_t)(*relasz / *relaent));
    relatab[*relasz / *relaent] = extrarel;
    *relasz += *relaent;

    printf("relasz now %zu\n", (size_t)*relasz);

    return false;
}
|
||||
|
Loading…
Reference in New Issue