From a532133a5d8438b839742550123eb2ff02d49077 Mon Sep 17 00:00:00 2001 From: sys64738 Date: Sat, 23 Jan 2021 05:20:24 +0100 Subject: [PATCH] stuff --- .gitignore | 3 + LICENSE | 2 + Makefile | 26 +++++++ README.md | 49 +++++++++++++ hello.c | 10 +++ manip-exe.c | 66 ++++++++++++++++++ manip-exe.h | 196 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 352 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 Makefile create mode 100644 README.md create mode 100644 hello.c create mode 100644 manip-exe.c create mode 100644 manip-exe.h diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..96d327a --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +hello.glibc* +hello.musl* +manip-exe diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..195e819 --- /dev/null +++ b/LICENSE @@ -0,0 +1,2 @@ +be gay, do crimes, death to america + diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..683bb22 --- /dev/null +++ b/Makefile @@ -0,0 +1,26 @@ + +.PHONY: default all clean + +GLIBCC ?= $(CC) +MUSLCC ?= x86_64-linux-musl-gcc + +default: all + +manip-exe: manip-exe.c manip-exe.h + $(CC) -o "$@" $< -Og -g + +# 'export-dynamic' so that functions defined in the exe actually end up in .dynsym +hello.glibc: hello.c + $(GLIBCC) -o "$@" $< -Wl,--export-dynamic -Og -g +hello.musl: hello.c + $(MUSLCC) -o "$@" $< -Wl,--export-dynamic -Og -g + +hello.%.manip: ./manip-exe hello.% + ./$^ "$@" main2 + @chmod +x "$@" + +all: hello.glibc.manip hello.musl.manip + +clean: + @$(RM) -v hello.glibc hello.glibc.manip manip-exe hello.musl hello.musl.manip + diff --git a/README.md b/README.md new file mode 100644 index 0000000..b633155 --- /dev/null +++ b/README.md @@ -0,0 +1,49 @@ +# relocmain + +Swap out an ELF executable's `main` function with another function from its +symbol table, without touching its code at all. 
+ +## How it works + +The typical ELF entrypoint isn't `main` directly, but rather `_start`, an +assembly stub that first does some runtime initialization before calling +`main`. But instead of calling the latter function directly, `_start` often +passes it as a parameter to `__libc_start_main` (which in turn calls `main` +after doing *more* initialization stuff). + +'Relocations' are instructions for an ELF linker or loader on how to patch a +binary when moving it around in memory or when resolving functions. Some +examples are "this value here is an absolute address, so when you move me +around, please keep it updated" (`R_<arch>_RELATIVE`), or "I'm using this +external function, and I'm accessing it through a PLT, so when you resolve +the symbol, please put it in the PLT" (`R_<arch>_JUMP_SLOT`). + +This program adds another relocation entry that replaces the address of `main` +loaded by that code (to pass it to `__libc_start_main`) with the address of a +different symbol. Due to how relocations work, it is restricted to replacing +it with another symbol that is either imported from another library, or +exported by the executable itself. + +Currently, only x86_64 is supported (but it's not too hard to add support for +other instruction sets; 32-bit ELF support is a bit harder). The code looks for +a `lea rdi, [rel <pcrel32>]` instruction near `_start`; this is often the +instruction that loads `main` to pass it to `__libc_start_main` (at least on +glibc and musl it is). This then gets overwritten *at runtime* by `ld.so` due +to a relocation of type `R_X86_64_PC32` targeting the `pcrel32` operand of +that instruction --- all without touching any code of the executable. + +## Usage + +```sh +# build only the tool +make manip-exe +# usage: ./manip-exe <input> <output> <symbol> + +# also show some example stuff +make all +``` + +## License + +See [LICENSE](/LICENSE). 
+
diff --git a/hello.c b/hello.c
new file mode 100644
index 0000000..3291614
--- /dev/null
+++ b/hello.c
@@ -0,0 +1,10 @@
+#include <stdio.h>
+
+int main(int argc, char* argv[]){printf("hello, world\n");return 42;}
+
+extern int main2(int argc, char* argv[]);
+int main2(int argc, char* argv[]) {
+    printf("be gay do crimes\n");
+    return 69;
+}
+
diff --git a/manip-exe.c b/manip-exe.c
new file mode 100644
index 0000000..2ddebc4
--- /dev/null
+++ b/manip-exe.c
@@ -0,0 +1,92 @@
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <elf.h>
+#include <link.h>
+#include <string.h>
+
+// TODO: allow for non-native-bitness ELF parsing
+#include "manip-exe.h"
+
+// Reads the ELF file argv[1] into memory, lets manip_elf() redirect its `main`
+// to the dynamic symbol named argv[3], and writes the patched image to
+// argv[2]. Returns 0 on success and 1 on any error.
+int main(int argc, char* argv[]) {
+    if (argc < 4) {
+        fprintf(stderr,"usage: %s <input> <output> <symbol>\n", argv[0]);
+        return 1;
+    }
+    const char* symname = argv[3];
+
+    FILE* f = fopen(argv[1], "rb");
+    if (f == NULL) {
+        fprintf(stderr, "%s: can't open input file '%s'\n", argv[0], argv[1]);
+        return 1;
+    }
+
+    // find the file size; a failed seek/tell must not be turned into a size
+    long s = -1;
+    if (fseek(f, 0, SEEK_END) == 0) s = ftell(f);
+    if (s < 0 || fseek(f, 0, SEEK_SET) != 0) {
+        fprintf(stderr, "%s: can't read input file '%s'\n", argv[0], argv[1]);
+        fclose(f);
+        return 1;
+    }
+    fprintf(stderr, "s=%ld\n", s);
+
+    // manip_elf() reads the ELF header without knowing the buffer size
+    if ((size_t)s < sizeof(ElfW(Ehdr))) {
+        fprintf(stderr, "%s: input file '%s' is too small\n", argv[0], argv[1]);
+        fclose(f);
+        return 1;
+    }
+
+    void* blob = calloc(1, (size_t)s);
+    if (blob == NULL) {
+        fprintf(stderr, "%s: out of memory\n", argv[0]);
+        fclose(f);
+        return 1;
+    }
+
+    if (fread(blob, 1, (size_t)s, f) != (size_t)s) {
+        fprintf(stderr, "%s: can't read input file '%s'\n", argv[0], argv[1]);
+        free(blob);
+        fclose(f);
+        return 1;
+    }
+    fclose(f);
+
+    if (manip_elf(blob, symname)) {
+        fprintf(stderr, "%s: ELF patching failed\n", argv[0]);
+        free(blob);
+        return 1;
+    }
+
+    f = fopen(argv[2], "wb");
+    if (f == NULL) {
+        fprintf(stderr, "%s: can't open output file '%s'\n", argv[0], argv[2]);
+        free(blob);
+        return 1;
+    }
+
+    if (fwrite(blob, 1, (size_t)s, f) != (size_t)s) {
+        fprintf(stderr, "%s: can't write output file '%s'\n", argv[0], argv[2]);
+        free(blob);
+        fclose(f);
+        return 1;
+    }
+
+    // fclose() flushes buffered data, so a failure here is a failed write
+    if (fclose(f) != 0) {
+        fprintf(stderr, "%s: can't write output file '%s'\n", argv[0], argv[2]);
+        free(blob);
+        return 1;
+    }
+    free(blob);
+
+    return 0;
+}
+
diff --git a/manip-exe.h b/manip-exe.h
new file mode 100644
index 0000000..095bb98
--- /dev/null
+++ b/manip-exe.h
@@ -0,0 +1,196 @@
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <elf.h>
+#include <link.h>
+
+#if (sizeof(ElfW(Ehdr)) != sizeof(Elf64_Ehdr))
+#error "Need 64-bit ELF for now, 
// Compile-time guard: everything below assumes that ElfW() expands to the
// 64-bit ELF structures. `sizeof` cannot be evaluated inside a preprocessor
// `#if`, so the check is written as a static assertion instead.
_Static_assert(sizeof(ElfW(Ehdr)) == sizeof(Elf64_Ehdr),
               "Need 64-bit ELF for now, sorry");

// Patches the ELF image at `elf` in place: the first
// `lea rdi, [rel <pcrel32>]` found at or after the entry point gets an extra
// R_X86_64_PC32 relocation on its pcrel32 operand, referring to the dynamic
// symbol `symname`.
//
// What gets modified in the image:
//  * every executable PT_LOAD segment is additionally marked writable
//  * one Rela entry is appended to the DT_RELA table, DT_RELASZ grows by one
//    DT_RELAENT
//  * every SHT_RELA section that is not the DT_RELA table is moved down by
//    one entry to make room, and DT_JMPREL is bumped accordingly
//  * the PT_LOAD segment and the section header covering the DT_RELA table
//    grow by one entry
//
// The d_ptr values of the dynamic section are used directly as file offsets,
// so the tables they point to must sit where file offset == virtual address.
// The size of the image is not known here: the caller has to pass a buffer
// that covers everything the headers refer to.
//
// elf:     start of the complete ELF file image
// symname: name of the replacement symbol in the dynamic symbol table
// returns: false on success, true on failure (a diagnostic is printed; the
//          image may already be partially modified at that point)
static bool manip_elf(ElfW(Ehdr)* elf, const char* symname) {
    if (elf == NULL || symname == NULL) {
        fprintf(stderr, "no ELF image or symbol name given\n");
        return true;
    }

    if (memcmp(elf->e_ident, ELFMAG, SELFMAG) != 0) {
        printf("bad elf magic\n");
        return true;
    }

    // TODO: port to other architectures
    // what would be needed for a port:
    // * fix code looking for 'lea rdi, [rel main]'
    // * fix relocation type
    // * thats it i think??? (well also fix this validation stuff)
    if (elf->e_ident[EI_CLASS] != ELFCLASS64 || elf->e_ident[EI_DATA] != ELFDATA2LSB) {
        printf("not a 64-bit little-endian elf\n");
        return true;
    }
    if (elf->e_type != ET_DYN || elf->e_machine != EM_X86_64) {
        printf("need dynamic x86_64 elf\n");
        return true;
    }

    const uintptr_t base = (uintptr_t)elf;

    // walk the program headers: find the dynamic section and the executable
    // segment that holds the entry point
    ElfW(Phdr)* ph_loadexec = NULL;
    ElfW(Dyn)* dyn = NULL;
    for (size_t i = 0; i < elf->e_phnum; ++i) {
        ElfW(Phdr)* phdr = (ElfW(Phdr)*)(base + elf->e_phoff + i * elf->e_phentsize);

        if (phdr->p_type == PT_LOAD && (phdr->p_flags & PF_X)) {
            // need this at runtime for the relocs to work (yeah, sorry)
            phdr->p_flags |= PF_W;

            if (elf->e_entry >= phdr->p_vaddr
                    && elf->e_entry - phdr->p_vaddr < phdr->p_filesz) {
                ph_loadexec = phdr;
            }
        }

        if (phdr->p_type == PT_DYNAMIC) {
            dyn = (ElfW(Dyn)*)(base + phdr->p_offset);
            printf("dyn=%zu\n", (size_t)phdr->p_offset);
        }
    }

    if (dyn == NULL) {
        fprintf(stderr, "No DYNAMIC table! bailing out...\n");
        return true;
    }
    if (ph_loadexec == NULL) {
        fprintf(stderr, "No executable LOAD segment contains the entry point!\n");
        return true;
    }

    // relasz/relaent/jmprel point INTO the dynamic section, so that the
    // values can be updated in place later on
    ElfW(Addr)* relasz = NULL, *relaent = NULL, *jmprel = NULL;
    ElfW(Rela)* relatab = NULL;
    ElfW(Sym)* dynsym = NULL;
    const char* dynstr = NULL;
    size_t strsz = 0, syment = 0;

    ElfW(Addr) relaoff = 0;

    for (; dyn->d_tag != DT_NULL; ++dyn) {
        switch (dyn->d_tag) {
        case DT_RELA:
            relatab = (ElfW(Rela)*)(base + dyn->d_un.d_ptr);
            relaoff = dyn->d_un.d_ptr;
            break;
        case DT_RELASZ:  relasz  = &dyn->d_un.d_ptr; break;
        case DT_RELAENT: relaent = &dyn->d_un.d_ptr; break;
        case DT_JMPREL:  jmprel  = &dyn->d_un.d_ptr; break;
        case DT_REL:
            printf("WARN: REL table present in DYN\n");
            break;
        case DT_SYMTAB: dynsym = (ElfW(Sym)*)(base + dyn->d_un.d_ptr); break;
        case DT_STRTAB: dynstr = (const char*)(base + dyn->d_un.d_ptr); break;
        case DT_SYMENT: syment = dyn->d_un.d_val; break;
        case DT_STRSZ:  strsz  = dyn->d_un.d_val; break;
        default:
            break;
        }
    }

    // a zero RELAENT would end up as a division by zero further down
    if (!relasz || !relaent || !relatab || *relaent == 0) {
        fprintf(stderr, "No RELA/RELASZ/RELAENT in DYN!\n");
        return true;
    }
    if (!dynsym || !dynstr || !strsz || !syment) {
        fprintf(stderr, "no SYMTAB/STRTAB/STRSZ/SYMENT in DYN!\n");
        return true;
    }

    // now we have everything to start patching stuff

    static const uint8_t magic_code[] = {0x48,0x8d,0x3d}; // lea rdi, [rel <pcrel32>]
    enum { PCREL32_SIZE = 4 }; // size of the operand following the opcode bytes

    // assumption: the first matching instruction loads 'main' into rdi
    // both glibc and musl load main this way, they could do it in a different
    // way, but apparently it's not the case
    ElfW(Addr) reloctarget = 0;
    ElfW(Addr) entry_vma = elf->e_entry;
    ElfW(Addr) entry_lma = entry_vma - ph_loadexec->p_vaddr + ph_loadexec->p_offset;
    printf("entry_vma=0x%zx, entry_lma=0x%zx\n", (size_t)entry_vma, (size_t)entry_lma);

    const uint8_t* entry_mem = (const uint8_t*)(base + entry_lma);
    // number of file-backed bytes from the entry point to the end of the segment
    size_t avail = (size_t)(ph_loadexec->p_filesz - (entry_vma - ph_loadexec->p_vaddr));
    // only accept matches whose opcode AND operand lie inside the segment
    for (size_t i = 0; i + sizeof(magic_code) + PCREL32_SIZE <= avail; ++i) {
        if (memcmp(entry_mem + i, magic_code, sizeof(magic_code)) == 0) {
            // found it! the relocation targets the operand, not the opcode
            reloctarget = entry_vma + i + sizeof(magic_code);
            printf("target = 0x%zx\n", (size_t)reloctarget);
            break;
        }
    }
    if (!reloctarget) {
        printf("couldn't find relocation target...\n");
        return true;
    }

    // look up the replacement symbol. the start of the string table is used
    // as the end marker of the symbol table, which assumes that the string
    // table is laid out after the symbol table. index 0 is skipped.
    ElfW(Word) sym = 0;
    for (size_t i = 0; (uintptr_t)dynsym + sizeof(*dynsym) <= (uintptr_t)dynstr;
            ++i, dynsym = (ElfW(Sym)*)((uintptr_t)dynsym + syment)) {
        if (i != 0 && dynsym->st_name != 0 && dynsym->st_name < strsz
                && strcmp(symname, dynstr + dynsym->st_name) == 0) {
            sym = (ElfW(Word))i;
            break;
        }
    }

    if (!sym) {
        printf("replacement symbol '%s' not found!\n", symname);
        return true;
    }
    printf("symbol '%s' -> index %zu\n", symname, (size_t)sym);

    const ElfW(Rela) extrarel = {
        .r_offset = reloctarget,
        .r_info   = ELF64_R_INFO(sym, R_X86_64_PC32), // TODO: make portable
        // the displacement is relative to the end of the 4-byte operand
        .r_addend = -4, // TODO: make portable
    };

    printf("extrarel: *0x%016zx = 0x%016zx + %lld\n", (size_t)extrarel.r_offset,
            (size_t)extrarel.r_info, (long long)extrarel.r_addend);

    // resize the phdr containing the relocations
    for (size_t i = 0; i < elf->e_phnum; ++i) {
        ElfW(Phdr)* phdr = (ElfW(Phdr)*)(base + elf->e_phoff + i * elf->e_phentsize);

        if (phdr->p_type == PT_LOAD && phdr->p_offset <= relaoff
                && relaoff - phdr->p_offset < phdr->p_filesz) {
            printf("rela phdr index = %zu\n", i);
            phdr->p_filesz += *relaent;
            phdr->p_memsz += *relaent;

            break;
        }
    }

    // resize the shdr containing the relocations
    // FIXME: ASSUMPTION: the relocation tables end up at the very end of this
    //        phdr, first .rela.dyn and then .rela.plt, and no others
    //        things will break BADLY if this isn't true, and it's hard to detect
    //        if this will happen, so watch out!
    for (size_t i = 0; i < elf->e_shnum; ++i) {
        ElfW(Shdr)* shdr = (ElfW(Shdr)*)(base + elf->e_shoff + i * elf->e_shentsize);

        if (shdr->sh_type == SHT_REL) {
            printf("WARN: REL table present\n");
        } else if (shdr->sh_type == SHT_RELA) {
            if (shdr->sh_offset == relaoff) { // .rela.dyn
                shdr->sh_size += shdr->sh_entsize; // one extra entry
            } else { // .rela.plt
                if (jmprel && *jmprel != shdr->sh_offset) {
                    printf("WARN: RELA tables look fishy\n");
                }

                // move one entry below to make room
                memmove((void*)(base + shdr->sh_offset + shdr->sh_entsize),
                        (void*)(base + shdr->sh_offset),
                        shdr->sh_size);
                shdr->sh_offset += shdr->sh_entsize;
                if (jmprel) *jmprel += shdr->sh_entsize;
            }
        }
    }

    // FINALLY write back the actually relevant stuff: the new entry goes into
    // the slot right behind the last existing DT_RELA entry
    printf("relasz = %zu, index = %zu\n", (size_t)*relasz, (size_t)(*relasz / *relaent));
    relatab[*relasz / *relaent] = extrarel;
    *relasz += *relaent;

    printf("relasz now %zu\n", (size_t)*relasz);

    return false;
}