sys64738 11 months ago
commit
a532133a5d
  1. 3
      .gitignore
  2. 2
      LICENSE
  3. 26
      Makefile
  4. 49
      README.md
  5. 10
      hello.c
  6. 66
      manip-exe.c
  7. 196
      manip-exe.h

3
.gitignore

@ -0,0 +1,3 @@
hello.glibc*
hello.musl*
manip-exe

2
LICENSE

@ -0,0 +1,2 @@
be gay, do crimes, death to america

26
Makefile

@ -0,0 +1,26 @@
# Builds the 'manip-exe' patcher, two demo executables (glibc- and
# musl-linked), and a patched '.manip' copy of each demo.
.PHONY: default all clean

# Compilers for the two demo variants; both may be overridden by the caller.
GLIBCC ?= $(CC)
MUSLCC ?= x86_64-linux-musl-gcc

# First rule in the file, hence the default goal.
default: all

all: hello.glibc.manip hello.musl.manip

# The tool itself; the header holds the actual patching code.
manip-exe: manip-exe.c manip-exe.h
	$(CC) -o "$@" $< -Og -g

# 'export-dynamic' so that functions defined in the exe actually end up in .dynsym
hello.glibc: hello.c
	$(GLIBCC) -o "$@" $< -Wl,--export-dynamic -Og -g

hello.musl: hello.c
	$(MUSLCC) -o "$@" $< -Wl,--export-dynamic -Og -g

# Patch a demo executable so that 'main2' is used in place of 'main'.
# '$^' expands to the tool followed by the input executable.
hello.%.manip: ./manip-exe hello.%
	./$^ "$@" main2
	@chmod +x "$@"

clean:
	@$(RM) -v hello.glibc hello.glibc.manip manip-exe hello.musl hello.musl.manip

49
README.md

@ -0,0 +1,49 @@
# relocmain
Swap out an ELF executable's `main` function with another function from its
symbol table, without touching its code at all.
## How it works
The typical ELF entrypoint isn't `main` directly, but rather `_start`, an
assembly stub that first does some runtime initialization before calling
`main`. But instead of directly calling the latter function, it is often
passed as a parameter to `__libc_start_main` (which in turn calls `main` after
doing *more* initialization stuff).
'Relocations' are instructions for an ELF linker or loader on how to patch a
binary when moving it around in memory or when resolving functions. Some
examples are "this value here is an absolute address, so when you move me
around, please keep it updated" (`R_<arch>_RELATIVE`), or "I'm using this
external function, and I'm accessing it through a PLT, so when you resolve
the symbol, please put it in the PLT" (`R_<arch>_JUMP_SLOT`).
This program adds another relocation entry that makes the code that loads
the address of `main` (to pass it to `__libc_start_main`) load the address of
a different symbol instead. Due to how relocations work, the replacement is
restricted to a symbol that is either imported from another library or
exported by the executable itself.
Currently, only x86_64 is supported (it's not too hard to add support for
other instruction sets; 32-bit ELF support is a bit harder). The code looks for
a `lea rdi, [rel <pcrel32>]` instruction near `_start`; this is often the
instruction that loads `main` to pass it to `__libc_start_main` (at least on
glibc and musl it is). This then gets overwritten *at runtime* by `ld.so` due
to a relocation of type `R_X86_64_PC32` targeting the `pcrel32` operand of
that instruction --- all without touching any code of the executable.
## Usage
```sh
# build only the tool
make manip-exe
# usage: ./manip-exe <input> <output> <symbol>
# also show some example stuff
make all
```
## License
See [LICENSE](/LICENSE).

10
hello.c

@ -0,0 +1,10 @@
#include <stdio.h>
/*
 * Original entry point of the demo: prints a greeting and exits with
 * status 42. The arguments are unused.
 */
int main(int argc, char* argv[]) {
	(void)argc;
	(void)argv;
	printf("hello, world\n");
	return 42;
}
/*
 * Replacement entry point for the demo. It has external linkage so that,
 * together with the '--export-dynamic' link flag, it shows up in .dynsym.
 * It takes the same arguments as 'main' and ignores them.
 */
extern int main2(int argc, char* argv[]);

int main2(int argc, char* argv[]) {
	(void)argc;
	(void)argv;
	printf("be gay do crimes\n");
	return 69;
}

66
manip-exe.c

@ -0,0 +1,66 @@
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
// TODO: allow for non-native-bitness ELF parsing
#include "manip-exe.h"
/*
 * Command-line driver: reads <input> completely into memory, lets
 * manip_elf() patch the image in place, and writes the (same-sized) image to
 * <output>.
 *
 * Returns 0 on success and 1 on any failure, including wrong usage.
 */
int main(int argc, char* argv[]) {
	if (argc < 4) {
		fprintf(stderr,"usage: %s <input> <output> <symbol>\n", argv[0]);
		return 1;
	}

	const char* symname = argv[3];

	FILE* f = fopen(argv[1], "rb");
	if (f == NULL) {
		fprintf(stderr, "%s: can't open input file '%s'\n", argv[0], argv[1]);
		return 1;
	}

	// determine the file size; ftell() reports failure as -1
	long s = -1;
	if (fseek(f, 0, SEEK_END) == 0) {
		s = ftell(f);
	}
	if (s <= 0 || fseek(f, 0, SEEK_SET) != 0) {
		fprintf(stderr, "%s: can't determine size of input file '%s' (or it is empty)\n",
			argv[0], argv[1]);
		fclose(f);
		return 1;
	}
	fprintf(stderr, "s=%ld\n", s);

	void* blob = calloc(1, (size_t)s);
	if (blob == NULL) {
		fprintf(stderr, "%s: out of memory\n", argv[0]);
		fclose(f);
		return 1;
	}

	if (fread(blob, 1, (size_t)s, f) != (size_t)s) {
		fprintf(stderr, "%s: can't read input file '%s'\n", argv[0], argv[1]);
		free(blob);
		fclose(f);
		return 1;
	}
	fclose(f);

	// manip_elf() returns true on failure
	if (manip_elf(blob, symname)) {
		fprintf(stderr, "%s: ELF patching failed\n", argv[0]);
		free(blob);
		return 1;
	}

	f = fopen(argv[2], "wb");
	if (f == NULL) {
		fprintf(stderr, "%s: can't open output file '%s'\n", argv[0], argv[2]);
		free(blob);
		return 1;
	}

	if (fwrite(blob, 1, (size_t)s, f) != (size_t)s) {
		fprintf(stderr, "%s: can't write output file '%s'\n", argv[0], argv[2]);
		free(blob);
		fclose(f);
		return 1;
	}
	free(blob);

	// fclose() flushes buffered data, so a failure here means a short write
	if (fclose(f) != 0) {
		fprintf(stderr, "%s: can't write output file '%s'\n", argv[0], argv[2]);
		return 1;
	}

	return 0;
}

196
manip-exe.h

@ -0,0 +1,196 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <elf.h>
#include <link.h>
// Compile-time guard: the code below only handles 64-bit ELF structures.
// This has to be a static assertion, as 'sizeof' cannot be evaluated by the
// preprocessor inside '#if'.
_Static_assert(sizeof(ElfW(Ehdr)) == sizeof(Elf64_Ehdr),
		"Need 64-bit ELF for now, sorry");
/*
 * Patch an in-memory ELF image so that an extra R_X86_64_PC32 relocation
 * against 'symname' targets the operand of the first
 * 'lea rdi, [rel <pcrel32>]' instruction found at or after the entry point.
 *
 * elf:     start of the complete file image; it is modified in place. Values
 *          from the dynamic table are used as offsets into this image.
 * symname: name of the dynamic symbol the new relocation refers to.
 *
 * Returns false on success and true on failure (a diagnostic is printed).
 * On failure the image may already have been partially modified (executable
 * LOAD segments are made writable early on).
 *
 * NOTE: the size of the image is not known here, so offsets read from the
 * file are not checked against the end of the buffer.
 */
static bool manip_elf(ElfW(Ehdr)* elf, const char* symname) {
	if (elf->e_ident[0] != ELFMAG0 || elf->e_ident[1] != ELFMAG1
			|| elf->e_ident[2] != ELFMAG2 || elf->e_ident[3] != ELFMAG3) {
		printf("bad elf magic\n");
		return true;
	}

	// TODO: port to other architectures
	// what would be needed for a port:
	// * fix code looking for 'lea rdi, [rel main]'
	// * fix relocation type
	// * that's it, I think? (well, also fix this validation stuff)
	if (elf->e_ident[EI_CLASS] != ELFCLASS64 || elf->e_ident[EI_DATA] != ELFDATA2LSB) {
		printf("not a 64-bit little-endian elf\n");
		return true;
	}
	if (elf->e_type != ET_DYN || elf->e_machine != EM_X86_64) {
		printf("need dynamic x86_64 elf\n");
		return true;
	}

	// walk the program headers: remember the (last) executable LOAD segment
	// and locate the dynamic table
	ElfW(Phdr)* phdr = (ElfW(Phdr)*)(elf->e_phoff + (size_t)elf);
	ElfW(Phdr)* ph_loadexec = NULL;
	ElfW(Dyn)* dyn = NULL;
	for (size_t i = 0; i < elf->e_phnum;
			++i, phdr = (ElfW(Phdr)*)((size_t)phdr + elf->e_phentsize)) {
		if (phdr->p_type == PT_LOAD && (phdr->p_flags & PF_X)) {
			// need this at runtime for the relocs to work (yeah, sorry)
			ph_loadexec = phdr;
			phdr->p_flags |= PF_W;
		}
		if (phdr->p_type == PT_DYNAMIC) {
			dyn = (ElfW(Dyn)*)((size_t)elf + phdr->p_offset);
			printf("dyn=%zu\n", (size_t)phdr->p_offset);
		}
	}
	if (dyn == NULL) {
		fprintf(stderr, "No DYNAMIC table! bailing out...\n");
		return true;
	}
	if (ph_loadexec == NULL) {
		fprintf(stderr, "No executable LOAD segment! bailing out...\n");
		return true;
	}

	// collect what we need from the dynamic table; relasz/relaent/jmprel
	// point INTO the table because some of them get updated later on
	ElfW(Addr)* relasz = NULL, *relaent = NULL, *jmprel = NULL;
	ElfW(Rela)* relatab = NULL;
	ElfW(Sym)* dynsym = NULL;
	const char* dynstr = NULL;
	size_t strsz = 0, syment = 0;
	ElfW(Addr) relaoff = 0;
	for (; dyn->d_tag != DT_NULL; ++dyn) {
		switch (dyn->d_tag) {
		case DT_RELA:
			relatab = (ElfW(Rela)*)((size_t)elf + dyn->d_un.d_ptr);
			relaoff = dyn->d_un.d_ptr;
			break;
		case DT_RELASZ:
			relasz = (ElfW(Addr)*)(&dyn->d_un.d_ptr);
			break;
		case DT_RELAENT:
			relaent = (ElfW(Addr)*)(&dyn->d_un.d_ptr);
			break;
		case DT_JMPREL:
			jmprel = (ElfW(Addr)*)(&dyn->d_un.d_ptr);
			break;
		case DT_REL:
			printf("WARN: REL table present in DYN\n");
			break;
		case DT_SYMTAB:
			dynsym = (ElfW(Sym)*)((size_t)elf + dyn->d_un.d_ptr);
			break;
		case DT_STRTAB:
			dynstr = (const char*)((size_t)elf + dyn->d_un.d_ptr);
			break;
		case DT_SYMENT:
			syment = dyn->d_un.d_val;
			break;
		case DT_STRSZ:
			strsz = dyn->d_un.d_val;
			break;
		default:
			break;
		}
	}
	if (!relasz || !relaent || !relatab) {
		fprintf(stderr, "No RELA/RELASZ/RELAENT in DYN!\n");
		return true;
	}
	if (*relaent == 0) {
		// would divide by zero when computing the new entry's index
		fprintf(stderr, "RELAENT in DYN is zero!\n");
		return true;
	}
	if (!dynsym || !dynstr || !strsz || !syment) {
		fprintf(stderr, "no SYMTAB/STRTAB/STRSZ/SYMENT in DYN!\n");
		return true;
	}

	// now we have everything to start patching stuff
	static const uint8_t magic_code[] = {0x48,0x8d,0x3d}; // lea rdi, [rel <pcrel32>]
	const size_t magic_len = sizeof(magic_code)/sizeof(*magic_code);
	const size_t insn_len = magic_len + 4; // opcode bytes + 32-bit operand

	// assumption: the first matching instruction loads 'main' into rdi
	// both glibc and musl load main this way, they could do it in a different
	// way, but apparently it's not the case
	ElfW(Addr) reloctarget = 0;
	ElfW(Addr) entry_vma = elf->e_entry;
	ElfW(Addr) entry_lma = elf->e_entry - ph_loadexec->p_vaddr + ph_loadexec->p_offset;
	printf("entry_vma=0x%zx, entry_lma=0x%zx\n", (size_t)entry_vma, (size_t)entry_lma);

	if (entry_vma < ph_loadexec->p_vaddr
			|| entry_vma - ph_loadexec->p_vaddr >= ph_loadexec->p_filesz) {
		printf("entry point is outside of the executable LOAD segment\n");
		return true;
	}

	// bytes of file-backed segment data available from the entry point onwards
	const size_t avail = ph_loadexec->p_filesz - (entry_vma - ph_loadexec->p_vaddr);
	const uint8_t* entry_mem = (const uint8_t*)elf + entry_lma;
	// only consider positions where the whole instruction fits in the segment
	for (size_t i = 0; i + insn_len <= avail; ++i) {
		if (memcmp(entry_mem + i, magic_code, magic_len) == 0) {
			// found it! the relocation targets the operand, not the opcode
			reloctarget = entry_vma + i + magic_len;
			printf("target = 0x%zx\n", (size_t)reloctarget);
			break;
		}
	}
	if (!reloctarget) {
		printf("couldn't find relocation target...\n");
		return true;
	}

	// look up the replacement symbol's index in .dynsym. There is no dynamic
	// tag for the symbol table's size, so the string table's start is used
	// as its end (ASSUMPTION: .dynstr directly follows .dynsym).
	ElfW(Word) sym = 0;
	for (size_t i = 0; (size_t)dynsym < (size_t)dynstr;
			++i, dynsym = (ElfW(Sym)*)((size_t)dynsym + syment)) {
		if (i && dynsym->st_name != 0 && dynsym->st_name < strsz
				&& !strcmp(symname, dynstr + dynsym->st_name)) {
			sym = (ElfW(Word))i;
			break;
		}
	}
	if (!sym) {
		printf("replacement symbol '%s' not found!\n", symname);
		return true;
	}
	printf("symbol '%s' -> index %zu\n", symname, (size_t)sym);

	ElfW(Rela) extrarel;
	extrarel.r_offset = reloctarget;
	extrarel.r_info = ELF64_R_INFO(sym, R_X86_64_PC32); // TODO: make portable
	extrarel.r_addend = -4; // TODO: make portable
	printf("extrarel: *0x%016zx = 0x%016zx + %lld\n", (size_t)extrarel.r_offset,
		(size_t)extrarel.r_info, (long long)extrarel.r_addend);

	// resize the phdr containing the relocations (the first one whose file
	// range covers the start of the RELA table)
	phdr = (ElfW(Phdr)*)(elf->e_phoff + (size_t)elf);
	for (size_t i = 0; i < elf->e_phnum;
			++i, phdr = (ElfW(Phdr)*)((size_t)phdr + elf->e_phentsize)) {
		if (phdr->p_offset <= relaoff && relaoff - phdr->p_offset < phdr->p_filesz) {
			printf("rela phdr index = %zu\n", i);
			phdr->p_filesz += *relaent;
			phdr->p_memsz += *relaent;
			break;
		}
	}

	// resize the shdr containing the relocations
	// FIXME: ASSUMPTION: the relocation tables end up at the very end of this
	//        phdr, first .rela.dyn and then .rela.plt, and no others
	//        things will break BADLY if this isn't true, and it's hard to detect
	//        if this will happen, so watch out!
	ElfW(Shdr)* shdr = (ElfW(Shdr)*)(elf->e_shoff + (size_t)elf);
	for (size_t i = 0; i < elf->e_shnum;
			++i, shdr = (ElfW(Shdr)*)((size_t)shdr + elf->e_shentsize)) {
		if (shdr->sh_type == SHT_REL) {
			printf("WARN: REL table present\n");
		} else if (shdr->sh_type == SHT_RELA) {
			if (shdr->sh_offset == relaoff) { // .rela.dyn
				shdr->sh_size += shdr->sh_entsize; // one extra entry
			} else { // .rela.plt
				if (jmprel && *jmprel != shdr->sh_offset) {
					printf("WARN: RELA tables look fishy\n");
				}
				// move one entry below to make room
				memmove((void*)((size_t)elf + shdr->sh_offset + shdr->sh_entsize),
						(void*)((size_t)elf + shdr->sh_offset),
						shdr->sh_size);
				shdr->sh_offset += shdr->sh_entsize;
				if (jmprel) *jmprel += shdr->sh_entsize;
			}
		}
	}

	// FINALLY write back the actually relevant stuff: append the new entry
	// to the RELA table and account for it in DT_RELASZ
	const size_t newidx = (size_t)(*relasz / *relaent);
	printf("relasz = %zu, index = %zu\n", (size_t)*relasz, newidx);
	relatab[newidx] = extrarel;
	*relasz += *relaent;
	printf("relasz now %zu\n", (size_t)*relasz);

	return false;
}
Loading…
Cancel
Save