start elf parsing

This commit is contained in:
Audrey 2024-04-16 13:44:44 -07:00
parent c9b4263817
commit 68aa47a94e
3 changed files with 256 additions and 16 deletions

157
Cargo.lock generated
View File

@ -8,7 +8,7 @@ version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb"
dependencies = [ dependencies = [
"gimli", "gimli 0.28.1",
] ]
[[package]] [[package]]
@ -94,7 +94,7 @@ dependencies = [
"cfg-if", "cfg-if",
"libc", "libc",
"miniz_oxide", "miniz_oxide",
"object", "object 0.32.2",
"rustc-demangle", "rustc-demangle",
] ]
@ -119,6 +119,12 @@ dependencies = [
"generic-array", "generic-array",
] ]
[[package]]
name = "byteorder"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]] [[package]]
name = "cc" name = "cc"
version = "1.0.92" version = "1.0.92"
@ -168,7 +174,7 @@ dependencies = [
"heck", "heck",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn", "syn 2.0.58",
] ]
[[package]] [[package]]
@ -192,6 +198,15 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "crc32fast"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa"
dependencies = [
"cfg-if",
]
[[package]] [[package]]
name = "crypto-common" name = "crypto-common"
version = "0.1.6" version = "0.1.6"
@ -202,6 +217,17 @@ dependencies = [
"typenum", "typenum",
] ]
[[package]]
name = "derive_more"
version = "0.99.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321"
dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]] [[package]]
name = "digest" name = "digest"
version = "0.10.7" version = "0.10.7"
@ -235,6 +261,28 @@ dependencies = [
"log", "log",
] ]
[[package]]
name = "equivalent"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
[[package]]
name = "fallible-iterator"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
[[package]]
name = "flate2"
version = "1.0.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e"
dependencies = [
"crc32fast",
"miniz_oxide",
]
[[package]] [[package]]
name = "generic-array" name = "generic-array"
version = "0.14.7" version = "0.14.7"
@ -251,6 +299,23 @@ version = "0.28.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253"
[[package]]
name = "gimli"
version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd"
dependencies = [
"fallible-iterator",
"indexmap",
"stable_deref_trait",
]
[[package]]
name = "hashbrown"
version = "0.14.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
[[package]] [[package]]
name = "heck" name = "heck"
version = "0.5.0" version = "0.5.0"
@ -272,6 +337,16 @@ version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
[[package]]
name = "indexmap"
version = "2.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26"
dependencies = [
"equivalent",
"hashbrown",
]
[[package]] [[package]]
name = "itoa" name = "itoa"
version = "1.0.11" version = "1.0.11"
@ -306,6 +381,15 @@ version = "2.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d"
[[package]]
name = "memmap2"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322"
dependencies = [
"libc",
]
[[package]] [[package]]
name = "miniz_oxide" name = "miniz_oxide"
version = "0.7.2" version = "0.7.2"
@ -336,6 +420,17 @@ dependencies = [
"memchr", "memchr",
] ]
[[package]]
name = "object"
version = "0.35.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8ec7ab813848ba4522158d5517a6093db1ded27575b070f4177b8d12b41db5e"
dependencies = [
"flate2",
"memchr",
"ruzstd",
]
[[package]] [[package]]
name = "ontology" name = "ontology"
version = "0.1.0" version = "0.1.0"
@ -343,13 +438,17 @@ dependencies = [
"anyhow", "anyhow",
"clap", "clap",
"env_logger", "env_logger",
"gimli 0.29.0",
"hex", "hex",
"linux-personality", "linux-personality",
"log", "log",
"memmap2",
"nix", "nix",
"object 0.35.0",
"serde", "serde",
"serde_json", "serde_json",
"sha2", "sha2",
"typed-arena",
"walkdir", "walkdir",
] ]
@ -406,6 +505,17 @@ version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
[[package]]
name = "ruzstd"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5174a470eeb535a721ae9fdd6e291c2411a906b96592182d05217591d5c5cf7b"
dependencies = [
"byteorder",
"derive_more",
"twox-hash",
]
[[package]] [[package]]
name = "ryu" name = "ryu"
version = "1.0.17" version = "1.0.17"
@ -438,7 +548,7 @@ checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn", "syn 2.0.58",
] ]
[[package]] [[package]]
@ -463,12 +573,35 @@ dependencies = [
"digest", "digest",
] ]
[[package]]
name = "stable_deref_trait"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "static_assertions"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]] [[package]]
name = "strsim" name = "strsim"
version = "0.11.1" version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "syn"
version = "1.0.109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]] [[package]]
name = "syn" name = "syn"
version = "2.0.58" version = "2.0.58"
@ -480,6 +613,22 @@ dependencies = [
"unicode-ident", "unicode-ident",
] ]
[[package]]
name = "twox-hash"
version = "1.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675"
dependencies = [
"cfg-if",
"static_assertions",
]
[[package]]
name = "typed-arena"
version = "2.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a"
[[package]] [[package]]
name = "typenum" name = "typenum"
version = "1.17.0" version = "1.17.0"

View File

@ -17,3 +17,7 @@ clap = { version = "4.5.4", features = ["derive"] }
sha2 = { version = "0.10" } sha2 = { version = "0.10" }
walkdir = "2" walkdir = "2"
hex = { version = "0.4.3", features = ["serde"] } hex = { version = "0.4.3", features = ["serde"] }
gimli = { version = "0.29.0" }
object = { version = "0.35" }
memmap2 = { version = "0.9.4" }
typed-arena = { version = "2" }

View File

@ -1,11 +1,13 @@
use std::{ use std::{
collections::{BTreeMap, HashMap, HashSet}, collections::{BTreeMap, HashMap, HashSet},
fs, io, fs, io,
path::PathBuf, path::PathBuf, borrow::Cow,
}; };
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use sha2::{digest::generic_array::{GenericArray, typenum::U32}, Digest, Sha256}; use sha2::{digest::generic_array::{GenericArray, typenum::U32}, Digest, Sha256};
use object::{Object, ReadCache, ObjectSection};
use typed_arena::Arena;
#[derive(Serialize, Deserialize, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)] #[derive(Serialize, Deserialize, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
#[serde(transparent)] #[serde(transparent)]
@ -25,7 +27,7 @@ impl From<GenericArray<u8, U32>> for Sha256Hash {
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
pub struct FileStore { pub struct FileStore {
pub files: Vec<FileStoreEntry>, pub files: Vec<FileStoreEntry>,
pub input_mapping: HashMap<PathBuf, usize>, pub filenames: HashMap<PathBuf, usize>,
pub hashes: BTreeMap<Sha256Hash, usize>, pub hashes: BTreeMap<Sha256Hash, usize>,
} }
@ -33,15 +35,24 @@ pub struct FileStore {
pub struct FileStoreEntry { pub struct FileStoreEntry {
pub index: usize, pub index: usize,
pub hash: Sha256Hash, pub hash: Sha256Hash,
pub format: FileFormat,
pub input_names: HashSet<PathBuf>, pub input_names: HashSet<PathBuf>,
pub output_names: HashSet<PathBuf>, pub output_names: HashSet<PathBuf>,
} }
/// Format detected for a stored file; produced by `FileStore::parse_format`
/// and kept in `FileStoreEntry::format`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum FileFormat {
/// The file starts with the ELF magic bytes (`0x7f 'E' 'L' 'F'`).
ELF {
/// NOTE(review): presumably indices of other `FileStore::files` entries
/// that this ELF refers to — confirm. `parse_format` currently always
/// stores an empty vector here.
references: Vec<usize>,
},
/// Any file that was not recognised as ELF.
Other,
}
impl FileStore { impl FileStore {
pub fn new(inputs: Vec<PathBuf>) -> anyhow::Result<Self> { pub fn new(inputs: Vec<PathBuf>) -> anyhow::Result<Self> {
let mut result = Self { let mut result = Self {
files: vec![], files: vec![],
input_mapping: HashMap::new(), filenames: HashMap::new(),
hashes: BTreeMap::new(), hashes: BTreeMap::new(),
}; };
@ -74,7 +85,7 @@ impl FileStore {
fn ingest_input_content( fn ingest_input_content(
&mut self, &mut self,
filename: PathBuf, filename: PathBuf,
mut content: impl io::Read, mut content: (impl io::Read + io::Seek),
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
let mut h = Sha256::new(); let mut h = Sha256::new();
io::copy(&mut content, &mut h)?; io::copy(&mut content, &mut h)?;
@ -83,13 +94,15 @@ impl FileStore {
let index = match self.hashes.entry(hash) { let index = match self.hashes.entry(hash) {
std::collections::btree_map::Entry::Vacant(e) => { std::collections::btree_map::Entry::Vacant(e) => {
let index = self.files.len(); let index = self.files.len();
e.insert(index);
let format = self.parse_format(&mut content)?;
self.files.push(FileStoreEntry { self.files.push(FileStoreEntry {
index, index,
hash, hash,
format,
input_names: [filename.clone()].into(), input_names: [filename.clone()].into(),
output_names: HashSet::new(), output_names: HashSet::new(),
}); });
e.insert(index);
index index
} }
std::collections::btree_map::Entry::Occupied(e) => { std::collections::btree_map::Entry::Occupied(e) => {
@ -102,7 +115,10 @@ impl FileStore {
} }
}; };
self.input_mapping.insert(filename, index); if index == self.files.len() {
}
self.filenames.insert(filename, index);
Ok(()) Ok(())
} }
@ -119,32 +135,103 @@ impl FileStore {
pub fn ingest_output( pub fn ingest_output(
&mut self, &mut self,
filename: PathBuf, filename: PathBuf,
mut content: impl io::Read, mut content: (impl io::Read + io::Seek),
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
let mut h = Sha256::new(); let mut h = Sha256::new();
io::copy(&mut content, &mut h)?; io::copy(&mut content, &mut h)?;
let hash = h.finalize().into(); let hash = h.finalize().into();
match self.hashes.entry(hash) { let index = match self.hashes.entry(hash) {
std::collections::btree_map::Entry::Vacant(e) => { std::collections::btree_map::Entry::Vacant(e) => {
let index = self.files.len(); let index = self.files.len();
e.insert(index);
let format = self.parse_format(&mut content)?;
self.files.push(FileStoreEntry { self.files.push(FileStoreEntry {
index, index,
format,
hash, hash,
input_names: HashSet::new(), input_names: HashSet::new(),
output_names: [filename].into(), output_names: [filename.clone()].into(),
}); });
e.insert(index); index
} }
std::collections::btree_map::Entry::Occupied(e) => { std::collections::btree_map::Entry::Occupied(e) => {
self.files self.files
.get_mut(*e.get()) .get_mut(*e.get())
.unwrap() .unwrap()
.output_names .output_names
.insert(filename); .insert(filename.clone());
} *e.get()
} }
};
self.filenames.insert(filename, index);
Ok(()) Ok(())
} }
/// Detects the format of `fp` from its leading magic bytes.
///
/// Rewinds `fp` to the start, reads up to four bytes and compares them with
/// the ELF magic (`0x7f 'E' 'L' 'F'`). Anything else, including input shorter
/// than four bytes, is reported as [`FileFormat::Other`].
///
/// For ELF input the file is parsed with `object` and its DWARF sections are
/// loaded through `gimli`. The units and their entries are walked, but nothing
/// is extracted from them yet, so `references` is always empty.
///
/// # Errors
///
/// Returns an error if seeking or reading `fp` fails, if `object` cannot parse
/// the ELF file, if the DWARF sections cannot be loaded, or if the
/// abbreviations of a unit cannot be read.
fn parse_format(&mut self, fp: &mut (impl io::Read + io::Seek)) -> anyhow::Result<FileFormat> {
    fp.seek(io::SeekFrom::Start(0))?;
    let mut buf = [0; 4];
    let count = read_exact_or_end(fp, &mut buf)?;
    // Only the bytes actually read take part in the comparison, so a file
    // shorter than the magic can never match.
    let buf = &buf[..count];
    Ok(match buf {
        [0x7f, b'E', b'L', b'F', ..] => {
            let read_cache = ReadCache::new(fp);
            let elf = object::File::parse(&read_cache)?;
            // Both byte orders are handled: the DWARF reader is configured
            // at runtime to match the ELF file.
            let endian = if elf.is_little_endian() {
                gimli::RunTimeEndian::Little
            } else {
                gimli::RunTimeEndian::Big
            };

            // Owns any section data that had to be decompressed, so the
            // slices handed to gimli stay valid while `dwarf` is alive.
            let arena_data = Arena::new();
            let mut load_section = |id: gimli::SectionId| -> Result<_, _> {
                load_file_section(id, &elf, endian, &arena_data)
            };

            // Surface a section loading failure as an error instead of
            // panicking on untrusted input.
            let dwarf = gimli::Dwarf::load(&mut load_section)
                .map_err(|()| anyhow::anyhow!("failed to load DWARF sections"))?;

            // NOTE(review): both loops stop silently at the first iteration
            // error; decide whether that should be reported once the entries
            // are actually used.
            let mut units = dwarf.units();
            while let Ok(Some(unit)) = units.next() {
                let abbrev = dwarf.abbreviations(&unit)?;
                let mut entries = unit.entries(&abbrev);
                while let Ok(Some(_entry)) = entries.next_sibling() {}
            }

            FileFormat::ELF {
                references: vec![],
            }
        }
        _ => FileFormat::Other,
    })
}
}
/// Looks up the DWARF section `id` in `file` by name and returns its data as
/// a `gimli::EndianSlice` using `endian`.
///
/// * A section that is absent from the file yields an empty slice.
/// * Section data that `object` can hand out borrowed is used directly.
/// * Section data that `object` returns as an owned buffer is moved into
///   `arena_data`, so the returned slice can borrow from the arena.
///
/// # Errors
///
/// Returns `Err(())` when `object` fails to provide the section's
/// uncompressed data.
fn load_file_section<'input, 'arena, Endian: gimli::Endianity, R: object::ReadRef<'input>>(
    id: gimli::SectionId,
    file: &object::File<'input, R>,
    endian: Endian,
    arena_data: &'arena Arena<Cow<'input, [u8]>>,
) -> Result<gimli::EndianSlice<'arena, Endian>, ()> {
    // TODO: Unify with dwarfdump.rs in gimli.
    let name = id.name();
    match file.section_by_name(name) {
        // The input file is untrusted, so undecodable section data is
        // reported through the `Result` rather than by panicking.
        Some(section) => match section.uncompressed_data().map_err(|_| ())? {
            Cow::Borrowed(b) => Ok(gimli::EndianSlice::new(b, endian)),
            Cow::Owned(b) => Ok(gimli::EndianSlice::new(arena_data.alloc(b.into()), endian)),
        },
        None => Ok(gimli::EndianSlice::new(&[][..], endian)),
    }
}
fn read_exact_or_end(fp: &mut impl io::Read, buf: &mut [u8]) -> anyhow::Result<usize> {
let mut read_so_far = 0;
while read_so_far < buf.len() {
let n = fp.read(&mut buf[read_so_far..])?;
if n == 0 {
break;
}
read_so_far += n;
}
Ok(read_so_far)
} }