From 68aa47a94ec6b3faa73fad478c6cb6a8306d9f57 Mon Sep 17 00:00:00 2001 From: Audrey Dutcher Date: Tue, 16 Apr 2024 13:44:44 -0700 Subject: [PATCH] start elf parsing --- Cargo.lock | 157 +++++++++++++++++++++++++++++++++++++++++++++-- Cargo.toml | 4 ++ src/filestore.rs | 111 +++++++++++++++++++++++++++++---- 3 files changed, 256 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ab41b74..7f33e25 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,7 +8,7 @@ version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" dependencies = [ - "gimli", + "gimli 0.28.1", ] [[package]] @@ -94,7 +94,7 @@ dependencies = [ "cfg-if", "libc", "miniz_oxide", - "object", + "object 0.32.2", "rustc-demangle", ] @@ -119,6 +119,12 @@ dependencies = [ "generic-array", ] +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "cc" version = "1.0.92" @@ -168,7 +174,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn", + "syn 2.0.58", ] [[package]] @@ -192,6 +198,15 @@ dependencies = [ "libc", ] +[[package]] +name = "crc32fast" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" +dependencies = [ + "cfg-if", +] + [[package]] name = "crypto-common" version = "0.1.6" @@ -202,6 +217,17 @@ dependencies = [ "typenum", ] +[[package]] +name = "derive_more" +version = "0.99.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "digest" version = "0.10.7" @@ -235,6 +261,28 @@ dependencies = [ "log", ] +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "flate2" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -251,6 +299,23 @@ version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" +[[package]] +name = "gimli" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" +dependencies = [ + "fallible-iterator", + "indexmap", + "stable_deref_trait", +] + +[[package]] +name = "hashbrown" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" + [[package]] name = "heck" version = "0.5.0" @@ -272,6 +337,16 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +[[package]] +name = "indexmap" +version = "2.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +dependencies = [ + "equivalent", + "hashbrown", +] + [[package]] name = "itoa" version = "1.0.11" @@ -306,6 +381,15 @@ version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" +[[package]] +name = "memmap2" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322" +dependencies = [ + "libc", +] + [[package]] name = "miniz_oxide" version = "0.7.2" @@ -336,6 +420,17 @@ dependencies = [ "memchr", ] +[[package]] +name = "object" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8ec7ab813848ba4522158d5517a6093db1ded27575b070f4177b8d12b41db5e" +dependencies = [ + "flate2", + "memchr", + "ruzstd", +] + [[package]] name = "ontology" version = "0.1.0" @@ -343,13 +438,17 @@ dependencies = [ "anyhow", "clap", "env_logger", + "gimli 0.29.0", "hex", "linux-personality", "log", + "memmap2", "nix", + "object 0.35.0", "serde", "serde_json", "sha2", + "typed-arena", "walkdir", ] @@ -406,6 +505,17 @@ version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" +[[package]] +name = "ruzstd" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5174a470eeb535a721ae9fdd6e291c2411a906b96592182d05217591d5c5cf7b" +dependencies = [ + "byteorder", + "derive_more", + "twox-hash", +] + [[package]] name = "ryu" version = "1.0.17" @@ -438,7 +548,7 @@ checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.58", ] [[package]] @@ -463,12 +573,35 @@ dependencies = [ "digest", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strsim" version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "syn" version = "2.0.58" @@ -480,6 +613,22 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "twox-hash" +version = "1.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if", + "static_assertions", +] + +[[package]] +name = "typed-arena" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" + [[package]] name = "typenum" version = "1.17.0" diff --git a/Cargo.toml b/Cargo.toml index 3152a3f..1d48293 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,3 +17,7 @@ clap = { version = "4.5.4", features = ["derive"] } sha2 = { version = "0.10" } walkdir = "2" hex = { version = "0.4.3", features = ["serde"] } +gimli = { version = "0.29.0" } +object = { version = "0.35" } +memmap2 = { version = "0.9.4" } +typed-arena = { version = "2" } diff --git a/src/filestore.rs b/src/filestore.rs index 71b2bd2..e0c8f93 100644 --- a/src/filestore.rs +++ b/src/filestore.rs @@ -1,11 +1,13 @@ use std::{ collections::{BTreeMap, HashMap, HashSet}, fs, io, - path::PathBuf, + path::PathBuf, borrow::Cow, }; use serde::{Deserialize, Serialize}; use sha2::{digest::generic_array::{GenericArray, typenum::U32}, Digest, Sha256}; +use object::{Object, ReadCache, ObjectSection}; +use typed_arena::Arena; #[derive(Serialize, Deserialize, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)] #[serde(transparent)] @@ -25,7 +27,7 @@ impl From> for Sha256Hash { #[derive(Serialize, Deserialize)] pub struct FileStore { pub files: Vec, - pub input_mapping: HashMap, + pub filenames: HashMap, pub hashes: BTreeMap, } @@ -33,15 +35,24 @@ pub struct FileStore { pub struct FileStoreEntry { pub index: usize, pub hash: Sha256Hash, + pub format: FileFormat, pub input_names: HashSet, pub output_names: HashSet, } +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub enum FileFormat { + ELF { + references: Vec, + }, + Other, +} + impl FileStore { pub fn new(inputs: Vec) -> anyhow::Result { let mut result = Self { files: vec![], - input_mapping: HashMap::new(), + filenames: HashMap::new(), hashes: BTreeMap::new(), }; @@ -74,7 +85,7 @@ impl FileStore { fn ingest_input_content( &mut self, filename: PathBuf, - mut content: impl io::Read, + mut content: (impl io::Read + io::Seek), ) -> anyhow::Result<()> { let mut h = Sha256::new(); io::copy(&mut content, &mut h)?; @@ -83,13 +94,15 @@ impl FileStore { let index = match self.hashes.entry(hash) { std::collections::btree_map::Entry::Vacant(e) => { let index = self.files.len(); + e.insert(index); + let format = self.parse_format(&mut content)?; self.files.push(FileStoreEntry { index, hash, + format, input_names: [filename.clone()].into(), output_names: HashSet::new(), }); - e.insert(index); index } std::collections::btree_map::Entry::Occupied(e) => { @@ -102,7 +115,10 @@ impl FileStore { } }; - self.input_mapping.insert(filename, index); + if index == self.files.len() { + } + + self.filenames.insert(filename, index); Ok(()) } @@ -119,32 +135,103 @@ impl FileStore { pub fn ingest_output( &mut self, filename: PathBuf, - mut content: impl io::Read, + mut content: (impl io::Read + io::Seek), ) -> anyhow::Result<()> { let mut h = Sha256::new(); io::copy(&mut content, &mut h)?; let hash = h.finalize().into(); - match self.hashes.entry(hash) { + let index = match self.hashes.entry(hash) { std::collections::btree_map::Entry::Vacant(e) => { let index = self.files.len(); + e.insert(index); + let format = self.parse_format(&mut content)?; self.files.push(FileStoreEntry { index, + format, hash, input_names: HashSet::new(), - output_names: [filename].into(), + output_names: [filename.clone()].into(), }); - e.insert(index); + index } std::collections::btree_map::Entry::Occupied(e) => { self.files .get_mut(*e.get()) .unwrap() .output_names - .insert(filename); + .insert(filename.clone()); + *e.get() } - } + }; + + self.filenames.insert(filename, index); Ok(()) } + + fn parse_format(&mut self, fp: &mut (impl io::Read + io::Seek)) -> anyhow::Result { + fp.seek(io::SeekFrom::Start(0))?; + let mut buf = [0; 4]; + let count = read_exact_or_end(fp, &mut buf)?; + let buf = &buf[..count]; + + Ok(match buf { + [0x7f, b'E', b'L', b'F', ..] => { + let read_cache = ReadCache::new(fp); + let elf = object::File::parse(&read_cache)?; + let endian = if elf.is_little_endian() { + gimli::RunTimeEndian::Little + } else { + gimli::RunTimeEndian::Big + }; + let arena_data = Arena::new(); + let mut load_section = |id: gimli::SectionId| -> Result<_, _> { + load_file_section(id, &elf, endian, &arena_data) + }; + let mut dwarf = gimli::Dwarf::load(&mut load_section).unwrap(); + let mut units = dwarf.units(); + while let Ok(Some(unit)) = units.next() { + let abbrev = dwarf.abbreviations(&unit)?; + let mut entries = unit.entries(&abbrev); + while let Ok(Some(entry)) = entries.next_sibling() { + } + } + assert!(elf.is_little_endian()); + FileFormat::ELF { + references: vec![], + } + }, + _ => FileFormat::Other, + }) + } +} + +fn load_file_section<'input, 'arena, Endian: gimli::Endianity, R: object::ReadRef<'input>>( + id: gimli::SectionId, + file: &object::File<'input, R>, + endian: Endian, + arena_data: &'arena Arena>, +) -> Result, ()> { + // TODO: Unify with dwarfdump.rs in gimli. + let name = id.name(); + match file.section_by_name(name) { + Some(section) => match section.uncompressed_data().unwrap() { + Cow::Borrowed(b) => Ok(gimli::EndianSlice::new(b, endian)), + Cow::Owned(b) => Ok(gimli::EndianSlice::new(arena_data.alloc(b.into()), endian)), + }, + None => Ok(gimli::EndianSlice::new(&[][..], endian)), + } +} + +fn read_exact_or_end(fp: &mut impl io::Read, buf: &mut [u8]) -> anyhow::Result { + let mut read_so_far = 0; + while read_so_far < buf.len() { + let n = fp.read(&mut buf[read_so_far..])?; + if n == 0 { + break; + } + read_so_far += n; + } + Ok(read_so_far) }