start elf parsing

This commit is contained in:
Audrey 2024-04-16 13:44:44 -07:00
parent c9b4263817
commit 68aa47a94e
3 changed files with 256 additions and 16 deletions

157
Cargo.lock generated
View File

@ -8,7 +8,7 @@ version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb"
dependencies = [
"gimli",
"gimli 0.28.1",
]
[[package]]
@ -94,7 +94,7 @@ dependencies = [
"cfg-if",
"libc",
"miniz_oxide",
"object",
"object 0.32.2",
"rustc-demangle",
]
@ -119,6 +119,12 @@ dependencies = [
"generic-array",
]
[[package]]
name = "byteorder"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]]
name = "cc"
version = "1.0.92"
@ -168,7 +174,7 @@ dependencies = [
"heck",
"proc-macro2",
"quote",
"syn",
"syn 2.0.58",
]
[[package]]
@ -192,6 +198,15 @@ dependencies = [
"libc",
]
[[package]]
name = "crc32fast"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa"
dependencies = [
"cfg-if",
]
[[package]]
name = "crypto-common"
version = "0.1.6"
@ -202,6 +217,17 @@ dependencies = [
"typenum",
]
[[package]]
name = "derive_more"
version = "0.99.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321"
dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "digest"
version = "0.10.7"
@ -235,6 +261,28 @@ dependencies = [
"log",
]
[[package]]
name = "equivalent"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
[[package]]
name = "fallible-iterator"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
[[package]]
name = "flate2"
version = "1.0.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e"
dependencies = [
"crc32fast",
"miniz_oxide",
]
[[package]]
name = "generic-array"
version = "0.14.7"
@ -251,6 +299,23 @@ version = "0.28.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253"
[[package]]
name = "gimli"
version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd"
dependencies = [
"fallible-iterator",
"indexmap",
"stable_deref_trait",
]
[[package]]
name = "hashbrown"
version = "0.14.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
[[package]]
name = "heck"
version = "0.5.0"
@ -272,6 +337,16 @@ version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
[[package]]
name = "indexmap"
version = "2.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26"
dependencies = [
"equivalent",
"hashbrown",
]
[[package]]
name = "itoa"
version = "1.0.11"
@ -306,6 +381,15 @@ version = "2.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d"
[[package]]
name = "memmap2"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322"
dependencies = [
"libc",
]
[[package]]
name = "miniz_oxide"
version = "0.7.2"
@ -336,6 +420,17 @@ dependencies = [
"memchr",
]
[[package]]
name = "object"
version = "0.35.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8ec7ab813848ba4522158d5517a6093db1ded27575b070f4177b8d12b41db5e"
dependencies = [
"flate2",
"memchr",
"ruzstd",
]
[[package]]
name = "ontology"
version = "0.1.0"
@ -343,13 +438,17 @@ dependencies = [
"anyhow",
"clap",
"env_logger",
"gimli 0.29.0",
"hex",
"linux-personality",
"log",
"memmap2",
"nix",
"object 0.35.0",
"serde",
"serde_json",
"sha2",
"typed-arena",
"walkdir",
]
@ -406,6 +505,17 @@ version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
[[package]]
name = "ruzstd"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5174a470eeb535a721ae9fdd6e291c2411a906b96592182d05217591d5c5cf7b"
dependencies = [
"byteorder",
"derive_more",
"twox-hash",
]
[[package]]
name = "ryu"
version = "1.0.17"
@ -438,7 +548,7 @@ checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 2.0.58",
]
[[package]]
@ -463,12 +573,35 @@ dependencies = [
"digest",
]
[[package]]
name = "stable_deref_trait"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "static_assertions"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "syn"
version = "1.0.109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "syn"
version = "2.0.58"
@ -480,6 +613,22 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "twox-hash"
version = "1.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675"
dependencies = [
"cfg-if",
"static_assertions",
]
[[package]]
name = "typed-arena"
version = "2.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a"
[[package]]
name = "typenum"
version = "1.17.0"

View File

@ -17,3 +17,7 @@ clap = { version = "4.5.4", features = ["derive"] }
sha2 = { version = "0.10" }
walkdir = "2"
hex = { version = "0.4.3", features = ["serde"] }
gimli = { version = "0.29.0" }
object = { version = "0.35" }
memmap2 = { version = "0.9.4" }
typed-arena = { version = "2" }

View File

@ -1,11 +1,13 @@
use std::{
collections::{BTreeMap, HashMap, HashSet},
fs, io,
path::PathBuf,
path::PathBuf, borrow::Cow,
};
use serde::{Deserialize, Serialize};
use sha2::{digest::generic_array::{GenericArray, typenum::U32}, Digest, Sha256};
use object::{Object, ReadCache, ObjectSection};
use typed_arena::Arena;
#[derive(Serialize, Deserialize, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
#[serde(transparent)]
@ -25,7 +27,7 @@ impl From<GenericArray<u8, U32>> for Sha256Hash {
#[derive(Serialize, Deserialize)]
pub struct FileStore {
pub files: Vec<FileStoreEntry>,
pub input_mapping: HashMap<PathBuf, usize>,
pub filenames: HashMap<PathBuf, usize>,
pub hashes: BTreeMap<Sha256Hash, usize>,
}
@ -33,15 +35,24 @@ pub struct FileStore {
/// One distinct piece of file content tracked by a `FileStore`.
///
/// Entries are keyed by content hash: ingesting the same bytes under another
/// filename adds that name to the existing entry instead of creating a new one.
pub struct FileStoreEntry {
/// Position of this entry in `FileStore::files` (the length of `files` at the time it was pushed).
pub index: usize,
/// SHA-256 digest of the file content.
pub hash: Sha256Hash,
/// Format computed by `parse_format` when the content was first ingested.
pub format: FileFormat,
/// Filenames under which this content was ingested as an input.
pub input_names: HashSet<PathBuf>,
/// Filenames under which this content was ingested as an output.
pub output_names: HashSet<PathBuf>,
}
/// Format classification of an ingested file, decided from its leading bytes.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum FileFormat {
/// The content starts with the ELF magic bytes (`0x7f 'E' 'L' 'F'`).
ELF {
// NOTE(review): nothing populates this yet (it is always built as an empty
// vector); presumably indices into `FileStore::files` - TODO confirm.
references: Vec<usize>,
},
/// Anything that does not start with the ELF magic.
Other,
}
impl FileStore {
pub fn new(inputs: Vec<PathBuf>) -> anyhow::Result<Self> {
let mut result = Self {
files: vec![],
input_mapping: HashMap::new(),
filenames: HashMap::new(),
hashes: BTreeMap::new(),
};
@ -74,7 +85,7 @@ impl FileStore {
fn ingest_input_content(
&mut self,
filename: PathBuf,
mut content: impl io::Read,
mut content: (impl io::Read + io::Seek),
) -> anyhow::Result<()> {
let mut h = Sha256::new();
io::copy(&mut content, &mut h)?;
@ -83,13 +94,15 @@ impl FileStore {
let index = match self.hashes.entry(hash) {
std::collections::btree_map::Entry::Vacant(e) => {
let index = self.files.len();
e.insert(index);
let format = self.parse_format(&mut content)?;
self.files.push(FileStoreEntry {
index,
hash,
format,
input_names: [filename.clone()].into(),
output_names: HashSet::new(),
});
e.insert(index);
index
}
std::collections::btree_map::Entry::Occupied(e) => {
@ -102,7 +115,10 @@ impl FileStore {
}
};
self.input_mapping.insert(filename, index);
if index == self.files.len() {
}
self.filenames.insert(filename, index);
Ok(())
}
@ -119,32 +135,103 @@ impl FileStore {
pub fn ingest_output(
&mut self,
filename: PathBuf,
mut content: impl io::Read,
mut content: (impl io::Read + io::Seek),
) -> anyhow::Result<()> {
let mut h = Sha256::new();
io::copy(&mut content, &mut h)?;
let hash = h.finalize().into();
match self.hashes.entry(hash) {
let index = match self.hashes.entry(hash) {
std::collections::btree_map::Entry::Vacant(e) => {
let index = self.files.len();
e.insert(index);
let format = self.parse_format(&mut content)?;
self.files.push(FileStoreEntry {
index,
format,
hash,
input_names: HashSet::new(),
output_names: [filename].into(),
output_names: [filename.clone()].into(),
});
e.insert(index);
index
}
std::collections::btree_map::Entry::Occupied(e) => {
self.files
.get_mut(*e.get())
.unwrap()
.output_names
.insert(filename);
.insert(filename.clone());
*e.get()
}
}
};
self.filenames.insert(filename, index);
Ok(())
}
fn parse_format(&mut self, fp: &mut (impl io::Read + io::Seek)) -> anyhow::Result<FileFormat> {
fp.seek(io::SeekFrom::Start(0))?;
let mut buf = [0; 4];
let count = read_exact_or_end(fp, &mut buf)?;
let buf = &buf[..count];
Ok(match buf {
[0x7f, b'E', b'L', b'F', ..] => {
let read_cache = ReadCache::new(fp);
let elf = object::File::parse(&read_cache)?;
let endian = if elf.is_little_endian() {
gimli::RunTimeEndian::Little
} else {
gimli::RunTimeEndian::Big
};
let arena_data = Arena::new();
let mut load_section = |id: gimli::SectionId| -> Result<_, _> {
load_file_section(id, &elf, endian, &arena_data)
};
let mut dwarf = gimli::Dwarf::load(&mut load_section).unwrap();
let mut units = dwarf.units();
while let Ok(Some(unit)) = units.next() {
let abbrev = dwarf.abbreviations(&unit)?;
let mut entries = unit.entries(&abbrev);
while let Ok(Some(entry)) = entries.next_sibling() {
}
}
assert!(elf.is_little_endian());
FileFormat::ELF {
references: vec![],
}
},
_ => FileFormat::Other,
})
}
}
/// Loads the DWARF section identified by `id` from `file` as an endian-aware slice.
///
/// Section data that is already available uncompressed is borrowed directly.
/// Data that had to be decompressed is moved into `arena_data` so that the
/// returned slice can borrow it for `'arena`. A section that is absent from
/// the file yields an empty slice.
///
/// # Errors
///
/// Returns `Err(())` when the section's data cannot be read or decompressed.
fn load_file_section<'input, 'arena, Endian: gimli::Endianity, R: object::ReadRef<'input>>(
    id: gimli::SectionId,
    file: &object::File<'input, R>,
    endian: Endian,
    arena_data: &'arena Arena<Cow<'input, [u8]>>,
) -> Result<gimli::EndianSlice<'arena, Endian>, ()> {
    // TODO: Unify with dwarfdump.rs in gimli.
    let name = id.name();
    match file.section_by_name(name) {
        // A corrupt or unsupported compressed section is reported through the
        // function's error channel instead of panicking.
        Some(section) => match section.uncompressed_data().map_err(|_| ())? {
            Cow::Borrowed(b) => Ok(gimli::EndianSlice::new(b, endian)),
            Cow::Owned(b) => Ok(gimli::EndianSlice::new(arena_data.alloc(b.into()), endian)),
        },
        None => Ok(gimli::EndianSlice::new(&[][..], endian)),
    }
}
/// Fills `buf` from `fp`, stopping early only at end of input.
///
/// Unlike `Read::read_exact`, hitting end of input before `buf` is full is not
/// an error: the number of bytes actually read is returned and the remainder
/// of `buf` is left untouched.
///
/// # Errors
///
/// Returns any I/O error reported by `fp`, except `ErrorKind::Interrupted`,
/// which is retried.
fn read_exact_or_end(fp: &mut impl io::Read, buf: &mut [u8]) -> anyhow::Result<usize> {
    let mut read_so_far = 0;
    while read_so_far < buf.len() {
        match fp.read(&mut buf[read_so_far..]) {
            // A zero-length read means end of input.
            Ok(0) => break,
            Ok(n) => read_so_far += n,
            // An interrupted read is non-fatal and should simply be retried.
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e.into()),
        }
    }
    Ok(read_so_far)
}