start elf parsing
This commit is contained in:
parent
c9b4263817
commit
68aa47a94e
|
@ -8,7 +8,7 @@ version = "0.21.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb"
|
||||
dependencies = [
|
||||
"gimli",
|
||||
"gimli 0.28.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -94,7 +94,7 @@ dependencies = [
|
|||
"cfg-if",
|
||||
"libc",
|
||||
"miniz_oxide",
|
||||
"object",
|
||||
"object 0.32.2",
|
||||
"rustc-demangle",
|
||||
]
|
||||
|
||||
|
@ -119,6 +119,12 @@ dependencies = [
|
|||
"generic-array",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
version = "1.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.92"
|
||||
|
@ -168,7 +174,7 @@ dependencies = [
|
|||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"syn 2.0.58",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -192,6 +198,15 @@ dependencies = [
|
|||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crc32fast"
|
||||
version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crypto-common"
|
||||
version = "0.1.6"
|
||||
|
@ -202,6 +217,17 @@ dependencies = [
|
|||
"typenum",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "derive_more"
|
||||
version = "0.99.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "digest"
|
||||
version = "0.10.7"
|
||||
|
@ -235,6 +261,28 @@ dependencies = [
|
|||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "equivalent"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
|
||||
|
||||
[[package]]
|
||||
name = "fallible-iterator"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
|
||||
|
||||
[[package]]
|
||||
name = "flate2"
|
||||
version = "1.0.28"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e"
|
||||
dependencies = [
|
||||
"crc32fast",
|
||||
"miniz_oxide",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "generic-array"
|
||||
version = "0.14.7"
|
||||
|
@ -251,6 +299,23 @@ version = "0.28.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253"
|
||||
|
||||
[[package]]
|
||||
name = "gimli"
|
||||
version = "0.29.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd"
|
||||
dependencies = [
|
||||
"fallible-iterator",
|
||||
"indexmap",
|
||||
"stable_deref_trait",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.14.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.5.0"
|
||||
|
@ -272,6 +337,16 @@ version = "2.1.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "2.2.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26"
|
||||
dependencies = [
|
||||
"equivalent",
|
||||
"hashbrown",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.11"
|
||||
|
@ -306,6 +381,15 @@ version = "2.7.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d"
|
||||
|
||||
[[package]]
|
||||
name = "memmap2"
|
||||
version = "0.9.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "miniz_oxide"
|
||||
version = "0.7.2"
|
||||
|
@ -336,6 +420,17 @@ dependencies = [
|
|||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "object"
|
||||
version = "0.35.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b8ec7ab813848ba4522158d5517a6093db1ded27575b070f4177b8d12b41db5e"
|
||||
dependencies = [
|
||||
"flate2",
|
||||
"memchr",
|
||||
"ruzstd",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ontology"
|
||||
version = "0.1.0"
|
||||
|
@ -343,13 +438,17 @@ dependencies = [
|
|||
"anyhow",
|
||||
"clap",
|
||||
"env_logger",
|
||||
"gimli 0.29.0",
|
||||
"hex",
|
||||
"linux-personality",
|
||||
"log",
|
||||
"memmap2",
|
||||
"nix",
|
||||
"object 0.35.0",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sha2",
|
||||
"typed-arena",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
|
@ -406,6 +505,17 @@ version = "0.1.23"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
|
||||
|
||||
[[package]]
|
||||
name = "ruzstd"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5174a470eeb535a721ae9fdd6e291c2411a906b96592182d05217591d5c5cf7b"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"derive_more",
|
||||
"twox-hash",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.17"
|
||||
|
@ -438,7 +548,7 @@ checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"syn 2.0.58",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -463,12 +573,35 @@ dependencies = [
|
|||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "stable_deref_trait"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
|
||||
|
||||
[[package]]
|
||||
name = "static_assertions"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.109"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.58"
|
||||
|
@ -480,6 +613,22 @@ dependencies = [
|
|||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "twox-hash"
|
||||
version = "1.6.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"static_assertions",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typed-arena"
|
||||
version = "2.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a"
|
||||
|
||||
[[package]]
|
||||
name = "typenum"
|
||||
version = "1.17.0"
|
||||
|
|
|
@ -17,3 +17,7 @@ clap = { version = "4.5.4", features = ["derive"] }
|
|||
sha2 = { version = "0.10" }
|
||||
walkdir = "2"
|
||||
hex = { version = "0.4.3", features = ["serde"] }
|
||||
gimli = { version = "0.29.0" }
|
||||
object = { version = "0.35" }
|
||||
memmap2 = { version = "0.9.4" }
|
||||
typed-arena = { version = "2" }
|
||||
|
|
111
src/filestore.rs
111
src/filestore.rs
|
@ -1,11 +1,13 @@
|
|||
use std::{
|
||||
collections::{BTreeMap, HashMap, HashSet},
|
||||
fs, io,
|
||||
path::PathBuf,
|
||||
path::PathBuf, borrow::Cow,
|
||||
};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sha2::{digest::generic_array::{GenericArray, typenum::U32}, Digest, Sha256};
|
||||
use object::{Object, ReadCache, ObjectSection};
|
||||
use typed_arena::Arena;
|
||||
|
||||
#[derive(Serialize, Deserialize, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
|
||||
#[serde(transparent)]
|
||||
|
@ -25,7 +27,7 @@ impl From<GenericArray<u8, U32>> for Sha256Hash {
|
|||
#[derive(Serialize, Deserialize)]
|
||||
pub struct FileStore {
|
||||
pub files: Vec<FileStoreEntry>,
|
||||
pub input_mapping: HashMap<PathBuf, usize>,
|
||||
pub filenames: HashMap<PathBuf, usize>,
|
||||
pub hashes: BTreeMap<Sha256Hash, usize>,
|
||||
}
|
||||
|
||||
|
@ -33,15 +35,24 @@ pub struct FileStore {
|
|||
pub struct FileStoreEntry {
|
||||
pub index: usize,
|
||||
pub hash: Sha256Hash,
|
||||
pub format: FileFormat,
|
||||
pub input_names: HashSet<PathBuf>,
|
||||
pub output_names: HashSet<PathBuf>,
|
||||
}
|
||||
|
||||
/// Format classification recorded for each stored file.
///
/// Produced by `FileStore::parse_format`, which inspects the first four bytes
/// of the file content.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub enum FileFormat {
|
||||
/// Content that starts with the ELF magic bytes (`0x7f 'E' 'L' 'F'`).
ELF {
|
||||
// NOTE(review): presumably indices of other entries in `FileStore::files`
// that this ELF refers to -- confirm. `parse_format` currently always
// stores an empty list here.
references: Vec<usize>,
|
||||
},
|
||||
/// Any content that does not start with the ELF magic bytes.
Other,
|
||||
}
|
||||
|
||||
impl FileStore {
|
||||
pub fn new(inputs: Vec<PathBuf>) -> anyhow::Result<Self> {
|
||||
let mut result = Self {
|
||||
files: vec![],
|
||||
input_mapping: HashMap::new(),
|
||||
filenames: HashMap::new(),
|
||||
hashes: BTreeMap::new(),
|
||||
};
|
||||
|
||||
|
@ -74,7 +85,7 @@ impl FileStore {
|
|||
fn ingest_input_content(
|
||||
&mut self,
|
||||
filename: PathBuf,
|
||||
mut content: impl io::Read,
|
||||
mut content: (impl io::Read + io::Seek),
|
||||
) -> anyhow::Result<()> {
|
||||
let mut h = Sha256::new();
|
||||
io::copy(&mut content, &mut h)?;
|
||||
|
@ -83,13 +94,15 @@ impl FileStore {
|
|||
let index = match self.hashes.entry(hash) {
|
||||
std::collections::btree_map::Entry::Vacant(e) => {
|
||||
let index = self.files.len();
|
||||
e.insert(index);
|
||||
let format = self.parse_format(&mut content)?;
|
||||
self.files.push(FileStoreEntry {
|
||||
index,
|
||||
hash,
|
||||
format,
|
||||
input_names: [filename.clone()].into(),
|
||||
output_names: HashSet::new(),
|
||||
});
|
||||
e.insert(index);
|
||||
index
|
||||
}
|
||||
std::collections::btree_map::Entry::Occupied(e) => {
|
||||
|
@ -102,7 +115,10 @@ impl FileStore {
|
|||
}
|
||||
};
|
||||
|
||||
self.input_mapping.insert(filename, index);
|
||||
if index == self.files.len() {
|
||||
}
|
||||
|
||||
self.filenames.insert(filename, index);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -119,32 +135,103 @@ impl FileStore {
|
|||
pub fn ingest_output(
|
||||
&mut self,
|
||||
filename: PathBuf,
|
||||
mut content: impl io::Read,
|
||||
mut content: (impl io::Read + io::Seek),
|
||||
) -> anyhow::Result<()> {
|
||||
let mut h = Sha256::new();
|
||||
io::copy(&mut content, &mut h)?;
|
||||
let hash = h.finalize().into();
|
||||
|
||||
match self.hashes.entry(hash) {
|
||||
let index = match self.hashes.entry(hash) {
|
||||
std::collections::btree_map::Entry::Vacant(e) => {
|
||||
let index = self.files.len();
|
||||
e.insert(index);
|
||||
let format = self.parse_format(&mut content)?;
|
||||
self.files.push(FileStoreEntry {
|
||||
index,
|
||||
format,
|
||||
hash,
|
||||
input_names: HashSet::new(),
|
||||
output_names: [filename].into(),
|
||||
output_names: [filename.clone()].into(),
|
||||
});
|
||||
e.insert(index);
|
||||
index
|
||||
}
|
||||
std::collections::btree_map::Entry::Occupied(e) => {
|
||||
self.files
|
||||
.get_mut(*e.get())
|
||||
.unwrap()
|
||||
.output_names
|
||||
.insert(filename);
|
||||
.insert(filename.clone());
|
||||
*e.get()
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
self.filenames.insert(filename, index);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_format(&mut self, fp: &mut (impl io::Read + io::Seek)) -> anyhow::Result<FileFormat> {
|
||||
fp.seek(io::SeekFrom::Start(0))?;
|
||||
let mut buf = [0; 4];
|
||||
let count = read_exact_or_end(fp, &mut buf)?;
|
||||
let buf = &buf[..count];
|
||||
|
||||
Ok(match buf {
|
||||
[0x7f, b'E', b'L', b'F', ..] => {
|
||||
let read_cache = ReadCache::new(fp);
|
||||
let elf = object::File::parse(&read_cache)?;
|
||||
let endian = if elf.is_little_endian() {
|
||||
gimli::RunTimeEndian::Little
|
||||
} else {
|
||||
gimli::RunTimeEndian::Big
|
||||
};
|
||||
let arena_data = Arena::new();
|
||||
let mut load_section = |id: gimli::SectionId| -> Result<_, _> {
|
||||
load_file_section(id, &elf, endian, &arena_data)
|
||||
};
|
||||
let mut dwarf = gimli::Dwarf::load(&mut load_section).unwrap();
|
||||
let mut units = dwarf.units();
|
||||
while let Ok(Some(unit)) = units.next() {
|
||||
let abbrev = dwarf.abbreviations(&unit)?;
|
||||
let mut entries = unit.entries(&abbrev);
|
||||
while let Ok(Some(entry)) = entries.next_sibling() {
|
||||
}
|
||||
}
|
||||
assert!(elf.is_little_endian());
|
||||
FileFormat::ELF {
|
||||
references: vec![],
|
||||
}
|
||||
},
|
||||
_ => FileFormat::Other,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn load_file_section<'input, 'arena, Endian: gimli::Endianity, R: object::ReadRef<'input>>(
|
||||
id: gimli::SectionId,
|
||||
file: &object::File<'input, R>,
|
||||
endian: Endian,
|
||||
arena_data: &'arena Arena<Cow<'input, [u8]>>,
|
||||
) -> Result<gimli::EndianSlice<'arena, Endian>, ()> {
|
||||
// TODO: Unify with dwarfdump.rs in gimli.
|
||||
let name = id.name();
|
||||
match file.section_by_name(name) {
|
||||
Some(section) => match section.uncompressed_data().unwrap() {
|
||||
Cow::Borrowed(b) => Ok(gimli::EndianSlice::new(b, endian)),
|
||||
Cow::Owned(b) => Ok(gimli::EndianSlice::new(arena_data.alloc(b.into()), endian)),
|
||||
},
|
||||
None => Ok(gimli::EndianSlice::new(&[][..], endian)),
|
||||
}
|
||||
}
|
||||
|
||||
fn read_exact_or_end(fp: &mut impl io::Read, buf: &mut [u8]) -> anyhow::Result<usize> {
|
||||
let mut read_so_far = 0;
|
||||
while read_so_far < buf.len() {
|
||||
let n = fp.read(&mut buf[read_so_far..])?;
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
read_so_far += n;
|
||||
}
|
||||
Ok(read_so_far)
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue