From c9b42638174fda15482b26a1afd624675f4ed61e Mon Sep 17 00:00:00 2001 From: Audrey Dutcher Date: Tue, 16 Apr 2024 11:24:20 -0700 Subject: [PATCH] Serialize the whole report --- Cargo.lock | 130 +++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 4 +- src/filestore.rs | 127 +++++++++++++++++++++++++++++++++------------ src/main.rs | 4 +- src/tracer.rs | 31 ++++++----- 5 files changed, 249 insertions(+), 47 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d57aa70..ab41b74 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,21 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "addr2line" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + [[package]] name = "aho-corasick" version = "1.1.3" @@ -64,6 +79,24 @@ name = "anyhow" version = "1.0.82" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519" +dependencies = [ + "backtrace", +] + +[[package]] +name = "backtrace" +version = "0.3.71" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] [[package]] name = "bitflags" @@ -86,6 +119,12 @@ dependencies = [ "generic-array", ] +[[package]] +name = "cc" +version = "1.0.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2678b2e3449475e95b0aa6f9b506a28e61b3dc8996592b983695e8ebb58a8b41" + [[package]] name = "cfg-if" version = "1.0.0" @@ -206,12 +245,27 @@ dependencies = [ "version_check", ] +[[package]] +name = "gimli" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" + [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +dependencies = [ + "serde", +] + [[package]] name = "humantime" version = "2.1.0" @@ -252,6 +306,15 @@ version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" +[[package]] +name = "miniz_oxide" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" +dependencies = [ + "adler", +] + [[package]] name = "nix" version = "0.28.0" @@ -264,6 +327,15 @@ dependencies = [ "libc", ] +[[package]] +name = "object" +version = "0.32.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +dependencies = [ + "memchr", +] + [[package]] name = "ontology" version = "0.1.0" @@ -271,12 +343,14 @@ dependencies = [ "anyhow", "clap", "env_logger", + "hex", "linux-personality", "log", "nix", "serde", "serde_json", "sha2", + "walkdir", ] [[package]] @@ -326,12 +400,27 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" +[[package]] +name = "rustc-demangle" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" + [[package]] name = "ryu" version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "serde" version = "1.0.197" @@ -415,6 +504,47 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-sys" version = "0.52.0" diff --git a/Cargo.toml b/Cargo.toml index f8e4092..3152a3f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,10 +8,12 @@ edition = "2021" [dependencies] nix = { version = "0.28.0", features = ["ptrace", "process"] } linux-personality = "1.0.0" -anyhow = "1" +anyhow = { version = "1", features = ["backtrace"] } log = "0.4" env_logger = "0.11" serde = { version = "1", features = ["derive"] } serde_json = "1.0" clap = { version = "4.5.4", features = ["derive"] } sha2 = { version = "0.10" } +walkdir = "2" +hex = { version = "0.4.3", features = ["serde"] } diff --git a/src/filestore.rs b/src/filestore.rs index efd0e1a..71b2bd2 100644 --- a/src/filestore.rs +++ b/src/filestore.rs @@ -1,15 +1,35 @@ -use std::{path::PathBuf, collections::{HashMap, BTreeMap, HashSet}, fs, io}; +use std::{ + collections::{BTreeMap, HashMap, HashSet}, + fs, io, + path::PathBuf, +}; -use sha2::{Sha256, Digest, digest::{generic_array::GenericArray, typenum::U32}}; +use serde::{Deserialize, Serialize}; +use sha2::{digest::generic_array::{GenericArray, typenum::U32}, Digest, Sha256}; -type Sha256Hash = GenericArray; +#[derive(Serialize, Deserialize, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)] +#[serde(transparent)] +pub struct Sha256Hash { + #[serde(with = "hex")] + inner: [u8; 32], +} +impl From> for Sha256Hash { + fn from(value: GenericArray) -> Self { + Self { + inner: value.into() + } + } +} + +#[derive(Serialize, Deserialize)] pub struct FileStore { pub files: Vec, pub input_mapping: HashMap, pub hashes: BTreeMap, } +#[derive(Serialize, Deserialize)] pub struct FileStoreEntry { pub index: usize, pub hash: Sha256Hash, @@ -26,46 +46,85 @@ impl FileStore { }; for input in inputs { - let mut fp = fs::File::open(&input)?; - - let mut h = Sha256::new(); - io::copy(&mut fp, &mut h)?; - let hash = h.finalize(); - - let index = match result.hashes.entry(hash) { - std::collections::btree_map::Entry::Vacant(e) => { - let index = result.files.len(); - result.files.push(FileStoreEntry { - index, - hash, - input_names: HashSet::new(), - output_names: [input.clone()].into(), - }); - e.insert(index); - index - } - std::collections::btree_map::Entry::Occupied(e) => { - result.files.get_mut(*e.get()).unwrap().output_names.insert(input.clone()); - *e.get() - } - }; - - result.input_mapping.insert(input, index); + result.ingest_input(input)?; } Ok(result) } + fn ingest_input(&mut self, filename: PathBuf) -> anyhow::Result<()> { + let stat = fs::metadata(&filename)?; + if stat.is_dir() { + for entry in walkdir::WalkDir::new(&filename) + .into_iter() + .filter_map(|e| e.ok()) + { + if entry.file_type().is_file() { + let fp = fs::File::open(entry.path())?; + self.ingest_input_content(entry.path().to_owned(), fp)?; + } + } + } else { + let fp = fs::File::open(&filename)?; + self.ingest_input_content(filename, fp)?; + } + Ok(()) + } + + fn ingest_input_content( + &mut self, + filename: PathBuf, + mut content: impl io::Read, + ) -> anyhow::Result<()> { + let mut h = Sha256::new(); + io::copy(&mut content, &mut h)?; + let hash = h.finalize().into(); + + let index = match self.hashes.entry(hash) { + std::collections::btree_map::Entry::Vacant(e) => { + let index = self.files.len(); + self.files.push(FileStoreEntry { + index, + hash, + input_names: [filename.clone()].into(), + output_names: HashSet::new(), + }); + e.insert(index); + index + } + std::collections::btree_map::Entry::Occupied(e) => { + self.files + .get_mut(*e.get()) + .unwrap() + .output_names + .insert(filename.clone()); + *e.get() + } + }; + + self.input_mapping.insert(filename, index); + Ok(()) + } + pub fn ingest_output_local(&mut self, filename: PathBuf) -> anyhow::Result<()> { + let stat = fs::metadata(&filename)?; + if stat.is_dir() { + return Ok(()); + } + let fp = fs::File::open(&filename)?; self.ingest_output(filename, fp) } - pub fn ingest_output(&mut self, filename: PathBuf, mut content: impl io::Read) -> anyhow::Result<()> { + pub fn ingest_output( + &mut self, + filename: PathBuf, + mut content: impl io::Read, + ) -> anyhow::Result<()> { let mut h = Sha256::new(); io::copy(&mut content, &mut h)?; - let hash = h.finalize(); - + let hash = h.finalize().into(); + match self.hashes.entry(hash) { std::collections::btree_map::Entry::Vacant(e) => { let index = self.files.len(); @@ -78,7 +137,11 @@ impl FileStore { e.insert(index); } std::collections::btree_map::Entry::Occupied(e) => { - self.files.get_mut(*e.get()).unwrap().output_names.insert(filename); + self.files + .get_mut(*e.get()) + .unwrap() + .output_names + .insert(filename); } } diff --git a/src/main.rs b/src/main.rs index 6cef3ba..5060010 100644 --- a/src/main.rs +++ b/src/main.rs @@ -42,9 +42,9 @@ fn main() { t.start_root_process(cmd).unwrap(); if output.is_none() { - serde_json::to_writer_pretty(fp, &t.log).unwrap(); + serde_json::to_writer_pretty(fp, &t.report).unwrap(); } else { - serde_json::to_writer(fp, &t.log).unwrap(); + serde_json::to_writer(fp, &t.report).unwrap(); } } } diff --git a/src/tracer.rs b/src/tracer.rs index ab3c711..65217e8 100644 --- a/src/tracer.rs +++ b/src/tracer.rs @@ -272,6 +272,7 @@ pub fn read_interpreter(exe: &Path) -> Interpreter { } */ +#[derive(Default)] pub struct ProcessStateStore { processes: HashMap>, } @@ -308,12 +309,6 @@ pub struct ExecData { } impl ProcessStateStore { - pub fn new() -> Self { - Self { - processes: HashMap::new(), - } - } - pub fn insert(&mut self, state: ProcessState) { self.processes.entry(state.pid).or_default().push(state); } @@ -413,8 +408,13 @@ impl Display for Event { pub struct Tracer { pub store: ProcessStateStore, - pub log: Vec, pub start_time: Instant, + pub report: TracerReport, +} + +#[derive(Serialize, Deserialize)] +pub struct TracerReport { + pub log: Vec, pub files: FileStore, } @@ -430,7 +430,7 @@ fn ptrace_syscall(pid: Pid, sig: Option) -> Result<(), Errno> { impl Tracer { pub fn log(&mut self, ident: Identifier, event: Event) { - self.log.push(LogEntry { + self.report.log.push(LogEntry { ident, event, timestamp: Instant::now().duration_since(self.start_time), @@ -444,10 +444,12 @@ impl Tracer { pub fn new(input: Vec) -> anyhow::Result { let files = FileStore::new(input)?; Ok(Self { - store: ProcessStateStore::new(), - log: vec![], + store: ProcessStateStore::default(), start_time: Instant::now(), - files, + report: TracerReport { + log: vec![], + files, + }, }) } @@ -455,7 +457,7 @@ impl Tracer { let p = self.store.get_current_mut(pid).unwrap(); for mut event in p.pending_syscall_event.drain(..) { (filter)(&mut event); - self.log.push(LogEntry { + self.report.log.push(LogEntry { ident: Identifier { pid, machine: 0 }, event, timestamp: Instant::now().duration_since(self.start_time), @@ -768,6 +770,11 @@ impl Tracer { } nix::libc::SYS_open | nix::libc::SYS_openat => { if result >= 0 { + for pending in p.pending_syscall_event.iter_mut() { + if let Event::FdOpen { source: FdSource::File { path }, .. } = pending { + self.report.files.ingest_output_local(path.clone())?; + } + } Some(Box::new(move |event| match event { Event::FdOpen { fd: ref mut dest, ..