Serialize the whole report

This commit is contained in:
Audrey 2024-04-16 11:24:20 -07:00
parent 4b94442999
commit c9b4263817
5 changed files with 249 additions and 47 deletions

130
Cargo.lock generated
View File

@ -2,6 +2,21 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "addr2line"
version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb"
dependencies = [
"gimli",
]
[[package]]
name = "adler"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "aho-corasick"
version = "1.1.3"
@ -64,6 +79,24 @@ name = "anyhow"
version = "1.0.82"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519"
dependencies = [
"backtrace",
]
[[package]]
name = "backtrace"
version = "0.3.71"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d"
dependencies = [
"addr2line",
"cc",
"cfg-if",
"libc",
"miniz_oxide",
"object",
"rustc-demangle",
]
[[package]]
name = "bitflags"
@ -86,6 +119,12 @@ dependencies = [
"generic-array",
]
[[package]]
name = "cc"
version = "1.0.92"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2678b2e3449475e95b0aa6f9b506a28e61b3dc8996592b983695e8ebb58a8b41"
[[package]]
name = "cfg-if"
version = "1.0.0"
@ -206,12 +245,27 @@ dependencies = [
"version_check",
]
[[package]]
name = "gimli"
version = "0.28.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253"
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "hex"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
dependencies = [
"serde",
]
[[package]]
name = "humantime"
version = "2.1.0"
@ -252,6 +306,15 @@ version = "2.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d"
[[package]]
name = "miniz_oxide"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7"
dependencies = [
"adler",
]
[[package]]
name = "nix"
version = "0.28.0"
@ -264,6 +327,15 @@ dependencies = [
"libc",
]
[[package]]
name = "object"
version = "0.32.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441"
dependencies = [
"memchr",
]
[[package]]
name = "ontology"
version = "0.1.0"
@ -271,12 +343,14 @@ dependencies = [
"anyhow",
"clap",
"env_logger",
"hex",
"linux-personality",
"log",
"nix",
"serde",
"serde_json",
"sha2",
"walkdir",
]
[[package]]
@ -326,12 +400,27 @@ version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56"
[[package]]
name = "rustc-demangle"
version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
[[package]]
name = "ryu"
version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "serde"
version = "1.0.197"
@ -415,6 +504,47 @@ version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "walkdir"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
dependencies = [
"same-file",
"winapi-util",
]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596"
dependencies = [
"winapi",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-sys"
version = "0.52.0"

View File

@ -8,10 +8,12 @@ edition = "2021"
[dependencies]
nix = { version = "0.28.0", features = ["ptrace", "process"] }
linux-personality = "1.0.0"
anyhow = "1"
anyhow = { version = "1", features = ["backtrace"] }
log = "0.4"
env_logger = "0.11"
serde = { version = "1", features = ["derive"] }
serde_json = "1.0"
clap = { version = "4.5.4", features = ["derive"] }
sha2 = { version = "0.10" }
walkdir = "2"
hex = { version = "0.4.3", features = ["serde"] }

View File

@ -1,15 +1,35 @@
use std::{path::PathBuf, collections::{HashMap, BTreeMap, HashSet}, fs, io};
use std::{
collections::{BTreeMap, HashMap, HashSet},
fs, io,
path::PathBuf,
};
use sha2::{Sha256, Digest, digest::{generic_array::GenericArray, typenum::U32}};
use serde::{Deserialize, Serialize};
use sha2::{digest::generic_array::{GenericArray, typenum::U32}, Digest, Sha256};
type Sha256Hash = GenericArray<u8, U32>;
/// A SHA-256 digest held as 32 raw bytes.
///
/// The struct is `#[serde(transparent)]` and its only field goes through the `hex`
/// crate's serde adapter, so a value is (de)serialized as that field's hex encoding
/// instead of as a nested struct.
#[derive(Serialize, Deserialize, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
#[serde(transparent)]
pub struct Sha256Hash {
/// Raw digest bytes; encoded/decoded via `hex` when (de)serializing.
#[serde(with = "hex")]
inner: [u8; 32],
}
/// Converts the 32-byte `GenericArray` returned by `Sha256::finalize()` into a
/// [`Sha256Hash`], so call sites can write `h.finalize().into()`.
impl From<GenericArray<u8, U32>> for Sha256Hash {
/// Moves the bytes of `value` into a plain `[u8; 32]` and wraps them.
fn from(value: GenericArray<u8, U32>) -> Self {
Self {
inner: value.into()
}
}
}
/// Content-addressed record of the files seen by a run.
///
/// Files with identical SHA-256 digests share a single [`FileStoreEntry`].
#[derive(Serialize, Deserialize)]
pub struct FileStore {
/// All known file entries; an entry's position in this vector is its index.
pub files: Vec<FileStoreEntry>,
/// Maps each ingested input path to the index of its entry in `files`.
pub input_mapping: HashMap<PathBuf, usize>,
/// Maps a content digest to the index of its entry in `files`.
pub hashes: BTreeMap<Sha256Hash, usize>,
}
#[derive(Serialize, Deserialize)]
pub struct FileStoreEntry {
pub index: usize,
pub hash: Sha256Hash,
@ -26,46 +46,85 @@ impl FileStore {
};
for input in inputs {
let mut fp = fs::File::open(&input)?;
let mut h = Sha256::new();
io::copy(&mut fp, &mut h)?;
let hash = h.finalize();
let index = match result.hashes.entry(hash) {
std::collections::btree_map::Entry::Vacant(e) => {
let index = result.files.len();
result.files.push(FileStoreEntry {
index,
hash,
input_names: HashSet::new(),
output_names: [input.clone()].into(),
});
e.insert(index);
index
}
std::collections::btree_map::Entry::Occupied(e) => {
result.files.get_mut(*e.get()).unwrap().output_names.insert(input.clone());
*e.get()
}
};
result.input_mapping.insert(input, index);
result.ingest_input(input)?;
}
Ok(result)
}
/// Registers `filename` as an input.
///
/// A directory is walked recursively and every regular file below it is ingested
/// under its own path; any other path is opened and ingested directly.
/// Directory entries that the walker fails to read are skipped.
///
/// # Errors
///
/// Fails if `filename` cannot be stat'ed, if a file cannot be opened, or if
/// hashing its content fails.
fn ingest_input(&mut self, filename: PathBuf) -> anyhow::Result<()> {
    // Non-directory case first: hash the single file and finish.
    if !fs::metadata(&filename)?.is_dir() {
        let file = fs::File::open(&filename)?;
        return self.ingest_input_content(filename, file);
    }

    // `flatten()` keeps only the `Ok` entries, dropping walker errors.
    let readable_entries = walkdir::WalkDir::new(&filename).into_iter().flatten();
    for dir_entry in readable_entries {
        if !dir_entry.file_type().is_file() {
            continue;
        }
        let path = dir_entry.path();
        let file = fs::File::open(path)?;
        self.ingest_input_content(path.to_owned(), file)?;
    }
    Ok(())
}
/// Hashes `content` with SHA-256 and records `filename` as an *input* name of the
/// file entry carrying that digest.
///
/// * If the digest has not been seen before, a new [`FileStoreEntry`] is appended to
///   `self.files` (with `filename` as its only input name and no output names) and
///   its index is stored in `self.hashes`.
/// * If the digest is already known, `filename` is added to the existing entry's
///   `input_names`.
///
/// In both cases `self.input_mapping` is updated to map `filename` to the entry's
/// index.
///
/// # Errors
///
/// Returns an error if reading from `content` fails.
///
/// # Panics
///
/// Panics if `self.hashes` refers to an index that is not present in `self.files`.
fn ingest_input_content(
    &mut self,
    filename: PathBuf,
    mut content: impl io::Read,
) -> anyhow::Result<()> {
    // Stream the content through the hasher instead of buffering the whole file.
    let mut hasher = Sha256::new();
    io::copy(&mut content, &mut hasher)?;
    let hash = hasher.finalize().into();

    let index = match self.hashes.entry(hash) {
        std::collections::btree_map::Entry::Vacant(slot) => {
            let index = self.files.len();
            self.files.push(FileStoreEntry {
                index,
                hash,
                input_names: [filename.clone()].into(),
                output_names: HashSet::new(),
            });
            slot.insert(index);
            index
        }
        std::collections::btree_map::Entry::Occupied(slot) => {
            let index = *slot.get();
            // This path is an input: it must go into `input_names`, matching the
            // vacant branch above (it was previously filed under `output_names`).
            self.files
                .get_mut(index)
                .expect("`hashes` must only contain indices of existing `files` entries")
                .input_names
                .insert(filename.clone());
            index
        }
    };
    self.input_mapping.insert(filename, index);
    Ok(())
}
/// Ingests the local file at `filename` as an output.
///
/// Directories are ignored and reported as success; any other path is opened and
/// handed to `ingest_output` together with its name.
///
/// # Errors
///
/// Fails if `filename` cannot be stat'ed or opened, or if `ingest_output` fails.
pub fn ingest_output_local(&mut self, filename: PathBuf) -> anyhow::Result<()> {
    let is_directory = fs::metadata(&filename)?.is_dir();
    if is_directory {
        Ok(())
    } else {
        let handle = fs::File::open(&filename)?;
        self.ingest_output(filename, handle)
    }
}
pub fn ingest_output(&mut self, filename: PathBuf, mut content: impl io::Read) -> anyhow::Result<()> {
pub fn ingest_output(
&mut self,
filename: PathBuf,
mut content: impl io::Read,
) -> anyhow::Result<()> {
let mut h = Sha256::new();
io::copy(&mut content, &mut h)?;
let hash = h.finalize();
let hash = h.finalize().into();
match self.hashes.entry(hash) {
std::collections::btree_map::Entry::Vacant(e) => {
let index = self.files.len();
@ -78,7 +137,11 @@ impl FileStore {
e.insert(index);
}
std::collections::btree_map::Entry::Occupied(e) => {
self.files.get_mut(*e.get()).unwrap().output_names.insert(filename);
self.files
.get_mut(*e.get())
.unwrap()
.output_names
.insert(filename);
}
}

View File

@ -42,9 +42,9 @@ fn main() {
t.start_root_process(cmd).unwrap();
if output.is_none() {
serde_json::to_writer_pretty(fp, &t.log).unwrap();
serde_json::to_writer_pretty(fp, &t.report).unwrap();
} else {
serde_json::to_writer(fp, &t.log).unwrap();
serde_json::to_writer(fp, &t.report).unwrap();
}
}
}

View File

@ -272,6 +272,7 @@ pub fn read_interpreter(exe: &Path) -> Interpreter {
}
*/
/// Holds the recorded [`ProcessState`] values, grouped by process id.
///
/// `Default` yields an empty store.
#[derive(Default)]
pub struct ProcessStateStore {
/// For each pid, the states inserted for it, in insertion order.
processes: HashMap<Pid, Vec<ProcessState>>,
}
@ -308,12 +309,6 @@ pub struct ExecData {
}
impl ProcessStateStore {
pub fn new() -> Self {
Self {
processes: HashMap::new(),
}
}
pub fn insert(&mut self, state: ProcessState) {
self.processes.entry(state.pid).or_default().push(state);
}
@ -413,8 +408,13 @@ impl Display for Event {
pub struct Tracer {
pub store: ProcessStateStore,
pub log: Vec<LogEntry>,
pub start_time: Instant,
pub report: TracerReport,
}
/// The serializable result of a tracing run: the event log together with the
/// file store. This is the value `main` writes out as JSON.
#[derive(Serialize, Deserialize)]
pub struct TracerReport {
/// Log entries in the order they were pushed.
pub log: Vec<LogEntry>,
/// Files ingested as inputs or outputs during the run.
pub files: FileStore,
}
@ -430,7 +430,7 @@ fn ptrace_syscall(pid: Pid, sig: Option<Signal>) -> Result<(), Errno> {
impl Tracer {
pub fn log(&mut self, ident: Identifier, event: Event) {
self.log.push(LogEntry {
self.report.log.push(LogEntry {
ident,
event,
timestamp: Instant::now().duration_since(self.start_time),
@ -444,10 +444,12 @@ impl Tracer {
pub fn new(input: Vec<PathBuf>) -> anyhow::Result<Self> {
let files = FileStore::new(input)?;
Ok(Self {
store: ProcessStateStore::new(),
log: vec![],
store: ProcessStateStore::default(),
start_time: Instant::now(),
files,
report: TracerReport {
log: vec![],
files,
},
})
}
@ -455,7 +457,7 @@ impl Tracer {
let p = self.store.get_current_mut(pid).unwrap();
for mut event in p.pending_syscall_event.drain(..) {
(filter)(&mut event);
self.log.push(LogEntry {
self.report.log.push(LogEntry {
ident: Identifier { pid, machine: 0 },
event,
timestamp: Instant::now().duration_since(self.start_time),
@ -768,6 +770,11 @@ impl Tracer {
}
nix::libc::SYS_open | nix::libc::SYS_openat => {
if result >= 0 {
for pending in p.pending_syscall_event.iter_mut() {
if let Event::FdOpen { source: FdSource::File { path }, .. } = pending {
self.report.files.ingest_output_local(path.clone())?;
}
}
Some(Box::new(move |event| match event {
Event::FdOpen {
fd: ref mut dest, ..