Serialize the whole report
This commit is contained in:
parent
4b94442999
commit
c9b4263817
|
@ -2,6 +2,21 @@
|
|||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "addr2line"
|
||||
version = "0.21.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb"
|
||||
dependencies = [
|
||||
"gimli",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "adler"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "1.1.3"
|
||||
|
@ -64,6 +79,24 @@ name = "anyhow"
|
|||
version = "1.0.82"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "backtrace"
|
||||
version = "0.3.71"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d"
|
||||
dependencies = [
|
||||
"addr2line",
|
||||
"cc",
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"miniz_oxide",
|
||||
"object",
|
||||
"rustc-demangle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
|
@ -86,6 +119,12 @@ dependencies = [
|
|||
"generic-array",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.92"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2678b2e3449475e95b0aa6f9b506a28e61b3dc8996592b983695e8ebb58a8b41"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
|
@ -206,12 +245,27 @@ dependencies = [
|
|||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gimli"
|
||||
version = "0.28.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253"
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||
|
||||
[[package]]
|
||||
name = "hex"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "humantime"
|
||||
version = "2.1.0"
|
||||
|
@ -252,6 +306,15 @@ version = "2.7.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d"
|
||||
|
||||
[[package]]
|
||||
name = "miniz_oxide"
|
||||
version = "0.7.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7"
|
||||
dependencies = [
|
||||
"adler",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nix"
|
||||
version = "0.28.0"
|
||||
|
@ -264,6 +327,15 @@ dependencies = [
|
|||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "object"
|
||||
version = "0.32.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ontology"
|
||||
version = "0.1.0"
|
||||
|
@ -271,12 +343,14 @@ dependencies = [
|
|||
"anyhow",
|
||||
"clap",
|
||||
"env_logger",
|
||||
"hex",
|
||||
"linux-personality",
|
||||
"log",
|
||||
"nix",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sha2",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -326,12 +400,27 @@ version = "0.8.3"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56"
|
||||
|
||||
[[package]]
|
||||
name = "rustc-demangle"
|
||||
version = "0.1.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1"
|
||||
|
||||
[[package]]
|
||||
name = "same-file"
|
||||
version = "1.0.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
|
||||
dependencies = [
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.197"
|
||||
|
@ -415,6 +504,47 @@ version = "0.9.4"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
||||
|
||||
[[package]]
|
||||
name = "walkdir"
|
||||
version = "2.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
|
||||
dependencies = [
|
||||
"same-file",
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||
dependencies = [
|
||||
"winapi-i686-pc-windows-gnu",
|
||||
"winapi-x86_64-pc-windows-gnu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-i686-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-util"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596"
|
||||
dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-x86_64-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.52.0"
|
||||
|
|
|
@ -8,10 +8,12 @@ edition = "2021"
|
|||
[dependencies]
|
||||
nix = { version = "0.28.0", features = ["ptrace", "process"] }
|
||||
linux-personality = "1.0.0"
|
||||
anyhow = "1"
|
||||
anyhow = { version = "1", features = ["backtrace"] }
|
||||
log = "0.4"
|
||||
env_logger = "0.11"
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
clap = { version = "4.5.4", features = ["derive"] }
|
||||
sha2 = { version = "0.10" }
|
||||
walkdir = "2"
|
||||
hex = { version = "0.4.3", features = ["serde"] }
|
||||
|
|
127
src/filestore.rs
127
src/filestore.rs
|
@ -1,15 +1,35 @@
|
|||
use std::{path::PathBuf, collections::{HashMap, BTreeMap, HashSet}, fs, io};
|
||||
use std::{
|
||||
collections::{BTreeMap, HashMap, HashSet},
|
||||
fs, io,
|
||||
path::PathBuf,
|
||||
};
|
||||
|
||||
use sha2::{Sha256, Digest, digest::{generic_array::GenericArray, typenum::U32}};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sha2::{digest::generic_array::{GenericArray, typenum::U32}, Digest, Sha256};
|
||||
|
||||
type Sha256Hash = GenericArray<u8, U32>;
|
||||
/// A SHA-256 digest stored as a plain 32-byte array.
///
/// `#[serde(transparent)]` makes the wrapper (de)serialize as its single
/// field rather than as a one-field struct.
#[derive(Serialize, Deserialize, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
|
||||
#[serde(transparent)]
|
||||
pub struct Sha256Hash {
|
||||
// (De)serialization of the bytes is delegated to the `hex` crate's serde
// support (presumably a hex string in the JSON report — confirm against
// the crate's `serde` feature).
#[serde(with = "hex")]
|
||||
inner: [u8; 32],
|
||||
}
|
||||
|
||||
/// Converts the digest produced by the hasher into the serializable
/// [`Sha256Hash`] wrapper.
impl From<GenericArray<u8, U32>> for Sha256Hash {
    fn from(value: GenericArray<u8, U32>) -> Self {
        // A 32-element `GenericArray<u8, _>` converts directly into `[u8; 32]`.
        let inner: [u8; 32] = value.into();
        Self { inner }
    }
}
|
||||
|
||||
/// Content-addressed record of the files seen as inputs and outputs.
#[derive(Serialize, Deserialize)]
|
||||
pub struct FileStore {
|
||||
/// One entry per distinct content hash; an entry's position in this
/// vector is the index stored in the two maps below.
pub files: Vec<FileStoreEntry>,
|
||||
/// Path of each ingested input file -> index into `files`.
pub input_mapping: HashMap<PathBuf, usize>,
|
||||
/// Content hash -> index into `files`; used to deduplicate identical content.
pub hashes: BTreeMap<Sha256Hash, usize>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct FileStoreEntry {
|
||||
pub index: usize,
|
||||
pub hash: Sha256Hash,
|
||||
|
@ -26,46 +46,85 @@ impl FileStore {
|
|||
};
|
||||
|
||||
for input in inputs {
|
||||
let mut fp = fs::File::open(&input)?;
|
||||
|
||||
let mut h = Sha256::new();
|
||||
io::copy(&mut fp, &mut h)?;
|
||||
let hash = h.finalize();
|
||||
|
||||
let index = match result.hashes.entry(hash) {
|
||||
std::collections::btree_map::Entry::Vacant(e) => {
|
||||
let index = result.files.len();
|
||||
result.files.push(FileStoreEntry {
|
||||
index,
|
||||
hash,
|
||||
input_names: HashSet::new(),
|
||||
output_names: [input.clone()].into(),
|
||||
});
|
||||
e.insert(index);
|
||||
index
|
||||
}
|
||||
std::collections::btree_map::Entry::Occupied(e) => {
|
||||
result.files.get_mut(*e.get()).unwrap().output_names.insert(input.clone());
|
||||
*e.get()
|
||||
}
|
||||
};
|
||||
|
||||
result.input_mapping.insert(input, index);
|
||||
result.ingest_input(input)?;
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn ingest_input(&mut self, filename: PathBuf) -> anyhow::Result<()> {
|
||||
let stat = fs::metadata(&filename)?;
|
||||
if stat.is_dir() {
|
||||
for entry in walkdir::WalkDir::new(&filename)
|
||||
.into_iter()
|
||||
.filter_map(|e| e.ok())
|
||||
{
|
||||
if entry.file_type().is_file() {
|
||||
let fp = fs::File::open(entry.path())?;
|
||||
self.ingest_input_content(entry.path().to_owned(), fp)?;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let fp = fs::File::open(&filename)?;
|
||||
self.ingest_input_content(filename, fp)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn ingest_input_content(
|
||||
&mut self,
|
||||
filename: PathBuf,
|
||||
mut content: impl io::Read,
|
||||
) -> anyhow::Result<()> {
|
||||
let mut h = Sha256::new();
|
||||
io::copy(&mut content, &mut h)?;
|
||||
let hash = h.finalize().into();
|
||||
|
||||
let index = match self.hashes.entry(hash) {
|
||||
std::collections::btree_map::Entry::Vacant(e) => {
|
||||
let index = self.files.len();
|
||||
self.files.push(FileStoreEntry {
|
||||
index,
|
||||
hash,
|
||||
input_names: [filename.clone()].into(),
|
||||
output_names: HashSet::new(),
|
||||
});
|
||||
e.insert(index);
|
||||
index
|
||||
}
|
||||
std::collections::btree_map::Entry::Occupied(e) => {
|
||||
self.files
|
||||
.get_mut(*e.get())
|
||||
.unwrap()
|
||||
.output_names
|
||||
.insert(filename.clone());
|
||||
*e.get()
|
||||
}
|
||||
};
|
||||
|
||||
self.input_mapping.insert(filename, index);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn ingest_output_local(&mut self, filename: PathBuf) -> anyhow::Result<()> {
|
||||
let stat = fs::metadata(&filename)?;
|
||||
if stat.is_dir() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let fp = fs::File::open(&filename)?;
|
||||
self.ingest_output(filename, fp)
|
||||
}
|
||||
|
||||
pub fn ingest_output(&mut self, filename: PathBuf, mut content: impl io::Read) -> anyhow::Result<()> {
|
||||
pub fn ingest_output(
|
||||
&mut self,
|
||||
filename: PathBuf,
|
||||
mut content: impl io::Read,
|
||||
) -> anyhow::Result<()> {
|
||||
let mut h = Sha256::new();
|
||||
io::copy(&mut content, &mut h)?;
|
||||
let hash = h.finalize();
|
||||
|
||||
let hash = h.finalize().into();
|
||||
|
||||
match self.hashes.entry(hash) {
|
||||
std::collections::btree_map::Entry::Vacant(e) => {
|
||||
let index = self.files.len();
|
||||
|
@ -78,7 +137,11 @@ impl FileStore {
|
|||
e.insert(index);
|
||||
}
|
||||
std::collections::btree_map::Entry::Occupied(e) => {
|
||||
self.files.get_mut(*e.get()).unwrap().output_names.insert(filename);
|
||||
self.files
|
||||
.get_mut(*e.get())
|
||||
.unwrap()
|
||||
.output_names
|
||||
.insert(filename);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -42,9 +42,9 @@ fn main() {
|
|||
t.start_root_process(cmd).unwrap();
|
||||
|
||||
if output.is_none() {
|
||||
serde_json::to_writer_pretty(fp, &t.log).unwrap();
|
||||
serde_json::to_writer_pretty(fp, &t.report).unwrap();
|
||||
} else {
|
||||
serde_json::to_writer(fp, &t.log).unwrap();
|
||||
serde_json::to_writer(fp, &t.report).unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -272,6 +272,7 @@ pub fn read_interpreter(exe: &Path) -> Interpreter {
|
|||
}
|
||||
*/
|
||||
|
||||
/// Collection of `ProcessState` values grouped by process id.
///
/// Deriving `Default` yields a store with an empty map.
#[derive(Default)]
|
||||
pub struct ProcessStateStore {
|
||||
// Each pid maps to a list of states; `insert` appends to the list for
// `state.pid`.
processes: HashMap<Pid, Vec<ProcessState>>,
|
||||
}
|
||||
|
@ -308,12 +309,6 @@ pub struct ExecData {
|
|||
}
|
||||
|
||||
impl ProcessStateStore {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
processes: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, state: ProcessState) {
|
||||
self.processes.entry(state.pid).or_default().push(state);
|
||||
}
|
||||
|
@ -413,8 +408,13 @@ impl Display for Event {
|
|||
|
||||
pub struct Tracer {
|
||||
pub store: ProcessStateStore,
|
||||
pub log: Vec<LogEntry>,
|
||||
pub start_time: Instant,
|
||||
pub report: TracerReport,
|
||||
}
|
||||
|
||||
/// The serializable result of a trace: this is what `main` writes out as JSON.
#[derive(Serialize, Deserialize)]
|
||||
pub struct TracerReport {
|
||||
/// Events recorded by the tracer, in the order they were pushed.
pub log: Vec<LogEntry>,
|
||||
/// Files ingested as inputs at construction and as outputs during tracing.
pub files: FileStore,
|
||||
}
|
||||
|
||||
|
@ -430,7 +430,7 @@ fn ptrace_syscall(pid: Pid, sig: Option<Signal>) -> Result<(), Errno> {
|
|||
|
||||
impl Tracer {
|
||||
pub fn log(&mut self, ident: Identifier, event: Event) {
|
||||
self.log.push(LogEntry {
|
||||
self.report.log.push(LogEntry {
|
||||
ident,
|
||||
event,
|
||||
timestamp: Instant::now().duration_since(self.start_time),
|
||||
|
@ -444,10 +444,12 @@ impl Tracer {
|
|||
pub fn new(input: Vec<PathBuf>) -> anyhow::Result<Self> {
|
||||
let files = FileStore::new(input)?;
|
||||
Ok(Self {
|
||||
store: ProcessStateStore::new(),
|
||||
log: vec![],
|
||||
store: ProcessStateStore::default(),
|
||||
start_time: Instant::now(),
|
||||
files,
|
||||
report: TracerReport {
|
||||
log: vec![],
|
||||
files,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -455,7 +457,7 @@ impl Tracer {
|
|||
let p = self.store.get_current_mut(pid).unwrap();
|
||||
for mut event in p.pending_syscall_event.drain(..) {
|
||||
(filter)(&mut event);
|
||||
self.log.push(LogEntry {
|
||||
self.report.log.push(LogEntry {
|
||||
ident: Identifier { pid, machine: 0 },
|
||||
event,
|
||||
timestamp: Instant::now().duration_since(self.start_time),
|
||||
|
@ -768,6 +770,11 @@ impl Tracer {
|
|||
}
|
||||
nix::libc::SYS_open | nix::libc::SYS_openat => {
|
||||
if result >= 0 {
|
||||
for pending in p.pending_syscall_event.iter_mut() {
|
||||
if let Event::FdOpen { source: FdSource::File { path }, .. } = pending {
|
||||
self.report.files.ingest_output_local(path.clone())?;
|
||||
}
|
||||
}
|
||||
Some(Box::new(move |event| match event {
|
||||
Event::FdOpen {
|
||||
fd: ref mut dest, ..
|
||||
|
|
Loading…
Reference in New Issue