Start adding filestore

This commit is contained in:
Audrey 2024-04-15 13:07:19 -07:00
parent 9c3349178e
commit 4b94442999
5 changed files with 168 additions and 2 deletions

72
Cargo.lock generated
View File

@ -77,6 +77,15 @@ version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1"
[[package]]
name = "block-buffer"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
dependencies = [
"generic-array",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
@ -135,6 +144,35 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
[[package]]
name = "cpufeatures"
version = "0.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504"
dependencies = [
"libc",
]
[[package]]
name = "crypto-common"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
dependencies = [
"generic-array",
"typenum",
]
[[package]]
name = "digest"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
"block-buffer",
"crypto-common",
]
[[package]]
name = "env_filter"
version = "0.1.0"
@ -158,6 +196,16 @@ dependencies = [
"log",
]
[[package]]
name = "generic-array"
version = "0.14.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
dependencies = [
"typenum",
"version_check",
]
[[package]]
name = "heck"
version = "0.5.0"
@ -228,6 +276,7 @@ dependencies = [
"nix",
"serde",
"serde_json",
"sha2",
]
[[package]]
@ -314,6 +363,17 @@ dependencies = [
"serde",
]
[[package]]
name = "sha2"
version = "0.10.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8"
dependencies = [
"cfg-if",
"cpufeatures",
"digest",
]
[[package]]
name = "strsim"
version = "0.11.1"
@ -331,6 +391,12 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "typenum"
version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
[[package]]
name = "unicode-ident"
version = "1.0.12"
@ -343,6 +409,12 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "windows-sys"
version = "0.52.0"

View File

@ -14,3 +14,4 @@ env_logger = "0.11"
serde = { version = "1", features = ["derive"] }
serde_json = "1.0"
clap = { version = "4.5.4", features = ["derive"] }
sha2 = { version = "0.10" }

87
src/filestore.rs Normal file
View File

@ -0,0 +1,87 @@
use std::{path::PathBuf, collections::{HashMap, BTreeMap, HashSet}, fs, io};
use sha2::{Sha256, Digest, digest::{generic_array::GenericArray, typenum::U32}};
/// A raw SHA-256 digest: a fixed-size array of 32 bytes (`U32`), which is the
/// value the store obtains from `Sha256::finalize` and uses as its content key.
type Sha256Hash = GenericArray<u8, U32>;
/// Content-addressed index of the files seen by the tracer.
///
/// Files are deduplicated by the SHA-256 digest of their contents: every
/// distinct digest gets exactly one [`FileStoreEntry`] in `files`.
#[derive(Debug)]
pub struct FileStore {
    /// One entry per distinct content hash. An entry's position in this
    /// vector is the same value as its `index` field.
    pub files: Vec<FileStoreEntry>,
    /// Maps each input path handed to `FileStore::new` to the position in
    /// `files` of the entry holding that file's content.
    pub input_mapping: HashMap<PathBuf, usize>,
    /// Maps a content digest to the position in `files` of its entry.
    pub hashes: BTreeMap<Sha256Hash, usize>,
}

/// A single distinct piece of file content together with the names it is
/// known under.
#[derive(Debug)]
pub struct FileStoreEntry {
    /// Position of this entry inside `FileStore::files`.
    pub index: usize,
    /// SHA-256 digest of the file contents.
    pub hash: Sha256Hash,
    /// Paths under which this content is known as an input.
    // NOTE(review): intended meaning inferred from the field name — confirm
    // against how `FileStore::new` populates the entry.
    pub input_names: HashSet<PathBuf>,
    /// Paths under which this content was ingested as an output.
    pub output_names: HashSet<PathBuf>,
}
impl FileStore {
pub fn new(inputs: Vec<PathBuf>) -> anyhow::Result<Self> {
let mut result = Self {
files: vec![],
input_mapping: HashMap::new(),
hashes: BTreeMap::new(),
};
for input in inputs {
let mut fp = fs::File::open(&input)?;
let mut h = Sha256::new();
io::copy(&mut fp, &mut h)?;
let hash = h.finalize();
let index = match result.hashes.entry(hash) {
std::collections::btree_map::Entry::Vacant(e) => {
let index = result.files.len();
result.files.push(FileStoreEntry {
index,
hash,
input_names: HashSet::new(),
output_names: [input.clone()].into(),
});
e.insert(index);
index
}
std::collections::btree_map::Entry::Occupied(e) => {
result.files.get_mut(*e.get()).unwrap().output_names.insert(input.clone());
*e.get()
}
};
result.input_mapping.insert(input, index);
}
Ok(result)
}
pub fn ingest_output_local(&mut self, filename: PathBuf) -> anyhow::Result<()> {
let fp = fs::File::open(&filename)?;
self.ingest_output(filename, fp)
}
pub fn ingest_output(&mut self, filename: PathBuf, mut content: impl io::Read) -> anyhow::Result<()> {
let mut h = Sha256::new();
io::copy(&mut content, &mut h)?;
let hash = h.finalize();
match self.hashes.entry(hash) {
std::collections::btree_map::Entry::Vacant(e) => {
let index = self.files.len();
self.files.push(FileStoreEntry {
index,
hash,
input_names: HashSet::new(),
output_names: [filename].into(),
});
e.insert(index);
}
std::collections::btree_map::Entry::Occupied(e) => {
self.files.get_mut(*e.get()).unwrap().output_names.insert(filename);
}
}
Ok(())
}
}

View File

@ -1,4 +1,5 @@
mod tracer;
mod filestore;
use std::path::PathBuf;
@ -37,7 +38,7 @@ fn main() {
} else {
Box::new(std::io::stdout())
};
let mut t = tracer::Tracer::new().unwrap();
let mut t = tracer::Tracer::new(input).unwrap();
t.start_root_process(cmd).unwrap();
if output.is_none() {

View File

@ -24,6 +24,8 @@ use nix::{
use serde::{Deserialize, Serialize};
use crate::filestore::FileStore;
#[derive(Copy, Clone, Serialize, Deserialize, Eq, PartialEq, Debug, Hash)]
pub struct Pid(i32);
@ -413,6 +415,7 @@ pub struct Tracer {
pub store: ProcessStateStore,
pub log: Vec<LogEntry>,
pub start_time: Instant,
pub files: FileStore,
}
fn ptrace_syscall(pid: Pid, sig: Option<Signal>) -> Result<(), Errno> {
@ -438,11 +441,13 @@ impl Tracer {
self.log(Identifier { pid, machine: 0 }, event);
}
pub fn new() -> anyhow::Result<Self> {
pub fn new(input: Vec<PathBuf>) -> anyhow::Result<Self> {
let files = FileStore::new(input)?;
Ok(Self {
store: ProcessStateStore::new(),
log: vec![],
start_time: Instant::now(),
files,
})
}