Start adding filestore
This commit is contained in:
parent
9c3349178e
commit
4b94442999
|
@ -77,6 +77,15 @@ version = "2.5.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1"
|
||||
|
||||
[[package]]
|
||||
name = "block-buffer"
|
||||
version = "0.10.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
|
@ -135,6 +144,35 @@ version = "1.0.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
|
||||
|
||||
[[package]]
|
||||
name = "cpufeatures"
|
||||
version = "0.2.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crypto-common"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
"typenum",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "digest"
|
||||
version = "0.10.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
|
||||
dependencies = [
|
||||
"block-buffer",
|
||||
"crypto-common",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "env_filter"
|
||||
version = "0.1.0"
|
||||
|
@ -158,6 +196,16 @@ dependencies = [
|
|||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "generic-array"
|
||||
version = "0.14.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
|
||||
dependencies = [
|
||||
"typenum",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.5.0"
|
||||
|
@ -228,6 +276,7 @@ dependencies = [
|
|||
"nix",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sha2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -314,6 +363,17 @@ dependencies = [
|
|||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sha2"
|
||||
version = "0.10.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"cpufeatures",
|
||||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.11.1"
|
||||
|
@ -331,6 +391,12 @@ dependencies = [
|
|||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typenum"
|
||||
version = "1.17.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.12"
|
||||
|
@ -343,6 +409,12 @@ version = "0.2.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.9.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.52.0"
|
||||
|
|
|
@ -14,3 +14,4 @@ env_logger = "0.11"
|
|||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
clap = { version = "4.5.4", features = ["derive"] }
|
||||
sha2 = { version = "0.10" }
|
||||
|
|
|
@ -0,0 +1,87 @@
|
|||
use std::{path::PathBuf, collections::{HashMap, BTreeMap, HashSet}, fs, io};
|
||||
|
||||
use sha2::{Sha256, Digest, digest::{generic_array::GenericArray, typenum::U32}};
|
||||
|
||||
type Sha256Hash = GenericArray<u8, U32>;
|
||||
|
||||
pub struct FileStore {
|
||||
pub files: Vec<FileStoreEntry>,
|
||||
pub input_mapping: HashMap<PathBuf, usize>,
|
||||
pub hashes: BTreeMap<Sha256Hash, usize>,
|
||||
}
|
||||
|
||||
pub struct FileStoreEntry {
|
||||
pub index: usize,
|
||||
pub hash: Sha256Hash,
|
||||
pub input_names: HashSet<PathBuf>,
|
||||
pub output_names: HashSet<PathBuf>,
|
||||
}
|
||||
|
||||
impl FileStore {
|
||||
pub fn new(inputs: Vec<PathBuf>) -> anyhow::Result<Self> {
|
||||
let mut result = Self {
|
||||
files: vec![],
|
||||
input_mapping: HashMap::new(),
|
||||
hashes: BTreeMap::new(),
|
||||
};
|
||||
|
||||
for input in inputs {
|
||||
let mut fp = fs::File::open(&input)?;
|
||||
|
||||
let mut h = Sha256::new();
|
||||
io::copy(&mut fp, &mut h)?;
|
||||
let hash = h.finalize();
|
||||
|
||||
let index = match result.hashes.entry(hash) {
|
||||
std::collections::btree_map::Entry::Vacant(e) => {
|
||||
let index = result.files.len();
|
||||
result.files.push(FileStoreEntry {
|
||||
index,
|
||||
hash,
|
||||
input_names: HashSet::new(),
|
||||
output_names: [input.clone()].into(),
|
||||
});
|
||||
e.insert(index);
|
||||
index
|
||||
}
|
||||
std::collections::btree_map::Entry::Occupied(e) => {
|
||||
result.files.get_mut(*e.get()).unwrap().output_names.insert(input.clone());
|
||||
*e.get()
|
||||
}
|
||||
};
|
||||
|
||||
result.input_mapping.insert(input, index);
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn ingest_output_local(&mut self, filename: PathBuf) -> anyhow::Result<()> {
|
||||
let fp = fs::File::open(&filename)?;
|
||||
self.ingest_output(filename, fp)
|
||||
}
|
||||
|
||||
pub fn ingest_output(&mut self, filename: PathBuf, mut content: impl io::Read) -> anyhow::Result<()> {
|
||||
let mut h = Sha256::new();
|
||||
io::copy(&mut content, &mut h)?;
|
||||
let hash = h.finalize();
|
||||
|
||||
match self.hashes.entry(hash) {
|
||||
std::collections::btree_map::Entry::Vacant(e) => {
|
||||
let index = self.files.len();
|
||||
self.files.push(FileStoreEntry {
|
||||
index,
|
||||
hash,
|
||||
input_names: HashSet::new(),
|
||||
output_names: [filename].into(),
|
||||
});
|
||||
e.insert(index);
|
||||
}
|
||||
std::collections::btree_map::Entry::Occupied(e) => {
|
||||
self.files.get_mut(*e.get()).unwrap().output_names.insert(filename);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
|
@ -1,4 +1,5 @@
|
|||
mod tracer;
|
||||
mod filestore;
|
||||
|
||||
use std::path::PathBuf;
|
||||
|
||||
|
@ -37,7 +38,7 @@ fn main() {
|
|||
} else {
|
||||
Box::new(std::io::stdout())
|
||||
};
|
||||
let mut t = tracer::Tracer::new().unwrap();
|
||||
let mut t = tracer::Tracer::new(input).unwrap();
|
||||
t.start_root_process(cmd).unwrap();
|
||||
|
||||
if output.is_none() {
|
||||
|
|
|
@ -24,6 +24,8 @@ use nix::{
|
|||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::filestore::FileStore;
|
||||
|
||||
#[derive(Copy, Clone, Serialize, Deserialize, Eq, PartialEq, Debug, Hash)]
|
||||
pub struct Pid(i32);
|
||||
|
||||
|
@ -413,6 +415,7 @@ pub struct Tracer {
|
|||
pub store: ProcessStateStore,
|
||||
pub log: Vec<LogEntry>,
|
||||
pub start_time: Instant,
|
||||
pub files: FileStore,
|
||||
}
|
||||
|
||||
fn ptrace_syscall(pid: Pid, sig: Option<Signal>) -> Result<(), Errno> {
|
||||
|
@ -438,11 +441,13 @@ impl Tracer {
|
|||
self.log(Identifier { pid, machine: 0 }, event);
|
||||
}
|
||||
|
||||
pub fn new() -> anyhow::Result<Self> {
|
||||
pub fn new(input: Vec<PathBuf>) -> anyhow::Result<Self> {
|
||||
let files = FileStore::new(input)?;
|
||||
Ok(Self {
|
||||
store: ProcessStateStore::new(),
|
||||
log: vec![],
|
||||
start_time: Instant::now(),
|
||||
files,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue