Compare commits

..

No commits in common. "4aa887573d36b1ed03c5a3794abe4ab47389ce38" and "6926711e83baf9db0f449cb10c0f3e9fa5644fdf" have entirely different histories.

13 changed files with 991 additions and 1875 deletions

View File

@ -1,2 +0,0 @@
[build]
target = "x86_64-unknown-linux-musl"

253
Cargo.lock generated
View File

@ -104,12 +104,6 @@ version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aad18937a628ec6abcd26d1489012cc0e18c21798210f491af69ded9b881106d"
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bitflags"
version = "2.5.0"
@ -131,12 +125,6 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]]
name = "bytes"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9"
[[package]]
name = "cc"
version = "1.0.92"
@ -151,9 +139,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "cfg_aliases"
version = "0.2.0"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77e53693616d3075149f4ead59bdeecd204ac6b8192d8969757601b74bddf00f"
checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e"
[[package]]
name = "clap"
@ -201,22 +189,6 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
[[package]]
name = "core-foundation"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f"
dependencies = [
"core-foundation-sys",
"libc",
]
[[package]]
name = "core-foundation-sys"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f"
[[package]]
name = "cpufeatures"
version = "0.2.12"
@ -245,23 +217,6 @@ dependencies = [
"typenum",
]
[[package]]
name = "default-net"
version = "0.22.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c5a6569a908354d49b10db3c516d69aca1eccd97562fd31c98b13f00b73ca66"
dependencies = [
"dlopen2",
"libc",
"memalloc",
"netlink-packet-core",
"netlink-packet-route",
"netlink-sys",
"once_cell",
"system-configuration",
"windows",
]
[[package]]
name = "derive_more"
version = "0.99.17"
@ -283,17 +238,6 @@ dependencies = [
"crypto-common",
]
[[package]]
name = "dlopen2"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09b4f5f101177ff01b8ec4ecc81eead416a8aa42819a2869311b3420fa114ffa"
dependencies = [
"libc",
"once_cell",
"winapi",
]
[[package]]
name = "env_filter"
version = "0.1.0"
@ -431,12 +375,6 @@ version = "0.4.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
[[package]]
name = "memalloc"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df39d232f5c40b0891c10216992c2f250c054105cb1e56f0fc9032db6203ecc1"
[[package]]
name = "memchr"
version = "2.7.2"
@ -461,58 +399,11 @@ dependencies = [
"adler",
]
[[package]]
name = "netlink-packet-core"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72724faf704479d67b388da142b186f916188505e7e0b26719019c525882eda4"
dependencies = [
"anyhow",
"byteorder",
"netlink-packet-utils",
]
[[package]]
name = "netlink-packet-route"
version = "0.17.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "053998cea5a306971f88580d0829e90f270f940befd7cf928da179d4187a5a66"
dependencies = [
"anyhow",
"bitflags 1.3.2",
"byteorder",
"libc",
"netlink-packet-core",
"netlink-packet-utils",
]
[[package]]
name = "netlink-packet-utils"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ede8a08c71ad5a95cdd0e4e52facd37190977039a4704eb82a283f713747d34"
dependencies = [
"anyhow",
"byteorder",
"paste",
"thiserror",
]
[[package]]
name = "netlink-sys"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "416060d346fbaf1f23f9512963e3e878f1a78e707cb699ba9215761754244307"
dependencies = [
"bytes",
"libc",
"log",
]
[[package]]
name = "nix"
version = "0.28.0"
source = "git+https://github.com/rhelmot/nix-rs?branch=master#e9f7c1b74ef7581adf1513a3f3c9a965824ee2d4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4"
dependencies = [
"bitflags 2.5.0",
"cfg-if",
@ -540,19 +431,12 @@ dependencies = [
"ruzstd",
]
[[package]]
name = "once_cell"
version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "ontology"
version = "0.1.0"
dependencies = [
"anyhow",
"clap",
"default-net",
"env_logger",
"gimli 0.29.0",
"hex",
@ -568,12 +452,6 @@ dependencies = [
"walkdir",
]
[[package]]
name = "paste"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c"
[[package]]
name = "proc-macro2"
version = "1.0.79"
@ -735,47 +613,6 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "system-configuration"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7"
dependencies = [
"bitflags 1.3.2",
"core-foundation",
"system-configuration-sys",
]
[[package]]
name = "system-configuration-sys"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9"
dependencies = [
"core-foundation-sys",
"libc",
]
[[package]]
name = "thiserror"
version = "1.0.59"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0126ad08bff79f29fc3ae6a55cc72352056dfff61e3ff8bb7129476d44b23aa"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.59"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d1cd413b5d558b4c5bf3680e324a6fa5014e7b7c067a51e69dbdf47eb7148b66"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.58",
]
[[package]]
name = "twox-hash"
version = "1.6.3"
@ -857,37 +694,13 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f"
dependencies = [
"windows-targets 0.48.5",
]
[[package]]
name = "windows-sys"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets 0.52.4",
]
[[package]]
name = "windows-targets"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
dependencies = [
"windows_aarch64_gnullvm 0.48.5",
"windows_aarch64_msvc 0.48.5",
"windows_i686_gnu 0.48.5",
"windows_i686_msvc 0.48.5",
"windows_x86_64_gnu 0.48.5",
"windows_x86_64_gnullvm 0.48.5",
"windows_x86_64_msvc 0.48.5",
"windows-targets",
]
[[package]]
@ -896,93 +709,51 @@ version = "0.52.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b"
dependencies = [
"windows_aarch64_gnullvm 0.52.4",
"windows_aarch64_msvc 0.52.4",
"windows_i686_gnu 0.52.4",
"windows_i686_msvc 0.52.4",
"windows_x86_64_gnu 0.52.4",
"windows_x86_64_gnullvm 0.52.4",
"windows_x86_64_msvc 0.52.4",
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9"
[[package]]
name = "windows_aarch64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675"
[[package]]
name = "windows_i686_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
[[package]]
name = "windows_i686_gnu"
version = "0.52.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3"
[[package]]
name = "windows_i686_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
[[package]]
name = "windows_i686_msvc"
version = "0.52.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02"
[[package]]
name = "windows_x86_64_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177"
[[package]]
name = "windows_x86_64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.4"

View File

@ -6,9 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
#nix = { version = "0.28.0", features = ["ptrace", "process", "fs", "poll"] }
nix = { git = "https://github.com/rhelmot/nix-rs", branch = "master", features = ["ptrace", "process", "fs", "poll"] }
#nix = { path = "../../rust/nix", features = ["ptrace", "process", "fs", "poll"] }
nix = { version = "0.28.0", features = ["ptrace", "process"] }
linux-personality = "1.0.0"
anyhow = { version = "1", features = ["backtrace"] }
log = "0.4"
@ -23,4 +21,3 @@ gimli = { version = "0.29.0" }
object = { version = "0.35" }
memmap2 = { version = "0.9.4" }
typed-arena = { version = "2" }
default-net = "0.22"

View File

@ -1,17 +1,13 @@
use std::{
borrow::Cow,
collections::{BTreeMap, BTreeSet, HashMap, HashSet},
collections::{BTreeMap, HashMap, HashSet},
fs, io,
path::{PathBuf, Path},
path::PathBuf, borrow::Cow,
};
use gimli::{constants, DW_TAG_compile_unit};
use object::{Object, ObjectSection, ReadCache};
use gimli::{constants, AttributeValue, DW_TAG_compile_unit};
use serde::{Deserialize, Serialize};
use sha2::{
digest::generic_array::{typenum::U32, GenericArray},
Digest, Sha256,
};
use sha2::{digest::generic_array::{GenericArray, typenum::U32}, Digest, Sha256};
use object::{Object, ReadCache, ObjectSection};
use typed_arena::Arena;
#[derive(Serialize, Deserialize, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
@ -24,7 +20,7 @@ pub struct Sha256Hash {
impl From<GenericArray<u8, U32>> for Sha256Hash {
fn from(value: GenericArray<u8, U32>) -> Self {
Self {
inner: value.into(),
inner: value.into()
}
}
}
@ -47,7 +43,9 @@ pub struct FileStoreEntry {
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum FileFormat {
ELF,
ELF {
references: Vec<usize>,
},
Other,
}
@ -66,35 +64,6 @@ impl FileStore {
Ok(result)
}
/// Register the minimal set of information associated with a file. Returns whether the file
/// was already known.
pub fn insert(&mut self, path: PathBuf, hash: Sha256Hash) -> bool {
    // Purpose: record `path` as an output name of the file with content hash `hash`.
    // Returns `true` when a file with that hash was already stored, `false` when a new entry
    // (with `FileFormat::Other`) had to be created.
    //
    // In both cases `path` is registered in `filenames`, so a later `update_format(path)` can
    // find it; previously only the new-entry branch did so and the lookup would panic for
    // paths whose content was already known.
    if let Some(&index) = self.hashes.get(&hash) {
        self.files[index].output_names.insert(path.clone());
        self.filenames.insert(path, index);
        true
    } else {
        let index = self.files.len();
        self.files.push(FileStoreEntry {
            index,
            hash,
            format: FileFormat::Other,
            input_names: HashSet::new(),
            output_names: HashSet::from([path.clone()]),
        });
        self.filenames.insert(path, index);
        self.hashes.insert(hash, index);
        false
    }
}
/// Overwrite the stored format of the file registered under `path`.
///
/// # Panics
///
/// Panics if `path` is not present in the filename table.
pub fn update_format(&mut self, path: &Path, format: FileFormat) {
    let idx = match self.filenames.get(path) {
        Some(&idx) => idx,
        None => panic!("update_format called with unknown path {}", path.to_string_lossy()),
    };
    let entry = self.files.get_mut(idx).unwrap();
    entry.format = format;
}
fn ingest_input(&mut self, filename: PathBuf) -> anyhow::Result<()> {
let stat = fs::metadata(&filename)?;
if stat.is_dir() {
@ -120,7 +89,6 @@ impl FileStore {
mut content: (impl io::Read + io::Seek),
) -> anyhow::Result<()> {
let mut h = Sha256::new();
log::debug!("Hashing {}", filename.to_string_lossy());
io::copy(&mut content, &mut h)?;
let hash = h.finalize().into();
@ -128,7 +96,7 @@ impl FileStore {
std::collections::btree_map::Entry::Vacant(e) => {
let index = self.files.len();
e.insert(index);
let (format, refs) = parse_format(&mut content)?;
let format = self.parse_format(&mut content)?;
self.files.push(FileStoreEntry {
index,
hash,
@ -136,28 +104,25 @@ impl FileStore {
input_names: [filename.clone()].into(),
output_names: HashSet::new(),
});
for (reference_path, _reference_hash) in refs { // lazy...
self.ingest_input(reference_path)?;
}
index
}
std::collections::btree_map::Entry::Occupied(e) => {
self.files
.get_mut(*e.get())
.unwrap()
.input_names
.output_names
.insert(filename.clone());
*e.get()
}
};
if index == self.files.len() {}
if index == self.files.len() {
}
self.filenames.insert(filename, index);
Ok(())
}
/*
pub fn ingest_output_local(&mut self, filename: PathBuf) -> anyhow::Result<()> {
let stat = fs::metadata(&filename)?;
if stat.is_dir() {
@ -165,7 +130,7 @@ impl FileStore {
}
let fp = fs::File::open(&filename)?;
self.ingest_output(filename, fp)?;
self.ingest_output(filename, fp);
Ok(())
}
@ -182,7 +147,7 @@ impl FileStore {
std::collections::btree_map::Entry::Vacant(e) => {
let index = self.files.len();
e.insert(index);
let (format, refs) = self.parse_format(&mut content)?;
let format = self.parse_format(&mut content)?;
self.files.push(FileStoreEntry {
index,
format,
@ -206,7 +171,77 @@ impl FileStore {
Ok(index)
}
*/
/// Sniff the format of `fp` from its first four bytes and, for ELF files, ingest the source
/// files named by its DWARF compile units.
///
/// The stream is rewound to offset 0 before reading. When the ELF magic is found, the file is
/// parsed and every `DW_TAG_compile_unit` entry carrying both `DW_AT_comp_dir` and
/// `DW_AT_name` contributes the joined path; each such path is handed to
/// `ingest_dependency_local` and the indices it returns become `FileFormat::ELF::references`.
/// Anything else is reported as `FileFormat::Other`.
///
/// # Errors
///
/// Returns an error on I/O failure, on ELF/DWARF data that cannot be parsed (including
/// section-loading failures, which are returned rather than causing a panic), and when
/// ingesting a referenced file fails.
fn parse_format(&mut self, fp: &mut (impl io::Read + io::Seek)) -> anyhow::Result<FileFormat> {
    fp.seek(io::SeekFrom::Start(0))?;
    let mut buf = [0; 4];
    let count = read_exact_or_end(fp, &mut buf)?;
    let buf = &buf[..count];
    Ok(match buf {
        [0x7f, b'E', b'L', b'F', ..] => {
            let read_cache = ReadCache::new(fp);
            let elf = object::File::parse(&read_cache)?;
            let endian = if elf.is_little_endian() {
                gimli::RunTimeEndian::Little
            } else {
                gimli::RunTimeEndian::Big
            };
            let arena_data = Arena::new();
            let mut load_section = |id: gimli::SectionId| -> Result<_, _> {
                load_file_section(id, &elf, endian, &arena_data)
            };
            // Propagate section-loading failures to the caller instead of panicking.
            let dwarf = gimli::Dwarf::load(&mut load_section)
                .map_err(|e| anyhow::anyhow!("failed to load DWARF sections: {e:?}"))?;
            let mut units = dwarf.units();
            let mut inputs = vec![];
            // Unit iteration stops quietly at the first header that fails to parse.
            while let Ok(Some(unit)) = units.next() {
                let abbrev = dwarf.abbreviations(&unit)?;
                let mut entries = unit.entries(&abbrev);
                while let Some((_, entry)) = entries.next_dfs()? {
                    if entry.tag() == DW_TAG_compile_unit {
                        let mut basename = None;
                        let mut dirname = None;
                        if let Some(name) = entry.attr(constants::DW_AT_name)?.map(|a| a.value()) {
                            if let Ok(name) = dwarf.attr_string(&dwarf.unit(unit)?, name) {
                                basename = Some(PathBuf::from(name.to_string()?));
                            }
                        }
                        if let Some(name) =
                            entry.attr(constants::DW_AT_comp_dir)?.map(|a| a.value())
                        {
                            if let Ok(name) = dwarf.attr_string(&dwarf.unit(unit)?, name) {
                                dirname = Some(PathBuf::from(name.to_string()?));
                            }
                        }
                        if let (Some(dirname), Some(basename)) = (dirname, basename) {
                            inputs.push(dirname.join(basename));
                        }
                    }
                }
            }
            // Ingest each referenced source in order; the first failure aborts, and paths that
            // could not be ingested (`None`) are simply left out.
            let mut references = Vec::new();
            for input in inputs {
                if let Some(index) = self.ingest_dependency_local(input)? {
                    references.push(index);
                }
            }
            FileFormat::ELF { references }
        }
        _ => FileFormat::Other,
    })
}
/// Ingest a file referenced by another file's debug info, if it exists locally.
///
/// Returns `Ok(None)` when `filename` does not exist or is not a regular file, and otherwise
/// the index produced by `ingest_output`.
fn ingest_dependency_local(&mut self, filename: PathBuf) -> anyhow::Result<Option<usize>> {
    // TODO: this needs to try suffixes of the filename against the filepath table to see if it
    // was moved between compilation and ingestion
    let metadata = match fs::metadata(&filename) {
        Ok(meta) => meta,
        Err(err) => {
            if err.kind() == io::ErrorKind::NotFound {
                return Ok(None);
            }
            return Err(err.into());
        }
    };
    if !metadata.is_file() {
        return Ok(None);
    }

    let fp = fs::File::open(&filename)?;
    let index = self.ingest_output(filename, fp)?;
    Ok(Some(index))
}
}
fn load_file_section<'input, 'arena, Endian: gimli::Endianity, R: object::ReadRef<'input>>(
@ -237,83 +272,3 @@ fn read_exact_or_end(fp: &mut impl io::Read, buf: &mut [u8]) -> anyhow::Result<u
}
Ok(read_so_far)
}
/// Sniff the format of `fp` from its first four bytes and, for ELF files, hash the source
/// files named by its DWARF compile units.
///
/// The stream is rewound to offset 0 before reading. For ELF input, every
/// `DW_TAG_compile_unit` entry with both `DW_AT_comp_dir` and `DW_AT_name` contributes the
/// joined path; each path that exists as a regular file is SHA-256 hashed and returned as a
/// `(path, hash)` pair. Non-ELF input yields `FileFormat::Other` and an empty set.
///
/// # Errors
///
/// Returns an error on I/O failure, on ELF/DWARF data that cannot be parsed (including
/// section-loading failures, which are returned rather than causing a panic), and when a
/// referenced file cannot be opened or read.
pub fn parse_format(fp: &mut (impl io::Read + io::Seek)) -> anyhow::Result<(FileFormat, BTreeSet<(PathBuf, Sha256Hash)>)> {
    fp.seek(io::SeekFrom::Start(0))?;
    let mut buf = [0; 4];
    let count = read_exact_or_end(fp, &mut buf)?;
    let buf = &buf[..count];
    Ok(match buf {
        [0x7f, b'E', b'L', b'F', ..] => {
            let read_cache = ReadCache::new(fp);
            let elf = object::File::parse(&read_cache)?;
            let endian = if elf.is_little_endian() {
                gimli::RunTimeEndian::Little
            } else {
                gimli::RunTimeEndian::Big
            };
            let arena_data = Arena::new();
            let mut load_section = |id: gimli::SectionId| -> Result<_, _> {
                load_file_section(id, &elf, endian, &arena_data)
            };
            // Propagate section-loading failures to the caller instead of panicking.
            let dwarf = gimli::Dwarf::load(&mut load_section)
                .map_err(|e| anyhow::anyhow!("failed to load DWARF sections: {e:?}"))?;
            let mut units = dwarf.units();
            let mut inputs = vec![];
            // Unit iteration stops quietly at the first header that fails to parse.
            while let Ok(Some(unit)) = units.next() {
                let abbrev = dwarf.abbreviations(&unit)?;
                let mut entries = unit.entries(&abbrev);
                while let Some((_, entry)) = entries.next_dfs()? {
                    if entry.tag() == DW_TAG_compile_unit {
                        let mut basename = None;
                        let mut dirname = None;
                        if let Some(name) =
                            entry.attr(constants::DW_AT_name)?.map(|a| a.value())
                        {
                            if let Ok(name) = dwarf.attr_string(&dwarf.unit(unit)?, name) {
                                basename = Some(PathBuf::from(name.to_string()?));
                            }
                        }
                        if let Some(name) =
                            entry.attr(constants::DW_AT_comp_dir)?.map(|a| a.value())
                        {
                            if let Ok(name) = dwarf.attr_string(&dwarf.unit(unit)?, name) {
                                dirname = Some(PathBuf::from(name.to_string()?));
                            }
                        }
                        if let (Some(dirname), Some(basename)) = (dirname, basename) {
                            inputs.push(dirname.join(basename));
                        }
                    }
                }
            }

            let mut references = BTreeSet::new();
            for filename in inputs {
                // TODO: this needs to try suffixes of the filename against the filepath table to see if it
                // was moved between compilation and ingestion. but how...
                // Best effort: any failure to stat the path just skips it.
                let metadata = match fs::metadata(&filename) {
                    Ok(m) => m,
                    Err(_) => continue,
                };
                if !metadata.is_file() {
                    continue;
                }
                let mut fp = fs::File::open(&filename)?;
                let mut h = Sha256::new();
                log::debug!("Hashing {}", filename.to_string_lossy());
                io::copy(&mut fp, &mut h)?;
                let hash: Sha256Hash = h.finalize().into();
                references.insert((filename, hash));
            }
            (FileFormat::ELF, references)
        }
        _ => (FileFormat::Other, BTreeSet::new()),
    })
}

View File

@ -1,6 +1,5 @@
mod filestore;
mod reports;
mod tracer;
mod filestore;
use std::path::PathBuf;
@ -14,100 +13,39 @@ struct Cli {
#[derive(Subcommand, Debug, Clone)]
enum Subcommands {
/// Run a command and record its execution
Run {
/// Any number of filepaths to treat as inputs. They will be hashed and accesses to
/// equivalent files will be treated specially.
#[arg(short, long)]
file_scope: Vec<PathBuf>,
input: Vec<PathBuf>,
/// The filepath to dump the json report to. will dump to stdout if unspecified.
#[arg(short, long)]
output: Option<PathBuf>,
/// Set this to
#[arg(short, long)]
mute: bool,
/// The command to run. Have fun!
cmd: Vec<String>,
},
/// Query from the report how in-scope items were used
QueryParameters {
/// The filepath of the report to open. Will read from stdin if unspecified.
input: Option<PathBuf>,
/// The filepath to dump the json report to. will dump to stdout if unspecified.
output: Option<PathBuf>,
},
InternalLaunch {
machine: i32,
connect: String,
cmd: Vec<String>
},
}
}
fn main() {
env_logger::init();
let cli = Cli::parse();
match cli.cmd {
Subcommands::Run {
file_scope,
output,
cmd,
mute,
} => {
Subcommands::Run { input, output, cmd } => {
let fp: Box<dyn std::io::Write> = if let Some(output) = &output {
Box::new(
std::fs::File::options()
.write(true)
.create(true)
.open(output)
.unwrap(),
)
Box::new(std::fs::File::options().write(true).create(true).open(output).unwrap())
} else {
Box::new(std::io::stdout())
};
let t = tracer::server::Tracer::run(file_scope, cmd, mute).unwrap();
let mut t = tracer::Tracer::new(input).unwrap();
t.start_root_process(cmd).unwrap();
if output.is_none() {
serde_json::to_writer_pretty(fp, &t)
serde_json::to_writer_pretty(fp, &t.report).unwrap();
} else {
serde_json::to_writer(fp, &t)
serde_json::to_writer(fp, &t.report).unwrap();
}
.expect("Could not serialize json trace report");
}
Subcommands::QueryParameters { input, output } => {
let fp: Box<dyn std::io::Write> = if let Some(output) = &output {
Box::new(
std::fs::File::options()
.write(true)
.create(true)
.open(output)
.unwrap(),
)
} else {
Box::new(std::io::stdout())
};
let in_report: tracer::types::TracerReport = if let Some(input) = &input {
serde_json::from_reader(std::fs::File::open(input).unwrap())
} else {
serde_json::from_reader(std::io::stdin())
}
.expect("Could not deserialize json trace report");
let out_report = reports::parameters::run(&in_report).unwrap();
if output.is_none() {
serde_json::to_writer_pretty(fp, &out_report)
} else {
serde_json::to_writer(fp, &out_report)
}
.expect("Could not serialize json parameter report");
}
Subcommands::InternalLaunch { machine, connect, cmd } => {
tracer::client::TracerClient::run(machine, connect, cmd).expect("Tracing failed");
}
}
}

View File

@ -1 +0,0 @@
pub mod parameters;

View File

@ -1,34 +0,0 @@
use std::path::PathBuf;
use serde::{Deserialize, Serialize};
use crate::tracer::types::TracerReport;
/// Result of the parameters query produced by `run`.
#[derive(Serialize, Deserialize)]
pub struct ParametersReport {
/// One entry per traced file that has at least one input name and one output name.
pub files: Vec<ParametersReportFile>,
}
/// A single file entry of a `ParametersReport`.
#[derive(Serialize, Deserialize)]
pub struct ParametersReportFile {
// One of the file's input names, as chosen by `run`.
source_name: PathBuf,
// The file's output names, as collected by `run`.
used_names: Vec<PathBuf>,
}
pub fn run(report: &TracerReport) -> anyhow::Result<ParametersReport> {
Ok(ParametersReport {
files: report
.files
.files
.iter()
.filter_map(|rf| {
(!rf.input_names.is_empty() && !rf.output_names.is_empty()).then(|| {
ParametersReportFile {
source_name: rf.input_names.iter().next().cloned().unwrap(),
used_names: rf.output_names.iter().cloned().collect(),
}
})
})
.collect(),
})
}

883
src/tracer.rs Normal file
View File

@ -0,0 +1,883 @@
use std::{
collections::HashMap,
ffi::CString,
ffi::OsString,
os::unix::prelude::OsStringExt,
path::PathBuf,
process::exit,
time::{Duration, Instant},
fmt::{Display, Formatter},
};
use core::fmt;
use nix::{
errno::Errno,
libc::{pid_t, raise, tcsetpgrp, AT_EMPTY_PATH, SIGSTOP, STDIN_FILENO, AT_FDCWD},
sys::{
ptrace::{self, traceme, AddressType},
signal::Signal,
wait::{waitpid, WaitPidFlag, WaitStatus},
},
unistd::{execvp, getpid, setpgid, ForkResult},
};
use serde::{Deserialize, Serialize};
use crate::filestore::FileStore;
/// Newtype around a raw `i32` process id that can be serialized, hashed and compared.
#[derive(Copy, Clone, Serialize, Deserialize, Eq, PartialEq, Debug, Hash)]
pub struct Pid(i32);
impl From<nix::unistd::Pid> for Pid {
fn from(value: nix::unistd::Pid) -> Self {
Self(value.as_raw())
}
}
// Implemented as `From` rather than a hand-written `Into`: the standard blanket impl then
// provides `Into<nix::unistd::Pid> for Pid`, so existing `pid.into()` call sites keep working
// and `nix::unistd::Pid::from(pid)` becomes available as well.
impl From<Pid> for nix::unistd::Pid {
    /// Rebuild the `nix` pid from the stored raw integer.
    fn from(pid: Pid) -> Self {
        nix::unistd::Pid::from_raw(pid.0)
    }
}
impl Display for Pid {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
self.0.fmt(f)
}
}
/// Read a NUL-terminated byte string from the memory of tracee `pid`, starting at `address`,
/// and convert the collected bytes with `ctor`.
///
/// Memory is fetched one ptrace word at a time. Reading stops at the first zero byte. If a
/// word cannot be read, a warning is logged and the bytes gathered so far are returned, so
/// this function currently never returns `Err`.
pub fn read_generic_string<TString>(
    pid: Pid,
    address: AddressType,
    ctor: impl Fn(Vec<u8>) -> TString,
) -> anyhow::Result<TString> {
    let mut buf = Vec::new();
    let mut address = address;
    loop {
        let word = match ptrace::read(pid.into(), address) {
            Err(e) => {
                log::warn!("Cannot read tracee {pid} memory {address:?}: {e}");
                return Ok(ctor(buf));
            }
            Ok(word) => word,
        };
        let word_bytes = word.to_ne_bytes();
        if let Some(end) = word_bytes.iter().position(|&byte| byte == 0) {
            buf.extend_from_slice(&word_bytes[..end]);
            return Ok(ctor(buf));
        }
        buf.extend_from_slice(&word_bytes);
        // Advance by the real size of a ptrace word rather than a hard-coded 8. The address
        // belongs to the tracee and is never dereferenced here, so plain (safe) wrapping
        // pointer arithmetic is the right tool.
        address = address.wrapping_add(word_bytes.len());
    }
}
/// Read a NUL-terminated string from tracee memory as a `CString`.
#[allow(unused)]
pub fn read_cstring(pid: Pid, address: AddressType) -> anyhow::Result<CString> {
    let to_cstring = |bytes: Vec<u8>| CString::new(bytes).unwrap();
    read_generic_string(pid, address, to_cstring)
}
/// Read a NUL-terminated string from tracee memory and interpret the raw bytes as a path.
pub fn read_pathbuf(pid: Pid, address: AddressType) -> anyhow::Result<PathBuf> {
    let to_path = |bytes: Vec<u8>| -> PathBuf { OsString::from_vec(bytes).into() };
    read_generic_string(pid, address, to_path)
}
pub fn read_string(pid: Pid, address: AddressType) -> anyhow::Result<String> {
// Waiting on https://github.com/rust-lang/libs-team/issues/116
read_generic_string(pid, address, |x| String::from_utf8_lossy(&x).to_string())
}
/// Read a NULL-terminated array of pointers from the memory of tracee `pid`, starting at
/// `address`, and decode each pointed-to item with `reader`.
///
/// Reading stops at the first zero pointer. If a slot cannot be read, a warning is logged and
/// the items gathered so far are returned. Errors from `reader` are propagated.
pub fn read_null_ended_array<TItem>(
    pid: Pid,
    mut address: AddressType,
    reader: impl Fn(Pid, AddressType) -> anyhow::Result<TItem>,
) -> anyhow::Result<Vec<TItem>> {
    let mut res = Vec::new();
    loop {
        let ptr = match ptrace::read(pid.into(), address) {
            Err(e) => {
                log::warn!("Cannot read tracee {pid} memory {address:?}: {e}");
                return Ok(res);
            }
            Ok(ptr) => ptr,
        };
        if ptr == 0 {
            return Ok(res);
        }
        res.push(reader(pid, ptr as AddressType)?);
        // Step by the real size of a ptrace word rather than a hard-coded 8. The address
        // belongs to the tracee and is never dereferenced here, so safe wrapping pointer
        // arithmetic is sufficient.
        address = address.wrapping_add(std::mem::size_of_val(&ptr));
    }
}
/// Read a NULL-terminated array of string pointers from tracee memory, decoding each element
/// with `read_cstring`.
#[allow(unused)]
pub fn read_cstring_array(pid: Pid, address: AddressType) -> anyhow::Result<Vec<CString>> {
read_null_ended_array(pid, address, read_cstring)
}
/// Read a NULL-terminated array of string pointers from tracee memory, decoding each element
/// with `read_string`.
#[allow(unused)]
pub fn read_string_array(pid: Pid, address: AddressType) -> anyhow::Result<Vec<String>> {
read_null_ended_array(pid, address, read_string)
}
// Extract the syscall number from a register struct: the `orig_rax` field cast to `i64`.
// NOTE(review): the field name implies an x86_64 register layout — confirm before porting.
macro_rules! syscall_no_from_regs {
($regs:ident) => {
$regs.orig_rax as i64
};
}
// Extract the syscall result from a register struct: the `rax` field cast to `i64`.
// NOTE(review): the field name implies an x86_64 register layout — confirm before porting.
macro_rules! syscall_res_from_regs {
($regs:ident) => {
$regs.rax as i64
};
}
// Select the register holding syscall argument N (a literal 0..=5) from a register struct:
// 0 -> rdi, 1 -> rsi, 2 -> rdx, 3 -> r10, 4 -> r8, 5 -> r9. Any other index fails to expand.
// NOTE(review): the field names imply an x86_64 register layout — confirm before porting.
macro_rules! syscall_arg {
($regs:ident, 0) => {
$regs.rdi
};
($regs:ident, 1) => {
$regs.rsi
};
($regs:ident, 2) => {
$regs.rdx
};
($regs:ident, 3) => {
$regs.r10
};
($regs:ident, 4) => {
$regs.r8
};
($regs:ident, 5) => {
$regs.r9
};
}
/// Read the argument vector of process `pid` from `/proc/<pid>/cmdline`.
///
/// Every argument in that file is terminated by a NUL byte, so the final terminator is
/// removed before splitting; otherwise the result would end with a spurious empty argument.
/// An empty file yields an empty vector.
///
/// # Errors
///
/// Fails if the file cannot be read.
pub fn read_argv(pid: Pid) -> anyhow::Result<Vec<CString>> {
    let filename = format!("/proc/{pid}/cmdline");
    let buf = std::fs::read(filename)?;
    if buf.is_empty() {
        return Ok(Vec::new());
    }
    let args = buf.strip_suffix(&[0u8]).unwrap_or(buf.as_slice());
    Ok(args
        .split(|&c| c == 0)
        .map(CString::new)
        .collect::<Result<Vec<_>, _>>()?)
}
/// Read the command name of process `pid` from `/proc/<pid>/comm`.
///
/// The last byte of the file (the trailing newline) is dropped; the rest must be valid UTF-8.
pub fn read_comm(pid: Pid) -> anyhow::Result<String> {
    let mut raw = std::fs::read(format!("/proc/{pid}/comm"))?;
    // Remove the final byte, i.e. the trailing newline.
    raw.truncate(raw.len().saturating_sub(1));
    let comm = String::from_utf8(raw)?;
    Ok(comm)
}
/// Resolve the current working directory of process `pid` via the `/proc/<pid>/cwd` symlink.
pub fn read_cwd(pid: Pid) -> std::io::Result<PathBuf> {
    std::fs::read_link(format!("/proc/{pid}/cwd"))
}
/// Resolve the path behind file descriptor `fd` of process `pid`.
///
/// `AT_FDCWD` resolves to the process's working directory; any other value is looked up
/// through the `/proc/<pid>/fd/<fd>` symlink.
pub fn read_fd(pid: Pid, fd: i32) -> std::io::Result<PathBuf> {
    if fd == AT_FDCWD {
        read_cwd(pid)
    } else {
        std::fs::read_link(format!("/proc/{pid}/fd/{fd}"))
    }
}
/*
#[derive(Debug)]
pub enum Interpreter {
None,
Shebang(String),
ExecutableUnaccessible,
Error(io::Error),
}
impl Display for Interpreter {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
match self {
Interpreter::None => write!(f, "none"),
Interpreter::Shebang(s) => write!(f, "{:?}", s),
Interpreter::ExecutableUnaccessible => {
write!(f, "executable unaccessible")
}
Interpreter::Error(e) => write!(f, "(err: {e})"),
}
}
}
pub fn read_interpreter_recursive(exe: impl AsRef<Path>) -> Vec<Interpreter> {
let mut exe = Cow::Borrowed(exe.as_ref());
let mut interpreters = Vec::new();
loop {
match read_interpreter(exe.as_ref()) {
Interpreter::Shebang(shebang) => {
exe = Cow::Owned(PathBuf::from(
shebang.split_ascii_whitespace().next().unwrap_or(""),
));
interpreters.push(Interpreter::Shebang(shebang));
}
Interpreter::None => break,
err => {
interpreters.push(err);
break;
}
};
}
interpreters
}
pub fn read_interpreter(exe: &Path) -> Interpreter {
fn err_to_interpreter(e: io::Error) -> Interpreter {
if e.kind() == io::ErrorKind::PermissionDenied || e.kind() == io::ErrorKind::NotFound {
Interpreter::ExecutableUnaccessible
} else {
Interpreter::Error(e)
}
}
let file = match std::fs::File::open(exe) {
Ok(file) => file,
Err(e) => return err_to_interpreter(e),
};
let mut reader = BufReader::new(file);
// First, check if it's a shebang script
let mut buf = [0u8; 2];
if let Err(e) = reader.read_exact(&mut buf) {
return Interpreter::Error(e);
};
if &buf != b"#!" {
return Interpreter::None;
}
// Read the rest of the line
let mut buf = Vec::new();
if let Err(e) = reader.read_until(b'\n', &mut buf) {
return Interpreter::Error(e);
};
// Get trimed shebang line [start, end) indices
// If the shebang line is empty, we don't care
let start = buf
.iter()
.position(|&c| !c.is_ascii_whitespace())
.unwrap_or(0);
let end = buf
.iter()
.rposition(|&c| !c.is_ascii_whitespace())
.map(|x| x + 1)
.unwrap_or(buf.len());
let shebang = String::from_utf8_lossy(&buf[start..end]);
Interpreter::Shebang(shebang.into_owned())
}
*/
/// Per-pid history of process states.
#[derive(Default)]
pub struct ProcessStateStore {
// States recorded for each pid in insertion order; the last element is treated as the
// current state (see `get_current_mut`).
processes: HashMap<Pid, Vec<ProcessState>>,
}
/// Bookkeeping kept for one traced process.
#[derive(Debug)]
pub struct ProcessState {
/// Pid of the process this state describes.
pub pid: Pid,
/// Parent pid, if known; `ProcessState::new` leaves it as `None`.
pub ppid: Option<Pid>,
/// Current lifecycle status; `ProcessState::new` starts with `Running`.
pub status: ProcessStatus,
/// Start time supplied by the caller of `new` (unit not visible here — TODO confirm).
pub start_time: u64,
/// Argument vector, read from `/proc` by `read_argv` when the state is created.
pub argv: Vec<CString>,
/// Command name, read from `/proc` by `read_comm` when the state is created.
pub comm: String,
/// Starts as `true`. NOTE(review): presumably marks that the next syscall stop is a
/// syscall entry — confirm against the tracing loop.
pub presyscall: bool,
/// Starts as `false`. NOTE(review): presumably set once an exec succeeds — confirm.
pub is_exec_successful: bool,
/// Starts as `-1`. NOTE(review): presumably the number of the syscall in progress — confirm.
pub syscall: i64,
/// Events queued for this process; starts empty.
pub pending_syscall_event: Vec<Event>,
}
/// Lifecycle status of a traced process.
///
/// NOTE(review): the exact transitions are driven by code outside this view; the variant
/// descriptions below follow the variant names only.
#[derive(Debug, Clone, PartialEq)]
pub enum ProcessStatus {
/// Presumably: the initial SIGSTOP of the process has been observed.
SigstopReceived,
/// Presumably: the ptrace fork event for the process has been observed.
PtraceForkEventReceived,
/// The process is running; this is the status assigned by `ProcessState::new`.
Running,
/// The process has exited; the payload is presumably its exit code.
Exited(i32),
}
/// Data describing one exec call.
/// NOTE(review): no producer is visible in this part of the file; the field descriptions
/// follow the field names only.
#[derive(Debug)]
pub struct ExecData {
/// Path of the program being executed.
pub filename: PathBuf,
/// Argument strings.
pub argv: Vec<String>,
/// Environment strings.
pub envp: Vec<String>,
/// Working directory.
pub cwd: PathBuf,
//pub interpreters: Vec<Interpreter>,
}
impl ProcessStateStore {
    /// Append `state` to the history kept for its pid, creating the history if needed.
    pub fn insert(&mut self, state: ProcessState) {
        let history = self.processes.entry(state.pid).or_default();
        history.push(state);
    }

    /// Mutable access to the current state of `pid`, i.e. the most recently inserted one.
    /// Returns `None` when nothing has been recorded for that pid.
    pub fn get_current_mut(&mut self, pid: Pid) -> Option<&mut ProcessState> {
        self.processes
            .get_mut(&pid)
            .and_then(|history| history.last_mut())
    }
}
impl ProcessState {
    /// Create the initial state for `pid`.
    ///
    /// The command name and argument vector are read from `/proc`; the process is marked
    /// `Running`, has no parent, no pending events and no syscall recorded (`-1`).
    ///
    /// # Errors
    ///
    /// Fails if the command name or argument vector cannot be read.
    pub fn new(pid: Pid, start_time: u64) -> anyhow::Result<Self> {
        let comm = read_comm(pid)?;
        let argv = read_argv(pid)?;
        let state = Self {
            pid,
            ppid: None,
            status: ProcessStatus::Running,
            start_time,
            argv,
            comm,
            presyscall: true,
            is_exec_successful: false,
            syscall: -1,
            pending_syscall_event: Vec::new(),
        };
        Ok(state)
    }
}
/// A single traced event, as stored in the report log.
#[derive(Debug, Serialize, Deserialize)]
pub enum Event {
/// A fork that created process `child`.
Fork { child: Pid },
/// An exec of program `prog`.
Exec { prog: PathBuf },
/// An exit with status `code`.
Exit { code: i32 },
/// File descriptor `fd` was opened from `source`.
FdOpen { fd: i32, source: FdSource },
/// File descriptor `oldfd` was duplicated to `newfd`.
FdDup { oldfd: i32, newfd: i32 },
/// File descriptor `fd` was closed.
FdClose { fd: i32 },
/// A read from file descriptor `fd`.
FdRead { fd: i32 },
/// A write to file descriptor `fd`.
FdWrite { fd: i32 },
}
/// Where an opened file descriptor came from.
#[derive(Debug, Serialize, Deserialize)]
pub enum FdSource {
/// A file at `path`.
File { path: PathBuf },
/// The terminal.
Tty,
}
/// Identifies the process a log entry belongs to: a machine number plus a pid.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Identifier {
// Machine number; `Tracer::log_root` uses 0.
machine: i32,
// Pid of the process on that machine.
pid: Pid,
}
/// One timestamped event in the tracer log.
#[derive(Debug, Serialize, Deserialize)]
pub struct LogEntry {
// Process the event belongs to.
ident: Identifier,
// What happened.
event: Event,
// Time elapsed since the tracer's `start_time` when the entry was logged.
timestamp: Duration,
}
impl Display for LogEntry {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
write!(
f,
"[{}.{:03} m{}p{}] {}",
self.timestamp.as_secs(),
self.timestamp.as_millis() % 1000,
self.ident.machine,
self.ident.pid,
self.event
)
}
}
impl Display for FdSource {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
match self {
FdSource::File { path } => write!(f, "file {}", path.to_string_lossy()),
FdSource::Tty => write!(f, "the terminal"),
}
}
}
impl Display for Event {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
match self {
Event::Fork { child } => write!(f, "fork {child}"),
Event::Exec { prog } => write!(f, "exec {}", prog.to_string_lossy()),
Event::Exit { code } => write!(f, "exit with {code}"),
Event::FdOpen { fd, source } => write!(f, "open fd {fd} from {source}"),
Event::FdDup { oldfd, newfd } => write!(f, "dup fd {oldfd} to {newfd}"),
Event::FdClose { fd } => write!(f, "close fd {fd}"),
Event::FdRead { fd } => write!(f, "read from fd {fd}"),
Event::FdWrite { fd } => write!(f, "write to fd {fd}"),
}
}
}
/// Local ptrace-based tracer: owns the process table and the report being built.
pub struct Tracer {
// State of every process seen so far.
pub store: ProcessStateStore,
// Reference instant; log timestamps are measured from it.
pub start_time: Instant,
// Events and files collected during the run.
pub report: TracerReport,
}
/// Serializable result of a trace: the event log plus the store of ingested files.
#[derive(Serialize, Deserialize)]
pub struct TracerReport {
// Recorded events, in the order they were logged.
pub log: Vec<LogEntry>,
// File store that executed and opened files are ingested into.
pub files: FileStore,
}
/// Resumes `pid` via `ptrace::syscall`, optionally delivering `sig`.
///
/// `ESRCH` is logged and reported as success; every other error is returned unchanged.
fn ptrace_syscall(pid: Pid, sig: Option<Signal>) -> Result<(), Errno> {
    let outcome = ptrace::syscall(pid.into(), sig);
    if matches!(outcome, Err(Errno::ESRCH)) {
        log::info!("ptrace syscall failed: {pid}, ESRCH, child probably gone!");
        return Ok(());
    }
    outcome
}
impl Tracer {
pub fn log(&mut self, ident: Identifier, event: Event) {
self.report.log.push(LogEntry {
ident,
event,
timestamp: Instant::now().duration_since(self.start_time),
});
}
/// Logs `event` for `pid` on the local machine (machine number 0).
pub fn log_root(&mut self, pid: Pid, event: Event) {
    let ident = Identifier { machine: 0, pid };
    self.log(ident, event);
}
/// Creates a tracer with an empty process table and log, and a file store built from `input`.
///
/// # Errors
/// Fails when `FileStore::new` fails.
pub fn new(input: Vec<PathBuf>) -> anyhow::Result<Self> {
    let report = TracerReport {
        log: Vec::new(),
        files: FileStore::new(input)?,
    };
    let store = ProcessStateStore::default();
    let start_time = Instant::now();
    Ok(Self {
        store,
        start_time,
        report,
    })
}
/// Moves every pending event of `pid` into the report, passing each through `filter` first.
///
/// # Panics
/// Panics when `pid` has no stored state.
fn drain_syscall_events(&mut self, pid: Pid, mut filter: Box<dyn FnMut(&mut Event)>) {
    let started = self.start_time;
    let state = self.store.get_current_mut(pid).unwrap();
    let entries = state.pending_syscall_event.drain(..).map(|mut event| {
        filter(&mut event);
        LogEntry {
            ident: Identifier { pid, machine: 0 },
            event,
            timestamp: Instant::now().duration_since(started),
        }
    });
    self.report.log.extend(entries);
}
/// Forks, runs `args` under ptrace in the child, and services the tracer event loop in the parent.
///
/// The child moves into its own process group, calls `traceme`, stops itself with `SIGSTOP` and
/// then `execvp`s `args`. The parent waits for that stop, registers the child as the root process,
/// makes it the terminal's foreground process group, sets the ptrace options and then handles
/// `waitpid` statuses until the root child exits.
///
/// # Errors
/// Propagates failures from `fork`, `waitpid`, `tcsetpgrp`, ptrace requests, `ProcessState::new`,
/// file ingestion and, in the child, `setpgid`, `traceme`, `CString` conversion and `execvp`.
///
/// # Panics
/// Panics when a status is received for a PID whose state is unwrapped but missing, when an exec
/// event arrives without a queued `Event::Exec`, or (in the child) when `args` is empty.
pub fn start_root_process(&mut self, args: Vec<String>) -> anyhow::Result<()> {
log::trace!("start_root_process: {:?}", args);
if let ForkResult::Parent { child: root_child } = unsafe { nix::unistd::fork()? } {
waitpid(root_child, Some(WaitPidFlag::WSTOPPED))?; // wait for child to stop
let root_child = root_child.into();
log::trace!("child stopped");
let mut root_child_state = ProcessState::new(root_child, 0)?;
root_child_state.ppid = Some(getpid().into());
self.store.insert(root_child_state);
// Set foreground process group of the terminal
if -1 == unsafe { tcsetpgrp(STDIN_FILENO, root_child.0) } {
return Err(Errno::last().into());
}
// restart child
log::trace!("resuming child");
let ptrace_opts = {
use nix::sys::ptrace::Options;
Options::PTRACE_O_TRACEEXEC
| Options::PTRACE_O_TRACEEXIT
| Options::PTRACE_O_EXITKILL
| Options::PTRACE_O_TRACESYSGOOD
| Options::PTRACE_O_TRACEFORK
| Options::PTRACE_O_TRACECLONE
| Options::PTRACE_O_TRACEVFORK
};
ptrace::setoptions(root_child.into(), ptrace_opts)?;
// restart child
self.seccomp_aware_cont(root_child)?;
// Main event loop: one iteration per wait status, until the root child exits.
loop {
let status = waitpid(None, Some(WaitPidFlag::__WALL))?;
// log::trace!("waitpid: {:?}", status);
match status {
WaitStatus::Stopped(pid, sig) => {
let pid = pid.into();
log::trace!("stopped: {pid}, sig {:?}", sig);
match sig {
Signal::SIGSTOP => {
log::trace!("sigstop event, child: {pid}");
if let Some(state) = self.store.get_current_mut(pid) {
if state.status == ProcessStatus::PtraceForkEventReceived {
log::trace!("sigstop event received after ptrace fork event, pid: {pid}");
state.status = ProcessStatus::Running;
self.seccomp_aware_cont(pid)?;
} else if pid != root_child {
// NOTE(review): this branch (and a SIGSTOP for the root child) does not resume the
// tracee — confirm that leaving it stopped is intended.
log::error!("Unexpected SIGSTOP: {state:?}")
}
} else {
log::trace!("sigstop event received before ptrace fork event, pid: {pid}");
let mut state = ProcessState::new(pid, 0)?;
state.status = ProcessStatus::SigstopReceived;
self.store.insert(state);
}
// https://stackoverflow.com/questions/29997244/occasionally-missing-ptrace-event-vfork-when-running-ptrace
// DO NOT send PTRACE_SYSCALL until we receive the PTRACE_EVENT_FORK, etc.
}
Signal::SIGCHLD => {
// From lurk:
//
// The SIGCHLD signal is sent to a process when a child process terminates, is interrupted, or resumes after being interrupted.
// This means that if our tracee forked and said fork exits before the parent, the parent will get stopped.
// Therefore issue a PTRACE_SYSCALL request to the parent to continue execution.
// This is also important if we trace without the following forks option.
self.seccomp_aware_cont_with_signal(pid, Signal::SIGCHLD)?;
}
_ => {
// Just deliver the signal to tracee
self.seccomp_aware_cont_with_signal(pid, sig)?;
}
}
}
WaitStatus::Exited(pid, code) => {
let pid = pid.into();
log::trace!("exited: pid {}, code {:?}", pid, code);
self.log_root(pid, Event::Exit { code });
self.store.get_current_mut(pid).unwrap().status =
ProcessStatus::Exited(code);
// The loop ends once the root child has exited.
if pid == root_child {
break;
}
}
WaitStatus::PtraceEvent(pid, sig, evt) => {
log::trace!("ptrace event: {:?} {:?}", sig, evt);
match evt {
nix::libc::PTRACE_EVENT_FORK
| nix::libc::PTRACE_EVENT_VFORK
| nix::libc::PTRACE_EVENT_CLONE => {
// The event message carries the PID of the newly created child.
let new_child = Pid(ptrace::getevent(pid.into())? as pid_t);
log::trace!(
"ptrace fork event, evt {evt}, pid: {pid}, child: {new_child}"
);
self.log_root(pid.into(), Event::Fork { child: new_child });
if let Some(state) = self.store.get_current_mut(new_child) {
if state.status == ProcessStatus::SigstopReceived {
log::trace!("ptrace fork event received after sigstop, pid: {pid}, child: {new_child}");
state.status = ProcessStatus::Running;
state.ppid = Some(pid.into());
self.seccomp_aware_cont(new_child)?;
} else if new_child != root_child {
log::error!("Unexpected fork event: {state:?}")
}
} else {
log::trace!("ptrace fork event received before sigstop, pid: {pid}, child: {new_child}");
let mut state = ProcessState::new(new_child, 0)?;
state.status = ProcessStatus::PtraceForkEventReceived;
state.ppid = Some(pid.into());
self.store.insert(state);
}
// Resume parent
self.seccomp_aware_cont(pid.into())?;
}
nix::libc::PTRACE_EVENT_EXEC => {
log::trace!("exec event");
let p = self.store.get_current_mut(pid.into()).unwrap();
assert!(!p.presyscall);
// After execve or execveat, in syscall exit event,
// the registers might be clobbered(e.g. aarch64).
// So we need to determine whether exec is successful here.
// PTRACE_EVENT_EXEC only happens for successful exec.
p.is_exec_successful = true;
// The program path was queued as an `Event::Exec` at syscall entry.
let path = p.pending_syscall_event.iter().find_map(|e| match e { Event::Exec { prog, .. } => Some(prog.clone()), _ => None }).unwrap();
self.report.files.ingest_output_local(path)?;
self.drain_syscall_events(pid.into(), Box::new(|_| {}));
// Don't use seccomp_aware_cont here because that will skip the next syscall exit stop
self.syscall_enter_cont(pid.into())?;
}
nix::libc::PTRACE_EVENT_EXIT => {
log::trace!("exit event");
self.seccomp_aware_cont(pid.into())?;
}
nix::libc::PTRACE_EVENT_SECCOMP => {
log::trace!("seccomp event");
self.on_syscall_enter(pid.into())?;
}
_ => {
log::trace!("other event");
self.seccomp_aware_cont(pid.into())?;
}
}
}
WaitStatus::Signaled(pid, sig, _) => {
let pid: Pid = pid.into();
log::debug!("signaled: {pid}, {:?}", sig);
// Mirror the shell convention: exit with 128 + signal number when the root child is killed.
if pid == root_child {
exit(128 + (sig as i32))
}
}
WaitStatus::PtraceSyscall(pid) => {
let pid = pid.into();
// `presyscall` tells whether this stop is a syscall entry or a syscall exit.
let presyscall = self.store.get_current_mut(pid).unwrap().presyscall;
if presyscall {
self.on_syscall_enter(pid)?;
} else {
self.on_syscall_exit(pid)?;
}
}
_ => {}
}
}
} else {
// Child: become a process-group leader, request tracing, stop, then exec the target.
let me = getpid();
setpgid(me, me)?;
traceme()?;
if 0 != unsafe { raise(SIGSTOP) } {
log::error!("raise failed!");
exit(-1);
}
let args = args
.into_iter()
.map(CString::new)
.collect::<Result<Vec<CString>, _>>()?;
execvp(&args[0], &args)?;
}
Ok(())
}
/// Handles a syscall-entry stop of `pid`.
///
/// Flips `presyscall`, records the syscall number and queues an [`Event`] for the syscalls the
/// tracer cares about (exec, open, read/write, dup, close). The event is only logged later, once
/// the outcome of the syscall is known. Finally the tracee is resumed.
///
/// If the tracee has vanished (`ESRCH` from `getregs`), nothing is queued and `Ok(())` is
/// returned.
///
/// # Errors
/// Propagates `getregs` errors other than `ESRCH`, failures while reading tracee memory or
/// resolving a path, and failures to resume the tracee.
///
/// # Panics
/// Panics when `pid` has no stored state.
fn on_syscall_enter(&mut self, pid: Pid) -> anyhow::Result<()> {
    let p = self.store.get_current_mut(pid).unwrap();
    p.presyscall = !p.presyscall;
    // SYSCALL ENTRY
    let regs = match ptrace::getregs(pid.into()) {
        Ok(regs) => regs,
        Err(Errno::ESRCH) => {
            log::info!("ptrace getregs failed: {pid}, ESRCH, child probably gone!");
            return Ok(());
        }
        e => e?,
    };
    let syscallno = syscall_no_from_regs!(regs);
    p.syscall = syscallno;
    match syscallno {
        nix::libc::SYS_execveat => {
            log::trace!("pre execveat");
            // int execveat(int dirfd, const char *pathname,
            //              char *const _Nullable argv[],
            //              char *const _Nullable envp[],
            //              int flags);
            let dirfd = syscall_arg!(regs, 0) as i32;
            let pathname = read_string(pid, syscall_arg!(regs, 1) as AddressType)?;
            let flags = syscall_arg!(regs, 4) as i32;
            let filename = resolve_filename_at_fd(pid, pathname, dirfd, flags)?;
            p.pending_syscall_event.push(Event::Exec { prog: filename });
        }
        nix::libc::SYS_execve => {
            log::trace!("pre execve");
            let filename = read_pathbuf(pid, syscall_arg!(regs, 0) as AddressType)?;
            p.pending_syscall_event.push(Event::Exec { prog: filename });
        }
        nix::libc::SYS_open => {
            let path = read_pathbuf(pid, syscall_arg!(regs, 0) as AddressType)?;
            // `fd` is a placeholder; the exit handler fills in the real descriptor.
            p.pending_syscall_event.push(Event::FdOpen {
                source: FdSource::File { path },
                fd: -1,
            });
        }
        nix::libc::SYS_openat => {
            // int openat(int dirfd, const char *pathname, int flags, ...);
            let dirfd = syscall_arg!(regs, 0) as i32;
            let pathname = read_string(pid, syscall_arg!(regs, 1) as AddressType)?;
            // The third argument holds open(2) O_* flags, not AT_* flags. Forwarding it would let
            // O_DSYNC (same value as AT_EMPTY_PATH) be mistaken for AT_EMPTY_PATH, so resolve the
            // path with no AT_* flags at all.
            let path = resolve_filename_at_fd(pid, pathname, dirfd, 0)?;
            // Same placeholder as for `open`, replaced at syscall exit.
            p.pending_syscall_event.push(Event::FdOpen {
                source: FdSource::File { path },
                fd: -1,
            });
        }
        nix::libc::SYS_read
        | nix::libc::SYS_pread64
        | nix::libc::SYS_readv
        | nix::libc::SYS_preadv
        | nix::libc::SYS_preadv2 => {
            let fd = syscall_arg!(regs, 0) as i32;
            p.pending_syscall_event.push(Event::FdRead { fd });
        }
        nix::libc::SYS_write
        | nix::libc::SYS_pwrite64
        | nix::libc::SYS_writev
        | nix::libc::SYS_pwritev
        | nix::libc::SYS_pwritev2 => {
            let fd = syscall_arg!(regs, 0) as i32;
            p.pending_syscall_event.push(Event::FdWrite { fd });
        }
        nix::libc::SYS_dup | nix::libc::SYS_dup2 | nix::libc::SYS_dup3 => {
            let oldfd = syscall_arg!(regs, 0) as i32;
            // `newfd` is taken from the syscall result at exit.
            p.pending_syscall_event
                .push(Event::FdDup { oldfd, newfd: -1 });
        }
        nix::libc::SYS_fcntl => {
            let fd = syscall_arg!(regs, 0) as i32;
            let cmd = syscall_arg!(regs, 1) as i32;
            // Both duplication commands return the new descriptor as the syscall result.
            if matches!(cmd, nix::libc::F_DUPFD | nix::libc::F_DUPFD_CLOEXEC) {
                p.pending_syscall_event.push(Event::FdDup {
                    oldfd: fd,
                    newfd: -1,
                });
            }
        }
        nix::libc::SYS_close => {
            let fd = syscall_arg!(regs, 0) as i32;
            p.pending_syscall_event.push(Event::FdClose { fd });
        }
        _ => {}
    }
    self.syscall_enter_cont(pid)?;
    Ok(())
}
/// Handles a syscall-exit stop of `pid`.
///
/// Flips `presyscall`, reads the syscall result and decides what happens to the events queued at
/// entry:
/// * execve/execveat: `is_exec_successful` is reset, `comm` is re-read and the queue is cleared;
/// * any other syscall that failed (negative result): the queue is cleared;
/// * open/openat: the opened paths are ingested and the events are logged with the returned fd;
/// * dup/dup2/dup3/fcntl: the events are logged with the returned fd as `newfd`;
/// * everything else: the events are logged unchanged.
///
/// The tracee is resumed afterwards. If it has vanished (`ESRCH` from `getregs`), `Ok(())` is
/// returned without touching the queue.
///
/// # Panics
/// Panics when `pid` has no stored state.
fn on_syscall_exit(&mut self, pid: Pid) -> anyhow::Result<()> {
    // SYSCALL EXIT
    let state = self.store.get_current_mut(pid).unwrap();
    state.presyscall = !state.presyscall;
    let regs = match ptrace::getregs(pid.into()) {
        Err(Errno::ESRCH) => {
            log::info!("ptrace getregs failed: {pid}, ESRCH, child probably gone!");
            return Ok(());
        }
        other => other?,
    };
    let result = syscall_res_from_regs!(regs);
    let filter: Option<Box<dyn FnMut(&mut Event)>> = match state.syscall {
        nix::libc::SYS_execve => {
            log::trace!("post execve");
            state.is_exec_successful = false;
            // The command name may have changed with the new program image.
            state.comm = read_comm(pid)?;
            None
        }
        nix::libc::SYS_execveat => {
            log::trace!("post execveat");
            state.is_exec_successful = false;
            // The command name may have changed with the new program image.
            state.comm = read_comm(pid)?;
            None
        }
        // Every remaining syscall is only reported when it succeeded.
        _ if result < 0 => None,
        nix::libc::SYS_open | nix::libc::SYS_openat => {
            for queued in state.pending_syscall_event.iter() {
                if let Event::FdOpen {
                    source: FdSource::File { path },
                    ..
                } = queued
                {
                    self.report.files.ingest_output_local(path.clone())?;
                }
            }
            // Replace the placeholder descriptor with the one the kernel returned.
            Some(Box::new(move |event: &mut Event| {
                if let Event::FdOpen { fd, .. } = event {
                    *fd = result as i32;
                }
            }))
        }
        nix::libc::SYS_dup
        | nix::libc::SYS_dup2
        | nix::libc::SYS_dup3
        | nix::libc::SYS_fcntl => {
            // The syscall result is the duplicated descriptor.
            Some(Box::new(move |event: &mut Event| {
                if let Event::FdDup { newfd, .. } = event {
                    *newfd = result as i32;
                }
            }))
        }
        _ => Some(Box::new(|_: &mut Event| {})),
    };
    match filter {
        Some(filter) => self.drain_syscall_events(pid, filter),
        None => state.pending_syscall_event.clear(),
    }
    self.seccomp_aware_cont(pid)?;
    Ok(())
}
/// Resumes `pid` through `ptrace_syscall` without delivering a signal.
fn syscall_enter_cont(&self, pid: Pid) -> Result<(), Errno> {
ptrace_syscall(pid, None)
}
/// Resumes `pid` without delivering a signal.
///
/// The name refers to a seccomp-bpf mode in which `ptrace::cont` would be used instead of
/// `ptrace::syscall`, so that the next syscall-entry stop is skipped and the seccomp stop serves
/// as the entry stop. NOTE(review): no such switch is visible here — this method always calls
/// `ptrace_syscall`, exactly like `syscall_enter_cont`.
fn seccomp_aware_cont(&self, pid: Pid) -> Result<(), Errno> {
ptrace_syscall(pid, None)
}
/// Resumes `pid` through `ptrace_syscall`, delivering `sig` to it.
fn seccomp_aware_cont_with_signal(&self, pid: Pid, sig: Signal) -> Result<(), Errno> {
ptrace_syscall(pid, Some(sig))
}
}
/// Resolves the `(dirfd, pathname, flags)` triple of an `*at` syscall made by `pid` to a path.
///
/// * An absolute `pathname` is returned unchanged; `dirfd` is ignored.
/// * An empty `pathname` combined with `AT_EMPTY_PATH` refers to `dirfd` itself.
/// * Anything else is joined onto the path `dirfd` refers to.
///
/// # Errors
/// Fails when the target of `dirfd` cannot be read via `read_fd`.
fn resolve_filename_at_fd(
    pid: Pid,
    pathname: String,
    dirfd: i32,
    flags: i32,
) -> anyhow::Result<PathBuf> {
    let refers_to_dirfd = pathname.is_empty() && (flags & AT_EMPTY_PATH) != 0;
    let pathname = PathBuf::from(pathname);
    if pathname.is_absolute() {
        return Ok(pathname);
    }
    let base = read_fd(pid, dirfd)?;
    if refers_to_dirfd {
        Ok(base)
    } else {
        Ok(base.join(pathname))
    }
}

View File

@ -1,930 +0,0 @@
use std::{
collections::{BTreeSet, HashMap}, ffi::CString, ffi::OsString, io::Write, net::TcpStream, os::unix::prelude::OsStringExt, path::PathBuf, process::exit, time::Instant
};
use nix::{
errno::Errno,
libc::{pid_t, raise, tcsetpgrp, AT_EMPTY_PATH, AT_FDCWD, SIGSTOP, STDIN_FILENO, user_regs_struct},
sys::{
ptrace::{self, traceme, AddressType},
signal::Signal,
wait::{waitpid, WaitPidFlag, WaitStatus},
},
unistd::{execvp, getpid, setpgid, ForkResult},
};
use serde_json::de::IoRead;
use sha2::{Sha256, Digest};
use crate::filestore::{parse_format, Sha256Hash};
use super::{types::*, docker::instrument_docker_run_execve};
// Number of bytes the address is advanced per `ptrace::read`/`ptrace::write` call below.
// FIXME: hard-coded to 8, i.e. the word size of a 64-bit target.
const WORD_SIZE: usize = 8; // FIXME
/// Reads a NUL-terminated byte string from the memory of tracee `pid`, starting at `address`,
/// and converts the bytes (terminator excluded) with `ctor`.
///
/// Memory is read one word at a time. If a read fails, a warning is logged and whatever has been
/// collected so far is returned, so the result may be truncated.
///
/// # Errors
/// Currently never returns `Err`; read failures are reported through the log only.
pub fn read_generic_string<TString>(
    pid: Pid,
    address: AddressType,
    ctor: impl Fn(Vec<u8>) -> TString,
) -> anyhow::Result<TString> {
    let mut buf = Vec::new();
    let mut address = address;
    loop {
        let word = match ptrace::read(pid.into(), address) {
            Ok(word) => word,
            Err(e) => {
                log::warn!("Cannot read tracee {pid} memory {address:?}: {e}");
                return Ok(ctor(buf));
            }
        };
        let word_bytes = word.to_ne_bytes();
        match word_bytes.iter().position(|&byte| byte == 0) {
            Some(end) => {
                // Terminator found: keep only the bytes in front of it.
                buf.extend_from_slice(&word_bytes[..end]);
                return Ok(ctor(buf));
            }
            None => buf.extend_from_slice(&word_bytes),
        }
        // `address` lives in the tracee's address space and does not point into any allocation of
        // this process, so `pointer::add` would violate its safety contract. Wrapping arithmetic
        // is safe and produces the same address.
        address = address.wrapping_byte_add(WORD_SIZE);
    }
}
/// Writes `data` into the memory of tracee `pid` at `address`, one word per ptrace write.
///
/// A final chunk shorter than a word is padded with zero bytes, so the write always covers whole
/// words.
///
/// # Panics
/// Panics when `address` is not a multiple of `WORD_SIZE`.
///
/// # Errors
/// Fails as soon as one `ptrace::write` fails; earlier words stay written.
pub fn write_bytes(pid: Pid, mut address: AddressType, data: &[u8]) -> anyhow::Result<()> {
    assert_eq!(address as usize % WORD_SIZE, 0);
    for piece in data.chunks(WORD_SIZE) {
        // Zero-initialised buffer provides the padding for a short last chunk.
        let mut padded = [0u8; WORD_SIZE];
        padded[..piece.len()].copy_from_slice(piece);
        let word = i64::from_ne_bytes(padded);
        ptrace::write(pid.into(), address, word)?;
        address = address.wrapping_byte_add(WORD_SIZE);
    }
    Ok(())
}
/// Reads a NUL-terminated string from tracee `pid` at `address` as a `CString`.
#[allow(unused)]
pub fn read_cstring(pid: Pid, address: AddressType) -> anyhow::Result<CString> {
    // `read_generic_string` stops at the first NUL, so the bytes never contain one.
    let to_cstring = |bytes: Vec<u8>| CString::new(bytes).unwrap();
    read_generic_string(pid, address, to_cstring)
}
/// Reads a NUL-terminated string from tracee `pid` at `address` as a `PathBuf`, keeping the raw
/// bytes without any UTF-8 conversion.
pub fn read_pathbuf(pid: Pid, address: AddressType) -> anyhow::Result<PathBuf> {
    let to_path = |bytes: Vec<u8>| -> PathBuf { OsString::from_vec(bytes).into() };
    read_generic_string(pid, address, to_path)
}
pub fn read_string(pid: Pid, address: AddressType) -> anyhow::Result<String> {
// Waiting on https://github.com/rust-lang/libs-team/issues/116
read_generic_string(pid, address, |x| String::from_utf8_lossy(&x).to_string())
}
/// Reads a null-terminated array of pointers from tracee `pid`, starting at `address`, and turns
/// every non-null pointer into an item with `reader`.
///
/// If reading a pointer slot fails, a warning is logged and the items collected so far are
/// returned, so the result may be truncated.
///
/// # Errors
/// Fails when `reader` fails for one of the pointers.
pub fn read_null_ended_array<TItem>(
    pid: Pid,
    mut address: AddressType,
    reader: impl Fn(Pid, AddressType) -> anyhow::Result<TItem>,
) -> anyhow::Result<Vec<TItem>> {
    let mut res = Vec::new();
    loop {
        let ptr = match ptrace::read(pid.into(), address) {
            Ok(ptr) => ptr,
            Err(e) => {
                log::warn!("Cannot read tracee {pid} memory {address:?}: {e}");
                return Ok(res);
            }
        };
        if ptr == 0 {
            // Null slot marks the end of the array.
            return Ok(res);
        }
        res.push(reader(pid, ptr as AddressType)?);
        // `address` lives in the tracee's address space and does not point into any allocation of
        // this process, so `pointer::add` would violate its safety contract. Wrapping arithmetic
        // is safe and produces the same address.
        address = address.wrapping_byte_add(WORD_SIZE);
    }
}
/// Reads a null-terminated array of string pointers from tracee `pid` at `address`, decoding
/// every element with `read_cstring`.
#[allow(unused)]
pub fn read_cstring_array(pid: Pid, address: AddressType) -> anyhow::Result<Vec<CString>> {
read_null_ended_array(pid, address, read_cstring)
}
/// Reads a null-terminated array of string pointers from tracee `pid` at `address`, decoding
/// every element with `read_string`.
#[allow(unused)]
pub fn read_string_array(pid: Pid, address: AddressType) -> anyhow::Result<Vec<String>> {
read_null_ended_array(pid, address, read_string)
}
// Yields the syscall number held in a register set: the `orig_rax` field, as `i64`.
macro_rules! syscall_no_from_regs {
($regs:ident) => {
$regs.orig_rax as i64
};
}
// Yields the syscall result held in a register set: the `rax` field, as `i64`.
macro_rules! syscall_res_from_regs {
($regs:ident) => {
$regs.rax as i64
};
}
// Yields the stack pointer held in a register set: the `rsp` field, as `i64`.
macro_rules! stack_ptr_from_regs {
($regs:ident) => {
$regs.rsp as i64
};
}
/// Returns syscall argument number `idx` (0-5) from `regs`.
///
/// Arguments are taken from `rdi`, `rsi`, `rdx`, `r10`, `r8` and `r9`, in that order.
///
/// # Panics
/// Panics when `idx` is greater than 5.
fn syscall_arg(regs: &user_regs_struct, idx: usize) -> u64 {
    let arguments = [regs.rdi, regs.rsi, regs.rdx, regs.r10, regs.r8, regs.r9];
    match arguments.get(idx) {
        Some(&value) => value,
        None => panic!("Bad syscall argument index"),
    }
}
/// Stores `value` as syscall argument number `idx` (0-5) in `regs`.
///
/// Arguments live in `rdi`, `rsi`, `rdx`, `r10`, `r8` and `r9`, in that order.
///
/// # Panics
/// Panics when `idx` is greater than 5.
fn set_syscall_arg(regs: &mut user_regs_struct, idx: usize, value: u64) {
    let slot = match idx {
        0 => &mut regs.rdi,
        1 => &mut regs.rsi,
        2 => &mut regs.rdx,
        3 => &mut regs.r10,
        4 => &mut regs.r8,
        5 => &mut regs.r9,
        _ => panic!("Bad syscall argument index"),
    };
    *slot = value;
}
/// Reads the command line of `pid` from `/proc/<pid>/cmdline`.
///
/// The file holds the arguments separated by NUL bytes, with one more NUL after the last
/// argument. That trailing NUL is removed before splitting so that no spurious empty argument is
/// appended. An empty file (a process without a command line) yields an empty vector.
///
/// # Errors
/// Fails when the file cannot be read.
pub fn read_argv(pid: Pid) -> anyhow::Result<Vec<CString>> {
    let raw = std::fs::read(format!("/proc/{pid}/cmdline"))?;
    if raw.is_empty() {
        return Ok(Vec::new());
    }
    // Drop the NUL terminating the last argument; keep everything if it is missing.
    let args = raw.strip_suffix(&[0]).unwrap_or(&raw);
    Ok(args
        .split(|&byte| byte == 0)
        .map(CString::new)
        .collect::<Result<Vec<_>, _>>()?)
}
/// Reads the command name of `pid` from `/proc/<pid>/comm`.
///
/// A single trailing newline is removed if present. Process names are arbitrary bytes, so invalid
/// UTF-8 is replaced with U+FFFD instead of making the read fail.
///
/// # Errors
/// Fails when the file cannot be read.
pub fn read_comm(pid: Pid) -> anyhow::Result<String> {
    let mut raw = std::fs::read(format!("/proc/{pid}/comm"))?;
    if raw.last() == Some(&b'\n') {
        raw.pop();
    }
    Ok(String::from_utf8_lossy(&raw).into_owned())
}
/// Returns the working directory of `pid` by resolving the `/proc/<pid>/cwd` link.
pub fn read_cwd(pid: Pid) -> std::io::Result<PathBuf> {
    std::fs::read_link(format!("/proc/{pid}/cwd"))
}
/// Returns the path that descriptor `fd` of `pid` refers to.
///
/// `AT_FDCWD` stands for the working directory of the process; any other descriptor is resolved
/// through the `/proc/<pid>/fd/<fd>` link.
pub fn read_fd(pid: Pid, fd: i32) -> std::io::Result<PathBuf> {
    match fd {
        AT_FDCWD => read_cwd(pid),
        _ => std::fs::read_link(format!("/proc/{pid}/fd/{fd}")),
    }
}
/*
#[derive(Debug)]
pub enum Interpreter {
None,
Shebang(String),
ExecutableUnaccessible,
Error(io::Error),
}
impl Display for Interpreter {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
match self {
Interpreter::None => write!(f, "none"),
Interpreter::Shebang(s) => write!(f, "{:?}", s),
Interpreter::ExecutableUnaccessible => {
write!(f, "executable unaccessible")
}
Interpreter::Error(e) => write!(f, "(err: {e})"),
}
}
}
pub fn read_interpreter_recursive(exe: impl AsRef<Path>) -> Vec<Interpreter> {
let mut exe = Cow::Borrowed(exe.as_ref());
let mut interpreters = Vec::new();
loop {
match read_interpreter(exe.as_ref()) {
Interpreter::Shebang(shebang) => {
exe = Cow::Owned(PathBuf::from(
shebang.split_ascii_whitespace().next().unwrap_or(""),
));
interpreters.push(Interpreter::Shebang(shebang));
}
Interpreter::None => break,
err => {
interpreters.push(err);
break;
}
};
}
interpreters
}
pub fn read_interpreter(exe: &Path) -> Interpreter {
fn err_to_interpreter(e: io::Error) -> Interpreter {
if e.kind() == io::ErrorKind::PermissionDenied || e.kind() == io::ErrorKind::NotFound {
Interpreter::ExecutableUnaccessible
} else {
Interpreter::Error(e)
}
}
let file = match std::fs::File::open(exe) {
Ok(file) => file,
Err(e) => return err_to_interpreter(e),
};
let mut reader = BufReader::new(file);
// First, check if it's a shebang script
let mut buf = [0u8; 2];
if let Err(e) = reader.read_exact(&mut buf) {
return Interpreter::Error(e);
};
if &buf != b"#!" {
return Interpreter::None;
}
// Read the rest of the line
let mut buf = Vec::new();
if let Err(e) = reader.read_until(b'\n', &mut buf) {
return Interpreter::Error(e);
};
// Get trimmed shebang line [start, end) indices
// If the shebang line is empty, we don't care
let start = buf
.iter()
.position(|&c| !c.is_ascii_whitespace())
.unwrap_or(0);
let end = buf
.iter()
.rposition(|&c| !c.is_ascii_whitespace())
.map(|x| x + 1)
.unwrap_or(buf.len());
let shebang = String::from_utf8_lossy(&buf[start..end]);
Interpreter::Shebang(shebang.into_owned())
}
*/
/// Registry of the traced processes, keyed by PID.
///
/// Every PID maps to a list of `ProcessState` records; `insert` appends to that list and
/// `get_current_mut` treats the last record as the current one.
#[derive(Default)]
pub struct ProcessStateStore {
processes: HashMap<Pid, Vec<ProcessState>>,
}
/// Per-process bookkeeping kept by the tracer.
#[derive(Debug)]
pub struct ProcessState {
// PID of the traced process.
pub pid: Pid,
// Parent PID; `None` until the tracer learns it (set for the root child and on fork events).
pub ppid: Option<Pid>,
// Where the process is in the fork/SIGSTOP handshake, or whether it is running or has exited.
pub status: ProcessStatus,
// Caller-supplied start time; the callers visible in this file pass 0.
pub start_time: u64,
// Command line read from `/proc/<pid>/cmdline`; refreshed by `update`.
pub argv: Vec<CString>,
// Command name read from `/proc/<pid>/comm`; refreshed by `update`.
pub comm: String,
// `true` when the next syscall stop is a syscall entry.
pub presyscall: bool,
// Set when a PTRACE_EVENT_EXEC is observed for the process.
pub is_exec_successful: bool,
// Number of the syscall recorded at the last entry stop; -1 before the first one.
pub syscall: i64,
// Events queued at syscall entry, waiting for the outcome of the syscall.
pub pending_syscall_event: Vec<Event>,
}
/// Lifecycle state of a traced process as seen by the tracer loop.
#[derive(Debug, Clone, PartialEq)]
pub enum ProcessStatus {
// The new child's SIGSTOP was observed before the parent's ptrace fork event.
SigstopReceived,
// The parent's ptrace fork event was observed before the new child's SIGSTOP.
PtraceForkEventReceived,
// The process is being traced normally.
Running,
// The process exited with the contained exit code.
Exited(i32),
}
/// Description of an exec call: program path, arguments, environment and working directory.
///
/// NOTE(review): nothing in the visible part of this file constructs or reads this type —
/// confirm whether it is still needed.
#[derive(Debug)]
pub struct ExecData {
pub filename: PathBuf,
pub argv: Vec<String>,
pub envp: Vec<String>,
pub cwd: PathBuf,
//pub interpreters: Vec<Interpreter>,
}
impl ProcessStateStore {
    /// Appends `state` to the history kept for its PID, creating the history on first use.
    pub fn insert(&mut self, state: ProcessState) {
        let history = self.processes.entry(state.pid).or_default();
        history.push(state);
    }

    /// Returns the most recently inserted state for `pid`, or `None` when the PID is unknown.
    pub fn get_current_mut(&mut self, pid: Pid) -> Option<&mut ProcessState> {
        // The newest record in a PID's history is the live process.
        self.processes
            .get_mut(&pid)
            .and_then(|history| history.last_mut())
    }
}
impl ProcessState {
    /// Builds the initial state for `pid`, reading its command name and command line from `/proc`.
    ///
    /// The new state is `Running`, has no parent recorded, expects a syscall entry next and has
    /// no pending events.
    ///
    /// # Errors
    /// Fails when `read_comm` or `read_argv` fails for `pid`.
    pub fn new(pid: Pid, start_time: u64) -> anyhow::Result<Self> {
        let comm = read_comm(pid)?;
        let argv = read_argv(pid)?;
        let state = Self {
            pid,
            ppid: None,
            status: ProcessStatus::Running,
            start_time,
            argv,
            comm,
            presyscall: true,
            is_exec_successful: false,
            syscall: -1,
            pending_syscall_event: Vec::new(),
        };
        Ok(state)
    }

    /// Reports whether the first command-line argument is exactly `docker`.
    pub fn is_docker(&self) -> bool {
        matches!(self.argv.first(), Some(arg0) if arg0.as_bytes() == b"docker")
    }

    /// Re-reads `comm` and then `argv` from `/proc`.
    ///
    /// # Errors
    /// Fails when either read fails; `comm` is already replaced if only the `argv` read fails.
    pub fn update(&mut self) -> anyhow::Result<()> {
        let pid = self.pid;
        self.comm = read_comm(pid)?;
        self.argv = read_argv(pid)?;
        Ok(())
    }
}
/// Resumes `pid` via `ptrace::syscall`, optionally delivering `sig`.
///
/// `ESRCH` is logged and reported as success; every other error is returned unchanged.
fn ptrace_syscall(pid: Pid, sig: Option<Signal>) -> Result<(), Errno> {
    let outcome = ptrace::syscall(pid.into(), sig);
    if matches!(outcome, Err(Errno::ESRCH)) {
        log::info!("ptrace syscall failed: {pid}, ESRCH, child probably gone!");
        return Ok(());
    }
    outcome
}
/// Tracer that forwards its observations to a tracer server over TCP.
pub struct TracerClient {
// Address string that was passed to `TcpStream::connect`.
connect: String,
// Connection to the tracer server.
sock: TcpStream,
// State of every process seen so far.
store: ProcessStateStore,
// Reference instant; log timestamps are measured from it.
start_time: Instant,
// Log entries collected but not yet sent to the server.
pending_events: Vec<LogEntry>,
// (path, SHA-256) pairs of hashed files not yet sent to the server.
pending_files: BTreeSet<(PathBuf, Sha256Hash)>,
// Machine number stamped into the `Identifier` of every event logged by this client.
machine: i32,
}
impl TracerClient {
pub fn log(&mut self, ident: Identifier, event: Event) {
self.pending_events.push(LogEntry {
ident,
event,
timestamp: Instant::now().duration_since(self.start_time),
});
}
/// Queues `event` for `pid` on this client's machine.
pub fn log_root(&mut self, pid: Pid, event: Event) {
    let ident = Identifier {
        machine: self.machine,
        pid,
    };
    self.log(ident, event);
}
/// Hashes the regular file at `path` with SHA-256 and queues `(path, hash)` for the server.
///
/// Nothing is done when `pid` is a docker process (see `ProcessState::is_docker`) or when
/// `path` is not a regular file.
///
/// # Errors
/// Fails when the file's metadata cannot be read or the file cannot be opened or read.
///
/// # Panics
/// Panics when `pid` has no stored state.
fn ingest_file(&mut self, pid: Pid, path: PathBuf) -> anyhow::Result<()> {
    let from_docker = self.store.get_current_mut(pid).unwrap().is_docker();
    // Short-circuits: the metadata lookup is skipped for docker processes.
    if from_docker || !std::fs::metadata(&path)?.is_file() {
        return Ok(());
    }
    let mut file = std::fs::File::open(&path)?;
    let mut hasher = Sha256::new();
    log::debug!("Hashing {} (client)", path.to_string_lossy());
    std::io::copy(&mut file, &mut hasher)?;
    let digest = hasher.finalize().into();
    self.pending_files.insert((path, digest));
    Ok(())
}
/// Sends `msg` to the server as one line of JSON and waits for the server's next JSON value.
///
/// # Errors
/// Fails when the message cannot be serialized or written, when the reply is not valid JSON for
/// `TracerServerRequest`, or when the server closes the connection before replying (previously
/// this last case panicked).
fn commune_server(&mut self, msg: TracerClientMessage) -> anyhow::Result<TracerServerRequest> {
    serde_json::to_writer(&self.sock, &msg)?;
    self.sock.write_all(b"\n")?;
    // `next()` yields `None` on a clean end of stream, i.e. when the server hung up.
    let reply: TracerServerRequest =
        serde_json::StreamDeserializer::new(&mut IoRead::new(&self.sock))
            .next()
            .ok_or_else(|| {
                anyhow::anyhow!("tracer server closed the connection before replying")
            })??;
    Ok(reply)
}
/// Asks the server for a fresh machine id.
///
/// # Errors
/// Fails when the exchange with the server fails, or when the server answers `AllocateId` with
/// anything other than `AllocatedId` (a protocol violation by the peer, reported as an error
/// rather than a panic).
fn allocate_machine(&mut self) -> anyhow::Result<i32> {
    match self.commune_server(TracerClientMessage::AllocateId {})? {
        TracerServerRequest::AllocatedId { id } => Ok(id),
        _ => anyhow::bail!("Server did not respond to AllocateId with AllocatedId"),
    }
}
/// Moves every pending event of `pid` into `pending_events`, passing each through `filter`
/// first.
///
/// For a docker process (see `ProcessState::is_docker`) nothing is moved and the pending events
/// stay queued. NOTE(review): confirm that leaving them queued, rather than discarding them, is
/// intended.
///
/// # Panics
/// Panics when `pid` has no stored state.
fn drain_syscall_events(&mut self, pid: Pid, mut filter: Box<dyn FnMut(&mut Event)>) {
    let machine = self.machine;
    let started = self.start_time;
    let state = self.store.get_current_mut(pid).unwrap();
    if state.is_docker() {
        return;
    }
    let entries = state.pending_syscall_event.drain(..).map(|mut event| {
        filter(&mut event);
        LogEntry {
            ident: Identifier { pid, machine },
            event,
            timestamp: Instant::now().duration_since(started),
        }
    });
    self.pending_events.extend(entries);
}
/// Connects to the tracer server at `connect`, forks, and traces `args` as machine `machine`.
///
/// The parent builds the client and runs the event loop (`run_internal`). The child moves into
/// its own process group, calls `traceme`, stops itself with `SIGSTOP` and then `execvp`s `args`.
///
/// # Errors
/// Fails when `args` is empty, when the server cannot be reached, when `fork` fails, when the
/// event loop fails, or (in the child) when `setpgid`, `traceme` or the `CString` conversion
/// fails.
///
/// # Panics
/// The child panics when `execvp` fails.
pub fn run(machine: i32, connect: String, args: Vec<String>) -> anyhow::Result<()> {
    // Reject an empty command up front instead of panicking on `args[0]` in the child.
    anyhow::ensure!(!args.is_empty(), "No command given to trace");
    let sock = TcpStream::connect(&connect)
        .map_err(|e| anyhow::anyhow!("Could not connect to {connect}: {e}"))?;
    if let ForkResult::Parent { child } = unsafe { nix::unistd::fork()? } {
        let mut this = Self {
            connect,
            sock,
            store: ProcessStateStore::default(),
            start_time: Instant::now(),
            pending_events: vec![],
            pending_files: BTreeSet::new(),
            machine,
        };
        this.run_internal(child.into())
    } else {
        let me = getpid();
        setpgid(me, me)?;
        traceme()?;
        if 0 != unsafe { raise(SIGSTOP) } {
            log::error!("raise failed!");
            exit(-1);
        }
        let args = args
            .into_iter()
            .map(CString::new)
            .collect::<Result<Vec<CString>, _>>()?;
        // `execvp` only returns on failure.
        let err = execvp(&args[0], &args).unwrap_err();
        panic!("Failed to execute {args:?}: {err:?}");
    }
}
fn run_internal(&mut self, root_child: Pid) -> anyhow::Result<()> {
waitpid(nix::unistd::Pid::from(root_child.into()), Some(WaitPidFlag::WSTOPPED))?; // wait for child to stop
log::trace!("child stopped");
let mut root_child_state = ProcessState::new(root_child, 0)?;
root_child_state.ppid = Some(getpid().into());
self.store.insert(root_child_state);
// Set foreground process group of the terminal
if -1 == unsafe { tcsetpgrp(STDIN_FILENO, root_child.0) } {
return Err(Errno::last().into());
}
// restart child
log::trace!("resuming child");
let ptrace_opts = {
use nix::sys::ptrace::Options;
Options::PTRACE_O_TRACEEXEC
| Options::PTRACE_O_TRACEEXIT
| Options::PTRACE_O_EXITKILL
| Options::PTRACE_O_TRACESYSGOOD
| Options::PTRACE_O_TRACEFORK
| Options::PTRACE_O_TRACECLONE
| Options::PTRACE_O_TRACEVFORK
};
ptrace::setoptions(root_child.into(), ptrace_opts)?;
// restart child
ptrace::syscall(nix::unistd::Pid::from(root_child.into()), None)?;
let mut continuing = true;
while continuing {
let status = {
let status = waitpid(None, Some(WaitPidFlag::__WALL));
if status.is_err_and(|e| e == nix::errno::Errno::ECHILD) {
continuing = false;
Ok(WaitStatus::StillAlive)
} else {
status
}
}?;
// log::trace!("waitpid: {:?}", status);
let signal = match status {
WaitStatus::Stopped(pid, sig) => {
let pid = pid.into();
log::trace!("stopped: {pid}, sig {:?}", sig);
match sig {
Signal::SIGSTOP => {
log::trace!("sigstop event, child: {pid}");
if let Some(state) = self.store.get_current_mut(pid) {
if state.status == ProcessStatus::PtraceForkEventReceived {
log::trace!("sigstop event received after ptrace fork event, pid: {pid}");
state.status = ProcessStatus::Running;
} else if pid != root_child {
log::error!("Unexpected SIGSTOP: {state:?}")
}
} else {
log::trace!("sigstop event received before ptrace fork event, pid: {pid}");
let mut state = ProcessState::new(pid, 0)?;
state.status = ProcessStatus::SigstopReceived;
self.store.insert(state);
}
None
}
Signal::SIGCHLD => {
// From lurk:
//
// The SIGCHLD signal is sent to a process when a child process terminates, interrupted, or resumes after being interrupted
// This means, that if our tracee forked and said fork exits before the parent, the parent will get stopped.
// Therefor issue a PTRACE_SYSCALL request to the parent to continue execution.
// This is also important if we trace without the following forks option.
Some(Signal::SIGCHLD)
}
_ => {
// Just deliver the signal to tracee
Some(sig)
}
}
}
WaitStatus::Exited(pid, code) => {
let pid = pid.into();
log::trace!("exited: pid {}, code {:?}", pid, code);
self.log_root(pid, Event::Exit { code });
self.store.get_current_mut(pid).unwrap().status =
ProcessStatus::Exited(code);
None
}
WaitStatus::PtraceEvent(pid, sig, evt) => {
log::trace!("ptrace event: {:?} {:?}", sig, evt);
match evt {
nix::libc::PTRACE_EVENT_FORK
| nix::libc::PTRACE_EVENT_VFORK
| nix::libc::PTRACE_EVENT_CLONE => {
let new_child = Pid(ptrace::getevent(pid.into())? as pid_t);
log::trace!(
"ptrace fork event, evt {evt}, pid: {pid}, child: {new_child}"
);
self.log_root(pid.into(), Event::Fork { child: new_child });
if let Some(state) = self.store.get_current_mut(new_child) {
if state.status == ProcessStatus::SigstopReceived {
log::trace!("ptrace fork event received after sigstop, pid: {pid}, child: {new_child}");
state.status = ProcessStatus::Running;
state.ppid = Some(pid.into());
} else if new_child != root_child {
log::error!("Unexpected fork event: {state:?}")
}
} else {
log::trace!("ptrace fork event received before sigstop, pid: {pid}, child: {new_child}");
let mut state = ProcessState::new(new_child, 0)?;
state.status = ProcessStatus::PtraceForkEventReceived;
state.ppid = Some(pid.into());
self.store.insert(state);
}
// Resume parent
None
}
nix::libc::PTRACE_EVENT_EXEC => {
log::trace!("exec event");
let p = self.store.get_current_mut(pid.into()).unwrap();
assert!(!p.presyscall);
// After execve or execveat, in syscall exit event,
// the registers might be clobbered(e.g. aarch64).
// So we need to determine whether exec is successful here.
// PTRACE_EVENT_EXEC only happens for successful exec.
p.is_exec_successful = true;
let path = p
.pending_syscall_event
.iter()
.find_map(|e| match e {
Event::Exec { prog, .. } => Some(prog.clone()),
_ => None,
})
.unwrap();
self.ingest_file(pid.into(), path)?;
self.drain_syscall_events(pid.into(), Box::new(|_| {}));
// Don't use seccomp_aware_cont here because that will skip the next syscall exit stop
None
}
nix::libc::PTRACE_EVENT_EXIT => {
log::trace!("exit event");
None
}
nix::libc::PTRACE_EVENT_SECCOMP => {
log::trace!("seccomp event");
self.on_syscall_enter(pid.into())?;
None
}
_ => {
log::trace!("other event");
None
}
}
}
WaitStatus::Signaled(pid, sig, _) => {
let pid: Pid = pid.into();
log::debug!("signaled: {pid}, {:?}", sig);
if pid == root_child {
exit(128 + (sig as i32))
}
None
}
WaitStatus::PtraceSyscall(pid) => {
let pid = pid.into();
let presyscall = self.store.get_current_mut(pid).unwrap().presyscall;
if presyscall {
self.on_syscall_enter(pid)?;
} else {
self.on_syscall_exit(pid)?;
}
None
}
_ => None
};
if !self.pending_files.is_empty() || !continuing {
let mut events = vec![];
let mut files = BTreeSet::new();
std::mem::swap(&mut events, &mut self.pending_events);
std::mem::swap(&mut files, &mut self.pending_files);
let mut msg = TracerClientMessage::Events { events, files };
loop {
let event = self.commune_server(msg)?;
match event {
TracerServerRequest::Continue => break,
TracerServerRequest::AnalyzeFiles { paths } => {
let mut formats = HashMap::new();
let mut files = BTreeSet::new();
for path in paths {
let mut fp = std::fs::File::open(&path)?;
log::debug!("Parsing format of {} (client)", path.to_string_lossy());
let (format, mut references) = parse_format(&mut fp)?;
formats.insert(path, format);
files.append(&mut references);
}
msg = TracerClientMessage::FileFormats { formats, files }
},
TracerServerRequest::AllocatedId { id } => {
panic!("Receieved unsolicited AllocatedId({id})");
}
}
}
}
// https://stackoverflow.com/questions/29997244/occasionally-missing-ptrace-event-vfork-when-running-ptrace
// DO NOT send PTRACE_SYSCALL until we receive the PTRACE_EVENT_FORK, etc.
if let Some(pid) = status.pid() {
let pid = pid.into();
let p = self.store.get_current_mut(pid).expect("No such process??");
if !matches!(p.status, ProcessStatus::SigstopReceived | ProcessStatus::Exited(_)) {
ptrace_syscall(pid, signal)?;
}
}
}
Ok(())
}
/// Handles a syscall-entry stop for `pid`.
///
/// Records the syscall number on the process and queues provisional events in
/// `pending_syscall_event`; `on_syscall_exit` later fills in the result or discards them.
/// File descriptors that are only known once the syscall returns are queued as `-1`.
///
/// Returns `Ok(())` without queuing anything when the tracee has already vanished (`ESRCH`).
///
/// # Errors
/// Propagates ptrace/memory-read failures other than `ESRCH` on `getregs`.
fn on_syscall_enter(&mut self, pid: Pid) -> anyhow::Result<()> {
    let p = self.store.get_current_mut(pid).unwrap();
    // Flip the entry/exit marker first so the next syscall stop is handled as the exit.
    p.presyscall = !p.presyscall;
    let regs = match ptrace::getregs(pid.into()) {
        Ok(regs) => regs,
        Err(Errno::ESRCH) => {
            log::info!("ptrace getregs failed: {pid}, ESRCH, child probably gone!");
            return Ok(());
        }
        e => e?,
    };
    let syscallno = syscall_no_from_regs!(regs);
    log::trace!("Got syscall {} from {}", syscallno, pid);
    p.syscall = syscallno;
    match syscallno {
        nix::libc::SYS_execveat => {
            // int execveat(int dirfd, const char *pathname,
            //              char *const _Nullable argv[],
            //              char *const _Nullable envp[],
            //              int flags);
            let dirfd = syscall_arg(&regs, 0) as i32;
            let pathname = read_string(pid, syscall_arg(&regs, 1) as AddressType)?;
            let flags = syscall_arg(&regs, 4) as i32;
            let filename = resolve_filename_at_fd(pid, pathname, dirfd, flags)?;
            p.pending_syscall_event.push(Event::Exec { prog: filename.clone() });
            // A lossy conversion is enough here: the name is only compared and stat'ed, and a
            // non-UTF-8 path must not bring the tracer down.
            self.instrument_exec(pid, &filename.to_string_lossy(), &regs, 1)?;
        }
        nix::libc::SYS_execve => {
            let filename = read_pathbuf(pid, syscall_arg(&regs, 0) as AddressType)?;
            p.pending_syscall_event.push(Event::Exec { prog: filename.clone() });
            self.instrument_exec(pid, &filename.to_string_lossy(), &regs, 0)?;
        }
        nix::libc::SYS_open => {
            let path = read_pathbuf(pid, syscall_arg(&regs, 0) as AddressType)?;
            p.pending_syscall_event.push(Event::FdOpen {
                source: FdSource::File { path },
                fd: -1,
            });
        }
        nix::libc::SYS_openat => {
            let dirfd = syscall_arg(&regs, 0) as i32;
            let pathname = read_string(pid, syscall_arg(&regs, 1) as AddressType)?;
            let flags = syscall_arg(&regs, 2) as i32;
            let path = resolve_filename_at_fd(pid, pathname, dirfd, flags)?;
            p.pending_syscall_event.push(Event::FdOpen {
                source: FdSource::File { path },
                // Same "not yet known" placeholder as every other pending descriptor.
                fd: -1,
            });
        }
        nix::libc::SYS_read
        | nix::libc::SYS_pread64
        | nix::libc::SYS_readv
        | nix::libc::SYS_preadv
        | nix::libc::SYS_preadv2 => {
            let fd = syscall_arg(&regs, 0) as i32;
            p.pending_syscall_event.push(Event::FdRead { fd });
        }
        nix::libc::SYS_write
        | nix::libc::SYS_pwrite64
        | nix::libc::SYS_writev
        | nix::libc::SYS_pwritev
        | nix::libc::SYS_pwritev2 => {
            let fd = syscall_arg(&regs, 0) as i32;
            p.pending_syscall_event.push(Event::FdWrite { fd });
        }
        nix::libc::SYS_dup | nix::libc::SYS_dup2 | nix::libc::SYS_dup3 => {
            let oldfd = syscall_arg(&regs, 0) as i32;
            p.pending_syscall_event
                .push(Event::FdDup { oldfd, newfd: -1 });
        }
        nix::libc::SYS_fcntl => {
            let fd = syscall_arg(&regs, 0) as i32;
            let cmd = syscall_arg(&regs, 1) as i32;
            // Both commands duplicate the descriptor; they differ only in the close-on-exec flag.
            if matches!(cmd, nix::libc::F_DUPFD | nix::libc::F_DUPFD_CLOEXEC) {
                p.pending_syscall_event.push(Event::FdDup {
                    oldfd: fd,
                    newfd: -1,
                });
            }
        }
        nix::libc::SYS_close => {
            let fd = syscall_arg(&regs, 0) as i32;
            p.pending_syscall_event.push(Event::FdClose { fd });
        }
        _ => {}
    }
    Ok(())
}
/// Handles a syscall-exit stop for `pid`.
///
/// Reads the syscall result and either finalizes the events queued at syscall entry
/// (patching in the returned descriptor where relevant) or drops them. Paths of
/// successfully opened files are passed to `ingest_file` afterwards.
///
/// Returns `Ok(())` early when the tracee has already vanished (`ESRCH`).
fn on_syscall_exit(&mut self, pid: Pid) -> anyhow::Result<()> {
    let p = self.store.get_current_mut(pid).unwrap();
    p.presyscall = !p.presyscall;
    let regs = match ptrace::getregs(pid.into()) {
        Err(Errno::ESRCH) => {
            log::info!("ptrace getregs failed: {pid}, ESRCH, child probably gone!");
            return Ok(());
        }
        Ok(regs) => regs,
        other => other?,
    };
    let result = syscall_res_from_regs!(regs);
    let succeeded = result >= 0;
    // Files opened by this syscall; ingested once the events have been handed over.
    let mut pending_files = vec![];
    // `Some(f)`: drain the queued events through `f`. `None`: discard the queued events.
    let filter: Option<Box<dyn FnMut(&mut Event)>> = match p.syscall {
        nix::libc::SYS_execve | nix::libc::SYS_execveat => {
            p.is_exec_successful = false;
            p.update()?;
            None
        }
        nix::libc::SYS_open | nix::libc::SYS_openat => {
            if succeeded {
                for queued in p.pending_syscall_event.iter() {
                    if let Event::FdOpen {
                        source: FdSource::File { path },
                        ..
                    } = queued
                    {
                        pending_files.push(path.clone());
                    }
                }
                // Replace the placeholder with the descriptor the kernel returned.
                Some(Box::new(move |event| {
                    if let Event::FdOpen { fd, .. } = event {
                        *fd = result as i32;
                    }
                }))
            } else {
                None
            }
        }
        nix::libc::SYS_dup
        | nix::libc::SYS_dup2
        | nix::libc::SYS_dup3
        | nix::libc::SYS_fcntl => {
            if succeeded {
                // The return value is the new descriptor.
                Some(Box::new(move |event| {
                    if let Event::FdDup { newfd, .. } = event {
                        *newfd = result as i32;
                    }
                }))
            } else {
                None
            }
        }
        _ => {
            if succeeded {
                // Nothing to patch; the queued events are emitted unchanged.
                Some(Box::new(|_| {}))
            } else {
                None
            }
        }
    };
    match filter {
        Some(filter) => self.drain_syscall_events(pid, filter),
        None => p.pending_syscall_event.clear(),
    }
    for path in pending_files {
        self.ingest_file(pid, path)?;
    }
    Ok(())
}
/// Rewrites the argv of a pending `execve`/`execveat` when the tracee is about to run
/// `docker run`, so that the container is launched with the arguments produced by
/// `instrument_docker_run_execve`.
///
/// * `filename` - resolved path of the program being executed.
/// * `regs` - registers captured at syscall entry.
/// * `prog_idx` - index of the syscall argument holding the program path; argv is the
///   argument right after it.
///
/// Does nothing unless `filename` ends in `/docker`, exists, and `argv[1]` is `run`, or
/// when the rewritten argv is identical to the original one.
///
/// # Errors
/// Propagates failures from reading/writing tracee memory, allocating a machine id, or
/// rewriting the arguments.
fn instrument_exec(&mut self, pid: Pid, filename: &str, regs: &user_regs_struct, prog_idx: usize) -> anyhow::Result<()> {
    if !filename.ends_with("/docker") || std::fs::metadata(filename).is_err() {
        return Ok(());
    }
    let args = read_cstring_array(pid, syscall_arg(regs, prog_idx + 1) as AddressType)?;
    if !args.get(1).is_some_and(|c| c.to_str() == Ok("run")) {
        return Ok(());
    }
    let new_machine = self.allocate_machine()?;
    let new_args = instrument_docker_run_execve(&args, new_machine, self.connect.as_str())?;
    if new_args == args {
        // Nothing changed: leave the tracee's memory and registers untouched.
        return Ok(());
    }
    log::debug!(
        "Launching docker child: {}",
        new_args.iter().map(|x| x.to_string_lossy()).collect::<Vec<_>>().join(" ")
    );

    let mut regs2 = *regs;
    let mut stacktop = stack_ptr_from_regs!(regs);
    // Leave a 128-byte gap below the stack pointer before writing anything.
    stacktop -= 128;
    // Copy every argument string below the stack, word-aligned, remembering its address.
    let mut argv_pointers = new_args.iter().map(|argstr| -> anyhow::Result<i64> {
        let bytes = argstr.as_bytes_with_nul();
        stacktop -= bytes.len() as i64;
        while stacktop % WORD_SIZE as i64 != 0 {
            stacktop -= 1;
        }
        write_bytes(pid, stacktop as AddressType, bytes)?;
        Ok(stacktop)
    }).collect::<anyhow::Result<Vec<i64>>>()?;
    assert_eq!(stacktop % WORD_SIZE as i64, 0);
    // argv is NULL-terminated.
    argv_pointers.push(0);
    // Write the pointer table back to front so `stacktop` ends up pointing at argv[0].
    for ptr in argv_pointers.iter().copied().rev() {
        stacktop -= WORD_SIZE as i64;
        ptrace::write(pid.into(), stacktop as AddressType, ptr)?;
    }
    set_syscall_arg(&mut regs2, prog_idx + 1, stacktop as u64);
    ptrace::setregs(pid.into(), regs2)?;
    Ok(())
}
}
/// Resolves the `pathname`/`dirfd` pair of an `*at` syscall made by `pid` to a path.
///
/// * An absolute `pathname` is returned unchanged; `dirfd` is ignored.
/// * An empty `pathname` combined with `AT_EMPTY_PATH` in `flags` designates the file
///   behind `dirfd` itself.
/// * Anything else is joined onto the path behind `dirfd`.
///
/// # Errors
/// Fails when the path behind `dirfd` cannot be read.
fn resolve_filename_at_fd(
    pid: Pid,
    pathname: String,
    dirfd: i32,
    flags: i32,
) -> anyhow::Result<PathBuf> {
    let refers_to_fd_itself = pathname.is_empty() && (flags & AT_EMPTY_PATH) != 0;
    let pathname = PathBuf::from(pathname);
    if pathname.is_absolute() {
        return Ok(pathname);
    }
    let base = read_fd(pid, dirfd)?;
    if refers_to_fd_itself {
        Ok(base)
    } else {
        Ok(base.join(pathname))
    }
}

View File

@ -1,178 +0,0 @@
use std::{
collections::HashSet,
env::current_exe,
ffi::CString,
process::Command,
};
/// Rewrites a `docker run ...` argument vector so the container starts under the tracer.
///
/// The returned argv mounts the current executable at `/.ontology`, makes it the
/// container entrypoint, and passes `internal-launch <machine> <connect>` followed by the
/// command the container would have run (its entrypoint, then its command).
///
/// * `args` - the original argv; `args[0]` is the docker binary and `args[1]` the subcommand.
/// * `machine` - machine id handed to the launcher inside the container.
/// * `connect` - address handed to the launcher inside the container.
///
/// The original `args` are returned unchanged when the command line cannot be
/// instrumented: too few arguments, an option missing its value, no image on the command
/// line, `docker inspect` failing, or the inspect output lacking a `Config` section.
///
/// # Errors
/// Fails on non-UTF-8 arguments, when `docker inspect` cannot be spawned or its output is
/// not valid JSON, when the current executable path is unavailable or not UTF-8, or when
/// a resulting argument contains an interior NUL byte.
pub fn instrument_docker_run_execve(
    args: &Vec<CString>,
    machine: i32,
    connect: &str,
) -> anyhow::Result<Vec<CString>> {
    /// One option token, optionally followed by a separate value token.
    enum Argument<'a> {
        Zero(&'a str),
        One(&'a str, &'a str),
    }

    /// How an option token obtains its value.
    enum OptionKind<'a> {
        /// Boolean flag(s), or an option whose value is attached to the same token.
        SelfContained,
        /// `--entrypoint=VALUE`; split so the entrypoint can be replaced later.
        InlineEntrypoint(&'a str),
        /// The value is the next token on the command line.
        NeedsValue,
    }

    #[derive(Default)]
    struct ArgsParsed<'a> {
        preamble: Vec<&'a str>,
        args: Vec<Argument<'a>>,
        image: Option<&'a str>,
        cmd: Vec<&'a str>,
    }

    impl<'a> ArgsParsed<'a> {
        /// Removes the first `--entrypoint VALUE` option and returns its value.
        fn take_entrypoint(&mut self) -> Option<&'a str> {
            let idx = self
                .args
                .iter()
                .position(|arg| matches!(arg, Argument::One("--entrypoint", _)))?;
            match self.args.remove(idx) {
                Argument::One(_, value) => Some(value),
                Argument::Zero(_) => unreachable!("position() only matches Argument::One"),
            }
        }

        /// Moves the explicit container command out, if one was given.
        fn take_cmd(&mut self) -> Option<Vec<&'a str>> {
            (!self.cmd.is_empty()).then(|| std::mem::take(&mut self.cmd))
        }

        /// Flattens the parsed command line back into an argv.
        fn reserialize(self) -> anyhow::Result<Vec<CString>> {
            let mut tokens = self.preamble;
            for arg in self.args {
                match arg {
                    Argument::Zero(a) => tokens.push(a),
                    Argument::One(a, b) => tokens.extend([a, b]),
                }
            }
            if let Some(image) = self.image {
                tokens.push(image);
                tokens.extend(self.cmd);
            }
            Ok(tokens
                .into_iter()
                .map(CString::new)
                .collect::<Result<Vec<_>, _>>()?)
        }
    }

    /// Decides whether the option token `arg` carries its own value or needs the next token.
    fn classify<'a>(
        arg: &'a str,
        long_flags: &HashSet<&str>,
        short_flags: &HashSet<char>,
    ) -> OptionKind<'a> {
        if arg.starts_with("--") {
            return match arg.split_once('=') {
                Some(("--entrypoint", value)) => OptionKind::InlineEntrypoint(value),
                // `--name=value` and `--flag=true` are complete on their own.
                Some(_) => OptionKind::SelfContained,
                None if long_flags.contains(arg) => OptionKind::SelfContained,
                None => OptionKind::NeedsValue,
            };
        }
        // Short option cluster such as `-it`, `-itv` or `-p8080:80`: boolean flags may be
        // stacked, and the first value-taking flag consumes the rest of the token, or the
        // next token when it is the last character.
        let cluster = arg.strip_prefix('-').unwrap_or(arg);
        match cluster.char_indices().find(|(_, c)| !short_flags.contains(c)) {
            Some((idx, c)) if idx + c.len_utf8() == cluster.len() => OptionKind::NeedsValue,
            _ => OptionKind::SelfContained,
        }
    }

    /// Reads `config[key]` as a list of strings; `None` when absent, null or malformed.
    fn string_list<'v>(config: &'v serde_json::Value, key: &str) -> Option<Vec<&'v str>> {
        config.get(key)?.as_array()?.iter().map(|s| s.as_str()).collect()
    }

    // Options that do not take a value.
    let unary_args = HashSet::from([
        "-d",
        "--detach",
        "--disable-content-trust",
        "--help",
        "--init",
        "-i",
        "--interactive",
        "--no-healthcheck",
        "--oom-kill-disable",
        "--privileged",
        "-P",
        "--publish-all",
        "-q",
        "--quiet",
        "--read-only",
        "--rm",
        "--sig-proxy",
        "-t",
        "--tty",
    ]);
    // Single-letter forms of the above, for recognising stacked flags like `-it`.
    let short_flags: HashSet<char> = unary_args
        .iter()
        .filter_map(|flag| {
            let mut chars = flag.strip_prefix('-')?.chars();
            match (chars.next(), chars.next()) {
                (Some(c), None) if c != '-' => Some(c),
                _ => None,
            }
        })
        .collect();

    let mut string_args = ArgsParsed::default();
    let mut args_iter = args.iter();
    // The docker binary and the `run` subcommand are passed through untouched.
    for _ in 0..2 {
        let Some(token) = args_iter.next() else {
            log::debug!("Docker: too few arguments to instrument");
            return Ok(args.clone());
        };
        string_args.preamble.push(token.to_str()?);
    }

    let mut options_done = false;
    while let Some(token) = args_iter.next() {
        let arg = token.to_str()?;
        if !options_done && arg == "--" {
            // Explicit end of options; the next token is the image.
            options_done = true;
            continue;
        }
        if !options_done && arg.len() > 1 && arg.starts_with('-') {
            match classify(arg, &unary_args, &short_flags) {
                OptionKind::SelfContained => string_args.args.push(Argument::Zero(arg)),
                OptionKind::InlineEntrypoint(value) => {
                    string_args.args.push(Argument::One("--entrypoint", value));
                }
                OptionKind::NeedsValue => {
                    let Some(parameter) = args_iter.next() else {
                        log::debug!("Docker: arg {} missing required argument", arg);
                        return Ok(args.clone());
                    };
                    string_args.args.push(Argument::One(arg, parameter.to_str()?));
                }
            }
            continue;
        }
        // First positional token is the image; everything after it is the command.
        string_args.image = Some(arg);
        for rest in args_iter.by_ref() {
            string_args.cmd.push(rest.to_str()?);
        }
        break;
    }

    let Some(image) = string_args.image else {
        // Nothing to instrument without an image.
        return Ok(args.clone());
    };

    let output = Command::new("docker").args(["inspect", image]).output()?;
    if !output.status.success() {
        log::debug!("Docker: image inspect for {} returned bad error code", image);
        return Ok(args.clone());
    }
    let value: serde_json::Value = serde_json::from_slice(&output.stdout)?;
    let Some(config) = value.get(0).and_then(|entry| entry.get("Config")) else {
        log::debug!("Docker: image inspect for {} has no Config section", image);
        return Ok(args.clone());
    };

    let machine = machine.to_string();
    let exe = current_exe()?;
    let exe = exe
        .to_str()
        .ok_or_else(|| anyhow::anyhow!("current executable path is not valid UTF-8"))?;
    let volume = format!("{}:/.ontology", exe);

    // When `--entrypoint` is repeated the last occurrence wins, so consume them all.
    let mut user_entrypoint = None;
    while let Some(value) = string_args.take_entrypoint() {
        user_entrypoint = Some(value);
    }

    let mut launch: Vec<&str> = vec!["internal-launch", &machine, connect];
    match user_entrypoint {
        // `--entrypoint ""` resets the entrypoint: the command is run directly.
        Some("") => {}
        Some(entrypoint) => launch.push(entrypoint),
        None => launch.extend(string_list(config, "Entrypoint").unwrap_or_default()),
    }
    match string_args.take_cmd() {
        Some(cmd) => launch.extend(cmd),
        // An explicit `--entrypoint` discards the image's default command.
        None if user_entrypoint.is_none() => {
            launch.extend(string_list(config, "Cmd").unwrap_or_default());
        }
        None => {}
    }

    string_args
        .args
        .push(Argument::One("--entrypoint", "/.ontology"));
    string_args.args.push(Argument::One("-v", &volume));
    string_args.cmd = launch;
    string_args.reserialize()
}

View File

@ -1,4 +0,0 @@
pub mod client;
pub mod server;
pub mod types;
pub(self) mod docker;

View File

@ -1,154 +0,0 @@
use std::{collections::BTreeMap, net::{TcpListener, TcpStream}, os::fd::{AsFd, AsRawFd, BorrowedFd}, path::PathBuf, process::{Command, Stdio}, ffi::OsStr};
use serde_json::de::IoRead;
use crate::filestore::FileStore;
use super::types::*;
/// Server-side tracer; see `Tracer::run` for the entry point.
pub struct Tracer {
/// Report associated with this tracer.
pub report: TracerReport,
}
impl Tracer {
pub fn run(input: Vec<PathBuf>, args: Vec<String>, mute: bool) -> anyhow::Result<TracerReport> {
let mut files = FileStore::new(input)?;
let mut log = vec![];
let connect = format!("{}:9995", default_net::get_default_interface().expect("Could not obtain default interface").ipv4.get(0).expect("Default interface has no ipv4 address").addr);
log::debug!("Using {} for server", connect);
let listener = TcpListener::bind(&connect).expect("Could not bind listener socket");
let executable = std::env::current_exe().expect("Could not obtain current executable");
let mut proc = Command::new(executable);
proc.args(["internal-launch".to_owned(), "--".to_owned(), "0".to_owned(), connect].iter().chain(args.iter()));
if mute {
proc.stdin(Stdio::null()).stdout(Stdio::null()).stderr(Stdio::null());
}
log::debug!("Launching tracer child {}", proc.get_args().collect::<Vec<_>>().join(OsStr::new(" ")).to_string_lossy());
let mut child = proc.spawn().expect("Could not spawn child");
let mut next_child_id = 1;
struct ChildData {
tcp_stream: TcpStream,
json_stream: serde_json::StreamDeserializer<'static, IoRead<TcpStream>, TracerClientMessage>,
duped: i32,
}
enum ParentOrChild {
Parent(TcpListener),
Child(ChildData),
Dup(i32),
}
impl AsFd for ParentOrChild {
fn as_fd(&self) -> std::os::fd::BorrowedFd<'_> {
match self {
ParentOrChild::Parent(i) => i.as_fd(),
ParentOrChild::Child(i) => i.tcp_stream.as_fd(),
ParentOrChild::Dup(i) => unsafe { BorrowedFd::borrow_raw(*i) }
}
}
}
let mut children = BTreeMap::new();
let (first_child, _first_addr) = listener.accept().expect("Accept failed");
let duped = first_child.try_clone().expect("Dup failed");
children.insert(duped.as_raw_fd(), ParentOrChild::Dup(first_child.as_raw_fd()));
children.insert(first_child.as_raw_fd(), ParentOrChild::Child(ChildData {
tcp_stream: duped,
duped: first_child.as_raw_fd(),
json_stream: serde_json::StreamDeserializer::new(IoRead::new(first_child)),
}));
children.insert(listener.as_raw_fd(), ParentOrChild::Parent(listener));
loop {
if children.len() <= 1 {
break;
}
let mut fdset = children.values().into();
nix::sys::select::select(None, Some(&mut fdset), None, None, None).expect("Select failed");
let chosen = fdset.fds(None).next().unwrap().as_raw_fd();
let mut child = children.get_mut(&chosen).unwrap();
if let ParentOrChild::Dup(i) = child {
let i = *i;
child = children.get_mut(&i).unwrap();
}
match child {
ParentOrChild::Parent(p) => {
let (new_tcp, new_addr) = p.accept().expect("Accept failed");
log::info!("New child connected from {new_addr}");
let duped = new_tcp.try_clone().expect("Dup failed");
children.insert(duped.as_raw_fd(), ParentOrChild::Dup(new_tcp.as_raw_fd()));
children.insert(new_tcp.as_raw_fd(), ParentOrChild::Child(ChildData {
tcp_stream: duped,
duped: new_tcp.as_raw_fd(),
json_stream: serde_json::StreamDeserializer::new(IoRead::new(new_tcp)),
}));
},
ParentOrChild::Dup(_) => unreachable!(),
ParentOrChild::Child(c) => {
let Some(msg) = c.json_stream.next() else {
let fd1 = c.duped;
let fd2 = c.tcp_stream.as_raw_fd();
children.remove(&fd1);
children.remove(&fd2);
continue;
};
log::trace!("recv: {msg:?}");
let msg = match msg {
Ok(msg) => msg,
Err(e) => {
log::error!("Child socket disconnected unexpectedly: {e:?}");
children.remove(&chosen);
continue;
}
};
let response = match msg {
TracerClientMessage::Events { events, files: file_events } => {
log.extend(events);
let mut paths = vec![];
for (path, hash) in file_events {
if !files.insert(path.clone(), hash) {
paths.push(path);
}
}
if paths.is_empty() {
TracerServerRequest::Continue
} else {
TracerServerRequest::AnalyzeFiles { paths }
}
},
TracerClientMessage::FileFormats { formats, files: file_list } => {
for (path, fmt) in formats {
files.update_format(&path, fmt);
}
let paths: Vec<_> = file_list.into_iter().filter_map(|(path, hash)| (!files.hashes.contains_key(&hash)).then_some(path)).collect();
if paths.is_empty() {
TracerServerRequest::Continue
} else {
TracerServerRequest::AnalyzeFiles { paths }
}
}
TracerClientMessage::AllocateId { } => {
let result = TracerServerRequest::AllocatedId { id: next_child_id };
next_child_id += 1;
result
},
};
log::trace!("send: {response:?}");
serde_json::to_writer(&c.tcp_stream, &response)?;
},
}
}
child.wait().expect("Failed to wait for child");
Ok(TracerReport { log, files })
}
}

View File

@ -1,125 +0,0 @@
use std::{collections::{BTreeSet, HashMap}, path::PathBuf, fmt::{Display, Formatter}, time::Duration};
use serde::{Serialize, Deserialize};
use crate::filestore::{FileFormat, Sha256Hash, FileStore};
/// Serializable process id: a thin wrapper around the raw `i32` pid.
#[derive(Copy, Clone, Serialize, Deserialize, Eq, PartialEq, Debug, Hash)]
pub struct Pid(pub(crate) i32);
impl From<nix::unistd::Pid> for Pid {
fn from(value: nix::unistd::Pid) -> Self {
Self(value.as_raw())
}
}
/// Converts back to the `nix` pid type.
///
/// Implemented as `From` rather than a hand-written `Into`; the standard blanket impl
/// still provides `Into<nix::unistd::Pid> for Pid`, so existing `.into()` calls keep working.
impl From<Pid> for nix::unistd::Pid {
    fn from(value: Pid) -> Self {
        nix::unistd::Pid::from_raw(value.0)
    }
}
impl Display for Pid {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
/// A single traced action of a process.
#[derive(Debug, Serialize, Deserialize)]
pub enum Event {
/// The process forked `child`.
Fork { child: Pid },
/// The process executed the program at `prog`.
Exec { prog: PathBuf },
/// The process exited with `code`.
Exit { code: i32 },
/// File descriptor `fd` was opened from `source`.
FdOpen { fd: i32, source: FdSource },
/// File descriptor `oldfd` was duplicated to `newfd`.
FdDup { oldfd: i32, newfd: i32 },
/// File descriptor `fd` was closed.
FdClose { fd: i32 },
/// The process read from `fd`.
FdRead { fd: i32 },
/// The process wrote to `fd`.
FdWrite { fd: i32 },
}
/// What an opened file descriptor refers to.
#[derive(Debug, Serialize, Deserialize)]
pub enum FdSource {
/// A file at `path`.
File { path: PathBuf },
/// The terminal.
Tty,
}
/// Identifies a process by a machine id plus its pid.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Identifier {
/// Machine id (rendered as `m<machine>` in log lines).
pub machine: i32,
/// Process id (rendered as `p<pid>` in log lines).
pub pid: Pid,
}
/// One timestamped event attributed to a process.
#[derive(Debug, Serialize, Deserialize)]
pub struct LogEntry {
/// The process the event belongs to.
pub ident: Identifier,
/// What happened.
pub event: Event,
/// When it happened, as a duration (the reference point is not visible here).
pub timestamp: Duration,
}
impl Display for LogEntry {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(
f,
"[{}.{:03} m{}p{}] {}",
self.timestamp.as_secs(),
self.timestamp.as_millis() % 1000,
self.ident.machine,
self.ident.pid,
self.event
)
}
}
impl Display for FdSource {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
FdSource::File { path } => write!(f, "file {}", path.to_string_lossy()),
FdSource::Tty => write!(f, "the terminal"),
}
}
}
impl Display for Event {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Event::Fork { child } => write!(f, "fork {child}"),
Event::Exec { prog } => write!(f, "exec {}", prog.to_string_lossy()),
Event::Exit { code } => write!(f, "exit with {code}"),
Event::FdOpen { fd, source } => write!(f, "open fd {fd} from {source}"),
Event::FdDup { oldfd, newfd } => write!(f, "dup fd {oldfd} to {newfd}"),
Event::FdClose { fd } => write!(f, "close fd {fd}"),
Event::FdRead { fd } => write!(f, "read from fd {fd}"),
Event::FdWrite { fd } => write!(f, "write to fd {fd}"),
}
}
}
/// Messages sent from a tracing client to the server.
#[derive(Serialize, Deserialize, Debug)]
pub enum TracerClientMessage {
/// A batch of log entries together with the files (path and hash) seen alongside them.
Events {
events: Vec<LogEntry>,
files: BTreeSet<(PathBuf, Sha256Hash)>,
},
/// Formats of previously requested files, plus further files (path and hash) they reference.
FileFormats {
formats: HashMap<PathBuf, FileFormat>,
files: BTreeSet<(PathBuf, Sha256Hash)>,
},
/// Asks the server for a fresh id; answered with `TracerServerRequest::AllocatedId`.
AllocateId {},
}
/// Messages sent from the server back to a tracing client.
#[derive(Serialize, Deserialize, Debug)]
pub enum TracerServerRequest {
/// Nothing further is needed; the client may carry on.
Continue,
/// The client should analyze the files at `paths` and report their formats.
AnalyzeFiles {
paths: Vec<PathBuf>,
},
/// Reply to `TracerClientMessage::AllocateId` carrying the allocated id.
AllocatedId { id: i32 },
}
/// Result of a tracing run.
#[derive(Serialize, Deserialize)]
pub struct TracerReport {
/// All log entries received from clients.
pub log: Vec<LogEntry>,
/// The file store accumulated during the run.
pub files: FileStore,
}