Compare commits
10 Commits
6926711e83
...
4aa887573d
Author | SHA1 | Date |
---|---|---|
|
4aa887573d | |
|
6830c8af28 | |
|
576b776cbd | |
|
2f6c583f3a | |
|
90c9d9b784 | |
|
3504ee15dd | |
|
4ab8c409a0 | |
|
96cea8dff3 | |
|
fbc5634ad9 | |
|
62f7e40c64 |
|
@ -0,0 +1,2 @@
|
|||
[build]
|
||||
target = "x86_64-unknown-linux-musl"
|
|
@ -104,6 +104,12 @@ version = "0.7.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aad18937a628ec6abcd26d1489012cc0e18c21798210f491af69ded9b881106d"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "2.5.0"
|
||||
|
@ -125,6 +131,12 @@ version = "1.5.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
|
||||
|
||||
[[package]]
|
||||
name = "bytes"
|
||||
version = "1.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.92"
|
||||
|
@ -139,9 +151,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
|||
|
||||
[[package]]
|
||||
name = "cfg_aliases"
|
||||
version = "0.1.1"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e"
|
||||
checksum = "77e53693616d3075149f4ead59bdeecd204ac6b8192d8969757601b74bddf00f"
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
|
@ -189,6 +201,22 @@ version = "1.0.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
|
||||
|
||||
[[package]]
|
||||
name = "core-foundation"
|
||||
version = "0.9.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f"
|
||||
dependencies = [
|
||||
"core-foundation-sys",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "core-foundation-sys"
|
||||
version = "0.8.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f"
|
||||
|
||||
[[package]]
|
||||
name = "cpufeatures"
|
||||
version = "0.2.12"
|
||||
|
@ -217,6 +245,23 @@ dependencies = [
|
|||
"typenum",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "default-net"
|
||||
version = "0.22.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0c5a6569a908354d49b10db3c516d69aca1eccd97562fd31c98b13f00b73ca66"
|
||||
dependencies = [
|
||||
"dlopen2",
|
||||
"libc",
|
||||
"memalloc",
|
||||
"netlink-packet-core",
|
||||
"netlink-packet-route",
|
||||
"netlink-sys",
|
||||
"once_cell",
|
||||
"system-configuration",
|
||||
"windows",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "derive_more"
|
||||
version = "0.99.17"
|
||||
|
@ -238,6 +283,17 @@ dependencies = [
|
|||
"crypto-common",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dlopen2"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09b4f5f101177ff01b8ec4ecc81eead416a8aa42819a2869311b3420fa114ffa"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"once_cell",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "env_filter"
|
||||
version = "0.1.0"
|
||||
|
@ -375,6 +431,12 @@ version = "0.4.21"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
|
||||
|
||||
[[package]]
|
||||
name = "memalloc"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df39d232f5c40b0891c10216992c2f250c054105cb1e56f0fc9032db6203ecc1"
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.7.2"
|
||||
|
@ -399,11 +461,58 @@ dependencies = [
|
|||
"adler",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "netlink-packet-core"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72724faf704479d67b388da142b186f916188505e7e0b26719019c525882eda4"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"byteorder",
|
||||
"netlink-packet-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "netlink-packet-route"
|
||||
version = "0.17.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "053998cea5a306971f88580d0829e90f270f940befd7cf928da179d4187a5a66"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bitflags 1.3.2",
|
||||
"byteorder",
|
||||
"libc",
|
||||
"netlink-packet-core",
|
||||
"netlink-packet-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "netlink-packet-utils"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ede8a08c71ad5a95cdd0e4e52facd37190977039a4704eb82a283f713747d34"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"byteorder",
|
||||
"paste",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "netlink-sys"
|
||||
version = "0.8.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "416060d346fbaf1f23f9512963e3e878f1a78e707cb699ba9215761754244307"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"libc",
|
||||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nix"
|
||||
version = "0.28.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4"
|
||||
source = "git+https://github.com/rhelmot/nix-rs?branch=master#e9f7c1b74ef7581adf1513a3f3c9a965824ee2d4"
|
||||
dependencies = [
|
||||
"bitflags 2.5.0",
|
||||
"cfg-if",
|
||||
|
@ -431,12 +540,19 @@ dependencies = [
|
|||
"ruzstd",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.19.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
|
||||
|
||||
[[package]]
|
||||
name = "ontology"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"clap",
|
||||
"default-net",
|
||||
"env_logger",
|
||||
"gimli 0.29.0",
|
||||
"hex",
|
||||
|
@ -452,6 +568,12 @@ dependencies = [
|
|||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "paste"
|
||||
version = "1.0.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.79"
|
||||
|
@ -613,6 +735,47 @@ dependencies = [
|
|||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "system-configuration"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7"
|
||||
dependencies = [
|
||||
"bitflags 1.3.2",
|
||||
"core-foundation",
|
||||
"system-configuration-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "system-configuration-sys"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9"
|
||||
dependencies = [
|
||||
"core-foundation-sys",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.59"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f0126ad08bff79f29fc3ae6a55cc72352056dfff61e3ff8bb7129476d44b23aa"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "1.0.59"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d1cd413b5d558b4c5bf3680e324a6fa5014e7b7c067a51e69dbdf47eb7148b66"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.58",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "twox-hash"
|
||||
version = "1.6.3"
|
||||
|
@ -694,13 +857,37 @@ version = "0.4.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||
|
||||
[[package]]
|
||||
name = "windows"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f"
|
||||
dependencies = [
|
||||
"windows-targets 0.48.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.52.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
|
||||
dependencies = [
|
||||
"windows-targets",
|
||||
"windows-targets 0.52.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
|
||||
dependencies = [
|
||||
"windows_aarch64_gnullvm 0.48.5",
|
||||
"windows_aarch64_msvc 0.48.5",
|
||||
"windows_i686_gnu 0.48.5",
|
||||
"windows_i686_msvc 0.48.5",
|
||||
"windows_x86_64_gnu 0.48.5",
|
||||
"windows_x86_64_gnullvm 0.48.5",
|
||||
"windows_x86_64_msvc 0.48.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -709,51 +896,93 @@ version = "0.52.4"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b"
|
||||
dependencies = [
|
||||
"windows_aarch64_gnullvm",
|
||||
"windows_aarch64_msvc",
|
||||
"windows_i686_gnu",
|
||||
"windows_i686_msvc",
|
||||
"windows_x86_64_gnu",
|
||||
"windows_x86_64_gnullvm",
|
||||
"windows_x86_64_msvc",
|
||||
"windows_aarch64_gnullvm 0.52.4",
|
||||
"windows_aarch64_msvc 0.52.4",
|
||||
"windows_i686_gnu 0.52.4",
|
||||
"windows_i686_msvc 0.52.4",
|
||||
"windows_x86_64_gnu 0.52.4",
|
||||
"windows_x86_64_gnullvm 0.52.4",
|
||||
"windows_x86_64_msvc 0.52.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_gnullvm"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_gnullvm"
|
||||
version = "0.52.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.52.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.52.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.52.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.52.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnullvm"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnullvm"
|
||||
version = "0.52.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.52.4"
|
||||
|
|
|
@ -6,7 +6,9 @@ edition = "2021"
|
|||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
nix = { version = "0.28.0", features = ["ptrace", "process"] }
|
||||
#nix = { version = "0.28.0", features = ["ptrace", "process", "fs", "poll"] }
|
||||
nix = { git = "https://github.com/rhelmot/nix-rs", branch = "master", features = ["ptrace", "process", "fs", "poll"] }
|
||||
#nix = { path = "../../rust/nix", features = ["ptrace", "process", "fs", "poll"] }
|
||||
linux-personality = "1.0.0"
|
||||
anyhow = { version = "1", features = ["backtrace"] }
|
||||
log = "0.4"
|
||||
|
@ -21,3 +23,4 @@ gimli = { version = "0.29.0" }
|
|||
object = { version = "0.35" }
|
||||
memmap2 = { version = "0.9.4" }
|
||||
typed-arena = { version = "2" }
|
||||
default-net = "0.22"
|
||||
|
|
217
src/filestore.rs
217
src/filestore.rs
|
@ -1,13 +1,17 @@
|
|||
use std::{
|
||||
collections::{BTreeMap, HashMap, HashSet},
|
||||
borrow::Cow,
|
||||
collections::{BTreeMap, BTreeSet, HashMap, HashSet},
|
||||
fs, io,
|
||||
path::PathBuf, borrow::Cow,
|
||||
path::{PathBuf, Path},
|
||||
};
|
||||
|
||||
use gimli::{constants, AttributeValue, DW_TAG_compile_unit};
|
||||
use gimli::{constants, DW_TAG_compile_unit};
|
||||
use object::{Object, ObjectSection, ReadCache};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sha2::{digest::generic_array::{GenericArray, typenum::U32}, Digest, Sha256};
|
||||
use object::{Object, ReadCache, ObjectSection};
|
||||
use sha2::{
|
||||
digest::generic_array::{typenum::U32, GenericArray},
|
||||
Digest, Sha256,
|
||||
};
|
||||
use typed_arena::Arena;
|
||||
|
||||
#[derive(Serialize, Deserialize, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)]
|
||||
|
@ -20,7 +24,7 @@ pub struct Sha256Hash {
|
|||
impl From<GenericArray<u8, U32>> for Sha256Hash {
|
||||
fn from(value: GenericArray<u8, U32>) -> Self {
|
||||
Self {
|
||||
inner: value.into()
|
||||
inner: value.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -43,9 +47,7 @@ pub struct FileStoreEntry {
|
|||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub enum FileFormat {
|
||||
ELF {
|
||||
references: Vec<usize>,
|
||||
},
|
||||
ELF,
|
||||
Other,
|
||||
}
|
||||
|
||||
|
@ -64,6 +66,35 @@ impl FileStore {
|
|||
Ok(result)
|
||||
}
|
||||
|
||||
/// Register the minimal set of information associated with a file. Returns whether the file
|
||||
/// was already known.
|
||||
pub fn insert(&mut self, path: PathBuf, hash: Sha256Hash) -> bool {
|
||||
if let Some(idx) = self.hashes.get_mut(&hash) {
|
||||
self.files.get_mut(*idx).unwrap().output_names.insert(path);
|
||||
true
|
||||
} else {
|
||||
let index = self.files.len();
|
||||
self.files.push(FileStoreEntry {
|
||||
index,
|
||||
hash,
|
||||
format: FileFormat::Other,
|
||||
input_names: HashSet::new(),
|
||||
output_names: HashSet::from([path.clone()]),
|
||||
});
|
||||
self.filenames.insert(path, index);
|
||||
self.hashes.insert(hash, index);
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Overwrite the recorded format of the file registered under `path`.
///
/// # Panics
///
/// Panics if `path` is not present in the filename table.
pub fn update_format(&mut self, path: &Path, format: FileFormat) {
    let slot = match self.filenames.get(path) {
        Some(&slot) => slot,
        None => panic!("update_format called with unknown path {}", path.to_string_lossy()),
    };
    let entry = self.files.get_mut(slot).unwrap();
    entry.format = format;
}
|
||||
|
||||
fn ingest_input(&mut self, filename: PathBuf) -> anyhow::Result<()> {
|
||||
let stat = fs::metadata(&filename)?;
|
||||
if stat.is_dir() {
|
||||
|
@ -89,6 +120,7 @@ impl FileStore {
|
|||
mut content: (impl io::Read + io::Seek),
|
||||
) -> anyhow::Result<()> {
|
||||
let mut h = Sha256::new();
|
||||
log::debug!("Hashing {}", filename.to_string_lossy());
|
||||
io::copy(&mut content, &mut h)?;
|
||||
let hash = h.finalize().into();
|
||||
|
||||
|
@ -96,7 +128,7 @@ impl FileStore {
|
|||
std::collections::btree_map::Entry::Vacant(e) => {
|
||||
let index = self.files.len();
|
||||
e.insert(index);
|
||||
let format = self.parse_format(&mut content)?;
|
||||
let (format, refs) = parse_format(&mut content)?;
|
||||
self.files.push(FileStoreEntry {
|
||||
index,
|
||||
hash,
|
||||
|
@ -104,25 +136,28 @@ impl FileStore {
|
|||
input_names: [filename.clone()].into(),
|
||||
output_names: HashSet::new(),
|
||||
});
|
||||
for (reference_path, _reference_hash) in refs { // lazy...
|
||||
self.ingest_input(reference_path)?;
|
||||
}
|
||||
index
|
||||
}
|
||||
std::collections::btree_map::Entry::Occupied(e) => {
|
||||
self.files
|
||||
.get_mut(*e.get())
|
||||
.unwrap()
|
||||
.output_names
|
||||
.input_names
|
||||
.insert(filename.clone());
|
||||
*e.get()
|
||||
}
|
||||
};
|
||||
|
||||
if index == self.files.len() {
|
||||
}
|
||||
if index == self.files.len() {}
|
||||
|
||||
self.filenames.insert(filename, index);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/*
|
||||
pub fn ingest_output_local(&mut self, filename: PathBuf) -> anyhow::Result<()> {
|
||||
let stat = fs::metadata(&filename)?;
|
||||
if stat.is_dir() {
|
||||
|
@ -130,7 +165,7 @@ impl FileStore {
|
|||
}
|
||||
|
||||
let fp = fs::File::open(&filename)?;
|
||||
self.ingest_output(filename, fp);
|
||||
self.ingest_output(filename, fp)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -147,7 +182,7 @@ impl FileStore {
|
|||
std::collections::btree_map::Entry::Vacant(e) => {
|
||||
let index = self.files.len();
|
||||
e.insert(index);
|
||||
let format = self.parse_format(&mut content)?;
|
||||
let (format, refs) = self.parse_format(&mut content)?;
|
||||
self.files.push(FileStoreEntry {
|
||||
index,
|
||||
format,
|
||||
|
@ -171,77 +206,7 @@ impl FileStore {
|
|||
|
||||
Ok(index)
|
||||
}
|
||||
|
||||
fn parse_format(&mut self, fp: &mut (impl io::Read + io::Seek)) -> anyhow::Result<FileFormat> {
|
||||
fp.seek(io::SeekFrom::Start(0))?;
|
||||
let mut buf = [0; 4];
|
||||
let count = read_exact_or_end(fp, &mut buf)?;
|
||||
let buf = &buf[..count];
|
||||
|
||||
Ok(match buf {
|
||||
[0x7f, b'E', b'L', b'F', ..] => {
|
||||
let read_cache = ReadCache::new(fp);
|
||||
let elf = object::File::parse(&read_cache)?;
|
||||
let endian = if elf.is_little_endian() {
|
||||
gimli::RunTimeEndian::Little
|
||||
} else {
|
||||
gimli::RunTimeEndian::Big
|
||||
};
|
||||
let arena_data = Arena::new();
|
||||
let mut load_section = |id: gimli::SectionId| -> Result<_, _> {
|
||||
load_file_section(id, &elf, endian, &arena_data)
|
||||
};
|
||||
let dwarf = gimli::Dwarf::load(&mut load_section).unwrap();
|
||||
let mut units = dwarf.units();
|
||||
let mut inputs = vec![];
|
||||
while let Ok(Some(unit)) = units.next() {
|
||||
let abbrev = dwarf.abbreviations(&unit)?;
|
||||
let mut entries = unit.entries(&abbrev);
|
||||
while let Some((_, entry)) = entries.next_dfs()? {
|
||||
if entry.tag() == DW_TAG_compile_unit {
|
||||
let mut basename = None;
|
||||
let mut dirname = None;
|
||||
if let Some(name) = entry.attr(constants::DW_AT_name)?.map(|a| a.value()) {
|
||||
if let Ok(name) = dwarf.attr_string(&dwarf.unit(unit)?, name) {
|
||||
basename = Some(PathBuf::from(name.to_string()?));
|
||||
}
|
||||
}
|
||||
if let Some(name) = entry.attr(constants::DW_AT_comp_dir)?.map(|a| a.value()) {
|
||||
if let Ok(name) = dwarf.attr_string(&dwarf.unit(unit)?, name) {
|
||||
dirname = Some(PathBuf::from(name.to_string()?));
|
||||
}
|
||||
}
|
||||
if let (Some(dirname), Some(basename)) = (dirname, basename) {
|
||||
inputs.push(dirname.join(basename));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let references = inputs.into_iter().map(|input| self.ingest_dependency_local(input)).collect::<Result<Vec<_>, _>>()?.into_iter().filter_map(|x| x).collect();
|
||||
FileFormat::ELF {
|
||||
references,
|
||||
}
|
||||
},
|
||||
_ => FileFormat::Other,
|
||||
})
|
||||
}
|
||||
|
||||
fn ingest_dependency_local(&mut self, filename: PathBuf) -> anyhow::Result<Option<usize>> {
|
||||
// TODO: this needs to try suffixes of the filename against the filepath table to see if it
|
||||
// was moved between compilation and ingestion
|
||||
let metadata = match fs::metadata(&filename) {
|
||||
Ok(m) => m,
|
||||
Err(e) if e.kind() == io::ErrorKind::NotFound => return Ok(None),
|
||||
Err(e) => return Err(e)?,
|
||||
};
|
||||
if !metadata.is_file() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let fp = fs::File::open(&filename)?;
|
||||
Ok(Some(self.ingest_output(filename, fp)?))
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
fn load_file_section<'input, 'arena, Endian: gimli::Endianity, R: object::ReadRef<'input>>(
|
||||
|
@ -272,3 +237,83 @@ fn read_exact_or_end(fp: &mut impl io::Read, buf: &mut [u8]) -> anyhow::Result<u
|
|||
}
|
||||
Ok(read_so_far)
|
||||
}
|
||||
|
||||
pub fn parse_format(fp: &mut (impl io::Read + io::Seek)) -> anyhow::Result<(FileFormat, BTreeSet<(PathBuf, Sha256Hash)>)> {
|
||||
fp.seek(io::SeekFrom::Start(0))?;
|
||||
let mut buf = [0; 4];
|
||||
let count = read_exact_or_end(fp, &mut buf)?;
|
||||
let buf = &buf[..count];
|
||||
|
||||
Ok(match buf {
|
||||
[0x7f, b'E', b'L', b'F', ..] => {
|
||||
let read_cache = ReadCache::new(fp);
|
||||
let elf = object::File::parse(&read_cache)?;
|
||||
let endian = if elf.is_little_endian() {
|
||||
gimli::RunTimeEndian::Little
|
||||
} else {
|
||||
gimli::RunTimeEndian::Big
|
||||
};
|
||||
let arena_data = Arena::new();
|
||||
let mut load_section = |id: gimli::SectionId| -> Result<_, _> {
|
||||
load_file_section(id, &elf, endian, &arena_data)
|
||||
};
|
||||
let dwarf = gimli::Dwarf::load(&mut load_section).unwrap();
|
||||
let mut units = dwarf.units();
|
||||
let mut inputs = vec![];
|
||||
while let Ok(Some(unit)) = units.next() {
|
||||
let abbrev = dwarf.abbreviations(&unit)?;
|
||||
let mut entries = unit.entries(&abbrev);
|
||||
while let Some((_, entry)) = entries.next_dfs()? {
|
||||
if entry.tag() == DW_TAG_compile_unit {
|
||||
let mut basename = None;
|
||||
let mut dirname = None;
|
||||
if let Some(name) =
|
||||
entry.attr(constants::DW_AT_name)?.map(|a| a.value())
|
||||
{
|
||||
if let Ok(name) = dwarf.attr_string(&dwarf.unit(unit)?, name) {
|
||||
basename = Some(PathBuf::from(name.to_string()?));
|
||||
}
|
||||
}
|
||||
if let Some(name) =
|
||||
entry.attr(constants::DW_AT_comp_dir)?.map(|a| a.value())
|
||||
{
|
||||
if let Ok(name) = dwarf.attr_string(&dwarf.unit(unit)?, name) {
|
||||
dirname = Some(PathBuf::from(name.to_string()?));
|
||||
}
|
||||
}
|
||||
if let (Some(dirname), Some(basename)) = (dirname, basename) {
|
||||
inputs.push(dirname.join(basename));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let references = inputs
|
||||
.into_iter()
|
||||
.map(|filename| -> anyhow::Result<_> {
|
||||
// TODO: this needs to try suffixes of the filename against the filepath table to see if it
|
||||
// was moved between compilation and ingestion. but how...
|
||||
let metadata = match fs::metadata(&filename) {
|
||||
Ok(m) => m,
|
||||
Err(_) => return Ok(None),
|
||||
};
|
||||
if !metadata.is_file() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let mut fp = fs::File::open(&filename)?;
|
||||
let mut h = Sha256::new();
|
||||
log::debug!("Hashing {}", filename.to_string_lossy());
|
||||
io::copy(&mut fp, &mut h)?;
|
||||
let result = Ok(Some((filename, h.finalize().into())));
|
||||
result
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()?
|
||||
.into_iter()
|
||||
.filter_map(|x| x)
|
||||
.collect();
|
||||
(FileFormat::ELF, references)
|
||||
}
|
||||
_ => (FileFormat::Other, BTreeSet::new()),
|
||||
})
|
||||
}
|
||||
|
|
80
src/main.rs
80
src/main.rs
|
@ -1,5 +1,6 @@
|
|||
mod tracer;
|
||||
mod filestore;
|
||||
mod reports;
|
||||
mod tracer;
|
||||
|
||||
use std::path::PathBuf;
|
||||
|
||||
|
@ -13,39 +14,100 @@ struct Cli {
|
|||
|
||||
#[derive(Subcommand, Debug, Clone)]
|
||||
enum Subcommands {
|
||||
/// Run a command and record its execution
|
||||
Run {
|
||||
/// Any number of filepaths to treat as inputs. They will be hashed and accesses to
|
||||
/// equivalent files will be treated specially.
|
||||
#[arg(short, long)]
|
||||
input: Vec<PathBuf>,
|
||||
file_scope: Vec<PathBuf>,
|
||||
|
||||
/// The filepath to dump the json report to. will dump to stdout if unspecified.
|
||||
#[arg(short, long)]
|
||||
output: Option<PathBuf>,
|
||||
|
||||
/// Set this to
|
||||
#[arg(short, long)]
|
||||
mute: bool,
|
||||
|
||||
/// The command to run. Have fun!
|
||||
cmd: Vec<String>,
|
||||
}
|
||||
},
|
||||
/// Query from the report how in-scope items were used
|
||||
QueryParameters {
|
||||
/// The filepath of the report to open. will read from stdin if unspecified.
|
||||
input: Option<PathBuf>,
|
||||
|
||||
/// The filepath to dump the json report to. will dump to stdout if unspecified.
|
||||
output: Option<PathBuf>,
|
||||
},
|
||||
InternalLaunch {
|
||||
machine: i32,
|
||||
connect: String,
|
||||
cmd: Vec<String>
|
||||
},
|
||||
}
|
||||
|
||||
fn main() {
|
||||
env_logger::init();
|
||||
let cli = Cli::parse();
|
||||
match cli.cmd {
|
||||
Subcommands::Run { input, output, cmd } => {
|
||||
Subcommands::Run {
|
||||
file_scope,
|
||||
output,
|
||||
cmd,
|
||||
mute,
|
||||
} => {
|
||||
let fp: Box<dyn std::io::Write> = if let Some(output) = &output {
|
||||
Box::new(std::fs::File::options().write(true).create(true).open(output).unwrap())
|
||||
Box::new(
|
||||
std::fs::File::options()
|
||||
.write(true)
|
||||
.create(true)
|
||||
.open(output)
|
||||
.unwrap(),
|
||||
)
|
||||
} else {
|
||||
Box::new(std::io::stdout())
|
||||
};
|
||||
let mut t = tracer::Tracer::new(input).unwrap();
|
||||
t.start_root_process(cmd).unwrap();
|
||||
let t = tracer::server::Tracer::run(file_scope, cmd, mute).unwrap();
|
||||
|
||||
if output.is_none() {
|
||||
serde_json::to_writer_pretty(fp, &t.report).unwrap();
|
||||
serde_json::to_writer_pretty(fp, &t)
|
||||
} else {
|
||||
serde_json::to_writer(fp, &t.report).unwrap();
|
||||
serde_json::to_writer(fp, &t)
|
||||
}
|
||||
.expect("Could not serialize json trace report");
|
||||
}
|
||||
Subcommands::QueryParameters { input, output } => {
|
||||
let fp: Box<dyn std::io::Write> = if let Some(output) = &output {
|
||||
Box::new(
|
||||
std::fs::File::options()
|
||||
.write(true)
|
||||
.create(true)
|
||||
.open(output)
|
||||
.unwrap(),
|
||||
)
|
||||
} else {
|
||||
Box::new(std::io::stdout())
|
||||
};
|
||||
|
||||
let in_report: tracer::types::TracerReport = if let Some(input) = &input {
|
||||
serde_json::from_reader(std::fs::File::open(input).unwrap())
|
||||
} else {
|
||||
serde_json::from_reader(std::io::stdin())
|
||||
}
|
||||
.expect("Could not deserialize json trace report");
|
||||
|
||||
let out_report = reports::parameters::run(&in_report).unwrap();
|
||||
|
||||
if output.is_none() {
|
||||
serde_json::to_writer_pretty(fp, &out_report)
|
||||
} else {
|
||||
serde_json::to_writer(fp, &out_report)
|
||||
}
|
||||
.expect("Could not serialize json parameter report");
|
||||
}
|
||||
Subcommands::InternalLaunch { machine, connect, cmd } => {
|
||||
tracer::client::TracerClient::run(machine, connect, cmd).expect("Tracing failed");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
pub mod parameters;
|
|
@ -0,0 +1,34 @@
|
|||
use std::path::PathBuf;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::tracer::types::TracerReport;
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct ParametersReport {
|
||||
pub files: Vec<ParametersReportFile>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct ParametersReportFile {
|
||||
source_name: PathBuf,
|
||||
used_names: Vec<PathBuf>,
|
||||
}
|
||||
|
||||
pub fn run(report: &TracerReport) -> anyhow::Result<ParametersReport> {
|
||||
Ok(ParametersReport {
|
||||
files: report
|
||||
.files
|
||||
.files
|
||||
.iter()
|
||||
.filter_map(|rf| {
|
||||
(!rf.input_names.is_empty() && !rf.output_names.is_empty()).then(|| {
|
||||
ParametersReportFile {
|
||||
source_name: rf.input_names.iter().next().cloned().unwrap(),
|
||||
used_names: rf.output_names.iter().cloned().collect(),
|
||||
}
|
||||
})
|
||||
})
|
||||
.collect(),
|
||||
})
|
||||
}
|
883
src/tracer.rs
883
src/tracer.rs
|
@ -1,883 +0,0 @@
|
|||
use std::{
|
||||
collections::HashMap,
|
||||
ffi::CString,
|
||||
ffi::OsString,
|
||||
os::unix::prelude::OsStringExt,
|
||||
path::PathBuf,
|
||||
process::exit,
|
||||
time::{Duration, Instant},
|
||||
fmt::{Display, Formatter},
|
||||
};
|
||||
|
||||
use core::fmt;
|
||||
|
||||
use nix::{
|
||||
errno::Errno,
|
||||
libc::{pid_t, raise, tcsetpgrp, AT_EMPTY_PATH, SIGSTOP, STDIN_FILENO, AT_FDCWD},
|
||||
sys::{
|
||||
ptrace::{self, traceme, AddressType},
|
||||
signal::Signal,
|
||||
wait::{waitpid, WaitPidFlag, WaitStatus},
|
||||
},
|
||||
unistd::{execvp, getpid, setpgid, ForkResult},
|
||||
};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::filestore::FileStore;
|
||||
|
||||
#[derive(Copy, Clone, Serialize, Deserialize, Eq, PartialEq, Debug, Hash)]
|
||||
pub struct Pid(i32);
|
||||
|
||||
impl From<nix::unistd::Pid> for Pid {
|
||||
fn from(value: nix::unistd::Pid) -> Self {
|
||||
Self(value.as_raw())
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts back into a `nix` pid.
///
/// Implemented as `From` rather than `Into`: the standard blanket impl still provides
/// `Into<nix::unistd::Pid> for Pid`, so existing `pid.into()` call sites keep working.
impl From<Pid> for nix::unistd::Pid {
    fn from(value: Pid) -> Self {
        nix::unistd::Pid::from_raw(value.0)
    }
}
|
||||
|
||||
impl Display for Pid {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||
self.0.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn read_generic_string<TString>(
|
||||
pid: Pid,
|
||||
address: AddressType,
|
||||
ctor: impl Fn(Vec<u8>) -> TString,
|
||||
) -> anyhow::Result<TString> {
|
||||
let mut buf = Vec::new();
|
||||
let mut address = address;
|
||||
const WORD_SIZE: usize = 8; // FIXME
|
||||
loop {
|
||||
let word = match ptrace::read(pid.into(), address) {
|
||||
Err(e) => {
|
||||
log::warn!("Cannot read tracee {pid} memory {address:?}: {e}");
|
||||
return Ok(ctor(buf));
|
||||
}
|
||||
Ok(word) => word,
|
||||
};
|
||||
let word_bytes = word.to_ne_bytes();
|
||||
for &byte in word_bytes.iter() {
|
||||
if byte == 0 {
|
||||
return Ok(ctor(buf));
|
||||
}
|
||||
buf.push(byte);
|
||||
}
|
||||
address = unsafe { address.add(WORD_SIZE) };
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
pub fn read_cstring(pid: Pid, address: AddressType) -> anyhow::Result<CString> {
|
||||
read_generic_string(pid, address, |x| CString::new(x).unwrap())
|
||||
}
|
||||
|
||||
pub fn read_pathbuf(pid: Pid, address: AddressType) -> anyhow::Result<PathBuf> {
|
||||
read_generic_string(pid, address, |x| PathBuf::from(OsString::from_vec(x)))
|
||||
}
|
||||
|
||||
pub fn read_string(pid: Pid, address: AddressType) -> anyhow::Result<String> {
|
||||
// Waiting on https://github.com/rust-lang/libs-team/issues/116
|
||||
read_generic_string(pid, address, |x| String::from_utf8_lossy(&x).to_string())
|
||||
}
|
||||
|
||||
pub fn read_null_ended_array<TItem>(
|
||||
pid: Pid,
|
||||
mut address: AddressType,
|
||||
reader: impl Fn(Pid, AddressType) -> anyhow::Result<TItem>,
|
||||
) -> anyhow::Result<Vec<TItem>> {
|
||||
let mut res = Vec::new();
|
||||
const WORD_SIZE: usize = 8; // FIXME
|
||||
loop {
|
||||
let ptr = match ptrace::read(pid.into(), address) {
|
||||
Err(e) => {
|
||||
log::warn!("Cannot read tracee {pid} memory {address:?}: {e}");
|
||||
return Ok(res);
|
||||
}
|
||||
Ok(ptr) => ptr,
|
||||
};
|
||||
if ptr == 0 {
|
||||
return Ok(res);
|
||||
} else {
|
||||
res.push(reader(pid, ptr as AddressType)?);
|
||||
}
|
||||
address = unsafe { address.add(WORD_SIZE) };
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
pub fn read_cstring_array(pid: Pid, address: AddressType) -> anyhow::Result<Vec<CString>> {
|
||||
read_null_ended_array(pid, address, read_cstring)
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
pub fn read_string_array(pid: Pid, address: AddressType) -> anyhow::Result<Vec<String>> {
|
||||
read_null_ended_array(pid, address, read_string)
|
||||
}
|
||||
|
||||
// Yields the syscall number from a register struct: the `orig_rax` field cast
// to `i64`.
macro_rules! syscall_no_from_regs {
    ($regs:ident) => {
        $regs.orig_rax as i64
    };
}
|
||||
|
||||
// Yields the syscall result from a register struct: the `rax` field cast to
// `i64`, so failures show up as negative values.
macro_rules! syscall_res_from_regs {
    ($regs:ident) => {
        $regs.rax as i64
    };
}
|
||||
|
||||
// Yields the N-th syscall argument register (N is a literal 0..=5), in the
// order rdi, rsi, rdx, r10, r8, r9. Any other index fails to match at compile
// time.
macro_rules! syscall_arg {
    ($regs:ident, 0) => {
        $regs.rdi
    };
    ($regs:ident, 1) => {
        $regs.rsi
    };
    ($regs:ident, 2) => {
        $regs.rdx
    };
    ($regs:ident, 3) => {
        $regs.r10
    };
    ($regs:ident, 4) => {
        $regs.r8
    };
    ($regs:ident, 5) => {
        $regs.r9
    };
}
|
||||
|
||||
/// Reads the argument vector of process `pid` from `/proc/<pid>/cmdline`.
///
/// The file holds the arguments separated by NUL bytes, with a NUL after the
/// last one. That final terminator is removed before splitting so it does not
/// produce a spurious empty trailing argument, and an empty file yields an
/// empty vector.
///
/// # Errors
/// Fails if the file cannot be read.
pub fn read_argv(pid: Pid) -> anyhow::Result<Vec<CString>> {
    let filename = format!("/proc/{pid}/cmdline");
    let buf = std::fs::read(filename)?;
    if buf.is_empty() {
        return Ok(Vec::new());
    }
    // Drop only the terminator of the last argument; interior separators stay.
    let args = buf.strip_suffix(&[0]).unwrap_or(&buf);
    Ok(args
        .split(|&c| c == 0)
        .map(CString::new)
        .collect::<Result<Vec<_>, _>>()?)
}
|
||||
|
||||
/// Reads the command name of process `pid` from `/proc/<pid>/comm`.
///
/// A single trailing newline is removed if present. The last byte is no longer
/// dropped unconditionally, so content without a newline is kept intact.
///
/// # Errors
/// Fails if the file cannot be read or its content is not valid UTF-8.
pub fn read_comm(pid: Pid) -> anyhow::Result<String> {
    let filename = format!("/proc/{pid}/comm");
    let mut buf = std::fs::read(filename)?;
    if buf.last() == Some(&b'\n') {
        buf.pop(); // remove trailing newline
    }
    Ok(String::from_utf8(buf)?)
}
|
||||
|
||||
pub fn read_cwd(pid: Pid) -> std::io::Result<PathBuf> {
|
||||
let filename = format!("/proc/{pid}/cwd");
|
||||
let buf = std::fs::read_link(filename)?;
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
pub fn read_fd(pid: Pid, fd: i32) -> std::io::Result<PathBuf> {
|
||||
if fd == AT_FDCWD {
|
||||
return read_cwd(pid);
|
||||
}
|
||||
let filename = format!("/proc/{pid}/fd/{fd}");
|
||||
std::fs::read_link(filename)
|
||||
}
|
||||
|
||||
/*
|
||||
#[derive(Debug)]
|
||||
pub enum Interpreter {
|
||||
None,
|
||||
Shebang(String),
|
||||
ExecutableUnaccessible,
|
||||
Error(io::Error),
|
||||
}
|
||||
|
||||
impl Display for Interpreter {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Interpreter::None => write!(f, "none"),
|
||||
Interpreter::Shebang(s) => write!(f, "{:?}", s),
|
||||
Interpreter::ExecutableUnaccessible => {
|
||||
write!(f, "executable unaccessible")
|
||||
}
|
||||
Interpreter::Error(e) => write!(f, "(err: {e})"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn read_interpreter_recursive(exe: impl AsRef<Path>) -> Vec<Interpreter> {
|
||||
let mut exe = Cow::Borrowed(exe.as_ref());
|
||||
let mut interpreters = Vec::new();
|
||||
loop {
|
||||
match read_interpreter(exe.as_ref()) {
|
||||
Interpreter::Shebang(shebang) => {
|
||||
exe = Cow::Owned(PathBuf::from(
|
||||
shebang.split_ascii_whitespace().next().unwrap_or(""),
|
||||
));
|
||||
interpreters.push(Interpreter::Shebang(shebang));
|
||||
}
|
||||
Interpreter::None => break,
|
||||
err => {
|
||||
interpreters.push(err);
|
||||
break;
|
||||
}
|
||||
};
|
||||
}
|
||||
interpreters
|
||||
}
|
||||
|
||||
pub fn read_interpreter(exe: &Path) -> Interpreter {
|
||||
fn err_to_interpreter(e: io::Error) -> Interpreter {
|
||||
if e.kind() == io::ErrorKind::PermissionDenied || e.kind() == io::ErrorKind::NotFound {
|
||||
Interpreter::ExecutableUnaccessible
|
||||
} else {
|
||||
Interpreter::Error(e)
|
||||
}
|
||||
}
|
||||
let file = match std::fs::File::open(exe) {
|
||||
Ok(file) => file,
|
||||
Err(e) => return err_to_interpreter(e),
|
||||
};
|
||||
let mut reader = BufReader::new(file);
|
||||
// First, check if it's a shebang script
|
||||
let mut buf = [0u8; 2];
|
||||
|
||||
if let Err(e) = reader.read_exact(&mut buf) {
|
||||
return Interpreter::Error(e);
|
||||
};
|
||||
if &buf != b"#!" {
|
||||
return Interpreter::None;
|
||||
}
|
||||
// Read the rest of the line
|
||||
let mut buf = Vec::new();
|
||||
|
||||
if let Err(e) = reader.read_until(b'\n', &mut buf) {
|
||||
return Interpreter::Error(e);
|
||||
};
|
||||
// Get trimmed shebang line [start, end) indices
|
||||
// If the shebang line is empty, we don't care
|
||||
let start = buf
|
||||
.iter()
|
||||
.position(|&c| !c.is_ascii_whitespace())
|
||||
.unwrap_or(0);
|
||||
let end = buf
|
||||
.iter()
|
||||
.rposition(|&c| !c.is_ascii_whitespace())
|
||||
.map(|x| x + 1)
|
||||
.unwrap_or(buf.len());
|
||||
let shebang = String::from_utf8_lossy(&buf[start..end]);
|
||||
Interpreter::Shebang(shebang.into_owned())
|
||||
}
|
||||
*/
|
||||
|
||||
/// Registry of traced processes, keyed by pid.
///
/// Each pid maps to a list of states; the last entry is treated as the current
/// one (see `get_current_mut`).
#[derive(Default)]
pub struct ProcessStateStore {
    processes: HashMap<Pid, Vec<ProcessState>>,
}
|
||||
|
||||
/// Book-keeping for one traced process.
#[derive(Debug)]
pub struct ProcessState {
    pub pid: Pid,
    // Parent pid, when known; `ProcessState::new` leaves it as `None`.
    pub ppid: Option<Pid>,
    pub status: ProcessStatus,
    // NOTE(review): every caller visible in this file passes 0 — unit and meaning unconfirmed.
    pub start_time: u64,
    // Arguments as read from /proc/<pid>/cmdline when the state was created.
    pub argv: Vec<CString>,
    // Command name from /proc/<pid>/comm; refreshed after exec syscalls.
    pub comm: String,
    // True while the next syscall stop is expected to be a syscall entry;
    // toggled by the entry and exit handlers.
    pub presyscall: bool,
    // Set when PTRACE_EVENT_EXEC is seen, reset at the exit of an exec syscall.
    pub is_exec_successful: bool,
    // Number of the syscall recorded at the last entry stop (-1 initially).
    pub syscall: i64,
    // Events recorded at syscall entry, logged or discarded at syscall exit.
    pub pending_syscall_event: Vec<Event>,
}
|
||||
|
||||
/// Lifecycle state of a traced process.
///
/// The first two variants record which half of the "new child" handshake
/// (initial SIGSTOP vs. the parent's ptrace fork event) arrived first.
#[derive(Debug, Clone, PartialEq)]
pub enum ProcessStatus {
    // The child's SIGSTOP was seen before the parent's fork event.
    SigstopReceived,
    // The parent's fork event was seen before the child's SIGSTOP.
    PtraceForkEventReceived,
    Running,
    // Exited with the given exit code.
    Exited(i32),
}
|
||||
|
||||
/// Description of an exec call: program path, arguments, environment and
/// working directory.
// NOTE(review): not constructed anywhere in the visible part of this file.
#[derive(Debug)]
pub struct ExecData {
    pub filename: PathBuf,
    pub argv: Vec<String>,
    pub envp: Vec<String>,
    pub cwd: PathBuf,
    //pub interpreters: Vec<Interpreter>,
}
|
||||
|
||||
impl ProcessStateStore {
|
||||
pub fn insert(&mut self, state: ProcessState) {
|
||||
self.processes.entry(state.pid).or_default().push(state);
|
||||
}
|
||||
|
||||
pub fn get_current_mut(&mut self, pid: Pid) -> Option<&mut ProcessState> {
|
||||
// The last process in the vector is the current process
|
||||
// println!("Getting {pid}");
|
||||
self.processes.get_mut(&pid)?.last_mut()
|
||||
}
|
||||
}
|
||||
|
||||
impl ProcessState {
|
||||
pub fn new(pid: Pid, start_time: u64) -> anyhow::Result<Self> {
|
||||
Ok(Self {
|
||||
pid,
|
||||
ppid: None,
|
||||
status: ProcessStatus::Running,
|
||||
comm: read_comm(pid)?,
|
||||
argv: read_argv(pid)?,
|
||||
start_time,
|
||||
presyscall: true,
|
||||
is_exec_successful: false,
|
||||
syscall: -1,
|
||||
pending_syscall_event: vec![],
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// A traced action performed by a process, as recorded in the log.
#[derive(Debug, Serialize, Deserialize)]
pub enum Event {
    // A new child process was reported by a ptrace fork/vfork/clone event.
    Fork { child: Pid },
    // An exec of `prog` (path as captured at syscall entry).
    Exec { prog: PathBuf },
    Exit { code: i32 },
    // `fd` holds a placeholder at syscall entry and is replaced by the
    // syscall's result at exit.
    FdOpen { fd: i32, source: FdSource },
    // `newfd` is a placeholder (-1) until the syscall result is known.
    FdDup { oldfd: i32, newfd: i32 },
    FdClose { fd: i32 },
    FdRead { fd: i32 },
    FdWrite { fd: i32 },
}
|
||||
|
||||
/// What a newly opened file descriptor refers to.
#[derive(Debug, Serialize, Deserialize)]
pub enum FdSource {
    // A file at `path`.
    File { path: PathBuf },
    // The terminal (displayed as "the terminal").
    Tty,
}
|
||||
|
||||
/// Identifies the process an event belongs to: a machine number plus a pid.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Identifier {
    // The tracer in this file always logs with machine 0.
    machine: i32,
    pid: Pid,
}
|
||||
|
||||
/// One line of the trace log: who did what, and when.
#[derive(Debug, Serialize, Deserialize)]
pub struct LogEntry {
    ident: Identifier,
    event: Event,
    // Time elapsed since the tracer's `start_time` when the entry was logged.
    timestamp: Duration,
}
|
||||
|
||||
impl Display for LogEntry {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"[{}.{:03} m{}p{}] {}",
|
||||
self.timestamp.as_secs(),
|
||||
self.timestamp.as_millis() % 1000,
|
||||
self.ident.machine,
|
||||
self.ident.pid,
|
||||
self.event
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for FdSource {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
FdSource::File { path } => write!(f, "file {}", path.to_string_lossy()),
|
||||
FdSource::Tty => write!(f, "the terminal"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Event {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Event::Fork { child } => write!(f, "fork {child}"),
|
||||
Event::Exec { prog } => write!(f, "exec {}", prog.to_string_lossy()),
|
||||
Event::Exit { code } => write!(f, "exit with {code}"),
|
||||
Event::FdOpen { fd, source } => write!(f, "open fd {fd} from {source}"),
|
||||
Event::FdDup { oldfd, newfd } => write!(f, "dup fd {oldfd} to {newfd}"),
|
||||
Event::FdClose { fd } => write!(f, "close fd {fd}"),
|
||||
Event::FdRead { fd } => write!(f, "read from fd {fd}"),
|
||||
Event::FdWrite { fd } => write!(f, "write to fd {fd}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// ptrace-based tracer: tracks traced processes and accumulates a report.
pub struct Tracer {
    // Per-pid state of every process seen so far.
    pub store: ProcessStateStore,
    // Reference point for log timestamps.
    pub start_time: Instant,
    // Log entries and file store collected during the run.
    pub report: TracerReport,
}
|
||||
|
||||
/// Serializable result of a tracing run.
#[derive(Serialize, Deserialize)]
pub struct TracerReport {
    // Events in the order they were logged.
    pub log: Vec<LogEntry>,
    // File store; the tracer feeds it the paths of successful opens and execs.
    pub files: FileStore,
}
|
||||
|
||||
fn ptrace_syscall(pid: Pid, sig: Option<Signal>) -> Result<(), Errno> {
|
||||
match ptrace::syscall(pid.into(), sig) {
|
||||
Err(Errno::ESRCH) => {
|
||||
log::info!("ptrace syscall failed: {pid}, ESRCH, child probably gone!");
|
||||
Ok(())
|
||||
}
|
||||
other => other,
|
||||
}
|
||||
}
|
||||
|
||||
impl Tracer {
|
||||
pub fn log(&mut self, ident: Identifier, event: Event) {
|
||||
self.report.log.push(LogEntry {
|
||||
ident,
|
||||
event,
|
||||
timestamp: Instant::now().duration_since(self.start_time),
|
||||
});
|
||||
}
|
||||
|
||||
pub fn log_root(&mut self, pid: Pid, event: Event) {
|
||||
self.log(Identifier { pid, machine: 0 }, event);
|
||||
}
|
||||
|
||||
pub fn new(input: Vec<PathBuf>) -> anyhow::Result<Self> {
|
||||
let files = FileStore::new(input)?;
|
||||
Ok(Self {
|
||||
store: ProcessStateStore::default(),
|
||||
start_time: Instant::now(),
|
||||
report: TracerReport {
|
||||
log: vec![],
|
||||
files,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
fn drain_syscall_events(&mut self, pid: Pid, mut filter: Box<dyn FnMut(&mut Event)>) {
|
||||
let p = self.store.get_current_mut(pid).unwrap();
|
||||
for mut event in p.pending_syscall_event.drain(..) {
|
||||
(filter)(&mut event);
|
||||
self.report.log.push(LogEntry {
|
||||
ident: Identifier { pid, machine: 0 },
|
||||
event,
|
||||
timestamp: Instant::now().duration_since(self.start_time),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/// Forks, runs `args` as a traced child and drives the ptrace event loop.
///
/// Child side: moves into its own process group, requests tracing, stops
/// itself with SIGSTOP and then `execvp`s `args`.
///
/// Parent side: waits for that stop, registers the child, makes the child's
/// process group the foreground group of stdin's terminal, sets the ptrace
/// options and then handles wait statuses in a loop. The loop ends (returning
/// `Ok`) when the root child exits; if the root child is killed by a signal,
/// the tracer process itself exits with `128 + signal`.
///
/// # Errors
/// Propagates failures of fork/wait/ptrace calls and of the syscall handlers.
///
/// # Panics
/// Panics on wait statuses for pids that are missing from the store.
/// NOTE(review): `args[0]` in the child branch panics when `args` is empty.
pub fn start_root_process(&mut self, args: Vec<String>) -> anyhow::Result<()> {
    log::trace!("start_root_process: {:?}", args);

    if let ForkResult::Parent { child: root_child } = unsafe { nix::unistd::fork()? } {
        waitpid(root_child, Some(WaitPidFlag::WSTOPPED))?; // wait for child to stop
        let root_child = root_child.into();
        log::trace!("child stopped");
        let mut root_child_state = ProcessState::new(root_child, 0)?;
        root_child_state.ppid = Some(getpid().into());
        self.store.insert(root_child_state);
        // Set foreground process group of the terminal
        if -1 == unsafe { tcsetpgrp(STDIN_FILENO, root_child.0) } {
            return Err(Errno::last().into());
        }
        // Configure tracing before the child is resumed
        log::trace!("resuming child");
        let ptrace_opts = {
            use nix::sys::ptrace::Options;
            Options::PTRACE_O_TRACEEXEC
                | Options::PTRACE_O_TRACEEXIT
                | Options::PTRACE_O_EXITKILL
                | Options::PTRACE_O_TRACESYSGOOD
                | Options::PTRACE_O_TRACEFORK
                | Options::PTRACE_O_TRACECLONE
                | Options::PTRACE_O_TRACEVFORK
        };
        ptrace::setoptions(root_child.into(), ptrace_opts)?;
        // restart child
        self.seccomp_aware_cont(root_child)?;
        loop {
            // Wait for any child.
            let status = waitpid(None, Some(WaitPidFlag::__WALL))?;
            // log::trace!("waitpid: {:?}", status);
            match status {
                WaitStatus::Stopped(pid, sig) => {
                    let pid = pid.into();
                    log::trace!("stopped: {pid}, sig {:?}", sig);
                    match sig {
                        Signal::SIGSTOP => {
                            log::trace!("sigstop event, child: {pid}");
                            if let Some(state) = self.store.get_current_mut(pid) {
                                if state.status == ProcessStatus::PtraceForkEventReceived {
                                    // Both halves of the new-child handshake have arrived:
                                    // the child may run now.
                                    log::trace!("sigstop event received after ptrace fork event, pid: {pid}");
                                    state.status = ProcessStatus::Running;
                                    self.seccomp_aware_cont(pid)?;
                                } else if pid != root_child {
                                    log::error!("Unexpected SIGSTOP: {state:?}")
                                }
                            } else {
                                // Unknown pid: remember the stop and wait for the
                                // parent's fork event before resuming.
                                log::trace!("sigstop event received before ptrace fork event, pid: {pid}");
                                let mut state = ProcessState::new(pid, 0)?;
                                state.status = ProcessStatus::SigstopReceived;
                                self.store.insert(state);
                            }
                            // https://stackoverflow.com/questions/29997244/occasionally-missing-ptrace-event-vfork-when-running-ptrace
                            // DO NOT send PTRACE_SYSCALL until we receive the PTRACE_EVENT_FORK, etc.
                        }
                        Signal::SIGCHLD => {
                            // From lurk:
                            //
                            // The SIGCHLD signal is sent to a process when a child process terminates, is interrupted, or resumes after being interrupted.
                            // This means that if our tracee forked and said fork exits before the parent, the parent will get stopped.
                            // Therefore issue a PTRACE_SYSCALL request to the parent to continue execution.
                            // This is also important if we trace without the following forks option.
                            self.seccomp_aware_cont_with_signal(pid, Signal::SIGCHLD)?;
                        }
                        _ => {
                            // Just deliver the signal to tracee
                            self.seccomp_aware_cont_with_signal(pid, sig)?;
                        }
                    }
                }
                WaitStatus::Exited(pid, code) => {
                    let pid = pid.into();
                    log::trace!("exited: pid {}, code {:?}", pid, code);
                    self.log_root(pid, Event::Exit { code });
                    self.store.get_current_mut(pid).unwrap().status =
                        ProcessStatus::Exited(code);
                    // Tracing ends once the root child has exited.
                    if pid == root_child {
                        break;
                    }
                }
                WaitStatus::PtraceEvent(pid, sig, evt) => {
                    log::trace!("ptrace event: {:?} {:?}", sig, evt);
                    match evt {
                        nix::libc::PTRACE_EVENT_FORK
                        | nix::libc::PTRACE_EVENT_VFORK
                        | nix::libc::PTRACE_EVENT_CLONE => {
                            // The event message carries the pid of the new child.
                            let new_child = Pid(ptrace::getevent(pid.into())? as pid_t);
                            log::trace!(
                                "ptrace fork event, evt {evt}, pid: {pid}, child: {new_child}"
                            );
                            self.log_root(pid.into(), Event::Fork { child: new_child });
                            if let Some(state) = self.store.get_current_mut(new_child) {
                                if state.status == ProcessStatus::SigstopReceived {
                                    log::trace!("ptrace fork event received after sigstop, pid: {pid}, child: {new_child}");
                                    state.status = ProcessStatus::Running;
                                    state.ppid = Some(pid.into());
                                    self.seccomp_aware_cont(new_child)?;
                                } else if new_child != root_child {
                                    log::error!("Unexpected fork event: {state:?}")
                                }
                            } else {
                                // Child not seen yet: record it and wait for its SIGSTOP.
                                log::trace!("ptrace fork event received before sigstop, pid: {pid}, child: {new_child}");
                                let mut state = ProcessState::new(new_child, 0)?;
                                state.status = ProcessStatus::PtraceForkEventReceived;
                                state.ppid = Some(pid.into());
                                self.store.insert(state);
                            }
                            // Resume parent
                            self.seccomp_aware_cont(pid.into())?;
                        }
                        nix::libc::PTRACE_EVENT_EXEC => {
                            log::trace!("exec event");
                            let p = self.store.get_current_mut(pid.into()).unwrap();
                            assert!(!p.presyscall);
                            // After execve or execveat, in syscall exit event,
                            // the registers might be clobbered(e.g. aarch64).
                            // So we need to determine whether exec is successful here.
                            // PTRACE_EVENT_EXEC only happens for successful exec.
                            p.is_exec_successful = true;
                            // The Exec event was queued at syscall entry; take its path.
                            let path = p
                                .pending_syscall_event
                                .iter()
                                .find_map(|e| match e {
                                    Event::Exec { prog, .. } => Some(prog.clone()),
                                    _ => None,
                                })
                                .unwrap();
                            self.report.files.ingest_output_local(path)?;
                            self.drain_syscall_events(pid.into(), Box::new(|_| {}));
                            // Don't use seccomp_aware_cont here because that will skip the next syscall exit stop
                            self.syscall_enter_cont(pid.into())?;
                        }
                        nix::libc::PTRACE_EVENT_EXIT => {
                            log::trace!("exit event");
                            self.seccomp_aware_cont(pid.into())?;
                        }
                        nix::libc::PTRACE_EVENT_SECCOMP => {
                            log::trace!("seccomp event");
                            self.on_syscall_enter(pid.into())?;
                        }
                        _ => {
                            log::trace!("other event");
                            self.seccomp_aware_cont(pid.into())?;
                        }
                    }
                }
                WaitStatus::Signaled(pid, sig, _) => {
                    let pid: Pid = pid.into();
                    log::debug!("signaled: {pid}, {:?}", sig);
                    // Mirror the shell convention for signal deaths of the root child.
                    if pid == root_child {
                        exit(128 + (sig as i32))
                    }
                }
                WaitStatus::PtraceSyscall(pid) => {
                    let pid = pid.into();
                    // `presyscall` tells whether this stop is a syscall entry or exit.
                    let presyscall = self.store.get_current_mut(pid).unwrap().presyscall;
                    if presyscall {
                        self.on_syscall_enter(pid)?;
                    } else {
                        self.on_syscall_exit(pid)?;
                    }
                }
                _ => {}
            }
        }
    } else {
        // Child: own process group, become a tracee, stop until the parent is ready.
        let me = getpid();
        setpgid(me, me)?;
        traceme()?;
        if 0 != unsafe { raise(SIGSTOP) } {
            log::error!("raise failed!");
            exit(-1);
        }

        let args = args
            .into_iter()
            .map(CString::new)
            .collect::<Result<Vec<CString>, _>>()?;

        execvp(&args[0], &args)?;
    }
    Ok(())
}
|
||||
|
||||
/// Handles a syscall-entry stop of `pid`.
///
/// Flips `presyscall`, records the syscall number and, for the syscalls of
/// interest (exec*, open*, read/write families, dup*, fcntl(F_DUPFD), close),
/// queues a pending event built from the entry arguments. The event is logged
/// or dropped by the exit handler. Finally the tracee is resumed.
///
/// If the registers cannot be read because the tracee is gone (`ESRCH`), the
/// function returns `Ok` without resuming it.
///
/// Panics if `pid` is not in the store.
fn on_syscall_enter(&mut self, pid: Pid) -> anyhow::Result<()> {
    let p = self.store.get_current_mut(pid).unwrap();
    p.presyscall = !p.presyscall;
    // SYSCALL ENTRY
    let regs = match ptrace::getregs(pid.into()) {
        Ok(regs) => regs,
        Err(Errno::ESRCH) => {
            log::info!("ptrace getregs failed: {pid}, ESRCH, child probably gone!");
            return Ok(());
        }
        e => e?,
    };
    let syscallno = syscall_no_from_regs!(regs);
    p.syscall = syscallno;
    // log::trace!("pre syscall: {syscallno}");
    match syscallno {
        nix::libc::SYS_execveat => {
            log::trace!("pre execveat");
            // int execveat(int dirfd, const char *pathname,
            //              char *const _Nullable argv[],
            //              char *const _Nullable envp[],
            //              int flags);
            let dirfd = syscall_arg!(regs, 0) as i32;
            // NOTE(review): read lossily as `String`, unlike execve which keeps raw bytes.
            let pathname = read_string(pid, syscall_arg!(regs, 1) as AddressType)?;
            //let argv = read_string_array(pid, syscall_arg!(regs, 2) as AddressType)?;
            //let envp = read_string_array(pid, syscall_arg!(regs, 3) as AddressType)?;
            let flags = syscall_arg!(regs, 4) as i32;
            let filename = resolve_filename_at_fd(pid, pathname, dirfd, flags)?;
            //let interpreters = read_interpreter_recursive(&filename);
            p.pending_syscall_event.push(Event::Exec { prog: filename });
        }
        nix::libc::SYS_execve => {
            log::trace!("pre execve");
            // The path is recorded as passed by the tracee (not resolved against its cwd).
            let filename = read_pathbuf(pid, syscall_arg!(regs, 0) as AddressType)?;
            //let argv = read_string_array(pid, syscall_arg!(regs, 1) as AddressType)?;
            //let envp = read_string_array(pid, syscall_arg!(regs, 2) as AddressType)?;
            //let interpreters = read_interpreter_recursive(&filename);
            p.pending_syscall_event.push(Event::Exec { prog: filename });
        }
        nix::libc::SYS_open => {
            let path = read_pathbuf(pid, syscall_arg!(regs, 0) as AddressType)?;
            // `fd` is a placeholder; the exit handler fills in the real descriptor.
            p.pending_syscall_event.push(Event::FdOpen {
                source: FdSource::File { path },
                fd: -1,
            });
        }
        nix::libc::SYS_openat => {
            let dirfd = syscall_arg!(regs, 0) as i32;
            let pathname = read_string(pid, syscall_arg!(regs, 1) as AddressType)?;
            // NOTE(review): these are open(2) flags, but resolve_filename_at_fd
            // tests them against AT_EMPTY_PATH — confirm this is intended.
            let flags = syscall_arg!(regs, 2) as i32;
            let path = resolve_filename_at_fd(pid, pathname, dirfd, flags)?;
            // NOTE(review): placeholder is 0 here but -1 for SYS_open; it is
            // overwritten at exit on success either way.
            p.pending_syscall_event.push(Event::FdOpen {
                source: FdSource::File { path },
                fd: 0,
            });
        }
        nix::libc::SYS_read
        | nix::libc::SYS_readv
        | nix::libc::SYS_preadv
        | nix::libc::SYS_preadv2 => {
            let fd = syscall_arg!(regs, 0) as i32;
            p.pending_syscall_event.push(Event::FdRead { fd });
        }
        nix::libc::SYS_write
        | nix::libc::SYS_writev
        | nix::libc::SYS_pwritev
        | nix::libc::SYS_pwritev2 => {
            let fd = syscall_arg!(regs, 0) as i32;
            p.pending_syscall_event.push(Event::FdWrite { fd });
        }
        nix::libc::SYS_dup | nix::libc::SYS_dup2 | nix::libc::SYS_dup3 => {
            let oldfd = syscall_arg!(regs, 0) as i32;
            // `newfd` is taken from the syscall result at exit.
            p.pending_syscall_event
                .push(Event::FdDup { oldfd, newfd: -1 });
        }
        nix::libc::SYS_fcntl => {
            let fd = syscall_arg!(regs, 0) as i32;
            let cmd = syscall_arg!(regs, 1) as i32;
            // Only the F_DUPFD command is tracked.
            match cmd {
                nix::libc::F_DUPFD => p.pending_syscall_event.push(Event::FdDup {
                    oldfd: fd,
                    newfd: -1,
                }),
                _ => {}
            }
        }
        nix::libc::SYS_close => {
            let fd = syscall_arg!(regs, 0) as i32;
            p.pending_syscall_event.push(Event::FdClose { fd });
        }
        _ => {}
    }
    self.syscall_enter_cont(pid)?;
    Ok(())
}
|
||||
|
||||
/// Handles a syscall-exit stop of `pid`.
///
/// Flips `presyscall`, reads the syscall result and decides what happens to
/// the events queued at entry: on success they are logged (after patching in
/// the resulting descriptor for open/dup/fcntl); otherwise, and always for
/// exec syscalls, they are discarded. Finally the tracee is resumed.
///
/// If the registers cannot be read because the tracee is gone (`ESRCH`), the
/// function returns `Ok` without resuming it.
///
/// Panics if `pid` is not in the store.
fn on_syscall_exit(&mut self, pid: Pid) -> anyhow::Result<()> {
    // SYSCALL EXIT
    // log::trace!("post syscall {}", p.syscall);
    let p = self.store.get_current_mut(pid).unwrap();
    p.presyscall = !p.presyscall;
    let regs = match ptrace::getregs(pid.into()) {
        Ok(regs) => regs,
        Err(Errno::ESRCH) => {
            log::info!("ptrace getregs failed: {pid}, ESRCH, child probably gone!");
            return Ok(());
        }
        e => e?,
    };
    let result = syscall_res_from_regs!(regs);

    // `Some(filter)`: log the pending events, applying `filter` to each.
    // `None`: drop the pending events.
    let filter: Option<Box<dyn FnMut(&mut Event)>> = match p.syscall {
        nix::libc::SYS_execve => {
            log::trace!("post execve");
            // Events of a successful exec were already logged when
            // PTRACE_EVENT_EXEC was handled; reset the flag for the next exec.
            p.is_exec_successful = false;
            // update comm
            p.comm = read_comm(pid)?;
            None
        }
        nix::libc::SYS_execveat => {
            log::trace!("post execveat");
            p.is_exec_successful = false;
            // update comm
            p.comm = read_comm(pid)?;
            None
        }
        nix::libc::SYS_open | nix::libc::SYS_openat => {
            if result >= 0 {
                // Hand every successfully opened path to the file store.
                for pending in p.pending_syscall_event.iter_mut() {
                    if let Event::FdOpen { source: FdSource::File { path }, .. } = pending {
                        self.report.files.ingest_output_local(path.clone())?;
                    }
                }
                // Replace the placeholder fd with the descriptor returned by the syscall.
                Some(Box::new(move |event| match event {
                    Event::FdOpen {
                        fd: ref mut dest, ..
                    } => {
                        *dest = result as i32;
                    }
                    _ => {}
                }))
            } else {
                None
            }
        }
        nix::libc::SYS_dup | nix::libc::SYS_dup2 | nix::libc::SYS_dup3 => {
            if result >= 0 {
                // The syscall result is the new descriptor.
                Some(Box::new(move |event| match event {
                    Event::FdDup {
                        newfd: ref mut dest,
                        ..
                    } => {
                        *dest = result as i32;
                    }
                    _ => {}
                }))
            } else {
                None
            }
        }
        nix::libc::SYS_fcntl => {
            if result >= 0 {
                // Only F_DUPFD queues an event at entry, so this patches at most that one.
                Some(Box::new(move |event| match event {
                    Event::FdDup {
                        newfd: ref mut dest,
                        ..
                    } => {
                        *dest = result as i32;
                    }
                    _ => {}
                }))
            } else {
                None
            }
        }
        _ => {
            // Other syscalls: log pending events unchanged on success.
            if result >= 0 {
                Some(Box::new(|_| {}))
            } else {
                None
            }
        }
    };
    if let Some(filter) = filter {
        self.drain_syscall_events(pid, filter);
    } else {
        p.pending_syscall_event.clear();
    }
    self.seccomp_aware_cont(pid)?;
    Ok(())
}
|
||||
|
||||
/// Resumes `pid` until its next syscall stop, without delivering a signal.
fn syscall_enter_cont(&self, pid: Pid) -> Result<(), Errno> {
    ptrace_syscall(pid, None)
}
|
||||
|
||||
/// Resumes `pid` without delivering a signal.
///
/// The name anticipates a seccomp-bpf mode in which `ptrace::cont` would be
/// used so that the seccomp stop serves as the syscall-entry stop. As written,
/// this always resumes with `ptrace_syscall`, exactly like `syscall_enter_cont`.
fn seccomp_aware_cont(&self, pid: Pid) -> Result<(), Errno> {
    ptrace_syscall(pid, None)
}
|
||||
|
||||
/// Resumes `pid` until its next syscall stop, delivering `sig` to it.
fn seccomp_aware_cont_with_signal(&self, pid: Pid, sig: Signal) -> Result<(), Errno> {
    ptrace_syscall(pid, Some(sig))
}
|
||||
}
|
||||
|
||||
fn resolve_filename_at_fd(
|
||||
pid: Pid,
|
||||
pathname: String,
|
||||
dirfd: i32,
|
||||
flags: i32,
|
||||
) -> anyhow::Result<PathBuf> {
|
||||
let pathname_is_empty = pathname.is_empty();
|
||||
let pathname = PathBuf::from(pathname);
|
||||
Ok(
|
||||
match (
|
||||
pathname.is_absolute(),
|
||||
pathname_is_empty && ((flags & AT_EMPTY_PATH) != 0),
|
||||
) {
|
||||
(true, _) => {
|
||||
// If pathname is absolute, then dirfd is ignored.
|
||||
pathname
|
||||
}
|
||||
(false, true) => {
|
||||
// If pathname is an empty string and the AT_EMPTY_PATH flag is specified, then the file descriptor dirfd
|
||||
// specifies the file to be executed
|
||||
read_fd(pid, dirfd)?
|
||||
}
|
||||
(false, false) => {
|
||||
// pathname is relative to dirfd
|
||||
let dir = read_fd(pid, dirfd)?;
|
||||
dir.join(pathname)
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
|
@ -0,0 +1,930 @@
|
|||
use std::{
|
||||
collections::{BTreeSet, HashMap}, ffi::CString, ffi::OsString, io::Write, net::TcpStream, os::unix::prelude::OsStringExt, path::PathBuf, process::exit, time::Instant
|
||||
};
|
||||
|
||||
use nix::{
|
||||
errno::Errno,
|
||||
libc::{pid_t, raise, tcsetpgrp, AT_EMPTY_PATH, AT_FDCWD, SIGSTOP, STDIN_FILENO, user_regs_struct},
|
||||
sys::{
|
||||
ptrace::{self, traceme, AddressType},
|
||||
signal::Signal,
|
||||
wait::{waitpid, WaitPidFlag, WaitStatus},
|
||||
},
|
||||
unistd::{execvp, getpid, setpgid, ForkResult},
|
||||
};
|
||||
use serde_json::de::IoRead;
|
||||
use sha2::{Sha256, Digest};
|
||||
|
||||
use crate::filestore::{parse_format, Sha256Hash};
|
||||
|
||||
use super::{types::*, docker::instrument_docker_run_execve};
|
||||
|
||||
// Number of bytes transferred per ptrace word; used as the stride of the
// tracee-memory helpers in this file. Hard-coded to 8 for now.
const WORD_SIZE: usize = 8; // FIXME
|
||||
|
||||
pub fn read_generic_string<TString>(
|
||||
pid: Pid,
|
||||
address: AddressType,
|
||||
ctor: impl Fn(Vec<u8>) -> TString,
|
||||
) -> anyhow::Result<TString> {
|
||||
let mut buf = Vec::new();
|
||||
let mut address = address;
|
||||
loop {
|
||||
let word = match ptrace::read(pid.into(), address) {
|
||||
Err(e) => {
|
||||
log::warn!("Cannot read tracee {pid} memory {address:?}: {e}");
|
||||
return Ok(ctor(buf));
|
||||
}
|
||||
Ok(word) => word,
|
||||
};
|
||||
let word_bytes = word.to_ne_bytes();
|
||||
for &byte in word_bytes.iter() {
|
||||
if byte == 0 {
|
||||
return Ok(ctor(buf));
|
||||
}
|
||||
buf.push(byte);
|
||||
}
|
||||
address = unsafe { address.add(WORD_SIZE) };
|
||||
}
|
||||
}
|
||||
|
||||
pub fn write_bytes(pid: Pid, mut address: AddressType, data: &[u8]) -> anyhow::Result<()> {
|
||||
assert_eq!(address as usize % WORD_SIZE, 0);
|
||||
for chunk in data.chunks(WORD_SIZE) {
|
||||
let chunk: Vec<_> = chunk.into_iter().copied().chain(std::iter::repeat(0).take(WORD_SIZE - chunk.len())).collect();
|
||||
let word = i64::from_ne_bytes(chunk.try_into().unwrap());
|
||||
ptrace::write(pid.into(), address, word)?;
|
||||
address = address.wrapping_byte_add(WORD_SIZE);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
pub fn read_cstring(pid: Pid, address: AddressType) -> anyhow::Result<CString> {
|
||||
read_generic_string(pid, address, |x| CString::new(x).unwrap())
|
||||
}
|
||||
|
||||
pub fn read_pathbuf(pid: Pid, address: AddressType) -> anyhow::Result<PathBuf> {
|
||||
read_generic_string(pid, address, |x| PathBuf::from(OsString::from_vec(x)))
|
||||
}
|
||||
|
||||
pub fn read_string(pid: Pid, address: AddressType) -> anyhow::Result<String> {
|
||||
// Waiting on https://github.com/rust-lang/libs-team/issues/116
|
||||
read_generic_string(pid, address, |x| String::from_utf8_lossy(&x).to_string())
|
||||
}
|
||||
|
||||
pub fn read_null_ended_array<TItem>(
|
||||
pid: Pid,
|
||||
mut address: AddressType,
|
||||
reader: impl Fn(Pid, AddressType) -> anyhow::Result<TItem>,
|
||||
) -> anyhow::Result<Vec<TItem>> {
|
||||
let mut res = Vec::new();
|
||||
loop {
|
||||
let ptr = match ptrace::read(pid.into(), address) {
|
||||
Err(e) => {
|
||||
log::warn!("Cannot read tracee {pid} memory {address:?}: {e}");
|
||||
return Ok(res);
|
||||
}
|
||||
Ok(ptr) => ptr,
|
||||
};
|
||||
if ptr == 0 {
|
||||
return Ok(res);
|
||||
} else {
|
||||
res.push(reader(pid, ptr as AddressType)?);
|
||||
}
|
||||
address = unsafe { address.add(WORD_SIZE) };
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
pub fn read_cstring_array(pid: Pid, address: AddressType) -> anyhow::Result<Vec<CString>> {
|
||||
read_null_ended_array(pid, address, read_cstring)
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
pub fn read_string_array(pid: Pid, address: AddressType) -> anyhow::Result<Vec<String>> {
|
||||
read_null_ended_array(pid, address, read_string)
|
||||
}
|
||||
|
||||
// Yields the syscall number from a register struct: the `orig_rax` field cast
// to `i64`.
macro_rules! syscall_no_from_regs {
    ($regs:ident) => {
        $regs.orig_rax as i64
    };
}
|
||||
|
||||
// Yields the syscall result from a register struct: the `rax` field cast to
// `i64`, so failures show up as negative values.
macro_rules! syscall_res_from_regs {
    ($regs:ident) => {
        $regs.rax as i64
    };
}
|
||||
|
||||
// Yields the stack pointer from a register struct: the `rsp` field cast to
// `i64`.
macro_rules! stack_ptr_from_regs {
    ($regs:ident) => {
        $regs.rsp as i64
    };
}
|
||||
|
||||
fn syscall_arg(regs: &user_regs_struct, idx: usize) -> u64 {
|
||||
match idx {
|
||||
0 => regs.rdi,
|
||||
1 => regs.rsi,
|
||||
2 => regs.rdx,
|
||||
3 => regs.r10,
|
||||
4 => regs.r8,
|
||||
5 => regs.r9,
|
||||
_ => panic!("Bad syscall argument index"),
|
||||
}
|
||||
}
|
||||
|
||||
fn set_syscall_arg(regs: &mut user_regs_struct, idx: usize, value: u64) {
|
||||
match idx {
|
||||
0 => regs.rdi = value,
|
||||
1 => regs.rsi = value,
|
||||
2 => regs.rdx = value,
|
||||
3 => regs.r10 = value,
|
||||
4 => regs.r8 = value,
|
||||
5 => regs.r9 = value,
|
||||
_ => panic!("Bad syscall argument index"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Reads the argument vector of process `pid` from `/proc/<pid>/cmdline`.
///
/// The file holds the arguments separated by NUL bytes, with a NUL after the
/// last one. That final terminator is removed before splitting so it does not
/// produce a spurious empty trailing argument, and an empty file yields an
/// empty vector.
///
/// # Errors
/// Fails if the file cannot be read.
pub fn read_argv(pid: Pid) -> anyhow::Result<Vec<CString>> {
    let filename = format!("/proc/{pid}/cmdline");
    let buf = std::fs::read(filename)?;
    if buf.is_empty() {
        return Ok(Vec::new());
    }
    // Drop only the terminator of the last argument; interior separators stay.
    let args = buf.strip_suffix(&[0]).unwrap_or(&buf);
    Ok(args
        .split(|&c| c == 0)
        .map(CString::new)
        .collect::<Result<Vec<_>, _>>()?)
}
|
||||
|
||||
/// Reads the command name of process `pid` from `/proc/<pid>/comm`.
///
/// A single trailing newline is removed if present. The last byte is no longer
/// dropped unconditionally, so content without a newline is kept intact.
///
/// # Errors
/// Fails if the file cannot be read or its content is not valid UTF-8.
pub fn read_comm(pid: Pid) -> anyhow::Result<String> {
    let filename = format!("/proc/{pid}/comm");
    let mut buf = std::fs::read(filename)?;
    if buf.last() == Some(&b'\n') {
        buf.pop(); // remove trailing newline
    }
    Ok(String::from_utf8(buf)?)
}
|
||||
|
||||
pub fn read_cwd(pid: Pid) -> std::io::Result<PathBuf> {
|
||||
let filename = format!("/proc/{pid}/cwd");
|
||||
let buf = std::fs::read_link(filename)?;
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
pub fn read_fd(pid: Pid, fd: i32) -> std::io::Result<PathBuf> {
|
||||
if fd == AT_FDCWD {
|
||||
return read_cwd(pid);
|
||||
}
|
||||
let filename = format!("/proc/{pid}/fd/{fd}");
|
||||
std::fs::read_link(filename)
|
||||
}
|
||||
|
||||
/*
|
||||
#[derive(Debug)]
|
||||
pub enum Interpreter {
|
||||
None,
|
||||
Shebang(String),
|
||||
ExecutableUnaccessible,
|
||||
Error(io::Error),
|
||||
}
|
||||
|
||||
impl Display for Interpreter {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Interpreter::None => write!(f, "none"),
|
||||
Interpreter::Shebang(s) => write!(f, "{:?}", s),
|
||||
Interpreter::ExecutableUnaccessible => {
|
||||
write!(f, "executable unaccessible")
|
||||
}
|
||||
Interpreter::Error(e) => write!(f, "(err: {e})"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn read_interpreter_recursive(exe: impl AsRef<Path>) -> Vec<Interpreter> {
|
||||
let mut exe = Cow::Borrowed(exe.as_ref());
|
||||
let mut interpreters = Vec::new();
|
||||
loop {
|
||||
match read_interpreter(exe.as_ref()) {
|
||||
Interpreter::Shebang(shebang) => {
|
||||
exe = Cow::Owned(PathBuf::from(
|
||||
shebang.split_ascii_whitespace().next().unwrap_or(""),
|
||||
));
|
||||
interpreters.push(Interpreter::Shebang(shebang));
|
||||
}
|
||||
Interpreter::None => break,
|
||||
err => {
|
||||
interpreters.push(err);
|
||||
break;
|
||||
}
|
||||
};
|
||||
}
|
||||
interpreters
|
||||
}
|
||||
|
||||
pub fn read_interpreter(exe: &Path) -> Interpreter {
|
||||
fn err_to_interpreter(e: io::Error) -> Interpreter {
|
||||
if e.kind() == io::ErrorKind::PermissionDenied || e.kind() == io::ErrorKind::NotFound {
|
||||
Interpreter::ExecutableUnaccessible
|
||||
} else {
|
||||
Interpreter::Error(e)
|
||||
}
|
||||
}
|
||||
let file = match std::fs::File::open(exe) {
|
||||
Ok(file) => file,
|
||||
Err(e) => return err_to_interpreter(e),
|
||||
};
|
||||
let mut reader = BufReader::new(file);
|
||||
// First, check if it's a shebang script
|
||||
let mut buf = [0u8; 2];
|
||||
|
||||
if let Err(e) = reader.read_exact(&mut buf) {
|
||||
return Interpreter::Error(e);
|
||||
};
|
||||
if &buf != b"#!" {
|
||||
return Interpreter::None;
|
||||
}
|
||||
// Read the rest of the line
|
||||
let mut buf = Vec::new();
|
||||
|
||||
if let Err(e) = reader.read_until(b'\n', &mut buf) {
|
||||
return Interpreter::Error(e);
|
||||
};
|
||||
// Get trimmed shebang line [start, end) indices
|
||||
// If the shebang line is empty, we don't care
|
||||
let start = buf
|
||||
.iter()
|
||||
.position(|&c| !c.is_ascii_whitespace())
|
||||
.unwrap_or(0);
|
||||
let end = buf
|
||||
.iter()
|
||||
.rposition(|&c| !c.is_ascii_whitespace())
|
||||
.map(|x| x + 1)
|
||||
.unwrap_or(buf.len());
|
||||
let shebang = String::from_utf8_lossy(&buf[start..end]);
|
||||
Interpreter::Shebang(shebang.into_owned())
|
||||
}
|
||||
*/
|
||||
|
||||
/// Registry of all traced processes, keyed by pid.
///
/// Each pid maps to a list of states; `insert` appends to that list and
/// `get_current_mut` treats the last entry as the current state of the pid.
#[derive(Default)]
|
||||
pub struct ProcessStateStore {
|
||||
// All states ever recorded for a pid, oldest first.
processes: HashMap<Pid, Vec<ProcessState>>,
|
||||
}
|
||||
|
||||
/// Everything the tracer remembers about one traced process.
#[derive(Debug)]
|
||||
pub struct ProcessState {
|
||||
/// Pid of the traced process.
pub pid: Pid,
|
||||
/// Pid of the parent, when known (`new` leaves it `None`; callers fill it in).
pub ppid: Option<Pid>,
|
||||
/// Where the process is in its attach/run/exit life cycle.
pub status: ProcessStatus,
|
||||
/// Value handed to `new` by the caller.
/// NOTE(review): unit and origin are not visible in this file — confirm.
pub start_time: u64,
|
||||
/// Command line, as read by `read_argv`.
pub argv: Vec<CString>,
|
||||
/// Command name, as read by `read_comm`.
pub comm: String,
|
||||
/// `true` when the next syscall stop is a syscall entry; starts out `true` and is
/// toggled by the syscall entry and exit handlers.
pub presyscall: bool,
|
||||
/// Starts out `false`; see the exec handling in the tracer loop for how it is updated.
pub is_exec_successful: bool,
|
||||
/// Number of the syscall recorded at the last syscall entry; `-1` initially.
pub syscall: i64,
|
||||
/// Events queued at syscall entry until the outcome of the syscall is known.
pub pending_syscall_event: Vec<Event>,
|
||||
}
|
||||
|
||||
/// Life-cycle state of a traced process.
///
/// A new child is announced twice, by its own initial SIGSTOP and by the parent's ptrace
/// fork event, in either order. The first two variants record which half has been seen so
/// far.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProcessStatus {
    /// The child's SIGSTOP arrived before the parent's fork event.
    SigstopReceived,
    /// The parent's fork event arrived before the child's SIGSTOP.
    PtraceForkEventReceived,
    /// The process is fully attached and running.
    Running,
    /// The process exited with the given exit code.
    Exited(i32),
}
|
||||
|
||||
/// Description of a program execution: what is run, with which arguments and
/// environment, and from which directory.
///
/// NOTE(review): nothing in the visible part of this file constructs or reads this type —
/// confirm whether it is still used.
#[derive(Debug)]
|
||||
pub struct ExecData {
|
||||
/// Path of the executed program.
pub filename: PathBuf,
|
||||
/// Argument vector.
pub argv: Vec<String>,
|
||||
/// Environment entries.
pub envp: Vec<String>,
|
||||
/// Working directory.
pub cwd: PathBuf,
|
||||
//pub interpreters: Vec<Interpreter>,
|
||||
}
|
||||
|
||||
impl ProcessStateStore {
|
||||
pub fn insert(&mut self, state: ProcessState) {
|
||||
self.processes.entry(state.pid).or_default().push(state);
|
||||
}
|
||||
|
||||
pub fn get_current_mut(&mut self, pid: Pid) -> Option<&mut ProcessState> {
|
||||
// The last process in the vector is the current process
|
||||
// println!("Getting {pid}");
|
||||
self.processes.get_mut(&pid)?.last_mut()
|
||||
}
|
||||
}
|
||||
|
||||
impl ProcessState {
|
||||
pub fn new(pid: Pid, start_time: u64) -> anyhow::Result<Self> {
|
||||
Ok(Self {
|
||||
pid,
|
||||
ppid: None,
|
||||
status: ProcessStatus::Running,
|
||||
comm: read_comm(pid)?,
|
||||
argv: read_argv(pid)?,
|
||||
start_time,
|
||||
presyscall: true,
|
||||
is_exec_successful: false,
|
||||
syscall: -1,
|
||||
pending_syscall_event: vec![],
|
||||
})
|
||||
}
|
||||
|
||||
pub fn is_docker(&self) -> bool {
|
||||
self.argv.get(0).is_some_and(|c| c.to_str() == Ok("docker"))
|
||||
}
|
||||
|
||||
pub fn update(&mut self) -> anyhow::Result<()> {
|
||||
self.comm = read_comm(self.pid)?;
|
||||
self.argv = read_argv(self.pid)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn ptrace_syscall(pid: Pid, sig: Option<Signal>) -> Result<(), Errno> {
|
||||
match ptrace::syscall(pid.into(), sig) {
|
||||
Err(Errno::ESRCH) => {
|
||||
log::info!("ptrace syscall failed: {pid}, ESRCH, child probably gone!");
|
||||
Ok(())
|
||||
}
|
||||
other => other,
|
||||
}
|
||||
}
|
||||
|
||||
/// Client side of the tracer: follows a process tree with ptrace and reports what it
/// observes to the server over a TCP connection.
pub struct TracerClient {
|
||||
// Address of the server; also handed to instrumented docker children.
connect: String,
|
||||
// Connection to the server.
sock: TcpStream,
|
||||
// State of every traced process.
store: ProcessStateStore,
|
||||
// Reference point for the timestamps of logged events.
start_time: Instant,
|
||||
// Events collected since the last exchange with the server.
pending_events: Vec<LogEntry>,
|
||||
// Files (path and SHA-256 hash) collected since the last exchange with the server.
pending_files: BTreeSet<(PathBuf, Sha256Hash)>,
|
||||
// Machine id this client reports its events under.
machine: i32,
|
||||
}
|
||||
|
||||
impl TracerClient {
|
||||
pub fn log(&mut self, ident: Identifier, event: Event) {
|
||||
self.pending_events.push(LogEntry {
|
||||
ident,
|
||||
event,
|
||||
timestamp: Instant::now().duration_since(self.start_time),
|
||||
});
|
||||
}
|
||||
|
||||
pub fn log_root(&mut self, pid: Pid, event: Event) {
|
||||
self.log(Identifier { pid, machine: self.machine }, event);
|
||||
}
|
||||
|
||||
fn ingest_file(&mut self, pid: Pid, path: PathBuf) -> anyhow::Result<()> {
|
||||
if self.store.get_current_mut(pid).unwrap().is_docker() {
|
||||
return Ok(());
|
||||
}
|
||||
let stat = std::fs::metadata(&path)?;
|
||||
if !stat.is_file() {
|
||||
return Ok(());
|
||||
}
|
||||
let mut fp = std::fs::File::open(&path)?;
|
||||
let mut h = Sha256::new();
|
||||
log::debug!("Hashing {} (client)", path.to_string_lossy());
|
||||
std::io::copy(&mut fp, &mut h)?;
|
||||
let hash = h.finalize().into();
|
||||
self.pending_files.insert((path, hash));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn commune_server(&mut self, msg: TracerClientMessage) -> anyhow::Result<TracerServerRequest> {
|
||||
serde_json::to_writer(&self.sock, &msg)?;
|
||||
self.sock.write_all("\n".as_bytes())?;
|
||||
Ok(serde_json::StreamDeserializer::new(&mut IoRead::new(&self.sock)).next().unwrap()?)
|
||||
}
|
||||
|
||||
fn allocate_machine(&mut self) -> anyhow::Result<i32> {
|
||||
let msg = self.commune_server(TracerClientMessage::AllocateId {})?;
|
||||
let TracerServerRequest::AllocatedId { id } = msg else { panic!("Server did not respone to AllocateId with AllocatedId") };
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
fn drain_syscall_events(&mut self, pid: Pid, mut filter: Box<dyn FnMut(&mut Event)>) {
|
||||
let p = self.store.get_current_mut(pid).unwrap();
|
||||
if p.is_docker() {
|
||||
return;
|
||||
}
|
||||
for mut event in p.pending_syscall_event.drain(..) {
|
||||
(filter)(&mut event);
|
||||
self.pending_events.push(LogEntry {
|
||||
ident: Identifier { pid, machine: self.machine },
|
||||
event,
|
||||
timestamp: Instant::now().duration_since(self.start_time),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
pub fn run(machine: i32, connect: String, args: Vec<String>) -> anyhow::Result<()> {
|
||||
let sock = TcpStream::connect(&connect).expect(format!("Could not connect to {connect}").as_str());
|
||||
if let ForkResult::Parent { child } = unsafe { nix::unistd::fork()? } {
|
||||
let mut this = Self {
|
||||
connect,
|
||||
sock,
|
||||
store: ProcessStateStore::default(),
|
||||
start_time: Instant::now(),
|
||||
pending_events: vec![],
|
||||
pending_files: BTreeSet::new(),
|
||||
machine,
|
||||
};
|
||||
this.run_internal(child.into())
|
||||
} else {
|
||||
let me = getpid();
|
||||
setpgid(me, me)?;
|
||||
traceme()?;
|
||||
if 0 != unsafe { raise(SIGSTOP) } {
|
||||
log::error!("raise failed!");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
let args = args
|
||||
.into_iter()
|
||||
.map(CString::new)
|
||||
.collect::<Result<Vec<CString>, _>>()?;
|
||||
|
||||
execvp(&args[0], &args).expect(format!("Failed to execute {args:?}").as_str());
|
||||
unreachable!();
|
||||
}
|
||||
}
|
||||
|
||||
fn run_internal(&mut self, root_child: Pid) -> anyhow::Result<()> {
|
||||
waitpid(nix::unistd::Pid::from(root_child.into()), Some(WaitPidFlag::WSTOPPED))?; // wait for child to stop
|
||||
log::trace!("child stopped");
|
||||
let mut root_child_state = ProcessState::new(root_child, 0)?;
|
||||
root_child_state.ppid = Some(getpid().into());
|
||||
self.store.insert(root_child_state);
|
||||
// Set foreground process group of the terminal
|
||||
if -1 == unsafe { tcsetpgrp(STDIN_FILENO, root_child.0) } {
|
||||
return Err(Errno::last().into());
|
||||
}
|
||||
// restart child
|
||||
log::trace!("resuming child");
|
||||
let ptrace_opts = {
|
||||
use nix::sys::ptrace::Options;
|
||||
Options::PTRACE_O_TRACEEXEC
|
||||
| Options::PTRACE_O_TRACEEXIT
|
||||
| Options::PTRACE_O_EXITKILL
|
||||
| Options::PTRACE_O_TRACESYSGOOD
|
||||
| Options::PTRACE_O_TRACEFORK
|
||||
| Options::PTRACE_O_TRACECLONE
|
||||
| Options::PTRACE_O_TRACEVFORK
|
||||
};
|
||||
ptrace::setoptions(root_child.into(), ptrace_opts)?;
|
||||
// restart child
|
||||
ptrace::syscall(nix::unistd::Pid::from(root_child.into()), None)?;
|
||||
let mut continuing = true;
|
||||
while continuing {
|
||||
let status = {
|
||||
let status = waitpid(None, Some(WaitPidFlag::__WALL));
|
||||
if status.is_err_and(|e| e == nix::errno::Errno::ECHILD) {
|
||||
continuing = false;
|
||||
Ok(WaitStatus::StillAlive)
|
||||
} else {
|
||||
status
|
||||
}
|
||||
}?;
|
||||
// log::trace!("waitpid: {:?}", status);
|
||||
let signal = match status {
|
||||
WaitStatus::Stopped(pid, sig) => {
|
||||
let pid = pid.into();
|
||||
log::trace!("stopped: {pid}, sig {:?}", sig);
|
||||
match sig {
|
||||
Signal::SIGSTOP => {
|
||||
log::trace!("sigstop event, child: {pid}");
|
||||
if let Some(state) = self.store.get_current_mut(pid) {
|
||||
if state.status == ProcessStatus::PtraceForkEventReceived {
|
||||
log::trace!("sigstop event received after ptrace fork event, pid: {pid}");
|
||||
state.status = ProcessStatus::Running;
|
||||
} else if pid != root_child {
|
||||
log::error!("Unexpected SIGSTOP: {state:?}")
|
||||
}
|
||||
} else {
|
||||
log::trace!("sigstop event received before ptrace fork event, pid: {pid}");
|
||||
let mut state = ProcessState::new(pid, 0)?;
|
||||
state.status = ProcessStatus::SigstopReceived;
|
||||
self.store.insert(state);
|
||||
}
|
||||
None
|
||||
}
|
||||
Signal::SIGCHLD => {
|
||||
// From lurk:
|
||||
//
|
||||
// The SIGCHLD signal is sent to a process when a child process terminates, interrupted, or resumes after being interrupted
|
||||
// This means, that if our tracee forked and said fork exits before the parent, the parent will get stopped.
|
||||
// Therefor issue a PTRACE_SYSCALL request to the parent to continue execution.
|
||||
// This is also important if we trace without the following forks option.
|
||||
Some(Signal::SIGCHLD)
|
||||
}
|
||||
_ => {
|
||||
// Just deliver the signal to tracee
|
||||
Some(sig)
|
||||
}
|
||||
}
|
||||
}
|
||||
WaitStatus::Exited(pid, code) => {
|
||||
let pid = pid.into();
|
||||
log::trace!("exited: pid {}, code {:?}", pid, code);
|
||||
self.log_root(pid, Event::Exit { code });
|
||||
self.store.get_current_mut(pid).unwrap().status =
|
||||
ProcessStatus::Exited(code);
|
||||
None
|
||||
}
|
||||
WaitStatus::PtraceEvent(pid, sig, evt) => {
|
||||
log::trace!("ptrace event: {:?} {:?}", sig, evt);
|
||||
match evt {
|
||||
nix::libc::PTRACE_EVENT_FORK
|
||||
| nix::libc::PTRACE_EVENT_VFORK
|
||||
| nix::libc::PTRACE_EVENT_CLONE => {
|
||||
let new_child = Pid(ptrace::getevent(pid.into())? as pid_t);
|
||||
log::trace!(
|
||||
"ptrace fork event, evt {evt}, pid: {pid}, child: {new_child}"
|
||||
);
|
||||
self.log_root(pid.into(), Event::Fork { child: new_child });
|
||||
if let Some(state) = self.store.get_current_mut(new_child) {
|
||||
if state.status == ProcessStatus::SigstopReceived {
|
||||
log::trace!("ptrace fork event received after sigstop, pid: {pid}, child: {new_child}");
|
||||
state.status = ProcessStatus::Running;
|
||||
state.ppid = Some(pid.into());
|
||||
} else if new_child != root_child {
|
||||
log::error!("Unexpected fork event: {state:?}")
|
||||
}
|
||||
} else {
|
||||
log::trace!("ptrace fork event received before sigstop, pid: {pid}, child: {new_child}");
|
||||
let mut state = ProcessState::new(new_child, 0)?;
|
||||
state.status = ProcessStatus::PtraceForkEventReceived;
|
||||
state.ppid = Some(pid.into());
|
||||
self.store.insert(state);
|
||||
}
|
||||
// Resume parent
|
||||
None
|
||||
}
|
||||
nix::libc::PTRACE_EVENT_EXEC => {
|
||||
log::trace!("exec event");
|
||||
let p = self.store.get_current_mut(pid.into()).unwrap();
|
||||
assert!(!p.presyscall);
|
||||
// After execve or execveat, in syscall exit event,
|
||||
// the registers might be clobbered(e.g. aarch64).
|
||||
// So we need to determine whether exec is successful here.
|
||||
// PTRACE_EVENT_EXEC only happens for successful exec.
|
||||
p.is_exec_successful = true;
|
||||
let path = p
|
||||
.pending_syscall_event
|
||||
.iter()
|
||||
.find_map(|e| match e {
|
||||
Event::Exec { prog, .. } => Some(prog.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.unwrap();
|
||||
self.ingest_file(pid.into(), path)?;
|
||||
self.drain_syscall_events(pid.into(), Box::new(|_| {}));
|
||||
// Don't use seccomp_aware_cont here because that will skip the next syscall exit stop
|
||||
None
|
||||
}
|
||||
nix::libc::PTRACE_EVENT_EXIT => {
|
||||
log::trace!("exit event");
|
||||
None
|
||||
}
|
||||
nix::libc::PTRACE_EVENT_SECCOMP => {
|
||||
log::trace!("seccomp event");
|
||||
self.on_syscall_enter(pid.into())?;
|
||||
None
|
||||
}
|
||||
_ => {
|
||||
log::trace!("other event");
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
WaitStatus::Signaled(pid, sig, _) => {
|
||||
let pid: Pid = pid.into();
|
||||
log::debug!("signaled: {pid}, {:?}", sig);
|
||||
if pid == root_child {
|
||||
exit(128 + (sig as i32))
|
||||
}
|
||||
None
|
||||
}
|
||||
WaitStatus::PtraceSyscall(pid) => {
|
||||
let pid = pid.into();
|
||||
let presyscall = self.store.get_current_mut(pid).unwrap().presyscall;
|
||||
if presyscall {
|
||||
self.on_syscall_enter(pid)?;
|
||||
} else {
|
||||
self.on_syscall_exit(pid)?;
|
||||
}
|
||||
None
|
||||
}
|
||||
_ => None
|
||||
};
|
||||
|
||||
if !self.pending_files.is_empty() || !continuing {
|
||||
let mut events = vec![];
|
||||
let mut files = BTreeSet::new();
|
||||
std::mem::swap(&mut events, &mut self.pending_events);
|
||||
std::mem::swap(&mut files, &mut self.pending_files);
|
||||
let mut msg = TracerClientMessage::Events { events, files };
|
||||
|
||||
loop {
|
||||
let event = self.commune_server(msg)?;
|
||||
|
||||
match event {
|
||||
TracerServerRequest::Continue => break,
|
||||
TracerServerRequest::AnalyzeFiles { paths } => {
|
||||
let mut formats = HashMap::new();
|
||||
let mut files = BTreeSet::new();
|
||||
for path in paths {
|
||||
let mut fp = std::fs::File::open(&path)?;
|
||||
log::debug!("Parsing format of {} (client)", path.to_string_lossy());
|
||||
let (format, mut references) = parse_format(&mut fp)?;
|
||||
formats.insert(path, format);
|
||||
files.append(&mut references);
|
||||
}
|
||||
msg = TracerClientMessage::FileFormats { formats, files }
|
||||
},
|
||||
TracerServerRequest::AllocatedId { id } => {
|
||||
panic!("Receieved unsolicited AllocatedId({id})");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// https://stackoverflow.com/questions/29997244/occasionally-missing-ptrace-event-vfork-when-running-ptrace
|
||||
// DO NOT send PTRACE_SYSCALL until we receive the PTRACE_EVENT_FORK, etc.
|
||||
if let Some(pid) = status.pid() {
|
||||
let pid = pid.into();
|
||||
let p = self.store.get_current_mut(pid).expect("No such process??");
|
||||
if !matches!(p.status, ProcessStatus::SigstopReceived | ProcessStatus::Exited(_)) {
|
||||
ptrace_syscall(pid, signal)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn on_syscall_enter(&mut self, pid: Pid) -> anyhow::Result<()> {
|
||||
let p = self.store.get_current_mut(pid).unwrap();
|
||||
p.presyscall = !p.presyscall;
|
||||
// SYSCALL ENTRY
|
||||
let regs = match ptrace::getregs(pid.into()) {
|
||||
Ok(regs) => regs,
|
||||
Err(Errno::ESRCH) => {
|
||||
log::info!("ptrace getregs failed: {pid}, ESRCH, child probably gone!");
|
||||
return Ok(());
|
||||
}
|
||||
e => e?,
|
||||
};
|
||||
let syscallno = syscall_no_from_regs!(regs);
|
||||
log::trace!("Got syscall {} from {}", syscallno, pid);
|
||||
p.syscall = syscallno;
|
||||
match syscallno {
|
||||
nix::libc::SYS_execveat => {
|
||||
// int execveat(int dirfd, const char *pathname,
|
||||
// char *const _Nullable argv[],
|
||||
// char *const _Nullable envp[],
|
||||
// int flags);
|
||||
let dirfd = syscall_arg(®s, 0) as i32;
|
||||
let pathname = read_string(pid, syscall_arg(®s, 1) as AddressType)?;
|
||||
//let argv = read_string_array(pid, syscall_arg!(regs, 2) as AddressType)?;
|
||||
//let envp = read_string_array(pid, syscall_arg!(regs, 3) as AddressType)?;
|
||||
let flags = syscall_arg(®s, 4) as i32;
|
||||
let filename = resolve_filename_at_fd(pid, pathname, dirfd, flags)?;
|
||||
//let interpreters = read_interpreter_recursive(&filename);
|
||||
p.pending_syscall_event.push(Event::Exec { prog: filename.clone() });
|
||||
self.instrument_exec(pid, filename.to_str().unwrap(), ®s, 1)?;
|
||||
}
|
||||
nix::libc::SYS_execve => {
|
||||
let filename = read_pathbuf(pid, syscall_arg(®s, 0) as AddressType)?;
|
||||
//let argv = read_string_array(pid, syscall_arg!(regs, 1) as AddressType)?;
|
||||
//let envp = read_string_array(pid, syscall_arg!(regs, 2) as AddressType)?;
|
||||
//let interpreters = read_interpreter_recursive(&filename);
|
||||
p.pending_syscall_event.push(Event::Exec { prog: filename.clone() });
|
||||
self.instrument_exec(pid, filename.to_str().unwrap(), ®s, 0)?;
|
||||
}
|
||||
nix::libc::SYS_open => {
|
||||
let path = read_pathbuf(pid, syscall_arg(®s, 0) as AddressType)?;
|
||||
p.pending_syscall_event.push(Event::FdOpen {
|
||||
source: FdSource::File { path },
|
||||
fd: -1,
|
||||
});
|
||||
}
|
||||
nix::libc::SYS_openat => {
|
||||
let dirfd = syscall_arg(®s, 0) as i32;
|
||||
let pathname = read_string(pid, syscall_arg(®s, 1) as AddressType)?;
|
||||
let flags = syscall_arg(®s, 2) as i32;
|
||||
let path = resolve_filename_at_fd(pid, pathname, dirfd, flags)?;
|
||||
p.pending_syscall_event.push(Event::FdOpen {
|
||||
source: FdSource::File { path },
|
||||
fd: 0,
|
||||
});
|
||||
}
|
||||
nix::libc::SYS_read
|
||||
| nix::libc::SYS_readv
|
||||
| nix::libc::SYS_preadv
|
||||
| nix::libc::SYS_preadv2 => {
|
||||
let fd = syscall_arg(®s, 0) as i32;
|
||||
p.pending_syscall_event.push(Event::FdRead { fd });
|
||||
}
|
||||
nix::libc::SYS_write
|
||||
| nix::libc::SYS_writev
|
||||
| nix::libc::SYS_pwritev
|
||||
| nix::libc::SYS_pwritev2 => {
|
||||
let fd = syscall_arg(®s, 0) as i32;
|
||||
p.pending_syscall_event.push(Event::FdWrite { fd });
|
||||
}
|
||||
nix::libc::SYS_dup | nix::libc::SYS_dup2 | nix::libc::SYS_dup3 => {
|
||||
let oldfd = syscall_arg(®s, 0) as i32;
|
||||
p.pending_syscall_event
|
||||
.push(Event::FdDup { oldfd, newfd: -1 });
|
||||
}
|
||||
nix::libc::SYS_fcntl => {
|
||||
let fd = syscall_arg(®s, 0) as i32;
|
||||
let cmd = syscall_arg(®s, 1) as i32;
|
||||
match cmd {
|
||||
nix::libc::F_DUPFD => p.pending_syscall_event.push(Event::FdDup {
|
||||
oldfd: fd,
|
||||
newfd: -1,
|
||||
}),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
nix::libc::SYS_close => {
|
||||
let fd = syscall_arg(®s, 0) as i32;
|
||||
p.pending_syscall_event.push(Event::FdClose { fd });
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
//self.syscall_enter_cont(pid)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn on_syscall_exit(&mut self, pid: Pid) -> anyhow::Result<()> {
|
||||
// SYSCALL EXIT
|
||||
// log::trace!("post syscall {}", p.syscall);
|
||||
let p = self.store.get_current_mut(pid).unwrap();
|
||||
p.presyscall = !p.presyscall;
|
||||
let regs = match ptrace::getregs(pid.into()) {
|
||||
Ok(regs) => regs,
|
||||
Err(Errno::ESRCH) => {
|
||||
log::info!("ptrace getregs failed: {pid}, ESRCH, child probably gone!");
|
||||
return Ok(());
|
||||
}
|
||||
e => e?,
|
||||
};
|
||||
let result = syscall_res_from_regs!(regs);
|
||||
let mut pending_files = vec![];
|
||||
|
||||
let filter: Option<Box<dyn FnMut(&mut Event)>> = match p.syscall {
|
||||
nix::libc::SYS_execve => {
|
||||
// SAFETY: p.preexecve is false, so p.exec_data is Some
|
||||
p.is_exec_successful = false;
|
||||
p.update()?;
|
||||
None
|
||||
}
|
||||
nix::libc::SYS_execveat => {
|
||||
p.is_exec_successful = false;
|
||||
p.update()?;
|
||||
None
|
||||
}
|
||||
nix::libc::SYS_open | nix::libc::SYS_openat => {
|
||||
if result >= 0 {
|
||||
for pending in p.pending_syscall_event.iter_mut() {
|
||||
if let Event::FdOpen {
|
||||
source: FdSource::File { path },
|
||||
..
|
||||
} = pending
|
||||
{
|
||||
pending_files.push(path.clone());
|
||||
}
|
||||
}
|
||||
Some(Box::new(move |event| match event {
|
||||
Event::FdOpen {
|
||||
fd: ref mut dest, ..
|
||||
} => {
|
||||
*dest = result as i32;
|
||||
}
|
||||
_ => {}
|
||||
}))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
nix::libc::SYS_dup | nix::libc::SYS_dup2 | nix::libc::SYS_dup3 => {
|
||||
if result >= 0 {
|
||||
Some(Box::new(move |event| match event {
|
||||
Event::FdDup {
|
||||
newfd: ref mut dest,
|
||||
..
|
||||
} => {
|
||||
*dest = result as i32;
|
||||
}
|
||||
_ => {}
|
||||
}))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
nix::libc::SYS_fcntl => {
|
||||
if result >= 0 {
|
||||
Some(Box::new(move |event| match event {
|
||||
Event::FdDup {
|
||||
newfd: ref mut dest,
|
||||
..
|
||||
} => {
|
||||
*dest = result as i32;
|
||||
}
|
||||
_ => {}
|
||||
}))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
if result >= 0 {
|
||||
Some(Box::new(|_| {}))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
};
|
||||
if let Some(filter) = filter {
|
||||
self.drain_syscall_events(pid, filter);
|
||||
} else {
|
||||
p.pending_syscall_event.clear();
|
||||
}
|
||||
for path in pending_files {
|
||||
self.ingest_file(pid, path)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn instrument_exec(&mut self, pid: Pid, filename: &str, regs: &user_regs_struct, prog_idx: usize) -> anyhow::Result<()> {
|
||||
if let Some(new_args) = if filename.ends_with("/docker") && std::fs::metadata(&filename).is_ok() {
|
||||
let mut args = read_cstring_array(pid, syscall_arg(®s, prog_idx + 1) as AddressType)?;
|
||||
if args.get(1).is_some_and(|c| c.to_str() == Ok("run")) {
|
||||
let new_machine = self.allocate_machine()?;
|
||||
let new_args = instrument_docker_run_execve(&mut args, new_machine, self.connect.as_str())?;
|
||||
if new_args != args {
|
||||
log::debug!("Launching docker child: {}", new_args.iter().map(|x| x.to_str().unwrap()).collect::<Vec<_>>().join(" "));
|
||||
}
|
||||
Some(new_args)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
} {
|
||||
let mut regs2 = regs.clone();
|
||||
let mut stacktop = stack_ptr_from_regs!(regs);
|
||||
stacktop -= 128;
|
||||
let mut argv_pointers = new_args.iter().map(|argstr| -> anyhow::Result<i64> {
|
||||
let bytes = argstr.as_bytes_with_nul();
|
||||
stacktop -= bytes.len() as i64;
|
||||
while stacktop % WORD_SIZE as i64 != 0 {
|
||||
stacktop -= 1;
|
||||
}
|
||||
write_bytes(pid, stacktop as AddressType, bytes)?;
|
||||
Ok(stacktop)
|
||||
}).collect::<anyhow::Result<Vec<i64>>>()?;
|
||||
assert_eq!(stacktop % WORD_SIZE as i64, 0);
|
||||
argv_pointers.push(0);
|
||||
for ptr in argv_pointers.iter().copied().rev() {
|
||||
stacktop -= WORD_SIZE as i64;
|
||||
ptrace::write(pid.into(), stacktop as AddressType, ptr)?;
|
||||
}
|
||||
set_syscall_arg(&mut regs2, prog_idx + 1, stacktop as u64);
|
||||
ptrace::setregs(pid.into(), regs2)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_filename_at_fd(
|
||||
pid: Pid,
|
||||
pathname: String,
|
||||
dirfd: i32,
|
||||
flags: i32,
|
||||
) -> anyhow::Result<PathBuf> {
|
||||
let pathname_is_empty = pathname.is_empty();
|
||||
let pathname = PathBuf::from(pathname);
|
||||
Ok(
|
||||
match (
|
||||
pathname.is_absolute(),
|
||||
pathname_is_empty && ((flags & AT_EMPTY_PATH) != 0),
|
||||
) {
|
||||
(true, _) => {
|
||||
// If pathname is absolute, then dirfd is ignored.
|
||||
pathname
|
||||
}
|
||||
(false, true) => {
|
||||
// If pathname is an empty string and the AT_EMPTY_PATH flag is specified, then the file descriptor dirfd
|
||||
// specifies the file to be executed
|
||||
read_fd(pid, dirfd)?
|
||||
}
|
||||
(false, false) => {
|
||||
// pathname is relative to dirfd
|
||||
let dir = read_fd(pid, dirfd)?;
|
||||
dir.join(pathname)
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
|
@ -0,0 +1,178 @@
|
|||
use std::{
|
||||
collections::HashSet,
|
||||
env::current_exe,
|
||||
ffi::CString,
|
||||
process::Command,
|
||||
};
|
||||
|
||||
pub fn instrument_docker_run_execve(
|
||||
args: &Vec<CString>,
|
||||
machine: i32,
|
||||
connect: &str,
|
||||
) -> anyhow::Result<Vec<CString>> {
|
||||
enum Argument<'a> {
|
||||
Zero(&'a str),
|
||||
One(&'a str, &'a str),
|
||||
}
|
||||
#[derive(Default)]
|
||||
struct ArgsParsed<'a> {
|
||||
preamble: Vec<&'a str>,
|
||||
args: Vec<Argument<'a>>,
|
||||
image: Option<&'a str>,
|
||||
cmd: Vec<&'a str>,
|
||||
}
|
||||
impl<'a> ArgsParsed<'a> {
|
||||
fn take_entrypoint(&mut self) -> Option<&'a str> {
|
||||
if let Some((idx, _)) = self
|
||||
.args
|
||||
.iter()
|
||||
.enumerate()
|
||||
.find(|(_, val)| matches!(val, Argument::One("--entrypoint", _)))
|
||||
{
|
||||
let Argument::One(_, arg) = self.args.remove(idx) else {
|
||||
unreachable!()
|
||||
};
|
||||
Some(arg)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn take_cmd(&mut self) -> Option<Vec<&'a str>> {
|
||||
if self.cmd.is_empty() {
|
||||
None
|
||||
} else {
|
||||
let target = &mut self.cmd;
|
||||
let mut result = vec![];
|
||||
std::mem::swap(target, &mut result);
|
||||
Some(result)
|
||||
}
|
||||
}
|
||||
|
||||
fn reserialize(self) -> Vec<CString> {
|
||||
let mut result = self.preamble.into_iter().map(|x| CString::new(x).unwrap()).collect::<Vec<_>>();
|
||||
for arg in self.args {
|
||||
match arg {
|
||||
Argument::Zero(a) => result.push(CString::new(a).unwrap()),
|
||||
Argument::One(a, b) => {
|
||||
result.push(CString::new(a).unwrap());
|
||||
result.push(CString::new(b).unwrap());
|
||||
}
|
||||
}
|
||||
}
|
||||
if let Some(image) = self.image {
|
||||
result.push(CString::new(image).unwrap());
|
||||
for cmd in self.cmd {
|
||||
result.push(CString::new(cmd).unwrap());
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
}
|
||||
let unary_args = HashSet::from([
|
||||
"-d",
|
||||
"--detach",
|
||||
"--disable-content-trust",
|
||||
"--help",
|
||||
"--init",
|
||||
"-i",
|
||||
"--interactive",
|
||||
"--no-healthcheck",
|
||||
"--oom-kill-disable",
|
||||
"--privileged",
|
||||
"-P",
|
||||
"--publish-all",
|
||||
"-q",
|
||||
"--quiet",
|
||||
"--read-only",
|
||||
"--rm",
|
||||
"--sig-proxy",
|
||||
"-t",
|
||||
"--tty",
|
||||
]);
|
||||
let mut string_args = ArgsParsed::default();
|
||||
let mut args_iter = args.iter();
|
||||
string_args.preamble.push(args_iter.next().unwrap().to_str().unwrap());
|
||||
string_args.preamble.push(args_iter.next().unwrap().to_str().unwrap());
|
||||
while let Some(arg) = args_iter.next() {
|
||||
let arg = arg.to_str()?;
|
||||
|
||||
if arg.starts_with('-') {
|
||||
let no_parameter = unary_args.contains(arg);
|
||||
if !no_parameter {
|
||||
let Some(parameter) = args_iter.next() else {
|
||||
log::debug!("Docker: arg {} missing required argument", arg);
|
||||
return Ok(args.clone());
|
||||
};
|
||||
string_args.args.push(Argument::One(arg, parameter.to_str()?));
|
||||
} else {
|
||||
string_args.args.push(Argument::Zero(arg));
|
||||
}
|
||||
} else {
|
||||
string_args.image = Some(arg);
|
||||
while let Some(arg) = args_iter.next() {
|
||||
let arg = arg.to_str()?;
|
||||
string_args.cmd.push(arg);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(image) = string_args.image {
|
||||
let output = Command::new("docker").args(["inspect", image]).output()?;
|
||||
if !output.status.success() {
|
||||
log::debug!("Docker: image inspect for {} returned bad error code", image);
|
||||
return Ok(args.clone());
|
||||
}
|
||||
let value: serde_json::Value = serde_json::from_slice(&output.stdout)?;
|
||||
let config = value
|
||||
.as_array()
|
||||
.unwrap()
|
||||
.get(0)
|
||||
.unwrap()
|
||||
.as_object()
|
||||
.unwrap()
|
||||
.get("Config")
|
||||
.unwrap()
|
||||
.as_object()
|
||||
.unwrap();
|
||||
let mut entrypoint = string_args
|
||||
.take_entrypoint()
|
||||
.map(|s| vec![s])
|
||||
.or_else(|| {
|
||||
config.get("Entrypoint").unwrap().as_array().map(|a| {
|
||||
a.into_iter()
|
||||
.map(|s| s.as_str().unwrap())
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
})
|
||||
.unwrap_or_else(Vec::new);
|
||||
let cmd = string_args
|
||||
.take_cmd()
|
||||
.or_else(|| {
|
||||
config.get("Cmd").unwrap().as_array().map(|a| {
|
||||
a.into_iter()
|
||||
.map(|s| s.as_str().unwrap())
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
})
|
||||
.unwrap_or_else(Vec::new);
|
||||
entrypoint.extend(cmd);
|
||||
|
||||
entrypoint.insert(0, "/.ontology");
|
||||
entrypoint.insert(1, "internal-launch");
|
||||
let machine = machine.to_string();
|
||||
entrypoint.insert(2, &machine);
|
||||
entrypoint.insert(3, connect);
|
||||
|
||||
string_args
|
||||
.args
|
||||
.push(Argument::One("--entrypoint", entrypoint.remove(0)));
|
||||
let volume = format!("{}:/.ontology", current_exe().unwrap().to_str().unwrap());
|
||||
string_args.args.push(Argument::One("-v", &volume));
|
||||
string_args.cmd = entrypoint;
|
||||
Ok(string_args.reserialize())
|
||||
} else {
|
||||
Ok(string_args.reserialize())
|
||||
}
|
||||
}
|
|
@ -0,0 +1,4 @@
|
|||
pub mod client;
|
||||
pub mod server;
|
||||
pub mod types;
|
||||
pub(self) mod docker;
|
|
@ -0,0 +1,154 @@
|
|||
use std::{collections::BTreeMap, net::{TcpListener, TcpStream}, os::fd::{AsFd, AsRawFd, BorrowedFd}, path::PathBuf, process::{Command, Stdio}, ffi::OsStr};
|
||||
|
||||
use serde_json::de::IoRead;
|
||||
|
||||
use crate::filestore::FileStore;
|
||||
|
||||
use super::types::*;
|
||||
|
||||
|
||||
/// Server side of the tracer.
pub struct Tracer {
|
||||
/// Report held by this tracer.
/// NOTE(review): `TracerReport` is defined outside this excerpt — confirm its contents.
pub report: TracerReport,
|
||||
}
|
||||
|
||||
impl Tracer {
|
||||
pub fn run(input: Vec<PathBuf>, args: Vec<String>, mute: bool) -> anyhow::Result<TracerReport> {
|
||||
let mut files = FileStore::new(input)?;
|
||||
let mut log = vec![];
|
||||
|
||||
let connect = format!("{}:9995", default_net::get_default_interface().expect("Could not obtain default interface").ipv4.get(0).expect("Default interface has no ipv4 address").addr);
|
||||
log::debug!("Using {} for server", connect);
|
||||
let listener = TcpListener::bind(&connect).expect("Could not bind listener socket");
|
||||
|
||||
let executable = std::env::current_exe().expect("Could not obtain current executable");
|
||||
let mut proc = Command::new(executable);
|
||||
proc.args(["internal-launch".to_owned(), "--".to_owned(), "0".to_owned(), connect].iter().chain(args.iter()));
|
||||
if mute {
|
||||
proc.stdin(Stdio::null()).stdout(Stdio::null()).stderr(Stdio::null());
|
||||
}
|
||||
log::debug!("Launching tracer child {}", proc.get_args().collect::<Vec<_>>().join(OsStr::new(" ")).to_string_lossy());
|
||||
let mut child = proc.spawn().expect("Could not spawn child");
|
||||
let mut next_child_id = 1;
|
||||
|
||||
struct ChildData {
|
||||
tcp_stream: TcpStream,
|
||||
json_stream: serde_json::StreamDeserializer<'static, IoRead<TcpStream>, TracerClientMessage>,
|
||||
duped: i32,
|
||||
}
|
||||
|
||||
enum ParentOrChild {
|
||||
Parent(TcpListener),
|
||||
Child(ChildData),
|
||||
Dup(i32),
|
||||
}
|
||||
|
||||
impl AsFd for ParentOrChild {
|
||||
fn as_fd(&self) -> std::os::fd::BorrowedFd<'_> {
|
||||
match self {
|
||||
ParentOrChild::Parent(i) => i.as_fd(),
|
||||
ParentOrChild::Child(i) => i.tcp_stream.as_fd(),
|
||||
ParentOrChild::Dup(i) => unsafe { BorrowedFd::borrow_raw(*i) }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut children = BTreeMap::new();
|
||||
|
||||
let (first_child, _first_addr) = listener.accept().expect("Accept failed");
|
||||
let duped = first_child.try_clone().expect("Dup failed");
|
||||
children.insert(duped.as_raw_fd(), ParentOrChild::Dup(first_child.as_raw_fd()));
|
||||
children.insert(first_child.as_raw_fd(), ParentOrChild::Child(ChildData {
|
||||
tcp_stream: duped,
|
||||
duped: first_child.as_raw_fd(),
|
||||
json_stream: serde_json::StreamDeserializer::new(IoRead::new(first_child)),
|
||||
}));
|
||||
|
||||
children.insert(listener.as_raw_fd(), ParentOrChild::Parent(listener));
|
||||
|
||||
loop {
|
||||
if children.len() <= 1 {
|
||||
break;
|
||||
}
|
||||
|
||||
let mut fdset = children.values().into();
|
||||
nix::sys::select::select(None, Some(&mut fdset), None, None, None).expect("Select failed");
|
||||
let chosen = fdset.fds(None).next().unwrap().as_raw_fd();
|
||||
let mut child = children.get_mut(&chosen).unwrap();
|
||||
if let ParentOrChild::Dup(i) = child {
|
||||
let i = *i;
|
||||
child = children.get_mut(&i).unwrap();
|
||||
}
|
||||
match child {
|
||||
ParentOrChild::Parent(p) => {
|
||||
let (new_tcp, new_addr) = p.accept().expect("Accept failed");
|
||||
log::info!("New child connected from {new_addr}");
|
||||
let duped = new_tcp.try_clone().expect("Dup failed");
|
||||
children.insert(duped.as_raw_fd(), ParentOrChild::Dup(new_tcp.as_raw_fd()));
|
||||
children.insert(new_tcp.as_raw_fd(), ParentOrChild::Child(ChildData {
|
||||
tcp_stream: duped,
|
||||
duped: new_tcp.as_raw_fd(),
|
||||
json_stream: serde_json::StreamDeserializer::new(IoRead::new(new_tcp)),
|
||||
}));
|
||||
},
|
||||
ParentOrChild::Dup(_) => unreachable!(),
|
||||
ParentOrChild::Child(c) => {
|
||||
let Some(msg) = c.json_stream.next() else {
|
||||
let fd1 = c.duped;
|
||||
let fd2 = c.tcp_stream.as_raw_fd();
|
||||
children.remove(&fd1);
|
||||
children.remove(&fd2);
|
||||
continue;
|
||||
};
|
||||
log::trace!("recv: {msg:?}");
|
||||
let msg = match msg {
|
||||
Ok(msg) => msg,
|
||||
Err(e) => {
|
||||
log::error!("Child socket disconnected unexpectedly: {e:?}");
|
||||
children.remove(&chosen);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let response = match msg {
|
||||
TracerClientMessage::Events { events, files: file_events } => {
|
||||
log.extend(events);
|
||||
let mut paths = vec![];
|
||||
for (path, hash) in file_events {
|
||||
if !files.insert(path.clone(), hash) {
|
||||
paths.push(path);
|
||||
}
|
||||
}
|
||||
if paths.is_empty() {
|
||||
TracerServerRequest::Continue
|
||||
} else {
|
||||
TracerServerRequest::AnalyzeFiles { paths }
|
||||
}
|
||||
},
|
||||
TracerClientMessage::FileFormats { formats, files: file_list } => {
|
||||
for (path, fmt) in formats {
|
||||
files.update_format(&path, fmt);
|
||||
}
|
||||
let paths: Vec<_> = file_list.into_iter().filter_map(|(path, hash)| (!files.hashes.contains_key(&hash)).then_some(path)).collect();
|
||||
if paths.is_empty() {
|
||||
TracerServerRequest::Continue
|
||||
} else {
|
||||
TracerServerRequest::AnalyzeFiles { paths }
|
||||
}
|
||||
}
|
||||
TracerClientMessage::AllocateId { } => {
|
||||
let result = TracerServerRequest::AllocatedId { id: next_child_id };
|
||||
next_child_id += 1;
|
||||
result
|
||||
},
|
||||
};
|
||||
log::trace!("send: {response:?}");
|
||||
serde_json::to_writer(&c.tcp_stream, &response)?;
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
child.wait().expect("Failed to wait for child");
|
||||
|
||||
Ok(TracerReport { log, files })
|
||||
}
|
||||
}
|
|
@ -0,0 +1,125 @@
|
|||
use std::{collections::{BTreeSet, HashMap}, path::PathBuf, fmt::{Display, Formatter}, time::Duration};
|
||||
|
||||
use serde::{Serialize, Deserialize};
|
||||
|
||||
use crate::filestore::{FileFormat, Sha256Hash, FileStore};
|
||||
|
||||
|
||||
#[derive(Copy, Clone, Serialize, Deserialize, Eq, PartialEq, Debug, Hash)]
|
||||
pub struct Pid(pub(crate) i32);
|
||||
|
||||
impl From<nix::unistd::Pid> for Pid {
|
||||
fn from(value: nix::unistd::Pid) -> Self {
|
||||
Self(value.as_raw())
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts back into the `nix` pid type.
///
/// Implemented as `From` rather than `Into`: the standard blanket impl then
/// supplies `Into<nix::unistd::Pid> for Pid` automatically, so existing
/// `.into()` call sites keep working and `nix::unistd::Pid::from(pid)`
/// becomes available as well.
impl From<Pid> for nix::unistd::Pid {
    fn from(value: Pid) -> Self {
        Self::from_raw(value.0)
    }
}
|
||||
|
||||
impl Display for Pid {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
self.0.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub enum Event {
|
||||
Fork { child: Pid },
|
||||
Exec { prog: PathBuf },
|
||||
Exit { code: i32 },
|
||||
FdOpen { fd: i32, source: FdSource },
|
||||
FdDup { oldfd: i32, newfd: i32 },
|
||||
FdClose { fd: i32 },
|
||||
FdRead { fd: i32 },
|
||||
FdWrite { fd: i32 },
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub enum FdSource {
|
||||
File { path: PathBuf },
|
||||
Tty,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct Identifier {
|
||||
pub machine: i32,
|
||||
pub pid: Pid,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct LogEntry {
|
||||
pub ident: Identifier,
|
||||
pub event: Event,
|
||||
pub timestamp: Duration,
|
||||
}
|
||||
|
||||
impl Display for LogEntry {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"[{}.{:03} m{}p{}] {}",
|
||||
self.timestamp.as_secs(),
|
||||
self.timestamp.as_millis() % 1000,
|
||||
self.ident.machine,
|
||||
self.ident.pid,
|
||||
self.event
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for FdSource {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
FdSource::File { path } => write!(f, "file {}", path.to_string_lossy()),
|
||||
FdSource::Tty => write!(f, "the terminal"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Event {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Event::Fork { child } => write!(f, "fork {child}"),
|
||||
Event::Exec { prog } => write!(f, "exec {}", prog.to_string_lossy()),
|
||||
Event::Exit { code } => write!(f, "exit with {code}"),
|
||||
Event::FdOpen { fd, source } => write!(f, "open fd {fd} from {source}"),
|
||||
Event::FdDup { oldfd, newfd } => write!(f, "dup fd {oldfd} to {newfd}"),
|
||||
Event::FdClose { fd } => write!(f, "close fd {fd}"),
|
||||
Event::FdRead { fd } => write!(f, "read from fd {fd}"),
|
||||
Event::FdWrite { fd } => write!(f, "write to fd {fd}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub enum TracerClientMessage {
|
||||
Events {
|
||||
events: Vec<LogEntry>,
|
||||
files: BTreeSet<(PathBuf, Sha256Hash)>,
|
||||
},
|
||||
FileFormats {
|
||||
formats: HashMap<PathBuf, FileFormat>,
|
||||
files: BTreeSet<(PathBuf, Sha256Hash)>,
|
||||
},
|
||||
AllocateId {},
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub enum TracerServerRequest {
|
||||
Continue,
|
||||
AnalyzeFiles {
|
||||
paths: Vec<PathBuf>,
|
||||
},
|
||||
AllocatedId { id: i32 },
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct TracerReport {
|
||||
pub log: Vec<LogEntry>,
|
||||
pub files: FileStore,
|
||||
}
|
Loading…
Reference in New Issue