diff --git a/Cargo.lock b/Cargo.lock index 7f33e25..e54e4be 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -139,9 +139,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "cfg_aliases" -version = "0.1.1" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" +checksum = "77e53693616d3075149f4ead59bdeecd204ac6b8192d8969757601b74bddf00f" [[package]] name = "clap" @@ -207,6 +207,62 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-epoch", + "crossbeam-queue", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab3db02a9c5b5121e1e42fbdb1aeb65f5e02624cc58c43f2884c6ccac0b82f95" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-queue" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df0346b5d5e76ac2fe4e327c5fd1118d6be7c51dfb18f9b7922923f287471e35" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" + [[package]] name = "crypto-common" version = "0.1.6" @@ -402,8 +458,6 @@ dependencies = [ [[package]] name = "nix" version = "0.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4" dependencies = [ "bitflags 2.5.0", "cfg-if", @@ -437,6 +491,7 @@ version = "0.1.0" dependencies = [ "anyhow", "clap", + "crossbeam", "env_logger", "gimli 0.29.0", "hex", diff --git a/Cargo.toml b/Cargo.toml index b5b64e1..7c8baf1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,8 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -nix = { version = "0.28.0", features = ["ptrace", "process", "fs"] } +#nix = { version = "0.28.0", features = ["ptrace", "process", "fs", "poll"] } +nix = { path = "../../rust/nix", features = ["ptrace", "process", "fs", "poll"] } linux-personality = "1.0.0" anyhow = { version = "1", features = ["backtrace"] } log = "0.4" @@ -21,3 +22,4 @@ gimli = { version = "0.29.0" } object = { version = "0.35" } memmap2 = { version = "0.9.4" } typed-arena = { version = "2" } +crossbeam = { version = "0.8" } diff --git a/src/filestore.rs b/src/filestore.rs index 93b5126..07c7a4a 100644 --- a/src/filestore.rs +++ b/src/filestore.rs @@ -2,7 +2,7 @@ use std::{ borrow::Cow, collections::{BTreeMap, HashMap, HashSet}, fs, io, - path::PathBuf, + path::{PathBuf, Path}, }; use gimli::{constants, DW_TAG_compile_unit}; @@ -66,6 +66,30 @@ impl FileStore { Ok(result) } + /// Register the minimal set of information associated with a file. Returns whether the file + /// was already known. + pub fn insert(&mut self, path: PathBuf, hash: Sha256Hash) -> bool { + if let Some(idx) = self.hashes.get_mut(&hash) { + self.files.get_mut(*idx).unwrap().output_names.insert(path); + true + } else { + let index = self.files.len(); + self.files.push(FileStoreEntry { + index, + hash, + format: FileFormat::Other, + input_names: HashSet::new(), + output_names: HashSet::from([path]), + }); + false + } + } + + pub fn update_format(&mut self, path: &Path, format: FileFormat) { + let idx = *self.filenames.get(path).expect("update_format called with unknown path"); + self.files.get_mut(idx).unwrap().format = format; + } + fn ingest_input(&mut self, filename: PathBuf) -> anyhow::Result<()> { let stat = fs::metadata(&filename)?; if stat.is_dir() { diff --git a/src/main.rs b/src/main.rs index a3eee1b..62e35bd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -25,6 +25,7 @@ enum Subcommands { #[arg(short, long)] output: Option, + /// Set this to #[arg(short, long)] mute: bool, @@ -39,6 +40,10 @@ enum Subcommands { /// The filepath to dump the json report to. will dump to stdout if unspecified. output: Option, }, + _InternalLaunch { + connect: String, + cmd: Vec + }, } fn main() { @@ -62,7 +67,7 @@ fn main() { } else { Box::new(std::io::stdout()) }; - let mut t = tracer::Tracer::new(file_scope).unwrap(); + let mut t = tracer::server::Tracer::run(file_scope).unwrap(); t.start_root_process(cmd, mute).unwrap(); if output.is_none() { @@ -102,4 +107,6 @@ fn main() { .expect("Could not serialize json parameter report"); } } + Subcommands::_InternalLaunch { connect, cmd } => { + } } diff --git a/src/reports/parameters.rs b/src/reports/parameters.rs index 5bc3d3c..bcf27c0 100644 --- a/src/reports/parameters.rs +++ b/src/reports/parameters.rs @@ -2,7 +2,7 @@ use std::path::PathBuf; use serde::{Deserialize, Serialize}; -use crate::tracer::TracerReport; +use crate::tracer::types::TracerReport; #[derive(Serialize, Deserialize)] pub struct ParametersReport { diff --git a/src/tracer/client.rs b/src/tracer/client.rs index 2f262f0..d303f3f 100644 --- a/src/tracer/client.rs +++ b/src/tracer/client.rs @@ -2,15 +2,12 @@ use std::{ collections::HashMap, ffi::CString, ffi::OsString, - fmt::{Display, Formatter}, os::{fd::AsRawFd, unix::prelude::OsStringExt}, path::PathBuf, process::exit, - time::{Duration, Instant}, + time::Instant, }; -use core::fmt; - use nix::{ errno::Errno, libc::{pid_t, raise, tcsetpgrp, AT_EMPTY_PATH, AT_FDCWD, SIGSTOP, STDIN_FILENO}, @@ -22,30 +19,9 @@ use nix::{ unistd::{execvp, getpid, setpgid, ForkResult}, }; -use serde::{Deserialize, Serialize}; +use crate::filestore::Sha256Hash; -use crate::filestore::FileStore; - -#[derive(Copy, Clone, Serialize, Deserialize, Eq, PartialEq, Debug, Hash)] -pub struct Pid(i32); - -impl From for Pid { - fn from(value: nix::unistd::Pid) -> Self { - Self(value.as_raw()) - } -} - -impl Into for Pid { - fn into(self) -> nix::unistd::Pid { - nix::unistd::Pid::from_raw(self.0) - } -} - -impl Display for Pid { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - self.0.fmt(f) - } -} +use super::types::*; pub fn read_generic_string( pid: Pid, @@ -337,18 +313,6 @@ impl ProcessState { } } -pub struct Tracer { - pub store: ProcessStateStore, - pub start_time: Instant, - pub report: TracerReport, -} - -#[derive(Serialize, Deserialize)] -pub struct TracerReport { - pub log: Vec, - pub files: FileStore, -} - fn ptrace_syscall(pid: Pid, sig: Option) -> Result<(), Errno> { match ptrace::syscall(pid.into(), sig) { Err(Errno::ESRCH) => { @@ -359,9 +323,17 @@ fn ptrace_syscall(pid: Pid, sig: Option) -> Result<(), Errno> { } } -impl Tracer { +struct TracerClient { + store: ProcessStateStore, + start_time: Instant, + pending_events: Vec, + pending_files: Vec<(PathBuf, Sha256Hash)>, + machine: i32, +} + +impl TracerClient { pub fn log(&mut self, ident: Identifier, event: Event) { - self.report.log.push(LogEntry { + self.pending_events.push(LogEntry { ident, event, timestamp: Instant::now().duration_since(self.start_time), @@ -369,24 +341,15 @@ impl Tracer { } pub fn log_root(&mut self, pid: Pid, event: Event) { - self.log(Identifier { pid, machine: 0 }, event); - } - - pub fn new(input: Vec) -> anyhow::Result { - let files = FileStore::new(input)?; - Ok(Self { - store: ProcessStateStore::default(), - start_time: Instant::now(), - report: TracerReport { log: vec![], files }, - }) + self.log(Identifier { pid, machine: self.machine }, event); } fn drain_syscall_events(&mut self, pid: Pid, mut filter: Box) { let p = self.store.get_current_mut(pid).unwrap(); for mut event in p.pending_syscall_event.drain(..) { (filter)(&mut event); - self.report.log.push(LogEntry { - ident: Identifier { pid, machine: 0 }, + self.pending_events.push(LogEntry { + ident: Identifier { pid, machine: self.machine }, event, timestamp: Instant::now().duration_since(self.start_time), }); @@ -396,7 +359,7 @@ impl Tracer { pub fn start_root_process(&mut self, args: Vec, mute: bool) -> anyhow::Result<()> { log::trace!("start_root_process: {:?}", args); - if let ForkResult::Parent { child: root_child } = unsafe { nix::unistd::fork()? } { + if let ForkResult::Parent { child: root_child } = unsafe { nix::unistd::fork()? } { waitpid(root_child, Some(WaitPidFlag::WSTOPPED))?; // wait for child to stop let root_child = root_child.into(); log::trace!("child stopped"); diff --git a/src/tracer/mod.rs b/src/tracer/mod.rs index ea1769a..a858f69 100644 --- a/src/tracer/mod.rs +++ b/src/tracer/mod.rs @@ -1,2 +1,3 @@ -mod client; -mod server; +pub mod client; +pub mod server; +pub mod types; diff --git a/src/tracer/server.rs b/src/tracer/server.rs index 1e6b9b4..1072c10 100644 --- a/src/tracer/server.rs +++ b/src/tracer/server.rs @@ -1,91 +1,95 @@ -use std::collections::HashSet; +use std::{path::PathBuf, net::{TcpListener, TcpStream}, collections::BTreeMap, os::fd::{AsFd, AsRawFd}}; -use serde::{Serialize, Deserialize}; +use serde_json::de::IoRead; -use crate::filestore::{FileFormat, Sha256Hash}; +use crate::filestore::FileStore; -#[derive(Debug, Serialize, Deserialize)] -pub enum Event { - Fork { child: Pid }, - Exec { prog: PathBuf }, - Exit { code: i32 }, - FdOpen { fd: i32, source: FdSource }, - FdDup { oldfd: i32, newfd: i32 }, - FdClose { fd: i32 }, - FdRead { fd: i32 }, - FdWrite { fd: i32 }, +use super::types::*; + + +pub struct Tracer { + pub report: TracerReport, } -#[derive(Debug, Serialize, Deserialize)] -pub enum FdSource { - File { path: PathBuf }, - Tty, -} +impl Tracer { + pub fn run(input: Vec, args: Vec) -> anyhow::Result { + let mut files = FileStore::new(input)?; + let mut log = vec![]; -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct Identifier { - machine: i32, - pid: Pid, -} + let listener = TcpListener::bind("127.0.0.1:9995").expect("Could not bind listener socket"); -#[derive(Debug, Serialize, Deserialize)] -pub struct LogEntry { - ident: Identifier, - event: Event, - timestamp: Duration, -} - -impl Display for LogEntry { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!( - f, - "[{}.{:03} m{}p{}] {}", - self.timestamp.as_secs(), - self.timestamp.as_millis() % 1000, - self.ident.machine, - self.ident.pid, - self.event - ) - } -} - -impl Display for FdSource { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - match self { - FdSource::File { path } => write!(f, "file {}", path.to_string_lossy()), - FdSource::Tty => write!(f, "the terminal"), + struct ChildData { + tcp_stream: TcpStream, + json_stream: serde_json::StreamDeserializer<'static, IoRead, TracerClientMessage> } - } -} -impl Display for Event { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - match self { - Event::Fork { child } => write!(f, "fork {child}"), - Event::Exec { prog } => write!(f, "exec {}", prog.to_string_lossy()), - Event::Exit { code } => write!(f, "exit with {code}"), - Event::FdOpen { fd, source } => write!(f, "open fd {fd} from {source}"), - Event::FdDup { oldfd, newfd } => write!(f, "dup fd {oldfd} to {newfd}"), - Event::FdClose { fd } => write!(f, "close fd {fd}"), - Event::FdRead { fd } => write!(f, "read from fd {fd}"), - Event::FdWrite { fd } => write!(f, "write to fd {fd}"), + enum ParentOrChild { + Parent(TcpListener), + Child(ChildData), } + + impl AsFd for ParentOrChild { + fn as_fd(&self) -> std::os::fd::BorrowedFd<'_> { + match self { + ParentOrChild::Parent(i) => i.as_fd(), + ParentOrChild::Child(i) => i.tcp_stream.as_fd(), + } + } + } + + let mut children = BTreeMap::new(); + children.insert(listener.as_raw_fd(), ParentOrChild::Parent(listener)); + + + loop { + if children.len() <= 1 { + break; + } + + let mut fdset = children.values().into(); + nix::sys::select::select(None, Some(&mut fdset), None, None, None).expect("Select failed"); + let chosen = fdset.fds(None).next().unwrap().as_raw_fd(); + let child = children.get(&chosen).unwrap(); + match child { + ParentOrChild::Parent(p) => { + let (new_tcp, _new_addr) = p.accept().expect("Accept failed"); + }, + ParentOrChild::Child(c) => { + let msg = c.json_stream.next().expect("Should NEVER be seen - StreamDeserializer is inexhaustable"); + let msg = match msg { + Ok(msg) => msg, + Err(e) => { + log::error!("Child socket disconnected unexpectedly: {e:?}"); + children.remove(&chosen); + continue; + } + }; + serde_json::to_writer(&c.tcp_stream, &match msg { + TracerClientMessage::Events { events, files: file_events } => { + log.extend(events); + let mut paths = vec![]; + for (path, hash) in file_events { + if !files.insert(path, hash) { + paths.push(path); + } + } + if paths.is_empty() { + TracerServerRequest::Continue + } else { + TracerServerRequest::AnalyzeFiles { paths } + } + }, + TracerClientMessage::FileFormats { formats } => { + for (path, fmt) in formats { + files.update_format(&path, fmt) + } + TracerServerRequest::Continue + }, + }); + }, + } + } + + Ok(TracerReport { log, files }) } } - -pub enum TracerClientMessage { - Events { - events: Vec, - files: HashSet<(PathBuf, Sha256Hash)>, - }, - FileFormat { - format: FileFormat - }, -} - -pub enum TracerServerRequest { - Continue, - AnalyzeFile { - path: PathBuf, - }, -} diff --git a/src/tracer/types.rs b/src/tracer/types.rs new file mode 100644 index 0000000..422df8a --- /dev/null +++ b/src/tracer/types.rs @@ -0,0 +1,122 @@ +use std::{collections::{BTreeSet, HashMap}, path::PathBuf, fmt::{Display, Formatter}, time::Duration}; + +use serde::{Serialize, Deserialize}; + +use crate::filestore::{FileFormat, Sha256Hash, FileStore}; + + +#[derive(Copy, Clone, Serialize, Deserialize, Eq, PartialEq, Debug, Hash)] +pub struct Pid(pub(crate) i32); + +impl From for Pid { + fn from(value: nix::unistd::Pid) -> Self { + Self(value.as_raw()) + } +} + +impl Into for Pid { + fn into(self) -> nix::unistd::Pid { + nix::unistd::Pid::from_raw(self.0) + } +} + +impl Display for Pid { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + + +#[derive(Debug, Serialize, Deserialize)] +pub enum Event { + Fork { child: Pid }, + Exec { prog: PathBuf }, + Exit { code: i32 }, + FdOpen { fd: i32, source: FdSource }, + FdDup { oldfd: i32, newfd: i32 }, + FdClose { fd: i32 }, + FdRead { fd: i32 }, + FdWrite { fd: i32 }, +} + +#[derive(Debug, Serialize, Deserialize)] +pub enum FdSource { + File { path: PathBuf }, + Tty, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct Identifier { + pub machine: i32, + pub pid: Pid, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct LogEntry { + pub ident: Identifier, + pub event: Event, + pub timestamp: Duration, +} + +impl Display for LogEntry { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "[{}.{:03} m{}p{}] {}", + self.timestamp.as_secs(), + self.timestamp.as_millis() % 1000, + self.ident.machine, + self.ident.pid, + self.event + ) + } +} + +impl Display for FdSource { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + FdSource::File { path } => write!(f, "file {}", path.to_string_lossy()), + FdSource::Tty => write!(f, "the terminal"), + } + } +} + +impl Display for Event { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Event::Fork { child } => write!(f, "fork {child}"), + Event::Exec { prog } => write!(f, "exec {}", prog.to_string_lossy()), + Event::Exit { code } => write!(f, "exit with {code}"), + Event::FdOpen { fd, source } => write!(f, "open fd {fd} from {source}"), + Event::FdDup { oldfd, newfd } => write!(f, "dup fd {oldfd} to {newfd}"), + Event::FdClose { fd } => write!(f, "close fd {fd}"), + Event::FdRead { fd } => write!(f, "read from fd {fd}"), + Event::FdWrite { fd } => write!(f, "write to fd {fd}"), + } + } +} + +#[derive(Serialize, Deserialize)] +pub enum TracerClientMessage { + Events { + events: Vec, + files: BTreeSet<(PathBuf, Sha256Hash)>, + }, + FileFormats { + formats: HashMap, + }, +} + +#[derive(Serialize, Deserialize)] +pub enum TracerServerRequest { + Continue, + AnalyzeFiles { + paths: Vec, + }, +} + +#[derive(Serialize, Deserialize)] +pub struct TracerReport { + pub log: Vec, + pub files: FileStore, +}