diff --git a/.cargo/config b/.cargo/config new file mode 100644 index 0000000..d67b66e --- /dev/null +++ b/.cargo/config @@ -0,0 +1,2 @@ +[build] +target = "x86_64-unknown-linux-musl" diff --git a/Cargo.lock b/Cargo.lock index e54e4be..f8b27e9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -207,62 +207,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "crossbeam" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8" -dependencies = [ - "crossbeam-channel", - "crossbeam-deque", - "crossbeam-epoch", - "crossbeam-queue", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-channel" -version = "0.5.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab3db02a9c5b5121e1e42fbdb1aeb65f5e02624cc58c43f2884c6ccac0b82f95" -dependencies = [ - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-deque" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" -dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" -dependencies = [ - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-queue" -version = "0.3.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df0346b5d5e76ac2fe4e327c5fd1118d6be7c51dfb18f9b7922923f287471e35" -dependencies = [ - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" - [[package]] name = "crypto-common" version = "0.1.6" @@ -458,6 +402,7 @@ dependencies = [ [[package]] name = "nix" version = "0.28.0" +source = "git+https://github.com/rhelmot/nix-rs?branch=master#e9f7c1b74ef7581adf1513a3f3c9a965824ee2d4" dependencies = [ "bitflags 2.5.0", "cfg-if", @@ -491,7 +436,6 @@ version = "0.1.0" dependencies = [ "anyhow", "clap", - "crossbeam", "env_logger", "gimli 0.29.0", "hex", diff --git a/Cargo.toml b/Cargo.toml index 7c8baf1..e5e3051 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,8 @@ edition = "2021" [dependencies] #nix = { version = "0.28.0", features = ["ptrace", "process", "fs", "poll"] } -nix = { path = "../../rust/nix", features = ["ptrace", "process", "fs", "poll"] } +nix = { git = "https://github.com/rhelmot/nix-rs", branch = "master", features = ["ptrace", "process", "fs", "poll"] } +#nix = { path = "../../rust/nix", features = ["ptrace", "process", "fs", "poll"] } linux-personality = "1.0.0" anyhow = { version = "1", features = ["backtrace"] } log = "0.4" @@ -22,4 +23,3 @@ gimli = { version = "0.29.0" } object = { version = "0.35" } memmap2 = { version = "0.9.4" } typed-arena = { version = "2" } -crossbeam = { version = "0.8" } diff --git a/src/filestore.rs b/src/filestore.rs index 07c7a4a..2a7e005 100644 --- a/src/filestore.rs +++ b/src/filestore.rs @@ -1,6 +1,6 @@ use std::{ borrow::Cow, - collections::{BTreeMap, HashMap, HashSet}, + collections::{BTreeMap, BTreeSet, HashMap, HashSet}, fs, io, path::{PathBuf, Path}, }; @@ -47,7 +47,7 @@ pub struct FileStoreEntry { #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] pub enum FileFormat { - ELF { references: Vec }, + ELF, Other, } @@ -79,14 +79,19 @@ impl FileStore { hash, format: FileFormat::Other, input_names: HashSet::new(), - output_names: HashSet::from([path]), + output_names: HashSet::from([path.clone()]), }); + self.filenames.insert(path, index); + self.hashes.insert(hash, index); false } } pub fn update_format(&mut self, path: &Path, format: FileFormat) { - let idx = *self.filenames.get(path).expect("update_format called with unknown path"); + if !self.filenames.contains_key(path) { + panic!("update_format called with unknown path {}", path.to_string_lossy()) + } + let idx = *self.filenames.get(path).unwrap(); self.files.get_mut(idx).unwrap().format = format; } @@ -115,6 +120,7 @@ impl FileStore { mut content: (impl io::Read + io::Seek), ) -> anyhow::Result<()> { let mut h = Sha256::new(); + log::debug!("Hashing {}", filename.to_string_lossy()); io::copy(&mut content, &mut h)?; let hash = h.finalize().into(); @@ -122,7 +128,7 @@ impl FileStore { std::collections::btree_map::Entry::Vacant(e) => { let index = self.files.len(); e.insert(index); - let format = self.parse_format(&mut content)?; + let (format, refs) = parse_format(&mut content)?; self.files.push(FileStoreEntry { index, hash, @@ -130,6 +136,9 @@ impl FileStore { input_names: [filename.clone()].into(), output_names: HashSet::new(), }); + for (reference_path, _reference_hash) in refs { // lazy... + self.ingest_input(reference_path)?; + } index } std::collections::btree_map::Entry::Occupied(e) => { @@ -148,6 +157,7 @@ impl FileStore { Ok(()) } + /* pub fn ingest_output_local(&mut self, filename: PathBuf) -> anyhow::Result<()> { let stat = fs::metadata(&filename)?; if stat.is_dir() { @@ -172,7 +182,7 @@ impl FileStore { std::collections::btree_map::Entry::Vacant(e) => { let index = self.files.len(); e.insert(index); - let format = self.parse_format(&mut content)?; + let (format, refs) = self.parse_format(&mut content)?; self.files.push(FileStoreEntry { index, format, @@ -196,85 +206,7 @@ impl FileStore { Ok(index) } - - fn parse_format(&mut self, fp: &mut (impl io::Read + io::Seek)) -> anyhow::Result { - fp.seek(io::SeekFrom::Start(0))?; - let mut buf = [0; 4]; - let count = read_exact_or_end(fp, &mut buf)?; - let buf = &buf[..count]; - - Ok(match buf { - [0x7f, b'E', b'L', b'F', ..] => { - let read_cache = ReadCache::new(fp); - let elf = object::File::parse(&read_cache)?; - let endian = if elf.is_little_endian() { - gimli::RunTimeEndian::Little - } else { - gimli::RunTimeEndian::Big - }; - let arena_data = Arena::new(); - let mut load_section = |id: gimli::SectionId| -> Result<_, _> { - load_file_section(id, &elf, endian, &arena_data) - }; - let dwarf = gimli::Dwarf::load(&mut load_section).unwrap(); - let mut units = dwarf.units(); - let mut inputs = vec![]; - while let Ok(Some(unit)) = units.next() { - let abbrev = dwarf.abbreviations(&unit)?; - let mut entries = unit.entries(&abbrev); - while let Some((_, entry)) = entries.next_dfs()? { - if entry.tag() == DW_TAG_compile_unit { - let mut basename = None; - let mut dirname = None; - if let Some(name) = - entry.attr(constants::DW_AT_name)?.map(|a| a.value()) - { - if let Ok(name) = dwarf.attr_string(&dwarf.unit(unit)?, name) { - basename = Some(PathBuf::from(name.to_string()?)); - } - } - if let Some(name) = - entry.attr(constants::DW_AT_comp_dir)?.map(|a| a.value()) - { - if let Ok(name) = dwarf.attr_string(&dwarf.unit(unit)?, name) { - dirname = Some(PathBuf::from(name.to_string()?)); - } - } - if let (Some(dirname), Some(basename)) = (dirname, basename) { - inputs.push(dirname.join(basename)); - } - } - } - } - - let references = inputs - .into_iter() - .map(|input| self.ingest_dependency_local(input)) - .collect::, _>>()? - .into_iter() - .filter_map(|x| x) - .collect(); - FileFormat::ELF { references } - } - _ => FileFormat::Other, - }) - } - - fn ingest_dependency_local(&mut self, filename: PathBuf) -> anyhow::Result> { - // TODO: this needs to try suffixes of the filename against the filepath table to see if it - // was moved between compilation and ingestion - let metadata = match fs::metadata(&filename) { - Ok(m) => m, - Err(e) if e.kind() == io::ErrorKind::NotFound => return Ok(None), - Err(e) => return Err(e)?, - }; - if !metadata.is_file() { - return Ok(None); - } - - let fp = fs::File::open(&filename)?; - Ok(Some(self.ingest_output(filename, fp)?)) - } + */ } fn load_file_section<'input, 'arena, Endian: gimli::Endianity, R: object::ReadRef<'input>>( @@ -305,3 +237,84 @@ fn read_exact_or_end(fp: &mut impl io::Read, buf: &mut [u8]) -> anyhow::Result anyhow::Result<(FileFormat, BTreeSet<(PathBuf, Sha256Hash)>)> { + fp.seek(io::SeekFrom::Start(0))?; + let mut buf = [0; 4]; + let count = read_exact_or_end(fp, &mut buf)?; + let buf = &buf[..count]; + + Ok(match buf { + [0x7f, b'E', b'L', b'F', ..] => { + let read_cache = ReadCache::new(fp); + let elf = object::File::parse(&read_cache)?; + let endian = if elf.is_little_endian() { + gimli::RunTimeEndian::Little + } else { + gimli::RunTimeEndian::Big + }; + let arena_data = Arena::new(); + let mut load_section = |id: gimli::SectionId| -> Result<_, _> { + load_file_section(id, &elf, endian, &arena_data) + }; + let dwarf = gimli::Dwarf::load(&mut load_section).unwrap(); + let mut units = dwarf.units(); + let mut inputs = vec![]; + while let Ok(Some(unit)) = units.next() { + let abbrev = dwarf.abbreviations(&unit)?; + let mut entries = unit.entries(&abbrev); + while let Some((_, entry)) = entries.next_dfs()? { + if entry.tag() == DW_TAG_compile_unit { + let mut basename = None; + let mut dirname = None; + if let Some(name) = + entry.attr(constants::DW_AT_name)?.map(|a| a.value()) + { + if let Ok(name) = dwarf.attr_string(&dwarf.unit(unit)?, name) { + basename = Some(PathBuf::from(name.to_string()?)); + } + } + if let Some(name) = + entry.attr(constants::DW_AT_comp_dir)?.map(|a| a.value()) + { + if let Ok(name) = dwarf.attr_string(&dwarf.unit(unit)?, name) { + dirname = Some(PathBuf::from(name.to_string()?)); + } + } + if let (Some(dirname), Some(basename)) = (dirname, basename) { + inputs.push(dirname.join(basename)); + } + } + } + } + + let references = inputs + .into_iter() + .map(|filename| -> anyhow::Result<_> { + // TODO: this needs to try suffixes of the filename against the filepath table to see if it + // was moved between compilation and ingestion. but how... + let metadata = match fs::metadata(&filename) { + Ok(m) => m, + Err(e) if e.kind() == io::ErrorKind::NotFound => return Ok(None), + Err(e) => return Err(e)?, + }; + if !metadata.is_file() { + return Ok(None); + } + + let mut fp = fs::File::open(&filename)?; + let mut h = Sha256::new(); + log::debug!("Hashing {}", filename.to_string_lossy()); + io::copy(&mut fp, &mut h)?; + let result = Ok(Some((filename, h.finalize().into()))); + result + }) + .collect::, _>>()? + .into_iter() + .filter_map(|x| x) + .collect(); + (FileFormat::ELF, references) + } + _ => (FileFormat::Other, BTreeSet::new()), + }) +} diff --git a/src/main.rs b/src/main.rs index 62e35bd..015230d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -40,7 +40,8 @@ enum Subcommands { /// The filepath to dump the json report to. will dump to stdout if unspecified. output: Option, }, - _InternalLaunch { + InternalLaunch { + machine: i32, connect: String, cmd: Vec }, @@ -67,13 +68,12 @@ fn main() { } else { Box::new(std::io::stdout()) }; - let mut t = tracer::server::Tracer::run(file_scope).unwrap(); - t.start_root_process(cmd, mute).unwrap(); + let t = tracer::server::Tracer::run(file_scope, cmd, mute).unwrap(); if output.is_none() { - serde_json::to_writer_pretty(fp, &t.report) + serde_json::to_writer_pretty(fp, &t) } else { - serde_json::to_writer(fp, &t.report) + serde_json::to_writer(fp, &t) } .expect("Could not serialize json trace report"); } @@ -90,7 +90,7 @@ fn main() { Box::new(std::io::stdout()) }; - let in_report: tracer::TracerReport = if let Some(input) = &input { + let in_report: tracer::types::TracerReport = if let Some(input) = &input { serde_json::from_reader(std::fs::File::open(input).unwrap()) } else { serde_json::from_reader(std::io::stdin()) @@ -106,7 +106,8 @@ fn main() { } .expect("Could not serialize json parameter report"); } - } - Subcommands::_InternalLaunch { connect, cmd } => { + Subcommands::InternalLaunch { machine, connect, cmd } => { + tracer::client::TracerClient::run(machine, connect, cmd).expect("Tracing failed"); + } } } diff --git a/src/tracer/client.rs b/src/tracer/client.rs index d303f3f..45fdc9d 100644 --- a/src/tracer/client.rs +++ b/src/tracer/client.rs @@ -1,11 +1,5 @@ use std::{ - collections::HashMap, - ffi::CString, - ffi::OsString, - os::{fd::AsRawFd, unix::prelude::OsStringExt}, - path::PathBuf, - process::exit, - time::Instant, + collections::{BTreeSet, HashMap}, ffi::CString, ffi::OsString, io::Write, net::TcpStream, os::unix::prelude::OsStringExt, path::PathBuf, process::exit, time::Instant }; use nix::{ @@ -18,8 +12,10 @@ use nix::{ }, unistd::{execvp, getpid, setpgid, ForkResult}, }; +use serde_json::de::IoRead; +use sha2::{Sha256, Digest}; -use crate::filestore::Sha256Hash; +use crate::filestore::{parse_format, Sha256Hash}; use super::types::*; @@ -323,11 +319,11 @@ fn ptrace_syscall(pid: Pid, sig: Option) -> Result<(), Errno> { } } -struct TracerClient { +pub struct TracerClient { store: ProcessStateStore, start_time: Instant, pending_events: Vec, - pending_files: Vec<(PathBuf, Sha256Hash)>, + pending_files: BTreeSet<(PathBuf, Sha256Hash)>, machine: i32, } @@ -344,6 +340,20 @@ impl TracerClient { self.log(Identifier { pid, machine: self.machine }, event); } + fn ingest_file(&mut self, path: PathBuf) -> anyhow::Result<()> { + let stat = std::fs::metadata(&path)?; + if !stat.is_file() { + return Ok(()); + } + let mut fp = std::fs::File::open(&path)?; + let mut h = Sha256::new(); + log::debug!("Hashing {} (client)", path.to_string_lossy()); + std::io::copy(&mut fp, &mut h)?; + let hash = h.finalize().into(); + self.pending_files.insert((path, hash)); + Ok(()) + } + fn drain_syscall_events(&mut self, pid: Pid, mut filter: Box) { let p = self.store.get_current_mut(pid).unwrap(); for mut event in p.pending_syscall_event.drain(..) { @@ -356,172 +366,17 @@ impl TracerClient { } } - pub fn start_root_process(&mut self, args: Vec, mute: bool) -> anyhow::Result<()> { - log::trace!("start_root_process: {:?}", args); - - if let ForkResult::Parent { child: root_child } = unsafe { nix::unistd::fork()? } { - waitpid(root_child, Some(WaitPidFlag::WSTOPPED))?; // wait for child to stop - let root_child = root_child.into(); - log::trace!("child stopped"); - let mut root_child_state = ProcessState::new(root_child, 0)?; - root_child_state.ppid = Some(getpid().into()); - self.store.insert(root_child_state); - // Set foreground process group of the terminal - if -1 == unsafe { tcsetpgrp(STDIN_FILENO, root_child.0) } { - return Err(Errno::last().into()); - } - // restart child - log::trace!("resuming child"); - let ptrace_opts = { - use nix::sys::ptrace::Options; - Options::PTRACE_O_TRACEEXEC - | Options::PTRACE_O_TRACEEXIT - | Options::PTRACE_O_EXITKILL - | Options::PTRACE_O_TRACESYSGOOD - | Options::PTRACE_O_TRACEFORK - | Options::PTRACE_O_TRACECLONE - | Options::PTRACE_O_TRACEVFORK - }; - ptrace::setoptions(root_child.into(), ptrace_opts)?; - // restart child - self.seccomp_aware_cont(root_child)?; - loop { - let status = waitpid(None, Some(WaitPidFlag::__WALL))?; - // log::trace!("waitpid: {:?}", status); - match status { - WaitStatus::Stopped(pid, sig) => { - let pid = pid.into(); - log::trace!("stopped: {pid}, sig {:?}", sig); - match sig { - Signal::SIGSTOP => { - log::trace!("sigstop event, child: {pid}"); - if let Some(state) = self.store.get_current_mut(pid) { - if state.status == ProcessStatus::PtraceForkEventReceived { - log::trace!("sigstop event received after ptrace fork event, pid: {pid}"); - state.status = ProcessStatus::Running; - self.seccomp_aware_cont(pid)?; - } else if pid != root_child { - log::error!("Unexpected SIGSTOP: {state:?}") - } - } else { - log::trace!("sigstop event received before ptrace fork event, pid: {pid}"); - let mut state = ProcessState::new(pid, 0)?; - state.status = ProcessStatus::SigstopReceived; - self.store.insert(state); - } - // https://stackoverflow.com/questions/29997244/occasionally-missing-ptrace-event-vfork-when-running-ptrace - // DO NOT send PTRACE_SYSCALL until we receive the PTRACE_EVENT_FORK, etc. - } - Signal::SIGCHLD => { - // From lurk: - // - // The SIGCHLD signal is sent to a process when a child process terminates, interrupted, or resumes after being interrupted - // This means, that if our tracee forked and said fork exits before the parent, the parent will get stopped. - // Therefor issue a PTRACE_SYSCALL request to the parent to continue execution. - // This is also important if we trace without the following forks option. - self.seccomp_aware_cont_with_signal(pid, Signal::SIGCHLD)?; - } - _ => { - // Just deliver the signal to tracee - self.seccomp_aware_cont_with_signal(pid, sig)?; - } - } - } - WaitStatus::Exited(pid, code) => { - let pid = pid.into(); - log::trace!("exited: pid {}, code {:?}", pid, code); - self.log_root(pid, Event::Exit { code }); - self.store.get_current_mut(pid).unwrap().status = - ProcessStatus::Exited(code); - if pid == root_child { - break; - } - } - WaitStatus::PtraceEvent(pid, sig, evt) => { - log::trace!("ptrace event: {:?} {:?}", sig, evt); - match evt { - nix::libc::PTRACE_EVENT_FORK - | nix::libc::PTRACE_EVENT_VFORK - | nix::libc::PTRACE_EVENT_CLONE => { - let new_child = Pid(ptrace::getevent(pid.into())? as pid_t); - log::trace!( - "ptrace fork event, evt {evt}, pid: {pid}, child: {new_child}" - ); - self.log_root(pid.into(), Event::Fork { child: new_child }); - if let Some(state) = self.store.get_current_mut(new_child) { - if state.status == ProcessStatus::SigstopReceived { - log::trace!("ptrace fork event received after sigstop, pid: {pid}, child: {new_child}"); - state.status = ProcessStatus::Running; - state.ppid = Some(pid.into()); - self.seccomp_aware_cont(new_child)?; - } else if new_child != root_child { - log::error!("Unexpected fork event: {state:?}") - } - } else { - log::trace!("ptrace fork event received before sigstop, pid: {pid}, child: {new_child}"); - let mut state = ProcessState::new(new_child, 0)?; - state.status = ProcessStatus::PtraceForkEventReceived; - state.ppid = Some(pid.into()); - self.store.insert(state); - } - // Resume parent - self.seccomp_aware_cont(pid.into())?; - } - nix::libc::PTRACE_EVENT_EXEC => { - log::trace!("exec event"); - let p = self.store.get_current_mut(pid.into()).unwrap(); - assert!(!p.presyscall); - // After execve or execveat, in syscall exit event, - // the registers might be clobbered(e.g. aarch64). - // So we need to determine whether exec is successful here. - // PTRACE_EVENT_EXEC only happens for successful exec. - p.is_exec_successful = true; - let path = p - .pending_syscall_event - .iter() - .find_map(|e| match e { - Event::Exec { prog, .. } => Some(prog.clone()), - _ => None, - }) - .unwrap(); - self.report.files.ingest_output_local(path)?; - self.drain_syscall_events(pid.into(), Box::new(|_| {})); - // Don't use seccomp_aware_cont here because that will skip the next syscall exit stop - self.syscall_enter_cont(pid.into())?; - } - nix::libc::PTRACE_EVENT_EXIT => { - log::trace!("exit event"); - self.seccomp_aware_cont(pid.into())?; - } - nix::libc::PTRACE_EVENT_SECCOMP => { - log::trace!("seccomp event"); - self.on_syscall_enter(pid.into())?; - } - _ => { - log::trace!("other event"); - self.seccomp_aware_cont(pid.into())?; - } - } - } - WaitStatus::Signaled(pid, sig, _) => { - let pid: Pid = pid.into(); - log::debug!("signaled: {pid}, {:?}", sig); - if pid == root_child { - exit(128 + (sig as i32)) - } - } - WaitStatus::PtraceSyscall(pid) => { - let pid = pid.into(); - let presyscall = self.store.get_current_mut(pid).unwrap().presyscall; - if presyscall { - self.on_syscall_enter(pid)?; - } else { - self.on_syscall_exit(pid)?; - } - } - _ => {} - } - } + pub fn run(machine: i32, connect: String, args: Vec) -> anyhow::Result<()> { + let mut this = Self { + store: ProcessStateStore::default(), + start_time: Instant::now(), + pending_events: vec![], + pending_files: BTreeSet::new(), + machine, + }; + let sock = TcpStream::connect(&connect).expect(format!("Could not connect to {connect}").as_str()); + if let ForkResult::Parent { child } = unsafe { nix::unistd::fork()? } { + this.run_internal(sock, child.into()) } else { let me = getpid(); setpgid(me, me)?; @@ -531,26 +386,222 @@ impl TracerClient { exit(-1); } - if mute { - let null = std::fs::File::options() - .read(true) - .write(true) - .open("/dev/null") - .expect("Could not open /dev/null"); - nix::unistd::dup2(null.as_raw_fd(), 0) - .expect("Could not dup /dev/null to /dev/stdin"); - nix::unistd::dup2(null.as_raw_fd(), 1) - .expect("Could not dup /dev/null to /dev/stdout"); - nix::unistd::dup2(null.as_raw_fd(), 2) - .expect("Could not dup /dev/null to /dev/stderr"); - } - let args = args .into_iter() .map(CString::new) .collect::, _>>()?; - execvp(&args[0], &args)?; + execvp(&args[0], &args).expect(format!("Failed to execute {args:?}").as_str()); + unreachable!(); + } + } + + fn run_internal(&mut self, mut sock: TcpStream, root_child: Pid) -> anyhow::Result<()> { + waitpid(nix::unistd::Pid::from(root_child.into()), Some(WaitPidFlag::WSTOPPED))?; // wait for child to stop + log::trace!("child stopped"); + let mut root_child_state = ProcessState::new(root_child, 0)?; + root_child_state.ppid = Some(getpid().into()); + self.store.insert(root_child_state); + // Set foreground process group of the terminal + if -1 == unsafe { tcsetpgrp(STDIN_FILENO, root_child.0) } { + return Err(Errno::last().into()); + } + // restart child + log::trace!("resuming child"); + let ptrace_opts = { + use nix::sys::ptrace::Options; + Options::PTRACE_O_TRACEEXEC + | Options::PTRACE_O_TRACEEXIT + | Options::PTRACE_O_EXITKILL + | Options::PTRACE_O_TRACESYSGOOD + | Options::PTRACE_O_TRACEFORK + | Options::PTRACE_O_TRACECLONE + | Options::PTRACE_O_TRACEVFORK + }; + ptrace::setoptions(root_child.into(), ptrace_opts)?; + // restart child + ptrace::syscall(nix::unistd::Pid::from(root_child.into()), None)?; + loop { + let status = waitpid(None, Some(WaitPidFlag::__WALL))?; + // log::trace!("waitpid: {:?}", status); + let signal = match status { + WaitStatus::Stopped(pid, sig) => { + let pid = pid.into(); + log::trace!("stopped: {pid}, sig {:?}", sig); + match sig { + Signal::SIGSTOP => { + log::trace!("sigstop event, child: {pid}"); + if let Some(state) = self.store.get_current_mut(pid) { + if state.status == ProcessStatus::PtraceForkEventReceived { + log::trace!("sigstop event received after ptrace fork event, pid: {pid}"); + state.status = ProcessStatus::Running; + } else if pid != root_child { + log::error!("Unexpected SIGSTOP: {state:?}") + } + } else { + log::trace!("sigstop event received before ptrace fork event, pid: {pid}"); + let mut state = ProcessState::new(pid, 0)?; + state.status = ProcessStatus::SigstopReceived; + self.store.insert(state); + } + None + } + Signal::SIGCHLD => { + // From lurk: + // + // The SIGCHLD signal is sent to a process when a child process terminates, interrupted, or resumes after being interrupted + // This means, that if our tracee forked and said fork exits before the parent, the parent will get stopped. + // Therefor issue a PTRACE_SYSCALL request to the parent to continue execution. + // This is also important if we trace without the following forks option. + Some(Signal::SIGCHLD) + } + _ => { + // Just deliver the signal to tracee + Some(sig) + } + } + } + WaitStatus::Exited(pid, code) => { + let pid = pid.into(); + log::trace!("exited: pid {}, code {:?}", pid, code); + self.log_root(pid, Event::Exit { code }); + self.store.get_current_mut(pid).unwrap().status = + ProcessStatus::Exited(code); + if pid == root_child { + break; + } + None + } + WaitStatus::PtraceEvent(pid, sig, evt) => { + log::trace!("ptrace event: {:?} {:?}", sig, evt); + match evt { + nix::libc::PTRACE_EVENT_FORK + | nix::libc::PTRACE_EVENT_VFORK + | nix::libc::PTRACE_EVENT_CLONE => { + let new_child = Pid(ptrace::getevent(pid.into())? as pid_t); + log::trace!( + "ptrace fork event, evt {evt}, pid: {pid}, child: {new_child}" + ); + self.log_root(pid.into(), Event::Fork { child: new_child }); + if let Some(state) = self.store.get_current_mut(new_child) { + if state.status == ProcessStatus::SigstopReceived { + log::trace!("ptrace fork event received after sigstop, pid: {pid}, child: {new_child}"); + state.status = ProcessStatus::Running; + state.ppid = Some(pid.into()); + } else if new_child != root_child { + log::error!("Unexpected fork event: {state:?}") + } + } else { + log::trace!("ptrace fork event received before sigstop, pid: {pid}, child: {new_child}"); + let mut state = ProcessState::new(new_child, 0)?; + state.status = ProcessStatus::PtraceForkEventReceived; + state.ppid = Some(pid.into()); + self.store.insert(state); + } + // Resume parent + None + } + nix::libc::PTRACE_EVENT_EXEC => { + log::trace!("exec event"); + let p = self.store.get_current_mut(pid.into()).unwrap(); + assert!(!p.presyscall); + // After execve or execveat, in syscall exit event, + // the registers might be clobbered(e.g. aarch64). + // So we need to determine whether exec is successful here. + // PTRACE_EVENT_EXEC only happens for successful exec. + p.is_exec_successful = true; + let path = p + .pending_syscall_event + .iter() + .find_map(|e| match e { + Event::Exec { prog, .. } => Some(prog.clone()), + _ => None, + }) + .unwrap(); + self.ingest_file(path)?; + self.drain_syscall_events(pid.into(), Box::new(|_| {})); + // Don't use seccomp_aware_cont here because that will skip the next syscall exit stop + None + } + nix::libc::PTRACE_EVENT_EXIT => { + log::trace!("exit event"); + None + } + nix::libc::PTRACE_EVENT_SECCOMP => { + log::trace!("seccomp event"); + self.on_syscall_enter(pid.into())?; + None + } + _ => { + log::trace!("other event"); + None + } + } + } + WaitStatus::Signaled(pid, sig, _) => { + let pid: Pid = pid.into(); + log::debug!("signaled: {pid}, {:?}", sig); + if pid == root_child { + exit(128 + (sig as i32)) + } + None + } + WaitStatus::PtraceSyscall(pid) => { + let pid = pid.into(); + let presyscall = self.store.get_current_mut(pid).unwrap().presyscall; + if presyscall { + self.on_syscall_enter(pid)?; + } else { + self.on_syscall_exit(pid)?; + } + None + } + _ => None + }; + + if !self.pending_files.is_empty() || !self.pending_events.is_empty() { + let mut events = vec![]; + let mut files = BTreeSet::new(); + std::mem::swap(&mut events, &mut self.pending_events); + std::mem::swap(&mut files, &mut self.pending_files); + let mut msg = TracerClientMessage::Events { events, files }; + + loop { + serde_json::to_writer(&sock, &msg)?; + sock.write_all("\n".as_bytes())?; + + let event: TracerServerRequest = serde_json::StreamDeserializer::new(&mut IoRead::new(&sock)).next().unwrap()?; + + match event { + TracerServerRequest::Continue => break, + TracerServerRequest::AnalyzeFiles { paths } => { + let mut formats = HashMap::new(); + let mut files = BTreeSet::new(); + for path in paths { + let mut fp = std::fs::File::open(&path)?; + log::debug!("Parsing format of {} (client)", path.to_string_lossy()); + let (format, mut references) = parse_format(&mut fp)?; + formats.insert(path, format); + files.append(&mut references); + } + msg = TracerClientMessage::FileFormats { formats, files } + }, + TracerServerRequest::AllocatedId { id } => { + panic!("Receieved unsolicited AllocatedId({id})"); + } + } + } + } + + // https://stackoverflow.com/questions/29997244/occasionally-missing-ptrace-event-vfork-when-running-ptrace + // DO NOT send PTRACE_SYSCALL until we receive the PTRACE_EVENT_FORK, etc. + if let Some(pid) = status.pid() { + let pid = pid.into(); + let p = self.store.get_current_mut(pid).expect("No such process??"); + if !matches!(p.status, ProcessStatus::SigstopReceived | ProcessStatus::Exited(_)) { + ptrace_syscall(pid, signal)?; + } + } } Ok(()) } @@ -572,7 +623,6 @@ impl TracerClient { // log::trace!("pre syscall: {syscallno}"); match syscallno { nix::libc::SYS_execveat => { - log::trace!("pre execveat"); // int execveat(int dirfd, const char *pathname, // char *const _Nullable argv[], // char *const _Nullable envp[], @@ -587,7 +637,6 @@ impl TracerClient { p.pending_syscall_event.push(Event::Exec { prog: filename }); } nix::libc::SYS_execve => { - log::trace!("pre execve"); let filename = read_pathbuf(pid, syscall_arg!(regs, 0) as AddressType)?; //let argv = read_string_array(pid, syscall_arg!(regs, 1) as AddressType)?; //let envp = read_string_array(pid, syscall_arg!(regs, 2) as AddressType)?; @@ -647,7 +696,7 @@ impl TracerClient { } _ => {} } - self.syscall_enter_cont(pid)?; + //self.syscall_enter_cont(pid)?; Ok(()) } @@ -665,10 +714,10 @@ impl TracerClient { e => e?, }; let result = syscall_res_from_regs!(regs); + let mut pending_files = vec![]; let filter: Option> = match p.syscall { nix::libc::SYS_execve => { - log::trace!("post execve"); // SAFETY: p.preexecve is false, so p.exec_data is Some p.is_exec_successful = false; // update comm @@ -676,7 +725,6 @@ impl TracerClient { None } nix::libc::SYS_execveat => { - log::trace!("post execveat"); p.is_exec_successful = false; // update comm p.comm = read_comm(pid)?; @@ -690,7 +738,7 @@ impl TracerClient { .. } = pending { - self.report.files.ingest_output_local(path.clone())?; + pending_files.push(path.clone()); } } Some(Box::new(move |event| match event { @@ -748,23 +796,11 @@ impl TracerClient { } else { p.pending_syscall_event.clear(); } - self.seccomp_aware_cont(pid)?; + for path in pending_files { + self.ingest_file(path)?; + } Ok(()) } - - fn syscall_enter_cont(&self, pid: Pid) -> Result<(), Errno> { - ptrace_syscall(pid, None) - } - - /// When seccomp-bpf is enabled, we use ptrace::cont instead of ptrace::syscall to improve performance. - /// Then the next syscall-entry stop is skipped and the seccomp stop is used as the syscall entry stop. - fn seccomp_aware_cont(&self, pid: Pid) -> Result<(), Errno> { - ptrace_syscall(pid, None) - } - - fn seccomp_aware_cont_with_signal(&self, pid: Pid, sig: Signal) -> Result<(), Errno> { - ptrace_syscall(pid, Some(sig)) - } } fn resolve_filename_at_fd( diff --git a/src/tracer/server.rs b/src/tracer/server.rs index 1072c10..5f5ea0e 100644 --- a/src/tracer/server.rs +++ b/src/tracer/server.rs @@ -1,4 +1,4 @@ -use std::{path::PathBuf, net::{TcpListener, TcpStream}, collections::BTreeMap, os::fd::{AsFd, AsRawFd}}; +use std::{collections::BTreeMap, net::{TcpListener, TcpStream}, os::fd::{AsFd, AsRawFd, BorrowedFd}, path::PathBuf, process::{Command, Stdio}}; use serde_json::de::IoRead; @@ -12,20 +12,32 @@ pub struct Tracer { } impl Tracer { - pub fn run(input: Vec, args: Vec) -> anyhow::Result { + pub fn run(input: Vec, args: Vec, mute: bool) -> anyhow::Result { let mut files = FileStore::new(input)?; let mut log = vec![]; - let listener = TcpListener::bind("127.0.0.1:9995").expect("Could not bind listener socket"); + let connect = "127.0.0.1:9995".to_owned(); + let listener = TcpListener::bind(&connect).expect("Could not bind listener socket"); + + let executable = std::env::current_exe().expect("Could not obtain current executable"); + let mut proc = Command::new(executable); + proc.args(["internal-launch".to_owned(), "0".to_owned(), connect].iter().chain(args.iter())); + if mute { + proc.stdin(Stdio::null()).stdout(Stdio::null()).stderr(Stdio::null()); + } + let mut child = proc.spawn().expect("Could not spawn child"); + let mut next_child_id = 1; struct ChildData { tcp_stream: TcpStream, - json_stream: serde_json::StreamDeserializer<'static, IoRead, TracerClientMessage> + json_stream: serde_json::StreamDeserializer<'static, IoRead, TracerClientMessage>, + duped: i32, } enum ParentOrChild { Parent(TcpListener), Child(ChildData), + Dup(i32), } impl AsFd for ParentOrChild { @@ -33,13 +45,23 @@ impl Tracer { match self { ParentOrChild::Parent(i) => i.as_fd(), ParentOrChild::Child(i) => i.tcp_stream.as_fd(), + ParentOrChild::Dup(i) => unsafe { BorrowedFd::borrow_raw(*i) } } } } let mut children = BTreeMap::new(); - children.insert(listener.as_raw_fd(), ParentOrChild::Parent(listener)); + let (first_child, _first_addr) = listener.accept().expect("Accept failed"); + let duped = first_child.try_clone().expect("Dup failed"); + children.insert(duped.as_raw_fd(), ParentOrChild::Dup(first_child.as_raw_fd())); + children.insert(first_child.as_raw_fd(), ParentOrChild::Child(ChildData { + tcp_stream: duped, + duped: first_child.as_raw_fd(), + json_stream: serde_json::StreamDeserializer::new(IoRead::new(first_child)), + })); + + children.insert(listener.as_raw_fd(), ParentOrChild::Parent(listener)); loop { if children.len() <= 1 { @@ -49,13 +71,32 @@ impl Tracer { let mut fdset = children.values().into(); nix::sys::select::select(None, Some(&mut fdset), None, None, None).expect("Select failed"); let chosen = fdset.fds(None).next().unwrap().as_raw_fd(); - let child = children.get(&chosen).unwrap(); + let mut child = children.get_mut(&chosen).unwrap(); + if let ParentOrChild::Dup(i) = child { + let i = *i; + child = children.get_mut(&i).unwrap(); + } match child { ParentOrChild::Parent(p) => { let (new_tcp, _new_addr) = p.accept().expect("Accept failed"); + let duped = new_tcp.try_clone().expect("Dup failed"); + children.insert(duped.as_raw_fd(), ParentOrChild::Dup(new_tcp.as_raw_fd())); + children.insert(new_tcp.as_raw_fd(), ParentOrChild::Child(ChildData { + tcp_stream: duped, + duped: new_tcp.as_raw_fd(), + json_stream: serde_json::StreamDeserializer::new(IoRead::new(new_tcp)), + })); }, + ParentOrChild::Dup(_) => unreachable!(), ParentOrChild::Child(c) => { - let msg = c.json_stream.next().expect("Should NEVER be seen - StreamDeserializer is inexhaustable"); + let Some(msg) = c.json_stream.next() else { + let fd1 = c.duped; + let fd2 = c.tcp_stream.as_raw_fd(); + children.remove(&fd1); + children.remove(&fd2); + continue; + }; + log::trace!("recv: {msg:?}"); let msg = match msg { Ok(msg) => msg, Err(e) => { @@ -64,12 +105,13 @@ impl Tracer { continue; } }; - serde_json::to_writer(&c.tcp_stream, &match msg { + + let response = match msg { TracerClientMessage::Events { events, files: file_events } => { log.extend(events); let mut paths = vec![]; for (path, hash) in file_events { - if !files.insert(path, hash) { + if !files.insert(path.clone(), hash) { paths.push(path); } } @@ -79,17 +121,31 @@ impl Tracer { TracerServerRequest::AnalyzeFiles { paths } } }, - TracerClientMessage::FileFormats { formats } => { + TracerClientMessage::FileFormats { formats, files: file_list } => { for (path, fmt) in formats { - files.update_format(&path, fmt) + files.update_format(&path, fmt); } - TracerServerRequest::Continue + let paths: Vec<_> = file_list.into_iter().filter_map(|(path, hash)| (!files.hashes.contains_key(&hash)).then_some(path)).collect(); + if paths.is_empty() { + TracerServerRequest::Continue + } else { + TracerServerRequest::AnalyzeFiles { paths } + } + } + TracerClientMessage::AllocateId { } => { + let result = TracerServerRequest::AllocatedId { id: next_child_id }; + next_child_id += 1; + result }, - }); + }; + log::trace!("send: {response:?}"); + serde_json::to_writer(&c.tcp_stream, &response)?; }, } } + child.wait().expect("Failed to wait for child"); + Ok(TracerReport { log, files }) } } diff --git a/src/tracer/types.rs b/src/tracer/types.rs index 422df8a..ab1f600 100644 --- a/src/tracer/types.rs +++ b/src/tracer/types.rs @@ -96,7 +96,7 @@ impl Display for Event { } } -#[derive(Serialize, Deserialize)] +#[derive(Serialize, Deserialize, Debug)] pub enum TracerClientMessage { Events { events: Vec, @@ -104,15 +104,18 @@ pub enum TracerClientMessage { }, FileFormats { formats: HashMap, + files: BTreeSet<(PathBuf, Sha256Hash)>, }, + AllocateId {}, } -#[derive(Serialize, Deserialize)] +#[derive(Serialize, Deserialize, Debug)] pub enum TracerServerRequest { Continue, AnalyzeFiles { paths: Vec, }, + AllocatedId { id: i32 }, } #[derive(Serialize, Deserialize)]