From 61caed9ccd4fb702107a176a000b79a6638f02fe Mon Sep 17 00:00:00 2001 From: Audrey Dutcher Date: Fri, 12 Apr 2024 13:49:10 -0700 Subject: [PATCH] Add json output --- .gitignore | 2 + Cargo.lock | 133 ++++++++ Cargo.toml | 3 + src/main.rs | 852 ++----------------------------------------------- src/tracer.rs | 869 ++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 1037 insertions(+), 822 deletions(-) create mode 100644 src/tracer.rs diff --git a/.gitignore b/.gitignore index ea8c4bf..88d7e6d 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ /target +*.log +*.json diff --git a/Cargo.lock b/Cargo.lock index 1c89536..07a95ae 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -89,6 +89,46 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" +[[package]] +name = "clap" +version = "4.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "528131438037fd55894f62d6e9f068b8f45ac57ffa77517819645d10aed04f64" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" + [[package]] name = "colorchoice" version = "1.0.0" @@ -118,12 +158,24 @@ dependencies = [ "log", ] +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + [[package]] name = "humantime" version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + [[package]] name = "libc" version = "0.2.153" @@ -169,10 +221,31 @@ name = "ontology" version = "0.1.0" dependencies = [ "anyhow", + "clap", "env_logger", "linux-personality", "log", "nix", + "serde", + "serde_json", +] + +[[package]] +name = "proc-macro2" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", ] [[package]] @@ -204,6 +277,66 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" +[[package]] +name = "ryu" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" + +[[package]] +name = "serde" +version = "1.0.197" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.197" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.115" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12dc5c46daa8e9fdf4f5e71b6cf9a53f2487da0e86e55808e2d35539666497dd" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + [[package]] name = "utf8parse" version = "0.2.1" diff --git a/Cargo.toml b/Cargo.toml index ad410bb..d54387a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,3 +11,6 @@ linux-personality = "1.0.0" anyhow = "1" log = "0.4" env_logger = "0.11" +serde = { version = "1", features = ["derive"] } +serde_json = "1.0" +clap = { version = "4.5.4", features = ["derive"] } diff --git a/src/main.rs b/src/main.rs index d19c3c7..5a6023d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,834 +1,42 @@ -use std::{ - collections::HashMap, - ffi::CString, - path::PathBuf, - process::exit, - time::{Duration, Instant}, -}; +mod tracer; -use nix::{ - errno::Errno, - libc::{pid_t, raise, tcsetpgrp, SYS_clone, SYS_clone3, AT_EMPTY_PATH, SIGSTOP, STDIN_FILENO}, - sys::{ - ptrace::{self, traceme, AddressType}, - signal::Signal, - wait::{waitpid, WaitPidFlag, WaitStatus}, - }, - unistd::{execvp, getpid, setpgid, ForkResult, Pid}, -}; +use std::path::PathBuf; -use core::fmt; -use std::{ - borrow::Cow, - fmt::{Display, Formatter}, - io::{self, BufRead, BufReader, Read}, - path::Path, -}; +use clap::{Parser, Subcommand}; -use nix::libc::AT_FDCWD; +#[derive(Parser, Debug)] +struct Cli { + #[command(subcommand)] + cmd: Subcommands, +} -use std::{ffi::OsString, os::unix::prelude::OsStringExt}; - -pub fn read_generic_string( - pid: Pid, - address: AddressType, - ctor: impl Fn(Vec) -> TString, -) -> anyhow::Result { - let mut buf = Vec::new(); - let mut address = address; - const WORD_SIZE: usize = 8; // FIXME - loop { - let word = match ptrace::read(pid, address) { - Err(e) => { - log::warn!("Cannot read tracee {pid} memory {address:?}: {e}"); - return Ok(ctor(buf)); - } - Ok(word) => word, - }; - let word_bytes = word.to_ne_bytes(); - for &byte in word_bytes.iter() { - if byte == 0 { - return Ok(ctor(buf)); - } - buf.push(byte); - } - address = unsafe { address.add(WORD_SIZE) }; +#[derive(Subcommand, Debug, Clone)] +enum Subcommands { + Run { + #[arg(short, long)] + output: Option, + cmd: Vec, } } -#[allow(unused)] -pub fn read_cstring(pid: Pid, address: AddressType) -> anyhow::Result { - read_generic_string(pid, address, |x| CString::new(x).unwrap()) -} - -pub fn read_pathbuf(pid: Pid, address: AddressType) -> anyhow::Result { - read_generic_string(pid, address, |x| PathBuf::from(OsString::from_vec(x))) -} - -pub fn read_string(pid: Pid, address: AddressType) -> anyhow::Result { - // Waiting on https://github.com/rust-lang/libs-team/issues/116 - read_generic_string(pid, address, |x| String::from_utf8_lossy(&x).to_string()) -} - -pub fn read_null_ended_array( - pid: Pid, - mut address: AddressType, - reader: impl Fn(Pid, AddressType) -> anyhow::Result, -) -> anyhow::Result> { - let mut res = Vec::new(); - const WORD_SIZE: usize = 8; // FIXME - loop { - let ptr = match ptrace::read(pid, address) { - Err(e) => { - log::warn!("Cannot read tracee {pid} memory {address:?}: {e}"); - return Ok(res); - } - Ok(ptr) => ptr, - }; - if ptr == 0 { - return Ok(res); - } else { - res.push(reader(pid, ptr as AddressType)?); - } - address = unsafe { address.add(WORD_SIZE) }; - } -} - -#[allow(unused)] -pub fn read_cstring_array(pid: Pid, address: AddressType) -> anyhow::Result> { - read_null_ended_array(pid, address, read_cstring) -} - -pub fn read_string_array(pid: Pid, address: AddressType) -> anyhow::Result> { - read_null_ended_array(pid, address, read_string) -} - -macro_rules! syscall_no_from_regs { - ($regs:ident) => { - $regs.orig_rax as i64 - }; -} - -macro_rules! syscall_res_from_regs { - ($regs:ident) => { - $regs.rax as i64 - }; -} - -macro_rules! syscall_arg { - ($regs:ident, 0) => { - $regs.rdi - }; - ($regs:ident, 1) => { - $regs.rsi - }; - ($regs:ident, 2) => { - $regs.rdx - }; - ($regs:ident, 3) => { - $regs.r10 - }; - ($regs:ident, 4) => { - $regs.r8 - }; - ($regs:ident, 5) => { - $regs.r9 - }; -} - -pub fn read_argv(pid: Pid) -> anyhow::Result> { - let filename = format!("/proc/{pid}/cmdline"); - let buf = std::fs::read(filename)?; - Ok(buf - .split(|&c| c == 0) - .map(CString::new) - .collect::, _>>()?) -} - -pub fn read_comm(pid: Pid) -> anyhow::Result { - let filename = format!("/proc/{pid}/comm"); - let mut buf = std::fs::read(filename)?; - buf.pop(); // remove trailing newline - Ok(String::from_utf8(buf)?) -} - -pub fn read_cwd(pid: Pid) -> std::io::Result { - let filename = format!("/proc/{pid}/cwd"); - let buf = std::fs::read_link(filename)?; - Ok(buf) -} - -pub fn read_fd(pid: Pid, fd: i32) -> std::io::Result { - if fd == AT_FDCWD { - return read_cwd(pid); - } - let filename = format!("/proc/{pid}/fd/{fd}"); - std::fs::read_link(filename) -} - -#[derive(Debug)] -pub enum Interpreter { - None, - Shebang(String), - ExecutableUnaccessible, - Error(io::Error), -} - -impl Display for Interpreter { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - match self { - Interpreter::None => write!(f, "none"), - Interpreter::Shebang(s) => write!(f, "{:?}", s), - Interpreter::ExecutableUnaccessible => { - write!(f, "executable unaccessible") - } - Interpreter::Error(e) => write!(f, "(err: {e})"), - } - } -} - -pub fn read_interpreter_recursive(exe: impl AsRef) -> Vec { - let mut exe = Cow::Borrowed(exe.as_ref()); - let mut interpreters = Vec::new(); - loop { - match read_interpreter(exe.as_ref()) { - Interpreter::Shebang(shebang) => { - exe = Cow::Owned(PathBuf::from( - shebang.split_ascii_whitespace().next().unwrap_or(""), - )); - interpreters.push(Interpreter::Shebang(shebang)); - } - Interpreter::None => break, - err => { - interpreters.push(err); - break; - } - }; - } - interpreters -} - -pub fn read_interpreter(exe: &Path) -> Interpreter { - fn err_to_interpreter(e: io::Error) -> Interpreter { - if e.kind() == io::ErrorKind::PermissionDenied || e.kind() == io::ErrorKind::NotFound { - Interpreter::ExecutableUnaccessible - } else { - Interpreter::Error(e) - } - } - let file = match std::fs::File::open(exe) { - Ok(file) => file, - Err(e) => return err_to_interpreter(e), - }; - let mut reader = BufReader::new(file); - // First, check if it's a shebang script - let mut buf = [0u8; 2]; - - if let Err(e) = reader.read_exact(&mut buf) { - return Interpreter::Error(e); - }; - if &buf != b"#!" { - return Interpreter::None; - } - // Read the rest of the line - let mut buf = Vec::new(); - - if let Err(e) = reader.read_until(b'\n', &mut buf) { - return Interpreter::Error(e); - }; - // Get trimed shebang line [start, end) indices - // If the shebang line is empty, we don't care - let start = buf - .iter() - .position(|&c| !c.is_ascii_whitespace()) - .unwrap_or(0); - let end = buf - .iter() - .rposition(|&c| !c.is_ascii_whitespace()) - .map(|x| x + 1) - .unwrap_or(buf.len()); - let shebang = String::from_utf8_lossy(&buf[start..end]); - Interpreter::Shebang(shebang.into_owned()) -} - -pub struct ProcessStateStore { - processes: HashMap>, -} - -#[derive(Debug)] -pub struct ProcessState { - pub pid: Pid, - pub ppid: Option, - pub status: ProcessStatus, - pub start_time: u64, - pub argv: Vec, - pub comm: String, - pub presyscall: bool, - pub is_exec_successful: bool, - pub syscall: i64, - pub pending_syscall_event: Vec, -} - -#[derive(Debug, Clone, PartialEq)] -pub enum ProcessStatus { - SigstopReceived, - PtraceForkEventReceived, - Running, - Exited(i32), -} - -#[derive(Debug)] -pub struct ExecData { - pub filename: PathBuf, - pub argv: Vec, - pub envp: Vec, - pub cwd: PathBuf, - pub interpreters: Vec, -} - -impl ProcessStateStore { - pub fn new() -> Self { - Self { - processes: HashMap::new(), - } - } - - pub fn insert(&mut self, state: ProcessState) { - self.processes.entry(state.pid).or_default().push(state); - } - - pub fn get_current_mut(&mut self, pid: Pid) -> Option<&mut ProcessState> { - // The last process in the vector is the current process - // println!("Getting {pid}"); - self.processes.get_mut(&pid)?.last_mut() - } -} - -impl ProcessState { - pub fn new(pid: Pid, start_time: u64) -> anyhow::Result { - Ok(Self { - pid, - ppid: None, - status: ProcessStatus::Running, - comm: read_comm(pid)?, - argv: read_argv(pid)?, - start_time, - presyscall: true, - is_exec_successful: false, - syscall: -1, - pending_syscall_event: vec![], - }) - } -} - -#[derive(Debug)] -pub enum Event { - Fork { child: Pid }, - Exec { prog: PathBuf }, - Exit { code: i32 }, - FdOpen { fd: i32, source: FdSource }, - FdDup { oldfd: i32, newfd: i32 }, - FdClose { fd: i32 }, - FdRead { fd: i32 }, - FdWrite { fd: i32 }, -} - -#[derive(Debug)] -pub enum FdSource { - File { path: PathBuf }, -} - -#[derive(Clone, Debug)] -pub struct Identifier { - machine: i32, - pid: Pid, -} - -#[derive(Debug)] -pub struct LogEntry { - ident: Identifier, - event: Event, - timestamp: Duration, -} - -impl Display for LogEntry { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!( - f, - "[{}.{:03} m{}p{}] {}", - self.timestamp.as_secs(), - self.timestamp.as_millis() % 1000, - self.ident.machine, - self.ident.pid, - self.event - ) - } -} - -impl Display for FdSource { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - match self { - FdSource::File { path } => write!(f, "File {}", path.to_string_lossy()), - } - } -} - -impl Display for Event { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - match self { - Event::Fork { child } => write!(f, "fork {child}"), - Event::Exec { prog } => write!(f, "exec {}", prog.to_string_lossy()), - Event::Exit { code } => write!(f, "exit with {code}"), - Event::FdOpen { fd, source } => write!(f, "open fd {fd} from {source}"), - Event::FdDup { oldfd, newfd } => write!(f, "dup fd {oldfd} to {newfd}"), - Event::FdClose { fd } => write!(f, "close fd {fd}"), - Event::FdRead { fd } => write!(f, "read from fd {fd}"), - Event::FdWrite { fd } => write!(f, "write to fd {fd}"), - } - } -} - -pub struct Tracer { - pub store: ProcessStateStore, - pub log: Vec, - pub start_time: Instant, -} - -fn ptrace_syscall(pid: Pid, sig: Option) -> Result<(), Errno> { - match ptrace::syscall(pid, sig) { - Err(Errno::ESRCH) => { - log::info!("ptrace syscall failed: {pid}, ESRCH, child probably gone!"); - Ok(()) - } - other => other, - } -} - -impl Tracer { - pub fn log(&mut self, ident: Identifier, event: Event) { - self.log.push(LogEntry { - ident, - event, - timestamp: Instant::now().duration_since(self.start_time), - }); - } - - pub fn log_root(&mut self, pid: Pid, event: Event) { - self.log(Identifier { pid, machine: 0 }, event); - } - - pub fn new() -> anyhow::Result { - Ok(Self { - store: ProcessStateStore::new(), - log: vec![], - start_time: Instant::now(), - }) - } - - fn drain_syscall_events(&mut self, pid: Pid, mut filter: Box) { - let p = self.store.get_current_mut(pid).unwrap(); - for mut event in p.pending_syscall_event.drain(..) { - (filter)(&mut event); - self.log.push(LogEntry { - ident: Identifier { pid, machine: 0 }, - event, - timestamp: Instant::now().duration_since(self.start_time), - }); - } - } - - pub fn start_root_process(&mut self, args: Vec) -> anyhow::Result<()> { - log::trace!("start_root_process: {:?}", args); - - if let ForkResult::Parent { child: root_child } = unsafe { nix::unistd::fork()? } { - waitpid(root_child, Some(WaitPidFlag::WSTOPPED))?; // wait for child to stop - log::trace!("child stopped"); - let mut root_child_state = ProcessState::new(root_child, 0)?; - root_child_state.ppid = Some(getpid()); - self.store.insert(root_child_state); - // Set foreground process group of the terminal - if -1 == unsafe { tcsetpgrp(STDIN_FILENO, root_child.as_raw()) } { - return Err(Errno::last().into()); - } - // restart child - log::trace!("resuming child"); - let ptrace_opts = { - use nix::sys::ptrace::Options; - Options::PTRACE_O_TRACEEXEC - | Options::PTRACE_O_TRACEEXIT - | Options::PTRACE_O_EXITKILL - | Options::PTRACE_O_TRACESYSGOOD - | Options::PTRACE_O_TRACEFORK - | Options::PTRACE_O_TRACECLONE - | Options::PTRACE_O_TRACEVFORK - }; - ptrace::setoptions(root_child, ptrace_opts)?; - // restart child - self.seccomp_aware_cont(root_child)?; - loop { - let status = waitpid(None, Some(WaitPidFlag::__WALL))?; - // log::trace!("waitpid: {:?}", status); - match status { - WaitStatus::Stopped(pid, sig) => { - log::trace!("stopped: {pid}, sig {:?}", sig); - match sig { - Signal::SIGSTOP => { - log::trace!("sigstop event, child: {pid}"); - if let Some(state) = self.store.get_current_mut(pid) { - if state.status == ProcessStatus::PtraceForkEventReceived { - log::trace!("sigstop event received after ptrace fork event, pid: {pid}"); - state.status = ProcessStatus::Running; - self.seccomp_aware_cont(pid)?; - } else if pid != root_child { - log::error!("Unexpected SIGSTOP: {state:?}") - } - } else { - log::trace!("sigstop event received before ptrace fork event, pid: {pid}"); - let mut state = ProcessState::new(pid, 0)?; - state.status = ProcessStatus::SigstopReceived; - self.store.insert(state); - } - // https://stackoverflow.com/questions/29997244/occasionally-missing-ptrace-event-vfork-when-running-ptrace - // DO NOT send PTRACE_SYSCALL until we receive the PTRACE_EVENT_FORK, etc. - } - Signal::SIGCHLD => { - // From lurk: - // - // The SIGCHLD signal is sent to a process when a child process terminates, interrupted, or resumes after being interrupted - // This means, that if our tracee forked and said fork exits before the parent, the parent will get stopped. - // Therefor issue a PTRACE_SYSCALL request to the parent to continue execution. - // This is also important if we trace without the following forks option. - self.seccomp_aware_cont_with_signal(pid, Signal::SIGCHLD)?; - } - _ => { - // Just deliver the signal to tracee - self.seccomp_aware_cont_with_signal(pid, sig)?; - } - } - } - WaitStatus::Exited(pid, code) => { - log::trace!("exited: pid {}, code {:?}", pid, code); - self.log_root(pid, Event::Exit { code }); - self.store.get_current_mut(pid).unwrap().status = - ProcessStatus::Exited(code); - if pid == root_child { - break; - } - } - WaitStatus::PtraceEvent(pid, sig, evt) => { - log::trace!("ptrace event: {:?} {:?}", sig, evt); - match evt { - nix::libc::PTRACE_EVENT_FORK - | nix::libc::PTRACE_EVENT_VFORK - | nix::libc::PTRACE_EVENT_CLONE => { - let new_child = Pid::from_raw(ptrace::getevent(pid)? as pid_t); - log::trace!( - "ptrace fork event, evt {evt}, pid: {pid}, child: {new_child}" - ); - self.log_root(pid, Event::Fork { child: new_child }); - if let Some(state) = self.store.get_current_mut(new_child) { - if state.status == ProcessStatus::SigstopReceived { - log::trace!("ptrace fork event received after sigstop, pid: {pid}, child: {new_child}"); - state.status = ProcessStatus::Running; - state.ppid = Some(pid); - self.seccomp_aware_cont(new_child)?; - } else if new_child != root_child { - log::error!("Unexpected fork event: {state:?}") - } - } else { - log::trace!("ptrace fork event received before sigstop, pid: {pid}, child: {new_child}"); - let mut state = ProcessState::new(new_child, 0)?; - state.status = ProcessStatus::PtraceForkEventReceived; - state.ppid = Some(pid); - self.store.insert(state); - } - // Resume parent - self.seccomp_aware_cont(pid)?; - } - nix::libc::PTRACE_EVENT_EXEC => { - log::trace!("exec event"); - let p = self.store.get_current_mut(pid).unwrap(); - assert!(!p.presyscall); - // After execve or execveat, in syscall exit event, - // the registers might be clobbered(e.g. aarch64). - // So we need to determine whether exec is successful here. - // PTRACE_EVENT_EXEC only happens for successful exec. - p.is_exec_successful = true; - self.drain_syscall_events(pid, Box::new(|_| {})); - // Don't use seccomp_aware_cont here because that will skip the next syscall exit stop - self.syscall_enter_cont(pid)?; - } - nix::libc::PTRACE_EVENT_EXIT => { - log::trace!("exit event"); - self.seccomp_aware_cont(pid)?; - } - nix::libc::PTRACE_EVENT_SECCOMP => { - log::trace!("seccomp event"); - self.on_syscall_enter(pid)?; - } - _ => { - log::trace!("other event"); - self.seccomp_aware_cont(pid)?; - } - } - } - WaitStatus::Signaled(pid, sig, _) => { - log::debug!("signaled: {pid}, {:?}", sig); - if pid == root_child { - exit(128 + (sig as i32)) - } - } - WaitStatus::PtraceSyscall(pid) => { - let presyscall = self.store.get_current_mut(pid).unwrap().presyscall; - if presyscall { - self.on_syscall_enter(pid)?; - } else { - self.on_syscall_exit(pid)?; - } - } - _ => {} - } - } - } else { - let me = getpid(); - setpgid(me, me)?; - traceme()?; - if 0 != unsafe { raise(SIGSTOP) } { - log::error!("raise failed!"); - exit(-1); - } - - let args = args - .into_iter() - .map(CString::new) - .collect::, _>>()?; - - execvp(&args[0], &args)?; - } - Ok(()) - } - - fn on_syscall_enter(&mut self, pid: Pid) -> anyhow::Result<()> { - let p = self.store.get_current_mut(pid).unwrap(); - p.presyscall = !p.presyscall; - // SYSCALL ENTRY - let regs = match ptrace::getregs(pid) { - Ok(regs) => regs, - Err(Errno::ESRCH) => { - log::info!("ptrace getregs failed: {pid}, ESRCH, child probably gone!"); - return Ok(()); - } - e => e?, - }; - let syscallno = syscall_no_from_regs!(regs); - p.syscall = syscallno; - // log::trace!("pre syscall: {syscallno}"); - match syscallno { - nix::libc::SYS_execveat => { - log::trace!("pre execveat"); - // int execveat(int dirfd, const char *pathname, - // char *const _Nullable argv[], - // char *const _Nullable envp[], - // int flags); - let dirfd = syscall_arg!(regs, 0) as i32; - let pathname = read_string(pid, syscall_arg!(regs, 1) as AddressType)?; - //let argv = read_string_array(pid, syscall_arg!(regs, 2) as AddressType)?; - //let envp = read_string_array(pid, syscall_arg!(regs, 3) as AddressType)?; - let flags = syscall_arg!(regs, 4) as i32; - let filename = resolve_filename_at_fd(pid, pathname, dirfd, flags)?; - //let interpreters = read_interpreter_recursive(&filename); - p.pending_syscall_event.push(Event::Exec { - prog: filename, - }); - } - nix::libc::SYS_execve => { - log::trace!("pre execve"); - let filename = read_pathbuf(pid, syscall_arg!(regs, 0) as AddressType)?; - //let argv = read_string_array(pid, syscall_arg!(regs, 1) as AddressType)?; - //let envp = read_string_array(pid, syscall_arg!(regs, 2) as AddressType)?; - //let interpreters = read_interpreter_recursive(&filename); - p.pending_syscall_event.push(Event::Exec { - prog: filename, - }); - } - nix::libc::SYS_open => { - let path = read_pathbuf(pid, syscall_arg!(regs, 0) as AddressType)?; - p.pending_syscall_event.push(Event::FdOpen { - source: FdSource::File { path }, - fd: -1, - }); - } - nix::libc::SYS_openat => { - let dirfd = syscall_arg!(regs, 0) as i32; - let pathname = read_string(pid, syscall_arg!(regs, 1) as AddressType)?; - let flags = syscall_arg!(regs, 2) as i32; - let path = resolve_filename_at_fd(pid, pathname, dirfd, flags)?; - p.pending_syscall_event.push(Event::FdOpen { - source: FdSource::File { path }, - fd: 0, - }); - } - nix::libc::SYS_read | nix::libc::SYS_readv | nix::libc::SYS_preadv | nix::libc::SYS_preadv2 => { - let fd = syscall_arg!(regs, 0) as i32; - p.pending_syscall_event.push(Event::FdRead { fd }); - } - nix::libc::SYS_write | nix::libc::SYS_writev | nix::libc::SYS_pwritev | nix::libc::SYS_pwritev2 => { - let fd = syscall_arg!(regs, 0) as i32; - p.pending_syscall_event.push(Event::FdWrite { fd }); - } - nix::libc::SYS_dup | nix::libc::SYS_dup2 | nix::libc::SYS_dup3 => { - let oldfd = syscall_arg!(regs, 0) as i32; - p.pending_syscall_event.push(Event::FdDup { oldfd, newfd: -1 }); - } - nix::libc::SYS_fcntl => { - let fd = syscall_arg!(regs, 0) as i32; - let cmd = syscall_arg!(regs, 1) as i32; - match cmd { - nix::libc::F_DUPFD => p.pending_syscall_event.push(Event::FdDup { oldfd: fd, newfd: -1 }), - _ => {} - } - } - nix::libc::SYS_close => { - let fd = syscall_arg!(regs, 0) as i32; - p.pending_syscall_event.push(Event::FdClose { fd }); - } - _ => {} - } - self.syscall_enter_cont(pid)?; - Ok(()) - } - - fn on_syscall_exit(&mut self, pid: Pid) -> anyhow::Result<()> { - // SYSCALL EXIT - // log::trace!("post syscall {}", p.syscall); - let p = self.store.get_current_mut(pid).unwrap(); - p.presyscall = !p.presyscall; - let regs = match ptrace::getregs(pid) { - Ok(regs) => regs, - Err(Errno::ESRCH) => { - log::info!("ptrace getregs failed: {pid}, ESRCH, child probably gone!"); - return Ok(()); - } - e => e?, - }; - let result = syscall_res_from_regs!(regs); - - let filter: Option> = match p.syscall { - nix::libc::SYS_execve => { - log::trace!("post execve"); - // SAFETY: p.preexecve is false, so p.exec_data is Some - p.is_exec_successful = false; - // update comm - p.comm = read_comm(pid)?; - None - } - nix::libc::SYS_execveat => { - log::trace!("post execveat"); - p.is_exec_successful = false; - // update comm - p.comm = read_comm(pid)?; - None - } - nix::libc::SYS_open | nix::libc::SYS_openat => { - if result >= 0 { - Some(Box::new(move |event| match event { - Event::FdOpen { fd: ref mut dest, ..} => { *dest = result as i32; } - _ => {} - })) - } else { - None - } - } - nix::libc::SYS_dup | nix::libc::SYS_dup2 | nix::libc::SYS_dup3 => { - if result >= 0 { - Some(Box::new(move |event| match event { - Event::FdDup { newfd: ref mut dest, ..} => { *dest = result as i32; } - _ => {} - })) - } else { - None - } - } - nix::libc::SYS_fcntl => { - if result >= 0 { - Some(Box::new(move |event| match event { - Event::FdDup { newfd: ref mut dest, ..} => { *dest = result as i32; } - _ => {} - })) - } else { - None - } - } - _ => { - if result >= 0 { - Some(Box::new(|_| {})) - } else { - None - } - } - }; - if let Some(filter) = filter { - self.drain_syscall_events(pid, filter); - } else { - p.pending_syscall_event.clear(); - } - self.seccomp_aware_cont(pid)?; - Ok(()) - } - - fn syscall_enter_cont(&self, pid: Pid) -> Result<(), Errno> { - ptrace_syscall(pid, None) - } - - /// When seccomp-bpf is enabled, we use ptrace::cont instead of ptrace::syscall to improve performance. - /// Then the next syscall-entry stop is skipped and the seccomp stop is used as the syscall entry stop. - fn seccomp_aware_cont(&self, pid: Pid) -> Result<(), Errno> { - ptrace_syscall(pid, None) - } - - fn seccomp_aware_cont_with_signal(&self, pid: Pid, sig: Signal) -> Result<(), Errno> { - ptrace_syscall(pid, Some(sig)) - } -} - -fn resolve_filename_at_fd( - pid: Pid, - pathname: String, - dirfd: i32, - flags: i32, -) -> anyhow::Result { - let pathname_is_empty = pathname.is_empty(); - let pathname = PathBuf::from(pathname); - Ok( - match ( - pathname.is_absolute(), - pathname_is_empty && ((flags & AT_EMPTY_PATH) != 0), - ) { - (true, _) => { - // If pathname is absolute, then dirfd is ignored. - pathname - } - (false, true) => { - // If pathname is an empty string and the AT_EMPTY_PATH flag is specified, then the file descriptor dirfd - // specifies the file to be executed - read_fd(pid, dirfd)? - } - (false, false) => { - // pathname is relative to dirfd - let dir = read_fd(pid, dirfd)?; - dir.join(pathname) - } - }, - ) -} - fn main() { env_logger::init(); - let mut t = Tracer::new().unwrap(); - let args = std::env::args().into_iter().skip(1).collect(); - t.start_root_process(args).unwrap(); + let cli = Cli::parse(); + match cli.cmd { + Subcommands::Run { output, cmd } => { + let fp: Box = if let Some(output) = &output { + Box::new(std::fs::File::options().write(true).create(true).open(output).unwrap()) + } else { + Box::new(std::io::stdout()) + }; + let mut t = tracer::Tracer::new().unwrap(); + t.start_root_process(cmd).unwrap(); - for event in &t.log { - println!("{event}"); + if output.is_none() { + serde_json::to_writer_pretty(fp, &t.log).unwrap(); + } else { + serde_json::to_writer(fp, &t.log).unwrap(); + } + } } } diff --git a/src/tracer.rs b/src/tracer.rs new file mode 100644 index 0000000..850c51c --- /dev/null +++ b/src/tracer.rs @@ -0,0 +1,869 @@ +use std::{ + collections::HashMap, + ffi::CString, + ffi::OsString, + os::unix::prelude::OsStringExt, + path::PathBuf, + process::exit, + time::{Duration, Instant}, + fmt::{Display, Formatter}, +}; + +use core::fmt; + +use nix::{ + errno::Errno, + libc::{pid_t, raise, tcsetpgrp, AT_EMPTY_PATH, SIGSTOP, STDIN_FILENO, AT_FDCWD}, + sys::{ + ptrace::{self, traceme, AddressType}, + signal::Signal, + wait::{waitpid, WaitPidFlag, WaitStatus}, + }, + unistd::{execvp, getpid, setpgid, ForkResult}, +}; + +use serde::{Deserialize, Serialize}; + +#[derive(Copy, Clone, Serialize, Deserialize, Eq, PartialEq, Debug, Hash)] +pub struct Pid(i32); + +impl From for Pid { + fn from(value: nix::unistd::Pid) -> Self { + Self(value.as_raw()) + } +} + +impl Into for Pid { + fn into(self) -> nix::unistd::Pid { + nix::unistd::Pid::from_raw(self.0) + } +} + +impl Display for Pid { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +pub fn read_generic_string( + pid: Pid, + address: AddressType, + ctor: impl Fn(Vec) -> TString, +) -> anyhow::Result { + let mut buf = Vec::new(); + let mut address = address; + const WORD_SIZE: usize = 8; // FIXME + loop { + let word = match ptrace::read(pid.into(), address) { + Err(e) => { + log::warn!("Cannot read tracee {pid} memory {address:?}: {e}"); + return Ok(ctor(buf)); + } + Ok(word) => word, + }; + let word_bytes = word.to_ne_bytes(); + for &byte in word_bytes.iter() { + if byte == 0 { + return Ok(ctor(buf)); + } + buf.push(byte); + } + address = unsafe { address.add(WORD_SIZE) }; + } +} + +#[allow(unused)] +pub fn read_cstring(pid: Pid, address: AddressType) -> anyhow::Result { + read_generic_string(pid, address, |x| CString::new(x).unwrap()) +} + +pub fn read_pathbuf(pid: Pid, address: AddressType) -> anyhow::Result { + read_generic_string(pid, address, |x| PathBuf::from(OsString::from_vec(x))) +} + +pub fn read_string(pid: Pid, address: AddressType) -> anyhow::Result { + // Waiting on https://github.com/rust-lang/libs-team/issues/116 + read_generic_string(pid, address, |x| String::from_utf8_lossy(&x).to_string()) +} + +pub fn read_null_ended_array( + pid: Pid, + mut address: AddressType, + reader: impl Fn(Pid, AddressType) -> anyhow::Result, +) -> anyhow::Result> { + let mut res = Vec::new(); + const WORD_SIZE: usize = 8; // FIXME + loop { + let ptr = match ptrace::read(pid.into(), address) { + Err(e) => { + log::warn!("Cannot read tracee {pid} memory {address:?}: {e}"); + return Ok(res); + } + Ok(ptr) => ptr, + }; + if ptr == 0 { + return Ok(res); + } else { + res.push(reader(pid, ptr as AddressType)?); + } + address = unsafe { address.add(WORD_SIZE) }; + } +} + +#[allow(unused)] +pub fn read_cstring_array(pid: Pid, address: AddressType) -> anyhow::Result> { + read_null_ended_array(pid, address, read_cstring) +} + +#[allow(unused)] +pub fn read_string_array(pid: Pid, address: AddressType) -> anyhow::Result> { + read_null_ended_array(pid, address, read_string) +} + +macro_rules! syscall_no_from_regs { + ($regs:ident) => { + $regs.orig_rax as i64 + }; +} + +macro_rules! syscall_res_from_regs { + ($regs:ident) => { + $regs.rax as i64 + }; +} + +macro_rules! syscall_arg { + ($regs:ident, 0) => { + $regs.rdi + }; + ($regs:ident, 1) => { + $regs.rsi + }; + ($regs:ident, 2) => { + $regs.rdx + }; + ($regs:ident, 3) => { + $regs.r10 + }; + ($regs:ident, 4) => { + $regs.r8 + }; + ($regs:ident, 5) => { + $regs.r9 + }; +} + +pub fn read_argv(pid: Pid) -> anyhow::Result> { + let filename = format!("/proc/{pid}/cmdline"); + let buf = std::fs::read(filename)?; + Ok(buf + .split(|&c| c == 0) + .map(CString::new) + .collect::, _>>()?) +} + +pub fn read_comm(pid: Pid) -> anyhow::Result { + let filename = format!("/proc/{pid}/comm"); + let mut buf = std::fs::read(filename)?; + buf.pop(); // remove trailing newline + Ok(String::from_utf8(buf)?) +} + +pub fn read_cwd(pid: Pid) -> std::io::Result { + let filename = format!("/proc/{pid}/cwd"); + let buf = std::fs::read_link(filename)?; + Ok(buf) +} + +pub fn read_fd(pid: Pid, fd: i32) -> std::io::Result { + if fd == AT_FDCWD { + return read_cwd(pid); + } + let filename = format!("/proc/{pid}/fd/{fd}"); + std::fs::read_link(filename) +} + +/* +#[derive(Debug)] +pub enum Interpreter { + None, + Shebang(String), + ExecutableUnaccessible, + Error(io::Error), +} + +impl Display for Interpreter { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + Interpreter::None => write!(f, "none"), + Interpreter::Shebang(s) => write!(f, "{:?}", s), + Interpreter::ExecutableUnaccessible => { + write!(f, "executable unaccessible") + } + Interpreter::Error(e) => write!(f, "(err: {e})"), + } + } +} + +pub fn read_interpreter_recursive(exe: impl AsRef) -> Vec { + let mut exe = Cow::Borrowed(exe.as_ref()); + let mut interpreters = Vec::new(); + loop { + match read_interpreter(exe.as_ref()) { + Interpreter::Shebang(shebang) => { + exe = Cow::Owned(PathBuf::from( + shebang.split_ascii_whitespace().next().unwrap_or(""), + )); + interpreters.push(Interpreter::Shebang(shebang)); + } + Interpreter::None => break, + err => { + interpreters.push(err); + break; + } + }; + } + interpreters +} + +pub fn read_interpreter(exe: &Path) -> Interpreter { + fn err_to_interpreter(e: io::Error) -> Interpreter { + if e.kind() == io::ErrorKind::PermissionDenied || e.kind() == io::ErrorKind::NotFound { + Interpreter::ExecutableUnaccessible + } else { + Interpreter::Error(e) + } + } + let file = match std::fs::File::open(exe) { + Ok(file) => file, + Err(e) => return err_to_interpreter(e), + }; + let mut reader = BufReader::new(file); + // First, check if it's a shebang script + let mut buf = [0u8; 2]; + + if let Err(e) = reader.read_exact(&mut buf) { + return Interpreter::Error(e); + }; + if &buf != b"#!" { + return Interpreter::None; + } + // Read the rest of the line + let mut buf = Vec::new(); + + if let Err(e) = reader.read_until(b'\n', &mut buf) { + return Interpreter::Error(e); + }; + // Get trimed shebang line [start, end) indices + // If the shebang line is empty, we don't care + let start = buf + .iter() + .position(|&c| !c.is_ascii_whitespace()) + .unwrap_or(0); + let end = buf + .iter() + .rposition(|&c| !c.is_ascii_whitespace()) + .map(|x| x + 1) + .unwrap_or(buf.len()); + let shebang = String::from_utf8_lossy(&buf[start..end]); + Interpreter::Shebang(shebang.into_owned()) +} +*/ + +pub struct ProcessStateStore { + processes: HashMap>, +} + +#[derive(Debug)] +pub struct ProcessState { + pub pid: Pid, + pub ppid: Option, + pub status: ProcessStatus, + pub start_time: u64, + pub argv: Vec, + pub comm: String, + pub presyscall: bool, + pub is_exec_successful: bool, + pub syscall: i64, + pub pending_syscall_event: Vec, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum ProcessStatus { + SigstopReceived, + PtraceForkEventReceived, + Running, + Exited(i32), +} + +#[derive(Debug)] +pub struct ExecData { + pub filename: PathBuf, + pub argv: Vec, + pub envp: Vec, + pub cwd: PathBuf, + //pub interpreters: Vec, +} + +impl ProcessStateStore { + pub fn new() -> Self { + Self { + processes: HashMap::new(), + } + } + + pub fn insert(&mut self, state: ProcessState) { + self.processes.entry(state.pid).or_default().push(state); + } + + pub fn get_current_mut(&mut self, pid: Pid) -> Option<&mut ProcessState> { + // The last process in the vector is the current process + // println!("Getting {pid}"); + self.processes.get_mut(&pid)?.last_mut() + } +} + +impl ProcessState { + pub fn new(pid: Pid, start_time: u64) -> anyhow::Result { + Ok(Self { + pid, + ppid: None, + status: ProcessStatus::Running, + comm: read_comm(pid)?, + argv: read_argv(pid)?, + start_time, + presyscall: true, + is_exec_successful: false, + syscall: -1, + pending_syscall_event: vec![], + }) + } +} + +#[derive(Debug, Serialize, Deserialize)] +pub enum Event { + Fork { child: Pid }, + Exec { prog: PathBuf }, + Exit { code: i32 }, + FdOpen { fd: i32, source: FdSource }, + FdDup { oldfd: i32, newfd: i32 }, + FdClose { fd: i32 }, + FdRead { fd: i32 }, + FdWrite { fd: i32 }, +} + +#[derive(Debug, Serialize, Deserialize)] +pub enum FdSource { + File { path: PathBuf }, + Tty, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct Identifier { + machine: i32, + pid: Pid, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct LogEntry { + ident: Identifier, + event: Event, + timestamp: Duration, +} + +impl Display for LogEntry { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!( + f, + "[{}.{:03} m{}p{}] {}", + self.timestamp.as_secs(), + self.timestamp.as_millis() % 1000, + self.ident.machine, + self.ident.pid, + self.event + ) + } +} + +impl Display for FdSource { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + FdSource::File { path } => write!(f, "file {}", path.to_string_lossy()), + FdSource::Tty => write!(f, "the terminal"), + } + } +} + +impl Display for Event { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + Event::Fork { child } => write!(f, "fork {child}"), + Event::Exec { prog } => write!(f, "exec {}", prog.to_string_lossy()), + Event::Exit { code } => write!(f, "exit with {code}"), + Event::FdOpen { fd, source } => write!(f, "open fd {fd} from {source}"), + Event::FdDup { oldfd, newfd } => write!(f, "dup fd {oldfd} to {newfd}"), + Event::FdClose { fd } => write!(f, "close fd {fd}"), + Event::FdRead { fd } => write!(f, "read from fd {fd}"), + Event::FdWrite { fd } => write!(f, "write to fd {fd}"), + } + } +} + +pub struct Tracer { + pub store: ProcessStateStore, + pub log: Vec, + pub start_time: Instant, +} + +fn ptrace_syscall(pid: Pid, sig: Option) -> Result<(), Errno> { + match ptrace::syscall(pid.into(), sig) { + Err(Errno::ESRCH) => { + log::info!("ptrace syscall failed: {pid}, ESRCH, child probably gone!"); + Ok(()) + } + other => other, + } +} + +impl Tracer { + pub fn log(&mut self, ident: Identifier, event: Event) { + self.log.push(LogEntry { + ident, + event, + timestamp: Instant::now().duration_since(self.start_time), + }); + } + + pub fn log_root(&mut self, pid: Pid, event: Event) { + self.log(Identifier { pid, machine: 0 }, event); + } + + pub fn new() -> anyhow::Result { + Ok(Self { + store: ProcessStateStore::new(), + log: vec![], + start_time: Instant::now(), + }) + } + + fn drain_syscall_events(&mut self, pid: Pid, mut filter: Box) { + let p = self.store.get_current_mut(pid).unwrap(); + for mut event in p.pending_syscall_event.drain(..) { + (filter)(&mut event); + self.log.push(LogEntry { + ident: Identifier { pid, machine: 0 }, + event, + timestamp: Instant::now().duration_since(self.start_time), + }); + } + } + + pub fn start_root_process(&mut self, args: Vec) -> anyhow::Result<()> { + log::trace!("start_root_process: {:?}", args); + + if let ForkResult::Parent { child: root_child } = unsafe { nix::unistd::fork()? } { + waitpid(root_child, Some(WaitPidFlag::WSTOPPED))?; // wait for child to stop + let root_child = root_child.into(); + log::trace!("child stopped"); + let mut root_child_state = ProcessState::new(root_child, 0)?; + root_child_state.ppid = Some(getpid().into()); + self.store.insert(root_child_state); + // Set foreground process group of the terminal + if -1 == unsafe { tcsetpgrp(STDIN_FILENO, root_child.0) } { + return Err(Errno::last().into()); + } + // restart child + log::trace!("resuming child"); + let ptrace_opts = { + use nix::sys::ptrace::Options; + Options::PTRACE_O_TRACEEXEC + | Options::PTRACE_O_TRACEEXIT + | Options::PTRACE_O_EXITKILL + | Options::PTRACE_O_TRACESYSGOOD + | Options::PTRACE_O_TRACEFORK + | Options::PTRACE_O_TRACECLONE + | Options::PTRACE_O_TRACEVFORK + }; + ptrace::setoptions(root_child.into(), ptrace_opts)?; + // restart child + self.seccomp_aware_cont(root_child)?; + loop { + let status = waitpid(None, Some(WaitPidFlag::__WALL))?; + // log::trace!("waitpid: {:?}", status); + match status { + WaitStatus::Stopped(pid, sig) => { + let pid = pid.into(); + log::trace!("stopped: {pid}, sig {:?}", sig); + match sig { + Signal::SIGSTOP => { + log::trace!("sigstop event, child: {pid}"); + if let Some(state) = self.store.get_current_mut(pid) { + if state.status == ProcessStatus::PtraceForkEventReceived { + log::trace!("sigstop event received after ptrace fork event, pid: {pid}"); + state.status = ProcessStatus::Running; + self.seccomp_aware_cont(pid)?; + } else if pid != root_child { + log::error!("Unexpected SIGSTOP: {state:?}") + } + } else { + log::trace!("sigstop event received before ptrace fork event, pid: {pid}"); + let mut state = ProcessState::new(pid, 0)?; + state.status = ProcessStatus::SigstopReceived; + self.store.insert(state); + } + // https://stackoverflow.com/questions/29997244/occasionally-missing-ptrace-event-vfork-when-running-ptrace + // DO NOT send PTRACE_SYSCALL until we receive the PTRACE_EVENT_FORK, etc. + } + Signal::SIGCHLD => { + // From lurk: + // + // The SIGCHLD signal is sent to a process when a child process terminates, interrupted, or resumes after being interrupted + // This means, that if our tracee forked and said fork exits before the parent, the parent will get stopped. + // Therefor issue a PTRACE_SYSCALL request to the parent to continue execution. + // This is also important if we trace without the following forks option. + self.seccomp_aware_cont_with_signal(pid, Signal::SIGCHLD)?; + } + _ => { + // Just deliver the signal to tracee + self.seccomp_aware_cont_with_signal(pid, sig)?; + } + } + } + WaitStatus::Exited(pid, code) => { + let pid = pid.into(); + log::trace!("exited: pid {}, code {:?}", pid, code); + self.log_root(pid, Event::Exit { code }); + self.store.get_current_mut(pid).unwrap().status = + ProcessStatus::Exited(code); + if pid == root_child { + break; + } + } + WaitStatus::PtraceEvent(pid, sig, evt) => { + log::trace!("ptrace event: {:?} {:?}", sig, evt); + match evt { + nix::libc::PTRACE_EVENT_FORK + | nix::libc::PTRACE_EVENT_VFORK + | nix::libc::PTRACE_EVENT_CLONE => { + let new_child = Pid(ptrace::getevent(pid.into())? as pid_t); + log::trace!( + "ptrace fork event, evt {evt}, pid: {pid}, child: {new_child}" + ); + self.log_root(pid.into(), Event::Fork { child: new_child }); + if let Some(state) = self.store.get_current_mut(new_child) { + if state.status == ProcessStatus::SigstopReceived { + log::trace!("ptrace fork event received after sigstop, pid: {pid}, child: {new_child}"); + state.status = ProcessStatus::Running; + state.ppid = Some(pid.into()); + self.seccomp_aware_cont(new_child)?; + } else if new_child != root_child { + log::error!("Unexpected fork event: {state:?}") + } + } else { + log::trace!("ptrace fork event received before sigstop, pid: {pid}, child: {new_child}"); + let mut state = ProcessState::new(new_child, 0)?; + state.status = ProcessStatus::PtraceForkEventReceived; + state.ppid = Some(pid.into()); + self.store.insert(state); + } + // Resume parent + self.seccomp_aware_cont(pid.into())?; + } + nix::libc::PTRACE_EVENT_EXEC => { + log::trace!("exec event"); + let p = self.store.get_current_mut(pid.into()).unwrap(); + assert!(!p.presyscall); + // After execve or execveat, in syscall exit event, + // the registers might be clobbered(e.g. aarch64). + // So we need to determine whether exec is successful here. + // PTRACE_EVENT_EXEC only happens for successful exec. + p.is_exec_successful = true; + self.drain_syscall_events(pid.into(), Box::new(|_| {})); + // Don't use seccomp_aware_cont here because that will skip the next syscall exit stop + self.syscall_enter_cont(pid.into())?; + } + nix::libc::PTRACE_EVENT_EXIT => { + log::trace!("exit event"); + self.seccomp_aware_cont(pid.into())?; + } + nix::libc::PTRACE_EVENT_SECCOMP => { + log::trace!("seccomp event"); + self.on_syscall_enter(pid.into())?; + } + _ => { + log::trace!("other event"); + self.seccomp_aware_cont(pid.into())?; + } + } + } + WaitStatus::Signaled(pid, sig, _) => { + let pid: Pid = pid.into(); + log::debug!("signaled: {pid}, {:?}", sig); + if pid == root_child { + exit(128 + (sig as i32)) + } + } + WaitStatus::PtraceSyscall(pid) => { + let pid = pid.into(); + let presyscall = self.store.get_current_mut(pid).unwrap().presyscall; + if presyscall { + self.on_syscall_enter(pid)?; + } else { + self.on_syscall_exit(pid)?; + } + } + _ => {} + } + } + } else { + let me = getpid(); + setpgid(me, me)?; + traceme()?; + if 0 != unsafe { raise(SIGSTOP) } { + log::error!("raise failed!"); + exit(-1); + } + + let args = args + .into_iter() + .map(CString::new) + .collect::, _>>()?; + + execvp(&args[0], &args)?; + } + Ok(()) + } + + fn on_syscall_enter(&mut self, pid: Pid) -> anyhow::Result<()> { + let p = self.store.get_current_mut(pid).unwrap(); + p.presyscall = !p.presyscall; + // SYSCALL ENTRY + let regs = match ptrace::getregs(pid.into()) { + Ok(regs) => regs, + Err(Errno::ESRCH) => { + log::info!("ptrace getregs failed: {pid}, ESRCH, child probably gone!"); + return Ok(()); + } + e => e?, + }; + let syscallno = syscall_no_from_regs!(regs); + p.syscall = syscallno; + // log::trace!("pre syscall: {syscallno}"); + match syscallno { + nix::libc::SYS_execveat => { + log::trace!("pre execveat"); + // int execveat(int dirfd, const char *pathname, + // char *const _Nullable argv[], + // char *const _Nullable envp[], + // int flags); + let dirfd = syscall_arg!(regs, 0) as i32; + let pathname = read_string(pid, syscall_arg!(regs, 1) as AddressType)?; + //let argv = read_string_array(pid, syscall_arg!(regs, 2) as AddressType)?; + //let envp = read_string_array(pid, syscall_arg!(regs, 3) as AddressType)?; + let flags = syscall_arg!(regs, 4) as i32; + let filename = resolve_filename_at_fd(pid, pathname, dirfd, flags)?; + //let interpreters = read_interpreter_recursive(&filename); + p.pending_syscall_event.push(Event::Exec { prog: filename }); + } + nix::libc::SYS_execve => { + log::trace!("pre execve"); + let filename = read_pathbuf(pid, syscall_arg!(regs, 0) as AddressType)?; + //let argv = read_string_array(pid, syscall_arg!(regs, 1) as AddressType)?; + //let envp = read_string_array(pid, syscall_arg!(regs, 2) as AddressType)?; + //let interpreters = read_interpreter_recursive(&filename); + p.pending_syscall_event.push(Event::Exec { prog: filename }); + } + nix::libc::SYS_open => { + let path = read_pathbuf(pid, syscall_arg!(regs, 0) as AddressType)?; + p.pending_syscall_event.push(Event::FdOpen { + source: FdSource::File { path }, + fd: -1, + }); + } + nix::libc::SYS_openat => { + let dirfd = syscall_arg!(regs, 0) as i32; + let pathname = read_string(pid, syscall_arg!(regs, 1) as AddressType)?; + let flags = syscall_arg!(regs, 2) as i32; + let path = resolve_filename_at_fd(pid, pathname, dirfd, flags)?; + p.pending_syscall_event.push(Event::FdOpen { + source: FdSource::File { path }, + fd: 0, + }); + } + nix::libc::SYS_read + | nix::libc::SYS_readv + | nix::libc::SYS_preadv + | nix::libc::SYS_preadv2 => { + let fd = syscall_arg!(regs, 0) as i32; + p.pending_syscall_event.push(Event::FdRead { fd }); + } + nix::libc::SYS_write + | nix::libc::SYS_writev + | nix::libc::SYS_pwritev + | nix::libc::SYS_pwritev2 => { + let fd = syscall_arg!(regs, 0) as i32; + p.pending_syscall_event.push(Event::FdWrite { fd }); + } + nix::libc::SYS_dup | nix::libc::SYS_dup2 | nix::libc::SYS_dup3 => { + let oldfd = syscall_arg!(regs, 0) as i32; + p.pending_syscall_event + .push(Event::FdDup { oldfd, newfd: -1 }); + } + nix::libc::SYS_fcntl => { + let fd = syscall_arg!(regs, 0) as i32; + let cmd = syscall_arg!(regs, 1) as i32; + match cmd { + nix::libc::F_DUPFD => p.pending_syscall_event.push(Event::FdDup { + oldfd: fd, + newfd: -1, + }), + _ => {} + } + } + nix::libc::SYS_close => { + let fd = syscall_arg!(regs, 0) as i32; + p.pending_syscall_event.push(Event::FdClose { fd }); + } + _ => {} + } + self.syscall_enter_cont(pid)?; + Ok(()) + } + + fn on_syscall_exit(&mut self, pid: Pid) -> anyhow::Result<()> { + // SYSCALL EXIT + // log::trace!("post syscall {}", p.syscall); + let p = self.store.get_current_mut(pid).unwrap(); + p.presyscall = !p.presyscall; + let regs = match ptrace::getregs(pid.into()) { + Ok(regs) => regs, + Err(Errno::ESRCH) => { + log::info!("ptrace getregs failed: {pid}, ESRCH, child probably gone!"); + return Ok(()); + } + e => e?, + }; + let result = syscall_res_from_regs!(regs); + + let filter: Option> = match p.syscall { + nix::libc::SYS_execve => { + log::trace!("post execve"); + // SAFETY: p.preexecve is false, so p.exec_data is Some + p.is_exec_successful = false; + // update comm + p.comm = read_comm(pid)?; + None + } + nix::libc::SYS_execveat => { + log::trace!("post execveat"); + p.is_exec_successful = false; + // update comm + p.comm = read_comm(pid)?; + None + } + nix::libc::SYS_open | nix::libc::SYS_openat => { + if result >= 0 { + Some(Box::new(move |event| match event { + Event::FdOpen { + fd: ref mut dest, .. + } => { + *dest = result as i32; + } + _ => {} + })) + } else { + None + } + } + nix::libc::SYS_dup | nix::libc::SYS_dup2 | nix::libc::SYS_dup3 => { + if result >= 0 { + Some(Box::new(move |event| match event { + Event::FdDup { + newfd: ref mut dest, + .. + } => { + *dest = result as i32; + } + _ => {} + })) + } else { + None + } + } + nix::libc::SYS_fcntl => { + if result >= 0 { + Some(Box::new(move |event| match event { + Event::FdDup { + newfd: ref mut dest, + .. + } => { + *dest = result as i32; + } + _ => {} + })) + } else { + None + } + } + _ => { + if result >= 0 { + Some(Box::new(|_| {})) + } else { + None + } + } + }; + if let Some(filter) = filter { + self.drain_syscall_events(pid, filter); + } else { + p.pending_syscall_event.clear(); + } + self.seccomp_aware_cont(pid)?; + Ok(()) + } + + fn syscall_enter_cont(&self, pid: Pid) -> Result<(), Errno> { + ptrace_syscall(pid, None) + } + + /// When seccomp-bpf is enabled, we use ptrace::cont instead of ptrace::syscall to improve performance. + /// Then the next syscall-entry stop is skipped and the seccomp stop is used as the syscall entry stop. + fn seccomp_aware_cont(&self, pid: Pid) -> Result<(), Errno> { + ptrace_syscall(pid, None) + } + + fn seccomp_aware_cont_with_signal(&self, pid: Pid, sig: Signal) -> Result<(), Errno> { + ptrace_syscall(pid, Some(sig)) + } +} + +fn resolve_filename_at_fd( + pid: Pid, + pathname: String, + dirfd: i32, + flags: i32, +) -> anyhow::Result { + let pathname_is_empty = pathname.is_empty(); + let pathname = PathBuf::from(pathname); + Ok( + match ( + pathname.is_absolute(), + pathname_is_empty && ((flags & AT_EMPTY_PATH) != 0), + ) { + (true, _) => { + // If pathname is absolute, then dirfd is ignored. + pathname + } + (false, true) => { + // If pathname is an empty string and the AT_EMPTY_PATH flag is specified, then the file descriptor dirfd + // specifies the file to be executed + read_fd(pid, dirfd)? + } + (false, false) => { + // pathname is relative to dirfd + let dir = read_fd(pid, dirfd)?; + dir.join(pathname) + } + }, + ) +}