diff --git a/Cargo.lock b/Cargo.lock index b946c78..1c89536 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,69 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "anstream" +version = "0.6.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" + +[[package]] +name = "anstyle-parse" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "anyhow" +version = "1.0.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519" + [[package]] name = "bitflags" version = "0.7.0" @@ -26,6 +89,41 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" + +[[package]] +name = "env_filter" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a009aa4810eb158359dda09d0c87378e4bbb89b5a801f016885a4707ba24f7ea" +dependencies = [ + "log", + "regex", +] + +[[package]] +name = "env_logger" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b35839ba51819680ba087cd351788c9a3c476841207e0b8cee0b04722343b9" +dependencies = [ + "anstream", + "anstyle", + "env_filter", + "humantime", + "log", +] + +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + [[package]] name = "libc" version = "0.2.153" @@ -42,6 +140,18 @@ dependencies = [ "libc", ] +[[package]] +name = "log" +version = "0.4.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" + +[[package]] +name = "memchr" +version = "2.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" + [[package]] name = "nix" version = "0.28.0" @@ -58,6 +168,110 @@ dependencies = [ name = "ontology" version = "0.1.0" dependencies = [ + "anyhow", + "env_logger", "linux-personality", + "log", "nix", ] + +[[package]] +name = "regex" +version = "1.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" + +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" diff --git a/Cargo.toml b/Cargo.toml index 140662d..ad410bb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,3 +8,6 @@ edition = "2021" [dependencies] nix = { version = "0.28.0", features = ["ptrace", "process"] } linux-personality = "1.0.0" +anyhow = "1" +log = "0.4" +env_logger = "0.11" diff --git a/src/main.rs b/src/main.rs index 6d0cc45..2159dfc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,25 +1,331 @@ -use linux_personality::{personality, ADDR_NO_RANDOMIZE}; -use nix::{ - sys::{ - ptrace, - signal::Signal, - wait::{wait, WaitStatus}, - }, - unistd::{fork, ForkResult, Pid}, -}; use std::{ - cell::RefCell, - collections::{HashMap, HashSet}, - env::args, - ffi::{CStr, CString}, - os::unix::{fs::FileExt, process::CommandExt}, - process::Command, + collections::HashMap, + ffi::CString, + path::PathBuf, + process::exit, + time::{Duration, Instant}, }; +use nix::{ + errno::Errno, + libc::{pid_t, raise, tcsetpgrp, SYS_clone, SYS_clone3, AT_EMPTY_PATH, SIGSTOP, STDIN_FILENO}, + sys::{ + ptrace::{self, traceme, AddressType}, + signal::Signal, + wait::{waitpid, WaitPidFlag, WaitStatus}, + }, + unistd::{execvp, getpid, setpgid, ForkResult, Pid}, +}; + +use core::fmt; +use std::{ + borrow::Cow, + fmt::{Display, Formatter}, + io::{self, BufRead, BufReader, Read}, + path::Path, +}; + +use nix::libc::AT_FDCWD; + +use std::{ffi::OsString, os::unix::prelude::OsStringExt}; + +pub fn read_generic_string( + pid: Pid, + address: AddressType, + ctor: impl Fn(Vec) -> TString, +) -> anyhow::Result { + let mut buf = Vec::new(); + let mut address = address; + const WORD_SIZE: usize = 8; // FIXME + loop { + let word = match ptrace::read(pid, address) { + Err(e) => { + log::warn!("Cannot read tracee {pid} memory {address:?}: {e}"); + return Ok(ctor(buf)); + } + Ok(word) => word, + }; + let word_bytes = word.to_ne_bytes(); + for &byte in word_bytes.iter() { + if byte == 0 { + return Ok(ctor(buf)); + } + buf.push(byte); + } + address = unsafe { address.add(WORD_SIZE) }; + } +} + +#[allow(unused)] +pub fn read_cstring(pid: Pid, address: AddressType) -> anyhow::Result { + read_generic_string(pid, address, |x| CString::new(x).unwrap()) +} + +pub fn read_pathbuf(pid: Pid, address: AddressType) -> anyhow::Result { + read_generic_string(pid, address, |x| PathBuf::from(OsString::from_vec(x))) +} + +pub fn read_string(pid: Pid, address: AddressType) -> anyhow::Result { + // Waiting on https://github.com/rust-lang/libs-team/issues/116 + read_generic_string(pid, address, |x| String::from_utf8_lossy(&x).to_string()) +} + +pub fn read_null_ended_array( + pid: Pid, + mut address: AddressType, + reader: impl Fn(Pid, AddressType) -> anyhow::Result, +) -> anyhow::Result> { + let mut res = Vec::new(); + const WORD_SIZE: usize = 8; // FIXME + loop { + let ptr = match ptrace::read(pid, address) { + Err(e) => { + log::warn!("Cannot read tracee {pid} memory {address:?}: {e}"); + return Ok(res); + } + Ok(ptr) => ptr, + }; + if ptr == 0 { + return Ok(res); + } else { + res.push(reader(pid, ptr as AddressType)?); + } + address = unsafe { address.add(WORD_SIZE) }; + } +} + +#[allow(unused)] +pub fn read_cstring_array(pid: Pid, address: AddressType) -> anyhow::Result> { + read_null_ended_array(pid, address, read_cstring) +} + +pub fn read_string_array(pid: Pid, address: AddressType) -> anyhow::Result> { + read_null_ended_array(pid, address, read_string) +} + +macro_rules! syscall_no_from_regs { + ($regs:ident) => { + $regs.orig_rax as i64 + }; +} + +#[allow(unused)] +macro_rules! syscall_res_from_regs { + ($regs:ident) => { + $regs.rax as i64 + }; +} + +macro_rules! syscall_arg { + ($regs:ident, 0) => { + $regs.rdi + }; + ($regs:ident, 1) => { + $regs.rsi + }; + ($regs:ident, 2) => { + $regs.rdx + }; + ($regs:ident, 3) => { + $regs.r10 + }; + ($regs:ident, 4) => { + $regs.r8 + }; + ($regs:ident, 5) => { + $regs.r9 + }; +} + +pub fn read_argv(pid: Pid) -> anyhow::Result> { + let filename = format!("/proc/{pid}/cmdline"); + let buf = std::fs::read(filename)?; + Ok(buf + .split(|&c| c == 0) + .map(CString::new) + .collect::, _>>()?) +} + +pub fn read_comm(pid: Pid) -> anyhow::Result { + let filename = format!("/proc/{pid}/comm"); + let mut buf = std::fs::read(filename)?; + buf.pop(); // remove trailing newline + Ok(String::from_utf8(buf)?) +} + +pub fn read_cwd(pid: Pid) -> std::io::Result { + let filename = format!("/proc/{pid}/cwd"); + let buf = std::fs::read_link(filename)?; + Ok(buf) +} + +pub fn read_fd(pid: Pid, fd: i32) -> std::io::Result { + if fd == AT_FDCWD { + return read_cwd(pid); + } + let filename = format!("/proc/{pid}/fd/{fd}"); + std::fs::read_link(filename) +} + +#[derive(Debug)] +pub enum Interpreter { + None, + Shebang(String), + ExecutableUnaccessible, + Error(io::Error), +} + +impl Display for Interpreter { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + Interpreter::None => write!(f, "none"), + Interpreter::Shebang(s) => write!(f, "{:?}", s), + Interpreter::ExecutableUnaccessible => { + write!(f, "executable unaccessible") + } + Interpreter::Error(e) => write!(f, "(err: {e})"), + } + } +} + +pub fn read_interpreter_recursive(exe: impl AsRef) -> Vec { + let mut exe = Cow::Borrowed(exe.as_ref()); + let mut interpreters = Vec::new(); + loop { + match read_interpreter(exe.as_ref()) { + Interpreter::Shebang(shebang) => { + exe = Cow::Owned(PathBuf::from( + shebang.split_ascii_whitespace().next().unwrap_or(""), + )); + interpreters.push(Interpreter::Shebang(shebang)); + } + Interpreter::None => break, + err => { + interpreters.push(err); + break; + } + }; + } + interpreters +} + +pub fn read_interpreter(exe: &Path) -> Interpreter { + fn err_to_interpreter(e: io::Error) -> Interpreter { + if e.kind() == io::ErrorKind::PermissionDenied || e.kind() == io::ErrorKind::NotFound { + Interpreter::ExecutableUnaccessible + } else { + Interpreter::Error(e) + } + } + let file = match std::fs::File::open(exe) { + Ok(file) => file, + Err(e) => return err_to_interpreter(e), + }; + let mut reader = BufReader::new(file); + // First, check if it's a shebang script + let mut buf = [0u8; 2]; + + if let Err(e) = reader.read_exact(&mut buf) { + return Interpreter::Error(e); + }; + if &buf != b"#!" { + return Interpreter::None; + } + // Read the rest of the line + let mut buf = Vec::new(); + + if let Err(e) = reader.read_until(b'\n', &mut buf) { + return Interpreter::Error(e); + }; + // Get trimed shebang line [start, end) indices + // If the shebang line is empty, we don't care + let start = buf + .iter() + .position(|&c| !c.is_ascii_whitespace()) + .unwrap_or(0); + let end = buf + .iter() + .rposition(|&c| !c.is_ascii_whitespace()) + .map(|x| x + 1) + .unwrap_or(buf.len()); + let shebang = String::from_utf8_lossy(&buf[start..end]); + Interpreter::Shebang(shebang.into_owned()) +} + +pub struct ProcessStateStore { + processes: HashMap>, +} + +#[derive(Debug)] +pub struct ProcessState { + pub pid: Pid, + pub ppid: Option, + pub status: ProcessStatus, + pub start_time: u64, + pub argv: Vec, + pub comm: String, + pub presyscall: bool, + pub is_exec_successful: bool, + pub syscall: i64, + pub exec_data: Option, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum ProcessStatus { + SigstopReceived, + PtraceForkEventReceived, + Running, + Exited(i32), +} + +#[derive(Debug)] +pub struct ExecData { + pub filename: PathBuf, + pub argv: Vec, + pub envp: Vec, + pub cwd: PathBuf, + pub interpreters: Vec, +} + +impl ProcessStateStore { + pub fn new() -> Self { + Self { + processes: HashMap::new(), + } + } + + pub fn insert(&mut self, state: ProcessState) { + self.processes.entry(state.pid).or_default().push(state); + } + + pub fn get_current_mut(&mut self, pid: Pid) -> Option<&mut ProcessState> { + // The last process in the vector is the current process + // println!("Getting {pid}"); + self.processes.get_mut(&pid)?.last_mut() + } +} + +impl ProcessState { + pub fn new(pid: Pid, start_time: u64) -> anyhow::Result { + Ok(Self { + pid, + ppid: None, + status: ProcessStatus::Running, + comm: read_comm(pid)?, + argv: read_argv(pid)?, + start_time, + presyscall: true, + is_exec_successful: false, + syscall: -1, + exec_data: None, + }) + } +} + #[derive(Debug)] pub enum Event { Fork { child: Pid }, - Exec { prog: CString }, + Exec { prog: PathBuf }, Exit { code: i32 }, } @@ -33,175 +339,379 @@ pub struct Identifier { pub struct LogEntry { ident: Identifier, event: Event, + timestamp: Duration, } -impl Event { - pub fn log(self, ident: Identifier, event_log: &mut Vec) { - event_log.push(LogEntry { ident, event: self }); +impl Display for LogEntry { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!( + f, + "[{}.{:03} m{}p{}] {}", + self.timestamp.as_secs(), + self.timestamp.as_millis() % 1000, + self.ident.machine, + self.ident.pid, + self.event + ) } } -fn run_child() { - ptrace::traceme().unwrap(); - personality(ADDR_NO_RANDOMIZE).unwrap(); - let mut args = args(); - args.next().unwrap(); - let argv0 = args.next().unwrap(); - panic!("exec: {}", Command::new(argv0).args(args).exec()); -} - -fn run_parent(first_child: Pid) { - //ptrace::attach(first_child).unwrap(); - //assert!(matches!(dbg!(wait().unwrap()), WaitStatus::Stopped(c, s) if c == first_child && s as i32 == nix::libc::SIGSTOP)); - //ptrace::cont(first_child, None).unwrap(); - assert!( - matches!((wait().unwrap()), WaitStatus::Stopped(c, s) if c == first_child && s as i32 == nix::libc::SIGTRAP) - ); - ptrace::setoptions( - first_child, - ptrace::Options::PTRACE_O_TRACESYSGOOD - | ptrace::Options::PTRACE_O_TRACEFORK - | ptrace::Options::PTRACE_O_TRACEVFORK - | ptrace::Options::PTRACE_O_TRACECLONE - | ptrace::Options::PTRACE_O_TRACEEXEC - | ptrace::Options::PTRACE_O_TRACEEXIT, - ) - .unwrap(); - ptrace::syscall(first_child, None).unwrap(); - - let events = RefCell::new(vec![]); - let fdcache = RefCell::new(HashMap::new()); - let mut pending_execve = HashMap::new(); - #[derive(PartialEq, Debug)] - enum ForkState { - SeenFork, - SeenStop, - } - let mut suppress_stops = HashMap::new(); - - fn root_pid(pid: Pid) -> Identifier { - Identifier { machine: 0, pid } - } - let log = |event: Event, ident: Identifier| { - events.borrow_mut().push(dbg!(LogEntry { event, ident })); - }; - let readstr = |pid: Pid, addr: u64| -> std::io::Result { - let mut fdcache = fdcache.borrow_mut(); - let fp = fdcache - .entry(pid) - .or_insert_with(|| std::fs::File::open(format!("/proc/{pid}/mem")).unwrap()); - let mut buf = vec![]; - loop { - buf.extend_from_slice(&[0; 1024]); - let len = buf.len(); - fp.read_at(&mut buf[len - 1024..], addr)?; - match CStr::from_bytes_until_nul(&buf) { - Ok(cstr) => return Ok(cstr.to_owned()), - Err(_) => continue, - } +impl Display for Event { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + Event::Fork { child } => write!(f, "fork {child}"), + Event::Exec { prog } => write!(f, "exec {}", prog.to_string_lossy()), + Event::Exit { code } => write!(f, "exit with {code}"), } - }; + } +} - enum ResumeType { - Cont(Option), - Syscall(Option), - Hold, +pub struct Tracer { + pub store: ProcessStateStore, + pub log: Vec, + pub start_time: Instant, +} + +fn ptrace_syscall(pid: Pid, sig: Option) -> Result<(), Errno> { + match ptrace::syscall(pid, sig) { + Err(Errno::ESRCH) => { + log::info!("ptrace syscall failed: {pid}, ESRCH, child probably gone!"); + Ok(()) + } + other => other, + } +} + +impl Tracer { + pub fn log(&mut self, ident: Identifier, event: Event) { + self.log.push(LogEntry { + ident, + event, + timestamp: Instant::now().duration_since(self.start_time), + }); } - loop { - let waited = dbg!(wait()); - let signal = match waited { - Ok(WaitStatus::PtraceEvent(pid, _sig, event)) - if event == ptrace::Event::PTRACE_EVENT_VFORK as i32 - || event == ptrace::Event::PTRACE_EVENT_FORK as i32 - || event == ptrace::Event::PTRACE_EVENT_CLONE as i32 => - { - let newpid = Pid::from_raw(ptrace::getevent(pid).unwrap() as i32); - log(Event::Fork { child: newpid }, root_pid(pid)); - if suppress_stops.insert(newpid, ForkState::SeenFork) == Some(ForkState::SeenStop) { - suppress_stops.remove(&newpid); - ResumeType::Syscall(None) - } else { - ResumeType::Hold - } + pub fn log_root(&mut self, pid: Pid, event: Event) { + self.log(Identifier { pid, machine: 0 }, event); + } + + pub fn new() -> anyhow::Result { + Ok(Self { + store: ProcessStateStore::new(), + log: vec![], + start_time: Instant::now(), + }) + } + + pub fn start_root_process(&mut self, args: Vec) -> anyhow::Result<()> { + log::trace!("start_root_process: {:?}", args); + + if let ForkResult::Parent { child: root_child } = unsafe { nix::unistd::fork()? } { + waitpid(root_child, Some(WaitPidFlag::WSTOPPED))?; // wait for child to stop + log::trace!("child stopped"); + let mut root_child_state = ProcessState::new(root_child, 0)?; + root_child_state.ppid = Some(getpid()); + self.store.insert(root_child_state); + // Set foreground process group of the terminal + if -1 == unsafe { tcsetpgrp(STDIN_FILENO, root_child.as_raw()) } { + return Err(Errno::last().into()); } - Ok(WaitStatus::PtraceEvent(pid, _sig, event)) - if event == ptrace::Event::PTRACE_EVENT_EXEC as i32 => - { - log( - Event::Exec { - prog: pending_execve.remove(&pid).unwrap(), - }, - root_pid(pid), - ); - ResumeType::Syscall(None) - } - Ok(WaitStatus::PtraceSyscall(pid)) => { - let regs = ptrace::getregs(pid).unwrap(); - match (dbg!(regs.orig_rax)) as i64 { - nix::libc::SYS_execve => { - let Ok(name) = (readstr(pid, regs.rdi)) else { - continue; - }; - pending_execve.insert(pid, name); + // restart child + log::trace!("resuming child"); + let ptrace_opts = { + use nix::sys::ptrace::Options; + Options::PTRACE_O_TRACEEXEC + | Options::PTRACE_O_TRACEEXIT + | Options::PTRACE_O_EXITKILL + | Options::PTRACE_O_TRACESYSGOOD + | Options::PTRACE_O_TRACEFORK + | Options::PTRACE_O_TRACECLONE + | Options::PTRACE_O_TRACEVFORK + }; + ptrace::setoptions(root_child, ptrace_opts)?; + // restart child + self.seccomp_aware_cont(root_child)?; + loop { + let status = waitpid(None, Some(WaitPidFlag::__WALL))?; + // log::trace!("waitpid: {:?}", status); + match status { + WaitStatus::Stopped(pid, sig) => { + log::trace!("stopped: {pid}, sig {:?}", sig); + match sig { + Signal::SIGSTOP => { + log::trace!("sigstop event, child: {pid}"); + if let Some(state) = self.store.get_current_mut(pid) { + if state.status == ProcessStatus::PtraceForkEventReceived { + log::trace!("sigstop event received after ptrace fork event, pid: {pid}"); + state.status = ProcessStatus::Running; + self.seccomp_aware_cont(pid)?; + } else if pid != root_child { + log::error!("Unexpected SIGSTOP: {state:?}") + } + } else { + log::trace!("sigstop event received before ptrace fork event, pid: {pid}"); + let mut state = ProcessState::new(pid, 0)?; + state.status = ProcessStatus::SigstopReceived; + self.store.insert(state); + } + // https://stackoverflow.com/questions/29997244/occasionally-missing-ptrace-event-vfork-when-running-ptrace + // DO NOT send PTRACE_SYSCALL until we receive the PTRACE_EVENT_FORK, etc. + } + Signal::SIGCHLD => { + // From lurk: + // + // The SIGCHLD signal is sent to a process when a child process terminates, interrupted, or resumes after being interrupted + // This means, that if our tracee forked and said fork exits before the parent, the parent will get stopped. + // Therefor issue a PTRACE_SYSCALL request to the parent to continue execution. + // This is also important if we trace without the following forks option. + self.seccomp_aware_cont_with_signal(pid, Signal::SIGCHLD)?; + } + _ => { + // Just deliver the signal to tracee + self.seccomp_aware_cont_with_signal(pid, sig)?; + } + } } - nix::libc::SYS_execveat => { - let Ok(name) = (readstr(pid, regs.rsi)) else { - continue; - }; - pending_execve.insert(pid, name); + WaitStatus::Exited(pid, code) => { + log::trace!("exited: pid {}, code {:?}", pid, code); + self.log_root(pid, Event::Exit { code }); + self.store.get_current_mut(pid).unwrap().status = + ProcessStatus::Exited(code); + if pid == root_child { + break; + } + } + WaitStatus::PtraceEvent(pid, sig, evt) => { + log::trace!("ptrace event: {:?} {:?}", sig, evt); + match evt { + nix::libc::PTRACE_EVENT_FORK + | nix::libc::PTRACE_EVENT_VFORK + | nix::libc::PTRACE_EVENT_CLONE => { + let new_child = Pid::from_raw(ptrace::getevent(pid)? as pid_t); + log::trace!( + "ptrace fork event, evt {evt}, pid: {pid}, child: {new_child}" + ); + self.log_root(pid, Event::Fork { child: new_child }); + if let Some(state) = self.store.get_current_mut(new_child) { + if state.status == ProcessStatus::SigstopReceived { + log::trace!("ptrace fork event received after sigstop, pid: {pid}, child: {new_child}"); + state.status = ProcessStatus::Running; + state.ppid = Some(pid); + self.seccomp_aware_cont(new_child)?; + } else if new_child != root_child { + log::error!("Unexpected fork event: {state:?}") + } + } else { + log::trace!("ptrace fork event received before sigstop, pid: {pid}, child: {new_child}"); + let mut state = ProcessState::new(new_child, 0)?; + state.status = ProcessStatus::PtraceForkEventReceived; + state.ppid = Some(pid); + self.store.insert(state); + } + // Resume parent + self.seccomp_aware_cont(pid)?; + } + nix::libc::PTRACE_EVENT_EXEC => { + log::trace!("exec event"); + let p = self.store.get_current_mut(pid).unwrap(); + assert!(!p.presyscall); + // After execve or execveat, in syscall exit event, + // the registers might be clobbered(e.g. aarch64). + // So we need to determine whether exec is successful here. + // PTRACE_EVENT_EXEC only happens for successful exec. + p.is_exec_successful = true; + let prog = p.exec_data.as_ref().unwrap().filename.clone(); + self.log_root(pid, Event::Exec { prog }); + // Don't use seccomp_aware_cont here because that will skip the next syscall exit stop + self.syscall_enter_cont(pid)?; + } + nix::libc::PTRACE_EVENT_EXIT => { + log::trace!("exit event"); + self.seccomp_aware_cont(pid)?; + } + nix::libc::PTRACE_EVENT_SECCOMP => { + log::trace!("seccomp event"); + self.on_syscall_enter(pid)?; + } + _ => { + log::trace!("other event"); + self.seccomp_aware_cont(pid)?; + } + } + } + WaitStatus::Signaled(pid, sig, _) => { + log::debug!("signaled: {pid}, {:?}", sig); + if pid == root_child { + exit(128 + (sig as i32)) + } + } + WaitStatus::PtraceSyscall(pid) => { + let presyscall = self.store.get_current_mut(pid).unwrap().presyscall; + if presyscall { + self.on_syscall_enter(pid)?; + } else { + self.on_syscall_exit(pid)?; + } } _ => {} } - ResumeType::Syscall(None) - GROUP STOP???? } - Ok(WaitStatus::Exited(pid, code)) => { - fdcache.borrow_mut().remove(&pid); - log(Event::Exit { code }, root_pid(pid)); - continue; + } else { + let me = getpid(); + setpgid(me, me)?; + traceme()?; + log::trace!("traceme setup!"); + if 0 != unsafe { raise(SIGSTOP) } { + log::error!("raise failed!"); + exit(-1); } + log::trace!("raise success!"); - Ok(WaitStatus::Stopped(pid, signal)) => { - if signal == Signal::SIGSTOP { - if suppress_stops.insert(pid, ForkState::SeenStop) == Some(ForkState::SeenFork) - { - suppress_stops.remove(&pid); - ResumeType::Syscall(None) - } else { - ResumeType::Hold - } - } else if signal == Signal::SIGTRAP { - panic!("does this happen?"); - ResumeType::Syscall(None) - } else { - ResumeType::Cont(Some(signal)) - } - } + let args = args + .into_iter() + .map(CString::new) + .collect::, _>>()?; - Err(nix::errno::Errno::ECHILD) => { - break; - } - _ => todo!(), - }; - if let Some(pid) = waited.unwrap().pid() { - match signal { - ResumeType::Cont(signal) => ptrace::cont(pid, signal).unwrap(), - ResumeType::Syscall(signal) => ptrace::syscall(pid, signal).unwrap(), - ResumeType::Hold => {} - } + execvp(&args[0], &args)?; } + Ok(()) } + + fn on_syscall_enter(&mut self, pid: Pid) -> anyhow::Result<()> { + let p = self.store.get_current_mut(pid).unwrap(); + p.presyscall = !p.presyscall; + // SYSCALL ENTRY + let regs = match ptrace::getregs(pid) { + Ok(regs) => regs, + Err(Errno::ESRCH) => { + log::info!("ptrace getregs failed: {pid}, ESRCH, child probably gone!"); + return Ok(()); + } + e => e?, + }; + let syscallno = syscall_no_from_regs!(regs); + p.syscall = syscallno; + // log::trace!("pre syscall: {syscallno}"); + if syscallno == nix::libc::SYS_execveat { + log::trace!("pre execveat"); + // int execveat(int dirfd, const char *pathname, + // char *const _Nullable argv[], + // char *const _Nullable envp[], + // int flags); + let dirfd = syscall_arg!(regs, 0) as i32; + let pathname = read_string(pid, syscall_arg!(regs, 1) as AddressType)?; + let argv = read_string_array(pid, syscall_arg!(regs, 2) as AddressType)?; + let envp = read_string_array(pid, syscall_arg!(regs, 3) as AddressType)?; + let flags = syscall_arg!(regs, 4) as i32; + let filename = resolve_filename_at_fd(pid, pathname, dirfd, flags)?; + let interpreters = read_interpreter_recursive(&filename); + p.exec_data = Some(ExecData { + filename, + argv, + envp, + cwd: read_cwd(pid)?, + interpreters, + }); + } else if syscallno == nix::libc::SYS_execve { + log::trace!("pre execve",); + let filename = read_pathbuf(pid, syscall_arg!(regs, 0) as AddressType)?; + let argv = read_string_array(pid, syscall_arg!(regs, 1) as AddressType)?; + let envp = read_string_array(pid, syscall_arg!(regs, 2) as AddressType)?; + let interpreters = read_interpreter_recursive(&filename); + p.exec_data = Some(ExecData { + filename, + argv, + envp, + cwd: read_cwd(pid)?, + interpreters, + }); + } else if syscallno == SYS_clone || syscallno == SYS_clone3 { + } + self.syscall_enter_cont(pid)?; + Ok(()) + } + + fn on_syscall_exit(&mut self, pid: Pid) -> anyhow::Result<()> { + // SYSCALL EXIT + // log::trace!("post syscall {}", p.syscall); + let p = self.store.get_current_mut(pid).unwrap(); + p.presyscall = !p.presyscall; + + match p.syscall { + nix::libc::SYS_execve => { + log::trace!("post execve"); + // SAFETY: p.preexecve is false, so p.exec_data is Some + p.exec_data = None; + p.is_exec_successful = false; + // update comm + p.comm = read_comm(pid)?; + } + nix::libc::SYS_execveat => { + log::trace!("post execveat"); + p.exec_data = None; + p.is_exec_successful = false; + // update comm + p.comm = read_comm(pid)?; + } + _ => (), + } + self.seccomp_aware_cont(pid)?; + Ok(()) + } + + fn syscall_enter_cont(&self, pid: Pid) -> Result<(), Errno> { + ptrace_syscall(pid, None) + } + + /// When seccomp-bpf is enabled, we use ptrace::cont instead of ptrace::syscall to improve performance. + /// Then the next syscall-entry stop is skipped and the seccomp stop is used as the syscall entry stop. + fn seccomp_aware_cont(&self, pid: Pid) -> Result<(), Errno> { + ptrace_syscall(pid, None) + } + + fn seccomp_aware_cont_with_signal(&self, pid: Pid, sig: Signal) -> Result<(), Errno> { + ptrace_syscall(pid, Some(sig)) + } +} + +fn resolve_filename_at_fd( + pid: Pid, + pathname: String, + dirfd: i32, + flags: i32, +) -> anyhow::Result { + let pathname_is_empty = pathname.is_empty(); + let pathname = PathBuf::from(pathname); + Ok( + match ( + pathname.is_absolute(), + pathname_is_empty && ((flags & AT_EMPTY_PATH) != 0), + ) { + (true, _) => { + // If pathname is absolute, then dirfd is ignored. + pathname + } + (false, true) => { + // If pathname is an empty string and the AT_EMPTY_PATH flag is specified, then the file descriptor dirfd + // specifies the file to be executed + read_fd(pid, dirfd)? + } + (false, false) => { + // pathname is relative to dirfd + let dir = read_fd(pid, dirfd)?; + dir.join(pathname) + } + }, + ) } fn main() { - match unsafe { fork() } { - Ok(ForkResult::Child) => { - run_child(); - } - Ok(ForkResult::Parent { child }) => { - run_parent(child); - } - Err(e) => panic!("fork: {e}"), + env_logger::init(); + let mut t = Tracer::new().unwrap(); + let args = std::env::args().into_iter().skip(1).collect(); + t.start_root_process(args).unwrap(); + + for event in &t.log { + println!("{event}"); } }