diff --git a/src/tracer/client.rs b/src/tracer/client.rs index 42cc1c8..311a163 100644 --- a/src/tracer/client.rs +++ b/src/tracer/client.rs @@ -4,7 +4,7 @@ use std::{ use nix::{ errno::Errno, - libc::{pid_t, raise, tcsetpgrp, AT_EMPTY_PATH, AT_FDCWD, SIGSTOP, STDIN_FILENO}, + libc::{pid_t, raise, tcsetpgrp, AT_EMPTY_PATH, AT_FDCWD, SIGSTOP, STDIN_FILENO, user_regs_struct}, sys::{ ptrace::{self, traceme, AddressType}, signal::Signal, @@ -17,7 +17,9 @@ use sha2::{Sha256, Digest}; use crate::filestore::{parse_format, Sha256Hash}; -use super::types::*; +use super::{types::*, docker::instrument_docker_run_execve}; + +const WORD_SIZE: usize = 8; // FIXME pub fn read_generic_string( pid: Pid, @@ -26,7 +28,6 @@ pub fn read_generic_string( ) -> anyhow::Result { let mut buf = Vec::new(); let mut address = address; - const WORD_SIZE: usize = 8; // FIXME loop { let word = match ptrace::read(pid.into(), address) { Err(e) => { @@ -46,6 +47,17 @@ pub fn read_generic_string( } } +pub fn write_bytes(pid: Pid, mut address: AddressType, data: &[u8]) -> anyhow::Result<()> { + assert_eq!(address as usize % WORD_SIZE, 0); + for chunk in data.chunks(WORD_SIZE) { + let chunk: Vec<_> = chunk.into_iter().copied().chain(std::iter::repeat(0).take(WORD_SIZE - chunk.len())).collect(); + let word = i64::from_ne_bytes(chunk.try_into().unwrap()); + ptrace::write(pid.into(), address, word)?; + address = address.wrapping_byte_add(WORD_SIZE); + } + Ok(()) +} + #[allow(unused)] pub fn read_cstring(pid: Pid, address: AddressType) -> anyhow::Result { read_generic_string(pid, address, |x| CString::new(x).unwrap()) @@ -66,7 +78,6 @@ pub fn read_null_ended_array( reader: impl Fn(Pid, AddressType) -> anyhow::Result, ) -> anyhow::Result> { let mut res = Vec::new(); - const WORD_SIZE: usize = 8; // FIXME loop { let ptr = match ptrace::read(pid.into(), address) { Err(e) => { @@ -106,27 +117,36 @@ macro_rules! syscall_res_from_regs { }; } -macro_rules! syscall_arg { - ($regs:ident, 0) => { - $regs.rdi - }; - ($regs:ident, 1) => { - $regs.rsi - }; - ($regs:ident, 2) => { - $regs.rdx - }; - ($regs:ident, 3) => { - $regs.r10 - }; - ($regs:ident, 4) => { - $regs.r8 - }; - ($regs:ident, 5) => { - $regs.r9 +macro_rules! stack_ptr_from_regs { + ($regs:ident) => { + $regs.rsp as i64 }; } +fn syscall_arg(regs: &user_regs_struct, idx: usize) -> u64 { + match idx { + 0 => regs.rdi, + 1 => regs.rsi, + 2 => regs.rdx, + 3 => regs.r10, + 4 => regs.r8, + 5 => regs.r9, + _ => panic!("Bad syscall argument index"), + } +} + +fn set_syscall_arg(regs: &mut user_regs_struct, idx: usize, value: u64) { + match idx { + 0 => regs.rdi = value, + 1 => regs.rsi = value, + 2 => regs.rdx = value, + 3 => regs.r10 = value, + 4 => regs.r8 = value, + 5 => regs.r9 = value, + _ => panic!("Bad syscall argument index"), + } +} + pub fn read_argv(pid: Pid) -> anyhow::Result> { let filename = format!("/proc/{pid}/cmdline"); let buf = std::fs::read(filename)?; @@ -307,6 +327,16 @@ impl ProcessState { pending_syscall_event: vec![], }) } + + pub fn is_docker(&self) -> bool { + self.argv.get(0).is_some_and(|c| c.to_str() == Ok("docker")) + } + + pub fn update(&mut self) -> anyhow::Result<()> { + self.comm = read_comm(self.pid)?; + self.argv = read_argv(self.pid)?; + Ok(()) + } } fn ptrace_syscall(pid: Pid, sig: Option) -> Result<(), Errno> { @@ -320,6 +350,8 @@ fn ptrace_syscall(pid: Pid, sig: Option) -> Result<(), Errno> { } pub struct TracerClient { + connect: String, + sock: TcpStream, store: ProcessStateStore, start_time: Instant, pending_events: Vec, @@ -340,7 +372,10 @@ impl TracerClient { self.log(Identifier { pid, machine: self.machine }, event); } - fn ingest_file(&mut self, path: PathBuf) -> anyhow::Result<()> { + fn ingest_file(&mut self, pid: Pid, path: PathBuf) -> anyhow::Result<()> { + if self.store.get_current_mut(pid).unwrap().is_docker() { + return Ok(()); + } let stat = std::fs::metadata(&path)?; if !stat.is_file() { return Ok(()); @@ -354,8 +389,23 @@ impl TracerClient { Ok(()) } + fn commune_server(&mut self, msg: TracerClientMessage) -> anyhow::Result { + serde_json::to_writer(&self.sock, &msg)?; + self.sock.write_all("\n".as_bytes())?; + Ok(serde_json::StreamDeserializer::new(&mut IoRead::new(&self.sock)).next().unwrap()?) + } + + fn allocate_machine(&mut self) -> anyhow::Result { + let msg = self.commune_server(TracerClientMessage::AllocateId {})?; + let TracerServerRequest::AllocatedId { id } = msg else { panic!("Server did not respone to AllocateId with AllocatedId") }; + Ok(id) + } + fn drain_syscall_events(&mut self, pid: Pid, mut filter: Box) { let p = self.store.get_current_mut(pid).unwrap(); + if p.is_docker() { + return; + } for mut event in p.pending_syscall_event.drain(..) { (filter)(&mut event); self.pending_events.push(LogEntry { @@ -367,16 +417,18 @@ impl TracerClient { } pub fn run(machine: i32, connect: String, args: Vec) -> anyhow::Result<()> { - let mut this = Self { - store: ProcessStateStore::default(), - start_time: Instant::now(), - pending_events: vec![], - pending_files: BTreeSet::new(), - machine, - }; let sock = TcpStream::connect(&connect).expect(format!("Could not connect to {connect}").as_str()); if let ForkResult::Parent { child } = unsafe { nix::unistd::fork()? } { - this.run_internal(sock, child.into()) + let mut this = Self { + connect, + sock, + store: ProcessStateStore::default(), + start_time: Instant::now(), + pending_events: vec![], + pending_files: BTreeSet::new(), + machine, + }; + this.run_internal(child.into()) } else { let me = getpid(); setpgid(me, me)?; @@ -396,7 +448,7 @@ impl TracerClient { } } - fn run_internal(&mut self, mut sock: TcpStream, root_child: Pid) -> anyhow::Result<()> { + fn run_internal(&mut self, root_child: Pid) -> anyhow::Result<()> { waitpid(nix::unistd::Pid::from(root_child.into()), Some(WaitPidFlag::WSTOPPED))?; // wait for child to stop log::trace!("child stopped"); let mut root_child_state = ProcessState::new(root_child, 0)?; @@ -524,7 +576,7 @@ impl TracerClient { _ => None, }) .unwrap(); - self.ingest_file(path)?; + self.ingest_file(pid.into(), path)?; self.drain_syscall_events(pid.into(), Box::new(|_| {})); // Don't use seccomp_aware_cont here because that will skip the next syscall exit stop None @@ -573,10 +625,7 @@ impl TracerClient { let mut msg = TracerClientMessage::Events { events, files }; loop { - serde_json::to_writer(&sock, &msg)?; - sock.write_all("\n".as_bytes())?; - - let event: TracerServerRequest = serde_json::StreamDeserializer::new(&mut IoRead::new(&sock)).next().unwrap()?; + let event = self.commune_server(msg)?; match event { TracerServerRequest::Continue => break, @@ -633,33 +682,35 @@ impl TracerClient { // char *const _Nullable argv[], // char *const _Nullable envp[], // int flags); - let dirfd = syscall_arg!(regs, 0) as i32; - let pathname = read_string(pid, syscall_arg!(regs, 1) as AddressType)?; + let dirfd = syscall_arg(®s, 0) as i32; + let pathname = read_string(pid, syscall_arg(®s, 1) as AddressType)?; //let argv = read_string_array(pid, syscall_arg!(regs, 2) as AddressType)?; //let envp = read_string_array(pid, syscall_arg!(regs, 3) as AddressType)?; - let flags = syscall_arg!(regs, 4) as i32; + let flags = syscall_arg(®s, 4) as i32; let filename = resolve_filename_at_fd(pid, pathname, dirfd, flags)?; //let interpreters = read_interpreter_recursive(&filename); - p.pending_syscall_event.push(Event::Exec { prog: filename }); + p.pending_syscall_event.push(Event::Exec { prog: filename.clone() }); + self.instrument_exec(pid, filename.to_str().unwrap(), ®s, 1)?; } nix::libc::SYS_execve => { - let filename = read_pathbuf(pid, syscall_arg!(regs, 0) as AddressType)?; + let filename = read_pathbuf(pid, syscall_arg(®s, 0) as AddressType)?; //let argv = read_string_array(pid, syscall_arg!(regs, 1) as AddressType)?; //let envp = read_string_array(pid, syscall_arg!(regs, 2) as AddressType)?; //let interpreters = read_interpreter_recursive(&filename); - p.pending_syscall_event.push(Event::Exec { prog: filename }); + p.pending_syscall_event.push(Event::Exec { prog: filename.clone() }); + self.instrument_exec(pid, filename.to_str().unwrap(), ®s, 0)?; } nix::libc::SYS_open => { - let path = read_pathbuf(pid, syscall_arg!(regs, 0) as AddressType)?; + let path = read_pathbuf(pid, syscall_arg(®s, 0) as AddressType)?; p.pending_syscall_event.push(Event::FdOpen { source: FdSource::File { path }, fd: -1, }); } nix::libc::SYS_openat => { - let dirfd = syscall_arg!(regs, 0) as i32; - let pathname = read_string(pid, syscall_arg!(regs, 1) as AddressType)?; - let flags = syscall_arg!(regs, 2) as i32; + let dirfd = syscall_arg(®s, 0) as i32; + let pathname = read_string(pid, syscall_arg(®s, 1) as AddressType)?; + let flags = syscall_arg(®s, 2) as i32; let path = resolve_filename_at_fd(pid, pathname, dirfd, flags)?; p.pending_syscall_event.push(Event::FdOpen { source: FdSource::File { path }, @@ -670,24 +721,24 @@ impl TracerClient { | nix::libc::SYS_readv | nix::libc::SYS_preadv | nix::libc::SYS_preadv2 => { - let fd = syscall_arg!(regs, 0) as i32; + let fd = syscall_arg(®s, 0) as i32; p.pending_syscall_event.push(Event::FdRead { fd }); } nix::libc::SYS_write | nix::libc::SYS_writev | nix::libc::SYS_pwritev | nix::libc::SYS_pwritev2 => { - let fd = syscall_arg!(regs, 0) as i32; + let fd = syscall_arg(®s, 0) as i32; p.pending_syscall_event.push(Event::FdWrite { fd }); } nix::libc::SYS_dup | nix::libc::SYS_dup2 | nix::libc::SYS_dup3 => { - let oldfd = syscall_arg!(regs, 0) as i32; + let oldfd = syscall_arg(®s, 0) as i32; p.pending_syscall_event .push(Event::FdDup { oldfd, newfd: -1 }); } nix::libc::SYS_fcntl => { - let fd = syscall_arg!(regs, 0) as i32; - let cmd = syscall_arg!(regs, 1) as i32; + let fd = syscall_arg(®s, 0) as i32; + let cmd = syscall_arg(®s, 1) as i32; match cmd { nix::libc::F_DUPFD => p.pending_syscall_event.push(Event::FdDup { oldfd: fd, @@ -697,7 +748,7 @@ impl TracerClient { } } nix::libc::SYS_close => { - let fd = syscall_arg!(regs, 0) as i32; + let fd = syscall_arg(®s, 0) as i32; p.pending_syscall_event.push(Event::FdClose { fd }); } _ => {} @@ -726,14 +777,12 @@ impl TracerClient { nix::libc::SYS_execve => { // SAFETY: p.preexecve is false, so p.exec_data is Some p.is_exec_successful = false; - // update comm - p.comm = read_comm(pid)?; + p.update()?; None } nix::libc::SYS_execveat => { p.is_exec_successful = false; - // update comm - p.comm = read_comm(pid)?; + p.update()?; None } nix::libc::SYS_open | nix::libc::SYS_openat => { @@ -803,7 +852,47 @@ impl TracerClient { p.pending_syscall_event.clear(); } for path in pending_files { - self.ingest_file(path)?; + self.ingest_file(pid, path)?; + } + Ok(()) + } + + fn instrument_exec(&mut self, pid: Pid, filename: &str, regs: &user_regs_struct, prog_idx: usize) -> anyhow::Result<()> { + if let Some(new_args) = if filename.ends_with("/docker") { + let mut args = read_cstring_array(pid, syscall_arg(®s, prog_idx + 1) as AddressType)?; + if args.get(1).is_some_and(|c| c.to_str() == Ok("run")) { + let new_machine = self.allocate_machine()?; + let new_args = instrument_docker_run_execve(&mut args, new_machine, self.connect.as_str())?; + if new_args != args { + log::debug!("Launching docker child: {}", new_args.iter().map(|x| x.to_str().unwrap()).collect::>().join(" ")); + } + Some(new_args) + } else { + None + } + } else { + None + } { + let mut regs2 = regs.clone(); + let mut stacktop = stack_ptr_from_regs!(regs); + stacktop -= 128; + let mut argv_pointers = new_args.iter().map(|argstr| -> anyhow::Result { + let bytes = argstr.as_bytes_with_nul(); + stacktop -= bytes.len() as i64; + while stacktop % WORD_SIZE as i64 != 0 { + stacktop -= 1; + } + write_bytes(pid, stacktop as AddressType, bytes)?; + Ok(stacktop) + }).collect::>>()?; + assert_eq!(stacktop % WORD_SIZE as i64, 0); + argv_pointers.push(0); + for ptr in argv_pointers.iter().copied().rev() { + stacktop -= WORD_SIZE as i64; + ptrace::write(pid.into(), stacktop as AddressType, ptr)?; + } + set_syscall_arg(&mut regs2, prog_idx + 1, stacktop as u64); + ptrace::setregs(pid.into(), regs2)?; } Ok(()) } diff --git a/src/tracer/docker.rs b/src/tracer/docker.rs new file mode 100644 index 0000000..6e56b8f --- /dev/null +++ b/src/tracer/docker.rs @@ -0,0 +1,177 @@ +use std::{ + collections::HashSet, + env::current_exe, + ffi::CString, + process::Command, +}; + +pub fn instrument_docker_run_execve( + args: &Vec, + machine: i32, + connect: &str, +) -> anyhow::Result> { + enum Argument<'a> { + Zero(&'a str), + One(&'a str, &'a str), + } + #[derive(Default)] + struct ArgsParsed<'a> { + args: Vec>, + image: Option<&'a str>, + cmd: Vec<&'a str>, + } + impl<'a> ArgsParsed<'a> { + fn take_entrypoint(&mut self) -> Option<&'a str> { + if let Some((idx, _)) = self + .args + .iter() + .enumerate() + .find(|(_, val)| matches!(val, Argument::One("--entrypoint", _))) + { + let Argument::One(_, arg) = self.args.remove(idx) else { + unreachable!() + }; + Some(arg) + } else { + None + } + } + + fn take_cmd(&mut self) -> Option> { + if self.cmd.is_empty() { + None + } else { + let target = &mut self.cmd; + let mut result = vec![]; + std::mem::swap(target, &mut result); + Some(result) + } + } + + fn reserialize(self) -> Vec { + let mut result = vec![]; + for arg in self.args { + match arg { + Argument::Zero(a) => result.push(CString::new(a).unwrap()), + Argument::One(a, b) => { + result.push(CString::new(a).unwrap()); + result.push(CString::new(b).unwrap()); + } + } + } + if let Some(image) = self.image { + result.push(CString::new(image).unwrap()); + for cmd in self.cmd { + result.push(CString::new(cmd).unwrap()); + } + } + result + } + } + let unary_args = HashSet::from([ + "-d", + "--detach", + "--disable-content-trust", + "--help", + "--init", + "-i", + "--interactive", + "--no-healthcheck", + "--oom-kill-disable", + "--privileged", + "-P", + "--publish-all", + "-q", + "--quiet", + "--read-only", + "--rm", + "--sig-proxy", + "-t", + "--tty", + ]); + let mut string_args = ArgsParsed::default(); + let mut args_iter = args.iter(); + assert_eq!(args_iter.next().map(|x| x.to_str().unwrap()), Some("docker")); + assert_eq!(args_iter.next().map(|x| x.to_str().unwrap()), Some("run")); + while let Some(arg) = args_iter.next() { + let arg = arg.to_str()?; + + if arg.starts_with('-') { + let no_parameter = unary_args.contains(arg); + if !no_parameter { + let Some(parameter) = args_iter.next() else { + log::debug!("Docker: arg {} missing required argument", arg); + return Ok(args.clone()); + }; + string_args.args.push(Argument::One(arg, parameter.to_str()?)); + } else { + string_args.args.push(Argument::Zero(arg)); + } + } else { + string_args.image = Some(arg); + while let Some(arg) = args_iter.next() { + let arg = arg.to_str()?; + string_args.cmd.push(arg); + } + break; + } + } + + if let Some(image) = string_args.image { + let output = Command::new("docker").args(["inspect", image]).output()?; + if !output.status.success() { + log::debug!("Docker: image inspect for {} returned bad error code", image); + return Ok(args.clone()); + } + let value: serde_json::Value = serde_json::from_slice(&output.stdout)?; + let config = value + .as_array() + .unwrap() + .get(0) + .unwrap() + .as_object() + .unwrap() + .get("Config") + .unwrap() + .as_object() + .unwrap(); + let mut entrypoint = string_args + .take_entrypoint() + .map(|s| vec![s]) + .or_else(|| { + config.get("Entrypoint").unwrap().as_array().map(|a| { + a.into_iter() + .map(|s| s.as_str().unwrap()) + .collect::>() + }) + }) + .unwrap_or_else(Vec::new); + let cmd = string_args + .take_cmd() + .or_else(|| { + config.get("Cmd").unwrap().as_array().map(|a| { + a.into_iter() + .map(|s| s.as_str().unwrap()) + .collect::>() + }) + }) + .unwrap_or_else(Vec::new); + entrypoint.extend(cmd); + + entrypoint.insert(0, "/.ontology"); + entrypoint.insert(1, "internal-launch"); + let machine = machine.to_string(); + entrypoint.insert(2, &machine); + entrypoint.insert(3, connect); + + string_args + .args + .push(Argument::One("--entrypoint", entrypoint.remove(0))); + let volume = format!("{}:/.ontology", current_exe().unwrap().to_str().unwrap()); + string_args.args.push(Argument::One("-v", &volume)); + string_args.cmd = entrypoint; + Ok(string_args.reserialize()) + } else { + Ok(string_args.reserialize()) + } +} diff --git a/src/tracer/mod.rs b/src/tracer/mod.rs index a858f69..c8b14f5 100644 --- a/src/tracer/mod.rs +++ b/src/tracer/mod.rs @@ -1,3 +1,4 @@ pub mod client; pub mod server; pub mod types; +pub(self) mod docker; diff --git a/src/tracer/server.rs b/src/tracer/server.rs index b819976..a3722ee 100644 --- a/src/tracer/server.rs +++ b/src/tracer/server.rs @@ -1,4 +1,4 @@ -use std::{collections::BTreeMap, net::{TcpListener, TcpStream}, os::fd::{AsFd, AsRawFd, BorrowedFd}, path::PathBuf, process::{Command, Stdio}}; +use std::{collections::BTreeMap, net::{TcpListener, TcpStream}, os::fd::{AsFd, AsRawFd, BorrowedFd}, path::PathBuf, process::{Command, Stdio}, ffi::OsStr}; use serde_json::de::IoRead; @@ -22,10 +22,11 @@ impl Tracer { let executable = std::env::current_exe().expect("Could not obtain current executable"); let mut proc = Command::new(executable); - proc.args(["internal-launch".to_owned(), "0".to_owned(), connect].iter().chain(args.iter())); + proc.args(["internal-launch".to_owned(), "--".to_owned(), "0".to_owned(), connect].iter().chain(args.iter())); if mute { proc.stdin(Stdio::null()).stdout(Stdio::null()).stderr(Stdio::null()); } + log::debug!("Launching tracer child {}", proc.get_args().collect::>().join(OsStr::new(" ")).to_string_lossy()); let mut child = proc.spawn().expect("Could not spawn child"); let mut next_child_id = 1; @@ -79,7 +80,8 @@ impl Tracer { } match child { ParentOrChild::Parent(p) => { - let (new_tcp, _new_addr) = p.accept().expect("Accept failed"); + let (new_tcp, new_addr) = p.accept().expect("Accept failed"); + log::info!("New child connected from {new_addr}"); let duped = new_tcp.try_clone().expect("Dup failed"); children.insert(duped.as_raw_fd(), ParentOrChild::Dup(new_tcp.as_raw_fd())); children.insert(new_tcp.as_raw_fd(), ParentOrChild::Child(ChildData {