It's networked now

This commit is contained in:
Audrey 2024-04-22 19:38:16 -07:00
parent 3504ee15dd
commit 90c9d9b784
8 changed files with 434 additions and 379 deletions

2
.cargo/config Normal file
View File

@ -0,0 +1,2 @@
[build]
target = "x86_64-unknown-linux-musl"

58
Cargo.lock generated
View File

@ -207,62 +207,6 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "crossbeam"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8"
dependencies = [
"crossbeam-channel",
"crossbeam-deque",
"crossbeam-epoch",
"crossbeam-queue",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab3db02a9c5b5121e1e42fbdb1aeb65f5e02624cc58c43f2884c6ccac0b82f95"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-queue"
version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df0346b5d5e76ac2fe4e327c5fd1118d6be7c51dfb18f9b7922923f287471e35"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345"
[[package]]
name = "crypto-common"
version = "0.1.6"
@ -458,6 +402,7 @@ dependencies = [
[[package]]
name = "nix"
version = "0.28.0"
source = "git+https://github.com/rhelmot/nix-rs?branch=master#e9f7c1b74ef7581adf1513a3f3c9a965824ee2d4"
dependencies = [
"bitflags 2.5.0",
"cfg-if",
@ -491,7 +436,6 @@ version = "0.1.0"
dependencies = [
"anyhow",
"clap",
"crossbeam",
"env_logger",
"gimli 0.29.0",
"hex",

View File

@ -7,7 +7,8 @@ edition = "2021"
[dependencies]
#nix = { version = "0.28.0", features = ["ptrace", "process", "fs", "poll"] }
nix = { path = "../../rust/nix", features = ["ptrace", "process", "fs", "poll"] }
nix = { git = "https://github.com/rhelmot/nix-rs", branch = "master", features = ["ptrace", "process", "fs", "poll"] }
#nix = { path = "../../rust/nix", features = ["ptrace", "process", "fs", "poll"] }
linux-personality = "1.0.0"
anyhow = { version = "1", features = ["backtrace"] }
log = "0.4"
@ -22,4 +23,3 @@ gimli = { version = "0.29.0" }
object = { version = "0.35" }
memmap2 = { version = "0.9.4" }
typed-arena = { version = "2" }
crossbeam = { version = "0.8" }

View File

@ -1,6 +1,6 @@
use std::{
borrow::Cow,
collections::{BTreeMap, HashMap, HashSet},
collections::{BTreeMap, BTreeSet, HashMap, HashSet},
fs, io,
path::{PathBuf, Path},
};
@ -47,7 +47,7 @@ pub struct FileStoreEntry {
/// Coarse classification of a stored file's contents.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum FileFormat {
    /// The file was recognised as ELF; this is the value `parse_format` returns for files that
    /// start with the ELF magic and parse as an object file.
    ELF,
    /// Any file that was not recognised as ELF.
    Other,
}
@ -79,14 +79,19 @@ impl FileStore {
hash,
format: FileFormat::Other,
input_names: HashSet::new(),
output_names: HashSet::from([path]),
output_names: HashSet::from([path.clone()]),
});
self.filenames.insert(path, index);
self.hashes.insert(hash, index);
false
}
}
/// Replaces the recorded [`FileFormat`] of the entry registered under `path`.
///
/// # Panics
///
/// Panics if `path` is not present in the filename table, naming the offending path in the
/// panic message.
pub fn update_format(&mut self, path: &Path, format: FileFormat) {
    // A single lookup both checks for existence and yields the index.
    let Some(&idx) = self.filenames.get(path) else {
        panic!("update_format called with unknown path {}", path.to_string_lossy())
    };
    self.files.get_mut(idx).unwrap().format = format;
}
@ -115,6 +120,7 @@ impl FileStore {
mut content: (impl io::Read + io::Seek),
) -> anyhow::Result<()> {
let mut h = Sha256::new();
log::debug!("Hashing {}", filename.to_string_lossy());
io::copy(&mut content, &mut h)?;
let hash = h.finalize().into();
@ -122,7 +128,7 @@ impl FileStore {
std::collections::btree_map::Entry::Vacant(e) => {
let index = self.files.len();
e.insert(index);
let format = self.parse_format(&mut content)?;
let (format, refs) = parse_format(&mut content)?;
self.files.push(FileStoreEntry {
index,
hash,
@ -130,6 +136,9 @@ impl FileStore {
input_names: [filename.clone()].into(),
output_names: HashSet::new(),
});
for (reference_path, _reference_hash) in refs { // lazy...
self.ingest_input(reference_path)?;
}
index
}
std::collections::btree_map::Entry::Occupied(e) => {
@ -148,6 +157,7 @@ impl FileStore {
Ok(())
}
/*
pub fn ingest_output_local(&mut self, filename: PathBuf) -> anyhow::Result<()> {
let stat = fs::metadata(&filename)?;
if stat.is_dir() {
@ -172,7 +182,7 @@ impl FileStore {
std::collections::btree_map::Entry::Vacant(e) => {
let index = self.files.len();
e.insert(index);
let format = self.parse_format(&mut content)?;
let (format, refs) = self.parse_format(&mut content)?;
self.files.push(FileStoreEntry {
index,
format,
@ -196,85 +206,7 @@ impl FileStore {
Ok(index)
}
fn parse_format(&mut self, fp: &mut (impl io::Read + io::Seek)) -> anyhow::Result<FileFormat> {
fp.seek(io::SeekFrom::Start(0))?;
let mut buf = [0; 4];
let count = read_exact_or_end(fp, &mut buf)?;
let buf = &buf[..count];
Ok(match buf {
[0x7f, b'E', b'L', b'F', ..] => {
let read_cache = ReadCache::new(fp);
let elf = object::File::parse(&read_cache)?;
let endian = if elf.is_little_endian() {
gimli::RunTimeEndian::Little
} else {
gimli::RunTimeEndian::Big
};
let arena_data = Arena::new();
let mut load_section = |id: gimli::SectionId| -> Result<_, _> {
load_file_section(id, &elf, endian, &arena_data)
};
let dwarf = gimli::Dwarf::load(&mut load_section).unwrap();
let mut units = dwarf.units();
let mut inputs = vec![];
while let Ok(Some(unit)) = units.next() {
let abbrev = dwarf.abbreviations(&unit)?;
let mut entries = unit.entries(&abbrev);
while let Some((_, entry)) = entries.next_dfs()? {
if entry.tag() == DW_TAG_compile_unit {
let mut basename = None;
let mut dirname = None;
if let Some(name) =
entry.attr(constants::DW_AT_name)?.map(|a| a.value())
{
if let Ok(name) = dwarf.attr_string(&dwarf.unit(unit)?, name) {
basename = Some(PathBuf::from(name.to_string()?));
}
}
if let Some(name) =
entry.attr(constants::DW_AT_comp_dir)?.map(|a| a.value())
{
if let Ok(name) = dwarf.attr_string(&dwarf.unit(unit)?, name) {
dirname = Some(PathBuf::from(name.to_string()?));
}
}
if let (Some(dirname), Some(basename)) = (dirname, basename) {
inputs.push(dirname.join(basename));
}
}
}
}
let references = inputs
.into_iter()
.map(|input| self.ingest_dependency_local(input))
.collect::<Result<Vec<_>, _>>()?
.into_iter()
.filter_map(|x| x)
.collect();
FileFormat::ELF { references }
}
_ => FileFormat::Other,
})
}
fn ingest_dependency_local(&mut self, filename: PathBuf) -> anyhow::Result<Option<usize>> {
// TODO: this needs to try suffixes of the filename against the filepath table to see if it
// was moved between compilation and ingestion
let metadata = match fs::metadata(&filename) {
Ok(m) => m,
Err(e) if e.kind() == io::ErrorKind::NotFound => return Ok(None),
Err(e) => return Err(e)?,
};
if !metadata.is_file() {
return Ok(None);
}
let fp = fs::File::open(&filename)?;
Ok(Some(self.ingest_output(filename, fp)?))
}
*/
}
fn load_file_section<'input, 'arena, Endian: gimli::Endianity, R: object::ReadRef<'input>>(
@ -305,3 +237,84 @@ fn read_exact_or_end(fp: &mut impl io::Read, buf: &mut [u8]) -> anyhow::Result<u
}
Ok(read_so_far)
}
/// Determines the format of `fp` and, for ELF files, collects the source files referenced by
/// its DWARF compile units.
///
/// The stream is rewound to its start and the first four bytes are compared against the ELF
/// magic. For an ELF file, every `DW_TAG_compile_unit` entry that carries both `DW_AT_comp_dir`
/// and `DW_AT_name` contributes the path `comp_dir.join(name)`. Each such path that exists as a
/// regular file on the local filesystem is hashed with SHA-256 and returned together with its
/// hash; paths that are missing or are not regular files are skipped.
///
/// Returns `(FileFormat::Other, {})` for anything that does not start with the ELF magic.
///
/// # Errors
///
/// Returns an error if seeking/reading `fp` fails, if the ELF or its DWARF data cannot be
/// parsed or loaded, or if a referenced file exists but cannot be inspected, opened or read.
pub fn parse_format(fp: &mut (impl io::Read + io::Seek)) -> anyhow::Result<(FileFormat, BTreeSet<(PathBuf, Sha256Hash)>)> {
    fp.seek(io::SeekFrom::Start(0))?;
    let mut magic = [0; 4];
    let count = read_exact_or_end(fp, &mut magic)?;
    // Files shorter than four bytes can never match the magic.
    if !matches!(&magic[..count], [0x7f, b'E', b'L', b'F', ..]) {
        return Ok((FileFormat::Other, BTreeSet::new()));
    }

    let read_cache = ReadCache::new(fp);
    let elf = object::File::parse(&read_cache)?;
    let endian = if elf.is_little_endian() {
        gimli::RunTimeEndian::Little
    } else {
        gimli::RunTimeEndian::Big
    };
    let arena_data = Arena::new();
    let mut load_section = |id: gimli::SectionId| -> Result<_, _> {
        load_file_section(id, &elf, endian, &arena_data)
    };
    // Report a section-loading failure to the caller instead of panicking; the error type is
    // only known to be `Debug` here, so it is rendered into the message.
    let dwarf = gimli::Dwarf::load(&mut load_section)
        .map_err(|e| anyhow::anyhow!("Could not load DWARF sections: {:?}", e))?;

    let mut units = dwarf.units();
    let mut inputs = vec![];
    // NOTE: iteration stops silently at the first unit header that fails to parse.
    while let Ok(Some(unit)) = units.next() {
        let abbrev = dwarf.abbreviations(&unit)?;
        let mut entries = unit.entries(&abbrev);
        while let Some((_, entry)) = entries.next_dfs()? {
            if entry.tag() != DW_TAG_compile_unit {
                continue;
            }
            let mut basename = None;
            let mut dirname = None;
            if let Some(name) = entry.attr(constants::DW_AT_name)?.map(|a| a.value()) {
                if let Ok(name) = dwarf.attr_string(&dwarf.unit(unit)?, name) {
                    basename = Some(PathBuf::from(name.to_string()?));
                }
            }
            if let Some(name) = entry.attr(constants::DW_AT_comp_dir)?.map(|a| a.value()) {
                if let Ok(name) = dwarf.attr_string(&dwarf.unit(unit)?, name) {
                    dirname = Some(PathBuf::from(name.to_string()?));
                }
            }
            if let (Some(dirname), Some(basename)) = (dirname, basename) {
                inputs.push(dirname.join(basename));
            }
        }
    }

    let references = inputs
        .into_iter()
        .map(|filename| -> anyhow::Result<_> {
            // TODO: this needs to try suffixes of the filename against the filepath table to see if it
            // was moved between compilation and ingestion. but how...
            let metadata = match fs::metadata(&filename) {
                Ok(m) => m,
                // A source file that no longer exists is simply not reported.
                Err(e) if e.kind() == io::ErrorKind::NotFound => return Ok(None),
                Err(e) => return Err(e.into()),
            };
            if !metadata.is_file() {
                return Ok(None);
            }
            let mut fp = fs::File::open(&filename)?;
            let mut h = Sha256::new();
            log::debug!("Hashing {}", filename.to_string_lossy());
            io::copy(&mut fp, &mut h)?;
            Ok(Some((filename, h.finalize().into())))
        })
        .collect::<Result<Vec<_>, _>>()?
        .into_iter()
        .flatten()
        .collect();
    Ok((FileFormat::ELF, references))
}

View File

@ -40,7 +40,8 @@ enum Subcommands {
/// The filepath to dump the json report to. will dump to stdout if unspecified.
output: Option<PathBuf>,
},
_InternalLaunch {
InternalLaunch {
machine: i32,
connect: String,
cmd: Vec<String>
},
@ -67,13 +68,12 @@ fn main() {
} else {
Box::new(std::io::stdout())
};
let mut t = tracer::server::Tracer::run(file_scope).unwrap();
t.start_root_process(cmd, mute).unwrap();
let t = tracer::server::Tracer::run(file_scope, cmd, mute).unwrap();
if output.is_none() {
serde_json::to_writer_pretty(fp, &t.report)
serde_json::to_writer_pretty(fp, &t)
} else {
serde_json::to_writer(fp, &t.report)
serde_json::to_writer(fp, &t)
}
.expect("Could not serialize json trace report");
}
@ -90,7 +90,7 @@ fn main() {
Box::new(std::io::stdout())
};
let in_report: tracer::TracerReport = if let Some(input) = &input {
let in_report: tracer::types::TracerReport = if let Some(input) = &input {
serde_json::from_reader(std::fs::File::open(input).unwrap())
} else {
serde_json::from_reader(std::io::stdin())
@ -106,7 +106,8 @@ fn main() {
}
.expect("Could not serialize json parameter report");
}
}
Subcommands::_InternalLaunch { connect, cmd } => {
Subcommands::InternalLaunch { machine, connect, cmd } => {
tracer::client::TracerClient::run(machine, connect, cmd).expect("Tracing failed");
}
}
}

View File

@ -1,11 +1,5 @@
use std::{
collections::HashMap,
ffi::CString,
ffi::OsString,
os::{fd::AsRawFd, unix::prelude::OsStringExt},
path::PathBuf,
process::exit,
time::Instant,
collections::{BTreeSet, HashMap}, ffi::CString, ffi::OsString, io::Write, net::TcpStream, os::unix::prelude::OsStringExt, path::PathBuf, process::exit, time::Instant
};
use nix::{
@ -18,8 +12,10 @@ use nix::{
},
unistd::{execvp, getpid, setpgid, ForkResult},
};
use serde_json::de::IoRead;
use sha2::{Sha256, Digest};
use crate::filestore::Sha256Hash;
use crate::filestore::{parse_format, Sha256Hash};
use super::types::*;
@ -323,11 +319,11 @@ fn ptrace_syscall(pid: Pid, sig: Option<Signal>) -> Result<(), Errno> {
}
}
struct TracerClient {
pub struct TracerClient {
store: ProcessStateStore,
start_time: Instant,
pending_events: Vec<LogEntry>,
pending_files: Vec<(PathBuf, Sha256Hash)>,
pending_files: BTreeSet<(PathBuf, Sha256Hash)>,
machine: i32,
}
@ -344,6 +340,20 @@ impl TracerClient {
self.log(Identifier { pid, machine: self.machine }, event);
}
fn ingest_file(&mut self, path: PathBuf) -> anyhow::Result<()> {
let stat = std::fs::metadata(&path)?;
if !stat.is_file() {
return Ok(());
}
let mut fp = std::fs::File::open(&path)?;
let mut h = Sha256::new();
log::debug!("Hashing {} (client)", path.to_string_lossy());
std::io::copy(&mut fp, &mut h)?;
let hash = h.finalize().into();
self.pending_files.insert((path, hash));
Ok(())
}
fn drain_syscall_events(&mut self, pid: Pid, mut filter: Box<dyn FnMut(&mut Event)>) {
let p = self.store.get_current_mut(pid).unwrap();
for mut event in p.pending_syscall_event.drain(..) {
@ -356,172 +366,17 @@ impl TracerClient {
}
}
pub fn start_root_process(&mut self, args: Vec<String>, mute: bool) -> anyhow::Result<()> {
log::trace!("start_root_process: {:?}", args);
if let ForkResult::Parent { child: root_child } = unsafe { nix::unistd::fork()? } {
waitpid(root_child, Some(WaitPidFlag::WSTOPPED))?; // wait for child to stop
let root_child = root_child.into();
log::trace!("child stopped");
let mut root_child_state = ProcessState::new(root_child, 0)?;
root_child_state.ppid = Some(getpid().into());
self.store.insert(root_child_state);
// Set foreground process group of the terminal
if -1 == unsafe { tcsetpgrp(STDIN_FILENO, root_child.0) } {
return Err(Errno::last().into());
}
// restart child
log::trace!("resuming child");
let ptrace_opts = {
use nix::sys::ptrace::Options;
Options::PTRACE_O_TRACEEXEC
| Options::PTRACE_O_TRACEEXIT
| Options::PTRACE_O_EXITKILL
| Options::PTRACE_O_TRACESYSGOOD
| Options::PTRACE_O_TRACEFORK
| Options::PTRACE_O_TRACECLONE
| Options::PTRACE_O_TRACEVFORK
};
ptrace::setoptions(root_child.into(), ptrace_opts)?;
// restart child
self.seccomp_aware_cont(root_child)?;
loop {
let status = waitpid(None, Some(WaitPidFlag::__WALL))?;
// log::trace!("waitpid: {:?}", status);
match status {
WaitStatus::Stopped(pid, sig) => {
let pid = pid.into();
log::trace!("stopped: {pid}, sig {:?}", sig);
match sig {
Signal::SIGSTOP => {
log::trace!("sigstop event, child: {pid}");
if let Some(state) = self.store.get_current_mut(pid) {
if state.status == ProcessStatus::PtraceForkEventReceived {
log::trace!("sigstop event received after ptrace fork event, pid: {pid}");
state.status = ProcessStatus::Running;
self.seccomp_aware_cont(pid)?;
} else if pid != root_child {
log::error!("Unexpected SIGSTOP: {state:?}")
}
} else {
log::trace!("sigstop event received before ptrace fork event, pid: {pid}");
let mut state = ProcessState::new(pid, 0)?;
state.status = ProcessStatus::SigstopReceived;
self.store.insert(state);
}
// https://stackoverflow.com/questions/29997244/occasionally-missing-ptrace-event-vfork-when-running-ptrace
// DO NOT send PTRACE_SYSCALL until we receive the PTRACE_EVENT_FORK, etc.
}
Signal::SIGCHLD => {
// From lurk:
//
// The SIGCHLD signal is sent to a process when a child process terminates, interrupted, or resumes after being interrupted
// This means, that if our tracee forked and said fork exits before the parent, the parent will get stopped.
// Therefor issue a PTRACE_SYSCALL request to the parent to continue execution.
// This is also important if we trace without the following forks option.
self.seccomp_aware_cont_with_signal(pid, Signal::SIGCHLD)?;
}
_ => {
// Just deliver the signal to tracee
self.seccomp_aware_cont_with_signal(pid, sig)?;
}
}
}
WaitStatus::Exited(pid, code) => {
let pid = pid.into();
log::trace!("exited: pid {}, code {:?}", pid, code);
self.log_root(pid, Event::Exit { code });
self.store.get_current_mut(pid).unwrap().status =
ProcessStatus::Exited(code);
if pid == root_child {
break;
}
}
WaitStatus::PtraceEvent(pid, sig, evt) => {
log::trace!("ptrace event: {:?} {:?}", sig, evt);
match evt {
nix::libc::PTRACE_EVENT_FORK
| nix::libc::PTRACE_EVENT_VFORK
| nix::libc::PTRACE_EVENT_CLONE => {
let new_child = Pid(ptrace::getevent(pid.into())? as pid_t);
log::trace!(
"ptrace fork event, evt {evt}, pid: {pid}, child: {new_child}"
);
self.log_root(pid.into(), Event::Fork { child: new_child });
if let Some(state) = self.store.get_current_mut(new_child) {
if state.status == ProcessStatus::SigstopReceived {
log::trace!("ptrace fork event received after sigstop, pid: {pid}, child: {new_child}");
state.status = ProcessStatus::Running;
state.ppid = Some(pid.into());
self.seccomp_aware_cont(new_child)?;
} else if new_child != root_child {
log::error!("Unexpected fork event: {state:?}")
}
} else {
log::trace!("ptrace fork event received before sigstop, pid: {pid}, child: {new_child}");
let mut state = ProcessState::new(new_child, 0)?;
state.status = ProcessStatus::PtraceForkEventReceived;
state.ppid = Some(pid.into());
self.store.insert(state);
}
// Resume parent
self.seccomp_aware_cont(pid.into())?;
}
nix::libc::PTRACE_EVENT_EXEC => {
log::trace!("exec event");
let p = self.store.get_current_mut(pid.into()).unwrap();
assert!(!p.presyscall);
// After execve or execveat, in syscall exit event,
// the registers might be clobbered(e.g. aarch64).
// So we need to determine whether exec is successful here.
// PTRACE_EVENT_EXEC only happens for successful exec.
p.is_exec_successful = true;
let path = p
.pending_syscall_event
.iter()
.find_map(|e| match e {
Event::Exec { prog, .. } => Some(prog.clone()),
_ => None,
})
.unwrap();
self.report.files.ingest_output_local(path)?;
self.drain_syscall_events(pid.into(), Box::new(|_| {}));
// Don't use seccomp_aware_cont here because that will skip the next syscall exit stop
self.syscall_enter_cont(pid.into())?;
}
nix::libc::PTRACE_EVENT_EXIT => {
log::trace!("exit event");
self.seccomp_aware_cont(pid.into())?;
}
nix::libc::PTRACE_EVENT_SECCOMP => {
log::trace!("seccomp event");
self.on_syscall_enter(pid.into())?;
}
_ => {
log::trace!("other event");
self.seccomp_aware_cont(pid.into())?;
}
}
}
WaitStatus::Signaled(pid, sig, _) => {
let pid: Pid = pid.into();
log::debug!("signaled: {pid}, {:?}", sig);
if pid == root_child {
exit(128 + (sig as i32))
}
}
WaitStatus::PtraceSyscall(pid) => {
let pid = pid.into();
let presyscall = self.store.get_current_mut(pid).unwrap().presyscall;
if presyscall {
self.on_syscall_enter(pid)?;
} else {
self.on_syscall_exit(pid)?;
}
}
_ => {}
}
}
pub fn run(machine: i32, connect: String, args: Vec<String>) -> anyhow::Result<()> {
let mut this = Self {
store: ProcessStateStore::default(),
start_time: Instant::now(),
pending_events: vec![],
pending_files: BTreeSet::new(),
machine,
};
let sock = TcpStream::connect(&connect).expect(format!("Could not connect to {connect}").as_str());
if let ForkResult::Parent { child } = unsafe { nix::unistd::fork()? } {
this.run_internal(sock, child.into())
} else {
let me = getpid();
setpgid(me, me)?;
@ -531,26 +386,222 @@ impl TracerClient {
exit(-1);
}
if mute {
let null = std::fs::File::options()
.read(true)
.write(true)
.open("/dev/null")
.expect("Could not open /dev/null");
nix::unistd::dup2(null.as_raw_fd(), 0)
.expect("Could not dup /dev/null to /dev/stdin");
nix::unistd::dup2(null.as_raw_fd(), 1)
.expect("Could not dup /dev/null to /dev/stdout");
nix::unistd::dup2(null.as_raw_fd(), 2)
.expect("Could not dup /dev/null to /dev/stderr");
}
let args = args
.into_iter()
.map(CString::new)
.collect::<Result<Vec<CString>, _>>()?;
execvp(&args[0], &args)?;
execvp(&args[0], &args).expect(format!("Failed to execute {args:?}").as_str());
unreachable!();
}
}
/// Drives the ptrace loop for the process tree rooted at `root_child`.
///
/// Waits for the freshly forked root child to stop, records it in the process store, makes it
/// the terminal's foreground process group, sets the ptrace options and resumes it. Each loop
/// iteration then waits for any tracee to report a status, updates the bookkeeping for that
/// process, sends pending events/files to the server over `sock` (answering any follow-up
/// requests), and finally resumes the process that reported the status.
///
/// Returns `Ok(())` once the root child exits. If the root child is killed by a signal this
/// function does not return; it calls `exit(128 + signal)`.
///
/// Panics if a status arrives for a process missing from the store, if the server's reply
/// stream yields no value while a reply is awaited, or if the server sends `AllocatedId`
/// during an event exchange.
fn run_internal(&mut self, mut sock: TcpStream, root_child: Pid) -> anyhow::Result<()> {
waitpid(nix::unistd::Pid::from(root_child.into()), Some(WaitPidFlag::WSTOPPED))?; // wait for child to stop
log::trace!("child stopped");
let mut root_child_state = ProcessState::new(root_child, 0)?;
root_child_state.ppid = Some(getpid().into());
self.store.insert(root_child_state);
// Set foreground process group of the terminal
if -1 == unsafe { tcsetpgrp(STDIN_FILENO, root_child.0) } {
return Err(Errno::last().into());
}
// configure tracing options before the child is resumed
log::trace!("resuming child");
let ptrace_opts = {
use nix::sys::ptrace::Options;
Options::PTRACE_O_TRACEEXEC
| Options::PTRACE_O_TRACEEXIT
| Options::PTRACE_O_EXITKILL
| Options::PTRACE_O_TRACESYSGOOD
| Options::PTRACE_O_TRACEFORK
| Options::PTRACE_O_TRACECLONE
| Options::PTRACE_O_TRACEVFORK
};
ptrace::setoptions(root_child.into(), ptrace_opts)?;
// restart child
ptrace::syscall(nix::unistd::Pid::from(root_child.into()), None)?;
loop {
let status = waitpid(None, Some(WaitPidFlag::__WALL))?;
// log::trace!("waitpid: {:?}", status);
// `signal` is the signal (if any) passed to `ptrace_syscall` when the process is resumed
// at the bottom of this iteration.
let signal = match status {
WaitStatus::Stopped(pid, sig) => {
let pid = pid.into();
log::trace!("stopped: {pid}, sig {:?}", sig);
match sig {
Signal::SIGSTOP => {
log::trace!("sigstop event, child: {pid}");
// The SIGSTOP of a new child and the fork event of its parent can arrive in
// either order; the process status records which half has been seen so far.
if let Some(state) = self.store.get_current_mut(pid) {
if state.status == ProcessStatus::PtraceForkEventReceived {
log::trace!("sigstop event received after ptrace fork event, pid: {pid}");
state.status = ProcessStatus::Running;
} else if pid != root_child {
log::error!("Unexpected SIGSTOP: {state:?}")
}
} else {
log::trace!("sigstop event received before ptrace fork event, pid: {pid}");
let mut state = ProcessState::new(pid, 0)?;
state.status = ProcessStatus::SigstopReceived;
self.store.insert(state);
}
None
}
Signal::SIGCHLD => {
// From lurk:
//
// The SIGCHLD signal is sent to a process when a child process terminates, interrupted, or resumes after being interrupted
// This means, that if our tracee forked and said fork exits before the parent, the parent will get stopped.
// Therefore issue a PTRACE_SYSCALL request to the parent to continue execution.
// This is also important if we trace without the following forks option.
Some(Signal::SIGCHLD)
}
_ => {
// Just deliver the signal to tracee
Some(sig)
}
}
}
WaitStatus::Exited(pid, code) => {
let pid = pid.into();
log::trace!("exited: pid {}, code {:?}", pid, code);
self.log_root(pid, Event::Exit { code });
self.store.get_current_mut(pid).unwrap().status =
ProcessStatus::Exited(code);
// Tracing ends when the root child exits. Note that events queued in this iteration
// (including the Exit event above) are not sent before leaving the loop.
if pid == root_child {
break;
}
None
}
WaitStatus::PtraceEvent(pid, sig, evt) => {
log::trace!("ptrace event: {:?} {:?}", sig, evt);
match evt {
nix::libc::PTRACE_EVENT_FORK
| nix::libc::PTRACE_EVENT_VFORK
| nix::libc::PTRACE_EVENT_CLONE => {
let new_child = Pid(ptrace::getevent(pid.into())? as pid_t);
log::trace!(
"ptrace fork event, evt {evt}, pid: {pid}, child: {new_child}"
);
self.log_root(pid.into(), Event::Fork { child: new_child });
if let Some(state) = self.store.get_current_mut(new_child) {
if state.status == ProcessStatus::SigstopReceived {
log::trace!("ptrace fork event received after sigstop, pid: {pid}, child: {new_child}");
state.status = ProcessStatus::Running;
state.ppid = Some(pid.into());
} else if new_child != root_child {
log::error!("Unexpected fork event: {state:?}")
}
} else {
log::trace!("ptrace fork event received before sigstop, pid: {pid}, child: {new_child}");
let mut state = ProcessState::new(new_child, 0)?;
state.status = ProcessStatus::PtraceForkEventReceived;
state.ppid = Some(pid.into());
self.store.insert(state);
}
// Resume parent
None
}
nix::libc::PTRACE_EVENT_EXEC => {
log::trace!("exec event");
let p = self.store.get_current_mut(pid.into()).unwrap();
assert!(!p.presyscall);
// After execve or execveat, in syscall exit event,
// the registers might be clobbered(e.g. aarch64).
// So we need to determine whether exec is successful here.
// PTRACE_EVENT_EXEC only happens for successful exec.
p.is_exec_successful = true;
// The program path was recorded as a pending `Event::Exec` at syscall entry.
let path = p
.pending_syscall_event
.iter()
.find_map(|e| match e {
Event::Exec { prog, .. } => Some(prog.clone()),
_ => None,
})
.unwrap();
self.ingest_file(path)?;
self.drain_syscall_events(pid.into(), Box::new(|_| {}));
// Don't use seccomp_aware_cont here because that will skip the next syscall exit stop
None
}
nix::libc::PTRACE_EVENT_EXIT => {
log::trace!("exit event");
None
}
nix::libc::PTRACE_EVENT_SECCOMP => {
log::trace!("seccomp event");
self.on_syscall_enter(pid.into())?;
None
}
_ => {
log::trace!("other event");
None
}
}
}
WaitStatus::Signaled(pid, sig, _) => {
let pid: Pid = pid.into();
log::debug!("signaled: {pid}, {:?}", sig);
// Mirror the shell convention of 128 + signal number when the root child is killed.
if pid == root_child {
exit(128 + (sig as i32))
}
None
}
WaitStatus::PtraceSyscall(pid) => {
let pid = pid.into();
// `presyscall` tells whether this stop is a syscall entry or a syscall exit.
let presyscall = self.store.get_current_mut(pid).unwrap().presyscall;
if presyscall {
self.on_syscall_enter(pid)?;
} else {
self.on_syscall_exit(pid)?;
}
None
}
_ => None
};
// Flush everything queued during this iteration to the server before resuming the tracee.
if !self.pending_files.is_empty() || !self.pending_events.is_empty() {
let mut events = vec![];
let mut files = BTreeSet::new();
// Take the queued items, leaving empty collections behind.
std::mem::swap(&mut events, &mut self.pending_events);
std::mem::swap(&mut files, &mut self.pending_files);
let mut msg = TracerClientMessage::Events { events, files };
// Request/response exchange: send `msg` as one JSON document followed by a newline, then
// act on the server's reply until it answers `Continue`.
loop {
serde_json::to_writer(&sock, &msg)?;
sock.write_all("\n".as_bytes())?;
let event: TracerServerRequest = serde_json::StreamDeserializer::new(&mut IoRead::new(&sock)).next().unwrap()?;
match event {
TracerServerRequest::Continue => break,
TracerServerRequest::AnalyzeFiles { paths } => {
// Parse each requested file locally and report its format plus any files it references.
let mut formats = HashMap::new();
let mut files = BTreeSet::new();
for path in paths {
let mut fp = std::fs::File::open(&path)?;
log::debug!("Parsing format of {} (client)", path.to_string_lossy());
let (format, mut references) = parse_format(&mut fp)?;
formats.insert(path, format);
files.append(&mut references);
}
msg = TracerClientMessage::FileFormats { formats, files }
},
TracerServerRequest::AllocatedId { id } => {
panic!("Receieved unsolicited AllocatedId({id})");
}
}
}
}
// https://stackoverflow.com/questions/29997244/occasionally-missing-ptrace-event-vfork-when-running-ptrace
// DO NOT send PTRACE_SYSCALL until we receive the PTRACE_EVENT_FORK, etc.
if let Some(pid) = status.pid() {
let pid = pid.into();
let p = self.store.get_current_mut(pid).expect("No such process??");
// Processes still waiting for their fork event, and processes that have exited, are not resumed.
if !matches!(p.status, ProcessStatus::SigstopReceived | ProcessStatus::Exited(_)) {
ptrace_syscall(pid, signal)?;
}
}
}
Ok(())
}
@ -572,7 +623,6 @@ impl TracerClient {
// log::trace!("pre syscall: {syscallno}");
match syscallno {
nix::libc::SYS_execveat => {
log::trace!("pre execveat");
// int execveat(int dirfd, const char *pathname,
// char *const _Nullable argv[],
// char *const _Nullable envp[],
@ -587,7 +637,6 @@ impl TracerClient {
p.pending_syscall_event.push(Event::Exec { prog: filename });
}
nix::libc::SYS_execve => {
log::trace!("pre execve");
let filename = read_pathbuf(pid, syscall_arg!(regs, 0) as AddressType)?;
//let argv = read_string_array(pid, syscall_arg!(regs, 1) as AddressType)?;
//let envp = read_string_array(pid, syscall_arg!(regs, 2) as AddressType)?;
@ -647,7 +696,7 @@ impl TracerClient {
}
_ => {}
}
self.syscall_enter_cont(pid)?;
//self.syscall_enter_cont(pid)?;
Ok(())
}
@ -665,10 +714,10 @@ impl TracerClient {
e => e?,
};
let result = syscall_res_from_regs!(regs);
let mut pending_files = vec![];
let filter: Option<Box<dyn FnMut(&mut Event)>> = match p.syscall {
nix::libc::SYS_execve => {
log::trace!("post execve");
// SAFETY: p.preexecve is false, so p.exec_data is Some
p.is_exec_successful = false;
// update comm
@ -676,7 +725,6 @@ impl TracerClient {
None
}
nix::libc::SYS_execveat => {
log::trace!("post execveat");
p.is_exec_successful = false;
// update comm
p.comm = read_comm(pid)?;
@ -690,7 +738,7 @@ impl TracerClient {
..
} = pending
{
self.report.files.ingest_output_local(path.clone())?;
pending_files.push(path.clone());
}
}
Some(Box::new(move |event| match event {
@ -748,23 +796,11 @@ impl TracerClient {
} else {
p.pending_syscall_event.clear();
}
self.seccomp_aware_cont(pid)?;
for path in pending_files {
self.ingest_file(path)?;
}
Ok(())
}
/// Resumes `pid` by calling `ptrace_syscall` without passing a signal.
fn syscall_enter_cont(&self, pid: Pid) -> Result<(), Errno> {
ptrace_syscall(pid, None)
}
/// When seccomp-bpf is enabled, we use ptrace::cont instead of ptrace::syscall to improve performance.
/// Then the next syscall-entry stop is skipped and the seccomp stop is used as the syscall entry stop.
///
/// NOTE(review): as written, this always calls `ptrace_syscall` with no signal, exactly like
/// `syscall_enter_cont`; the seccomp-specific behavior described above is not implemented here.
fn seccomp_aware_cont(&self, pid: Pid) -> Result<(), Errno> {
ptrace_syscall(pid, None)
}
/// Resumes `pid` by calling `ptrace_syscall`, passing `sig` as the signal argument.
fn seccomp_aware_cont_with_signal(&self, pid: Pid, sig: Signal) -> Result<(), Errno> {
ptrace_syscall(pid, Some(sig))
}
}
fn resolve_filename_at_fd(

View File

@ -1,4 +1,4 @@
use std::{path::PathBuf, net::{TcpListener, TcpStream}, collections::BTreeMap, os::fd::{AsFd, AsRawFd}};
use std::{collections::BTreeMap, net::{TcpListener, TcpStream}, os::fd::{AsFd, AsRawFd, BorrowedFd}, path::PathBuf, process::{Command, Stdio}};
use serde_json::de::IoRead;
@ -12,20 +12,32 @@ pub struct Tracer {
}
impl Tracer {
pub fn run(input: Vec<PathBuf>, args: Vec<String>) -> anyhow::Result<TracerReport> {
pub fn run(input: Vec<PathBuf>, args: Vec<String>, mute: bool) -> anyhow::Result<TracerReport> {
let mut files = FileStore::new(input)?;
let mut log = vec![];
let listener = TcpListener::bind("127.0.0.1:9995").expect("Could not bind listener socket");
let connect = "127.0.0.1:9995".to_owned();
let listener = TcpListener::bind(&connect).expect("Could not bind listener socket");
let executable = std::env::current_exe().expect("Could not obtain current executable");
let mut proc = Command::new(executable);
proc.args(["internal-launch".to_owned(), "0".to_owned(), connect].iter().chain(args.iter()));
if mute {
proc.stdin(Stdio::null()).stdout(Stdio::null()).stderr(Stdio::null());
}
let mut child = proc.spawn().expect("Could not spawn child");
let mut next_child_id = 1;
struct ChildData {
tcp_stream: TcpStream,
json_stream: serde_json::StreamDeserializer<'static, IoRead<TcpStream>, TracerClientMessage>
json_stream: serde_json::StreamDeserializer<'static, IoRead<TcpStream>, TracerClientMessage>,
duped: i32,
}
enum ParentOrChild {
Parent(TcpListener),
Child(ChildData),
Dup(i32),
}
impl AsFd for ParentOrChild {
@ -33,13 +45,23 @@ impl Tracer {
match self {
ParentOrChild::Parent(i) => i.as_fd(),
ParentOrChild::Child(i) => i.tcp_stream.as_fd(),
ParentOrChild::Dup(i) => unsafe { BorrowedFd::borrow_raw(*i) }
}
}
}
let mut children = BTreeMap::new();
children.insert(listener.as_raw_fd(), ParentOrChild::Parent(listener));
let (first_child, _first_addr) = listener.accept().expect("Accept failed");
let duped = first_child.try_clone().expect("Dup failed");
children.insert(duped.as_raw_fd(), ParentOrChild::Dup(first_child.as_raw_fd()));
children.insert(first_child.as_raw_fd(), ParentOrChild::Child(ChildData {
tcp_stream: duped,
duped: first_child.as_raw_fd(),
json_stream: serde_json::StreamDeserializer::new(IoRead::new(first_child)),
}));
children.insert(listener.as_raw_fd(), ParentOrChild::Parent(listener));
loop {
if children.len() <= 1 {
@ -49,13 +71,32 @@ impl Tracer {
let mut fdset = children.values().into();
nix::sys::select::select(None, Some(&mut fdset), None, None, None).expect("Select failed");
let chosen = fdset.fds(None).next().unwrap().as_raw_fd();
let child = children.get(&chosen).unwrap();
let mut child = children.get_mut(&chosen).unwrap();
if let ParentOrChild::Dup(i) = child {
let i = *i;
child = children.get_mut(&i).unwrap();
}
match child {
ParentOrChild::Parent(p) => {
let (new_tcp, _new_addr) = p.accept().expect("Accept failed");
let duped = new_tcp.try_clone().expect("Dup failed");
children.insert(duped.as_raw_fd(), ParentOrChild::Dup(new_tcp.as_raw_fd()));
children.insert(new_tcp.as_raw_fd(), ParentOrChild::Child(ChildData {
tcp_stream: duped,
duped: new_tcp.as_raw_fd(),
json_stream: serde_json::StreamDeserializer::new(IoRead::new(new_tcp)),
}));
},
ParentOrChild::Dup(_) => unreachable!(),
ParentOrChild::Child(c) => {
let msg = c.json_stream.next().expect("Should NEVER be seen - StreamDeserializer is inexhaustable");
let Some(msg) = c.json_stream.next() else {
let fd1 = c.duped;
let fd2 = c.tcp_stream.as_raw_fd();
children.remove(&fd1);
children.remove(&fd2);
continue;
};
log::trace!("recv: {msg:?}");
let msg = match msg {
Ok(msg) => msg,
Err(e) => {
@ -64,12 +105,13 @@ impl Tracer {
continue;
}
};
serde_json::to_writer(&c.tcp_stream, &match msg {
let response = match msg {
TracerClientMessage::Events { events, files: file_events } => {
log.extend(events);
let mut paths = vec![];
for (path, hash) in file_events {
if !files.insert(path, hash) {
if !files.insert(path.clone(), hash) {
paths.push(path);
}
}
@ -79,17 +121,31 @@ impl Tracer {
TracerServerRequest::AnalyzeFiles { paths }
}
},
TracerClientMessage::FileFormats { formats } => {
TracerClientMessage::FileFormats { formats, files: file_list } => {
for (path, fmt) in formats {
files.update_format(&path, fmt)
files.update_format(&path, fmt);
}
TracerServerRequest::Continue
let paths: Vec<_> = file_list.into_iter().filter_map(|(path, hash)| (!files.hashes.contains_key(&hash)).then_some(path)).collect();
if paths.is_empty() {
TracerServerRequest::Continue
} else {
TracerServerRequest::AnalyzeFiles { paths }
}
}
TracerClientMessage::AllocateId { } => {
let result = TracerServerRequest::AllocatedId { id: next_child_id };
next_child_id += 1;
result
},
});
};
log::trace!("send: {response:?}");
serde_json::to_writer(&c.tcp_stream, &response)?;
},
}
}
child.wait().expect("Failed to wait for child");
Ok(TracerReport { log, files })
}
}

View File

@ -96,7 +96,7 @@ impl Display for Event {
}
}
#[derive(Serialize, Deserialize)]
#[derive(Serialize, Deserialize, Debug)]
pub enum TracerClientMessage {
Events {
events: Vec<LogEntry>,
@ -104,15 +104,18 @@ pub enum TracerClientMessage {
},
FileFormats {
formats: HashMap<PathBuf, FileFormat>,
files: BTreeSet<(PathBuf, Sha256Hash)>,
},
AllocateId {},
}
/// Reply sent by the tracer server to a tracer client after each `TracerClientMessage`.
///
/// `Debug` is derived because the server logs every reply it sends.
#[derive(Serialize, Deserialize, Debug)]
pub enum TracerServerRequest {
    /// Nothing further is needed; the client stops the exchange and resumes tracing.
    Continue,
    /// The client should parse the listed files and answer with
    /// `TracerClientMessage::FileFormats`.
    AnalyzeFiles {
        paths: Vec<PathBuf>,
    },
    /// Answer to `TracerClientMessage::AllocateId`, carrying the newly assigned id.
    AllocatedId { id: i32 },
}
#[derive(Serialize, Deserialize)]