More client-server progress

This commit is contained in:
Audrey 2024-04-22 10:04:35 -07:00
parent 4ab8c409a0
commit 3504ee15dd
9 changed files with 321 additions and 143 deletions

63
Cargo.lock generated
View File

@ -139,9 +139,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]] [[package]]
name = "cfg_aliases" name = "cfg_aliases"
version = "0.1.1" version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" checksum = "77e53693616d3075149f4ead59bdeecd204ac6b8192d8969757601b74bddf00f"
[[package]] [[package]]
name = "clap" name = "clap"
@ -207,6 +207,62 @@ dependencies = [
"cfg-if", "cfg-if",
] ]
[[package]]
name = "crossbeam"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8"
dependencies = [
"crossbeam-channel",
"crossbeam-deque",
"crossbeam-epoch",
"crossbeam-queue",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab3db02a9c5b5121e1e42fbdb1aeb65f5e02624cc58c43f2884c6ccac0b82f95"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-queue"
version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df0346b5d5e76ac2fe4e327c5fd1118d6be7c51dfb18f9b7922923f287471e35"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345"
[[package]] [[package]]
name = "crypto-common" name = "crypto-common"
version = "0.1.6" version = "0.1.6"
@ -402,8 +458,6 @@ dependencies = [
[[package]] [[package]]
name = "nix" name = "nix"
version = "0.28.0" version = "0.28.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4"
dependencies = [ dependencies = [
"bitflags 2.5.0", "bitflags 2.5.0",
"cfg-if", "cfg-if",
@ -437,6 +491,7 @@ version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"clap", "clap",
"crossbeam",
"env_logger", "env_logger",
"gimli 0.29.0", "gimli 0.29.0",
"hex", "hex",

View File

@ -6,7 +6,8 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
nix = { version = "0.28.0", features = ["ptrace", "process", "fs"] } #nix = { version = "0.28.0", features = ["ptrace", "process", "fs", "poll"] }
nix = { path = "../../rust/nix", features = ["ptrace", "process", "fs", "poll"] }
linux-personality = "1.0.0" linux-personality = "1.0.0"
anyhow = { version = "1", features = ["backtrace"] } anyhow = { version = "1", features = ["backtrace"] }
log = "0.4" log = "0.4"
@ -21,3 +22,4 @@ gimli = { version = "0.29.0" }
object = { version = "0.35" } object = { version = "0.35" }
memmap2 = { version = "0.9.4" } memmap2 = { version = "0.9.4" }
typed-arena = { version = "2" } typed-arena = { version = "2" }
crossbeam = { version = "0.8" }

View File

@ -2,7 +2,7 @@ use std::{
borrow::Cow, borrow::Cow,
collections::{BTreeMap, HashMap, HashSet}, collections::{BTreeMap, HashMap, HashSet},
fs, io, fs, io,
path::PathBuf, path::{PathBuf, Path},
}; };
use gimli::{constants, DW_TAG_compile_unit}; use gimli::{constants, DW_TAG_compile_unit};
@ -66,6 +66,30 @@ impl FileStore {
Ok(result) Ok(result)
} }
/// Register the minimal set of information associated with a file.
///
/// If an entry with the same `hash` already exists, `path` is added to that entry's
/// `output_names`. Otherwise a fresh entry is appended with `FileFormat::Other` as its format
/// and `path` as its only output name.
///
/// In both cases `path` is indexed in `self.filenames`, and a fresh entry is also indexed in
/// `self.hashes`. Without this, a later `insert` of the same hash would create a duplicate
/// entry and `update_format` would panic on a path that was registered here.
///
/// Returns whether the file was already known.
pub fn insert(&mut self, path: PathBuf, hash: Sha256Hash) -> bool {
    if let Some(&index) = self.hashes.get(&hash) {
        // Known content under a (possibly) new name: remember the name for lookups by path.
        self.filenames.insert(path.clone(), index);
        self.files
            .get_mut(index)
            .expect("hash index refers to a missing file entry")
            .output_names
            .insert(path);
        true
    } else {
        let index = self.files.len();
        // NOTE(review): assumes `Sha256Hash: Clone` — confirm against its definition.
        self.hashes.insert(hash.clone(), index);
        self.filenames.insert(path.clone(), index);
        self.files.push(FileStoreEntry {
            index,
            hash,
            format: FileFormat::Other,
            input_names: HashSet::new(),
            output_names: HashSet::from([path]),
        });
        false
    }
}
/// Overwrite the stored format of the file registered under `path`.
///
/// # Panics
///
/// Panics if `path` is not present in `self.filenames`, or if the index stored for it does not
/// refer to an entry in `self.files`.
pub fn update_format(&mut self, path: &Path, format: FileFormat) {
    let Some(&slot) = self.filenames.get(path) else {
        panic!("update_format called with unknown path");
    };
    let entry = self.files.get_mut(slot).unwrap();
    entry.format = format;
}
fn ingest_input(&mut self, filename: PathBuf) -> anyhow::Result<()> { fn ingest_input(&mut self, filename: PathBuf) -> anyhow::Result<()> {
let stat = fs::metadata(&filename)?; let stat = fs::metadata(&filename)?;
if stat.is_dir() { if stat.is_dir() {

View File

@ -25,6 +25,7 @@ enum Subcommands {
#[arg(short, long)] #[arg(short, long)]
output: Option<PathBuf>, output: Option<PathBuf>,
/// Set this to
#[arg(short, long)] #[arg(short, long)]
mute: bool, mute: bool,
@ -39,6 +40,10 @@ enum Subcommands {
/// The filepath to dump the json report to. will dump to stdout if unspecified. /// The filepath to dump the json report to. will dump to stdout if unspecified.
output: Option<PathBuf>, output: Option<PathBuf>,
}, },
_InternalLaunch {
connect: String,
cmd: Vec<String>
},
} }
fn main() { fn main() {
@ -62,7 +67,7 @@ fn main() {
} else { } else {
Box::new(std::io::stdout()) Box::new(std::io::stdout())
}; };
let mut t = tracer::Tracer::new(file_scope).unwrap(); let mut t = tracer::server::Tracer::run(file_scope).unwrap();
t.start_root_process(cmd, mute).unwrap(); t.start_root_process(cmd, mute).unwrap();
if output.is_none() { if output.is_none() {
@ -102,4 +107,6 @@ fn main() {
.expect("Could not serialize json parameter report"); .expect("Could not serialize json parameter report");
} }
} }
Subcommands::_InternalLaunch { connect, cmd } => {
}
} }

View File

@ -2,7 +2,7 @@ use std::path::PathBuf;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use crate::tracer::TracerReport; use crate::tracer::types::TracerReport;
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
pub struct ParametersReport { pub struct ParametersReport {

View File

@ -2,15 +2,12 @@ use std::{
collections::HashMap, collections::HashMap,
ffi::CString, ffi::CString,
ffi::OsString, ffi::OsString,
fmt::{Display, Formatter},
os::{fd::AsRawFd, unix::prelude::OsStringExt}, os::{fd::AsRawFd, unix::prelude::OsStringExt},
path::PathBuf, path::PathBuf,
process::exit, process::exit,
time::{Duration, Instant}, time::Instant,
}; };
use core::fmt;
use nix::{ use nix::{
errno::Errno, errno::Errno,
libc::{pid_t, raise, tcsetpgrp, AT_EMPTY_PATH, AT_FDCWD, SIGSTOP, STDIN_FILENO}, libc::{pid_t, raise, tcsetpgrp, AT_EMPTY_PATH, AT_FDCWD, SIGSTOP, STDIN_FILENO},
@ -22,30 +19,9 @@ use nix::{
unistd::{execvp, getpid, setpgid, ForkResult}, unistd::{execvp, getpid, setpgid, ForkResult},
}; };
use serde::{Deserialize, Serialize}; use crate::filestore::Sha256Hash;
use crate::filestore::FileStore; use super::types::*;
#[derive(Copy, Clone, Serialize, Deserialize, Eq, PartialEq, Debug, Hash)]
pub struct Pid(i32);
impl From<nix::unistd::Pid> for Pid {
fn from(value: nix::unistd::Pid) -> Self {
Self(value.as_raw())
}
}
impl Into<nix::unistd::Pid> for Pid {
fn into(self) -> nix::unistd::Pid {
nix::unistd::Pid::from_raw(self.0)
}
}
impl Display for Pid {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
self.0.fmt(f)
}
}
pub fn read_generic_string<TString>( pub fn read_generic_string<TString>(
pid: Pid, pid: Pid,
@ -337,18 +313,6 @@ impl ProcessState {
} }
} }
pub struct Tracer {
pub store: ProcessStateStore,
pub start_time: Instant,
pub report: TracerReport,
}
#[derive(Serialize, Deserialize)]
pub struct TracerReport {
pub log: Vec<LogEntry>,
pub files: FileStore,
}
fn ptrace_syscall(pid: Pid, sig: Option<Signal>) -> Result<(), Errno> { fn ptrace_syscall(pid: Pid, sig: Option<Signal>) -> Result<(), Errno> {
match ptrace::syscall(pid.into(), sig) { match ptrace::syscall(pid.into(), sig) {
Err(Errno::ESRCH) => { Err(Errno::ESRCH) => {
@ -359,9 +323,17 @@ fn ptrace_syscall(pid: Pid, sig: Option<Signal>) -> Result<(), Errno> {
} }
} }
impl Tracer { struct TracerClient {
store: ProcessStateStore,
start_time: Instant,
pending_events: Vec<LogEntry>,
pending_files: Vec<(PathBuf, Sha256Hash)>,
machine: i32,
}
impl TracerClient {
pub fn log(&mut self, ident: Identifier, event: Event) { pub fn log(&mut self, ident: Identifier, event: Event) {
self.report.log.push(LogEntry { self.pending_events.push(LogEntry {
ident, ident,
event, event,
timestamp: Instant::now().duration_since(self.start_time), timestamp: Instant::now().duration_since(self.start_time),
@ -369,24 +341,15 @@ impl Tracer {
} }
pub fn log_root(&mut self, pid: Pid, event: Event) { pub fn log_root(&mut self, pid: Pid, event: Event) {
self.log(Identifier { pid, machine: 0 }, event); self.log(Identifier { pid, machine: self.machine }, event);
}
pub fn new(input: Vec<PathBuf>) -> anyhow::Result<Self> {
let files = FileStore::new(input)?;
Ok(Self {
store: ProcessStateStore::default(),
start_time: Instant::now(),
report: TracerReport { log: vec![], files },
})
} }
fn drain_syscall_events(&mut self, pid: Pid, mut filter: Box<dyn FnMut(&mut Event)>) { fn drain_syscall_events(&mut self, pid: Pid, mut filter: Box<dyn FnMut(&mut Event)>) {
let p = self.store.get_current_mut(pid).unwrap(); let p = self.store.get_current_mut(pid).unwrap();
for mut event in p.pending_syscall_event.drain(..) { for mut event in p.pending_syscall_event.drain(..) {
(filter)(&mut event); (filter)(&mut event);
self.report.log.push(LogEntry { self.pending_events.push(LogEntry {
ident: Identifier { pid, machine: 0 }, ident: Identifier { pid, machine: self.machine },
event, event,
timestamp: Instant::now().duration_since(self.start_time), timestamp: Instant::now().duration_since(self.start_time),
}); });

View File

@ -1,2 +1,3 @@
mod client; pub mod client;
mod server; pub mod server;
pub mod types;

View File

@ -1,91 +1,95 @@
use std::collections::HashSet; use std::{path::PathBuf, net::{TcpListener, TcpStream}, collections::BTreeMap, os::fd::{AsFd, AsRawFd}};
use serde::{Serialize, Deserialize}; use serde_json::de::IoRead;
use crate::filestore::{FileFormat, Sha256Hash}; use crate::filestore::FileStore;
#[derive(Debug, Serialize, Deserialize)] use super::types::*;
pub enum Event {
Fork { child: Pid },
Exec { prog: PathBuf }, pub struct Tracer {
Exit { code: i32 }, pub report: TracerReport,
FdOpen { fd: i32, source: FdSource },
FdDup { oldfd: i32, newfd: i32 },
FdClose { fd: i32 },
FdRead { fd: i32 },
FdWrite { fd: i32 },
} }
#[derive(Debug, Serialize, Deserialize)] impl Tracer {
pub enum FdSource { pub fn run(input: Vec<PathBuf>, args: Vec<String>) -> anyhow::Result<TracerReport> {
File { path: PathBuf }, let mut files = FileStore::new(input)?;
Tty, let mut log = vec![];
let listener = TcpListener::bind("127.0.0.1:9995").expect("Could not bind listener socket");
struct ChildData {
tcp_stream: TcpStream,
json_stream: serde_json::StreamDeserializer<'static, IoRead<TcpStream>, TracerClientMessage>
} }
#[derive(Clone, Debug, Serialize, Deserialize)] enum ParentOrChild {
pub struct Identifier { Parent(TcpListener),
machine: i32, Child(ChildData),
pid: Pid,
} }
#[derive(Debug, Serialize, Deserialize)] impl AsFd for ParentOrChild {
pub struct LogEntry { fn as_fd(&self) -> std::os::fd::BorrowedFd<'_> {
ident: Identifier,
event: Event,
timestamp: Duration,
}
impl Display for LogEntry {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
write!(
f,
"[{}.{:03} m{}p{}] {}",
self.timestamp.as_secs(),
self.timestamp.as_millis() % 1000,
self.ident.machine,
self.ident.pid,
self.event
)
}
}
impl Display for FdSource {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
match self { match self {
FdSource::File { path } => write!(f, "file {}", path.to_string_lossy()), ParentOrChild::Parent(i) => i.as_fd(),
FdSource::Tty => write!(f, "the terminal"), ParentOrChild::Child(i) => i.tcp_stream.as_fd(),
} }
} }
} }
impl Display for Event { let mut children = BTreeMap::new();
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { children.insert(listener.as_raw_fd(), ParentOrChild::Parent(listener));
match self {
Event::Fork { child } => write!(f, "fork {child}"),
Event::Exec { prog } => write!(f, "exec {}", prog.to_string_lossy()), loop {
Event::Exit { code } => write!(f, "exit with {code}"), if children.len() <= 1 {
Event::FdOpen { fd, source } => write!(f, "open fd {fd} from {source}"), break;
Event::FdDup { oldfd, newfd } => write!(f, "dup fd {oldfd} to {newfd}"),
Event::FdClose { fd } => write!(f, "close fd {fd}"),
Event::FdRead { fd } => write!(f, "read from fd {fd}"),
Event::FdWrite { fd } => write!(f, "write to fd {fd}"),
} }
let mut fdset = children.values().into();
nix::sys::select::select(None, Some(&mut fdset), None, None, None).expect("Select failed");
let chosen = fdset.fds(None).next().unwrap().as_raw_fd();
let child = children.get(&chosen).unwrap();
match child {
ParentOrChild::Parent(p) => {
let (new_tcp, _new_addr) = p.accept().expect("Accept failed");
},
ParentOrChild::Child(c) => {
let msg = c.json_stream.next().expect("Should NEVER be seen - StreamDeserializer is inexhaustable");
let msg = match msg {
Ok(msg) => msg,
Err(e) => {
log::error!("Child socket disconnected unexpectedly: {e:?}");
children.remove(&chosen);
continue;
}
};
serde_json::to_writer(&c.tcp_stream, &match msg {
TracerClientMessage::Events { events, files: file_events } => {
log.extend(events);
let mut paths = vec![];
for (path, hash) in file_events {
if !files.insert(path, hash) {
paths.push(path);
}
}
if paths.is_empty() {
TracerServerRequest::Continue
} else {
TracerServerRequest::AnalyzeFiles { paths }
}
},
TracerClientMessage::FileFormats { formats } => {
for (path, fmt) in formats {
files.update_format(&path, fmt)
}
TracerServerRequest::Continue
},
});
},
} }
} }
pub enum TracerClientMessage { Ok(TracerReport { log, files })
Events {
events: Vec<Event>,
files: HashSet<(PathBuf, Sha256Hash)>,
},
FileFormat {
format: FileFormat
},
} }
pub enum TracerServerRequest {
Continue,
AnalyzeFile {
path: PathBuf,
},
} }

122
src/tracer/types.rs Normal file
View File

@ -0,0 +1,122 @@
use std::{collections::{BTreeSet, HashMap}, path::PathBuf, fmt::{Display, Formatter}, time::Duration};
use serde::{Serialize, Deserialize};
use crate::filestore::{FileFormat, Sha256Hash, FileStore};
/// Process id newtype around the raw `i32` value.
///
/// Unlike `nix::unistd::Pid`, this type derives `Serialize`/`Deserialize`; conversions to and
/// from the `nix` type are provided alongside it.
#[derive(Copy, Clone, Serialize, Deserialize, Eq, PartialEq, Debug, Hash)]
pub struct Pid(pub(crate) i32);
impl From<nix::unistd::Pid> for Pid {
fn from(value: nix::unistd::Pid) -> Self {
Self(value.as_raw())
}
}
/// Converts back into the `nix` pid type from the stored raw id.
///
/// Implemented as `From` rather than a hand-written `Into`: the standard blanket impl then
/// supplies `Into<nix::unistd::Pid> for Pid`, so existing `pid.into()` call sites keep working
/// while `nix::unistd::Pid::from(pid)` becomes available as well.
impl From<Pid> for nix::unistd::Pid {
    fn from(value: Pid) -> Self {
        nix::unistd::Pid::from_raw(value.0)
    }
}
impl Display for Pid {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
/// One kind of traced occurrence, stored as the `event` field of a `LogEntry`.
///
/// The text quoted on each variant is what this type's `Display` impl prints for it.
#[derive(Debug, Serialize, Deserialize)]
pub enum Event {
/// Displayed as `fork {child}`.
Fork { child: Pid },
/// Displayed as `exec {prog}` (path rendered lossily).
Exec { prog: PathBuf },
/// Displayed as `exit with {code}`.
Exit { code: i32 },
/// Displayed as `open fd {fd} from {source}`.
FdOpen { fd: i32, source: FdSource },
/// Displayed as `dup fd {oldfd} to {newfd}`.
FdDup { oldfd: i32, newfd: i32 },
/// Displayed as `close fd {fd}`.
FdClose { fd: i32 },
/// Displayed as `read from fd {fd}`.
FdRead { fd: i32 },
/// Displayed as `write to fd {fd}`.
FdWrite { fd: i32 },
}
/// Where a file descriptor reported by `Event::FdOpen` came from.
#[derive(Debug, Serialize, Deserialize)]
pub enum FdSource {
/// Backed by the file at `path`; displayed as `file {path}`.
File { path: PathBuf },
/// Displayed as `the terminal`.
Tty,
}
/// Identifies the process a `LogEntry` belongs to by machine number and process id.
///
/// `LogEntry`'s `Display` output renders it as `m{machine}p{pid}`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Identifier {
pub machine: i32,
pub pid: Pid,
}
/// A single record in the trace log: which process, what happened, and when.
#[derive(Debug, Serialize, Deserialize)]
pub struct LogEntry {
pub ident: Identifier,
pub event: Event,
// Elapsed time rather than wall-clock time; the tracer client fills this with the duration
// since its own `start_time` at the moment the entry is logged.
pub timestamp: Duration,
}
/// Renders an entry as `[<secs>.<millis> m<machine>p<pid>] <event>`, with the millisecond part
/// zero-padded to three digits.
impl Display for LogEntry {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self { ident, event, timestamp } = self;
        let secs = timestamp.as_secs();
        // Sub-second remainder in whole milliseconds, i.e. total milliseconds modulo 1000.
        let millis = timestamp.subsec_millis();
        write!(f, "[{secs}.{millis:03} m{}p{}] {event}", ident.machine, ident.pid)
    }
}
impl Display for FdSource {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
FdSource::File { path } => write!(f, "file {}", path.to_string_lossy()),
FdSource::Tty => write!(f, "the terminal"),
}
}
}
impl Display for Event {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Event::Fork { child } => write!(f, "fork {child}"),
Event::Exec { prog } => write!(f, "exec {}", prog.to_string_lossy()),
Event::Exit { code } => write!(f, "exit with {code}"),
Event::FdOpen { fd, source } => write!(f, "open fd {fd} from {source}"),
Event::FdDup { oldfd, newfd } => write!(f, "dup fd {oldfd} to {newfd}"),
Event::FdClose { fd } => write!(f, "close fd {fd}"),
Event::FdRead { fd } => write!(f, "read from fd {fd}"),
Event::FdWrite { fd } => write!(f, "write to fd {fd}"),
}
}
}
/// Message the tracer server deserializes from a connected client's JSON stream.
#[derive(Serialize, Deserialize)]
pub enum TracerClientMessage {
/// A batch of log entries plus `(path, hash)` pairs for the server to register in its
/// `FileStore`.
Events {
events: Vec<LogEntry>,
files: BTreeSet<(PathBuf, Sha256Hash)>,
},
/// Formats keyed by path; the server applies each one with `FileStore::update_format`.
FileFormats {
formats: HashMap<PathBuf, FileFormat>,
},
}
/// Reply the tracer server serializes back to a client after handling a `TracerClientMessage`.
#[derive(Serialize, Deserialize)]
pub enum TracerServerRequest {
/// Nothing further is requested from the client.
Continue,
/// Sent in response to `TracerClientMessage::Events` when some reported files were not
/// already known to the server's `FileStore`; `paths` lists those files.
AnalyzeFiles {
paths: Vec<PathBuf>,
},
}
/// Serializable result of a trace: the accumulated log entries together with the `FileStore`
/// describing the files involved.
#[derive(Serialize, Deserialize)]
pub struct TracerReport {
pub log: Vec<LogEntry>,
pub files: FileStore,
}