diff --git a/crates/memtrack/src/main.rs b/crates/memtrack/src/main.rs index 8771e7ef..8310b8d9 100644 --- a/crates/memtrack/src/main.rs +++ b/crates/memtrack/src/main.rs @@ -138,20 +138,20 @@ fn track_command( let Ok(event) = event_rx.recv_timeout(Duration::from_millis(100)) else { continue; }; - let _ = write_tx_clone.send(event.into()); + let _ = write_tx_clone.send(event); } // Final aggressive drain - keep trying until truly empty loop { match event_rx.try_recv() { Ok(event) => { - let _ = write_tx_clone.send(event.into()); + let _ = write_tx_clone.send(event); } Err(_) => { // Sleep briefly and try once more to catch late arrivals thread::sleep(Duration::from_millis(50)); if let Ok(event) = event_rx.try_recv() { - let _ = write_tx_clone.send(event.into()); + let _ = write_tx_clone.send(event); } else { break; } diff --git a/src/cli/exec/mod.rs b/src/cli/exec/mod.rs index 15e9c670..6388441d 100644 --- a/src/cli/exec/mod.rs +++ b/src/cli/exec/mod.rs @@ -103,10 +103,7 @@ pub async fn execute_with_harness( } debug!("config: {:#?}", execution_context.config); - let executor = executor::get_executor_from_mode( - &execution_context.config.mode, - executor::ExecutorCommand::Exec, - ); + let executor = executor::get_executor_from_mode(&execution_context.config.mode); let get_exec_harness_installer_url = || { format!( diff --git a/src/cli/run/mod.rs b/src/cli/run/mod.rs index af991b00..d00f9b18 100644 --- a/src/cli/run/mod.rs +++ b/src/cli/run/mod.rs @@ -152,10 +152,7 @@ pub async fn run( debug!("config: {:#?}", execution_context.config); // Execute benchmarks - let executor = executor::get_executor_from_mode( - &execution_context.config.mode, - executor::ExecutorCommand::Run, - ); + let executor = executor::get_executor_from_mode(&execution_context.config.mode); let poll_results_fn = async |upload_result: &UploadResult| { poll_results::poll_results(api_client, upload_result, output_json).await diff --git a/src/executor/mod.rs b/src/executor/mod.rs index d2147245..60bd470d 100644 --- a/src/executor/mod.rs +++ b/src/executor/mod.rs @@ -42,24 +42,11 @@ impl Display for RunnerMode { pub const EXECUTOR_TARGET: &str = "executor"; -/// Whether the executor is used for a `run` or an `exec` -/// FIXME: This should not really be a concern for the executor itself -pub enum ExecutorCommand { - Run, - Exec, -} - -pub fn get_executor_from_mode(mode: &RunnerMode, command: ExecutorCommand) -> Box { +pub fn get_executor_from_mode(mode: &RunnerMode) -> Box { match mode { #[allow(deprecated)] RunnerMode::Instrumentation | RunnerMode::Simulation => Box::new(ValgrindExecutor), - RunnerMode::Walltime => { - let output_pipedata = match command { - ExecutorCommand::Run => true, - ExecutorCommand::Exec => false, - }; - Box::new(WallTimeExecutor::new_with_output_pipedata(output_pipedata)) - } + RunnerMode::Walltime => Box::new(WallTimeExecutor::new()), RunnerMode::Memory => Box::new(MemoryExecutor), } } diff --git a/src/executor/wall_time/executor.rs b/src/executor/wall_time/executor.rs index 9ddd76f5..54d84ef5 100644 --- a/src/executor/wall_time/executor.rs +++ b/src/executor/wall_time/executor.rs @@ -78,13 +78,7 @@ pub struct WallTimeExecutor { impl WallTimeExecutor { pub fn new() -> Self { Self { - perf: cfg!(target_os = "linux").then(|| PerfRunner::new(true)), - } - } - - pub fn new_with_output_pipedata(output_pipedata: bool) -> Self { - Self { - perf: cfg!(target_os = "linux").then(|| PerfRunner::new(output_pipedata)), + perf: cfg!(target_os = "linux").then(PerfRunner::new), } } diff --git a/src/executor/wall_time/perf/memory_mappings.rs b/src/executor/wall_time/perf/memory_mappings.rs deleted file mode 100644 index 54ac76b1..00000000 --- a/src/executor/wall_time/perf/memory_mappings.rs +++ /dev/null @@ -1,75 +0,0 @@ -use super::perf_map::ProcessSymbols; -use super::unwind_data::UnwindDataExt; -use crate::prelude::*; -use libc::pid_t; -use runner_shared::unwind_data::UnwindData; -use std::collections::HashMap; - -#[cfg(target_os = "linux")] -pub(super) fn process_memory_mappings( - pid: pid_t, - symbols_by_pid: &mut HashMap, - unwind_data_by_pid: &mut HashMap>, -) -> anyhow::Result<()> { - use procfs::process::MMPermissions; - let bench_proc = - procfs::process::Process::new(pid as _).expect("Failed to find benchmark process"); - let exe_maps = bench_proc.maps().expect("Failed to read /proc/{pid}/maps"); - - debug!("Process memory mappings for PID {pid}:"); - for map in exe_maps.iter().sorted_by_key(|m| m.address.0) { - let (base_addr, end_addr) = map.address; - debug!( - " {:016x}-{:016x} {:08x} {:?} {:?} ", - base_addr, end_addr, map.offset, map.pathname, map.perms, - ); - } - - for map in &exe_maps { - let page_offset = map.offset; - let (base_addr, end_addr) = map.address; - let path = match &map.pathname { - procfs::process::MMapPath::Path(path) => Some(path.clone()), - _ => None, - }; - - let Some(path) = &path else { - if map.perms.contains(MMPermissions::EXECUTE) { - debug!("Found executable mapping without path: {base_addr:x} - {end_addr:x}"); - } - continue; - }; - - if !map.perms.contains(MMPermissions::EXECUTE) { - continue; - } - - symbols_by_pid - .entry(pid) - .or_insert(ProcessSymbols::new(pid)) - .add_mapping(pid, path, base_addr, end_addr, map.offset); - debug!("Added mapping for module {path:?}"); - - match UnwindData::new( - path.to_string_lossy().as_bytes(), - page_offset, - base_addr, - end_addr, - None, - ) { - Ok(unwind_data) => { - unwind_data_by_pid.entry(pid).or_default().push(unwind_data); - debug!("Added unwind data for {path:?} ({base_addr:x} - {end_addr:x})"); - } - Err(error) => { - debug!( - "Failed to create unwind data for module {}: {}", - path.display(), - error - ); - } - } - } - - Ok(()) -} diff --git a/src/executor/wall_time/perf/mod.rs b/src/executor/wall_time/perf/mod.rs index 0f54cb75..833be992 100644 --- a/src/executor/wall_time/perf/mod.rs +++ b/src/executor/wall_time/perf/mod.rs @@ -17,10 +17,9 @@ use crate::executor::wall_time::perf::perf_executable::get_working_perf_executab use crate::prelude::*; use anyhow::Context; use fifo::PerfFifo; -use libc::pid_t; +use parse_perf_file::MemmapRecordsOutput; use perf_executable::get_compression_flags; use perf_executable::get_event_flags; -use perf_map::ProcessSymbols; use rayon::prelude::*; use runner_shared::artifacts::ArtifactExt; use runner_shared::artifacts::ExecutionTimestamps; @@ -28,13 +27,11 @@ use runner_shared::debug_info::ModuleDebugInfo; use runner_shared::fifo::Command as FifoCommand; use runner_shared::fifo::IntegrationMode; use runner_shared::metadata::PerfMetadata; -use runner_shared::unwind_data::UnwindData; use std::path::Path; use std::path::PathBuf; use std::{cell::OnceCell, collections::HashMap, process::ExitStatus}; mod jit_dump; -mod memory_mappings; mod parse_perf_file; mod setup; @@ -46,15 +43,10 @@ pub mod perf_map; pub mod unwind_data; const PERF_METADATA_CURRENT_VERSION: u64 = 1; -const PERF_DATA_FILE_NAME: &str = "perf.data"; const PERF_PIPEDATA_FILE_NAME: &str = "perf.pipedata"; pub struct PerfRunner { benchmark_data: OnceCell, - /// Whether to output the perf data to a streamable .pipedata file - /// This can be removed once we have upstreamed the the linux-perf-data crate changes to parse - /// from pipedata directly, to only support pipedata. - output_pipedata: bool, } impl PerfRunner { @@ -89,9 +81,8 @@ impl PerfRunner { Ok(()) } - pub fn new(output_pipedata: bool) -> Self { + pub fn new() -> Self { Self { - output_pipedata, benchmark_data: OnceCell::new(), } } @@ -160,36 +151,18 @@ impl PerfRunner { perf_fifo.ctl_path().to_string_lossy(), perf_fifo.ack_path().to_string_lossy() ), + "-o", + "-", // Output to stdout for piping + "--", ]); - if self.output_pipedata { - perf_wrapper_builder.args([ - "-o", "-", // forces pipe mode - ]); - } else { - perf_wrapper_builder.args([ - "-o", - self.get_perf_file_path(profile_folder) - .to_string_lossy() - .as_ref(), - ]); - } - - perf_wrapper_builder.arg("--"); cmd_builder.wrap_with(perf_wrapper_builder); - // Output the perf data to the profile folder - let perf_data_file_path = self.get_perf_file_path(profile_folder); - - let raw_command = if self.output_pipedata { - format!( - "set -o pipefail && {} | cat > {}", - &cmd_builder.as_command_line(), - perf_data_file_path.to_string_lossy() - ) - } else { - cmd_builder.as_command_line() - }; + let raw_command = format!( + "set -o pipefail && {} | cat > {}", + &cmd_builder.as_command_line(), + self.get_perf_file_path(profile_folder).to_string_lossy() + ); let mut wrapped_builder = CommandBuilder::new("bash"); wrapped_builder.args(["-c", &raw_command]); @@ -205,8 +178,7 @@ impl PerfRunner { let on_process_started = |mut child: std::process::Child| async move { // If we output pipedata, we do not parse the perf map during teardown yet, so we need to parse memory // maps as we receive the `CurrentBenchmark` fifo commands. - let (data, exit_status) = - Self::handle_fifo(runner_fifo, perf_fifo, self.output_pipedata, &mut child).await?; + let (data, exit_status) = Self::handle_fifo(runner_fifo, perf_fifo, &mut child).await?; self.benchmark_data.set(data).unwrap_or_else(|_| { error!("Failed to set benchmark data in PerfRunner"); }); @@ -247,12 +219,8 @@ impl PerfRunner { async fn handle_fifo( mut runner_fifo: RunnerFifo, mut perf_fifo: PerfFifo, - parse_memory_maps: bool, child: &mut std::process::Child, ) -> anyhow::Result<(BenchmarkData, std::process::ExitStatus)> { - let mut symbols_by_pid = HashMap::::new(); - let mut unwind_data_by_pid = HashMap::>::new(); - let on_cmd = async |cmd: &FifoCommand| { #[allow(deprecated)] match cmd { @@ -262,19 +230,6 @@ impl PerfRunner { FifoCommand::StopBenchmark => { perf_fifo.stop_events().await?; } - FifoCommand::CurrentBenchmark { pid, .. } => { - #[cfg(target_os = "linux")] - if parse_memory_maps - && !symbols_by_pid.contains_key(pid) - && !unwind_data_by_pid.contains_key(pid) - { - memory_mappings::process_memory_mappings( - *pid, - &mut symbols_by_pid, - &mut unwind_data_by_pid, - )?; - } - } FifoCommand::PingPerf => { if perf_fifo.ping().await.is_err() { return Ok(Some(FifoCommand::Err)); @@ -299,27 +254,19 @@ impl PerfRunner { BenchmarkData { fifo_data, marker_result, - symbols_by_pid, - unwind_data_by_pid, }, exit_status, )) } fn get_perf_file_path>(&self, profile_folder: P) -> PathBuf { - if self.output_pipedata { - profile_folder.as_ref().join(PERF_PIPEDATA_FILE_NAME) - } else { - profile_folder.as_ref().join(PERF_DATA_FILE_NAME) - } + profile_folder.as_ref().join(PERF_PIPEDATA_FILE_NAME) } } pub struct BenchmarkData { fifo_data: FifoBenchmarkData, marker_result: ExecutionTimestamps, - pub symbols_by_pid: HashMap, - pub unwind_data_by_pid: HashMap>, } #[derive(Debug)] @@ -336,28 +283,16 @@ impl BenchmarkData { ) -> Result<(), BenchmarkDataSaveError> { self.marker_result.save_to(&path).unwrap(); - let parsed_perf_map_output = - if self.symbols_by_pid.is_empty() && self.unwind_data_by_pid.is_empty() { - debug!("Reading perf data from file for mmap extraction"); - Some( - parse_perf_file::parse_for_memmap2(perf_file_path).map_err(|e| { - error!("Failed to parse perf file: {e}"); - BenchmarkDataSaveError::FailedToParsePerfFile - })?, - ) - } else { - None - }; - - let (symbols_by_pid, unwind_data_by_pid) = - if let Some(parsed_perf_map_output) = parsed_perf_map_output.as_ref() { - ( - &parsed_perf_map_output.symbols_by_pid, - &parsed_perf_map_output.unwind_data_by_pid, - ) - } else { - (&self.symbols_by_pid, &self.unwind_data_by_pid) - }; + debug!("Reading perf data from file for mmap extraction"); + let MemmapRecordsOutput { + symbols_by_pid, + unwind_data_by_pid, + } = { + parse_perf_file::parse_for_memmap2(perf_file_path).map_err(|e| { + error!("Failed to parse perf file: {e}"); + BenchmarkDataSaveError::FailedToParsePerfFile + })? + }; let path_ref = path.as_ref(); debug!("Saving symbols addresses"); diff --git a/src/executor/wall_time/perf/parse_perf_file.rs b/src/executor/wall_time/perf/parse_perf_file.rs index 05b930c6..729c7b4a 100644 --- a/src/executor/wall_time/perf/parse_perf_file.rs +++ b/src/executor/wall_time/perf/parse_perf_file.rs @@ -1,11 +1,11 @@ use super::perf_map::ProcessSymbols; use super::unwind_data::UnwindDataExt; -use crate::executor::helpers::run_with_sudo::run_with_sudo; use crate::prelude::*; use libc::pid_t; use linux_perf_data::PerfFileReader; use linux_perf_data::PerfFileRecord; use linux_perf_data::linux_perf_event_reader::EventRecord; +use linux_perf_data::linux_perf_event_reader::RecordType; use runner_shared::unwind_data::UnwindData; use std::collections::HashMap; use std::path::Path; @@ -19,71 +19,60 @@ pub(super) fn parse_for_memmap2>(perf_file_path: P) -> Result::new(); let mut unwind_data_by_pid = HashMap::>::new(); - //FIXME: Remove this once again when we parse directly from pipedata - { - let tmp_perf_file_path = perf_file_path.as_ref().to_string_lossy(); - - // We ran perf with sudo, so we have to change the ownership of the perf.data - run_with_sudo( - "chown", - [ - "-R", - &format!( - "{}:{}", - nix::unistd::Uid::current(), - nix::unistd::Gid::current() - ), - &tmp_perf_file_path, - ], - )?; - } - let reader = std::fs::File::open(perf_file_path.as_ref()).unwrap(); + // 1MiB buffer + let reader = std::io::BufReader::with_capacity( + 1024 * 1024, + std::fs::File::open(perf_file_path.as_ref())?, + ); let PerfFileReader { mut perf_file, mut record_iter, - } = PerfFileReader::parse_file(reader)?; + } = PerfFileReader::parse_pipe(reader)?; while let Some(record) = record_iter.next_record(&mut perf_file).unwrap() { let PerfFileRecord::EventRecord { record, .. } = record else { continue; }; + // Check the type from the raw record to avoid parsing overhead since we do not care about + // most records. + if record.record_type != RecordType::MMAP2 { + continue; + } + let Ok(parsed_record) = record.parse() else { continue; }; + // Should never fail since we already checked the type in the raw record let EventRecord::Mmap2(record) = parsed_record else { continue; }; - let record_path_string = { - let path_slice = record.path.as_slice(); - String::from_utf8_lossy(&path_slice).into_owned() - }; + // Check PROT_EXEC early to avoid string allocation for non-executable mappings + if record.protection as i32 & libc::PROT_EXEC == 0 { + continue; + } - let end_addr = record.address + record.length; + // Filter on raw bytes before allocating a String + let path_slice: &[u8] = &record.path.as_slice(); - if record_path_string == "//anon" { - // Skip anonymous mappings - trace!( - "Skipping anonymous mapping: {:x}-{:x}", - record.address, end_addr - ); + // Skip anonymous mappings + if path_slice == b"//anon" { continue; } - if record_path_string.starts_with("[") && record_path_string.ends_with("]") { - // Skip special mappings - trace!( - "Skipping special mapping: {} - {:x}-{:x}", - record_path_string, record.address, end_addr - ); + // Skip special mappings like [vdso], [heap], etc. + if path_slice.first() == Some(&b'[') && path_slice.last() == Some(&b']') { continue; } + let record_path_string = String::from_utf8_lossy(path_slice).into_owned(); + let end_addr = record.address + record.length; + trace!( - "Pid {}: {:016x}-{:016x} {:08x} {:?} (Prot {:?})", + "Mapping: Pid {}: {:016x}-{:016x} {:08x} {:?} (Prot {:?})", record.pid, record.address, end_addr, @@ -91,11 +80,6 @@ pub(super) fn parse_for_memmap2>(perf_file_path: P) -> Result>(perf_file_path: P) -> Result