diff --git a/src/bin/test_solver.rs b/src/bin/test_solver.rs index 6c312a0..d7d8764 100644 --- a/src/bin/test_solver.rs +++ b/src/bin/test_solver.rs @@ -108,6 +108,14 @@ fn main() { .unwrap(); } + let dummy_memory = if let Some(size) = opts.extra_alloc { + let mut vec: Vec = black_box(vec![0u8; size]); + vec.fill(1); // actually access the memory + vec + } else { + Vec::new() + }; + if opts.wait_seconds > 0.0 { let start = Instant::now(); while start.elapsed().as_secs_f64() < opts.wait_seconds { @@ -154,10 +162,6 @@ fn main() { print!("{}", contents); } - if let Some(size) = opts.extra_alloc { - let mut vec: Vec = black_box(vec![0u8; size]); - vec.fill(1); // acutally access the memory - } - + let _ = &dummy_memory; // keep around std::process::exit(opts.exit_code); } diff --git a/src/commands/arguments.rs b/src/commands/arguments.rs index c053c28..43036a4 100644 --- a/src/commands/arguments.rs +++ b/src/commands/arguments.rs @@ -6,6 +6,7 @@ use url::Url; pub const ENV_SOLVER: &str = "STRIDE_SOLVER"; pub const ENV_SOFT_TIMEOUT: &str = "STRIDE_TIMEOUT"; pub const ENV_GRACE_PERIOD: &str = "STRIDE_GRACE"; +pub const ENV_MEM_LIMT: &str = "STRIDE_MEMORY_LIMIT"; pub const ENV_PARALLEL_JOBS: &str = "STRIDE_PARALLEL"; pub const ENV_REQUIRE_OPTIMAL: &str = "STRIDE_OPTIMAL"; pub const ENV_KEEP_LOGS: &str = "STRIDE_KEEP"; @@ -36,6 +37,13 @@ pub struct CommandProfileArgs { #[arg(help = "Solver program to execute")] pub solver: PathBuf, + #[arg( + short = 'm', + long, + help = "Tries to limit the memory used by a solver in MiB." 
+ )] + pub memory_limit: Option, + #[arg(help = "Arguments passed to solver")] pub solver_args: Vec, } @@ -104,6 +112,9 @@ pub struct CommandRunArgs { )] pub keep_successful_logs: bool, + #[arg(short = 'm', long, env = ENV_MEM_LIMT, help = "Tries to limit the memory used by a solver in MiB; incompatible with -P/--no-profile")] + pub memory_limit: Option, + #[arg( short = 'P', long, @@ -114,7 +125,7 @@ pub struct CommandRunArgs { #[arg( short = 'E', long, - help = "Do not set STRIDE_* enviroment variable for solver" + help = "Do not set STRIDE_* environment variable for solver" )] pub no_envs: bool, @@ -203,6 +214,11 @@ pub fn parse_prog_arguments() -> Arguments { "It seems like you provided a relative solver path without './' prefix. Please add './' to the solver path or provide an absolute path." ); } + + if opts.memory_limit.is_some() && opts.no_profile { + error!("The arguments -m/--memory-limit and -P/--no-profile are mutually exclusive"); + panic!("The arguments -m/--memory-limit and -P/--no-profile are mutually exclusive"); + } } opts diff --git a/src/commands/profile.rs b/src/commands/profile.rs index bf0ffd1..d8edf81 100644 --- a/src/commands/profile.rs +++ b/src/commands/profile.rs @@ -1,9 +1,10 @@ use std::mem::MaybeUninit; use std::process::{Stdio, exit}; -use std::time::Instant; +use std::time::{Instant}; use super::arguments::CommandProfileArgs; -use libc::rusage; +use crate::job::job_processor::OOM_EXIT_CODE; +use libc::{rlimit, rusage, setrlimit}; use thiserror::Error; use tokio::process::Command; use tokio::signal::unix::{SignalKind, signal}; @@ -19,12 +20,41 @@ pub enum CommandProfileError { pub async fn command_profile(args: &CommandProfileArgs) -> Result<(), CommandProfileError> { // we are using the blocking variant here, since we have nothing else to do anyhow let start = Instant::now(); - let mut child = Command::new(args.solver.clone()) + + let mut command = Command::new(args.solver.clone()); + let command = command 
.args(args.solver_args.clone()) .stdin(Stdio::inherit()) .stdout(Stdio::inherit()) - .stderr(Stdio::inherit()) - .spawn()?; + .stderr(Stdio::inherit()); + + if let Some(memory_limit_in_mib) = args.memory_limit { + #[cfg(any(target_os = "linux", target_os = "macos"))] + unsafe { + command.pre_exec(move || { + let limit: u64 = memory_limit_in_mib as u64 * 1024 * 1024; // convert the MiB limit to bytes + + let rlim = rlimit { + rlim_cur: limit, + rlim_max: limit, + }; + + #[cfg(target_os = "linux")] + if setrlimit(libc::RLIMIT_AS, &rlim) != 0 { + return Err(std::io::Error::last_os_error()); + } + + #[cfg(target_os = "macos")] + if setrlimit(libc::RLIMIT_RSS, &rlim) != 0 { + return Err(std::io::Error::last_os_error()); + } + + Ok(()) + }); + } + } + + let mut child = command.spawn()?; let mut stream_sigint = signal(SignalKind::interrupt())?; let mut stream_sigterm = signal(SignalKind::terminate())?; @@ -43,9 +73,8 @@ pub async fn command_profile(args: &CommandProfileArgs) -> Result<(), CommandPro } }, - status = child.wait() => { - break status?.code().unwrap_or(1); + break status?.code(); } } }; @@ -60,7 +89,15 @@ pub async fn command_profile(args: &CommandProfileArgs) -> Result<(), CommandPro let usage = get_rusage_children(); report_usage(usage); - exit(code); + if let Some(memory_limit_in_mib) = args.memory_limit + && code.is_none() + && usage.ru_maxrss as usize > memory_limit_in_mib * 1024 * 1024 /* NOTE(review): ru_maxrss is reported in KiB on Linux (bytes on macOS) but is compared against a byte count here - confirm units */ + { + println!("#s out_of_memory true"); + exit(OOM_EXIT_CODE); + } + + exit(code.unwrap_or(1)); } fn get_rusage_children() -> rusage { diff --git a/src/commands/run/command.rs b/src/commands/run/command.rs index c8dc2f5..c35060c 100644 --- a/src/commands/run/command.rs +++ b/src/commands/run/command.rs @@ -153,6 +153,7 @@ impl TaskContext { let run_dir = RunDirectory::new()?; let display = ProgressDisplay::new(0); + display.enable_oom_report(args.memory_limit.is_some()); let summary_writer = SummaryWriter::new(&run_dir.path().join("summary.json")).await?; @@ -194,6 +195,7 @@ async fn task_main( 
.solver_args(context.args.solver_args.clone()) .soft_timeout(context.args.soft_timeout) .grace_period(context.args.grace_period) + .memory_limit_in_mib(context.args.memory_limit) .instance_path(instance.path().to_path_buf()) .profiler(!context.args.no_profile) .set_stride_envs(!context.args.no_envs) diff --git a/src/commands/run/display.rs b/src/commands/run/display.rs index cf27c3e..5c00457 100644 --- a/src/commands/run/display.rs +++ b/src/commands/run/display.rs @@ -2,7 +2,7 @@ use crate::instances::instance::Instance; use crate::job::job_processor::{JobProgress, JobResult}; use console::{Attribute, Style}; use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; -use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; use std::time::Duration; use tokio::time::Instant; pub struct ProgressDisplay { @@ -11,6 +11,8 @@ pub struct ProgressDisplay { stride_line: ProgressBar, pb_total: ProgressBar, + report_oom: AtomicBool, + num_valid: AtomicU64, num_infeasible: AtomicU64, num_emptysolution: AtomicU64, @@ -19,6 +21,7 @@ pub struct ProgressDisplay { num_systemerror: AtomicU64, num_solvererror: AtomicU64, num_timeout: AtomicU64, + num_outofmemory: AtomicU64, num_stride_instances: AtomicU64, num_stride_queued: AtomicU64, @@ -52,6 +55,8 @@ impl ProgressDisplay { pb_total, stride_line, + report_oom: AtomicBool::new(false), + num_valid: Default::default(), num_infeasible: Default::default(), num_invalidinstance: Default::default(), @@ -60,6 +65,7 @@ impl ProgressDisplay { num_solvererror: Default::default(), num_timeout: Default::default(), num_emptysolution: Default::default(), + num_outofmemory: Default::default(), num_stride_instances: Default::default(), num_stride_queued: Default::default(), @@ -74,6 +80,10 @@ impl ProgressDisplay { self.pb_total.set_length(num_instances as u64); } + pub fn enable_oom_report(&self, enabled: bool) { + self.report_oom.store(enabled, Ordering::Relaxed); + } + fn multi_progress(&self) -> 
&MultiProgress { &self.mpb } @@ -124,16 +134,18 @@ impl ProgressDisplay { const CRITICAL: [Attribute; 2] = [Attribute::Bold, Attribute::Underlined]; { - let parts = [ - format_num!(num_valid, "Valid", green), - format_num!(num_emptysolution, "Empty ", yellow), - format_num!(num_infeasible, "Infeas", yellow, CRITICAL), - format_num!(num_timeout, "Timeout", yellow), - format_num!(num_syntaxerror, "SyntErr", red), - format_num!(num_solvererror, "SolvErr ", red), - format_num!(num_systemerror, "SysErr", red), - format!("Running: {running}"), - ]; + let mut parts = Vec::with_capacity(10); + parts.push(format_num!(num_valid, "Valid", green)); + parts.push(format_num!(num_emptysolution, "Empty ", yellow)); + parts.push(format_num!(num_infeasible, "Infeas", yellow, CRITICAL)); + parts.push(format_num!(num_timeout, "Timeout", yellow)); + if self.report_oom.load(Ordering::Acquire) { + parts.push(format_num!(num_outofmemory, "OutOfMem", yellow)); + } + parts.push(format_num!(num_syntaxerror, "SyntErr", red)); + parts.push(format_num!(num_solvererror, "SolvErr ", red)); + parts.push(format_num!(num_systemerror, "SysErr", red)); + parts.push(format!("Running: {running}")); self.status_line.set_message(parts.join(" | ")); } @@ -184,6 +196,9 @@ impl ProgressDisplay { JobResult::EmptySolution => { self.num_emptysolution.fetch_add(1, Ordering::AcqRel); } + JobResult::OutOfMemory => { + self.num_outofmemory.fetch_add(1, Ordering::AcqRel); + } } } diff --git a/src/job/job_processor.rs b/src/job/job_processor.rs index 47525db..6813a1b 100644 --- a/src/job/job_processor.rs +++ b/src/job/job_processor.rs @@ -18,6 +18,8 @@ use crate::{ use std::fmt::Display; use std::path::PathBuf; +pub const OOM_EXIT_CODE: i32 = 137; + #[derive(Error, Debug)] pub enum JobError { #[error("IO error: {0}")] @@ -93,6 +95,7 @@ pub enum JobResult { SystemError, SolverError, Timeout, + OutOfMemory, } impl JobResult { @@ -113,6 +116,7 @@ impl Display for JobResult { JobResult::SystemError => "SystemError", 
JobResult::SolverError => "SolverError", JobResult::Timeout => "Timeout", + JobResult::OutOfMemory => "OutOfMemory", }); write!(f, "{}", str) } @@ -127,6 +131,9 @@ pub struct JobProcessor { soft_timeout: Duration, grace_period: Duration, + #[builder(default)] + memory_limit_in_mib: Option, + #[builder(default)] solver_args: Vec, @@ -162,6 +169,10 @@ impl JobProcessor { self.grace_period } + pub fn memory_limit_in_mib(&self) -> Option { + self.memory_limit_in_mib + } + pub fn progress(&self) -> JobProgress { self.progress.load() } @@ -192,6 +203,7 @@ impl JobProcessor { .instance_path(self.instance_path.clone()) .working_dir(self.work_dir.clone()) .env(self.env_vars()) + .memory_limit_in_mib(self.memory_limit_in_mib) .timeout(self.soft_timeout) .grace(self.grace_period); @@ -210,7 +222,14 @@ impl JobProcessor { .expect("Convert solver path into String") .into(); - let mut args: Vec = vec!["p".into(), solver_path, "--".into()]; + let mut args: Vec = vec!["p".into(), solver_path]; + + if let Some(limit) = self.memory_limit_in_mib { + args.push("-m".into()); + args.push(limit.to_string()); + } + + args.push("--".into()); args.extend_from_slice(&self.solver_args); executor_builder.solver_path(profiler_path).args(args); @@ -242,6 +261,7 @@ impl JobProcessor { ChildExitStatus::Timeout | ChildExitStatus::WithinGrace(_) => { JobResult::Timeout } + ChildExitStatus::OutOfMemory => JobResult::OutOfMemory, }, None, )); diff --git a/src/job/solver_executor.rs b/src/job/solver_executor.rs index cb3c9a3..d1cea58 100644 --- a/src/job/solver_executor.rs +++ b/src/job/solver_executor.rs @@ -1,5 +1,6 @@ use std::{fs::File, io::Write, path::PathBuf, process::ExitStatus, time::Duration}; +use crate::job::job_processor::OOM_EXIT_CODE; use derive_builder::Builder; use thiserror::Error; use tokio::{ @@ -13,6 +14,7 @@ pub enum ChildExitStatus { BeforeTimeout(ExitStatus), WithinGrace(ExitStatus), Timeout, + OutOfMemory, } impl ChildExitStatus { @@ -21,6 +23,7 @@ impl ChildExitStatus { 
ChildExitStatus::BeforeTimeout(exit_status) => exit_status.success(), ChildExitStatus::WithinGrace(exit_status) => exit_status.success(), ChildExitStatus::Timeout => false, + ChildExitStatus::OutOfMemory => false, } } } @@ -45,6 +48,10 @@ pub struct SolverExecutor { timeout: Duration, grace: Duration, + #[allow(dead_code)] + #[builder(default)] + memory_limit_in_mib: Option, + #[builder(default)] runtime: Option, } @@ -103,7 +110,13 @@ impl SolverExecutor { // we get an error if we run into the timeout if let Ok(res) = timeout(self.timeout, child.wait()).await { trace!("Child terminated within time: {res:?}"); - return Ok(ChildExitStatus::BeforeTimeout(res?)); + + let res = res?; + if res.code().is_some_and(|c| c == OOM_EXIT_CODE) { + return Ok(ChildExitStatus::OutOfMemory); + } + + return Ok(ChildExitStatus::BeforeTimeout(res)); } debug!( @@ -124,7 +137,12 @@ impl SolverExecutor { if !self.grace.is_zero() && let Ok(res) = timeout(self.grace, child.wait()).await { - return Ok(ChildExitStatus::WithinGrace(res?)); + let res = res?; + if res.code().is_some_and(|c| c == OOM_EXIT_CODE) { + return Ok(ChildExitStatus::OutOfMemory); + } + + return Ok(ChildExitStatus::WithinGrace(res)); } debug!( diff --git a/tests/profile.rs b/tests/profile.rs index 4d8cc8c..0fa5519 100644 --- a/tests/profile.rs +++ b/tests/profile.rs @@ -15,13 +15,18 @@ fn test_stride_path() -> PathBuf { PathBuf::from(env!("CARGO_BIN_EXE_stride")) } -async fn run(instance: PathBuf, profiler: bool) -> (JobResult, HashMap) { +async fn run( + instance: PathBuf, + profiler: bool, + memory_limit: Option, +) -> (JobResult, HashMap) { let instance = test_testcases_dir().join(instance); let tempdir = TempDir::new("profile_test").unwrap(); let run_dir = RunDirectory::new_within(tempdir.path()).unwrap(); let work_dir = run_dir.create_task_dir_for(&instance).unwrap(); - let job = JobProcessorBuilder::default() + let mut builder = JobProcessorBuilder::default(); + builder .soft_timeout(Duration::from_secs_f64(1.5)) 
.grace_period(Duration::from_secs_f64(1.5)) .solver(test_solver_path()) @@ -30,8 +35,9 @@ async fn run(instance: PathBuf, profiler: bool) -> (JobResult, HashMap (JobResult, HashMap> 20; + + let (result, _) = run( + PathBuf::from("test_solver_valid/valid.in"), + true, + Some(memory_in_mib + 10), + ) + .await; + assert_eq!(result, JobResult::Valid { size: 2 }); + + let (result, _) = run( + PathBuf::from("test_solver_valid/alloc50mb.in"), + true, + Some(30 + memory_in_mib), + ) + .await; + assert!(result == JobResult::SolverError || result == JobResult::OutOfMemory); +}