diff --git a/.github/workflows/Benchmarks.yml b/.github/workflows/Benchmarks.yml index 45b782025..ad95313db 100644 --- a/.github/workflows/Benchmarks.yml +++ b/.github/workflows/Benchmarks.yml @@ -28,7 +28,7 @@ jobs: - uses: actions/checkout@v5 - - uses: hyperlight-dev/ci-setup-workflow@v1.6.0 + - uses: hyperlight-dev/ci-setup-workflow@v1.8.0 with: rust-toolchain: "1.86" env: diff --git a/.github/workflows/CargoPublish.yml b/.github/workflows/CargoPublish.yml index a9e77f08e..0c4ad8304 100644 --- a/.github/workflows/CargoPublish.yml +++ b/.github/workflows/CargoPublish.yml @@ -32,7 +32,7 @@ jobs: fetch-depth: 0 fetch-tags: true - - uses: hyperlight-dev/ci-setup-workflow@v1.6.0 + - uses: hyperlight-dev/ci-setup-workflow@v1.8.0 with: rust-toolchain: "1.86" diff --git a/.github/workflows/CreateRelease.yml b/.github/workflows/CreateRelease.yml index 2adc5069d..07e8e6da2 100644 --- a/.github/workflows/CreateRelease.yml +++ b/.github/workflows/CreateRelease.yml @@ -31,7 +31,7 @@ jobs: steps: - uses: actions/checkout@v5 - - uses: hyperlight-dev/ci-setup-workflow@v1.6.0 + - uses: hyperlight-dev/ci-setup-workflow@v1.8.0 with: rust-toolchain: "1.86" env: @@ -52,7 +52,7 @@ jobs: steps: - uses: actions/checkout@v5 - - uses: hyperlight-dev/ci-setup-workflow@v1.6.0 + - uses: hyperlight-dev/ci-setup-workflow@v1.8.0 with: rust-toolchain: "1.86" env: @@ -112,7 +112,7 @@ jobs: fetch-depth: 0 fetch-tags: true - - uses: hyperlight-dev/ci-setup-workflow@v1.6.0 + - uses: hyperlight-dev/ci-setup-workflow@v1.8.0 with: rust-toolchain: "1.86" env: diff --git a/.github/workflows/copilot-setup-steps.yml b/.github/workflows/copilot-setup-steps.yml index 6abf73e74..e50e7792c 100644 --- a/.github/workflows/copilot-setup-steps.yml +++ b/.github/workflows/copilot-setup-steps.yml @@ -26,7 +26,7 @@ jobs: with: components: rustfmt - - uses: hyperlight-dev/ci-setup-workflow@v1.6.0 + - uses: hyperlight-dev/ci-setup-workflow@v1.8.0 with: rust-toolchain: "1.86" env: diff --git a/.github/workflows/dep_build_guest_binaries.yml b/.github/workflows/dep_build_guest_binaries.yml index 89ebf4dca..9af15774e 100644 --- a/.github/workflows/dep_build_guest_binaries.yml +++ b/.github/workflows/dep_build_guest_binaries.yml @@ -31,7 +31,7 @@ jobs: steps: - uses: actions/checkout@v5 - - uses: hyperlight-dev/ci-setup-workflow@v1.6.0 + - uses: hyperlight-dev/ci-setup-workflow@v1.8.0 with: rust-toolchain: "1.86" env: diff --git a/.github/workflows/dep_fuzzing.yml b/.github/workflows/dep_fuzzing.yml index 95008efd4..4386cc58a 100644 --- a/.github/workflows/dep_fuzzing.yml +++ b/.github/workflows/dep_fuzzing.yml @@ -32,7 +32,7 @@ jobs: - name: Checkout code uses: actions/checkout@v5 - - uses: hyperlight-dev/ci-setup-workflow@v1.6.0 + - uses: hyperlight-dev/ci-setup-workflow@v1.8.0 with: rust-toolchain: "1.86" env: diff --git a/.github/workflows/dep_rust.yml b/.github/workflows/dep_rust.yml index b93042c50..25777ca43 100644 --- a/.github/workflows/dep_rust.yml +++ b/.github/workflows/dep_rust.yml @@ -46,7 +46,7 @@ jobs: steps: - uses: actions/checkout@v5 - - uses: hyperlight-dev/ci-setup-workflow@v1.6.0 + - uses: hyperlight-dev/ci-setup-workflow@v1.8.0 with: rust-toolchain: "1.86" env: @@ -98,7 +98,7 @@ jobs: with: components: rustfmt - - uses: hyperlight-dev/ci-setup-workflow@v1.6.0 + - uses: hyperlight-dev/ci-setup-workflow@v1.8.0 with: rust-toolchain: "1.86" env: diff --git a/CHANGELOG.md b/CHANGELOG.md index a774369a4..24def0858 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,16 @@ The format is based on [Keep a 
Changelog](https://keepachangelog.com/en/1.1.0/). ## [Prerelease] - Unreleased +## [v0.9.1] - 2025-10-29 + +### Fixed + +- Fix race condition when killing sandboxes by @simongdavies in https://github.com/hyperlight-dev/hyperlight/pull/959 and https://github.com/hyperlight-dev/hyperlight/pull/994 + +### Changed + +- Added poison sandbox detection by @ludfjig in https://github.com/hyperlight-dev/hyperlight/pull/931 + ## [v0.9.0] - 2025-08-28 ### Fixed @@ -188,7 +198,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). The Initial Hyperlight Release šŸŽ‰ -[Prerelease]: +[Prerelease]: +[v0.9.1]: [v0.9.0]: [v0.8.0]: [v0.7.0]: diff --git a/Cargo.lock b/Cargo.lock index 9770b25e7..7049437ba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1344,7 +1344,7 @@ dependencies = [ [[package]] name = "hyperlight-common" -version = "0.9.0" +version = "0.9.1" dependencies = [ "anyhow", "arbitrary", @@ -1357,7 +1357,7 @@ dependencies = [ [[package]] name = "hyperlight-component-macro" -version = "0.9.0" +version = "0.9.1" dependencies = [ "env_logger", "hyperlight-component-util", @@ -1371,7 +1371,7 @@ dependencies = [ [[package]] name = "hyperlight-component-util" -version = "0.9.0" +version = "0.9.1" dependencies = [ "itertools 0.14.0", "log", @@ -1393,7 +1393,7 @@ dependencies = [ [[package]] name = "hyperlight-guest" -version = "0.9.0" +version = "0.9.1" dependencies = [ "anyhow", "flatbuffers", @@ -1404,7 +1404,7 @@ dependencies = [ [[package]] name = "hyperlight-guest-bin" -version = "0.9.0" +version = "0.9.1" dependencies = [ "buddy_system_allocator", "cc", @@ -1419,7 +1419,7 @@ dependencies = [ [[package]] name = "hyperlight-guest-tracing" -version = "0.9.0" +version = "0.9.1" dependencies = [ "hyperlight-common", "hyperlight-guest-tracing-macro", @@ -1428,7 +1428,7 @@ dependencies = [ [[package]] name = "hyperlight-guest-tracing-macro" -version = "0.9.0" +version = "0.9.1" dependencies = [ "proc-macro2", "quote", @@ -1437,7 +1437,7 @@ dependencies = [ [[package]] name = "hyperlight-host" -version = "0.9.0" +version = "0.9.1" dependencies = [ "anyhow", "bitflags 2.9.3", @@ -1527,7 +1527,7 @@ dependencies = [ [[package]] name = "hyperlight_guest_capi" -version = "0.9.0" +version = "0.9.1" dependencies = [ "cbindgen", "hyperlight-common", diff --git a/Cargo.toml b/Cargo.toml index d5f9a4092..c714bc62d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,7 +28,7 @@ exclude = [ ] [workspace.package] -version = "0.9.0" +version = "0.9.1" edition = "2024" rust-version = "1.86" license = "Apache-2.0" diff --git a/src/hyperlight_host/src/error.rs b/src/hyperlight_host/src/error.rs index d2fb382a0..1192737e4 100644 --- a/src/hyperlight_host/src/error.rs +++ b/src/hyperlight_host/src/error.rs @@ -196,6 +196,27 @@ pub enum HyperlightError { #[error("Failure processing PE File {0:?}")] PEFileProcessingFailure(#[from] goblin::error::Error), + /// The sandbox becomes **poisoned** when the guest is not run to completion, leaving it in + /// an inconsistent state that could compromise memory safety, data integrity, or security. + /// + /// ### When Does Poisoning Occur? 
+ /// + /// Poisoning happens when guest execution is interrupted before normal completion: + /// + /// - **Guest panics or aborts** - When a guest function panics, crashes, or calls `abort()`, + /// the normal cleanup and unwinding process is interrupted + /// - **Invalid memory access** - Attempts to read/write/execute memory outside allowed regions + /// - **Stack overflow** - Guest exhausts its stack space during execution + /// - **Heap exhaustion** - Guest runs out of heap memory + /// - **Host-initiated cancellation** - Calling [`InterruptHandle::kill()`] to forcefully + /// terminate an in-progress guest function + /// + /// ### Recovery + /// + /// Use [`crate::MultiUseSandbox::restore()`] to recover from a poisoned sandbox. + #[error("The sandbox was poisoned")] + PoisonedSandbox, + /// Raw pointer is less than base address #[error("Raw pointer ({0:?}) was less than the base address ({1})")] RawPointerLessThanBaseAddress(RawPtr, u64), @@ -301,6 +322,96 @@ impl From>> for HyperlightError { } } +impl HyperlightError { + /// Internal helper to determine whether the given error has the potential to poison the sandbox. + /// + /// Errors that poison the sandbox are those that can leave the sandbox in an inconsistent + /// state where memory, resources, or data structures may be corrupted or leaked, usually + /// because the guest did not run to completion. + /// + /// If this method returns `true`, the sandbox will be poisoned and all further operations + /// will fail until the sandbox is restored from a non-poisoned snapshot using + /// [`crate::MultiUseSandbox::restore()`]. + pub(crate) fn is_poison_error(&self) -> bool { + // A wildcard `_` arm (or `matches!`) is deliberately not used here, so that new error + // variants must be explicitly considered for poisoning behavior. + match self { + // These errors poison the sandbox because they can leave it in an inconsistent state due + // to the guest not running to completion. + HyperlightError::GuestAborted(_, _) + | HyperlightError::ExecutionCanceledByHost() + | HyperlightError::PoisonedSandbox + | HyperlightError::ExecutionAccessViolation(_) + | HyperlightError::StackOverflow() + | HyperlightError::MemoryAccessViolation(_, _, _) => true, + #[cfg(all(feature = "seccomp", target_os = "linux"))] + HyperlightError::DisallowedSyscall => true, + + // All other errors do not poison the sandbox.
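The exhaustive match in `is_poison_error` is a deliberate Rust pattern: because there is no wildcard `_` arm, adding a new `HyperlightError` variant makes this function fail to compile until the new variant is explicitly classified; the remaining, non-poisoning variants are then listed exhaustively below. A minimal, self-contained sketch of the same technique; the `FaultKind` enum and its variants are invented for illustration and are not Hyperlight types:

```rust
/// Hypothetical error type, invented purely to illustrate the pattern.
#[derive(Debug)]
enum FaultKind {
    GuestAborted,
    CancelledByHost,
    HostFunctionNotFound,
}

/// No `_` arm: adding a variant to `FaultKind` is a compile error here
/// until someone decides whether the new variant poisons the sandbox.
fn poisons_sandbox(fault: &FaultKind) -> bool {
    match fault {
        // The guest did not run to completion; its state may be inconsistent.
        FaultKind::GuestAborted | FaultKind::CancelledByHost => true,
        // The call failed before any guest state was touched.
        FaultKind::HostFunctionNotFound => false,
    }
}

fn main() {
    assert!(poisons_sandbox(&FaultKind::GuestAborted));
    assert!(!poisons_sandbox(&FaultKind::HostFunctionNotFound));
}
```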
+ HyperlightError::AnyhowError(_) + | HyperlightError::BoundsCheckFailed(_, _) + | HyperlightError::CheckedAddOverflow(_, _) + | HyperlightError::CStringConversionError(_) + | HyperlightError::Error(_) + | HyperlightError::FailedToGetValueFromParameter() + | HyperlightError::FieldIsMissingInGuestLogData(_) + | HyperlightError::GuestError(_, _) + | HyperlightError::GuestExecutionHungOnHostFunctionCall() + | HyperlightError::GuestFunctionCallAlreadyInProgress() + | HyperlightError::GuestInterfaceUnsupportedType(_) + | HyperlightError::GuestOffsetIsInvalid(_) + | HyperlightError::HostFunctionNotFound(_) + | HyperlightError::IOError(_) + | HyperlightError::IntConversionFailure(_) + | HyperlightError::InvalidFlatBuffer(_) + | HyperlightError::JsonConversionFailure(_) + | HyperlightError::LockAttemptFailed(_) + | HyperlightError::MemoryAllocationFailed(_) + | HyperlightError::MemoryProtectionFailed(_) + | HyperlightError::MemoryRequestTooBig(_, _) + | HyperlightError::MetricNotFound(_) + | HyperlightError::MmapFailed(_) + | HyperlightError::MprotectFailed(_) + | HyperlightError::NoHypervisorFound() + | HyperlightError::NoMemorySnapshot + | HyperlightError::ParameterValueConversionFailure(_, _) + | HyperlightError::PEFileProcessingFailure(_) + | HyperlightError::RawPointerLessThanBaseAddress(_, _) + | HyperlightError::RefCellBorrowFailed(_) + | HyperlightError::RefCellMutBorrowFailed(_) + | HyperlightError::ReturnValueConversionFailure(_, _) + | HyperlightError::SnapshotSandboxMismatch + | HyperlightError::SystemTimeError(_) + | HyperlightError::TryFromSliceError(_) + | HyperlightError::UnexpectedNoOfArguments(_, _) + | HyperlightError::UnexpectedParameterValueType(_, _) + | HyperlightError::UnexpectedReturnValueType(_, _) + | HyperlightError::UTF8StringConversionFailure(_) + | HyperlightError::VectorCapacityIncorrect(_, _, _) => false, + + #[cfg(target_os = "windows")] + HyperlightError::CrossBeamReceiveError(_) => false, + #[cfg(target_os = "windows")] + HyperlightError::CrossBeamSendError(_) => false, + #[cfg(target_os = "windows")] + HyperlightError::WindowsAPIError(_) => false, + #[cfg(target_os = "linux")] + HyperlightError::VmmSysError(_) => false, + #[cfg(kvm)] + HyperlightError::KVMError(_) => false, + #[cfg(mshv)] + HyperlightError::MSHVError(_) => false, + #[cfg(gdb)] + HyperlightError::TranslateGuestAddress(_) => false, + #[cfg(all(feature = "seccomp", target_os = "linux"))] + HyperlightError::SeccompFilterError(_) => false, + + #[cfg(all(feature = "seccomp", target_os = "linux"))] + HyperlightError::SeccompFilterBackendError(_) => false, + } + } +} + /// Creates a `HyperlightError::Error` from a string literal or format string #[macro_export] macro_rules! 
new_error { diff --git a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs index 1907d8e0b..075a53385 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs @@ -70,7 +70,7 @@ use super::{ CR0_AM, CR0_ET, CR0_MP, CR0_NE, CR0_PE, CR0_PG, CR0_WP, CR4_OSFXSR, CR4_OSXMMEXCPT, CR4_PAE, EFER_LMA, EFER_LME, EFER_NX, EFER_SCE, }; -use super::{HyperlightExit, Hypervisor, InterruptHandle, LinuxInterruptHandle, VirtualCPU}; +use super::{HyperlightExit, Hypervisor, LinuxInterruptHandle, VirtualCPU}; #[cfg(gdb)] use crate::HyperlightError; use crate::hypervisor::get_memory_access_violation; @@ -419,7 +419,8 @@ impl HypervLinuxDriver { let interrupt_handle = Arc::new(LinuxInterruptHandle { running: AtomicU64::new(0), - cancel_requested: AtomicBool::new(false), + cancel_requested: AtomicU64::new(0), + call_active: AtomicBool::new(false), #[cfg(gdb)] debug_interrupt: AtomicBool::new(false), #[cfg(all( @@ -761,17 +762,18 @@ impl Hypervisor for HypervLinuxDriver { self.interrupt_handle .tid - .store(unsafe { libc::pthread_self() as u64 }, Ordering::Relaxed); - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call - self.interrupt_handle - .set_running_and_increment_generation() - .map_err(|e| { - new_error!( - "Error setting running state and incrementing generation: {}", - e - ) - })?; + .store(unsafe { libc::pthread_self() as u64 }, Ordering::Release); + // Cast to the internal trait for access to internal methods + let interrupt_handle_internal = + self.interrupt_handle.as_ref() as &dyn super::InterruptHandleInternal; + + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after set_running_bit but before checking cancel_requested): + // - kill() will stamp cancel_requested with the current generation + // - We will check cancel_requested below and skip the VcpuFd::run() call + // - This is the desired behavior - the kill takes effect immediately + let generation = interrupt_handle_internal.set_running_bit(); + #[cfg(not(gdb))] let debug_interrupt = false; #[cfg(gdb)] @@ -780,14 +782,15 @@ let debug_interrupt = self .interrupt_handle .debug_interrupt .load(Ordering::Relaxed); - // Don't run the vcpu if `cancel_requested` is true + // Don't run the vcpu if `cancel_requested` is set for our generation // - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call - let exit_reason = if self - .interrupt_handle - .cancel_requested - .load(Ordering::Relaxed) + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after checking cancel_requested but before vcpu.run()): + // - kill() will stamp cancel_requested with the current generation + // - We will proceed with vcpu.run(), but signals will be sent to interrupt it + // - The vcpu will be interrupted and return EINTR (handled below) + let exit_reason = if interrupt_handle_internal + .is_cancel_requested_for_generation(generation) || debug_interrupt { Err(mshv_ioctls::MshvError::from(libc::EINTR)) } else { @@ -813,27 +816,31 @@ #[cfg(mshv3)] self.vcpu_fd.run() }; - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then signals will be sent to this thread until `running` is set to
false. - // This is fine since the signal handler is a no-op. - let cancel_requested = self - .interrupt_handle - .cancel_requested - .load(Ordering::Relaxed); + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after vcpu.run() returns but before clear_running_bit): + // - kill() continues sending signals to this thread (running bit is still set) + // - The signals are harmless (no-op handler), we just need to check cancel_requested + // - We load cancel_requested below to determine if this run was cancelled + let cancel_requested = + interrupt_handle_internal.is_cancel_requested_for_generation(generation); #[cfg(gdb)] let debug_interrupt = self .interrupt_handle .debug_interrupt .load(Ordering::Relaxed); - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then `cancel_requested` will be set to true again, which will cancel the **next vcpu run**. - // Additionally signals will be sent to this thread until `running` is set to false. - // This is fine since the signal handler is a no-op. - self.interrupt_handle.clear_running_bit(); - // At this point, `running` is false so no more signals will be sent to this thread, - // but we may still receive async signals that were sent before this point. - // To prevent those signals from interrupting subsequent calls to `run()`, - // we make sure to check `cancel_requested` before cancelling (see `libc::EINTR` match-arm below). + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after loading cancel_requested but before clear_running_bit): + // - kill() stamps cancel_requested with the CURRENT generation (not the one we just loaded) + // - kill() continues sending signals until running bit is cleared + // - The newly stamped cancel_requested will affect the NEXT vcpu.run() call + // - Signals sent now are harmless (no-op handler) + interrupt_handle_internal.clear_running_bit(); + // At this point, running bit is clear so kill() will stop sending signals. + // However, we may still receive delayed signals that were sent before clear_running_bit. + // These stale signals are harmless because: + // - The signal handler is a no-op + // - We check generation matching in cancel_requested before treating EINTR as cancellation + // - If generation doesn't match, we return Retry instead of Cancelled let result = match exit_reason { Ok(m) => match m.header.message_type { HALT_MESSAGE => { @@ -913,14 +920,16 @@ impl Hypervisor for HypervLinuxDriver { } }, Err(e) => match e.errno() { - // we send a signal to the thread to cancel execution this results in EINTR being returned by KVM so we return Cancelled + // We send a signal (SIGRTMIN+offset) to interrupt the vcpu, which causes EINTR libc::EINTR => { - // If cancellation was not requested for this specific vm, the vcpu was interrupted because of debug interrupt or - // a stale signal that meant to be delivered to a previous/other vcpu on this same thread, so let's ignore it + // Check if cancellation was requested for THIS specific generation. + // If not, the EINTR came from: + // - A debug interrupt (if GDB is enabled) + // - A stale signal from a previous guest call (generation mismatch) + // - A signal meant for a different sandbox on the same thread + // In these cases, we return Retry to continue execution. 
if cancel_requested { - self.interrupt_handle - .cancel_requested - .store(false, Ordering::Relaxed); + interrupt_handle_internal.clear_cancel_requested(); HyperlightExit::Cancelled() } else { #[cfg(gdb)] @@ -956,7 +965,7 @@ self as &mut dyn Hypervisor } - fn interrupt_handle(&self) -> Arc<dyn InterruptHandle> { + fn interrupt_handle(&self) -> Arc<dyn InterruptHandleInternal> { self.interrupt_handle.clone() } diff --git a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs index f8b4727d1..05e7b65e7 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs @@ -17,7 +17,7 @@ limitations under the License. */ use std::fmt; use std::fmt::{Debug, Formatter}; use std::string::String; -use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; use std::sync::{Arc, Mutex}; use log::LevelFilter; @@ -55,8 +55,8 @@ use super::{ }; use super::{HyperlightExit, Hypervisor, InterruptHandle, VirtualCPU}; use crate::hypervisor::fpu::FP_CONTROL_WORD_DEFAULT; -use crate::hypervisor::get_memory_access_violation; use crate::hypervisor::wrappers::WHvGeneralRegisters; +use crate::hypervisor::{InterruptHandleInternal, get_memory_access_violation}; use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; use crate::mem::ptr::{GuestPtr, RawPtr}; use crate::mem::shared_mem::HostSharedMemory; @@ -344,10 +344,11 @@ impl HypervWindowsDriver { }; let interrupt_handle = Arc::new(WindowsInterruptHandle { - running: AtomicBool::new(false), - cancel_requested: AtomicBool::new(false), + running: AtomicU64::new(0), + cancel_requested: AtomicU64::new(0), #[cfg(gdb)] debug_interrupt: AtomicBool::new(false), + call_active: AtomicBool::new(false), partition_handle, dropped: AtomicBool::new(false), }); @@ -736,7 +737,12 @@ impl Hypervisor for HypervWindowsDriver { #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] fn run(&mut self) -> Result<HyperlightExit> { - self.interrupt_handle.running.store(true, Ordering::Relaxed); + // Cast to internal trait for access to internal methods + let interrupt_handle_internal = + self.interrupt_handle.as_ref() as &dyn super::InterruptHandleInternal; + + // Get current generation and set running bit + let generation = interrupt_handle_internal.set_running_bit(); #[cfg(not(gdb))] let debug_interrupt = false; #[cfg(gdb)] @@ -746,11 +752,9 @@ let debug_interrupt = self .interrupt_handle .debug_interrupt .load(Ordering::Relaxed); - // Don't run the vcpu if `cancel_requested` is true - let exit_context = if self - .interrupt_handle - .cancel_requested - .load(Ordering::Relaxed) + // Check if cancellation was requested for THIS generation + let exit_context = if interrupt_handle_internal + .is_cancel_requested_for_generation(generation) || debug_interrupt { WHV_RUN_VP_EXIT_CONTEXT { @@ -770,12 +774,20 @@ } self.processor.run()?
}; - self.interrupt_handle - .cancel_requested - .store(false, Ordering::Relaxed); - self.interrupt_handle - .running - .store(false, Ordering::Relaxed); + + // Clear running bit + interrupt_handle_internal.clear_running_bit(); + + let is_canceled = exit_context.ExitReason == WHV_RUN_VP_EXIT_REASON(8193i32); // WHvRunVpExitReasonCanceled + + // Check if this was a manual cancellation (vs internal Windows cancellation) + let cancel_was_requested_manually = + interrupt_handle_internal.is_cancel_requested_for_generation(generation); + + // Only clear cancel_requested if we're actually processing a cancellation for this generation + if is_canceled && cancel_was_requested_manually { + interrupt_handle_internal.clear_cancel_requested(); + } #[cfg(gdb)] let debug_interrupt = self @@ -851,12 +863,32 @@ // return a special exit reason so that the gdb thread can handle it // and resume execution HyperlightExit::Debug(VcpuStopReason::Interrupt) + } else if !cancel_was_requested_manually { + // This was an internal cancellation + // The virtualization stack can use this function to return the control + // of a virtual processor back to the virtualization stack in case it + // needs to change the state of a VM or to inject an event into the processor + // see https://learn.microsoft.com/en-us/virtualization/api/hypervisor-platform/funcs/whvcancelrunvirtualprocessor#remarks + debug!("Internal cancellation detected, returning Retry error"); + HyperlightExit::Retry() } else { HyperlightExit::Cancelled() } #[cfg(not(gdb))] - HyperlightExit::Cancelled() + { + if !cancel_was_requested_manually { + // This was an internal cancellation + // The virtualization stack can use this function to return the control + // of a virtual processor back to the virtualization stack in case it + // needs to change the state of a VM or to inject an event into the processor + // see https://learn.microsoft.com/en-us/virtualization/api/hypervisor-platform/funcs/whvcancelrunvirtualprocessor#remarks + debug!("Internal cancellation detected, returning Retry error"); + HyperlightExit::Retry() + } else { + HyperlightExit::Cancelled() + } + } } #[cfg(gdb)] WHV_RUN_VP_EXIT_REASON(4098i32) => { @@ -885,7 +917,7 @@ Ok(result) } - fn interrupt_handle(&self) -> Arc<dyn InterruptHandle> { + fn interrupt_handle(&self) -> Arc<dyn InterruptHandleInternal> { self.interrupt_handle.clone() } @@ -1123,30 +1155,80 @@ impl Drop for HypervWindowsDriver { #[derive(Debug)] pub struct WindowsInterruptHandle { - // `WHvCancelRunVirtualProcessor()` will return Ok even if the vcpu is not running, which is the reason we need this flag. - running: AtomicBool, - cancel_requested: AtomicBool, + /// Combined running flag (bit 63) and generation counter (bits 0-62). + /// + /// The generation increments with each guest function call to prevent + /// stale cancellations from affecting new calls (ABA problem). + /// + /// Layout: `[running:1 bit][generation:63 bits]` + running: AtomicU64, + + /// Combined cancel_requested flag (bit 63) and generation counter (bits 0-62). + /// + /// When kill() is called, this stores the current generation along with + /// the cancellation flag. The VCPU only honors the cancellation if the + /// generation matches its current generation.
+ /// + /// Layout: `[cancel_requested:1 bit][generation:63 bits]` + cancel_requested: AtomicU64, + // This is used to signal the GDB thread to stop the vCPU #[cfg(gdb)] debug_interrupt: AtomicBool, + /// Flag indicating whether a guest function call is currently in progress. + /// + /// **true**: A guest function call is active (between call start and completion) + /// **false**: No guest function call is active + /// + /// # Purpose + /// + /// This flag prevents kill() from having any effect when called outside of a + /// guest function call. This solves the "kill-in-advance" problem where kill() + /// could be called before a guest function starts and would incorrectly cancel it. + call_active: AtomicBool, partition_handle: WHV_PARTITION_HANDLE, dropped: AtomicBool, } impl InterruptHandle for WindowsInterruptHandle { fn kill(&self) -> bool { - self.cancel_requested.store(true, Ordering::Relaxed); - self.running.load(Ordering::Relaxed) - && unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() } + // Check if a call is actually active first + if !self.call_active.load(Ordering::Acquire) { + return false; + } + + // Get the current running state and generation + let (running, generation) = self.get_running_and_generation(); + + // Set cancel_requested with the current generation + self.set_cancel_requested(generation); + + // Only call WHvCancelRunVirtualProcessor if VCPU is actually running in guest mode + running && unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() } } + #[cfg(gdb)] fn kill_from_debugger(&self) -> bool { self.debug_interrupt.store(true, Ordering::Relaxed); - self.running.load(Ordering::Relaxed) - && unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() } + let (running, _) = self.get_running_and_generation(); + running && unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() } } fn dropped(&self) -> bool { self.dropped.load(Ordering::Relaxed) } } + +impl InterruptHandleInternal for WindowsInterruptHandle { + fn get_call_active(&self) -> &AtomicBool { + &self.call_active + } + + fn get_running(&self) -> &AtomicU64 { + &self.running + } + + fn get_cancel_requested(&self) -> &AtomicU64 { + &self.cancel_requested + } +} diff --git a/src/hyperlight_host/src/hypervisor/kvm.rs b/src/hyperlight_host/src/hypervisor/kvm.rs index be76129fb..75b6841b8 100644 --- a/src/hyperlight_host/src/hypervisor/kvm.rs +++ b/src/hyperlight_host/src/hypervisor/kvm.rs @@ -37,7 +37,7 @@ use super::{ CR0_AM, CR0_ET, CR0_MP, CR0_NE, CR0_PE, CR0_PG, CR0_WP, CR4_OSFXSR, CR4_OSXMMEXCPT, CR4_PAE, EFER_LMA, EFER_LME, EFER_NX, EFER_SCE, }; -use super::{HyperlightExit, Hypervisor, InterruptHandle, LinuxInterruptHandle, VirtualCPU}; +use super::{HyperlightExit, Hypervisor, LinuxInterruptHandle, VirtualCPU}; #[cfg(gdb)] use crate::HyperlightError; use crate::hypervisor::get_memory_access_violation; @@ -356,7 +356,8 @@ impl KVMDriver { let interrupt_handle = Arc::new(LinuxInterruptHandle { running: AtomicU64::new(0), - cancel_requested: AtomicBool::new(false), + cancel_requested: AtomicU64::new(0), + call_active: AtomicBool::new(false), #[cfg(gdb)] debug_interrupt: AtomicBool::new(false), #[cfg(all( @@ -664,17 +665,18 @@ impl Hypervisor for KVMDriver { fn run(&mut self) -> Result<HyperlightExit> { self.interrupt_handle .tid - .store(unsafe { libc::pthread_self() as u64 }, Ordering::Relaxed); - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then this is fine since `cancel_requested` is set to true, so we
will skip the `VcpuFd::run()` call - self.interrupt_handle - .set_running_and_increment_generation() - .map_err(|e| { - new_error!( - "Error setting running state and incrementing generation: {}", - e - ) - })?; + .store(unsafe { libc::pthread_self() as u64 }, Ordering::Release); + // Cast to the internal trait for access to internal methods + let interrupt_handle_internal = + self.interrupt_handle.as_ref() as &dyn super::InterruptHandleInternal; + + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after set_running_bit but before checking cancel_requested): + // - kill() will stamp cancel_requested with the current generation + // - We will check cancel_requested below and skip the VcpuFd::run() call + // - This is the desired behavior - the kill takes effect immediately + let generation = interrupt_handle_internal.set_running_bit(); + #[cfg(not(gdb))] let debug_interrupt = false; #[cfg(gdb)] @@ -682,14 +684,15 @@ let debug_interrupt = self .interrupt_handle .debug_interrupt .load(Ordering::Relaxed); - // Don't run the vcpu if `cancel_requested` is true + // Don't run the vcpu if `cancel_requested` is set for our generation // - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call - let exit_reason = if self - .interrupt_handle - .cancel_requested - .load(Ordering::Relaxed) + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after checking cancel_requested but before vcpu.run()): + // - kill() will stamp cancel_requested with the current generation + // - We will proceed with vcpu.run(), but signals will be sent to interrupt it + // - The vcpu will be interrupted and return EINTR (handled below) + let exit_reason = if interrupt_handle_internal + .is_cancel_requested_for_generation(generation) || debug_interrupt { Err(kvm_ioctls::Error::new(libc::EINTR)) } else { @@ -703,34 +706,39 @@ Some(hyperlight_guest_tracing::invariant_tsc::read_tsc()); } - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then the vcpu will run, but we will keep sending signals to this thread - // to interrupt it until `running` is set to false. The `vcpu_fd::run()` call will - // return either normally with an exit reason, or from being "kicked" by out signal handler, with an EINTR error, - // both of which are fine. + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (during vcpu.run() execution): + // - kill() stamps cancel_requested with the current generation + // - kill() sends signals (SIGRTMIN+offset) to this thread repeatedly + // - The signal handler is a no-op, but it causes vcpu.run() to return EINTR + // - We check cancel_requested below and return Cancelled if generation matches self.vcpu_fd.run() }; - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then signals will be sent to this thread until `running` is set to false. - // This is fine since the signal handler is a no-op.
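The "signals keep coming until the running bit clears" behavior referenced in these comments is implemented by a retry loop around `pthread_kill`, shown later in this diff as `send_signal`. A simplified, Linux-only sketch of that loop follows; the struct is a stand-in for `LinuxInterruptHandle`, not the crate's actual type:

```rust
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::Duration;

const RUNNING_BIT: u64 = 1 << 63;

/// Simplified stand-in for LinuxInterruptHandle, for illustration only.
struct Handle {
    /// Bit 63 = vcpu running, bits 0-62 = generation.
    running: AtomicU64,
    /// pthread id of the vcpu thread.
    tid: AtomicU64,
    retry_delay: Duration,
    sig_rt_min_offset: u8,
}

impl Handle {
    /// Keep nudging the vcpu thread until it leaves guest mode. The signal
    /// handler is a no-op; its only purpose is to make the blocking
    /// vcpu-run ioctl return EINTR on the target thread.
    fn send_signal_until_stopped(&self) -> bool {
        let signal_number = libc::SIGRTMIN() + self.sig_rt_min_offset as libc::c_int;
        let mut sent = false;
        while self.running.load(Ordering::Acquire) & RUNNING_BIT != 0 {
            sent = true;
            unsafe {
                libc::pthread_kill(self.tid.load(Ordering::Acquire) as _, signal_number);
            }
            std::thread::sleep(self.retry_delay);
        }
        sent
    }
}
```

The real `send_signal` additionally stamps the generation into `cancel_requested` before signalling and bails out early when no guest call is active.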
- let cancel_requested = self - .interrupt_handle - .cancel_requested - .load(Ordering::Relaxed); + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after vcpu.run() returns but before clear_running_bit): + // - kill() continues sending signals to this thread (running bit is still set) + // - The signals are harmless (no-op handler), we just need to check cancel_requested + // - We load cancel_requested below to determine if this run was cancelled + let cancel_requested = + interrupt_handle_internal.is_cancel_requested_for_generation(generation); #[cfg(gdb)] let debug_interrupt = self .interrupt_handle .debug_interrupt .load(Ordering::Relaxed); - // Note: if a `InterruptHandle::kill()` called while this thread is **here** - // Then `cancel_requested` will be set to true again, which will cancel the **next vcpu run**. - // Additionally signals will be sent to this thread until `running` is set to false. - // This is fine since the signal handler is a no-op. - self.interrupt_handle.clear_running_bit(); - // At this point, `running` is false so no more signals will be sent to this thread, - // but we may still receive async signals that were sent before this point. - // To prevent those signals from interrupting subsequent calls to `run()` (on other vms!), - // we make sure to check `cancel_requested` before cancelling (see `libc::EINTR` match-arm below). + // Note: if `InterruptHandle::kill()` is called while this thread is **here** + // (after loading cancel_requested but before clear_running_bit): + // - kill() stamps cancel_requested with the CURRENT generation (not the one we just loaded) + // - kill() continues sending signals until running bit is cleared + // - The newly stamped cancel_requested will affect the NEXT vcpu.run() call + // - Signals sent now are harmless (no-op handler) + interrupt_handle_internal.clear_running_bit(); + // At this point, running bit is clear so kill() will stop sending signals. + // However, we may still receive delayed signals that were sent before clear_running_bit. + // These stale signals are harmless because: + // - The signal handler is a no-op + // - We check generation matching in cancel_requested before treating EINTR as cancellation + // - If generation doesn't match, we return Retry instead of Cancelled let result = match exit_reason { Ok(VcpuExit::Hlt) => { crate::debug!("KVM - Halt Details : {:#?}", &self); @@ -779,14 +787,16 @@ impl Hypervisor for KVMDriver { } }, Err(e) => match e.errno() { - // we send a signal to the thread to cancel execution this results in EINTR being returned by KVM so we return Cancelled + // We send a signal (SIGRTMIN+offset) to interrupt the vcpu, which causes EINTR libc::EINTR => { - // If cancellation was not requested for this specific vm, the vcpu was interrupted because of debug interrupt or - // a stale signal that meant to be delivered to a previous/other vcpu on this same thread, so let's ignore it + // Check if cancellation was requested for THIS specific generation. + // If not, the EINTR came from: + // - A debug interrupt (if GDB is enabled) + // - A stale signal from a previous guest call (generation mismatch) + // - A signal meant for a different sandbox on the same thread + // In these cases, we return Retry to continue execution. 
if cancel_requested { - self.interrupt_handle - .cancel_requested - .store(false, Ordering::Relaxed); + interrupt_handle_internal.clear_cancel_requested(); HyperlightExit::Cancelled() } else { #[cfg(gdb)] @@ -827,7 +837,7 @@ self as &mut dyn Hypervisor } - fn interrupt_handle(&self) -> Arc<dyn InterruptHandle> { + fn interrupt_handle(&self) -> Arc<dyn InterruptHandleInternal> { self.interrupt_handle.clone() } diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index 4e55e7ac5..7969bb44a 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -use log::LevelFilter; +use log::{LevelFilter, debug}; use tracing::{Span, instrument}; use crate::HyperlightError::StackOverflow; @@ -61,7 +61,6 @@ pub(crate) mod crashdump; use std::fmt::Debug; use std::str::FromStr; -#[cfg(any(kvm, mshv))] use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; use std::sync::{Arc, Mutex}; #[cfg(any(kvm, mshv))] @@ -112,7 +111,9 @@ pub enum HyperlightExit { Cancelled(), /// The vCPU has exited for a reason that is not handled by Hyperlight Unknown(String), - /// The operation should be retried, for example this can happen on Linux where a call to run the CPU can return EAGAIN + /// The operation should be retried. + /// On Linux this can happen when a call to run the CPU returns EAGAIN. + /// On Windows the platform can internally cancel the VM run. Retry(), } @@ -186,8 +187,8 @@ pub(crate) trait Hypervisor: Debug + Send { /// Run the vCPU fn run(&mut self) -> Result<HyperlightExit>; - /// Get InterruptHandle to underlying VM - fn interrupt_handle(&self) -> Arc<dyn InterruptHandle>; + /// Get InterruptHandle to underlying VM (returns internal trait) + fn interrupt_handle(&self) -> Arc<dyn InterruptHandleInternal>; /// Get the logging level to pass to the guest entrypoint fn get_max_log_level(&self) -> u32 { @@ -350,7 +351,10 @@ impl VirtualCPU { log_then_return!("Unexpected VM Exit {:?}", reason); } - Ok(HyperlightExit::Retry()) => continue, + Ok(HyperlightExit::Retry()) => { + debug!("[VCPU] Retry - continuing VM run loop"); + continue; + } Err(e) => { #[cfg(crashdump)] crashdump::generate_crashdump(hv)?; @@ -368,17 +372,54 @@ } } -/// A trait for handling interrupts to a sandbox's vcpu +/// A trait for handling interrupts to a sandbox's vcpu (public API) pub trait InterruptHandle: Debug + Send + Sync { /// Interrupt the corresponding sandbox from running. /// - /// - If this is called while the vcpu is running, then it will interrupt the vcpu and return `true`. - /// - If this is called while the vcpu is not running, (for example during a host call), the - /// vcpu will not immediately be interrupted, but will prevent the vcpu from running **the next time** - /// it's scheduled, and returns `false`. + /// This method attempts to cancel a currently executing guest function call by sending + /// a signal to the VCPU thread. It uses generation tracking and the call_active flag to + /// ensure the interruption is safe and precise. + /// + /// # Behavior + /// + /// - **Guest function running**: If called while a guest function is executing (VCPU running + /// or in a host function call), this stamps the current generation into cancel_requested + /// and sends a signal to interrupt the VCPU. Returns `true`. + /// + /// - **No active call**: If called when no guest function call is in progress (call_active=false), + /// this has no effect and returns `false`.
This prevents "kill-in-advance" where kill() + /// is called before a guest function starts. + /// + /// - **During host function**: If the guest call is currently executing a host function + /// (VCPU not running but call_active=true), this stamps cancel_requested. When the + /// host function returns and attempts to re-enter the guest, the cancellation will + /// be detected and the call will abort. Returns `true`. + /// + /// # Generation Tracking + /// + /// The method stamps the current generation number along with the cancellation request. + /// This ensures that: + /// - Stale signals from previous calls are ignored (generation mismatch) + /// - Only the intended guest function call is affected + /// - Multiple rapid kill() calls on the same generation are idempotent + /// + /// # Blocking Behavior + /// + /// This function will block while attempting to deliver the signal to the VCPU thread, + /// retrying until either: + /// - The signal is successfully delivered (VCPU transitions from running to not running) + /// - The VCPU stops running for another reason (e.g., call completes normally) + /// + /// # Returns + /// + /// - `true`: Cancellation request was stamped (kill will take effect) + /// - `false`: No active call, cancellation request was not stamped (no effect) /// /// # Note - /// This function will block for the duration of the time it takes for the vcpu thread to be interrupted. + /// + /// To reliably interrupt a guest call, ensure `kill()` is called while the guest + /// function is actually executing. Calling kill() before call_guest_function() will + /// have no effect. fn kill(&self) -> bool; /// Used by a debugger to interrupt the corresponding sandbox from running. @@ -393,94 +434,309 @@ pub trait InterruptHandle: Debug + Send + Sync { #[cfg(gdb)] fn kill_from_debugger(&self) -> bool; - /// Returns true if the corresponding sandbox has been dropped + /// Check if the corresponding VM has been dropped. fn dropped(&self) -> bool; } +/// Internal trait for interrupt handle implementation details (private, cross-platform). +/// +/// This trait contains all the internal atomics access methods and helper functions +/// that are shared between Linux and Windows implementations. It extends InterruptHandle +/// to inherit the public API. +/// +/// This trait should NOT be used outside of hypervisor implementations. +pub(crate) trait InterruptHandleInternal: InterruptHandle { + /// Returns the call_active atomic bool reference for internal implementations. + fn get_call_active(&self) -> &AtomicBool; + + /// Returns the running atomic u64 reference for internal implementations. + fn get_running(&self) -> &AtomicU64; + + /// Returns the cancel_requested atomic u64 reference for internal implementations. + fn get_cancel_requested(&self) -> &AtomicU64; + + /// Set call_active - increments generation and sets flag. + /// + /// Increments the generation counter and sets the call_active flag to true, + /// indicating that a guest function call is now in progress. This allows + /// kill() to stamp cancel_requested with the correct generation. + /// + /// Must be called at the start of call_guest_function_by_name_no_reset(), + /// before any VCPU execution begins. + /// + /// Returns true if call_active was already set (indicating a guard already exists), + /// false otherwise. + fn set_call_active(&self) -> bool { + self.increment_generation(); + self.get_call_active().swap(true, Ordering::AcqRel) + } + + /// Clear call_active - clears the call_active flag. 
+ /// + /// Clears the call_active flag, indicating that no guest function call is + /// in progress. After this, kill() will have no effect and will return false. + /// + /// Must be called at the end of call_guest_function_by_name_no_reset(), + /// after the guest call has fully completed (whether successfully or with error). + fn clear_call_active(&self) { + self.get_call_active().store(false, Ordering::Release) + } + + /// Set cancel_requested to true with the given generation. + /// + /// This stamps the cancellation request with the current generation number, + /// ensuring that only the VCPU running with this exact generation will honor + /// the cancellation. + fn set_cancel_requested(&self, generation: u64) { + const CANCEL_REQUESTED_BIT: u64 = 1 << 63; + const MAX_GENERATION: u64 = CANCEL_REQUESTED_BIT - 1; + let value = CANCEL_REQUESTED_BIT | (generation & MAX_GENERATION); + self.get_cancel_requested().store(value, Ordering::Release); + } + + /// Clear cancel_requested (reset to no cancellation). + /// + /// This is called after a cancellation has been processed to reset the + /// cancellation flag for the next guest call. + fn clear_cancel_requested(&self) { + self.get_cancel_requested().store(0, Ordering::Release); + } + + /// Check if cancel_requested is set for the given generation. + /// + /// Returns true only if BOTH: + /// - The cancellation flag is set + /// - The stored generation matches the provided generation + /// + /// This prevents stale cancellations from affecting new guest calls. + fn is_cancel_requested_for_generation(&self, generation: u64) -> bool { + const CANCEL_REQUESTED_BIT: u64 = 1 << 63; + const MAX_GENERATION: u64 = CANCEL_REQUESTED_BIT - 1; + let raw = self.get_cancel_requested().load(Ordering::Acquire); + let is_set = raw & CANCEL_REQUESTED_BIT != 0; + let stored_generation = raw & MAX_GENERATION; + is_set && stored_generation == generation + } + + /// Set running bit to true, return current generation. + /// + /// This is called when the VCPU is about to enter guest mode. It atomically + /// sets the running flag while preserving the generation counter. + fn set_running_bit(&self) -> u64 { + const RUNNING_BIT: u64 = 1 << 63; + self.get_running() + .fetch_update(Ordering::Release, Ordering::Acquire, |raw| { + Some(raw | RUNNING_BIT) + }) + .map(|raw| raw & !RUNNING_BIT) // Return the current generation + .unwrap_or(0) + } + + /// Increment the generation for a new guest function call. + /// + /// The generation counter wraps around at MAX_GENERATION (2^63 - 1). + /// This is called at the start of each new guest function call to provide + /// a unique identifier that prevents ABA problems with stale cancellations. + /// + /// Returns the NEW generation number (after incrementing). + fn increment_generation(&self) -> u64 { + const RUNNING_BIT: u64 = 1 << 63; + const MAX_GENERATION: u64 = RUNNING_BIT - 1; + self.get_running() + .fetch_update(Ordering::Release, Ordering::Acquire, |raw| { + let current_generation = raw & !RUNNING_BIT; + let running_bit = raw & RUNNING_BIT; + if current_generation == MAX_GENERATION { + // Restart generation from 0 + return Some(running_bit); + } + Some((current_generation + 1) | running_bit) + }) + .map(|raw| { + let previous_generation = raw & !RUNNING_BIT; + if previous_generation == MAX_GENERATION { + 0 // The counter wrapped, so the NEW generation is 0 + } else { + previous_generation + 1 // Return the NEW generation + } + }) + .unwrap_or(0) // Unreachable: the closure above always returns Some + } + + /// Get the current running state and generation counter.
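Before `get_running_and_generation`, whose doc comment continues below, it may help to see the bit arithmetic these helpers share as a worked example over plain `u64` values (the concrete generation numbers are invented for illustration):

```rust
fn main() {
    const RUNNING_BIT: u64 = 1 << 63;
    const MAX_GENERATION: u64 = RUNNING_BIT - 1;

    // A vcpu on generation 41 enters guest mode: set bit 63, keep bits 0-62.
    let running_raw = 41u64 | RUNNING_BIT;
    assert_eq!(running_raw & !RUNNING_BIT, 41); // generation is preserved
    assert_ne!(running_raw & RUNNING_BIT, 0); // running flag is set

    // kill() stamps cancel_requested with the same generation.
    // In that word, bit 63 means "cancel requested" rather than "running".
    let cancel_raw = RUNNING_BIT | (41 & MAX_GENERATION);
    assert_eq!(cancel_raw & MAX_GENERATION, 41);

    // A new guest call increments the generation to 42; the old stamp
    // no longer matches, so the stale kill is ignored.
    let next_generation = (running_raw & !RUNNING_BIT) + 1;
    assert_ne!(cancel_raw & MAX_GENERATION, next_generation);
}
```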
+ /// + /// Returns a tuple of (running, generation) where: + /// - running: true if VCPU is currently in guest mode + /// - generation: current generation counter value + fn get_running_and_generation(&self) -> (bool, u64) { + const RUNNING_BIT: u64 = 1 << 63; + let raw = self.get_running().load(Ordering::Acquire); + let running = raw & RUNNING_BIT != 0; + let generation = raw & !RUNNING_BIT; + (running, generation) + } + + /// Clear the running bit and return the old value. + /// + /// This is called when the VCPU exits from guest mode back to host mode. + /// The return value (which includes the generation and the old running bit) + /// is currently unused by all callers. + fn clear_running_bit(&self) -> u64 { + const RUNNING_BIT: u64 = 1 << 63; + self.get_running() + .fetch_and(!RUNNING_BIT, Ordering::Release) + } +} + #[cfg(any(kvm, mshv))] #[derive(Debug)] pub(super) struct LinuxInterruptHandle { - /// Invariant: vcpu is running => most significant bit (63) of `running` is set. (Neither converse nor inverse is true) + /// Atomic flag combining running state and generation counter. /// - /// Additionally, bit 0-62 tracks how many times the VCPU has been run. Incremented each time `run()` is called. + /// **Bit 63**: VCPU running state (1 = running, 0 = not running) + /// **Bits 0-62**: Generation counter (incremented once per guest function call) /// - /// This prevents an ABA problem where: - /// 1. The VCPU is running (generation N), - /// 2. It gets cancelled, - /// 3. Then quickly restarted (generation N+1), - /// before the original thread has observed that it was cancelled. + /// # Generation Tracking /// - /// Without this generation counter, the interrupt logic might assume the VCPU is still - /// in the *original* run (generation N), see that it's `running`, and re-send the signal. - /// But the new VCPU run (generation N+1) would treat this as a stale signal and ignore it, - /// potentially causing an infinite loop where no effective interrupt is delivered. + /// The generation counter is incremented once at the start of each guest function call + /// and remains constant throughout that call, even if the VCPU is run multiple times + /// (due to host function calls, retries, etc.). This design solves the race condition + /// where a kill() from a previous call could spuriously cancel a new call. /// - /// Invariant: If the VCPU is running, `run_generation[bit 0-62]` matches the current run's generation. + /// ## Why Generations Are Needed + /// + /// Consider this scenario WITHOUT generation tracking: + /// 1. Thread A starts guest call 1, VCPU runs + /// 2. Thread B calls kill(), sends signal to Thread A + /// 3. Guest call 1 completes before signal arrives + /// 4. Thread A starts guest call 2, VCPU runs again + /// 5. Stale signal from step 2 arrives and incorrectly cancels call 2 + /// + /// WITH generation tracking: + /// 1. Thread A starts guest call 1 (generation N), VCPU runs + /// 2. Thread B calls kill(), stamps cancel_requested with generation N + /// 3. Guest call 1 completes, signal may or may not have arrived yet + /// 4. Thread A starts guest call 2 (generation N+1), VCPU runs again + /// 5. If stale signal arrives, signal handler checks: cancel_requested.generation (N) != current generation (N+1) + /// 6. 
Stale signal is ignored, call 2 continues normally + /// + /// ## Per-Call vs Per-Run Generation + /// + /// It's critical that generation is incremented per GUEST FUNCTION CALL, not per vcpu.run(): + /// - A single guest function call may invoke vcpu.run() multiple times (host calls, retries) + /// - All run() calls within the same guest call must share the same generation + /// - This ensures kill() affects the entire guest function call atomically + /// + /// # Invariants + /// + /// - If VCPU is running: bit 63 is set (neither converse nor inverse holds) + /// - If VCPU is running: bits 0-62 match the current guest call's generation running: AtomicU64, - /// Invariant: vcpu is running => `tid` is the thread on which it is running. - /// Note: multiple vms may have the same `tid`, but at most one vm will have `running` set to true. + + /// Thread ID where the VCPU is currently running. + /// + /// # Invariants + /// + /// - If VCPU is running: tid contains the thread ID of the executing thread + /// - Multiple VMs may share the same tid, but at most one will have running=true tid: AtomicU64, - /// True when an "interruptor" has requested the VM to be cancelled. Set immediately when - /// `kill()` is called, and cleared when the vcpu is no longer running. - /// This is used to - /// 1. make sure stale signals do not interrupt the - /// the wrong vcpu (a vcpu may only be interrupted iff `cancel_requested` is true), - /// 2. ensure that if a vm is killed while a host call is running, - /// the vm will not re-enter the guest after the host call returns. - cancel_requested: AtomicBool, - /// True when the debugger has requested the VM to be interrupted. Set immediately when - /// `kill_from_debugger()` is called, and cleared when the vcpu is no longer running. - /// This is used to make sure stale signals do not interrupt the the wrong vcpu - /// (a vcpu may only be interrupted by a debugger if `debug_interrupt` is true), + + /// Generation-aware cancellation request flag. + /// + /// **Bit 63**: Cancellation requested flag (1 = kill requested, 0 = no kill) + /// **Bits 0-62**: Generation number when cancellation was requested + /// + /// # Purpose + /// + /// This flag serves three critical functions: + /// + /// 1. **Prevent stale signals**: A VCPU may only be interrupted if cancel_requested + /// is set AND the generation matches the current call's generation + /// + /// 2. **Handle host function calls**: If kill() is called while a host function is + /// executing (VCPU not running but call is active), cancel_requested is stamped + /// with the current generation. When the host function returns and the VCPU + /// attempts to re-enter the guest, it will see the cancellation and abort. + /// + /// 3. **Detect stale kills**: If cancel_requested.generation doesn't match the + /// current generation, it's from a previous call and should be ignored + /// + /// # States and Transitions + /// + /// - **No cancellation**: cancel_requested = 0 (bit 63 clear) + /// - **Cancellation for generation N**: cancel_requested = (1 << 63) | N + /// - Signal handler checks: (cancel_requested & 0x7FFFFFFFFFFFFFFF) == current_generation + cancel_requested: AtomicU64, + + /// Flag indicating whether a guest function call is currently in progress. 
+ /// + /// **true**: A guest function call is active (between call start and completion) + /// **false**: No guest function call is active + /// + /// # Purpose + /// + /// This flag prevents kill() from having any effect when called outside of a + /// guest function call. This solves the "kill-in-advance" problem where kill() + /// could be called before a guest function starts and would incorrectly cancel it. + /// + /// # Behavior + /// + /// - Set to true at the start of call_guest_function_by_name_no_reset() + /// - Cleared at the end of call_guest_function_by_name_no_reset() + /// - kill() only stamps cancel_requested if call_active is true + /// - If kill() is called when call_active=false, it returns false and has no effect + /// + /// # Why AtomicBool is Safe + /// + /// Although there's a theoretical race where: + /// 1. Thread A checks call_active (false) + /// 2. Thread B sets call_active (true) and starts guest call + /// 3. Thread A's kill() returns false (no effect) + /// + /// This is acceptable because the generation tracking provides an additional + /// safety layer. Even if a stale kill somehow stamped cancel_requested, the + /// generation mismatch would cause it to be ignored. + call_active: AtomicBool, + + /// Debugger interrupt request flag (GDB only). + /// + /// Set when kill_from_debugger() is called, cleared when VCPU stops running. + /// Used to distinguish debugger interrupts from normal kill() interrupts. #[cfg(gdb)] debug_interrupt: AtomicBool, - /// Whether the corresponding vm is dropped + + /// Whether the corresponding VM has been dropped. dropped: AtomicBool, - /// Retry delay between signals sent to the vcpu thread + + /// Delay between retry attempts when sending signals to the VCPU thread. retry_delay: Duration, - /// The offset of the SIGRTMIN signal used to interrupt the vcpu thread + + /// Offset from SIGRTMIN for the signal used to interrupt the VCPU thread. sig_rt_min_offset: u8, } #[cfg(any(kvm, mshv))] impl LinuxInterruptHandle { - const RUNNING_BIT: u64 = 1 << 63; - const MAX_GENERATION: u64 = Self::RUNNING_BIT - 1; - - // set running to true and increment the generation. Generation will wrap around at `MAX_GENERATION`. 
- fn set_running_and_increment_generation(&self) -> std::result::Result<u64, u64> { - self.running - .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |raw| { - let generation = raw & !Self::RUNNING_BIT; - if generation == Self::MAX_GENERATION { - // restart generation from 0 - return Some(Self::RUNNING_BIT); - } - Some((generation + 1) | Self::RUNNING_BIT) - }) - } - - // clear the running bit and return the generation - fn clear_running_bit(&self) -> u64 { - self.running - .fetch_and(!Self::RUNNING_BIT, Ordering::Relaxed) - } - - fn get_running_and_generation(&self) -> (bool, u64) { - let raw = self.running.load(Ordering::Relaxed); - let running = raw & Self::RUNNING_BIT != 0; - let generation = raw & !Self::RUNNING_BIT; - (running, generation) - } - - fn send_signal(&self) -> bool { + fn send_signal(&self, stamp_generation: bool) -> bool { let signal_number = libc::SIGRTMIN() + self.sig_rt_min_offset as libc::c_int; let mut sent_signal = false; let mut target_generation: Option<u64> = None; loop { + if !self.call_active.load(Ordering::Acquire) { + // No active call, so no need to send signal + break; + } + let (running, generation) = self.get_running_and_generation(); + // Stamp generation into cancel_requested if requested and this is the first iteration + // We stamp even when running=false to support killing during host function calls + // The generation tracking will prevent stale kills from affecting new calls + // Only stamp if a call is actually active (call_active=true) + if stamp_generation + && target_generation.is_none() + && self.call_active.load(Ordering::Acquire) + { + self.set_cancel_requested(generation); + target_generation = Some(generation); + } + + // If not running, we've stamped the generation (if requested), so we're done + // This handles the host function call scenario + if !running { break; } @@ -495,7 +751,7 @@ log::info!("Sending signal to kill vcpu thread..."); sent_signal = true; unsafe { - libc::pthread_kill(self.tid.load(Ordering::Relaxed) as _, signal_number); + libc::pthread_kill(self.tid.load(Ordering::Acquire) as _, signal_number); } std::thread::sleep(self.retry_delay); } @@ -507,20 +763,42 @@ #[cfg(any(kvm, mshv))] impl InterruptHandle for LinuxInterruptHandle { fn kill(&self) -> bool { - self.cancel_requested.store(true, Ordering::Relaxed); + if !(self.call_active.load(Ordering::Acquire)) { + // No active call, so no effect + return false; + } - self.send_signal() + // send_signal will stamp the generation into cancel_requested + // right before sending each signal, ensuring they're always in sync + self.send_signal(true) } + #[cfg(gdb)] fn kill_from_debugger(&self) -> bool { self.debug_interrupt.store(true, Ordering::Relaxed); - self.send_signal() + self.send_signal(false) } + fn dropped(&self) -> bool { self.dropped.load(Ordering::Relaxed) } } +#[cfg(any(kvm, mshv))] +impl InterruptHandleInternal for LinuxInterruptHandle { + fn get_call_active(&self) -> &AtomicBool { + &self.call_active + } + + fn get_running(&self) -> &AtomicU64 { + &self.running + } + + fn get_cancel_requested(&self) -> &AtomicU64 { + &self.cancel_requested + } +} + #[cfg(all(test, any(target_os = "windows", kvm)))] pub(crate) mod tests { use std::sync::{Arc, Mutex}; diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index 82a920158..708f5b0a5 100644 --- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++
diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index 82a920158..708f5b0a5 100644 --- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -45,18 +45,90 @@ use crate::mem::ptr::RawPtr; use crate::mem::shared_mem::HostSharedMemory; use crate::metrics::maybe_time_and_emit_guest_call; use crate::sandbox::mem_mgr::MemMgrWrapper; -use crate::{Result, log_then_return}; +use crate::{Result, log_then_return, new_error}; /// Global counter for assigning unique IDs to sandboxes static SANDBOX_ID_COUNTER: AtomicU64 = AtomicU64::new(0); +/// RAII guard that automatically calls `clear_call_active()` when dropped. +/// +/// This ensures that the call_active flag is always cleared when a guest function +/// call completes, even if the function returns early due to an error. +/// +/// Only one guard can exist per interrupt handle at a time - attempting to create +/// a second guard will return an error. +struct CallActiveGuard { + interrupt_handle: Arc<dyn InterruptHandleInternal>, +} + +impl CallActiveGuard { + /// Creates a new guard and marks a guest function call as active. + /// + /// # Errors + /// + /// Returns an error if `call_active` is already true (i.e., another guard already exists). + fn new(interrupt_handle: Arc<dyn InterruptHandleInternal>) -> Result<Self> { + // Atomically check that call_active is false and set it to true. + // This prevents creating multiple guards for the same interrupt handle. + let was_active = interrupt_handle.set_call_active(); + if was_active { + return Err(new_error!( + "Attempted to create CallActiveGuard when a call is already active" + )); + } + Ok(Self { interrupt_handle }) + } +} + +impl Drop for CallActiveGuard { + fn drop(&mut self) { + self.interrupt_handle.clear_call_active(); + } +}
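This is the standard RAII-guard idiom: `Drop` runs on every exit path, including early `?` returns and unwinding panics, so the flag cannot be left set by a missed cleanup call. A self-contained sketch of the same idiom with invented names (`FlagGuard` is not part of the crate):

```rust
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};

struct FlagGuard {
    flag: Arc<AtomicBool>,
}

impl FlagGuard {
    fn new(flag: Arc<AtomicBool>) -> Result<Self, &'static str> {
        // swap() atomically sets the flag and reports whether it was already
        // set, which is how a second, overlapping guard is rejected.
        if flag.swap(true, Ordering::AcqRel) {
            return Err("a guard is already active");
        }
        Ok(Self { flag })
    }
}

impl Drop for FlagGuard {
    fn drop(&mut self) {
        self.flag.store(false, Ordering::Release);
    }
}

fn main() {
    let flag = Arc::new(AtomicBool::new(false));
    {
        let _guard = FlagGuard::new(flag.clone()).unwrap();
        assert!(FlagGuard::new(flag.clone()).is_err()); // double entry rejected
    } // _guard dropped here: the flag is cleared on scope exit
    assert!(!flag.load(Ordering::Acquire));
}
```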
/// A fully initialized sandbox that can execute guest functions multiple times. /// /// Guest functions can be called repeatedly while maintaining state between calls. /// The sandbox supports creating snapshots and restoring to previous states. +/// +/// ## Sandbox Poisoning +/// +/// The sandbox becomes **poisoned** when the guest is not run to completion, leaving it in +/// an inconsistent state that could compromise memory safety, data integrity, or security. +/// +/// ### When Does Poisoning Occur? +/// +/// Poisoning happens when guest execution is interrupted before normal completion: +/// +/// - **Guest panics or aborts** - When a guest function panics, crashes, or calls `abort()`, +/// the normal cleanup and unwinding process is interrupted +/// - **Invalid memory access** - Attempts to read/write/execute memory outside allowed regions +/// - **Stack overflow** - Guest exhausts its stack space during execution +/// - **Heap exhaustion** - Guest runs out of heap memory +/// - **Host-initiated cancellation** - Calling [`InterruptHandle::kill()`] to forcefully +/// terminate an in-progress guest function +/// +/// ### Why This Is Unsafe +/// +/// When guest execution doesn't complete normally, critical cleanup operations are skipped: +/// +/// - **Memory leaks** - Heap allocations remain unreachable because the guest's cleanup code +/// never ran +/// - **Corrupted allocator state** - Memory allocator metadata (free lists, heap headers) +/// left inconsistent +/// - **Locked resources** - Mutexes or other synchronization primitives remain locked +/// - **Partial state updates** - Data structures left half-modified (corrupted linked lists, +/// inconsistent hash tables, etc.) +/// +/// ### Recovery +/// +/// Use [`restore()`](Self::restore) with a snapshot taken before poisoning occurred. +/// This is the **only safe way** to recover - it completely replaces all memory state, +/// eliminating any inconsistencies. See [`restore()`](Self::restore) for details. pub struct MultiUseSandbox { /// Unique identifier for this sandbox instance id: u64, + /// Whether this sandbox is poisoned + poisoned: bool, // We need to keep a reference to the host functions, even if the compiler marks it as unused. The compiler cannot detect our dynamic usages of the host function in `HyperlightFunction::call`. pub(super) _host_funcs: Arc<Mutex<FunctionRegistry>>, pub(crate) mem_mgr: MemMgrWrapper<HostSharedMemory>, @@ -85,6 +157,7 @@ impl MultiUseSandbox { ) -> MultiUseSandbox { Self { id: SANDBOX_ID_COUNTER.fetch_add(1, Ordering::Relaxed), + poisoned: false, _host_funcs: host_funcs, mem_mgr: mgr, vm, @@ -100,6 +173,11 @@ impl MultiUseSandbox { /// The snapshot is tied to this specific sandbox instance and can only be /// restored to the same sandbox it was created from. /// + /// ## Poisoned Sandbox + /// + /// This method will return [`crate::HyperlightError::PoisonedSandbox`] if the sandbox + /// is currently poisoned. Snapshots can only be taken from non-poisoned sandboxes. + /// /// # Examples /// /// ```no_run /// @@ -120,6 +198,10 @@ impl MultiUseSandbox { /// ``` #[instrument(err(Debug), skip_all, parent = Span::current())] pub fn snapshot(&mut self) -> Result<Snapshot> { + if self.poisoned { + return Err(crate::HyperlightError::PoisonedSandbox); + } + if let Some(snapshot) = &self.snapshot { return Ok(snapshot.clone()); } @@ -141,6 +223,22 @@ impl MultiUseSandbox { /// Attempting to restore a snapshot from a different sandbox will return /// a [`SnapshotSandboxMismatch`](crate::HyperlightError::SnapshotSandboxMismatch) error. /// + /// ## Poison State Recovery + /// + /// This method automatically clears any poison state when successful. This is safe because: + /// - Snapshots can only be taken from non-poisoned sandboxes + /// - Restoration completely replaces all memory state, eliminating any inconsistencies + /// caused by incomplete guest execution + /// + /// ### What Gets Fixed During Restore + /// + /// When a poisoned sandbox is restored, the memory state is completely reset: + /// - **Leaked heap memory** - All allocations from interrupted execution are discarded + /// - **Corrupted allocator metadata** - Free lists and heap headers restored to consistent state + /// - **Locked mutexes** - All lock state is reset + /// - **Partial updates** - Data structures restored to their pre-execution state + /// + /// /// # Examples /// /// ```no_run /// @@ -166,6 +264,35 @@ /// # Ok(()) /// # } /// ``` + /// + /// ## Recovering from Poison + /// + /// ```no_run + /// # use hyperlight_host::{MultiUseSandbox, UninitializedSandbox, GuestBinary, HyperlightError}; + /// # fn example() -> Result<(), Box<dyn std::error::Error>> { + /// let mut sandbox: MultiUseSandbox = UninitializedSandbox::new( + /// GuestBinary::FilePath("guest.bin".into()), + /// None + /// )?.evolve()?; + /// + /// // Take snapshot before potentially poisoning operation + /// let snapshot = sandbox.snapshot()?; + /// + /// // This might poison the sandbox (guest not run to completion) + /// let result = sandbox.call::<()>("guest_panic", ()); + /// if result.is_err() { + /// if sandbox.poisoned() { + /// // Restore from snapshot to clear poison + /// sandbox.restore(&snapshot)?; + /// assert!(!sandbox.poisoned()); + /// + /// // Sandbox is now usable again + /// sandbox.call::<String>("Echo", "hello".to_string())?; + /// } + /// } + /// # Ok(()) + /// # } + /// ``` #[instrument(err(Debug), skip_all, parent = Span::current())] pub fn restore(&mut self, snapshot: &Snapshot) -> Result<()> { if let Some(snap) =
&self.snapshot { @@ -200,6 +327,17 @@ impl MultiUseSandbox { // The restored snapshot is now our most current snapshot self.snapshot = Some(snapshot.clone()); + // Clear poison state when successfully restoring from snapshot. + // + // # Safety: + // This is safe because: + // 1. Snapshots can only be taken from non-poisoned sandboxes (verified at snapshot creation) + // 2. Restoration completely replaces all memory state, eliminating: + // - All leaked heap allocations (memory is restored to snapshot state) + // - All corrupted data structures (overwritten with consistent snapshot data) + // - All inconsistent global state (reset to snapshot values) + self.poisoned = false; + Ok(()) } @@ -207,6 +345,11 @@ impl MultiUseSandbox { /// /// Changes made to the sandbox during execution are *not* persisted. /// + /// ## Poisoned Sandbox + /// + /// This method will return [`crate::HyperlightError::PoisonedSandbox`] if the sandbox + /// is currently poisoned. Use [`restore()`](Self::restore) to recover from a poisoned state. + /// /// # Examples /// /// ```no_run /// @@ -245,6 +388,9 @@ func_name: &str, args: impl ParameterTuple, ) -> Result<Output> { + if self.poisoned { + return Err(crate::HyperlightError::PoisonedSandbox); + } let snapshot = self.snapshot()?; let res = self.call(func_name, args); self.restore(&snapshot)?;
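The split between the two public entry points is persistence only: `call_guest_function_by_name` wraps `call` in the snapshot/restore pair shown above, so guest-side state changes are discarded, while `call` keeps them. A hedged usage sketch, assuming an `Echo`-style guest binary as in the surrounding doc examples:

```rust
use hyperlight_host::{GuestBinary, MultiUseSandbox, UninitializedSandbox};

fn example() -> Result<(), Box<dyn std::error::Error>> {
    let mut sandbox: MultiUseSandbox =
        UninitializedSandbox::new(GuestBinary::FilePath("guest.bin".into()), None)?.evolve()?;

    // Stateless: any guest-side mutation is rolled back once the call returns.
    #[allow(deprecated)]
    let echoed: String = sandbox.call_guest_function_by_name("Echo", "hi".to_string())?;

    // Stateful: guest-side mutations survive into subsequent calls.
    let echoed_again: String = sandbox.call("Echo", echoed)?;
    assert_eq!(echoed_again, "hi");
    Ok(())
}
```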
@@ -255,6 +401,22 @@ /// /// Changes made to the sandbox during execution are persisted. /// + /// ## Poisoned Sandbox + /// + /// This method will return [`crate::HyperlightError::PoisonedSandbox`] if the sandbox + /// is already poisoned before the call. Use [`restore()`](Self::restore) to recover from + /// a poisoned state. + /// + /// ## Sandbox Poisoning + /// + /// If this method returns an error, the sandbox may be poisoned if the guest was not run + /// to completion (due to panic, abort, memory violation, stack/heap exhaustion, or forced + /// termination). Use [`poisoned()`](Self::poisoned) to check the poison state and + /// [`restore()`](Self::restore) to recover if needed. + /// + /// If this method returns `Ok`, the sandbox is guaranteed to **not** be poisoned - the guest + /// function completed successfully and the sandbox state is consistent. + /// /// # Examples /// /// ```no_run /// @@ -282,12 +444,44 @@ /// # Ok(()) /// # } /// ``` + /// + /// ## Handling Potential Poisoning + /// + /// ```no_run + /// # use hyperlight_host::{MultiUseSandbox, UninitializedSandbox, GuestBinary}; + /// # fn example() -> Result<(), Box<dyn std::error::Error>> { + /// let mut sandbox: MultiUseSandbox = UninitializedSandbox::new( + /// GuestBinary::FilePath("guest.bin".into()), + /// None + /// )?.evolve()?; + /// + /// // Take snapshot before risky operation + /// let snapshot = sandbox.snapshot()?; + /// + /// // Call potentially unsafe guest function + /// let result = sandbox.call::<String>("RiskyOperation", "input".to_string()); + /// + /// // Check if the call failed and poisoned the sandbox + /// if let Err(e) = result { + /// eprintln!("Guest function failed: {}", e); + /// + /// if sandbox.poisoned() { + /// eprintln!("Sandbox was poisoned, restoring from snapshot"); + /// sandbox.restore(&snapshot)?; + /// } + /// } + /// # Ok(()) + /// # } + /// ``` #[instrument(err(Debug), skip(self, args), parent = Span::current())] pub fn call<Output: SupportedReturnType>( &mut self, func_name: &str, args: impl ParameterTuple, ) -> Result<Output> { + if self.poisoned { + return Err(crate::HyperlightError::PoisonedSandbox); + } // Reset snapshot since we are mutating the sandbox state self.snapshot = None; maybe_time_and_emit_guest_call(func_name, || { @@ -306,12 +500,20 @@ impl MultiUseSandbox { /// (typically page-aligned). The `region_type` field is ignored as guest /// page table entries are not created. /// + /// ## Poisoned Sandbox + /// + /// This method will return [`crate::HyperlightError::PoisonedSandbox`] if the sandbox + /// is currently poisoned. Use [`restore()`](Self::restore) to recover from a poisoned state. + /// /// # Safety /// /// The caller must ensure the host memory region remains valid and unmodified /// for the lifetime of `self`. #[instrument(err(Debug), skip(self, rgn), parent = Span::current())] pub unsafe fn map_region(&mut self, rgn: &MemoryRegion) -> Result<()> { + if self.poisoned { + return Err(crate::HyperlightError::PoisonedSandbox); + } if rgn.flags.contains(MemoryRegionFlags::STACK_GUARD) { // Stack guard pages are an internal implementation detail // (which really should be moved into the guest) @@ -333,9 +535,16 @@ impl MultiUseSandbox { /// Map the contents of a file into the guest at a particular address /// /// Returns the length of the mapping in bytes. - #[allow(dead_code)] + /// + /// ## Poisoned Sandbox + /// + /// This method will return [`crate::HyperlightError::PoisonedSandbox`] if the sandbox + /// is currently poisoned. Use [`restore()`](Self::restore) to recover from a poisoned state. #[instrument(err(Debug), skip(self, _fp, _guest_base), parent = Span::current())] pub fn map_file_cow(&mut self, _fp: &Path, _guest_base: u64) -> Result<u64> { + if self.poisoned { + return Err(crate::HyperlightError::PoisonedSandbox); + } #[cfg(windows)] log_then_return!("mmap'ing a file into the guest is not yet supported on Windows"); #[cfg(unix)]
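The next hunk adds a fuzzing-only, type-erased variant. With the reconstructed signature (a runtime `ReturnType` plus a `Vec<ParameterValue>`), an invocation would look roughly like the sketch below; the import paths and the `Echo` guest function are assumptions rather than confirmed API:

```rust
use hyperlight_common::flatbuffer_wrappers::function_types::{
    ParameterValue, ReturnType, ReturnValue,
};
use hyperlight_host::{MultiUseSandbox, Result};

// Sketch: the signature is described at runtime instead of via generics,
// which is what fuzzers need when generating arbitrary call shapes.
#[cfg(feature = "fuzzing")]
fn echo_type_erased(sandbox: &mut MultiUseSandbox) -> Result<ReturnValue> {
    sandbox.call_type_erased_guest_function_by_name(
        "Echo",
        ReturnType::String,
        vec![ParameterValue::String("hello".to_string())],
    )
}
```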
@@ -373,6 +582,11 @@ /// Calls a guest function with type-erased parameters and return values. /// /// This function is used for fuzz testing parameter and return type handling. + /// + /// ## Poisoned Sandbox + /// + /// This method will return [`crate::HyperlightError::PoisonedSandbox`] if the sandbox + /// is currently poisoned. Use [`restore()`](Self::restore) to recover from a poisoned state. #[cfg(feature = "fuzzing")] #[instrument(err(Debug), skip(self, args), parent = Span::current())] pub fn call_type_erased_guest_function_by_name( @@ -381,6 +595,9 @@ ret_type: ReturnType, args: Vec<ParameterValue>, ) -> Result<ReturnValue> { + if self.poisoned { + return Err(crate::HyperlightError::PoisonedSandbox); + } // Reset snapshot since we are mutating the sandbox state self.snapshot = None; maybe_time_and_emit_guest_call(func_name, || { @@ -394,6 +611,14 @@ return_type: ReturnType, args: Vec<ParameterValue>, ) -> Result<ReturnValue> { + if self.poisoned { + return Err(crate::HyperlightError::PoisonedSandbox); + } + // Mark that a guest function call is now active + // (This also increments the generation counter internally) + // The guard will automatically clear call_active when dropped + let _guard = CallActiveGuard::new(self.vm.interrupt_handle())?; + let res = (|| { let estimated_capacity = estimate_flatbuffer_capacity(function_name, &args); @@ -428,6 +653,13 @@ impl MultiUseSandbox { // TODO: Do we want to allow re-entrant guest function calls? self.get_mgr_wrapper_mut().as_mut().clear_io_buffers(); + if let Err(e) = &res { + // Determine if we should poison the sandbox. + self.poisoned |= e.is_poison_error(); + } + + // Note: clear_call_active() is automatically called when _guard is dropped here + res } @@ -463,6 +695,46 @@ impl MultiUseSandbox { pub fn interrupt_handle(&self) -> Arc<dyn InterruptHandle> { self.vm.interrupt_handle() } +
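A typical pattern around `interrupt_handle()`: hand the handle to a watchdog thread and kill the call if it overruns a deadline. A sketch only; the `Spin` guest function and the five-second budget stand in for whatever long-running work the guest does (the integration tests later in this diff use the same shape):

```rust
use std::thread;
use std::time::Duration;

use hyperlight_host::{GuestBinary, MultiUseSandbox, UninitializedSandbox};

fn example() -> Result<(), Box<dyn std::error::Error>> {
    let mut sandbox: MultiUseSandbox =
        UninitializedSandbox::new(GuestBinary::FilePath("guest.bin".into()), None)?.evolve()?;
    let snapshot = sandbox.snapshot()?;

    // The handle can outlive the borrow of the sandbox, so a watchdog
    // thread can hold it while the call runs.
    let handle = sandbox.interrupt_handle();
    let watchdog = thread::spawn(move || {
        thread::sleep(Duration::from_secs(5));
        handle.kill(); // no-op (returns false) if the call already finished
    });

    let result = sandbox.call::<String>("Spin", ());
    if result.is_err() && sandbox.poisoned() {
        sandbox.restore(&snapshot)?; // clear the poison before reuse
    }
    watchdog.join().unwrap();
    Ok(())
}
```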
+ /// Returns whether the sandbox is currently poisoned. + /// + /// A poisoned sandbox is in an inconsistent state due to the guest not running to completion. + /// All operations will be rejected until the sandbox is restored from a non-poisoned snapshot. + /// + /// ## Causes of Poisoning + /// + /// The sandbox becomes poisoned when guest execution is interrupted: + /// - **Panics/Aborts** - Guest code panics or calls `abort()` + /// - **Invalid Memory Access** - Read/write/execute violations + /// - **Stack Overflow** - Guest exhausts stack space + /// - **Heap Exhaustion** - Guest runs out of heap memory + /// - **Forced Termination** - [`InterruptHandle::kill()`] called during execution + /// + /// ## Recovery + /// + /// To clear the poison state, use [`restore()`](Self::restore) with a snapshot + /// that was taken before the sandbox became poisoned. + /// + /// # Examples + /// + /// ```no_run + /// # use hyperlight_host::{MultiUseSandbox, UninitializedSandbox, GuestBinary}; + /// # fn example() -> Result<(), Box<dyn std::error::Error>> { + /// let mut sandbox: MultiUseSandbox = UninitializedSandbox::new( + /// GuestBinary::FilePath("guest.bin".into()), + /// None + /// )?.evolve()?; + /// + /// // Check if sandbox is poisoned + /// if sandbox.poisoned() { + /// println!("Sandbox is poisoned and needs attention"); + /// } + /// # Ok(()) + /// # } + /// ``` + pub fn poisoned(&self) -> bool { + self.poisoned + } } impl Callable for MultiUseSandbox { @@ -471,6 +743,9 @@ func_name: &str, args: impl ParameterTuple, ) -> Result<Output> { + if self.poisoned { + return Err(crate::HyperlightError::PoisonedSandbox); + } self.call(func_name, args) } } @@ -497,6 +772,7 @@ mod tests { use std::sync::{Arc, Barrier}; use std::thread; + use hyperlight_common::flatbuffer_wrappers::guest_error::ErrorCode; use hyperlight_testing::simple_guest_as_string; #[cfg(target_os = "linux")] @@ -506,6 +782,96 @@ mod tests { use crate::sandbox::SandboxConfiguration; use crate::{GuestBinary, HyperlightError, MultiUseSandbox, Result, UninitializedSandbox}; + #[test] + fn poison() { + let mut sbox: MultiUseSandbox = { + let path = simple_guest_as_string().unwrap(); + let u_sbox = UninitializedSandbox::new(GuestBinary::FilePath(path), None).unwrap(); + u_sbox.evolve() + } + .unwrap(); + let snapshot = sbox.snapshot().unwrap(); + + // poison on purpose + let res = sbox + .call::<()>("guest_panic", "hello".to_string()) + .unwrap_err(); + assert!( + matches!(res, HyperlightError::GuestAborted(code, context) if code == ErrorCode::UnknownError as u8 && context.contains("hello")) + ); + assert!(sbox.poisoned()); + + // guest calls should fail when poisoned + let res = sbox + .call::<()>("guest_panic", "hello2".to_string()) + .unwrap_err(); + assert!(matches!(res, HyperlightError::PoisonedSandbox)); + + // snapshot should fail when poisoned + if let Err(e) = sbox.snapshot() { + assert!(sbox.poisoned()); + assert!(matches!(e, HyperlightError::PoisonedSandbox)); + } else { + panic!("Snapshot should fail"); + } + + // map_region should fail when poisoned + #[cfg(target_os = "linux")] + { + let map_mem = allocate_guest_memory(); + let guest_base = 0x0; + let region = region_for_memory(&map_mem, guest_base, MemoryRegionFlags::READ); + let res = unsafe { sbox.map_region(&region) }.unwrap_err(); + assert!(matches!(res, HyperlightError::PoisonedSandbox)); + } + + // map_file_cow should fail when poisoned + #[cfg(target_os = "linux")] + { + let temp_file = std::env::temp_dir().join("test_poison_map_file.bin"); + let res = sbox.map_file_cow(&temp_file, 0x0).unwrap_err(); + assert!(matches!(res, HyperlightError::PoisonedSandbox)); + std::fs::remove_file(&temp_file).ok(); // Clean up + } + + // call_guest_function_by_name (deprecated) should fail when poisoned + #[allow(deprecated)] + let res = sbox + .call_guest_function_by_name::<String>("Echo", "test".to_string()) + .unwrap_err(); + assert!(matches!(res, HyperlightError::PoisonedSandbox)); + + // restore to non-poisoned snapshot should work and clear poison + sbox.restore(&snapshot).unwrap(); + assert!(!sbox.poisoned()); + + // guest calls should work again after restore + let res = sbox.call::<String>("Echo", "hello2".to_string()).unwrap(); + assert_eq!(res, "hello2".to_string()); + assert!(!sbox.poisoned()); + + // re-poison on purpose + let res = sbox + .call::<()>("guest_panic", "hello".to_string()) + .unwrap_err(); + assert!( + matches!(res,
HyperlightError::GuestAborted(code, context) if code == ErrorCode::UnknownError as u8 && context.contains("hello")) ); + assert!(sbox.poisoned()); + + // restore to non-poisoned snapshot should work again + sbox.restore(&snapshot).unwrap(); + assert!(!sbox.poisoned()); + + // guest calls should work again + let res = sbox.call::<String>("Echo", "hello3".to_string()).unwrap(); + assert_eq!(res, "hello3".to_string()); + assert!(!sbox.poisoned()); + + // snapshot should work again + let _ = sbox.snapshot().unwrap(); + } + /// Tests that call_guest_function_by_name restores the state correctly #[test] fn test_call_guest_function_by_name() { diff --git a/src/hyperlight_host/tests/integration_test.rs b/src/hyperlight_host/tests/integration_test.rs index eceabd479..26d4c5467 100644 --- a/src/hyperlight_host/tests/integration_test.rs +++ b/src/hyperlight_host/tests/integration_test.rs @@ -60,6 +60,7 @@ fn interrupt_host_call() { .unwrap(); let mut sandbox: MultiUseSandbox = usbox.evolve().unwrap(); + let snapshot = sandbox.snapshot().unwrap(); let interrupt_handle = sandbox.interrupt_handle(); assert!(!interrupt_handle.dropped()); // not yet dropped @@ -72,6 +73,11 @@ fn interrupt_host_call() { let result = sandbox.call::("CallHostSpin", ()).unwrap_err(); assert!(matches!(result, HyperlightError::ExecutionCanceledByHost())); + assert!(sandbox.poisoned()); + + // Restore from snapshot to clear poison + sandbox.restore(&snapshot).unwrap(); + assert!(!sandbox.poisoned()); thread.join().unwrap(); } @@ -80,6 +86,7 @@ #[test] fn interrupt_in_progress_guest_call() { let mut sbox1: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap(); + let snapshot = sbox1.snapshot().unwrap(); let barrier = Arc::new(Barrier::new(2)); let barrier2 = barrier.clone(); let interrupt_handle = sbox1.interrupt_handle(); @@ -96,6 +103,11 @@ let res = sbox1.call::("Spin", ()).unwrap_err(); assert!(matches!(res, HyperlightError::ExecutionCanceledByHost())); + assert!(sbox1.poisoned()); + + // Restore from snapshot to clear poison + sbox1.restore(&snapshot).unwrap(); + assert!(!sbox1.poisoned()); barrier.wait(); // Make sure we can still call guest functions after the VM was interrupted @@ -107,7 +119,7 @@ thread.join().expect("Thread should finish"); } -/// Makes sure interrupting a vm before the guest call has started also prevents the guest call from being executed +/// Makes sure interrupting a vm before the guest call has started does not prevent the guest call from running #[test] fn interrupt_guest_call_in_advance() { let mut sbox1: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap(); @@ -125,10 +137,16 @@ }); barrier.wait(); // wait until `kill()` is called before starting the guest call - let res = sbox1.call::("Spin", ()).unwrap_err(); - assert!(matches!(res, HyperlightError::ExecutionCanceledByHost())); + match sbox1.call::<String>("Echo", "hello".to_string()) { + Ok(_) => {} + Err(HyperlightError::ExecutionCanceledByHost()) => { + panic!("Unexpected Cancellation Error"); + } + Err(_) => {} + } - // Make sure we can still call guest functions after the VM was interrupted + // Make sure we can still call guest functions after the VM was interrupted early + // i.e. make sure we don't kill the next iteration.
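+ // kill() above ran while call_active was false, so it returned without + // stamping cancel_requested, leaving nothing behind that could cancel + // the call below.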
sbox1.call::<String>("Echo", "hello".to_string()).unwrap(); // drop vm to make sure other thread can detect it @@ -150,6 +168,7 @@ fn interrupt_same_thread() { let mut sbox1: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap(); let mut sbox2: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap(); + let snapshot2 = sbox2.snapshot().unwrap(); let mut sbox3: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap(); let barrier = Arc::new(Barrier::new(2)); @@ -174,12 +193,14 @@ .call::<String>("Echo", "hello".to_string()) .expect("Only sandbox 2 is allowed to be interrupted"); match sbox2.call::<String>("Echo", "hello".to_string()) { - Ok(_) | Err(HyperlightError::ExecutionCanceledByHost()) => { - // Only allow successful calls or interrupted. - // The call can be successful in case the call is finished before kill() is called. - } + // Only allow successful calls or interrupted. + // The call can be successful in case the call is finished before kill() is called. + Ok(_) | Err(HyperlightError::ExecutionCanceledByHost()) => {} _ => panic!("Unexpected return"), }; + if sbox2.poisoned() { + sbox2.restore(&snapshot2).unwrap(); + } sbox3 .call::<String>("Echo", "hello".to_string()) .expect("Only sandbox 2 is allowed to be interrupted"); @@ -192,6 +213,7 @@ fn interrupt_same_thread_no_barrier() { let mut sbox1: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap(); let mut sbox2: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap(); + let snapshot2 = sbox2.snapshot().unwrap(); let mut sbox3: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap(); let barrier = Arc::new(Barrier::new(2)); @@ -218,12 +240,14 @@ .call::<String>("Echo", "hello".to_string()) .expect("Only sandbox 2 is allowed to be interrupted"); match sbox2.call::<String>("Echo", "hello".to_string()) { - Ok(_) | Err(HyperlightError::ExecutionCanceledByHost()) => { - // Only allow successful calls or interrupted. - // The call can be successful in case the call is finished before kill() is called. - } + // Only allow successful calls or interrupted. + // The call can be successful in case the call is finished before kill() is called.
+ Ok(_) | Err(HyperlightError::ExecutionCanceledByHost()) => {} _ => panic!("Unexpected return"), }; + if sbox2.poisoned() { + sbox2.restore(&snapshot2).unwrap(); + } sbox3 .call::<String>("Echo", "hello".to_string()) .expect("Only sandbox 2 is allowed to be interrupted"); @@ -237,6 +261,7 @@ #[test] fn interrupt_moved_sandbox() { let mut sbox1: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap(); + let snapshot1 = sbox1.snapshot().unwrap(); let mut sbox2: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap(); let interrupt_handle = sbox1.interrupt_handle(); @@ -249,6 +274,9 @@ barrier2.wait(); let res = sbox1.call::("Spin", ()).unwrap_err(); assert!(matches!(res, HyperlightError::ExecutionCanceledByHost())); + assert!(sbox1.poisoned()); + sbox1.restore(&snapshot1).unwrap(); + assert!(!sbox1.poisoned()); }); let thread2 = thread::spawn(move || { @@ -287,6 +315,7 @@ fn interrupt_custom_signal_no_and_retry_delay() { .evolve() .unwrap(); + let snapshot1 = sbox1.snapshot().unwrap(); let interrupt_handle = sbox1.interrupt_handle(); assert!(!interrupt_handle.dropped()); // not yet dropped @@ -303,8 +332,11 @@ for _ in 0..NUM_ITERS { let res = sbox1.call::("Spin", ()).unwrap_err(); assert!(matches!(res, HyperlightError::ExecutionCanceledByHost())); + assert!(sbox1.poisoned()); // immediately reenter another guest function call after having been cancelled, // so that the vcpu is running again before the interruptor-thread has a chance to see that the vcpu is not running + sbox1.restore(&snapshot1).unwrap(); + assert!(!sbox1.poisoned()); } thread.join().expect("Thread should finish"); } @@ -737,3 +769,467 @@ fn log_test_messages(levelfilter: Option<LevelFilter>) { .unwrap(); } } + +/// Test that validates interrupt behavior with random kill timing under concurrent load +/// Uses a pool of 100 sandboxes, 100 threads, and 500 iterations per thread. +/// Randomly decides to kill some calls at random times during execution.
+/// Validates that: +/// - Calls we chose to kill can end in any state (including some cancelled) +/// - Calls we did NOT choose to kill NEVER return ExecutionCanceledByHost +/// - We get a mix of killed and non-killed outcomes (not 100% or 0%) +#[test] +fn interrupt_random_kill_stress_test() { + // Wrapper to hold a sandbox and its snapshot together + struct SandboxWithSnapshot { + sandbox: MultiUseSandbox, + snapshot: Snapshot, + } + + use std::collections::VecDeque; + use std::sync::Mutex; + use std::sync::atomic::AtomicUsize; + + use hyperlight_host::sandbox::snapshot::Snapshot; + use log::{error, trace}; + + const POOL_SIZE: usize = 100; + const NUM_THREADS: usize = 100; + const ITERATIONS_PER_THREAD: usize = 500; + const KILL_PROBABILITY: f64 = 0.5; // 50% chance to attempt kill + const GUEST_CALL_DURATION_MS: u32 = 10; // SpinForMs duration + + // Create a pool of POOL_SIZE sandboxes, each paired with a snapshot + println!("Creating pool of {} sandboxes...", POOL_SIZE); + let mut sandbox_pool: Vec<SandboxWithSnapshot> = Vec::with_capacity(POOL_SIZE); + for i in 0..POOL_SIZE { + let mut sandbox = new_uninit_rust().unwrap().evolve().unwrap(); + // Create a snapshot for this sandbox + let snapshot = sandbox.snapshot().unwrap(); + if (i + 1) % 10 == 0 { + println!("Created {}/{} sandboxes", i + 1, POOL_SIZE); + } + sandbox_pool.push(SandboxWithSnapshot { sandbox, snapshot }); + } + + // Wrap the pool in Arc<Mutex<VecDeque<SandboxWithSnapshot>>> for thread-safe access + let pool = Arc::new(Mutex::new(VecDeque::from(sandbox_pool))); + + // Counters for statistics + let total_iterations = Arc::new(AtomicUsize::new(0)); + let kill_attempted_count = Arc::new(AtomicUsize::new(0)); // We chose to kill + let actually_killed_count = Arc::new(AtomicUsize::new(0)); // Got ExecutionCanceledByHost + let not_killed_completed_ok = Arc::new(AtomicUsize::new(0)); + let not_killed_error = Arc::new(AtomicUsize::new(0)); // Non-cancelled errors + let killed_but_completed_ok = Arc::new(AtomicUsize::new(0)); + let killed_but_error = Arc::new(AtomicUsize::new(0)); // Non-cancelled errors + let unexpected_cancelled = Arc::new(AtomicUsize::new(0)); // CRITICAL: non-killed calls that got cancelled + let sandbox_replaced_count = Arc::new(AtomicUsize::new(0)); // Sandboxes replaced due to restore failure + + println!( + "Starting {} threads with {} iterations each...", + NUM_THREADS, ITERATIONS_PER_THREAD + ); + + // Spawn worker threads + let mut thread_handles = vec![]; + for thread_id in 0..NUM_THREADS { + let pool_clone = Arc::clone(&pool); + let total_iterations_clone = Arc::clone(&total_iterations); + let kill_attempted_count_clone = Arc::clone(&kill_attempted_count); + let actually_killed_count_clone = Arc::clone(&actually_killed_count); + let not_killed_completed_ok_clone = Arc::clone(&not_killed_completed_ok); + let not_killed_error_clone = Arc::clone(&not_killed_error); + let killed_but_completed_ok_clone = Arc::clone(&killed_but_completed_ok); + let killed_but_error_clone = Arc::clone(&killed_but_error); + let unexpected_cancelled_clone = Arc::clone(&unexpected_cancelled); + let sandbox_replaced_count_clone = Arc::clone(&sandbox_replaced_count); + + let handle = thread::spawn(move || { + // Derive a per-thread RNG seed from thread_id. RandomState adds + // process-level entropy, so sequences are distinct per thread but + // not reproducible across runs. + use std::collections::hash_map::RandomState; + use std::hash::BuildHasher; + + let mut rng_state = RandomState::new().hash_one(thread_id); + + // Simple linear congruential generator for cheap per-thread randomness
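+ // (The multiplier below is Knuth's MMIX LCG constant, also used by PCG. + // With an odd increment the sequence has full period 2^64, which is more + // than enough for randomizing kill timings in a test.)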
+ let mut next_random = || -> u64 { rng_state = rng_state.wrapping_mul(6364136223846793005).wrapping_add(1); + rng_state + }; + + for iteration in 0..ITERATIONS_PER_THREAD { + // === START OF ITERATION === + // Get a sandbox from the pool for this iteration + let sandbox_with_snapshot = loop { + let mut pool_guard = pool_clone.lock().unwrap(); + if let Some(sb) = pool_guard.pop_front() { + break sb; + } + // Pool is empty, release lock and wait + drop(pool_guard); + trace!( + "[THREAD-{}] Iteration {}: Pool empty, waiting for sandbox...", + thread_id, iteration + ); + thread::sleep(Duration::from_millis(1)); + }; + + // Use a guard struct to ensure sandbox is always returned to pool + struct SandboxGuard<'a> { + sandbox_with_snapshot: Option<SandboxWithSnapshot>, + pool: &'a Arc<Mutex<VecDeque<SandboxWithSnapshot>>>, + } + + impl Drop for SandboxGuard<'_> { + fn drop(&mut self) { + if let Some(sb) = self.sandbox_with_snapshot.take() { + let mut pool_guard = self.pool.lock().unwrap(); + pool_guard.push_back(sb); + trace!( + "[GUARD] Returned sandbox to pool, pool size now: {}", + pool_guard.len() + ); + } + } + } + + let mut guard = SandboxGuard { + sandbox_with_snapshot: Some(sandbox_with_snapshot), + pool: &pool_clone, + }; + + // Decide randomly: should we attempt to kill this call? + let should_kill = (next_random() as f64 / u64::MAX as f64) < KILL_PROBABILITY; + + if should_kill { + kill_attempted_count_clone.fetch_add(1, Ordering::Relaxed); + } + + let sandbox_wrapper = guard.sandbox_with_snapshot.as_mut().unwrap(); + let sandbox = &mut sandbox_wrapper.sandbox; + let interrupt_handle = sandbox.interrupt_handle(); + + // If we decided to kill, spawn a thread that will kill at a random time + // Use a barrier to ensure the killer thread waits until we're about to call the guest + let killer_thread = if should_kill { + use std::sync::{Arc, Barrier}; + + let barrier = Arc::new(Barrier::new(2)); + let barrier_clone = Arc::clone(&barrier); + + // Generate random delay here before moving into thread + let kill_delay_ms = next_random() % 16; + let thread_id_clone = thread_id; + let iteration_clone = iteration; + let handle = thread::spawn(move || { + trace!( + "[KILLER-{}-{}] Waiting at barrier...", + thread_id_clone, iteration_clone + ); + // Wait at the barrier until the main thread is ready to call the guest + barrier_clone.wait(); + trace!( + "[KILLER-{}-{}] Passed barrier, sleeping for {}ms...", + thread_id_clone, iteration_clone, kill_delay_ms + ); + // Random delay between 0 and 15ms (guest runs for ~10ms) + thread::sleep(Duration::from_millis(kill_delay_ms)); + trace!( + "[KILLER-{}-{}] Calling kill()...", + thread_id_clone, iteration_clone + ); + interrupt_handle.kill(); + trace!( + "[KILLER-{}-{}] kill() returned, exiting thread", + thread_id_clone, iteration_clone + ); + }); + Some((handle, barrier)) + } else { + None + }; + + // Call the guest function + trace!( + "[THREAD-{}] Iteration {}: Calling guest function (should_kill={})...", + thread_id, iteration, should_kill + ); + + // Release the barrier just before calling the guest function + if let Some((_, ref barrier)) = killer_thread { + trace!( + "[THREAD-{}] Iteration {}: Main thread waiting at barrier...", + thread_id, iteration + ); + barrier.wait(); + trace!( + "[THREAD-{}] Iteration {}: Main thread passed barrier, calling guest...", + thread_id, iteration + ); + } + + let result = sandbox.call::<u64>("SpinForMs", GUEST_CALL_DURATION_MS); + trace!( + "[THREAD-{}] Iteration {}: Guest call returned: {:?}", + thread_id, + iteration, + result + .as_ref() + .map(|_| "Ok") + .map_err(|e| format!("{:?}", e)) + ); + + // Wait for killer
thread to finish if it was spawned + if let Some((kt, _)) = killer_thread { + trace!( + "[THREAD-{}] Iteration {}: Waiting for killer thread to join...", + thread_id, iteration + ); + let _ = kt.join(); + } + + // Process the result based on whether we attempted to kill + match result { + Err(HyperlightError::ExecutionCanceledByHost()) => { + // Restore the sandbox from the snapshot + trace!( + "[THREAD-{}] Iteration {}: Restoring sandbox from snapshot after ExecutionCanceledByHost...", + thread_id, iteration + ); + let sandbox_wrapper = guard.sandbox_with_snapshot.as_mut().unwrap(); + + // Make sure the sandbox is poisoned + assert!(sandbox_wrapper.sandbox.poisoned()); + + // Try to restore the snapshot + if let Err(e) = sandbox_wrapper.sandbox.restore(&sandbox_wrapper.snapshot) { + error!( + "CRITICAL: Thread {} iteration {}: Failed to restore snapshot: {:?}", + thread_id, iteration, e + ); + trace!( + "[THREAD-{}] Iteration {}: Creating new sandbox to replace failed one...", + thread_id, iteration + ); + + // Create a new sandbox with snapshot + match new_uninit_rust().and_then(|uninit| uninit.evolve()) { + Ok(mut new_sandbox) => { + match new_sandbox.snapshot() { + Ok(new_snapshot) => { + // Replace the failed sandbox with the new one + sandbox_wrapper.sandbox = new_sandbox; + sandbox_wrapper.snapshot = new_snapshot; + sandbox_replaced_count_clone + .fetch_add(1, Ordering::Relaxed); + trace!( + "[THREAD-{}] Iteration {}: Successfully replaced sandbox", + thread_id, iteration + ); + } + Err(snapshot_err) => { + error!( + "CRITICAL: Thread {} iteration {}: Failed to create snapshot for new sandbox: {:?}", + thread_id, iteration, snapshot_err + ); + // Still use the new sandbox even without snapshot + sandbox_wrapper.sandbox = new_sandbox; + sandbox_replaced_count_clone + .fetch_add(1, Ordering::Relaxed); + } + } + } + Err(create_err) => { + error!( + "CRITICAL: Thread {} iteration {}: Failed to create new sandbox: {:?}", + thread_id, iteration, create_err + ); + // Continue with the broken sandbox - it will be removed from pool eventually + } + } + } + + if should_kill { + // We attempted to kill and it was cancelled - SUCCESS + actually_killed_count_clone.fetch_add(1, Ordering::Relaxed); + } else { + // We did NOT attempt to kill but got cancelled - CRITICAL FAILURE + unexpected_cancelled_clone.fetch_add(1, Ordering::Relaxed); + error!( + "CRITICAL: Thread {} iteration {}: Got ExecutionCanceledByHost but did NOT attempt kill!", + thread_id, iteration + ); + } + } + Ok(_) => { + if should_kill { + // We attempted to kill but it completed OK - acceptable race condition + killed_but_completed_ok_clone.fetch_add(1, Ordering::Relaxed); + } else { + // We did NOT attempt to kill and it completed OK - EXPECTED + not_killed_completed_ok_clone.fetch_add(1, Ordering::Relaxed); + } + } + Err(_other_error) => { + // Log the other error so we can see what it is + error!( + "Thread {} iteration {}: Got non-cancellation error: {:?}", + thread_id, iteration, _other_error + ); + if should_kill { + // We attempted to kill and got some other error - acceptable + killed_but_error_clone.fetch_add(1, Ordering::Relaxed); + } else { + // We did NOT attempt to kill and got some other error - acceptable + not_killed_error_clone.fetch_add(1, Ordering::Relaxed); + } + } + } + + total_iterations_clone.fetch_add(1, Ordering::Relaxed); + + // Progress reporting + let current_total = total_iterations_clone.load(Ordering::Relaxed); + if current_total % 5000 == 0 { + println!( + "Progress: {}/{} iterations completed", + 
current_total, + NUM_THREADS * ITERATIONS_PER_THREAD + ); + } + + // === END OF ITERATION === + // SandboxGuard will automatically return sandbox to pool when it goes out of scope + } + + trace!( + "[THREAD-{}] Completed all {} iterations!", + thread_id, ITERATIONS_PER_THREAD + ); + }); + + thread_handles.push(handle); + } + + trace!( + "All {} worker threads spawned, waiting for completion...", + NUM_THREADS + ); + + // Wait for all threads to complete + for (idx, handle) in thread_handles.into_iter().enumerate() { + trace!("Waiting for thread {} to join...", idx); + handle.join().unwrap(); + trace!("Thread {} joined successfully", idx); + } + + trace!("All threads joined successfully!"); + + // Collect final statistics + let total = total_iterations.load(Ordering::Relaxed); + let kill_attempted = kill_attempted_count.load(Ordering::Relaxed); + let actually_killed = actually_killed_count.load(Ordering::Relaxed); + let not_killed_ok = not_killed_completed_ok.load(Ordering::Relaxed); + let not_killed_err = not_killed_error.load(Ordering::Relaxed); + let killed_but_ok = killed_but_completed_ok.load(Ordering::Relaxed); + let killed_but_err = killed_but_error.load(Ordering::Relaxed); + let unexpected_cancel = unexpected_cancelled.load(Ordering::Relaxed); + let sandbox_replaced = sandbox_replaced_count.load(Ordering::Relaxed); + + let no_kill_attempted = total - kill_attempted; + + // Print detailed statistics + println!("\n=== Interrupt Random Kill Stress Test Statistics ==="); + println!("Total iterations: {}", total); + println!(); + println!( + "Kill Attempts: {} ({:.1}%)", + kill_attempted, + (kill_attempted as f64 / total as f64) * 100.0 + ); + println!( + " - Actually killed (ExecutionCanceledByHost): {}", + actually_killed + ); + println!(" - Completed OK despite kill attempt: {}", killed_but_ok); + println!( + " - Error (non-cancelled) despite kill attempt: {}", + killed_but_err + ); + if kill_attempted > 0 { + println!( + " - Kill success rate: {:.1}%", + (actually_killed as f64 / kill_attempted as f64) * 100.0 + ); + } + println!(); + println!( + "No Kill Attempts: {} ({:.1}%)", + no_kill_attempted, + (no_kill_attempted as f64 / total as f64) * 100.0 + ); + println!(" - Completed OK: {}", not_killed_ok); + println!(" - Error (non-cancelled): {}", not_killed_err); + println!( + " - Cancelled (SHOULD BE 0): {} {}", + unexpected_cancel, + if unexpected_cancel == 0 { + "āœ…" + } else { + "āŒ FAILURE" + } + ); + println!(); + println!("Sandbox Management:"); + println!( + " - Sandboxes replaced due to restore failure: {}", + sandbox_replaced + ); + + // CRITICAL VALIDATIONS + assert_eq!( + unexpected_cancel, 0, + "FAILURE: {} non-killed calls returned ExecutionCanceledByHost! This indicates false kills.", + unexpected_cancel + ); + + assert!( + actually_killed > 0, + "FAILURE: No calls were actually killed despite {} kill attempts!", + kill_attempted + ); + + assert!( + kill_attempted > 0, + "FAILURE: No kill attempts were made (expected ~50% of {} iterations)!", + total + ); + + assert!( + kill_attempted < total, + "FAILURE: All {} iterations were kill attempts (expected ~50%)!", + total + ); + + // Verify total accounting + assert_eq!( + total, + actually_killed + + not_killed_ok + + not_killed_err + + killed_but_ok + + killed_but_err + + unexpected_cancel, + "Iteration accounting mismatch!" 
+ ); + + assert_eq!( + total, + NUM_THREADS * ITERATIONS_PER_THREAD, + "Not all iterations completed" + ); + + println!("\nāœ… All validations passed!"); +} diff --git a/src/tests/rust_guests/simpleguest/src/main.rs b/src/tests/rust_guests/simpleguest/src/main.rs index c3b17ad58..93c513b82 100644 --- a/src/tests/rust_guests/simpleguest/src/main.rs +++ b/src/tests/rust_guests/simpleguest/src/main.rs @@ -564,7 +564,44 @@ fn spin(_: &FunctionCall) -> Result<Vec<u8>> { Ok(get_flatbuffer_result(())) } -#[hyperlight_guest_tracing::trace_function] +/// Spins the CPU for approximately the specified number of milliseconds +fn spin_for_ms(fc: &FunctionCall) -> Result<Vec<u8>> { + let milliseconds = if let ParameterValue::UInt(ms) = fc.parameters.clone().unwrap()[0].clone() { + ms + } else { + return Err(HyperlightGuestError::new( + ErrorCode::GuestFunctionParameterTypeMismatch, + "Expected UInt parameter".to_string(), + )); + }; + + // Simple busy-wait loop - not precise but good enough for testing + // Different iteration counts for debug vs release mode to ensure reasonable CPU usage + #[cfg(debug_assertions)] + // Debug mode - less optimized. The value of 120,000 iterations per millisecond was chosen + // empirically so that, in the interrupt stress test, roughly half of the kill attempts land + // while the guest is still spinning. This stresses the host's cancellation and resource + // management logic; the value may need adjustment for other test environments or hardware. + let iterations_per_ms = 120_000; + + #[cfg(not(debug_assertions))] + let iterations_per_ms = 1_000_000; // Release mode - highly optimized + + let total_iterations = milliseconds * iterations_per_ms; + + let mut counter: u64 = 0; + for _ in 0..total_iterations { + // Prevent the compiler from optimizing away the loop + counter = counter.wrapping_add(1); + core::hint::black_box(counter); + } + + // Calculate the actual number of milliseconds spun for, based on the counter and iterations per ms + let ms_spun = counter / iterations_per_ms as u64; + Ok(get_flatbuffer_result(ms_spun)) +} + fn test_abort(function_call: &FunctionCall) -> Result<Vec<u8>> { if let ParameterValue::Int(code) = function_call.parameters.clone().unwrap()[0].clone() { abort_with_code(&[code as u8]); @@ -1215,6 +1252,14 @@ pub extern "C" fn hyperlight_main() { ); register_function(spin_def); + let spin_for_ms_def = GuestFunctionDefinition::new( + "SpinForMs".to_string(), + Vec::from(&[ParameterType::UInt]), + ReturnType::ULong, + spin_for_ms as usize, + ); + register_function(spin_for_ms_def); + let abort_def = GuestFunctionDefinition::new( "GuestAbortWithCode".to_string(), Vec::from(&[ParameterType::Int]),