hyperlight-dev
diff --git a/‎src/hyperlight_host/src/hypervisor/hyperv_linux.rs
Lines changed: 51 additions & 27 deletions b/‎src/hyperlight_host/src/hypervisor/hyperv_linux.rs
Lines changed: 51 additions & 27 deletions
diff --git a/‎src/hyperlight_host/src/hypervisor/hyperv_windows.rs
Lines changed: 22 additions & 1 deletion b/‎src/hyperlight_host/src/hypervisor/hyperv_windows.rs
Lines changed: 22 additions & 1 deletion
diff --git a/‎src/hyperlight_host/src/hypervisor/kvm.rs
Lines changed: 47 additions & 20 deletions b/‎src/hyperlight_host/src/hypervisor/kvm.rs
Lines changed: 47 additions & 20 deletions
diff --git a/‎src/hyperlight_host/src/hypervisor/mod.rs
Lines changed: 20 additions & 6 deletions b/‎src/hyperlight_host/src/hypervisor/mod.rs
Lines changed: 20 additions & 6 deletions
@@ -395,6 +395,7 @@ impl HypervLinuxDriver {
             orig_rsp: rsp_ptr,
             interrupt_handle: Arc::new(LinuxInterruptHandle {
                 running: AtomicBool::new(false),
+                cancel_requested: AtomicBool::new(false),
                 tid: AtomicU64::new(unsafe { libc::pthread_self() }),
                 dropped: AtomicBool::new(false),
             }),
@@ -584,37 +585,52 @@ impl Hypervisor for HypervLinuxDriver {
         self.interrupt_handle
             .tid
             .store(unsafe { libc::pthread_self() as u64 }, Ordering::Relaxed);
-        // Note: if a `InterruptHandle::kill()` signal is delivered to this thread **here**
-        // - before we've set the running to true,
-        // Then the signal does not have any effect, because the signal handler is a no-op.
+        // Note: if `InterruptHandle::kill()` is called while this thread is **here**,
+        // then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call.
         self.interrupt_handle.running.store(true, Ordering::Relaxed);
-        // Note: if a `InterruptHandle::kill()` signal is delivered to this thread **here**
-        // - after we've set the running to true,
-        // - before we've called `VcpuFd::run()`
-        // Then the individual signal is lost, because the signal is only processed after we've left userspace.
-        // However, for this reason, we keep sending the signal again and again until we see that the atomic `running` is set to false.
-        #[cfg(mshv2)]
-        let run_result = {
-            let hv_message: hv_message = Default::default();
-            self.vcpu_fd.run(hv_message)
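+        // Don't run the vcpu if `cancel_requested` is true.
+        // NOTE(review): the early `return` below skips the `running.store(false, ..)` further down, so `running` stays true after a skipped run — confirm this is intended.
+        // Note: if `InterruptHandle::kill()` is called while this thread is **here**,
+        // then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call.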
+        let exit_reason = if self
+            .interrupt_handle
+            .cancel_requested
+            .swap(false, Ordering::Relaxed)
+        {
+            return Ok(HyperlightExit::Cancelled());
+        } else {
+            // Note: if `InterruptHandle::kill()` is called while this thread is **here**,
+            // then the vcpu will run, but we will keep sending signals to this thread
+            // to interrupt it until `running` is set to false. The `vcpu_fd::run()` call will
+            // return either normally with an exit reason, or with an EINTR error after being "kicked" by our signal handler,
+            // both of which are fine.
+            #[cfg(mshv2)]
+            {
+                let hv_message: hv_message = Default::default();
+                self.vcpu_fd.run(hv_message)
+            }
+            #[cfg(mshv3)]
+            self.vcpu_fd.run()
         };
-        #[cfg(mshv3)]
-        let run_result = self.vcpu_fd.run();
-        // Note: if a `InterruptHandle::kill()` signal is delivered to this thread **here**
-        // - after we've called `VcpuFd::run()`
-        // - before we've set the running to false
-        // Then this is fine because the call to `VcpuFd::run()` is already finished,
-        // the signal handler itself is a no-op, and the signals will stop being sent
-        // once we've set the `running` to false.
+        // Note: if `InterruptHandle::kill()` is called while this thread is **here**,
+        // then signals will be sent to this thread until `running` is set to false.
+        // This is fine since the signal handler is a no-op.
+        let cancel_requested = self
+            .interrupt_handle
+            .cancel_requested
+            .swap(false, Ordering::Relaxed);
+        // Note: if `InterruptHandle::kill()` is called while this thread is **here**,
+        // then `cancel_requested` will be set to true again, which will cancel the **next vcpu run**.
+        // Additionally, signals will be sent to this thread until `running` is set to false.
+        // This is fine since the signal handler is a no-op.
         self.interrupt_handle
             .running
             .store(false, Ordering::Relaxed);
-        // Note: if a `InterruptHandle::kill()` signal is delivered to this thread **here**
-        // - after we've set the running to false,
-        // Then the signal does not have any effect, because the signal handler is a no-op.
-        // This is fine since we are already done with the `VcpuFd::run()` call.
-
-        let result = match run_result {
+        // At this point, `running` is false so no more signals will be sent to this thread,
+        // but we may still receive async signals that were sent before this point.
+        // To prevent those signals from interrupting subsequent calls to `run()`,
+        // we make sure to check `cancel_requested` before cancelling (see `libc::EINTR` match-arm below).
+        let result = match exit_reason {
             Ok(m) => match m.header.message_type {
                 HALT_MESSAGE => {
                     crate::debug!("mshv - Halt Details : {:#?}", &self);
@@ -691,7 +707,15 @@ impl Hypervisor for HypervLinuxDriver {
             },
             Err(e) => match e.errno() {
                 // we send a signal to the thread to cancel execution this results in EINTR being returned by KVM so we return Cancelled
-                libc::EINTR => HyperlightExit::Cancelled(),
+                libc::EINTR => {
+                    // If cancellation was not requested for this specific vm, the vcpu was interrupted by a stale signal
+                    // that was meant to be delivered to a previous/other vcpu on this same thread, so let's ignore it.
+                    if !cancel_requested {
+                        HyperlightExit::Retry()
+                    } else {
+                        HyperlightExit::Cancelled()
+                    }
+                }
                 libc::EAGAIN => HyperlightExit::Retry(),
                 _ => {
                     crate::debug!("mshv Error - Details: Error: {} \n {:#?}", e, &self);
 
@@ -109,6 +109,7 @@ impl HypervWindowsDriver {
             mem_regions,
             interrupt_handle: Arc::new(WindowsInterruptHandle {
                 running: AtomicBool::new(false),
+                cancel_requested: AtomicBool::new(false),
                 partition_handle,
                 dropped: AtomicBool::new(false),
             }),
@@ -409,7 +410,25 @@ impl Hypervisor for HypervWindowsDriver {
     #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
     fn run(&mut self) -> Result<super::HyperlightExit> {
         self.interrupt_handle.running.store(true, Ordering::Relaxed);
-        let exit_context: WHV_RUN_VP_EXIT_CONTEXT = self.processor.run()?;
+
+        // Don't run the vcpu if `cancel_requested` is true
+        let exit_context = if self
+            .interrupt_handle
+            .cancel_requested
+            .load(Ordering::Relaxed)
+        {
+            WHV_RUN_VP_EXIT_CONTEXT {
+                ExitReason: WHV_RUN_VP_EXIT_REASON(8193i32), // WHvRunVpExitReasonCanceled
+                VpContext: Default::default(),
+                Anonymous: Default::default(),
+                Reserved: Default::default(),
+            }
+        } else {
+            self.processor.run()?
+        };
+        self.interrupt_handle
+            .cancel_requested
+            .store(false, Ordering::Relaxed);
         self.interrupt_handle
             .running
             .store(false, Ordering::Relaxed);
@@ -510,12 +529,14 @@ impl Drop for HypervWindowsDriver {
 pub struct WindowsInterruptHandle {
     // `WHvCancelRunVirtualProcessor()` will return Ok even if the vcpu is not running, which is the reason we need this flag.
     running: AtomicBool,
+    cancel_requested: AtomicBool,
     partition_handle: WHV_PARTITION_HANDLE,
     dropped: AtomicBool,
 }
 
 impl InterruptHandle for WindowsInterruptHandle {
     fn kill(&self) -> bool {
+        self.cancel_requested.store(true, Ordering::Relaxed);
         self.running.load(Ordering::Relaxed)
             && unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() }
     }
 
@@ -350,6 +350,7 @@ impl KVMDriver {
             mem_regions,
             interrupt_handle: Arc::new(LinuxInterruptHandle {
                 running: AtomicBool::new(false),
+                cancel_requested: AtomicBool::new(false),
                 tid: AtomicU64::new(unsafe { libc::pthread_self() }),
                 dropped: AtomicBool::new(false),
             }),
@@ -519,29 +520,47 @@ impl Hypervisor for KVMDriver {
         self.interrupt_handle
             .tid
             .store(unsafe { libc::pthread_self() as u64 }, Ordering::Relaxed);
-        // Note: if a `InterruptHandle::kill()` signal is delivered to this thread **here**
-        // - before we've set the running to true,
-        // Then the signal does not have any effect, because the signal handler is a no-op.
+        // Note: if `InterruptHandle::kill()` is called while this thread is **here**,
+        // then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call.
         self.interrupt_handle.running.store(true, Ordering::Relaxed);
-        // Note: if a `InterruptHandle::kill()` signal is delivered to this thread **here**
-        // - after we've set the running to true,
-        // - before we've called `VcpuFd::run()`
-        // Then the individual signal is lost, because the signal is only processed after we've left userspace.
-        // However, for this reason, we keep sending the signal again and again until we see that the atomic `running` is set to false.
-        let exit_reason = self.vcpu_fd.run();
-        // Note: if a `InterruptHandle::kill()` signal is delivered to this thread **here**
-        // - after we've called `VcpuFd::run()`
-        // - before we've set the running to false
-        // Then this is fine because the call to `VcpuFd::run()` is already finished,
-        // the signal handler itself is a no-op, and the signals will stop being sent
-        // once we've set the `running` to false.
+        // Don't run the vcpu if `cancel_requested` is true
+        //
+        // Note: if `InterruptHandle::kill()` is called while this thread is **here**,
+        // then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call.
+        let exit_reason = if self
+            .interrupt_handle
+            .cancel_requested
+            .load(Ordering::Relaxed)
+        {
+            Err(kvm_ioctls::Error::new(libc::EINTR))
+        } else {
+            // Note: if `InterruptHandle::kill()` is called while this thread is **here**,
+            // then the vcpu will run, but we will keep sending signals to this thread
+            // to interrupt it until `running` is set to false. The `vcpu_fd::run()` call will
+            // return either normally with an exit reason, or with an EINTR error after being "kicked" by our signal handler,
+            // both of which are fine.
+            self.vcpu_fd.run()
+        };
+        // Note: if `InterruptHandle::kill()` is called while this thread is **here**,
+        // then signals will be sent to this thread until `running` is set to false.
+        // This is fine since the signal handler is a no-op.
+        #[allow(unused_variables)]
+        // The variable is only used when `cfg(not(gdb))`, but the flag needs to be reset always anyway
+        let cancel_requested = self
+            .interrupt_handle
+            .cancel_requested
+            .swap(false, Ordering::Relaxed);
+        // Note: if `InterruptHandle::kill()` is called while this thread is **here**,
+        // then `cancel_requested` will be set to true again, which will cancel the **next vcpu run**.
+        // Additionally, signals will be sent to this thread until `running` is set to false.
+        // This is fine since the signal handler is a no-op.
         self.interrupt_handle
             .running
             .store(false, Ordering::Relaxed);
-        // Note: if a `InterruptHandle::kill()` signal is delivered to this thread **here**
-        // - after we've set the running to false,
-        // Then the signal does not have any effect, because the signal handler is a no-op.
-        // This is fine since we are already done with the `VcpuFd::run()` call.
+        // At this point, `running` is false so no more signals will be sent to this thread,
+        // but we may still receive async signals that were sent before this point.
+        // To prevent those signals from interrupting subsequent calls to `run()` (on other vms!),
+        // we make sure to check `cancel_requested` before cancelling (see `libc::EINTR` match-arm below).
         let result = match exit_reason {
             Ok(VcpuExit::Hlt) => {
                 crate::debug!("KVM - Halt Details : {:#?}", &self);
@@ -593,7 +612,15 @@ impl Hypervisor for KVMDriver {
                 libc::EINTR => HyperlightExit::Debug(VcpuStopReason::Interrupt),
                 // we send a signal to the thread to cancel execution this results in EINTR being returned by KVM so we return Cancelled
                 #[cfg(not(gdb))]
-                libc::EINTR => HyperlightExit::Cancelled(),
+                libc::EINTR => {
+                    // If cancellation was not requested for this specific vm, the vcpu was interrupted by a stale signal
+                    // that was meant to be delivered to a previous/other vcpu on this same thread, so let's ignore it.
+                    if !cancel_requested {
+                        HyperlightExit::Retry()
+                    } else {
+                        HyperlightExit::Cancelled()
+                    }
+                }
                 libc::EAGAIN => HyperlightExit::Retry(),
                 _ => {
                     crate::debug!("KVM Error -Details: Address: {} \n {:#?}", e, &self);
 
@@ -20,6 +20,8 @@ use tracing::{instrument, Span};
 use crate::error::HyperlightError::ExecutionCanceledByHost;
 use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags};
 use crate::metrics::METRIC_GUEST_CANCELLATION;
+#[cfg(any(kvm, mshv))]
+use crate::signal_handlers::INTERRUPT_VCPU_SIGRTMIN_OFFSET;
 use crate::{log_then_return, new_error, HyperlightError, Result};
 
 /// Util for handling x87 fpu state
@@ -322,10 +324,12 @@ impl VirtualCPU {
 
 /// A trait for handling interrupts to a sandbox's vcpu
 pub trait InterruptHandle: Send + Sync {
-    /// Interrupt the corresponding sandbox's vcpu if it's running.
+    /// Interrupt the corresponding sandbox from running.
     ///
     /// - If this is called while the vcpu is running, then it will interrupt the vcpu and return `true`.
-    /// - If this is called while the vcpu is not running, then it will do nothing and return `false`.
+    /// - If this is called while the vcpu is not running (for example during a host call), the
+    ///     vcpu is not interrupted immediately; instead, it is prevented from running **the next time**
+    ///     it's scheduled, and this function returns `false`.
     ///
     /// # Note
     /// This function will block for the duration of the time it takes for the vcpu thread to be interrupted.
@@ -338,25 +342,35 @@ pub trait InterruptHandle: Send + Sync {
 #[cfg(any(kvm, mshv))]
 #[derive(Debug)]
 pub(super) struct LinuxInterruptHandle {
-    /// True when the vcpu is currently running and blocking the thread
+    /// Invariant: vcpu is running => `running` is true. (Neither converse nor inverse is true)
     running: AtomicBool,
-    /// The thread id on which the vcpu was most recently run on or is currently running on
+    /// Invariant: vcpu is running => `tid` is the thread on which it is running.
     tid: AtomicU64,
+    /// True when an "interruptor" has requested the VM to be cancelled. Set immediately when
+    /// `kill()` is called, and cleared when the vcpu is no longer running.
+    /// This is used to
+    /// 1. make sure stale signals do not interrupt
+    ///     the wrong vcpu (a vcpu may be interrupted only if `cancel_requested` is true),
+    /// 2. ensure that if a vm is killed while a host call is running,
+    ///     the vm will not re-enter the guest after the host call returns.
+    cancel_requested: AtomicBool,
     /// Whether the corresponding vm is dropped
     dropped: AtomicBool,
 }
 
 #[cfg(any(kvm, mshv))]
 impl InterruptHandle for LinuxInterruptHandle {
     fn kill(&self) -> bool {
-        let sigrtmin = libc::SIGRTMIN();
+        self.cancel_requested.store(true, Ordering::Relaxed);
+
+        let signal_number = libc::SIGRTMIN() + INTERRUPT_VCPU_SIGRTMIN_OFFSET;
         let mut sent_signal = false;
 
         while self.running.load(Ordering::Relaxed) {
             log::info!("Sending signal to kill vcpu thread...");
             sent_signal = true;
             unsafe {
-                libc::pthread_kill(self.tid.load(Ordering::Relaxed) as _, sigrtmin);
+                libc::pthread_kill(self.tid.load(Ordering::Relaxed) as _, signal_number);
             }
             std::thread::sleep(std::time::Duration::from_micros(50));
         }