@@ -395,6 +395,7 @@ impl HypervLinuxDriver {
395
395
orig_rsp : rsp_ptr,
396
396
interrupt_handle : Arc :: new ( LinuxInterruptHandle {
397
397
running : AtomicBool :: new ( false ) ,
398
+ cancel_requested : AtomicBool :: new ( false ) ,
398
399
tid : AtomicU64 :: new ( unsafe { libc:: pthread_self ( ) } ) ,
399
400
dropped : AtomicBool :: new ( false ) ,
400
401
} ) ,
@@ -584,37 +585,52 @@ impl Hypervisor for HypervLinuxDriver {
584
585
self . interrupt_handle
585
586
. tid
586
587
. store ( unsafe { libc:: pthread_self ( ) as u64 } , Ordering :: Relaxed ) ;
587
- // Note: if a `InterruptHandle::kill()` signal is delivered to this thread **here**
588
- // - before we've set the running to true,
589
- // Then the signal does not have any effect, because the signal handler is a no-op.
588
+ // Note: if `InterruptHandle::kill()` is called while this thread is **here**
589
+ // Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call
590
590
self . interrupt_handle . running . store ( true , Ordering :: Relaxed ) ;
591
- // Note: if a `InterruptHandle::kill()` signal is delivered to this thread **here**
592
- // - after we've set the running to true,
593
- // - before we've called `VcpuFd::run()`
594
- // Then the individual signal is lost, because the signal is only processed after we've left userspace.
595
- // However, for this reason, we keep sending the signal again and again until we see that the atomic `running` is set to false.
596
- #[ cfg( mshv2) ]
597
- let run_result = {
598
- let hv_message: hv_message = Default :: default ( ) ;
599
- self . vcpu_fd . run ( hv_message)
591
+ // Don't run the vcpu if `cancel_requested` is true
592
+ //
593
+ // Note: if `InterruptHandle::kill()` is called while this thread is **here**
594
+ // Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call
595
+ let exit_reason = if self
596
+ . interrupt_handle
597
+ . cancel_requested
598
+ . swap ( false , Ordering :: Relaxed )
599
+ {
600
+ return Ok ( HyperlightExit :: Cancelled ( ) ) ;
601
+ } else {
602
+ // Note: if `InterruptHandle::kill()` is called while this thread is **here**
603
+ // Then the vcpu will run, but we will keep sending signals to this thread
604
+ // to interrupt it until `running` is set to false. The `vcpu_fd::run()` call will
605
+ // return either normally with an exit reason, or from being "kicked" by our signal handler, with an EINTR error,
606
+ // both of which are fine.
607
+ #[ cfg( mshv2) ]
608
+ {
609
+ let hv_message: hv_message = Default :: default ( ) ;
610
+ self . vcpu_fd . run ( hv_message)
611
+ }
612
+ #[ cfg( mshv3) ]
613
+ self . vcpu_fd . run ( )
600
614
} ;
601
- #[ cfg( mshv3) ]
602
- let run_result = self . vcpu_fd . run ( ) ;
603
- // Note: if a `InterruptHandle::kill()` signal is delivered to this thread **here**
604
- // - after we've called `VcpuFd::run()`
605
- // - before we've set the running to false
606
- // Then this is fine because the call to `VcpuFd::run()` is already finished,
607
- // the signal handler itself is a no-op, and the signals will stop being sent
608
- // once we've set the `running` to false.
615
+ // Note: if `InterruptHandle::kill()` is called while this thread is **here**
616
+ // Then signals will be sent to this thread until `running` is set to false.
617
+ // This is fine since the signal handler is a no-op.
618
+ let cancel_requested = self
619
+ . interrupt_handle
620
+ . cancel_requested
621
+ . swap ( false , Ordering :: Relaxed ) ;
622
+ // Note: if `InterruptHandle::kill()` is called while this thread is **here**
623
+ // Then `cancel_requested` will be set to true again, which will cancel the **next vcpu run**.
624
+ // Additionally signals will be sent to this thread until `running` is set to false.
625
+ // This is fine since the signal handler is a no-op.
609
626
self . interrupt_handle
610
627
. running
611
628
. store ( false , Ordering :: Relaxed ) ;
612
- // Note: if a `InterruptHandle::kill()` signal is delivered to this thread **here**
613
- // - after we've set the running to false,
614
- // Then the signal does not have any effect, because the signal handler is a no-op.
615
- // This is fine since we are already done with the `VcpuFd::run()` call.
616
-
617
- let result = match run_result {
629
+ // At this point, `running` is false so no more signals will be sent to this thread,
630
+ // but we may still receive async signals that were sent before this point.
631
+ // To prevent those signals from interrupting subsequent calls to `run()`,
632
+ // we make sure to check `cancel_requested` before cancelling (see `libc::EINTR` match-arm below).
633
+ let result = match exit_reason {
618
634
Ok ( m) => match m. header . message_type {
619
635
HALT_MESSAGE => {
620
636
crate :: debug!( "mshv - Halt Details : {:#?}" , & self ) ;
@@ -691,7 +707,15 @@ impl Hypervisor for HypervLinuxDriver {
691
707
} ,
692
708
Err ( e) => match e. errno ( ) {
693
709
// We send a signal to the thread to cancel execution; this results in EINTR being returned by `VcpuFd::run()`, so we return Cancelled
694
- libc:: EINTR => HyperlightExit :: Cancelled ( ) ,
710
+ libc:: EINTR => {
711
+ // If cancellation was not requested for this specific vm, the vcpu was interrupted because of a stale signal
712
+ // that was meant to be delivered to a previous/other vcpu on this same thread, so let's ignore it
713
+ if !cancel_requested {
714
+ HyperlightExit :: Retry ( )
715
+ } else {
716
+ HyperlightExit :: Cancelled ( )
717
+ }
718
+ }
695
719
libc:: EAGAIN => HyperlightExit :: Retry ( ) ,
696
720
_ => {
697
721
crate :: debug!( "mshv Error - Details: Error: {} \n {:#?}" , e, & self ) ;
0 commit comments