Skip to content

Commit de5c328

Browse files
committed
Prevent ABA-problem, where the vcpu could be successfully interrupted, but a new function call could be scheduled, before the interruptor-thread has time to observe the fact that the vcpu was interrupted
Signed-off-by: Ludvig Liljenberg <[email protected]>
1 parent 883624e commit de5c328

File tree

4 files changed

+91
-18
lines changed

4 files changed

+91
-18
lines changed

src/hyperlight_host/src/hypervisor/hyperv_linux.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,7 @@ impl HypervLinuxDriver {
396396
entrypoint: entrypoint_ptr.absolute()?,
397397
orig_rsp: rsp_ptr,
398398
interrupt_handle: Arc::new(LinuxInterruptHandle {
399-
running: AtomicBool::new(false),
399+
running: AtomicU64::new(0),
400400
cancel_requested: AtomicBool::new(false),
401401
tid: AtomicU64::new(unsafe { libc::pthread_self() }),
402402
retry_delay: config.get_interrupt_retry_delay(),
@@ -591,7 +591,14 @@ impl Hypervisor for HypervLinuxDriver {
591591
.store(unsafe { libc::pthread_self() as u64 }, Ordering::Relaxed);
592592
// Note: if a `InterruptHandle::kill()` called while this thread is **here**
593593
// Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call
594-
self.interrupt_handle.running.store(true, Ordering::Relaxed);
594+
self.interrupt_handle
595+
.set_running_and_increment_generation()
596+
.map_err(|e| {
597+
new_error!(
598+
"Error setting running state and incrementing generation: {}",
599+
e
600+
)
601+
})?;
595602
// Don't run the vcpu if `cancel_requested` is true
596603
//
597604
// Note: if a `InterruptHandle::kill()` called while this thread is **here**
@@ -629,9 +636,7 @@ impl Hypervisor for HypervLinuxDriver {
629636
// Then `cancel_requested` will be set to true again, which will cancel the **next vcpu run**.
630637
// Additionally signals will be sent to this thread until `running` is set to false.
631638
// This is fine since the signal handler is a no-op.
632-
self.interrupt_handle
633-
.running
634-
.store(false, Ordering::Relaxed);
639+
self.interrupt_handle.clear_running_bit();
635640
// At this point, `running` is false so no more signals will be sent to this thread,
636641
// but we may still receive async signals that were sent before this point.
637642
// To prevent those signals from interrupting subsequent calls to `run()`,

src/hyperlight_host/src/hypervisor/kvm.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,7 @@ impl KVMDriver {
351351
orig_rsp: rsp_gp,
352352
mem_regions,
353353
interrupt_handle: Arc::new(LinuxInterruptHandle {
354-
running: AtomicBool::new(false),
354+
running: AtomicU64::new(0),
355355
cancel_requested: AtomicBool::new(false),
356356
tid: AtomicU64::new(unsafe { libc::pthread_self() }),
357357
retry_delay: config.get_interrupt_retry_delay(),
@@ -526,7 +526,14 @@ impl Hypervisor for KVMDriver {
526526
.store(unsafe { libc::pthread_self() as u64 }, Ordering::Relaxed);
527527
// Note: if a `InterruptHandle::kill()` called while this thread is **here**
528528
// Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call
529-
self.interrupt_handle.running.store(true, Ordering::Relaxed);
529+
self.interrupt_handle
530+
.set_running_and_increment_generation()
531+
.map_err(|e| {
532+
new_error!(
533+
"Error setting running state and incrementing generation: {}",
534+
e
535+
)
536+
})?;
530537
// Don't run the vcpu if `cancel_requested` is true
531538
//
532539
// Note: if a `InterruptHandle::kill()` called while this thread is **here**
@@ -558,9 +565,7 @@ impl Hypervisor for KVMDriver {
558565
// Then `cancel_requested` will be set to true again, which will cancel the **next vcpu run**.
559566
// Additionally signals will be sent to this thread until `running` is set to false.
560567
// This is fine since the signal handler is a no-op.
561-
self.interrupt_handle
562-
.running
563-
.store(false, Ordering::Relaxed);
568+
self.interrupt_handle.clear_running_bit();
564569
// At this point, `running` is false so no more signals will be sent to this thread,
565570
// but we may still receive async signals that were sent before this point.
566571
// To prevent those signals from interrupting subsequent calls to `run()` (on other vms!),

src/hyperlight_host/src/hypervisor/mod.rs

Lines changed: 64 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -338,8 +338,23 @@ pub trait InterruptHandle: Send + Sync {
338338
#[cfg(any(kvm, mshv))]
339339
#[derive(Debug)]
340340
pub(super) struct LinuxInterruptHandle {
341-
/// Invariant: vcpu is running => `running` is true. (Neither converse nor inverse is true)
342-
running: AtomicBool,
341+
/// Invariant: vcpu is running => most significant bit (63) of `running` is set. (Neither converse nor inverse is true)
342+
///
343+
/// Additionally, bit 0-62 tracks how many times the VCPU has been run. Incremented each time `run()` is called.
344+
///
345+
/// This prevents an ABA problem where:
346+
/// 1. The VCPU is running (generation N),
347+
/// 2. It gets cancelled,
348+
/// 3. Then quickly restarted (generation N+1),
349+
/// before the original thread has observed that it was cancelled.
350+
///
351+
/// Without this generation counter, the interrupt logic might assume the VCPU is still
352+
/// in the *original* run (generation N), see that it's `running`, and re-send the signal.
353+
/// But the new VCPU run (generation N+1) would treat this as a stale signal and ignore it,
354+
/// potentially causing an infinite loop where no effective interrupt is delivered.
355+
///
356+
/// Invariant: If the VCPU is running, `run_generation[bit 0-62]` matches the current run's generation.
357+
running: AtomicU64,
343358
/// Invariant: vcpu is running => `tid` is the thread on which it is running.
344359
/// Note: multiple vms may have the same `tid`, but at most one vm will have `running` set to true.
345360
tid: AtomicU64,
@@ -359,15 +374,61 @@ pub(super) struct LinuxInterruptHandle {
359374
sig_rt_min_offset: u8,
360375
}
361376

377+
#[cfg(any(kvm, mshv))]
378+
impl LinuxInterruptHandle {
379+
const RUNNING_BIT: u64 = 1 << 63;
380+
const MAX_GENERATION: u64 = Self::RUNNING_BIT - 1;
381+
382+
// set running to true and increment the generation. Generation will wrap around at `MAX_GENERATION`.
383+
fn set_running_and_increment_generation(&self) -> std::result::Result<u64, u64> {
384+
self.running
385+
.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |raw| {
386+
let generation = raw & !Self::RUNNING_BIT;
387+
if generation == Self::MAX_GENERATION {
388+
// restart generation from 0
389+
return Some(Self::RUNNING_BIT);
390+
}
391+
Some((generation + 1) | Self::RUNNING_BIT)
392+
})
393+
}
394+
395+
// clear the running bit and return the generation
396+
fn clear_running_bit(&self) -> u64 {
397+
self.running
398+
.fetch_and(!Self::RUNNING_BIT, Ordering::Relaxed)
399+
}
400+
401+
fn get_running_and_generation(&self) -> (bool, u64) {
402+
let raw = self.running.load(Ordering::Relaxed);
403+
let running = raw & Self::RUNNING_BIT != 0;
404+
let generation = raw & !Self::RUNNING_BIT;
405+
(running, generation)
406+
}
407+
}
408+
362409
#[cfg(any(kvm, mshv))]
363410
impl InterruptHandle for LinuxInterruptHandle {
364411
fn kill(&self) -> bool {
365412
self.cancel_requested.store(true, Ordering::Relaxed);
366413

367414
let signal_number = libc::SIGRTMIN() + self.sig_rt_min_offset as libc::c_int;
368415
let mut sent_signal = false;
416+
let mut target_generation: Option<u64> = None;
417+
418+
loop {
419+
let (running, generation) = self.get_running_and_generation();
420+
421+
if !running {
422+
break;
423+
}
424+
425+
match target_generation {
426+
None => target_generation = Some(generation),
427+
// prevent ABA problem
428+
Some(expected) if expected != generation => break,
429+
_ => {}
430+
}
369431

370-
while self.running.load(Ordering::Relaxed) {
371432
log::info!("Sending signal to kill vcpu thread...");
372433
sent_signal = true;
373434
unsafe {

src/hyperlight_host/tests/integration_test.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -283,10 +283,14 @@ fn interrupt_moved_sandbox() {
283283
thread2.join().expect("Thread should finish");
284284
}
285285

286+
/// This tests exercises the behavior of killing vcpu with a long retry delay.
287+
/// This will exercise the ABA-problem, where the vcpu could be successfully interrupted,
288+
/// but restarted, before the interruptor-thread has a chance to see that the vcpu was killed.
289+
///
290+
/// The ABA-problem is solved by introducing run-generation on the vcpu.
286291
#[test]
287292
#[cfg(target_os = "linux")]
288293
fn interrupt_custom_signal_no_and_retry_delay() {
289-
env_logger::builder().filter_level(LevelFilter::Info).init();
290294
let mut config = SandboxConfiguration::default();
291295
config.set_interrupt_vcpu_sigrtmin_offset(0).unwrap();
292296
config.set_interrupt_retry_delay(Duration::from_secs(1));
@@ -301,26 +305,24 @@ fn interrupt_custom_signal_no_and_retry_delay() {
301305

302306
let interrupt_handle = sbox1.interrupt_handle();
303307
assert!(!interrupt_handle.dropped()); // not yet dropped
304-
let barrier = Arc::new(Barrier::new(2));
305-
let barrier2 = barrier.clone();
306308

307309
const NUM_ITERS: usize = 3;
308310

309311
let thread = thread::spawn(move || {
310312
for _ in 0..NUM_ITERS {
311-
barrier2.wait();
312313
// wait for the guest call to start
313314
thread::sleep(Duration::from_millis(1000));
314315
interrupt_handle.kill();
315316
}
316317
});
317318

318319
for _ in 0..NUM_ITERS {
319-
barrier.wait();
320320
let res = sbox1
321321
.call_guest_function_by_name::<i32>("Spin", ())
322322
.unwrap_err();
323323
assert!(matches!(res, HyperlightError::ExecutionCanceledByHost()));
324+
// immediately reenter another guest function call after having being cancelled,
325+
// so that the vcpu is running again before the interruptor-thread has a chance to see that the vcpu is not running
324326
}
325327
thread.join().expect("Thread should finish");
326328
}

0 commit comments

Comments
 (0)