Skip to content

Commit 01c6726

Browse files
Copilot and jsturtevant authored
Add metric for vcpu kicks (#1034)
Signed-off-by: James Sturtevant <[email protected]> Co-authored-by: James Sturtevant <[email protected]>
1 parent 336fb8e commit 01c6726

File tree

2 files changed

+12
-2
lines changed

2 files changed

+12
-2
lines changed

src/hyperlight_host/src/hypervisor/mod.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use crate::HyperlightError::StackOverflow;
2121
use crate::error::HyperlightError::ExecutionCanceledByHost;
2222
use crate::hypervisor::regs::{CommonFpu, CommonRegisters, CommonSpecialRegisters};
2323
use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags};
24-
use crate::metrics::METRIC_GUEST_CANCELLATION;
24+
use crate::metrics::{METRIC_ERRONEOUS_VCPU_KICKS, METRIC_GUEST_CANCELLATION};
2525
#[cfg(feature = "mem_profile")]
2626
use crate::sandbox::trace::MemTraceInfo;
2727
use crate::{HyperlightError, Result, log_then_return};
@@ -402,8 +402,12 @@ impl VirtualCPU {
402402
}
403403
Ok(HyperlightExit::Cancelled()) => {
404404
// If cancellation was not requested for this specific guest function call,
405-
// the vcpu was interrupted by a stale cancellation from a previous call
405+
// the vcpu was interrupted by a stale cancellation. This can occur when:
406+
// - Linux: A signal from a previous call arrives late
407+
// - Windows: WHvCancelRunVirtualProcessor is called right after the vcpu exits but RUNNING_BIT is still true
406408
if !cancel_requested && !debug_interrupted {
409+
// Track that an erroneous vCPU kick occurred
410+
metrics::counter!(METRIC_ERRONEOUS_VCPU_KICKS).increment(1);
407411
// treat this the same as a HyperlightExit::Retry, the cancel was not meant for this call
408412
continue;
409413
}

src/hyperlight_host/src/metrics/mod.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,12 @@ pub(crate) static METRIC_GUEST_ERROR_LABEL_CODE: &str = "code";
2121
// Counter metric that counts the number of times a guest function call was cancelled due to timing out
2222
pub(crate) static METRIC_GUEST_CANCELLATION: &str = "guest_cancellations_total";
2323

24+
// Counter metric that counts the number of times a vCPU was erroneously kicked by a stale cancellation
25+
// This can happen in two scenarios:
26+
// 1. Linux: A signal from a previous guest call arrives late and interrupts a new call
27+
// 2. Windows: WHvCancelRunVirtualProcessor is called right after vCPU exits but RUNNING_BIT is still true
28+
pub(crate) static METRIC_ERRONEOUS_VCPU_KICKS: &str = "erroneous_vcpu_kicks_total";
29+
2430
// Histogram metric that measures the duration of guest function calls
2531
#[cfg(feature = "function_call_metrics")]
2632
pub(crate) static METRIC_GUEST_FUNC_DURATION: &str = "guest_call_duration_seconds";

0 commit comments

Comments
 (0)