Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
206 changes: 199 additions & 7 deletions src/kernel/src/hal/arch/x86/cpu/interrupt/controller.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ enum InterruptControllerType {
Legacy(Pic),
Xapic(Xapic, Ioapic),
PicXapic(Pic, Xapic),
/// xAPIC-only mode: LAPIC handles timer delivery and EOI entirely
/// in-kernel (via the WHP LAPIC emulator). No PIC initialization
/// or routing is needed, eliminating ~47 VM exits from PIC I/O.
XapicOnly(Xapic),
}

pub struct InterruptController {
Expand All @@ -74,6 +78,20 @@ impl InterruptController {
intmap: InterruptMap,
eoi_xapic: Option<Xapic>,
) -> Result<Self, Error> {
// On WHP+microvm, when the xAPIC timer has already been
// initialized (eoi_xapic is Some), skip PIC initialization
// entirely. The WHP LAPIC emulator handles timer delivery
// and EOI via MMIO — no VM exits. PIC ports (0x20/21/A0/A1)
// are never accessed, eliminating ~47 exits per cold-start.
#[cfg(all(feature = "microvm", feature = "whp"))]
if let Some(xapic_eoi) = eoi_xapic {
info!("using xapic-only mode (skipping pic init for whp)");
return Ok(Self {
intmap,
intctrl: InterruptControllerType::XapicOnly(xapic_eoi),
});
}

// If legacy PIC is available, initialize it.
let pic: Option<Pic> = if let Some(mut pic) = pic {
Some(pic.init()?)
Expand Down Expand Up @@ -145,6 +163,168 @@ impl InterruptController {
});
}

// On microvm/WHP the partition enables LAPIC emulation in xAPIC mode.
// This block software-enables the LAPIC, calibrates the LAPIC timer and
// then starts it in periodic mode so that timer interrupts are delivered
// by the WHP LAPIC emulator. The LAPIC page at 0xFEE00000 is
// identity-mapped by the microvm platform init and is backed by the WHP
// LAPIC emulator (not guest RAM).
#[cfg(all(feature = "microvm", feature = "whp"))]
{
    use ::arch::cpu::xapic;

    let lapic_base: usize = ::config::microvm::DEFAULT_LAPIC_BASE;
    let lapic: xapic::Xapic = xapic::Xapic::new(lapic_base as *mut u32);

    // SAFETY: The LAPIC MMIO page is identity-mapped during microvm
    // platform init, so these register writes target the WHP LAPIC
    // emulator rather than arbitrary memory.
    unsafe {
        // 0x1FF = APIC software-enable (bit 8) | spurious vector 0xFF.
        lapic.write(xapic::XAPIC_SVR, 0x1FF);
        // Task priority 0: do not block any interrupt priority class.
        lapic.write(xapic::XAPIC_TPR, 0);
    }
    info!("lapic svr enabled for whp interrupt delivery");

    // LAPIC timer calibration.
    //
    // When CPUID leaf 0x16 reports a base frequency, the calibration
    // window is timed with an RDTSC spin loop, which avoids the PIT
    // port I/O (and the VM exits it causes) that is especially costly
    // during the first WHvRunVirtualProcessor call.
    //
    // When leaf 0x16 is not available, we fall back to PIT-based
    // calibration with a reduced 1 ms window.

    // SAFETY: LAPIC register accesses go through the identity-mapped
    // MMIO page described above. The PIT fallback only touches the
    // legacy PIT/speaker ports (0x61, PIT_CTRL, PIT_DATA + 2).
    unsafe {
        // 1. Mask the LAPIC timer during calibration.
        lapic.write(
            xapic::XAPIC_TIMER,
            xapic::XapicTimer::new(0x20, false, true, 0).to_u32(),
        );

        // 2. Set LAPIC timer divide-by-128.
        lapic.write(xapic::XAPIC_TDCR, 0x0A);

        // 3. Check CPUID leaf 0x16 for the base (TSC) frequency.
        let base_freq: u32 = ::arch::cpu::cpuid::get_base_frequency_mhz();

        let mut ticks_per_ms: u32 = if base_freq > 0 {
            // RDTSC-based calibration.
            let tsc_freq_mhz: u64 = base_freq as u64;
            let tsc_ticks_per_ms: u64 = tsc_freq_mhz * 1_000;

            // 4a. Start the LAPIC timer counting down from max value.
            lapic.write(xapic::XAPIC_TICR, 0xFFFF_FFFF);

            // 5a. Spin for ~1 ms using RDTSC. A max-iteration guard
            //     prevents a hang if the TSC does not advance
            //     (virtualisation quirk). Deltas use wrapping
            //     subtraction so a TSC that steps backwards cannot
            //     trigger an arithmetic-overflow panic.
            const RDTSC_MAX_ITERS: u64 = 1_000_000_000;
            let tsc_start: u64 = ::arch::cpu::rdtsc();
            let mut iters: u64 = 0;
            while ::arch::cpu::rdtsc().wrapping_sub(tsc_start) < tsc_ticks_per_ms {
                core::hint::spin_loop();
                iters += 1;
                if iters >= RDTSC_MAX_ITERS {
                    warn!(
                        "rdtsc calibration timeout after {} iterations",
                        RDTSC_MAX_ITERS
                    );
                    break;
                }
            }

            // 6a. Read the remaining LAPIC count and the actual TSC
            //     delta so the spin-loop overshoot can be corrected.
            let current_count: u32 = lapic.read(xapic::XAPIC_TCCR);
            let elapsed_ticks: u32 = 0xFFFF_FFFF_u32.wrapping_sub(current_count);
            let tsc_elapsed: u64 = ::arch::cpu::rdtsc().wrapping_sub(tsc_start);

            // ticks_per_ms = elapsed × (target / actual). Scaling by the
            // measured TSC delta makes the result independent of how far
            // the spin loop overshot the 1 ms target; it still relies on
            // the reported TSC frequency being accurate.
            //
            // If the TSC never advanced (the case the iteration guard
            // above exists for) the divisor is zero: report 0 so the
            // underflow fallback below applies instead of panicking.
            let tpm: u32 = match (elapsed_ticks as u64 * tsc_ticks_per_ms).checked_div(tsc_elapsed)
            {
                // Clamp rather than silently truncate on the narrowing cast.
                Some(scaled) => scaled.min(u32::MAX as u64) as u32,
                None => {
                    warn!("rdtsc did not advance during lapic timer calibration");
                    0
                },
            };

            info!(
                "lapic timer calibration (rdtsc): elapsed_ticks={}, ticks_per_ms={}, \
                 tsc_freq_mhz={}",
                elapsed_ticks, tpm, tsc_freq_mhz
            );
            tpm
        } else {
            // PIT-based fallback (reduced 1 ms window).
            use ::arch::cpu::pit;
            const CALIBRATION_MS: u32 = 1;
            let pit_reload: u16 =
                ((pit::PIT_MAX_FREQUENCY as u64 * CALIBRATION_MS as u64 / 1000) & 0xFFFF) as u16;

            warn!("cpuid leaf 0x16 unavailable, using pit-based calibration fallback");

            // 4b. Program PIT channel 2 in one-shot mode. Port 0x61:
            //     clear the two low bits, then set bit 0 to gate
            //     channel 2 on.
            let speaker: u8 = (::arch::io::in8(0x61) & 0xFC) | 0x01;
            ::arch::io::out8(0x61, speaker);
            ::arch::io::out8(
                pit::PIT_CTRL,
                pit::PIT_SEL2 | pit::PIT_ACC_LOHI | pit::PIT_MODE_TCOUNT | pit::PIT_BINARY,
            );
            // Reload value is written low byte first, then high byte.
            ::arch::io::out8(pit::PIT_DATA + 2, (pit_reload & 0xFF) as u8);
            ::arch::io::out8(pit::PIT_DATA + 2, (pit_reload >> 8) as u8);

            // Start the LAPIC timer counting down from max value.
            lapic.write(xapic::XAPIC_TICR, 0xFFFF_FFFF);

            // 5b. Wait for PIT channel 2 output (bit 5 of port 0x61)
            //     with a bounded busy-wait.
            const PIT_CALIBRATION_MAX_ITERS: u32 = 10_000_000;
            let mut pit_iters: u32 = 0;
            while (::arch::io::in8(0x61) & 0x20) == 0 {
                core::hint::spin_loop();
                pit_iters = pit_iters.wrapping_add(1);
                if pit_iters >= PIT_CALIBRATION_MAX_ITERS {
                    warn!(
                        "pit calibration timeout after {} iterations",
                        PIT_CALIBRATION_MAX_ITERS
                    );
                    break;
                }
            }

            // 6b. Read remaining LAPIC timer count.
            let current_count: u32 = lapic.read(xapic::XAPIC_TCCR);
            let elapsed_ticks: u32 = 0xFFFF_FFFF_u32.wrapping_sub(current_count);
            let tpm: u32 = elapsed_ticks / CALIBRATION_MS;

            info!(
                "lapic timer calibration (pit fallback): elapsed_ticks={}, \
                 ticks_per_ms={}",
                elapsed_ticks, tpm
            );
            tpm
        };

        // An initial count of 0 would stop the timer, so never program it.
        if ticks_per_ms == 0 {
            warn!("lapic timer calibration underflow: using fallback ticks_per_ms=1");
            ticks_per_ms = 1;
        }

        // 7. Program LAPIC timer in periodic mode with vector 0x20,
        //    initial count = ticks_per_ms (1 kHz).
        lapic.write(
            xapic::XAPIC_TIMER,
            xapic::XapicTimer::new(0x20, false, false, 1).to_u32(),
        );
        lapic.write(xapic::XAPIC_TICR, ticks_per_ms);

        info!("lapic periodic timer started (vector=0x20, period=1ms)");
    }
}

info!("using legacy pic");
return Ok(Self {
intmap,
Expand Down Expand Up @@ -202,6 +382,10 @@ impl InterruptController {
}
Ok(())
},
InterruptControllerType::XapicOnly(ref mut xapic) => {
xapic.ack();
Ok(())
},
}
}

Expand All @@ -220,6 +404,12 @@ impl InterruptController {
pic.unmask(intnum as u16);
Ok(())
},
InterruptControllerType::XapicOnly(_) => {
// No PIC to unmask. The LAPIC timer is masked only while it is
// being calibrated and is unmasked when it is programmed in
// periodic mode afterwards; other interrupt sources (IKC)
// are injected directly via the LAPIC by the VMM.
Ok(())
},
}
}

Expand All @@ -246,7 +436,9 @@ impl InterruptController {
kstack: *const u8,
) -> Result<(), Error> {
match self.intctrl {
InterruptControllerType::Legacy(_) | InterruptControllerType::PicXapic(..) => {
InterruptControllerType::Legacy(_)
| InterruptControllerType::PicXapic(..)
| InterruptControllerType::XapicOnly(_) => {
let reason: &str = "pic does not support starting cores";
error!("{reason}");
Err(Error::new(ErrorCode::OperationNotSupported, reason))
Expand All @@ -263,9 +455,9 @@ impl InterruptController {
handler: Option<InterruptHandler>,
) -> Result<(), Error> {
let intnum: u8 = match self.intctrl {
InterruptControllerType::Legacy(_) | InterruptControllerType::PicXapic(..) => {
intnum as u8
},
InterruptControllerType::Legacy(_)
| InterruptControllerType::PicXapic(..)
| InterruptControllerType::XapicOnly(_) => intnum as u8,
InterruptControllerType::Xapic(_, _) => self.intmap[intnum],
};
unsafe { INTERRUPT_VECTOR[intnum as usize] = handler };
Expand All @@ -274,9 +466,9 @@ impl InterruptController {

pub fn get_handler(&self, intnum: InterruptNumber) -> Result<Option<InterruptHandler>, Error> {
let intnum: u8 = match self.intctrl {
InterruptControllerType::Legacy(_) | InterruptControllerType::PicXapic(..) => {
intnum as u8
},
InterruptControllerType::Legacy(_)
| InterruptControllerType::PicXapic(..)
| InterruptControllerType::XapicOnly(_) => intnum as u8,
InterruptControllerType::Xapic(_, _) => self.intmap[intnum],
};
unsafe { Ok(INTERRUPT_VECTOR[intnum as usize]) }
Expand Down
42 changes: 41 additions & 1 deletion src/libs/arch/src/x86/cpu/cpuid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -758,13 +758,53 @@ pub fn has_pbe() -> bool {
/// The processor base frequency in MHz, or 0 if not supported.
///
pub fn get_base_frequency_mhz() -> u32 {
// Check the maximum supported basic CPUID leaf.
let (max_basic_leaf, _, _, _): (u32, u32, u32, u32) = cpuid(0);

if max_basic_leaf < CPUID_FREQUENCY {
// Leaf 0x16 is not supported.
return 0;
}

let (eax, _, _, _): (u32, u32, u32, u32) = cpuid_subleaf(CPUID_FREQUENCY, 0);
// Issue CPUID with EAX = CPUID_FREQUENCY and ECX = 0 explicitly, so the
// subleaf selector is well-defined and does not depend on caller state.
let mut eax: u32 = CPUID_FREQUENCY;
let ebx: u32;
let mut ecx: u32 = 0;
let edx: u32;

unsafe {
#[cfg(target_pointer_width = "32")]
::core::arch::asm!(
"mov {ebx_backup}, ebx",
"cpuid",
"mov {ebx_out}, ebx",
"mov ebx, {ebx_backup}",
ebx_backup = out(reg) _,
ebx_out = out(reg) ebx,
inout("eax") eax,
inout("ecx") ecx,
out("edx") edx,
options(nomem, preserves_flags, nostack)
);

#[cfg(target_pointer_width = "64")]
::core::arch::asm!(
"mov {ebx_backup}, rbx",
"cpuid",
"mov {ebx_out:e}, ebx",
"mov rbx, {ebx_backup}",
ebx_backup = out(reg) _,
ebx_out = out(reg) ebx,
inout("eax") eax,
inout("ecx") ecx,
out("edx") edx,
options(nomem, preserves_flags, nostack)
);
}

// Suppress unused-variable warnings for registers we must clobber.
let _ = (ebx, ecx, edx);

// EAX contains the processor base frequency in MHz.
eax
Expand Down
6 changes: 5 additions & 1 deletion src/uservm/src/vmm/whp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -484,7 +484,11 @@ impl Vmm {

// Write host TSC base frequency so the guest can use RDTSC-based LAPIC
// timer calibration without requiring CPUID leaf 0x16.
let tsc_freq_mhz: u32 = ::arch::cpu::cpuid::get_base_frequency_mhz();
// Use WHP's ProcessorClockFrequency capability (returns Hz) because
// Hyper-V zeros out CPUID leaf 0x16 on the host.
let tsc_freq_mhz: u32 = unsafe {
(partition::WhpPartition::query_processor_clock_frequency() / 1_000_000) as u32
};
vmem.write_bytes(
::config::microvm::DEFAULT_MICROVM_CTRL_TSC_FREQ_MHZ as u64,
&tsc_freq_mhz.to_le_bytes(),
Expand Down
Loading