diff --git a/Cargo.lock b/Cargo.lock index 0b07680fae..a22a3b68e4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1217,6 +1217,21 @@ dependencies = [ "vmgs_broker", ] +[[package]] +name = "disk_layered" +version = "0.0.0" +dependencies = [ + "async-trait", + "disk_backend", + "disk_backend_resources", + "futures", + "guestmem", + "inspect", + "scsi_buffers", + "thiserror 2.0.0", + "vm_resource", +] + [[package]] name = "disk_nvme" version = "0.0.0" @@ -1251,15 +1266,16 @@ name = "disk_ramdisk" version = "0.0.0" dependencies = [ "anyhow", - "async-trait", "disk_backend", "disk_backend_resources", + "disk_layered", "event-listener", "guestmem", "inspect", "pal_async", "parking_lot", "scsi_buffers", + "test_with_tracing", "thiserror 2.0.0", "tracing", "vm_resource", @@ -1993,7 +2009,6 @@ dependencies = [ "chipset_arc_mutex_device", "chipset_device", "chipset_device_fuzz", - "disk_backend", "disk_ramdisk", "guestmem", "ide", @@ -4197,7 +4212,6 @@ version = "0.0.0" dependencies = [ "anyhow", "chipset_device", - "disk_backend", "disk_ramdisk", "event-listener", "futures", @@ -4550,6 +4564,7 @@ dependencies = [ "disk_blob", "disk_crypt", "disk_file", + "disk_layered", "disk_prwrap", "disk_ramdisk", "disk_vhd1", @@ -5983,7 +5998,6 @@ dependencies = [ "anyhow", "async-trait", "criterion", - "disk_backend", "disk_ramdisk", "event-listener", "fast_select", diff --git a/Cargo.toml b/Cargo.toml index a5a9ea74f1..87fdef85b6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -244,6 +244,7 @@ disk_crypt = { path = "vm/devices/storage/disk_crypt" } disk_crypt_resources = { path = "vm/devices/storage/disk_crypt_resources" } disk_file = { path = "vm/devices/storage/disk_file" } disk_get_vmgs = { path = "vm/devices/storage/disk_get_vmgs" } +disk_layered = { path = "vm/devices/storage/disk_layered" } disk_nvme = { path = "vm/devices/storage/disk_nvme" } disk_ramdisk = { path = "vm/devices/storage/disk_ramdisk" } disk_striped = { path = "vm/devices/storage/disk_striped" } diff --git a/openvmm/openvmm_entry/src/lib.rs b/openvmm/openvmm_entry/src/lib.rs index afe27819af..0822bc8e8f 100644 --- a/openvmm/openvmm_entry/src/lib.rs +++ b/openvmm/openvmm_entry/src/lib.rs @@ -27,6 +27,8 @@ use cli_args::EndpointConfigCli; use cli_args::NicConfigCli; use cli_args::SerialConfigCli; use cli_args::UefiConsoleModeCli; +use disk_backend_resources::layer::DiskLayerHandle; +use disk_backend_resources::layer::RamDiskLayerHandle; use floppy_resources::FloppyDiskConfig; use framebuffer::FramebufferAccess; use framebuffer::FRAMEBUFFER_SIZE; @@ -844,9 +846,9 @@ fn vm_config_from_command_line( let vmgs_disk = if let Some(disk) = &opt.get_vmgs { disk_open(disk, false).context("failed to open GET vmgs disk")? 
} else { - disk_backend_resources::RamDiskHandle { - len: vmgs_format::VMGS_DEFAULT_CAPACITY, - } + disk_backend_resources::LayeredDiskHandle::single_layer(RamDiskLayerHandle { + len: Some(vmgs_format::VMGS_DEFAULT_CAPACITY), + }) .into_resource() }; vmbus_devices.extend([ @@ -1416,7 +1418,11 @@ impl NicConfig { fn disk_open(disk_cli: &DiskCliKind, read_only: bool) -> anyhow::Result> { let disk_type = match disk_cli { - &DiskCliKind::Memory(len) => Resource::new(disk_backend_resources::RamDiskHandle { len }), + &DiskCliKind::Memory(len) => { + Resource::new(disk_backend_resources::LayeredDiskHandle::single_layer( + RamDiskLayerHandle { len: Some(len) }, + )) + } DiskCliKind::File(path) => open_disk_type(path, read_only) .with_context(|| format!("failed to open {}", path.display()))?, DiskCliKind::Blob { kind, url } => Resource::new(disk_backend_resources::BlobDiskHandle { @@ -1427,8 +1433,13 @@ fn disk_open(disk_cli: &DiskCliKind, read_only: bool) -> anyhow::Result { - Resource::new(disk_backend_resources::RamDiffDiskHandle { - lower: disk_open(inner, true)?, + Resource::new(disk_backend_resources::LayeredDiskHandle { + layers: vec![ + RamDiskLayerHandle { len: None }.into_resource().into(), + DiskLayerHandle(disk_open(inner, true)?) + .into_resource() + .into(), + ], }) } DiskCliKind::PersistentReservationsWrapper(inner) => Resource::new( @@ -2363,7 +2374,9 @@ async fn run_control(driver: &DefaultDriver, mesh: &VmmMesh, opt: Options) -> an .with_context(|| format!("failed to open {}", path.display()))? } Some(size) => { - Resource::new(disk_backend_resources::RamDiskHandle { len: size }) + Resource::new(disk_backend_resources::LayeredDiskHandle::single_layer( + RamDiskLayerHandle { len: Some(size) }, + )) } }; diff --git a/openvmm/openvmm_resources/Cargo.toml b/openvmm/openvmm_resources/Cargo.toml index a326a8d5ba..f576b64d1a 100644 --- a/openvmm/openvmm_resources/Cargo.toml +++ b/openvmm/openvmm_resources/Cargo.toml @@ -34,6 +34,7 @@ serial_socket.workspace = true disk_blob = { workspace = true, optional = true } disk_crypt = { workspace = true, optional = true } disk_file.workspace = true +disk_layered.workspace = true disk_prwrap.workspace = true disk_ramdisk.workspace = true disk_vhd1.workspace = true diff --git a/openvmm/openvmm_resources/src/lib.rs b/openvmm/openvmm_resources/src/lib.rs index 531d93745b..14601be6ad 100644 --- a/openvmm/openvmm_resources/src/lib.rs +++ b/openvmm/openvmm_resources/src/lib.rs @@ -42,6 +42,7 @@ vm_resource::register_static_resolvers! 
{ net_dio::resolver::DioResolver, // Disks + disk_layered::resolver::LayeredDiskResolver, #[cfg(feature = "disk_crypt")] disk_crypt::resolver::DiskCryptResolver, disk_ramdisk::resolver::RamDiskResolver, diff --git a/petri/src/vm/construct.rs b/petri/src/vm/construct.rs index e55949d158..020c89bd83 100644 --- a/petri/src/vm/construct.rs +++ b/petri/src/vm/construct.rs @@ -18,7 +18,9 @@ use crate::BOOT_NVME_NSID; use crate::SCSI_INSTANCE; use crate::SIZE_1_GB; use anyhow::Context; -use disk_backend_resources::RamDiffDiskHandle; +use disk_backend_resources::layer::DiskLayerHandle; +use disk_backend_resources::layer::RamDiskLayerHandle; +use disk_backend_resources::LayeredDiskHandle; use framebuffer::Framebuffer; use framebuffer::FramebufferAccess; use framebuffer::FRAMEBUFFER_SIZE; @@ -750,7 +752,13 @@ impl PetriVmConfigSetupCore<'_> { PcatGuest::Vhd(_) => GuestMedia::Disk { read_only: false, disk_parameters: None, - disk_type: (RamDiffDiskHandle { lower: inner_disk }).into_resource(), + disk_type: LayeredDiskHandle { + layers: vec![ + RamDiskLayerHandle { len: None }.into_resource().into(), + DiskLayerHandle(inner_disk).into_resource().into(), + ], + } + .into_resource(), }, PcatGuest::Iso(_) => GuestMedia::Dvd( SimpleScsiDvdHandle { @@ -790,8 +798,13 @@ impl PetriVmConfigSetupCore<'_> { device: SimpleScsiDiskHandle { read_only: false, parameters: Default::default(), - disk: RamDiffDiskHandle { - lower: open_disk_type(&path, true)?, + disk: LayeredDiskHandle { + layers: vec![ + RamDiskLayerHandle { len: None }.into_resource().into(), + DiskLayerHandle(open_disk_type(&path, true)?) + .into_resource() + .into(), + ], } .into_resource(), } @@ -817,8 +830,13 @@ impl PetriVmConfigSetupCore<'_> { msix_count: 64, namespaces: vec![NamespaceDefinition { nsid: BOOT_NVME_NSID, - disk: RamDiffDiskHandle { - lower: open_disk_type(&path, true)?, + disk: LayeredDiskHandle { + layers: vec![ + RamDiskLayerHandle { len: None }.into_resource().into(), + DiskLayerHandle(open_disk_type(&path, true)?) 
+ .into_resource() + .into(), + ], } .into_resource(), read_only: false, @@ -925,9 +943,9 @@ impl PetriVmConfigSetupCore<'_> { vmbus_redirection: false, vtl2_settings: None, // Will be added at startup to allow tests to modify vmgs_disk: Some( - disk_backend_resources::RamDiskHandle { - len: vmgs_format::VMGS_DEFAULT_CAPACITY, - } + LayeredDiskHandle::single_layer(RamDiskLayerHandle { + len: Some(vmgs_format::VMGS_DEFAULT_CAPACITY), + }) .into_resource(), ), framebuffer: framebuffer.then(|| SharedFramebufferHandle.into_resource()), diff --git a/vm/devices/get/guest_emulation_device/src/test_utilities.rs b/vm/devices/get/guest_emulation_device/src/test_utilities.rs index 4c6c141824..2e2a0868e2 100644 --- a/vm/devices/get/guest_emulation_device/src/test_utilities.rs +++ b/vm/devices/get/guest_emulation_device/src/test_utilities.rs @@ -6,7 +6,6 @@ use crate::GedChannel; use crate::GuestConfig; use crate::GuestEmulationDevice; use crate::GuestFirmwareConfig; -use disk_backend::Disk; use get_protocol::test_utilities::TEST_VMGS_CAPACITY; use get_protocol::HostNotifications; use get_protocol::HostRequests; @@ -264,10 +263,7 @@ pub fn create_host_channel( None, recv, None, - Some( - Disk::new(disk_ramdisk::RamDisk::new(TEST_VMGS_CAPACITY as u64, false).unwrap()) - .unwrap(), - ), + Some(disk_ramdisk::ram_disk(TEST_VMGS_CAPACITY as u64, false).unwrap()), ); if let Some(ged_responses) = ged_responses { diff --git a/vm/devices/storage/disk_backend/src/lib.rs b/vm/devices/storage/disk_backend/src/lib.rs index d30ba7f9e5..fdb30ca1e0 100644 --- a/vm/devices/storage/disk_backend/src/lib.rs +++ b/vm/devices/storage/disk_backend/src/lib.rs @@ -7,11 +7,7 @@ //! (such as the VMGS file system). //! //! `Disk`s are backed by a [`DiskIo`] implementation. Specific disk -//! backends should be in their own crates. The exceptions that prove the rule -//! is [`ZeroDisk`][], which is small enough to be in this crate and serve as an -//! example. -//! -//! [`ZeroDisk`]: crate::zerodisk::ZeroDisk +//! backends should be in their own crates. #![forbid(unsafe_code)] #![warn(missing_docs)] @@ -19,7 +15,6 @@ pub mod pr; pub mod resolve; pub mod sync_wrapper; -pub mod zerodisk; use guestmem::AccessError; use inspect::Inspect; diff --git a/vm/devices/storage/disk_backend/src/zerodisk.rs b/vm/devices/storage/disk_backend/src/zerodisk.rs deleted file mode 100644 index d36eb88dd8..0000000000 --- a/vm/devices/storage/disk_backend/src/zerodisk.rs +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -//! A read-only disk that always returns zero on read. - -use super::DiskIo; -use crate::DiskError; -use guestmem::MemoryWrite; -use inspect::Inspect; -use scsi_buffers::RequestBuffers; - -/// A read-only disk that always returns zero on read. -#[derive(Debug, Inspect)] -pub struct ZeroDisk { - sector_size: u32, - sector_count: u64, -} - -/// Error returned by [`ZeroDisk::new`] when the disk geometry is invalid. -#[derive(Debug, thiserror::Error)] -#[error("disk size {0:#x} is not a multiple of the sector size {1}")] -pub enum InvalidGeometry { - /// The sector size is invalid. - #[error("sector size {0} is invalid")] - InvalidSectorSize(u32), - /// The disk size is not a multiple of the sector size. - #[error("disk size {disk_size:#x} is not a multiple of the sector size {sector_size}")] - NotSectorMultiple { - /// The disk size. - disk_size: u64, - /// The sector size. - sector_size: u32, - }, - /// The disk has no sectors. 
- #[error("disk has no sectors")] - EmptyDisk, -} - -impl ZeroDisk { - /// Creates a new disk with the given geometry. - pub fn new(sector_size: u32, disk_size: u64) -> Result { - if !sector_size.is_power_of_two() || sector_size < 512 { - return Err(InvalidGeometry::InvalidSectorSize(sector_size)); - } - if disk_size % sector_size as u64 != 0 { - return Err(InvalidGeometry::NotSectorMultiple { - disk_size, - sector_size, - }); - } - if disk_size == 0 { - return Err(InvalidGeometry::EmptyDisk); - } - Ok(Self { - sector_size, - sector_count: disk_size / sector_size as u64, - }) - } -} - -impl DiskIo for ZeroDisk { - fn disk_type(&self) -> &str { - "zero" - } - - fn sector_count(&self) -> u64 { - self.sector_count - } - - fn sector_size(&self) -> u32 { - self.sector_size - } - - fn is_read_only(&self) -> bool { - true - } - - fn disk_id(&self) -> Option<[u8; 16]> { - None - } - - fn physical_sector_size(&self) -> u32 { - 512 - } - - fn is_fua_respected(&self) -> bool { - true - } - - async fn read_vectored( - &self, - buffers: &RequestBuffers<'_>, - _sector: u64, - ) -> Result<(), DiskError> { - buffers.writer().zero(buffers.len()).map_err(Into::into) - } - - async fn write_vectored( - &self, - _buffers: &RequestBuffers<'_>, - _sector: u64, - _fua: bool, - ) -> Result<(), DiskError> { - Err(DiskError::ReadOnly) - } - - async fn sync_cache(&self) -> Result<(), DiskError> { - Ok(()) - } -} diff --git a/vm/devices/storage/disk_backend_resources/src/layer.rs b/vm/devices/storage/disk_backend_resources/src/layer.rs new file mode 100644 index 0000000000..96dafd1b0e --- /dev/null +++ b/vm/devices/storage/disk_backend_resources/src/layer.rs @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Disk layer resources. + +use mesh::MeshPayload; +use vm_resource::kind::DiskHandleKind; +use vm_resource::kind::DiskLayerHandleKind; +use vm_resource::Resource; +use vm_resource::ResourceId; + +/// RAM disk layer handle. +/// +/// FUTURE: allocate shared memory here so that the disk can be migrated between +/// processes. +#[derive(MeshPayload)] +pub struct RamDiskLayerHandle { + /// The size of the layer. If `None`, the layer will be the same size as the + /// lower disk. + pub len: Option, +} + +impl ResourceId for RamDiskLayerHandle { + const ID: &'static str = "ram"; +} + +/// Handle for a disk layer backed by a full disk. +#[derive(MeshPayload)] +pub struct DiskLayerHandle(pub Resource); + +impl ResourceId for DiskLayerHandle { + const ID: &'static str = "disk"; +} diff --git a/vm/devices/storage/disk_backend_resources/src/lib.rs b/vm/devices/storage/disk_backend_resources/src/lib.rs index 1f7fa43b9b..1a4d2af728 100644 --- a/vm/devices/storage/disk_backend_resources/src/lib.rs +++ b/vm/devices/storage/disk_backend_resources/src/lib.rs @@ -6,39 +6,18 @@ #![warn(missing_docs)] #![forbid(unsafe_code)] +pub mod layer; + use mesh::MeshPayload; use vm_resource::kind::DiskHandleKind; +use vm_resource::kind::DiskLayerHandleKind; +use vm_resource::IntoResource; use vm_resource::Resource; use vm_resource::ResourceId; // Define config types here so that you don't have to pull in the individual // crates just to describe the configuration. -/// RAM disk handle. -/// -/// FUTURE: allocate shared memory here so that the disk can be migrated between -/// processes. -#[derive(MeshPayload)] -pub struct RamDiskHandle { - /// Size of the disk, in bytes. - pub len: u64, -} - -impl ResourceId for RamDiskHandle { - const ID: &'static str = "ram"; -} - -/// RAM diff disk handle. 
-#[derive(MeshPayload)] -pub struct RamDiffDiskHandle { - /// The lower disk resource. - pub lower: Resource, -} - -impl ResourceId for RamDiffDiskHandle { - const ID: &'static str = "ramdiff"; -} - /// File-backed disk handle. #[derive(MeshPayload)] pub struct FileDiskHandle(pub std::fs::File); @@ -121,3 +100,44 @@ pub enum BlobDiskFormat { /// A fixed VHD1, with a VHD footer specifying disk metadata. FixedVhd1, } + +/// Handle for a disk that is backed by one or more layers. +#[derive(MeshPayload)] +pub struct LayeredDiskHandle { + /// The layers that make up the disk. The first layer is the top-most layer. + pub layers: Vec, +} + +impl LayeredDiskHandle { + /// Create a new layered disk handle with a single layer. + pub fn single_layer(layer: impl IntoResource) -> Self { + Self { + layers: vec![layer.into_resource().into()], + } + } +} + +impl ResourceId for LayeredDiskHandle { + const ID: &'static str = "layered"; +} + +/// Description of a disk layer. +#[derive(MeshPayload)] +pub struct DiskLayerDescription { + /// The layer resource. + pub layer: Resource, + /// If true, reads that miss this layer are written back to this layer. + pub read_cache: bool, + /// If true, writes are written both to this layer and the next one. + pub write_through: bool, +} + +impl From> for DiskLayerDescription { + fn from(layer: Resource) -> Self { + Self { + layer, + read_cache: false, + write_through: false, + } + } +} diff --git a/vm/devices/storage/disk_layered/Cargo.toml b/vm/devices/storage/disk_layered/Cargo.toml new file mode 100644 index 0000000000..23f0a41fec --- /dev/null +++ b/vm/devices/storage/disk_layered/Cargo.toml @@ -0,0 +1,23 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +[package] +name = "disk_layered" +edition = "2021" +rust-version.workspace = true + +[dependencies] +disk_backend.workspace = true +disk_backend_resources.workspace = true +scsi_buffers.workspace = true + +guestmem.workspace = true +vm_resource.workspace = true +inspect = { workspace = true, features = ["std"] } + +async-trait.workspace = true +futures.workspace = true +thiserror.workspace = true + +[lints] +workspace = true diff --git a/vm/devices/storage/disk_layered/src/bitmap.rs b/vm/devices/storage/disk_layered/src/bitmap.rs new file mode 100644 index 0000000000..c1ccb0ab41 --- /dev/null +++ b/vm/devices/storage/disk_layered/src/bitmap.rs @@ -0,0 +1,163 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Sector bitmaps for tracking read sectors during layered read operations. +//! +//! FUTURE: use real bitmaps instead of bool vectors. 
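To make the run-coalescing idea concrete before the implementation below, here is a minimal, self-contained sketch (illustrative only, not this crate's API — `unset_runs` is a made-up helper) of how still-unread sectors group into ranges that must be satisfied by lower layers:

```rust
/// Returns the runs of still-unread sectors, coalesced into ranges, for a
/// bool-vector bitmap covering `bits.len()` sectors starting at `base`.
fn unset_runs(base: u64, bits: &[bool]) -> Vec<std::ops::Range<u64>> {
    let mut runs = Vec::new();
    let mut i = 0;
    while i < bits.len() {
        if !bits[i] {
            let start = i;
            while i < bits.len() && !bits[i] {
                i += 1;
            }
            runs.push(base + start as u64..base + i as u64);
        } else {
            i += 1;
        }
    }
    runs
}

fn main() {
    // Sectors 2-3 and 5 were not satisfied by the upper layer, so they must
    // be read from the next layer down (or zeroed if no layer remains).
    let bits = [true, true, false, false, true, false];
    assert_eq!(unset_runs(100, &bits), vec![102..104, 105..106]);
}
```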
+ +use std::ops::Range; + +pub(crate) struct Bitmap { + bits: Vec, + sector: u64, +} + +impl Bitmap { + pub fn new(sector: u64, len: usize) -> Self { + Self { + bits: vec![false; len], + sector, + } + } + + pub fn unset_iter(&mut self) -> impl Iterator> { + let mut n = 0; + let sector = self.sector; + self.bits + .chunk_by_mut(|&a, &b| a == b) + .filter_map(move |bits| { + let start = n; + n += bits.len(); + if bits.first().map_or(false, |&x| !x) { + Some(SectorBitmapRange { + bits, + start_sector: sector + start as u64, + sector_within_bitmap: start, + set_count: 0, + }) + } else { + None + } + }) + } +} + +pub(crate) struct SectorBitmapRange<'a> { + bits: &'a mut [bool], + start_sector: u64, + sector_within_bitmap: usize, + set_count: usize, +} + +impl SectorBitmapRange<'_> { + pub fn view(&mut self, len: u64) -> SectorMarker<'_> { + SectorMarker { + bits: &mut self.bits[..len as usize], + set_count: &mut self.set_count, + sector_base: self.start_sector, + } + } + + pub fn start_sector_within_bitmap(&self) -> usize { + self.sector_within_bitmap + } + + pub fn start_sector(&self) -> u64 { + self.start_sector + } + + pub fn end_sector(&self) -> u64 { + self.start_sector + self.bits.len() as u64 + } + + pub fn len(&self) -> u64 { + self.bits.len() as u64 + } + + pub fn set_count(&self) -> usize { + self.set_count + } +} + +/// A type to mark sectors that have been read by a layer as part of a +/// [`LayerIo::read`](super::LayerIo::read) operation. +pub struct SectorMarker<'a> { + bits: &'a mut [bool], + sector_base: u64, + set_count: &'a mut usize, +} + +impl SectorMarker<'_> { + #[track_caller] + fn sector_to_index(&self, sector: u64) -> usize { + let i = sector + .checked_sub(self.sector_base) + .expect("invalid sector"); + assert!(i < self.bits.len() as u64, "invalid sector"); + i as usize + } + + /// Mark the specified sector number as having been read. + #[track_caller] + pub fn set(&mut self, sector: u64) { + let i = self.sector_to_index(sector); + *self.set_count += !self.bits[i] as usize; + self.bits[i] = true; + } + + /// Mark the range of sectors as having been read. + #[track_caller] + pub fn set_range(&mut self, range: Range) { + for sector in range { + self.set(sector); + } + } + + /// Mark all the sectors as having been read. 
+ pub fn set_all(&mut self) { + self.set_range(self.sector_base..self.sector_base + self.bits.len() as u64); + } +} + +#[cfg(test)] +mod tests { + use super::Bitmap; + + #[test] + fn test_bitmap() { + let base = 0x492893; + let mut bitmap = Bitmap::new(base, 10); + { + let mut iter = bitmap.unset_iter(); + let mut range = iter.next().unwrap(); + assert!(iter.next().is_none()); + assert_eq!(range.start_sector(), base); + assert_eq!(range.end_sector(), base + 10); + assert_eq!(range.len(), 10); + assert_eq!(range.set_count(), 0); + range.view(6).set_all(); + assert_eq!(range.set_count(), 6); + } + { + let mut iter = bitmap.unset_iter(); + let mut range = iter.next().unwrap(); + assert!(iter.next().is_none()); + assert_eq!(range.start_sector(), base + 6); + assert_eq!(range.end_sector(), base + 10); + assert_eq!(range.start_sector_within_bitmap(), 6); + assert_eq!(range.len(), 4); + range.view(4).set(base + 7); + assert_eq!(range.set_count(), 1); + } + { + let mut iter = bitmap.unset_iter(); + let range = iter.next().unwrap(); + let range2 = iter.next().unwrap(); + assert!(iter.next().is_none()); + assert_eq!(range.start_sector(), base + 6); + assert_eq!(range.end_sector(), base + 7); + assert_eq!(range2.start_sector(), base + 8); + assert_eq!(range2.end_sector(), base + 10); + } + } +} diff --git a/vm/devices/storage/disk_layered/src/lib.rs b/vm/devices/storage/disk_layered/src/lib.rs new file mode 100644 index 0000000000..7fb34d4ed3 --- /dev/null +++ b/vm/devices/storage/disk_layered/src/lib.rs @@ -0,0 +1,747 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! A layered disk implementation, [`LayeredDisk`]. +//! +//! A layered disk is a disk composed of multiple layers. Each layer is a block +//! device made up of sectors, but with the added per-sector state of whether +//! the sector is present or not. When reading a sector, the layered disk will +//! read from the topmost layer that has the sector present. When writing, the +//! disk will write to the topmost layer. +//! +//! A layer can also have caching behavior. If a layer is configured to cache +//! reads, then sectors that are read from lower layers are written back to the +//! layer. If a layer is configured to write through, then writes are written to +//! the layer and the next layer. These can be useful to implement simple +//! persistent and non-persistent caches, primarily designed for lazily +//! populating local backing stores from remote sources. +//! +//! Missing from this implementation is write-back caching and cache eviction, +//! which would be needed for caches that are smaller than the disk. These +//! require potentially complicated cache management policies and are probably +//! best implemented in a separate disk implementation. + +mod bitmap; +pub mod resolve; +pub mod resolver; + +pub use bitmap::SectorMarker; + +use bitmap::Bitmap; +use disk_backend::Disk; +use disk_backend::DiskError; +use disk_backend::DiskIo; +use disk_backend::Unmap; +use guestmem::MemoryWrite; +use inspect::Inspect; +use scsi_buffers::RequestBuffers; +use std::future::Future; +use std::pin::Pin; +use thiserror::Error; + +/// A disk composed of multiple layers. 
+#[derive(Inspect)]
+pub struct LayeredDisk {
+    #[inspect(iter_by_index)]
+    layers: Vec<Layer>,
+    read_only: bool,
+    is_fua_respected: bool,
+    sector_shift: u32,
+    disk_id: Option<[u8; 16]>,
+    physical_sector_size: u32,
+    optimal_unmap_sectors: Option<u32>,
+}
+
+#[derive(Inspect)]
+struct Layer {
+    backing: Box<dyn DynLayer>,
+    visible_sector_count: u64,
+    read_cache: bool,
+    write_through: bool,
+}
+
+/// A disk layer, for use in [`LayeredDisk`].
+pub struct DiskLayer {
+    backing: Box<dyn DynLayer>,
+    disk_id: Option<[u8; 16]>,
+    is_fua_respected: bool,
+    read_only: bool,
+    sector_size: u32,
+    physical_sector_size: u32,
+    unmap_behavior: UnmapBehavior,
+    optimal_unmap_sectors: u32,
+    can_read_cache: bool,
+}
+
+/// An error returned when creating a [`DiskLayer`].
+#[derive(Debug, Error)]
+pub enum InvalidLayer {
+    /// Read caching was requested but is not supported.
+    #[error("read caching was requested but is not supported")]
+    ReadCacheNotSupported,
+    /// Caching was requested but the layer is read only.
+    #[error("caching was requested but the layer is read only")]
+    ReadOnlyCache,
+    /// The sector size is invalid.
+    #[error("sector size {0} is invalid")]
+    InvalidSectorSize(u32),
+    /// The sector sizes of the layers do not match.
+    #[error("mismatched sector size {found}, expected {expected}")]
+    MismatchedSectorSize {
+        /// The expected sector size.
+        expected: u32,
+        /// The sector size found in the layer.
+        found: u32,
+    },
+    /// A write-through layer is preceded by a layer that is not write-through,
+    /// or the last layer is write-through.
+    #[error("nothing to write through")]
+    UselessWriteThrough,
+    /// Writing to the layered disk would require this layer to be writable.
+    #[error("read only layer in a writable disk")]
+    ReadOnly,
+}
+
+impl DiskLayer {
+    /// Creates a new layer from a backing store.
+    pub fn new<T: LayerIo>(backing: T) -> Self {
+        let can_read_cache = backing.write_no_overwrite().is_some();
+        Self {
+            disk_id: backing.disk_id(),
+            is_fua_respected: backing.is_fua_respected(),
+            sector_size: backing.sector_size(),
+            physical_sector_size: backing.physical_sector_size(),
+            unmap_behavior: backing.unmap_behavior(),
+            optimal_unmap_sectors: backing.optimal_unmap_sectors(),
+            read_only: backing.is_read_only(),
+            can_read_cache,
+            backing: Box::new(backing),
+        }
+    }
+
+    /// Creates a layer from a disk. The resulting layer is always fully
+    /// present.
+    pub fn from_disk(disk: Disk) -> Self {
+        Self::new(DiskAsLayer(disk))
+    }
+}
+
+/// An error returned when creating a [`LayeredDisk`].
+#[derive(Debug, Error)]
+pub enum InvalidLayeredDisk {
+    /// No layers were configured.
+    #[error("no layers were configured")]
+    NoLayers,
+    /// An error occurred in a layer.
+    #[error("invalid layer {0}")]
+    Layer(usize, #[source] InvalidLayer),
+}
+
+/// A configuration for a layer in a [`LayeredDisk`].
+pub struct LayerConfiguration {
+    /// The backing store for the layer.
+    pub layer: DiskLayer,
+    /// Writes are written both to this layer and the next one.
+    pub write_through: bool,
+    /// Reads that miss this layer are written back to this layer.
+    pub read_cache: bool,
+}
+
+impl LayeredDisk {
+    /// Creates a new layered disk from a list of layers.
+    ///
+    /// The layers must be ordered from top to bottom, with the top layer being
+    /// the first in the list.
+    pub fn new(
+        read_only: bool,
+        mut layers: Vec<LayerConfiguration>,
+    ) -> Result<Self, InvalidLayeredDisk> {
+        if layers.is_empty() {
+            return Err(InvalidLayeredDisk::NoLayers);
+        }
+
+        // Collect the common properties of the layers.
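+        //
+        // Along the way, validate the stacking rules enforced by the loop
+        // below: write-through layers must form a prefix of the stack, every
+        // layer in the write path must be writable (unless the whole disk is
+        // read-only), unmap stays enabled only when it behaves consistently
+        // across the write path (it must deterministically zero once a
+        // write-through layer is involved), and all layers must share the top
+        // layer's sector size.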
+        let mut last_write_through = true;
+        let mut is_fua_respected = true;
+        let mut optimal_unmap_sectors = Some(1);
+        let mut unmap_must_zero = false;
+        let mut disk_id = None;
+        for (i, config) in layers.iter().enumerate() {
+            let layer_error = |e| InvalidLayeredDisk::Layer(i, e);
+            if config.read_cache && !config.layer.can_read_cache {
+                return Err(layer_error(InvalidLayer::ReadCacheNotSupported));
+            }
+            if (config.read_cache || config.write_through) && config.layer.read_only {
+                return Err(layer_error(InvalidLayer::ReadOnlyCache));
+            }
+            if !config.layer.sector_size.is_power_of_two() {
+                return Err(layer_error(InvalidLayer::InvalidSectorSize(
+                    config.layer.sector_size,
+                )));
+            }
+            if config.layer.sector_size != layers[0].layer.sector_size {
+                // FUTURE: consider supporting different sector sizes, within reason.
+                return Err(layer_error(InvalidLayer::MismatchedSectorSize {
+                    expected: layers[0].layer.sector_size,
+                    found: config.layer.sector_size,
+                }));
+            }
+
+            if config.write_through {
+                // If using write-through, then unmap only works if the unmap
+                // operation will produce the same result in all the layers that
+                // are being written to. Otherwise, the guest could see
+                // inconsistent disk contents when the write through layer is
+                // removed.
+                unmap_must_zero = true;
+                // The write-through layers must all come first.
+                if !last_write_through {
+                    return Err(layer_error(InvalidLayer::UselessWriteThrough));
+                }
+            }
+            if last_write_through {
+                if config.layer.read_only && !read_only {
+                    return Err(layer_error(InvalidLayer::ReadOnly));
+                }
+                is_fua_respected &= config.layer.is_fua_respected;
+                let unmap = match config.layer.unmap_behavior {
+                    UnmapBehavior::Zeroes => true,
+                    UnmapBehavior::Unspecified => !unmap_must_zero,
+                    UnmapBehavior::Ignored => false,
+                };
+                if !unmap {
+                    optimal_unmap_sectors = None;
+                } else if let Some(n) = &mut optimal_unmap_sectors {
+                    *n = (*n).max(config.layer.optimal_unmap_sectors);
+                }
+            }
+            last_write_through = config.write_through;
+            if disk_id.is_none() {
+                disk_id = config.layer.disk_id;
+            }
+        }
+        if last_write_through {
+            return Err(InvalidLayeredDisk::Layer(
+                layers.len() - 1,
+                InvalidLayer::UselessWriteThrough,
+            ));
+        }
+
+        let sector_size = layers[0].layer.sector_size;
+        let physical_sector_size = layers[0].layer.physical_sector_size;
+
+        let mut last_sector_count = None;
+        let sector_counts_rev = layers
+            .iter_mut()
+            .rev()
+            .map(|config| {
+                config.layer.backing.attach(last_sector_count);
+                *last_sector_count.insert(config.layer.backing.sector_count())
+            })
+            .collect::<Vec<_>>();
+
+        let mut visible_sector_count = !0;
+        let layers = layers
+            .into_iter()
+            .zip(sector_counts_rev.into_iter().rev())
+            .map(|(config, sector_count)| {
+                visible_sector_count = sector_count.min(visible_sector_count);
+                Layer {
+                    backing: config.layer.backing,
+                    visible_sector_count,
+                    read_cache: config.read_cache,
+                    write_through: config.write_through,
+                }
+            })
+            .collect::<Vec<_>>();
+
+        Ok(Self {
+            is_fua_respected,
+            read_only,
+            sector_shift: sector_size.trailing_zeros(),
+            disk_id,
+            physical_sector_size,
+            optimal_unmap_sectors,
+            layers,
+        })
+    }
+}
+
+trait DynLayer: Send + Sync + Inspect {
+    fn attach(&mut self, lower_sector_count: Option<u64>);
+
+    fn sector_count(&self) -> u64;
+
+    fn read<'a>(
+        &'a self,
+        buffers: &'a RequestBuffers<'_>,
+        sector: u64,
+        bitmap: SectorMarker<'a>,
+    ) -> Pin<Box<dyn Future<Output = Result<(), DiskError>> + Send + 'a>>;
+
+    fn write<'a>(
+        &'a self,
+        buffers: &'a RequestBuffers<'_>,
+        sector: u64,
+        fua: bool,
+        no_overwrite: bool,
+    ) -> Pin<Box<dyn Future<Output = Result<(), DiskError>> + Send + 'a>>;
+
+    fn sync_cache(&self) -> Pin<Box<dyn Future<Output = Result<(), DiskError>> + Send + '_>>;
+
+    fn unmap(
+        &self,
+        sector: u64,
+        count: u64,
+        block_level_only: bool,
+        next_is_zero: bool,
+    ) -> Pin<Box<dyn Future<Output = Result<(), DiskError>> + Send + '_>>;
+
+    fn wait_resize(&self, sector_count: u64) -> Pin<Box<dyn Future<Output = ()> + Send + '_>>;
+}
+
+impl<T: LayerIo> DynLayer for T {
+    fn attach(&mut self, lower_sector_count: Option<u64>) {
+        self.on_attach(lower_sector_count);
+    }
+
+    fn sector_count(&self) -> u64 {
+        self.sector_count()
+    }
+
+    fn read<'a>(
+        &'a self,
+        buffers: &'a RequestBuffers<'_>,
+        sector: u64,
+        bitmap: SectorMarker<'a>,
+    ) -> Pin<Box<dyn Future<Output = Result<(), DiskError>> + Send + 'a>> {
+        Box::pin(async move { self.read(buffers, sector, bitmap).await })
+    }
+
+    fn write<'a>(
+        &'a self,
+        buffers: &'a RequestBuffers<'_>,
+        sector: u64,
+        fua: bool,
+        no_overwrite: bool,
+    ) -> Pin<Box<dyn Future<Output = Result<(), DiskError>> + Send + 'a>> {
+        Box::pin(async move {
+            if no_overwrite {
+                self.write_no_overwrite()
+                    .unwrap()
+                    .write_no_overwrite(buffers, sector)
+                    .await
+            } else {
+                self.write(buffers, sector, fua).await
+            }
+        })
+    }
+
+    fn sync_cache(&self) -> Pin<Box<dyn Future<Output = Result<(), DiskError>> + Send + '_>> {
+        Box::pin(self.sync_cache())
+    }
+
+    fn unmap(
+        &self,
+        sector: u64,
+        count: u64,
+        block_level_only: bool,
+        next_is_zero: bool,
+    ) -> Pin<Box<dyn Future<Output = Result<(), DiskError>> + Send + '_>> {
+        Box::pin(self.unmap(sector, count, block_level_only, next_is_zero))
+    }
+
+    fn wait_resize(&self, sector_count: u64) -> Pin<Box<dyn Future<Output = ()> + Send + '_>> {
+        Box::pin(self.wait_resize(sector_count))
+    }
+}
+
+/// Metadata and IO for disk layers.
+pub trait LayerIo: 'static + Send + Sync + Inspect {
+    /// Returns the layer type name as a string.
+    ///
+    /// This is used for diagnostic purposes.
+    fn layer_type(&self) -> &str;
+
+    /// Returns the current sector count.
+    ///
+    /// For some backing stores, this may change at runtime. If it does, then
+    /// the backing store must also implement [`LayerIo::wait_resize`].
+    fn sector_count(&self) -> u64;
+
+    /// Returns the logical sector size of the backing store.
+    ///
+    /// This must not change at runtime.
+    fn sector_size(&self) -> u32;
+
+    /// Optionally returns a 16-byte identifier for the disk, if there is a
+    /// natural one for this backing store.
+    ///
+    /// This may be exposed to the guest as a unique disk identifier.
+    /// This must not change at runtime.
+    fn disk_id(&self) -> Option<[u8; 16]>;
+
+    /// Returns the physical sector size of the backing store.
+    ///
+    /// This must not change at runtime.
+    fn physical_sector_size(&self) -> u32;
+
+    /// Returns true if the `fua` parameter to [`LayerIo::write`] is
+    /// respected by the backing store by ensuring that the IO is immediately
+    /// committed to disk.
+    fn is_fua_respected(&self) -> bool;
+
+    /// Returns true if the layer is read only.
+    fn is_read_only(&self) -> bool;
+
+    /// Issues an asynchronous flush operation to the disk.
+    fn sync_cache(&self) -> impl Future<Output = Result<(), DiskError>> + Send;
+
+    /// Reads sectors from the layer.
+    ///
+    /// `marker` is used to specify which sectors have been read. Those that
+    /// are not read will be passed to the next layer, or zeroed if there are
+    /// no more layers.
+    fn read(
+        &self,
+        buffers: &RequestBuffers<'_>,
+        sector: u64,
+        marker: SectorMarker<'_>,
+    ) -> impl Future<Output = Result<(), DiskError>> + Send;
+
+    /// Writes sectors to the layer.
+    fn write(
+        &self,
+        buffers: &RequestBuffers<'_>,
+        sector: u64,
+        fua: bool,
+    ) -> impl Future<Output = Result<(), DiskError>> + Send;
+
+    /// Unmaps sectors from the layer.
+    ///
+    /// If `next_is_zero` is true, then the next layer's contents are known to
+    /// be zero. A layer can use this information to just discard the sectors
+    /// rather than putting them in the zero state (which may take more space).
+    fn unmap(
+        &self,
+        sector: u64,
+        count: u64,
+        block_level_only: bool,
+        next_is_zero: bool,
+    ) -> impl Future<Output = Result<(), DiskError>> + Send;
+
+    /// Returns the behavior of the unmap operation.
+    fn unmap_behavior(&self) -> UnmapBehavior;
+
+    /// Returns the optimal granularity for unmaps, in sectors.
+    fn optimal_unmap_sectors(&self) -> u32 {
+        1
+    }
+
+    /// Optionally returns a write-no-overwrite implementation.
+    fn write_no_overwrite(&self) -> Option<impl WriteNoOverwrite + '_> {
+        None::<NoIdet>
+    }
+
+    /// Waits for the disk sector count to be different than the specified
+    /// value.
+    fn wait_resize(&self, sector_count: u64) -> impl Future<Output = ()> + Send {
+        let _ = sector_count;
+        std::future::pending()
+    }
+
+    /// Called when the layer is attached to a disk. The sector count of the
+    /// next lower layer is provided for the layer to optionally size/resize
+    /// itself.
+    fn on_attach(&mut self, lower_sector_count: Option<u64>) {
+        let _ = lower_sector_count;
+    }
+}
+
+/// The behavior of unmap.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum UnmapBehavior {
+    /// Unmaps are ignored.
+    Ignored,
+    /// Unmap may or may not change the content, and not necessarily to zero.
+    Unspecified,
+    /// Unmap will deterministically zero the content.
+    Zeroes,
+}
+
+enum NoIdet {}
+
+/// Writes to the layer without overwriting existing data.
+pub trait WriteNoOverwrite: Send + Sync {
+    /// Write to the layer without overwriting existing data. Existing sectors
+    /// must be preserved.
+    ///
+    /// This is used to support read caching, where the data being written may
+    /// be stale by the time it is written back to the layer.
+    fn write_no_overwrite(
+        &self,
+        buffers: &RequestBuffers<'_>,
+        sector: u64,
+    ) -> impl Future<Output = Result<(), DiskError>> + Send;
+}
+
+impl<T: WriteNoOverwrite> WriteNoOverwrite for &T {
+    fn write_no_overwrite(
+        &self,
+        buffers: &RequestBuffers<'_>,
+        sector: u64,
+    ) -> impl Future<Output = Result<(), DiskError>> + Send {
+        (*self).write_no_overwrite(buffers, sector)
+    }
+}
+
+impl WriteNoOverwrite for NoIdet {
+    async fn write_no_overwrite(
+        &self,
+        _buffers: &RequestBuffers<'_>,
+        _sector: u64,
+    ) -> Result<(), DiskError> {
+        unreachable!()
+    }
+}
+
+impl DiskIo for LayeredDisk {
+    fn disk_type(&self) -> &str {
+        "layered"
+    }
+
+    fn sector_count(&self) -> u64 {
+        self.layers[0].backing.sector_count()
+    }
+
+    fn sector_size(&self) -> u32 {
+        1 << self.sector_shift
+    }
+
+    fn disk_id(&self) -> Option<[u8; 16]> {
+        self.disk_id
+    }
+
+    fn physical_sector_size(&self) -> u32 {
+        self.physical_sector_size
+    }
+
+    fn is_fua_respected(&self) -> bool {
+        self.is_fua_respected
+    }
+
+    fn is_read_only(&self) -> bool {
+        self.read_only
+    }
+
+    async fn read_vectored(
+        &self,
+        buffers: &RequestBuffers<'_>,
+        sector: u64,
+    ) -> Result<(), DiskError> {
+        let sector_count = buffers.len() >> self.sector_shift;
+        let mut bitmap = Bitmap::new(sector, sector_count);
+        let mut bits_set = 0;
+        // FUTURE: queue the reads to the layers in parallel.
+        'done: for (i, layer) in self.layers.iter().enumerate() {
+            if bits_set == sector_count {
+                break;
+            }
+            for mut range in bitmap.unset_iter() {
+                let end = if i == 0 {
+                    // The visible sector count of the first layer is unknown,
+                    // since it could change at any time.
+                    range.end_sector()
+                } else {
+                    // Restrict the range to the visible sector count of the
+                    // layer; sectors beyond this are logically zero.
+                    let end = range.end_sector().min(layer.visible_sector_count);
+                    if range.start_sector() == end {
+                        break 'done;
+                    }
+                    end
+                };
+
+                let sectors = end - range.start_sector();
+
+                let buffers = buffers.subrange(
+                    range.start_sector_within_bitmap() << self.sector_shift,
+                    (sectors as usize) << self.sector_shift,
+                );
+
+                layer
+                    .backing
+                    .read(&buffers, range.start_sector(), range.view(sectors))
+                    .await?;
+
+                bits_set += range.set_count();
+
+                // TODO: populate read cache(s). Note that we need to detect
+                // that this will be necessary before performing the read, and
+                // bounce through a stable buffer in case the buffers are in
+                // guest memory (which could be mutated by the guest or other
+                // IOs).
+            }
+        }
+        if bits_set != sector_count {
+            for range in bitmap.unset_iter() {
+                let len = (range.len() as usize) << self.sector_shift;
+                buffers
+                    .subrange(range.start_sector_within_bitmap() << self.sector_shift, len)
+                    .writer()
+                    .zero(len)?;
+            }
+        }
+        Ok(())
+    }
+
+    async fn write_vectored(
+        &self,
+        buffers: &RequestBuffers<'_>,
+        sector: u64,
+        fua: bool,
+    ) -> Result<(), DiskError> {
+        for layer in &self.layers {
+            layer.backing.write(buffers, sector, fua, false).await?;
+            if !layer.write_through {
+                break;
+            }
+        }
+        Ok(())
+    }
+
+    async fn sync_cache(&self) -> Result<(), DiskError> {
+        for layer in &self.layers {
+            layer.backing.sync_cache().await?;
+            if !layer.write_through {
+                break;
+            }
+        }
+        Ok(())
+    }
+
+    fn unmap(&self) -> Option<&dyn Unmap> {
+        self.optimal_unmap_sectors.map(|_| self)
+    }
+
+    fn wait_resize(&self, sector_count: u64) -> impl Future<Output = ()> + Send {
+        self.layers[0].backing.wait_resize(sector_count)
+    }
+}
+
+impl Unmap for LayeredDisk {
+    async fn unmap(
+        &self,
+        sector_offset: u64,
+        sector_count: u64,
+        block_level_only: bool,
+    ) -> Result<(), DiskError> {
+        for (layer, next_layer) in self
+            .layers
+            .iter()
+            .zip(self.layers.iter().map(Some).skip(1).chain([None]))
+        {
+            let next_is_zero = if let Some(next_layer) = next_layer {
+                // Sectors beyond the layer's visible sector count are logically
+                // zero.
+                //
+                // FUTURE: consider splitting the unmap operation into multiple
+                // operations across this boundary.
+                sector_offset >= next_layer.visible_sector_count
+            } else {
+                true
+            };
+
+            layer
+                .backing
+                .unmap(sector_offset, sector_count, block_level_only, next_is_zero)
+                .await?;
+            if !layer.write_through {
+                break;
+            }
+        }
+        Ok(())
+    }
+
+    fn optimal_unmap_sectors(&self) -> u32 {
+        self.optimal_unmap_sectors.unwrap()
+    }
+}
+
+/// A disk layer wrapping a full disk.
+#[derive(Inspect)]
+#[inspect(transparent)]
+struct DiskAsLayer(Disk);
+
+impl LayerIo for DiskAsLayer {
+    fn layer_type(&self) -> &str {
+        "disk"
+    }
+
+    fn sector_count(&self) -> u64 {
+        self.0.sector_count()
+    }
+
+    fn sector_size(&self) -> u32 {
+        self.0.sector_size()
+    }
+
+    fn disk_id(&self) -> Option<[u8; 16]> {
+        self.0.disk_id()
+    }
+
+    fn physical_sector_size(&self) -> u32 {
+        self.0.physical_sector_size()
+    }
+
+    fn is_fua_respected(&self) -> bool {
+        self.0.is_fua_respected()
+    }
+
+    fn is_read_only(&self) -> bool {
+        self.0.is_read_only()
+    }
+
+    fn sync_cache(&self) -> impl Future<Output = Result<(), DiskError>> + Send {
+        self.0.sync_cache()
+    }
+
+    async fn read(
+        &self,
+        buffers: &RequestBuffers<'_>,
+        sector: u64,
+        mut bitmap: SectorMarker<'_>,
+    ) -> Result<(), DiskError> {
+        // The disk is fully populated.
+ bitmap.set_all(); + self.0.read_vectored(buffers, sector).await + } + + async fn write( + &self, + buffers: &RequestBuffers<'_>, + sector: u64, + fua: bool, + ) -> Result<(), DiskError> { + self.0.write_vectored(buffers, sector, fua).await + } + + async fn unmap( + &self, + sector: u64, + count: u64, + block_level_only: bool, + _lower_is_zero: bool, + ) -> Result<(), DiskError> { + if let Some(unmap) = self.0.unmap() { + unmap.unmap(sector, count, block_level_only).await?; + } + Ok(()) + } + + fn unmap_behavior(&self) -> UnmapBehavior { + if self.0.unmap().is_some() { + UnmapBehavior::Unspecified + } else { + UnmapBehavior::Ignored + } + } +} diff --git a/vm/devices/storage/disk_layered/src/resolve.rs b/vm/devices/storage/disk_layered/src/resolve.rs new file mode 100644 index 0000000000..0b9307fb87 --- /dev/null +++ b/vm/devices/storage/disk_layered/src/resolve.rs @@ -0,0 +1,35 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Resolver-related definitions for disk layer resources. + +use super::DiskLayer; +use super::LayerIo; +use vm_resource::kind::DiskLayerHandleKind; +use vm_resource::CanResolveTo; + +impl CanResolveTo for DiskLayerHandleKind { + type Input<'a> = ResolveDiskLayerParameters<'a>; +} + +/// Parameters used when resolving a disk layer resource. +#[derive(Copy, Clone)] +pub struct ResolveDiskLayerParameters<'a> { + /// Whether the layer is being opened for read-only use. + pub read_only: bool, + #[doc(hidden)] + // Workaround for async_trait not working well with GAT input parameters + // with missing lifetimes. Remove once we stop using async_trait for async + // resolvers. + pub _async_trait_workaround: &'a (), +} + +/// A resolved [`DiskLayer`]. +pub struct ResolvedDiskLayer(pub DiskLayer); + +impl ResolvedDiskLayer { + /// Returns a resolved disk wrapping a backing object. + pub fn new(layer: T) -> Self { + Self(DiskLayer::new(layer)) + } +} diff --git a/vm/devices/storage/disk_layered/src/resolver.rs b/vm/devices/storage/disk_layered/src/resolver.rs new file mode 100644 index 0000000000..90cabfbaef --- /dev/null +++ b/vm/devices/storage/disk_layered/src/resolver.rs @@ -0,0 +1,123 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Resolvers for layered disks. + +use super::resolve::ResolveDiskLayerParameters; +use super::resolve::ResolvedDiskLayer; +use super::DiskLayer; +use super::InvalidLayeredDisk; +use super::LayerConfiguration; +use super::LayeredDisk; +use async_trait::async_trait; +use disk_backend::resolve::ResolveDiskParameters; +use disk_backend::resolve::ResolvedDisk; +use disk_backend::InvalidDisk; +use disk_backend_resources::layer::DiskLayerHandle; +use disk_backend_resources::LayeredDiskHandle; +use futures::future::TryJoinAll; +use thiserror::Error; +use vm_resource::declare_static_async_resolver; +use vm_resource::kind::DiskHandleKind; +use vm_resource::kind::DiskLayerHandleKind; +use vm_resource::AsyncResolveResource; +use vm_resource::ResolveError; +use vm_resource::ResourceResolver; + +declare_static_async_resolver! { + LayeredDiskResolver, + (DiskHandleKind, LayeredDiskHandle), + (DiskLayerHandleKind, DiskLayerHandle) +} + +/// Resolver for [`LayeredDiskHandle`] and [`DiskLayerHandle`]. +pub struct LayeredDiskResolver; + +/// Error type for [`LayeredDiskResolver`]. +#[derive(Debug, Error)] +pub enum ResolveLayeredDiskError { + /// Failed to resolve a layer resource. 
+ #[error("failed to resolve layer {0}")] + ResolveLayer(usize, #[source] ResolveError), + /// Failed to create the layered disk. + #[error("failed to create layered disk")] + CreateDisk(#[source] InvalidLayeredDisk), + /// Failed to instantiate the disk. + #[error("invalid disk")] + InvalidDisk(#[source] InvalidDisk), +} + +#[async_trait] +impl AsyncResolveResource for LayeredDiskResolver { + type Output = ResolvedDisk; + type Error = ResolveLayeredDiskError; + + async fn resolve( + &self, + resolver: &ResourceResolver, + resource: LayeredDiskHandle, + input: ResolveDiskParameters<'_>, + ) -> Result { + let mut read_only = input.read_only; + let layers = resource + .layers + .into_iter() + .enumerate() + .map(|(i, desc)| { + let this_read_only = read_only && !desc.write_through && !desc.read_cache; + if !desc.write_through { + read_only = true; + } + async move { + let layer = resolver + .resolve( + desc.layer, + ResolveDiskLayerParameters { + read_only: this_read_only, + _async_trait_workaround: &(), + }, + ) + .await + .map_err(|err| ResolveLayeredDiskError::ResolveLayer(i, err))?; + + Ok(LayerConfiguration { + layer: layer.0, + write_through: desc.write_through, + read_cache: desc.read_cache, + }) + } + }) + .collect::>() + .await?; + + let disk = LayeredDisk::new(input.read_only, layers) + .map_err(ResolveLayeredDiskError::CreateDisk)?; + + ResolvedDisk::new(disk).map_err(ResolveLayeredDiskError::InvalidDisk) + } +} + +#[async_trait] +impl AsyncResolveResource for LayeredDiskResolver { + type Output = ResolvedDiskLayer; + type Error = ResolveError; + + async fn resolve( + &self, + resolver: &ResourceResolver, + resource: DiskLayerHandle, + input: ResolveDiskLayerParameters<'_>, + ) -> Result { + let disk = resolver + .resolve( + resource.0, + ResolveDiskParameters { + read_only: input.read_only, + _async_trait_workaround: &(), + }, + ) + .await?; + + Ok(ResolvedDiskLayer(DiskLayer::from_disk(disk.0))) + } +} diff --git a/vm/devices/storage/disk_nvme/nvme_driver/Cargo.toml b/vm/devices/storage/disk_nvme/nvme_driver/Cargo.toml index 43e46335c5..77dac5dc0d 100644 --- a/vm/devices/storage/disk_nvme/nvme_driver/Cargo.toml +++ b/vm/devices/storage/disk_nvme/nvme_driver/Cargo.toml @@ -31,7 +31,6 @@ safe_intrinsics.workspace = true [dev-dependencies] chipset_device.workspace = true -disk_backend.workspace = true disk_ramdisk.workspace = true nvme.workspace = true pci_core.workspace = true diff --git a/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs b/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs index 4f281ce9f5..1a096c0ca9 100644 --- a/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs +++ b/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs @@ -3,8 +3,6 @@ use crate::NvmeDriver; use chipset_device::mmio::ExternallyManagedMmioIntercepts; -use disk_backend::Disk; -use disk_ramdisk::RamDisk; use guid::Guid; use nvme::NvmeControllerCaps; use nvme_spec::nvm::DsmRange; @@ -60,7 +58,7 @@ async fn test_nvme_driver(driver: DefaultDriver, allow_dma: bool) { }, ); nvme.client() - .add_namespace(1, Disk::new(RamDisk::new(1 << 20, false).unwrap()).unwrap()) + .add_namespace(1, disk_ramdisk::ram_disk(1 << 20, false).unwrap()) .await .unwrap(); diff --git a/vm/devices/storage/disk_ramdisk/Cargo.toml b/vm/devices/storage/disk_ramdisk/Cargo.toml index 23f49ef612..157f08e4d4 100644 --- a/vm/devices/storage/disk_ramdisk/Cargo.toml +++ b/vm/devices/storage/disk_ramdisk/Cargo.toml @@ -9,6 +9,7 @@ rust-version.workspace = true [dependencies] disk_backend.workspace = true 
disk_backend_resources.workspace = true +disk_layered.workspace = true guestmem.workspace = true scsi_buffers.workspace = true vm_resource.workspace = true @@ -17,12 +18,15 @@ inspect.workspace = true pal_async.workspace = true anyhow.workspace = true -async-trait.workspace = true parking_lot.workspace = true event-listener.workspace = true thiserror.workspace = true tracing.workspace = true zerocopy.workspace = true +[dev-dependencies] +inspect = { workspace = true, features = ["initiate"] } +test_with_tracing.workspace = true + [lints] workspace = true diff --git a/vm/devices/storage/disk_ramdisk/src/lib.rs b/vm/devices/storage/disk_ramdisk/src/lib.rs index 2b509bd92a..e292b690b8 100644 --- a/vm/devices/storage/disk_ramdisk/src/lib.rs +++ b/vm/devices/storage/disk_ramdisk/src/lib.rs @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. -//! RAM-backed disk backend implementation. +//! RAM-backed disk layer implementation. #![forbid(unsafe_code)] #![warn(missing_docs)] @@ -9,12 +9,15 @@ pub mod resolver; use anyhow::Context; -use disk_backend::zerodisk::InvalidGeometry; -use disk_backend::zerodisk::ZeroDisk; use disk_backend::Disk; use disk_backend::DiskError; -use disk_backend::DiskIo; -use disk_backend::Unmap; +use disk_layered::DiskLayer; +use disk_layered::LayerConfiguration; +use disk_layered::LayerIo; +use disk_layered::LayeredDisk; +use disk_layered::SectorMarker; +use disk_layered::UnmapBehavior; +use disk_layered::WriteNoOverwrite; use guestmem::MemoryRead; use guestmem::MemoryWrite; use inspect::Inspect; @@ -29,36 +32,44 @@ use std::sync::atomic::Ordering; use thiserror::Error; /// A disk backed entirely by RAM. -pub struct RamDisk { - data: RwLock>, +#[derive(Inspect)] +#[inspect(extra = "Self::inspect_extra")] +pub struct RamLayer { + #[inspect(flatten)] + state: RwLock, + #[inspect(skip)] sector_count: AtomicU64, - read_only: bool, - lower_is_zero: bool, - lower: Disk, + #[inspect(skip)] resize_event: event_listener::Event, } -impl Inspect for RamDisk { - fn inspect(&self, req: inspect::Request<'_>) { - req.respond() - .field_with("committed_size", || { - self.data.read().len() * size_of::() - }) - .field("lower", &self.lower) - .field_mut_with("sector_count", |new_count| { - if let Some(new_count) = new_count { - self.resize(new_count.parse().context("invalid sector count")?)?; - } - anyhow::Ok(self.sector_count()) - }); +#[derive(Inspect)] +struct RamState { + #[inspect(skip)] + data: BTreeMap, + #[inspect(skip)] // handled in inspect_extra() + sector_count: u64, + zero_after: u64, +} + +impl RamLayer { + fn inspect_extra(&self, resp: &mut inspect::Response<'_>) { + resp.field_with("committed_size", || { + self.state.read().data.len() * size_of::() + }) + .field_mut_with("sector_count", |new_count| { + if let Some(new_count) = new_count { + self.resize(new_count.parse().context("invalid sector count")?)?; + } + anyhow::Ok(self.sector_count()) + }); } } -impl Debug for RamDisk { +impl Debug for RamLayer { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("RamDisk") + f.debug_struct("RamLayer") .field("sector_count", &self.sector_count) - .field("read_only", &self.read_only) .finish() } } @@ -66,48 +77,50 @@ impl Debug for RamDisk { /// An error creating a RAM disk. #[derive(Error, Debug)] pub enum Error { - /// Invalid disk geometry. - #[error(transparent)] - InvalidGeometry(#[from] InvalidGeometry), - /// Unsupported sector size. 
- #[error("unsupported sector size {0}")] - UnsupportedSectorSize(u32), + /// The disk size is not a multiple of the sector size. + #[error("disk size {disk_size:#x} is not a multiple of the sector size {sector_size}")] + NotSectorMultiple { + /// The disk size. + disk_size: u64, + /// The sector size. + sector_size: u32, + }, + /// The disk has no sectors. + #[error("disk has no sectors")] + EmptyDisk, } struct Sector([u8; 512]); const SECTOR_SIZE: u32 = 512; -impl RamDisk { +impl RamLayer { /// Makes a new RAM disk of `size` bytes. - pub fn new(len: u64, read_only: bool) -> Result { - Self::new_inner( - Disk::new(ZeroDisk::new(SECTOR_SIZE, len)?).unwrap(), - read_only, - true, - ) - } - - /// Makes a new RAM diff disk on top of `lower`. /// - /// Writes will be collected in RAM, but reads will go to the lower disk for - /// sectors that have not yet been overwritten. - pub fn diff(lower: Disk, read_only: bool) -> Result { - Self::new_inner(lower, read_only, false) - } - - fn new_inner(lower: Disk, read_only: bool, lower_is_zero: bool) -> Result { - let sector_size = lower.sector_size(); - if sector_size != SECTOR_SIZE { - return Err(Error::UnsupportedSectorSize(sector_size)); - } - let sector_count = lower.sector_count(); + /// If `None` is specified, then the disk will inherit its size from the + /// lower layer when it is attached to a [`LayeredDisk`]. + pub fn new(size: Option) -> Result { + let sector_count = if let Some(size) = size { + if size == 0 { + return Err(Error::EmptyDisk); + } + if size % SECTOR_SIZE as u64 != 0 { + return Err(Error::NotSectorMultiple { + disk_size: size, + sector_size: SECTOR_SIZE, + }); + } + size / SECTOR_SIZE as u64 + } else { + 0 + }; Ok(Self { - data: RwLock::new(BTreeMap::new()), + state: RwLock::new(RamState { + data: BTreeMap::new(), + sector_count, + zero_after: sector_count, + }), sector_count: sector_count.into(), - read_only, - lower_is_zero, - lower, resize_event: Default::default(), }) } @@ -118,17 +131,51 @@ impl RamDisk { } // Remove any truncated data and update the sector count under the lock. let _removed = { - let mut data = self.data.write(); + let mut state = self.state.write(); + // Remember that any non-present sectors after this point need to be zeroed. + state.zero_after = new_sector_count.min(state.zero_after); + state.sector_count = new_sector_count; + // Cache the sector count in an atomic for the fast path. + // + // FUTURE: remove uses of .sector_count() in the IO path, + // eliminating the need for this. 
self.sector_count.store(new_sector_count, Ordering::Relaxed); - data.split_off(&new_sector_count) + state.data.split_off(&new_sector_count) }; self.resize_event.notify(usize::MAX); Ok(()) } + + fn write_maybe_overwrite( + &self, + buffers: &RequestBuffers<'_>, + sector: u64, + overwrite: bool, + ) -> Result<(), DiskError> { + let count = buffers.len() / SECTOR_SIZE as usize; + tracing::trace!(sector, count, "write"); + let mut state = self.state.write(); + for i in 0..count { + let cur = i + sector as usize; + let buf = buffers.subrange(i * SECTOR_SIZE as usize, SECTOR_SIZE as usize); + let mut reader = buf.reader(); + match state.data.entry(cur as u64) { + Entry::Vacant(entry) => { + entry.insert(Sector(reader.read_plain()?)); + } + Entry::Occupied(mut entry) => { + if overwrite { + reader.read(&mut entry.get_mut().0)?; + } + } + } + } + Ok(()) + } } -impl DiskIo for RamDisk { - fn disk_type(&self) -> &str { +impl LayerIo for RamLayer { + fn layer_type(&self) -> &str { "ram" } @@ -141,73 +188,71 @@ impl DiskIo for RamDisk { } fn is_read_only(&self) -> bool { - self.read_only + false } fn disk_id(&self) -> Option<[u8; 16]> { - self.lower.disk_id() + None } fn physical_sector_size(&self) -> u32 { - self.lower.physical_sector_size() + SECTOR_SIZE } fn is_fua_respected(&self) -> bool { true } - fn unmap(&self) -> Option { - self.lower_is_zero.then_some(self) - } - - async fn read_vectored( + async fn read( &self, buffers: &RequestBuffers<'_>, sector: u64, + mut marker: SectorMarker<'_>, ) -> Result<(), DiskError> { let count = (buffers.len() / SECTOR_SIZE as usize) as u64; + let end = sector + count; tracing::trace!(sector, count, "read"); - // Always read the full lower and then overlay the changes. - // Optimizations are possible, but some heuristics are necessary to - // avoid lots of small reads when the disk is "Swiss cheesed". - self.lower.read_vectored(buffers, sector).await?; - for (&s, buf) in self.data.read().range(sector..sector + count) { - let offset = (s - sector) as usize * SECTOR_SIZE as usize; - buffers - .subrange(offset, SECTOR_SIZE as usize) - .writer() - .write(&buf.0)?; + let state = self.state.read(); + let mut range = state.data.range(sector..end); + let mut last = sector; + while last < end { + let r = range.next(); + let next = r.map(|(&s, _)| s).unwrap_or(end); + if next > last && next > state.zero_after { + // Some non-present sectors need to be zeroed, since they are + // after the zero-after point (due to a resize). 
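+                // For example: if the layer is shrunk from 100 sectors to 50
+                // and later grown back, `zero_after` is 50, so non-present
+                // sectors at 50 and beyond read as zeros here (and are
+                // marked), while non-present sectors below 50 still fall
+                // through to the next layer.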
+ let zero_start = last.max(state.zero_after); + let zero_count = next - zero_start; + let offset = (zero_start - sector) as usize * SECTOR_SIZE as usize; + let len = zero_count as usize * SECTOR_SIZE as usize; + buffers.subrange(offset, len).writer().zero(len)?; + marker.set_range(zero_start..next); + } + if let Some((&s, buf)) = r { + let offset = (s - sector) as usize * SECTOR_SIZE as usize; + buffers + .subrange(offset, SECTOR_SIZE as usize) + .writer() + .write(&buf.0)?; + + marker.set(s); + } + last = next; } Ok(()) } - async fn write_vectored( + async fn write( &self, buffers: &RequestBuffers<'_>, sector: u64, _fua: bool, ) -> Result<(), DiskError> { - assert!(!self.read_only); - - let count = buffers.len() / SECTOR_SIZE as usize; - tracing::trace!(sector, count, "write"); - - let mut data = self.data.write(); - for i in 0..count { - let cur = i + sector as usize; - let buf = buffers.subrange(i * SECTOR_SIZE as usize, SECTOR_SIZE as usize); - let mut reader = buf.reader(); - match data.entry(cur as u64) { - Entry::Vacant(entry) => { - entry.insert(Sector(reader.read_plain()?)); - } - Entry::Occupied(mut entry) => { - reader.read(&mut entry.get_mut().0)?; - } - } - } + self.write_maybe_overwrite(buffers, sector, true) + } - Ok(()) + fn write_no_overwrite(&self) -> Option { + Some(self) } async fn sync_cache(&self) -> Result<(), DiskError> { @@ -225,49 +270,106 @@ impl DiskIo for RamDisk { listen.await; } } -} -impl Unmap for RamDisk { async fn unmap( &self, sector_offset: u64, sector_count: u64, _block_level_only: bool, + next_is_zero: bool, ) -> Result<(), DiskError> { - assert!(self.lower_is_zero); tracing::trace!(sector_offset, sector_count, "unmap"); - let mut data = self.data.write(); + let mut state = self.state.write(); + if !next_is_zero { + // This would create a hole of zeroes, which we cannot represent in + // the tree. Ignore the unmap. + if sector_offset + sector_count < state.zero_after { + return Ok(()); + } + // The unmap is within or will extend the not-present-is-zero + // region, so allow it. + state.zero_after = state.zero_after.min(sector_offset); + } // Sadly, there appears to be no way to remove a range of entries // from a btree map. let mut next_sector = sector_offset; let end = sector_offset + sector_count; while next_sector < end { - let Some((§or, _)) = data.range_mut(next_sector..).next() else { + let Some((§or, _)) = state.data.range_mut(next_sector..).next() else { break; }; if sector >= end { break; } - data.remove(§or); + state.data.remove(§or); next_sector = sector + 1; } Ok(()) } + fn unmap_behavior(&self) -> UnmapBehavior { + // This layer zeroes if the lower layer is zero, but otherwise does + // nothing, so we must report unspecified. + UnmapBehavior::Unspecified + } + fn optimal_unmap_sectors(&self) -> u32 { 1 } + + fn on_attach(&mut self, lower_sector_count: Option) { + if let Some(lower_sector_count) = lower_sector_count { + let mut state = self.state.write(); + if state.sector_count == 0 { + state.sector_count = lower_sector_count; + state.zero_after = lower_sector_count; + *self.sector_count.get_mut() = lower_sector_count; + } + } + } +} + +impl WriteNoOverwrite for RamLayer { + async fn write_no_overwrite( + &self, + buffers: &RequestBuffers<'_>, + sector: u64, + ) -> Result<(), DiskError> { + self.write_maybe_overwrite(buffers, sector, false) + } +} + +/// Create a RAM disk of `size` bytes. +/// +/// This is a convenience function for creating a layered disk with a single RAM +/// layer. 
+
+/// Create a RAM disk of `size` bytes.
+///
+/// This is a convenience function for creating a layered disk with a single RAM
+/// layer. It is useful since non-layered RAM disks are used all over the place,
+/// especially in tests.
+pub fn ram_disk(size: u64, read_only: bool) -> anyhow::Result<Disk> {
+    let disk = Disk::new(LayeredDisk::new(
+        read_only,
+        vec![LayerConfiguration {
+            layer: DiskLayer::new(RamLayer::new(Some(size))?),
+            write_through: false,
+            read_cache: false,
+        }],
+    )?)?;
+    Ok(disk)
+}

 #[cfg(test)]
 mod tests {
-    use super::RamDisk;
+    use super::RamLayer;
     use super::SECTOR_SIZE;
-    use crate::DiskIo;
-    use disk_backend::Disk;
+    use disk_backend::DiskIo;
+    use disk_backend::Unmap;
+    use disk_layered::DiskLayer;
+    use disk_layered::LayerConfiguration;
+    use disk_layered::LayerIo;
+    use disk_layered::LayeredDisk;
     use guestmem::GuestMemory;
     use pal_async::async_test;
     use scsi_buffers::OwnedRequestBuffers;
+    use test_with_tracing::test;
     use zerocopy::AsBytes;

     const SECTOR_U64: u64 = SECTOR_SIZE as u64;
@@ -300,6 +402,28 @@ mod tests {
             .unwrap();
     }

+    async fn write_layer(
+        mem: &GuestMemory,
+        disk: &mut impl LayerIo,
+        sector: u64,
+        count: usize,
+        high: u8,
+    ) {
+        let buf: Vec<_> = (sector * SECTOR_U64 / 4..(sector + count as u64) * SECTOR_U64 / 4)
+            .map(|x| x as u32 | ((high as u32) << 24))
+            .collect();
+        let len = SECTOR_USIZE * count;
+        mem.write_at(0, &buf.as_bytes()[..len]).unwrap();
+
+        disk.write(
+            &OwnedRequestBuffers::linear(0, len, false).buffer(mem),
+            sector,
+            false,
+        )
+        .await
+        .unwrap();
+    }
+
     async fn write(mem: &GuestMemory, disk: &mut impl DiskIo, sector: u64, count: usize, high: u8) {
         let buf: Vec<_> = (sector * SECTOR_U64 / 4..(sector + count as u64) * SECTOR_U64 / 4)
             .map(|x| x as u32 | ((high as u32) << 24))
@@ -316,15 +440,28 @@ mod tests {
             .unwrap();
     }

+    async fn prep_disk(size: usize) -> (GuestMemory, LayeredDisk) {
+        let guest_mem = GuestMemory::allocate(size);
+        let mut lower = RamLayer::new(Some(size as u64)).unwrap();
+        write_layer(&guest_mem, &mut lower, 0, size / SECTOR_USIZE, 0).await;
+        let upper = RamLayer::new(Some(size as u64)).unwrap();
+        let upper = LayeredDisk::new(
+            false,
+            Vec::from_iter([upper, lower].map(|layer| LayerConfiguration {
+                layer: DiskLayer::new(layer),
+                write_through: false,
+                read_cache: false,
+            })),
+        )
+        .unwrap();
+        (guest_mem, upper)
+    }
+
     #[async_test]
     async fn diff() {
         const SIZE: usize = 1024 * 1024;
-        let guest_mem = GuestMemory::allocate(SIZE);
-
-        let mut lower = RamDisk::new(SIZE as u64, false).unwrap();
-        write(&guest_mem, &mut lower, 0, SIZE / SECTOR_USIZE, 0).await;
-        let mut upper = RamDisk::diff(Disk::new(lower).unwrap(), false).unwrap();
+        let (guest_mem, mut upper) = prep_disk(SIZE).await;
         read(&guest_mem, &mut upper, 10, 2).await;
         check(&guest_mem, 10, 0, 2, 0);
         write(&guest_mem, &mut upper, 10, 2, 1).await;
@@ -335,4 +472,57 @@ mod tests {
         check(&guest_mem, 11, 2, 1, 2);
         check(&guest_mem, 12, 3, 1, 0);
     }
+
+    async fn resize(disk: &LayeredDisk, new_size: u64) {
+        let inspect::ValueKind::Unsigned(v) =
+            inspect::update("layers/0/backing/sector_count", &new_size.to_string(), disk)
+                .await
+                .unwrap()
+                .kind
+        else {
+            panic!("bad inspect value")
+        };
+        assert_eq!(new_size, v);
+    }
+
+    #[async_test]
+    async fn test_resize() {
+        const SIZE: usize = 1024 * 1024;
+        const SECTORS: usize = SIZE / SECTOR_USIZE;
+
+        let (guest_mem, mut upper) = prep_disk(SIZE).await;
+        check(&guest_mem, 0, 0, SECTORS, 0);
+        resize(&upper, SECTORS as u64 / 2).await;
+        resize(&upper, SECTORS as u64).await;
+        read(&guest_mem, &mut upper, 0, SECTORS).await;
+        check(&guest_mem, 0, 0, SECTORS / 2, 0);
+        for s in SECTORS / 2..SECTORS {
+            let mut buf = [0u8; SECTOR_USIZE];
+            guest_mem.read_at(s as u64 * SECTOR_U64, &mut buf).unwrap();
+            assert_eq!(buf, [0u8; SECTOR_USIZE]);
+        }
+    }
+
+    #[async_test]
+    async fn test_unmap() {
+        const SIZE: usize = 1024 * 1024;
+        const SECTORS: usize = SIZE / SECTOR_USIZE;
+
+        let (guest_mem, mut upper) = prep_disk(SIZE).await;
+        Unmap::unmap(&upper, 0, SECTORS as u64 - 1, false)
+            .await
+            .unwrap();
+        read(&guest_mem, &mut upper, 0, SECTORS).await;
+        check(&guest_mem, 0, 0, SECTORS, 0);
+        Unmap::unmap(&upper, SECTORS as u64 / 2, SECTORS as u64 / 2, false)
+            .await
+            .unwrap();
+        read(&guest_mem, &mut upper, 0, SECTORS).await;
+        check(&guest_mem, 0, 0, SECTORS / 2, 0);
+        for s in SECTORS / 2..SECTORS {
+            let mut buf = [0u8; SECTOR_USIZE];
+            guest_mem.read_at(s as u64 * SECTOR_U64, &mut buf).unwrap();
+            assert_eq!(buf, [0u8; SECTOR_USIZE]);
+        }
+    }
 }
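The resolver below is where the layering pays off: a RAM layer no longer wraps
an inner disk, so there is no recursive resolve step left, and the resolver can
become synchronous, dropping the async-trait machinery along with the now-unused
Resolve and InvalidDisk error variants. The call-site migration repeated through
the rest of this diff is equally mechanical; schematically, with `len` standing
in for any size in bytes:

    // Before: build the RAM backing and wrap it into a Disk by hand.
    let disk = Disk::new(RamDisk::new(len, false).unwrap()).unwrap();

    // After: one call returns a Disk over a single-RAM-layer LayeredDisk.
    let disk = disk_ramdisk::ram_disk(len, false).unwrap();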
- #[error("invalid disk")] - InvalidDisk(#[source] disk_backend::InvalidDisk), -} - -#[async_trait] -impl AsyncResolveResource for RamDiskResolver { - type Output = ResolvedDisk; - type Error = ResolveRamDiskError; - - async fn resolve( - &self, - _resolver: &ResourceResolver, - rsrc: RamDiskHandle, - input: ResolveDiskParameters<'_>, - ) -> Result { - ResolvedDisk::new( - RamDisk::new(rsrc.len, input.read_only).map_err(ResolveRamDiskError::Ram)?, - ) - .map_err(ResolveRamDiskError::InvalidDisk) - } } -#[async_trait] -impl AsyncResolveResource for RamDiskResolver { - type Output = ResolvedDisk; +impl ResolveResource for RamDiskResolver { + type Output = ResolvedDiskLayer; type Error = ResolveRamDiskError; - async fn resolve( + fn resolve( &self, - resolver: &ResourceResolver, - rsrc: RamDiffDiskHandle, - input: ResolveDiskParameters<'_>, + rsrc: RamDiskLayerHandle, + _input: ResolveDiskLayerParameters<'_>, ) -> Result { - let lower = resolver - .resolve( - rsrc.lower, - ResolveDiskParameters { - read_only: true, - _async_trait_workaround: &(), - }, - ) - .await - .map_err(ResolveRamDiskError::Resolve)?; - ResolvedDisk::new( - RamDisk::diff(lower.0, input.read_only).map_err(ResolveRamDiskError::Ram)?, - ) - .map_err(ResolveRamDiskError::InvalidDisk) + Ok(ResolvedDiskLayer::new( + RamLayer::new(rsrc.len).map_err(ResolveRamDiskError::Ram)?, + )) } } diff --git a/vm/devices/storage/disk_striped/src/lib.rs b/vm/devices/storage/disk_striped/src/lib.rs index aa84fffd27..9044aa54c6 100644 --- a/vm/devices/storage/disk_striped/src/lib.rs +++ b/vm/devices/storage/disk_striped/src/lib.rs @@ -591,7 +591,6 @@ where #[cfg(test)] mod tests { use super::*; - use disk_ramdisk::RamDisk; use guestmem::GuestMemory; use hvdef::HV_PAGE_SIZE; use pal_async::async_test; @@ -607,8 +606,9 @@ mod tests { for _i in 0..disk_count { let ramdisk = - RamDisk::new(disk_size_in_bytes.unwrap_or(1024 * 1024 * 64), false).unwrap(); - devices.push(Disk::new(ramdisk).unwrap()); + disk_ramdisk::ram_disk(disk_size_in_bytes.unwrap_or(1024 * 1024 * 64), false) + .unwrap(); + devices.push(ramdisk); } StripedDisk::new(devices, chunk_size_in_bytes, logic_sector_count).unwrap() @@ -944,8 +944,8 @@ mod tests { // Creating striping disk using incompatible files shall fail. let mut devices = Vec::new(); for i in 0..2 { - let ramdisk = RamDisk::new(1024 * 1024 + i * 64 * 1024, false).unwrap(); - devices.push(Disk::new(ramdisk).unwrap()); + let ramdisk = disk_ramdisk::ram_disk(1024 * 1024 + i * 64 * 1024, false).unwrap(); + devices.push(ramdisk); } match StripedDisk::new(devices, None, None) { @@ -961,8 +961,8 @@ mod tests { // Creating striping disk using invalid chunk size shall fail. let mut block_devices = Vec::new(); for _ in 0..2 { - let ramdisk = RamDisk::new(1024 * 1024, false).unwrap(); - block_devices.push(Disk::new(ramdisk).unwrap()); + let ramdisk = disk_ramdisk::ram_disk(1024 * 1024, false).unwrap(); + block_devices.push(ramdisk); } match StripedDisk::new(block_devices, Some(4 * 1024 + 1), None) { @@ -975,8 +975,8 @@ mod tests { // Creating striping disk using invalid logic sector count shall fail. let mut block_devices = Vec::new(); for _ in 0..2 { - let ramdisk = RamDisk::new(1024 * 1024, false).unwrap(); - block_devices.push(Disk::new(ramdisk).unwrap()); + let ramdisk = disk_ramdisk::ram_disk(1024 * 1024, false).unwrap(); + block_devices.push(ramdisk); } match StripedDisk::new( @@ -996,8 +996,8 @@ mod tests { // Create a simple striping disk. 
let mut block_devices = Vec::new(); for _ in 0..2 { - let ramdisk = RamDisk::new(1024 * 1024, false).unwrap(); - block_devices.push(Disk::new(ramdisk).unwrap()); + let ramdisk = disk_ramdisk::ram_disk(1024 * 1024, false).unwrap(); + block_devices.push(ramdisk); } let disk = match StripedDisk::new(block_devices, Some(8 * 1024), None) { diff --git a/vm/devices/storage/ide/fuzz/Cargo.toml b/vm/devices/storage/ide/fuzz/Cargo.toml index cee22ef748..8cf2b52165 100644 --- a/vm/devices/storage/ide/fuzz/Cargo.toml +++ b/vm/devices/storage/ide/fuzz/Cargo.toml @@ -13,7 +13,6 @@ ide.workspace = true chipset_arc_mutex_device.workspace = true chipset_device_fuzz.workspace = true chipset_device.workspace = true -disk_backend.workspace = true disk_ramdisk.workspace = true pci_core.workspace = true scsidisk.workspace = true diff --git a/vm/devices/storage/ide/fuzz/fuzz_ide.rs b/vm/devices/storage/ide/fuzz/fuzz_ide.rs index c4bfe20a8e..1512668ab0 100644 --- a/vm/devices/storage/ide/fuzz/fuzz_ide.rs +++ b/vm/devices/storage/ide/fuzz/fuzz_ide.rs @@ -7,8 +7,6 @@ use arbitrary::Arbitrary; use arbitrary::Unstructured; use chipset_arc_mutex_device::services::PortIoInterceptServices; use chipset_device::pci::PciConfigSpace; -use disk_backend::Disk; -use disk_ramdisk::RamDisk; use guestmem::GuestMemory; use ide::DriveMedia; use ide::IdeDevice; @@ -30,14 +28,14 @@ impl FuzzDriveMedia { fn reify(self) -> DriveMedia { // we don't care about drive contents for fuzzing match self { - FuzzDriveMedia::HardDrive => DriveMedia::hard_disk( - Disk::new(RamDisk::new(0x100000 * 4, false).unwrap()).unwrap(), - ), - FuzzDriveMedia::OpticalDrive => DriveMedia::optical_disk(Arc::new(AtapiScsiDisk::new( - Arc::new(SimpleScsiDvd::new(Some( - Disk::new(RamDisk::new(0x100000 * 4, false).unwrap()).unwrap(), - ))), - ))), + FuzzDriveMedia::HardDrive => { + DriveMedia::hard_disk(disk_ramdisk::ram_disk(0x100000 * 4, false).unwrap()) + } + FuzzDriveMedia::OpticalDrive => { + DriveMedia::optical_disk(Arc::new(AtapiScsiDisk::new(Arc::new( + SimpleScsiDvd::new(Some(disk_ramdisk::ram_disk(0x100000 * 4, false).unwrap())), + )))) + } } } } diff --git a/vm/devices/storage/storvsp/Cargo.toml b/vm/devices/storage/storvsp/Cargo.toml index 0527941539..dc4d35dde7 100644 --- a/vm/devices/storage/storvsp/Cargo.toml +++ b/vm/devices/storage/storvsp/Cargo.toml @@ -7,7 +7,7 @@ edition = "2021" rust-version.workspace = true [dependencies] -disk_backend.workspace = true +disk_ramdisk.workspace = true # For `ioperf` modules scsi_buffers.workspace = true scsi_core.workspace = true scsi_defs.workspace = true diff --git a/vm/devices/storage/storvsp/src/ioperf.rs b/vm/devices/storage/storvsp/src/ioperf.rs index 38a45d1610..09a0755294 100644 --- a/vm/devices/storage/storvsp/src/ioperf.rs +++ b/vm/devices/storage/storvsp/src/ioperf.rs @@ -10,8 +10,7 @@ use crate::test_helpers::TestWorker; use crate::ScsiController; use crate::ScsiControllerDisk; use crate::ScsiPath; -use disk_backend::zerodisk::ZeroDisk; -use disk_backend::Disk; +use disk_ramdisk::ram_disk; use guestmem::GuestMemory; use pal_async::driver::SpawnDriver; use scsi_defs::srb::SrbStatus; @@ -29,12 +28,10 @@ pub struct PerfTester { impl PerfTester { pub async fn new(driver: impl SpawnDriver + Clone) -> Self { let io_queue_depth = None; - let device = ZeroDisk::new(512, 64 * 1024).unwrap(); + let device = ram_disk(64 * 1024, true).unwrap(); let controller = ScsiController::new(); - let disk = ScsiControllerDisk::new(Arc::new(SimpleScsiDisk::new( - Disk::new(device).unwrap(), - Default::default(), - ))); + 
let disk = + ScsiControllerDisk::new(Arc::new(SimpleScsiDisk::new(device, Default::default()))); controller .attach( ScsiPath { diff --git a/vm/devices/storage/storvsp/src/lib.rs b/vm/devices/storage/storvsp/src/lib.rs index 72b768d055..7b3c1ed901 100644 --- a/vm/devices/storage/storvsp/src/lib.rs +++ b/vm/devices/storage/storvsp/src/lib.rs @@ -1727,8 +1727,6 @@ mod tests { use crate::test_helpers::parse_guest_completion; use crate::test_helpers::parse_guest_completion_check_flags_status; use crate::test_helpers::TestWorker; - use disk_backend::Disk; - use disk_ramdisk::RamDisk; use pal_async::async_test; use pal_async::DefaultDriver; use scsi::srb::SrbStatus; @@ -1744,7 +1742,7 @@ mod tests { let test_guest_mem = GuestMemory::allocate(16384); let controller = ScsiController::new(); let disk = scsidisk::SimpleScsiDisk::new( - Disk::new(RamDisk::new(10 * 1024 * 1024, false).unwrap()).unwrap(), + disk_ramdisk::ram_disk(10 * 1024 * 1024, false).unwrap(), Default::default(), ); controller @@ -2149,7 +2147,7 @@ mod tests { // Add some disks while the guest is running. for lun in 0..4 { let disk = scsidisk::SimpleScsiDisk::new( - Disk::new(RamDisk::new(10 * 1024 * 1024, false).unwrap()).unwrap(), + disk_ramdisk::ram_disk(10 * 1024 * 1024, false).unwrap(), Default::default(), ); controller @@ -2256,10 +2254,10 @@ mod tests { #[async_test] pub async fn test_async_disk(driver: DefaultDriver) { - let device = RamDisk::new(64 * 1024, false).unwrap(); + let device = disk_ramdisk::ram_disk(64 * 1024, false).unwrap(); let controller = ScsiController::new(); let disk = ScsiControllerDisk::new(Arc::new(scsidisk::SimpleScsiDisk::new( - Disk::new(device).unwrap(), + device, Default::default(), ))); controller diff --git a/vm/vmcore/vm_resource/src/kind.rs b/vm/vmcore/vm_resource/src/kind.rs index 82b32986ba..e5b911b91b 100644 --- a/vm/vmcore/vm_resource/src/kind.rs +++ b/vm/vmcore/vm_resource/src/kind.rs @@ -66,6 +66,14 @@ impl ResourceKind for DiskHandleKind { const NAME: &'static str = "disk_handle"; } +/// A disk layer resource kind, where the underlying resources have already been +/// opened in a privileged context. +pub enum DiskLayerHandleKind {} + +impl ResourceKind for DiskLayerHandleKind { + const NAME: &'static str = "disk_layer_handle"; +} + /// A resource kind for SCSI devices. pub enum ScsiDeviceHandleKind {} diff --git a/vm/vmgs/vmgs/src/vmgs_impl.rs b/vm/vmgs/vmgs/src/vmgs_impl.rs index c6acf184d9..dd2f5abdeb 100644 --- a/vm/vmgs/vmgs/src/vmgs_impl.rs +++ b/vm/vmgs/vmgs/src/vmgs_impl.rs @@ -1864,7 +1864,6 @@ pub mod save_restore { #[cfg(test)] mod tests { use super::*; - use disk_ramdisk::RamDisk; use pal_async::async_test; #[cfg(with_encryption)] use vmgs_format::VMGS_ENCRYPTION_KEY_SIZE; @@ -1872,7 +1871,7 @@ mod tests { const ONE_MEGA_BYTE: u64 = 1024 * 1024; fn new_test_file() -> Disk { - Disk::new(RamDisk::new(4 * ONE_MEGA_BYTE, false).unwrap()).unwrap() + disk_ramdisk::ram_disk(4 * ONE_MEGA_BYTE, false).unwrap() } #[async_test] diff --git a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs index 69ca902d8c..81c4d561ec 100644 --- a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs +++ b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs @@ -4,7 +4,8 @@ //! Integration tests for x86_64 Linux direct boot with OpenHCL. 
use anyhow::Context; -use disk_backend_resources::RamDiskHandle; +use disk_backend_resources::layer::RamDiskLayerHandle; +use disk_backend_resources::LayeredDiskHandle; use gdma_resources::GdmaDeviceHandle; use gdma_resources::VportDefinition; use guid::Guid; @@ -130,9 +131,9 @@ async fn storvsp(config: PetriVmConfig) -> Result<(), anyhow::Error> { lun: vtl2_lun as u8, }, device: SimpleScsiDiskHandle { - disk: RamDiskHandle { - len: scsi_disk_sectors * sector_size, - } + disk: LayeredDiskHandle::single_layer(RamDiskLayerHandle { + len: Some(scsi_disk_sectors * sector_size), + }) .into_resource(), read_only: false, parameters: Default::default(), @@ -153,9 +154,9 @@ async fn storvsp(config: PetriVmConfig) -> Result<(), anyhow::Error> { msix_count: 64, namespaces: vec![NamespaceDefinition { nsid: vtl2_nsid, - disk: (RamDiskHandle { - len: nvme_disk_sectors * sector_size, - } + disk: (LayeredDiskHandle::single_layer(RamDiskLayerHandle { + len: Some(nvme_disk_sectors * sector_size), + }) .into_resource()), read_only: false, }], @@ -337,7 +338,12 @@ async fn openhcl_linux_storvsp_dvd(config: PetriVmConfig) -> Result<(), anyhow:: hot_plug_send .call_failable( SimpleScsiDvdRequest::ChangeMedia, - Some(RamDiskHandle { len: len as u64 }.into_resource()), + Some( + LayeredDiskHandle::single_layer(RamDiskLayerHandle { + len: Some(len as u64), + }) + .into_resource(), + ), ) .await .context("failed to change media")?; @@ -408,9 +414,9 @@ async fn openhcl_linux_stripe_storvsp(config: PetriVmConfig) -> Result<(), anyho msix_count: 64, namespaces: vec![NamespaceDefinition { nsid: vtl2_nsid, - disk: (RamDiskHandle { - len: nvme_disk_sectors * sector_size, - } + disk: (LayeredDiskHandle::single_layer(RamDiskLayerHandle { + len: Some(nvme_disk_sectors * sector_size), + }) .into_resource()), read_only: false, }], @@ -426,9 +432,9 @@ async fn openhcl_linux_stripe_storvsp(config: PetriVmConfig) -> Result<(), anyho msix_count: 64, namespaces: vec![NamespaceDefinition { nsid: vtl2_nsid, - disk: (RamDiskHandle { - len: nvme_disk_sectors * sector_size, - } + disk: (LayeredDiskHandle::single_layer(RamDiskLayerHandle { + len: Some(nvme_disk_sectors * sector_size), + }) .into_resource()), read_only: false, }],
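A closing note on the test churn: every RAM-backed disk resource is now spelled
LayeredDiskHandle::single_layer(RamDiskLayerHandle { len: Some(..) }).into_resource().
If that pattern keeps spreading, a small helper along these lines (hypothetical,
not part of this change) would keep call sites short:

    use disk_backend_resources::layer::RamDiskLayerHandle;
    use disk_backend_resources::LayeredDiskHandle;
    use vm_resource::kind::DiskHandleKind;
    use vm_resource::IntoResource;
    use vm_resource::Resource;

    /// Hypothetical helper: a RAM-backed disk resource of `len` bytes.
    fn ram_disk_resource(len: u64) -> Resource<DiskHandleKind> {
        LayeredDiskHandle::single_layer(RamDiskLayerHandle { len: Some(len) }).into_resource()
    }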