From 94902d14b281e0086858a0db27f51ea399cb1cca Mon Sep 17 00:00:00 2001
From: Kevin Reid
Date: Sun, 12 Jan 2025 20:38:46 -0800
Subject: [PATCH] Expose `StagingBelt` allocation directly.

This allows using `StagingBelt` for copying to textures or any other
operation where copying to an existing buffer is not the desired outcome.

One unfortunate feature of the API is that `allocate()` returns the entire
buffer and an offset, so applications could accidentally touch parts of the
belt buffer outside the intended allocation. It might make more sense to
return `wgpu::BufferSlice`, but that struct cannot be used in operations
like `copy_buffer_to_texture` and does not have getters, so it is not
currently suitable for that purpose.

I also moved `Exclusive` into a module so that its unsafe-to-access field
is properly private, and made various improvements to the `StagingBelt`
documentation, such as acknowledging that `write_buffer_with()` exists.
---
 wgpu/src/util/belt.rs | 156 ++++++++++++++++++++++++++++++------------
 1 file changed, 114 insertions(+), 42 deletions(-)

diff --git a/wgpu/src/util/belt.rs b/wgpu/src/util/belt.rs
index d6ef7a0c46..3435e3ed3c 100644
--- a/wgpu/src/util/belt.rs
+++ b/wgpu/src/util/belt.rs
@@ -5,49 +5,27 @@ use crate::{
 use std::fmt;
 use std::sync::{mpsc, Arc};
 
-struct Chunk {
-    buffer: Arc<Buffer>,
-    size: BufferAddress,
-    offset: BufferAddress,
-}
-
-/// `Sync` wrapper that works by providing only exclusive access.
-///
-/// See https://doc.rust-lang.org/nightly/std/sync/struct.Exclusive.html
-struct Exclusive<T>(T);
-
-unsafe impl<T> Sync for Exclusive<T> {}
-
-impl<T> Exclusive<T> {
-    fn new(value: T) -> Self {
-        Self(value)
-    }
-
-    fn get_mut(&mut self) -> &mut T {
-        &mut self.0
-    }
-}
-
 /// Efficiently performs many buffer writes by sharing and reusing temporary buffers.
 ///
 /// Internally it uses a ring-buffer of staging buffers that are sub-allocated.
-/// It has an advantage over [`Queue::write_buffer()`] in a way that it returns a mutable slice,
-/// which you can fill to avoid an extra data copy.
+/// Its advantage over [`Queue::write_buffer_with()`] is that the individual allocations
+/// are cheaper; `StagingBelt` is most useful when you are writing very many small pieces
+/// of data. It can be understood as a sort of arena allocator.
 ///
 /// Using a staging belt is slightly complicated, and generally goes as follows:
-/// 1. Write to buffers that need writing to using [`StagingBelt::write_buffer()`].
+/// 1. Use [`StagingBelt::write_buffer()`] or [`StagingBelt::allocate()`] to allocate
+///    buffer slices, then write your data to them.
 /// 2. Call [`StagingBelt::finish()`].
 /// 3. Submit all command encoders that were used in step 1.
 /// 4. Call [`StagingBelt::recall()`].
 ///
-/// [`Queue::write_buffer()`]: crate::Queue::write_buffer
+/// [`Queue::write_buffer_with()`]: crate::Queue::write_buffer_with
 pub struct StagingBelt {
     chunk_size: BufferAddress,
     /// Chunks into which we are accumulating data to be transferred.
     active_chunks: Vec<Chunk>,
     /// Chunks that have scheduled transfers already; they are unmapped and some
-    /// command encoder has one or more `copy_buffer_to_buffer` commands with them
-    /// as source.
+    /// command encoder has one or more commands with them as source.
     closed_chunks: Vec<Chunk>,
     /// Chunks that are back from the GPU and ready to be mapped for write and put
     /// into `active_chunks`.
@@ -70,7 +48,7 @@ impl StagingBelt {
     ///   (per [`StagingBelt::finish()`]); and
     /// * bigger is better, within these bounds.
     pub fn new(chunk_size: BufferAddress) -> Self {
-        let (sender, receiver) = std::sync::mpsc::channel();
+        let (sender, receiver) = mpsc::channel();
         StagingBelt {
             chunk_size,
             active_chunks: Vec::new(),
@@ -81,7 +59,7 @@ impl StagingBelt {
         }
     }
 
-    /// Allocate the staging belt slice of `size` to be uploaded into the `target` buffer
+    /// Allocate a staging belt slice of `size` to be copied into the `target` buffer
     /// at the specified offset.
     ///
     /// The upload will be placed into the provided command encoder. This encoder
@@ -99,10 +77,57 @@ impl StagingBelt {
         size: BufferSize,
         device: &Device,
     ) -> BufferViewMut<'_> {
+        let (mapped, belt_buffer, offset_in_belt_buffer) = self.allocate(
+            size,
+            const { BufferSize::new(crate::COPY_BUFFER_ALIGNMENT).unwrap() },
+            device,
+        );
+        encoder.copy_buffer_to_buffer(
+            belt_buffer,
+            offset_in_belt_buffer,
+            target,
+            offset,
+            size.get(),
+        );
+        mapped
+    }
+
+    /// Allocate a staging belt slice with the given `size` and `alignment` and return it.
+    ///
+    /// This allows you to do whatever you want with the slice after, such as
+    /// copying it to a texture or executing a compute shader that reads it, whereas
+    /// [`StagingBelt::write_buffer()`] can only write to other buffers.
+    ///
+    /// If the `size` is greater than the size of any free internal buffer, a new buffer
+    /// will be allocated for it. Therefore, the `chunk_size` passed to [`StagingBelt::new()`]
+    /// should ideally be larger than every such size.
+    ///
+    /// Three values are returned:
+    ///
+    /// * The mapped buffer view which you should write into from the CPU side (Rust code).
+    /// * The buffer containing the slice, for you to use in GPU commands.
+    ///   All commands involving this slice must be submitted after
+    ///   [`StagingBelt::finish()`] is called and before [`StagingBelt::recall()`] is called.
+    /// * The offset within the buffer at which the slice starts.
+    ///   This offset should be used for all GPU commands, and should not be used with
+    ///   the mapped buffer view.
+    pub fn allocate(
+        &mut self,
+        size: BufferSize,
+        alignment: BufferSize,
+        device: &Device,
+    ) -> (BufferViewMut<'_>, &Buffer, BufferAddress) {
+        assert!(
+            alignment.get().is_power_of_two(),
+            "alignment must be a power of two, not {alignment}"
+        );
+        // At minimum, we must have alignment sufficient to map the buffer.
+        let alignment = alignment.get().max(crate::MAP_ALIGNMENT);
+
         let mut chunk = if let Some(index) = self
             .active_chunks
             .iter()
-            .position(|chunk| chunk.offset + size.get() <= chunk.size)
+            .position(|chunk| chunk.can_allocate(size, alignment))
         {
             self.active_chunks.swap_remove(index)
         } else {
@@ -111,7 +136,7 @@ impl StagingBelt {
         if let Some(index) = self
             .free_chunks
             .iter()
-            .position(|chunk| size.get() <= chunk.size)
+            .position(|chunk| chunk.can_allocate(size, alignment))
         {
             self.free_chunks.swap_remove(index)
         } else {
@@ -129,17 +154,19 @@ impl StagingBelt {
             }
         };
 
-        encoder.copy_buffer_to_buffer(&chunk.buffer, chunk.offset, target, offset, size.get());
-        let old_offset = chunk.offset;
-        chunk.offset = align_to(chunk.offset + size.get(), crate::MAP_ALIGNMENT);
+        let allocation_offset = chunk.allocate(size, alignment);
 
         self.active_chunks.push(chunk);
-        self.active_chunks
-            .last()
-            .unwrap()
-            .buffer
-            .slice(old_offset..old_offset + size.get())
-            .get_mapped_range_mut()
+        let chunk = self.active_chunks.last().unwrap();
+
+        (
+            chunk
+                .buffer
+                .slice(allocation_offset..allocation_offset + size.get())
+                .get_mapped_range_mut(),
+            &chunk.buffer,
+            allocation_offset,
+        )
     }
 
     /// Prepare currently mapped buffers for use in a submission.
@@ -197,3 +224,48 @@ impl fmt::Debug for StagingBelt {
             .finish_non_exhaustive()
     }
 }
+
+struct Chunk {
+    buffer: Arc<Buffer>,
+    size: BufferAddress,
+    offset: BufferAddress,
+}
+
+impl Chunk {
+    fn can_allocate(&self, size: BufferSize, alignment: BufferAddress) -> bool {
+        let alloc_start = align_to(self.offset, alignment);
+        let alloc_end = alloc_start + size.get();
+
+        alloc_end <= self.size
+    }
+
+    fn allocate(&mut self, size: BufferSize, alignment: BufferAddress) -> BufferAddress {
+        let alloc_start = align_to(self.offset, alignment);
+        let alloc_end = alloc_start + size.get();
+
+        assert!(alloc_end <= self.size);
+        self.offset = alloc_end;
+        alloc_start
+    }
+}
+
+use exclusive::Exclusive;
+mod exclusive {
+    /// `Sync` wrapper that works by providing only exclusive access.
+    ///
+    /// See <https://doc.rust-lang.org/nightly/std/sync/struct.Exclusive.html>
+    pub(super) struct Exclusive<T>(T);
+
+    /// Safety: `&Exclusive<T>` has no operations.
+    unsafe impl<T> Sync for Exclusive<T> {}
+
+    impl<T> Exclusive<T> {
+        pub fn new(value: T) -> Self {
+            Self(value)
+        }
+
+        pub fn get_mut(&mut self) -> &mut T {
+            &mut self.0
+        }
+    }
+}
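Usage note (not part of the patch): a minimal sketch of how the new `allocate()`
method could drive the buffer-to-texture copy the commit message mentions. The
bindings `belt`, `device`, `queue`, `texture`, `data`, `bytes_per_row`, and
`extent` are placeholders, and the `TexelCopyBufferInfo`/`TexelCopyBufferLayout`
names assume a recent wgpu release:

    // Stage the texel data in the belt. COPY_BUFFER_ALIGNMENT is assumed to be
    // enough alignment for this copy; some formats may need a stricter value.
    let size = wgpu::BufferSize::new(data.len() as u64).unwrap();
    let alignment = wgpu::BufferSize::new(wgpu::COPY_BUFFER_ALIGNMENT).unwrap();
    let (mut view, belt_buffer, offset) = belt.allocate(size, alignment, &device);
    view.copy_from_slice(&data);
    drop(view); // end this borrow of the belt; the buffer stays mapped until finish()

    // Encode a copy from the belt's buffer, at the returned offset, into the texture.
    let mut encoder =
        device.create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
    encoder.copy_buffer_to_texture(
        wgpu::TexelCopyBufferInfo {
            buffer: belt_buffer,
            layout: wgpu::TexelCopyBufferLayout {
                offset, // the GPU-side offset from allocate(); not used with `view`
                bytes_per_row: Some(bytes_per_row), // multiple of COPY_BYTES_PER_ROW_ALIGNMENT
                rows_per_image: None,
            },
        },
        texture.as_image_copy(),
        extent,
    );

    // Unmap the belt, submit, then reclaim chunks once the GPU is done with them.
    belt.finish();
    queue.submit([encoder.finish()]);
    belt.recall();

The submission order here follows the documented contract: the copy is encoded
before `finish()`, but the encoder is submitted after `finish()` and before
`recall()`.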