diff --git a/betree/Cargo.toml b/betree/Cargo.toml index d9f116a30..ada5cff8b 100644 --- a/betree/Cargo.toml +++ b/betree/Cargo.toml @@ -68,6 +68,7 @@ quickcheck = "1" quickcheck_macros = "1" clap = "2.33" criterion = "0.3" +zipf = "7.0.1" [features] default = ["init_env_logger", "figment_config"] diff --git a/betree/benches/allocator.rs b/betree/benches/allocator.rs index c7dd2c032..47dab75fe 100644 --- a/betree/benches/allocator.rs +++ b/betree/benches/allocator.rs @@ -1,16 +1,283 @@ -use betree_storage_stack::allocator::{SegmentAllocator, SEGMENT_SIZE_BYTES}; +use std::time::{Duration, Instant}; + +use betree_storage_stack::allocator::{ + self, Allocator, BestFitFSM, BestFitList, BestFitScan, BestFitTree, FirstFitFSM, FirstFitList, + FirstFitScan, HybridAllocator, NextFitList, NextFitScan, SegmentAllocator, WorstFitFSM, + WorstFitList, WorstFitScan, SEGMENT_SIZE_BYTES, SEGMENT_SIZE_LOG_2, +}; use criterion::{black_box, criterion_group, criterion_main, Bencher, Criterion}; +use rand::{ + distributions::{Distribution, Uniform}, + rngs::StdRng, + SeedableRng, +}; +use zipf::ZipfDistribution; + +#[derive(Clone)] +enum SizeDistribution { + Uniform(Uniform), + Zipfian(ZipfDistribution), +} + +// Define a type alias for our benchmark function to make it less verbose +type BenchmarkFn = Box; + +// Macro to generate allocator benchmark entries +macro_rules! allocator_benchmark { + ($name:expr, $allocator_type:ty, $bench_function:ident) => { + ( + $name, + Box::new(|b, dist, allocations, deallocations, min_size, max_size| { + $bench_function::<$allocator_type>( + b, + dist, + allocations, + deallocations, + min_size, + max_size, + ) + }), + ) + }; +} + +// Macro to generate allocator benchmark entries with name derived from type +macro_rules! generate_allocator_benchmarks { + ($bench_function:ident, $($allocator_type:ty),*) => { + vec![ + $( + allocator_benchmark!(stringify!($allocator_type), $allocator_type, $bench_function), + )* + ] + }; +} + +// In Haura, allocators are not continuously active in memory. Instead, they are loaded from disk +// when needed. This benchmark simulates this behavior by creating a new allocator instance for each +// iteration. Also deallocations are buffered and applied during sync operations, not immediately to +// the allocator. Here, we simulate the sync operation by directly modifying the underlying bitmap +// data after the allocator has performed allocations, mimicking the delayed deallocation process. 
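+//
+// Illustrative sketch (not part of the original harness): with the parameters
+// chosen in `criterion_benchmark` below, the benchmark macros effectively boil
+// down to one plain call per allocator type, e.g.
+//
+//     bench_alloc::<FirstFitScan>(
+//         b,
+//         SizeDistribution::Uniform(Uniform::new(128, 1024 + 1)),
+//         256,  // allocations = 2^(SEGMENT_SIZE_LOG_2 - 10) for the default 2^18
+//         128,  // deallocations = allocations / 2
+//         128,  // min_size
+//         1024, // max_size
+//     );
+//
+// so every allocator is exercised with exactly the same workload.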
+fn bench_alloc( + b: &mut Bencher, + dist: SizeDistribution, + allocations: u64, + deallocations: u64, + min_size: usize, + max_size: usize, +) { + let data = [0; SEGMENT_SIZE_BYTES]; + let mut allocated = Vec::new(); + allocated.reserve(allocations as usize); + + let mut rng = StdRng::seed_from_u64(42); + let mut sample_size = || -> u32 { + match &dist { + SizeDistribution::Uniform(u) => return black_box(u.sample(&mut rng)) as u32, + SizeDistribution::Zipfian(z) => { + let rank = black_box(z.sample(&mut rng)) as usize; + return (min_size + (rank - 1)) as u32; + } + } + }; + + b.iter_custom(|iters| { + let mut total_allocation_time = Duration::new(0, 0); + + for _ in 0..iters { + allocated.clear(); + let mut allocator = A::new(data); + + let start = Instant::now(); + for _ in 0..allocations { + let size = sample_size(); + if let Some(offset) = black_box(allocator.allocate(size)) { + allocated.push((offset, size)); + } + } + total_allocation_time += start.elapsed(); + + // Simulates the deferred deallocations + let bitmap = allocator.data(); + for _ in 0..deallocations { + if allocated.is_empty() { + break; + } + let idx = rand::random::() % allocated.len(); + let (offset, size) = allocated.swap_remove(idx); + + let start = offset as usize; + let end = (offset + size) as usize; + let range = &mut bitmap[start..end]; + range.fill(false); + } + // At the end of the iteration, the allocator goes out of scope, simulating it being + // unloaded from memory. In the next iteration, a new allocator will be created and loaded + // with the modified bitmap data. + } + Duration::from_nanos((total_allocation_time.as_nanos() / allocations as u128) as u64) + }); +} + +fn bench_new( + b: &mut Bencher, + dist: SizeDistribution, + allocations: u64, + deallocations: u64, + min_size: usize, + max_size: usize, +) { + let data = [0; SEGMENT_SIZE_BYTES]; + let mut allocated = Vec::new(); + allocated.reserve(allocations as usize); + + let mut rng = StdRng::seed_from_u64(42); + let mut sample_size = || -> u32 { + match &dist { + SizeDistribution::Uniform(u) => return black_box(u.sample(&mut rng)) as u32, + SizeDistribution::Zipfian(z) => { + let rank = black_box(z.sample(&mut rng)) as usize; + return (min_size + (rank - 1)) as u32; // Linear mapping for Zipfian + } + } + }; + + b.iter_custom(|iters| { + let mut total_allocation_time = Duration::new(0, 0); + + for _ in 0..iters { + allocated.clear(); + let start = Instant::now(); + let mut allocator = A::new(data); + total_allocation_time += start.elapsed(); + + for _ in 0..allocations { + let size = sample_size(); + if let Some(offset) = black_box(allocator.allocate(size)) { + allocated.push((offset, size)); + } + } + + // Simulates the deferred deallocations + let bitmap = allocator.data(); + for _ in 0..deallocations { + if allocated.is_empty() { + break; + } + let idx = rand::random::() % allocated.len(); + let (offset, size) = allocated.swap_remove(idx); -fn allocate(b: &mut Bencher) { - let mut a = SegmentAllocator::new([0; SEGMENT_SIZE_BYTES]); - b.iter(|| { - black_box(a.allocate(10)); + let start = offset as usize; + let end = (offset + size) as usize; + let range = &mut bitmap[start..end]; + range.fill(false); + } + // At the end of the iteration, the allocator goes out of scope, simulating it being + // unloaded from memory. In the next iteration, a new allocator will be created and loaded + // with the modified bitmap data. 
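+            // Note (in contrast to `bench_alloc`): only the `A::new(data)` call above is
+            // timed by this benchmark; the allocations and the simulated deferred
+            // deallocations are performed but not included in the measurement.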
+ } + total_allocation_time }); } pub fn criterion_benchmark(c: &mut Criterion) { - c.bench_function("allocate", allocate); + let min_size = 128; + let max_size = 1024; + let zipfian_exponent = 0.99; + + let distributions = [ + ( + "uniform", + SizeDistribution::Uniform(Uniform::new(min_size, max_size + 1)), + ), + ( + "zipfian", + SizeDistribution::Zipfian( + ZipfDistribution::new(max_size - min_size + 1, zipfian_exponent).expect(""), + ), + ), + ]; + + let allocations = 2_u64.pow(SEGMENT_SIZE_LOG_2 as u32 - 10); + let deallocations = allocations / 2; + + // Define the allocators to benchmark for allocation + #[rustfmt::skip] + let allocator_benchmarks_alloc: Vec<(&'static str, BenchmarkFn)> = generate_allocator_benchmarks!( + bench_alloc, + FirstFitScan, + FirstFitList, + FirstFitFSM, + NextFitScan, + NextFitList, + BestFitScan, + BestFitList, + BestFitFSM, + BestFitTree, + WorstFitScan, + WorstFitList, + WorstFitFSM, + SegmentAllocator + ); + + for (dist_name, dist) in distributions.clone() { + let group_name = format!("allocator_alloc_{}_{}", dist_name, SEGMENT_SIZE_LOG_2); + let mut group = c.benchmark_group(group_name); + for (bench_name, bench_func) in &allocator_benchmarks_alloc { + group.bench_function(*bench_name, |b| { + bench_func( + b, + dist.clone(), + allocations, + deallocations, + min_size, + max_size, + ) + }); + } + group.finish(); + } + + // Define the allocators to benchmark for 'new' function time + let allocator_benchmarks_new: Vec<(&'static str, BenchmarkFn)> = generate_allocator_benchmarks!( + bench_new, + FirstFitScan, + FirstFitList, + FirstFitFSM, + NextFitScan, + NextFitList, + BestFitScan, + BestFitList, + BestFitFSM, + BestFitTree, + WorstFitScan, + WorstFitList, + WorstFitFSM, + SegmentAllocator + ); + + for (dist_name, dist) in distributions.clone() { + let group_name = format!("allocator_new_{}_{}", dist_name, SEGMENT_SIZE_LOG_2); + let mut group = c.benchmark_group(group_name); + for (bench_name, bench_func) in &allocator_benchmarks_new { + group.bench_function(*bench_name, |b| { + bench_func( + b, + dist.clone(), + allocations, + deallocations, + min_size, + max_size, + ) + }); + } + group.finish(); + } } -criterion_group!(benches, criterion_benchmark); +criterion_group! { + name = benches; + // This can be any expression that returns a `Criterion` object. + config = Criterion::default().sample_size(500).measurement_time(Duration::new(600, 0)).warm_up_time(Duration::new(10, 0)); + targets = criterion_benchmark +} criterion_main!(benches); diff --git a/betree/src/allocator.rs b/betree/src/allocator.rs deleted file mode 100644 index 04f7c73f4..000000000 --- a/betree/src/allocator.rs +++ /dev/null @@ -1,204 +0,0 @@ -//! This module provides `SegmentAllocator` and `SegmentId` for bitmap -//! allocation of 1GiB segments. - -use crate::{cow_bytes::CowBytes, storage_pool::DiskOffset, vdev::Block, Error}; -use bitvec::prelude::*; -use byteorder::{BigEndian, ByteOrder}; -use std::io::Write; - -/// 256KiB, so that `vdev::BLOCK_SIZE * SEGMENT_SIZE == 1GiB` -pub const SEGMENT_SIZE: usize = 1 << SEGMENT_SIZE_LOG_2; -/// Number of bytes required to store a segments allocation bitmap -pub const SEGMENT_SIZE_BYTES: usize = SEGMENT_SIZE / 8; -const SEGMENT_SIZE_LOG_2: usize = 18; -const SEGMENT_SIZE_MASK: usize = SEGMENT_SIZE - 1; - -/// Simple first-fit bitmap allocator -pub struct SegmentAllocator { - data: BitArr!(for SEGMENT_SIZE, in u8, Lsb0), -} - -impl SegmentAllocator { - /// Constructs a new `SegmentAllocator` given the segment allocation bitmap. 
- /// The `bitmap` must have a length of `SEGMENT_SIZE`. - pub fn new(bitmap: [u8; SEGMENT_SIZE_BYTES]) -> Self { - SegmentAllocator { - data: BitArray::new(bitmap), - } - } - - /// Allocates a block of the given `size`. - /// Returns `None` if the allocation request cannot be satisfied. - pub fn allocate(&mut self, size: u32) -> Option { - if size == 0 { - return Some(0); - } - let offset = { - let mut idx = 0; - loop { - loop { - if idx + size > SEGMENT_SIZE as u32 { - return None; - } - if !self.data[idx as usize] { - break; - } - idx += 1; - } - - let start_idx = (idx + 1) as usize; - let end_idx = (idx + size) as usize; - if let Some(first_alloc_idx) = self.data[start_idx..end_idx].first_one() { - idx = (idx + 1) + first_alloc_idx as u32 + 1; - } else { - break idx; - } - } - }; - self.mark(offset, size, Action::Allocate); - return Some(offset); - } - - /// Allocates a block of the given `size` at `offset`. - /// Returns `false` if the allocation request cannot be satisfied. - pub fn allocate_at(&mut self, size: u32, offset: u32) -> bool { - if size == 0 { - return true; - } - if offset + size > SEGMENT_SIZE as u32 { - return false; - } - - let start_idx = offset as usize; - let end_idx = (offset + size) as usize; - if self.data[start_idx..end_idx].any() { - return false; - } - self.mark(offset, size, Action::Allocate); - true - } - - /// Deallocates the allocated block. - pub fn deallocate(&mut self, offset: u32, size: u32) { - log::debug!( - "Marked a block range {{ offset: {}, size: {} }} for deallocation", - offset, - size - ); - self.mark(offset, size, Action::Deallocate); - } - - fn mark(&mut self, offset: u32, size: u32, action: Action) { - let start_idx = offset as usize; - let end_idx = (offset + size) as usize; - let range = &mut self.data[start_idx..end_idx]; - - match action { - // Is allocation, so range must be free - Action::Allocate => debug_assert!(!range.any()), - // Is deallocation, so range must be previously used - Action::Deallocate => debug_assert!(range.all()), - } - - range.fill(action.as_bool()); - } -} - -// TODO better wording -/// Allocation action -#[derive(Clone, Copy)] -pub enum Action { - /// Deallocate an allocated block. - Deallocate, - /// Allocate a deallocated block. - Allocate, -} - -impl Action { - /// Returns 1 if allocation and 0 if deallocation. - pub fn as_bool(self) -> bool { - match self { - Action::Deallocate => false, - Action::Allocate => true, - } - } -} - -/// Identifier for 1GiB segments of a `StoragePool`. -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct SegmentId(pub u64); - -impl SegmentId { - /// Returns the corresponding segment of the given disk offset. - pub fn get(offset: DiskOffset) -> Self { - SegmentId(offset.as_u64() & !(SEGMENT_SIZE_MASK as u64)) - } - - /// Returns the block offset into the segment. - pub fn get_block_offset(offset: DiskOffset) -> u32 { - offset.as_u64() as u32 & SEGMENT_SIZE_MASK as u32 - } - - /// Returns the disk offset at the start of this segment. - pub fn as_disk_offset(&self) -> DiskOffset { - DiskOffset::from_u64(self.0) - } - - /// Returns the disk offset of the block in this segment at the given - /// offset. - pub fn disk_offset(&self, segment_offset: u32) -> DiskOffset { - DiskOffset::from_u64(self.0 + u64::from(segment_offset)) - } - - /// Returns the key of this segment for messages and queries. - pub fn key(&self, key_prefix: &[u8]) -> CowBytes { - // Shave off the two lower bytes because they are always null. 
- let mut segment_key = [0; 8]; - BigEndian::write_u64(&mut segment_key[..], self.0); - assert_eq!(&segment_key[6..], &[0, 0]); - - let mut key = CowBytes::new(); - key.push_slice(key_prefix); - key.push_slice(&segment_key[..6]); - key - } - - /// Returns the ID of the disk that belongs to this segment. - pub fn disk_id(&self) -> u16 { - self.as_disk_offset().disk_id() - } - - /// Returns the next segment ID. - /// Wraps around at the end of the disk. - pub fn next(&self, disk_size: Block) -> SegmentId { - let disk_offset = self.as_disk_offset(); - if disk_offset.block_offset().as_u64() + SEGMENT_SIZE as u64 >= disk_size.as_u64() { - SegmentId::get(DiskOffset::new( - disk_offset.storage_class(), - disk_offset.disk_id(), - Block(0), - )) - } else { - SegmentId(self.0 + SEGMENT_SIZE as u64) - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn segment_id() { - let offset = DiskOffset::new(1, 2, Block::from_bytes(4096)); - let segment = SegmentId::get(offset); - - assert_eq!(segment.as_disk_offset().storage_class(), 1); - assert_eq!(segment.disk_id(), 2); - assert_eq!( - segment.as_disk_offset().block_offset(), - Block::from_bytes(0) - ); - assert_eq!(SegmentId::get_block_offset(offset), 1); - } -} diff --git a/betree/src/allocator/best_fit_fsm.rs b/betree/src/allocator/best_fit_fsm.rs new file mode 100644 index 000000000..012026b2d --- /dev/null +++ b/betree/src/allocator/best_fit_fsm.rs @@ -0,0 +1,327 @@ +use super::*; + +/// Based on the free-space-map allocator from postgresql: +/// https://github.com/postgres/postgres/blob/02ed3c2bdcefab453b548bc9c7e0e8874a502790/src/backend/storage/freespace/README +/// This is an approximate best fit allocator. It will not always find the best fit but it tries +/// its best. +pub struct BestFitFSM { + data: BitArr!(for SEGMENT_SIZE, in u8, Lsb0), + fsm_tree: Vec<(u32, u32)>, // Array to represent the FSM tree, storing max free space + tree_height: u32, +} + +impl Allocator for BestFitFSM { + fn data(&mut self) -> &mut BitArr!(for SEGMENT_SIZE, in u8, Lsb0) { + &mut self.data + } + + /// Constructs a new `BestFitFSM` given the segment allocation bitmap. + /// The `bitmap` must have a length of `SEGMENT_SIZE`. 
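+    ///
+    /// Layout sketch (see the `build_simple` test below): the FSM tree is a
+    /// complete binary tree stored in array (binary-heap) form. Its leaves are
+    /// the free runs of the bitmap as `(offset, length)` pairs, and every
+    /// internal node copies whichever child has the larger run, so the root
+    /// always holds the largest free run. For example, a bitmap with blocks
+    /// `0..3` and `5..7` allocated yields the leaves `(3, 2)` and
+    /// `(7, SEGMENT_SIZE - 7)` and the tree
+    /// `[(7, SEGMENT_SIZE - 7), (3, 2), (7, SEGMENT_SIZE - 7)]`.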
+ fn new(bitmap: [u8; SEGMENT_SIZE_BYTES]) -> Self { + let data = BitArray::new(bitmap); + let mut allocator = BestFitFSM { + data, + fsm_tree: Vec::new(), + tree_height: 0, + }; + allocator.build_fsm_tree(); + allocator + } + + fn allocate(&mut self, size: u32) -> Option { + if size == 0 { + return Some(0); + } + + if self.fsm_tree.is_empty() || self.fsm_tree[0].1 < size { + return None; // Not enough free space + } + + let mut current_node_index = 0; + while current_node_index < self.fsm_tree.len() / 2 { + // Traverse internal nodes + let left_child_index = 2 * current_node_index + 1; + let right_child_index = 2 * current_node_index + 2; + + let left_child_value = *self.fsm_tree.get(left_child_index).unwrap_or(&(0, 0)); + let right_child_value = *self.fsm_tree.get(right_child_index).unwrap_or(&(0, 0)); + + match (left_child_value.1 >= size, right_child_value.1 >= size) { + (true, true) => { + // Both children can fit size + if left_child_value.1 < right_child_value.1 { + // Left is better fit + current_node_index = left_child_index; + } else { + // Right is better or equal fit + current_node_index = right_child_index; + } + } + (true, false) => current_node_index = left_child_index, // Only left can fit + (false, true) => current_node_index = right_child_index, // Only right can fit + (false, false) => unreachable!(), // Neither child can fit, stop traversal + } + } + + // current_node_index is now the index of the best-fit leaf node + assert!(current_node_index >= self.fsm_tree.len() / 2); + let (offset, segment_size) = self.fsm_tree[current_node_index]; + + assert!(segment_size >= size); + + self.mark(offset, size, Action::Allocate); + + // Update the segment in the leaf node + self.fsm_tree[current_node_index].0 += size; + self.fsm_tree[current_node_index].1 -= size; + + // Update internal nodes up to the root + let mut current_index = current_node_index; + while current_index > 0 { + current_index = (current_index - 1) / 2; // Index of parent node + let left_child_index = 2 * current_index + 1; + let right_child_index = 2 * current_index + 2; + + let left_child_value = *self.fsm_tree.get(left_child_index).unwrap_or(&(0, 0)); + let right_child_value = *self.fsm_tree.get(right_child_index).unwrap_or(&(0, 0)); + if left_child_value.1 > right_child_value.1 { + self.fsm_tree[current_index] = left_child_value + } else { + self.fsm_tree[current_index] = right_child_value + } + } + + return Some(offset); + } + + fn allocate_at(&mut self, size: u32, offset: u32) -> bool { + // NOTE: Because the tree is sorted by offset because of how it's build, this shouldn't be + // to hard to implement efficiently if needed. + + if size == 0 { + return true; + } + if offset + size > SEGMENT_SIZE as u32 { + return false; + } + + let start_idx = offset as usize; + let end_idx = (offset + size) as usize; + if self.data[start_idx..end_idx].any() { + return false; + } + self.mark(offset, size, Action::Allocate); + + // Rebuild the tree to reflect changes. This is **not** efficient but the easiest solution, + // as the allocate_at is called only **once** on loading the bitmap from disk. 
+ self.build_fsm_tree(); + true + } +} + +impl BestFitFSM { + fn get_free_segments(&mut self) -> Vec<(u32, u32)> { + let mut offset: u32 = 0; + let mut free_segments = Vec::new(); + while offset < SEGMENT_SIZE as u32 { + if !self.data()[offset as usize] { + // If bit is 0, it's free + let start_offset = offset; + let mut current_size: u32 = 0; + while offset < SEGMENT_SIZE as u32 && !self.data()[offset as usize] { + current_size += 1; + offset += 1; + } + free_segments.push((start_offset, current_size)); + } else { + offset += 1; + } + } + free_segments + } + + fn build_fsm_tree(&mut self) { + let leaf_nodes = self.get_free_segments(); + let leaf_nodes_num = leaf_nodes.len(); + + if leaf_nodes_num == 0 { + self.fsm_tree = vec![(0, 0)]; // Root node with 0 free space + return; + } + + // Calculate the size of the FSM tree array. For simplicity we assume complete tree for now. + self.tree_height = (leaf_nodes_num as f64).log2().ceil() as u32; + // Number of nodes in complete binary tree of height h is 2^(h+1) - 1 + let tree_nodes_num = (1 << (self.tree_height + 1)) - 1; + + self.fsm_tree.clear(); + self.fsm_tree.resize(tree_nodes_num as usize, (0, 0)); + + // 1. Initialize leaf nodes in fsm_tree from free_segments + // OPTIM: just use memcpy + for (i, &(offset, size)) in leaf_nodes.iter().enumerate() { + // Leaf nodes are at the end of the fsm_tree array in a complete binary tree + let leaf_index = (tree_nodes_num / 2) + i; + if leaf_index < tree_nodes_num { + // Prevent out-of-bounds access if free_segments.len() is not power of 2 + self.fsm_tree[leaf_index] = (offset, size); + } + } + + // 2. Build internal nodes bottom-up similar to a binary heap + for i in (0..(tree_nodes_num / 2)).rev() { + let left_child_index = 2 * i + 1; + let right_child_index = 2 * i + 2; + + // Default to 0 if index is out of bounds (incomplete tree) + let left_child_value = *self.fsm_tree.get(left_child_index).unwrap_or(&(0, 0)); + let right_child_value = *self.fsm_tree.get(right_child_index).unwrap_or(&(0, 0)); + + if left_child_value.1 > right_child_value.1 { + self.fsm_tree[i] = left_child_value + } else { + self.fsm_tree[i] = right_child_value + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn build_empty() { + let bitmap = [0u8; SEGMENT_SIZE_BYTES]; + let allocator = BestFitFSM::new(bitmap); + + // In an empty bitmap, the root node should have a large free space + assert_eq!(allocator.fsm_tree[0].0, 0 as u32); + assert_eq!(allocator.fsm_tree[0].1, SEGMENT_SIZE as u32); + assert_eq!(allocator.tree_height, 0); + } + + #[test] + fn build_simple() { + // Example bitmap: 3 segments allocated at the beginning, 2 free, 3 allocated, rest free + let mut allocator = BestFitFSM::new([0u8; SEGMENT_SIZE_BYTES]); + let bitmap = allocator.data(); + + // Manually allocate some segments + bitmap[0..3].fill(true); // Allocate 3 blocks at the beginning + bitmap[5..7].fill(true); // Allocate 2 blocks after the free ones + + let mut allocator = BestFitFSM::new(bitmap.into_inner()); + + let fsm_tree = vec![ + (7, SEGMENT_SIZE as u32 - 7), + (3, 2), + (7, SEGMENT_SIZE as u32 - 7), + ]; + assert_eq!(allocator.fsm_tree, fsm_tree); + assert_eq!(allocator.tree_height, 1); + } + + #[test] + fn build_complex() { + let mut allocator = BestFitFSM::new([0u8; SEGMENT_SIZE_BYTES]); + let bitmap = allocator.data(); + + // Manually allocate some segments to create a non-trivial tree + bitmap[0..3].fill(true); + bitmap[5..8].fill(true); + bitmap[8..10].fill(true); + bitmap[14..22].fill(true); + 
bitmap[35..36].fill(true); + bitmap[42..53].fill(true); + + let allocator = BestFitFSM::new(bitmap.into_inner()); + + // binary heap layout + let fsm_tree = vec![ + (53, SEGMENT_SIZE as u32 - 53), + (22, 13), + (53, SEGMENT_SIZE as u32 - 53), + (10, 4), + (22, 13), + (53, SEGMENT_SIZE as u32 - 53), + (0, 0), + (3, 2), + (10, 4), + (22, 13), + (36, 6), + (53, SEGMENT_SIZE as u32 - 53), + (0, 0), + (0, 0), + (0, 0), + ]; + + assert_eq!(fsm_tree, allocator.fsm_tree); + assert_eq!(allocator.tree_height, 3); + } + + #[test] + fn allocate_empty_fsm_tree() { + let bitmap = [0u8; SEGMENT_SIZE_BYTES]; + let mut allocator = BestFitFSM::new(bitmap); + + let allocation = allocator.allocate(1024); + assert!(allocation.is_some()); // Allocation should succeed + + let allocated_offset = allocation.unwrap(); + assert_eq!(allocated_offset, 0); // Should allocate at the beginning + + // Check if the allocated region is marked as used in the bitmap + assert!(allocator.data()[0..1024 as usize].all()); + // Check root node value after allocation + assert_eq!(allocator.fsm_tree[0], (1024, SEGMENT_SIZE as u32 - 1024)); + } + + #[test] + fn allocate_complex_fsm_tree() { + let mut allocator = BestFitFSM::new([0u8; SEGMENT_SIZE_BYTES]); + let bitmap = allocator.data(); + + // Manually allocate some segments to create a non-trivial tree + bitmap[0..3].fill(true); + bitmap[5..8].fill(true); + bitmap[8..10].fill(true); + bitmap[14..22].fill(true); + bitmap[35..36].fill(true); + bitmap[42..53].fill(true); + + let mut allocator = BestFitFSM::new(bitmap.into_inner()); + + // Best-fit should allocate from the segment at offset 3 with size 2 + let allocation = allocator.allocate(2); // Request allocation of size 2 + assert!(allocation.is_some()); + assert_eq!(allocation.unwrap(), 3); + // Verify that the allocated region is marked in the bitmap + assert!(allocator.data()[3..5].all()); + + let allocation2 = allocator.allocate(10); + assert!(allocation2.is_some()); + assert_eq!(allocation2.unwrap(), 22); + assert!(allocator.data()[22..32].all()); + + // Allocate again, to use the next best fit segment + let allocation2 = allocator.allocate(100); + assert!(allocation2.is_some()); + assert_eq!(allocation2.unwrap(), 53); + assert!(allocator.data()[53..153].all()); + assert_eq!(allocator.fsm_tree[0].1, SEGMENT_SIZE as u32 - 153); + } + + #[test] + fn allocate_fail_fsm_tree() { + let mut allocator = BestFitFSM::new([0u8; SEGMENT_SIZE_BYTES]); + let root_free_space = allocator.fsm_tree[0].1; + + // Try to allocate more than available space + let allocation = allocator.allocate(root_free_space + 1); + assert!(allocation.is_none()); // Allocation should fail + + // Check if fsm_tree root value is still the same + assert_eq!(allocator.fsm_tree[0].1, root_free_space); // Should remain unchanged + } +} diff --git a/betree/src/allocator/best_fit_list.rs b/betree/src/allocator/best_fit_list.rs new file mode 100644 index 000000000..8e7676bdd --- /dev/null +++ b/betree/src/allocator/best_fit_list.rs @@ -0,0 +1,127 @@ +use super::*; + +/// Simple Best-Fit bitmap allocator that uses a list to manage free segments +pub struct BestFitList { + data: BitArr!(for SEGMENT_SIZE, in u8, Lsb0), + free_segments: Vec<(u32, u32)>, // (offset, size) of free segments +} + +impl Allocator for BestFitList { + fn data(&mut self) -> &mut BitArr!(for SEGMENT_SIZE, in u8, Lsb0) { + &mut self.data + } + + /// Constructs a new `BestFitList` given the segment allocation bitmap. + /// The `bitmap` must have a length of `SEGMENT_SIZE`. 
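+    ///
+    /// `free_segments` is a flat, offset-sorted list of the free runs of the
+    /// bitmap as `(offset, size)` pairs; `allocate` does a linear pass over it
+    /// and picks the smallest run that still fits, shrinking that entry in
+    /// place. For example, with free runs `[(3, 2), (10, 8)]` a request of
+    /// size 4 is served from `(10, 8)`, leaving `[(3, 2), (14, 4)]`.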
+ fn new(bitmap: [u8; SEGMENT_SIZE_BYTES]) -> Self { + let data = BitArray::new(bitmap); + let mut allocator = BestFitList { + data, + free_segments: Vec::new(), + }; + allocator.initialize_free_segments(); + allocator + } + + /// Allocates a block of the given `size` using best-fit strategy. + /// Returns `None` if the allocation request cannot be satisfied. + fn allocate(&mut self, size: u32) -> Option { + if size == 0 { + return Some(0); + } + + let mut best_fit_segment_index: Option = None; + let mut best_fit_segment_size: u32 = u32::MAX; // Initialize with a large value + + for i in 0..self.free_segments.len() { + let (_, segment_size) = self.free_segments[i]; + if segment_size >= size && segment_size < best_fit_segment_size { + best_fit_segment_index = Some(i); + best_fit_segment_size = segment_size; + } + } + + if let Some(index) = best_fit_segment_index { + let (offset, segment_size) = self.free_segments[index]; + self.mark(offset, size, Action::Allocate); + + self.free_segments[index].0 = offset + size; + self.free_segments[index].1 = segment_size - size; + + return Some(offset); + } + None + } + + /// Allocates a block of the given `size` at `offset`. + /// Returns `false` if the allocation request cannot be satisfied. + fn allocate_at(&mut self, size: u32, offset: u32) -> bool { + if size == 0 { + return true; + } + if offset + size > SEGMENT_SIZE as u32 { + return false; + } + + let start_idx = offset as usize; + let end_idx = (offset + size) as usize; + if self.data[start_idx..end_idx].any() { + return false; + } + + // Update free_segments to reflect the allocation - similar to FirstFitList::allocate_at + for i in 0..self.free_segments.len() { + let (seg_offset, seg_size) = self.free_segments[i]; + if seg_offset == offset && seg_size == size { + self.free_segments.remove(i); + self.mark(offset, size, Action::Allocate); + return true; + } else if seg_offset == offset && seg_size > size { + self.free_segments[i].0 += size; + self.free_segments[i].1 -= size; + self.mark(offset, size, Action::Allocate); + return true; + } else if offset > seg_offset && offset + size == seg_offset + seg_size { + self.free_segments[i].1 -= size; + self.mark(offset, size, Action::Allocate); + return true; + } else if offset > seg_offset + && offset < seg_offset + seg_size + && offset + size < seg_offset + seg_size + { + let remaining_size = seg_size - (size + (offset - seg_offset)); + let new_offset = offset + size; + self.free_segments[i].1 = offset - seg_offset; + + self.free_segments + .insert(i + 1, (new_offset, remaining_size)); + self.mark(offset, size, Action::Allocate); + return true; + } + } + + false + } +} + +impl BestFitList { + /// Initializes the `free_segments` vector by scanning the bitmap. 
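+    ///
+    /// Example: a bitmap with blocks `0..3` and `5..7` allocated produces
+    /// `free_segments = [(3, 2), (7, SEGMENT_SIZE as u32 - 7)]`.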
+ fn initialize_free_segments(&mut self) { + let mut offset: u32 = 0; + while offset < SEGMENT_SIZE as u32 { + if !self.data()[offset as usize] { + let start_offset = offset; + let mut current_size = 0; + while offset < SEGMENT_SIZE as u32 && !self.data()[offset as usize] { + current_size += 1; + offset += 1; + } + self.free_segments.push((start_offset, current_size)); + } else { + offset += 1; + } + } + // keep segments sorted by offset + self.free_segments.sort_by_key(|seg| seg.0); + } +} diff --git a/betree/src/allocator/best_fit_scan.rs b/betree/src/allocator/best_fit_scan.rs new file mode 100644 index 000000000..264456708 --- /dev/null +++ b/betree/src/allocator/best_fit_scan.rs @@ -0,0 +1,101 @@ +use super::*; + +/// Simple best-fit bitmap allocator +pub struct BestFitScan { + data: BitArr!(for SEGMENT_SIZE, in u8, Lsb0), +} + +impl Allocator for BestFitScan { + fn data(&mut self) -> &mut BitArr!(for SEGMENT_SIZE, in u8, Lsb0) { + &mut self.data + } + + /// Constructs a new `BestFitSimple` given the segment allocation bitmap. + /// The `bitmap` must have a length of `SEGMENT_SIZE`. + fn new(bitmap: [u8; SEGMENT_SIZE_BYTES]) -> Self { + BestFitScan { + data: BitArray::new(bitmap), + } + } + + /// Allocates a block of the given `size`. + /// Returns `None` if the allocation request cannot be satisfied. + fn allocate(&mut self, size: u32) -> Option { + if size == 0 { + return Some(0); + } + + let mut best_fit_offset = None; + // Initialize with a value larger than any possible size + let mut best_fit_size = SEGMENT_SIZE as u32 + 1; + let mut offset: u32 = 0; + + while offset + size <= SEGMENT_SIZE as u32 { + let end_idx = (offset + size) as usize; + + match self.data[offset as usize..end_idx].last_one() { + Some(last_alloc_idx) => { + // Skip to the end of the last allocated block + offset += last_alloc_idx as u32 + 1; + } + None => { + // Find the next allocated block after the current free range + match self.data[end_idx..].first_one() { + Some(next_alloc_idx) => { + let free_block_size = next_alloc_idx as u32 + end_idx as u32 - offset; + + // Check if this free block is a better fit + if free_block_size >= size && free_block_size < best_fit_size { + best_fit_offset = Some(offset); + best_fit_size = free_block_size; + // If this free block is optimal finish iterating + if free_block_size == size { + break; + } + } + + offset = next_alloc_idx as u32 + end_idx as u32 + 1; + } + None => { + // No more allocated blocks, we have scanned the whole segment. + let free_block_size = self.data[offset as usize..].len() as u32; + + // Check if this free block is a better fit + if free_block_size >= size && free_block_size < best_fit_size { + best_fit_offset = Some(offset); + best_fit_size = free_block_size; + } + + break; + } + } + } + } + } + + if let Some(offset) = best_fit_offset { + self.mark(offset, size, Action::Allocate); + } + + best_fit_offset + } + + /// Allocates a block of the given `size` at `offset`. + /// Returns `false` if the allocation request cannot be satisfied. 
+ fn allocate_at(&mut self, size: u32, offset: u32) -> bool { + if size == 0 { + return true; + } + if offset + size > SEGMENT_SIZE as u32 { + return false; + } + + let start_idx = offset as usize; + let end_idx = (offset + size) as usize; + if self.data[start_idx..end_idx].any() { + return false; + } + self.mark(offset, size, Action::Allocate); + true + } +} diff --git a/betree/src/allocator/best_fit_tree.rs b/betree/src/allocator/best_fit_tree.rs new file mode 100644 index 000000000..6746c261d --- /dev/null +++ b/betree/src/allocator/best_fit_tree.rs @@ -0,0 +1,106 @@ +use std::collections::BTreeMap; + +use super::*; + +/// This is a true best fit allocator. It will always find the best fit if available. +pub struct BestFitTree { + data: BitArr!(for SEGMENT_SIZE, in u8, Lsb0), + tree: BTreeMap>, // store free segments sorted by size (size, offset) +} + +impl Allocator for BestFitTree { + fn data(&mut self) -> &mut BitArr!(for SEGMENT_SIZE, in u8, Lsb0) { + &mut self.data + } + + /// Constructs a new `BestFitTree` given the segment allocation bitmap. + /// The `bitmap` must have a length of `SEGMENT_SIZE`. + fn new(bitmap: [u8; SEGMENT_SIZE_BYTES]) -> Self { + let data = BitArray::new(bitmap); + let mut allocator = BestFitTree { + data, + tree: BTreeMap::new(), + }; + allocator.build_tree(); + allocator + } + + fn allocate(&mut self, size: u32) -> Option { + if size == 0 { + return Some(0); + } + + if let Some((&segment_size, offsets)) = self.tree.range(size..).next() { + let best_fit_offset = *offsets.first().unwrap(); + self.mark(best_fit_offset, size, Action::Allocate); + + // Update free segments tree + self.remove_free_segment(segment_size, best_fit_offset); + if segment_size > size { + self.insert_free_segment(segment_size - size, best_fit_offset + size); + } + + return Some(best_fit_offset); + } + + None + } + + fn allocate_at(&mut self, size: u32, offset: u32) -> bool { + if size == 0 { + return true; + } + if offset + size > SEGMENT_SIZE as u32 { + return false; + } + + let start_idx = offset as usize; + let end_idx = (offset + size) as usize; + if self.data[start_idx..end_idx].any() { + return false; + } + self.mark(offset, size, Action::Allocate); + + // Rebuild the tree to reflect changes. This is **not** efficient but the easiest solution, + // as the allocate_at is called only **once** on loading the bitmap from disk. 
+ self.build_tree(); + true + } +} + +impl BestFitTree { + fn build_tree(&mut self) { + self.tree.clear(); + + let mut offset: u32 = 0; + while offset < SEGMENT_SIZE as u32 { + if !self.data()[offset as usize] { + // If bit is 0, it's free + let start_offset = offset; + let mut current_size: u32 = 0; + while offset < SEGMENT_SIZE as u32 && !self.data()[offset as usize] { + current_size += 1; + offset += 1; + } + self.insert_free_segment(current_size, start_offset); + } else { + offset += 1; + } + } + } + + fn insert_free_segment(&mut self, size: u32, offset: u32) { + self.tree.entry(size).or_insert_with(Vec::new).push(offset); + } + + fn remove_free_segment(&mut self, size: u32, offset: u32) { + if let Some(offsets) = self.tree.get_mut(&size) { + if let Some(index) = offsets.iter().position(|&seg_offset| seg_offset == offset) { + offsets.remove(index); + if offsets.is_empty() { + self.tree.remove(&size); + } + } + } + } +} diff --git a/betree/src/allocator/first_fit_fsm.rs b/betree/src/allocator/first_fit_fsm.rs new file mode 100644 index 000000000..6dab9c214 --- /dev/null +++ b/betree/src/allocator/first_fit_fsm.rs @@ -0,0 +1,320 @@ +use super::*; + +/// Based on the free-space-map allocator from postgresql: +/// https://github.com/postgres/postgres/blob/02ed3c2bdcefab453b548bc9c7e0e8874a502790/src/backend/storage/freespace/README +pub struct FirstFitFSM { + data: BitArr!(for SEGMENT_SIZE, in u8, Lsb0), + fsm_tree: Vec<(u32, u32)>, // Array to represent the FSM tree, storing max free space + tree_height: u32, +} + +impl Allocator for FirstFitFSM { + fn data(&mut self) -> &mut BitArr!(for SEGMENT_SIZE, in u8, Lsb0) { + &mut self.data + } + + /// Constructs a new `FirstFitFSM` given the segment allocation bitmap. + /// The `bitmap` must have a length of `SEGMENT_SIZE`. 
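+    ///
+    /// The tree has the same array layout as in `BestFitFSM`: the leaves are
+    /// the free runs of the bitmap in offset order and each internal node holds
+    /// the larger child. Because the leaves are ordered by offset, `allocate`
+    /// descends left-first and thus returns the lowest-offset run that can hold
+    /// the request.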
+ fn new(bitmap: [u8; SEGMENT_SIZE_BYTES]) -> Self { + let data = BitArray::new(bitmap); + let mut allocator = FirstFitFSM { + data, + fsm_tree: Vec::new(), + tree_height: 0, + }; + allocator.build_fsm_tree(); + allocator + } + + fn allocate(&mut self, size: u32) -> Option { + if size == 0 { + return Some(0); + } + + if self.fsm_tree[0].1 < size { + return None; // Not enough free space + } + + let mut current_node_index = 0; + while current_node_index < self.fsm_tree.len() / 2 { + let left_child_index = 2 * current_node_index + 1; + let right_child_index = 2 * current_node_index + 2; + + // Check left child first for first fit + if let Some(left_child_value) = self.fsm_tree.get(left_child_index) { + if left_child_value.1 >= size { + current_node_index = left_child_index; + continue; // Go deeper into left subtree + } + } + if let Some(right_child_value) = self.fsm_tree.get(right_child_index) { + if right_child_value.1 >= size { + current_node_index = right_child_index; + continue; // Go deeper into right subtree + } + } + unreachable!(); + } + + // current_node_index is now the index of the best-fit leaf node + assert!(current_node_index >= self.fsm_tree.len() / 2); + let (offset, segment_size) = self.fsm_tree[current_node_index]; + + assert!(segment_size >= size); + + self.mark(offset, size, Action::Allocate); + + // Update the segment in the leaf node + self.fsm_tree[current_node_index].0 += size; + self.fsm_tree[current_node_index].1 -= size; + + // Update internal nodes up to the root + let mut current_index = current_node_index; + while current_index > 0 { + current_index = (current_index - 1) / 2; // Index of parent node + let left_child_index = 2 * current_index + 1; + let right_child_index = 2 * current_index + 2; + + let left_child_value = *self.fsm_tree.get(left_child_index).unwrap_or(&(0, 0)); + let right_child_value = *self.fsm_tree.get(right_child_index).unwrap_or(&(0, 0)); + if left_child_value.1 > right_child_value.1 { + self.fsm_tree[current_index] = left_child_value + } else { + self.fsm_tree[current_index] = right_child_value + } + } + + return Some(offset); + } + + fn allocate_at(&mut self, size: u32, offset: u32) -> bool { + // NOTE: Because the tree is sorted by offset because of how it's build, this shouldn't be + // to hard to implement efficiently if needed. + + if size == 0 { + return true; + } + if offset + size > SEGMENT_SIZE as u32 { + return false; + } + + let start_idx = offset as usize; + let end_idx = (offset + size) as usize; + if self.data[start_idx..end_idx].any() { + return false; + } + self.mark(offset, size, Action::Allocate); + + // Rebuild the tree to reflect changes. This is **not** efficient but the easiest solution, + // as the allocate_at is called only **once** on loading the bitmap from disk. 
+ self.build_fsm_tree(); + true + } +} + +impl FirstFitFSM { + fn get_free_segments(&mut self) -> Vec<(u32, u32)> { + let mut offset: u32 = 0; + let mut free_segments = Vec::new(); + while offset < SEGMENT_SIZE as u32 { + if !self.data()[offset as usize] { + // If bit is 0, it's free + let start_offset = offset; + let mut current_size: u32 = 0; + while offset < SEGMENT_SIZE as u32 && !self.data()[offset as usize] { + current_size += 1; + offset += 1; + } + free_segments.push((start_offset, current_size)); + } else { + offset += 1; + } + } + free_segments + } + + fn build_fsm_tree(&mut self) { + let leaf_nodes = self.get_free_segments(); + let leaf_nodes_num = leaf_nodes.len(); + + if leaf_nodes_num == 0 { + self.fsm_tree = vec![(0, 0)]; // Root node with 0 free space + return; + } + + // Calculate the size of the FSM tree array. For simplicity we assume complete tree for now. + self.tree_height = (leaf_nodes_num as f64).log2().ceil() as u32; + // Number of nodes in complete binary tree of height h is 2^(h+1) - 1 + let tree_nodes_num = (1 << (self.tree_height + 1)) - 1; + + self.fsm_tree.clear(); + self.fsm_tree.resize(tree_nodes_num as usize, (0, 0)); + + // 1. Initialize leaf nodes in fsm_tree from free_segments + // OPTIM: just use memcpy + for (i, &(offset, size)) in leaf_nodes.iter().enumerate() { + // Leaf nodes are at the end of the fsm_tree array in a complete binary tree + let leaf_index = (tree_nodes_num / 2) + i; + if leaf_index < tree_nodes_num { + // Prevent out-of-bounds access if free_segments.len() is not power of 2 + self.fsm_tree[leaf_index] = (offset, size); + } + } + + // 2. Build internal nodes bottom-up similar to a binary heap + for i in (0..(tree_nodes_num / 2)).rev() { + let left_child_index = 2 * i + 1; + let right_child_index = 2 * i + 2; + + // Default to 0 if index is out of bounds (incomplete tree) + let left_child_value = *self.fsm_tree.get(left_child_index).unwrap_or(&(0, 0)); + let right_child_value = *self.fsm_tree.get(right_child_index).unwrap_or(&(0, 0)); + + if left_child_value.1 > right_child_value.1 { + self.fsm_tree[i] = left_child_value + } else { + self.fsm_tree[i] = right_child_value + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn build_empty() { + let bitmap = [0u8; SEGMENT_SIZE_BYTES]; + let allocator = FirstFitFSM::new(bitmap); + + // In an empty bitmap, the root node should have a large free space + assert_eq!(allocator.fsm_tree[0].0, 0 as u32); + assert_eq!(allocator.fsm_tree[0].1, SEGMENT_SIZE as u32); + assert_eq!(allocator.tree_height, 0); + } + + #[test] + fn build_simple() { + // Example bitmap: 3 segments allocated at the beginning, 2 free, 3 allocated, rest free + let mut allocator = FirstFitFSM::new([0u8; SEGMENT_SIZE_BYTES]); + let bitmap = allocator.data(); + + // Manually allocate some segments + bitmap[0..3].fill(true); // Allocate 3 blocks at the beginning + bitmap[5..7].fill(true); // Allocate 2 blocks after the free ones + + let mut allocator = FirstFitFSM::new(bitmap.into_inner()); + + let fsm_tree = vec![ + (7, SEGMENT_SIZE as u32 - 7), + (3, 2), + (7, SEGMENT_SIZE as u32 - 7), + ]; + assert_eq!(allocator.fsm_tree, fsm_tree); + assert_eq!(allocator.tree_height, 1); + } + + #[test] + fn build_complex() { + let mut allocator = FirstFitFSM::new([0u8; SEGMENT_SIZE_BYTES]); + let bitmap = allocator.data(); + + // Manually allocate some segments to create a non-trivial tree + bitmap[0..3].fill(true); + bitmap[5..8].fill(true); + bitmap[8..10].fill(true); + bitmap[14..22].fill(true); + 
bitmap[35..36].fill(true); + bitmap[42..53].fill(true); + + let allocator = FirstFitFSM::new(bitmap.into_inner()); + + // binary heap layout + let fsm_tree = vec![ + (53, SEGMENT_SIZE as u32 - 53), + (22, 13), + (53, SEGMENT_SIZE as u32 - 53), + (10, 4), + (22, 13), + (53, SEGMENT_SIZE as u32 - 53), + (0, 0), + (3, 2), + (10, 4), + (22, 13), + (36, 6), + (53, SEGMENT_SIZE as u32 - 53), + (0, 0), + (0, 0), + (0, 0), + ]; + + assert_eq!(fsm_tree, allocator.fsm_tree); + assert_eq!(allocator.tree_height, 3); + } + + #[test] + fn allocate_empty_fsm_tree() { + let bitmap = [0u8; SEGMENT_SIZE_BYTES]; + let mut allocator = FirstFitFSM::new(bitmap); + + let allocation = allocator.allocate(1024); + assert!(allocation.is_some()); // Allocation should succeed + + let allocated_offset = allocation.unwrap(); + assert_eq!(allocated_offset, 0); // Should allocate at the beginning + + // Check if the allocated region is marked as used in the bitmap + assert!(allocator.data()[0..1024 as usize].all()); + // Check root node value after allocation + assert_eq!(allocator.fsm_tree[0], (1024, SEGMENT_SIZE as u32 - 1024)); + } + + #[test] + fn allocate_complex_fsm_tree() { + let mut allocator = FirstFitFSM::new([0u8; SEGMENT_SIZE_BYTES]); + let bitmap = allocator.data(); + + // Manually allocate some segments to create a non-trivial tree + bitmap[0..3].fill(true); + bitmap[5..8].fill(true); + bitmap[8..10].fill(true); + bitmap[14..22].fill(true); + bitmap[35..36].fill(true); + bitmap[42..53].fill(true); + + let mut allocator = FirstFitFSM::new(bitmap.into_inner()); + + // Best-fit should allocate from the segment at offset 3 with size 2 + let allocation = allocator.allocate(2); // Request allocation of size 2 + assert!(allocation.is_some()); + assert_eq!(allocation.unwrap(), 3); + // Verify that the allocated region is marked in the bitmap + assert!(allocator.data()[3..5].all()); + + let allocation2 = allocator.allocate(10); + assert!(allocation2.is_some()); + assert_eq!(allocation2.unwrap(), 22); + assert!(allocator.data()[22..32].all()); + + // Allocate again, to use the next best fit segment + let allocation2 = allocator.allocate(100); + assert!(allocation2.is_some()); + assert_eq!(allocation2.unwrap(), 53); + assert!(allocator.data()[53..153].all()); + assert_eq!(allocator.fsm_tree[0].1, SEGMENT_SIZE as u32 - 153); + } + + #[test] + fn allocate_fail_fsm_tree() { + let mut allocator = FirstFitFSM::new([0u8; SEGMENT_SIZE_BYTES]); + let root_free_space = allocator.fsm_tree[0].1; + + // Try to allocate more than available space + let allocation = allocator.allocate(root_free_space + 1); + assert!(allocation.is_none()); // Allocation should fail + + // Check if fsm_tree root value is still the same + assert_eq!(allocator.fsm_tree[0].1, root_free_space); // Should remain unchanged + } +} diff --git a/betree/src/allocator/first_fit_list.rs b/betree/src/allocator/first_fit_list.rs new file mode 100644 index 000000000..f7e54c758 --- /dev/null +++ b/betree/src/allocator/first_fit_list.rs @@ -0,0 +1,126 @@ +use super::*; + +/// Simple first-fit bitmap allocator that uses a list to manage free segments +pub struct FirstFitList { + data: BitArr!(for SEGMENT_SIZE, in u8, Lsb0), + free_segments: Vec<(u32, u32)>, // (offset, size) of free segments +} + +impl Allocator for FirstFitList { + fn data(&mut self) -> &mut BitArr!(for SEGMENT_SIZE, in u8, Lsb0) { + &mut self.data + } + + /// Constructs a new `ListFirstFit` given the segment allocation bitmap. + /// The `bitmap` must have a length of `SEGMENT_SIZE`. 
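+    ///
+    /// `free_segments` holds the free runs of the bitmap as `(offset, size)`
+    /// pairs in offset order; `allocate` walks the list front to back and takes
+    /// the first run that is large enough, shrinking it in place. Emptied
+    /// entries are left in the list and cleaned up on the next sync (see the
+    /// note in `allocate`).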
+ fn new(bitmap: [u8; SEGMENT_SIZE_BYTES]) -> Self { + let data = BitArray::new(bitmap); + let mut allocator = FirstFitList { + data, + free_segments: Vec::new(), + }; + allocator.initialize_free_segments(); + allocator + } + + /// Allocates a block of the given `size`. + /// Returns `None` if the allocation request cannot be satisfied and the offset if if can. + fn allocate(&mut self, size: u32) -> Option { + if size == 0 { + return Some(0); + } + + for i in 0..self.free_segments.len() { + let (offset, segment_size) = self.free_segments[i]; + + if segment_size >= size { + self.mark(offset, size, Action::Allocate); + + // update the free segment with the remaining size and new offset + self.free_segments[i].0 = offset + size; + self.free_segments[i].1 = segment_size - size; + // NOTE: We do not handle the == case here. We could remove that entry from the + // list but we then would need to copy some things because the allocate_at (and + // deallocation) logic depends on a sorted list and also need have extra handling. + // The empty slots get garbage collected on the next sync anyway. + return Some(offset); + } + } + None + } + + /// Allocates a block of the given `size` at `offset`. + /// Returns `false` if the allocation request cannot be satisfied. + fn allocate_at(&mut self, size: u32, offset: u32) -> bool { + if size == 0 { + return true; + } + if offset + size > SEGMENT_SIZE as u32 { + return false; + } + + let start_idx = offset as usize; + let end_idx = (offset + size) as usize; + if self.data[start_idx..end_idx].any() { + return false; + } + + // Update free_segments to reflect the allocation + for i in 0..self.free_segments.len() { + let (seg_offset, seg_size) = self.free_segments[i]; + if seg_offset == offset && seg_size == size { + // perfect fit, remove the segment + self.free_segments.remove(i); + self.mark(offset, size, Action::Allocate); + return true; + } else if seg_offset == offset && seg_size > size { + // allocation at the beginning of the segment, adjust offset and size + self.free_segments[i].0 += size; + self.free_segments[i].1 -= size; + self.mark(offset, size, Action::Allocate); + return true; + } else if offset > seg_offset && offset + size == seg_offset + seg_size { + // allocation at the end of the segment, just adjust size + self.free_segments[i].1 -= size; + self.mark(offset, size, Action::Allocate); + return true; + } else if offset > seg_offset + && offset < seg_offset + seg_size + && offset + size < seg_offset + seg_size + { + // allocation in the middle of the segment, split segment + let remaining_size = seg_size - (size + (offset - seg_offset)); + let new_offset = offset + size; + self.free_segments[i].1 = offset - seg_offset; // adjust current segment size + + self.free_segments + .insert(i + 1, (new_offset, remaining_size)); // insert new segment after current + self.mark(offset, size, Action::Allocate); + return true; + } + } + + false // No suitable free segment found in free_segments list + } +} + +impl FirstFitList { + /// Initializes the `free_segments` vector by scanning the bitmap. 
+ fn initialize_free_segments(&mut self) { + let mut offset: u32 = 0; + while offset < SEGMENT_SIZE as u32 { + if !self.data()[offset as usize] { + // If bit is 0, it's free + let start_offset = offset; + let mut current_size = 0; + while offset < SEGMENT_SIZE as u32 && !self.data()[offset as usize] { + current_size += 1; + offset += 1; + } + self.free_segments.push((start_offset, current_size)); + } else { + offset += 1; + } + } + } +} diff --git a/betree/src/allocator/first_fit_scan.rs b/betree/src/allocator/first_fit_scan.rs new file mode 100644 index 000000000..77c96bd08 --- /dev/null +++ b/betree/src/allocator/first_fit_scan.rs @@ -0,0 +1,68 @@ +use super::*; + +/// Simple first-fit bitmap allocator +pub struct FirstFitScan { + data: BitArr!(for SEGMENT_SIZE, in u8, Lsb0), +} + +impl Allocator for FirstFitScan { + fn data(&mut self) -> &mut BitArr!(for SEGMENT_SIZE, in u8, Lsb0) { + &mut self.data + } + + /// Constructs a new `FirstFit` given the segment allocation bitmap. + /// The `bitmap` must have a length of `SEGMENT_SIZE`. + fn new(bitmap: [u8; SEGMENT_SIZE_BYTES]) -> Self { + FirstFitScan { + data: BitArray::new(bitmap), + } + } + + /// Allocates a block of the given `size`. + /// Returns `None` if the allocation request cannot be satisfied and the offset if if can. + fn allocate(&mut self, size: u32) -> Option { + if size == 0 { + return Some(0); + } + + let mut offset = 0; + + while offset + size <= SEGMENT_SIZE as u32 { + let start_idx = offset as usize; + let end_idx = (offset + size) as usize; + + match self.data[start_idx..end_idx].last_one() { + Some(last_alloc_idx) => { + // Skip to the end of the last allocated block if there is any one at all. + offset += last_alloc_idx as u32 + 1 + } + None => { + // No allocated blocks found, so allocate here. + self.mark(offset, size, Action::Allocate); + return Some(offset); + } + } + } + + None + } + + /// Allocates a block of the given `size` at `offset`. + /// Returns `false` if the allocation request cannot be satisfied. + fn allocate_at(&mut self, size: u32, offset: u32) -> bool { + if size == 0 { + return true; + } + if offset + size > SEGMENT_SIZE as u32 { + return false; + } + + let start_idx = offset as usize; + let end_idx = (offset + size) as usize; + if self.data[start_idx..end_idx].any() { + return false; + } + self.mark(offset, size, Action::Allocate); + true + } +} diff --git a/betree/src/allocator/hybrid_allocator.rs b/betree/src/allocator/hybrid_allocator.rs new file mode 100644 index 000000000..3e87253e4 --- /dev/null +++ b/betree/src/allocator/hybrid_allocator.rs @@ -0,0 +1,243 @@ +use super::*; +use std::cmp::min; + +// Define pool configurations at compile time. +// Each tuple represents a pool: (SECTION_SIZE, POOL_PERCENTAGE) +// NOTE: Unfortunately Rust cannot infer the number of array elements, so change that, when adding +// or removing pools. +const POOL_CONFIGS: [(usize, f64); 3] = [(768, 0.80), (128, 0.05), (256, 0.05)]; +const NUM_POOLS: usize = POOL_CONFIGS.len(); + +// Number elements/slots a pool has. +const POOL_ELEMENTS: [usize; NUM_POOLS] = { + let mut arr = [0usize; NUM_POOLS]; + let mut i = 0; + while i < NUM_POOLS { + let tentative_blocks = (SEGMENT_SIZE as f64 * POOL_CONFIGS[i].1) as usize; + arr[i] = tentative_blocks / POOL_CONFIGS[i].0; + i += 1; + } + arr +}; + +// Number of 4KiB blocks each pool manages. 
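+// With the default POOL_CONFIGS above and SEGMENT_SIZE = 2^18 blocks this works
+// out to (illustrative arithmetic, not asserted anywhere in the code):
+//   pool 0: 273 sections * 768 blocks = 209_664 blocks
+//   pool 1: 102 sections * 128 blocks =  13_056 blocks
+//   pool 2:  51 sections * 256 blocks =  13_056 blocks
+// i.e. roughly 90% of the segment is pooled and the remaining ~26_368 blocks
+// form the next-fit fallback region.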
+const POOL_BLOCKS_PER_POOL: [usize; NUM_POOLS] = { + let mut arr = [0usize; NUM_POOLS]; + let mut i = 0; + while i < NUM_POOLS { + arr[i] = POOL_ELEMENTS[i] * POOL_CONFIGS[i].0; + i += 1; + } + arr +}; + +// Sum of POOL_BLOCKS_PER_POOL. +const POOL_BLOCKS: usize = { + let mut total_blocks = 0; + let mut i = 0; + while i < NUM_POOLS { + total_blocks += POOL_BLOCKS_PER_POOL[i]; + i += 1; + } + total_blocks +}; + +// Offset where the blocks, that a pool manage, start in the global bitmap. +const POOL_OFFSET_START: [usize; NUM_POOLS] = { + let mut arr = [0usize; NUM_POOLS]; + let mut current_offset = 0; + let mut i = 0; + while i < NUM_POOLS { + arr[i] = current_offset; + current_offset += POOL_BLOCKS_PER_POOL[i]; + i += 1; + } + arr +}; + +struct Pool { + bitmap: BitVec, + last_offset: usize, + section_size: usize, + elements: usize, +} + +impl Pool { + fn new(section_size: usize, elements: usize) -> Self { + Pool { + bitmap: BitVec::with_capacity(elements), // Initialize with capacity for performance + last_offset: 0, + section_size, + elements, + } + } + + fn initialize_bitmap( + &mut self, + global_bitmap: &BitArr!(for SEGMENT_SIZE, in u8, Lsb0), + pool_start_offset: usize, + ) { + self.bitmap.resize(self.elements, false); // Actually create the bits now + // Initialize the pool bitmap based on the global bitmap + for i in 0..self.elements { + let start = pool_start_offset + i * self.section_size; + let end = pool_start_offset + (i + 1) * self.section_size; + if global_bitmap[start..end].any() { + self.bitmap.set(i, true); + } + } + } + + fn allocate_section(&mut self) -> Option { + // Next-Fit allocation within the pool + for _ in 0..self.elements { + if self.last_offset >= self.elements { + self.last_offset = 0; // Wrap around + } + + let offset = self.last_offset; + self.last_offset += 1; + + if !self.bitmap[offset] { + // Found free space. + self.bitmap.set(offset, true); + return Some(offset as u32); + } + } + None + } +} + +/// Hybrid allocator with pools for different sized-block allocations and NextFitScan for the rest. +pub struct HybridAllocator { + // Underlying bitmap of the allocator. The NextFitScan allocator works directly on this bitmap. + data: BitArr!(for SEGMENT_SIZE, in u8, Lsb0), + last_offset: u32, + // Pools for fixed-size allocations, using a vector of Pools + pools: [Pool; NUM_POOLS], +} + +impl Allocator for HybridAllocator { + fn data(&mut self) -> &mut BitArr!(for SEGMENT_SIZE, in u8, Lsb0) { + &mut self.data + } + + fn new(bitmap: [u8; SEGMENT_SIZE_BYTES]) -> Self + where + Self: Sized, + { + let data = BitArray::new(bitmap); + let mut allocator = HybridAllocator { + data, + last_offset: POOL_BLOCKS as u32, // Next-fit starts after pools + pools: { + core::array::from_fn(|i| { + let section_size = POOL_CONFIGS[i].0; + let elements = POOL_ELEMENTS[i]; + Pool::new(section_size, elements) + }) + }, + }; + + // Initialize pool bitmaps + for i in 0..NUM_POOLS { + let start_offset = POOL_OFFSET_START[i]; + allocator.pools[i].initialize_bitmap(&allocator.data, start_offset); + } + + allocator + } + + fn allocate(&mut self, size: u32) -> Option { + if size == 0 { + return Some(0); + } + + for i in 0..NUM_POOLS { + if size as usize == self.pools[i].section_size { + // Try to allocate from the pool. + if let Some(pool_offset) = self.pools[i].allocate_section() { + // Found free space in pool. 
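+                    // Translate the pool-relative section index back into a global
+                    // block offset, e.g. section 2 of pool 0 starts at
+                    // POOL_OFFSET_START[0] + 2 * 768.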
+ let offset = (POOL_OFFSET_START[i] as u32 + + pool_offset * self.pools[i].section_size as u32) + as u32; + self.mark(offset, size, Action::Allocate); + return Some(offset); + } + // Pool is full, break and use fallback. + break; // Only check one pool if size matches. + } + } + + // Fallback to next-fit allocator for other sizes and if no space found in pools. + let mut offset = self.last_offset; + let mut wrap_around = false; + + loop { + if offset + size > SEGMENT_SIZE as u32 { + // Wrap around to the beginning of the segment. + wrap_around = true; + } + + if wrap_around && offset >= self.last_offset { + // We've circled back to the starting point, no space found + return None; + } + + if offset + size > SEGMENT_SIZE as u32 { + offset = POOL_BLOCKS as u32; // Start next-fit scan after pools + } + + let start_idx = offset as usize; + let end_idx = (offset + size) as usize; + + match self.data[start_idx..end_idx].last_one() { + Some(last_alloc_idx) => { + // Skip to the end of the last allocated block if there is any one at all. + offset += last_alloc_idx as u32 + 1 + } + None => { + // No allocated blocks found, so allocate here. + self.mark(offset, size, Action::Allocate); + self.last_offset = offset + size; + return Some(offset); + } + } + } + } + + fn allocate_at(&mut self, size: u32, offset: u32) -> bool { + if size == 0 { + return true; + } + if offset + size > SEGMENT_SIZE as u32 { + return false; + } + + let start_idx = offset as usize; + let end_idx = (offset + size) as usize; + if self.data[start_idx..end_idx].any() { + return false; + } + self.mark(offset, size, Action::Allocate); + + // Find all pools and elements of these pools, that intersect the allocation in some way + // and mark them as allocated. + for i in 0..NUM_POOLS { + let pool = &mut self.pools[i]; + let pool_start = POOL_OFFSET_START[i] as u32; + let pool_end = pool_start + POOL_BLOCKS_PER_POOL[i] as u32; + if offset >= pool_start && offset < pool_end { + let pool_section_offset = (offset - pool_start) as usize / pool.section_size; + let pool_section_size = (size as usize / pool.section_size) + 1; + + let pool_section_end = + min(pool_section_size + pool_section_offset, pool.bitmap.len()); + + pool.bitmap[pool_section_offset..pool_section_end].fill(true); + } + } + + true + } +} diff --git a/betree/src/allocator/mod.rs b/betree/src/allocator/mod.rs new file mode 100644 index 000000000..c9b1a68cc --- /dev/null +++ b/betree/src/allocator/mod.rs @@ -0,0 +1,276 @@ +use crate::{cow_bytes::CowBytes, storage_pool::DiskOffset, vdev::Block}; +use bitvec::prelude::*; +use byteorder::{BigEndian, ByteOrder}; +use serde::{Deserialize, Serialize}; + +mod first_fit_scan; +pub use self::first_fit_scan::FirstFitScan; + +mod next_fit_scan; +pub use self::next_fit_scan::NextFitScan; + +mod best_fit_scan; +pub use self::best_fit_scan::BestFitScan; + +mod worst_fit_scan; +pub use self::worst_fit_scan::WorstFitScan; + +mod segment_allocator; +pub use self::segment_allocator::SegmentAllocator; + +mod first_fit_list; +pub use self::first_fit_list::FirstFitList; + +mod next_fit_list; +pub use self::next_fit_list::NextFitList; + +mod best_fit_list; +pub use self::best_fit_list::BestFitList; + +mod worst_fit_list; +pub use self::worst_fit_list::WorstFitList; + +mod first_fit_fsm; +pub use self::first_fit_fsm::FirstFitFSM; + +mod best_fit_tree; +pub use self::best_fit_tree::BestFitTree; + +mod best_fit_fsm; +pub use self::best_fit_fsm::BestFitFSM; + +mod worst_fit_fsm; +pub use self::worst_fit_fsm::WorstFitFSM; + +mod hybrid_allocator; 
+pub use self::hybrid_allocator::HybridAllocator; + +/// 256KiB, so that `vdev::BLOCK_SIZE * SEGMENT_SIZE == 1GiB` +pub const SEGMENT_SIZE: usize = 1 << SEGMENT_SIZE_LOG_2; +/// Number of bytes required to store a segments allocation bitmap +pub const SEGMENT_SIZE_BYTES: usize = SEGMENT_SIZE / 8; + +/// Define SEGMENT_SIZE_LOG_2 based on feature flags used for benchmarking allocators based on +/// different segment sizes. +pub const SEGMENT_SIZE_LOG_2: usize = 18; + +const SEGMENT_SIZE_MASK: usize = SEGMENT_SIZE - 1; + +/// The `AllocatorType` enum represents different strategies for allocating blocks +/// of memory within a fixed-size segment. +#[derive(Debug, Serialize, Deserialize, Clone, Copy)] +#[serde(rename_all = "lowercase")] // This will deserialize "firstfit" as FirstFit +pub enum AllocatorType { + /// **First Fit Scan:** + /// This allocator searches the segment from the beginning and allocates the + /// first free block that is large enough to satisfy the request. + FirstFitScan, + + /// **First Fit List:** + /// This allocator builds an internal list of the free space in the segment + /// and then searches from the beginning in that list and allocates the + /// first free block that is large enough to satisfy the request. + FirstFitList, + + /// **First Fit Tree:** + /// This allocator builds a binary tree of offsets and sizes, that has the + /// max-heap property on the sizes and uses it to find suitable free space. + FirstFitTree, + + /// **Next Fit Scan:** + /// This allocator starts searching from the last allocation and continues + /// searching the segment for the next free block that is large enough. + NextFitScan, + + /// **Next Fit List:** + /// This allocator builds an internal list of the free space in the segment + /// and then starts from the last allocation in that list and allocates the + /// next free block that is large enough to satisfy the request. + NextFitList, + + /// **Best Fit Scan:** + /// This allocator searches the entire segment and allocates the smallest + /// free block that is large enough to satisfy the request. This simple + /// version uses a linear search to find the best fit. + BestFitScan, + + /// **Best Fit List:** + /// This allocator maintains a list of free segments and chooses the best-fit + /// segment from this list to allocate memory. + BestFitList, + + /// **Best Fit Tree:** + /// This allocator builds a btree of offsets and sizes, that is sorted by + /// the sizes and uses it to find suitable free space. + BestFitTree, + + /// **Approximate Best Fit Tree:** + /// This allocator builds a binary tree of offsets and sizes, that has the + /// max-heap property on the sizes and uses it to find suitable free space. + ApproximateBestFitTree, + + /// **Worst Fit Scan:** + /// This allocator searches the entire segment and allocates the largest + /// free block. This simple version uses a linear search to find the worst + /// fit. + WorstFitScan, + + /// **Worst Fit List:** + /// This allocator maintains a list of free segments and chooses the worst-fit + /// (largest) segment from this list to allocate memory. + WorstFitList, + + /// **Worst Fit Tree:** + /// This allocator builds a binary tree of offsets and sizes, that has the + /// max-heap property on the sizes and uses it to find suitable free space. + WorstFitTree, + + /// **Segment Allocator:** + /// This is a first fit allocator that was used before making the allocators + /// generic. It is not efficient and mainly included for reference. 
+ SegmentAllocator, + + /// **Hybrid Allocator:** + HybridAllocator, +} + +/// The `Allocator` trait defines an interface for allocating and deallocating +/// blocks of memory within a fixed-size segment. Different allocators can +/// implement various strategies for managing free space within the segment. +pub trait Allocator: Send + Sync { + /// Accesses the underlying bitmap data. + fn data(&mut self) -> &mut BitArr!(for SEGMENT_SIZE, in u8, Lsb0); + + /// Constructs a new `Allocator` instance given the segment allocation bitmap. + /// The `bitmap` must have a length of `SEGMENT_SIZE`. + fn new(bitmap: [u8; SEGMENT_SIZE_BYTES]) -> Self + where + Self: Sized; + + /// Allocates a block of memory of the given `size`. + /// + /// This method attempts to find a contiguous free block of memory within the + /// segment that is large enough to satisfy the request. + fn allocate(&mut self, size: u32) -> Option; + + /// Allocates a block of memory of the given `size` at the specified `offset`. + /// + /// This method attempts to allocate a contiguous block of memory at the given offset. + /// TODO: investigate if we need this method at all + fn allocate_at(&mut self, size: u32, offset: u32) -> bool; + + /// Marks a range of bits in the bitmap with the given action. + #[inline] + fn mark(&mut self, offset: u32, size: u32, action: Action) { + let start_idx = offset as usize; + let end_idx = (offset + size) as usize; + let range = &mut self.data()[start_idx..end_idx]; + + match action { + Action::Allocate => debug_assert!(!range.any()), + Action::Deallocate => debug_assert!(range.all()), + } + + range.fill(action.as_bool()); + } +} + +// TODO better wording +/// Allocation action +#[derive(Clone, Copy)] +pub enum Action { + /// Deallocate an allocated block. + Deallocate, + /// Allocate a deallocated block. + Allocate, +} + +impl Action { + /// Returns 1 if allocation and 0 if deallocation. + #[inline] + pub fn as_bool(self) -> bool { + match self { + Action::Deallocate => false, + Action::Allocate => true, + } + } +} + +/// Identifier for 1GiB segments of a `StoragePool`. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct SegmentId(pub u64); + +impl SegmentId { + /// Returns the corresponding segment of the given disk offset. + pub fn get(offset: DiskOffset) -> Self { + SegmentId(offset.as_u64() & !(SEGMENT_SIZE_MASK as u64)) + } + + /// Returns the block offset into the segment. + pub fn get_block_offset(offset: DiskOffset) -> u32 { + offset.as_u64() as u32 & SEGMENT_SIZE_MASK as u32 + } + + /// Returns the disk offset at the start of this segment. + pub fn as_disk_offset(&self) -> DiskOffset { + DiskOffset::from_u64(self.0) + } + + /// Returns the disk offset of the block in this segment at the given + /// offset. + pub fn disk_offset(&self, segment_offset: u32) -> DiskOffset { + DiskOffset::from_u64(self.0 + u64::from(segment_offset)) + } + + /// Returns the key of this segment for messages and queries. + pub fn key(&self, key_prefix: &[u8]) -> CowBytes { + // Shave off the two lower bytes because they are always null. + let mut segment_key = [0; 8]; + BigEndian::write_u64(&mut segment_key[..], self.0); + assert_eq!(&segment_key[6..], &[0, 0]); + + let mut key = CowBytes::new(); + key.push_slice(key_prefix); + key.push_slice(&segment_key[..6]); + key + } + + /// Returns the ID of the disk that belongs to this segment. + pub fn disk_id(&self) -> u16 { + self.as_disk_offset().disk_id() + } + + /// Returns the next segment ID. + /// Wraps around at the end of the disk. 
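+    ///
+    /// A hypothetical sketch (not taken from the crate's tests), assuming a disk on
+    /// storage class 0 / disk 0 that holds exactly four segments:
+    ///
+    /// ```ignore
+    /// let disk_size = Block(4 * SEGMENT_SIZE as u64); // assumed disk size in blocks
+    /// let last = SegmentId(3 * SEGMENT_SIZE as u64);  // last segment of that disk
+    /// assert_eq!(last.next(disk_size), SegmentId(0)); // wraps back to the first segment
+    /// ```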
+ pub fn next(&self, disk_size: Block) -> SegmentId { + let disk_offset = self.as_disk_offset(); + if disk_offset.block_offset().as_u64() + SEGMENT_SIZE as u64 >= disk_size.as_u64() { + SegmentId::get(DiskOffset::new( + disk_offset.storage_class(), + disk_offset.disk_id(), + Block(0), + )) + } else { + SegmentId(self.0 + SEGMENT_SIZE as u64) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn segment_id() { + let offset = DiskOffset::new(1, 2, Block::from_bytes(4096)); + let segment = SegmentId::get(offset); + + assert_eq!(segment.as_disk_offset().storage_class(), 1); + assert_eq!(segment.disk_id(), 2); + assert_eq!( + segment.as_disk_offset().block_offset(), + Block::from_bytes(0) + ); + assert_eq!(SegmentId::get_block_offset(offset), 1); + } +} diff --git a/betree/src/allocator/next_fit_list.rs b/betree/src/allocator/next_fit_list.rs new file mode 100644 index 000000000..8fb5d84bb --- /dev/null +++ b/betree/src/allocator/next_fit_list.rs @@ -0,0 +1,136 @@ +use super::*; + +/// Simple Next-Fit bitmap allocator that uses a list to manage free segments +pub struct NextFitList { + data: BitArr!(for SEGMENT_SIZE, in u8, Lsb0), + free_segments: Vec<(u32, u32)>, // (offset, size) of free segments + last_offset_index: usize, // Index of the last checked segment in free_segments +} + +impl Allocator for NextFitList { + fn data(&mut self) -> &mut BitArr!(for SEGMENT_SIZE, in u8, Lsb0) { + &mut self.data + } + + /// Constructs a new `ListNextFit` given the segment allocation bitmap. + /// The `bitmap` must have a length of `SEGMENT_SIZE`. + fn new(bitmap: [u8; SEGMENT_SIZE_BYTES]) -> Self { + let data = BitArray::new(bitmap); + let mut allocator = NextFitList { + data, + free_segments: Vec::new(), + last_offset_index: 0, // Initialize last_offset_index to 0 + }; + allocator.initialize_free_segments(); + allocator + } + + /// Allocates a block of the given `size`. + /// Returns `None` if the allocation request cannot be satisfied. + fn allocate(&mut self, size: u32) -> Option { + if size == 0 { + return Some(0); + } + + for _ in 0..self.free_segments.len() { + if self.last_offset_index >= self.free_segments.len() { + // Check for wrap-around + self.last_offset_index = 0; + } + + let (offset, segment_size) = self.free_segments[self.last_offset_index]; + + if segment_size >= size { + self.mark(offset, size, Action::Allocate); + + // update the free segment with the remaining size and new offset + self.free_segments[self.last_offset_index].0 = offset + size; + self.free_segments[self.last_offset_index].1 = segment_size - size; + // NOTE: We do not handle the == case here. We could remove that entry from the + // list but we then would need to copy some things because the allocate_at (and + // deallocation) logic depends on a sorted list and also need have extra handling. + // The empty slots get garbage collected on the next sync anyway. + + self.last_offset_index += 1; + return Some(offset); + } + self.last_offset_index += 1; + } + None + } + + /// Allocates a block of the given `size` at `offset`. + /// Returns `false` if the allocation request cannot be satisfied. 
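+    /// The free-segment list is kept consistent with the bitmap by shrinking or
+    /// splitting the entry that covers the requested range.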
+ fn allocate_at(&mut self, size: u32, offset: u32) -> bool { + if size == 0 { + return true; + } + if offset + size > SEGMENT_SIZE as u32 { + return false; + } + + let start_idx = offset as usize; + let end_idx = (offset + size) as usize; + if self.data[start_idx..end_idx].any() { + return false; + } + + // Update free_segments to reflect the allocation + for i in 0..self.free_segments.len() { + let (seg_offset, seg_size) = self.free_segments[i]; + if seg_offset == offset && seg_size == size { + // perfect fit, remove the segment + self.free_segments.remove(i); + self.mark(offset, size, Action::Allocate); + return true; + } else if seg_offset == offset && seg_size > size { + // allocation at the beginning of the segment, adjust offset and size + self.free_segments[i].0 += size; + self.free_segments[i].1 -= size; + self.mark(offset, size, Action::Allocate); + return true; + } else if offset > seg_offset && offset + size == seg_offset + seg_size { + // allocation at the end of the segment, just adjust size + self.free_segments[i].1 -= size; + self.mark(offset, size, Action::Allocate); + return true; + } else if offset > seg_offset + && offset < seg_offset + seg_size + && offset + size < seg_offset + seg_size + { + // allocation in the middle of the segment, split segment + let remaining_size = seg_size - (size + (offset - seg_offset)); + let new_offset = offset + size; + self.free_segments[i].1 = offset - seg_offset; // adjust current segment size + + self.free_segments + .insert(i + 1, (new_offset, remaining_size)); // insert new segment after current + self.mark(offset, size, Action::Allocate); + return true; + } + } + + false + } +} + +impl NextFitList { + /// Initializes the `free_segments` vector by scanning the bitmap. + fn initialize_free_segments(&mut self) { + let mut offset: u32 = 0; + while offset < SEGMENT_SIZE as u32 { + if !self.data()[offset as usize] { + // If bit is 0, it's free + let start_offset = offset; + let mut current_size = 0; + while offset < SEGMENT_SIZE as u32 && !self.data()[offset as usize] { + current_size += 1; + offset += 1; + } + self.free_segments.push((start_offset, current_size)); + } else { + offset += 1; + } + } + } +} diff --git a/betree/src/allocator/next_fit_scan.rs b/betree/src/allocator/next_fit_scan.rs new file mode 100644 index 000000000..cd644035a --- /dev/null +++ b/betree/src/allocator/next_fit_scan.rs @@ -0,0 +1,89 @@ +use super::*; + +/// Simple next-fit bitmap allocator +pub struct NextFitScan { + data: BitArr!(for SEGMENT_SIZE, in u8, Lsb0), + last_offset: u32, +} + +impl Allocator for NextFitScan { + fn data(&mut self) -> &mut BitArr!(for SEGMENT_SIZE, in u8, Lsb0) { + &mut self.data + } + + /// Constructs a new `NextFit` given the segment allocation bitmap. + /// The `bitmap` must have a length of `SEGMENT_SIZE`. + fn new(bitmap: [u8; SEGMENT_SIZE_BYTES]) -> Self { + NextFitScan { + data: BitArray::new(bitmap), + last_offset: 0, + } + } + + /// Allocates a block of the given `size`. + /// Returns `None` if the allocation request cannot be satisfied. + fn allocate(&mut self, size: u32) -> Option { + if size == 0 { + return Some(0); + } + let mut offset = self.last_offset; + let mut wrap_around = false; + + loop { + if offset + size > SEGMENT_SIZE as u32 { + // Wrap around to the beginning of the segment. + // NOTE: We **can't** set offset here. 
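+                // The actual reset of `offset` to 0 happens after the wrap-around
+                // termination check below; see the NOTE there.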
+ wrap_around = true; + } + + if wrap_around && offset >= self.last_offset { + // We've circled back to the starting point, no space found + return None; + } + + if offset + size > SEGMENT_SIZE as u32 { + // NOTE: We **can't** set offset above because of the case if self.last_offset is + // larger than SEGMENT_SIZE - size. If it is and we would set offset above we would + // run into an infinite loop. Because the `offset >= self.last_offset` condition + // would never be fulfilled because offset is reset beforehand. + offset = 0; + } + + let start_idx = offset as usize; + let end_idx = (offset + size) as usize; + + match self.data[start_idx..end_idx].last_one() { + Some(last_alloc_idx) => { + // Skip to the end of the last allocated block if there is any one at all. + offset += last_alloc_idx as u32 + 1 + } + None => { + // No allocated blocks found, so allocate here. + self.mark(offset, size, Action::Allocate); + self.last_offset = offset + size + 1; + return Some(offset); + } + } + } + } + + /// Allocates a block of the given `size` at `offset`. + /// Returns `false` if the allocation request cannot be satisfied. + fn allocate_at(&mut self, size: u32, offset: u32) -> bool { + if size == 0 { + return true; + } + if offset + size > SEGMENT_SIZE as u32 { + return false; + } + + let start_idx = offset as usize; + let end_idx = (offset + size) as usize; + if self.data[start_idx..end_idx].any() { + return false; + } + self.mark(offset, size, Action::Allocate); + self.last_offset = offset + size; + true + } +} diff --git a/betree/src/allocator/segment_allocator.rs b/betree/src/allocator/segment_allocator.rs new file mode 100644 index 000000000..79d5976b1 --- /dev/null +++ b/betree/src/allocator/segment_allocator.rs @@ -0,0 +1,71 @@ +use super::*; + +/// Simple first-fit that is used for reference. For a more efficient version see FirstFit. +pub struct SegmentAllocator { + data: BitArr!(for SEGMENT_SIZE, in u8, Lsb0), +} + +impl Allocator for SegmentAllocator { + fn data(&mut self) -> &mut BitArr!(for SEGMENT_SIZE, in u8, Lsb0) { + &mut self.data + } + + /// Constructs a new `FirstFit` given the segment allocation bitmap. + /// The `bitmap` must have a length of `SEGMENT_SIZE`. + fn new(bitmap: [u8; SEGMENT_SIZE_BYTES]) -> Self { + SegmentAllocator { + data: BitArray::new(bitmap), + } + } + + /// Allocates a block of the given `size`. + /// Returns `None` if the allocation request cannot be satisfied and the offset if if can. + fn allocate(&mut self, size: u32) -> Option { + if size == 0 { + return Some(0); + } + let offset = { + let mut idx = 0; + loop { + loop { + if idx + size > SEGMENT_SIZE as u32 { + return None; + } + if !self.data[idx as usize] { + break; + } + idx += 1; + } + + let start_idx = (idx + 1) as usize; + let end_idx = (idx + size) as usize; + if let Some(first_alloc_idx) = self.data[start_idx..end_idx].first_one() { + idx = (idx + 1) + first_alloc_idx as u32 + 1; + } else { + break idx; + } + } + }; + self.mark(offset, size, Action::Allocate); + return Some(offset); + } + + /// Allocates a block of the given `size` at `offset`. + /// Returns `false` if the allocation request cannot be satisfied. 
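+    /// The requested range must be completely free; otherwise nothing is marked
+    /// and `false` is returned.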
+ fn allocate_at(&mut self, size: u32, offset: u32) -> bool { + if size == 0 { + return true; + } + if offset + size > SEGMENT_SIZE as u32 { + return false; + } + + let start_idx = offset as usize; + let end_idx = (offset + size) as usize; + if self.data[start_idx..end_idx].any() { + return false; + } + self.mark(offset, size, Action::Allocate); + true + } +} diff --git a/betree/src/allocator/worst_fit_fsm.rs b/betree/src/allocator/worst_fit_fsm.rs new file mode 100644 index 000000000..3fc3c7104 --- /dev/null +++ b/betree/src/allocator/worst_fit_fsm.rs @@ -0,0 +1,325 @@ +use super::*; + +/// Based on the free-space-map allocator from postgresql: +/// https://github.com/postgres/postgres/blob/02ed3c2bdcefab453b548bc9c7e0e8874a502790/src/backend/storage/freespace/README +pub struct WorstFitFSM { + data: BitArr!(for SEGMENT_SIZE, in u8, Lsb0), + fsm_tree: Vec<(u32, u32)>, // Array to represent the FSM tree, storing max free space + tree_height: u32, +} + +impl Allocator for WorstFitFSM { + fn data(&mut self) -> &mut BitArr!(for SEGMENT_SIZE, in u8, Lsb0) { + &mut self.data + } + + /// Constructs a new `WorstFitFSM` given the segment allocation bitmap. + /// The `bitmap` must have a length of `SEGMENT_SIZE`. + fn new(bitmap: [u8; SEGMENT_SIZE_BYTES]) -> Self { + let data = BitArray::new(bitmap); + let mut allocator = WorstFitFSM { + data, + fsm_tree: Vec::new(), + tree_height: 0, + }; + allocator.build_fsm_tree(); + allocator + } + + fn allocate(&mut self, size: u32) -> Option { + if size == 0 { + return Some(0); + } + + if self.fsm_tree.is_empty() || self.fsm_tree[0].1 < size { + return None; // Not enough free space + } + + let mut current_node_index = 0; + while current_node_index < self.fsm_tree.len() / 2 { + // Traverse internal nodes + let left_child_index = 2 * current_node_index + 1; + let right_child_index = 2 * current_node_index + 2; + + let left_child_value = *self.fsm_tree.get(left_child_index).unwrap_or(&(0, 0)); + let right_child_value = *self.fsm_tree.get(right_child_index).unwrap_or(&(0, 0)); + + match (left_child_value.1 >= size, right_child_value.1 >= size) { + (true, true) => { + // Both children can fit size + if left_child_value.1 < right_child_value.1 { + // Right is worse fit + current_node_index = right_child_index; + } else { + // Left is worse or equal fit + current_node_index = left_child_index; + } + } + (true, false) => current_node_index = left_child_index, // Only left can fit + (false, true) => current_node_index = right_child_index, // Only right can fit + (false, false) => unreachable!(), // Neither child can fit, stop traversal + } + } + + // current_node_index is now the index of the worst-fit leaf node + assert!(current_node_index >= self.fsm_tree.len() / 2); + let (offset, segment_size) = self.fsm_tree[current_node_index]; + + assert!(segment_size >= size); + + self.mark(offset, size, Action::Allocate); + + // Update the segment in the leaf node + self.fsm_tree[current_node_index].0 += size; + self.fsm_tree[current_node_index].1 -= size; + + // Update internal nodes up to the root + let mut current_index = current_node_index; + while current_index > 0 { + current_index = (current_index - 1) / 2; // Index of parent node + let left_child_index = 2 * current_index + 1; + let right_child_index = 2 * current_index + 2; + + let left_child_value = *self.fsm_tree.get(left_child_index).unwrap_or(&(0, 0)); + let right_child_value = *self.fsm_tree.get(right_child_index).unwrap_or(&(0, 0)); + if left_child_value.1 > right_child_value.1 { + 
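+                // The left child has the larger free segment, so it propagates up to the parent.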
self.fsm_tree[current_index] = left_child_value + } else { + self.fsm_tree[current_index] = right_child_value + } + } + + return Some(offset); + } + + fn allocate_at(&mut self, size: u32, offset: u32) -> bool { + // NOTE: Because the tree is sorted by offset because of how it's build, this shouldn't be + // to hard to implement efficiently if needed. + + if size == 0 { + return true; + } + if offset + size > SEGMENT_SIZE as u32 { + return false; + } + + let start_idx = offset as usize; + let end_idx = (offset + size) as usize; + if self.data[start_idx..end_idx].any() { + return false; + } + self.mark(offset, size, Action::Allocate); + + // Rebuild the tree to reflect changes. This is **not** efficient but the easiest solution, + // as the allocate_at is called only **once** on loading the bitmap from disk. + self.build_fsm_tree(); + true + } +} + +impl WorstFitFSM { + fn get_free_segments(&mut self) -> Vec<(u32, u32)> { + let mut offset: u32 = 0; + let mut free_segments = Vec::new(); + while offset < SEGMENT_SIZE as u32 { + if !self.data()[offset as usize] { + // If bit is 0, it's free + let start_offset = offset; + let mut current_size: u32 = 0; + while offset < SEGMENT_SIZE as u32 && !self.data()[offset as usize] { + current_size += 1; + offset += 1; + } + free_segments.push((start_offset, current_size)); + } else { + offset += 1; + } + } + free_segments + } + + fn build_fsm_tree(&mut self) { + let leaf_nodes = self.get_free_segments(); + let leaf_nodes_num = leaf_nodes.len(); + + if leaf_nodes_num == 0 { + self.fsm_tree = vec![(0, 0)]; // Root node with 0 free space + return; + } + + // Calculate the size of the FSM tree array. For simplicity we assume complete tree for now. + self.tree_height = (leaf_nodes_num as f64).log2().ceil() as u32; + // Number of nodes in complete binary tree of height h is 2^(h+1) - 1 + let tree_nodes_num = (1 << (self.tree_height + 1)) - 1; + + self.fsm_tree.clear(); + self.fsm_tree.resize(tree_nodes_num as usize, (0, 0)); + + // 1. Initialize leaf nodes in fsm_tree from free_segments + // OPTIM: just use memcpy + for (i, &(offset, size)) in leaf_nodes.iter().enumerate() { + // Leaf nodes are at the end of the fsm_tree array in a complete binary tree + let leaf_index = (tree_nodes_num / 2) + i; + if leaf_index < tree_nodes_num { + // Prevent out-of-bounds access if free_segments.len() is not power of 2 + self.fsm_tree[leaf_index] = (offset, size); + } + } + + // 2. 
Build internal nodes bottom-up similar to a binary heap + for i in (0..(tree_nodes_num / 2)).rev() { + let left_child_index = 2 * i + 1; + let right_child_index = 2 * i + 2; + + // Default to 0 if index is out of bounds (incomplete tree) + let left_child_value = *self.fsm_tree.get(left_child_index).unwrap_or(&(0, 0)); + let right_child_value = *self.fsm_tree.get(right_child_index).unwrap_or(&(0, 0)); + + if left_child_value.1 > right_child_value.1 { + self.fsm_tree[i] = left_child_value + } else { + self.fsm_tree[i] = right_child_value + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn build_empty() { + let bitmap = [0u8; SEGMENT_SIZE_BYTES]; + let allocator = WorstFitFSM::new(bitmap); + + // In an empty bitmap, the root node should have a large free space + assert_eq!(allocator.fsm_tree[0].0, 0 as u32); + assert_eq!(allocator.fsm_tree[0].1, SEGMENT_SIZE as u32); + assert_eq!(allocator.tree_height, 0); + } + + #[test] + fn build_simple() { + // Example bitmap: 3 segments allocated at the beginning, 2 free, 3 allocated, rest free + let mut allocator = WorstFitFSM::new([0u8; SEGMENT_SIZE_BYTES]); + let bitmap = allocator.data(); + + // Manually allocate some segments + bitmap[0..3].fill(true); // Allocate 3 blocks at the beginning + bitmap[5..7].fill(true); // Allocate 2 blocks after the free ones + + let mut allocator = WorstFitFSM::new(bitmap.into_inner()); + + let fsm_tree = vec![ + (7, SEGMENT_SIZE as u32 - 7), + (3, 2), + (7, SEGMENT_SIZE as u32 - 7), + ]; + assert_eq!(allocator.fsm_tree, fsm_tree); + assert_eq!(allocator.tree_height, 1); + } + + #[test] + fn build_complex() { + let mut allocator = WorstFitFSM::new([0u8; SEGMENT_SIZE_BYTES]); + let bitmap = allocator.data(); + + // Manually allocate some segments to create a non-trivial tree + bitmap[0..3].fill(true); + bitmap[5..8].fill(true); + bitmap[8..10].fill(true); + bitmap[14..22].fill(true); + bitmap[35..36].fill(true); + bitmap[42..53].fill(true); + + let allocator = WorstFitFSM::new(bitmap.into_inner()); + + // binary heap layout + let fsm_tree = vec![ + (53, SEGMENT_SIZE as u32 - 53), + (22, 13), + (53, SEGMENT_SIZE as u32 - 53), + (10, 4), + (22, 13), + (53, SEGMENT_SIZE as u32 - 53), + (0, 0), + (3, 2), + (10, 4), + (22, 13), + (36, 6), + (53, SEGMENT_SIZE as u32 - 53), + (0, 0), + (0, 0), + (0, 0), + ]; + + assert_eq!(fsm_tree, allocator.fsm_tree); + assert_eq!(allocator.tree_height, 3); + } + + #[test] + fn allocate_empty_fsm_tree() { + let bitmap = [0u8; SEGMENT_SIZE_BYTES]; + let mut allocator = WorstFitFSM::new(bitmap); + + let allocation = allocator.allocate(1024); + assert!(allocation.is_some()); // Allocation should succeed + + let allocated_offset = allocation.unwrap(); + assert_eq!(allocated_offset, 0); // Should allocate at the beginning + + // Check if the allocated region is marked as used in the bitmap + assert!(allocator.data()[0..1024 as usize].all()); + // Check root node value after allocation + assert_eq!(allocator.fsm_tree[0], (1024, SEGMENT_SIZE as u32 - 1024)); + } + + #[test] + fn allocate_complex_fsm_tree() { + let mut allocator = WorstFitFSM::new([0u8; SEGMENT_SIZE_BYTES]); + let bitmap = allocator.data(); + + // Manually allocate some segments to create a non-trivial tree + bitmap[0..3].fill(true); + bitmap[5..8].fill(true); + bitmap[8..10].fill(true); + bitmap[14..22].fill(true); + bitmap[35..36].fill(true); + bitmap[42..53].fill(true); + + let mut allocator = WorstFitFSM::new(bitmap.into_inner()); + + // Worst-fit should allocate from the segment at offset 3 with size 2 
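+        // A request of size 2 would also fit the small (3, 2) segment, but worst-fit
+        // always picks the largest free segment, which here starts at offset 53.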
+ let allocation = allocator.allocate(2); // Request allocation of size 2 + assert!(allocation.is_some()); + assert_eq!(allocation.unwrap(), 53); + // Verify that the allocated region is marked in the bitmap + assert!(allocator.data()[53..55].all()); + + let allocation2 = allocator.allocate(10); + assert!(allocation2.is_some()); + assert_eq!(allocation2.unwrap(), 55); + assert!(allocator.data()[55..65].all()); + + // Allocate again, to use the next worst fit segment + let allocation2 = allocator.allocate(100); + assert!(allocation2.is_some()); + assert_eq!(allocation2.unwrap(), 65); + assert!(allocator.data()[65..165].all()); + assert_eq!(allocator.fsm_tree[0].1, SEGMENT_SIZE as u32 - 165); + } + + #[test] + fn allocate_fail_fsm_tree() { + let mut allocator = WorstFitFSM::new([0u8; SEGMENT_SIZE_BYTES]); + let root_free_space = allocator.fsm_tree[0].1; + + // Try to allocate more than available space + let allocation = allocator.allocate(root_free_space + 1); + assert!(allocation.is_none()); // Allocation should fail + + // Check if fsm_tree root value is still the same + assert_eq!(allocator.fsm_tree[0].1, root_free_space); // Should remain unchanged + } +} diff --git a/betree/src/allocator/worst_fit_list.rs b/betree/src/allocator/worst_fit_list.rs new file mode 100644 index 000000000..080991d3f --- /dev/null +++ b/betree/src/allocator/worst_fit_list.rs @@ -0,0 +1,127 @@ +use super::*; + +/// Simple Worst-Fit bitmap allocator that uses a list to manage free segments +pub struct WorstFitList { + data: BitArr!(for SEGMENT_SIZE, in u8, Lsb0), + free_segments: Vec<(u32, u32)>, // (offset, size) of free segments +} + +impl Allocator for WorstFitList { + fn data(&mut self) -> &mut BitArr!(for SEGMENT_SIZE, in u8, Lsb0) { + &mut self.data + } + + /// Constructs a new `WorstFitList` given the segment allocation bitmap. + /// The `bitmap` must have a length of `SEGMENT_SIZE`. + fn new(bitmap: [u8; SEGMENT_SIZE_BYTES]) -> Self { + let data = BitArray::new(bitmap); + let mut allocator = WorstFitList { + data, + free_segments: Vec::new(), + }; + allocator.initialize_free_segments(); + allocator + } + + /// Allocates a block of the given `size` using worst-fit strategy. + /// Returns `None` if the allocation request cannot be satisfied. + fn allocate(&mut self, size: u32) -> Option { + if size == 0 { + return Some(0); + } + + let mut worst_fit_segment_index: Option = None; + let mut worst_fit_segment_size: u32 = 0; // Initialize with a small value + + for i in 0..self.free_segments.len() { + let (_, segment_size) = self.free_segments[i]; + if segment_size >= size && segment_size > worst_fit_segment_size { + worst_fit_segment_index = Some(i); + worst_fit_segment_size = segment_size; + } + } + + if let Some(index) = worst_fit_segment_index { + let (offset, segment_size) = self.free_segments[index]; + self.mark(offset, size, Action::Allocate); + + self.free_segments[index].0 = offset + size; + self.free_segments[index].1 = segment_size - size; + + return Some(offset); + } + None + } + + /// Allocates a block of the given `size` at `offset`. + /// Returns `false` if the allocation request cannot be satisfied. 
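+    /// Mirrors `FirstFitList::allocate_at`: the covering free-segment entry is
+    /// removed, shrunk, or split to stay consistent with the bitmap.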
+ fn allocate_at(&mut self, size: u32, offset: u32) -> bool { + if size == 0 { + return true; + } + if offset + size > SEGMENT_SIZE as u32 { + return false; + } + + let start_idx = offset as usize; + let end_idx = (offset + size) as usize; + if self.data[start_idx..end_idx].any() { + return false; + } + + // Update free_segments to reflect the allocation - similar to FirstFitList::allocate_at + for i in 0..self.free_segments.len() { + let (seg_offset, seg_size) = self.free_segments[i]; + if seg_offset == offset && seg_size == size { + self.free_segments.remove(i); + self.mark(offset, size, Action::Allocate); + return true; + } else if seg_offset == offset && seg_size > size { + self.free_segments[i].0 += size; + self.free_segments[i].1 -= size; + self.mark(offset, size, Action::Allocate); + return true; + } else if offset > seg_offset && offset + size == seg_offset + seg_size { + self.free_segments[i].1 -= size; + self.mark(offset, size, Action::Allocate); + return true; + } else if offset > seg_offset + && offset < seg_offset + seg_size + && offset + size < seg_offset + seg_size + { + let remaining_size = seg_size - (size + (offset - seg_offset)); + let new_offset = offset + size; + self.free_segments[i].1 = offset - seg_offset; + + self.free_segments + .insert(i + 1, (new_offset, remaining_size)); + self.mark(offset, size, Action::Allocate); + return true; + } + } + + false + } +} + +impl WorstFitList { + /// Initializes the `free_segments` vector by scanning the bitmap. + fn initialize_free_segments(&mut self) { + let mut offset: u32 = 0; + while offset < SEGMENT_SIZE as u32 { + if !self.data()[offset as usize] { + let start_offset = offset; + let mut current_size = 0; + while offset < SEGMENT_SIZE as u32 && !self.data()[offset as usize] { + current_size += 1; + offset += 1; + } + self.free_segments.push((start_offset, current_size)); + } else { + offset += 1; + } + } + // keep segments sorted by offset + self.free_segments.sort_by_key(|seg| seg.0); + } +} diff --git a/betree/src/allocator/worst_fit_scan.rs b/betree/src/allocator/worst_fit_scan.rs new file mode 100644 index 000000000..e222384b7 --- /dev/null +++ b/betree/src/allocator/worst_fit_scan.rs @@ -0,0 +1,98 @@ +use super::*; + +/// Simple worst-fit bitmap allocator +pub struct WorstFitScan { + data: BitArr!(for SEGMENT_SIZE, in u8, Lsb0), +} + +impl Allocator for WorstFitScan { + fn data(&mut self) -> &mut BitArr!(for SEGMENT_SIZE, in u8, Lsb0) { + &mut self.data + } + + /// Constructs a new `WorstFitSimple` given the segment allocation bitmap. + /// The `bitmap` must have a length of `SEGMENT_SIZE`. + fn new(bitmap: [u8; SEGMENT_SIZE_BYTES]) -> Self { + WorstFitScan { + data: BitArray::new(bitmap), + } + } + + /// Allocates a block of the given `size`. + /// Returns `None` if the allocation request cannot be satisfied. + fn allocate(&mut self, size: u32) -> Option { + if size == 0 { + return Some(0); + } + + let mut worst_fit_offset = None; + // Initialize with size, as we want the largest possible size and it has to be at least + // size large. 
+ let mut worst_fit_size = size; + let mut offset: u32 = 0; + + while offset + size <= SEGMENT_SIZE as u32 { + let end_idx = (offset + size) as usize; + + match self.data[offset as usize..end_idx].last_one() { + Some(last_alloc_idx) => { + // Skip to the end of the last allocated block + offset += last_alloc_idx as u32 + 1; + } + None => { + // Find the next allocated block after the current free range + match self.data[end_idx..].first_one() { + Some(next_alloc_idx) => { + let free_block_size = next_alloc_idx as u32 + end_idx as u32 - offset; + + // Check if this free block is a worse fit (larger) + if free_block_size > worst_fit_size { + worst_fit_offset = Some(offset); + worst_fit_size = free_block_size; + } + + offset = next_alloc_idx as u32 + end_idx as u32 + 1; + } + None => { + // No more allocated blocks, we have scanned the whole segment. + let free_block_size = self.data[offset as usize..].len() as u32; + + // Check if this free block is a worse fit (larger) + if free_block_size > worst_fit_size { + worst_fit_offset = Some(offset); + worst_fit_size = free_block_size; + } + + break; + } + } + } + } + } + + if let Some(offset) = worst_fit_offset { + self.mark(offset, size, Action::Allocate); + } + + worst_fit_offset + } + + /// Allocates a block of the given `size` at `offset`. + /// Returns `false` if the allocation request cannot be satisfied. + fn allocate_at(&mut self, size: u32, offset: u32) -> bool { + if size == 0 { + return true; + } + if offset + size > SEGMENT_SIZE as u32 { + return false; + } + + let start_idx = offset as usize; + let end_idx = (offset + size) as usize; + if self.data[start_idx..end_idx].any() { + return false; + } + self.mark(offset, size, Action::Allocate); + true + } +} diff --git a/betree/src/cache/clock_cache.rs b/betree/src/cache/clock_cache.rs index db53565e1..ad8cdc4f6 100644 --- a/betree/src/cache/clock_cache.rs +++ b/betree/src/cache/clock_cache.rs @@ -267,6 +267,10 @@ impl { pub(crate) free_space_tier: Vec, pub(crate) delayed_messages: Mutex, SlicedCowBytes)>>, pub(crate) last_snapshot_generation: RwLock>, + pub(crate) allocator: AllocatorType, // Cache for allocators which have been in use since the last sync. This is // done to avoid cyclical updates on evictions. // NOTE: This map needs to be updated/emptied on sync's as the internal // representation is not updated on deallocation to avoid overwriting // potentially valid fallback data. 
- pub(crate) allocators: RwLock>>, + pub(crate) allocators: RwLock>>>, pub(crate) allocations: AtomicU64, pub(crate) old_root_allocation: SeqLock)>>, } @@ -95,13 +96,13 @@ impl Handler { } } -pub struct SegmentAllocatorGuard<'a> { - inner: RwLockReadGuard<'a, HashMap>>, +pub struct AllocatorGuard<'a> { + inner: RwLockReadGuard<'a, HashMap>>>, id: SegmentId, } -impl<'a> SegmentAllocatorGuard<'a> { - pub fn access(&self) -> RwLockWriteGuard { +impl<'a> AllocatorGuard<'a> { + pub fn access(&self) -> RwLockWriteGuard> { self.inner.get(&self.id).unwrap().write() } } @@ -159,7 +160,7 @@ impl Handler { Ok(()) } - pub fn get_allocation_bitmap(&self, id: SegmentId, dmu: &X) -> Result + pub fn get_allocation_bitmap(&self, id: SegmentId, dmu: &X) -> Result where X: Dml, ObjectRef = OR, ObjectPointer = OR::ObjectPointer>, { @@ -167,7 +168,7 @@ impl Handler { // Test if bitmap is already in cache let foo = self.allocators.read(); if foo.contains_key(&id) { - return Ok(SegmentAllocatorGuard { inner: foo, id }); + return Ok(AllocatorGuard { inner: foo, id }); } } @@ -194,7 +195,22 @@ impl Handler { } } - let mut allocator = SegmentAllocator::new(bitmap); + let mut allocator: Box = match self.allocator { + AllocatorType::FirstFitScan => Box::new(FirstFitScan::new(bitmap)), + AllocatorType::FirstFitList => Box::new(FirstFitList::new(bitmap)), + AllocatorType::FirstFitTree => Box::new(FirstFitFSM::new(bitmap)), + AllocatorType::NextFitScan => Box::new(NextFitScan::new(bitmap)), + AllocatorType::NextFitList => Box::new(NextFitList::new(bitmap)), + AllocatorType::BestFitScan => Box::new(BestFitScan::new(bitmap)), + AllocatorType::BestFitList => Box::new(BestFitList::new(bitmap)), + AllocatorType::BestFitTree => Box::new(BestFitTree::new(bitmap)), + AllocatorType::ApproximateBestFitTree => Box::new(BestFitFSM::new(bitmap)), + AllocatorType::WorstFitScan => Box::new(WorstFitScan::new(bitmap)), + AllocatorType::WorstFitList => Box::new(WorstFitList::new(bitmap)), + AllocatorType::WorstFitTree => Box::new(WorstFitFSM::new(bitmap)), + AllocatorType::SegmentAllocator => Box::new(SegmentAllocator::new(bitmap)), + AllocatorType::HybridAllocator => Box::new(HybridAllocator::new(bitmap)), + }; if let Some((offset, size)) = self.old_root_allocation.read() { if SegmentId::get(offset) == id { @@ -207,7 +223,7 @@ impl Handler { self.allocators.write().insert(id, RwLock::new(allocator)); let foo = self.allocators.read(); - Ok(SegmentAllocatorGuard { inner: foo, id }) + Ok(AllocatorGuard { inner: foo, id }) } pub fn free_space_disk(&self, disk_id: GlobalDiskId) -> Option { diff --git a/betree/src/database/mod.rs b/betree/src/database/mod.rs index bc9f37e0f..a131789eb 100644 --- a/betree/src/database/mod.rs +++ b/betree/src/database/mod.rs @@ -1,5 +1,6 @@ //! This module provides the Database Layer. 
use crate::{ + allocator::*, atomic_option::AtomicOption, cache::ClockCache, checksum::GxHash, @@ -150,6 +151,9 @@ pub struct DatabaseConfiguration { /// Where to log the allocations pub allocation_log_file_path: PathBuf, + + /// Select the allocator to use + pub allocator: AllocatorType, } impl Default for DatabaseConfiguration { @@ -166,6 +170,7 @@ impl Default for DatabaseConfiguration { metrics: None, migration_policy: None, allocation_log_file_path: PathBuf::from("allocation_log.bin"), + allocator: AllocatorType::NextFitScan, } } } @@ -212,6 +217,7 @@ impl DatabaseConfiguration { free_space_tier: (0..NUM_STORAGE_CLASSES) .map(|_| AtomicStorageInfo::default()) .collect_vec(), + allocator: self.allocator, allocations: AtomicU64::new(0), old_root_allocation: SeqLock::new(None), allocators: RwLock::new(HashMap::new()), diff --git a/scripts/alloc_cycles_to_csv b/scripts/alloc_cycles_to_csv new file mode 100755 index 000000000..32b05a446 --- /dev/null +++ b/scripts/alloc_cycles_to_csv @@ -0,0 +1,230 @@ +#!/usr/bin/env python3 + +import argparse +import csv +import struct +import os +from typing import Iterator, Any, IO + +# --- Constants and Classes Copied/Adapted from the Original Script --- + +# Constants to get relevant information from the disk_offset. +MASK_LAYER_ID = ((1 << 2) - 1) << (10 + 52) +MASK_DISK_ID = ((1 << 10) - 1) << 52 +MASK_OFFSET = (1 << 52) - 1 +SEGMENT_SIZE_LOG_2 = 18 +SEGMENT_SIZE = 1 << SEGMENT_SIZE_LOG_2 +SEGMENT_SIZE_MASK = SEGMENT_SIZE - 1 +# This is the amount of bytes one (de-)allocation has in the log. +SIZE_PER_ALLOCATION = 29 + + +class StorageConfig: + """Represents the storage configuration of the system (needed for header parsing).""" + + def __init__(self, num_layers: int, disks_per_layer: list[int], + blocks_per_disk: list[list[int]], blocks_per_segment: int): + self.num_layers = num_layers + self.disks_per_layer = disks_per_layer + self.blocks_per_disk = blocks_per_disk + self.blocks_per_segment = blocks_per_segment + # We don't need the other methods for this script + + +class Timestamp: + time: int + op_type: int + offset: int + num_blocks: int + cycles_alloc: int + cycles_total: int + layer_id: int + disk_id: int + block_offset: int + segment_id: int + segment_offset: int + + def __init__(self, op_type: int, offset: int, num_blocks: int, cycles_alloc: int, cycles_total: int, time: int): + self.op_type = op_type + self.offset = offset + self.num_blocks = num_blocks + self.cycles_alloc = cycles_alloc + self.cycles_total = cycles_total + self.time = time + self._parse_offset() + + def __str__(self) -> str: + return (f"Timestep(op_type: {self.op_type}, " + f"offset: {self.offset}, " + f"num_blocks: {self.num_blocks}, " + f"cycles_alloc: {self.cycles_alloc}, " + f"cycles_total: {self.cycles_total}, " + f"time: {self.time}, " + f"layer_id: {self.layer_id}, " + f"disk_id: {self.disk_id}, " + f"block_offset: {self.block_offset}, " + f"segment_id: {self.segment_id}, " + f"segment_offset: {self.segment_offset})") + + def _parse_offset(self): + """Parses the offset into human readable values""" + self.layer_id = (self.offset & MASK_LAYER_ID) >> (52 + 10) + self.disk_id = (self.offset & MASK_DISK_ID) >> 52 + self.block_offset = self.offset & MASK_OFFSET + # In haura the segment id is a multiple of the segment size. This is ugly for plotting. 
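+        # Dividing by SEGMENT_SIZE yields a small consecutive segment index (0, 1, 2, ...)
+        # instead of the raw multiple-of-SEGMENT_SIZE id used inside Haura.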
+ self.segment_id = (self.block_offset & ~SEGMENT_SIZE_MASK) // SEGMENT_SIZE + self.segment_offset = self.block_offset % SEGMENT_SIZE + + +class Parser: + """Parses the allocation log file.""" + log_file: str + _file_handle: IO[Any] + timesteps: int + time: int + + def __init__(self, log_file: str): + self.log_file = log_file + self._file_handle = open(log_file, "rb") # Open the file in binary mode + + # Precalculate the number of timesteps. + _ = self.parse_header() + self.timesteps = self._remaining_bytes() // SIZE_PER_ALLOCATION + self._file_handle.seek(0) + + def __del__(self): + try: + self._file_handle.close() + except AttributeError: + # Happens when the file does not exist + pass + + def __len__(self) -> int: + return self.timesteps + + def parse_header(self) -> StorageConfig: + """Parses the header of the log file and returns a StorageConfig.""" + f = self._file_handle + num_classes = struct.unpack(" Iterator[Timestamp]: + """Prepares the iterator by skipping the header. Returns itself as the iterator.""" + self._file_handle.seek(0) + _ = self.parse_header() + self.time = 0 + return self + + def __next__(self) -> Timestamp: + """Reads the next allocation from the log file and returns a timestamp.""" + try: + op_type = struct.unpack(" int: + """Returns the remaining bytes in a file from the current position of the file pointer.""" + f = self._file_handle + current_position = f.tell() + f.seek(0, os.SEEK_END) + end_position = f.tell() + # Return to the original position. + f.seek(current_position, os.SEEK_SET) + return end_position - current_position + + +def main(): + """ + Parses the allocation log file, filters for allocation events up to an + optional maximum count, and writes the allocation count number and + local allocation cycles to a specified output CSV file. + """ + parser = argparse.ArgumentParser( + description="Parse allocation log and output allocation count and cycles to a CSV file, with an optional limit." + ) + parser.add_argument("input_log_file", help="Path to the input binary log file.") + parser.add_argument("output_csv_file", help="Path to the output CSV file.") + parser.add_argument( + "--max-allocations", + "-n", + type=int, + default=None, # Default is None, meaning no limit unless specified + help="Maximum number of allocation entries to write to the output file. If not set, all allocations are written." + ) + args = parser.parse_args() + + max_alloc_limit = args.max_allocations + + # Optional: Add a check for invalid limit values + if max_alloc_limit is not None and max_alloc_limit <= 0: + print("Error: --max-allocations must be a positive integer if specified.") + exit(1) + + try: + log_parser = Parser(args.input_log_file) + except FileNotFoundError: + print(f"Error: Input file not found at {args.input_log_file}") + exit(1) # Exit if input file is not found + except Exception as e: + print(f"Error initializing parser: {e}") + exit(1) # Exit on other parser init errors + + allocation_count = 0 # Initialize allocation counter + logged_allocations = 0 # Counter for allocations actually logged + + try: + with open(args.output_csv_file, 'w', newline='') as csvfile: + csv_writer = csv.writer(csvfile) + # Write the header row + csv_writer.writerow(['allocation_count', 'allocation_cycles_local']) + + # Iterate through log entries generated by the parser + for timestamp_entry in log_parser: + # Filter for entries that represent an allocation. + # NOTE: Assuming op_type == 1 signifies an allocation. 
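+                # Entries with any other op_type (e.g. deallocations) are skipped.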
+ # Please adjust this value if your log uses a different indicator. + if timestamp_entry.op_type == 1: + allocation_count += 1 # Increment total allocation counter + + # Check if the limit has been reached *before* writing + if max_alloc_limit is not None and logged_allocations >= max_alloc_limit: + print(f"Reached specified maximum allocation limit ({max_alloc_limit}). Stopping log output.") + break # Exit the loop, no more entries will be processed or written + + # Write the relevant data: allocation count and cycles_alloc + csv_writer.writerow([allocation_count, timestamp_entry.cycles_alloc]) + logged_allocations += 1 # Increment counter for logged allocations + + print(f"Processed {allocation_count} total allocations.") + print(f"Successfully wrote {logged_allocations} allocation entries to {args.output_csv_file}") + + except IOError as e: + print(f"Error writing to output file {args.output_csv_file}: {e}") + exit(1) # Exit on file writing error + except Exception as e: + print(f"An unexpected error occurred during processing: {e}") + exit(1) # Exit on other processing errors + + +if __name__ == "__main__": + main() diff --git a/scripts/visualize_allocation_log b/scripts/visualize_allocation_log index b5bbc5064..c57de7b37 100755 --- a/scripts/visualize_allocation_log +++ b/scripts/visualize_allocation_log @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import argparse +from collections import Counter import functools from multiprocessing import Pool, Value, Lock import os @@ -503,6 +504,8 @@ class Plotter: "allocation_cycles_proportion_aligned": False, "allocation_sizes": False, "allocation_sizes_ecdf": False, # empirical cumulative distribution function + "allocation_size_by_time": False, + "cycles_by_allocation_size": False, "slider": False, "checkboxes": not args.disable_checkboxes, } @@ -549,8 +552,10 @@ class Plotter: self.vline_allocation_cycles_total_aligned = self._allocation_cycles_total_aligned() self.vline_allocation_cycles_proportion = self._allocation_cycles_proportion() self.vline_allocation_cycles_proportion_aligned = self._allocation_cycles_proportion_aligned() - _ = self._allocation_sizes() - _ = self._allocation_sizes_ecdf() + self._allocation_sizes() + self._allocation_sizes_ecdf() + self._allocation_size_by_time() + self._cycles_by_allocation_size() self.slider = self._setup_slider() self.checkboxes = self._setup_checkboxes() @@ -667,6 +672,22 @@ class Plotter: for layer in self.layers: layout[-1].append("allocation_sizes_ecdf") + if self.plot_config["allocation_size_by_time"]: + layout.append([]) + gridspec["height_ratios"].append(1) + if self.plot_config["checkboxes"]: + layout[-1].append("checkboxes") + for layer in self.layers: + layout[-1].append("allocation_size_by_time") + + if self.plot_config["cycles_by_allocation_size"]: + layout.append([]) + gridspec["height_ratios"].append(1) + if self.plot_config["checkboxes"]: + layout[-1].append("checkboxes") + for layer in self.layers: + layout[-1].append("cycles_by_allocation_size") + if self.plot_config["slider"]: layout.append([]) gridspec["height_ratios"].append(0.1) @@ -998,6 +1019,7 @@ class Plotter: failed_allocations_ax.set_title("Allocation sizes") failed_allocations_ax.set_xlim(0, max(self.global_bitmap.sizes)) failed_allocations_ax.set_ylim(0) + failed_allocations_ax.set_yscale("symlog") failed_allocations_ax.set_xlabel("Sizes") failed_allocations_ax.set_ylabel("Amount") @@ -1014,6 +1036,81 @@ class Plotter: failed_allocations_ax.set_xlabel("Sizes") failed_allocations_ax.set_ylabel("Proportion") + def 
_allocation_size_by_time(self): + """Plots allocation size by timestamp.""" + if not self.plot_config["allocation_size_by_time"]: + return + + data = self.global_bitmap.sizes + x_values = np.arange(len(self.global_bitmap.sizes)) + + ax = self.axd["allocation_size_by_time"] + ax.scatter(x_values, data, s=5.0, linewidths=0) + ax.set_title("Allocation Size by Timestamp") + ax.set_xlabel("Timestamp") + ax.set_ylabel("Allocation Size (Blocks)") + ax.set_ylim(1) + ax.set_xlim(0, len(x_values)) + + window_sizes = [100, 500, 1000] + colors = ['red', 'orange', 'black', 'purple'] + line_styles = ['-', '--', '-.', ':'] + moving_average = np.zeros_like(data, dtype=float) + + for i, window_size in enumerate(window_sizes): + moving_average = np.zeros_like(data, dtype=float) + for j in range(len(data)): + window_start = max(0, j - window_size + 1) + window_end = j + 1 + moving_average[j] = np.mean(data[window_start:window_end]) + + # Plot with different colors and line styles + ax.plot(x_values, moving_average, + linewidth=1.5, + color=colors[i % len(colors)], + linestyle=line_styles[i % len(line_styles)], + label=f"Moving Average ({window_size})") + + ax.legend(loc="upper left") + + def _cycles_by_allocation_size(self): + """Plots cycles by allocation size using box plots, bucketed for legibility.""" + if not self.plot_config["cycles_by_allocation_size"]: + return + + ax = self.axd["cycles_by_allocation_size"] + + # Define allocation size buckets + bucket_size = 16 + max_size = max(self.global_bitmap.sizes) + buckets = range(0, max_size + bucket_size, bucket_size) + bucket_begins = [b for b in buckets] # begins of the buckets for boxplot positions + + # Initialize data structures for bucketed cycles + bucketed_cycles = {begin: [] for begin in bucket_begins} + + # Bucket cycles by allocation size + for size, cycles in zip(self.global_bitmap.sizes, self.global_bitmap.cycles_alloc[1]): + if size <= max_size: + for begin in bucket_begins: + if buckets[bucket_begins.index(begin)] <= size < buckets[bucket_begins.index(begin)] + bucket_size: + bucketed_cycles[begin].append(cycles) + break + + # Prepare data for boxplot: list of cycle lists for each bucket + boxplot_data = [bucketed_cycles[begin] for begin in bucket_begins if bucketed_cycles[begin]] + positions = [begin for begin in bucket_begins if bucketed_cycles[begin]] # Positions of the boxplots are bucket begins + + # Plotting box plots + ax.boxplot(boxplot_data, positions=positions, widths=bucket_size*0.5, showfliers=True, manage_ticks=False) + + ax.set_title("Cycles by Allocation Size (Box Plot, Bucketed)") + ax.set_xlabel("Allocation Size (Blocks)") + ax.set_ylabel("Allocation Cycles") + ax.set_xlim(0, max_size + bucket_size) + ax.set_xticks(range(0, max_size + bucket_size + 1, 64)) + ax.set_ylim(0) + def _setup_slider(self): """Helper method for setting up the slider for interactive plotting.""" if not self.plot_config["slider"]: