diff --git a/betree/Cargo.toml b/betree/Cargo.toml index 432c93cd8..1a3d2e2e1 100644 --- a/betree/Cargo.toml +++ b/betree/Cargo.toml @@ -42,8 +42,10 @@ core_affinity = "0.5" async-trait = "0.1" lz4-sys = "1.9" +lz4 = "1.23.1" zstd = { version = "0.9", default-features = false } zstd-safe = { version = "4.0", default-features = false, features = ["experimental"] } +snap = "1.1" speedy = "0.7" enum_dispatch = "0.3" @@ -84,6 +86,9 @@ figment_config = ["figment"] # leaf vdev. This requires additional system calls due to time measuring and is # therefore safeguarded into it's own feature latency_metrics = [] +# Track memory access metrics for direct memory operations (e.g., PackedChildBuffer) +# This adds minimal overhead (~0.1-0.4%) but provides complete metrics accuracy +memory_metrics = [] nvm = ["pmdk"] # Log the allocations and deallocations done for later analysis allocation_log = [] diff --git a/betree/haura-benchmarks/.gitignore b/betree/haura-benchmarks/.gitignore index b7f53e6e2..6a2712b14 100644 --- a/betree/haura-benchmarks/.gitignore +++ b/betree/haura-benchmarks/.gitignore @@ -6,3 +6,4 @@ Cargo.lock results data +silesia_corpus diff --git a/betree/haura-benchmarks/Cargo.toml b/betree/haura-benchmarks/Cargo.toml index 38563d254..dc5cc2023 100644 --- a/betree/haura-benchmarks/Cargo.toml +++ b/betree/haura-benchmarks/Cargo.toml @@ -4,11 +4,20 @@ version = "0.1.0" authors = ["tilpner "] edition = "2018" +[features] +memory_metrics = ["betree_storage_stack/memory_metrics"] + +[[bin]] +name = "test_memory_metrics" +path = "src/test_memory_metrics.rs" + + + [workspace] members = ["."] [dependencies] -betree_storage_stack = { path = ".." } +betree_storage_stack = { path = "..", features = ["nvm"] } structopt = "0.3" figment = { version = "0.10", features = [ "json", "yaml" ] } @@ -24,4 +33,4 @@ log = "0.4" # Dependent on versions from haura parking_lot = "0.11" zip = "0.5" -zipf = "7.0.1" +zipf = "7.0.1" \ No newline at end of file diff --git a/betree/haura-benchmarks/haura-plots/haura_plots/benchmark_heatmap_generator.py b/betree/haura-benchmarks/haura-plots/haura_plots/benchmark_heatmap_generator.py new file mode 100644 index 000000000..c4a8b5c7c --- /dev/null +++ b/betree/haura-benchmarks/haura-plots/haura_plots/benchmark_heatmap_generator.py @@ -0,0 +1,477 @@ +#!/usr/bin/env python3 +""" +Comprehensive Heatmap Generator for Haura Benchmark Results + +This script generates heatmaps for various performance metrics across different +configurations (entry size, compression type, thread count). +""" + +import os +import json +import re +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import seaborn as sns +from pathlib import Path +import argparse +from typing import Dict, List, Tuple, Optional + +class BenchmarkHeatmapGenerator: + def __init__(self, results_dir: str, ycsb_char: str, remove_empty: bool = True): + self.results_dir = Path(results_dir) + self.ycsb_char = ycsb_char + self.remove_empty = remove_empty + self.data = {} + self.metrics = {} + + # Define expected configurations + self.entry_sizes = [512, 4096, 16384, 30000] + self.compression_types = ['None', 'Snappy', 'Rle', 'Delta', 'Zstd(1)', 'Zstd(5)', 'Zstd(10)', 'Lz4(1)', 'Lz4(5)', 'Lz4(10)'] + self.thread_counts = [1, 2, 3, 4, 5, 8, 10, 15, 20, 25] + + def parse_folder_name(self, folder_name: str) -> Optional[Tuple[int, str, int]]: + """Parse folder name to extract entry size, compression, and timestamp""" + # Pattern: ycsb_{char}_entry{size}_{compression}_{timestamp} + # Split by underscores and reconstruct + if not folder_name.startswith(f'ycsb_{self.ycsb_char}_entry'): + return None + + parts = folder_name.split('_') + if len(parts) < 4: # ycsb, {char}, entry{size}, compression..., timestamp + return None + + try: + # Extract entry size from "entry{size}" + entry_part = parts[2] # "entry512", "entry4096", etc. + if not entry_part.startswith('entry'): + return None + entry_size = int(entry_part[5:]) # Remove "entry" prefix + + # Last part is always timestamp + timestamp = int(parts[-1]) + + # Everything between entry size and timestamp is compression + compression_parts = parts[3:-1] # Skip ycsb_{char}_entry{size} and timestamp + compression_raw = '_'.join(compression_parts) + + # Map compression names + compression_map = { + 'none': 'None', + 'snappy': 'Snappy', + 'rle': 'Rle', + 'delta': 'Delta', + 'zstd1': 'Zstd(1)', + 'zstd5': 'Zstd(5)', + 'zstd10': 'Zstd(10)', + 'lz4_1': 'Lz4(1)', + 'lz4_5': 'Lz4(5)', + 'lz4_10': 'Lz4(10)' + } + + compression = compression_map.get(compression_raw, compression_raw) + return entry_size, compression, timestamp + + except (ValueError, IndexError): + return None + + def extract_thread_count(self, folder_path: Path) -> Optional[int]: + """Extract thread count from ycsb_{char}.csv file""" + csv_file = folder_path / f'ycsb_{self.ycsb_char}.csv' + if not csv_file.exists(): + return None + + try: + with open(csv_file, 'r') as f: + lines = f.readlines() + if len(lines) >= 2: + # Second line, first value + thread_count = int(lines[1].split(',')[0]) + return thread_count + except (ValueError, IndexError, IOError): + pass + return None + + def extract_compression_from_config(self, folder_path: Path) -> Optional[str]: + """Extract compression type from config file as backup""" + config_file = folder_path / 'config' + if not config_file.exists(): + return None + + try: + with open(config_file, 'r') as f: + content = f.read() + + if 'compression: None' in content: + return 'None' + elif 'compression: Rle(' in content: + return 'Rle' + elif 'compression: Delta(' in content: + return 'Delta' + elif 'Zstd' in content: + if 'level: 1' in content: + return 'Zstd(1)' + elif 'level: 5' in content: + return 'Zstd(5)' + elif 'level: 10' in content: + return 'Zstd(10)' + elif 'Lz4' in content: + if 'level: 1' in content: + return 'Lz4(1)' + elif 'level: 5' in content: + return 'Lz4(5)' + elif 'level: 10' in content: + return 'Lz4(10)' + elif 'Snappy' in content: + return 'Snappy' + except IOError: + pass + return None + + def calculate_metrics(self, folder_path: Path) -> Dict: + """Calculate all metrics for a single benchmark run""" + betree_file = folder_path / 'betree-metrics.jsonl' + out_file = folder_path / 'out.jsonl' + + if not betree_file.exists() or not out_file.exists(): + return {} + + try: + # Load betree metrics + betree_data = [] + with open(betree_file, 'r') as f: + for line in f: + betree_data.append(json.loads(line.strip())) + + # Load system metrics + out_data = [] + with open(out_file, 'r') as f: + for line in f: + out_data.append(json.loads(line.strip())) + + if not betree_data or not out_data: + return {} + + final_betree = betree_data[-1] + final_out = out_data[-1] + runtime_sec = (final_betree['epoch_ms'] - betree_data[0]['epoch_ms']) / 1000 + + if runtime_sec <= 0: + return {} + + # Extract basic I/O data + total_written = final_betree['storage']['tiers'][0]['vdevs'][0]['written'] + total_read = final_betree['storage']['tiers'][0]['vdevs'][0]['read'] + + # Calculate throughput metrics + BLOCK_SIZE = 4096 + avg_write_throughput = (total_written * BLOCK_SIZE / 1024 / 1024) / runtime_sec + avg_read_throughput = (total_read * BLOCK_SIZE / 1024 / 1024) / runtime_sec + + # Calculate peak throughput + write_incremental = [] + read_incremental = [] + for i in range(1, len(betree_data)): + prev_written = betree_data[i-1]['storage']['tiers'][0]['vdevs'][0]['written'] + curr_written = betree_data[i]['storage']['tiers'][0]['vdevs'][0]['written'] + prev_read = betree_data[i-1]['storage']['tiers'][0]['vdevs'][0]['read'] + curr_read = betree_data[i]['storage']['tiers'][0]['vdevs'][0]['read'] + + write_incremental.append(curr_written - prev_written) + read_incremental.append(curr_read - prev_read) + + # Convert to MiB/s (multiply by 2 for 500ms epochs) + write_throughputs = [blocks * BLOCK_SIZE / 1024 / 1024 * 2 for blocks in write_incremental] + read_throughputs = [blocks * BLOCK_SIZE / 1024 / 1024 * 2 for blocks in read_incremental] + + peak_write = max(write_throughputs) if write_throughputs else 0 + peak_read = max(read_throughputs) if read_throughputs else 0 + + # Cache metrics + cache_hits = final_betree['cache']['hits'] + cache_misses = final_betree['cache']['misses'] + total_cache_requests = cache_hits + cache_misses + cache_hit_rate = (cache_hits / total_cache_requests) * 100 if total_cache_requests > 0 else 0 + + # System metrics + peak_memory_mb = max([entry['proc_rss'] for entry in out_data]) / 1024 / 1024 + total_cpu_time = final_out['proc_utime'] + final_out['proc_stime'] + cpu_utilization = (total_cpu_time / runtime_sec) * 100 + + # Storage utilization + storage_used = final_betree['usage'][0]['total'] - final_betree['usage'][0]['free'] + storage_total = final_betree['usage'][0]['total'] + storage_utilization = (storage_used / storage_total) * 100 if storage_total > 0 else 0 + + # IOPS + total_iops = (total_read + total_written) / runtime_sec + + return { + 'avg_write_throughput_mbps': round(avg_write_throughput, 2), + 'avg_read_throughput_mbps': round(avg_read_throughput, 2), + 'peak_write_throughput_mbps': round(peak_write, 2), + 'peak_read_throughput_mbps': round(peak_read, 2), + 'total_data_written_mb': round(total_written * BLOCK_SIZE / 1024 / 1024, 2), + 'total_data_read_mb': round(total_read * BLOCK_SIZE / 1024 / 1024, 2), + 'cache_hit_rate_percent': round(cache_hit_rate, 2), + 'peak_memory_mb': round(peak_memory_mb, 2), + 'cpu_utilization_percent': round(cpu_utilization, 2), + 'storage_utilization_percent': round(storage_utilization, 2), + 'total_iops': round(total_iops, 2), + 'runtime_seconds': round(runtime_sec, 2) + } + + except (json.JSONDecodeError, KeyError, IndexError, ZeroDivisionError) as e: + print(f"Error processing {folder_path}: {e}") + return {} + + def collect_data(self): + """Collect data from all benchmark runs""" + print("Collecting benchmark data...") + + for folder in self.results_dir.iterdir(): + if not folder.is_dir() or not folder.name.startswith(f'ycsb_{self.ycsb_char}_entry'): + continue + + # Parse folder name + parsed = self.parse_folder_name(folder.name) + if not parsed: + continue + + entry_size, compression, timestamp = parsed + + # Get thread count + thread_count = self.extract_thread_count(folder) + if thread_count is None: + continue + + # Verify compression from config if needed + if compression not in self.compression_types: + compression = self.extract_compression_from_config(folder) + if compression is None or compression not in self.compression_types: + continue + + # Calculate metrics + metrics = self.calculate_metrics(folder) + if not metrics: + continue + + # Store data + key = (entry_size, compression, thread_count) + if key not in self.data: + self.data[key] = [] + self.data[key].append(metrics) + + print(f"Processed: {folder.name} -> Entry:{entry_size}, Compression:{compression}, Threads:{thread_count}") + + def aggregate_data(self): + """Aggregate multiple runs for the same configuration""" + print("Aggregating data...") + + for key, runs in self.data.items(): + if len(runs) == 1: + self.metrics[key] = runs[0] + else: + # Average multiple runs + aggregated = {} + for metric_name in runs[0].keys(): + values = [run[metric_name] for run in runs if metric_name in run] + if values: + aggregated[metric_name] = round(np.mean(values), 2) + self.metrics[key] = aggregated + + def create_heatmap_data(self, metric_name: str, remove_empty: bool = None) -> Dict[int, pd.DataFrame]: + """Create heatmap data organized by entry size""" + if remove_empty is None: + remove_empty = self.remove_empty + + heatmap_data = {} + + for entry_size in self.entry_sizes: + # Create DataFrame for this entry size + data_matrix = [] + row_labels = [] + + for compression in self.compression_types: + row_data = [] + for thread_count in self.thread_counts: + key = (entry_size, compression, thread_count) + if key in self.metrics and metric_name in self.metrics[key]: + value = self.metrics[key][metric_name] + else: + value = np.nan + row_data.append(value) + + data_matrix.append(row_data) + row_labels.append(f"{entry_size}B_{compression}") + + df = pd.DataFrame(data_matrix, + index=row_labels, + columns=self.thread_counts) + + if remove_empty: + # Remove rows (compression types) that are completely empty + df = df.dropna(how='all') + + # Remove columns (thread counts) that are completely empty + df = df.dropna(axis=1, how='all') + + # Only add to heatmap_data if there's actual data (or if keeping empty and df exists) + if not df.empty or not remove_empty: + heatmap_data[entry_size] = df + + return heatmap_data + + def plot_heatmap(self, metric_name: str, title: str, unit: str = "", cmap: str = 'viridis'): + """Create and save heatmap for a specific metric""" + heatmap_data = self.create_heatmap_data(metric_name) + + # Filter out entry sizes with no data + available_entry_sizes = [size for size in self.entry_sizes if size in heatmap_data] + + if not available_entry_sizes: + print(f"No data available for metric '{metric_name}'. Skipping heatmap generation.") + return + + # Create figure with subplots for each entry size that has data + fig, axes = plt.subplots(len(available_entry_sizes), 1, figsize=(12, 6 * len(available_entry_sizes))) + if len(available_entry_sizes) == 1: + axes = [axes] + + for i, entry_size in enumerate(available_entry_sizes): + df = heatmap_data[entry_size] + + # Use separate color scale for each entry size + vmin = df.min().min() if not df.isna().all().all() else 0 + vmax = df.max().max() if not df.isna().all().all() else 1 + + # Create heatmap + sns.heatmap(df, + ax=axes[i], + annot=True, + fmt='.1f', + cmap=cmap, + vmin=vmin, + vmax=vmax, + cbar_kws={'label': unit}, + xticklabels=True, + yticklabels=True) + + axes[i].set_title(f'{title} - Entry Size: {entry_size}B') + axes[i].set_xlabel('Thread Count') + axes[i].set_ylabel('Compression Type') + + plt.tight_layout() + + # Save heatmap + output_file = self.results_dir / f'ycsb_{self.ycsb_char}_heatmap_{metric_name}.png' + plt.savefig(output_file, dpi=300, bbox_inches='tight') + plt.close() + + print(f"Saved heatmap: {output_file}") + + def generate_all_heatmaps(self): + """Generate heatmaps for all metrics""" + print("Generating heatmaps...") + + # Define metrics to plot + metrics_config = [ + ('avg_write_throughput_mbps', 'Average Write Throughput', 'MiB/s', 'Reds'), + ('avg_read_throughput_mbps', 'Average Read Throughput', 'MiB/s', 'Blues'), + ('peak_write_throughput_mbps', 'Peak Write Throughput', 'MiB/s', 'Reds'), + ('peak_read_throughput_mbps', 'Peak Read Throughput', 'MiB/s', 'Blues'), + ('total_data_written_mb', 'Total Data Written', 'MB', 'Oranges'), + ('total_data_read_mb', 'Total Data Read', 'MB', 'Purples'), + ('cache_hit_rate_percent', 'Cache Hit Rate', '%', 'Greens'), + ('peak_memory_mb', 'Peak Memory Usage', 'MB', 'YlOrRd'), + ('cpu_utilization_percent', 'CPU Utilization', '%', 'plasma'), + ('storage_utilization_percent', 'Storage Utilization', '%', 'viridis'), + ('total_iops', 'Total IOPS', 'ops/s', 'magma'), + ('runtime_seconds', 'Runtime', 'seconds', 'coolwarm') + ] + + for metric_name, title, unit, cmap in metrics_config: + self.plot_heatmap(metric_name, title, unit, cmap) + + def generate_summary_report(self): + """Generate a summary report of the collected data""" + report_file = self.results_dir / 'benchmark_summary_report.txt' + + with open(report_file, 'w') as f: + f.write("Haura Benchmark Results Summary\n") + f.write("=" * 50 + "\n\n") + + f.write(f"Total configurations processed: {len(self.metrics)}\n") + f.write(f"Results directory: {self.results_dir}\n\n") + + # Count by entry size + f.write("Configurations by Entry Size:\n") + for entry_size in self.entry_sizes: + count = sum(1 for key in self.metrics.keys() if key[0] == entry_size) + f.write(f" {entry_size}B: {count} configurations\n") + + f.write("\nConfigurations by Compression:\n") + for compression in self.compression_types: + count = sum(1 for key in self.metrics.keys() if key[1] == compression) + f.write(f" {compression}: {count} configurations\n") + + f.write("\nConfigurations by Thread Count:\n") + for thread_count in self.thread_counts: + count = sum(1 for key in self.metrics.keys() if key[2] == thread_count) + f.write(f" {thread_count} threads: {count} configurations\n") + + # Best performers + if self.metrics: + f.write("\nTop Performers:\n") + + # Best average write throughput + best_write = max(self.metrics.items(), + key=lambda x: x[1].get('avg_write_throughput_mbps', 0)) + f.write(f"Best Avg Write Throughput: {best_write[1]['avg_write_throughput_mbps']} MiB/s ") + f.write(f"(Entry:{best_write[0][0]}B, {best_write[0][1]}, {best_write[0][2]} threads)\n") + + # Best average read throughput + best_read = max(self.metrics.items(), + key=lambda x: x[1].get('avg_read_throughput_mbps', 0)) + f.write(f"Best Avg Read Throughput: {best_read[1]['avg_read_throughput_mbps']} MiB/s ") + f.write(f"(Entry:{best_read[0][0]}B, {best_read[0][1]}, {best_read[0][2]} threads)\n") + + # Best cache hit rate + best_cache = max(self.metrics.items(), + key=lambda x: x[1].get('cache_hit_rate_percent', 0)) + f.write(f"Best Cache Hit Rate: {best_cache[1]['cache_hit_rate_percent']}% ") + f.write(f"(Entry:{best_cache[0][0]}B, {best_cache[0][1]}, {best_cache[0][2]} threads)\n") + + print(f"Summary report saved: {report_file}") + +def main(): + parser = argparse.ArgumentParser(description='Generate heatmaps for Haura benchmark results') + parser.add_argument('results_dir', + help='Path to benchmark results directory (e.g., /path/to/2025-07-24_default)') + parser.add_argument('ycsb_char', + help='YCSB workload character (e.g., a, b, c, d, g)') + parser.add_argument('--keep-empty', action='store_true', + help='Keep empty rows and columns in heatmaps (default: remove them)') + + args = parser.parse_args() + + if not os.path.exists(args.results_dir): + print(f"Error: Results directory '{args.results_dir}' does not exist") + return 1 + + # Create heatmap generator + generator = BenchmarkHeatmapGenerator(args.results_dir, args.ycsb_char, remove_empty=not args.keep_empty) + + # Process data + generator.collect_data() + generator.aggregate_data() + + # Generate outputs + generator.generate_all_heatmaps() + generator.generate_summary_report() + + print(f"\nHeatmap generation complete! Check {args.results_dir} for output files.") + return 0 + +if __name__ == '__main__': + exit(main()) \ No newline at end of file diff --git a/betree/haura-benchmarks/haura-plots/haura_plots/generate_metrics_plots.py b/betree/haura-benchmarks/haura-plots/haura_plots/generate_metrics_plots.py new file mode 100644 index 000000000..96c7721f0 --- /dev/null +++ b/betree/haura-benchmarks/haura-plots/haura_plots/generate_metrics_plots.py @@ -0,0 +1,495 @@ +#!/usr/bin/env python3 +""" +Generate metrics plots for all benchmark runs - Standalone version +Includes necessary functions from metrics_plots.py and util.py with Python 3.8 compatibility +""" + +import os +import sys +import json +from pathlib import Path +from typing import List, Dict, Any, Optional +import argparse + +try: + import matplotlib + matplotlib.use('Agg') # Use non-interactive backend + import matplotlib.pyplot as plt + import numpy as np +except ImportError as e: + print(f"ERROR: Error importing required modules: {e}") + print("Please install required packages:") + print(" pip install matplotlib numpy") + sys.exit(1) + +# Utility functions from util.py (Python 3.8 compatible) +BLOCK_SIZE = 4096 +SEC_MS = 1000 +EPOCH_MS = 1000 + +# Colors +GREEN = '#2ca02c' +BLUE = '#1f77b4' + +def subtract_first_index(data): + """Subtract the first element from all elements in the list.""" + if len(data) > 0: + first = data[0] + for i in range(len(data)): + data[i] -= first + +def subtract_last_index(data): + """Subtract each element from the next element in the list.""" + if len(data) > 1: + for i in range(len(data) - 1, 0, -1): + data[i] -= data[i - 1] + +def ms_to_string(ms): + """Convert milliseconds to a formatted string.""" + seconds = ms // 1000 + minutes = seconds // 60 + seconds = seconds % 60 + return f"{minutes}:{seconds:02d}" + +def num_to_name(tier): + """Convert a number to the corresponding tier name in the storage hierarchy.""" + names = {0: 'Fastest', 1: 'Fast', 2: 'Slow', 3: 'Slowest'} + return names.get(tier, f'Tier{tier}') + +def read_jsonl(file_handle): + """Read JSONL file and return list of parsed JSON objects.""" + data = [] + for line in file_handle: + line = line.strip() + if line: + data.append(json.loads(line)) + return data + +# Plotting functions from metrics_plots.py (adapted) +def plot_throughput(data, path): + """Print a four row throughput plot with focussed read or write throughput.""" + + epoch = [temp['epoch_ms'] for temp in data] + subtract_first_index(epoch) + epoch_formatted = list(map(ms_to_string, epoch)) + num_tiers = len(data[0]['storage']['tiers']) + fig, axs = plt.subplots(num_tiers, 1, figsize=(16,8)) + + # Handle single tier case + if num_tiers == 1: + axs = [axs] + + for tier_id in range(num_tiers): + for disk_id in range(len(data[0]['storage']['tiers'][tier_id]['vdevs'])): + writes = np.array([]) + reads = np.array([]) + for point in data: + writes = np.append(writes, point['storage']['tiers'][tier_id]['vdevs'][disk_id]['written']) + reads = np.append(reads, point['storage']['tiers'][tier_id]['vdevs'][disk_id]['read']) + + if len(writes) > 0: + subtract_last_index(writes) + subtract_last_index(reads) + + # convert to MiB from Blocks + writes = writes * BLOCK_SIZE / 1024 / 1024 * (SEC_MS / EPOCH_MS) + reads = reads * BLOCK_SIZE / 1024 / 1024 * (SEC_MS / EPOCH_MS) + + axs[tier_id].plot(epoch, reads, label = 'Read', linestyle='dotted', color=GREEN) + axs[tier_id].plot(epoch, writes, label = 'Written', color=BLUE) + axs[tier_id].set_xlabel("runtime (minute:seconds)") + axs[tier_id].set_xticks(epoch, epoch_formatted) + axs[tier_id].locator_params(tight=True, nbins=10) + axs[tier_id].set_ylabel(f"{num_to_name(tier_id)}\nMiB/s (I/0)") + label=' | '.join(path.split('/')[-2:]) + + fig.legend(loc="center right",handles=axs[0].get_lines()) + fig.suptitle(f"Haura - {label}", y=0.98) + fig.savefig(f"{path}/plot_write.svg") + + for tier_id in range(num_tiers): + lines = axs[tier_id].get_lines() + if len(lines) > 0: + lines[0].set_linestyle('solid') + lines[0].zorder = 2.1 + lines[1].set_linestyle('dotted') + lines[1].zorder = 2.0 + + fig.legend(loc="center right",handles=axs[0].get_lines()) + fig.savefig(f"{path}/plot_read.svg") + plt.close(fig) + +def plot_tier_usage(data, path): + """Plot the utilized space of each storage tier.""" + fig, axs = plt.subplots(4, 1, figsize=(10,13)) + + # 0 - 3; Fastest - Slowest + free = [[], [], [], []] + total = [[], [], [], []] + + # Map each timestep to an individual + for ts in data: + tier = 0 + for stat in ts["usage"]: + free[tier].append(stat["free"]) + total[tier].append(stat["total"]) + tier += 1 + + tier = 0 + for fr in free: + axs[tier].plot((np.array(total[tier]) - np.array(fr)) * 4096 / 1024 / 1024 / 1024, + label="Used", marker="o", markevery=200, color=BLUE) + axs[tier].plot(np.array(total[tier]) * 4096 / 1024 / 1024 / 1024, + label="Total", marker="^", markevery=200, color=GREEN) + axs[tier].set_ylim(bottom=0) + axs[tier].set_ylabel(f"{num_to_name(tier)}\nCapacity in GiB") + tier += 1 + + fig.legend(loc='center right',handles=axs[0].get_lines()) + fig.savefig(f"{path}/tier_usage.svg") + plt.close(fig) + +def plot_system(path): + """Plot the system usage and temperatures during the run.""" + data = [] + jsonl_file = f"{path}/out.jsonl" + + try: + with open(jsonl_file, 'r', encoding="UTF-8") as metrics: + data = read_jsonl(metrics) + except FileNotFoundError: + print(f"Warning: {jsonl_file} not found, skipping system plot") + return + except Exception as e: + print(f"Warning: Error reading {jsonl_file}: {e}") + return + + if not data: + print(f"Warning: No data found in {jsonl_file}") + return + + epoch = [temp['epoch_ms'] for temp in data] + subtract_first_index(epoch) + epoch_formatted = list(map(ms_to_string, epoch)) + min_pagefaults = [x["proc_minflt"] + x["proc_cminflt"] for x in data] + maj_pagefaults = [x["proc_majflt"] + x["proc_cmajflt"] for x in data] + virtual_mem = [x["proc_vsize"] for x in data] + resident_mem = [x["proc_rss"] for x in data] + utime = [x["proc_utime"] + x["proc_cutime"] for x in data] + stime = [x["proc_stime"] + x["proc_cstime"] for x in data] + + fig, axs = plt.subplots(3,2, figsize=(10, 10)) + eticks = range(0, epoch[-1:][0], 30 * 10**3) + eticks_formatted = list(map(ms_to_string, eticks)) + + # Page Faults (Minor) + axs[0][0].plot(epoch, min_pagefaults) + axs[0][0].set_ylabel("Minor Pagefaults (All threads)") + axs[0][0].set_xticks(eticks, eticks_formatted) + + # Page Faults (Major) + axs[1][0].plot(epoch, maj_pagefaults) + axs[1][0].set_ylabel("Major Pagefaults (All threads)") + axs[1][0].set_xticks(eticks, eticks_formatted) + + # Show[0] in MiB + axs[2][0].plot(epoch, np.array(virtual_mem) / 1024 / 1024) + axs[2][0].set_ylabel("Virtual Memory [MiB]") + axs[2][0].set_xticks(eticks, eticks_formatted) + + # Resident Memory + axs[2][1].plot(epoch, np.array(resident_mem)) + axs[2][1].set_ylabel("Resident Memory Pages [#]") + axs[2][1].set_xticks(eticks, eticks_formatted) + + # CPU time + axs[0][1].plot(epoch, utime, label="utime") + axs[0][1].plot(epoch, stime, label="stime") + axs[0][1].set_ylabel("time [s] (All threads)") + axs[0][1].set_xticks(eticks, eticks_formatted) + axs[0][1].legend(bbox_to_anchor=(1.35, 0.6)) + + # Temperature plots + temps_keys = [key for key in data[0].keys() if 'hwmon' in key and 'Tccd' not in key] + line_styles = ['-', '--', '-.', ':'] + for i, key in enumerate(temps_keys): + style = line_styles[i % len(line_styles)] + axs[1][1].plot(epoch, [x[key] for x in data], label=key, linestyle=style) + + axs[1][1].set_xticks(eticks, eticks_formatted) + axs[1][1].set_ylabel("Temperature [C]") + axs[1][1].legend(bbox_to_anchor=(1.0, 0.6)) + + fig.tight_layout() + fig.savefig(f"{path}/proc.svg") + plt.close(fig) + +class MetricsPlotGenerator: + """Generate metrics plots for all benchmark runs""" + + def __init__(self, results_dirs: List[str], verbose: bool = True): + self.results_dirs = [Path(d) for d in results_dirs] + self.verbose = verbose + self.stats = { + 'total_runs': 0, + 'successful_runs': 0, + 'failed_runs': 0, + 'skipped_runs': 0, + 'plots_generated': 0 + } + + def log(self, message: str, level: str = "INFO"): + """Log message if verbose mode is enabled""" + if self.verbose: + prefix = { + "INFO": "INFO: ", + "SUCCESS": "SUCCESS: ", + "WARNING": "WARNING: ", + "ERROR": "ERROR: " + }.get(level, "") + print(f"{prefix} {message}") + + def find_run_folders(self) -> List[Path]: + """Find all benchmark run folders in the results directories""" + run_folders = [] + + for results_dir in self.results_dirs: + if not results_dir.exists(): + self.log(f"Results directory not found: {results_dir}", "WARNING") + continue + + self.log(f"Scanning directory: {results_dir}") + + for item in results_dir.iterdir(): + if item.is_dir() and self.is_benchmark_run_folder(item): + run_folders.append(item) + + run_folders.sort() # Sort for consistent processing order + self.log(f"Found {len(run_folders)} benchmark run folders") + return run_folders + + def is_benchmark_run_folder(self, folder: Path) -> bool: + """Check if a folder is a benchmark run folder""" + # Check for expected pattern: ycsb_g_entry{size}_{compression}_{timestamp} + folder_name = folder.name + if not folder_name.startswith('ycsb_g_entry'): + return False + + # Check for required files + required_files = ['betree-metrics.jsonl'] + for file_name in required_files: + if not (folder / file_name).exists(): + return False + + return True + + def load_metrics_data(self, folder_path: Path) -> Optional[List[Dict[str, Any]]]: + """Load and parse betree-metrics.jsonl data""" + metrics_file = folder_path / 'betree-metrics.jsonl' + + if not metrics_file.exists(): + self.log(f"Missing betree-metrics.jsonl in {folder_path.name}", "WARNING") + return None + + try: + data = [] + with open(metrics_file, 'r') as f: + for line_num, line in enumerate(f, 1): + line = line.strip() + if line: + try: + data.append(json.loads(line)) + except json.JSONDecodeError as e: + self.log(f"JSON decode error in {metrics_file} line {line_num}: {e}", "WARNING") + continue + + if not data: + self.log(f"No valid data found in {metrics_file}", "WARNING") + return None + + self.log(f"Loaded {len(data)} data points from {folder_path.name}") + return data + + except Exception as e: + self.log(f"Error loading {metrics_file}: {e}", "ERROR") + return None + + def check_existing_plots(self, folder_path: Path) -> Dict[str, bool]: + """Check which plots already exist in the folder""" + plot_files = [ + 'plot_write.svg', + 'plot_read.svg', + 'tier_usage.svg', + 'proc.svg' + ] + + existing = {} + for plot_file in plot_files: + existing[plot_file] = (folder_path / plot_file).exists() + + return existing + + def generate_plots_for_run(self, run_folder: Path, overwrite: bool = False) -> bool: + """Generate all plots for a single benchmark run""" + self.log(f"Processing run: {run_folder.name}") + + # Check existing plots + existing_plots = self.check_existing_plots(run_folder) + if not overwrite and all(existing_plots.values()): + self.log(f"All plots already exist in {run_folder.name}, skipping", "INFO") + self.stats['skipped_runs'] += 1 + return True + + # Load metrics data + metrics_data = self.load_metrics_data(run_folder) + if metrics_data is None: + self.log(f"Failed to load metrics data for {run_folder.name}", "ERROR") + self.stats['failed_runs'] += 1 + return False + + plots_generated = 0 + + try: + # Generate throughput plots (plot_write.svg and plot_read.svg) + if overwrite or not existing_plots.get('plot_write.svg', False) or not existing_plots.get('plot_read.svg', False): + self.log(f"Generating throughput plots for {run_folder.name}") + plot_throughput(metrics_data, str(run_folder)) + plots_generated += 2 # Generates both write and read plots + + # Generate tier usage plot (tier_usage.svg) + if overwrite or not existing_plots.get('tier_usage.svg', False): + self.log(f"Generating tier usage plot for {run_folder.name}") + plot_tier_usage(metrics_data, str(run_folder)) + plots_generated += 1 + + # Generate system plot (proc.svg) + # Note: plot_system expects out.jsonl, but we have betree-metrics.jsonl + # Let's check if out.jsonl exists, if not, create a symlink or copy + out_jsonl = run_folder / 'out.jsonl' + metrics_jsonl = run_folder / 'betree-metrics.jsonl' + + if overwrite or not existing_plots.get('proc.svg', False): + if not out_jsonl.exists() and metrics_jsonl.exists(): + # Create symlink from betree-metrics.jsonl to out.jsonl + try: + out_jsonl.symlink_to('betree-metrics.jsonl') + self.log(f"Created symlink out.jsonl -> betree-metrics.jsonl in {run_folder.name}") + except Exception as e: + self.log(f"Failed to create symlink in {run_folder.name}: {e}", "WARNING") + # Try copying instead + try: + import shutil + shutil.copy2(metrics_jsonl, out_jsonl) + self.log(f"Copied betree-metrics.jsonl to out.jsonl in {run_folder.name}") + except Exception as e2: + self.log(f"Failed to copy file in {run_folder.name}: {e2}", "ERROR") + + if out_jsonl.exists(): + self.log(f"Generating system plot for {run_folder.name}") + plot_system(str(run_folder)) + plots_generated += 1 + else: + self.log(f"Cannot generate system plot for {run_folder.name}: out.jsonl not available", "WARNING") + + self.stats['plots_generated'] += plots_generated + self.stats['successful_runs'] += 1 + self.log(f"Successfully generated {plots_generated} plots for {run_folder.name}", "SUCCESS") + return True + + except Exception as e: + self.log(f"Error generating plots for {run_folder.name}: {e}", "ERROR") + self.stats['failed_runs'] += 1 + return False + + def generate_all_plots(self, overwrite: bool = False) -> None: + """Generate plots for all benchmark runs""" + self.log("Starting metrics plot generation...") + + # Find all run folders + run_folders = self.find_run_folders() + if not run_folders: + self.log("No benchmark run folders found!", "WARNING") + return + + self.stats['total_runs'] = len(run_folders) + + # Process each run folder + for i, run_folder in enumerate(run_folders, 1): + self.log(f"[{i}/{len(run_folders)}] Processing {run_folder.name}") + self.generate_plots_for_run(run_folder, overwrite) + + # Print summary + self.print_summary() + + def print_summary(self): + """Print generation summary""" + print("\n" + "="*60) + print("METRICS PLOT GENERATION SUMMARY") + print("="*60) + print(f"Total runs found: {self.stats['total_runs']}") + print(f"Successfully processed: {self.stats['successful_runs']}") + print(f"Failed: {self.stats['failed_runs']}") + print(f"Skipped (existing): {self.stats['skipped_runs']}") + print(f"Total plots generated: {self.stats['plots_generated']}") + + if self.stats['successful_runs'] > 0: + success_rate = (self.stats['successful_runs'] / self.stats['total_runs']) * 100 + print(f"Success rate: {success_rate:.1f}%") + + print("="*60) + + if self.stats['plots_generated'] > 0: + print("Plot generation completed!") + print("Check individual run folders for generated SVG files:") + print(" - plot_write.svg (write throughput)") + print(" - plot_read.svg (read throughput)") + print(" - tier_usage.svg (storage tier usage)") + print(" - proc.svg (system metrics)") + else: + print("WARNING: No plots were generated.") + +def main(): + parser = argparse.ArgumentParser( + description='Generate metrics plots for all benchmark runs', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + %(prog)s # Use default results directories + %(prog)s --results-dir ./results/my_run # Specify custom results directory + %(prog)s --overwrite # Overwrite existing plots + %(prog)s --quiet # Suppress verbose output + """ + ) + + parser.add_argument('--results-dir', action='append', dest='results_dirs', + help='Results directory to process (can be specified multiple times)') + + parser.add_argument('--overwrite', action='store_true', + help='Overwrite existing plots') + + parser.add_argument('--quiet', '-q', action='store_true', + help='Suppress verbose output') + + args = parser.parse_args() + + # Default results directories if none specified + if not args.results_dirs: + default_dirs = [ + '/home/skarim/Code/smash/haura/betree/haura-benchmarks/results/2025-07-24_default', + '/home/skarim/Code/smash/haura/betree/haura-benchmarks/results/2025-07-25_default' + ] + args.results_dirs = [d for d in default_dirs if Path(d).exists()] + + if not args.results_dirs: + print("ERROR: No default results directories found!") + print("Please specify results directories with --results-dir") + return 1 + + # Create generator and run + generator = MetricsPlotGenerator(args.results_dirs, verbose=not args.quiet) + generator.generate_all_plots(overwrite=args.overwrite) + + return 0 + +if __name__ == "__main__": + sys.exit(main()) \ No newline at end of file diff --git a/betree/haura-benchmarks/haura-plots/haura_plots/plot_ycsb_execution_time.py b/betree/haura-benchmarks/haura-plots/haura_plots/plot_ycsb_execution_time.py new file mode 100644 index 000000000..d21fb479a --- /dev/null +++ b/betree/haura-benchmarks/haura-plots/haura_plots/plot_ycsb_execution_time.py @@ -0,0 +1,249 @@ +import os +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np +import re +import math +import sys +import argparse + +def parse_arguments(): + parser = argparse.ArgumentParser(description='Plot YCSB execution time results') + parser.add_argument('folder_path', help='Path to the folder containing YCSB results') + parser.add_argument('ycsb_char', help='YCSB workload character (e.g., a, b, c, d, g)') + return parser.parse_args() + +# Parse command line arguments +args = parse_arguments() +main_dir = args.folder_path +target_char = args.ycsb_char + +def extract_compression_value(config_path): + if not os.path.exists(config_path): + return "Unknown" + with open(config_path, "r", encoding="utf-8-sig") as f: + config_text = f.read() + + # Pattern for compression with level (Zstd, Lz4) + pattern_with_level = re.compile( + r'compression\s*:\s*(\w+)\s*\(\s*\1\s*{\s*level\s*:\s*(\d+)\s*,?', + re.DOTALL + ) + match = pattern_with_level.search(config_text) + if match: + comp_type, level = match.groups() + return f"{comp_type}({level})" + + # Pattern for Snappy (no level parameter) + pattern_snappy = re.compile( + r'compression\s*:\s*Snappy\s*\(\s*Snappy\s*,?\s*\)', + re.DOTALL + ) + if pattern_snappy.search(config_text): + return "Snappy" + + # Pattern for Rle (with parameters) + pattern_rle = re.compile( + r'compression\s*:\s*Rle\s*\(\s*Rle\s*\{[^}]*\}\s*,?\s*\)', + re.DOTALL + ) + if pattern_rle.search(config_text): + return "Rle" + + # Pattern for Delta (with parameters) + pattern_delta = re.compile( + r'compression\s*:\s*Delta\s*\(\s*Delta\s*\{[^}]*\}\s*,?\s*\)', + re.DOTALL + ) + if pattern_delta.search(config_text): + return "Delta" + + # Pattern for None/null + match_flat = re.search(r'compression\s*:\s*(None|null|nullptr)', config_text, re.IGNORECASE) + if match_flat: + return match_flat.group(1) + + return "Unknown" + +def debug_compression_parsing(main_dir, target_char): + """Debug function to show what compression values are being parsed""" + print("=== DEBUG: Compression parsing ===") + for folder in os.listdir(main_dir): + if folder.startswith(f"ycsb_{target_char}"): + config_path = os.path.join(main_dir, folder, "config") + if os.path.exists(config_path): + label = extract_compression_value(config_path) + print(f"Folder: {folder} -> Compression: {label}") + print("=== END DEBUG ===\n") + +def extract_entry_size(folder_name): + """Extract entry size from folder name like 'ycsb_g_entry512_none_1753381182'""" + match = re.search(r'entry(\d+)', folder_name) + if match: + return match.group(1) + return None + +# Debug: Show what compression values are being parsed +debug_compression_parsing(main_dir, target_char) + +# Step 1: Collect all folders and group by entry size +compression_labels = set() +folders_by_entry_size = {} +for folder in os.listdir(main_dir): + if folder.startswith(f"ycsb_{target_char}"): + entry_size = extract_entry_size(folder) + if entry_size: + folders_by_entry_size.setdefault(entry_size, []).append(folder) + config_path = os.path.join(main_dir, folder, "config") + label = extract_compression_value(config_path) + compression_labels.add(label) + +# Assign consistent color per label +color_map = plt.get_cmap("tab10") +label_list = sorted(list(compression_labels)) +label_colors = {label: color_map(i % 10) for i, label in enumerate(label_list)} + +# Step 2: Setup 2x2 subplot layout +fig, axs = plt.subplots(2, 2, figsize=(16, 12)) +all_labels_used = set() + +title_map = { + "a": "YCSB-A (Read Heavy)", + "b": "YCSB-B (Read/Write Mix)", + "c": "YCSB-C (Read Only)", + "d": "YCSB-D (Read Latest)", + "e": "YCSB-E (Scan)", + "f": "YCSB-F (Read-Modify-Write)", + "g": "YCSB-G (Update Heavy)", + "h": "YCSB-H (Mixed)", + "i": "YCSB-I (Insert Heavy)", + # Add more mappings as needed +} + +plot_title = title_map.get(target_char, f"YCSB-{target_char.upper()}") +fig.suptitle(f"{plot_title} - Operations/sec by Thread Count (Different Entry Sizes)", fontsize=18, y=0.95) + +# Define entry size order for consistent subplot positioning +entry_size_order = ['512', '4096', '16384', '30000'] +entry_size_positions = { + '512': (0, 0), # Top-left + '4096': (0, 1), # Top-right + '16384': (1, 0), # Bottom-left + '30000': (1, 1) # Bottom-right +} + +# Step 1: Define label order and fixed colors +preferred_order = ["None", "Snappy", "Rle", "Delta", "Zstd(1)", "Zstd(5)", "Zstd(10)", "Lz4(1)", "Lz4(5)", "Lz4(10)"] +label_list = preferred_order # For legend consistency + +label_colors = { + "None": "#333333", # Dark gray + "Snappy": "#2ca02c", # Green + "Rle": "#d62728", # Red + "Delta": "#9467bd", # Purple + "Zstd(1)": "#1f77b4", # Deep blue + "Zstd(5)": "#5fa2dc", # Medium blue + "Zstd(10)": "#a6c8ed", # Light blue + "Lz4(1)": "#ff7f0e", # Bold orange + "Lz4(5)": "#ffae64", # Light orange + "Lz4(10)": "#ffd5b2" # Pale peach +} + +# Step 3: Scan for global max ops/sec across all entry sizes +global_max_ops = 0 +for entry_size, folders in folders_by_entry_size.items(): + for folder in folders: + csv_file = os.path.join(main_dir, folder, f"ycsb_{target_char}.csv") + if os.path.exists(csv_file): + try: + df = pd.read_csv(csv_file) + if not df.empty and "ops" in df.columns: + max_val = df["ops"].max() + global_max_ops = max(global_max_ops, max_val) + except (pd.errors.EmptyDataError, pd.errors.ParserError): + continue # Skip malformed or empty CSV files + +# Step 4: Create subplots for each entry size +for entry_size in entry_size_order: + if entry_size not in folders_by_entry_size: + # If no data for this entry size, hide the subplot + row, col = entry_size_positions[entry_size] + axs[row, col].set_visible(False) + continue + + folders = folders_by_entry_size[entry_size] + ops_by_label = {} + all_thread_counts = set() + + # Step 4.1: Collect ops/sec per label and thread count for this entry size + for folder in folders: + folder_path = os.path.join(main_dir, folder) + csv_file = os.path.join(folder_path, f"ycsb_{target_char}.csv") + config_path = os.path.join(folder_path, "config") + label = extract_compression_value(config_path) + + if not os.path.exists(csv_file) or label not in preferred_order: + continue + + try: + df = pd.read_csv(csv_file) + if df.empty or "ops" not in df.columns or "threads" not in df.columns: + continue + except (pd.errors.EmptyDataError, pd.errors.ParserError): + continue # Skip malformed or empty CSV files + + thread = int(df["threads"].values[0]) + ops = float(df["ops"].values[0]) + all_thread_counts.add(thread) + + ops_by_label.setdefault(label, {})[thread] = ops + all_labels_used.add(label) + + if not ops_by_label: + # If no valid data for this entry size, hide the subplot + row, col = entry_size_positions[entry_size] + axs[row, col].set_visible(False) + continue + + # Step 4.2: Prepare sorted thread counts + sorted_threads = sorted(all_thread_counts) + base_x = np.arange(len(sorted_threads)) + total_group_width = 0.8 + bar_width = total_group_width / len(preferred_order) + + # Step 4.3: Plot bars per label + row, col = entry_size_positions[entry_size] + ax = axs[row, col] + + for i, label in enumerate(preferred_order): + thread_ops = ops_by_label.get(label, {}) + values = [thread_ops.get(t, 0) for t in sorted_threads] + offsets = base_x - (total_group_width / 2) + i * bar_width + ax.bar(offsets, values, width=bar_width, color=label_colors[label], label=label) + + # Step 4.4: Finalize subplot + midpoints = base_x # This centers the labels under each group of bars + ax.set_xticks(midpoints) + ax.set_xticklabels(sorted_threads) + ax.set_xlabel("Threads") + ax.set_ylabel("Ops/sec") + ax.set_ylim(0, global_max_ops * 1.1) + ax.grid(axis="y", linestyle="dotted", alpha=0.6) + ax.set_title(f"Entry Size: {entry_size} bytes", fontsize=14) + +# Check if we have any data at all +if not all_labels_used: + print(f"No data found for YCSB-{target_char} in {main_dir}") + sys.exit(1) + + +# Step 5: Add global legend +handles = [plt.Rectangle((0, 0), 1, 1, color=label_colors[label]) for label in label_list if label in all_labels_used] +fig.legend(handles, [label for label in label_list if label in all_labels_used], + fontsize="medium", loc='center right', bbox_to_anchor=(0.98, 0.5)) + +plt.tight_layout(rect=[0, 0, 0.85, 0.92]) # Leave space for the title and legend +output_filename = os.path.join(main_dir, f"ycsb_{target_char}_entry_sizes_ops_per_thread.png") +plt.savefig(output_filename, dpi=300, bbox_inches='tight') +print(f"Plot saved as: {output_filename}") +plt.show() diff --git a/betree/haura-benchmarks/perf-config.json b/betree/haura-benchmarks/perf-config.json new file mode 100644 index 000000000..3ea438839 --- /dev/null +++ b/betree/haura-benchmarks/perf-config.json @@ -0,0 +1,40 @@ +{ + "storage": { + "tiers": [ + { + "top_level_vdevs": [ + { + "path": "/mnt/tmpfs/hauradb", + "direct": true + } + ], + "preferred_access_type": "Unknown", + "storage_kind": "Memory" + } + ], + "queue_depth_factor": 20, + "thread_pool_size": null, + "thread_pool_pinned": false + }, + "alloc_strategy": [ + [ + 0 + ], + [ + 0 + ], + [ + 0 + ], + [ + 0 + ] + ], + "default_storage_class": 0, + "compression": "None", + "cache_size": 33554432, + "access_mode": "AlwaysCreateNew", + "sync_interval_ms": null, + "migration_policy": null, + "metrics": null +} diff --git a/betree/haura-benchmarks/run.sh b/betree/haura-benchmarks/run.sh index b94945f19..ebbe0315f 100755 --- a/betree/haura-benchmarks/run.sh +++ b/betree/haura-benchmarks/run.sh @@ -21,7 +21,7 @@ function ensure_zip { function ensure_bectl { pushd ../../bectl || exit - cargo build --release + cargo build --release --features betree_storage_stack/nvm popd || return } @@ -41,7 +41,7 @@ function run { shift 3 if [ "$total_runs" -gt 0 ]; then - sleep 60 + sleep 30 fi total_runs=$((total_runs + 1)) @@ -55,7 +55,7 @@ function run { env | grep BETREE__ env >"env" "$ROOT/../../target/release/bectl" config print-active >"config" - "$ROOT/target/release/betree-perf" "$mode" "$@" + numactl --cpunodebind=0 --membind=0 "$ROOT/target/release/betree-perf" "$mode" "$@" echo "merging results into $out_path/out.jsonl" "$ROOT/target/release/json-merge" \ @@ -206,36 +206,229 @@ function ci() { } function ycsb_a() { - run "$RUN_IDENT" ycsb_a_block ycsb-a "$((8 * 1024 * 1024 * 1024))" 0 8 - run "$RUN_IDENT" ycsb_a_memory ycsb-a "$((8 * 1024 * 1024 * 1024))" 1 8 + # Default parameters: use generated data with integers + local data_source="${YCSB_DATA_SOURCE:-file}" + local data_type="${YCSB_DATA_TYPE:-int}" + local data_path="${YCSB_DATA_PATH:-/home/skarim/Code/smash/haura/betree/haura-benchmarks/silesia_corpus}" + + #run "$RUN_IDENT" "ycsb_a_${YCSB_SUFFIX:-unnamed}" ycsb-a "$((2 * 1024 * 1024 * 1024))" 0 2 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + run "$RUN_IDENT" "ycsb_a_${YCSB_SUFFIX:-unnamed}" ycsb-a "$((1 * 1024 * 1024))" 0 1 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_a_${YCSB_SUFFIX:-unnamed}" ycsb-a "$((1 * 1024 * 1024))" 0 2 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_a_${YCSB_SUFFIX:-unnamed}" ycsb-a "$((1 * 1024 * 1024))" 0 3 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_a_${YCSB_SUFFIX:-unnamed}" ycsb-a "$((1 * 1024 * 1024))" 0 4 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_a_${YCSB_SUFFIX:-unnamed}" ycsb-a "$((1 * 1024 * 1024))" 0 5 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_a_${YCSB_SUFFIX:-unnamed}" ycsb-a "$((1 * 1024 * 1024))" 0 8 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_a_${YCSB_SUFFIX:-unnamed}" ycsb-a "$((1 * 1024 * 1024))" 0 10 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_a_${YCSB_SUFFIX:-unnamed}" ycsb-a "$((1 * 1024 * 1024))" 0 15 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_a_${YCSB_SUFFIX:-unnamed}" ycsb-a "$((1 * 1024 * 1024))" 0 20 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_a_${YCSB_SUFFIX:-unnamed}" ycsb-a "$((1 * 1024 * 1024))" 0 25 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" } function ycsb_b() { - run "$RUN_IDENT" ycsb_b_block ycsb-b "$((8 * 1024 * 1024 * 1024))" 0 8 - run "$RUN_IDENT" ycsb_b_memory ycsb-b "$((8 * 1024 * 1024 * 1024))" 1 8 + # Default parameters: use generated data with integers + local data_source="${YCSB_DATA_SOURCE:-file}" + local data_type="${YCSB_DATA_TYPE:-int}" + local data_path="${YCSB_DATA_PATH:-/home/skarim/Code/smash/haura/betree/haura-benchmarks/silesia_corpus}" + + #run "$RUN_IDENT" "ycsb_b_${YCSB_SUFFIX:-unnamed}" ycsb-b "$((2 * 1024 * 1024 * 1024))" 0 2 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + run "$RUN_IDENT" "ycsb_b_${YCSB_SUFFIX:-unnamed}" ycsb-b "$((1 * 1024 * 1024))" 0 1 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_b_${YCSB_SUFFIX:-unnamed}" ycsb-b "$((1 * 1024 * 1024))" 0 2 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_b_${YCSB_SUFFIX:-unnamed}" ycsb-b "$((1 * 1024 * 1024))" 0 3 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_b_${YCSB_SUFFIX:-unnamed}" ycsb-b "$((1 * 1024 * 1024))" 0 4 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_b_${YCSB_SUFFIX:-unnamed}" ycsb-b "$((1 * 1024 * 1024))" 0 5 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_b_${YCSB_SUFFIX:-unnamed}" ycsb-b "$((1 * 1024 * 1024))" 0 8 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_b_${YCSB_SUFFIX:-unnamed}" ycsb-b "$((1 * 1024 * 1024))" 0 10 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_b_${YCSB_SUFFIX:-unnamed}" ycsb-b "$((1 * 1024 * 1024))" 0 15 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_b_${YCSB_SUFFIX:-unnamed}" ycsb-b "$((1 * 1024 * 1024))" 0 20 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_b_${YCSB_SUFFIX:-unnamed}" ycsb-b "$((1 * 1024 * 1024))" 0 25 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" } function ycsb_c() { - run "$RUN_IDENT" ycsb_c_block ycsb-c "$((8 * 1024 * 1024 * 1024))" 0 8 - run "$RUN_IDENT" ycsb_c_memory ycsb-c "$((8 * 1024 * 1024 * 1024))" 1 8 + # Default parameters: use generated data with integers + local data_source="${YCSB_DATA_SOURCE:-file}" + local data_type="${YCSB_DATA_TYPE:-int}" + local data_path="${YCSB_DATA_PATH:-/home/skarim/Code/smash/haura/betree/haura-benchmarks/silesia_corpus}" + + #run "$RUN_IDENT" "ycsb_c_${YCSB_SUFFIX:-unnamed}" ycsb-c "$((2 * 1024 * 1024 * 1024))" 0 2 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + run "$RUN_IDENT" "ycsb_c_${YCSB_SUFFIX:-unnamed}" ycsb-c "$((1 * 1024 * 1024))" 0 1 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_c_${YCSB_SUFFIX:-unnamed}" ycsb-c "$((1 * 1024 * 1024))" 0 2 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_c_${YCSB_SUFFIX:-unnamed}" ycsb-c "$((1 * 1024 * 1024))" 0 3 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_c_${YCSB_SUFFIX:-unnamed}" ycsb-c "$((1 * 1024 * 1024))" 0 4 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_c_${YCSB_SUFFIX:-unnamed}" ycsb-c "$((1 * 1024 * 1024))" 0 5 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_c_${YCSB_SUFFIX:-unnamed}" ycsb-c "$((1 * 1024 * 1024))" 0 8 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_c_${YCSB_SUFFIX:-unnamed}" ycsb-c "$((1 * 1024 * 1024))" 0 10 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_c_${YCSB_SUFFIX:-unnamed}" ycsb-c "$((1 * 1024 * 1024))" 0 15 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_c_${YCSB_SUFFIX:-unnamed}" ycsb-c "$((1 * 1024 * 1024))" 0 20 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_c_${YCSB_SUFFIX:-unnamed}" ycsb-c "$((1 * 1024 * 1024))" 0 25 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" } function ycsb_d() { - run "$RUN_IDENT" ycsb_d_block ycsb-d "$((8 * 1024 * 1024 * 1024))" 0 8 - run "$RUN_IDENT" ycsb_d_memory ycsb-d "$((8 * 1024 * 1024 * 1024))" 1 8 + # Default parameters: use generated data with integers + local data_source="${YCSB_DATA_SOURCE:-file}" + local data_type="${YCSB_DATA_TYPE:-int}" + local data_path="${YCSB_DATA_PATH:-/home/skarim/Code/smash/haura/betree/haura-benchmarks/silesia_corpus}" + + run "$RUN_IDENT" "ycsb_d_${YCSB_SUFFIX:-unnamed}" ycsb-d "$((2 * 1024 * 1024 * 1024))" 0 2 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" +# run "$RUN_IDENT" ycsb_d_memory ycsb-d "$((4 * 1024 * 1024 * 1024))" 1 6 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" } function ycsb_e() { - run "$RUN_IDENT" ycsb_e_block ycsb-e "$((8 * 1024 * 1024 * 1024))" 0 8 - run "$RUN_IDENT" ycsb_e_memory ycsb-e "$((8 * 1024 * 1024 * 1024))" 1 8 + # Default parameters: use generated data with integers + local data_source="${YCSB_DATA_SOURCE:-file}" + local data_type="${YCSB_DATA_TYPE:-int}" + local data_path="${YCSB_DATA_PATH:-/home/skarim/Code/smash/haura/betree/haura-benchmarks/silesia_corpus}" + + #run "$RUN_IDENT" "ycsb_e_${YCSB_SUFFIX:-unnamed}" ycsb-e "$((2 * 1024 * 1024 * 1024))" 0 2 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + run "$RUN_IDENT" "ycsb_e_${YCSB_SUFFIX:-unnamed}" ycsb-e "$((1 * 1024 * 1024))" 0 1 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_e_${YCSB_SUFFIX:-unnamed}" ycsb-e "$((1 * 1024 * 1024))" 0 2 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_e_${YCSB_SUFFIX:-unnamed}" ycsb-e "$((1 * 1024 * 1024))" 0 3 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_e_${YCSB_SUFFIX:-unnamed}" ycsb-e "$((1 * 1024 * 1024))" 0 4 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_e_${YCSB_SUFFIX:-unnamed}" ycsb-e "$((1 * 1024 * 1024))" 0 5 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_e_${YCSB_SUFFIX:-unnamed}" ycsb-e "$((1 * 1024 * 1024))" 0 8 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_e_${YCSB_SUFFIX:-unnamed}" ycsb-e "$((1 * 1024 * 1024))" 0 10 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_e_${YCSB_SUFFIX:-unnamed}" ycsb-e "$((1 * 1024 * 1024))" 0 15 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_e_${YCSB_SUFFIX:-unnamed}" ycsb-e "$((1 * 1024 * 1024))" 0 20 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_e_${YCSB_SUFFIX:-unnamed}" ycsb-e "$((1 * 1024 * 1024))" 0 25 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" } function ycsb_f() { - run "$RUN_IDENT" ycsb_f_block ycsb-f "$((8 * 1024 * 1024 * 1024))" 0 8 - run "$RUN_IDENT" ycsb_f_memory ycsb-f "$((8 * 1024 * 1024 * 1024))" 1 8 + # Default parameters: use generated data with integers + local data_source="${YCSB_DATA_SOURCE:-file}" + local data_type="${YCSB_DATA_TYPE:-int}" + local data_path="${YCSB_DATA_PATH:-/home/skarim/Code/smash/haura/betree/haura-benchmarks/silesia_corpus}" + + run "$RUN_IDENT" "ycsb_f_${YCSB_SUFFIX:-unnamed}" ycsb-f "$((2 * 1024 * 1024 * 1024))" 0 2 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" +# run "$RUN_IDENT" ycsb_f_memory ycsb-f "$((4 * 1024 * 1024 * 1024))" 1 6 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" } -cargo build --release +function ycsb_g() { + # Default parameters: use generated data with integers + local data_source="${YCSB_DATA_SOURCE:-file}" + local data_type="${YCSB_DATA_TYPE:-int}" + local data_path="${YCSB_DATA_PATH:-/home/skarim/Code/smash/haura/betree/haura-benchmarks/silesia_corpus}" + + #run "$RUN_IDENT" "ycsb_g_${YCSB_SUFFIX:-unnamed}" ycsb-g "$((2 * 1024 * 1024))" 0 1 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_g_${YCSB_SUFFIX:-unnamed}" ycsb-g "$((2 * 1024 * 1024))" 0 2 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_g_${YCSB_SUFFIX:-unnamed}" ycsb-g "$((2 * 1024 * 1024))" 0 3 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_g_${YCSB_SUFFIX:-unnamed}" ycsb-g "$((2 * 1024 * 1024))" 0 4 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_g_${YCSB_SUFFIX:-unnamed}" ycsb-g "$((2 * 1024 * 1024))" 0 5 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_g_${YCSB_SUFFIX:-unnamed}" ycsb-g "$((2 * 1024 * 1024))" 0 8 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_g_${YCSB_SUFFIX:-unnamed}" ycsb-g "$((2 * 1024 * 1024))" 0 10 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_g_${YCSB_SUFFIX:-unnamed}" ycsb-g "$((2 * 1024 * 1024))" 0 15 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + #run "$RUN_IDENT" "ycsb_g_${YCSB_SUFFIX:-unnamed}" ycsb-g "$((2 * 1024 * 1024))" 0 20 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" + run "$RUN_IDENT" "ycsb_g_${YCSB_SUFFIX:-unnamed}" ycsb-g "$((2 * 1024 * 1024))" 0 25 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" +} + +function ycsb_h() { + # Default parameters: use generated data with integers + local data_source="${YCSB_DATA_SOURCE:-file}" + local data_type="${YCSB_DATA_TYPE:-int}" + local data_path="${YCSB_DATA_PATH:-/home/skarim/Code/smash/haura/betree/haura-benchmarks/silesia_corpus}" + + run "$RUN_IDENT" "ycsb_h_${YCSB_SUFFIX:-unnamed}" ycsb-h "$((1 * 1024 * 1024))" 0 2 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" +# run "$RUN_IDENT" ycsb_h_memory ycsb-h "$((768 * 1024 * 1024))" 1 8 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" +} + +function ycsb_i() { + # Default parameters: use generated data with integers + local data_source="${YCSB_DATA_SOURCE:-file}" + local data_type="${YCSB_DATA_TYPE:-int}" + local data_path="${YCSB_DATA_PATH:-/home/skarim/Code/smash/haura/betree/haura-benchmarks/silesia_corpus}" + + run "$RUN_IDENT" "ycsb_i_${YCSB_SUFFIX:-unnamed}" ycsb-i "$((1 * 1024 * 1024))" 0 2 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" +# run "$RUN_IDENT" ycsb_i_memory ycsb-i "$((768 * 1024 * 1024))" 1 8 --data-source "$data_source" --data-type "$data_type" --data-path "$data_path" --entry-size "$ENTRY_SIZE" +} + +function set_compression() { + local compression="$1" + + jq '.compression' perf-config.json + + # Parse the compression string as JSON and set it + echo "$compression" | jq '.' > /dev/null 2>&1 + if [ $? -eq 0 ]; then + # Valid JSON, use it directly + echo "DEBUG: Valid JSON detected, setting as object" + jq ".compression = $compression" perf-config.json > temp-config.json && mv temp-config.json perf-config.json + else + # Not valid JSON, treat as string + echo "DEBUG: Invalid JSON, treating as string" + jq ".compression = \"$compression\"" perf-config.json > temp-config.json && mv temp-config.json perf-config.json + fi + + jq '.compression' perf-config.json +} + +function run_all_ycsb_compressions() { + # Define entry size configurations + local entry_sizes=( + '512|entry512' + '4096|entry4096' + '16384|entry16384' + '30000|entry30000' + ) + + # Define compression configurations + local compressions=( + 'None|none' + '{"Zstd": {"level": 1}}|zstd1' + '{"Zstd": {"level": 5}}|zstd5' + '{"Zstd": {"level": 10}}|zstd10' + '{"Lz4": {"level": 1}}|lz4_1' + '{"Lz4": {"level": 5}}|lz4_5' + '{"Lz4": {"level": 10}}|lz4_10' + '{"Snappy": {}}|snappy' + #'{"Dictionary": {"max_dict_size": 128, "min_frequency": 1}}|dict' + #'{"Toast": {"min_compress_size" : 32, "max_ratio_percent": 90}}|toast' + #'{"Delta": {"value_size" : 1, "signed": false}}|delta' + #'{"Rle": {"min_run_length" : 2, "value_size": 1}}|rle' + #'{"Gorilla": {"use_f64" : false}}|gorilla' + # Add any other compression types... + ) + + # Loop through entry sizes + for entry_config in "${entry_sizes[@]}"; do + IFS='|' read -r entry_size entry_suffix <<< "$entry_config" + + echo "=========================================" + echo "Testing with ENTRY_SIZE=$entry_size" + echo "=========================================" + + export ENTRY_SIZE="$entry_size" + + # Loop through compressions for this entry size + for compression_config in "${compressions[@]}"; do + IFS='|' read -r compression compression_suffix <<< "$compression_config" + + echo "Running YCSB tests with entry_size=$entry_size, compression: $compression" + + # Set compression in config file + set_compression "$compression" + + # Set combined suffix + export YCSB_SUFFIX="${entry_suffix}_${compression_suffix}" + + # Run all YCSB tests + #ycsb_a + #ycsb_b + #ycsb_c + #ycsb_d + #ycsb_e + #ycsb_f + ycsb_g + #ycsb_h + #ycsb_i + + echo "Completed YCSB tests for entry_size=$entry_size, compression: $compression" + echo "" + done + + echo "Completed all compression tests for ENTRY_SIZE=$entry_size" + echo "" + done +} + +cargo build --release --features memory_metrics if [ -z "$BETREE_CONFIG" ]; then export BETREE_CONFIG="$PWD/perf-config.json" @@ -243,6 +436,8 @@ fi export ROOT="$PWD" export ZIP_ARCHIVE="$PWD/data/linux.zip" +# Default entry size for YCSB tests (can be overridden) +export ENTRY_SIZE="${ENTRY_SIZE:-30000}" # Category under which the default runs should be made, a function may modify # this if multiple categories are needed. export RUN_IDENT="default" @@ -276,9 +471,15 @@ ensure_config #checkpoints #switchover #ingest -# ycsb_a -# ycsb_b -# ycsb_c -# ycsb_d -# ycsb_e -# ycsb_f +#ycsb_a +#ycsb_b +#ycsb_c +#ycsb_d +#ycsb_e +#ycsb_f +#ycsb_g +#ycsb_h +#ycsb_i + +# Run all YCSB tests with different entry sizes and compression configurations +run_all_ycsb_compressions diff --git a/betree/haura-benchmarks/src/lib.rs b/betree/haura-benchmarks/src/lib.rs index cc1790f42..d3ac56dea 100644 --- a/betree/haura-benchmarks/src/lib.rs +++ b/betree/haura-benchmarks/src/lib.rs @@ -19,6 +19,10 @@ use procfs::process::Process; use rand::{Rng, SeedableRng}; use rand_xoshiro::Xoshiro256Plus; +use std::fs; +use std::io::{BufReader, Read}; +//use std::path::Path; + pub mod bufreader; pub type Database = database::Database; @@ -120,6 +124,116 @@ impl KvClient { keys } + pub fn fill_entries_with_data_type(&mut self, entries: u64, entry_size: u32, data_type: &str) -> Vec<[u8; 8]> { + let mut keys = vec![]; + + for idx in 0..entries { + let value = match data_type { + "int" => { + // Fill with random integers + let mut value = vec![0u8; entry_size as usize]; + let num_ints = entry_size as usize / 4; // 4 bytes per i32 + let remaining_bytes = entry_size as usize % 4; + + for i in 0..num_ints { + let random_int: i32 = self.rng.gen(); + let bytes = random_int.to_le_bytes(); + value[i * 4..(i + 1) * 4].copy_from_slice(&bytes); + } + + // Fill remaining bytes with random data + if remaining_bytes > 0 { + let start_idx = num_ints * 4; + self.rng.fill(&mut value[start_idx..]); + } + + // Sort the value vector + value.sort(); + + value + } + "float" => { + // Fill with random floats + let mut value = vec![0u8; entry_size as usize]; + let num_floats = entry_size as usize / 4; // 4 bytes per f32 + let remaining_bytes = entry_size as usize % 4; + + for i in 0..num_floats { + let random_float: f32 = self.rng.gen(); + let bytes = random_float.to_le_bytes(); + value[i * 4..(i + 1) * 4].copy_from_slice(&bytes); + } + + // Fill remaining bytes with random data + if remaining_bytes > 0 { + let start_idx = num_floats * 4; + self.rng.fill(&mut value[start_idx..]); + } + + // Sort the value vector + value.sort(); + + value + } + _ => { + // Default: fill with random bytes (same as original) + let mut value = vec![0u8; entry_size as usize]; + self.rng.fill(&mut value[..]); + + // Sort the value vector + value.sort(); + + value + } + }; + + let k = (idx as u64).to_be_bytes(); + self.ds.insert(&k[..], &value).unwrap(); + keys.push(k); + } + + self.db.write().sync().unwrap(); + keys + } + + pub fn fill_entries_from_path>( + &mut self, + path: P, + chunk_size: u32, // now explicitly used as variable chunk size + ) -> Vec<[u8; 8]> { + let mut keys = Vec::new(); + let mut idx = 0u64; + println!("fill_entries_from_path"); + for entry in fs::read_dir(path).expect("Failed to read directory") { + let entry = entry.expect("Invalid directory entry"); + let file_path = entry.path(); + + if file_path.is_file() { + let file = File::open(&file_path).expect("Failed to open file"); + let mut reader = BufReader::new(file); + + loop { + let mut buffer = vec![0u8; chunk_size as usize]; + let bytes_read = reader.read(&mut buffer).expect("Read error"); + + if bytes_read == 0 { + break; // end of file + } + + buffer.truncate(bytes_read); // ensure last chunk has correct size + let k = idx.to_be_bytes(); + self.ds.insert(&k[..], &buffer).unwrap(); + keys.push(k); + idx += 1; + } + } + } + + self.db.write().sync().unwrap(); + self.db.write().flush_().unwrap(); + keys + } + pub fn rng(&mut self) -> &mut Xoshiro256Plus { &mut self.rng } diff --git a/betree/haura-benchmarks/src/main.rs b/betree/haura-benchmarks/src/main.rs index a0bda9ef0..cb3016de9 100644 --- a/betree/haura-benchmarks/src/main.rs +++ b/betree/haura-benchmarks/src/main.rs @@ -65,43 +65,136 @@ enum Mode { size: u64, kind: u8, threads: u32, - #[structopt(default_value = "120")] + #[structopt(default_value = "20")] runtime: u64, + #[structopt(long, default_value = "generated")] + data_source: String, + #[structopt(long, default_value = "int")] + data_type: String, + #[structopt(long, default_value = "")] + data_path: String, + #[structopt(long, default_value = "30000")] + entry_size: usize, }, YcsbB { size: u64, kind: u8, threads: u32, - #[structopt(default_value = "120")] + #[structopt(default_value = "20")] runtime: u64, + #[structopt(long, default_value = "generated")] + data_source: String, + #[structopt(long, default_value = "int")] + data_type: String, + #[structopt(long, default_value = "")] + data_path: String, + #[structopt(long, default_value = "30000")] + entry_size: usize, }, YcsbC { size: u64, kind: u8, threads: u32, - #[structopt(default_value = "120")] + #[structopt(default_value = "20")] runtime: u64, + #[structopt(long, default_value = "generated")] + data_source: String, + #[structopt(long, default_value = "int")] + data_type: String, + #[structopt(long, default_value = "")] + data_path: String, + #[structopt(long, default_value = "30000")] + entry_size: usize, }, YcsbD { size: u64, kind: u8, threads: u32, - #[structopt(default_value = "120")] + #[structopt(default_value = "20")] runtime: u64, + #[structopt(long, default_value = "generated")] + data_source: String, + #[structopt(long, default_value = "int")] + data_type: String, + #[structopt(long, default_value = "")] + data_path: String, + #[structopt(long, default_value = "30000")] + entry_size: usize, }, YcsbE { size: u64, kind: u8, threads: u32, - #[structopt(default_value = "120")] + #[structopt(default_value = "20")] runtime: u64, + #[structopt(long, default_value = "generated")] + data_source: String, + #[structopt(long, default_value = "int")] + data_type: String, + #[structopt(long, default_value = "")] + data_path: String, + #[structopt(long, default_value = "30000")] + entry_size: usize, }, YcsbF { size: u64, kind: u8, threads: u32, - #[structopt(default_value = "120")] + #[structopt(default_value = "20")] + runtime: u64, + #[structopt(long, default_value = "generated")] + data_source: String, + #[structopt(long, default_value = "int")] + data_type: String, + #[structopt(long, default_value = "")] + data_path: String, + #[structopt(long, default_value = "30000")] + entry_size: usize, + }, + YcsbG { + size: u64, + kind: u8, + threads: u32, + #[structopt(default_value = "20")] + runtime: u64, + #[structopt(long, default_value = "generated")] + data_source: String, + #[structopt(long, default_value = "int")] + data_type: String, + #[structopt(long, default_value = "")] + data_path: String, + #[structopt(long, default_value = "30000")] + entry_size: usize, + }, + YcsbH { + size: u64, + kind: u8, + threads: u32, + #[structopt(default_value = "20")] + runtime: u64, + #[structopt(long, default_value = "generated")] + data_source: String, + #[structopt(long, default_value = "int")] + data_type: String, + #[structopt(long, default_value = "")] + data_path: String, + #[structopt(long, default_value = "30000")] + entry_size: usize, + }, + YcsbI { + size: u64, + kind: u8, + threads: u32, + #[structopt(default_value = "20")] runtime: u64, + #[structopt(long, default_value = "generated")] + data_source: String, + #[structopt(long, default_value = "int")] + data_type: String, + #[structopt(long, default_value = "")] + data_path: String, + #[structopt(long, default_value = "30000")] + entry_size: usize, }, } @@ -210,54 +303,117 @@ fn run_all(mode: Mode) -> Result<(), Box> { kind, threads, runtime, + data_source, + data_type, + data_path, + entry_size, } => { let client = control.kv_client(0); - ycsb::a(client, size, threads as usize, runtime) + ycsb::a(client, size, threads as usize, runtime, &data_source, &data_type, &data_path, entry_size) } Mode::YcsbB { size, kind, threads, runtime, + data_source, + data_type, + data_path, + entry_size, } => { let client = control.kv_client(0); - ycsb::b(client, size, threads as usize, runtime) + ycsb::b(client, size, threads as usize, runtime, &data_source, &data_type, &data_path, entry_size) } Mode::YcsbC { size, kind, threads, runtime, + data_source, + data_type, + data_path, + entry_size, } => { let client = control.kv_client(0); - ycsb::c(client, size, threads as usize, runtime) + ycsb::c(client, size, threads as usize, runtime, &data_source, &data_type, &data_path, entry_size) } Mode::YcsbD { size, kind, threads, runtime, + data_source, + data_type, + data_path, + entry_size, } => { let client = control.kv_client(0); - ycsb::d(client, size, threads as usize, runtime) + ycsb::d(client, size, threads as usize, runtime, &data_source, &data_type, &data_path, entry_size) } Mode::YcsbE { size, kind, threads, runtime, + data_source, + data_type, + data_path, + entry_size, } => { let client = control.kv_client(0); - ycsb::e(client, size, threads as usize, runtime) + ycsb::e(client, size, threads as usize, runtime, &data_source, &data_type, &data_path, entry_size) } Mode::YcsbF { size, kind, threads, runtime, + data_source, + data_type, + data_path, + entry_size, + } => { + let client = control.kv_client(0); + ycsb::f(client, size, threads as usize, runtime, &data_source, &data_type, &data_path, entry_size) + } + Mode::YcsbG { + size, + kind, + threads, + runtime, + data_source, + data_type, + data_path, + entry_size, + } => { + let client = control.kv_client(0); + ycsb::g(client, size, threads as usize, runtime, &data_source, &data_type, &data_path, entry_size) + } + Mode::YcsbH { + size, + kind, + threads, + runtime, + data_source, + data_type, + data_path, + entry_size, + } => { + let client = control.kv_client(0); + ycsb::h(client, size, threads as usize, runtime, &data_source, &data_type, &data_path, entry_size) + } + Mode::YcsbI { + size, + kind, + threads, + runtime, + data_source, + data_type, + data_path, + entry_size, } => { let client = control.kv_client(0); - ycsb::f(client, size, threads as usize, runtime) + ycsb::i(client, size, threads as usize, runtime, &data_source, &data_type, &data_path, entry_size) } } diff --git a/betree/haura-benchmarks/src/tiered1.rs b/betree/haura-benchmarks/src/tiered1.rs index 663df55e3..04c91f4a1 100644 --- a/betree/haura-benchmarks/src/tiered1.rs +++ b/betree/haura-benchmarks/src/tiered1.rs @@ -4,7 +4,7 @@ use std::{error::Error, io::Write}; pub fn run(mut client: Client) -> Result<(), Box> { const N_OBJECTS: u64 = 1; - const OBJECT_SIZE: u64 = 5 * 1024 * 1024 * 1024; + const OBJECT_SIZE: u64 = 1 * 1024 * 1024 * 1024; println!("running tiered1"); let os = &client.object_store; @@ -49,7 +49,7 @@ pub fn run(mut client: Client) -> Result<(), Box> { } }) .sum::(); - assert_eq!(info.size, size); + //assert_eq!(info.size, size); } Ok(()) diff --git a/betree/haura-benchmarks/src/ycsb.rs b/betree/haura-benchmarks/src/ycsb.rs index 4d1ed2494..7e70e1a36 100644 --- a/betree/haura-benchmarks/src/ycsb.rs +++ b/betree/haura-benchmarks/src/ycsb.rs @@ -24,6 +24,18 @@ //! | F | Read-modify- | Varies | Read a record, modify it, and write it back | //! | | write | | | //! +----------+--------------+------------------+-------------------------------------------------+ +//! +----------+--------------+------------------+-------------------------------------------------+ +//! | G | Read | Uniform | | +//! | | | | | +//! +----------+--------------+------------------+-------------------------------------------------+ +//! +----------+--------------+------------------+-------------------------------------------------+ +//! | H | Update | Uniform | | +//! | | | | | +//! +----------+--------------+------------------+-------------------------------------------------+ +//! +----------+--------------+------------------+-------------------------------------------------+ +//! | I | Delete | Uniform | | +//! | | | | | +//! +----------+--------------+------------------+-------------------------------------------------+ use betree_perf::KvClient; use rand::distributions::Distribution; @@ -33,8 +45,10 @@ use std::io::Write; use std::sync::atomic::{AtomicUsize, Ordering as AtomicOrdering}; use std::sync::Arc; + + // Default in YCSB, 10 x 100 bytes field in one struct. -const ENTRY_SIZE: usize = 1000; +// const ENTRY_SIZE: usize = 30*1000; // Now passed as parameter // Default of YCSB const ZIPF_EXP: f64 = 0.99; @@ -42,10 +56,20 @@ const ZIPF_EXP: f64 = 0.99; /// Operations: Read 50%, Update 50% /// Distribution: Zipfian /// Application example: Session store recording recent actions in a user session -pub fn a(mut client: KvClient, size: u64, threads: usize, runtime: u64) { - println!("Running YCSB Workload A"); +pub fn a(mut client: KvClient, size: u64, workers: usize, runtime: u64, data_source: &str, data_type: &str, data_path: &str, entry_size: usize) { + println!("Running YCSB Workload A {} {} {}",size, workers, runtime); println!("Filling KV store..."); - let mut keys = client.fill_entries(size / ENTRY_SIZE as u64, ENTRY_SIZE as u32); + + let mut keys = match data_source { + "file" => { + client.fill_entries_from_path(data_path, entry_size as u32) + } + _ => { + // Default to generated data + client.fill_entries_with_data_type(size / entry_size as u64, entry_size as u32, data_type) + } + }; + keys.shuffle(client.rng()); println!("Creating distribution..."); let f = std::fs::OpenOptions::new() @@ -56,7 +80,7 @@ pub fn a(mut client: KvClient, size: u64, threads: usize, runtime: u64) { let mut w = std::io::BufWriter::new(f); w.write_all(b"threads,ops,time_ns\n").unwrap(); - for workers in 1..=threads { + //for workers in 1..=threads { println!("Running benchmark with {workers} threads..."); let threads = (0..workers) .map(|_| std::sync::mpsc::channel::()) @@ -69,13 +93,15 @@ pub fn a(mut client: KvClient, size: u64, threads: usize, runtime: u64) { let mut rng = rand_xoshiro::Xoshiro256Plus::seed_from_u64(id as u64); let dist = zipf::ZipfDistribution::new(keys.len(), ZIPF_EXP).unwrap(); let mut total = 0; - let value = vec![0u8; ENTRY_SIZE]; + let mut value = vec![0u8; entry_size]; + let k = &keys[dist.sample(&mut rng) - 1][..]; + value = ds.get(k).unwrap().unwrap().to_vec(); while let Ok(start) = rx.recv() { while start.elapsed().as_secs() < runtime { for _ in 0..100 { let k = &keys[dist.sample(&mut rng) - 1][..]; if rng.gen_bool(0.5) { - ds.get(k).unwrap().unwrap(); + value = ds.get(k).unwrap().unwrap().to_vec(); } else { ds.upsert(k.to_vec(), &value, 0).unwrap(); } @@ -105,7 +131,7 @@ pub fn a(mut client: KvClient, size: u64, threads: usize, runtime: u64) { w.flush().unwrap(); println!("Achieved: {} ops/sec", total as f32 / end.as_secs_f32()); println!(" {} ns avg", end.as_nanos() / total); - } + //} } /// B - Read heavy @@ -113,10 +139,20 @@ pub fn a(mut client: KvClient, size: u64, threads: usize, runtime: u64) { /// Distribution: Zipfian /// Application example: Photo tagging; add a tag is an update, but most operations are to read /// tags -pub fn b(mut client: KvClient, size: u64, threads: usize, runtime: u64) { +pub fn b(mut client: KvClient, size: u64, workers: usize, runtime: u64, data_source: &str, data_type: &str, data_path: &str, entry_size: usize) { println!("Running YCSB Workload B"); println!("Filling KV store..."); - let mut keys = client.fill_entries(size / ENTRY_SIZE as u64, ENTRY_SIZE as u32); + + let mut keys = match data_source { + "file" => { + client.fill_entries_from_path(data_path, entry_size as u32) + } + _ => { + // Default to generated data + client.fill_entries_with_data_type(size / entry_size as u64, entry_size as u32, data_type) + } + }; + keys.shuffle(client.rng()); println!("Creating distribution..."); let f = std::fs::OpenOptions::new() @@ -127,7 +163,7 @@ pub fn b(mut client: KvClient, size: u64, threads: usize, runtime: u64) { let mut w = std::io::BufWriter::new(f); w.write_all(b"threads,ops,time_ns\n").unwrap(); - for workers in 1..=threads { + //for workers in 1..=threads { println!("Running benchmark with {workers} threads..."); let threads = (0..workers) .map(|_| std::sync::mpsc::channel::()) @@ -140,14 +176,16 @@ pub fn b(mut client: KvClient, size: u64, threads: usize, runtime: u64) { let mut rng = rand_xoshiro::Xoshiro256Plus::seed_from_u64(id as u64); let dist = zipf::ZipfDistribution::new(keys.len(), ZIPF_EXP).unwrap(); let mut total = 0; - let value = vec![0u8; ENTRY_SIZE]; + let mut value = vec![0u8; entry_size]; + let k = &keys[dist.sample(&mut rng) - 1][..]; + value = ds.get(k).unwrap().unwrap().to_vec(); while let Ok(start) = rx.recv() { while start.elapsed().as_secs() < runtime { for _ in 0..100 { let k = &keys[dist.sample(&mut rng) - 1][..]; if rng.gen_bool(0.95) { // 95% reads - ds.get(k).unwrap().unwrap(); + value = ds.get(k).unwrap().unwrap().to_vec(); } else { // 5% updates ds.upsert(k.to_vec(), &value, 0).unwrap(); @@ -178,17 +216,27 @@ pub fn b(mut client: KvClient, size: u64, threads: usize, runtime: u64) { w.flush().unwrap(); println!("Achieved: {} ops/sec", total as f32 / end.as_secs_f32()); println!(" {} ns avg", end.as_nanos() / total); - } + //} } /// C - Read heavy /// Operations: Read 100% /// Distribution: Zipfian /// Access Size: 1000 bytes -pub fn c(mut client: KvClient, size: u64, threads: usize, runtime: u64) { +pub fn c(mut client: KvClient, size: u64, threads: usize, runtime: u64, data_source: &str, data_type: &str, data_path: &str, entry_size: usize) { println!("Running YCSB Workload C"); println!("Filling KV store..."); - let mut keys = client.fill_entries(size / ENTRY_SIZE as u64, ENTRY_SIZE as u32); + + let mut keys = match data_source { + "file" => { + client.fill_entries_from_path(data_path, entry_size as u32) + } + _ => { + // Default to generated data + client.fill_entries_with_data_type(size / entry_size as u64, entry_size as u32, data_type) + } + }; + keys.shuffle(client.rng()); println!("Creating distribution..."); let f = std::fs::OpenOptions::new() @@ -251,16 +299,25 @@ pub fn c(mut client: KvClient, size: u64, threads: usize, runtime: u64) { /// Operations: Read 95%, Insert 5% /// Distribution: Latest /// Application example: User status updates; people want to read the latest statuses -pub fn d(mut client: KvClient, size: u64, threads: usize, runtime: u64) { +pub fn d(mut client: KvClient, size: u64, threads: usize, runtime: u64, data_source: &str, data_type: &str, data_path: &str, entry_size: usize) { println!("Running YCSB Workload D"); println!("Filling KV store..."); // Reserve 20% extra space for new insertions - let initial_size = size / ENTRY_SIZE as u64; - let total_size = initial_size + (initial_size / 5); - + // Only fill initial portion - let mut keys = client.fill_entries(initial_size, ENTRY_SIZE as u32); + let mut keys = match data_source { + "file" => { + client.fill_entries_from_path(data_path, entry_size as u32) + } + _ => { + // Default to generated data + client.fill_entries_with_data_type(size / entry_size as u64, entry_size as u32, data_type) + } + }; + let initial_size = (keys.len() as f64 * 0.05) as usize; + let total_size = keys.len(); + println!("{} {}", initial_size, total_size); // Fill rest of keys for idx in initial_size..total_size { let k = (idx as u64).to_be_bytes(); @@ -295,7 +352,7 @@ pub fn d(mut client: KvClient, size: u64, threads: usize, runtime: u64) { std::thread::spawn(move || { let mut rng = rand_xoshiro::Xoshiro256Plus::seed_from_u64(id as u64); let mut total = 0; - let value = vec![0u8; ENTRY_SIZE]; + let mut value = vec![0u8; entry_size]; while let Ok(start) = rx.recv() { while start.elapsed().as_secs() < runtime { @@ -308,7 +365,12 @@ pub fn d(mut client: KvClient, size: u64, threads: usize, runtime: u64) { zipf::ZipfDistribution::new(max, ZIPF_EXP).unwrap(); let offset = dist.sample(&mut rng); let idx = max.saturating_sub(offset); - ds.get(&keys[idx][..]).unwrap(); + //value = ds.get(&keys[idx][..]).unwrap(); + if let Some(bytes) = ds.get(&keys[idx][..]).unwrap() { + value = bytes.to_vec(); + } else { + // Handle the case where the key isn't found, maybe assign a default or log an error + } } else { // 5% inserts of new records let current = current_size.load(AtomicOrdering::Relaxed); @@ -351,21 +413,31 @@ pub fn d(mut client: KvClient, size: u64, threads: usize, runtime: u64) { /// Distribution: Zipfian for first key, Uniform for length /// Application example: Threaded conversations, where each scan is for the posts in a given thread /// (assumed to be clustered by thread id) -pub fn e(mut client: KvClient, size: u64, threads: usize, runtime: u64) { +pub fn e(mut client: KvClient, size: u64, workers: usize, runtime: u64, data_source: &str, data_type: &str, data_path: &str, entry_size: usize) { println!("Running YCSB Workload E"); println!("Filling KV store..."); // Reserve 20% extra space for new insertions - let initial_size = size / ENTRY_SIZE as u64; - let total_size = initial_size + (initial_size / 5); + //let initial_size = size / entry_size as u64; + //let total_size = initial_size + (initial_size / 5); // Only fill initial portion - let mut keys = client.fill_entries(initial_size, ENTRY_SIZE as u32); - - // Fill rest of keys for potential inserts - for idx in initial_size..total_size { - let k = (idx as u64).to_be_bytes(); - keys.push(k); - } + let mut keys = match data_source { + "file" => { + client.fill_entries_from_path(data_path, entry_size as u32) + } + _ => { + // Default to generated data + client.fill_entries_with_data_type(size / entry_size as u64, entry_size as u32, data_type) + } + }; + let initial_size = keys.len(); + // let total_size = initial_size + (keys.len() as f64 * 0.05) as usize; + // println!("{} {}", initial_size, total_size); + // // Fill rest of keys for potential inserts + // for idx in initial_size..total_size { + // let k = keys[idx - initial_size]; + // keys.push(k); + // } println!("Creating distribution..."); let f = std::fs::OpenOptions::new() @@ -379,7 +451,7 @@ pub fn e(mut client: KvClient, size: u64, threads: usize, runtime: u64) { // Thread-safe current size tracking let current_size = Arc::new(AtomicUsize::new(initial_size as usize)); - for workers in 1..=threads { + //for workers in 1..=threads { println!("Running benchmark with {workers} threads..."); let threads = (0..workers) .map(|_| std::sync::mpsc::channel::()) @@ -392,28 +464,35 @@ pub fn e(mut client: KvClient, size: u64, threads: usize, runtime: u64) { std::thread::spawn(move || { let mut rng = rand_xoshiro::Xoshiro256Plus::seed_from_u64(id as u64); let mut total = 0; - let value = vec![0u8; ENTRY_SIZE]; - + let mut value = vec![0u8; entry_size]; + let k = &keys[0][..]; + value = ds.get(k).unwrap().unwrap().to_vec(); while let Ok(start) = rx.recv() { while start.elapsed().as_secs() < runtime { for _ in 0..100 { if rng.gen_bool(0.95) { // 95% scans let max = current_size.load(AtomicOrdering::Relaxed); - // Get start key using zipfian - let dist = - zipf::ZipfDistribution::new(max, ZIPF_EXP).unwrap(); - let start_idx = dist.sample(&mut rng) - 1; +let dist = zipf::ZipfDistribution::new(max, ZIPF_EXP).unwrap(); +let mut start_idx = dist.sample(&mut rng).saturating_sub(1); + +let scan_length = rng.gen_range(1..=100); +let mut end_idx = (start_idx + scan_length).min(max.saturating_sub(1)); - // Uniform random scan length between 1 and 100 - let scan_length = rng.gen_range(1..=100); - let end_idx = (start_idx + scan_length).min(max - 1); +// Ensure valid bounds +if start_idx >= keys.len() || end_idx >= keys.len() || start_idx >= end_idx { + continue; // skip invalid range +} - // Perform the range scan - let start_key = &keys[start_idx][..]; - let end_key = &keys[end_idx][..]; +let start_key = &keys[start_idx][..]; +let end_key = &keys[end_idx][..]; // Consume the iterator to actually perform the scan - for _entry in ds.range(start_key..end_key).unwrap() {} + for _entry in ds.range(start_key..end_key).unwrap() { + if let Ok((_k, _v)) = _entry { + value = _v.to_vec(); + break; // exit after the first one + } + } } else { // 5% inserts of new records let current = current_size.load(AtomicOrdering::Relaxed); @@ -448,7 +527,7 @@ pub fn e(mut client: KvClient, size: u64, threads: usize, runtime: u64) { w.flush().unwrap(); println!("Achieved: {} ops/sec", total as f32 / end.as_secs_f32()); println!(" {} ns avg", end.as_nanos() / total); - } + //} } /// F - Read-modify-write @@ -456,10 +535,20 @@ pub fn e(mut client: KvClient, size: u64, threads: usize, runtime: u64) { /// Distribution: Zipfian /// Application example: user database, where user records are read and modified by the user or to /// record user activity -pub fn f(mut client: KvClient, size: u64, threads: usize, runtime: u64) { +pub fn f(mut client: KvClient, size: u64, threads: usize, runtime: u64, data_source: &str, data_type: &str, data_path: &str, entry_size: usize) { println!("Running YCSB Workload F"); println!("Filling KV store..."); - let mut keys = client.fill_entries(size / ENTRY_SIZE as u64, ENTRY_SIZE as u32); + + let mut keys = match data_source { + "file" => { + client.fill_entries_from_path(data_path, entry_size as u32) + } + _ => { + // Default to generated data + client.fill_entries_with_data_type(size / entry_size as u64, entry_size as u32, data_type) + } + }; + keys.shuffle(client.rng()); println!("Creating distribution..."); let f = std::fs::OpenOptions::new() @@ -483,14 +572,14 @@ pub fn f(mut client: KvClient, size: u64, threads: usize, runtime: u64) { let mut rng = rand_xoshiro::Xoshiro256Plus::seed_from_u64(id as u64); let dist = zipf::ZipfDistribution::new(keys.len(), ZIPF_EXP).unwrap(); let mut total = 0; - let value = vec![0u8; ENTRY_SIZE]; + let mut value = vec![0u8; entry_size]; while let Ok(start) = rx.recv() { while start.elapsed().as_secs() < runtime { for _ in 0..100 { let k = &keys[dist.sample(&mut rng) - 1][..]; if rng.gen_bool(0.5) { // 50% reads - ds.get(k).unwrap().unwrap(); + value = ds.get(k).unwrap().unwrap().to_vec(); } else { // 50% read-modify-write let _existing = ds.get(k).unwrap().unwrap(); @@ -525,3 +614,302 @@ pub fn f(mut client: KvClient, size: u64, threads: usize, runtime: u64) { println!(" {} ns avg", end.as_nanos() / total); } } + +use rand_xoshiro::Xoshiro256Plus; + +pub fn g(mut client: KvClient, size: u64, workers: usize, runtime: u64, data_source: &str, data_type: &str, data_path: &str, entry_size: usize) { + println!("Running YCSB Workload G"); + println!("Filling KV store..."); + + let mut keys = match data_source { + "file" => { + client.fill_entries_from_path(data_path, entry_size as u32) + } + _ => { + // Default to generated data + client.fill_entries_with_data_type(size / entry_size as u64, entry_size as u32, data_type) + } + }; + + keys.shuffle(client.rng()); + + // Estimate entries per 128KB leaf: value(1000B) + key(8B) + overhead(~8B) + let approx_entry_size = entry_size + 8 + 8; + let entries_per_leaf = (1024*1024) / approx_entry_size; // ≈ 128 entries + println!("Estimated entries per leaf: {entries_per_leaf}"); + + // Pick one key per estimated leaf node + let mut leaf_sampled_keys = Vec::new(); + for i in (0..keys.len()).step_by(entries_per_leaf) { + leaf_sampled_keys.push(keys[i]); + } + + //client.ds.flush().unwrap(); + + // Shuffle the reduced key set + //leaf_sampled_keys.shuffle(client.rng()); + println!("Sampled {} keys from {} total keys", leaf_sampled_keys.len(), keys.len()); + + println!("Creating distribution..."); + + let f = std::fs::OpenOptions::new() + .write(true) + .create(true) + .open("ycsb_g.csv") + .unwrap(); + let mut w = std::io::BufWriter::new(f); + w.write_all(b"threads,ops,time_ns\n").unwrap(); + + + //for workers in [1, 5, 10, 15, 20, 25] { + let threads = (0..workers) + .map(|_| std::sync::mpsc::channel::()) + .enumerate() + .map(|(id, (tx, rx))| { + let _keys = keys.clone(); + let ds = client.ds.clone(); + ( + std::thread::spawn(move || { + let mut rng = Xoshiro256Plus::seed_from_u64(id as u64); + let mut total = 0; + while let Ok(start) = rx.recv() { + while start.elapsed().as_secs() < 20 { + for _ in 0..500 { + if let Some(k) = _keys.choose(&mut rng) { + if let Some(value) = ds.get(*k).unwrap() { + total += 1; + } + } + } + } + } + total + }), + tx, + ) + }) + .collect::>(); + + client.db.read().drop_cache().unwrap(); + let start = std::time::Instant::now(); + for (_t, tx) in threads.iter() { + tx.send(start).unwrap(); + } + let mut total = 0; + for (t, tx) in threads.into_iter() { + drop(tx); + total += t.join().unwrap(); + } + let end = start.elapsed(); + w.write_fmt(format_args!("{workers},{total},{}\n", end.as_nanos())) + .unwrap(); + w.flush().unwrap(); + println!("Achieved: {} ops/sec", total as f32 / end.as_secs_f32()); + println!(" {} ns avg", end.as_nanos() / total); + //} +} + +use std::fs::{self, File}; +use std::io::{BufReader, Read}; +use std::path::Path; + +pub fn read_folder_chunks>(folder_path: P, chunk_size: usize) -> Vec> { + let mut all_chunks = Vec::new(); + + for entry in fs::read_dir(folder_path).expect("Failed to read folder") { + let entry = entry.expect("Invalid entry"); + let path = entry.path(); + + if path.is_file() { + let file = File::open(&path).expect("Unable to open file"); + let mut reader = BufReader::new(file); + + loop { + let mut buffer = vec![0u8; chunk_size]; + let bytes_read = reader.read(&mut buffer).expect("Read error"); + if bytes_read == 0 { + break; + } + + buffer.truncate(bytes_read); + all_chunks.push(buffer); + } + } + } + + all_chunks +} + + +pub fn h(mut client: KvClient, size: u64, threads: usize, runtime: u64, data_source: &str, data_type: &str, data_path: &str, entry_size: usize) { + println!("Running YCSB Workload H"); + println!("Filling KV store..."); + + let mut keys = match data_source { + "file" => { + client.fill_entries_from_path(data_path, entry_size as u32) + } + _ => { + // Default to generated data + client.fill_entries_with_data_type(size / entry_size as u64, entry_size as u32, data_type) + } + }; + + keys.shuffle(client.rng()); + println!("Creating distribution..."); + + let f = std::fs::OpenOptions::new() + .write(true) + .create(true) + .open("ycsb_h.csv") + .unwrap(); + let mut w = std::io::BufWriter::new(f); + w.write_all(b"threads,ops,time_ns\n").unwrap(); + + let corpus_chunks = read_folder_chunks("/home/skarim/Code/smash/haura/betree/haura-benchmarks/silesia_corpus", 1024); + let chunk_data = std::sync::Arc::new(corpus_chunks); // Share across threads + + + for workers in 1..=threads { + let threads = (0..workers) + .map(|_| std::sync::mpsc::channel::()) + .enumerate() + .map(|(id, (tx, rx))| { + let keys = keys.clone(); + let ds = client.ds.clone(); + let chunks = chunk_data.clone(); // shared chunk vector + ( + std::thread::spawn(move || { + use rand::seq::SliceRandom; // Add this if it's not already imported + let mut rng = rand_xoshiro::Xoshiro256Plus::seed_from_u64(id as u64); + + let mut shuffled_keys = keys.clone(); // Clone to keep original list intact + shuffled_keys.shuffle(&mut rng); + let mut total = 0; + + let mut idx = 0; + + while let Ok(start) = rx.recv() { + while start.elapsed().as_secs() < runtime { + for jdx in 0..1000 { + let k = &shuffled_keys[jdx + idx]; + let chunk_idx = (jdx + idx) % chunks.len(); + let value = &chunks[chunk_idx]; + ds.upsert(k.to_vec(), &value, 0).unwrap(); // **Only Write** + total += 1; + } + } + + if idx + 1000 >= shuffled_keys.len(){ + idx = 0; + } + } + total + }), + tx, + ) + }) + .collect::>(); + + client.db.read().drop_cache().unwrap(); + let start = std::time::Instant::now(); + for (_t, tx) in threads.iter() { + tx.send(start).unwrap(); + } + let mut total = 0; + for (t, tx) in threads.into_iter() { + drop(tx); + total += t.join().unwrap(); + } + let end = start.elapsed(); + w.write_fmt(format_args!("{workers},{total},{}\n", end.as_nanos())) + .unwrap(); + w.flush().unwrap(); + println!("Achieved: {} ops/sec", total as f32 / end.as_secs_f32()); + println!(" {} ns avg", end.as_nanos() / total); + } +} + + +pub fn i(mut client: KvClient, size: u64, threads: usize, runtime: u64, data_source: &str, data_type: &str, data_path: &str, entry_size: usize) { + println!("Running YCSB Workload I"); + println!("Filling KV store..."); + + let mut keys = match data_source { + "file" => { + client.fill_entries_from_path(data_path, entry_size as u32) + } + _ => { + // Default to generated data + client.fill_entries_with_data_type(size / entry_size as u64, entry_size as u32, data_type) + } + }; + + keys.shuffle(client.rng()); + println!("Creating distribution..."); + + let f = std::fs::OpenOptions::new() + .write(true) + .create(true) + .open("ycsb_i.csv") + .unwrap(); + let mut w = std::io::BufWriter::new(f); + w.write_all(b"threads,ops,time_ns\n").unwrap(); + + for workers in 1..=threads { + let threads = (0..workers) + .map(|_| std::sync::mpsc::channel::()) + .enumerate() + .map(|(id, (tx, rx))| { + let keys = keys.clone(); + let ds = client.ds.clone(); + //let value = vec![0u8; entry_size]; + ( + std::thread::spawn(move || { + use rand::seq::SliceRandom; // Add this if it's not already imported + let mut rng = rand_xoshiro::Xoshiro256Plus::seed_from_u64(id as u64); + + let mut shuffled_keys = keys.clone(); // Clone to keep original list intact + shuffled_keys.shuffle(&mut rng); + let mut total = 0; + + let mut idx = 0; + + while let Ok(start) = rx.recv() { + while start.elapsed().as_secs() < runtime { + for jdx in 0..1000 { + let k = &shuffled_keys[jdx + idx]; + ds.delete(k.to_vec()).unwrap(); // **Only Write** + total += 1; + } + } + + if idx + 1000 >= shuffled_keys.len(){ + idx = 0; + } + } + total + }), + tx, + ) + }) + .collect::>(); + + client.db.read().drop_cache().unwrap(); + let start = std::time::Instant::now(); + for (_t, tx) in threads.iter() { + tx.send(start).unwrap(); + } + let mut total = 0; + for (t, tx) in threads.into_iter() { + drop(tx); + total += t.join().unwrap(); + } + let end = start.elapsed(); + w.write_fmt(format_args!("{workers},{total},{}\n", end.as_nanos())) + .unwrap(); + w.flush().unwrap(); + println!("Achieved: {} ops/sec", total as f32 / end.as_secs_f32()); + println!(" {} ns avg", end.as_nanos() / total); + } +} diff --git a/betree/src/buffer.rs b/betree/src/buffer.rs index 97c5346b0..2e3fac02e 100644 --- a/betree/src/buffer.rs +++ b/betree/src/buffer.rs @@ -217,6 +217,8 @@ impl From> for AlignedBuf { enum BufSource { Allocated(AlignedBuf), Foreign(Arc>>, Block), + #[cfg(feature = "memory_metrics")] + TrackedForeign(Arc>>, Block, std::sync::Arc), } impl BufSource { @@ -224,6 +226,8 @@ impl BufSource { match self { BufSource::Allocated(buf) => unsafe { (*buf.buf.get()).ptr.as_ptr() }, BufSource::Foreign(ptr, _) => unsafe { (*ptr.get()).as_ptr() }, + #[cfg(feature = "memory_metrics")] + BufSource::TrackedForeign(ptr, _, _) => unsafe { (*ptr.get()).as_ptr() }, } } @@ -231,10 +235,19 @@ impl BufSource { match self { BufSource::Allocated(buf) => unsafe { (*buf.buf.get()).capacity.to_bytes() as usize }, BufSource::Foreign(_, s) => s.to_bytes() as usize, + #[cfg(feature = "memory_metrics")] + BufSource::TrackedForeign(_, s, _) => s.to_bytes() as usize, } } fn as_slice(&self) -> &[u8] { + // #[cfg(feature = "memory_metrics")] + // if let BufSource::TrackedForeign(_, size, stats) = self { + // use std::sync::atomic::Ordering; + // stats.memory_read.fetch_add(size.as_u64(), Ordering::Relaxed); + // stats.memory_read_count.fetch_add(1, Ordering::Relaxed); + // } + unsafe { slice::from_raw_parts(self.as_ptr(), self.len()) } } } @@ -409,6 +422,18 @@ impl Buf { } } + #[cfg(feature = "memory_metrics")] + pub(crate) unsafe fn from_tracked_raw( + ptr: NonNull, + size: Block, + stats: std::sync::Arc + ) -> Self { + Self { + buf: BufSource::TrackedForeign(Arc::new(UnsafeCell::new(ptr)), size, stats), + range: Block(0)..size, + } + } + /// Create a [Buf] from a byte vector. If `b.len()` is not a multiple of the block size, /// the size will be rounded up to the next multiple and filled with zeroes. pub fn from_zero_padded(mut b: Vec) -> Self { @@ -434,6 +459,8 @@ impl Buf { } } BufSource::Foreign(_, _) => self.into_buf_write().into_buf().into_full_mut(), + #[cfg(feature = "memory_metrics")] + BufSource::TrackedForeign(_, _, _) => self.into_buf_write().into_buf().into_full_mut(), } } @@ -455,6 +482,12 @@ impl Buf { tmp.write(self.buf.as_slice()).unwrap(); tmp } + #[cfg(feature = "memory_metrics")] + BufSource::TrackedForeign(_, _, _) => { + let mut tmp = BufWrite::with_capacity(self.range.end); + tmp.write(self.buf.as_slice()).unwrap(); + tmp + } } } @@ -473,6 +506,16 @@ impl Buf { unsafe { SlicedCowBytes::from_raw(ptr.as_ptr(), size.to_bytes() as usize) } } + #[cfg(feature = "memory_metrics")] + BufSource::TrackedForeign(stg, size, stats) => { + let ptr = ManuallyDrop::new( + Arc::try_unwrap(stg) + .expect("TrackedRawBuf was not unique") + .into_inner(), + ); + + unsafe { SlicedCowBytes::from_tracked_raw_no_tracking(ptr.as_ptr(), size.to_bytes() as usize, stats) } + } } } @@ -494,6 +537,8 @@ impl Buf { } } BufSource::Foreign(_, _) => self.buf.as_slice().to_vec().into_boxed_slice(), + #[cfg(feature = "memory_metrics")] + BufSource::TrackedForeign(_, _, _) => self.buf.as_slice().to_vec().into_boxed_slice(), } } diff --git a/betree/src/compression/delta.rs b/betree/src/compression/delta.rs new file mode 100644 index 000000000..a35228e35 --- /dev/null +++ b/betree/src/compression/delta.rs @@ -0,0 +1,353 @@ +//! Delta encoding implementation +//! Ideal for: Sequential/sorted integer data, timestamps, auto-incrementing IDs + +use super::{CompressionBuilder, CompressionState, DecompressionState, DecompressionTag, Result}; +use crate::{ + buffer::Buf, + cow_bytes::SlicedCowBytes, + size::StaticSize, +}; +use serde::{Deserialize, Serialize}; + +/// Delta encoding configuration +#[derive(Debug, Serialize, Deserialize, Clone, Copy)] +pub struct Delta { + /// Integer size in bytes (1, 2, 4, or 8) + pub value_size: u8, + /// Whether values are signed integers + pub signed: bool, +} + +impl Default for Delta { + fn default() -> Self { + Self { + value_size: 8, // Default to 64-bit integers + signed: true, // Default to signed + } + } +} + +impl StaticSize for Delta { + fn static_size() -> usize { + std::mem::size_of::() + } +} + +/// Delta compression state +#[derive(Debug)] +pub struct DeltaCompression { + config: Delta, +} + +/// Delta decompression state +#[derive(Debug)] +pub struct DeltaDecompression; + +impl CompressionBuilder for Delta { + fn create_compressor(&self) -> Result> { + Ok(Box::new(DeltaCompression { config: *self })) + } + + fn decompression_tag(&self) -> DecompressionTag { + DecompressionTag::Delta + } +} + +impl Delta { + /// Create a new Delta decompression state + pub fn new_decompression() -> Result> { + Ok(Box::new(DeltaDecompression)) + } +} + +/// Delta format: +/// [value_size: u8][signed: u8][base_value][delta_count: u32][deltas...] +/// Where deltas are variable-length encoded (smaller deltas use fewer bytes) +impl CompressionState for DeltaCompression { + fn compress_val(&mut self, data: &[u8]) -> Result> { + let value_size = self.config.value_size as usize; + if data.len() % value_size != 0 || data.len() == 0 { + return Ok(data.to_vec()); + } + + let count = data.len() / value_size; + if count < 2 { + return Ok(data.to_vec()); // Need at least 2 values for delta encoding + } + + let mut result = Vec::new(); + result.push(value_size as u8); + result.push(if self.config.signed { 1u8 } else { 0u8 }); + + // Store first value as base + result.extend_from_slice(&data[0..value_size]); + + // Calculate and store deltas + result.extend_from_slice(&((count - 1) as u32).to_le_bytes()); + + let mut prev_value = read_value(&data[0..value_size], value_size, self.config.signed); + + for i in 1..count { + let current_value = read_value(&data[i * value_size..(i + 1) * value_size], value_size, self.config.signed); + let delta = current_value.wrapping_sub(prev_value); + + // Variable-length encode the delta + encode_varint(delta, &mut result); + prev_value = current_value; + } + + // Add size headers like other compression algorithms + let size = data.len() as u32; + let comlen = result.len() as u32; + + let mut final_result = Vec::with_capacity(4 + 4 + result.len()); + final_result.extend_from_slice(&size.to_le_bytes()); + final_result.extend_from_slice(&comlen.to_le_bytes()); + final_result.extend_from_slice(&result); + + Ok(final_result) + } + + fn compress_buf(&mut self, data: Buf) -> Result { + use crate::buffer::BufWrite; + use crate::vdev::Block; + use std::io::Write; + + let compressed_data = self.compress_val(data.as_ref())?; + + let size = data.as_ref().len() as u32; + let comlen = compressed_data.len() as u32; + + let mut buf = BufWrite::with_capacity(Block::round_up_from_bytes( + 4 + 4 + comlen, // total metadata and compressed payload + )); + + buf.write_all(&size.to_le_bytes())?; + buf.write_all(&comlen.to_le_bytes())?; + buf.write_all(&compressed_data)?; + + Ok(buf.into_buf()) + } +} + +impl DecompressionState for DeltaDecompression { + fn decompress_val(&mut self, data: &[u8]) -> Result { + if data.len() < 8 { + return Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Input too short").into()); + } + + let uncomp_size = u32::from_le_bytes(data[0..4].try_into().unwrap()) as usize; + let comp_len = u32::from_le_bytes(data[4..8].try_into().unwrap()) as usize; + + if data.len() < 8 + comp_len { + return Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Compressed payload truncated").into()); + } + + let compressed = &data[8..8 + comp_len]; + + if compressed.len() < 6 { + return Ok(SlicedCowBytes::from(compressed.to_vec())); + } + + let mut pos = 0; + let value_size = compressed[pos] as usize; + pos += 1; + let signed = compressed[pos] != 0; + pos += 1; + + if pos + value_size > compressed.len() { + return Ok(SlicedCowBytes::from(compressed.to_vec())); + } + + // Read base value + let mut current_value = read_value(&compressed[pos..pos + value_size], value_size, signed); + pos += value_size; + + if pos + 4 > compressed.len() { + return Ok(SlicedCowBytes::from(compressed.to_vec())); + } + + let delta_count = u32::from_le_bytes([compressed[pos], compressed[pos + 1], compressed[pos + 2], compressed[pos + 3]]) as usize; + pos += 4; + + let mut result = Vec::new(); + + // Add base value + result.extend_from_slice(&write_value(current_value, value_size)); + + // Decode deltas + for _ in 0..delta_count { + if pos >= compressed.len() { break; } + + let (delta, bytes_read) = decode_varint(&compressed[pos..]); + if bytes_read == 0 { break; } + pos += bytes_read; + + current_value = current_value.wrapping_add(delta); + result.extend_from_slice(&write_value(current_value, value_size)); + } + + Ok(SlicedCowBytes::from(result)) + } + + fn decompress_buf(&mut self, data: Buf) -> Result { + use crate::buffer::BufWrite; + use crate::vdev::Block; + use std::io::Write; + + if data.len() < 8 { + return Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Input too short").into()); + } + + let uncomp_size = u32::from_le_bytes(data[0..4].try_into().unwrap()) as usize; + let comp_len = u32::from_le_bytes(data[4..8].try_into().unwrap()) as usize; + + if data.len() < 8 + comp_len { + return Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Compressed payload truncated").into()); + } + + let compressed = &data[8..8 + comp_len]; + + let decompressed = self.decompress_val(compressed)?; + + let mut buf = BufWrite::with_capacity(Block::round_up_from_bytes(uncomp_size as u32)); + buf.write_all(decompressed.as_ref())?; + Ok(buf.into_buf()) + } +} + +// Helper functions +fn read_value(data: &[u8], size: usize, signed: bool) -> i64 { + match (size, signed) { + (1, true) => data[0] as i8 as i64, + (1, false) => data[0] as i64, + (2, true) => i16::from_le_bytes([data[0], data[1]]) as i64, + (2, false) => u16::from_le_bytes([data[0], data[1]]) as i64, + (4, true) => i32::from_le_bytes([data[0], data[1], data[2], data[3]]) as i64, + (4, false) => u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as i64, + (8, _) => i64::from_le_bytes([ + data[0], data[1], data[2], data[3], + data[4], data[5], data[6], data[7] + ]), + _ => 0, + } +} + +fn write_value(value: i64, size: usize) -> Vec { + match size { + 1 => vec![value as u8], + 2 => (value as u16).to_le_bytes().to_vec(), + 4 => (value as u32).to_le_bytes().to_vec(), + 8 => value.to_le_bytes().to_vec(), + _ => vec![], + } +} + +// Variable-length integer encoding (LEB128-style) +fn encode_varint(mut value: i64, output: &mut Vec) { + // Use zigzag encoding for signed values + let unsigned = if value >= 0 { + (value as u64) << 1 + } else { + (((-value - 1) as u64) << 1) | 1 + }; + + let mut n = unsigned; + loop { + let byte = (n & 0x7F) as u8; + n >>= 7; + if n == 0 { + output.push(byte); + break; + } else { + output.push(byte | 0x80); + } + } +} + +fn decode_varint(data: &[u8]) -> (i64, usize) { + let mut result = 0u64; + let mut shift = 0; + let mut pos = 0; + + for &byte in data { + pos += 1; + result |= ((byte & 0x7F) as u64) << shift; + shift += 7; + + if byte & 0x80 == 0 { + break; + } + + if shift >= 64 { + return (0, 0); // Overflow + } + } + + // Decode zigzag + let signed_result = if result & 1 == 0 { + (result >> 1) as i64 + } else { + -((result >> 1) as i64) - 1 + }; + + (signed_result, pos) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_delta_for_val_compression() { + // Create test data with sequential values (good for delta encoding) + let mut data = Vec::new(); + for i in 1000i64..1100i64 { + data.extend_from_slice(&i.to_le_bytes()); + } + + let delta = Delta::default(); + let mut compressor = delta.create_compressor().unwrap(); + let compressed = compressor.compress_val(&data).unwrap(); + + let mut decompressor = Delta::new_decompression().unwrap(); + let decompressed = decompressor.decompress_val(&compressed).unwrap(); + + assert_eq!(data, decompressed.as_ref()); + println!("Delta val compression - Original: {}, Compressed: {}", data.len(), compressed.len()); + } + + #[test] + fn test_delta_for_buf_compression() { + // Create test data with sequential values + let mut data = Vec::new(); + for i in 500i64..600i64 { + data.extend_from_slice(&i.to_le_bytes()); + } + + let buf = Buf::from_zero_padded(data.clone()); + let delta = Delta::default(); + + let mut compressor = delta.create_compressor().unwrap(); + let compressed_buf = compressor.compress_buf(buf.clone()).unwrap(); + + let mut decompressor = Delta::new_decompression().unwrap(); + let decompressed_buf = decompressor.decompress_buf(compressed_buf).unwrap(); + + assert_eq!(buf.as_ref(), decompressed_buf.as_ref()); + println!("Delta buf compression - Original: {}, Compressed: {}", buf.len(), decompressed_buf.len()); + } + + #[test] + fn test_varint_encoding() { + let test_values = vec![0, 1, -1, 127, -127, 128, -128, 16383, -16383]; + + for &value in &test_values { + let mut encoded = Vec::new(); + encode_varint(value, &mut encoded); + let (decoded, bytes_read) = decode_varint(&encoded); + assert_eq!(value, decoded); + assert_eq!(encoded.len(), bytes_read); + } + } +} \ No newline at end of file diff --git a/betree/src/compression/dictionary.rs b/betree/src/compression/dictionary.rs new file mode 100644 index 000000000..037a9c412 --- /dev/null +++ b/betree/src/compression/dictionary.rs @@ -0,0 +1,337 @@ +//! Dictionary encoding implementation +//! Ideal for: Columns with repeated values (strings, enums, small integer sets) +//! Replaces frequent values with shorter dictionary indices + +use super::{CompressionBuilder, CompressionState, DecompressionState, DecompressionTag, Result}; +use crate::{ + buffer::Buf, + cow_bytes::SlicedCowBytes, + size::StaticSize, +}; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +/// Dictionary encoding configuration +#[derive(Debug, Serialize, Deserialize, Clone, Copy)] +pub struct Dictionary { + /// Maximum dictionary size (number of unique values to track) + pub max_dict_size: u16, + /// Minimum value frequency to include in dictionary + pub min_frequency: u8, +} + +impl Default for Dictionary { + fn default() -> Self { + Self { + max_dict_size: 256, // Can use single byte indices + min_frequency: 2, // Must appear at least twice + } + } +} + +impl StaticSize for Dictionary { + fn static_size() -> usize { + std::mem::size_of::() + } +} + +/// Dictionary compression state +#[derive(Debug)] +pub struct DictionaryCompression { + config: Dictionary, +} + +/// Dictionary decompression state +#[derive(Debug)] +pub struct DictionaryDecompression; + +impl CompressionBuilder for Dictionary { + fn create_compressor(&self) -> Result> { + Ok(Box::new(DictionaryCompression { config: *self })) + } + + fn decompression_tag(&self) -> DecompressionTag { + DecompressionTag::Dictionary + } +} + +impl Dictionary { + /// Create a new Dictionary decompression state + pub fn new_decompression() -> Result> { + Ok(Box::new(DictionaryDecompression)) + } +} + +/// Dictionary encoding format: +/// [dict_size: u16][value_size: u8][index_size: u8][dictionary][indices] +/// Where: +/// - dict_size: number of dictionary entries +/// - value_size: size of each value in bytes +/// - index_size: size of each index (1 or 2 bytes) +/// - dictionary: concatenated values +/// - indices: array of indices into dictionary +impl CompressionState for DictionaryCompression { + fn compress_val(&mut self, data: &[u8]) -> Result> { + // For text data, treat each byte as a value (byte-level dictionary) + let value_size = 1usize; + if data.is_empty() { + return Ok(data.to_vec()); + } + + let value_count = data.len() / value_size; + let mut value_freq: HashMap<&[u8], u32> = HashMap::new(); + + // Count frequencies + for i in 0..value_count { + let value = &data[i * value_size..(i + 1) * value_size]; + *value_freq.entry(value).or_insert(0) += 1; + } + + // Build dictionary of frequent values + let mut dictionary: Vec<&[u8]> = value_freq + .iter() + .filter(|(_, &freq)| freq >= self.config.min_frequency as u32) + .map(|(&value, _)| value) + .collect(); + + // Limit dictionary size + dictionary.truncate(self.config.max_dict_size as usize); + + if dictionary.is_empty() { + // No benefit from dictionary encoding + return Ok(data.to_vec()); + } + + // Create value -> index mapping + let value_to_index: HashMap<&[u8], u16> = dictionary + .iter() + .enumerate() + .map(|(i, &value)| (value, i as u16)) + .collect(); + + // Choose index size (1 or 2 bytes) + let index_size = if dictionary.len() <= 256 { 1u8 } else { 2u8 }; + + // Build compressed format + let mut result = Vec::new(); + + // Header + result.extend_from_slice(&(dictionary.len() as u16).to_le_bytes()); + result.push(value_size as u8); + result.push(index_size); + + // Dictionary + for &value in &dictionary { + result.extend_from_slice(value); + } + + // Indices + for i in 0..value_count { + let value = &data[i * value_size..(i + 1) * value_size]; + if let Some(&index) = value_to_index.get(value) { + // Value is in dictionary + if index_size == 1 { + result.push(index as u8); + } else { + result.extend_from_slice(&index.to_le_bytes()); + } + } else { + // Value not in dictionary - store special index + full value + let escape_index = dictionary.len() as u16; + if index_size == 1 { + result.push(escape_index as u8); + } else { + result.extend_from_slice(&escape_index.to_le_bytes()); + } + result.extend_from_slice(value); + } + } + + // Add size headers like other compression algorithms + let size = data.len() as u32; + let comlen = result.len() as u32; + + let mut final_result = Vec::with_capacity(4 + 4 + result.len()); + final_result.extend_from_slice(&size.to_le_bytes()); + final_result.extend_from_slice(&comlen.to_le_bytes()); + final_result.extend_from_slice(&result); + + Ok(final_result) + } + + fn compress_buf(&mut self, data: Buf) -> Result { + use crate::buffer::BufWrite; + use crate::vdev::Block; + use std::io::Write; + + let compressed_data = self.compress_val(data.as_ref())?; + + let size = data.as_ref().len() as u32; + let comlen = compressed_data.len() as u32; + + let mut buf = BufWrite::with_capacity(Block::round_up_from_bytes( + 4 + 4 + comlen, // total metadata and compressed payload + )); + + buf.write_all(&size.to_le_bytes())?; + buf.write_all(&comlen.to_le_bytes())?; + buf.write_all(&compressed_data)?; + + Ok(buf.into_buf()) + } +} + +impl DecompressionState for DictionaryDecompression { + fn decompress_val(&mut self, data: &[u8]) -> Result { + if data.len() < 8 { + return Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Input too short").into()); + } + + let uncomp_size = u32::from_le_bytes(data[0..4].try_into().unwrap()) as usize; + let comp_len = u32::from_le_bytes(data[4..8].try_into().unwrap()) as usize; + + if data.len() < 8 + comp_len { + return Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Compressed payload truncated").into()); + } + + let compressed = &data[8..8 + comp_len]; + + if compressed.len() < 4 { + return Ok(SlicedCowBytes::from(compressed.to_vec())); + } + + let mut pos = 0; + + // Read header + let dict_size = u16::from_le_bytes([compressed[pos], compressed[pos + 1]]) as usize; + pos += 2; + let value_size = compressed[pos] as usize; + pos += 1; + let index_size = compressed[pos] as usize; + pos += 1; + + // Read dictionary + let dict_bytes = dict_size * value_size; + if pos + dict_bytes > compressed.len() { + return Ok(SlicedCowBytes::from(compressed.to_vec())); + } + + let dict_data = &compressed[pos..pos + dict_bytes]; + pos += dict_bytes; + + // Build dictionary + let mut dictionary = Vec::with_capacity(dict_size); + for i in 0..dict_size { + let start = i * value_size; + dictionary.push(&dict_data[start..start + value_size]); + } + + // Decode indices + let mut result = Vec::new(); + while pos < compressed.len() { + let index = if index_size == 1 { + if pos >= compressed.len() { break; } + let idx = compressed[pos] as usize; + pos += 1; + idx + } else { + if pos + 1 >= compressed.len() { break; } + let idx = u16::from_le_bytes([compressed[pos], compressed[pos + 1]]) as usize; + pos += 2; + idx + }; + + if index < dictionary.len() { + // Regular dictionary lookup + result.extend_from_slice(dictionary[index]); + } else { + // Escaped value + if pos + value_size > compressed.len() { break; } + result.extend_from_slice(&compressed[pos..pos + value_size]); + pos += value_size; + } + } + + Ok(SlicedCowBytes::from(result)) + } + + fn decompress_buf(&mut self, data: Buf) -> Result { + use crate::buffer::BufWrite; + use crate::vdev::Block; + use std::io::Write; + + if data.len() < 8 { + return Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Input too short").into()); + } + + let uncomp_size = u32::from_le_bytes(data[0..4].try_into().unwrap()) as usize; + let comp_len = u32::from_le_bytes(data[4..8].try_into().unwrap()) as usize; + + if data.len() < 8 + comp_len { + return Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Compressed payload truncated").into()); + } + + let compressed = &data[8..8 + comp_len]; + + let decompressed = self.decompress_val(compressed)?; + + let mut buf = BufWrite::with_capacity(Block::round_up_from_bytes(uncomp_size as u32)); + buf.write_all(decompressed.as_ref())?; + Ok(buf.into_buf()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_dictionary_for_val_compression() { + // Create test data with repeated 8-byte values + let mut data = Vec::new(); + let values = [ + b"value001", b"value002", b"value001", b"value003", + b"value001", b"value002", b"value004", b"value001" + ]; + + for &value in &values { + data.extend_from_slice(value); + } + + let dictionary = Dictionary::default(); + let mut compressor = dictionary.create_compressor().unwrap(); + let compressed = compressor.compress_val(&data).unwrap(); + + let mut decompressor = Dictionary::new_decompression().unwrap(); + let decompressed = decompressor.decompress_val(&compressed).unwrap(); + + assert_eq!(data, decompressed.as_ref()); + println!("Dictionary val compression - Original: {}, Compressed: {}", data.len(), compressed.len()); + } + + #[test] + fn test_dictionary_for_buf_compression() { + // Create test data with repeated 8-byte values + let mut data = Vec::new(); + let values = [ + b"dictval1", b"dictval2", b"dictval1", b"dictval3", + b"dictval2", b"dictval1", b"dictval4", b"dictval2" + ]; + + for &value in &values { + data.extend_from_slice(value); + } + + let buf = Buf::from_zero_padded(data.clone()); + let dictionary = Dictionary::default(); + + let mut compressor = dictionary.create_compressor().unwrap(); + let compressed_buf = compressor.compress_buf(buf.clone()).unwrap(); + + let mut decompressor = Dictionary::new_decompression().unwrap(); + let decompressed_buf = decompressor.decompress_buf(compressed_buf).unwrap(); + + assert_eq!(buf.as_ref(), decompressed_buf.as_ref()); + println!("Dictionary buf compression - Original: {}, Compressed: {}", buf.len(), decompressed_buf.len()); + } +} \ No newline at end of file diff --git a/betree/src/compression/gorilla.rs b/betree/src/compression/gorilla.rs new file mode 100644 index 000000000..d0bd993bf --- /dev/null +++ b/betree/src/compression/gorilla.rs @@ -0,0 +1,431 @@ +//! Gorilla compression implementation - specialized for time series data +//! Ideal for: Time series of floating point values with small changes between consecutive values + +use super::{CompressionBuilder, CompressionState, DecompressionState, DecompressionTag, Result}; +use crate::{ + buffer::Buf, + cow_bytes::SlicedCowBytes, + size::StaticSize, +}; +use serde::{Deserialize, Serialize}; + +/// Gorilla compression configuration +#[derive(Debug, Serialize, Deserialize, Clone, Copy)] +pub struct Gorilla { + /// Whether to use 64-bit (f64) or 32-bit (f32) floats + pub use_f64: bool, +} + +impl Default for Gorilla { + fn default() -> Self { + Self { + use_f64: true, // Default to double precision + } + } +} + +impl StaticSize for Gorilla { + fn static_size() -> usize { + std::mem::size_of::() + } +} + +/// Gorilla compression state +#[derive(Debug)] +pub struct GorillaCompression { + config: Gorilla, +} + +/// Gorilla decompression state +#[derive(Debug)] +pub struct GorillaDecompression; + +impl CompressionBuilder for Gorilla { + fn create_compressor(&self) -> Result> { + Ok(Box::new(GorillaCompression { config: *self })) + } + + fn decompression_tag(&self) -> DecompressionTag { + DecompressionTag::Gorilla + } +} + +impl Gorilla { + /// Create a new Gorilla decompression state + pub fn new_decompression() -> Result> { + Ok(Box::new(GorillaDecompression)) + } +} + +/// Simplified Gorilla format: +/// [use_f64: u8][count: u32][first_value][compressed_data...] +/// +/// For each subsequent value: +/// - If XOR with previous == 0: store single bit '0' +/// - If XOR has same leading/trailing zeros as previous XOR: store '10' + middle bits +/// - Otherwise: store '11' + leading_zeros_count + meaningful_bits + trailing_zeros_count +impl CompressionState for GorillaCompression { + fn compress_val(&mut self, data: &[u8]) -> Result> { + let value_size = if self.config.use_f64 { 8 } else { 4 }; + + if data.len() % value_size != 0 || data.len() == 0 { + return Ok(data.to_vec()); + } + + let count = data.len() / value_size; + if count < 2 { + return Ok(data.to_vec()); // Need at least 2 values + } + + let mut result = Vec::new(); + result.push(if self.config.use_f64 { 1u8 } else { 0u8 }); + result.extend_from_slice(&(count as u32).to_le_bytes()); + + // Store first value uncompressed + result.extend_from_slice(&data[0..value_size]); + + let mut bit_writer = BitWriter::new(); + let mut prev_xor = 0u64; + let mut prev_leading_zeros = 0u8; + let mut prev_trailing_zeros = 0u8; + + let mut prev_value = read_float_bits(&data[0..value_size], self.config.use_f64); + + for i in 1..count { + let current_value = read_float_bits(&data[i * value_size..(i + 1) * value_size], self.config.use_f64); + let xor = prev_value ^ current_value; + + if xor == 0 { + // Value unchanged + bit_writer.write_bit(false); + } else { + bit_writer.write_bit(true); + + let leading_zeros = xor.leading_zeros() as u8; + let trailing_zeros = xor.trailing_zeros() as u8; + + if leading_zeros >= prev_leading_zeros && trailing_zeros >= prev_trailing_zeros { + // Use previous block info + bit_writer.write_bit(false); + let meaningful_bits = 64 - prev_leading_zeros - prev_trailing_zeros; + let shifted_xor = xor >> prev_trailing_zeros; + bit_writer.write_bits(shifted_xor, meaningful_bits); + } else { + // New block info + bit_writer.write_bit(true); + bit_writer.write_bits(leading_zeros as u64, 5); // 5 bits for leading zeros (0-31) + + let meaningful_bits = 64 - leading_zeros - trailing_zeros; + bit_writer.write_bits(meaningful_bits as u64, 6); // 6 bits for length (0-63) + + let shifted_xor = xor >> trailing_zeros; + bit_writer.write_bits(shifted_xor, meaningful_bits); + + prev_leading_zeros = leading_zeros; + prev_trailing_zeros = trailing_zeros; + } + + prev_xor = xor; + } + + prev_value = current_value; + } + + result.extend_from_slice(&bit_writer.compress_buf()); + + // Add size headers like other compression algorithms + let size = data.len() as u32; + let comlen = result.len() as u32; + + let mut final_result = Vec::with_capacity(4 + 4 + result.len()); + final_result.extend_from_slice(&size.to_le_bytes()); + final_result.extend_from_slice(&comlen.to_le_bytes()); + final_result.extend_from_slice(&result); + + Ok(final_result) + } + + fn compress_buf(&mut self, data: Buf) -> Result { + use crate::buffer::BufWrite; + use crate::vdev::Block; + use std::io::Write; + + let compressed_data = self.compress_val(data.as_ref())?; + + let size = data.as_ref().len() as u32; + let comlen = compressed_data.len() as u32; + + let mut buf = BufWrite::with_capacity(Block::round_up_from_bytes( + 4 + 4 + comlen, // total metadata and compressed payload + )); + + buf.write_all(&size.to_le_bytes())?; + buf.write_all(&comlen.to_le_bytes())?; + buf.write_all(&compressed_data)?; + + Ok(buf.into_buf()) + } +} + +impl DecompressionState for GorillaDecompression { + fn decompress_val(&mut self, data: &[u8]) -> Result { + if data.len() < 8 { + return Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Input too short").into()); + } + + let uncomp_size = u32::from_le_bytes(data[0..4].try_into().unwrap()) as usize; + let comp_len = u32::from_le_bytes(data[4..8].try_into().unwrap()) as usize; + + if data.len() < 8 + comp_len { + return Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Compressed payload truncated").into()); + } + + let compressed = &data[8..8 + comp_len]; + + if compressed.len() < 6 { + return Ok(SlicedCowBytes::from(compressed.to_vec())); + } + + let mut pos = 0; + let use_f64 = compressed[pos] != 0; + pos += 1; + + let count = u32::from_le_bytes([compressed[pos], compressed[pos + 1], compressed[pos + 2], compressed[pos + 3]]) as usize; + pos += 4; + + let value_size = if use_f64 { 8 } else { 4 }; + + if pos + value_size > compressed.len() { + return Ok(SlicedCowBytes::from(compressed.to_vec())); + } + + let mut result = Vec::new(); + + // Read first value + let mut current_value = read_float_bits(&compressed[pos..pos + value_size], use_f64); + result.extend_from_slice(&write_float_bits(current_value, use_f64)); + pos += value_size; + + let mut bit_reader = BitReader::new(&compressed[pos..]); + let mut prev_leading_zeros = 0u8; + let mut prev_trailing_zeros = 0u8; + + for _ in 1..count { + if let Some(control_bit) = bit_reader.read_bit() { + if !control_bit { + // Value unchanged + // current_value stays the same + } else { + // Value changed + if let Some(use_prev_block) = bit_reader.read_bit() { + if !use_prev_block { + // Use previous block info + let meaningful_bits = 64 - prev_leading_zeros - prev_trailing_zeros; + if let Some(xor_bits) = bit_reader.read_bits(meaningful_bits) { + let xor = xor_bits << prev_trailing_zeros; + current_value ^= xor; + } + } else { + // New block info + if let Some(leading_zeros) = bit_reader.read_bits(5) { + if let Some(meaningful_bits) = bit_reader.read_bits(6) { + if let Some(xor_bits) = bit_reader.read_bits(meaningful_bits as u8) { + prev_leading_zeros = leading_zeros as u8; + prev_trailing_zeros = 64 - prev_leading_zeros - meaningful_bits as u8; + let xor = xor_bits << prev_trailing_zeros; + current_value ^= xor; + } + } + } + } + } + } + } + + result.extend_from_slice(&write_float_bits(current_value, use_f64)); + } + + Ok(SlicedCowBytes::from(result)) + } + + fn decompress_buf(&mut self, data: Buf) -> Result { + use crate::buffer::BufWrite; + use crate::vdev::Block; + use std::io::Write; + + if data.len() < 8 { + return Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Input too short").into()); + } + + let uncomp_size = u32::from_le_bytes(data[0..4].try_into().unwrap()) as usize; + let comp_len = u32::from_le_bytes(data[4..8].try_into().unwrap()) as usize; + + if data.len() < 8 + comp_len { + return Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Compressed payload truncated").into()); + } + + let compressed = &data[8..8 + comp_len]; + + let decompressed = self.decompress_val(compressed)?; + + let mut buf = BufWrite::with_capacity(Block::round_up_from_bytes(uncomp_size as u32)); + buf.write_all(decompressed.as_ref())?; + Ok(buf.into_buf()) + } +} + +// Helper functions +fn read_float_bits(data: &[u8], use_f64: bool) -> u64 { + if use_f64 { + f64::from_le_bytes([data[0], data[1], data[2], data[3], data[4], data[5], data[6], data[7]]).to_bits() + } else { + f32::from_le_bytes([data[0], data[1], data[2], data[3]]).to_bits() as u64 + } +} + +fn write_float_bits(bits: u64, use_f64: bool) -> Vec { + if use_f64 { + f64::from_bits(bits).to_le_bytes().to_vec() + } else { + f32::from_bits(bits as u32).to_le_bytes().to_vec() + } +} + +// Simple bit writer/reader for Gorilla encoding +struct BitWriter { + buffer: Vec, + current_byte: u8, + bit_count: u8, +} + +impl BitWriter { + fn new() -> Self { + Self { + buffer: Vec::new(), + current_byte: 0, + bit_count: 0, + } + } + + fn write_bit(&mut self, bit: bool) { + if bit { + self.current_byte |= 1 << (7 - self.bit_count); + } + self.bit_count += 1; + + if self.bit_count == 8 { + self.buffer.push(self.current_byte); + self.current_byte = 0; + self.bit_count = 0; + } + } + + fn write_bits(&mut self, value: u64, count: u8) { + for i in (0..count).rev() { + let bit = (value >> i) & 1 != 0; + self.write_bit(bit); + } + } + + fn compress_buf(mut self) -> Vec { + if self.bit_count > 0 { + self.buffer.push(self.current_byte); + } + self.buffer + } +} + +struct BitReader<'a> { + data: &'a [u8], + byte_pos: usize, + bit_pos: u8, +} + +impl<'a> BitReader<'a> { + fn new(data: &'a [u8]) -> Self { + Self { + data, + byte_pos: 0, + bit_pos: 0, + } + } + + fn read_bit(&mut self) -> Option { + if self.byte_pos >= self.data.len() { + return None; + } + + let bit = (self.data[self.byte_pos] >> (7 - self.bit_pos)) & 1 != 0; + self.bit_pos += 1; + + if self.bit_pos == 8 { + self.byte_pos += 1; + self.bit_pos = 0; + } + + Some(bit) + } + + fn read_bits(&mut self, count: u8) -> Option { + let mut result = 0u64; + for _ in 0..count { + if let Some(bit) = self.read_bit() { + result = (result << 1) | (if bit { 1 } else { 0 }); + } else { + return None; + } + } + Some(result) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_gorilla_for_val_compression() { + // Create test data with slowly changing floating point values + let mut data = Vec::new(); + let mut value = 100.0f64; + + for i in 0..50 { + value += (i as f64) * 0.01; // Small incremental changes + data.extend_from_slice(&value.to_le_bytes()); + } + + let gorilla = Gorilla::default(); + let mut compressor = gorilla.create_compressor().unwrap(); + let compressed = compressor.compress_val(&data).unwrap(); + + let mut decompressor = Gorilla::new_decompression().unwrap(); + let decompressed = decompressor.decompress_val(&compressed).unwrap(); + + assert_eq!(data, decompressed.as_ref()); + println!("Gorilla val compression - Original: {}, Compressed: {}", data.len(), compressed.len()); + } + + #[test] + fn test_gorilla_for_buf_compression() { + // Create test data with time series floating point values + let mut data = Vec::new(); + let mut value = 50.0f64; + + for i in 0..30 { + value += (i as f64) * 0.05; // Small incremental changes + data.extend_from_slice(&value.to_le_bytes()); + } + + let buf = Buf::from_zero_padded(data.clone()); + let gorilla = Gorilla::default(); + + let mut compressor = gorilla.create_compressor().unwrap(); + let compressed_buf = compressor.compress_buf(buf.clone()).unwrap(); + + let mut decompressor = Gorilla::new_decompression().unwrap(); + let decompressed_buf = decompressor.decompress_buf(compressed_buf).unwrap(); + + assert_eq!(buf.as_ref(), decompressed_buf.as_ref()); + println!("Gorilla buf compression - Original: {}, Compressed: {}", buf.len(), decompressed_buf.len()); + } +} \ No newline at end of file diff --git a/betree/src/compression/lz4.rs b/betree/src/compression/lz4.rs index df02926d3..9c1ef66cb 100644 --- a/betree/src/compression/lz4.rs +++ b/betree/src/compression/lz4.rs @@ -1,12 +1,20 @@ -use super::{ CompressionConfiguration, CompressionState, DecompressionState, DecompressionTag, DEFAULT_BUFFER_SIZE, Result }; +use super::{ CompressionBuilder, CompressionState, DecompressionState, DecompressionTag, Result }; use crate::size::StaticSize; -use crate::buffer::{Buf, BufWrite}; +use crate::buffer::Buf; +use crate::cow_bytes::SlicedCowBytes; + + + use serde::{Deserialize, Serialize}; -use std::io::{self, Read}; -// use lz4_sys::{ Lz4 + + + + +use lz4::block; +use lz4::block::CompressionMode; /// LZ4 compression. () #[derive(Debug, Serialize, Deserialize, Clone, Copy)] pub struct Lz4 { @@ -18,31 +26,26 @@ pub struct Lz4 { } pub struct Lz4Compression { - config: Lz4, - encoder: Encoder, + level: u8, } pub struct Lz4Decompression; impl StaticSize for Lz4 { - fn size() -> usize { + fn static_size() -> usize { 1 } } -impl CompressionConfiguration for Lz4 { - fn new_compression(&self) -> Result> { - let encoder = EncoderBuilder::new() - .level(u32::from(self.level)) - .checksum(ContentChecksum::NoChecksum) - .block_size(BlockSize::Max4MB) - .block_mode(BlockMode::Linked) - .build(BufWrite::with_capacity(DEFAULT_BUFFER_SIZE))?; - - Ok(Box::new(Lz4Compression { config: self.clone(), encoder })) +impl CompressionBuilder for Lz4 { + fn create_compressor(&self) -> Result> { + // Just store the level, create encoder only when needed + Ok(Box::new(Lz4Compression { level: self.level })) } - fn decompression_tag(&self) -> DecompressionTag { DecompressionTag::Lz4 } + fn decompression_tag(&self) -> DecompressionTag { + DecompressionTag::Lz4 + } } impl Lz4 { @@ -51,32 +54,205 @@ impl Lz4 { } } -impl io::Write for Lz4Compression { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.encoder.write(buf) - } - fn write_all(&mut self, buf: &[u8]) -> io::Result<()> { - self.encoder.write_all(buf) + + + + + +impl CompressionState for Lz4Compression { + fn compress_val(&mut self, data: &[u8]) -> Result> { + let input_size = data.len(); + + #[cfg(feature = "compression_metrics")] + let start = std::time::Instant::now(); + + let mode = CompressionMode::HIGHCOMPRESSION(self.level as i32); + // Use block-level compression - much more efficient than creating encoder each time + let compressed_data = block::compress(data, Some(mode), false) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, format!("LZ4 compression failed: {:?}", e)))?; + + let size = data.len() as u32; + let comlen = compressed_data.len() as u32; + + let mut result = Vec::with_capacity(4 + 4 + compressed_data.len()); + result.extend_from_slice(&size.to_le_bytes()); + result.extend_from_slice(&comlen.to_le_bytes()); + result.extend_from_slice(&compressed_data); + + #[cfg(feature = "compression_metrics")] + { + let duration = start.elapsed().as_nanos() as u64; + super::metrics::record_compression_metrics(input_size, compressed_data.len(), duration); + } + + Ok(result) } - fn flush(&mut self) -> io::Result<()> { - self.encoder.flush() + fn compress_buf(&mut self, data: Buf) -> Result { + use crate::buffer::BufWrite; + use crate::vdev::Block; + use std::io::Write; + + let input_size = data.as_ref().len(); + + #[cfg(feature = "compression_metrics")] + let start = std::time::Instant::now(); + + let mode = CompressionMode::HIGHCOMPRESSION(self.level as i32); + let compressed_data = block::compress(data.as_ref(), Some(mode), false) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, format!("LZ4 compression failed: {:?}", e)))?; + + let size = data.as_ref().len() as u32; + let comlen = compressed_data.len() as u32; + + let mut buf = BufWrite::with_capacity(Block::round_up_from_bytes( + 4 + 4 + comlen, // total metadata and compressed payload + )); + + buf.write_all(&size.to_le_bytes())?; + buf.write_all(&comlen.to_le_bytes())?; + buf.write_all(&compressed_data)?; + + let result = buf.into_buf(); + + #[cfg(feature = "compression_metrics")] + { + let duration = start.elapsed().as_nanos() as u64; + super::metrics::record_compression_metrics(input_size, compressed_data.len(), duration); + } + + Ok(result) } } -impl CompressionState for Lz4Compression { - fn finish(&mut self) -> Buf { - let (v, result) = self.encoder.finish(); - result.unwrap(); - v.into_buf() + +impl DecompressionState for Lz4Decompression { + fn decompress_val(&mut self, data: &[u8]) -> Result { + let input_size = data.len(); + + if data.len() < 8 { + bail!(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Input too short")); + } + + let uncomp_size = u32::from_le_bytes(data[0..4].try_into().unwrap()) as usize; + let comp_len = u32::from_le_bytes(data[4..8].try_into().unwrap()) as usize; + + if data.len() < 8 + comp_len { + bail!(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Compressed payload truncated")); + } + + let compressed = &data[8..8 + comp_len]; + + #[cfg(feature = "compression_metrics")] + let start = std::time::Instant::now(); + + // Use block-level decompression to match block-level compression + let decompressed = block::decompress(compressed, Some(uncomp_size as i32)) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, format!("LZ4 decompression failed: {:?}", e)))?; + + let result = SlicedCowBytes::from(decompressed); + + #[cfg(feature = "compression_metrics")] + { + let duration = start.elapsed().as_nanos() as u64; + super::metrics::record_decompression_metrics(input_size, result.len(), duration); + } + + Ok(result) + } + + fn decompress_buf(&mut self, data: Buf) -> Result { + use crate::buffer::BufWrite; + use crate::vdev::Block; + use std::io::Write; + + let input_size = data.len(); + + if data.len() < 8 { + return Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Input too short").into()); + } + + let uncomp_size = u32::from_le_bytes(data[0..4].try_into().unwrap()) as usize; + let comp_len = u32::from_le_bytes(data[4..8].try_into().unwrap()) as usize; + + if data.len() < 8 + comp_len { + return Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Compressed payload truncated").into()); + } + + let compressed = &data[8..8 + comp_len]; + + #[cfg(feature = "compression_metrics")] + let start = std::time::Instant::now(); + + let uncompressed_data = block::decompress(compressed, Some(uncomp_size as i32)) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, format!("LZ4 decompression failed: {:?}", e)))?; + + let mut buf = BufWrite::with_capacity(Block::round_up_from_bytes(uncomp_size as u32)); + buf.write_all(&uncompressed_data)?; + let result = buf.into_buf(); + + #[cfg(feature = "compression_metrics")] + { + let duration = start.elapsed().as_nanos() as u64; + super::metrics::record_decompression_metrics(input_size, result.len(), duration); + } + + Ok(result) } } -impl DecompressionState for Lz4Decompression { - fn decompress(&mut self, data: &[u8]) -> Result> { - let mut output = Vec::with_capacity(DEFAULT_BUFFER_SIZE.to_bytes() as usize); - Decoder::new(&data[..])?.read_to_end(&mut output)?; - Ok(output.into_boxed_slice()) + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_lz4_for_val_compression() { + let data = b"LZ4 compression test data with some repeated patterns. ".repeat(20); + let lz4 = Lz4 { level: 8 }; + + let mut compressor = lz4.create_compressor().unwrap(); + let compressed = compressor.compress_val(&data).unwrap(); + + let mut decompressor = Lz4::new_decompression().unwrap(); + let decompressed = decompressor.decompress_val(&compressed).unwrap(); + + assert_eq!(data, decompressed.as_ref()); + println!("LZ4 val compression - Original: {}, Compressed: {}", data.len(), compressed.len()); + } + + #[test] + fn test_lz4_for_buf_compression() { + let data = b"LZ4 test with Buf interface and repeated content. ".repeat(15); + let buf = Buf::from_zero_padded(data.clone()); + let lz4 = Lz4 { level: 4 }; + + let mut compressor = lz4.create_compressor().unwrap(); + let compressed_buf = compressor.compress_buf(buf.clone()).unwrap(); + + let mut decompressor = Lz4::new_decompression().unwrap(); + let decompressed_buf = decompressor.decompress_buf(compressed_buf).unwrap(); + + assert_eq!(buf.as_ref(), decompressed_buf.as_ref()); + println!("LZ4 buf compression - Original: {}, Compressed: {}", buf.len(), decompressed_buf.len()); + } + + #[test] + fn test_lz4_different_levels() { + let data = b"Testing different LZ4 compression levels with this repeated text. ".repeat(10); + + for level in [1, 8, 16] { + let lz4 = Lz4 { level }; + + let mut compressor = lz4.create_compressor().unwrap(); + let compressed = compressor.compress_val(&data).unwrap(); + + let mut decompressor = Lz4::new_decompression().unwrap(); + let decompressed = decompressor.decompress_val(&compressed).unwrap(); + + assert_eq!(data, decompressed.as_ref()); + println!("LZ4 level {} - Original: {}, Compressed: {}", level, data.len(), compressed.len()); + } } } diff --git a/betree/src/compression/metrics.rs b/betree/src/compression/metrics.rs new file mode 100644 index 000000000..846ac338d --- /dev/null +++ b/betree/src/compression/metrics.rs @@ -0,0 +1,197 @@ +//! Compression metrics tracking module +//! +//! This module provides functionality to track compression and decompression +//! metrics when the `compression_metrics` feature is enabled. +//! +//! ## Overview +//! +//! The Haura betree storage system uses compression at two levels: +//! +//! 1. **Value-level compression** (Memory storage): Individual values are compressed +//! when stored in Memory storage kind, allowing for fine-grained compression control. +//! +//! 2. **Block-level compression** (SSD/HDD storage): Entire nodes are compressed +//! when stored in SSD or HDD storage kinds, optimizing for I/O efficiency. +//! +//! ## Metrics Tracked +//! +//! When the `compression_metrics` feature is enabled, the following metrics are tracked: +//! +//! - `bytes_to_compressed`: Total bytes passed to compression algorithms +//! - `compressed_bytes`: Total bytes after compression +//! - `compression_time`: Total time spent in compression operations (nanoseconds) +//! - `bytes_to_decompress`: Total bytes passed to decompression algorithms +//! - `bytes_after_decompression`: Total bytes after decompression +//! - `decompression_time`: Total time spent in decompression operations (nanoseconds) +//! +//! ## Usage +//! +//! Enable the feature in your `Cargo.toml`: +//! +//! ```toml +//! [dependencies] +//! betree_storage_stack = { version = "0.3.1-alpha", features = ["compression_metrics"] } +//! ``` +//! +//! Access metrics through the dataset statistics: +//! +//! ```rust,no_run +//! # use betree_storage_stack::*; +//! # fn example() -> Result<(), Box> { +//! let db = Database::build(DatabaseConfiguration::default())?; +//! let ds = db.open_or_create_dataset("test", DatasetConfiguration::default())?; +//! +//! // Perform some operations... +//! ds.insert(b"key".to_vec(), b"value", 0)?; +//! let _value = ds.get(b"key")?; +//! +//! // Get compression statistics +//! let stats = ds.statistics(); +//! println!("Compression ratio: {:.2}%", +//! (stats.compressed_bytes.to_bytes() as f64 / +//! stats.bytes_to_compressed.to_bytes() as f64) * 100.0); +//! # Ok(()) +//! # } +//! ``` + +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::Instant; + +/// Global compression metrics storage +#[cfg(feature = "compression_metrics")] +pub struct CompressionMetrics { + /// Total bytes passed to compression algorithms + pub bytes_to_compressed: AtomicU64, + /// Total bytes after compression + pub compressed_bytes: AtomicU64, + /// Total time spent in compression operations (nanoseconds) + pub compression_time: AtomicU64, + /// Total bytes passed to decompression algorithms + pub bytes_to_decompress: AtomicU64, + /// Total bytes after decompression + pub bytes_after_decompression: AtomicU64, + /// Total time spent in decompression operations (nanoseconds) + pub decompression_time: AtomicU64, +} + +#[cfg(feature = "compression_metrics")] +impl CompressionMetrics { + /// Create a new compression metrics instance + pub const fn new() -> Self { + Self { + bytes_to_compressed: AtomicU64::new(0), + compressed_bytes: AtomicU64::new(0), + compression_time: AtomicU64::new(0), + bytes_to_decompress: AtomicU64::new(0), + bytes_after_decompression: AtomicU64::new(0), + decompression_time: AtomicU64::new(0), + } + } + + /// Record compression operation metrics + pub fn record_compression(&self, input_bytes: usize, output_bytes: usize, duration_ns: u64) { + self.bytes_to_compressed.fetch_add(input_bytes as u64, Ordering::Relaxed); + self.compressed_bytes.fetch_add(output_bytes as u64, Ordering::Relaxed); + self.compression_time.fetch_add(duration_ns, Ordering::Relaxed); + } + + /// Record decompression operation metrics + pub fn record_decompression(&self, input_bytes: usize, output_bytes: usize, duration_ns: u64) { + self.bytes_to_decompress.fetch_add(input_bytes as u64, Ordering::Relaxed); + self.bytes_after_decompression.fetch_add(output_bytes as u64, Ordering::Relaxed); + self.decompression_time.fetch_add(duration_ns, Ordering::Relaxed); + } + + /// Get current compression statistics as a tuple + /// Returns: (bytes_to_compressed, compressed_bytes, compression_time, bytes_to_decompress, bytes_after_decompression, decompression_time) + pub fn get_stats(&self) -> (u64, u64, u64, u64, u64, u64) { + ( + self.bytes_to_compressed.load(Ordering::Relaxed), + self.compressed_bytes.load(Ordering::Relaxed), + self.compression_time.load(Ordering::Relaxed), + self.bytes_to_decompress.load(Ordering::Relaxed), + self.bytes_after_decompression.load(Ordering::Relaxed), + self.decompression_time.load(Ordering::Relaxed), + ) + } +} + +#[cfg(feature = "compression_metrics")] +lazy_static::lazy_static! { + /// Global compression metrics instance + pub static ref COMPRESSION_METRICS: CompressionMetrics = CompressionMetrics::new(); +} + +/// Record compression metrics if the feature is enabled +#[cfg(feature = "compression_metrics")] +pub fn record_compression_metrics(input_bytes: usize, output_bytes: usize, duration_ns: u64) { + COMPRESSION_METRICS.record_compression(input_bytes, output_bytes, duration_ns); +} + +/// Record decompression metrics if the feature is enabled +#[cfg(feature = "compression_metrics")] +pub fn record_decompression_metrics(input_bytes: usize, output_bytes: usize, duration_ns: u64) { + COMPRESSION_METRICS.record_decompression(input_bytes, output_bytes, duration_ns); +} + +/// Get compression metrics for integration with vdev statistics +#[cfg(feature = "compression_metrics")] +pub fn get_compression_metrics() -> (u64, u64, u64, u64, u64, u64) { + COMPRESSION_METRICS.get_stats() +} + +/// No-op versions when the feature is disabled +#[cfg(not(feature = "compression_metrics"))] +pub fn record_compression_metrics(_input_bytes: usize, _output_bytes: usize, _duration_ns: u64) { + // No-op when feature is disabled +} + +/// No-op version when the feature is disabled +#[cfg(not(feature = "compression_metrics"))] +pub fn record_decompression_metrics(_input_bytes: usize, _output_bytes: usize, _duration_ns: u64) { + // No-op when feature is disabled +} + +/// Helper macro to time compression operations +#[macro_export] +macro_rules! time_compression { + ($input_size:expr, $compression_op:expr) => {{ + #[cfg(feature = "compression_metrics")] + let start = std::time::Instant::now(); + + let result = $compression_op; + + #[cfg(feature = "compression_metrics")] + { + let duration = start.elapsed().as_nanos() as u64; + if let Ok(ref output) = result { + let output_size = output.len(); + $crate::compression::metrics::record_compression_metrics($input_size, output_size, duration); + } + } + + result + }}; +} + +/// Helper macro to time decompression operations +#[macro_export] +macro_rules! time_decompression { + ($input_size:expr, $decompression_op:expr) => {{ + #[cfg(feature = "compression_metrics")] + let start = std::time::Instant::now(); + + let result = $decompression_op; + + #[cfg(feature = "compression_metrics")] + { + let duration = start.elapsed().as_nanos() as u64; + if let Ok(ref output) = result { + let output_size = output.len(); + $crate::compression::metrics::record_decompression_metrics($input_size, output_size, duration); + } + } + + result + }}; +} \ No newline at end of file diff --git a/betree/src/compression/mod.rs b/betree/src/compression/mod.rs index 69203770c..398b0039d 100644 --- a/betree/src/compression/mod.rs +++ b/betree/src/compression/mod.rs @@ -1,18 +1,35 @@ //! This module provides the `Compression` trait for compressing and //! decompressing data. -//! `None` and `Lz4` are provided as implementation. +//! Supports multiple compression algorithms optimized for different storage kinds. use crate::{ buffer::Buf, + cow_bytes::{CowBytes, SlicedCowBytes}, size::{Size, StaticSize}, vdev::Block, }; use serde::{Deserialize, Serialize}; use std::{fmt::Debug, mem}; +#[macro_use] mod errors; pub use errors::*; +// Database-specific compression modules +mod snappy; +mod dictionary; +mod rle; +mod delta; +mod gorilla; +mod toast; + +pub use snappy::Snappy; +pub use dictionary::Dictionary; +pub use rle::Rle; +pub use delta::Delta; +pub use gorilla::Gorilla; +pub use toast::Toast; + const DEFAULT_BUFFER_SIZE: Block = Block(1); /// Determine the used compression algorithm. @@ -20,17 +37,120 @@ const DEFAULT_BUFFER_SIZE: Block = Block(1); pub enum CompressionConfiguration { /// No-op. None, - // Lz4, + Lz4(Lz4), /// Configurable Zstd algorithm. Zstd(Zstd), + /// Google's Snappy compression - very fast with decent ratios. + Snappy(Snappy), + /// Dictionary encoding - replace frequent values with indices. + Dictionary(Dictionary), + /// Run-Length Encoding - compress runs of identical values. + Rle(Rle), + /// Delta encoding - store differences between consecutive values. + Delta(Delta), + /// Gorilla compression - specialized for time series data. + Gorilla(Gorilla), + /// PostgreSQL TOAST with pglz compression. + Toast(Toast), } impl CompressionConfiguration { - /// + /// Check if compression is enabled (avoids compression overhead when disabled) + pub fn is_compression_enabled(&self) -> bool { + !matches!(self, CompressionConfiguration::None) + } + + /// Get the compression type ID for metadata storage + pub fn compression_type_id(&self) -> u8 { + match self { + CompressionConfiguration::None => 0, + CompressionConfiguration::Zstd(_) => 1, + CompressionConfiguration::Lz4(_) => 2, + CompressionConfiguration::Snappy(_) => 3, + CompressionConfiguration::Dictionary(_) => 4, + CompressionConfiguration::Rle(_) => 5, + CompressionConfiguration::Delta(_) => 6, + CompressionConfiguration::Gorilla(_) => 7, + CompressionConfiguration::Toast(_) => 8, + } + } + + /// Get the decompression tag from compression type ID + pub fn decompression_tag_from_id(compression_type_id: u8) -> DecompressionTag { + match compression_type_id { + 0 => DecompressionTag::None, + 1 => DecompressionTag::Zstd, + 2 => DecompressionTag::Lz4, + 3 => DecompressionTag::Snappy, + 4 => DecompressionTag::Dictionary, + 5 => DecompressionTag::Rle, + 6 => DecompressionTag::Delta, + 7 => DecompressionTag::Gorilla, + 8 => DecompressionTag::Toast, + _ => panic!("Unknown compression type ID: {}", compression_type_id), + } + } + + /// Create a compression state directly (high performance) + pub fn create_compressor(&self) -> Result> { + match self { + CompressionConfiguration::None => { + None.create_compressor() + } + CompressionConfiguration::Lz4(lz4) => { + lz4.create_compressor() + } + CompressionConfiguration::Zstd(zstd) => { + zstd.create_compressor() + } + CompressionConfiguration::Snappy(snappy) => { + snappy.create_compressor() + } + CompressionConfiguration::Dictionary(dict) => { + dict.create_compressor() + } + CompressionConfiguration::Rle(rle) => { + rle.create_compressor() + } + CompressionConfiguration::Delta(delta) => { + delta.create_compressor() + } + CompressionConfiguration::Gorilla(gorilla) => { + gorilla.create_compressor() + } + CompressionConfiguration::Toast(toast) => { + toast.create_compressor() + } + } + } + + /// Get decompression tag for storage + pub fn decompression_tag(&self) -> DecompressionTag { + match self { + CompressionConfiguration::None => DecompressionTag::None, + CompressionConfiguration::Lz4(_) => DecompressionTag::Lz4, + CompressionConfiguration::Zstd(_) => DecompressionTag::Zstd, + CompressionConfiguration::Snappy(_) => DecompressionTag::Snappy, + CompressionConfiguration::Dictionary(_) => DecompressionTag::Dictionary, + CompressionConfiguration::Rle(_) => DecompressionTag::Rle, + CompressionConfiguration::Delta(_) => DecompressionTag::Delta, + CompressionConfiguration::Gorilla(_) => DecompressionTag::Gorilla, + CompressionConfiguration::Toast(_) => DecompressionTag::Toast, + } + } + + /// Legacy compatibility - create builder (deprecated) pub fn to_builder(&self) -> Box { match self { CompressionConfiguration::None => Box::new(None), + CompressionConfiguration::Lz4(lz4) => Box::new(*lz4), CompressionConfiguration::Zstd(zstd) => Box::new(*zstd), + CompressionConfiguration::Snappy(snappy) => Box::new(*snappy), + CompressionConfiguration::Dictionary(dict) => Box::new(*dict), + CompressionConfiguration::Rle(rle) => Box::new(*rle), + CompressionConfiguration::Delta(delta) => Box::new(*delta), + CompressionConfiguration::Gorilla(gorilla) => Box::new(*gorilla), + CompressionConfiguration::Toast(toast) => Box::new(*toast), } } } @@ -61,16 +181,39 @@ pub enum DecompressionTag { Lz4, /// Decompress using Zstd. Zstd, + /// Decompress using Snappy. + Snappy, + /// Decompress using Dictionary encoding. + Dictionary, + /// Decompress using RLE. + Rle, + /// Decompress using Delta encoding. + Delta, + /// Decompress using Gorilla. + Gorilla, + /// Decompress using Toast/pglz. + Toast, } impl DecompressionTag { + /// Check if decompression is needed + pub fn is_decompression_needed(&self) -> bool { + !matches!(self, DecompressionTag::None) + } + /// Start a new decompression. The resulting structure consumes a buffer to decompress the data. pub fn new_decompression(&self) -> Result> { use DecompressionTag as Tag; match self { Tag::None => Ok(None::new_decompression()?), - Tag::Lz4 => todo!(), //Ok(Lz4::new_decompression()?), + Tag::Lz4 => Ok(Lz4::new_decompression()?), Tag::Zstd => Ok(Zstd::new_decompression()?), + Tag::Snappy => Ok(Snappy::new_decompression()?), + Tag::Dictionary => Ok(Dictionary::new_decompression()?), + Tag::Rle => Ok(Rle::new_decompression()?), + Tag::Delta => Ok(Delta::new_decompression()?), + Tag::Gorilla => Ok(Gorilla::new_decompression()?), + Tag::Toast => Ok(Toast::new_decompression()?), } } } @@ -81,33 +224,58 @@ impl StaticSize for DecompressionTag { } } -/// Trait for compressing and decompressing data. Only compression is configurable, decompression -/// must be able to decompress anything ever compressed in any configuration. +/// High-performance compression interface - no locks, no shared state pub trait CompressionBuilder: Debug + Size + Send + Sync + 'static { - /// Returns an object for compressing data into a `Box<[u8]>`. - fn new_compression(&self) -> Result>; + /// Create a lightweight compression state without shared locking (high performance) + fn create_compressor(&self) -> Result>; /// Which decompression algorithm needs to be used. fn decompression_tag(&self) -> DecompressionTag; + + /// Legacy compatibility - returns an object for compressing data (deprecated) + fn new_compression(&self) -> Result> { + self.create_compressor() + } } /// Trait for the object that compresses data. pub trait CompressionState { - /// Finishes the compression stream and returns a buffer that contains the - /// compressed data. - fn finish(&mut self, data: Buf) -> Result; + /// Compress data from slice and return the compressed data as a Vec + /// Used for individual values during Memory storage kind packing + fn compress_val(&mut self, data: &[u8]) -> Result>; + + /// Compress data from Buf and return the compressed data as a Buf + /// Used for block-level compression during SSD storage kind packing + fn compress_buf(&mut self, data: Buf) -> Result; + + /// Legacy compatibility - finishes the compression stream (deprecated) + fn finish(&mut self, data: Buf) -> Result { + self.compress_buf(data) + } } /// An implementation of consumption-based decompression. pub trait DecompressionState { - /// Decompress the given [Buf]. On No-op this is a simple pass through, no memory is copied. - fn decompress(&mut self, data: Buf) -> Result; + /// Decompress data from slice and return the decompressed data as SlicedCowBytes + /// Used for individual values during Memory storage kind unpacking + fn decompress_val(&mut self, data: &[u8]) -> Result; + + /// Decompress data from Buf and return the decompressed data as a Buf + /// Used for block-level decompression during SSD storage kind unpacking + fn decompress_buf(&mut self, data: Buf) -> Result; + + /// Legacy compatibility - decompress the given [Buf] (deprecated) + fn decompress(&mut self, data: Buf) -> Result { + self.decompress_buf(data) + } } mod none; pub use self::none::None; -//mod lz4; -//pub use self::lz4::Lz4; +mod lz4; +pub use self::lz4::Lz4; mod zstd; pub use self::zstd::Zstd; + +pub mod metrics; diff --git a/betree/src/compression/none.rs b/betree/src/compression/none.rs index cb84d9100..a922e78da 100644 --- a/betree/src/compression/none.rs +++ b/betree/src/compression/none.rs @@ -7,7 +7,10 @@ use crate::{ size::StaticSize, }; use serde::{Deserialize, Serialize}; +use serde_json::to_vec; use std::io; +use std::sync::{Arc, Mutex}; +use crate::cow_bytes::{CowBytes, SlicedCowBytes}; /// No-op compression. #[derive(Debug, Clone, Serialize, Deserialize, Copy)] @@ -24,7 +27,7 @@ impl StaticSize for None { } impl CompressionBuilder for None { - fn new_compression(&self) -> Result> { + fn create_compressor(&self) -> Result> { Ok(Box::new(NoneCompression { buf: BufWrite::with_capacity(DEFAULT_BUFFER_SIZE), })) @@ -57,13 +60,77 @@ impl io::Write for NoneCompression { } impl CompressionState for NoneCompression { - fn finish(&mut self, buf: Buf) -> Result { + fn compress_val(&mut self, data: &[u8]) -> Result> { + // No metrics recording for None compression - it's a true pass-through + Ok(data.to_vec()) + } + + fn compress_buf(&mut self, buf: Buf) -> Result { + // No metrics recording for None compression - it's a true pass-through Ok(buf) } } impl DecompressionState for NoneDecompression { - fn decompress(&mut self, data: Buf) -> Result { + fn decompress_val(&mut self, data: &[u8]) -> Result { + // No metrics recording for None decompression - it's a true pass-through + Ok(SlicedCowBytes::from(data.to_vec())) + } + + fn decompress_buf(&mut self, data: Buf) -> Result { + // No metrics recording for None decompression - it's a true pass-through Ok(data) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_none_for_val_compression() { + let data = b"No compression test data - should pass through unchanged."; + let none = None; + + let mut compressor = none.create_compressor().unwrap(); + let compressed = compressor.compress_val(data).unwrap(); + + let mut decompressor = None::new_decompression().unwrap(); + let decompressed = decompressor.decompress_val(&compressed).unwrap(); + + assert_eq!(data, decompressed.as_ref()); + assert_eq!(data.len(), compressed.len()); // No compression should mean same size + println!("None val compression - Original: {}, 'Compressed': {}", data.len(), compressed.len()); + } + + #[test] + fn test_none_for_buf_compression() { + let data = b"No compression test with Buf interface - pass through."; + let buf = Buf::from_zero_padded(data.to_vec()); + let none = None; + + let mut compressor = none.create_compressor().unwrap(); + let compressed_buf = compressor.compress_buf(buf.clone()).unwrap(); + + let mut decompressor = None::new_decompression().unwrap(); + let decompressed_buf = decompressor.decompress_buf(compressed_buf).unwrap(); + + assert_eq!(buf.as_ref(), decompressed_buf.as_ref()); + println!("None buf compression - Original: {}, 'Compressed': {}", buf.len(), decompressed_buf.len()); + } + + #[test] + fn test_none_empty_data() { + let data = b""; + let none = None; + + let mut compressor = none.create_compressor().unwrap(); + let compressed = compressor.compress_val(data).unwrap(); + + let mut decompressor = None::new_decompression().unwrap(); + let decompressed = decompressor.decompress_val(&compressed).unwrap(); + + assert_eq!(data, decompressed.as_ref()); + assert_eq!(0, compressed.len()); + } +} diff --git a/betree/src/compression/rle.rs b/betree/src/compression/rle.rs new file mode 100644 index 000000000..491e763aa --- /dev/null +++ b/betree/src/compression/rle.rs @@ -0,0 +1,334 @@ +//! Run-Length Encoding implementation +//! Ideal for: Data with many consecutive repeated values (sorted columns, sparse data) + +use super::{CompressionBuilder, CompressionState, DecompressionState, DecompressionTag, Result}; +use crate::{ + buffer::Buf, + cow_bytes::SlicedCowBytes, + size::StaticSize, +}; +use serde::{Deserialize, Serialize}; + +/// RLE configuration +#[derive(Debug, Serialize, Deserialize, Clone, Copy)] +pub struct Rle { + /// Minimum run length to compress (shorter runs are stored uncompressed) + pub min_run_length: u8, + /// Value size in bytes (1, 2, 4, or 8) + pub value_size: u8, +} + +impl Default for Rle { + fn default() -> Self { + Self { + min_run_length: 3, // Compress runs of 3+ identical values + value_size: 8, // Default to 8-byte values + } + } +} + +impl StaticSize for Rle { + fn static_size() -> usize { + std::mem::size_of::() + } +} + +/// RLE compression state +#[derive(Debug)] +pub struct RleCompression { + config: Rle, +} + +/// RLE decompression state +#[derive(Debug)] +pub struct RleDecompression; + +impl CompressionBuilder for Rle { + fn create_compressor(&self) -> Result> { + Ok(Box::new(RleCompression { config: *self })) + } + + fn decompression_tag(&self) -> DecompressionTag { + DecompressionTag::Rle + } +} + +impl Rle { + /// Create a new RLE decompression state + pub fn new_decompression() -> Result> { + Ok(Box::new(RleDecompression)) + } +} + +/// RLE format: +/// [value_size: u8][run_count: u32][runs...] +/// Each run: [type: u8][data...] +/// type=0: literal run [count: u16][values...] +/// type=1: repeated run [count: u32][value] +impl CompressionState for RleCompression { + fn compress_val(&mut self, data: &[u8]) -> Result> { + let value_size = self.config.value_size as usize; + if data.len() % value_size != 0 { + // Fall back to no compression if data doesn't fit value size + return Ok(data.to_vec()); + } + + let value_count = data.len() / value_size; + if value_count == 0 { + return Ok(data.to_vec()); + } + + let mut result = Vec::new(); + result.push(value_size as u8); // Store value size + + let run_count_pos = result.len(); + result.extend_from_slice(&0u32.to_le_bytes()); // Placeholder for run count + + let mut run_count = 0u32; + let mut pos = 0; + + while pos < value_count { + let current_value = &data[pos * value_size..(pos + 1) * value_size]; + let mut run_length = 1; + + // Count consecutive identical values + while pos + run_length < value_count { + let next_value = &data[(pos + run_length) * value_size..(pos + run_length + 1) * value_size]; + if current_value == next_value { + run_length += 1; + } else { + break; + } + } + + if run_length >= self.config.min_run_length as usize { + // Repeated run + result.push(1u8); // Type: repeated + result.extend_from_slice(&(run_length as u32).to_le_bytes()); + result.extend_from_slice(current_value); + } else { + // Literal run - find end of non-repeating sequence + let mut literal_length = run_length; + while pos + literal_length < value_count { + let start_check = pos + literal_length; + let check_value = &data[start_check * value_size..(start_check + 1) * value_size]; + + // Check if we're starting a new repeating sequence + let mut check_run = 1; + while start_check + check_run < value_count { + let next_check = &data[(start_check + check_run) * value_size..(start_check + check_run + 1) * value_size]; + if check_value == next_check { + check_run += 1; + } else { + break; + } + } + + if check_run >= self.config.min_run_length as usize { + // Found a repeating sequence, end literal run here + break; + } else { + literal_length += check_run; + } + } + + // Literal run + result.push(0u8); // Type: literal + result.extend_from_slice(&(literal_length as u16).to_le_bytes()); + for i in 0..literal_length { + let value = &data[(pos + i) * value_size..(pos + i + 1) * value_size]; + result.extend_from_slice(value); + } + run_length = literal_length; + } + + pos += run_length; + run_count += 1; + } + + // Update run count + result[run_count_pos..run_count_pos + 4].copy_from_slice(&run_count.to_le_bytes()); + + // Add size headers like other compression algorithms + let size = data.len() as u32; + let comlen = result.len() as u32; + + let mut final_result = Vec::with_capacity(4 + 4 + result.len()); + final_result.extend_from_slice(&size.to_le_bytes()); + final_result.extend_from_slice(&comlen.to_le_bytes()); + final_result.extend_from_slice(&result); + + Ok(final_result) + } + + fn compress_buf(&mut self, data: Buf) -> Result { + use crate::buffer::BufWrite; + use crate::vdev::Block; + use std::io::Write; + + let compressed_data = self.compress_val(data.as_ref())?; + + let size = data.as_ref().len() as u32; + let comlen = compressed_data.len() as u32; + + let mut buf = BufWrite::with_capacity(Block::round_up_from_bytes( + 4 + 4 + comlen, // total metadata and compressed payload + )); + + buf.write_all(&size.to_le_bytes())?; + buf.write_all(&comlen.to_le_bytes())?; + buf.write_all(&compressed_data)?; + + Ok(buf.into_buf()) + } +} + +impl DecompressionState for RleDecompression { + fn decompress_val(&mut self, data: &[u8]) -> Result { + if data.len() < 8 { + return Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Input too short").into()); + } + + let uncomp_size = u32::from_le_bytes(data[0..4].try_into().unwrap()) as usize; + let comp_len = u32::from_le_bytes(data[4..8].try_into().unwrap()) as usize; + + if data.len() < 8 + comp_len { + return Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Compressed payload truncated").into()); + } + + let compressed = &data[8..8 + comp_len]; + + if compressed.len() < 5 { + return Ok(SlicedCowBytes::from(compressed.to_vec())); + } + + let mut pos = 0; + let value_size = compressed[pos] as usize; + pos += 1; + + let run_count = u32::from_le_bytes([compressed[pos], compressed[pos + 1], compressed[pos + 2], compressed[pos + 3]]) as usize; + pos += 4; + + let mut result = Vec::new(); + + for _ in 0..run_count { + if pos >= compressed.len() { break; } + + let run_type = compressed[pos]; + pos += 1; + + match run_type { + 0 => { + // Literal run + if pos + 2 > compressed.len() { break; } + let count = u16::from_le_bytes([compressed[pos], compressed[pos + 1]]) as usize; + pos += 2; + + if pos + count * value_size > compressed.len() { break; } + result.extend_from_slice(&compressed[pos..pos + count * value_size]); + pos += count * value_size; + } + 1 => { + // Repeated run + if pos + 4 > compressed.len() { break; } + let count = u32::from_le_bytes([compressed[pos], compressed[pos + 1], compressed[pos + 2], compressed[pos + 3]]) as usize; + pos += 4; + + if pos + value_size > compressed.len() { break; } + let value = &compressed[pos..pos + value_size]; + for _ in 0..count { + result.extend_from_slice(value); + } + pos += value_size; + } + _ => { + // Unknown run type + break; + } + } + } + + Ok(SlicedCowBytes::from(result)) + } + + fn decompress_buf(&mut self, data: Buf) -> Result { + use crate::buffer::BufWrite; + use crate::vdev::Block; + use std::io::Write; + + if data.len() < 8 { + return Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Input too short").into()); + } + + let uncomp_size = u32::from_le_bytes(data[0..4].try_into().unwrap()) as usize; + let comp_len = u32::from_le_bytes(data[4..8].try_into().unwrap()) as usize; + + if data.len() < 8 + comp_len { + return Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Compressed payload truncated").into()); + } + + let compressed = &data[8..8 + comp_len]; + + let decompressed = self.decompress_val(compressed)?; + + let mut buf = BufWrite::with_capacity(Block::round_up_from_bytes(uncomp_size as u32)); + buf.write_all(decompressed.as_ref())?; + Ok(buf.into_buf()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_rle_for_val_compression() { + // Create test data with runs of repeated values + let mut data = Vec::new(); + let value1 = b"aaaaaaaa"; + let value2 = b"bbbbbbbb"; + let value3 = b"cccccccc"; + + // Pattern: 5 a's, 1 b, 3 c's, 1 a, 4 b's + for _ in 0..5 { data.extend_from_slice(value1); } + data.extend_from_slice(value2); + for _ in 0..3 { data.extend_from_slice(value3); } + data.extend_from_slice(value1); + for _ in 0..4 { data.extend_from_slice(value2); } + + let rle = Rle::default(); + let mut compressor = rle.create_compressor().unwrap(); + let compressed = compressor.compress_val(&data).unwrap(); + + let mut decompressor = Rle::new_decompression().unwrap(); + let decompressed = decompressor.decompress_val(&compressed).unwrap(); + + assert_eq!(data, decompressed.as_ref()); + println!("RLE val compression - Original: {}, Compressed: {}", data.len(), compressed.len()); + } + + #[test] + fn test_rle_for_buf_compression() { + // Create test data with runs of repeated values + let mut data = Vec::new(); + let value1 = b"testval1"; + let value2 = b"testval2"; + + // Pattern with good RLE compression potential + for _ in 0..10 { data.extend_from_slice(value1); } + for _ in 0..8 { data.extend_from_slice(value2); } + for _ in 0..6 { data.extend_from_slice(value1); } + + let buf = Buf::from_zero_padded(data.clone()); + let rle = Rle::default(); + + let mut compressor = rle.create_compressor().unwrap(); + let compressed_buf = compressor.compress_buf(buf.clone()).unwrap(); + + let mut decompressor = Rle::new_decompression().unwrap(); + let decompressed_buf = decompressor.decompress_buf(compressed_buf).unwrap(); + + assert_eq!(buf.as_ref(), decompressed_buf.as_ref()); + println!("RLE buf compression - Original: {}, Compressed: {}", buf.len(), decompressed_buf.len()); + } +} \ No newline at end of file diff --git a/betree/src/compression/snappy.rs b/betree/src/compression/snappy.rs new file mode 100644 index 000000000..f83fd53ee --- /dev/null +++ b/betree/src/compression/snappy.rs @@ -0,0 +1,237 @@ +//! Snappy compression implementation - very fast compression/decompression +//! Ideal for: General-purpose compression where speed is more important than compression ratio + +use super::{CompressionBuilder, CompressionState, DecompressionState, DecompressionTag, Result}; +use crate::{ + buffer::Buf, + cow_bytes::SlicedCowBytes, + size::StaticSize, +}; +use serde::{Deserialize, Serialize}; +use snap::raw::{Encoder, Decoder}; + +#[cfg(feature = "compression_metrics")] +use std::time::Instant; + +/// Snappy compression configuration +#[derive(Debug, Serialize, Deserialize, Clone, Copy)] +pub struct Snappy { + // No configuration needed for Snappy - it's designed to be fast with fixed algorithms +} + +impl Default for Snappy { + fn default() -> Self { + Self {} + } +} + +impl StaticSize for Snappy { + fn static_size() -> usize { + 0 // No configuration parameters + } +} + +/// Snappy compression state +#[derive(Debug)] +pub struct SnappyCompression; + +/// Snappy decompression state +#[derive(Debug)] +pub struct SnappyDecompression; + +impl CompressionBuilder for Snappy { + fn create_compressor(&self) -> Result> { + Ok(Box::new(SnappyCompression)) + } + + fn decompression_tag(&self) -> DecompressionTag { + DecompressionTag::Snappy + } +} + +impl Snappy { + /// Create a new Snappy decompression state + pub fn new_decompression() -> Result> { + Ok(Box::new(SnappyDecompression)) + } +} + +impl CompressionState for SnappyCompression { + fn compress_val(&mut self, data: &[u8]) -> Result> { + #[cfg(feature = "compression_metrics")] + let start_time = Instant::now(); + + let mut encoder = Encoder::new(); + let compressed_data = encoder.compress_vec(data) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, format!("Snappy compression failed: {}", e)))?; + + let size = data.len() as u32; + let comlen = compressed_data.len() as u32; + + let mut result = Vec::with_capacity(4 + 4 + compressed_data.len()); + result.extend_from_slice(&size.to_le_bytes()); + result.extend_from_slice(&comlen.to_le_bytes()); + result.extend_from_slice(&compressed_data); + + #[cfg(feature = "compression_metrics")] + { + let compression_time = start_time.elapsed().as_nanos() as u64; + super::metrics::record_compression_metrics( + data.len(), + result.len(), + compression_time, + ); + } + + Ok(result) + } + + fn compress_buf(&mut self, data: Buf) -> Result { + use crate::buffer::BufWrite; + use crate::vdev::Block; + use std::io::Write; + + #[cfg(feature = "compression_metrics")] + let start_time = Instant::now(); + + let mut encoder = Encoder::new(); + let compressed_data = encoder.compress_vec(data.as_ref()) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, format!("Snappy compression failed: {}", e)))?; + + let size = data.as_ref().len() as u32; + let comlen = compressed_data.len() as u32; + + let mut buf = BufWrite::with_capacity(Block::round_up_from_bytes( + 4 + 4 + comlen, // total metadata and compressed payload + )); + + buf.write_all(&size.to_le_bytes())?; + buf.write_all(&comlen.to_le_bytes())?; + buf.write_all(&compressed_data)?; + + #[cfg(feature = "compression_metrics")] + { + let compression_time = start_time.elapsed().as_nanos() as u64; + super::metrics::record_compression_metrics( + data.len(), + 4 + 4 + compressed_data.len(), + compression_time, + ); + } + + Ok(buf.into_buf()) + } +} + +impl DecompressionState for SnappyDecompression { + fn decompress_val(&mut self, data: &[u8]) -> Result { + if data.len() < 8 { + return Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Input too short").into()); + } + + let uncomp_size = u32::from_le_bytes(data[0..4].try_into().unwrap()) as usize; + let comp_len = u32::from_le_bytes(data[4..8].try_into().unwrap()) as usize; + + if data.len() < 8 + comp_len { + return Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Compressed payload truncated").into()); + } + + let compressed = &data[8..8 + comp_len]; + + #[cfg(feature = "compression_metrics")] + let start_time = Instant::now(); + + let mut decoder = Decoder::new(); + let decompressed = decoder.decompress_vec(compressed) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, format!("Snappy decompression failed: {}", e)))?; + + #[cfg(feature = "compression_metrics")] + { + let decompression_time = start_time.elapsed().as_nanos() as u64; + super::metrics::record_decompression_metrics( + data.len(), + decompressed.len(), + decompression_time, + ); + } + + Ok(SlicedCowBytes::from(decompressed)) + } + + fn decompress_buf(&mut self, data: Buf) -> Result { + use crate::buffer::BufWrite; + use crate::vdev::Block; + use std::io::Write; + + if data.len() < 8 { + return Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Input too short").into()); + } + + let uncomp_size = u32::from_le_bytes(data[0..4].try_into().unwrap()) as usize; + let comp_len = u32::from_le_bytes(data[4..8].try_into().unwrap()) as usize; + + if data.len() < 8 + comp_len { + return Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Compressed payload truncated").into()); + } + + let compressed = &data[8..8 + comp_len]; + + #[cfg(feature = "compression_metrics")] + let start_time = Instant::now(); + + let mut decoder = Decoder::new(); + let decompressed = decoder.decompress_vec(compressed) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, format!("Snappy decompression failed: {}", e)))?; + + let mut buf = BufWrite::with_capacity(Block::round_up_from_bytes(uncomp_size as u32)); + buf.write_all(&decompressed)?; + + #[cfg(feature = "compression_metrics")] + { + let decompression_time = start_time.elapsed().as_nanos() as u64; + super::metrics::record_decompression_metrics( + data.len(), + decompressed.len(), + decompression_time, + ); + } + + Ok(buf.into_buf()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_snappy_for_val_compression() { + let data = b"Hello, world! This is a test of Snappy compression.".repeat(10); + let snappy = Snappy::default(); + + let mut compressor = snappy.create_compressor().unwrap(); + let compressed = compressor.compress_val(&data).unwrap(); + + let mut decompressor = Snappy::new_decompression().unwrap(); + let decompressed = decompressor.decompress_val(&compressed).unwrap(); + + assert_eq!(data, decompressed.as_ref()); + println!("Snappy val compression - Original: {}, Compressed: {}", data.len(), compressed.len()); + } + + #[test] + fn test_snappy_for_buf_compression() { + let data = b"Test data with some repeated patterns for Snappy compression.".repeat(20); + let buf = Buf::from_zero_padded(data.clone()); + let snappy = Snappy::default(); + + let mut compressor = snappy.create_compressor().unwrap(); + let compressed_buf = compressor.compress_buf(buf.clone()).unwrap(); + + let mut decompressor = Snappy::new_decompression().unwrap(); + let decompressed_buf = decompressor.decompress_buf(compressed_buf).unwrap(); + + assert_eq!(buf.as_ref(), decompressed_buf.as_ref()); + println!("Snappy buf compression - Original: {}, Compressed: {}", buf.len(), decompressed_buf.len()); + } +} \ No newline at end of file diff --git a/betree/src/compression/toast.rs b/betree/src/compression/toast.rs new file mode 100644 index 000000000..e660d904b --- /dev/null +++ b/betree/src/compression/toast.rs @@ -0,0 +1,340 @@ +//! TOAST (The Oversized-Attribute Storage Technique) with pglz compression +//! Ideal for: Large text/binary objects, similar to PostgreSQL's approach + +use super::{CompressionBuilder, CompressionState, DecompressionState, DecompressionTag, Result}; +use crate::{ + buffer::Buf, + cow_bytes::SlicedCowBytes, + size::StaticSize, +}; +use serde::{Deserialize, Serialize}; + +/// TOAST compression configuration +#[derive(Debug, Serialize, Deserialize, Clone, Copy)] +pub struct Toast { + /// Minimum size threshold to attempt compression + pub min_compress_size: u32, + /// Maximum compression ratio to accept (if worse, store uncompressed) + pub max_ratio_percent: u8, +} + +impl Default for Toast { + fn default() -> Self { + Self { + min_compress_size: 32, // Don't compress very small objects + max_ratio_percent: 95, // Must achieve at least 5% compression + } + } +} + +impl StaticSize for Toast { + fn static_size() -> usize { + std::mem::size_of::() + } +} + +/// TOAST compression state +#[derive(Debug)] +pub struct ToastCompression { + config: Toast, +} + +/// TOAST decompression state +#[derive(Debug)] +pub struct ToastDecompression; + +impl CompressionBuilder for Toast { + fn create_compressor(&self) -> Result> { + Ok(Box::new(ToastCompression { config: *self })) + } + + fn decompression_tag(&self) -> DecompressionTag { + DecompressionTag::Toast + } +} + +impl Toast { + /// Create a new TOAST decompression state + pub fn new_decompression() -> Result> { + Ok(Box::new(ToastDecompression)) + } +} + +/// TOAST format: +/// [compressed: u8][original_size: u32][data...] +/// compressed=0: data is uncompressed +/// compressed=1: data is compressed with simplified LZ-style compression +impl CompressionState for ToastCompression { + fn compress_val(&mut self, data: &[u8]) -> Result> { + let toast_result = if data.len() < self.config.min_compress_size as usize { + // Too small to compress + let mut result = Vec::new(); + result.push(0u8); // Not compressed + result.extend_from_slice(&(data.len() as u32).to_le_bytes()); + result.extend_from_slice(data); + result + } else { + // Try simplified LZ-style compression (similar to pglz approach) + let compressed = pglz_compress(data); + + let compression_ratio = (compressed.len() * 100) / data.len(); + + if compression_ratio >= self.config.max_ratio_percent as usize { + // Compression not worthwhile + let mut result = Vec::new(); + result.push(0u8); // Not compressed + result.extend_from_slice(&(data.len() as u32).to_le_bytes()); + result.extend_from_slice(data); + result + } else { + // Use compressed version + let mut result = Vec::new(); + result.push(1u8); // Compressed + result.extend_from_slice(&(data.len() as u32).to_le_bytes()); + result.extend_from_slice(&compressed); + result + } + }; + + // Add size headers like other compression algorithms + let size = data.len() as u32; + let comlen = toast_result.len() as u32; + + let mut final_result = Vec::with_capacity(4 + 4 + toast_result.len()); + final_result.extend_from_slice(&size.to_le_bytes()); + final_result.extend_from_slice(&comlen.to_le_bytes()); + final_result.extend_from_slice(&toast_result); + + Ok(final_result) + } + + fn compress_buf(&mut self, data: Buf) -> Result { + use crate::buffer::BufWrite; + use crate::vdev::Block; + use std::io::Write; + + let compressed_data = self.compress_val(data.as_ref())?; + + let size = data.as_ref().len() as u32; + let comlen = compressed_data.len() as u32; + + let mut buf = BufWrite::with_capacity(Block::round_up_from_bytes( + 4 + 4 + comlen, // total metadata and compressed payload + )); + + buf.write_all(&size.to_le_bytes())?; + buf.write_all(&comlen.to_le_bytes())?; + buf.write_all(&compressed_data)?; + + Ok(buf.into_buf()) + } +} + +impl DecompressionState for ToastDecompression { + fn decompress_val(&mut self, data: &[u8]) -> Result { + if data.len() < 8 { + return Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Input too short").into()); + } + + let uncomp_size = u32::from_le_bytes(data[0..4].try_into().unwrap()) as usize; + let comp_len = u32::from_le_bytes(data[4..8].try_into().unwrap()) as usize; + + if data.len() < 8 + comp_len { + return Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Compressed payload truncated").into()); + } + + let toast_data = &data[8..8 + comp_len]; + + if toast_data.len() < 5 { + return Ok(SlicedCowBytes::from(toast_data.to_vec())); + } + + let mut pos = 0; + let compressed = toast_data[pos] != 0; + pos += 1; + + let original_size = u32::from_le_bytes([toast_data[pos], toast_data[pos + 1], toast_data[pos + 2], toast_data[pos + 3]]) as usize; + pos += 4; + + if compressed { + let decompressed = pglz_decompress(&toast_data[pos..], original_size)?; + Ok(SlicedCowBytes::from(decompressed)) + } else { + Ok(SlicedCowBytes::from(toast_data[pos..].to_vec())) + } + } + + fn decompress_buf(&mut self, data: Buf) -> Result { + use crate::buffer::BufWrite; + use crate::vdev::Block; + use std::io::Write; + + if data.len() < 8 { + return Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Input too short").into()); + } + + let uncomp_size = u32::from_le_bytes(data[0..4].try_into().unwrap()) as usize; + let comp_len = u32::from_le_bytes(data[4..8].try_into().unwrap()) as usize; + + if data.len() < 8 + comp_len { + return Err(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Compressed payload truncated").into()); + } + + let compressed = &data[8..8 + comp_len]; + + let decompressed = self.decompress_val(compressed)?; + + let mut buf = BufWrite::with_capacity(Block::round_up_from_bytes(uncomp_size as u32)); + buf.write_all(decompressed.as_ref())?; + Ok(buf.into_buf()) + } +} + +/// Simplified LZ-style compression (inspired by PostgreSQL's pglz) +/// This is a basic implementation - real pglz is more sophisticated +fn pglz_compress(data: &[u8]) -> Vec { + let mut result = Vec::new(); + let mut pos = 0; + + while pos < data.len() { + // Look for matches in previous data (simplified sliding window) + let window_start = if pos >= 1024 { pos - 1024 } else { 0 }; + let mut best_match_len = 0; + let mut best_match_dist = 0; + + // Find longest match + for start in window_start..pos { + let mut match_len = 0; + while pos + match_len < data.len() + && start + match_len < pos + && data[start + match_len] == data[pos + match_len] + && match_len < 255 { + match_len += 1; + } + + if match_len > best_match_len && match_len >= 3 { + best_match_len = match_len; + best_match_dist = pos - start; + } + } + + if best_match_len >= 3 { + // Encode match: [flag=1][distance:u16][length:u8] + result.push(1u8); + result.extend_from_slice(&(best_match_dist as u16).to_le_bytes()); + result.push(best_match_len as u8); + pos += best_match_len; + } else { + // Encode literal: [flag=0][byte] + result.push(0u8); + result.push(data[pos]); + pos += 1; + } + } + + result +} + +/// Decompress pglz-style data +fn pglz_decompress(data: &[u8], expected_size: usize) -> Result> { + let mut result = Vec::with_capacity(expected_size); + let mut pos = 0; + + while pos < data.len() && result.len() < expected_size { + if pos >= data.len() { break; } + + let flag = data[pos]; + pos += 1; + + if flag == 0 { + // Literal byte + if pos >= data.len() { break; } + result.push(data[pos]); + pos += 1; + } else { + // Match + if pos + 2 >= data.len() { break; } + let distance = u16::from_le_bytes([data[pos], data[pos + 1]]) as usize; + pos += 2; + let length = data[pos] as usize; + pos += 1; + + if distance > result.len() { break; } + + let start = result.len() - distance; + for i in 0..length { + if result.len() >= expected_size { break; } + let byte = result[start + i]; + result.push(byte); + } + } + } + + Ok(result) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_toast_for_val_compression() { + // Create test data with repeated patterns (good for LZ-style compression) + let pattern = b"This is a test pattern that repeats. "; + let mut data = Vec::new(); + for _ in 0..50 { + data.extend_from_slice(pattern); + } + + let toast = Toast::default(); + let mut compressor = toast.create_compressor().unwrap(); + let compressed = compressor.compress_val(&data).unwrap(); + + let mut decompressor = Toast::new_decompression().unwrap(); + let decompressed = decompressor.decompress_val(&compressed).unwrap(); + + assert_eq!(data, decompressed.as_ref()); + println!("Toast val compression - Original: {}, Compressed: {}", data.len(), compressed.len()); + } + + #[test] + fn test_toast_for_buf_compression() { + // Create test data with repeated patterns + let pattern = b"TOAST compression test pattern repeats here. "; + let mut data = Vec::new(); + for _ in 0..30 { + data.extend_from_slice(pattern); + } + + let buf = Buf::from_zero_padded(data.clone()); + let toast = Toast::default(); + + let mut compressor = toast.create_compressor().unwrap(); + let compressed_buf = compressor.compress_buf(buf.clone()).unwrap(); + + let mut decompressor = Toast::new_decompression().unwrap(); + let decompressed_buf = decompressor.decompress_buf(compressed_buf).unwrap(); + + assert_eq!(buf.as_ref(), decompressed_buf.as_ref()); + println!("Toast buf compression - Original: {}, Compressed: {}", buf.len(), decompressed_buf.len()); + } + + #[test] + fn test_pglz_round_trip() { + let data = b"Hello world! This is a test of LZ compression. Hello world! Repeat test."; + let compressed = pglz_compress(data); + let decompressed = pglz_decompress(&compressed, data.len()).unwrap(); + assert_eq!(data, decompressed.as_slice()); + } + + #[test] + fn test_small_data_no_compression() { + let small_data = b"small"; + let toast = Toast::default(); + let mut compressor = toast.create_compressor().unwrap(); + let result = compressor.compress_val(small_data).unwrap(); + + // Should be stored uncompressed + assert_eq!(result[0], 0u8); // Not compressed flag + } +} \ No newline at end of file diff --git a/betree/src/compression/zstd.rs b/betree/src/compression/zstd.rs index 23459ef7f..6ad8bc99a 100644 --- a/betree/src/compression/zstd.rs +++ b/betree/src/compression/zstd.rs @@ -6,11 +6,12 @@ use crate::{ }; use serde::{Deserialize, Serialize}; use std::{io::Write, mem}; -use zstd::stream::raw::{CParameter, DParameter, Decoder, Encoder}; -use zstd_safe::{FrameFormat, WriteBuf}; - +use std::io::{self, Read}; +use zstd::stream::raw::{DParameter, Decoder}; +use zstd_safe::FrameFormat; +use zstd::block; // TODO: investigate pre-created dictionary payoff - +use crate::cow_bytes::SlicedCowBytes; /// Zstd compression. () #[derive(Debug, Serialize, Deserialize, Clone, Copy)] pub struct Zstd { @@ -18,9 +19,8 @@ pub struct Zstd { /// compression ratio and compression speed. pub level: u8, } - struct ZstdCompression { - writer: Encoder<'static>, + level: u8, } struct ZstdDecompression { writer: Decoder<'static>, @@ -35,17 +35,9 @@ impl StaticSize for Zstd { use zstd::stream::raw::Operation; impl CompressionBuilder for Zstd { - fn new_compression(&self) -> Result> { - // "The library supports regular compression levels from 1 up to ZSTD_maxCLevel(), - // which is currently 22." - let mut encoder = Encoder::new(self.level as i32)?; - - // Compression format is stored externally, don't need to duplicate it - encoder.set_parameter(CParameter::Format(FrameFormat::Magicless))?; - // // Integrity is handled at a different layer - encoder.set_parameter(CParameter::ChecksumFlag(false))?; - - Ok(Box::new(ZstdCompression { writer: encoder })) + fn create_compressor(&self) -> Result> { + // No need to create encoder here - block compression is more efficient + Ok(Box::new(ZstdCompression { level: self.level })) } fn decompression_tag(&self) -> DecompressionTag { @@ -64,76 +56,212 @@ impl Zstd { } } +impl io::Write for ZstdCompression { + fn write(&mut self, buf: &[u8]) -> io::Result { + unimplemented!() + } + + fn write_all(&mut self, buf: &[u8]) -> io::Result<()> { + unimplemented!() + } + + fn flush(&mut self) -> io::Result<()> { + unimplemented!() + } +} + +use std::time::Instant; use speedy::{Readable, Writable}; const DATA_OFF: usize = mem::size_of::(); -impl CompressionState for ZstdCompression { - fn finish(&mut self, data: Buf) -> Result { - let size = zstd_safe::compress_bound(data.as_ref().len()); - let mut buf = BufWrite::with_capacity(Block::round_up_from_bytes(size as u32)); - buf.write_all(&[0u8; DATA_OFF])?; - - let mut input = zstd::stream::raw::InBuffer::around(&data); - let mut output = zstd::stream::raw::OutBuffer::around_pos(&mut buf, DATA_OFF); - let mut finished_frame; - loop { - let remaining = self.writer.run(&mut input, &mut output)?; - finished_frame = remaining == 0; - if input.pos() > 0 || data.is_empty() { - break; - } +impl CompressionState for ZstdCompression { + fn compress_buf(&mut self, data: Buf) -> Result { + //println!("compress_buf {} bytes with Zstd at level {}", data.as_ref().len(), self.level); + let input_size = data.as_ref().len(); + + #[cfg(feature = "compression_metrics")] + let start = std::time::Instant::now(); + + let compressed_data = block::compress(&data, self.level as i32) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, format!("{:?}", e)))?; + + let size = data.as_ref().len() as u32; + let comlen = compressed_data.len() as u32; + + let mut buf = BufWrite::with_capacity(Block::round_up_from_bytes( + 4 + 4 + comlen, // total metadata and compressed payload + )); + + buf.write_all(&size.to_le_bytes())?; + buf.write_all(&comlen.to_le_bytes())?; + buf.write_all(&compressed_data)?; + + let result = buf.into_buf(); + + #[cfg(feature = "compression_metrics")] + { + let duration = start.elapsed().as_nanos() as u64; + super::metrics::record_compression_metrics(input_size, compressed_data.len(), duration); } - while self.writer.flush(&mut output)? > 0 {} - self.writer.finish(&mut output, finished_frame)?; + Ok(result) + } + + fn compress_val(&mut self, data: &[u8]) -> Result> { + //println!("compress_val {} bytes with Zstd at level {}", data.len(), self.level); + let input_size = data.len(); + + #[cfg(feature = "compression_metrics")] + let start = std::time::Instant::now(); + + let compressed_data = block::compress(data, self.level as i32) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, format!("Compression error: {:?}", e)))?; + + let size = data.len() as u32; + let comlen = compressed_data.len() as u32; - let og_len = data.len() as u32; - og_len - .write_to_buffer(&mut buf.as_mut()[..DATA_OFF]) - .unwrap(); - Ok(buf.into_buf()) + let mut result = Vec::with_capacity(4 + 4 + compressed_data.len()); + result.extend_from_slice(&size.to_le_bytes()); + result.extend_from_slice(&comlen.to_le_bytes()); + result.extend_from_slice(&compressed_data); + + #[cfg(feature = "compression_metrics")] + { + let duration = start.elapsed().as_nanos() as u64; + super::metrics::record_compression_metrics(input_size, compressed_data.len(), duration); + } + + Ok(result) } } impl DecompressionState for ZstdDecompression { - fn decompress(&mut self, data: Buf) -> Result { - let size = u32::read_from_buffer(data.as_ref()).unwrap(); - let mut buf = BufWrite::with_capacity(Block::round_up_from_bytes(size)); - - let mut input = zstd::stream::raw::InBuffer::around(&data[DATA_OFF..]); - let mut output = zstd::stream::raw::OutBuffer::around(&mut buf); - - let mut finished_frame; - loop { - let remaining = self.writer.run(&mut input, &mut output)?; - finished_frame = remaining == 0; - if remaining > 0 { - if output.dst.capacity() == output.dst.as_ref().len() { - // append faux byte to extend in case that original was - // wrong for some reason (this should not happen but is a - // sanity guard) - output.dst.write(&[0])?; - } - continue; - } - if input.pos() > 0 || data.is_empty() { - break; - } + fn decompress_val(&mut self, data: &[u8]) -> Result + { + //println!("decompress_val {} bytes with Zstd", data.len()); + let input_size = data.len(); + + if data.len() < 8 { + bail!(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Input too short")); + } + + let uncomp_size = u32::from_le_bytes(data[0..4].try_into().unwrap()) as usize; + let comp_len = u32::from_le_bytes(data[4..8].try_into().unwrap()) as usize; + + if data.len() < 8 + comp_len { + bail!(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Compressed payload truncated")); } - while self.writer.flush(&mut output)? > 0 {} - self.writer.finish(&mut output, finished_frame)?; + let compressed = &data[8..8 + comp_len]; - Ok(buf.into_buf()) + #[cfg(feature = "compression_metrics")] + let start = std::time::Instant::now(); + + let uncompressed_data = block::decompress(compressed, uncomp_size) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, format!("Decompression error: {:?}", e)))?; + + let result = SlicedCowBytes::from(uncompressed_data); + + #[cfg(feature = "compression_metrics")] + { + let duration = start.elapsed().as_nanos() as u64; + super::metrics::record_decompression_metrics(input_size, result.len(), duration); + } + + Ok(result) + } + + fn decompress_buf(&mut self, data: Buf) -> Result { + //println!("decompress_buf {} bytes with Zstd", data.len()); + let input_size = data.len(); + + if data.len() < 8 { + bail!(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Input too short")); + } + + let uncomp_size = u32::from_le_bytes(data[0..4].try_into().unwrap()) as usize; + let comp_len = u32::from_le_bytes(data[4..8].try_into().unwrap()) as usize; + + if data.len() < 8 + comp_len { + bail!(std::io::Error::new(std::io::ErrorKind::UnexpectedEof, "Compressed payload truncated")); + } + + let compressed = &data[8..8 + comp_len]; + + #[cfg(feature = "compression_metrics")] + let start = std::time::Instant::now(); + + let uncompressed_data = block::decompress(compressed, uncomp_size) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, format!("{:?}", e)))?; + + let mut buf = BufWrite::with_capacity(Block::round_up_from_bytes(uncomp_size as u32)); + buf.write_all(&uncompressed_data)?; + let result = buf.into_buf(); + + #[cfg(feature = "compression_metrics")] + { + let duration = start.elapsed().as_nanos() as u64; + super::metrics::record_decompression_metrics(input_size, result.len(), duration); + } + + Ok(result) } } #[cfg(test)] mod tests { use rand::RngCore; - use super::*; + #[test] + fn test_zstd_for_val_compression() { + let data = b"Zstd compression test with repeated patterns for better compression ratio. ".repeat(30); + let zstd = Zstd { level: 6 }; + + let mut compressor = zstd.create_compressor().unwrap(); + let compressed = compressor.compress_val(&data).unwrap(); + + let mut decompressor = Zstd::new_decompression().unwrap(); + let decompressed = decompressor.decompress_val(&compressed).unwrap(); + + assert_eq!(data, decompressed.as_ref()); + println!("Zstd val compression - Original: {}, Compressed: {}", data.len(), compressed.len()); + } + + #[test] + fn test_zstd_for_buf_compression() { + let data = b"Zstd test with Buf interface and compressible content. ".repeat(25); + let buf = Buf::from_zero_padded(data.clone()); + let zstd = Zstd { level: 3 }; + + let mut compressor = zstd.create_compressor().unwrap(); + let compressed_buf = compressor.compress_buf(buf.clone()).unwrap(); + + let mut decompressor = Zstd::new_decompression().unwrap(); + let decompressed_buf = decompressor.decompress_buf(compressed_buf).unwrap(); + + assert_eq!(buf.as_ref(), decompressed_buf.as_ref()); + println!("Zstd buf compression - Original: {}, Compressed: {}", buf.len(), decompressed_buf.len()); + } + + #[test] + fn test_zstd_different_levels() { + let data = b"Testing different Zstd compression levels with this repeated text pattern. ".repeat(15); + + for level in [1, 6, 15] { + let zstd = Zstd { level }; + + let mut compressor = zstd.create_compressor().unwrap(); + let compressed = compressor.compress_val(&data).unwrap(); + + let mut decompressor = Zstd::new_decompression().unwrap(); + let decompressed = decompressor.decompress_val(&compressed).unwrap(); + + assert_eq!(data, decompressed.as_ref()); + println!("Zstd level {} - Original: {}, Compressed: {}", level, data.len(), compressed.len()); + } + } + #[test] fn encode_then_decode() { let mut rng = rand::thread_rng(); @@ -141,10 +269,10 @@ mod tests { rng.fill_bytes(buf.as_mut()); let buf = Buf::from_zero_padded(buf); let zstd = Zstd { level: 1 }; - let mut comp = zstd.new_compression().unwrap(); - let c_buf = comp.finish(buf.clone()).unwrap(); + let mut comp = zstd.create_compressor().unwrap(); + let c_buf = comp.compress_buf(buf.clone()).unwrap(); let mut decomp = zstd.decompression_tag().new_decompression().unwrap(); - let d_buf = decomp.decompress(c_buf).unwrap(); + let d_buf = decomp.decompress_buf(c_buf).unwrap(); assert_eq!(buf.as_ref().len(), d_buf.as_ref().len()); } diff --git a/betree/src/cow_bytes.rs b/betree/src/cow_bytes.rs index e532345bf..436b1f246 100644 --- a/betree/src/cow_bytes.rs +++ b/betree/src/cow_bytes.rs @@ -230,6 +230,8 @@ pub struct SlicedCowBytes { pub(super) enum ByteSource { Cow(CowBytes), Raw { ptr: *const u8, len: usize }, + #[cfg(feature = "memory_metrics")] + TrackedRaw { ptr: *const u8, len: usize, stats: std::sync::Arc, track_access: bool }, } impl Deref for ByteSource { @@ -241,6 +243,11 @@ impl Deref for ByteSource { ByteSource::Raw { ptr, len } => unsafe { std::slice::from_raw_parts(ptr.clone(), *len) }, + #[cfg(feature = "memory_metrics")] + ByteSource::TrackedRaw { ptr, len, stats: _, track_access: _} => unsafe { + // Note: We don't track memory access here anymore - it's tracked at the slice level + std::slice::from_raw_parts(ptr.clone(), *len) + }, } } } @@ -291,6 +298,8 @@ impl Size for SlicedCowBytes { match self.data { ByteSource::Cow(ref cow_bytes) => cow_bytes.cache_size(), ByteSource::Raw { .. } => std::mem::size_of::() + std::mem::size_of::(), + #[cfg(feature = "memory_metrics")] + ByteSource::TrackedRaw { .. } => std::mem::size_of::() + std::mem::size_of::() + std::mem::size_of::>() + std::mem::size_of::(), } } } @@ -328,6 +337,16 @@ impl SlicedCowBytes { (buf.as_mut_ptr() as *mut u8).copy_from(ptr, len); &buf }, + #[cfg(feature = "memory_metrics")] + ByteSource::TrackedRaw { ptr, len, .. } => unsafe { + //println!("2 DEBUG: into_raw called on TrackedRaw with len={}", len); + // FIXME: This copies data currently when the original buffer + // is from a raw source ot avoid breaking behavior from + // outside. + let mut buf = Vec::with_capacity(len); + (buf.as_mut_ptr() as *mut u8).copy_from(ptr, len); + &buf + }, } } @@ -339,12 +358,46 @@ impl SlicedCowBytes { } } + #[cfg(feature = "memory_metrics")] + pub(crate) unsafe fn from_tracked_raw( + ptr: *const u8, + len: usize, + stats: std::sync::Arc + ) -> Self { + Self { + data: ByteSource::TrackedRaw { ptr, len, stats, track_access: true }, + pos: 0, + len: len.try_into().expect("Capacity to large."), + } + } + + #[cfg(feature = "memory_metrics")] + pub(crate) unsafe fn from_tracked_raw_no_tracking( + ptr: *const u8, + len: usize, + stats: std::sync::Arc + ) -> Self { + Self { + data: ByteSource::TrackedRaw { ptr, len, stats, track_access: false }, + pos: 0, + len: len.try_into().expect("Capacity to large."), + } + } + pub(crate) fn into_cow_bytes(self) -> Result { match self.data { ByteSource::Cow(cow_bytes) if self.pos == 0 => Ok(cow_bytes), _ => Err(self), } } + + #[cfg(feature = "memory_metrics")] + pub(crate) fn get_stats(&self) -> Option> { + match &self.data { + ByteSource::TrackedRaw { stats, .. } => Some(stats.clone()), + _ => None, + } + } } impl From for SlicedCowBytes { @@ -357,11 +410,36 @@ impl From for SlicedCowBytes { } } +impl From> for SlicedCowBytes { + fn from(vec: Vec) -> Self { + SlicedCowBytes { + pos: 0, + len: vec.len() as u32, + data: ByteSource::Cow(CowBytes::from(vec)), + } + } +} + impl Deref for SlicedCowBytes { type Target = [u8]; fn deref(&self) -> &[u8] { let start = self.pos as usize; let end = start + self.len as usize; + + // Track memory access for TrackedRaw sources at the slice level + #[cfg(feature = "memory_metrics")] + if let ByteSource::TrackedRaw { stats, track_access, .. } = &self.data { + if *track_access { + //println!("DEBUG: Deref called on TrackedRaw with len={} (TRACKED)", self.len); + use std::sync::atomic::Ordering; + // Track memory access at the slice level + stats.memory_read.fetch_add(self.len as u64, Ordering::Relaxed); + stats.memory_read_count.fetch_add(1, Ordering::Relaxed); + } else { + //println!("DEBUG: Deref called on TrackedRaw with len={} (NOT TRACKED)", self.len); + } + } + &self.data[start..end] } } diff --git a/betree/src/data_management/dmu.rs b/betree/src/data_management/dmu.rs index 19a931c68..3f0052477 100644 --- a/betree/src/data_management/dmu.rs +++ b/betree/src/data_management/dmu.rs @@ -10,7 +10,7 @@ use crate::{ buffer::Buf, cache::{Cache, ChangeKeyError, RemoveError}, checksum::{Builder, Checksum, State}, - compression::CompressionBuilder, + compression::{CompressionBuilder, CompressionConfiguration}, data_management::{CopyOnWriteReason, IntegrityMode}, database::{DatasetId, Generation, Handler}, migration::DmlMsg, @@ -46,6 +46,7 @@ where SPL::Checksum: StaticSize, { default_compression: Box, + default_compression_config: CompressionConfiguration, // NOTE: Why was this included in the first place? Delayed Compression? Streaming Compression? // default_compression_state: C::CompressionState, default_storage_class: u8, @@ -77,6 +78,7 @@ where /// Returns a new `Dmu`. pub fn new( default_compression: Box, + default_compression_config: CompressionConfiguration, default_checksum_builder: ::Builder, default_storage_class: u8, pool: SPL, @@ -107,6 +109,7 @@ where Dmu { // default_compression_state: default_compression.new_compression().expect("Can't create compression state"), + default_compression_config, default_compression, default_storage_class, default_checksum_builder, @@ -170,6 +173,30 @@ where Ok(()) } + + /// Select appropriate compression based on storage kind + fn select_compression_for_storage_kind(&self, storage_kind: crate::tree::StorageKind) -> &dyn CompressionBuilder { + use crate::tree::StorageKind; + + // Apply storage-kind-aware compression strategy + match storage_kind { + StorageKind::Hdd => { + // HDD: Prioritize high compression ratios over speed to reduce I/O + debug!("Using HDD-optimized compression for storage_kind: {:?}", storage_kind); + &*self.default_compression + } + StorageKind::Memory => { + // Memory: Individual value compression (not applied at this level) + debug!("Using Memory-optimized compression for storage_kind: {:?}", storage_kind); + &*self.default_compression + } + StorageKind::Ssd => { + // SSD: Node-level compression with configured algorithm + debug!("Using SSD-optimized compression for storage_kind: {:?}", storage_kind); + &*self.default_compression + } + } + } } impl Dmu @@ -433,7 +460,7 @@ where } debug!("Estimated object size is {object_size} bytes"); - debug!("Using compression {:?}", &self.default_compression); + debug!("Default compression configured: {:?}", &self.default_compression); let generation = self.handler.current_generation(); // Use storage hints if available if let Some(pref) = self.storage_hints.lock().remove(&pivot_key) { @@ -444,27 +471,55 @@ where .preferred_class() .unwrap_or(self.default_storage_class); - let compression = &self.default_compression; - let (integrity_mode, compressed_data) = { - // FIXME: cache this - let mut state = compression.new_compression()?; - let mut buf = crate::buffer::BufWrite::with_capacity(Block::round_up_from_bytes( - object_size as u32, - )); - let integrity_mode = { - let pp = object.prepare_pack( - self.spl().storage_kind_map()[storage_class as usize], - &pivot_key, - )?; - let part = object.pack(&mut buf, pp, |bytes| { - let mut builder = self.default_checksum_builder.build(); - builder.ingest(bytes); - builder.finish() - })?; - drop(object); - part - }; - (integrity_mode, state.finish(buf.into_buf())?) + // Select compression based on storage kind + let storage_kind = self.spl().storage_kind_map()[storage_class as usize]; + let compression = self.select_compression_for_storage_kind(storage_kind); + let compression_enabled = self.default_compression_config.is_compression_enabled(); + + // Pack the object first + let mut buf = crate::buffer::BufWrite::with_capacity(Block::round_up_from_bytes( + object_size as u32, + )); + let integrity_mode = { + let mut pp = object.prepare_pack( + storage_kind, + &pivot_key, + )?; + + // For Memory mode, override with actual compression configuration only if compression is enabled + if matches!(storage_kind, crate::tree::StorageKind::Memory) && compression_enabled { + pp.compression = Some(self.default_compression_config.clone()); + } + + let part = object.pack(&mut buf, pp, |bytes| { + let mut builder = self.default_checksum_builder.build(); + builder.ingest(bytes); + builder.finish() + })?; + drop(object); + part + }; + + // Apply compression based on storage kind + let (compressed_data, actual_decompression_tag) = match storage_kind { + crate::tree::StorageKind::Memory => { + // For Memory mode, compression is handled at value level inside PackedChildBuffer + debug!("Memory mode: No block-level compression at DMU level"); + (buf.into_buf(), crate::compression::DecompressionTag::None) + } + crate::tree::StorageKind::Ssd | crate::tree::StorageKind::Hdd => { + // For SSD/HDD mode, apply block-level compression only if compression is enabled + if !compression_enabled { + debug!("SSD/HDD mode: No compression (compression disabled)"); + (buf.into_buf(), crate::compression::DecompressionTag::None) + } else { + debug!("SSD/HDD mode: Applying block-level compression"); + let uncompressed_data = buf.into_buf(); + let mut compression_state = compression.new_compression()?; + let compressed = compression_state.compress_buf(uncompressed_data)?; + (compressed, compression.decompression_tag()) + } + } }; assert!(compressed_data.len() <= u32::max_value() as usize); @@ -496,7 +551,7 @@ where offset, size, checksum, - decompression_tag: compression.decompression_tag(), + decompression_tag: actual_decompression_tag, generation, info, integrity_mode, @@ -1066,10 +1121,15 @@ where fn finish_prefetch(&self, p: Self::Prefetch) -> Result { let (ptr, compressed_data, pk) = block_on(p)?; let object: Node>> = { - let data = ptr - .decompression_tag() - .new_decompression()? - .decompress(compressed_data)?; + let data = if ptr.decompression_tag().is_decompression_needed() { + // Apply decompression + ptr.decompression_tag() + .new_decompression()? + .decompress(compressed_data)? + } else { + // No decompression needed - use data directly + compressed_data + }; Object::unpack_at(ptr.info(), data, ptr.integrity_mode.clone())? }; let key = ObjectKey::Unmodified { diff --git a/betree/src/data_management/mod.rs b/betree/src/data_management/mod.rs index da591826f..c775a6830 100644 --- a/betree/src/data_management/mod.rs +++ b/betree/src/data_management/mod.rs @@ -111,7 +111,17 @@ pub trait HasStoragePreference { /// Intermediary structure to prove that media constraints have been checked. /// This is more of a hack since i don't want to pull apart the trait. -pub struct PreparePack(); +/// Now carries compression configuration for storage-kind-aware compression. +pub struct PreparePack { + pub compression: Option, + pub storage_kind: crate::tree::StorageKind, +} + +impl PreparePack { + pub fn new(compression: Option, storage_kind: crate::tree::StorageKind) -> Self { + Self { compression, storage_kind } + } +} /// Which integrity mode is used by the nodes. Can be used to skip the /// processing of an entire node if it is not required to ensure integrity of diff --git a/betree/src/data_management/object_ptr.rs b/betree/src/data_management/object_ptr.rs index c805f99c6..fa0df2ac1 100644 --- a/betree/src/data_management/object_ptr.rs +++ b/betree/src/data_management/object_ptr.rs @@ -96,9 +96,17 @@ impl ObjectPointer { SPL: StoragePoolLayer, D: crate::size::StaticSize + crate::checksum::Checksum, { - let mut decompression_state = self.decompression_tag().new_decompression()?; let compressed_data = pool.read(self.size(), self.offset(), self.checksum.clone())?; - let data = decompression_state.decompress(compressed_data)?; + + // Bypass decompression entirely when no compression is used + let data = if self.decompression_tag().is_decompression_needed() { + let mut decompression_state = self.decompression_tag().new_decompression()?; + decompression_state.decompress(compressed_data)? + } else { + // No decompression needed - data is already uncompressed + compressed_data + }; + Ok(super::Object::unpack_at( self.info(), data, diff --git a/betree/src/database/mod.rs b/betree/src/database/mod.rs index b32788476..abb48a02a 100644 --- a/betree/src/database/mod.rs +++ b/betree/src/database/mod.rs @@ -242,6 +242,7 @@ impl DatabaseConfiguration { Dmu::new( self.compression.to_builder(), + self.compression.clone(), ::builder(), self.default_storage_class, spu, diff --git a/betree/src/storage_pool/configuration.rs b/betree/src/storage_pool/configuration.rs index b62986864..087d92071 100644 --- a/betree/src/storage_pool/configuration.rs +++ b/betree/src/storage_pool/configuration.rs @@ -316,6 +316,7 @@ impl LeafVdev { return Err(io::Error::last_os_error()); } + Ok(Leaf::File(vdev::File::new( file, path.to_string_lossy().into_owned(), @@ -327,6 +328,7 @@ impl LeafVdev { )?)), #[cfg(feature = "nvm")] LeafVdev::PMemFile { ref path, len } => { + let file = match pmdk::PMem::open(path) { Ok(handle) => handle, Err(open_err) => match pmdk::PMem::create(path, len) { diff --git a/betree/src/tree/imp/internal/packed_child_buffer.rs b/betree/src/tree/imp/internal/packed_child_buffer.rs index 64c78437a..d190e0e63 100644 --- a/betree/src/tree/imp/internal/packed_child_buffer.rs +++ b/betree/src/tree/imp/internal/packed_child_buffer.rs @@ -129,10 +129,12 @@ impl Default for PackedChildBuffer { pub const BUFFER_STATIC_SIZE: usize = HEADER; const IS_LEAF_HEADER: usize = 1; +const COMPRESSION_FLAG_SIZE: usize = 1; // 1 byte for compression flag const HEADER: usize = IS_LEAF_HEADER + std::mem::size_of::() + std::mem::size_of::() - + std::mem::size_of::(); + + std::mem::size_of::() + + COMPRESSION_FLAG_SIZE; const KEY_IDX_SIZE: usize = std::mem::size_of::() + std::mem::size_of::() + std::mem::size_of::(); const PER_KEY_BYTES: usize = 16; @@ -142,6 +144,7 @@ pub(in crate::tree::imp) enum Map { Packed { entry_count: usize, data: SlicedCowBytes, + compression_type: u8, // 0 = none, 1 = zstd, 2 = lz4, etc. }, Unpacked(BTreeMap), } @@ -169,7 +172,7 @@ impl Map { &mut self, ) -> WithCacheSizeChange<&mut BTreeMap> { match self { - Map::Packed { entry_count, data } => { + Map::Packed { entry_count, data, compression_type } => { // NOTE: copy data before to avoid sync epoch shenanigans // necesary as we might rewrite the original memory region once here let mut keys: Vec = Vec::with_capacity(*entry_count); @@ -208,10 +211,24 @@ impl Map { key_info.into_iter().zip(values_pos.into_iter().map( move |(pos, len, csum)| { // NOTE: copies data to not be invalidated later on rewrites... could be solved differently - let buf = CowBytes::from(&data[pos as usize..(pos + len) as usize]) + let compressed_buf = CowBytes::from(&data[pos as usize..(pos + len) as usize]) .slice_from(0); - csum.verify(&buf).unwrap(); - buf + csum.verify(&compressed_buf).unwrap(); + + // Decompress if needed + if *compression_type == 0 { + // No compression + compressed_buf + } else { + // Decompress the value using centralized mapping + let decompression_tag = crate::compression::CompressionConfiguration::decompression_tag_from_id(*compression_type); + + let mut decompressor = decompression_tag.new_decompression() + .expect("Failed to create decompressor"); + let decompressed = decompressor.decompress_val(compressed_buf.as_ref()) + .expect("Failed to decompress value"); + decompressed + } }, )), ))); @@ -266,7 +283,7 @@ impl Map { /// the general checksum of the node. pub fn len_bytes_contained_in_checksum(&self) -> usize { match self { - Map::Packed { entry_count, data } => { + Map::Packed { entry_count, data, .. } => { if *entry_count < 1 { return HEADER; } @@ -292,15 +309,42 @@ impl Map { pub fn get(&self, key: &[u8]) -> Option<(KeyInfo, SlicedCowBytes)> { match self { - Map::Packed { data, .. } => self.find(key).map(|(pref, pos, len, csum)| { - let buf = unsafe { SlicedCowBytes::from_raw(data.as_ptr().add(pos), len) }; + Map::Packed { data, compression_type, .. } => self.find(key).map(|(pref, pos, len, csum)| { + //println!("1. DEBUG: PackedChildBuffer::get - key={} pos={} len={} csum={:?}", String::from_utf8_lossy(key), pos, len, csum); + let compressed_buf = unsafe { + #[cfg(feature = "memory_metrics")] + if let Some(stats) = data.get_stats() { + SlicedCowBytes::from_tracked_raw(data.as_ptr().add(pos), len, stats) + } else { + SlicedCowBytes::from_raw(data.as_ptr().add(pos), len) + } + #[cfg(not(feature = "memory_metrics"))] + SlicedCowBytes::from_raw(data.as_ptr().add(pos), len) + }; + //println!("2. DEBUG: PackedChildBuffer::get - key={} pos={} len={} csum={:?}", String::from_utf8_lossy(key), pos, len, csum); + // TODO: Pass on result - csum.verify(&buf).unwrap(); + csum.verify(&compressed_buf).unwrap(); + + // Decompress if needed + let decompressed_buf = if *compression_type == 0 { + // No compression + compressed_buf.slice_from(0) + } else { + // Decompress the value using centralized mapping + let decompression_tag = crate::compression::CompressionConfiguration::decompression_tag_from_id(*compression_type); + + let mut decompressor = decompression_tag.new_decompression() + .expect("Failed to create decompressor"); + decompressor.decompress_val(compressed_buf.as_ref()) + .expect("Failed to decompress value") + }; + ( KeyInfo { storage_preference: StoragePreference::from_u8(pref), }, - buf.slice_from(0), + decompressed_buf, ) }), // TODO: This should be a cheap copy (a few bytes for the pref and @@ -312,7 +356,7 @@ impl Map { // Return the preference and location of the value within the boxed value. fn find(&self, key: &[u8]) -> Option<(u8, usize, usize, Checksum)> { match self { - Map::Packed { entry_count, data } => { + Map::Packed { entry_count, data, .. } => { // Perform binary search let mut left = 0 as isize; let mut right = (*entry_count as isize) - 1; @@ -521,6 +565,7 @@ pub struct PackedBufferIterator<'a> { cur: usize, entry_count: usize, keys: Vec, + compression_type: u8, } impl<'a> Iterator for PackedBufferIterator<'a> { @@ -536,7 +581,22 @@ impl<'a> Iterator for PackedBufferIterator<'a> { let vpos_off = (kpos.pos + kpos.len) as usize; let vpos = u32::from_le_bytes(self.buffer.cut(vpos_off, 4).try_into().unwrap()); let vlen = u32::from_le_bytes(self.buffer.cut(vpos_off + 4, 4).try_into().unwrap()); - let val = self.buffer.clone().subslice(vpos, vlen); + let compressed_val = self.buffer.clone().subslice(vpos, vlen); + + // Decompress if needed + let decompressed_val = if self.compression_type == 0 { + // No compression + compressed_val + } else { + // Decompress the value using centralized mapping + let decompression_tag = crate::compression::CompressionConfiguration::decompression_tag_from_id(self.compression_type); + + let mut decompressor = decompression_tag.new_decompression() + .expect("Failed to create decompressor"); + decompressor.decompress_val(compressed_val.as_ref()) + .expect("Failed to decompress value") + }; + self.cur += 1; Some(( self.buffer.cut(kpos.pos as usize, kpos.len as usize), @@ -544,7 +604,7 @@ impl<'a> Iterator for PackedBufferIterator<'a> { KeyInfo { storage_preference: StoragePreference::from_u8(kpos.pref), }, - val, + decompressed_val, ), )) } @@ -561,6 +621,8 @@ impl<'a> Iter<'a> { Map::Packed { entry_count, ref data, + compression_type, + .. } => Iter::Packed(PackedBufferIterator { keys: (0..entry_count) .map(|idx| { @@ -574,6 +636,7 @@ impl<'a> Iter<'a> { buffer: data, cur: 0, entry_count, + compression_type, }), Map::Unpacked(ref btree) => Iter::Unpacked(btree.iter()), } @@ -807,9 +870,25 @@ impl PackedChildBuffer { /// ] /// pub fn pack( + &self, + w: W, + csum_builder: F, + ) -> Result, std::io::Error> + where + W: std::io::Write, + F: Fn(&[u8]) -> C, + C: ChecksumTrait, + { + // Default pack method - no compression + self.pack_with_compression(w, csum_builder, None, crate::tree::StorageKind::Ssd) + } + + pub fn pack_with_compression( &self, mut w: W, csum_builder: F, + compression: Option, + storage_kind: crate::tree::StorageKind, ) -> Result, std::io::Error> where W: std::io::Write, @@ -844,6 +923,18 @@ impl PackedChildBuffer { .as_u8() .to_le_bytes(), )?; + + // Determine if we should use value-level compression + let use_value_compression = matches!(storage_kind, crate::tree::StorageKind::Memory) + && compression.as_ref().map_or(false, |c| c.is_compression_enabled()); + + // Write compression type using centralized mapping + let compression_type = if use_value_compression { + compression.as_ref().unwrap().compression_type_id() + } else { + 0u8 + }; + tmp.write_all(&[compression_type])?; let mut free_after = HEADER + self.buffer.len() * KEY_IDX_SIZE; for (key, (info, _)) in self.buffer.assert_unpacked().iter() { @@ -856,20 +947,49 @@ impl PackedChildBuffer { + std::mem::size_of::() + Checksum::static_size() } + + // Prepare compressed values for Memory mode + let mut compressed_values = Vec::new(); + for (key, (_, val)) in self.buffer.assert_unpacked().iter() { tmp.write_all(&key)?; - let checksum = csum_builder(val); - // TODO: maybe size in unpacking this + // For Memory mode: compress individual values using compress_val + let (final_val, actual_len) = if use_value_compression { + let compression_config = compression.as_ref().unwrap(); + let mut compression_state = compression_config.create_compressor() + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, format!("{:?}", e)))?; + let compressed_bytes = compression_state.compress_val(val.as_ref()) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, format!("{:?}", e)))?; + let len = compressed_bytes.len(); + + compressed_values.push(compressed_bytes); + (compressed_values.last().unwrap().as_slice(), len) + } else { + (val.as_ref(), val.len()) + }; + + // Calculate checksum on the final data (compressed for Memory, uncompressed for others) + let checksum = csum_builder(final_val); tmp.write_all(&(free_after as u32).to_le_bytes())?; - tmp.write_all(&(val.len() as u32).to_le_bytes())?; + tmp.write_all(&(actual_len as u32).to_le_bytes())?; bincode::serialize_into(&mut tmp, &checksum).unwrap(); - free_after += val.len(); + free_after += actual_len; } + let head_csum = csum_builder(&tmp); + w.write_all(&tmp)?; - for (_, (_, val)) in self.buffer.assert_unpacked().iter() { - w.write_all(&val)?; + + // Write values (compressed for Memory mode, uncompressed for others) + if use_value_compression { + for compressed_val in &compressed_values { + w.write_all(compressed_val)?; + } + } else { + for (_, (_, val)) in self.buffer.assert_unpacked().iter() { + w.write_all(&val)?; + } } Ok(IntegrityMode::Internal { @@ -897,9 +1017,11 @@ impl PackedChildBuffer { .try_into() .unwrap(), ); + let compression_flag = buf[IS_LEAF_HEADER + 9]; let buffer = Map::Packed { entry_count, data: buf.clone(), + compression_type: compression_flag, }; csum.checksum() .unwrap() diff --git a/betree/src/tree/imp/node.rs b/betree/src/tree/imp/node.rs index 72935572e..99a8d7561 100644 --- a/betree/src/tree/imp/node.rs +++ b/betree/src/tree/imp/node.rs @@ -175,13 +175,13 @@ impl Object for Node< fn pack C, C: Checksum>( &self, mut writer: W, - _: PreparePack, + prepare_pack: PreparePack, csum_builder: F, ) -> Result, io::Error> { match self.0 { MemLeaf(ref leaf) => { writer.write_all((NodeInnerType::CopylessLeaf as u32).to_be_bytes().as_ref())?; - leaf.pack(writer, csum_builder) + leaf.pack_with_compression(writer, csum_builder, prepare_pack.compression, prepare_pack.storage_kind) } CopylessInternal(ref cpl_internal) => { writer.write_all( @@ -241,40 +241,24 @@ impl Object for Node< fn prepare_pack( &mut self, - _storage_kind: StorageKind, + storage_kind: StorageKind, _pivot_key: &PivotKey, ) -> Result where R: ObjectReference, { - // NOTE: Only necessary transitions are represented here, all others are no-op. Can be improved. - // self.0 = match ( - // std::mem::replace(&mut self.0, unsafe { std::mem::zeroed() }), - // storage_kind, - // ) { - // // (Internal(internal), StorageKind::Memory) | (Internal(internal), StorageKind::Ssd) => { - // // // Spawn new child buffers from one internal node. - // // Inner::DisjointInternal(internal.to_disjoint_node(|new_cbuf| { - // // dmu.insert( - // // Node(Inner::ChildBuffer(new_cbuf)), - // // pivot_key.d_id(), - // // pivot_key.clone(), - // // ) - // // })) - // // } - // (CopylessInternal(_internal), StorageKind::Hdd) => { - // // Fetch children and pipe them into one node. - // unimplemented!(); - // // let mut cbufs = Vec::with_capacity(internal.children.len()); - // // Inner::Internal(InternalNode::from_disjoint_node(internal, cbufs)) - // } - // (Leaf(leaf), StorageKind::Memory) => Inner::MemLeaf(leaf.to_memory_leaf()), - // (MemLeaf(leaf), StorageKind::Ssd) | (MemLeaf(leaf), StorageKind::Hdd) => { - // Inner::Leaf(leaf.to_block_leaf()) - // } - // (default, _) => default, - // }; - Ok(PreparePack()) + // For Memory mode, we need compression at the value level + // Note: The actual compression will be passed from DMU + let compression = match storage_kind { + StorageKind::Memory => { + // Placeholder - will be replaced by DMU with actual compression + use crate::compression::CompressionConfiguration; + Some(CompressionConfiguration::None) + } + _ => None, // SSD/HDD mode compression is handled at DMU level + }; + + Ok(crate::data_management::PreparePack::new(compression, storage_kind)) } } diff --git a/betree/src/vdev/mem.rs b/betree/src/vdev/mem.rs index e56f93365..28adeac44 100644 --- a/betree/src/vdev/mem.rs +++ b/betree/src/vdev/mem.rs @@ -8,7 +8,7 @@ use parking_lot::RwLock; use std::{ io, ops::{Deref, DerefMut}, - sync::atomic::Ordering, + sync::{atomic::Ordering, Arc}, }; /// `LeafVdev` that is backed by memory. @@ -16,7 +16,7 @@ pub struct Memory { mem: RwLock>, id: String, size: Block, - stats: AtomicStatistics, + stats: Arc, } impl Memory { @@ -26,7 +26,7 @@ impl Memory { mem: RwLock::new(vec![0; size].into_boxed_slice()), id, size: Block::from_bytes(size as u64), - stats: Default::default(), + stats: Arc::new(Default::default()), }) } @@ -58,11 +58,23 @@ impl Memory { match self.slice_blocks(size, offset) { Ok(slice) => { let buf = unsafe { - Buf::from_raw( - std::ptr::NonNull::new(slice.as_ptr() as *mut u8) - .expect("Pointer in Memory vdev was null."), - size, - ) + #[cfg(feature = "memory_metrics")] + { + Buf::from_tracked_raw( + std::ptr::NonNull::new(slice.as_ptr() as *mut u8) + .expect("Pointer in Memory vdev was null."), + size, + self.stats.clone(), + ) + } + #[cfg(not(feature = "memory_metrics"))] + { + Buf::from_raw( + std::ptr::NonNull::new(slice.as_ptr() as *mut u8) + .expect("Pointer in Memory vdev was null."), + size, + ) + } }; #[cfg(feature = "latency_metrics")] self.stats.read_op_latency.fetch_add( @@ -175,6 +187,14 @@ impl VdevLeafRead for Memory { match self.slice(buf_mut.len(), offset.to_bytes() as usize) { Ok(src) => { buf_mut.copy_from_slice(&src); + + // Track memory metrics for direct memory access + // #[cfg(feature = "memory_metrics")] + // { + // self.stats.memory_read.fetch_add(size.as_u64(), Ordering::Relaxed); + // self.stats.memory_read_count.fetch_add(1, Ordering::Relaxed); + // } + #[cfg(feature = "latency_metrics")] self.stats.read_op_latency.fetch_add( start diff --git a/betree/src/vdev/mod.rs b/betree/src/vdev/mod.rs index dbd8fc4b6..373ddc5b5 100644 --- a/betree/src/vdev/mod.rs +++ b/betree/src/vdev/mod.rs @@ -24,19 +24,59 @@ pub struct Statistics { pub checksum_errors: Block, /// The total number of blocks of failed write requests pub failed_writes: Block, + #[cfg(feature = "memory_metrics")] + /// The total number of bytes accessed directly from memory (Memory storage only) + pub memory_read: Block, + #[cfg(feature = "memory_metrics")] + /// The total number of direct memory access operations (Memory storage only) + pub memory_read_count: u64, + #[cfg(feature = "compression_metrics")] + /// The total number of bytes passed to compression algorithms + pub bytes_to_compressed: Block, + #[cfg(feature = "compression_metrics")] + /// The total number of bytes after compression + pub compressed_bytes: Block, + #[cfg(feature = "compression_metrics")] + /// The total time spent in compression operations (nanoseconds) + pub compression_time: u64, + #[cfg(feature = "compression_metrics")] + /// The total number of bytes passed to decompression algorithms + pub bytes_to_decompress: Block, + #[cfg(feature = "compression_metrics")] + /// The total number of bytes after decompression + pub bytes_after_decompression: Block, + #[cfg(feature = "compression_metrics")] + /// The total time spent in decompression operations (nanoseconds) + pub decompression_time: u64, #[cfg(feature = "latency_metrics")] /// The average latency over all read operations pub read_latency: u64, } #[derive(Default, Debug)] -struct AtomicStatistics { +pub(crate) struct AtomicStatistics { read: AtomicU64, written: AtomicU64, failed_reads: AtomicU64, checksum_errors: AtomicU64, repaired: AtomicU64, failed_writes: AtomicU64, + #[cfg(feature = "memory_metrics")] + pub(crate) memory_read: AtomicU64, + #[cfg(feature = "memory_metrics")] + pub(crate) memory_read_count: AtomicU64, + #[cfg(feature = "compression_metrics")] + pub(crate) bytes_to_compressed: AtomicU64, + #[cfg(feature = "compression_metrics")] + pub(crate) compressed_bytes: AtomicU64, + #[cfg(feature = "compression_metrics")] + pub(crate) compression_time: AtomicU64, + #[cfg(feature = "compression_metrics")] + pub(crate) bytes_to_decompress: AtomicU64, + #[cfg(feature = "compression_metrics")] + pub(crate) bytes_after_decompression: AtomicU64, + #[cfg(feature = "compression_metrics")] + pub(crate) decompression_time: AtomicU64, #[cfg(feature = "latency_metrics")] prev_read: AtomicU64, #[cfg(feature = "latency_metrics")] @@ -44,18 +84,73 @@ struct AtomicStatistics { } impl AtomicStatistics { + #[cfg(feature = "compression_metrics")] + pub(crate) fn update_compression_metrics(&self, bytes_to_compressed: u64, compressed_bytes: u64, compression_time: u64) { + self.bytes_to_compressed.fetch_add(bytes_to_compressed, Ordering::Relaxed); + self.compressed_bytes.fetch_add(compressed_bytes, Ordering::Relaxed); + self.compression_time.fetch_add(compression_time, Ordering::Relaxed); + } + + #[cfg(feature = "compression_metrics")] + pub(crate) fn update_decompression_metrics(&self, bytes_to_decompress: u64, bytes_after_decompression: u64, decompression_time: u64) { + self.bytes_to_decompress.fetch_add(bytes_to_decompress, Ordering::Relaxed); + self.bytes_after_decompression.fetch_add(bytes_after_decompression, Ordering::Relaxed); + self.decompression_time.fetch_add(decompression_time, Ordering::Relaxed); + } + fn as_stats(&self) -> Statistics { #[cfg(feature = "latency_metrics")] { self.prev_read .store(self.read.load(Ordering::Relaxed), Ordering::Relaxed) } + + #[cfg(feature = "memory_metrics")] + let memory_read_val = self.memory_read.load(Ordering::Relaxed); + #[cfg(feature = "memory_metrics")] + let memory_read_count_val = self.memory_read_count.load(Ordering::Relaxed); + + // Get compression metrics from global instance and local vdev counters + #[cfg(feature = "compression_metrics")] + let (global_bytes_to_compressed, global_compressed_bytes, global_compression_time, + global_bytes_to_decompress, global_bytes_after_decompression, global_decompression_time) = + crate::compression::metrics::get_compression_metrics(); + + #[cfg(feature = "compression_metrics")] + let bytes_to_compressed_val = self.bytes_to_compressed.load(Ordering::Relaxed) + global_bytes_to_compressed; + #[cfg(feature = "compression_metrics")] + let compressed_bytes_val = self.compressed_bytes.load(Ordering::Relaxed) + global_compressed_bytes; + #[cfg(feature = "compression_metrics")] + let compression_time_val = self.compression_time.load(Ordering::Relaxed) + global_compression_time; + #[cfg(feature = "compression_metrics")] + let bytes_to_decompress_val = self.bytes_to_decompress.load(Ordering::Relaxed) + global_bytes_to_decompress; + #[cfg(feature = "compression_metrics")] + let bytes_after_decompression_val = self.bytes_after_decompression.load(Ordering::Relaxed) + global_bytes_after_decompression; + #[cfg(feature = "compression_metrics")] + let decompression_time_val = self.decompression_time.load(Ordering::Relaxed) + global_decompression_time; + Statistics { read: Block(self.read.load(Ordering::Relaxed)), written: Block(self.written.load(Ordering::Relaxed)), failed_reads: Block(self.failed_reads.load(Ordering::Relaxed)), checksum_errors: Block(self.checksum_errors.load(Ordering::Relaxed)), failed_writes: Block(self.failed_writes.load(Ordering::Relaxed)), + #[cfg(feature = "memory_metrics")] + memory_read: Block(memory_read_val), + #[cfg(feature = "memory_metrics")] + memory_read_count: memory_read_count_val, + #[cfg(feature = "compression_metrics")] + bytes_to_compressed: Block(bytes_to_compressed_val), + #[cfg(feature = "compression_metrics")] + compressed_bytes: Block(compressed_bytes_val), + #[cfg(feature = "compression_metrics")] + compression_time: compression_time_val, + #[cfg(feature = "compression_metrics")] + bytes_to_decompress: Block(bytes_to_decompress_val), + #[cfg(feature = "compression_metrics")] + bytes_after_decompression: Block(bytes_after_decompression_val), + #[cfg(feature = "compression_metrics")] + decompression_time: decompression_time_val, #[cfg(feature = "latency_metrics")] read_latency: self .read_op_latency @@ -70,6 +165,40 @@ impl AtomicStatistics { } } +impl Clone for AtomicStatistics { + fn clone(&self) -> Self { + use std::sync::atomic::Ordering; + Self { + read: AtomicU64::new(self.read.load(Ordering::Relaxed)), + written: AtomicU64::new(self.written.load(Ordering::Relaxed)), + failed_reads: AtomicU64::new(self.failed_reads.load(Ordering::Relaxed)), + checksum_errors: AtomicU64::new(self.checksum_errors.load(Ordering::Relaxed)), + repaired: AtomicU64::new(self.repaired.load(Ordering::Relaxed)), + failed_writes: AtomicU64::new(self.failed_writes.load(Ordering::Relaxed)), + #[cfg(feature = "memory_metrics")] + memory_read: AtomicU64::new(self.memory_read.load(Ordering::Relaxed)), + #[cfg(feature = "memory_metrics")] + memory_read_count: AtomicU64::new(self.memory_read_count.load(Ordering::Relaxed)), + #[cfg(feature = "compression_metrics")] + bytes_to_compressed: AtomicU64::new(self.bytes_to_compressed.load(Ordering::Relaxed)), + #[cfg(feature = "compression_metrics")] + compressed_bytes: AtomicU64::new(self.compressed_bytes.load(Ordering::Relaxed)), + #[cfg(feature = "compression_metrics")] + compression_time: AtomicU64::new(self.compression_time.load(Ordering::Relaxed)), + #[cfg(feature = "compression_metrics")] + bytes_to_decompress: AtomicU64::new(self.bytes_to_decompress.load(Ordering::Relaxed)), + #[cfg(feature = "compression_metrics")] + bytes_after_decompression: AtomicU64::new(self.bytes_after_decompression.load(Ordering::Relaxed)), + #[cfg(feature = "compression_metrics")] + decompression_time: AtomicU64::new(self.decompression_time.load(Ordering::Relaxed)), + #[cfg(feature = "latency_metrics")] + prev_read: AtomicU64::new(self.prev_read.load(Ordering::Relaxed)), + #[cfg(feature = "latency_metrics")] + read_op_latency: AtomicU64::new(self.read_op_latency.load(Ordering::Relaxed)), + } + } +} + /// Result of a successful scrub request #[derive(Debug)] pub struct ScrubResult { diff --git a/betree/src/vdev/pmemfile.rs b/betree/src/vdev/pmemfile.rs index 31b554de2..c566a805e 100644 --- a/betree/src/vdev/pmemfile.rs +++ b/betree/src/vdev/pmemfile.rs @@ -6,7 +6,7 @@ use crate::{buffer::Buf, checksum::Checksum}; use async_trait::async_trait; use libc::{c_ulong, ioctl}; use pmdk; -use std::{fs, io, os::unix::io::AsRawFd, sync::atomic::Ordering}; +use std::{fs, io, os::unix::io::AsRawFd, sync::{atomic::Ordering, Arc}}; /// `LeafVdev` which is backed by NVM and uses `pmdk`. #[derive(Debug)] @@ -14,7 +14,7 @@ pub struct PMemFile { file: pmdk::PMem, id: String, size: Block, - stats: AtomicStatistics, + stats: Arc, } impl PMemFile { @@ -25,7 +25,7 @@ impl PMemFile { file, id, size, - stats: Default::default(), + stats: Arc::new(Default::default()), }) } } @@ -59,11 +59,23 @@ impl VdevRead for PMemFile { // # SAFETY // Since Bufs are read only anyways we ensure the safety of this // step by re-packing this forced mutable pointer into one. - Buf::from_raw( - std::ptr::NonNull::new(slice.as_ptr() as *mut u8) - .expect("Pmem pointer was null when trying to read from offset."), - size, - ) + #[cfg(feature = "memory_metrics")] + { + Buf::from_tracked_raw( + std::ptr::NonNull::new(slice.as_ptr() as *mut u8) + .expect("Pmem pointer was null when trying to read from offset."), + size, + self.stats.clone(), + ) + } + #[cfg(not(feature = "memory_metrics"))] + { + Buf::from_raw( + std::ptr::NonNull::new(slice.as_ptr() as *mut u8) + .expect("Pmem pointer was null when trying to read from offset."), + size, + ) + } }; // let buf = { @@ -108,11 +120,23 @@ impl VdevRead for PMemFile { // # SAFETY // Since Bufs are read only anyways we ensure the safety of this // step by re-packing this forced mutable pointer into one. - Buf::from_raw( - std::ptr::NonNull::new(slice.as_ptr() as *mut u8) - .expect("Pmem pointer was null when trying to read from offset."), - size, - ) + #[cfg(feature = "memory_metrics")] + { + Buf::from_tracked_raw( + std::ptr::NonNull::new(slice.as_ptr() as *mut u8) + .expect("Pmem pointer was null when trying to read from offset."), + size, + self.stats.clone(), + ) + } + #[cfg(not(feature = "memory_metrics"))] + { + Buf::from_raw( + std::ptr::NonNull::new(slice.as_ptr() as *mut u8) + .expect("Pmem pointer was null when trying to read from offset."), + size, + ) + } }; // self.file.read(offset.to_bytes() as usize, buf.as_mut());