Skip to content

Commit

Permalink
Add mmap-backed file for tracking doc id validity state (#326)
Browse files Browse the repository at this point in the history
  • Loading branch information
tyb0807 authored Jan 21, 2025
1 parent ab8cf09 commit c46fdae
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 0 deletions.
122 changes: 122 additions & 0 deletions rs/index/src/ivf/files/invalidated_ids.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
use std::fs::OpenOptions;

use anyhow::{anyhow, Result};
use memmap2::MmapMut;

pub struct InvalidatedIdsStorage {
mmap: MmapMut,
num_points: usize,
}

const BITS_IN_BYTE: usize = 8;

impl InvalidatedIdsStorage {
pub fn new(base_directory: &str, num_points: usize) -> Result<Self> {
let file = OpenOptions::new()
.read(true)
.write(true)
.create(true)
.open(format!("{}/invalidated_ids", base_directory))?;

let bytes_needed = num_points.div_ceil(BITS_IN_BYTE);
file.set_len(bytes_needed as u64)?;

let mmap = unsafe { MmapMut::map_mut(&file)? };
Ok(Self { mmap, num_points })
}

pub fn invalidate(&mut self, point_id: u32) -> Result<()> {
let point_id = point_id as usize;
if point_id >= self.num_points {
return Err(anyhow!("ID out of bound"));
}

let byte_index = point_id / BITS_IN_BYTE;
let bit_index = point_id % BITS_IN_BYTE;

self.mmap[byte_index] |= 1 << bit_index;

Ok(self.mmap.flush()?)
}

// TODO(tyb): consider having an unsafe version w/o bound check
// for performance reason
pub fn is_valid(&self, point_id: u32) -> Result<bool> {
let point_id = point_id as usize;
if point_id >= self.num_points {
return Err(anyhow!("ID out of bound"));
}
let byte_index = point_id / BITS_IN_BYTE;
let bit_index = point_id % BITS_IN_BYTE;

Ok((self.mmap[byte_index] & (1 << bit_index)) == 0)
}
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_invalidated_ids() {
let temp_dir = tempdir::TempDir::new("test_invalidated_ids")
.expect("Failed to create temporary directory");
let base_dir = temp_dir
.path()
.to_str()
.expect("Failed to convert temporary directory path to string")
.to_string();

// Test creation and initial state
let mut bitmap = InvalidatedIdsStorage::new(&base_dir, 1000)
.expect("Failed to open invalidated ids storage");

// All ids should be valid initially
for i in 0..1000 {
assert!(bitmap.is_valid(i).expect("Failed to query validity state"));
}

// Test out of bound
assert!(bitmap.invalidate(1000).is_err());
assert!(bitmap.is_valid(1001).is_err());

// Test invalidating ids
assert!(bitmap.invalidate(0).is_ok());
assert!(bitmap.invalidate(500).is_ok());
assert!(bitmap.invalidate(999).is_ok());

assert!(!bitmap.is_valid(0).expect("Failed to query validity state"));
assert!(!bitmap
.is_valid(500)
.expect("Failed to query validity state"));
assert!(!bitmap
.is_valid(999)
.expect("Failed to query validity state"));
assert!(bitmap.is_valid(1).expect("Failed to query validity state"));
assert!(bitmap
.is_valid(998)
.expect("Failed to query validity state"));

// Test edge cases
assert!(bitmap.invalidate(7).is_ok());
assert!(bitmap.invalidate(8).is_ok());
assert!(!bitmap.is_valid(7).expect("Failed to query validity state"));
assert!(!bitmap.is_valid(8).expect("Failed to query validity state"));
assert!(bitmap.is_valid(6).expect("Failed to query validity state"));
assert!(bitmap.is_valid(9).expect("Failed to query validity state"));

// Test persistence
drop(bitmap);
let bitmap2 = InvalidatedIdsStorage::new(&base_dir, 1000)
.expect("Failed to open invalidated ids storage");
assert!(!bitmap2.is_valid(0).expect("Failed to query validity state"));
assert!(!bitmap2
.is_valid(500)
.expect("Failed to query validity state"));
assert!(!bitmap2
.is_valid(999)
.expect("Failed to query validity state"));
assert!(!bitmap2.is_valid(7).expect("Failed to query validity state"));
assert!(!bitmap2.is_valid(8).expect("Failed to query validity state"));
}
}
1 change: 1 addition & 0 deletions rs/index/src/ivf/files/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pub mod invalidated_ids;
1 change: 1 addition & 0 deletions rs/index/src/ivf/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
pub mod builder;
pub mod files;
pub mod index;
pub mod reader;
pub mod writer;

0 comments on commit c46fdae

Please sign in to comment.