diff --git a/sstable/src/dictionary.rs b/sstable/src/dictionary.rs index caa7a6bb83..22d302a558 100644 --- a/sstable/src/dictionary.rs +++ b/sstable/src/dictionary.rs @@ -487,7 +487,7 @@ impl Dictionary { /// the buffer may be modified. pub fn ord_to_term(&self, ord: TermOrdinal, bytes: &mut Vec) -> io::Result { // find block in which the term would be - let block_addr = self.sstable_index.get_block_with_ord(ord); + let block_addr = self.sstable_index.get_block_with_ord::(ord).0; let first_ordinal = block_addr.first_ordinal; // then search inside that block only @@ -511,16 +511,17 @@ impl Dictionary { mut cb: F, ) -> io::Result { let mut bytes = Vec::new(); - let mut current_block_addr = self.sstable_index.get_block_with_ord(0); + let (mut current_block_addr, mut next_block_ord) = + self.sstable_index.get_block_with_ord::(0); let mut current_sstable_delta_reader = self.sstable_delta_reader_block(current_block_addr.clone())?; let mut current_ordinal = 0; for ord in ord { assert!(ord >= current_ordinal); // check if block changed for new term_ord - let new_block_addr = self.sstable_index.get_block_with_ord(ord); - if new_block_addr != current_block_addr { - current_block_addr = new_block_addr; + if ord >= next_block_ord { + (current_block_addr, next_block_ord) = + self.sstable_index.get_block_with_ord::(ord); current_ordinal = current_block_addr.first_ordinal; current_sstable_delta_reader = self.sstable_delta_reader_block(current_block_addr.clone())?; @@ -544,7 +545,7 @@ impl Dictionary { /// Returns the number of terms in the dictionary. pub fn term_info_from_ord(&self, term_ord: TermOrdinal) -> io::Result> { // find block in which the term would be - let block_addr = self.sstable_index.get_block_with_ord(term_ord); + let block_addr = self.sstable_index.get_block_with_ord::(term_ord).0; let first_ordinal = block_addr.first_ordinal; // then search inside that block only @@ -846,7 +847,7 @@ mod tests { fn test_ord_term_conversion() { let (dic, slice) = make_test_sstable(); - let block = dic.sstable_index.get_block_with_ord(100_000); + let block = dic.sstable_index.get_block_with_ord::(100_000).0; slice.restrict(block.byte_range); let mut res = Vec::new(); @@ -872,7 +873,11 @@ mod tests { // end of a block let ordinal = block.first_ordinal - 1; - let new_range = dic.sstable_index.get_block_with_ord(ordinal).byte_range; + let new_range = dic + .sstable_index + .get_block_with_ord::(ordinal) + .0 + .byte_range; slice.restrict(new_range); assert!(dic.ord_to_term(ordinal, &mut res).unwrap()); assert_eq!(res, format!("{ordinal:05X}").into_bytes()); @@ -882,7 +887,7 @@ mod tests { // before first block // 1st block must be loaded for key-related operations - let block = dic.sstable_index.get_block_with_ord(0); + let block = dic.sstable_index.get_block_with_ord::(0).0; slice.restrict(block.byte_range); assert!(dic.get(b"$$$").unwrap().is_none()); @@ -891,7 +896,11 @@ mod tests { // after last block // last block must be loaded for ord related operations let ordinal = 0x40000 + 10; - let new_range = dic.sstable_index.get_block_with_ord(ordinal).byte_range; + let new_range = dic + .sstable_index + .get_block_with_ord::(ordinal) + .0 + .byte_range; slice.restrict(new_range); assert!(!dic.ord_to_term(ordinal, &mut res).unwrap()); assert!(dic.term_info_from_ord(ordinal).unwrap().is_none()); diff --git a/sstable/src/sstable_index_v2.rs b/sstable/src/sstable_index_v2.rs index f0aa83ab0b..86147e1b25 100644 --- a/sstable/src/sstable_index_v2.rs +++ b/sstable/src/sstable_index_v2.rs @@ -72,9 +72,15 @@ impl SSTableIndex { } /// Get the [`BlockAddr`] of the block containing the `ord`-th term. - pub(crate) fn get_block_with_ord(&self, ord: TermOrdinal) -> BlockAddr { + pub(crate) fn get_block_with_ord(&self, ord: TermOrdinal) -> (BlockAddr, u64) { // locate_with_ord always returns an index within range - self.get_block(self.locate_with_ord(ord)).unwrap() + let block_pos = self.locate_with_ord(ord); + ( + self.get_block(block_pos).unwrap(), + self.get_block(block_pos + 1) + .map(|b| b.first_ordinal) + .unwrap_or(u64::MAX), + ) } pub(crate) fn get_block_for_automaton<'a>( diff --git a/sstable/src/sstable_index_v3.rs b/sstable/src/sstable_index_v3.rs index c2ab1fa07e..f2fceb05d9 100644 --- a/sstable/src/sstable_index_v3.rs +++ b/sstable/src/sstable_index_v3.rs @@ -58,10 +58,13 @@ impl SSTableIndex { } /// Get the [`BlockAddr`] of the block containing the `ord`-th term. - pub(crate) fn get_block_with_ord(&self, ord: TermOrdinal) -> BlockAddr { + pub(crate) fn get_block_with_ord( + &self, + ord: TermOrdinal, + ) -> (BlockAddr, u64) { match self { SSTableIndex::V2(v2_index) => v2_index.get_block_with_ord(ord), - SSTableIndex::V3(v3_index) => v3_index.get_block_with_ord(ord), + SSTableIndex::V3(v3_index) => v3_index.get_block_with_ord::(ord), SSTableIndex::V3Empty(v3_empty) => v3_empty.get_block_with_ord(ord), } } @@ -152,12 +155,18 @@ impl SSTableIndexV3 { } pub(crate) fn locate_with_ord(&self, ord: TermOrdinal) -> u64 { - self.block_addr_store.binary_search_ord(ord).0 + self.block_addr_store.binary_search_ord::(ord).0 } /// Get the [`BlockAddr`] of the block containing the `ord`-th term. - pub(crate) fn get_block_with_ord(&self, ord: TermOrdinal) -> BlockAddr { - self.block_addr_store.binary_search_ord(ord).1 + pub(crate) fn get_block_with_ord( + &self, + ord: TermOrdinal, + ) -> (BlockAddr, u64) { + let (_block_id, block_addr, next_ord) = self + .block_addr_store + .binary_search_ord::(ord); + (block_addr, next_ord) } pub(crate) fn get_block_for_automaton<'a>( @@ -253,8 +262,8 @@ impl SSTableIndexV3Empty { } /// Get the [`BlockAddr`] of the block containing the `ord`-th term. - pub(crate) fn get_block_with_ord(&self, _ord: TermOrdinal) -> BlockAddr { - self.block_addr.clone() + pub(crate) fn get_block_with_ord(&self, _ord: TermOrdinal) -> (BlockAddr, u64) { + (self.block_addr.clone(), u64::MAX) } } #[derive(Clone, Eq, PartialEq, Debug)] @@ -461,7 +470,11 @@ impl BlockAddrBlockMetadata { }) } - fn bisect_for_ord(&self, data: &[u8], target_ord: TermOrdinal) -> (u64, BlockAddr) { + fn bisect_for_ord( + &self, + data: &[u8], + target_ord: TermOrdinal, + ) -> (u64, BlockAddr, u64) { let inner_target_ord = target_ord - self.ref_block_addr.first_ordinal; let num_bits = self.num_bits() as usize; let range_start_nbits = self.range_start_nbits as usize; @@ -481,11 +494,17 @@ impl BlockAddrBlockMetadata { Err(inner_offset) => inner_offset, }; // we can unwrap because inner_offset <= self.block_len - ( - inner_offset, - self.deserialize_block_addr(data, inner_offset as usize) - .unwrap(), - ) + let block = self + .deserialize_block_addr(data, inner_offset as usize) + .unwrap(); + let next_ord = if FETCH_NEXT_ORD { + self.deserialize_block_addr(data, inner_offset as usize + 1) + .map(|b| b.first_ordinal) + .unwrap_or(u64::MAX) + } else { + 0 + }; + (inner_offset, block, next_ord) } } @@ -591,7 +610,10 @@ impl BlockAddrStore { ) } - fn binary_search_ord(&self, ord: TermOrdinal) -> (u64, BlockAddr) { + fn binary_search_ord( + &self, + ord: TermOrdinal, + ) -> (u64, BlockAddr, u64) { let max_block = (self.block_meta_bytes.len() / BlockAddrBlockMetadata::SIZE_IN_BYTES) as u64; let get_first_ordinal = |block_id| { @@ -606,20 +628,29 @@ impl BlockAddrStore { Ok(store_block_id) => { let block_id = store_block_id * STORE_BLOCK_LEN as u64; // we can unwrap because store_block_id < max_block - return (block_id, self.get(block_id).unwrap()); + let next_ord = if FETCH_NEXT_ORD { + self.get(block_id + 1) + .map(|b| b.first_ordinal) + .unwrap_or(u64::MAX) + } else { + 0 + }; + return (block_id, self.get(block_id).unwrap(), next_ord); } Err(store_block_id) => store_block_id - 1, }; // we can unwrap because store_block_id < max_block let block_addr_block_data = self.get_block_meta(store_block_id as usize).unwrap(); - let (inner_offset, block_addr) = block_addr_block_data.bisect_for_ord( - &self.addr_bytes[block_addr_block_data.offset as usize..], - ord, - ); + let (inner_offset, block_addr, next_block_ord) = block_addr_block_data + .bisect_for_ord::( + &self.addr_bytes[block_addr_block_data.offset as usize..], + ord, + ); ( store_block_id * STORE_BLOCK_LEN as u64 + inner_offset, block_addr, + next_block_ord, ) } }