diff --git a/examples/custom_collector.rs b/examples/custom_collector.rs index 29b6069308..355e134ba1 100644 --- a/examples/custom_collector.rs +++ b/examples/custom_collector.rs @@ -70,7 +70,7 @@ impl Collector for StatsCollector { fn for_segment( &self, _segment_local_id: u32, - segment_reader: &SegmentReader, + segment_reader: &dyn SegmentReader, ) -> tantivy::Result { let fast_field_reader = segment_reader.fast_fields().u64(&self.field)?; Ok(StatsSegmentCollector { diff --git a/examples/faceted_search_with_tweaked_score.rs b/examples/faceted_search_with_tweaked_score.rs index d21a1c3d4c..be5b6891b0 100644 --- a/examples/faceted_search_with_tweaked_score.rs +++ b/examples/faceted_search_with_tweaked_score.rs @@ -65,7 +65,7 @@ fn main() -> tantivy::Result<()> { ); let top_docs_by_custom_score = // Call TopDocs with a custom tweak score - TopDocs::with_limit(2).tweak_score(move |segment_reader: &SegmentReader| { + TopDocs::with_limit(2).tweak_score(move |segment_reader: &dyn SegmentReader| { let ingredient_reader = segment_reader.facet_reader("ingredient").unwrap(); let facet_dict = ingredient_reader.facet_dict(); diff --git a/examples/warmer.rs b/examples/warmer.rs index 1cae9d349a..ea71939d80 100644 --- a/examples/warmer.rs +++ b/examples/warmer.rs @@ -43,7 +43,7 @@ impl DynamicPriceColumn { } } - pub fn price_for_segment(&self, segment_reader: &SegmentReader) -> Option>> { + pub fn price_for_segment(&self, segment_reader: &dyn SegmentReader) -> Option>> { let segment_key = (segment_reader.segment_id(), segment_reader.delete_opstamp()); self.price_cache.read().unwrap().get(&segment_key).cloned() } @@ -157,7 +157,7 @@ fn main() -> tantivy::Result<()> { let query = query_parser.parse_query("cooking")?; let searcher = reader.searcher(); - let score_by_price = move |segment_reader: &SegmentReader| { + let score_by_price = move |segment_reader: &dyn SegmentReader| { let price = price_dynamic_column .price_for_segment(segment_reader) .unwrap(); diff --git a/src/aggregation/accessor_helpers.rs b/src/aggregation/accessor_helpers.rs index fa51041e47..22e13ac165 100644 --- a/src/aggregation/accessor_helpers.rs +++ b/src/aggregation/accessor_helpers.rs @@ -57,7 +57,7 @@ pub(crate) fn get_numeric_or_date_column_types() -> &'static [ColumnType] { /// Get fast field reader or empty as default. pub(crate) fn get_ff_reader( - reader: &SegmentReader, + reader: &dyn SegmentReader, field_name: &str, allowed_column_types: Option<&[ColumnType]>, ) -> crate::Result<(columnar::Column, ColumnType)> { @@ -74,7 +74,7 @@ pub(crate) fn get_ff_reader( } pub(crate) fn get_dynamic_columns( - reader: &SegmentReader, + reader: &dyn SegmentReader, field_name: &str, ) -> crate::Result> { let ff_fields = reader.fast_fields().dynamic_column_handles(field_name)?; @@ -90,7 +90,7 @@ pub(crate) fn get_dynamic_columns( /// /// Is guaranteed to return at least one column. pub(crate) fn get_all_ff_reader_or_empty( - reader: &SegmentReader, + reader: &dyn SegmentReader, field_name: &str, allowed_column_types: Option<&[ColumnType]>, fallback_type: ColumnType, diff --git a/src/aggregation/agg_data.rs b/src/aggregation/agg_data.rs index deedb57813..1433afae47 100644 --- a/src/aggregation/agg_data.rs +++ b/src/aggregation/agg_data.rs @@ -486,7 +486,7 @@ impl AggKind { /// Build AggregationsData by walking the request tree. pub(crate) fn build_aggregations_data_from_req( aggs: &Aggregations, - reader: &SegmentReader, + reader: &dyn SegmentReader, segment_ordinal: SegmentOrdinal, context: AggContextParams, ) -> crate::Result { @@ -505,7 +505,7 @@ pub(crate) fn build_aggregations_data_from_req( fn build_nodes( agg_name: &str, req: &Aggregation, - reader: &SegmentReader, + reader: &dyn SegmentReader, segment_ordinal: SegmentOrdinal, data: &mut AggregationsSegmentCtx, is_top_level: bool, @@ -750,7 +750,6 @@ fn build_nodes( let idx_in_req_data = data.push_filter_req_data(FilterAggReqData { name: agg_name.to_string(), req: filter_req.clone(), - segment_reader: reader.clone(), evaluator, matching_docs_buffer, }); @@ -766,7 +765,7 @@ fn build_nodes( fn build_children( aggs: &Aggregations, - reader: &SegmentReader, + reader: &dyn SegmentReader, segment_ordinal: SegmentOrdinal, data: &mut AggregationsSegmentCtx, ) -> crate::Result> { @@ -785,7 +784,7 @@ fn build_children( } fn get_term_agg_accessors( - reader: &SegmentReader, + reader: &dyn SegmentReader, field_name: &str, missing: &Option, ) -> crate::Result, ColumnType)>> { @@ -838,7 +837,7 @@ fn build_terms_or_cardinality_nodes( agg_name: &str, field_name: &str, missing: &Option, - reader: &SegmentReader, + reader: &dyn SegmentReader, segment_ordinal: SegmentOrdinal, data: &mut AggregationsSegmentCtx, sub_aggs: &Aggregations, diff --git a/src/aggregation/bucket/filter.rs b/src/aggregation/bucket/filter.rs index d4461bf1fe..0a68769801 100644 --- a/src/aggregation/bucket/filter.rs +++ b/src/aggregation/bucket/filter.rs @@ -397,8 +397,6 @@ pub struct FilterAggReqData { pub name: String, /// The filter aggregation pub req: FilterAggregation, - /// The segment reader - pub segment_reader: SegmentReader, /// Document evaluator for the filter query (precomputed BitSet) /// This is built once when the request data is created pub evaluator: DocumentQueryEvaluator, @@ -408,9 +406,8 @@ pub struct FilterAggReqData { impl FilterAggReqData { pub(crate) fn get_memory_consumption(&self) -> usize { - // Estimate: name + segment reader reference + bitset + buffer capacity + // Estimate: name + bitset + buffer capacity self.name.len() - + std::mem::size_of::() + self.evaluator.bitset.len() / 8 // BitSet memory (bits to bytes) + self.matching_docs_buffer.capacity() * std::mem::size_of::() } @@ -431,7 +428,7 @@ impl DocumentQueryEvaluator { pub(crate) fn new( query: Box, schema: Schema, - segment_reader: &SegmentReader, + segment_reader: &dyn SegmentReader, ) -> crate::Result { let max_doc = segment_reader.max_doc(); diff --git a/src/aggregation/collector.rs b/src/aggregation/collector.rs index 4c4c2c7f10..7712520738 100644 --- a/src/aggregation/collector.rs +++ b/src/aggregation/collector.rs @@ -66,7 +66,7 @@ impl Collector for DistributedAggregationCollector { fn for_segment( &self, segment_local_id: crate::SegmentOrdinal, - reader: &crate::SegmentReader, + reader: &dyn crate::SegmentReader, ) -> crate::Result { AggregationSegmentCollector::from_agg_req_and_reader( &self.agg, @@ -96,7 +96,7 @@ impl Collector for AggregationCollector { fn for_segment( &self, segment_local_id: crate::SegmentOrdinal, - reader: &crate::SegmentReader, + reader: &dyn crate::SegmentReader, ) -> crate::Result { AggregationSegmentCollector::from_agg_req_and_reader( &self.agg, @@ -145,7 +145,7 @@ impl AggregationSegmentCollector { /// reader. Also includes validation, e.g. checking field types and existence. pub fn from_agg_req_and_reader( agg: &Aggregations, - reader: &SegmentReader, + reader: &dyn SegmentReader, segment_ordinal: SegmentOrdinal, context: &AggContextParams, ) -> crate::Result { diff --git a/src/collector/count_collector.rs b/src/collector/count_collector.rs index dcd102249d..419f7a5d00 100644 --- a/src/collector/count_collector.rs +++ b/src/collector/count_collector.rs @@ -43,7 +43,7 @@ impl Collector for Count { fn for_segment( &self, _: SegmentOrdinal, - _: &SegmentReader, + _: &dyn SegmentReader, ) -> crate::Result { Ok(SegmentCountCollector::default()) } diff --git a/src/collector/custom_score_top_collector.rs b/src/collector/custom_score_top_collector.rs index 54d42469eb..9d84a7c8e2 100644 --- a/src/collector/custom_score_top_collector.rs +++ b/src/collector/custom_score_top_collector.rs @@ -40,7 +40,7 @@ pub trait CustomScorer: Sync { type Child: CustomSegmentScorer; /// Builds a child scorer for a specific segment. The child scorer is associated with /// a specific segment. - fn segment_scorer(&self, segment_reader: &SegmentReader) -> crate::Result; + fn segment_scorer(&self, segment_reader: &dyn SegmentReader) -> crate::Result; } impl Collector for CustomScoreTopCollector @@ -55,7 +55,7 @@ where fn for_segment( &self, segment_local_id: u32, - segment_reader: &SegmentReader, + segment_reader: &dyn SegmentReader, ) -> crate::Result { let segment_collector = self.collector.for_segment(segment_local_id, segment_reader); let segment_scorer = self.custom_scorer.segment_scorer(segment_reader)?; @@ -102,12 +102,12 @@ where impl CustomScorer for F where - F: 'static + Send + Sync + Fn(&SegmentReader) -> T, + F: 'static + Send + Sync + Fn(&dyn SegmentReader) -> T, T: CustomSegmentScorer, { type Child = T; - fn segment_scorer(&self, segment_reader: &SegmentReader) -> crate::Result { + fn segment_scorer(&self, segment_reader: &dyn SegmentReader) -> crate::Result { Ok((self)(segment_reader)) } } diff --git a/src/collector/docset_collector.rs b/src/collector/docset_collector.rs index a27a394189..7c738d00ba 100644 --- a/src/collector/docset_collector.rs +++ b/src/collector/docset_collector.rs @@ -15,7 +15,7 @@ impl Collector for DocSetCollector { fn for_segment( &self, segment_local_id: crate::SegmentOrdinal, - _segment: &crate::SegmentReader, + _segment: &dyn crate::SegmentReader, ) -> crate::Result { Ok(DocSetChildCollector { segment_local_id, diff --git a/src/collector/facet_collector.rs b/src/collector/facet_collector.rs index a94ec03e81..c8992b6ee5 100644 --- a/src/collector/facet_collector.rs +++ b/src/collector/facet_collector.rs @@ -265,7 +265,7 @@ impl Collector for FacetCollector { fn for_segment( &self, _: SegmentOrdinal, - reader: &SegmentReader, + reader: &dyn SegmentReader, ) -> crate::Result { let facet_reader = reader.facet_reader(&self.field_name)?; let facet_dict = facet_reader.facet_dict(); diff --git a/src/collector/filter_collector_wrapper.rs b/src/collector/filter_collector_wrapper.rs index 167ec980bd..0b8d8ab48e 100644 --- a/src/collector/filter_collector_wrapper.rs +++ b/src/collector/filter_collector_wrapper.rs @@ -107,7 +107,7 @@ where fn for_segment( &self, segment_local_id: u32, - segment_reader: &SegmentReader, + segment_reader: &dyn SegmentReader, ) -> crate::Result { let column_opt = segment_reader.fast_fields().column_opt(&self.field)?; @@ -261,7 +261,7 @@ where fn for_segment( &self, segment_local_id: u32, - segment_reader: &SegmentReader, + segment_reader: &dyn SegmentReader, ) -> crate::Result { let column_opt = segment_reader.fast_fields().bytes(&self.field)?; diff --git a/src/collector/histogram_collector.rs b/src/collector/histogram_collector.rs index 51105e7b1c..87742a9782 100644 --- a/src/collector/histogram_collector.rs +++ b/src/collector/histogram_collector.rs @@ -110,7 +110,7 @@ impl Collector for HistogramCollector { fn for_segment( &self, _segment_local_id: crate::SegmentOrdinal, - segment: &crate::SegmentReader, + segment: &dyn crate::SegmentReader, ) -> crate::Result { let column_opt = segment.fast_fields().u64_lenient(&self.field)?; let (column, _column_type) = column_opt.ok_or_else(|| FastFieldNotAvailableError { diff --git a/src/collector/mod.rs b/src/collector/mod.rs index a31754316e..b73a4ca2c5 100644 --- a/src/collector/mod.rs +++ b/src/collector/mod.rs @@ -150,7 +150,7 @@ pub trait Collector: Sync + Send { fn for_segment( &self, segment_local_id: SegmentOrdinal, - segment: &SegmentReader, + segment: &dyn SegmentReader, ) -> crate::Result; /// Returns true iff the collector requires to compute scores for documents. @@ -168,7 +168,7 @@ pub trait Collector: Sync + Send { &self, weight: &dyn Weight, segment_ord: u32, - reader: &SegmentReader, + reader: &dyn SegmentReader, ) -> crate::Result<::Fruit> { let mut segment_collector = self.for_segment(segment_ord, reader)?; @@ -227,7 +227,7 @@ impl Collector for Option { fn for_segment( &self, segment_local_id: SegmentOrdinal, - segment: &SegmentReader, + segment: &dyn SegmentReader, ) -> crate::Result { Ok(if let Some(inner) = self { let inner_segment_collector = inner.for_segment(segment_local_id, segment)?; @@ -302,7 +302,7 @@ where fn for_segment( &self, segment_local_id: u32, - segment: &SegmentReader, + segment: &dyn SegmentReader, ) -> crate::Result { let left = self.0.for_segment(segment_local_id, segment)?; let right = self.1.for_segment(segment_local_id, segment)?; @@ -361,7 +361,7 @@ where fn for_segment( &self, segment_local_id: u32, - segment: &SegmentReader, + segment: &dyn SegmentReader, ) -> crate::Result { let one = self.0.for_segment(segment_local_id, segment)?; let two = self.1.for_segment(segment_local_id, segment)?; @@ -427,7 +427,7 @@ where fn for_segment( &self, segment_local_id: u32, - segment: &SegmentReader, + segment: &dyn SegmentReader, ) -> crate::Result { let one = self.0.for_segment(segment_local_id, segment)?; let two = self.1.for_segment(segment_local_id, segment)?; diff --git a/src/collector/multi_collector.rs b/src/collector/multi_collector.rs index 8077577d29..1e50e4e9fa 100644 --- a/src/collector/multi_collector.rs +++ b/src/collector/multi_collector.rs @@ -19,7 +19,7 @@ impl Collector for CollectorWrapper { fn for_segment( &self, segment_local_id: u32, - reader: &SegmentReader, + reader: &dyn SegmentReader, ) -> crate::Result> { let child = self.0.for_segment(segment_local_id, reader)?; Ok(Box::new(SegmentCollectorWrapper(child))) @@ -197,7 +197,7 @@ impl Collector for MultiCollector<'_> { fn for_segment( &self, segment_local_id: SegmentOrdinal, - segment: &SegmentReader, + segment: &dyn SegmentReader, ) -> crate::Result { let children = self .collector_wrappers diff --git a/src/collector/tests.rs b/src/collector/tests.rs index 7af7c6d8ce..ed6284c235 100644 --- a/src/collector/tests.rs +++ b/src/collector/tests.rs @@ -106,7 +106,7 @@ impl Collector for TestCollector { fn for_segment( &self, segment_id: SegmentOrdinal, - _reader: &SegmentReader, + _reader: &dyn SegmentReader, ) -> crate::Result { Ok(TestSegmentCollector { segment_id, @@ -177,7 +177,7 @@ impl Collector for FastFieldTestCollector { fn for_segment( &self, _: SegmentOrdinal, - segment_reader: &SegmentReader, + segment_reader: &dyn SegmentReader, ) -> crate::Result { let reader = segment_reader .fast_fields() @@ -240,7 +240,7 @@ impl Collector for BytesFastFieldTestCollector { fn for_segment( &self, _segment_local_id: u32, - segment_reader: &SegmentReader, + segment_reader: &dyn SegmentReader, ) -> crate::Result { let column_opt = segment_reader.fast_fields().bytes(&self.field)?; Ok(BytesFastFieldSegmentCollector { diff --git a/src/collector/top_collector.rs b/src/collector/top_collector.rs index 29ff086005..4efa9d66f2 100644 --- a/src/collector/top_collector.rs +++ b/src/collector/top_collector.rs @@ -124,7 +124,7 @@ where T: PartialOrd + Clone pub(crate) fn for_segment( &self, segment_id: SegmentOrdinal, - _: &SegmentReader, + _: &dyn SegmentReader, ) -> TopSegmentCollector { TopSegmentCollector::new(segment_id, self.limit + self.offset) } diff --git a/src/collector/top_score_collector.rs b/src/collector/top_score_collector.rs index 33c5df59e1..fcf8fdfd38 100644 --- a/src/collector/top_score_collector.rs +++ b/src/collector/top_score_collector.rs @@ -41,7 +41,7 @@ where fn for_segment( &self, segment_local_id: crate::SegmentOrdinal, - segment: &SegmentReader, + segment: &dyn SegmentReader, ) -> crate::Result { let schema = segment.schema(); let field = schema.get_field(&self.field)?; @@ -102,7 +102,7 @@ impl Collector for StringConvertCollector { fn for_segment( &self, segment_local_id: crate::SegmentOrdinal, - segment: &SegmentReader, + segment: &dyn SegmentReader, ) -> crate::Result { let schema = segment.schema(); let field = schema.get_field(&self.field)?; @@ -323,7 +323,7 @@ struct ScorerByField { impl CustomScorer for ScorerByField { type Child = ScorerByFastFieldReader; - fn segment_scorer(&self, segment_reader: &SegmentReader) -> crate::Result { + fn segment_scorer(&self, segment_reader: &dyn SegmentReader) -> crate::Result { // We interpret this field as u64, regardless of its type, that way, // we avoid needless conversion. Regardless of the fast field type, the // mapping is monotonic, so it is sufficient to compute our top-K docs. @@ -657,7 +657,7 @@ impl TopDocs { /// // This is where we build our collector with our custom score. /// let top_docs_by_custom_score = TopDocs /// ::with_limit(10) - /// .tweak_score(move |segment_reader: &SegmentReader| { + /// .tweak_score(move |segment_reader: &dyn SegmentReader| { /// // The argument is a function that returns our scoring /// // function. /// // @@ -759,7 +759,7 @@ impl TopDocs { /// // This is where we build our collector with our custom score. /// let top_docs_by_custom_score = TopDocs /// ::with_limit(10) - /// .custom_score(move |segment_reader: &SegmentReader| { + /// .custom_score(move |segment_reader: &dyn SegmentReader| { /// // The argument is a function that returns our scoring /// // function. /// // @@ -824,7 +824,7 @@ impl Collector for TopDocs { fn for_segment( &self, segment_local_id: SegmentOrdinal, - reader: &SegmentReader, + reader: &dyn SegmentReader, ) -> crate::Result { let collector = self.0.for_segment(segment_local_id, reader); Ok(TopScoreSegmentCollector(collector)) @@ -845,7 +845,7 @@ impl Collector for TopDocs { &self, weight: &dyn Weight, segment_ord: u32, - reader: &SegmentReader, + reader: &dyn SegmentReader, ) -> crate::Result<::Fruit> { let heap_len = self.0.limit + self.0.offset; let mut top_n: TopNComputer<_, _> = TopNComputer::new(heap_len); @@ -1875,7 +1875,9 @@ mod tests { let query_parser = QueryParser::for_index(&index, vec![field]); let text_query = query_parser.parse_query("droopy tax")?; let collector = TopDocs::with_limit(2).and_offset(1).tweak_score( - move |_segment_reader: &SegmentReader| move |doc: DocId, _original_score: Score| doc, + move |_segment_reader: &dyn SegmentReader| { + move |doc: DocId, _original_score: Score| doc + }, ); let score_docs: Vec<(u32, DocAddress)> = index.reader()?.searcher().search(&text_query, &collector)?; @@ -1894,7 +1896,7 @@ mod tests { let text_query = query_parser.parse_query("droopy tax").unwrap(); let collector = TopDocs::with_limit(2) .and_offset(1) - .custom_score(move |_segment_reader: &SegmentReader| move |doc: DocId| doc); + .custom_score(move |_segment_reader: &dyn SegmentReader| move |doc: DocId| doc); let score_docs: Vec<(u32, DocAddress)> = index .reader() .unwrap() diff --git a/src/collector/tweak_score_top_collector.rs b/src/collector/tweak_score_top_collector.rs index e7e8d1547a..9833e636a9 100644 --- a/src/collector/tweak_score_top_collector.rs +++ b/src/collector/tweak_score_top_collector.rs @@ -42,7 +42,7 @@ pub trait ScoreTweaker: Sync { /// Builds a child tweaker for a specific segment. The child scorer is associated with /// a specific segment. - fn segment_tweaker(&self, segment_reader: &SegmentReader) -> Result; + fn segment_tweaker(&self, segment_reader: &dyn SegmentReader) -> Result; } impl Collector for TweakedScoreTopCollector @@ -57,7 +57,7 @@ where fn for_segment( &self, segment_local_id: u32, - segment_reader: &SegmentReader, + segment_reader: &dyn SegmentReader, ) -> Result { let segment_scorer = self.score_tweaker.segment_tweaker(segment_reader)?; let segment_collector = self.collector.for_segment(segment_local_id, segment_reader); @@ -105,12 +105,12 @@ where impl ScoreTweaker for F where - F: 'static + Send + Sync + Fn(&SegmentReader) -> TSegmentScoreTweaker, + F: 'static + Send + Sync + Fn(&dyn SegmentReader) -> TSegmentScoreTweaker, TSegmentScoreTweaker: ScoreSegmentTweaker, { type Child = TSegmentScoreTweaker; - fn segment_tweaker(&self, segment_reader: &SegmentReader) -> Result { + fn segment_tweaker(&self, segment_reader: &dyn SegmentReader) -> Result { Ok((self)(segment_reader)) } } diff --git a/src/core/searcher.rs b/src/core/searcher.rs index 51db7311f4..c26370390c 100644 --- a/src/core/searcher.rs +++ b/src/core/searcher.rs @@ -4,7 +4,7 @@ use std::{fmt, io}; use crate::collector::Collector; use crate::core::Executor; -use crate::index::{SegmentId, SegmentReader}; +use crate::index::{ArcSegmentReader, SegmentId, SegmentReader}; use crate::query::{Bm25StatisticsProvider, EnableScoring, Query}; use crate::schema::document::DocumentDeserialize; use crate::schema::{Schema, Term}; @@ -36,7 +36,7 @@ pub struct SearcherGeneration { impl SearcherGeneration { pub(crate) fn from_segment_readers( - segment_readers: &[SegmentReader], + segment_readers: &[ArcSegmentReader], generation_id: u64, ) -> Self { let mut segment_id_to_del_opstamp = BTreeMap::new(); @@ -133,7 +133,7 @@ impl Searcher { pub fn doc_freq(&self, term: &Term) -> crate::Result { let mut total_doc_freq = 0; for segment_reader in &self.inner.segment_readers { - let inverted_index = segment_reader.inverted_index(term.field())?; + let inverted_index = segment_reader.as_ref().inverted_index(term.field())?; let doc_freq = inverted_index.doc_freq(term)?; total_doc_freq += u64::from(doc_freq); } @@ -146,7 +146,7 @@ impl Searcher { pub async fn doc_freq_async(&self, term: &Term) -> crate::Result { let mut total_doc_freq = 0; for segment_reader in &self.inner.segment_readers { - let inverted_index = segment_reader.inverted_index(term.field())?; + let inverted_index = segment_reader.as_ref().inverted_index(term.field())?; let doc_freq = inverted_index.doc_freq_async(term).await?; total_doc_freq += u64::from(doc_freq); } @@ -154,13 +154,13 @@ impl Searcher { } /// Return the list of segment readers - pub fn segment_readers(&self) -> &[SegmentReader] { + pub fn segment_readers(&self) -> &[ArcSegmentReader] { &self.inner.segment_readers } /// Returns the segment_reader associated with the given segment_ord - pub fn segment_reader(&self, segment_ord: u32) -> &SegmentReader { - &self.inner.segment_readers[segment_ord as usize] + pub fn segment_reader(&self, segment_ord: u32) -> &dyn SegmentReader { + self.inner.segment_readers[segment_ord as usize].as_ref() } /// Runs a query on the segment readers wrapped by the searcher. @@ -228,7 +228,11 @@ impl Searcher { let segment_readers = self.segment_readers(); let fruits = executor.map( |(segment_ord, segment_reader)| { - collector.collect_segment(weight.as_ref(), segment_ord as u32, segment_reader) + collector.collect_segment( + weight.as_ref(), + segment_ord as u32, + segment_reader.as_ref(), + ) }, segment_readers.iter().enumerate(), )?; @@ -258,7 +262,7 @@ impl From> for Searcher { pub(crate) struct SearcherInner { schema: Schema, index: Index, - segment_readers: Vec, + segment_readers: Vec, store_readers: Vec, generation: TrackedObject, } @@ -268,7 +272,7 @@ impl SearcherInner { pub(crate) fn new( schema: Schema, index: Index, - segment_readers: Vec, + segment_readers: Vec, generation: TrackedObject, doc_store_cache_num_blocks: usize, ) -> io::Result { @@ -300,7 +304,7 @@ impl fmt::Debug for Searcher { let segment_ids = self .segment_readers() .iter() - .map(SegmentReader::segment_id) + .map(|segment_reader| segment_reader.segment_id()) .collect::>(); write!(f, "Searcher({segment_ids:?})") } diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index db1ce0d77f..451b619409 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -96,7 +96,7 @@ mod tests { }; use crate::time::OffsetDateTime; use crate::tokenizer::{LowerCaser, RawTokenizer, TextAnalyzer, TokenizerManager}; - use crate::{Index, IndexWriter, SegmentReader}; + use crate::{Index, IndexWriter}; pub static SCHEMA: Lazy = Lazy::new(|| { let mut schema_builder = Schema::builder(); @@ -430,7 +430,7 @@ mod tests { .searcher() .segment_readers() .iter() - .map(SegmentReader::segment_id) + .map(|segment_reader| segment_reader.segment_id()) .collect(); assert_eq!(segment_ids.len(), 2); index_writer.merge(&segment_ids[..]).wait().unwrap(); diff --git a/src/index/index.rs b/src/index/index.rs index 5495ddcede..b00a59ed1f 100644 --- a/src/index/index.rs +++ b/src/index/index.rs @@ -24,7 +24,7 @@ use crate::reader::{IndexReader, IndexReaderBuilder}; use crate::schema::document::Document; use crate::schema::{Field, FieldType, Schema}; use crate::tokenizer::{TextAnalyzer, TokenizerManager}; -use crate::SegmentReader; +use crate::{SegmentReader, TantivySegmentReader}; fn load_metas( directory: &dyn Directory, @@ -492,7 +492,7 @@ impl Index { let segments = self.searchable_segments()?; let fields_metadata: Vec> = segments .into_iter() - .map(|segment| SegmentReader::open(&segment)?.fields_metadata()) + .map(|segment| TantivySegmentReader::open(&segment)?.fields_metadata()) .collect::>()?; Ok(merge_field_meta_data(fields_metadata)) } diff --git a/src/index/mod.rs b/src/index/mod.rs index 76dc3ed9b6..59d7e7a3cb 100644 --- a/src/index/mod.rs +++ b/src/index/mod.rs @@ -17,4 +17,6 @@ pub use self::inverted_index_reader::InvertedIndexReader; pub use self::segment::Segment; pub use self::segment_component::SegmentComponent; pub use self::segment_id::SegmentId; -pub use self::segment_reader::{FieldMetadata, SegmentReader}; +pub use self::segment_reader::{ + ArcSegmentReader, FieldMetadata, SegmentReader, TantivySegmentReader, +}; diff --git a/src/index/segment_reader.rs b/src/index/segment_reader.rs index f5589a6902..8f252f538c 100644 --- a/src/index/segment_reader.rs +++ b/src/index/segment_reader.rs @@ -9,7 +9,7 @@ use itertools::Itertools; use crate::directory::{CompositeFile, FileSlice}; use crate::error::DataCorruption; use crate::fastfield::{intersect_alive_bitsets, AliveBitSet, FacetReader, FastFieldReaders}; -use crate::fieldnorm::{FieldNormReader, FieldNormReaders}; +use crate::fieldnorm::FieldNormReaders; use crate::index::{InvertedIndexReader, Segment, SegmentComponent, SegmentId}; use crate::json_utils::json_path_sep_to_dot; use crate::schema::{Field, IndexRecordOption, Schema, Type}; @@ -18,82 +18,35 @@ use crate::store::StoreReader; use crate::termdict::TermDictionary; use crate::{DocId, Opstamp}; -/// Entry point to access all of the datastructures of the `Segment` +/// Abstraction over a segment reader for accessing all data structures of a segment. /// -/// - term dictionary -/// - postings -/// - store -/// - fast field readers -/// - field norm reader -/// -/// The segment reader has a very low memory footprint, -/// as close to all of the memory data is mmapped. -#[derive(Clone)] -pub struct SegmentReader { - inv_idx_reader_cache: Arc>>>, - - segment_id: SegmentId, - delete_opstamp: Option, - - max_doc: DocId, - num_docs: DocId, - - termdict_composite: CompositeFile, - postings_composite: CompositeFile, - positions_composite: CompositeFile, - fast_fields_readers: FastFieldReaders, - fieldnorm_readers: FieldNormReaders, +/// This trait exists to decouple the query layer from the concrete on-disk layout. Alternative +/// codecs can implement it to expose their own segment representation. +pub trait SegmentReader: Send + Sync { + /// Highest document id ever attributed in this segment + 1. + fn max_doc(&self) -> DocId; - store_file: FileSlice, - alive_bitset_opt: Option, - schema: Schema, -} - -impl SegmentReader { - /// Returns the highest document id ever attributed in - /// this segment + 1. - pub fn max_doc(&self) -> DocId { - self.max_doc - } - - /// Returns the number of alive documents. - /// Deleted documents are not counted. - pub fn num_docs(&self) -> DocId { - self.num_docs - } + /// Number of alive documents. Deleted documents are not counted. + fn num_docs(&self) -> DocId; /// Returns the schema of the index this segment belongs to. - pub fn schema(&self) -> &Schema { - &self.schema - } + fn schema(&self) -> &Schema; - /// Return the number of documents that have been - /// deleted in the segment. - pub fn num_deleted_docs(&self) -> DocId { - self.max_doc - self.num_docs + /// Return the number of documents that have been deleted in the segment. + fn num_deleted_docs(&self) -> DocId { + self.max_doc() - self.num_docs() } /// Returns true if some of the documents of the segment have been deleted. - pub fn has_deletes(&self) -> bool { + fn has_deletes(&self) -> bool { self.num_deleted_docs() > 0 } - /// Accessor to a segment's fast field reader given a field. - /// - /// Returns the u64 fast value reader if the field - /// is a u64 field indexed as "fast". - /// - /// Return a FastFieldNotAvailableError if the field is not - /// declared as a fast field in the schema. - /// - /// # Panics - /// May panic if the index is corrupted. - pub fn fast_fields(&self) -> &FastFieldReaders { - &self.fast_fields_readers - } + /// Accessor to a segment's fast field reader. + fn fast_fields(&self) -> &FastFieldReaders; /// Accessor to the `FacetReader` associated with a given `Field`. - pub fn facet_reader(&self, field_name: &str) -> crate::Result { + fn facet_reader(&self, field_name: &str) -> crate::Result { let schema = self.schema(); let field = schema.get_field(field_name)?; let field_entry = schema.get_field_entry(field); @@ -108,39 +61,84 @@ impl SegmentReader { Ok(FacetReader::new(facet_column)) } - /// Accessor to the segment's `Field norms`'s reader. - /// - /// Field norms are the length (in tokens) of the fields. - /// It is used in the computation of the [TfIdf](https://fulmicoton.gitbooks.io/tantivy-doc/content/tfidf.html). - /// - /// They are simply stored as a fast field, serialized in - /// the `.fieldnorm` file of the segment. - pub fn get_fieldnorms_reader(&self, field: Field) -> crate::Result { - self.fieldnorm_readers.get_field(field)?.ok_or_else(|| { - let field_name = self.schema.get_field_name(field); - let err_msg = format!( - "Field norm not found for field {field_name:?}. Was the field set to record norm \ - during indexing?" - ); - crate::TantivyError::SchemaError(err_msg) - }) - } + /// Accessor to the segment's field norms readers container. + fn fieldnorms_readers(&self) -> &FieldNormReaders; - #[doc(hidden)] - pub fn fieldnorms_readers(&self) -> &FieldNormReaders { - &self.fieldnorm_readers + /// Accessor to the segment's [`StoreReader`](crate::store::StoreReader). + fn get_store_reader(&self, cache_num_blocks: usize) -> io::Result; + + /// Returns a field reader associated with the field given in argument. + fn inverted_index(&self, field: Field) -> crate::Result>; + + /// Returns the list of fields that have been indexed in the segment. + fn fields_metadata(&self) -> crate::Result>; + + /// Returns the segment id + fn segment_id(&self) -> SegmentId; + + /// Returns the delete opstamp + fn delete_opstamp(&self) -> Option; + + /// Returns the bitset representing the alive `DocId`s. + fn alive_bitset(&self) -> Option<&AliveBitSet>; + + /// Returns true if the `doc` is marked as deleted. + fn is_deleted(&self, doc: DocId) -> bool { + self.alive_bitset() + .map(|alive_bitset| alive_bitset.is_deleted(doc)) + .unwrap_or(false) } - /// Accessor to the segment's [`StoreReader`](crate::store::StoreReader). - /// - /// `cache_num_blocks` sets the number of decompressed blocks to be cached in an LRU. - /// The size of blocks is configurable, this should be reflexted in the - pub fn get_store_reader(&self, cache_num_blocks: usize) -> io::Result { - StoreReader::open(self.store_file.clone(), cache_num_blocks) + /// Returns an iterator that will iterate over the alive document ids + fn doc_ids_alive(&self) -> Box + Send + '_> { + if let Some(alive_bitset) = &self.alive_bitset() { + Box::new(alive_bitset.iter_alive()) + } else { + Box::new(0u32..self.max_doc()) + } } + /// Summarize total space usage of this segment. + fn space_usage(&self) -> io::Result; +} + +/// Convenient alias for an atomically reference counted segment reader handle. +pub type ArcSegmentReader = Arc; + +/// Entry point to access all of the datastructures of the `Segment` +/// +/// - term dictionary +/// - postings +/// - store +/// - fast field readers +/// - field norm reader +/// +/// The segment reader has a very low memory footprint, +/// as close to all of the memory data is mmapped. +#[derive(Clone)] +pub struct TantivySegmentReader { + inv_idx_reader_cache: Arc>>>, + + segment_id: SegmentId, + delete_opstamp: Option, + + max_doc: DocId, + num_docs: DocId, + + termdict_composite: CompositeFile, + postings_composite: CompositeFile, + positions_composite: CompositeFile, + fast_fields_readers: FastFieldReaders, + fieldnorm_readers: FieldNormReaders, + + store_file: FileSlice, + alive_bitset_opt: Option, + schema: Schema, +} + +impl TantivySegmentReader { /// Open a new segment for reading. - pub fn open(segment: &Segment) -> crate::Result { + pub fn open(segment: &Segment) -> crate::Result { Self::open_with_custom_alive_set(segment, None) } @@ -148,7 +146,7 @@ impl SegmentReader { pub fn open_with_custom_alive_set( segment: &Segment, custom_bitset: Option, - ) -> crate::Result { + ) -> crate::Result { let termdict_file = segment.open_read(SegmentComponent::Terms)?; let termdict_composite = CompositeFile::open(&termdict_file)?; @@ -190,7 +188,7 @@ impl SegmentReader { .map(|alive_bitset| alive_bitset.num_alive_docs() as u32) .unwrap_or(max_doc); - Ok(SegmentReader { + Ok(TantivySegmentReader { inv_idx_reader_cache: Default::default(), num_docs, max_doc, @@ -206,6 +204,52 @@ impl SegmentReader { schema, }) } +} + +impl SegmentReader for TantivySegmentReader { + /// Returns the highest document id ever attributed in + /// this segment + 1. + fn max_doc(&self) -> DocId { + self.max_doc + } + + /// Returns the number of alive documents. + /// Deleted documents are not counted. + fn num_docs(&self) -> DocId { + self.num_docs + } + + /// Returns the schema of the index this segment belongs to. + fn schema(&self) -> &Schema { + &self.schema + } + + /// Accessor to a segment's fast field reader given a field. + /// + /// Returns the u64 fast value reader if the field + /// is a u64 field indexed as "fast". + /// + /// Return a FastFieldNotAvailableError if the field is not + /// declared as a fast field in the schema. + /// + /// # Panics + /// May panic if the index is corrupted. + fn fast_fields(&self) -> &FastFieldReaders { + &self.fast_fields_readers + } + + #[doc(hidden)] + fn fieldnorms_readers(&self) -> &FieldNormReaders { + &self.fieldnorm_readers + } + + /// Accessor to the segment's [`StoreReader`](crate::store::StoreReader). + /// + /// `cache_num_blocks` sets the number of decompressed blocks to be cached in an LRU. + /// The size of blocks is configurable, this should be reflexted in the + fn get_store_reader(&self, cache_num_blocks: usize) -> io::Result { + StoreReader::open(self.store_file.clone(), cache_num_blocks) + } /// Returns a field reader associated with the field given in argument. /// If the field was not present in the index during indexing time, @@ -219,7 +263,7 @@ impl SegmentReader { /// is returned. /// Similarly, if the field is marked as indexed but no term has been indexed for the given /// index, an empty `InvertedIndexReader` is returned (but no warning is logged). - pub fn inverted_index(&self, field: Field) -> crate::Result> { + fn inverted_index(&self, field: Field) -> crate::Result> { if let Some(inv_idx_reader) = self .inv_idx_reader_cache .read() @@ -298,7 +342,7 @@ impl SegmentReader { /// Disclaimer: Some fields may not be listed here. For instance, if the schema contains a json /// field that is not indexed nor a fast field but is stored, it is possible for the field /// to not be listed. - pub fn fields_metadata(&self) -> crate::Result> { + fn fields_metadata(&self) -> crate::Result> { let mut indexed_fields: Vec = Vec::new(); let mut map_to_canonical = FnvHashMap::default(); for (field, field_entry) in self.schema().fields() { @@ -420,39 +464,22 @@ impl SegmentReader { } /// Returns the segment id - pub fn segment_id(&self) -> SegmentId { + fn segment_id(&self) -> SegmentId { self.segment_id } /// Returns the delete opstamp - pub fn delete_opstamp(&self) -> Option { + fn delete_opstamp(&self) -> Option { self.delete_opstamp } /// Returns the bitset representing the alive `DocId`s. - pub fn alive_bitset(&self) -> Option<&AliveBitSet> { + fn alive_bitset(&self) -> Option<&AliveBitSet> { self.alive_bitset_opt.as_ref() } - /// Returns true if the `doc` is marked - /// as deleted. - pub fn is_deleted(&self, doc: DocId) -> bool { - self.alive_bitset() - .map(|alive_bitset| alive_bitset.is_deleted(doc)) - .unwrap_or(false) - } - - /// Returns an iterator that will iterate over the alive document ids - pub fn doc_ids_alive(&self) -> Box + Send + '_> { - if let Some(alive_bitset) = &self.alive_bitset_opt { - Box::new(alive_bitset.iter_alive()) - } else { - Box::new(0u32..self.max_doc) - } - } - /// Summarize total space usage of this segment. - pub fn space_usage(&self) -> io::Result { + fn space_usage(&self) -> io::Result { Ok(SegmentSpaceUsage::new( self.num_docs(), self.termdict_composite.space_usage(), @@ -576,7 +603,7 @@ fn intersect_alive_bitset( } } -impl fmt::Debug for SegmentReader { +impl fmt::Debug for TantivySegmentReader { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "SegmentReader({:?})", self.segment_id) } diff --git a/src/indexer/delete_queue.rs b/src/indexer/delete_queue.rs index 3aa9f0d858..d6b6b17ed3 100644 --- a/src/indexer/delete_queue.rs +++ b/src/indexer/delete_queue.rs @@ -250,11 +250,15 @@ mod tests { struct DummyWeight; impl Weight for DummyWeight { - fn scorer(&self, _reader: &SegmentReader, _boost: Score) -> crate::Result> { + fn scorer( + &self, + _reader: &dyn SegmentReader, + _boost: Score, + ) -> crate::Result> { Err(crate::TantivyError::InternalError("dummy impl".to_owned())) } - fn explain(&self, _reader: &SegmentReader, _doc: DocId) -> crate::Result { + fn explain(&self, _reader: &dyn SegmentReader, _doc: DocId) -> crate::Result { Err(crate::TantivyError::InternalError("dummy impl".to_owned())) } } diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index 7f993027e2..e43965a62b 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -12,7 +12,9 @@ use super::{AddBatch, AddBatchReceiver, AddBatchSender, PreparedCommit}; use crate::directory::{DirectoryLock, GarbageCollectionResult, TerminatingWrite}; use crate::error::TantivyError; use crate::fastfield::write_alive_bitset; -use crate::index::{Index, Segment, SegmentComponent, SegmentId, SegmentMeta, SegmentReader}; +use crate::index::{ + Index, Segment, SegmentComponent, SegmentId, SegmentMeta, SegmentReader, TantivySegmentReader, +}; use crate::indexer::delete_queue::{DeleteCursor, DeleteQueue}; use crate::indexer::doc_opstamp_mapping::DocToOpstampMapping; use crate::indexer::index_writer_status::IndexWriterStatus; @@ -94,7 +96,7 @@ pub struct IndexWriter { fn compute_deleted_bitset( alive_bitset: &mut BitSet, - segment_reader: &SegmentReader, + segment_reader: &dyn SegmentReader, delete_cursor: &mut DeleteCursor, doc_opstamps: &DocToOpstampMapping, target_opstamp: Opstamp, @@ -143,7 +145,7 @@ pub(crate) fn advance_deletes( return Ok(()); } - let segment_reader = SegmentReader::open(&segment)?; + let segment_reader = TantivySegmentReader::open(&segment)?; let max_doc = segment_reader.max_doc(); let mut alive_bitset: BitSet = match segment_entry.alive_bitset() { @@ -243,7 +245,7 @@ fn apply_deletes( .max() .expect("Empty DocOpstamp is forbidden"); - let segment_reader = SegmentReader::open(segment)?; + let segment_reader = TantivySegmentReader::open(segment)?; let doc_to_opstamps = DocToOpstampMapping::WithMap(doc_opstamps); let max_doc = segment.meta().max_doc(); diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 1af64607bd..91c4a6a808 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -12,7 +12,7 @@ use crate::docset::{DocSet, TERMINATED}; use crate::error::DataCorruption; use crate::fastfield::AliveBitSet; use crate::fieldnorm::{FieldNormReader, FieldNormReaders, FieldNormsSerializer, FieldNormsWriter}; -use crate::index::{Segment, SegmentComponent, SegmentReader}; +use crate::index::{Segment, SegmentComponent, SegmentReader, TantivySegmentReader}; use crate::indexer::doc_id_mapping::{MappingType, SegmentDocIdMapping}; use crate::indexer::SegmentSerializer; use crate::postings::{InvertedIndexSerializer, Postings, SegmentPostings}; @@ -27,7 +27,7 @@ use crate::{DocAddress, DocId, InvertedIndexReader}; pub const MAX_DOC_LIMIT: u32 = 1 << 31; fn estimate_total_num_tokens_in_single_segment( - reader: &SegmentReader, + reader: &dyn SegmentReader, field: Field, ) -> crate::Result { // There are no deletes. We can simply use the exact value saved into the posting list. @@ -68,7 +68,7 @@ fn estimate_total_num_tokens_in_single_segment( Ok((segment_num_tokens as f64 * ratio) as u64) } -fn estimate_total_num_tokens(readers: &[SegmentReader], field: Field) -> crate::Result { +fn estimate_total_num_tokens(readers: &[TantivySegmentReader], field: Field) -> crate::Result { let mut total_num_tokens: u64 = 0; for reader in readers { total_num_tokens += estimate_total_num_tokens_in_single_segment(reader, field)?; @@ -78,7 +78,7 @@ fn estimate_total_num_tokens(readers: &[SegmentReader], field: Field) -> crate:: pub struct IndexMerger { schema: Schema, - pub(crate) readers: Vec, + pub(crate) readers: Vec, max_doc: u32, } @@ -170,8 +170,10 @@ impl IndexMerger { let mut readers = vec![]; for (segment, new_alive_bitset_opt) in segments.iter().zip(alive_bitset_opt) { if segment.meta().num_docs() > 0 { - let reader = - SegmentReader::open_with_custom_alive_set(segment, new_alive_bitset_opt)?; + let reader = TantivySegmentReader::open_with_custom_alive_set( + segment, + new_alive_bitset_opt, + )?; readers.push(reader); } } @@ -204,8 +206,20 @@ impl IndexMerger { let fieldnorms_readers: Vec = self .readers .iter() - .map(|reader| reader.get_fieldnorms_reader(field)) - .collect::>()?; + .map(|reader| { + reader + .fieldnorms_readers() + .get_field(field)? + .ok_or_else(|| { + let field_name = self.schema.get_field_name(field); + let err_msg = format!( + "Field norm not found for field {field_name:?}. Was the field set \ + to record norm during indexing?" + ); + crate::TantivyError::SchemaError(err_msg) + }) + }) + .collect::>()?; for old_doc_addr in doc_id_mapping.iter_old_doc_addrs() { let fieldnorms_reader = &fieldnorms_readers[old_doc_addr.segment_ord as usize]; let fieldnorm_id = fieldnorms_reader.fieldnorm_id(old_doc_addr.doc_id); @@ -262,7 +276,7 @@ impl IndexMerger { }), ); - let has_deletes: bool = self.readers.iter().any(SegmentReader::has_deletes); + let has_deletes: bool = self.readers.iter().any(|reader| reader.has_deletes()); let mapping_type = if has_deletes { MappingType::StackedWithDeletes } else { @@ -1533,7 +1547,7 @@ mod tests { for segment_reader in searcher.segment_readers() { let mut term_scorer = term_query .specialized_weight(EnableScoring::enabled_from_searcher(&searcher))? - .specialized_scorer(segment_reader, 1.0)?; + .specialized_scorer(segment_reader.as_ref(), 1.0)?; // the difference compared to before is intrinsic to the bm25 formula. no worries // there. for doc in segment_reader.doc_ids_alive() { diff --git a/src/indexer/segment_updater.rs b/src/indexer/segment_updater.rs index b72667dedb..2ffc65a96c 100644 --- a/src/indexer/segment_updater.rs +++ b/src/indexer/segment_updater.rs @@ -710,7 +710,7 @@ mod tests { use crate::indexer::segment_updater::merge_filtered_segments; use crate::query::QueryParser; use crate::schema::*; - use crate::{Directory, DocAddress, Index, Segment}; + use crate::{Directory, DocAddress, Index, Segment, SegmentReader}; #[test] fn test_delete_during_merge() -> crate::Result<()> { diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index ad2ffa0676..5bf55bd5b1 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -871,7 +871,7 @@ mod tests { let searcher = reader.searcher(); let segment_reader = searcher.segment_reader(0u32); - fn assert_type(reader: &SegmentReader, field: &str, typ: ColumnType) { + fn assert_type(reader: &dyn SegmentReader, field: &str, typ: ColumnType) { let cols = reader.fast_fields().dynamic_column_handles(field).unwrap(); assert_eq!(cols.len(), 1, "{field}"); assert_eq!(cols[0].column_type(), typ, "{field}"); @@ -890,7 +890,7 @@ mod tests { assert_type(segment_reader, "json.my_arr", ColumnType::I64); assert_type(segment_reader, "json.my_arr.my_key", ColumnType::Str); - fn assert_empty(reader: &SegmentReader, field: &str) { + fn assert_empty(reader: &dyn SegmentReader, field: &str) { let cols = reader.fast_fields().dynamic_column_handles(field).unwrap(); assert_eq!(cols.len(), 0); } diff --git a/src/lib.rs b/src/lib.rs index c8d15552ed..a69bd0c846 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -221,8 +221,8 @@ pub use crate::core::json_utils; pub use crate::core::{Executor, Searcher, SearcherGeneration}; pub use crate::directory::Directory; pub use crate::index::{ - Index, IndexBuilder, IndexMeta, IndexSettings, InvertedIndexReader, Order, Segment, - SegmentMeta, SegmentReader, + ArcSegmentReader, Index, IndexBuilder, IndexMeta, IndexSettings, InvertedIndexReader, Order, + Segment, SegmentMeta, SegmentReader, TantivySegmentReader, }; pub use crate::indexer::{IndexWriter, SingleSegmentIndexWriter}; pub use crate::schema::{Document, TantivyDocument, Term}; @@ -520,11 +520,11 @@ pub mod tests { let searcher = index_reader.searcher(); let reader = searcher.segment_reader(0); { - let fieldnorm_reader = reader.get_fieldnorms_reader(text_field)?; + let fieldnorm_reader = reader.fieldnorms_readers().get_field(text_field)?.unwrap(); assert_eq!(fieldnorm_reader.fieldnorm(0), 3); } { - let fieldnorm_reader = reader.get_fieldnorms_reader(title_field)?; + let fieldnorm_reader = reader.fieldnorms_readers().get_field(title_field)?.unwrap(); assert_eq!(fieldnorm_reader.fieldnorm_id(0), 0); } Ok(()) @@ -542,15 +542,18 @@ pub mod tests { index_writer.commit()?; let reader = index.reader()?; let searcher = reader.searcher(); - let segment_reader: &SegmentReader = searcher.segment_reader(0); - let fieldnorms_reader = segment_reader.get_fieldnorms_reader(text_field)?; + let segment_reader: &dyn SegmentReader = searcher.segment_reader(0); + let fieldnorms_reader = segment_reader + .fieldnorms_readers() + .get_field(text_field)? + .unwrap(); assert_eq!(fieldnorms_reader.fieldnorm(0), 3); assert_eq!(fieldnorms_reader.fieldnorm(1), 0); assert_eq!(fieldnorms_reader.fieldnorm(2), 2); Ok(()) } - fn advance_undeleted(docset: &mut dyn DocSet, reader: &SegmentReader) -> bool { + fn advance_undeleted(docset: &mut dyn DocSet, reader: &dyn SegmentReader) -> bool { let mut doc = docset.advance(); while doc != TERMINATED { if !reader.is_deleted(doc) { @@ -1067,7 +1070,7 @@ pub mod tests { } let reader = index.reader()?; let searcher = reader.searcher(); - let segment_reader: &SegmentReader = searcher.segment_reader(0); + let segment_reader: &dyn SegmentReader = searcher.segment_reader(0); { let fast_field_reader_res = segment_reader.fast_fields().u64("text"); assert!(fast_field_reader_res.is_err()); diff --git a/src/postings/mod.rs b/src/postings/mod.rs index efc0e069dc..b77dd37919 100644 --- a/src/postings/mod.rs +++ b/src/postings/mod.rs @@ -46,7 +46,7 @@ pub(crate) mod tests { use super::{InvertedIndexSerializer, Postings}; use crate::docset::{DocSet, TERMINATED}; use crate::fieldnorm::FieldNormReader; - use crate::index::{Index, SegmentComponent, SegmentReader}; + use crate::index::{Index, SegmentComponent, TantivySegmentReader}; use crate::indexer::operation::AddOperation; use crate::indexer::SegmentWriter; use crate::query::Scorer; @@ -54,7 +54,7 @@ pub(crate) mod tests { Field, IndexRecordOption, Schema, Term, TextFieldIndexing, TextOptions, INDEXED, TEXT, }; use crate::tokenizer::{SimpleTokenizer, MAX_TOKEN_LEN}; - use crate::{DocId, HasLen, IndexWriter, Score}; + use crate::{DocId, HasLen, IndexWriter, Score, SegmentReader}; #[test] pub fn test_position_write() -> crate::Result<()> { @@ -258,9 +258,12 @@ pub(crate) mod tests { segment_writer.finalize()?; } { - let segment_reader = SegmentReader::open(&segment)?; + let segment_reader = TantivySegmentReader::open(&segment)?; { - let fieldnorm_reader = segment_reader.get_fieldnorms_reader(text_field)?; + let fieldnorm_reader = segment_reader + .fieldnorms_readers() + .get_field(text_field)? + .unwrap(); assert_eq!(fieldnorm_reader.fieldnorm(0), 8 + 5); assert_eq!(fieldnorm_reader.fieldnorm(1), 2); for i in 2..1000 { diff --git a/src/query/all_query.rs b/src/query/all_query.rs index 11172f9ed3..2b5853f25d 100644 --- a/src/query/all_query.rs +++ b/src/query/all_query.rs @@ -21,12 +21,12 @@ impl Query for AllQuery { pub struct AllWeight; impl Weight for AllWeight { - fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result> { + fn scorer(&self, reader: &dyn SegmentReader, boost: Score) -> crate::Result> { let all_scorer = AllScorer::new(reader.max_doc()); Ok(Box::new(BoostScorer::new(all_scorer, boost))) } - fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result { + fn explain(&self, reader: &dyn SegmentReader, doc: DocId) -> crate::Result { if doc >= reader.max_doc() { return Err(does_not_match(doc)); } diff --git a/src/query/automaton_weight.rs b/src/query/automaton_weight.rs index 5f1053fb67..90aebf11e7 100644 --- a/src/query/automaton_weight.rs +++ b/src/query/automaton_weight.rs @@ -67,7 +67,7 @@ where } /// Returns the term infos that match the automaton - pub fn get_match_term_infos(&self, reader: &SegmentReader) -> crate::Result> { + pub fn get_match_term_infos(&self, reader: &dyn SegmentReader) -> crate::Result> { let inverted_index = reader.inverted_index(self.field)?; let term_dict = inverted_index.terms(); let mut term_stream = self.automaton_stream(term_dict)?; @@ -84,7 +84,7 @@ where A: Automaton + Send + Sync + 'static, A::State: Clone, { - fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result> { + fn scorer(&self, reader: &dyn SegmentReader, boost: Score) -> crate::Result> { let max_doc = reader.max_doc(); let mut doc_bitset = BitSet::with_max_value(max_doc); let inverted_index = reader.inverted_index(self.field)?; @@ -110,7 +110,7 @@ where Ok(Box::new(const_scorer)) } - fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result { + fn explain(&self, reader: &dyn SegmentReader, doc: DocId) -> crate::Result { let mut scorer = self.scorer(reader, 1.0)?; if scorer.seek(doc) == doc { Ok(Explanation::new("AutomatonScorer", 1.0)) diff --git a/src/query/boolean_query/boolean_weight.rs b/src/query/boolean_query/boolean_weight.rs index 5dbd5ea44f..a433f71ab2 100644 --- a/src/query/boolean_query/boolean_weight.rs +++ b/src/query/boolean_query/boolean_weight.rs @@ -137,7 +137,7 @@ impl BooleanWeight { fn per_occur_scorers( &self, - reader: &SegmentReader, + reader: &dyn SegmentReader, boost: Score, ) -> crate::Result>>> { let mut per_occur_scorers: HashMap>> = HashMap::new(); @@ -153,7 +153,7 @@ impl BooleanWeight { fn complex_scorer( &self, - reader: &SegmentReader, + reader: &dyn SegmentReader, boost: Score, score_combiner_fn: impl Fn() -> TComplexScoreCombiner, ) -> crate::Result { @@ -258,7 +258,7 @@ impl BooleanWeight { } impl Weight for BooleanWeight { - fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result> { + fn scorer(&self, reader: &dyn SegmentReader, boost: Score) -> crate::Result> { let num_docs = reader.num_docs(); if self.weights.is_empty() { Ok(Box::new(EmptyScorer)) @@ -282,7 +282,7 @@ impl Weight for BooleanWeight crate::Result { + fn explain(&self, reader: &dyn SegmentReader, doc: DocId) -> crate::Result { let mut scorer = self.scorer(reader, 1.0)?; if scorer.seek(doc) != doc { return Err(does_not_match(doc)); @@ -304,7 +304,7 @@ impl Weight for BooleanWeight crate::Result<()> { let scorer = self.complex_scorer(reader, 1.0, &self.score_combiner_fn)?; @@ -326,7 +326,7 @@ impl Weight for BooleanWeight crate::Result<()> { let scorer = self.complex_scorer(reader, 1.0, || DoNothingCombiner)?; @@ -361,7 +361,7 @@ impl Weight for BooleanWeight Score, ) -> crate::Result<()> { let scorer = self.complex_scorer(reader, 1.0, &self.score_combiner_fn)?; diff --git a/src/query/boost_query.rs b/src/query/boost_query.rs index 06678287f3..71e22ece49 100644 --- a/src/query/boost_query.rs +++ b/src/query/boost_query.rs @@ -67,11 +67,11 @@ impl BoostWeight { } impl Weight for BoostWeight { - fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result> { + fn scorer(&self, reader: &dyn SegmentReader, boost: Score) -> crate::Result> { self.weight.scorer(reader, boost * self.boost) } - fn explain(&self, reader: &SegmentReader, doc: u32) -> crate::Result { + fn explain(&self, reader: &dyn SegmentReader, doc: u32) -> crate::Result { let underlying_explanation = self.weight.explain(reader, doc)?; let score = underlying_explanation.value() * self.boost; let mut explanation = @@ -80,7 +80,7 @@ impl Weight for BoostWeight { Ok(explanation) } - fn count(&self, reader: &SegmentReader) -> crate::Result { + fn count(&self, reader: &dyn SegmentReader) -> crate::Result { self.weight.count(reader) } } diff --git a/src/query/const_score_query.rs b/src/query/const_score_query.rs index 570c7fecae..b6667aa838 100644 --- a/src/query/const_score_query.rs +++ b/src/query/const_score_query.rs @@ -63,12 +63,12 @@ impl ConstWeight { } impl Weight for ConstWeight { - fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result> { + fn scorer(&self, reader: &dyn SegmentReader, boost: Score) -> crate::Result> { let inner_scorer = self.weight.scorer(reader, boost)?; Ok(Box::new(ConstScorer::new(inner_scorer, boost * self.score))) } - fn explain(&self, reader: &SegmentReader, doc: u32) -> crate::Result { + fn explain(&self, reader: &dyn SegmentReader, doc: u32) -> crate::Result { let mut scorer = self.scorer(reader, 1.0)?; if scorer.seek(doc) != doc { return Err(TantivyError::InvalidArgument(format!( @@ -81,7 +81,7 @@ impl Weight for ConstWeight { Ok(explanation) } - fn count(&self, reader: &SegmentReader) -> crate::Result { + fn count(&self, reader: &dyn SegmentReader) -> crate::Result { self.weight.count(reader) } } diff --git a/src/query/empty_query.rs b/src/query/empty_query.rs index 86ff84c08c..ddf973b8c9 100644 --- a/src/query/empty_query.rs +++ b/src/query/empty_query.rs @@ -26,11 +26,11 @@ impl Query for EmptyQuery { /// It is useful for tests and handling edge cases. pub struct EmptyWeight; impl Weight for EmptyWeight { - fn scorer(&self, _reader: &SegmentReader, _boost: Score) -> crate::Result> { + fn scorer(&self, _reader: &dyn SegmentReader, _boost: Score) -> crate::Result> { Ok(Box::new(EmptyScorer)) } - fn explain(&self, _reader: &SegmentReader, doc: DocId) -> crate::Result { + fn explain(&self, _reader: &dyn SegmentReader, doc: DocId) -> crate::Result { Err(does_not_match(doc)) } } diff --git a/src/query/exist_query.rs b/src/query/exist_query.rs index f97e9e5c7c..aaad08abdc 100644 --- a/src/query/exist_query.rs +++ b/src/query/exist_query.rs @@ -98,7 +98,7 @@ pub struct ExistsWeight { } impl Weight for ExistsWeight { - fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result> { + fn scorer(&self, reader: &dyn SegmentReader, boost: Score) -> crate::Result> { let fast_field_reader = reader.fast_fields(); let mut column_handles = fast_field_reader.dynamic_column_handles(&self.field_name)?; if self.field_type == Type::Json && self.json_subpaths { @@ -161,7 +161,7 @@ impl Weight for ExistsWeight { Ok(Box::new(ConstScorer::new(docset, boost))) } - fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result { + fn explain(&self, reader: &dyn SegmentReader, doc: DocId) -> crate::Result { let mut scorer = self.scorer(reader, 1.0)?; if scorer.seek(doc) != doc { return Err(does_not_match(doc)); diff --git a/src/query/phrase_prefix_query/phrase_prefix_weight.rs b/src/query/phrase_prefix_query/phrase_prefix_weight.rs index 546eb89e8e..be9a8a5f8b 100644 --- a/src/query/phrase_prefix_query/phrase_prefix_weight.rs +++ b/src/query/phrase_prefix_query/phrase_prefix_weight.rs @@ -32,7 +32,7 @@ impl PhrasePrefixWeight { } } - fn fieldnorm_reader(&self, reader: &SegmentReader) -> crate::Result { + fn fieldnorm_reader(&self, reader: &dyn SegmentReader) -> crate::Result { let field = self.phrase_terms[0].1.field(); if self.similarity_weight_opt.is_some() { if let Some(fieldnorm_reader) = reader.fieldnorms_readers().get_field(field)? { @@ -44,7 +44,7 @@ impl PhrasePrefixWeight { pub(crate) fn phrase_scorer( &self, - reader: &SegmentReader, + reader: &dyn SegmentReader, boost: Score, ) -> crate::Result>> { let similarity_weight_opt = self @@ -114,7 +114,7 @@ impl PhrasePrefixWeight { } impl Weight for PhrasePrefixWeight { - fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result> { + fn scorer(&self, reader: &dyn SegmentReader, boost: Score) -> crate::Result> { if let Some(scorer) = self.phrase_scorer(reader, boost)? { Ok(Box::new(scorer)) } else { @@ -122,7 +122,7 @@ impl Weight for PhrasePrefixWeight { } } - fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result { + fn explain(&self, reader: &dyn SegmentReader, doc: DocId) -> crate::Result { let scorer_opt = self.phrase_scorer(reader, 1.0)?; if scorer_opt.is_none() { return Err(does_not_match(doc)); diff --git a/src/query/phrase_query/phrase_weight.rs b/src/query/phrase_query/phrase_weight.rs index 4118f79f6c..b0cdec60c5 100644 --- a/src/query/phrase_query/phrase_weight.rs +++ b/src/query/phrase_query/phrase_weight.rs @@ -29,7 +29,7 @@ impl PhraseWeight { } } - fn fieldnorm_reader(&self, reader: &SegmentReader) -> crate::Result { + fn fieldnorm_reader(&self, reader: &dyn SegmentReader) -> crate::Result { let field = self.phrase_terms[0].1.field(); if self.similarity_weight_opt.is_some() { if let Some(fieldnorm_reader) = reader.fieldnorms_readers().get_field(field)? { @@ -41,7 +41,7 @@ impl PhraseWeight { pub(crate) fn phrase_scorer( &self, - reader: &SegmentReader, + reader: &dyn SegmentReader, boost: Score, ) -> crate::Result>> { let similarity_weight_opt = self @@ -74,7 +74,7 @@ impl PhraseWeight { } impl Weight for PhraseWeight { - fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result> { + fn scorer(&self, reader: &dyn SegmentReader, boost: Score) -> crate::Result> { if let Some(scorer) = self.phrase_scorer(reader, boost)? { Ok(Box::new(scorer)) } else { @@ -82,7 +82,7 @@ impl Weight for PhraseWeight { } } - fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result { + fn explain(&self, reader: &dyn SegmentReader, doc: DocId) -> crate::Result { let scorer_opt = self.phrase_scorer(reader, 1.0)?; if scorer_opt.is_none() { return Err(does_not_match(doc)); diff --git a/src/query/phrase_query/regex_phrase_weight.rs b/src/query/phrase_query/regex_phrase_weight.rs index 4e850d2e29..8e246dec40 100644 --- a/src/query/phrase_query/regex_phrase_weight.rs +++ b/src/query/phrase_query/regex_phrase_weight.rs @@ -45,7 +45,7 @@ impl RegexPhraseWeight { } } - fn fieldnorm_reader(&self, reader: &SegmentReader) -> crate::Result { + fn fieldnorm_reader(&self, reader: &dyn SegmentReader) -> crate::Result { if self.similarity_weight_opt.is_some() { if let Some(fieldnorm_reader) = reader.fieldnorms_readers().get_field(self.field)? { return Ok(fieldnorm_reader); @@ -56,7 +56,7 @@ impl RegexPhraseWeight { pub(crate) fn phrase_scorer( &self, - reader: &SegmentReader, + reader: &dyn SegmentReader, boost: Score, ) -> crate::Result>> { let similarity_weight_opt = self @@ -174,7 +174,7 @@ impl RegexPhraseWeight { /// Use Roaring Bitmaps for sparse terms. The full bitvec is main memory consumer currently. pub(crate) fn get_union_from_term_infos( term_infos: &[TermInfo], - reader: &SegmentReader, + reader: &dyn SegmentReader, inverted_index: &InvertedIndexReader, ) -> crate::Result { let max_doc = reader.max_doc(); @@ -269,7 +269,7 @@ impl RegexPhraseWeight { } impl Weight for RegexPhraseWeight { - fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result> { + fn scorer(&self, reader: &dyn SegmentReader, boost: Score) -> crate::Result> { if let Some(scorer) = self.phrase_scorer(reader, boost)? { Ok(Box::new(scorer)) } else { @@ -277,7 +277,7 @@ impl Weight for RegexPhraseWeight { } } - fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result { + fn explain(&self, reader: &dyn SegmentReader, doc: DocId) -> crate::Result { let scorer_opt = self.phrase_scorer(reader, 1.0)?; if scorer_opt.is_none() { return Err(does_not_match(doc)); diff --git a/src/query/query.rs b/src/query/query.rs index 32f74536fe..476887d246 100644 --- a/src/query/query.rs +++ b/src/query/query.rs @@ -146,7 +146,7 @@ pub trait Query: QueryClone + Send + Sync + downcast_rs::Downcast + fmt::Debug { let weight = self.weight(EnableScoring::disabled_from_searcher(searcher))?; let mut result = 0; for reader in searcher.segment_readers() { - result += weight.count(reader)? as usize; + result += weight.count(reader.as_ref())? as usize; } Ok(result) } diff --git a/src/query/range_query/range_query.rs b/src/query/range_query/range_query.rs index 5035c43f17..8ead853105 100644 --- a/src/query/range_query/range_query.rs +++ b/src/query/range_query/range_query.rs @@ -212,7 +212,7 @@ impl InvertedIndexRangeWeight { } impl Weight for InvertedIndexRangeWeight { - fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result> { + fn scorer(&self, reader: &dyn SegmentReader, boost: Score) -> crate::Result> { let max_doc = reader.max_doc(); let mut doc_bitset = BitSet::with_max_value(max_doc); @@ -245,7 +245,7 @@ impl Weight for InvertedIndexRangeWeight { Ok(Box::new(ConstScorer::new(doc_bitset, boost))) } - fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result { + fn explain(&self, reader: &dyn SegmentReader, doc: DocId) -> crate::Result { let mut scorer = self.scorer(reader, 1.0)?; if scorer.seek(doc) != doc { return Err(does_not_match(doc)); diff --git a/src/query/range_query/range_query_fastfield.rs b/src/query/range_query/range_query_fastfield.rs index b17694cfaf..2c419080f1 100644 --- a/src/query/range_query/range_query_fastfield.rs +++ b/src/query/range_query/range_query_fastfield.rs @@ -52,7 +52,7 @@ impl FastFieldRangeWeight { } impl Weight for FastFieldRangeWeight { - fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result> { + fn scorer(&self, reader: &dyn SegmentReader, boost: Score) -> crate::Result> { // Check if both bounds are Bound::Unbounded if self.bounds.is_unbounded() { return Ok(Box::new(AllScorer::new(reader.max_doc()))); @@ -219,7 +219,7 @@ impl Weight for FastFieldRangeWeight { } } - fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result { + fn explain(&self, reader: &dyn SegmentReader, doc: DocId) -> crate::Result { let mut scorer = self.scorer(reader, 1.0)?; if scorer.seek(doc) != doc { return Err(TantivyError::InvalidArgument(format!( @@ -236,7 +236,7 @@ impl Weight for FastFieldRangeWeight { /// /// Convert into fast field value space and search. fn search_on_json_numerical_field( - reader: &SegmentReader, + reader: &dyn SegmentReader, field_name: &str, typ: Type, bounds: BoundsRange>>, diff --git a/src/query/term_query/term_scorer.rs b/src/query/term_query/term_scorer.rs index 09f7502b98..bcde9d8fa8 100644 --- a/src/query/term_query/term_scorer.rs +++ b/src/query/term_query/term_scorer.rs @@ -259,7 +259,7 @@ mod tests { let mut block_max_scores_b = vec![]; let mut docs = vec![]; { - let mut term_scorer = term_weight.specialized_scorer(reader, 1.0)?; + let mut term_scorer = term_weight.specialized_scorer(reader.as_ref(), 1.0)?; while term_scorer.doc() != TERMINATED { let mut score = term_scorer.score(); docs.push(term_scorer.doc()); @@ -273,7 +273,7 @@ mod tests { } } { - let mut term_scorer = term_weight.specialized_scorer(reader, 1.0)?; + let mut term_scorer = term_weight.specialized_scorer(reader.as_ref(), 1.0)?; for d in docs { term_scorer.seek_block(d); block_max_scores_b.push(term_scorer.block_max_score()); diff --git a/src/query/term_query/term_weight.rs b/src/query/term_query/term_weight.rs index a70c8ce8fa..43a19b6990 100644 --- a/src/query/term_query/term_weight.rs +++ b/src/query/term_query/term_weight.rs @@ -18,12 +18,12 @@ pub struct TermWeight { } impl Weight for TermWeight { - fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result> { + fn scorer(&self, reader: &dyn SegmentReader, boost: Score) -> crate::Result> { let term_scorer = self.specialized_scorer(reader, boost)?; Ok(Box::new(term_scorer)) } - fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result { + fn explain(&self, reader: &dyn SegmentReader, doc: DocId) -> crate::Result { let mut scorer = self.specialized_scorer(reader, 1.0)?; if scorer.doc() > doc || scorer.seek(doc) != doc { return Err(does_not_match(doc)); @@ -33,7 +33,7 @@ impl Weight for TermWeight { Ok(explanation) } - fn count(&self, reader: &SegmentReader) -> crate::Result { + fn count(&self, reader: &dyn SegmentReader) -> crate::Result { if let Some(alive_bitset) = reader.alive_bitset() { Ok(self.scorer(reader, 1.0)?.count(alive_bitset)) } else { @@ -48,7 +48,7 @@ impl Weight for TermWeight { /// `DocSet` and push the scored documents to the collector. fn for_each( &self, - reader: &SegmentReader, + reader: &dyn SegmentReader, callback: &mut dyn FnMut(DocId, Score), ) -> crate::Result<()> { let mut scorer = self.specialized_scorer(reader, 1.0)?; @@ -60,7 +60,7 @@ impl Weight for TermWeight { /// `DocSet` and push the scored documents to the collector. fn for_each_no_score( &self, - reader: &SegmentReader, + reader: &dyn SegmentReader, callback: &mut dyn FnMut(&[DocId]), ) -> crate::Result<()> { let mut scorer = self.specialized_scorer(reader, 1.0)?; @@ -82,7 +82,7 @@ impl Weight for TermWeight { fn for_each_pruning( &self, threshold: Score, - reader: &SegmentReader, + reader: &dyn SegmentReader, callback: &mut dyn FnMut(DocId, Score) -> Score, ) -> crate::Result<()> { let scorer = self.specialized_scorer(reader, 1.0)?; @@ -112,7 +112,7 @@ impl TermWeight { pub(crate) fn specialized_scorer( &self, - reader: &SegmentReader, + reader: &dyn SegmentReader, boost: Score, ) -> crate::Result { let field = self.term.field(); diff --git a/src/query/weight.rs b/src/query/weight.rs index 23ff55c046..a457ba3b87 100644 --- a/src/query/weight.rs +++ b/src/query/weight.rs @@ -69,13 +69,13 @@ pub trait Weight: Send + Sync + 'static { /// `boost` is a multiplier to apply to the score. /// /// See [`Query`](crate::query::Query). - fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result>; + fn scorer(&self, reader: &dyn SegmentReader, boost: Score) -> crate::Result>; /// Returns an [`Explanation`] for the given document. - fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result; + fn explain(&self, reader: &dyn SegmentReader, doc: DocId) -> crate::Result; /// Returns the number documents within the given [`SegmentReader`]. - fn count(&self, reader: &SegmentReader) -> crate::Result { + fn count(&self, reader: &dyn SegmentReader) -> crate::Result { let mut scorer = self.scorer(reader, 1.0)?; if let Some(alive_bitset) = reader.alive_bitset() { Ok(scorer.count(alive_bitset)) @@ -88,7 +88,7 @@ pub trait Weight: Send + Sync + 'static { /// `DocSet` and push the scored documents to the collector. fn for_each( &self, - reader: &SegmentReader, + reader: &dyn SegmentReader, callback: &mut dyn FnMut(DocId, Score), ) -> crate::Result<()> { let mut scorer = self.scorer(reader, 1.0)?; @@ -100,7 +100,7 @@ pub trait Weight: Send + Sync + 'static { /// `DocSet` and push the scored documents to the collector. fn for_each_no_score( &self, - reader: &SegmentReader, + reader: &dyn SegmentReader, callback: &mut dyn FnMut(&[DocId]), ) -> crate::Result<()> { let mut docset = self.scorer(reader, 1.0)?; @@ -123,7 +123,7 @@ pub trait Weight: Send + Sync + 'static { fn for_each_pruning( &self, threshold: Score, - reader: &SegmentReader, + reader: &dyn SegmentReader, callback: &mut dyn FnMut(DocId, Score) -> Score, ) -> crate::Result<()> { let mut scorer = self.scorer(reader, 1.0)?; diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 157e237d8c..0609d56824 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -10,7 +10,7 @@ use self::warming::WarmingState; use crate::core::searcher::{SearcherGeneration, SearcherInner}; use crate::directory::{Directory, WatchCallback, WatchHandle, META_LOCK}; use crate::store::DOCSTORE_CACHE_CAPACITY; -use crate::{Index, Inventory, Searcher, SegmentReader, TrackedObject}; +use crate::{ArcSegmentReader, Index, Inventory, Searcher, TantivySegmentReader, TrackedObject}; /// Defines when a new version of the index should be reloaded. /// @@ -189,19 +189,22 @@ impl InnerIndexReader { /// /// This function acquires a lock to prevent GC from removing files /// as we are opening our index. - fn open_segment_readers(index: &Index) -> crate::Result> { + fn open_segment_readers(index: &Index) -> crate::Result> { // Prevents segment files from getting deleted while we are in the process of opening them let _meta_lock = index.directory().acquire_lock(&META_LOCK)?; let searchable_segments = index.searchable_segments()?; let segment_readers = searchable_segments .iter() - .map(SegmentReader::open) + .map(|segment| { + TantivySegmentReader::open(segment) + .map(|reader| Arc::new(reader) as ArcSegmentReader) + }) .collect::>()?; Ok(segment_readers) } fn track_segment_readers_in_inventory( - segment_readers: &[SegmentReader], + segment_readers: &[ArcSegmentReader], searcher_generation_counter: &Arc, searcher_generation_inventory: &Inventory, ) -> TrackedObject { diff --git a/src/schema/field_entry.rs b/src/schema/field_entry.rs index 77e061bc61..f949df3ec3 100644 --- a/src/schema/field_entry.rs +++ b/src/schema/field_entry.rs @@ -210,8 +210,11 @@ mod tests { index_writer.add_document(doc!(text=>"abc"))?; index_writer.commit()?; let searcher = index.reader()?.searcher(); - let err = searcher.segment_reader(0u32).get_fieldnorms_reader(text); - assert!(matches!(err, Err(crate::TantivyError::SchemaError(_)))); + let field_norm_opt = searcher + .segment_reader(0u32) + .fieldnorms_readers() + .get_field(text)?; + assert!(field_norm_opt.is_none()); Ok(()) } } diff --git a/src/store/mod.rs b/src/store/mod.rs index 5826435158..04ee587009 100644 --- a/src/store/mod.rs +++ b/src/store/mod.rs @@ -26,7 +26,7 @@ //! and should rely on either //! //! - at the segment level, the [`SegmentReader`'s `doc` -//! method](../struct.SegmentReader.html#method.doc) +//! method](../trait.SegmentReader.html#method.doc) //! - at the index level, the [`Searcher::doc()`](crate::Searcher::doc) method mod compressors;