Skip to content

Commit dc4c218

Browse files
committed
Optimization when posting lists are saturated.
If a posting list's doc freq equals the segment reader's max_doc, and if scoring does not matter, we can replace it with an AllScorer. In turn, in a boolean query, we can dismiss AllScorer and EmptyScorer clauses to accelerate the request.
1 parent c363bbd commit dc4c218

File tree

10 files changed

+424
-122
lines changed

10 files changed

+424
-122
lines changed

src/aggregation/bucket/filter.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -727,12 +727,13 @@ mod tests {
727727

728728
let schema = schema_builder.build();
729729
let index = Index::create_in_ram(schema);
730-
let mut writer: IndexWriter = index.writer(50_000_000)?;
730+
let mut writer: IndexWriter = index.writer_for_tests()?;
731731

732732
writer.add_document(doc!(
733733
category => "electronics", brand => "apple",
734734
price => 999u64, rating => 4.5f64, in_stock => true
735735
))?;
736+
writer.commit()?;
736737
writer.add_document(doc!(
737738
category => "electronics", brand => "samsung",
738739
price => 799u64, rating => 4.2f64, in_stock => true
@@ -936,7 +937,7 @@ mod tests {
936937
let index = create_standard_test_index()?;
937938
let reader = index.reader()?;
938939
let searcher = reader.searcher();
939-
940+
assert_eq!(searcher.segment_readers().len(), 2);
940941
let agg = json!({
941942
"premium_electronics": {
942943
"filter": "category:electronics AND price:[800 TO *]",

src/indexer/merger.rs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1518,7 +1518,9 @@ mod tests {
15181518
let searcher = reader.searcher();
15191519
let mut term_scorer = term_query
15201520
.specialized_weight(EnableScoring::enabled_from_searcher(&searcher))?
1521-
.specialized_scorer(searcher.segment_reader(0u32), 1.0)?;
1521+
.specialized_scorer(searcher.segment_reader(0u32), 1.0)?
1522+
.into_term_scorer()
1523+
.unwrap();
15221524
assert_eq!(term_scorer.doc(), 0);
15231525
assert_nearly_equals!(term_scorer.block_max_score(), 0.0079681855);
15241526
assert_nearly_equals!(term_scorer.score(), 0.0079681855);
@@ -1533,7 +1535,9 @@ mod tests {
15331535
for segment_reader in searcher.segment_readers() {
15341536
let mut term_scorer = term_query
15351537
.specialized_weight(EnableScoring::enabled_from_searcher(&searcher))?
1536-
.specialized_scorer(segment_reader, 1.0)?;
1538+
.specialized_scorer(segment_reader, 1.0)?
1539+
.into_term_scorer()
1540+
.unwrap();
15371541
// the difference compared to before is intrinsic to the bm25 formula. no worries
15381542
// there.
15391543
for doc in segment_reader.doc_ids_alive() {
@@ -1558,7 +1562,9 @@ mod tests {
15581562
let segment_reader = searcher.segment_reader(0u32);
15591563
let mut term_scorer = term_query
15601564
.specialized_weight(EnableScoring::enabled_from_searcher(&searcher))?
1561-
.specialized_scorer(segment_reader, 1.0)?;
1565+
.specialized_scorer(segment_reader, 1.0)?
1566+
.into_term_scorer()
1567+
.unwrap();
15621568
// the difference compared to before is intrinsic to the bm25 formula. no worries there.
15631569
for doc in segment_reader.doc_ids_alive() {
15641570
assert_eq!(term_scorer.doc(), doc);

src/query/bm25.rs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use std::sync::Arc;
2+
13
use crate::fieldnorm::FieldNormReader;
24
use crate::query::Explanation;
35
use crate::schema::Field;
@@ -57,31 +59,34 @@ fn cached_tf_component(fieldnorm: u32, average_fieldnorm: Score) -> Score {
5759
K1 * (1.0 - B + B * fieldnorm as Score / average_fieldnorm)
5860
}
5961

60-
fn compute_tf_cache(average_fieldnorm: Score) -> [Score; 256] {
62+
fn compute_tf_cache(average_fieldnorm: Score) -> Arc<[Score; 256]> {
6163
let mut cache: [Score; 256] = [0.0; 256];
6264
for (fieldnorm_id, cache_mut) in cache.iter_mut().enumerate() {
6365
let fieldnorm = FieldNormReader::id_to_fieldnorm(fieldnorm_id as u8);
6466
*cache_mut = cached_tf_component(fieldnorm, average_fieldnorm);
6567
}
66-
cache
68+
Arc::new(cache)
6769
}
6870

6971
/// A struct used for computing BM25 scores.
7072
#[derive(Clone)]
7173
pub struct Bm25Weight {
7274
idf_explain: Option<Explanation>,
7375
weight: Score,
74-
cache: [Score; 256],
76+
cache: Arc<[Score; 256]>,
7577
average_fieldnorm: Score,
7678
}
7779

7880
impl Bm25Weight {
7981
/// Increase the weight by a multiplicative factor.
8082
pub fn boost_by(&self, boost: Score) -> Bm25Weight {
83+
if boost == 1.0f32 {
84+
return self.clone();
85+
}
8186
Bm25Weight {
8287
idf_explain: self.idf_explain.clone(),
8388
weight: self.weight * boost,
84-
cache: self.cache,
89+
cache: self.cache.clone(),
8590
average_fieldnorm: self.average_fieldnorm,
8691
}
8792
}

src/query/boolean_query/boolean_weight.rs

Lines changed: 128 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use crate::query::score_combiner::{DoNothingCombiner, ScoreCombiner};
99
use crate::query::term_query::TermScorer;
1010
use crate::query::weight::{for_each_docset_buffered, for_each_pruning_scorer, for_each_scorer};
1111
use crate::query::{
12-
intersect_scorers, BufferedUnionScorer, EmptyScorer, Exclude, Explanation, Occur,
12+
intersect_scorers, AllScorer, BufferedUnionScorer, EmptyScorer, Exclude, Explanation, Occur,
1313
RequiredOptionalScorer, Scorer, Weight,
1414
};
1515
use crate::{DocId, Score};
@@ -97,6 +97,15 @@ fn into_box_scorer<TScoreCombiner: ScoreCombiner>(
9797
}
9898
}
9999

100+
// Describes how the should clauses are combined with the other clauses
// of a boolean query when the per-segment scorer is built.
enum ShouldScorersCombinationMethod {
101+
// Should scorers are irrelevant.
102+
Ignored,
103+
// Only contributes to final score.
104+
Optional(SpecializedScorer),
105+
// Regardless of score, the should scorers may impact whether a document is matching or not.
106+
Required(SpecializedScorer),
107+
}
108+
100109
/// Weight associated to the `BoolQuery`.
101110
pub struct BooleanWeight<TScoreCombiner: ScoreCombiner> {
102111
weights: Vec<(Occur, Box<dyn Weight>)>,
@@ -159,90 +168,125 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
159168
) -> crate::Result<SpecializedScorer> {
160169
let num_docs = reader.num_docs();
161170
let mut per_occur_scorers = self.per_occur_scorers(reader, boost)?;
162-
// Indicate how should clauses are combined with other clauses.
163-
enum CombinationMethod {
164-
Ignored,
165-
// Only contributes to final score.
166-
Optional(SpecializedScorer),
167-
Required(SpecializedScorer),
171+
172+
// Indicate how should clauses are combined with must clauses.
173+
let mut must_scorers: Vec<Box<dyn Scorer>> =
174+
per_occur_scorers.remove(&Occur::Must).unwrap_or_default();
175+
let must_special_scorer_counts = remove_and_count_all_and_empty_scorers(&mut must_scorers);
176+
177+
if must_special_scorer_counts.empty_count > 0 {
178+
return Ok(SpecializedScorer::Other(Box::new(EmptyScorer)));
179+
}
180+
181+
let mut should_scorers = per_occur_scorers.remove(&Occur::Should).unwrap_or_default();
182+
let should_special_scorer_counts =
183+
remove_and_count_all_and_empty_scorers(&mut should_scorers);
184+
185+
let mut exclude_scorers: Vec<Box<dyn Scorer>> = per_occur_scorers
186+
.remove(&Occur::MustNot)
187+
.unwrap_or_default();
188+
let exclude_special_scorer_counts =
189+
remove_and_count_all_and_empty_scorers(&mut exclude_scorers);
190+
191+
if exclude_special_scorer_counts.all_count > 0 {
192+
// We exclude all documents at one point.
193+
return Ok(SpecializedScorer::Other(Box::new(EmptyScorer)));
168194
}
169-
let mut must_scorers = per_occur_scorers.remove(&Occur::Must);
170-
let should_opt = if let Some(mut should_scorers) = per_occur_scorers.remove(&Occur::Should)
171-
{
195+
196+
let minimum_number_should_match = self
197+
.minimum_number_should_match
198+
.saturating_sub(should_special_scorer_counts.all_count);
199+
200+
let should_scorers: ShouldScorersCombinationMethod = {
172201
let num_of_should_scorers = should_scorers.len();
173-
if self.minimum_number_should_match > num_of_should_scorers {
202+
if minimum_number_should_match > num_of_should_scorers {
203+
// We don't have enough scorers to satisfy the minimum number of should matches.
204+
// The request will match no documents.
174205
return Ok(SpecializedScorer::Other(Box::new(EmptyScorer)));
175206
}
176-
match self.minimum_number_should_match {
177-
0 => CombinationMethod::Optional(scorer_union(
207+
match minimum_number_should_match {
208+
0 if num_of_should_scorers == 0 => ShouldScorersCombinationMethod::Ignored,
209+
0 => ShouldScorersCombinationMethod::Optional(scorer_union(
178210
should_scorers,
179211
&score_combiner_fn,
180212
num_docs,
181213
)),
182-
1 => CombinationMethod::Required(scorer_union(
214+
1 => ShouldScorersCombinationMethod::Required(scorer_union(
183215
should_scorers,
184216
&score_combiner_fn,
185217
num_docs,
186218
)),
187219
n if num_of_should_scorers == n => {
188220
// When num_of_should_scorers equals the number of should clauses,
189221
// they are no different from must clauses.
190-
must_scorers = match must_scorers.take() {
191-
Some(mut must_scorers) => {
192-
must_scorers.append(&mut should_scorers);
193-
Some(must_scorers)
194-
}
195-
None => Some(should_scorers),
196-
};
197-
CombinationMethod::Ignored
222+
must_scorers.append(&mut should_scorers);
223+
ShouldScorersCombinationMethod::Ignored
198224
}
199-
_ => CombinationMethod::Required(SpecializedScorer::Other(scorer_disjunction(
200-
should_scorers,
201-
score_combiner_fn(),
202-
self.minimum_number_should_match,
203-
))),
225+
_ => ShouldScorersCombinationMethod::Required(SpecializedScorer::Other(
226+
scorer_disjunction(
227+
should_scorers,
228+
score_combiner_fn(),
229+
self.minimum_number_should_match,
230+
),
231+
)),
204232
}
233+
};
234+
235+
let exclude_scorer_opt: Option<Box<dyn Scorer>> = if exclude_scorers.is_empty() {
236+
None
205237
} else {
206-
// None of should clauses are provided.
207-
if self.minimum_number_should_match > 0 {
208-
return Ok(SpecializedScorer::Other(Box::new(EmptyScorer)));
209-
} else {
210-
CombinationMethod::Ignored
211-
}
238+
let exclude_specialized_scorer: SpecializedScorer =
239+
scorer_union(exclude_scorers, DoNothingCombiner::default, num_docs);
240+
Some(into_box_scorer(
241+
exclude_specialized_scorer,
242+
DoNothingCombiner::default,
243+
num_docs,
244+
))
212245
};
213-
let exclude_scorer_opt: Option<Box<dyn Scorer>> = per_occur_scorers
214-
.remove(&Occur::MustNot)
215-
.map(|scorers| scorer_union(scorers, DoNothingCombiner::default, num_docs))
216-
.map(|specialized_scorer: SpecializedScorer| {
217-
into_box_scorer(specialized_scorer, DoNothingCombiner::default, num_docs)
218-
});
219-
let positive_scorer = match (should_opt, must_scorers) {
220-
(CombinationMethod::Ignored, Some(must_scorers)) => {
221-
SpecializedScorer::Other(intersect_scorers(must_scorers, num_docs))
246+
247+
let positive_scorer = match (should_scorers, must_scorers) {
248+
(ShouldScorersCombinationMethod::Ignored, must_scorers) => {
249+
let boxed_scorer: Box<dyn Scorer> = if must_scorers.is_empty() {
250+
if must_special_scorer_counts.all_count + should_special_scorer_counts.all_count
251+
> 0
252+
{
253+
Box::new(AllScorer::new(reader.max_doc()))
254+
} else {
255+
Box::new(EmptyScorer)
256+
}
257+
} else {
258+
intersect_scorers(must_scorers, num_docs)
259+
};
260+
SpecializedScorer::Other(boxed_scorer)
222261
}
223-
(CombinationMethod::Optional(should_scorer), Some(must_scorers)) => {
224-
let must_scorer = intersect_scorers(must_scorers, num_docs);
225-
if self.scoring_enabled {
226-
SpecializedScorer::Other(Box::new(
227-
RequiredOptionalScorer::<_, _, TScoreCombiner>::new(
262+
(ShouldScorersCombinationMethod::Optional(should_scorer), must_scorers) => {
263+
if must_scorers.is_empty() && must_special_scorer_counts.all_count == 0 {
264+
// Optional options are promoted to required if no must scorers exists.
265+
should_scorer
266+
} else {
267+
let must_scorer = intersect_scorers(must_scorers, num_docs);
268+
if self.scoring_enabled {
269+
SpecializedScorer::Other(Box::new(RequiredOptionalScorer::<
270+
_,
271+
_,
272+
TScoreCombiner,
273+
>::new(
228274
must_scorer,
229275
into_box_scorer(should_scorer, &score_combiner_fn, num_docs),
230-
),
231-
))
232-
} else {
233-
SpecializedScorer::Other(must_scorer)
276+
)))
277+
} else {
278+
SpecializedScorer::Other(must_scorer)
279+
}
234280
}
235281
}
236-
(CombinationMethod::Required(should_scorer), Some(mut must_scorers)) => {
237-
must_scorers.push(into_box_scorer(should_scorer, &score_combiner_fn, num_docs));
238-
SpecializedScorer::Other(intersect_scorers(must_scorers, num_docs))
239-
}
240-
(CombinationMethod::Ignored, None) => {
241-
return Ok(SpecializedScorer::Other(Box::new(EmptyScorer)))
282+
(ShouldScorersCombinationMethod::Required(should_scorer), mut must_scorers) => {
283+
if must_scorers.is_empty() {
284+
should_scorer
285+
} else {
286+
must_scorers.push(into_box_scorer(should_scorer, &score_combiner_fn, num_docs));
287+
SpecializedScorer::Other(intersect_scorers(must_scorers, num_docs))
288+
}
242289
}
243-
(CombinationMethod::Required(should_scorer), None) => should_scorer,
244-
// Optional options are promoted to required if no must scorers exists.
245-
(CombinationMethod::Optional(should_scorer), None) => should_scorer,
246290
};
247291
if let Some(exclude_scorer) = exclude_scorer_opt {
248292
let positive_scorer_boxed =
@@ -257,6 +301,30 @@ impl<TScoreCombiner: ScoreCombiner> BooleanWeight<TScoreCombiner> {
257301
}
258302
}
259303

304+
/// Tally of the `AllScorer` and `EmptyScorer` instances found in a list of scorers.
///
/// Produced by `remove_and_count_all_and_empty_scorers`; both counters start at zero
/// through the derived `Default`.
#[derive(Default, Copy, Clone, Debug)]
305+
struct AllAndEmptyScorerCounts {
306+
/// Number of scorers that were an `AllScorer`.
all_count: usize,
307+
/// Number of scorers that were an `EmptyScorer`.
empty_count: usize,
308+
}
309+
310+
/// Strips every `AllScorer` and `EmptyScorer` out of `scorers`, in place, and reports
/// how many of each were removed.
///
/// All other scorers are kept, in their original relative order (`Vec::retain`).
/// The caller uses the returned counts to decide how the removed clauses affect
/// the boolean query as a whole.
fn remove_and_count_all_and_empty_scorers(
311+
scorers: &mut Vec<Box<dyn Scorer>>,
312+
) -> AllAndEmptyScorerCounts {
313+
let mut counts = AllAndEmptyScorerCounts::default();
314+
// `retain` keeps a scorer when the closure returns `true`; the two special
// scorer types are counted and dropped.
scorers.retain(|scorer| {
315+
if scorer.is::<AllScorer>() {
316+
counts.all_count += 1;
317+
false
318+
} else if scorer.is::<EmptyScorer>() {
319+
counts.empty_count += 1;
320+
false
321+
} else {
322+
true
323+
}
324+
});
325+
counts
326+
}
327+
260328
impl<TScoreCombiner: ScoreCombiner + Sync> Weight for BooleanWeight<TScoreCombiner> {
261329
fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
262330
let num_docs = reader.num_docs();

0 commit comments

Comments
 (0)