Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up top-k retrieval on filtered conjunctions. #13994

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 36 additions & 2 deletions lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
Original file line number Diff line number Diff line change
Expand Up @@ -539,8 +539,7 @@ public Query rewrite(IndexSearcher indexSearcher) throws IOException {
builder.setMinimumNumberShouldMatch(minimumNumberShouldMatch);
boolean actuallyRewritten = false;
for (BooleanClause clause : clauses) {
if (clause.occur() == Occur.SHOULD && clause.query() instanceof BooleanQuery) {
BooleanQuery innerQuery = (BooleanQuery) clause.query();
if (clause.occur() == Occur.SHOULD && clause.query() instanceof BooleanQuery innerQuery) {
if (innerQuery.isPureDisjunction()) {
actuallyRewritten = true;
for (BooleanClause innerClause : innerQuery.clauses()) {
Expand All @@ -558,6 +557,41 @@ public Query rewrite(IndexSearcher indexSearcher) throws IOException {
}
}

// Inline required / prohibited clauses. This helps run filtered conjunctive queries more
// efficiently by providing all clauses to the block-max AND scorer.
{
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.setMinimumNumberShouldMatch(minimumNumberShouldMatch);
boolean actuallyRewritten = false;
for (BooleanClause clause : clauses) {
if (clause.isRequired() && clause.query() instanceof BooleanQuery innerQuery) {
if (innerQuery.getMinimumNumberShouldMatch() == 0
&& innerQuery.getClauses(Occur.SHOULD).isEmpty()) {
actuallyRewritten = true;
for (BooleanClause innerClause : innerQuery) {
Occur occur = innerClause.occur();
if (occur == Occur.FILTER
|| occur == Occur.MUST_NOT
|| clause.occur() == Occur.MUST) {
builder.add(innerClause);
} else {
assert clause.occur() == Occur.FILTER && occur == Occur.MUST;
// In this case we need to change the occur of the inner query from MUST to FILTER.
builder.add(innerClause.query(), Occur.FILTER);
}
}
} else {
builder.add(clause);
}
} else {
builder.add(clause);
}
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Holy smokes I had to read this like 5 times to fully grok what it is doing.

Could you name occur to innerClauseOccur and change clause to outerClause?

if (actuallyRewritten) {
return builder.build();
}
}

// SHOULD clause count less than or equal to minimumNumberShouldMatch
// Important(this can only be processed after nested clauses have been flattened)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -333,10 +333,15 @@ private BulkScorer requiredBulkScorer() throws IOException {
requiredScoring.add(ss.get(leadCost));
}
if (scoreMode == ScoreMode.TOP_SCORES
&& requiredNoScoring.isEmpty()
&& requiredScoring.size() > 1
// Only specialize top-level conjunctions for clauses that don't have a two-phase iterator.
&& requiredNoScoring.stream().map(Scorer::twoPhaseIterator).allMatch(Objects::isNull)
&& requiredScoring.stream().map(Scorer::twoPhaseIterator).allMatch(Objects::isNull)) {
// Turn all filters into scoring clauses with a score of zero, so that
// BlockMaxConjunctionBulkScorer is applicable.
for (Scorer filter : requiredNoScoring) {
requiredScoring.add(new ConstantScoreScorer(0f, ScoreMode.COMPLETE, filter.iterator()));
}
return new BlockMaxConjunctionBulkScorer(maxDoc, requiredScoring);
}
if (scoreMode != ScoreMode.TOP_SCORES
Expand Down
139 changes: 120 additions & 19 deletions lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java
Original file line number Diff line number Diff line change
Expand Up @@ -330,8 +330,8 @@ public void testDeeplyNestedBooleanRewriteShouldClauses() throws IOException {
int depth = TestUtil.nextInt(random(), 10, 30);
TestRewriteQuery rewriteQueryExpected = new TestRewriteQuery();
TestRewriteQuery rewriteQuery = new TestRewriteQuery();
Query expectedQuery =
new BooleanQuery.Builder().add(rewriteQueryExpected, Occur.FILTER).build();
BooleanQuery.Builder expectedQueryBuilder =
new BooleanQuery.Builder().add(rewriteQueryExpected, Occur.FILTER);
Query deepBuilder =
new BooleanQuery.Builder()
.add(rewriteQuery, Occur.SHOULD)
Expand All @@ -345,21 +345,19 @@ public void testDeeplyNestedBooleanRewriteShouldClauses() throws IOException {
.add(tq, Occur.SHOULD)
.add(deepBuilder, Occur.SHOULD);
deepBuilder = bq.build();
BooleanQuery.Builder expectedBq = new BooleanQuery.Builder().add(tq, Occur.FILTER);
expectedQueryBuilder.add(tq, Occur.FILTER);
if (i == depth) {
expectedBq.add(rewriteQuery, Occur.FILTER);
} else {
expectedBq.add(expectedQuery, Occur.FILTER);
expectedQueryBuilder.add(rewriteQuery, Occur.FILTER);
}
expectedQuery = expectedBq.build();
}
BooleanQuery bq = new BooleanQuery.Builder().add(deepBuilder, Occur.FILTER).build();
expectedQuery = new BoostQuery(new ConstantScoreQuery(expectedQuery), 0.0f);
Query expectedQuery =
new BoostQuery(new ConstantScoreQuery(expectedQueryBuilder.build()), 0.0f);
Query rewritten = searcher.rewrite(bq);
assertEquals(expectedQuery, rewritten);
// the SHOULD clauses cause more rewrites because they incrementally change to `MUST` and then
// `FILTER`
assertEquals("Depth=" + depth, depth + 1, rewriteQuery.numRewrites);
// `FILTER`, plus the flattening of required clauses
assertEquals("Depth=" + depth, depth * 2, rewriteQuery.numRewrites);
}

public void testDeeplyNestedBooleanRewrite() throws IOException {
Expand All @@ -369,27 +367,26 @@ public void testDeeplyNestedBooleanRewrite() throws IOException {
int depth = TestUtil.nextInt(random(), 10, 30);
TestRewriteQuery rewriteQueryExpected = new TestRewriteQuery();
TestRewriteQuery rewriteQuery = new TestRewriteQuery();
Query expectedQuery =
new BooleanQuery.Builder().add(rewriteQueryExpected, Occur.FILTER).build();
BooleanQuery.Builder expectedQueryBuilder =
new BooleanQuery.Builder().add(rewriteQueryExpected, Occur.FILTER);
Query deepBuilder = new BooleanQuery.Builder().add(rewriteQuery, Occur.MUST).build();
for (int i = depth; i > 0; i--) {
TermQuery tq = termQueryFunction.apply(i);
BooleanQuery.Builder bq =
new BooleanQuery.Builder().add(tq, Occur.MUST).add(deepBuilder, Occur.MUST);
deepBuilder = bq.build();
BooleanQuery.Builder expectedBq = new BooleanQuery.Builder().add(tq, Occur.FILTER);
expectedQueryBuilder.add(tq, Occur.FILTER);
if (i == depth) {
expectedBq.add(rewriteQuery, Occur.FILTER);
} else {
expectedBq.add(expectedQuery, Occur.FILTER);
expectedQueryBuilder.add(rewriteQuery, Occur.FILTER);
}
expectedQuery = expectedBq.build();
}
BooleanQuery bq = new BooleanQuery.Builder().add(deepBuilder, Occur.FILTER).build();
expectedQuery = new BoostQuery(new ConstantScoreQuery(expectedQuery), 0.0f);
Query expectedQuery =
new BoostQuery(new ConstantScoreQuery(expectedQueryBuilder.build()), 0.0f);
Query rewritten = searcher.rewrite(bq);
assertEquals(expectedQuery, rewritten);
assertEquals("Depth=" + depth, 1, rewriteQuery.numRewrites);
// `depth` rewrites because of the flattening
assertEquals("Depth=" + depth, depth, rewriteQuery.numRewrites);
}

public void testRemoveMatchAllFilter() throws IOException {
Expand Down Expand Up @@ -691,6 +688,110 @@ public void testFlattenInnerDisjunctions() throws IOException {
assertSame(query, searcher.rewrite(query));
}

public void testFlattenInnerConjunctions() throws IOException {
IndexSearcher searcher = newSearcher(new MultiReader());

Query inner =
new BooleanQuery.Builder()
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
.add(new TermQuery(new Term("foo", "quux")), Occur.MUST)
.build();
Query query =
new BooleanQuery.Builder()
.add(inner, Occur.MUST)
.add(new TermQuery(new Term("foo", "baz")), Occur.FILTER)
.build();
Query expectedRewritten =
new BooleanQuery.Builder()
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
.add(new TermQuery(new Term("foo", "quux")), Occur.MUST)
.add(new TermQuery(new Term("foo", "baz")), Occur.FILTER)
.build();
assertEquals(expectedRewritten, searcher.rewrite(query));

query =
new BooleanQuery.Builder()
.setMinimumNumberShouldMatch(0)
.add(inner, Occur.MUST)
.add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD)
.build();
expectedRewritten =
new BooleanQuery.Builder()
.setMinimumNumberShouldMatch(0)
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
.add(new TermQuery(new Term("foo", "quux")), Occur.MUST)
.add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD)
.build();
assertEquals(expectedRewritten, searcher.rewrite(query));

query =
new BooleanQuery.Builder()
.add(inner, Occur.MUST)
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST_NOT)
.build();
expectedRewritten =
new BooleanQuery.Builder()
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
.add(new TermQuery(new Term("foo", "quux")), Occur.MUST)
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST_NOT)
.build();
assertEquals(expectedRewritten, searcher.rewrite(query));

inner =
new BooleanQuery.Builder()
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
.add(new TermQuery(new Term("foo", "quux")), Occur.FILTER)
.build();
query =
new BooleanQuery.Builder()
.add(inner, Occur.MUST)
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
.build();
expectedRewritten =
new BooleanQuery.Builder()
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
.add(new TermQuery(new Term("foo", "quux")), Occur.FILTER)
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
.build();
assertEquals(expectedRewritten, searcher.rewrite(query));

inner =
new BooleanQuery.Builder()
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
.add(new TermQuery(new Term("foo", "quux")), Occur.FILTER)
.build();
query =
new BooleanQuery.Builder()
.add(inner, Occur.FILTER)
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
.build();
expectedRewritten =
new BooleanQuery.Builder()
.add(new TermQuery(new Term("foo", "bar")), Occur.FILTER)
.add(new TermQuery(new Term("foo", "quux")), Occur.FILTER)
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
.build();
assertEquals(expectedRewritten, searcher.rewrite(query));

inner =
new BooleanQuery.Builder()
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
.add(new TermQuery(new Term("foo", "quux")), Occur.MUST_NOT)
.build();
query =
new BooleanQuery.Builder()
.add(inner, Occur.FILTER)
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
.build();
expectedRewritten =
new BooleanQuery.Builder()
.add(new TermQuery(new Term("foo", "bar")), Occur.FILTER)
.add(new TermQuery(new Term("foo", "quux")), Occur.MUST_NOT)
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
.build();
assertEquals(expectedRewritten, searcher.rewrite(query));
}

public void testDiscardShouldClauses() throws IOException {
IndexSearcher searcher = newSearcher(new MultiReader());

Expand Down