Skip to content

Commit e745fc5

Browse files
committed
add range query compatible engines
1 parent b956323 commit e745fc5

File tree

9 files changed

+88
-43
lines changed

9 files changed

+88
-43
lines changed

Makefile

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@ export
33

44
WIKI_SRC = "https://www.dropbox.com/s/wwnfnu441w1ec9p/wiki-articles.json.bz2"
55

6-
COMMANDS ?= TOP_10 TOP_10_COUNT COUNT
6+
COMMANDS ?= TOP_10 TOP_10_COUNT COUNT
77

8-
# ENGINES ?= tantivy-0.13 lucene-8.4.0 pisa-0.8.2 rucene-0.1 bleve-0.8.0-scorch rucene-0.1 tantivy-0.11 tantivy-0.14 tantivy-0.15 tantivy-0.16 tantivy-0.17 tantivy-0.18 tantivy-0.19
8+
# ENGINES ?= tantivy-0.13 lucene-8.4.0 pisa-0.8.2 rucene-0.1 bleve-0.8.0-scorch rucene-0.1 tantivy-0.11 tantivy-0.16 tantivy-0.17 tantivy-0.18 tantivy-0.19
99
# ENGINES ?= tantivy-0.16 lucene-8.10.1 pisa-0.8.2 bleve-0.8.0-scorch rucene-0.1
10-
ENGINES ?= tantivy-0.19 lucene-8.10.1
10+
ENGINES ?= tantivy-0.18 tantivy-0.19 lucene-8.10.1
11+
export RANGE_QUERY_ENABLED_ENGINES ?= tantivy-0.18 tantivy-0.19 lucene-8.10.1 lucene-8.0.0 lucene-7.2.1
1112
PORT ?= 8080
1213

1314
help:

engines/lucene-7.2.1/src/main/java/BuildIndex.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,11 @@ public static void main(String[] args) throws IOException {
2525
final Document document = new Document();
2626

2727
StoredField idField = new StoredField("id", "");
28+
IntPoint idNumField = new IntPoint("id_num", 0);
2829
TextField textField = new TextField("text", "", Field.Store.NO);
2930

3031
document.add(idField);
32+
document.add(idNumField);
3133
document.add(textField);
3234

3335
String line;
@@ -37,8 +39,10 @@ public static void main(String[] args) throws IOException {
3739
}
3840
final JsonObject parsed_doc = Json.parse(line).asObject();
3941
final String id = parsed_doc.get("id").asString();
42+
final int id_num = parsed_doc.get("id_num").asInt();
4043
final String text = parsed_doc.get("text").asString();
4144
idField.setStringValue(id);
45+
idNumField.setIntValue(id_num);
4246
textField.setStringValue(text);
4347
writer.addDocument(document);
4448
}

engines/lucene-8.0.0/src/main/java/BuildIndex.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,11 @@ public static void main(String[] args) throws IOException {
2525
final Document document = new Document();
2626

2727
StoredField idField = new StoredField("id", "");
28+
IntPoint idNumField = new IntPoint("id_num", 0);
2829
TextField textField = new TextField("text", "", Field.Store.NO);
2930

3031
document.add(idField);
32+
document.add(idNumField);
3133
document.add(textField);
3234

3335
String line;
@@ -37,8 +39,10 @@ public static void main(String[] args) throws IOException {
3739
}
3840
final JsonObject parsed_doc = Json.parse(line).asObject();
3941
final String id = parsed_doc.get("id").asString();
42+
final int id_num = parsed_doc.get("id_num").asInt();
4043
final String text = parsed_doc.get("text").asString();
4144
idField.setStringValue(id);
45+
idNumField.setIntValue(id_num);
4246
textField.setStringValue(text);
4347
writer.addDocument(document);
4448
}

engines/tantivy-0.13/src/bin/build_index.rs

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,21 @@
1-
use tantivy::schema::{Schema, STORED, TEXT};
2-
use tantivy::Index;
1+
use futures::executor::block_on;
32
use std::env;
43
use std::io::BufRead;
54
use std::path::Path;
6-
use futures::executor::block_on;
5+
use tantivy::schema::{Schema, FAST, INDEXED, STORED, TEXT};
6+
use tantivy::Index;
77

88
fn main() {
99
let args: Vec<String> = env::args().collect();
1010
main_inner(&Path::new(&args[1])).unwrap();
1111
}
1212

1313
fn create_schema() -> Schema {
14-
let mut schema_builder = Schema::builder();
15-
schema_builder.add_text_field("id", STORED);
16-
schema_builder.add_text_field("text", TEXT);
17-
schema_builder.build()
14+
let mut schema_builder = Schema::builder();
15+
schema_builder.add_text_field("id", STORED);
16+
schema_builder.add_u64_field("id_num", FAST | INDEXED);
17+
schema_builder.add_text_field("text", TEXT);
18+
schema_builder.build()
1819
}
1920

2021
fn main_inner(output_dir: &Path) -> tantivy::Result<()> {
@@ -25,7 +26,9 @@ fn main_inner(output_dir: &Path) -> tantivy::Result<()> {
2526

2627
let mut i = 0;
2728
{
28-
let mut index_writer = index.writer_with_num_threads(4, 2_000_000_000).expect("failed to create index writer");
29+
let mut index_writer = index
30+
.writer_with_num_threads(4, 2_000_000_000)
31+
.expect("failed to create index writer");
2932
let stdin = std::io::stdin();
3033

3134
for line in stdin.lock().lines() {
@@ -44,9 +47,11 @@ fn main_inner(output_dir: &Path) -> tantivy::Result<()> {
4447
index_writer.commit()?;
4548
index_writer.wait_merging_threads()?;
4649
}
47-
let segment_ids = index.searchable_segment_ids()?;
48-
let mut index_writer = index.writer(1_500_000_000).expect("failed to create index writer");
49-
block_on(index_writer.merge(&segment_ids))?;
50-
block_on(index_writer.garbage_collect_files())?;
50+
let segment_ids = index.searchable_segment_ids()?;
51+
let mut index_writer = index
52+
.writer(1_500_000_000)
53+
.expect("failed to create index writer");
54+
block_on(index_writer.merge(&segment_ids))?;
55+
block_on(index_writer.garbage_collect_files())?;
5156
Ok(())
5257
}

engines/tantivy-0.16/src/bin/build_index.rs

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,21 @@
1-
use tantivy::schema::{Schema, STORED, TEXT};
2-
use tantivy::Index;
1+
use futures::executor::block_on;
32
use std::env;
43
use std::io::BufRead;
54
use std::path::Path;
6-
use futures::executor::block_on;
5+
use tantivy::schema::{Schema, FAST, INDEXED, STORED, TEXT};
6+
use tantivy::Index;
77

88
fn main() {
99
let args: Vec<String> = env::args().collect();
1010
main_inner(&Path::new(&args[1])).unwrap();
1111
}
1212

1313
fn create_schema() -> Schema {
14-
let mut schema_builder = Schema::builder();
15-
schema_builder.add_text_field("id", STORED);
16-
schema_builder.add_text_field("text", TEXT);
17-
schema_builder.build()
14+
let mut schema_builder = Schema::builder();
15+
schema_builder.add_text_field("id", STORED);
16+
schema_builder.add_u64_field("id_num", FAST | INDEXED);
17+
schema_builder.add_text_field("text", TEXT);
18+
schema_builder.build()
1819
}
1920

2021
fn main_inner(output_dir: &Path) -> tantivy::Result<()> {
@@ -25,7 +26,9 @@ fn main_inner(output_dir: &Path) -> tantivy::Result<()> {
2526

2627
let mut i = 0;
2728
{
28-
let mut index_writer = index.writer_with_num_threads(4, 2_000_000_000).expect("failed to create index writer");
29+
let mut index_writer = index
30+
.writer_with_num_threads(4, 2_000_000_000)
31+
.expect("failed to create index writer");
2932
let stdin = std::io::stdin();
3033

3134
for line in stdin.lock().lines() {
@@ -44,9 +47,11 @@ fn main_inner(output_dir: &Path) -> tantivy::Result<()> {
4447
index_writer.commit()?;
4548
index_writer.wait_merging_threads()?;
4649
}
47-
let segment_ids = index.searchable_segment_ids()?;
48-
let mut index_writer = index.writer(1_500_000_000).expect("failed to create index writer");
49-
block_on(index_writer.merge(&segment_ids))?;
50-
block_on(index_writer.garbage_collect_files())?;
50+
let segment_ids = index.searchable_segment_ids()?;
51+
let mut index_writer = index
52+
.writer(1_500_000_000)
53+
.expect("failed to create index writer");
54+
block_on(index_writer.merge(&segment_ids))?;
55+
block_on(index_writer.garbage_collect_files())?;
5156
Ok(())
5257
}

engines/tantivy-0.17/src/bin/build_index.rs

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,21 @@
1-
use tantivy::schema::{Schema, STORED, TEXT};
2-
use tantivy::Index;
1+
use futures::executor::block_on;
32
use std::env;
43
use std::io::BufRead;
54
use std::path::Path;
6-
use futures::executor::block_on;
5+
use tantivy::schema::{Schema, FAST, INDEXED, STORED, TEXT};
6+
use tantivy::Index;
77

88
fn main() {
99
let args: Vec<String> = env::args().collect();
1010
main_inner(&Path::new(&args[1])).unwrap();
1111
}
1212

1313
fn create_schema() -> Schema {
14-
let mut schema_builder = Schema::builder();
15-
schema_builder.add_text_field("id", STORED);
16-
schema_builder.add_text_field("text", TEXT);
17-
schema_builder.build()
14+
let mut schema_builder = Schema::builder();
15+
schema_builder.add_text_field("id", STORED);
16+
schema_builder.add_u64_field("id_num", FAST | INDEXED);
17+
schema_builder.add_text_field("text", TEXT);
18+
schema_builder.build()
1819
}
1920

2021
fn main_inner(output_dir: &Path) -> tantivy::Result<()> {
@@ -25,7 +26,9 @@ fn main_inner(output_dir: &Path) -> tantivy::Result<()> {
2526

2627
let mut i = 0;
2728
{
28-
let mut index_writer = index.writer_with_num_threads(4, 2_000_000_000).expect("failed to create index writer");
29+
let mut index_writer = index
30+
.writer_with_num_threads(4, 2_000_000_000)
31+
.expect("failed to create index writer");
2932
let stdin = std::io::stdin();
3033

3134
for line in stdin.lock().lines() {
@@ -44,9 +47,11 @@ fn main_inner(output_dir: &Path) -> tantivy::Result<()> {
4447
index_writer.commit()?;
4548
index_writer.wait_merging_threads()?;
4649
}
47-
let segment_ids = index.searchable_segment_ids()?;
48-
let mut index_writer = index.writer(1_500_000_000).expect("failed to create index writer");
49-
block_on(index_writer.merge(&segment_ids))?;
50-
block_on(index_writer.garbage_collect_files())?;
50+
let segment_ids = index.searchable_segment_ids()?;
51+
let mut index_writer = index
52+
.writer(1_500_000_000)
53+
.expect("failed to create index writer");
54+
block_on(index_writer.merge(&segment_ids))?;
55+
block_on(index_writer.garbage_collect_files())?;
5156
Ok(())
5257
}

results.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

src/client.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,9 @@
1010

1111
class SearchClient:
1212

13-
def __init__(self, engine):
13+
def __init__(self, engine, unsupported_queries):
1414
self.engine = engine
15+
self.unsupported_queries = unsupported_queries
1516
dirname = os.path.split(os.path.abspath(__file__))[0]
1617
dirname = path.dirname(dirname)
1718
dirname = path.join(dirname, "engines")
@@ -23,6 +24,8 @@ def __init__(self, engine):
2324
stdin=subprocess.PIPE)
2425

2526
def query(self, query, command):
27+
if query in self.unsupported_queries:
28+
return None
2629
query_line = "%s\t%s\n" % (command, query)
2730
self.process.stdin.write(query_line.encode("utf-8"))
2831
self.process.stdin.flush()
@@ -57,19 +60,37 @@ def read_queries(query_path):
5760
WARMUP_ITER = 1
5861
NUM_ITER = 3
5962

63+
def filter_non_range_queries(queries):
64+
return [query for query in queries if 'range' not in query.tags]
65+
66+
def get_range_queries(queries):
67+
range_queries = set()
68+
for query in queries:
69+
if 'range' in query.tags:
70+
range_queries.add(query.query)
71+
return range_queries
6072

6173
if __name__ == "__main__":
6274
import sys
6375
random.seed(2)
6476
query_path = sys.argv[1]
6577
engines = sys.argv[2:]
78+
range_query_enabled_engines = os.environ['RANGE_QUERY_ENABLED_ENGINES'].split(" ")
79+
range_query_enabled_engines = [engine.strip() for engine in range_query_enabled_engines]
6680
queries = list(read_queries(query_path))
81+
# non_range_queries = filter_non_range_queries(queries)
82+
range_queries = get_range_queries(queries)
6783
results = {}
6884
for command in COMMANDS:
6985
results_commands = {}
7086
for engine in engines:
7187
engine_results = []
7288
query_idx = {}
89+
if engine in range_query_enabled_engines:
90+
unsupported_queries = set()
91+
else:
92+
unsupported_queries = range_queries
93+
7394
for query in queries:
7495
query_result = {
7596
"query": query.query,
@@ -81,7 +102,7 @@ def read_queries(query_path):
81102
engine_results.append(query_result)
82103
print("======================")
83104
print("BENCHMARKING %s %s" % (engine, command))
84-
search_client = SearchClient(engine)
105+
search_client = SearchClient(engine, unsupported_queries)
85106
print("--- Warming up ...")
86107
queries_shuffled = list(queries[:])
87108
random.seed(2)

web/build/results.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)