Skip to content

Commit a873885

Browse files
authored
fix: use new Compressor::rebuild_from for FSST pushdown (#2708)
The previous implementation of FSST comparison pushdown rebuilt a compressor by reinserting the symbols one by one into a `CompressorBuilder` and then building it. That approach does not work, for the reasons described in spiraldb/fsst#84. We now use the new `rebuild_from` API on the FSST compressor to build a new compressor that is guaranteed to preserve the symbol table ordering, and thus guarantees equal compression outputs.
1 parent a0da90b commit a873885

File tree

3 files changed

+4
-11
lines changed

3 files changed

+4
-11
lines changed

Cargo.lock

+2-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ fastlanes = "0.1.8"
9393
flatbuffers = "25"
9494
flexbuffers = "25"
9595
flume = "0.11"
96-
fsst-rs = "0.5.1"
96+
fsst-rs = "0.5.2"
9797
futures = { version = "0.3.31", default-features = false }
9898
futures-executor = "0.3.31"
9999
futures-util = "0.3.31"

encodings/fsst/src/compute/compare.rs

+1-8
Original file line numberDiff line numberDiff line change
@@ -68,14 +68,7 @@ fn compare_fsst_constant(
6868
return Ok(None);
6969
}
7070

71-
let symbols = left.symbols();
72-
let symbol_lens = left.symbol_lengths();
73-
74-
let mut compressor = fsst::CompressorBuilder::new();
75-
for (symbol, symbol_len) in symbols.iter().zip(symbol_lens.iter()) {
76-
compressor.insert(*symbol, *symbol_len as usize);
77-
}
78-
let compressor = compressor.build();
71+
let compressor = fsst::Compressor::rebuild_from(left.symbols(), left.symbol_lengths());
7972

8073
let encoded_scalar = match left.dtype() {
8174
DType::Utf8(_) => {

0 commit comments

Comments
 (0)