After all levels are encoded, memory offsets for each node's neighbor nodes are appended to
* the end of the file. The offsets are encoded by {@link
* org.apache.lucene.util.packed.DirectMonotonicWriter}.
@@ -149,12 +151,19 @@ public final class Lucene99HnswVectorsFormat extends KnnVectorsFormat {
private final int beamWidth;
/** The format for storing, reading, and merging vectors on disk. */
- private static final FlatVectorsFormat flatVectorsFormat =
- new Lucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer());
+ private static final FlatVectorsScorer DEFAULT_FLAT_VECTORS_SCORER =
+ FlatVectorScorerUtil.getLucene99FlatVectorsScorer();
+
+ private static final FlatVectorsFormat DEFAULT_FLAT_VECTORS_FORMAT =
+ new Lucene99FlatVectorsFormat(DEFAULT_FLAT_VECTORS_SCORER, false);
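+ // same flat storage format, but with vector reordering (BP) applied to the vectors during merge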
+ private static final FlatVectorsFormat REORDERING_FLAT_VECTORS_FORMAT =
+ new Lucene99FlatVectorsFormat(DEFAULT_FLAT_VECTORS_SCORER, true);
private final int numMergeWorkers;
private final TaskExecutor mergeExec;
+ private final boolean enableReorder;
+
/**
* The threshold to use to bypass HNSW graph building for tiny segments in terms of k for a graph
* i.e. number of docs to match the query (default is {@link
@@ -168,6 +177,8 @@ public final class Lucene99HnswVectorsFormat extends KnnVectorsFormat {
*/
private final int tinySegmentsThreshold;
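+ // flat format delegate, chosen at construction time based on enableReorder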
+ private final FlatVectorsFormat flatVectorsFormat;
+
private final int writeVersion;
/** Constructs a format using default graph construction parameters */
@@ -178,6 +189,7 @@ public Lucene99HnswVectorsFormat() {
DEFAULT_NUM_MERGE_WORKER,
null,
HNSW_GRAPH_THRESHOLD,
+ false,
VERSION_CURRENT);
}
@@ -188,7 +200,14 @@ public Lucene99HnswVectorsFormat() {
* @param beamWidth the size of the queue maintained during graph construction.
*/
public Lucene99HnswVectorsFormat(int maxConn, int beamWidth) {
- this(maxConn, beamWidth, DEFAULT_NUM_MERGE_WORKER, null, HNSW_GRAPH_THRESHOLD, VERSION_CURRENT);
+ this(
+ maxConn,
+ beamWidth,
+ DEFAULT_NUM_MERGE_WORKER,
+ null,
+ HNSW_GRAPH_THRESHOLD,
+ false,
+ VERSION_CURRENT);
}
/**
@@ -201,7 +220,13 @@ public Lucene99HnswVectorsFormat(int maxConn, int beamWidth) {
*/
public Lucene99HnswVectorsFormat(int maxConn, int beamWidth, int tinySegmentsThreshold) {
this(
- maxConn, beamWidth, DEFAULT_NUM_MERGE_WORKER, null, tinySegmentsThreshold, VERSION_CURRENT);
+ maxConn,
+ beamWidth,
+ DEFAULT_NUM_MERGE_WORKER,
+ null,
+ tinySegmentsThreshold,
+ false,
+ VERSION_CURRENT);
}
/**
@@ -217,7 +242,14 @@ public Lucene99HnswVectorsFormat(int maxConn, int beamWidth, int tinySegmentsThr
*/
public Lucene99HnswVectorsFormat(
int maxConn, int beamWidth, int numMergeWorkers, ExecutorService mergeExec) {
- this(maxConn, beamWidth, numMergeWorkers, mergeExec, HNSW_GRAPH_THRESHOLD, VERSION_CURRENT);
+ this(
+ maxConn,
+ beamWidth,
+ numMergeWorkers,
+ mergeExec,
+ HNSW_GRAPH_THRESHOLD,
+ false,
+ VERSION_CURRENT);
}
/**
@@ -240,7 +272,46 @@ public Lucene99HnswVectorsFormat(
int numMergeWorkers,
ExecutorService mergeExec,
int tinySegmentsThreshold) {
- this(maxConn, beamWidth, numMergeWorkers, mergeExec, tinySegmentsThreshold, VERSION_CURRENT);
+ this(
+ maxConn,
+ beamWidth,
+ numMergeWorkers,
+ mergeExec,
+ tinySegmentsThreshold,
+ false,
+ VERSION_CURRENT);
+ }
+
+ /**
+ * Constructs a format using the given graph construction parameters. This constructor is
+ * intended for use in tests only.
+ *
+ * @param maxConn the maximum number of connections to a node in the HNSW graph
+ * @param beamWidth the size of the queue maintained during graph construction.
+ * @param numMergeWorkers number of workers (threads) that will be used when doing merge. If
+ * larger than 1, a non-null {@link ExecutorService} must be passed as mergeExec
+ * @param mergeExec the {@link ExecutorService} that will be used by ALL vector writers that are
+ * generated by this format to do the merge. If null, the configured {@link
+ * MergeScheduler#getIntraMergeExecutor(MergePolicy.OneMerge)} is used.
+ * @param tinySegmentsThreshold the expected number of vector operations to return k nearest
+ * neighbors of the current graph size
+ * @param enableReorder if true, BpVectorReorderer is used to sort vector nodes when merging.
+ */
+ public Lucene99HnswVectorsFormat(
+ int maxConn,
+ int beamWidth,
+ int numMergeWorkers,
+ ExecutorService mergeExec,
+ int tinySegmentsThreshold,
+ boolean enableReorder) {
+ this(
+ maxConn,
+ beamWidth,
+ numMergeWorkers,
+ mergeExec,
+ tinySegmentsThreshold,
+ enableReorder,
+ VERSION_CURRENT);
}
/**
@@ -255,6 +326,7 @@ public Lucene99HnswVectorsFormat(
* MergeScheduler#getIntraMergeExecutor(MergePolicy.OneMerge)} is used.
* @param tinySegmentsThreshold the expected number of vector operations to return k nearest
* neighbors of the current graph size
+ * @param enableReorder if true, BpVectorReorderer is used to sort vector nodes when merging.
* @param writeVersion the version used for the writer to encode docID's (VarInt=0, GroupVarInt=1)
*/
Lucene99HnswVectorsFormat(
@@ -263,6 +335,7 @@ public Lucene99HnswVectorsFormat(
int numMergeWorkers,
ExecutorService mergeExec,
int tinySegmentsThreshold,
+ boolean enableReorder,
int writeVersion) {
super("Lucene99HnswVectorsFormat");
if (maxConn <= 0 || maxConn > MAXIMUM_MAX_CONN) {
@@ -282,6 +355,7 @@ public Lucene99HnswVectorsFormat(
this.maxConn = maxConn;
this.beamWidth = beamWidth;
this.tinySegmentsThreshold = tinySegmentsThreshold;
+ this.enableReorder = enableReorder;
this.writeVersion = writeVersion;
if (numMergeWorkers == 1 && mergeExec != null) {
throw new IllegalArgumentException(
@@ -293,6 +367,11 @@ public Lucene99HnswVectorsFormat(
} else {
this.mergeExec = null;
}
+ if (enableReorder) {
+ flatVectorsFormat = REORDERING_FLAT_VECTORS_FORMAT;
+ } else {
+ flatVectorsFormat = DEFAULT_FLAT_VECTORS_FORMAT;
+ }
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsWriter.java
index d2526cff3ab8..a5751b3fe0ac 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsWriter.java
@@ -48,6 +48,7 @@
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.search.TaskExecutor;
import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.BpVectorReorderer;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.RamUsageEstimator;
@@ -257,7 +258,9 @@ private void writeSortingField(FieldWriter<?> fieldData, Sorter.DocMap sortMap)
long vectorIndexOffset = vectorIndex.getFilePointer();
OnHeapHnswGraph graph = fieldData.getGraph();
int[][] graphLevelNodeOffsets = graph == null ? new int[0][] : new int[graph.numLevels()][];
- HnswGraph mockGraph = reconstructAndWriteGraph(graph, ordMap, oldOrdMap, graphLevelNodeOffsets);
+ HnswGraph mockGraph =
+ reconstructAndWriteGraph(
+ graph, BpVectorReorderer.sortMapOf(oldOrdMap, ordMap), graphLevelNodeOffsets);
long vectorIndexLength = vectorIndex.getFilePointer() - vectorIndexOffset;
writeMeta(
@@ -275,15 +278,13 @@ private void writeSortingField(FieldWriter<?> fieldData, Sorter.DocMap sortMap)
* Additionally, the graph node connections are written to the vectorIndex.
*
* @param graph The current on heap graph
- * @param newToOldMap the new node ids indexed to the old node ids
- * @param oldToNewMap the old node ids indexed to the new node ids
+ * @param sortMap the new node ids mapped to (and from) the old node ids
* @param levelNodeOffsets where to place the new offsets for the nodes in the vector index.
* @return The graph
* @throws IOException if writing to vectorIndex fails
*/
private HnswGraph reconstructAndWriteGraph(
- OnHeapHnswGraph graph, int[] newToOldMap, int[] oldToNewMap, int[][] levelNodeOffsets)
- throws IOException {
+ OnHeapHnswGraph graph, Sorter.DocMap sortMap, int[][] levelNodeOffsets) throws IOException {
if (graph == null) return null;
List<int[]> nodesByLevel = new ArrayList<>(graph.numLevels());
@@ -295,9 +296,9 @@ private HnswGraph reconstructAndWriteGraph(
levelNodeOffsets[0] = new int[nodesOnLevel0.size()];
while (nodesOnLevel0.hasNext()) {
int node = nodesOnLevel0.nextInt();
- NeighborArray neighbors = graph.getNeighbors(0, newToOldMap[node]);
+ NeighborArray neighbors = graph.getNeighbors(0, sortMap.newToOld(node));
long offset = vectorIndex.getFilePointer();
- reconstructAndWriteNeighbours(neighbors, oldToNewMap, scratch, maxOrd);
+ reconstructAndWriteNeighbours(neighbors, sortMap, scratch, maxOrd);
levelNodeOffsets[0][node] = Math.toIntExact(vectorIndex.getFilePointer() - offset);
}
@@ -305,16 +306,16 @@ private HnswGraph reconstructAndWriteGraph(
NodesIterator nodesOnLevel = graph.getNodesOnLevel(level);
int[] newNodes = new int[nodesOnLevel.size()];
for (int n = 0; nodesOnLevel.hasNext(); n++) {
- newNodes[n] = oldToNewMap[nodesOnLevel.nextInt()];
+ newNodes[n] = sortMap.oldToNew(nodesOnLevel.nextInt());
}
Arrays.sort(newNodes);
nodesByLevel.add(newNodes);
levelNodeOffsets[level] = new int[newNodes.length];
int nodeOffsetIndex = 0;
for (int node : newNodes) {
- NeighborArray neighbors = graph.getNeighbors(level, newToOldMap[node]);
+ NeighborArray neighbors = graph.getNeighbors(level, sortMap.newToOld(node));
long offset = vectorIndex.getFilePointer();
- reconstructAndWriteNeighbours(neighbors, oldToNewMap, scratch, maxOrd);
+ reconstructAndWriteNeighbours(neighbors, sortMap, scratch, maxOrd);
levelNodeOffsets[level][nodeOffsetIndex++] =
Math.toIntExact(vectorIndex.getFilePointer() - offset);
}
@@ -367,12 +368,13 @@ public NodesIterator getNodesOnLevel(int level) {
}
private void reconstructAndWriteNeighbours(
- NeighborArray neighbors, int[] oldToNewMap, int[] scratch, int maxOrd) throws IOException {
+ NeighborArray neighbors, Sorter.DocMap sortMap, int[] scratch, int maxOrd)
+ throws IOException {
int size = neighbors.size();
// Destructively modify; it's ok we are discarding it after this
int[] nnodes = neighbors.nodes();
for (int i = 0; i < size; i++) {
- nnodes[i] = oldToNewMap[nnodes[i]];
+ nnodes[i] = sortMap.oldToNew(nnodes[i]);
}
Arrays.sort(nnodes, 0, size);
int actualSize = 0;
@@ -410,6 +412,7 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE
// we use Lucene99HnswVectorsReader.DenseOffHeapVectorValues for the graph construction
// doesn't need to know docIds
OnHeapHnswGraph graph = null;
+ HnswGraph graphToWrite = null;
int[][] vectorIndexNodeOffsets = null;
// Check if we should bypass graph building for tiny segments
boolean makeHnswGraph =
@@ -445,7 +448,14 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE
mergedVectorValues,
segmentWriteState.infoStream,
scorerSupplier.totalVectorCount());
- vectorIndexNodeOffsets = writeGraph(graph);
+ if (scorerSupplier.sortMap() == null) {
+ vectorIndexNodeOffsets = writeGraph(graph);
+ graphToWrite = graph;
+ } else {
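+ // the flat format reordered the vectors during this merge; rewrite the graph with node ids
+ // remapped to the new ordinals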
+ vectorIndexNodeOffsets = new int[graph.numLevels()][];
+ graphToWrite =
+ reconstructAndWriteGraph(graph, scorerSupplier.sortMap(), vectorIndexNodeOffsets);
+ }
}
long vectorIndexLength = vectorIndex.getFilePointer() - vectorIndexOffset;
writeMeta(
@@ -453,7 +463,7 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE
vectorIndexOffset,
vectorIndexLength,
scorerSupplier.totalVectorCount(),
- graph,
+ graphToWrite,
vectorIndexNodeOffsets);
} catch (Throwable t) {
IOUtils.closeWhileSuppressingExceptions(t, scorerSupplier);
@@ -464,11 +474,14 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE
/**
* @param graph Write the graph in a compressed format
* @return The non-cumulative offsets for the nodes. Should be used to create cumulative offsets.
* @throws IOException if writing to vectorIndex fails
*/
private int[][] writeGraph(OnHeapHnswGraph graph) throws IOException {
- if (graph == null) return new int[0][0];
+ if (graph == null) {
+ return new int[0][0];
+ }
// write vectors' neighbours on each level into the vectorIndex file
int countOnLevel0 = graph.size();
int[][] offsets = new int[graph.numLevels()][];
@@ -478,7 +491,8 @@ private int[][] writeGraph(OnHeapHnswGraph graph) throws IOException {
offsets[level] = new int[sortedNodes.size()];
int nodeOffsetId = 0;
while (sortedNodes.hasNext()) {
- NeighborArray neighbors = graph.getNeighbors(level, sortedNodes.next());
+ int node = sortedNodes.next();
+ NeighborArray neighbors = graph.getNeighbors(level, node);
int size = neighbors.size();
// Write size in VInt as the neighbors list is typically small
long offsetStart = vectorIndex.getFilePointer();
@@ -515,6 +529,70 @@ private int[][] writeGraph(OnHeapHnswGraph graph) throws IOException {
return offsets;
}
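+ /**
+ * Returns a view of {@code inner} whose node ids are remapped through {@code sortMap} (old
+ * ordinals to new); operations not needed by this writer throw UnsupportedOperationException.
+ */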
+ private static HnswGraph withReorderedNodes(HnswGraph inner, Sorter.DocMap sortMap) {
+ return new HnswGraph() {
+ @Override
+ public void seek(int level, int target) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int neighborCount() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int size() {
+ return inner.size();
+ }
+
+ @Override
+ public int nextNeighbor() throws IOException {
+ return sortMap.oldToNew(inner.nextNeighbor());
+ }
+
+ @Override
+ public int numLevels() throws IOException {
+ return inner.numLevels();
+ }
+
+ @Override
+ public int maxConn() {
+ return inner.maxConn();
+ }
+
+ @Override
+ public int entryNode() throws IOException {
+ return sortMap.oldToNew(inner.entryNode());
+ }
+
+ @Override
+ public NodesIterator getNodesOnLevel(int level) throws IOException {
+ NodesIterator innerIter = inner.getNodesOnLevel(level);
+ return new NodesIterator(innerIter.size()) {
+ @Override
+ public int consume(int[] dest) {
+ int result = innerIter.consume(dest);
+ for (int i = 0; i < result; i++) {
+ dest[i] = sortMap.oldToNew(dest[i]);
+ }
+ return result;
+ }
+
+ @Override
+ public int nextInt() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean hasNext() {
+ throw new UnsupportedOperationException();
+ }
+ };
+ }
+ };
+ }
+
private void writeMeta(
FieldInfo field,
long vectorIndexOffset,
diff --git a/lucene/core/src/java/org/apache/lucene/index/KnnVectorValues.java b/lucene/core/src/java/org/apache/lucene/index/KnnVectorValues.java
index 8e58f387a334..aad677e9de97 100644
--- a/lucene/core/src/java/org/apache/lucene/index/KnnVectorValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/KnnVectorValues.java
@@ -65,8 +65,6 @@ public int getVectorByteLength() {
/** Returns a Bits accepting docs accepted by the argument and having a vector value */
public Bits getAcceptOrds(Bits acceptDocs) {
- // FIXME: change default to return acceptDocs and provide this impl
- // somewhere more specialized (in every non-dense impl).
if (acceptDocs == null) {
return null;
}
diff --git a/lucene/misc/src/java/org/apache/lucene/misc/index/AbstractBPReorderer.java b/lucene/core/src/java/org/apache/lucene/util/AbstractBPReorderer.java
similarity index 97%
rename from lucene/misc/src/java/org/apache/lucene/misc/index/AbstractBPReorderer.java
rename to lucene/core/src/java/org/apache/lucene/util/AbstractBPReorderer.java
index 3f7442a25263..5f6e2e28a587 100644
--- a/lucene/misc/src/java/org/apache/lucene/misc/index/AbstractBPReorderer.java
+++ b/lucene/core/src/java/org/apache/lucene/util/AbstractBPReorderer.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.misc.index;
+package org.apache.lucene.util;
/** Base class for docid-reorderers implemented using binary partitioning (BP). */
public abstract class AbstractBPReorderer implements IndexReorderer {
@@ -71,7 +71,7 @@ public void setRAMBudgetMB(double ramBudgetMB) {
/** Exception that is thrown when not enough RAM is available. */
public static class NotEnoughRAMException extends RuntimeException {
- NotEnoughRAMException(String message) {
+ public NotEnoughRAMException(String message) {
super(message);
}
}
diff --git a/lucene/misc/src/java/org/apache/lucene/misc/index/BpVectorReorderer.java b/lucene/core/src/java/org/apache/lucene/util/BpVectorReorderer.java
similarity index 92%
rename from lucene/misc/src/java/org/apache/lucene/misc/index/BpVectorReorderer.java
rename to lucene/core/src/java/org/apache/lucene/util/BpVectorReorderer.java
index 246109ede04c..20f20a22cddf 100644
--- a/lucene/misc/src/java/org/apache/lucene/misc/index/BpVectorReorderer.java
+++ b/lucene/core/src/java/org/apache/lucene/util/BpVectorReorderer.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.misc.index;
+package org.apache.lucene.util;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
@@ -28,6 +28,7 @@
import java.util.concurrent.RecursiveAction;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.DocsWithFieldSet;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.IndexReader;
@@ -42,15 +43,11 @@
import org.apache.lucene.search.TaskExecutor;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.util.CloseableThreadLocal;
-import org.apache.lucene.util.IntroSelector;
-import org.apache.lucene.util.IntsRef;
-import org.apache.lucene.util.VectorUtil;
/**
* Implementation of "recursive graph bisection", also called "bipartite graph partitioning" and
* often abbreviated BP, an approach to doc ID assignment that aims at reducing the sum of the log
- * gap between consecutive neighbor node ids. See {@link BPIndexReorderer}.
+ * gap between consecutive neighbor node ids. See also BPIndexReorderer.
*/
public class BpVectorReorderer extends AbstractBPReorderer {
@@ -89,10 +86,14 @@ public class BpVectorReorderer extends AbstractBPReorderer {
private final String partitionField;
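+ /**
+ * Constructs a reorderer with no partition field; only {@link #computeValueMap} can be used,
+ * since {@link #computeDocMap} requires a field name.
+ */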
+ public BpVectorReorderer() {
+ this(null);
+ }
+
/** Constructor. */
public BpVectorReorderer(String partitionField) {
- setMinPartitionSize(DEFAULT_MIN_PARTITION_SIZE);
- setMaxIters(DEFAULT_MAX_ITERS);
+ setMinPartitionSize(AbstractBPReorderer.DEFAULT_MIN_PARTITION_SIZE);
+ setMaxIters(AbstractBPReorderer.DEFAULT_MAX_ITERS);
// 10% of the available heap size by default
setRAMBudgetMB(Runtime.getRuntime().totalMemory() / 1024d / 1024d / 10d);
this.partitionField = partitionField;
@@ -117,12 +118,17 @@ private static class PerThreadState {
}
}
- private static class DocMap extends Sorter.DocMap {
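+ /** A {@link Sorter.DocMap} over vector ordinals that exposes both directions of the mapping. */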
+ public static final class DocMap extends Sorter.DocMap {
+
+ public final int[] oldToNew;
+ public final int[] newToOld;
- private final int[] newToOld;
- private final int[] oldToNew;
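+ /** Expert: constructs a DocMap directly from precomputed old-to-new and new-to-old arrays. */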
+ public DocMap(int[] oldToNew, int[] newToOld) {
+ this.oldToNew = oldToNew;
+ this.newToOld = newToOld;
+ }
- public DocMap(int[] newToOld) {
+ DocMap(int[] newToOld) {
this.newToOld = newToOld;
oldToNew = new int[newToOld.length];
for (int i = 0; i < newToOld.length; ++i) {
@@ -146,6 +152,10 @@ public int newToOld(int docID) {
}
}
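+ /** Wraps the given old-to-new and new-to-old vector ordinal arrays as a {@link Sorter.DocMap}. */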
+ public static Sorter.DocMap sortMapOf(int[] oldToNew, int[] newToOld) {
+ return new DocMap(oldToNew, newToOld);
+ }
+
private abstract class BaseRecursiveAction extends RecursiveAction {
protected final TaskExecutor executor;
@@ -577,6 +587,10 @@ private float computeBias(float[] vector, float[] leftCentroid, float[] rightCen
@Override
public Sorter.DocMap computeDocMap(CodecReader reader, Directory tempDir, Executor executor)
throws IOException {
+ if (partitionField == null) {
+ throw new IllegalStateException(
+ "initialized with null field name; cannot partition using a reader");
+ }
TaskExecutor taskExecutor;
if (executor == null) {
taskExecutor = null;
@@ -588,12 +602,15 @@ public Sorter.DocMap computeDocMap(CodecReader reader, Directory tempDir, Execut
return null;
}
FloatVectorValues floats = reader.getFloatVectorValues(partitionField);
+ if (floats == null) {
+ return null;
+ }
Sorter.DocMap valueMap = computeValueMap(floats, vectorScore, taskExecutor);
return valueMapToDocMap(valueMap, floats, reader.maxDoc());
}
/** Expert: Compute the {@link DocMap} that holds the new vector ordinal numbering. */
- Sorter.DocMap computeValueMap(
+ public Sorter.DocMap computeValueMap(
FloatVectorValues vectors, VectorSimilarityFunction vectorScore, TaskExecutor executor) {
if (docRAMRequirements(vectors.size()) >= ramBudgetMB * 1024 * 1024) {
throw new NotEnoughRAMException(
@@ -611,9 +628,9 @@ Sorter.DocMap computeValueMap(
*/
private int[] computePermutation(
FloatVectorValues vectors, VectorSimilarityFunction vectorScore, TaskExecutor executor) {
- final int size = vectors.size();
+ int size = vectors.size();
int[] sortedIds = new int[size];
- for (int i = 0; i < size; ++i) {
+ for (int i = 0; i < size; i++) {
sortedIds[i] = i;
}
try (CloseableThreadLocal<PerThreadState> threadLocal =
@@ -623,7 +640,7 @@ protected PerThreadState initialValue() {
return new PerThreadState(vectors);
}
}) {
- IntsRef ids = new IntsRef(sortedIds, 0, sortedIds.length);
+ IntsRef ids = new IntsRef(sortedIds, 0, size);
new ReorderTask(ids, new float[size], threadLocal, executor, 0, vectorScore).compute();
}
return sortedIds;
@@ -768,23 +785,27 @@ private static Sorter.DocMap valueMapToDocMap(
++docid;
++nextNullDoc;
}
+ return new DocMap(oldToNew, newToOld);
+ }
- return new Sorter.DocMap() {
-
- @Override
- public int size() {
- return newToOld.length;
- }
-
- @Override
- public int oldToNew(int docID) {
- return oldToNew[docID];
- }
-
- @Override
- public int newToOld(int docID) {
- return newToOld[docID];
- }
- };
+ /**
+ * Returns a {@link DocMap} whose newToOld maps each new ord to its docid, and whose oldToNew
+ * maps each old ord (assigned in doc order) to its new ord.
+ */
+ public static DocMap ordToDocFromValueMap(
+ Sorter.DocMap valueMap, DocsWithFieldSet docsWithField) throws IOException {
+ // valueMap maps old ords to new ords; docsWithField yields the docid of each old ord, in order.
+ // Build newToOld as newOrd -> docid and oldToNew as oldOrd -> newOrd.
+ int[] newToOld = new int[valueMap.size()];
+ int[] oldToNew = new int[valueMap.size()];
+ DocIdSetIterator it = docsWithField.iterator();
+ for (int ord = 0, nextDoc = it.nextDoc();
+ nextDoc != NO_MORE_DOCS;
+ nextDoc = it.nextDoc(), ord++) {
+ int newOrd = valueMap.oldToNew(ord);
+ newToOld[newOrd] = nextDoc;
+ // this will hold ords in docid order, but it is not doc->ord because there may be gaps in the
+ // doc sequence
+ oldToNew[ord] = newOrd;
+ }
+ return new DocMap(oldToNew, newToOld);
}
}
diff --git a/lucene/misc/src/java/org/apache/lucene/misc/index/IndexReorderer.java b/lucene/core/src/java/org/apache/lucene/util/IndexReorderer.java
similarity index 92%
rename from lucene/misc/src/java/org/apache/lucene/misc/index/IndexReorderer.java
rename to lucene/core/src/java/org/apache/lucene/util/IndexReorderer.java
index 1fdba7a4eb03..694a5ac5285c 100644
--- a/lucene/misc/src/java/org/apache/lucene/misc/index/IndexReorderer.java
+++ b/lucene/core/src/java/org/apache/lucene/util/IndexReorderer.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.misc.index;
+package org.apache.lucene.util;
import java.io.IOException;
import java.util.concurrent.Executor;
@@ -22,7 +22,7 @@
import org.apache.lucene.index.Sorter;
import org.apache.lucene.store.Directory;
-/** Interface for docid-reordering expected by {@link BPReorderingMergePolicy}. */
+/** Interface for docid-reordering expected by BPReorderingMergePolicy. */
public interface IndexReorderer {
/**
* Returns a mapping from old to new docids.
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/CloseableRandomVectorScorerSupplier.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/CloseableRandomVectorScorerSupplier.java
index 8c66147f1a8b..bb515d7e2885 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/CloseableRandomVectorScorerSupplier.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/CloseableRandomVectorScorerSupplier.java
@@ -18,6 +18,7 @@
package org.apache.lucene.util.hnsw;
import java.io.Closeable;
+import org.apache.lucene.index.Sorter;
/**
* A supplier that creates {@link UpdateableRandomVectorScorer} from an ordinal. Caller should be
@@ -28,4 +29,14 @@
*/
public interface CloseableRandomVectorScorerSupplier extends Closeable, RandomVectorScorerSupplier {
int totalVectorCount();
+
+ /**
+ * If the vectors were reordered, this encodes the mapping from old (before reordering) node
+ * ordinals to new (reordered) ones. If no reordering was done, this will be null. Note that the
+ * RandomVectorScorer(s) returned from this supplier are still based on the old ordinals; the
+ * sort map only records how those ordinals are renumbered when the vectors are written.
+ */
+ default Sorter.DocMap sortMap() {
+ return null;
+ }
}
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/MergingHnswGraphBuilder.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/MergingHnswGraphBuilder.java
index 08366927b247..cd093b192aaa 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/MergingHnswGraphBuilder.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/MergingHnswGraphBuilder.java
@@ -20,7 +20,7 @@
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import java.io.IOException;
-import org.apache.lucene.internal.hppc.IntCursor;
+import java.util.Arrays;
import org.apache.lucene.internal.hppc.IntHashSet;
import org.apache.lucene.util.BitSet;
@@ -145,8 +145,10 @@ private void updateGraph(HnswGraph gS, int[] ordMapS) throws IOException {
IntHashSet j = UpdateGraphsUtils.computeJoinSet(gS);
// for nodes that are in the join set, add them directly to the graph
- for (IntCursor node : j) {
- addGraphNode(ordMapS[node.value]);
+ int[] nodes = j.toArray();
+ Arrays.sort(nodes);
+ for (int node : nodes) {
+ addGraphNode(ordMapS[node]);
}
// for each node outside of j set:
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104ScalarQuantizedVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104ScalarQuantizedVectorsFormat.java
index 32d5b07ad3e0..8088f01b9774 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104ScalarQuantizedVectorsFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene104/TestLucene104ScalarQuantizedVectorsFormat.java
@@ -365,7 +365,7 @@ private void updateVectorMetadataFile(Directory dir, String fileName) throws Exc
// Write configuration
OrdToDocDISIReaderConfiguration.writeStoredMeta(
- DIRECT_MONOTONIC_BLOCK_SHIFT, out, null, 0, 0, null);
+ DIRECT_MONOTONIC_BLOCK_SHIFT, out, null, 0, 0, null, null);
// Mark end of fields and write footer
out.writeInt(-1);
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSortingCodecReader.java b/lucene/core/src/test/org/apache/lucene/index/TestSortingCodecReader.java
index c03afd92be69..71c8386e70d4 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestSortingCodecReader.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestSortingCodecReader.java
@@ -28,6 +28,7 @@
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.hnsw.HnswGraphProvider;
+import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -53,6 +54,7 @@
import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.hnsw.HnswGraph;
@@ -222,9 +224,11 @@ public void testSortOnAddIndicesRandom() throws IOException {
new Sort(
new SortField("sorted_binary_sort_field", SortField.Type.STRING, false),
new SortField("alt_id", SortField.Type.INT))));
+ IndexWriterConfig cfg = newIndexWriterConfig();
+ cfg.setCodec(TestUtil.alwaysKnnVectorsFormat(new Lucene99HnswVectorsFormat(8, 32)));
+ cfg.setIndexSort(indexSort);
try (Directory sortDir = newDirectory()) {
- try (IndexWriter writer =
- new IndexWriter(sortDir, newIndexWriterConfig().setIndexSort(indexSort))) {
+ try (IndexWriter writer = new IndexWriter(sortDir, cfg)) {
try (DirectoryReader reader = DirectoryReader.open(dir)) {
List<CodecReader> readers = new ArrayList<>();
for (LeafReaderContext ctx : reader.leaves()) {
diff --git a/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java b/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java
index 0f9a6c84578b..be95bdc8e9d9 100644
--- a/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java
+++ b/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java
@@ -611,8 +611,7 @@ public void testRandomWithFilter() throws IOException {
try (Directory d = newDirectoryForTest()) {
// Always use the default kNN format to have predictable behavior around when it hits
// visitedLimit. This is fine since the test targets AbstractKnnVectorQuery logic, not the kNN
- // format
- // implementation.
+ // format implementation.
IndexWriterConfig iwc = configStandardCodec();
RandomIndexWriter w = new RandomIndexWriter(random(), d, iwc);
for (int i = 0; i < numDocs; i++) {
@@ -659,7 +658,9 @@ public void testRandomWithFilter() throws IOException {
assertEquals(1, fieldDoc.fields.length);
int tag = (int) fieldDoc.fields[0];
- assertTrue(lower <= tag && tag <= numDocs);
+ assertTrue(
+ "tag=" + tag + " lower=" + lower + " numDocs=" + numDocs,
+ lower <= tag && tag <= numDocs);
}
// Test a filter with cost slightly more than k, and check we use exact search as k
// results are not retrieved from approximate search
diff --git a/lucene/misc/src/test/org/apache/lucene/misc/index/TestBpVectorReorderer.java b/lucene/core/src/test/org/apache/lucene/util/TestBpVectorReorderer.java
similarity index 70%
rename from lucene/misc/src/test/org/apache/lucene/misc/index/TestBpVectorReorderer.java
rename to lucene/core/src/test/org/apache/lucene/util/TestBpVectorReorderer.java
index 653c519729a9..f33272f0b305 100644
--- a/lucene/misc/src/test/org/apache/lucene/misc/index/TestBpVectorReorderer.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestBpVectorReorderer.java
@@ -15,20 +15,25 @@
* limitations under the License.
*/
-package org.apache.lucene.misc.index;
+package org.apache.lucene.util;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
+import java.util.Random;
import java.util.concurrent.Executor;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.ForkJoinWorkerThread;
+import org.apache.lucene.codecs.hnsw.HnswGraphProvider;
import org.apache.lucene.codecs.lucene104.Lucene104HnswScalarQuantizedVectorsFormat;
+import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.KnnFloatVectorField;
+import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StoredField;
+import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.IndexReader;
@@ -36,15 +41,19 @@
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LogDocMergePolicy;
+import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.Sorter;
import org.apache.lucene.index.StoredFields;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TaskExecutor;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil;
-import org.apache.lucene.util.VectorUtil;
+import org.apache.lucene.util.hnsw.HnswGraph;
/** Tests reordering vector values using Binary Partitioning */
public class TestBpVectorReorderer extends LuceneTestCase {
@@ -126,6 +135,7 @@ public void testEuclideanLinear() {
}
public void testQuantizedIndex() throws Exception {
+ // FIXME test with an executor sometimes
doTestQuantizedIndex(null);
}
@@ -217,6 +227,10 @@ private static List<float[]> shuffleVectors(List<float[]> vectors) {
private static List<float[]> randomLinearVectors() {
int n = random().nextInt(100) + 10;
+ return randomLinearVectors(n);
+ }
+
+ private static List<float[]> randomLinearVectors(int n) {
List<float[]> vectors = new ArrayList<>();
float b = random().nextFloat();
float m = random().nextFloat();
@@ -310,13 +324,25 @@ && angularDifference(t0min, t0max) < angularDifference(t0min, t1max))
&& angularDifference(t1min, t1max) < angularDifference(t1min, t0max)));
}
+ // Disable skipping HNSW graph creation for small segments and pass through reordering flag.
+ private IndexWriterConfig createIndexWriterConfig(boolean enableReorder) {
+ IndexWriterConfig cfg = new IndexWriterConfig();
+ cfg.setCodec(
+ TestUtil.alwaysKnnVectorsFormat(
+ new Lucene99HnswVectorsFormat(8, 32, 1, null, 0, enableReorder)));
+ cfg.setMergePolicy(new LogDocMergePolicy());
+ cfg.setMergeScheduler(new SerialMergeScheduler());
+ return cfg;
+ }
+
public void testIndexReorderDense() throws Exception {
List<float[]> vectors = shuffleVectors(randomLinearVectors());
Path tmpdir = createTempDir();
try (Directory dir = newFSDirectory(tmpdir)) {
// create an index with a single leaf
- try (IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig())) {
+ IndexWriterConfig cfg = createIndexWriterConfig(false);
+ try (IndexWriter writer = new IndexWriter(dir, cfg)) {
int id = 0;
for (float[] vector : vectors) {
Document doc = new Document();
@@ -384,6 +410,7 @@ public void testIndexReorderDense() throws Exception {
}
}
+ // test the reordering utility (BpVectorReorderer.main)
public void testIndexReorderSparse() throws Exception {
List<float[]> vectors = shuffleVectors(randomLinearVectors());
// compute the expected ordering
@@ -394,7 +421,8 @@ public void testIndexReorderSparse() throws Exception {
int maxDoc = 0;
try (Directory dir = newFSDirectory(tmpdir)) {
// create an index with a single leaf
- try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
+ IndexWriterConfig cfg = createIndexWriterConfig(false);
+ try (IndexWriter writer = new IndexWriter(dir, cfg)) {
for (float[] vector : vectors) {
Document doc = new Document();
if (random().nextBoolean()) {
@@ -431,6 +459,139 @@ public void testIndexReorderSparse() throws Exception {
}
}
+ // Tests of reordering in the codec
+ public void testReorderDenseCodec() throws Exception {
+ doTestReorderCodec(false);
+ }
+
+ public void testReorderSparseCodec() throws Exception {
+ doTestReorderCodec(true);
+ }
+
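+ // Indexes the given vectors (interleaving vector-less gap docs when indexIsSparse), committing
+ // once midway so that two segments are created, then force-merges down to one segment.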
+ private void addDocuments(
+ List<float[]> vectors, boolean indexIsSparse, long seed, IndexWriter writer)
+ throws IOException {
+ int id = 0;
+ boolean didCommit = false;
+ Random random = new Random(seed);
+ for (float[] vector : vectors) {
+ Document doc = new Document();
+ doc.add(new KnnFloatVectorField("f", vector, VectorSimilarityFunction.EUCLIDEAN));
+ doc.add(new NumericDocValuesField("id", id));
+ doc.add(new StoredField("id", id));
+ writer.addDocument(doc);
+ if (indexIsSparse && random.nextBoolean()) {
+ for (int i = 0; i < random.nextInt(3); i++) {
+ // insert some gaps -- docs with no vectors
+ // give them the same numeric id as their "neighbor" so that
+ // they sort together
+ Document gapDoc = new Document();
+ gapDoc.add(new NumericDocValuesField("id", id));
+ writer.addDocument(gapDoc);
+ }
+ }
+ ++id;
+ if (didCommit == false && id > vectors.size() / 2) {
+ // make two segments to induce a merge
+ writer.commit();
+ didCommit = true;
+ }
+ }
+ writer.forceMerge(1);
+ }
+
+ private void doTestReorderCodec(boolean indexIsSparse) throws Exception {
+ // must be big enough to trigger some reordering
+ int numVectors = BpVectorReorderer.DEFAULT_MIN_PARTITION_SIZE * 4 + random().nextInt(32);
+ List<float[]> vectors = shuffleVectors(randomLinearVectors(numVectors));
+ // use default settings to match codec
+ reorderer = new BpVectorReorderer();
+
+ // compute the expected ordering
+ Sorter.DocMap expected =
+ reorderer.computeValueMap(
+ FloatVectorValues.fromFloats(vectors, 2), VectorSimilarityFunction.EUCLIDEAN, null);
+
+ // index without reordering in order to get the expected HNSW graph
+ // record a seed so we can re-generate the same index below
+ long seed = random().nextLong();
+ Path tmpdir = createTempDir();
+ List<List<Integer>> expectedGraph = new ArrayList<>();
+ try (Directory dir = newFSDirectory(tmpdir)) {
+ try (IndexWriter writer = new IndexWriter(dir, createIndexWriterConfig(false))) {
+ addDocuments(vectors, indexIsSparse, seed, writer);
+ }
+ try (IndexReader reader = DirectoryReader.open(dir)) {
+ LeafReader leafReader = getOnlyLeafReader(reader);
+ HnswGraph hnsw =
+ ((HnswGraphProvider) ((CodecReader) leafReader).getVectorReader()).getGraph("f");
+ for (int ord = 0; ord < hnsw.size(); ord++) {
+ List<Integer> neighbors = new ArrayList<>();
+ hnsw.seek(0, ord);
+ int nbr;
+ while ((nbr = hnsw.nextNeighbor()) != DocIdSetIterator.NO_MORE_DOCS) {
+ neighbors.add(nbr);
+ }
+ expectedGraph.add(neighbors);
+ }
+ }
+ }
+
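+ // Re-index the same documents (same seed) with reordering enabled and verify the merged segment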
+ tmpdir = createTempDir();
+ try (Directory dir = newFSDirectory(tmpdir)) {
+ IndexWriterConfig cfg = createIndexWriterConfig(true);
+ // Configure an index sort sometimes
+ if (random().nextBoolean()) {
+ cfg.setIndexSort(new Sort(new SortField("id", SortField.Type.INT)));
+ }
+ try (IndexWriter writer = new IndexWriter(dir, cfg)) {
+ addDocuments(vectors, indexIsSparse, seed, writer);
+ }
+
+ // Verify the ordering produced when merging is the same,
+ // that the values iterator returns the correct vector values
+ // and that the ordToDoc mapping is correct
+
+ try (IndexReader reader = DirectoryReader.open(dir)) {
+ LeafReader leafReader = getOnlyLeafReader(reader);
+ FloatVectorValues values = leafReader.getFloatVectorValues("f");
+ StoredFields storedFields = reader.storedFields();
+ KnnVectorValues.DocIndexIterator it = values.iterator();
+ while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+ int docId = it.docID();
+ int oldOrd = Integer.parseInt(storedFields.document(docId).get("id"));
+ int newOrd = it.index();
+ assertEquals(expected.oldToNew(oldOrd), newOrd);
+ assertEquals(expected.newToOld(newOrd), oldOrd);
+ assertEquals(docId, values.ordToDoc(newOrd));
+ float[] actualVector = values.vectorValue(newOrd);
+ float[] expectedVector = vectors.get(oldOrd);
+ assertArrayEquals(
+ "values differ at index " + oldOrd + "->" + newOrd + " docid=" + docId,
+ expectedVector,
+ actualVector,
+ 0);
+ }
+ // Verify that we produce the same graph, numbered according to the reordering.
+ HnswGraph hnsw =
+ ((HnswGraphProvider) ((CodecReader) leafReader).getVectorReader()).getGraph("f");
+ assertEquals(expectedGraph.size(), hnsw.size());
+ for (int newOrd = 0; newOrd < hnsw.size(); newOrd++) {
+ hnsw.seek(0, newOrd);
+ List<Integer> neighbors = new ArrayList<>();
+ int nbr;
+ while ((nbr = hnsw.nextNeighbor()) != DocIdSetIterator.NO_MORE_DOCS) {
+ // map neighbor ords back to original ords
+ neighbors.add(expected.newToOld(nbr));
+ }
+ // we may now get nodes out of order
+ Collections.sort(neighbors);
+ assertEquals(
+ "neighbors of " + newOrd + " (was " + expected.newToOld(newOrd) + ")",
+ expectedGraph.get(expected.newToOld(newOrd)),
+ neighbors);
+ }
+ }
+ }
+ }
+
static double angularDifference(double a, double b) {
return angle2pi(b - a);
}
diff --git a/lucene/misc/src/java/org/apache/lucene/misc/index/BPIndexReorderer.java b/lucene/misc/src/java/org/apache/lucene/misc/index/BPIndexReorderer.java
index 4d2bab216b9b..08afcaed66b4 100644
--- a/lucene/misc/src/java/org/apache/lucene/misc/index/BPIndexReorderer.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/index/BPIndexReorderer.java
@@ -48,6 +48,7 @@
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RandomAccessInput;
import org.apache.lucene.store.TrackingDirectoryWrapper;
+import org.apache.lucene.util.AbstractBPReorderer;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.BytesRef;
diff --git a/lucene/misc/src/java/org/apache/lucene/misc/index/BPReorderingMergePolicy.java b/lucene/misc/src/java/org/apache/lucene/misc/index/BPReorderingMergePolicy.java
index 51af1662a860..77e198127d5c 100644
--- a/lucene/misc/src/java/org/apache/lucene/misc/index/BPReorderingMergePolicy.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/index/BPReorderingMergePolicy.java
@@ -27,8 +27,9 @@
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.Sorter;
-import org.apache.lucene.misc.index.AbstractBPReorderer.NotEnoughRAMException;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.AbstractBPReorderer.NotEnoughRAMException;
+import org.apache.lucene.util.IndexReorderer;
import org.apache.lucene.util.SetOnce;
/**
diff --git a/lucene/misc/src/test/org/apache/lucene/misc/index/TestBPReorderingMergePolicy.java b/lucene/misc/src/test/org/apache/lucene/misc/index/TestBPReorderingMergePolicy.java
index 302ce0d751cc..fa78214596fe 100644
--- a/lucene/misc/src/test/org/apache/lucene/misc/index/TestBPReorderingMergePolicy.java
+++ b/lucene/misc/src/test/org/apache/lucene/misc/index/TestBPReorderingMergePolicy.java
@@ -35,6 +35,8 @@
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.lucene.util.AbstractBPReorderer;
+import org.apache.lucene.util.BpVectorReorderer;
import org.apache.lucene.util.IOUtils;
import org.junit.Before;