Skip to content

Commit 2443459

Browse files
committed
more tests
1 parent ddd192e commit 2443459

File tree

6 files changed

+267
-138
lines changed

6 files changed

+267
-138
lines changed

fdb-extensions/src/main/java/com/apple/foundationdb/async/hnsw/HNSW.java

Lines changed: 74 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ public class HNSW {
9292
public static final int MAX_CONCURRENT_NODE_READS = 16;
9393
public static final int MAX_CONCURRENT_NEIGHBOR_FETCHES = 3;
9494
public static final int MAX_CONCURRENT_SEARCHES = 10;
95-
@Nonnull public static final Random DEFAULT_RANDOM = new Random(0L);
95+
public static final long DEFAULT_RANDOM_SEED = 0L;
9696
@Nonnull public static final Metric DEFAULT_METRIC = Metric.EUCLIDEAN_METRIC;
9797
public static final boolean DEFAULT_USE_INLINING = false;
9898
public static final int DEFAULT_M = 16;
@@ -109,6 +109,8 @@ public class HNSW {
109109
@Nonnull
110110
public static final ConfigBuilder DEFAULT_CONFIG_BUILDER = new ConfigBuilder();
111111

112+
@Nonnull
113+
private final Random random;
112114
@Nonnull
113115
private final Subspace subspace;
114116
@Nonnull
@@ -125,8 +127,7 @@ public class HNSW {
125127
*/
126128
@SuppressWarnings("checkstyle:MemberName")
127129
public static class Config {
128-
@Nonnull
129-
private final Random random;
130+
private final long randomSeed;
130131
@Nonnull
131132
private final Metric metric;
132133
private final int numDimensions;
@@ -141,26 +142,11 @@ public static class Config {
141142
private final boolean useRaBitQ;
142143
private final int raBitQNumExBits;
143144

144-
protected Config(final int numDimensions) {
145-
this.random = DEFAULT_RANDOM;
146-
this.metric = DEFAULT_METRIC;
147-
this.numDimensions = numDimensions;
148-
this.useInlining = DEFAULT_USE_INLINING;
149-
this.m = DEFAULT_M;
150-
this.mMax = DEFAULT_M_MAX;
151-
this.mMax0 = DEFAULT_M_MAX_0;
152-
this.efConstruction = DEFAULT_EF_CONSTRUCTION;
153-
this.extendCandidates = DEFAULT_EXTEND_CANDIDATES;
154-
this.keepPrunedConnections = DEFAULT_KEEP_PRUNED_CONNECTIONS;
155-
this.useRaBitQ = DEFAULT_USE_RABITQ;
156-
this.raBitQNumExBits = DEFAULT_RABITQ_NUM_EX_BITS;
157-
}
158-
159-
protected Config(@Nonnull final Random random, @Nonnull final Metric metric, final int numDimensions,
145+
protected Config(final long randomSeed, @Nonnull final Metric metric, final int numDimensions,
160146
final boolean useInlining, final int m, final int mMax, final int mMax0,
161147
final int efConstruction, final boolean extendCandidates, final boolean keepPrunedConnections,
162148
final boolean useRaBitQ, final int raBitQNumExBits) {
163-
this.random = random;
149+
this.randomSeed = randomSeed;
164150
this.metric = metric;
165151
this.numDimensions = numDimensions;
166152
this.useInlining = useInlining;
@@ -174,9 +160,8 @@ protected Config(@Nonnull final Random random, @Nonnull final Metric metric, fin
174160
this.raBitQNumExBits = raBitQNumExBits;
175161
}
176162

177-
@Nonnull
178-
public Random getRandom() {
179-
return random;
163+
public long getRandomSeed() {
164+
return randomSeed;
180165
}
181166

182167
@Nonnull
@@ -226,16 +211,48 @@ public int getRaBitQNumExBits() {
226211

227212
@Nonnull
228213
public ConfigBuilder toBuilder() {
229-
return new ConfigBuilder(getRandom(), getMetric(), isUseInlining(), getM(), getMMax(), getMMax0(),
214+
return new ConfigBuilder(getRandomSeed(), getMetric(), isUseInlining(), getM(), getMMax(), getMMax0(),
230215
getEfConstruction(), isExtendCandidates(), isKeepPrunedConnections(), isUseRaBitQ(),
231216
getRaBitQNumExBits());
232217
}
233218

219+
@Override
220+
public final boolean equals(final Object o) {
221+
if (!(o instanceof Config)) {
222+
return false;
223+
}
224+
225+
final Config config = (Config)o;
226+
return randomSeed == config.randomSeed && numDimensions == config.numDimensions &&
227+
useInlining == config.useInlining && m == config.m && mMax == config.mMax &&
228+
mMax0 == config.mMax0 && efConstruction == config.efConstruction &&
229+
extendCandidates == config.extendCandidates &&
230+
keepPrunedConnections == config.keepPrunedConnections && useRaBitQ == config.useRaBitQ &&
231+
raBitQNumExBits == config.raBitQNumExBits && metric == config.metric;
232+
}
233+
234+
@Override
235+
public int hashCode() {
236+
int result = Long.hashCode(randomSeed);
237+
result = 31 * result + metric.name().hashCode();
238+
result = 31 * result + numDimensions;
239+
result = 31 * result + Boolean.hashCode(useInlining);
240+
result = 31 * result + m;
241+
result = 31 * result + mMax;
242+
result = 31 * result + mMax0;
243+
result = 31 * result + efConstruction;
244+
result = 31 * result + Boolean.hashCode(extendCandidates);
245+
result = 31 * result + Boolean.hashCode(keepPrunedConnections);
246+
result = 31 * result + Boolean.hashCode(useRaBitQ);
247+
result = 31 * result + raBitQNumExBits;
248+
return result;
249+
}
250+
234251
@Override
235252
@Nonnull
236253
public String toString() {
237-
return "Config[metric=" + getMetric() + ", numDimensions=" + numDimensions +
238-
", isUseInlining=" + isUseInlining() + ", M=" + getM() +
254+
return "Config[randomSeed=" + getRandomSeed() + ", metric=" + getMetric() +
255+
", numDimensions=" + getNumDimensions() + ", isUseInlining=" + isUseInlining() + ", M=" + getM() +
239256
", MMax=" + getMMax() + ", MMax0=" + getMMax0() + ", efConstruction=" + getEfConstruction() +
240257
", isExtendCandidates=" + isExtendCandidates() +
241258
", isKeepPrunedConnections=" + isKeepPrunedConnections() +
@@ -252,8 +269,7 @@ public String toString() {
252269
@CanIgnoreReturnValue
253270
@SuppressWarnings("checkstyle:MemberName")
254271
public static class ConfigBuilder {
255-
@Nonnull
256-
private Random random = DEFAULT_RANDOM;
272+
private long randomSeed = DEFAULT_RANDOM_SEED;
257273
@Nonnull
258274
private Metric metric = DEFAULT_METRIC;
259275
private boolean useInlining = DEFAULT_USE_INLINING;
@@ -270,11 +286,11 @@ public static class ConfigBuilder {
270286
public ConfigBuilder() {
271287
}
272288

273-
public ConfigBuilder(@Nonnull final Random random, @Nonnull final Metric metric, final boolean useInlining,
289+
public ConfigBuilder(final long randomSeed, @Nonnull final Metric metric, final boolean useInlining,
274290
final int m, final int mMax, final int mMax0, final int efConstruction,
275291
final boolean extendCandidates, final boolean keepPrunedConnections,
276292
final boolean useRaBitQ, final int raBitQNumExBits) {
277-
this.random = random;
293+
this.randomSeed = randomSeed;
278294
this.metric = metric;
279295
this.useInlining = useInlining;
280296
this.m = m;
@@ -287,14 +303,13 @@ public ConfigBuilder(@Nonnull final Random random, @Nonnull final Metric metric,
287303
this.raBitQNumExBits = raBitQNumExBits;
288304
}
289305

290-
@Nonnull
291-
public Random getRandom() {
292-
return random;
306+
public long getRandomSeed() {
307+
return randomSeed;
293308
}
294309

295310
@Nonnull
296-
public ConfigBuilder setRandom(@Nonnull final Random random) {
297-
this.random = random;
311+
public ConfigBuilder setRandomSeed(final long randomSeed) {
312+
this.randomSeed = randomSeed;
298313
return this;
299314
}
300315

@@ -394,7 +409,7 @@ public ConfigBuilder setRaBitQNumExBits(final int raBitQNumExBits) {
394409
}
395410

396411
public Config build(final int numDimensions) {
397-
return new Config(getRandom(), getMetric(), numDimensions, isUseInlining(), getM(), getMMax(), getMMax0(),
412+
return new Config(getRandomSeed(), getMetric(), numDimensions, isUseInlining(), getM(), getMMax(), getMMax0(),
398413
getEfConstruction(), isExtendCandidates(), isKeepPrunedConnections(), isUseRaBitQ(),
399414
getRaBitQNumExBits());
400415
}
@@ -409,6 +424,17 @@ public static ConfigBuilder newConfigBuilder() {
409424
return new ConfigBuilder();
410425
}
411426

427+
/**
428+
* Returns a default {@link Config}.
429+
* @param numDimensions number of dimensions
430+
* @return a new default {@code Config}.
431+
* @see ConfigBuilder#build
432+
*/
433+
@Nonnull
434+
public static Config defaultConfig(int numDimensions) {
435+
return new ConfigBuilder().build(numDimensions);
436+
}
437+
412438
/**
413439
* Creates a new {@code HNSW} instance using the default configuration, write listener, and read listener.
414440
* <p>
@@ -442,6 +468,7 @@ public HNSW(@Nonnull final Subspace subspace,
442468
@Nonnull final Config config,
443469
@Nonnull final OnWriteListener onWriteListener,
444470
@Nonnull final OnReadListener onReadListener) {
471+
this.random = new Random(config.getRandomSeed());
445472
this.subspace = subspace;
446473
this.executor = executor;
447474
this.config = config;
@@ -1033,12 +1060,13 @@ private <N extends NodeReference> CompletableFuture<List<NodeReferenceAndNode<N>
10331060
* of type {@code U}, corresponding to each processed node reference.
10341061
*/
10351062
@Nonnull
1036-
private <R extends NodeReference, N extends NodeReference, U> CompletableFuture<List<U>> fetchSomeNodesAndApply(@Nonnull final StorageAdapter<N> storageAdapter,
1037-
@Nonnull final ReadTransaction readTransaction,
1038-
final int layer,
1039-
@Nonnull final Iterable<R> nodeReferences,
1040-
@Nonnull final Function<R, U> fetchBypassFunction,
1041-
@Nonnull final BiFunction<R, Node<N>, U> biMapFunction) {
1063+
private <R extends NodeReference, N extends NodeReference, U> CompletableFuture<List<U>>
1064+
fetchSomeNodesAndApply(@Nonnull final StorageAdapter<N> storageAdapter,
1065+
@Nonnull final ReadTransaction readTransaction,
1066+
final int layer,
1067+
@Nonnull final Iterable<R> nodeReferences,
1068+
@Nonnull final Function<R, U> fetchBypassFunction,
1069+
@Nonnull final BiFunction<R, Node<N>, U> biMapFunction) {
10421070
return forEach(nodeReferences,
10431071
currentNeighborReference -> fetchNodeIfNecessaryAndApply(storageAdapter, readTransaction, layer,
10441072
currentNeighborReference, fetchBypassFunction, biMapFunction), MAX_CONCURRENT_NODE_READS,
@@ -1085,7 +1113,7 @@ public CompletableFuture<Void> insert(@Nonnull final Transaction transaction, @N
10851113
@Nonnull
10861114
public CompletableFuture<Void> insert(@Nonnull final Transaction transaction, @Nonnull final Tuple newPrimaryKey,
10871115
@Nonnull final RealVector newVector) {
1088-
final int insertionLayer = insertionLayer(getConfig().getRandom());
1116+
final int insertionLayer = insertionLayer();
10891117
if (logger.isTraceEnabled()) {
10901118
logger.trace("new node with key={} selected to be inserted into layer={}", newPrimaryKey, insertionLayer);
10911119
}
@@ -1182,11 +1210,10 @@ public CompletableFuture<Void> insert(@Nonnull final Transaction transaction, @N
11821210
public CompletableFuture<Void> insertBatch(@Nonnull final Transaction transaction,
11831211
@Nonnull List<NodeReferenceWithVector> batch) {
11841212
// determine the layer each item should be inserted at
1185-
final Random random = getConfig().getRandom();
11861213
final List<NodeReferenceWithLayer> batchWithLayers = Lists.newArrayListWithCapacity(batch.size());
11871214
for (final NodeReferenceWithVector current : batch) {
1188-
batchWithLayers.add(new NodeReferenceWithLayer(current.getPrimaryKey(), current.getVector(),
1189-
insertionLayer(random)));
1215+
batchWithLayers.add(
1216+
new NodeReferenceWithLayer(current.getPrimaryKey(), current.getVector(), insertionLayer()));
11901217
}
11911218
// sort the layers in reverse order
11921219
batchWithLayers.sort(Comparator.comparing(NodeReferenceWithLayer::getLayer).reversed());
@@ -1878,12 +1905,9 @@ private StorageAdapter<? extends NodeReference> getStorageAdapterForLayer(final
18781905
* number and {@code lambda} is a normalization factor derived from a system
18791906
* configuration parameter {@code M}.
18801907
*
1881-
* @param random the {@link Random} object used for generating a random number.
1882-
* It must not be null.
1883-
*
18841908
* @return a non-negative integer representing the randomly selected layer.
18851909
*/
1886-
private int insertionLayer(@Nonnull final Random random) {
1910+
private int insertionLayer() {
18871911
double lambda = 1.0 / Math.log(getConfig().getM());
18881912
double u = 1.0 - random.nextDouble(); // Avoid log(0)
18891913
return (int) Math.floor(-Math.log(u) * lambda);

fdb-extensions/src/main/java/com/apple/foundationdb/async/hnsw/InliningStorageAdapter.java

Lines changed: 31 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -176,13 +176,26 @@ private Node<NodeReferenceWithVector> nodeFromRaw(final int layer,
176176
@Nonnull
177177
private NodeReferenceWithVector neighborFromRaw(final int layer, final @Nonnull byte[] key, final byte[] value) {
178178
final OnReadListener onReadListener = getOnReadListener();
179-
180179
onReadListener.onKeyValueRead(layer, key, value);
180+
181181
final Tuple neighborKeyTuple = getDataSubspace().unpack(key);
182182
final Tuple neighborValueTuple = Tuple.fromBytes(value);
183183

184-
final Tuple neighborPrimaryKey = neighborKeyTuple.getNestedTuple(2); // neighbor primary key
185-
final RealVector neighborVector = StorageAdapter.vectorFromTuple(getConfig(), neighborValueTuple); // the entire value is the vector
184+
return neighborFromTuples(neighborKeyTuple, neighborValueTuple);
185+
}
186+
187+
/**
188+
* Constructs a {@code NodeReferenceWithVector} from tuples retrieved from storage.
189+
*
190+
* @param keyTuple the key tuple from the database, which contains the neighbor's primary key.
191+
* @param valueTuple the value tuple from the database, which represents the neighbor's vector.
192+
* @return a new {@link NodeReferenceWithVector} instance representing the deserialized neighbor.
193+
* @throws IllegalArgumentException if the key tuple or the value tuple is malformed.
194+
*/
195+
@Nonnull
196+
private NodeReferenceWithVector neighborFromTuples(final @Nonnull Tuple keyTuple, final Tuple valueTuple) {
197+
final Tuple neighborPrimaryKey = keyTuple.getNestedTuple(2); // neighbor primary key
198+
final RealVector neighborVector = StorageAdapter.vectorFromTuple(getConfig(), valueTuple); // the entire value is the vector
186199
return new NodeReferenceWithVector(neighborPrimaryKey, neighborVector);
187200
}
188201

@@ -308,6 +321,7 @@ private byte[] getNeighborKey(final int layer,
308321
@Override
309322
public Iterable<Node<NodeReferenceWithVector>> scanLayer(@Nonnull final ReadTransaction readTransaction, int layer,
310323
@Nullable final Tuple lastPrimaryKey, int maxNumRead) {
324+
final OnReadListener onReadListener = getOnReadListener();
311325
final byte[] layerPrefix = getDataSubspace().pack(Tuple.from(layer));
312326
final Range range =
313327
lastPrimaryKey == null
@@ -317,30 +331,29 @@ public Iterable<Node<NodeReferenceWithVector>> scanLayer(@Nonnull final ReadTran
317331
final AsyncIterable<KeyValue> itemsIterable =
318332
readTransaction.getRange(range,
319333
maxNumRead, false, StreamingMode.ITERATOR);
320-
int numRead = 0;
321334
Tuple nodePrimaryKey = null;
322335
ImmutableList.Builder<Node<NodeReferenceWithVector>> nodeBuilder = ImmutableList.builder();
323-
ImmutableList.Builder<NodeReferenceWithVector> neighborsBuilder = ImmutableList.builder();
336+
ImmutableList.Builder<NodeReferenceWithVector> neighborsBuilder = null;
324337
for (final KeyValue item: itemsIterable) {
325-
final NodeReferenceWithVector neighbor =
326-
neighborFromRaw(layer, item.getKey(), item.getValue());
327-
final Tuple primaryKeyFromNodeReference = neighbor.getPrimaryKey();
328-
if (nodePrimaryKey == null) {
329-
nodePrimaryKey = primaryKeyFromNodeReference;
330-
} else {
331-
if (!nodePrimaryKey.equals(primaryKeyFromNodeReference)) {
338+
final byte[] key = item.getKey();
339+
final byte[] value = item.getValue();
340+
onReadListener.onKeyValueRead(layer, key, value);
341+
342+
final Tuple neighborKeyTuple = getDataSubspace().unpack(key);
343+
final Tuple neighborValueTuple = Tuple.fromBytes(value);
344+
final NodeReferenceWithVector neighbor = neighborFromTuples(neighborKeyTuple, neighborValueTuple);
345+
final Tuple nodePrimaryKeyFromNeighbor = neighborKeyTuple.getNestedTuple(1);
346+
if (nodePrimaryKey == null || !nodePrimaryKey.equals(nodePrimaryKeyFromNeighbor)) {
347+
if (nodePrimaryKey != null) {
332348
nodeBuilder.add(getNodeFactory().create(nodePrimaryKey, null, neighborsBuilder.build()));
333349
}
350+
nodePrimaryKey = nodePrimaryKeyFromNeighbor;
351+
neighborsBuilder = ImmutableList.builder();
334352
}
335353
neighborsBuilder.add(neighbor);
336-
numRead ++;
337-
}
338-
339-
// there may be a rest
340-
if (numRead > 0 && numRead < maxNumRead) {
341-
nodeBuilder.add(getNodeFactory().create(nodePrimaryKey, null, neighborsBuilder.build()));
342354
}
343355

356+
// neighbors collected for the last node seen are left over here (that node may be incomplete); discard them
344357
return nodeBuilder.build();
345358
}
346359
}

fdb-extensions/src/main/java/com/apple/foundationdb/async/hnsw/StorageAdapter.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ void writeNode(@Nonnull Transaction transaction, @Nonnull Node<N> node, int laye
168168
@Nonnull NeighborsChangeSet<N> changeSet);
169169

170170
/**
171-
* Scans a specified layer of the directory, returning an iterable sequence of nodes.
171+
* Scans a specified layer of the structure, returning an iterable sequence of nodes.
172172
* <p>
173173
* This method allows for paginated scanning of a layer. The scan can be started from the beginning of the layer by
174174
* passing {@code null} for the {@code lastPrimaryKey}, or it can be resumed from a previous point by providing the

0 commit comments

Comments
 (0)