Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
b70b88d
initial code drop from hnsw-poc
normen662 Sep 16, 2025
4ec18d7
adding tests
normen662 Sep 16, 2025
3ff9fe4
adding javadocs
normen662 Sep 17, 2025
ea8c115
adding comments
normen662 Sep 17, 2025
0cce801
more javadoc and tests
normen662 Sep 17, 2025
e164441
adding a lot of java doc
normen662 Sep 19, 2025
e17a2bd
added tests
normen662 Sep 19, 2025
f3733b8
increase timeout for test case
normen662 Sep 19, 2025
c71b942
refactored Vector class to be more aligned with math libraries
normen662 Sep 23, 2025
660cab5
removed efSearch from HNSW
normen662 Sep 23, 2025
8692c2a
adding some initial rabitq-related matrix ops
normen662 Sep 30, 2025
2032bdb
best rescale factor
normen662 Oct 2, 2025
047a428
quantize ex
normen662 Oct 3, 2025
accd3da
basic encoding works
normen662 Oct 4, 2025
fcbd209
estimator works
normen662 Oct 6, 2025
0aa1946
encoding + estimation
normen662 Oct 8, 2025
84bf004
packing works
normen662 Oct 11, 2025
19b2470
serialization round trip works
normen662 Oct 11, 2025
67b4db8
pre-savepoint
normen662 Oct 13, 2025
0c848db
rabitq in hnsw; barely compiles
normen662 Oct 13, 2025
10365d4
rabitq in hnsw works
normen662 Oct 14, 2025
72bb3b5
basic vector encoding, half support
normen662 Oct 14, 2025
647a3a8
refactoring so that feature branch hnsw and rabitq can use a proper l…
normen662 Oct 15, 2025
b492ddb
addressing some comments
normen662 Oct 16, 2025
2a2ec6d
addressing some comments (2)
normen662 Oct 17, 2025
5464eec
addressing some comments (3)
normen662 Oct 17, 2025
391b4d5
addressing some comments (4)
normen662 Oct 18, 2025
ad93c96
addressing some comments (5)
normen662 Oct 19, 2025
ddd192e
addressing some comments (6)
normen662 Oct 20, 2025
2443459
more tests
normen662 Oct 21, 2025
04728f3
more tests
normen662 Oct 21, 2025
7e506f9
code complete
normen662 Oct 22, 2025
0253cee
code complete -- for realz
normen662 Oct 22, 2025
9da7010
remove all HNSW code
normen662 Oct 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions ACKNOWLEDGEMENTS
Original file line number Diff line number Diff line change
Expand Up @@ -216,3 +216,27 @@ Unicode, Inc (ICU4J)
Creative Commons Attribution 4.0 License (GeoNames)

https://creativecommons.org/licenses/by/4.0/

Christian Heina (HALF4J)

Copyright 2023 Christian Heina

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Jianyang Gao, Yutong Gou, Yuexuan Xu, Yongyi Yang, Cheng Long, Raymond Chi-Wing Wong,
"Practical and Asymptotically Optimal Quantization of High-Dimensional Vectors in Euclidean Space for
Approximate Nearest Neighbor Search",
SIGMOD 2025, available at https://arxiv.org/abs/2409.09913

Yutong Gou, Jianyang Gao, Yuexuan Xu, Jifan Shi and Zhonghao Yang
https://github.com/VectorDB-NTU/RaBitQ-Library/blob/main/LICENSE
32 changes: 32 additions & 0 deletions fdb-extensions/fdb-extensions.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,38 @@ dependencies {
testFixturesAnnotationProcessor(libs.autoService)
}

// Location (under the build directory) that the SIFT-small archive is downloaded to.
def siftSmallFile = layout.buildDirectory.file('downloads/siftsmall.tar.gz')
// Directory (under the build directory) that the archive's contents are unpacked into.
def extractDir = layout.buildDirectory.dir("extracted")

// Task that downloads the SIFT-small data set archive (a gzipped tarball, not a CSV) to siftSmallFile.
// The URL embeds a fixed revision hash rather than a branch name.
tasks.register('downloadSiftSmall', de.undercouch.gradle.tasks.download.Download) {
src 'https://huggingface.co/datasets/vecdata/siftsmall/resolve/3106e1b83049c44713b1ce06942d0ab474bbdfb6/siftsmall.tar.gz'
dest siftSmallFile.get().asFile
// NOTE(review): presumably skips the transfer when the remote file is unchanged since the last download --
// confirm against the download plugin documentation
onlyIfModified true
// NOTE(review): presumably downloads to a temporary file and moves it into place only on success -- confirm
tempAndMove true
retries 3
}

// Task that unpacks the downloaded archive into extractDir and afterwards prints the extracted files.
tasks.register('extractSiftSmall', Copy) {
dependsOn 'downloadSiftSmall'
// read the downloaded file as a gzip-compressed tar tree
from(tarTree(resources.gzip(siftSmallFile)))
into extractDir

doLast {
// list every regular file found under extractDir; directories themselves are not printed
println "Extracted files into: ${extractDir.get().asFile}"
fileTree(extractDir).visit { details ->
if (!details.isDirectory()) {
println " - ${details.file}"
}
}
}
}

// Run extractSiftSmall before the tests and register the extracted directory as an input of the test task.
test {
dependsOn tasks.named('extractSiftSmall')
inputs.dir extractDir
}

publishing {
publications {
library(MavenPublication) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,14 @@
import com.apple.foundationdb.annotation.API;
import com.apple.foundationdb.util.LoggableException;
import com.google.common.base.Suppliers;
import com.google.common.collect.Lists;
import com.google.common.util.concurrent.ThreadFactoryBuilder;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
Expand All @@ -42,9 +44,13 @@
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.function.IntPredicate;
import java.util.function.IntUnaryOperator;
import java.util.function.Predicate;
import java.util.function.Supplier;

Expand Down Expand Up @@ -1051,6 +1057,93 @@
return result;
}

/**
* Method that provides the functionality of a for loop, however, in an asynchronous way. The result of this method
* is a {@link CompletableFuture} that represents the result of the last iteration of the loop body.
* @param startI an integer analogous to the starting value of a loop variable in a for loop
* @param startU an object of some type {@code U} that represents some initial state that is passed to the loop's
* initial state
* @param conditionPredicate a predicate on the loop variable that must be true before the next iteration is
* entered; analogous to the condition in a for loop
* @param stepFunction a unary operator used for modifying the loop variable after each iteration
* @param body a bi-function to be called for each iteration; this function is initially invoked using
* {@code startI} and {@code startU}; the result of the body is then passed into the next iterator's body
* together with a new value for the loop variable. In this way callers can access state inside an iteration
* that was computed in a previous iteration.
* @param executor the executor
* @param <U> the type of the result of the body {@link BiFunction}
* @return a {@link CompletableFuture} containing the result of the last iteration's body invocation.
*/
@Nonnull
public static <U> CompletableFuture<U> forLoop(final int startI, @Nullable final U startU,
@Nonnull final IntPredicate conditionPredicate,
@Nonnull final IntUnaryOperator stepFunction,
@Nonnull final BiFunction<Integer, U, CompletableFuture<U>> body,
@Nonnull final Executor executor) {
final AtomicInteger loopVariableAtomic = new AtomicInteger(startI);
final AtomicReference<U> lastResultAtomic = new AtomicReference<>(startU);
return whileTrue(() -> {
final int loopVariable = loopVariableAtomic.get();
if (!conditionPredicate.test(loopVariable)) {
return AsyncUtil.READY_FALSE;
}
return body.apply(loopVariable, lastResultAtomic.get())
.thenApply(result -> {
loopVariableAtomic.set(stepFunction.applyAsInt(loopVariable));
lastResultAtomic.set(result);
return true;
});

Check warning on line 1095 in fdb-extensions/src/main/java/com/apple/foundationdb/async/MoreAsyncUtil.java

View check run for this annotation

fdb.teamscale.io / Teamscale | Findings

fdb-extensions/src/main/java/com/apple/foundationdb/async/MoreAsyncUtil.java#L1091-L1095

Method always returns the same value (true) https://fdb.teamscale.io/findings/details/foundationdb-fdb-record-layer?t=FORK_MR%2F3677%2Fnormen662%2Fvector-basics%3AHEAD&id=8070E99D8E3F8370FE548A32704A6A30
}, executor).thenApply(ignored -> lastResultAtomic.get());
}

/**
 * Method to iterate over some items, for each of which a body is executed asynchronously. The result of each such
 * execution is collected in a list and returned as a {@link CompletableFuture} over that list. The list has one
 * entry per item, in the iteration order of {@code items}, independent of the order in which the individual
 * futures complete.
 * @param items the items to iterate over; must not contain {@code null} elements
 * @param body a function to be called for each item
 * @param parallelism the maximum number of body invocations that may be in flight at the same time; values
 *        smaller than {@code 1} are treated as {@code 1}
 * @param executor the executor
 * @param <T> the type of item
 * @param <U> the type of the result
 * @return a {@link CompletableFuture} containing a list of results collected from the individual body invocations
 */
@Nonnull
@SuppressWarnings("unchecked")
public static <T, U> CompletableFuture<List<U>> forEach(@Nonnull final Iterable<T> items,
                                                        @Nonnull final Function<T, CompletableFuture<U>> body,
                                                        final int parallelism,
                                                        @Nonnull final Executor executor) {
    // Snapshot all items up front; afterwards the deque is only ever drained by the loop body below.
    final ArrayDeque<T> toBeProcessed = new ArrayDeque<>();
    for (final T item : items) {
        toBeProcessed.addLast(item);
    }

    // Clamp to at least one in-flight invocation so that the loop always makes progress.
    final int maxInFlight = Math.max(1, parallelism);
    final List<CompletableFuture<Void>> working = Lists.newArrayList();
    final AtomicInteger indexAtomic = new AtomicInteger(0);
    final Object[] resultArray = new Object[toBeProcessed.size()];

    return whileTrue(() -> {
        // NOTE(review): completed futures are dropped here without inspecting their outcome, so whether a
        // failed body invocation fails the overall future depends on the error semantics of whenAny -- confirm.
        working.removeIf(CompletableFuture::isDone);

        // Top up to (and not beyond) the requested degree of parallelism.
        while (working.size() < maxInFlight) {
            final T currentItem = toBeProcessed.pollFirst();
            if (currentItem == null) {
                break;
            }

            // The slot is fixed when the item is dequeued, which keeps results in item order.
            final int index = indexAtomic.getAndIncrement();
            working.add(body.apply(currentItem)
                    .thenAccept(result -> resultArray[index] = result));
        }

        if (working.isEmpty()) {
            return AsyncUtil.READY_FALSE;
        }
        return whenAny(working).thenApply(ignored -> true);
        // The cast is unchecked but harmless: U is erased, and the array is only exposed through the list view.
    }, executor).thenApply(ignored -> Arrays.asList((U[])resultArray));
}

/**
* A {@code Boolean} function that is always true.
* @param <T> the type of the (ignored) argument to the function
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
* Storage adapter used for serialization and deserialization of nodes.
*/
interface StorageAdapter {

/**
* Get the {@link RTree.Config} associated with this storage adapter.
* @return the configuration used by this storage adapter
Expand Down
Loading
Loading