Skip to content

Commit 7e506f9

Browse files
committed
code complete
1 parent 04728f3 commit 7e506f9

File tree

3 files changed

+63
-1
lines changed

3 files changed

+63
-1
lines changed

fdb-extensions/src/main/java/com/apple/foundationdb/async/rabitq/RaBitQuantizer.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,10 @@ public final class RaBitQuantizer implements Quantizer {
7474
*/
7575
public RaBitQuantizer(@Nonnull final Metric metric, final int numExBits) {
7676
Preconditions.checkArgument(numExBits > 0 && numExBits < TIGHT_START.length);
77+
Preconditions.checkArgument(
78+
metric == Metric.EUCLIDEAN_METRIC ||
79+
metric == Metric.EUCLIDEAN_SQUARE_METRIC ||
80+
metric == Metric.DOT_PRODUCT_METRIC);
7781

7882
this.numExBits = numExBits;
7983
this.metric = metric;
@@ -102,7 +106,8 @@ public RaBitEstimator estimator() {
102106
* core encoding logic to an internal helper method and returns the final
103107
* {@link EncodedRealVector}.
104108
*
105-
* @param data the {@link RealVector} to be encoded; must not be null.
109+
* @param data the {@link RealVector} to be encoded; must not be null. The vector must be pre-rotated and
110+
* translated.
106111
*
107112
* @return the resulting {@link EncodedRealVector}, guaranteed to be non-null.
108113
*/

fdb-extensions/src/main/java/com/apple/foundationdb/linear/Estimator.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,25 @@
2222

2323
import javax.annotation.Nonnull;
2424

25+
/**
26+
* Interface for an estimator that calculates the distance between vectors.
27+
* <p>
28+
* Implementations of this interface are expected to provide a specific distance
29+
* metric calculation, often used in search or similarity contexts where one
30+
* vector (the query) is compared against many stored vectors.
31+
*/
2532
public interface Estimator {
33+
/**
34+
* Calculates the distance between a pre-rotated and translated query vector and a stored vector.
35+
* <p>
36+
* This method is designed to compute the distance metric between two vectors in a high-dimensional space. It is
37+
* crucial that the {@code query} vector has already been appropriately transformed (e.g., rotated and translated)
38+
* to align with the coordinate system of the {@code storedVector} before calling this method.
39+
*
40+
* @param query the pre-rotated and translated query vector, cannot be null.
41+
* @param storedVector the stored vector to which the distance is calculated, cannot be null.
42+
* @return a {@code double} representing the distance between the two vectors under the estimator's metric.
43+
*/
2644
double distance(@Nonnull RealVector query, // pre-rotated query q
2745
@Nonnull RealVector storedVector);
2846
}

fdb-extensions/src/main/java/com/apple/foundationdb/linear/Quantizer.java

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,52 @@
2222

2323
import javax.annotation.Nonnull;
2424

25+
/**
26+
* Defines the contract for a quantizer, a component responsible for encoding data vectors into a different, ideally
27+
* more compact, representation.
28+
* <p>
29+
* Quantizers are typically used in machine learning and information retrieval to transform raw data into a format that
30+
* is more suitable for processing, such as a compressed representation.
31+
*/
2532
public interface Quantizer {
33+
/**
34+
* Returns the {@code Estimator} instance associated with this quantizer.
35+
* <p>
36+
* The estimator is responsible for performing the primary distance estimation or calculation logic. This method
37+
* provides access to that underlying component.
38+
*
39+
* @return the {@link Estimator} instance, which is guaranteed to be non-null.
40+
*/
2641
@Nonnull
2742
Estimator estimator();
2843

44+
/**
45+
* Encodes the given data vector into another vector representation.
46+
* <p>
47+
* This method transforms the raw input data into a different, quantized format, which is often a vector more
48+
* suitable for processing or storing the data. The specifics of the encoding depend on the implementing class.
49+
*
50+
* @param data the input {@link RealVector} to be encoded. Must not be {@code null} and is assumed to have been
51+
* preprocessed, such as by rotation and/or translation. The preprocessing has to align with the requirements
52+
* of the specific quantizer.
53+
* @return the encoded vector representation of the input data, guaranteed to be non-null.
54+
*/
2955
@Nonnull
3056
RealVector encode(@Nonnull RealVector data);
3157

58+
/**
59+
* Creates a no-op {@code Quantizer} that does not perform any data transformation.
60+
* <p>
61+
* The returned quantizer's {@link Quantizer#encode(RealVector)} method acts as an
62+
* identity function, returning the input vector without modification. The
63+
* {@link Quantizer#estimator()} is created directly from the distance function
64+
* of the provided {@link Metric}. This can be useful for baseline comparisons
65+
* or for algorithms that require a {@code Quantizer} but where no quantization
66+
* is desired.
67+
*
68+
* @param metric the {@link Metric} used to build the distance estimator for the quantizer.
69+
* @return a new {@link Quantizer} instance that performs no quantization.
70+
*/
3271
@Nonnull
3372
static Quantizer noOpQuantizer(@Nonnull final Metric metric) {
3473
return new Quantizer() {

0 commit comments

Comments
 (0)