41
41
import java .util .List ;
42
42
import java .util .concurrent .CompletableFuture ;
43
43
44
+ /**
45
+ * The {@code CompactStorageAdapter} class is a concrete implementation of {@link StorageAdapter} for managing HNSW
46
+ * graph data in a compact format.
47
+ * <p>
48
+ * It handles the serialization and deserialization of graph nodes to and from a persistent data store. This
49
+ * implementation is optimized for space efficiency by storing nodes with their accompanying vector data and by storing
50
+ * just neighbor primary keys. It extends {@link AbstractStorageAdapter} to inherit common storage logic.
51
+ */
44
52
class CompactStorageAdapter extends AbstractStorageAdapter <NodeReference > implements StorageAdapter <NodeReference > {
45
53
@ Nonnull
46
54
private static final Logger logger = LoggerFactory .getLogger (CompactStorageAdapter .class );
47
55
56
/**
 * Creates a storage adapter that reads and writes HNSW nodes in the compact layout.
 * <p>
 * The constructor does nothing beyond forwarding every argument, unchanged and in the same order, to the
 * {@link AbstractStorageAdapter} constructor.
 *
 * @param config the {@link HNSW.Config} of the graph served by this adapter; must not be null
 * @param nodeFactory the factory that creates nodes referring to their neighbors via {@link NodeReference};
 *        must not be null
 * @param subspace the {@link Subspace} passed on to the superclass; must not be null
 * @param onWriteListener the write listener passed on to the superclass; must not be null
 * @param onReadListener the read listener passed on to the superclass; must not be null
 */
public CompactStorageAdapter(
        @Nonnull final HNSW.Config config,
        @Nonnull final NodeFactory<NodeReference> nodeFactory,
        @Nonnull final Subspace subspace,
        @Nonnull final OnWriteListener onWriteListener,
        @Nonnull final OnReadListener onReadListener) {
    super(config, nodeFactory, subspace, onWriteListener, onReadListener);
}
54
74
75
+ /**
76
+ * Returns this storage adapter instance, as it is already a compact storage adapter.
77
+ * @return the current instance, which serves as its own compact representation.
78
+ * This will never be {@code null}.
79
+ */
55
80
@ Nonnull
56
81
@ Override
57
82
public StorageAdapter <NodeReference > asCompactStorageAdapter () {
58
83
return this ;
59
84
}
60
85
86
+ /**
87
+ * Returns this adapter as a {@code StorageAdapter} that supports inlining.
88
+ * <p>
89
+ * This operation is not supported by a compact storage adapter. Calling this method on this implementation will
90
+ * always result in an {@code IllegalStateException}.
91
+ *
92
+ * @return an instance of {@code StorageAdapter} that supports inlining
93
+ *
94
+ * @throws IllegalStateException unconditionally, as this operation is not supported
95
+ * on a compact storage adapter.
96
+ */
61
97
@ Nonnull
62
98
@ Override
63
99
public StorageAdapter <NodeReferenceWithVector > asInliningStorageAdapter () {
64
100
throw new IllegalStateException ("cannot call this method on a compact storage adapter" );
65
101
}
66
102
103
+ /**
104
+ * Asynchronously fetches a node from the database for a given layer and primary key.
105
+ * <p>
106
+ * This internal method constructs a raw byte key from the {@code layer} and {@code primaryKey}
107
+ * within the store's data subspace. It then uses the provided {@link ReadTransaction} to
108
+ * retrieve the raw value. If a value is found, it is deserialized into a {@link Node} object
109
+ * using the {@code nodeFromRaw} method.
110
+ *
111
+ * @param readTransaction the transaction to use for the read operation
112
+ * @param layer the layer of the node to fetch
113
+ * @param primaryKey the primary key of the node to fetch
114
+ *
115
+ * @return a future that will complete with the fetched {@link Node}
116
+ *
117
+ * @throws IllegalStateException if the node cannot be found in the database for the given key
118
+ */
67
119
@ Nonnull
68
120
@ Override
69
121
protected CompletableFuture <Node <NodeReference >> fetchNodeInternal (@ Nonnull final ReadTransaction readTransaction ,
@@ -80,20 +132,52 @@ protected CompletableFuture<Node<NodeReference>> fetchNodeInternal(@Nonnull fina
80
132
});
81
133
}
82
134
135
+ /**
136
+ * Deserializes a raw key-value byte array pair into a {@code Node}.
137
+ * <p>
138
+ * This method first converts the {@code valueBytes} into a {@link Tuple} and then,
139
+ * along with the {@code primaryKey}, constructs the final {@code Node} object.
140
+ * It also notifies any registered {@link OnReadListener} about the raw key-value
141
+ * read and the resulting node creation.
142
+ *
143
+ * @param layer the layer of the HNSW where this node resides
144
+ * @param primaryKey the primary key for the node
145
+ * @param keyBytes the raw byte representation of the node's key
146
+ * @param valueBytes the raw byte representation of the node's value, which will be deserialized
147
+ *
148
+ * @return a non-null, deserialized {@link Node} object
149
+ */
83
150
@ Nonnull
84
151
private Node <NodeReference > nodeFromRaw (final int layer , final @ Nonnull Tuple primaryKey ,
85
152
@ Nonnull final byte [] keyBytes , @ Nonnull final byte [] valueBytes ) {
86
153
final Tuple nodeTuple = Tuple .fromBytes (valueBytes );
87
- final Node <NodeReference > node = nodeFromTuples (primaryKey , nodeTuple );
154
+ final Node <NodeReference > node = nodeFromKeyValuesTuples (primaryKey , nodeTuple );
88
155
final OnReadListener onReadListener = getOnReadListener ();
89
156
onReadListener .onNodeRead (layer , node );
90
157
onReadListener .onKeyValueRead (layer , keyBytes , valueBytes );
91
158
return node ;
92
159
}
93
160
161
+ /**
162
+ * Constructs a compact {@link Node} from its representation as stored key and value tuples.
163
+ * <p>
164
+ * This method deserializes a node by extracting its components from the provided tuples. It verifies that the
165
+ * node is of type {@link NodeKind#COMPACT} before delegating the final construction to
166
+ * {@link #compactNodeFromTuples(Tuple, Tuple, Tuple)}. The {@code valueTuple} is expected to have a specific
167
+ * structure: the serialized node kind at index 0, a nested tuple for the vector at index 1, and a nested
168
+ * tuple for the neighbors at index 2.
169
+ *
170
+ * @param primaryKey the tuple representing the primary key of the node
171
+ * @param valueTuple the tuple containing the serialized node data, including kind, vector, and neighbors
172
+ *
173
+ * @return the reconstructed compact {@link Node}
174
+ *
175
+ * @throws com.google.common.base.VerifyException if the node kind encoded in {@code valueTuple} is not
176
+ * {@link NodeKind#COMPACT}
177
+ */
94
178
@ Nonnull
95
- private Node <NodeReference > nodeFromTuples (@ Nonnull final Tuple primaryKey ,
96
- @ Nonnull final Tuple valueTuple ) {
179
+ private Node <NodeReference > nodeFromKeyValuesTuples (@ Nonnull final Tuple primaryKey ,
180
+ @ Nonnull final Tuple valueTuple ) {
97
181
final NodeKind nodeKind = NodeKind .fromSerializedNodeKind ((byte )valueTuple .getLong (0 ));
98
182
Verify .verify (nodeKind == NodeKind .COMPACT );
99
183
@@ -105,6 +189,21 @@ private Node<NodeReference> nodeFromTuples(@Nonnull final Tuple primaryKey,
105
189
return compactNodeFromTuples (primaryKey , vectorTuple , neighborsTuple );
106
190
}
107
191
192
+ /**
193
+ * Creates a compact in-memory representation of a graph node from its constituent storage tuples.
194
+ * <p>
195
+ * This method deserializes the raw data stored in {@code Tuple} objects into their
196
+ * corresponding in-memory types. It extracts the vector, constructs a list of
197
+ * {@link NodeReference} objects for the neighbors, and then uses a factory to
198
+ * assemble the final {@code Node} object.
199
+ * </p>
200
+ *
201
+ * @param primaryKey the tuple representing the node's primary key
202
+ * @param vectorTuple the tuple containing the node's vector data
203
+ * @param neighborsTuple the tuple containing a list of nested tuples, where each nested tuple represents a neighbor
204
+ *
205
+ * @return a new {@code Node} instance containing the deserialized data from the input tuples
206
+ */
108
207
@ Nonnull
109
208
private Node <NodeReference > compactNodeFromTuples (@ Nonnull final Tuple primaryKey ,
110
209
@ Nonnull final Tuple vectorTuple ,
@@ -120,6 +219,21 @@ private Node<NodeReference> compactNodeFromTuples(@Nonnull final Tuple primaryKe
120
219
return getNodeFactory ().create (primaryKey , vector , nodeReferences );
121
220
}
122
221
222
+ /**
223
+ * Writes the internal representation of a compact node to the data store within a given transaction.
224
+ * This method handles the serialization of the node's vector and its final set of neighbors based on the
225
+ * provided {@code neighborsChangeSet}.
226
+ *
227
+ * <p>The node is stored as a {@link Tuple} with the structure {@code (NodeKind, Vector, NeighborPrimaryKeys)}.
228
+ * The key for the storage is derived from the node's layer and its primary key. After writing, it notifies any
229
+ * registered write listeners via {@code onNodeWritten} and {@code onKeyValueWritten}.
230
+ *
231
+ * @param transaction the {@link Transaction} to use for the write operation.
232
+ * @param node the {@link Node} to be serialized and written; it is processed as a {@link CompactNode}.
233
+ * @param layer the graph layer index for the node, used to construct the storage key.
234
+ * @param neighborsChangeSet a {@link NeighborsChangeSet} containing the additions and removals, which are
235
+ * merged to determine the final set of neighbors to be written.
236
+ */
123
237
@ Override
124
238
public void writeNodeInternal (@ Nonnull final Transaction transaction , @ Nonnull final Node <NodeReference > node ,
125
239
final int layer , @ Nonnull final NeighborsChangeSet <NodeReference > neighborsChangeSet ) {
@@ -151,6 +265,22 @@ public void writeNodeInternal(@Nonnull final Transaction transaction, @Nonnull f
151
265
}
152
266
}
153
267
268
+ /**
269
+ * Scans a given layer for nodes, returning an iterable over the results.
270
+ * <p>
271
+ * This method reads a limited number of nodes from a specific layer in the underlying data store.
272
+ * The scan can be started from a specific point using the {@code lastPrimaryKey} parameter, which is
273
+ * useful for paginating through the nodes in a large layer.
274
+ *
275
+ * @param readTransaction the transaction to use for reading data; must not be {@code null}
276
+ * @param layer the layer to scan for nodes
277
+ * @param lastPrimaryKey the primary key of the last node from a previous scan. If {@code null},
278
+ * the scan starts from the beginning of the layer.
279
+ * @param maxNumRead the maximum number of nodes to read in this scan
280
+ *
281
+ * @return an {@link Iterable} of {@link Node} objects found in the specified layer,
282
+ * limited by {@code maxNumRead}
283
+ */
154
284
@ Nonnull
155
285
@ Override
156
286
public Iterable <Node <NodeReference >> scanLayer (@ Nonnull final ReadTransaction readTransaction , int layer ,
0 commit comments