Skip to content

Commit 5560920

Browse files
committed
Refine Cassandra Vector Search documentation.
Add metadata prefiltering. Update documentation to reflect Cassandra specifics regarding sorting. Closes #1608
1 parent f36bc6d commit 5560920

File tree

7 files changed

+45
-38
lines changed

7 files changed

+45
-38
lines changed

spring-data-cassandra/src/test/java/org/springframework/data/cassandra/core/CassandraVectorSearchIntegrationTests.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
package org.springframework.data.cassandra.core;
1717

1818
import static org.assertj.core.api.Assertions.*;
19+
import static org.springframework.data.cassandra.core.query.Criteria.*;
1920

2021
import java.util.List;
2122
import java.util.UUID;
@@ -86,16 +87,19 @@ void shouldQueryVector() {
8687

8788
Comments one = new Comments();
8889
one.setId(UUID.randomUUID());
90+
one.setLanguage("en");
8991
one.setVector(Vector.of(0.45f, 0.09f, 0.01f, 0.2f, 0.11f));
9092
one.setComment("Raining too hard should have postponed");
9193

9294
Comments two = new Comments();
9395
two.setId(UUID.randomUUID());
96+
two.setLanguage("en");
9497
two.setVector(Vector.of(0.99f, 0.5f, 0.99f, 0.1f, 0.34f));
9598
two.setComment("Second rest stop was out of water");
9699

97100
Comments three = new Comments();
98101
three.setId(UUID.randomUUID());
102+
three.setLanguage("en");
99103
three.setVector(Vector.of(0.9f, 0.54f, 0.12f, 0.1f, 0.95f));
100104
three.setComment("LATE RIDERS SHOULD NOT DELAY THE START");
101105

@@ -107,7 +111,7 @@ void shouldQueryVector() {
107111

108112
Columns columns = Columns.empty().include("comment").select("vector",
109113
it -> it.similarity(vector).cosine().as("similarity"));
110-
Query query = Query.select(columns).limit(3).sort(VectorSort.ann("vector", vector));
114+
Query query = Query.select(columns).and(where("language").is("en")).limit(3).sort(VectorSort.ann("vector", vector));
111115

112116
List<CommentSearch> result = template.query(Comments.class).as(CommentSearch.class).matching(query).all();
113117

@@ -134,6 +138,7 @@ static class Comments {
134138

135139
@Id UUID id;
136140
String comment;
141+
@SaiIndexed String language;
137142

138143
@VectorType(dimensions = 5)
139144
@SaiIndexed Vector vector;
@@ -154,6 +159,14 @@ public void setComment(String comment) {
154159
this.comment = comment;
155160
}
156161

162+
public String getLanguage() {
163+
return language;
164+
}
165+
166+
public void setLanguage(String language) {
167+
this.language = language;
168+
}
169+
157170
public Vector getVector() {
158171
return vector;
159172
}

spring-data-cassandra/src/test/java/org/springframework/data/cassandra/repository/VectorSearchIntegrationTests.java

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -92,28 +92,28 @@ void setUp() {
9292
repository.saveAll(List.of(w1, w2, w3, w4));
9393
}
9494

95-
@Test // GH-
95+
@Test // GH-1573
9696
void searchWithoutScoringFunctionShouldFail() {
9797
assertThatExceptionOfType(QueryCreationException.class)
9898
.isThrownBy(() -> repository.searchByEmbeddingNear(VECTOR, Limit.of(100)));
9999
}
100100

101-
@Test // GH-
101+
@Test // GH-1573
102102
void shouldConsiderScoringFunction() {
103103

104-
SearchResults<WithVectorFields> results = repository.searchByEmbeddingNear(VECTOR,
105-
ScoringFunction.dotProduct(), Limit.of(100));
104+
SearchResults<WithVectorFields> results = repository.searchByEmbeddingNearAndCountry(VECTOR,
105+
ScoringFunction.dotProduct(), "de", Limit.of(100));
106106

107-
assertThat(results).hasSize(4);
107+
assertThat(results).hasSize(3);
108108

109109
for (SearchResult<WithVectorFields> result : results) {
110110
assertThat(result.getScore()).isInstanceOf(Similarity.class);
111111
assertThat(result.getScore().getValue()).isNotCloseTo(0d, offset(0.1d));
112112
}
113113

114-
results = repository.searchByEmbeddingNear(VECTOR, VectorScoringFunctions.EUCLIDEAN, Limit.of(100));
114+
results = repository.searchByEmbeddingNearAndCountry(VECTOR, VectorScoringFunctions.EUCLIDEAN, "de", Limit.of(100));
115115

116-
assertThat(results).hasSize(4);
116+
assertThat(results).hasSize(3);
117117

118118
for (SearchResult<WithVectorFields> result : results) {
119119

@@ -122,19 +122,19 @@ void shouldConsiderScoringFunction() {
122122
}
123123
}
124124

125-
@Test // GH-
125+
@Test // GH-1573
126126
void shouldRunAnnotatedSearchByVector() {
127127

128-
SearchResults<WithVectorFields> results = repository.searchAnnotatedByEmbeddingNear(VECTOR, Limit.of(100));
128+
SearchResults<WithVectorFields> results = repository.searchAnnotatedByEmbeddingNear(VECTOR, "de", Limit.of(100));
129129

130-
assertThat(results).hasSize(4);
130+
assertThat(results).hasSize(3);
131131
for (SearchResult<WithVectorFields> result : results) {
132132
assertThat(result.getScore()).isInstanceOf(Similarity.class);
133133
assertThat(result.getScore().getValue()).isNotCloseTo(0d, offset(0.1d));
134134
}
135135
}
136136

137-
@Test // GH-
137+
@Test // GH-1573
138138
void shouldFindByVector() {
139139

140140
List<WithVectorFields> result = repository.findByEmbeddingNear(VECTOR, Limit.of(100));
@@ -144,22 +144,29 @@ void shouldFindByVector() {
144144

145145
interface VectorSearchRepository extends CrudRepository<WithVectorFields, UUID> {
146146

147-
SearchResults<WithVectorFields> searchByEmbeddingNear(Vector embedding, ScoringFunction function, Limit limit);
147+
SearchResults<WithVectorFields> searchByEmbeddingNearAndCountry(Vector embedding, ScoringFunction function,
148+
String country, Limit limit);
148149

149150
SearchResults<WithVectorFields> searchByEmbeddingNear(Vector embedding, Limit limit);
150151

151152
List<WithVectorFields> findByEmbeddingNear(Vector embedding, Limit limit);
152153

153-
@Query("SELECT id,description,country,similarity_cosine(embedding,:embedding) AS score FROM withvectorfields ORDER BY embedding ANN OF :embedding LIMIT :limit")
154-
SearchResults<WithVectorFields> searchAnnotatedByEmbeddingNear(Vector embedding, Limit limit);
154+
@Query("""
155+
SELECT id,description,country,similarity_cosine(embedding,:embedding) AS score
156+
FROM withvectorfields
157+
WHERE country = :country
158+
ORDER BY embedding ANN OF :embedding
159+
LIMIT :limit
160+
""")
161+
SearchResults<WithVectorFields> searchAnnotatedByEmbeddingNear(Vector embedding, String country, Limit limit);
155162

156163
}
157164

158165
@Table
159166
static class WithVectorFields {
160167

161168
@Id String id;
162-
String country;
169+
@SaiIndexed String country;
163170
String description;
164171

165172
@VectorType(dimensions = 5)

src/main/antora/modules/ROOT/partials/vector-search-method-annotated-include.adoc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,10 @@ interface CommentRepository extends Repository<Comment, String> {
77
@Query("""
88
SELECT id, description, country, similarity_cosine(embedding,:embedding) AS score
99
FROM comments
10+
WHERE country = :country
1011
ORDER BY embedding ANN OF :embedding LIMIT :limit
1112
""")
12-
SearchResults<WithVectorFields> searchAnnotatedByEmbeddingNear(Vector embedding, Limit limit);
13+
SearchResults<Comment> searchByEmbeddingNearAndCountry(Vector embedding, String country, Limit limit);
1314
}
1415
----
1516
====

src/main/antora/modules/ROOT/partials/vector-search-method-derived-include.adoc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ interface CommentRepository extends Repository<Comment, String> {
66
77
List<Comment> searchByEmbeddingNear(Vector vector);
88
9-
SearchResults<Comment> searchByEmbeddingNear(Vector vector, ScoringFunction function);
9+
SearchResults<Comment> searchByEmbeddingNearAndCountry(Vector vector, ScoringFunction function, String country);
1010
1111
}
1212
----

src/main/antora/modules/ROOT/partials/vector-search-model-include.adoc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
class Comment {
55
66
@Id String id;
7+
8+
@SaiIndexed
79
String country;
810
String comment;
911

src/main/antora/modules/ROOT/partials/vector-search-repository-include.adoc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@
44
----
55
interface CommentRepository extends Repository<Comment, String> {
66
7-
SearchResults<Comment> searchByEmbeddingNear(Vector vector, ScoringFunction function, Limit limit);
7+
SearchResults<Comment> searchByEmbeddingNearAndCountry(Vector vector, ScoringFunction function, String country, Limit limit);
88
99
}
1010
11-
SearchResults<Comment> results = repository.searchByEmbeddingNear(Vector.of(…), ScoringFunction.cosine(), Limit.of(10));
11+
SearchResults<Comment> results = repository.searchByEmbeddingNearAndCountry(Vector.of(…), ScoringFunction.cosine(), "…", Limit.of(10));
1212
----
1313
====

src/main/antora/modules/ROOT/partials/vector-search.adoc

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ ifdef::vector-search-repository-include[]
6060
include::{vector-search-repository-include}[]
6161
endif::[]
6262

63-
In this example, the `searchByCountryAndEmbeddingNear` method returns a `SearchResults<Comment>` object, which contains a list of `SearchResult<Comment>` instances.
63+
In this example, the `searchByEmbeddingNearAndCountry` method returns a `SearchResults<Comment>` object, which contains a list of `SearchResult<Comment>` instances.
6464
Each result includes the matched `Comment` entity and its relevance score.
6565

6666
Relevance score is a numerical value that indicates how closely the matched vector aligns with the query vector.
@@ -147,21 +147,5 @@ If an annotated query does not define e.g. the score, then the score value in th
147147
[[vector-search.method.sorting]]
148148
=== Sorting
149149

150-
By default, search results are ordered according to their score.
151-
You can override sorting by using the `Sort` parameter:
150+
Cassandra vector search results are ordered by their score through the `ORDER BY embedding ANN OF [vector]` clause.
152151

153-
.Using `Sort` in Repository Search Methods
154-
====
155-
[source,java]
156-
----
157-
interface CommentRepository extends Repository<Comment, String> {
158-
159-
SearchResults<Comment> searchByEmbeddingNearOrderByCountry(Vector vector, Score score);
160-
161-
SearchResults<Comment> searchByEmbeddingWithin(Vector vector, Score score, Sort sort);
162-
}
163-
----
164-
====
165-
166-
Please note that custom sorting does not allow expressing the score as a sorting criterion.
167-
You can only refer to domain properties.

0 commit comments

Comments
 (0)