Skip to content

Commit af5d8c1

Browse files
committed
rebase
1 parent 3e31e00 commit af5d8c1

File tree

7 files changed

+70
-97
lines changed

7 files changed

+70
-97
lines changed

spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/elasticsearch/ElasticsearchVectorStoreAutoConfiguration.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
import org.springframework.context.annotation.Bean;
2929
import org.springframework.util.StringUtils;
3030

31+
import java.util.Objects;
32+
3133
/**
3234
* @author Eddú Meléndez
3335
* @author Wei Jiang
@@ -52,10 +54,7 @@ ElasticsearchVectorStore vectorStore(ElasticsearchVectorStoreProperties properti
5254
if (properties.getDimensions() != null) {
5355
elasticsearchVectorStoreOptions.setDimensions(properties.getDimensions());
5456
}
55-
if (properties.isDenseVectorIndexing() != null) {
56-
elasticsearchVectorStoreOptions.setDenseVectorIndexing(properties.isDenseVectorIndexing());
57-
}
58-
if (StringUtils.hasText(properties.getSimilarity())) {
57+
if (properties.getSimilarity() != null) {
5958
elasticsearchVectorStoreOptions.setSimilarity(properties.getSimilarity());
6059
}
6160

spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/elasticsearch/ElasticsearchVectorStoreProperties.java

Lines changed: 4 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
package org.springframework.ai.autoconfigure.vectorstore.elasticsearch;
1717

1818
import org.springframework.ai.autoconfigure.CommonVectorStoreProperties;
19+
import org.springframework.ai.vectorstore.SimilarityFunction;
1920
import org.springframework.boot.context.properties.ConfigurationProperties;
2021

2122
/**
@@ -37,15 +38,10 @@ public class ElasticsearchVectorStoreProperties extends CommonVectorStorePropert
3738
*/
3839
private Integer dimensions;
3940

40-
/**
41-
* Whether to use dense vector indexing.
42-
*/
43-
private Boolean denseVectorIndexing;
44-
4541
/**
4642
* The similarity function to use.
4743
*/
48-
private String similarity;
44+
private SimilarityFunction similarity;
4945

5046
public String getIndexName() {
5147
return this.indexName;
@@ -63,19 +59,11 @@ public void setDimensions(Integer dimensions) {
6359
this.dimensions = dimensions;
6460
}
6561

66-
public Boolean isDenseVectorIndexing() {
67-
return denseVectorIndexing;
68-
}
69-
70-
public void setDenseVectorIndexing(Boolean denseVectorIndexing) {
71-
this.denseVectorIndexing = denseVectorIndexing;
72-
}
73-
74-
public String getSimilarity() {
62+
public SimilarityFunction getSimilarity() {
7563
return similarity;
7664
}
7765

78-
public void setSimilarity(String similarity) {
66+
public void setSimilarity(SimilarityFunction similarity) {
7967
this.similarity = similarity;
8068
}
8169

vector-stores/spring-ai-elasticsearch-store/pom.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
<dependency>
3636
<groupId>co.elastic.clients</groupId>
3737
<artifactId>elasticsearch-java</artifactId>
38+
<version>8.13.2</version>
3839
</dependency>
3940

4041
<!-- TESTING -->

vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/ElasticsearchVectorStore.java

Lines changed: 20 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,12 @@
1616
package org.springframework.ai.vectorstore;
1717

1818
import co.elastic.clients.elasticsearch.ElasticsearchClient;
19-
import co.elastic.clients.elasticsearch._types.mapping.TypeMapping;
19+
import co.elastic.clients.elasticsearch._types.mapping.DenseVectorProperty;
20+
import co.elastic.clients.elasticsearch._types.mapping.Property;
2021
import co.elastic.clients.elasticsearch.core.BulkRequest;
2122
import co.elastic.clients.elasticsearch.core.BulkResponse;
2223
import co.elastic.clients.elasticsearch.core.SearchResponse;
24+
import co.elastic.clients.elasticsearch.core.bulk.BulkResponseItem;
2325
import co.elastic.clients.elasticsearch.core.search.Hit;
2426
import co.elastic.clients.elasticsearch.indices.CreateIndexResponse;
2527
import co.elastic.clients.json.jackson.JacksonJsonpMapper;
@@ -43,6 +45,7 @@
4345
import java.util.stream.Collectors;
4446

4547
import static java.lang.Math.sqrt;
48+
import static org.springframework.ai.vectorstore.SimilarityFunction.l2_norm;
4649

4750
/**
4851
* @author Jemin Huh
@@ -59,8 +62,6 @@ public class ElasticsearchVectorStore implements VectorStore, InitializingBean {
5962

6063
private final ElasticsearchVectorStoreOptions options;
6164

62-
private String similarityFunction = SIMILARITY_DEFAULT;
63-
6465
private final FilterExpressionConverter filterExpressionConverter;
6566

6667
private String similarityFunction;
@@ -85,18 +86,18 @@ public ElasticsearchVectorStore(ElasticsearchVectorStoreOptions options, RestCli
8586

8687
@Override
8788
public void add(List<Document> documents) {
88-
BulkRequest.Builder builkRequestBuilder = new BulkRequest.Builder();
89+
BulkRequest.Builder bulkRequestBuilder = new BulkRequest.Builder();
8990

9091
for (Document document : documents) {
9192
if (Objects.isNull(document.getEmbedding()) || document.getEmbedding().isEmpty()) {
9293
logger.debug("Calling EmbeddingModel for document id = " + document.getId());
9394
document.setEmbedding(this.embeddingModel.embed(document));
9495
}
95-
builkRequestBuilder.operations(op -> op
96+
bulkRequestBuilder.operations(op -> op
9697
.index(idx -> idx.index(this.options.getIndexName()).id(document.getId()).document(document)));
9798
}
9899

99-
BulkResponse bulkRequest = bulkRequest(builkRequestBuilder.build());
100+
BulkResponse bulkRequest = bulkRequest(bulkRequestBuilder.build());
100101

101102
if (bulkRequest.errors()) {
102103
List<BulkResponseItem> bulkResponseItems = bulkRequest.items();
@@ -110,10 +111,10 @@ public void add(List<Document> documents) {
110111

111112
@Override
112113
public Optional<Boolean> delete(List<String> idList) {
113-
BulkRequest.Builder builkRequestBuilder = new BulkRequest.Builder();
114+
BulkRequest.Builder bulkRequestBuilder = new BulkRequest.Builder();
114115
for (String id : idList)
115-
builkRequestBuilder.operations(op -> op.delete(idx -> idx.index(this.options.getIndexName()).id(id)));
116-
return Optional.of(bulkRequest(builkRequestBuilder.build()).errors());
116+
bulkRequestBuilder.operations(op -> op.delete(idx -> idx.index(this.options.getIndexName()).id(id)));
117+
return Optional.of(bulkRequest(bulkRequestBuilder.build()).errors());
117118
}
118119

119120
private BulkResponse bulkRequest(BulkRequest bulkRequest) {
@@ -131,7 +132,7 @@ public List<Document> similaritySearch(SearchRequest searchRequest) {
131132
try {
132133
float threshold = (float) searchRequest.getSimilarityThreshold();
133134
// l2_norm is a distance (closer to zero is better): undo the "1 -" normalization to get the raw threshold
134-
if (similarityFunction.equals("l2_norm")) {
135+
if (options.getSimilarity().equals(l2_norm)) {
135136
threshold = 1 - threshold;
136137
}
137138
final float finalThreshold = threshold;
@@ -141,11 +142,11 @@ public List<Document> similaritySearch(SearchRequest searchRequest) {
141142
.toList();
142143

143144
SearchResponse<Document> res = elasticsearchClient.search(
144-
sr -> sr.index(this.index)
145+
sr -> sr.index(options.getIndexName())
145146
.knn(knn -> knn.queryVector(vectors)
146147
.similarity(finalThreshold)
147148
.k(searchRequest.getTopK())
148-
.field(EMBEDDING)
149+
.field("embedding")
149150
.numCandidates((long) (1.5 * searchRequest.getTopK()))
150151
.filter(fl -> fl.queryString(
151152
qs -> qs.query(getElasticsearchQueryString(searchRequest.getFilterExpression()))))),
@@ -174,8 +175,8 @@ private Document toDocument(Hit<Document> hit) {
174175
// more info on score/distance calculation
175176
// https://www.elastic.co/guide/en/elasticsearch/reference/current/knn-search.html#knn-similarity-search
176177
private float calculateDistance(Float score) {
177-
switch (similarityFunction) {
178-
case "l2_norm":
178+
switch (options.getSimilarity()) {
179+
case l2_norm:
179180
// the returned value of l2_norm is the opposite of the other functions
180181
// (closest to zero means more accurate), so to make it consistent
181182
// with the other functions the reverse is returned applying a "1-"
@@ -187,9 +188,9 @@ private float calculateDistance(Float score) {
187188
}
188189
}
189190

190-
public boolean existsIndex() {
191+
public boolean indexExists() {
191192
try {
192-
return this.elasticsearchClient.indices().exists(ex -> ex.index(this.index)).value();
193+
return this.elasticsearchClient.indices().exists(ex -> ex.index(options.getIndexName())).value();
193194
}
194195
catch (IOException e) {
195196
throw new RuntimeException(e);
@@ -199,18 +200,9 @@ public boolean existsIndex() {
199200
private CreateIndexResponse createIndexMapping() {
200201
try {
201202
return this.elasticsearchClient.indices()
202-
.create(createIndexBuilder -> createIndexBuilder.index(options.getIndexName())
203-
.mappings(typeMappingBuilder -> {
204-
typeMappingBuilder.properties("embedding",
205-
new Property.Builder()
206-
.denseVector(new DenseVectorProperty.Builder().dims(options.getDimensions())
207-
.similarity(options.getSimilarity())
208-
.index(options.isDenseVectorIndexing())
209-
.build())
210-
.build());
211-
212-
return typeMappingBuilder;
213-
}));
203+
.create(cr -> cr.index(options.getIndexName())
204+
.mappings(map -> map.properties("embedding", p -> p.denseVector(
205+
dv -> dv.similarity(options.getSimilarity().toString()).dims(options.getDimensions())))));
214206
}
215207
catch (IOException e) {
216208
throw new RuntimeException(e);

vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/ElasticsearchVectorStoreOptions.java

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -34,15 +34,10 @@ public class ElasticsearchVectorStoreOptions {
3434
*/
3535
private int dimensions = 1536;
3636

37-
/**
38-
* Whether to use dense vector indexing.
39-
*/
40-
private boolean denseVectorIndexing = true;
41-
4237
/**
4338
* The similarity function to use.
4439
*/
45-
private String similarity = "cosine";
40+
private SimilarityFunction similarity = SimilarityFunction.cosine;
4641

4742
public String getIndexName() {
4843
return indexName;
@@ -60,19 +55,11 @@ public void setDimensions(int dims) {
6055
this.dimensions = dims;
6156
}
6257

63-
public boolean isDenseVectorIndexing() {
64-
return denseVectorIndexing;
65-
}
66-
67-
public void setDenseVectorIndexing(boolean denseVectorIndexing) {
68-
this.denseVectorIndexing = denseVectorIndexing;
69-
}
70-
71-
public String getSimilarity() {
58+
public SimilarityFunction getSimilarity() {
7259
return similarity;
7360
}
7461

75-
public void setSimilarity(String similarity) {
62+
public void setSimilarity(SimilarityFunction similarity) {
7663
this.similarity = similarity;
7764
}
7865

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
package org.springframework.ai.vectorstore;
2+
3+
/*
Similarity functions that can be configured for the Elasticsearch dense_vector
"embedding" field of the vector store.
The constant names are passed verbatim (via toString()) as the "similarity" value of
the index mapping, so they intentionally keep Elasticsearch's lowercase spelling
instead of the usual UPPER_SNAKE_CASE; do not rename them.
Reference:
4+
https://www.elastic.co/guide/en/elasticsearch/reference/master/dense-vector.html
5+
max_inner_product is currently not supported because the distance value is not
6+
normalized and would not comply with the requirement of being between 0 and 1
7+
*/
8+
public enum SimilarityFunction {
9+
10+
l2_norm, dot_product, cosine
11+
12+
}

vector-stores/spring-ai-elasticsearch-store/src/test/java/org/springframework/ai/vectorstore/ElasticsearchVectorStoreIT.java

Lines changed: 27 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
import java.util.concurrent.TimeUnit;
2727

2828
import co.elastic.clients.elasticsearch.ElasticsearchClient;
29-
import co.elastic.clients.elasticsearch._types.mapping.TypeMapping;
29+
import co.elastic.clients.elasticsearch.cat.indices.IndicesRecord;
3030
import co.elastic.clients.json.jackson.JacksonJsonpMapper;
3131
import co.elastic.clients.transport.rest_client.RestClientTransport;
3232
import com.fasterxml.jackson.databind.DeserializationFeature;
@@ -93,29 +93,15 @@ private ApplicationContextRunner getContextRunner() {
9393
return new ApplicationContextRunner().withUserConfiguration(TestApplication.class);
9494
}
9595

96-
private void prepareMapping(String similarityFunction, ElasticsearchVectorStore vectorStore) {
97-
if (!similarityFunction.equals("cosine")) { // cosine is the default similarity
98-
// function, no need for custom
99-
// mapping
100-
101-
// vector dimension 1536 is openAI specific
102-
TypeMapping mapping = TypeMapping.of(tm -> tm.properties("embedding",
103-
p -> p.denseVector(dv -> dv.dims(1536).index(true).similarity(similarityFunction))));
104-
105-
vectorStore.createIndexMapping(mapping);
106-
}
107-
}
10896

10997
@BeforeEach
11098
void cleanDatabase() {
11199
getContextRunner().run(context -> {
112-
VectorStore vectorStore = context.getBean(VectorStore.class);
113-
vectorStore.delete(List.of("_all"));
114-
// deleting index so that it can be recreated with new mapping
115-
// containing a different similarity function
100+
// delete every index (and the data it holds) so that each test starts from an empty cluster
116101
ElasticsearchClient elasticsearchClient = context.getBean(ElasticsearchClient.class);
117-
if (elasticsearchClient.indices().exists(ex -> ex.index("spring-ai-document-index")).value()) {
118-
elasticsearchClient.indices().delete(del -> del.index("spring-ai-document-index"));
102+
List indices = elasticsearchClient.cat().indices().valueBody().stream().map(IndicesRecord::index).toList();
103+
if(!indices.isEmpty()) {
104+
elasticsearchClient.indices().delete(del -> del.index(indices));
119105
}
120106
});
121107
}
@@ -125,9 +111,8 @@ void cleanDatabase() {
125111
public void addAndSearchTest(String similarityFunction) {
126112

127113
getContextRunner().run(context -> {
128-
ElasticsearchVectorStore vectorStore = context.getBean(ElasticsearchVectorStore.class);
129114

130-
prepareMapping(similarityFunction, vectorStore);
115+
ElasticsearchVectorStore vectorStore = context.getBean("vectorStore_"+similarityFunction, ElasticsearchVectorStore.class);
131116

132117
vectorStore.add(documents);
133118

@@ -162,9 +147,7 @@ public void addAndSearchTest(String similarityFunction) {
162147
public void searchWithFilters(String similarityFunction) {
163148

164149
getContextRunner().run(context -> {
165-
ElasticsearchVectorStore vectorStore = context.getBean(ElasticsearchVectorStore.class);
166-
167-
prepareMapping(similarityFunction, vectorStore);
150+
ElasticsearchVectorStore vectorStore = context.getBean("vectorStore_"+similarityFunction, ElasticsearchVectorStore.class);
168151

169152
var bgDocument = new Document("1", "The World is Big and Salvation Lurks Around the Corner",
170153
Map.of("country", "BG", "year", 2020, "activationDate", new Date(1000)));
@@ -260,9 +243,7 @@ public void searchWithFilters(String similarityFunction) {
260243
public void documentUpdateTest(String similarityFunction) {
261244

262245
getContextRunner().run(context -> {
263-
ElasticsearchVectorStore vectorStore = context.getBean(ElasticsearchVectorStore.class);
264-
265-
prepareMapping(similarityFunction, vectorStore);
246+
ElasticsearchVectorStore vectorStore = context.getBean("vectorStore_"+similarityFunction, ElasticsearchVectorStore.class);
266247

267248
Document document = new Document(UUID.randomUUID().toString(), "Spring AI rocks!!",
268249
Map.of("meta1", "meta1"));
@@ -314,9 +295,7 @@ public void documentUpdateTest(String similarityFunction) {
314295
@ValueSource(strings = { "cosine", "l2_norm", "dot_product" })
315296
public void searchThresholdTest(String similarityFunction) {
316297
getContextRunner().run(context -> {
317-
ElasticsearchVectorStore vectorStore = context.getBean(ElasticsearchVectorStore.class);
318-
319-
prepareMapping(similarityFunction, vectorStore);
298+
ElasticsearchVectorStore vectorStore = context.getBean("vectorStore_"+similarityFunction, ElasticsearchVectorStore.class);
320299

321300
vectorStore.add(documents);
322301

@@ -355,11 +334,27 @@ public void searchThresholdTest(String similarityFunction) {
355334
@EnableAutoConfiguration(exclude = { DataSourceAutoConfiguration.class })
356335
public static class TestApplication {
357336

358-
@Bean
359-
public ElasticsearchVectorStore vectorStore(EmbeddingClient embeddingClient, RestClient restClient) {
337+
@Bean("vectorStore_cosine")
338+
public ElasticsearchVectorStore vectorStoreDefault(EmbeddingClient embeddingClient, RestClient restClient) {
360339
return new ElasticsearchVectorStore(restClient, embeddingClient);
361340
}
362341

342+
// Test bean: vector store configured with l2_norm similarity on its own index ("index_l2").
// The bean name suffix must match the similarity string used by the parameterized tests,
// which look the store up as "vectorStore_" + similarityFunction.
@Bean("vectorStore_l2_norm")
343+
public ElasticsearchVectorStore vectorStoreL2(EmbeddingClient embeddingClient, RestClient restClient) {
344+
ElasticsearchVectorStoreOptions options = new ElasticsearchVectorStoreOptions();
345+
options.setIndexName("index_l2");
346+
options.setSimilarity(SimilarityFunction.l2_norm);
347+
return new ElasticsearchVectorStore(options,restClient, embeddingClient);
348+
}
349+
350+
// Test bean: vector store configured with dot_product similarity on its own index
// ("index_dot_product"). The bean name suffix must match the similarity string used by the
// parameterized tests, which look the store up as "vectorStore_" + similarityFunction.
@Bean("vectorStore_dot_product")
351+
public ElasticsearchVectorStore vectorStoreDotProduct(EmbeddingClient embeddingClient, RestClient restClient) {
352+
ElasticsearchVectorStoreOptions options = new ElasticsearchVectorStoreOptions();
353+
options.setIndexName("index_dot_product");
354+
options.setSimilarity(SimilarityFunction.dot_product);
355+
return new ElasticsearchVectorStore(options,restClient, embeddingClient);
356+
}
357+
363358
@Bean
364359
public EmbeddingModel embeddingModel() {
365360
return new OpenAiEmbeddingModel(new OpenAiApi(System.getenv("OPENAI_API_KEY")));
@@ -377,5 +372,4 @@ ElasticsearchClient elasticsearchClient(RestClient restClient) {
377372
}
378373

379374
}
380-
381375
}

0 commit comments

Comments
 (0)