From 636a20dc26f745f236850e7a46e400150a2a1cd8 Mon Sep 17 00:00:00 2001 From: clun Date: Wed, 18 Oct 2023 20:46:18 +0200 Subject: [PATCH 1/4] demo --- cassandra-example/.idea/.gitignore | 8 + cassandra-example/.idea/compiler.xml | 14 ++ cassandra-example/.idea/encodings.xml | 9 ++ cassandra-example/.idea/jarRepositories.xml | 25 +++ cassandra-example/.idea/misc.xml | 12 ++ cassandra-example/.idea/uiDesigner.xml | 124 +++++++++++++++ cassandra-example/.idea/vcs.xml | 6 + cassandra-example/pom.xml | 50 ++++++ .../demo/RagWithAstraAndOpenAiTest.java | 148 ++++++++++++++++++ .../demo/RagWithAstraAndVertexAITest.java | 147 +++++++++++++++++ .../src/test/resources/logback-test.xml | 32 ++++ .../resources/story-about-happy-carrot.txt | 28 ++++ 12 files changed, 603 insertions(+) create mode 100644 cassandra-example/.idea/.gitignore create mode 100644 cassandra-example/.idea/compiler.xml create mode 100644 cassandra-example/.idea/encodings.xml create mode 100644 cassandra-example/.idea/jarRepositories.xml create mode 100644 cassandra-example/.idea/misc.xml create mode 100644 cassandra-example/.idea/uiDesigner.xml create mode 100644 cassandra-example/.idea/vcs.xml create mode 100644 cassandra-example/pom.xml create mode 100644 cassandra-example/src/test/java/com/datastax/demo/RagWithAstraAndOpenAiTest.java create mode 100644 cassandra-example/src/test/java/com/datastax/demo/RagWithAstraAndVertexAITest.java create mode 100644 cassandra-example/src/test/resources/logback-test.xml create mode 100644 cassandra-example/src/test/resources/story-about-happy-carrot.txt diff --git a/cassandra-example/.idea/.gitignore b/cassandra-example/.idea/.gitignore new file mode 100644 index 00000000..13566b81 --- /dev/null +++ b/cassandra-example/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/cassandra-example/.idea/compiler.xml b/cassandra-example/.idea/compiler.xml new file mode 100644 index 00000000..d31fea84 --- /dev/null +++ b/cassandra-example/.idea/compiler.xml @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/cassandra-example/.idea/encodings.xml b/cassandra-example/.idea/encodings.xml new file mode 100644 index 00000000..d5f2f75e --- /dev/null +++ b/cassandra-example/.idea/encodings.xml @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/cassandra-example/.idea/jarRepositories.xml b/cassandra-example/.idea/jarRepositories.xml new file mode 100644 index 00000000..afced2a7 --- /dev/null +++ b/cassandra-example/.idea/jarRepositories.xml @@ -0,0 +1,25 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/cassandra-example/.idea/misc.xml b/cassandra-example/.idea/misc.xml new file mode 100644 index 00000000..512b2e97 --- /dev/null +++ b/cassandra-example/.idea/misc.xml @@ -0,0 +1,12 @@ + + + + + + + + \ No newline at end of file diff --git a/cassandra-example/.idea/uiDesigner.xml b/cassandra-example/.idea/uiDesigner.xml new file mode 100644 index 00000000..2b63946d --- /dev/null +++ b/cassandra-example/.idea/uiDesigner.xml @@ -0,0 +1,124 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/cassandra-example/.idea/vcs.xml b/cassandra-example/.idea/vcs.xml new file mode 100644 index 00000000..6c0b8635 --- /dev/null +++ b/cassandra-example/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/cassandra-example/pom.xml b/cassandra-example/pom.xml new file mode 100644 index 00000000..0aa700e1 --- /dev/null +++ b/cassandra-example/pom.xml @@ -0,0 +1,50 @@ + + + 4.0.0 + + dev.langchain4j + cassandra-example + 0.23.0 + + + 11 + 11 + UTF-8 + 0.23.0 + + + + + + dev.langchain4j + langchain4j-cassandra + ${langchain.version} + + + + + ch.qos.logback + logback-classic + 1.4.11 + test + + + + + dev.langchain4j + langchain4j-open-ai + ${langchain.version} + test + + + dev.langchain4j + langchain4j-vertex-ai + ${langchain.version} + test + + + + + \ No newline at end of file diff --git a/cassandra-example/src/test/java/com/datastax/demo/RagWithAstraAndOpenAiTest.java b/cassandra-example/src/test/java/com/datastax/demo/RagWithAstraAndOpenAiTest.java new file mode 100644 index 00000000..3b4ad097 --- /dev/null +++ b/cassandra-example/src/test/java/com/datastax/demo/RagWithAstraAndOpenAiTest.java @@ -0,0 +1,148 @@ +package com.datastax.demo; + +import com.dtsx.astra.sdk.utils.TestUtils; +import dev.langchain4j.data.document.Document; +import dev.langchain4j.data.document.DocumentSplitter; +import dev.langchain4j.data.document.DocumentType; +import dev.langchain4j.data.document.FileSystemDocumentLoader; +import dev.langchain4j.data.document.splitter.DocumentSplitters; +import dev.langchain4j.data.embedding.Embedding; +import dev.langchain4j.data.message.AiMessage; +import dev.langchain4j.data.segment.TextSegment; +import dev.langchain4j.model.chat.ChatLanguageModel; +import dev.langchain4j.model.embedding.EmbeddingModel; +import dev.langchain4j.model.input.Prompt; +import dev.langchain4j.model.input.PromptTemplate; +import dev.langchain4j.model.openai.OpenAiChatModel; +import dev.langchain4j.model.openai.OpenAiEmbeddingModel; +import dev.langchain4j.model.openai.OpenAiTokenizer; +import dev.langchain4j.model.output.Response; +import dev.langchain4j.store.embedding.EmbeddingMatch; +import dev.langchain4j.store.embedding.EmbeddingStore; +import dev.langchain4j.store.embedding.EmbeddingStoreIngestor; +import dev.langchain4j.store.embedding.cassandra.AstraDbEmbeddingStore; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; + +import java.io.File; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static com.dtsx.astra.sdk.utils.TestUtils.getAstraToken; +import static com.dtsx.astra.sdk.utils.TestUtils.setupVectorDatabase; +import static dev.langchain4j.model.openai.OpenAiModelName.GPT_3_5_TURBO; +import static dev.langchain4j.model.openai.OpenAiModelName.TEXT_EMBEDDING_ADA_002; +import static java.time.Duration.ofSeconds; +import static java.util.stream.Collectors.joining; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +class RagWithAstraAndOpenAiTest { + + final String db = "langchain4j"; + final String vectorStore = "happy_carrot_open_ai"; + final String embeddingsModel = "text-embedding-ada-002"; + final String completionModel = "gpt-3.5-turbo"; + final int vectorDimension = 1536; + + @Test + @EnabledIfEnvironmentVariable(named = "ASTRA_DB_APPLICATION_TOKEN", matches = "Astra.*") + @EnabledIfEnvironmentVariable(named = "OPENAI_API_KEY", matches = "sk.*") + void shouldRagWithOpenAiAndAstra() { + // Initialization + String databaseId = setupVectorDatabase(db, db); + String openAIKey = System.getenv("OPENAI_API_KEY"); + + // Given + assertNotNull(openAIKey); + assertNotNull(databaseId); + + // --- Ingesting documents --- + + // Parsing input file + Path path = new File(getClass().getResource("/story-about-happy-carrot.txt").getFile()).toPath(); + Document document = FileSystemDocumentLoader.loadDocument(path, DocumentType.TXT); + DocumentSplitter splitter = DocumentSplitters + .recursive(100, 10, new OpenAiTokenizer(completionModel)); + + // Embedding model (OpenAI) + EmbeddingModel embeddingModel = OpenAiEmbeddingModel.builder() + .apiKey(openAIKey) + .modelName(embeddingsModel) + .timeout(ofSeconds(15)) + .logRequests(true) + .logResponses(true) + .build(); + + // Embed the document and it in the store + EmbeddingStore embeddingStore = AstraDbEmbeddingStore.builder() + .token(getAstraToken()) + .database(databaseId, TestUtils.TEST_REGION) + .table(db, vectorStore) + .vectorDimension(vectorDimension) + .build(); + + // Ingest method 2 + EmbeddingStoreIngestor ingestor = EmbeddingStoreIngestor.builder() + .documentSplitter(splitter) + .embeddingModel(embeddingModel) + .embeddingStore(embeddingStore) + .build(); + ingestor.ingest(document); + + // --------- RAG ------------- + + // Specify the question you want to ask the model + String question = "Who is Charlie?"; + + // Embed the question + Response questionEmbedding = embeddingModel.embed(question); + + // Find relevant embeddings in embedding store by semantic similarity + // You can play with parameters below to find a sweet spot for your specific use case + int maxResults = 3; + double minScore = 0.8; + List> relevantEmbeddings = + embeddingStore.findRelevant(questionEmbedding.content(), maxResults, minScore); + + // --------- Chat Template ------------- + + // Create a prompt for the model that includes question and relevant embeddings + PromptTemplate promptTemplate = PromptTemplate.from( + "Answer the following question to the best of your ability:\n" + + "\n" + + "Question:\n" + + "{{question}}\n" + + "\n" + + "Base your answer on the following information:\n" + + "{{information}}"); + + String information = relevantEmbeddings.stream() + .map(match -> match.embedded().text()) + .collect(joining("\n\n")); + + Map variables = new HashMap<>(); + variables.put("question", question); + variables.put("information", information); + + Prompt prompt = promptTemplate.apply(variables); + + // Send the prompt to the OpenAI chat model + ChatLanguageModel chatModel = OpenAiChatModel.builder() + .apiKey(openAIKey) + .modelName(completionModel) + .temperature(0.7) + .timeout(ofSeconds(15)) + .maxRetries(3) + .logResponses(true) + .logRequests(true) + .build(); + + Response aiMessage = chatModel.generate(prompt.toUserMessage()); + + // See an answer from the model + String answer = aiMessage.content().text(); + System.out.println(answer); + } +} \ No newline at end of file diff --git a/cassandra-example/src/test/java/com/datastax/demo/RagWithAstraAndVertexAITest.java b/cassandra-example/src/test/java/com/datastax/demo/RagWithAstraAndVertexAITest.java new file mode 100644 index 00000000..10e4d0dc --- /dev/null +++ b/cassandra-example/src/test/java/com/datastax/demo/RagWithAstraAndVertexAITest.java @@ -0,0 +1,147 @@ +package com.datastax.demo; + +import com.dtsx.astra.sdk.utils.TestUtils; +import dev.langchain4j.data.document.Document; +import dev.langchain4j.data.document.DocumentSplitter; +import dev.langchain4j.data.document.DocumentType; +import dev.langchain4j.data.document.FileSystemDocumentLoader; +import dev.langchain4j.data.document.splitter.DocumentSplitters; +import dev.langchain4j.data.embedding.Embedding; +import dev.langchain4j.data.segment.TextSegment; +import dev.langchain4j.model.embedding.EmbeddingModel; +import dev.langchain4j.model.input.Prompt; +import dev.langchain4j.model.input.PromptTemplate; +import dev.langchain4j.model.openai.OpenAiTokenizer; +import dev.langchain4j.model.output.Response; +import dev.langchain4j.model.vertexai.VertexAiEmbeddingModel; +import dev.langchain4j.model.vertexai.VextexAiLanguageModel; +import dev.langchain4j.store.embedding.EmbeddingMatch; +import dev.langchain4j.store.embedding.EmbeddingStore; +import dev.langchain4j.store.embedding.EmbeddingStoreIngestor; +import dev.langchain4j.store.embedding.cassandra.AstraDbEmbeddingStore; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; + +import java.io.File; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static com.dtsx.astra.sdk.utils.TestUtils.getAstraToken; +import static com.dtsx.astra.sdk.utils.TestUtils.setupVectorDatabase; +import static dev.langchain4j.model.openai.OpenAiModelName.GPT_3_5_TURBO; +import static java.util.stream.Collectors.joining; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +class RagWithAstraAndVertexAITest { + + final String db = "langchain4j"; + final String vectorStore = "happy_carrot_vertex_ai"; + final String embeddingsModel = "textembedding-gecko@001"; + final String completionModel = "text-bison@001"; + final int vectorDimension = 384; + + + @Test + @EnabledIfEnvironmentVariable(named = "ASTRA_DB_APPLICATION_TOKEN", matches = "Astra.*") + void shouldRagWithOVertexAIAndAstra() { + + // Create a vector database in Astra if needed + final String databaseId = setupVectorDatabase(db, db); + + // Given + assertNotNull(databaseId); + + // --- Ingesting documents --- + + // Parsing input file + Path path = new File(getClass().getResource("/story-about-happy-carrot.txt").getFile()).toPath(); + Document document = FileSystemDocumentLoader.loadDocument(path, DocumentType.TXT); + DocumentSplitter splitter = DocumentSplitters + .recursive(100, 10, new OpenAiTokenizer(GPT_3_5_TURBO)); + + // Embedding model (OpenAI) + EmbeddingModel embeddingModel = VertexAiEmbeddingModel.builder() + .endpoint("us-central1-aiplatform.googleapis.com:443") + .project("integrations-379317") + .location("us-central1") + .publisher("google") + .modelName(embeddingsModel) + .maxRetries(3) + .build(); + + // Embed the document and it in the store + EmbeddingStore embeddingStore = AstraDbEmbeddingStore.builder() + .token(getAstraToken()) + .database(databaseId, TestUtils.TEST_REGION) + .table(db, vectorStore) + .vectorDimension(vectorDimension) // dimension of the gecko model + .build(); + + // Ingest method 2 + EmbeddingStoreIngestor ingestor = EmbeddingStoreIngestor.builder() + .documentSplitter(splitter) + .embeddingModel(embeddingModel) + .embeddingStore(embeddingStore) + .build(); + ingestor.ingest(document); + + // --------- RAG ------------- + + // Specify the question you want to ask the model + String question = "Who is Charlie?"; + + // Embed the question + Response questionEmbedding = embeddingModel.embed(question); + + // Find relevant embeddings in embedding store by semantic similarity + // You can play with parameters below to find a sweet spot for your specific use case + int maxResults = 3; + double minScore = 0.8; + List> relevantEmbeddings = + embeddingStore.findRelevant(questionEmbedding.content(), maxResults, minScore); + + // --------- Chat Template ------------- + + // Create a prompt for the model that includes question and relevant embeddings + PromptTemplate promptTemplate = PromptTemplate.from( + "Answer the following question to the best of your ability:\n" + + "\n" + + "Question:\n" + + "{{question}}\n" + + "\n" + + "Base your answer on the following information:\n" + + "{{information}}"); + + String information = relevantEmbeddings.stream() + .map(match -> match.embedded().text()) + .collect(joining("\n\n")); + + Map variables = new HashMap<>(); + variables.put("question", question); + variables.put("information", information); + + Prompt prompt = promptTemplate.apply(variables); + + // Send the prompt to the OpenAI chat model + VextexAiLanguageModel chatModel = VextexAiLanguageModel.builder() + .endpoint("us-central1-aiplatform.googleapis.com:443") + .project("integrations-379317") + .location("us-central1") + .publisher("google") + .modelName(completionModel) + .temperature(0.2) + .maxOutputTokens(50) + .topK(40) + .topP(0.95) + .maxRetries(3) + .build(); + + Response aiMessage = chatModel.generate(prompt); + + // See an answer from the model + String answer = aiMessage.content(); + System.out.println(answer); + } +} \ No newline at end of file diff --git a/cassandra-example/src/test/resources/logback-test.xml b/cassandra-example/src/test/resources/logback-test.xml new file mode 100644 index 00000000..73314a62 --- /dev/null +++ b/cassandra-example/src/test/resources/logback-test.xml @@ -0,0 +1,32 @@ + + + + + %d{HH:mm:ss.SSS} %magenta(%-5level) %cyan(%-47logger) : %msg%n + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/cassandra-example/src/test/resources/story-about-happy-carrot.txt b/cassandra-example/src/test/resources/story-about-happy-carrot.txt new file mode 100644 index 00000000..66ae976d --- /dev/null +++ b/cassandra-example/src/test/resources/story-about-happy-carrot.txt @@ -0,0 +1,28 @@ +Once upon a time in the town of VeggieVille, there lived a cheerful carrot named Charlie. +Charlie was a radiant carrot, always beaming with joy and positivity. +His vibrant orange skin and lush green top were a sight to behold, but it was his infectious laughter and warm personality that really set him apart. + +Charlie had a diverse group of friends, each a vegetable with their own unique characteristics. +There was Bella the blushing beetroot, always ready with a riddle or two; Timmy the timid tomato, a gentle soul with a heart of gold; and Percy the prankster potato, whose jokes always brought a smile to everyone's faces. +Despite their differences, they shared a close bond, their friendship as robust as their natural goodness. + +Their lives were filled with delightful adventures, from playing hide-and-seek amidst the leafy lettuce to swimming in the dewy droplets that pooled on the cabbage leaves. +Their favorite place, though, was the sunlit corner of the vegetable patch, where they would bask in the warmth of the sun, share stories, and have hearty laughs. + +One day, a bunch of pesky caterpillars invaded VeggieVille. +The vegetables were terrified, fearing they would be nibbled to nothingness. +But Charlie, with his usual sunny disposition, had an idea. +He proposed they host a grand feast for the caterpillars, with the juiciest leaves from the outskirts of the town. +Charlie's optimism was contagious, and his friends eagerly joined in to prepare the feast. + +When the caterpillars arrived, they were pleasantly surprised. +They enjoyed the feast and were so impressed with the vegetables' hospitality that they promised not to trouble VeggieVille again. +In return, they agreed to help pollinate the flowers, contributing to a more lush and vibrant VeggieVille. + +Charlie's idea had saved the day, but he humbly attributed the success to their teamwork and friendship. +They celebrated their victory with a grand party, filled with laughter, dance, and merry games. +That night, under the twinkling stars, they made a pact to always stand by each other, come what may. + +From then on, the story of the happy carrot and his friends spread far and wide, a tale of friendship, unity, and positivity. +Charlie, Bella, Timmy, and Percy continued to live their joyful lives, their laughter echoing through VeggieVille. +And so, the tale of the happy carrot and his friends serves as a reminder that no matter the challenge, with optimism, teamwork, and a bit of creativity, anything is possible. \ No newline at end of file From 45b5bb31ca9a8c04327cbe7e1c81933dbeb0d6a5 Mon Sep 17 00:00:00 2001 From: clun Date: Wed, 18 Oct 2023 21:29:27 +0200 Subject: [PATCH 2/4] simple splitter --- .../com/datastax/demo/RagWithAstraAndVertexAITest.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cassandra-example/src/test/java/com/datastax/demo/RagWithAstraAndVertexAITest.java b/cassandra-example/src/test/java/com/datastax/demo/RagWithAstraAndVertexAITest.java index 10e4d0dc..8817383c 100644 --- a/cassandra-example/src/test/java/com/datastax/demo/RagWithAstraAndVertexAITest.java +++ b/cassandra-example/src/test/java/com/datastax/demo/RagWithAstraAndVertexAITest.java @@ -1,13 +1,18 @@ package com.datastax.demo; import com.dtsx.astra.sdk.utils.TestUtils; +import dev.langchain4j.agent.tool.ToolSpecification; import dev.langchain4j.data.document.Document; import dev.langchain4j.data.document.DocumentSplitter; import dev.langchain4j.data.document.DocumentType; import dev.langchain4j.data.document.FileSystemDocumentLoader; +import dev.langchain4j.data.document.splitter.DocumentBySentenceSplitter; +import dev.langchain4j.data.document.splitter.DocumentByWordSplitter; import dev.langchain4j.data.document.splitter.DocumentSplitters; import dev.langchain4j.data.embedding.Embedding; +import dev.langchain4j.data.message.ChatMessage; import dev.langchain4j.data.segment.TextSegment; +import dev.langchain4j.model.Tokenizer; import dev.langchain4j.model.embedding.EmbeddingModel; import dev.langchain4j.model.input.Prompt; import dev.langchain4j.model.input.PromptTemplate; @@ -58,8 +63,7 @@ void shouldRagWithOVertexAIAndAstra() { // Parsing input file Path path = new File(getClass().getResource("/story-about-happy-carrot.txt").getFile()).toPath(); Document document = FileSystemDocumentLoader.loadDocument(path, DocumentType.TXT); - DocumentSplitter splitter = DocumentSplitters - .recursive(100, 10, new OpenAiTokenizer(GPT_3_5_TURBO)); + DocumentSplitter splitter = new DocumentByWordSplitter(100,10); // Embedding model (OpenAI) EmbeddingModel embeddingModel = VertexAiEmbeddingModel.builder() From 70ad82c2edb18d231fd19eb3938be0e7f1a08c05 Mon Sep 17 00:00:00 2001 From: Cedrick Lunven Date: Thu, 25 Apr 2024 21:23:28 +0200 Subject: [PATCH 3/4] Add Samples for Astra and Cassandra --- astradb-examples/pom.xml | 63 +++++++ .../src/test/java/AstraDbAssistant.java | 3 + .../src/test/java/AstraDbNaiveRagTestIT.java | 113 +++++++++++++ .../src/test/resources/logback-test.xml | 17 ++ .../resources/story-about-happy-carrot.txt | 0 .../demo/RagWithAstraAndOpenAiTest.java | 148 ----------------- .../demo/RagWithAstraAndVertexAITest.java | 151 ----------------- .../.idea/.gitignore | 0 .../.idea/compiler.xml | 0 .../.idea/encodings.xml | 0 .../.idea/jarRepositories.xml | 0 .../.idea/misc.xml | 0 .../.idea/uiDesigner.xml | 0 .../.idea/vcs.xml | 0 .../pom.xml | 50 ++++-- .../src/test/java/CassandraAssistant.java | 3 + .../test/java/CassandraNaiveRagTestIT.java | 154 ++++++++++++++++++ .../src/test/resources/logback-test.xml | 0 .../resources/story-about-happy-carrot.txt | 28 ++++ pom.xml | 2 + 20 files changed, 423 insertions(+), 309 deletions(-) create mode 100644 astradb-examples/pom.xml create mode 100644 astradb-examples/src/test/java/AstraDbAssistant.java create mode 100644 astradb-examples/src/test/java/AstraDbNaiveRagTestIT.java create mode 100644 astradb-examples/src/test/resources/logback-test.xml rename {cassandra-example => astradb-examples}/src/test/resources/story-about-happy-carrot.txt (100%) delete mode 100644 cassandra-example/src/test/java/com/datastax/demo/RagWithAstraAndOpenAiTest.java delete mode 100644 cassandra-example/src/test/java/com/datastax/demo/RagWithAstraAndVertexAITest.java rename {cassandra-example => cassandra-examples}/.idea/.gitignore (100%) rename {cassandra-example => cassandra-examples}/.idea/compiler.xml (100%) rename {cassandra-example => cassandra-examples}/.idea/encodings.xml (100%) rename {cassandra-example => cassandra-examples}/.idea/jarRepositories.xml (100%) rename {cassandra-example => cassandra-examples}/.idea/misc.xml (100%) rename {cassandra-example => cassandra-examples}/.idea/uiDesigner.xml (100%) rename {cassandra-example => cassandra-examples}/.idea/vcs.xml (100%) rename {cassandra-example => cassandra-examples}/pom.xml (52%) create mode 100644 cassandra-examples/src/test/java/CassandraAssistant.java create mode 100644 cassandra-examples/src/test/java/CassandraNaiveRagTestIT.java rename {cassandra-example => cassandra-examples}/src/test/resources/logback-test.xml (100%) create mode 100644 cassandra-examples/src/test/resources/story-about-happy-carrot.txt diff --git a/astradb-examples/pom.xml b/astradb-examples/pom.xml new file mode 100644 index 00000000..aeec3383 --- /dev/null +++ b/astradb-examples/pom.xml @@ -0,0 +1,63 @@ + + 4.0.0 + + dev.langchain4j + langchain4j-examples + 0.30.0 + + + astradb-examples + jar + astradb-examples + + + 11 + 11 + UTF-8 + 0.29.1 + + + + + + dev.langchain4j + langchain4j + ${langchain.version} + test + + + dev.langchain4j + langchain4j-core + ${langchain.version} + test + + + dev.langchain4j + langchain4j-astradb + ${langchain.version} + test + + + + ch.qos.logback + logback-classic + 1.5.3 + test + + + + dev.langchain4j + langchain4j-open-ai + ${langchain.version} + test + + + org.junit.jupiter + junit-jupiter-engine + 5.10.2 + test + + + + diff --git a/astradb-examples/src/test/java/AstraDbAssistant.java b/astradb-examples/src/test/java/AstraDbAssistant.java new file mode 100644 index 00000000..345c330a --- /dev/null +++ b/astradb-examples/src/test/java/AstraDbAssistant.java @@ -0,0 +1,3 @@ +interface AstraDbAssistant { + String answer(String query); +} diff --git a/astradb-examples/src/test/java/AstraDbNaiveRagTestIT.java b/astradb-examples/src/test/java/AstraDbNaiveRagTestIT.java new file mode 100644 index 00000000..d34a3f82 --- /dev/null +++ b/astradb-examples/src/test/java/AstraDbNaiveRagTestIT.java @@ -0,0 +1,113 @@ +import com.datastax.astra.client.DataAPIClient; +import dev.langchain4j.data.document.parser.TextDocumentParser; +import dev.langchain4j.data.segment.TextSegment; +import dev.langchain4j.memory.chat.MessageWindowChatMemory; +import dev.langchain4j.model.chat.ChatLanguageModel; +import dev.langchain4j.model.embedding.EmbeddingModel; +import dev.langchain4j.model.openai.OpenAiChatModel; +import dev.langchain4j.model.openai.OpenAiEmbeddingModel; +import dev.langchain4j.model.openai.OpenAiTokenizer; +import dev.langchain4j.rag.content.retriever.ContentRetriever; +import dev.langchain4j.rag.content.retriever.EmbeddingStoreContentRetriever; +import dev.langchain4j.service.AiServices; +import dev.langchain4j.store.embedding.EmbeddingStore; +import dev.langchain4j.store.embedding.EmbeddingStoreIngestor; +import dev.langchain4j.store.embedding.astradb.AstraDbEmbeddingStore; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; + +import java.io.File; +import java.nio.file.Path; +import java.util.Objects; + +import static com.datastax.astra.client.model.SimilarityMetric.COSINE; +import static com.dtsx.astra.sdk.utils.TestUtils.getAstraToken; +import static dev.langchain4j.data.document.loader.FileSystemDocumentLoader.loadDocument; +import static dev.langchain4j.data.document.splitter.DocumentSplitters.recursive; +import static dev.langchain4j.model.openai.OpenAiChatModelName.GPT_3_5_TURBO; +import static dev.langchain4j.model.openai.OpenAiEmbeddingModelName.TEXT_EMBEDDING_ADA_002; +import static java.time.Duration.ofSeconds; + +@Disabled("AstraDB is not available in the CI") +class AstraDbNaiveRagTestIT { + + static final String VAR_OPENAI_API_KEY = "OPENAI_API_KEY"; + static final String VAR_ASTRA_TOKEN = "ASTRA_DB_APPLICATION_TOKEN"; + + @Test + @EnabledIfEnvironmentVariable(named = VAR_ASTRA_TOKEN, matches = "Astra.*") + @EnabledIfEnvironmentVariable(named = VAR_OPENAI_API_KEY, matches = "sk.*") + void shouldNaiveRagWithOpenAiAndAstraDbTest() { + + // Parsing input file + Path textFile = new File(Objects.requireNonNull(getClass() + .getResource("/story-about-happy-carrot.txt")) + .getFile()) + .toPath(); + + // === INGESTION === + + EmbeddingModel embeddingModel = initEmbeddingModelOpenAi(); + EmbeddingStore embeddingStore = initEmbeddingStoreAstraDb(); + EmbeddingStoreIngestor.builder() + .documentSplitter(recursive(100, 10, new OpenAiTokenizer(GPT_3_5_TURBO))) + .embeddingModel(embeddingModel) + .embeddingStore(embeddingStore) + .build() + .ingest(loadDocument(textFile, new TextDocumentParser())); + + // === NAIVE RETRIEVER === + + ContentRetriever contentRetriever = EmbeddingStoreContentRetriever.builder() + .embeddingStore(embeddingStore) + .embeddingModel(embeddingModel) + .maxResults(2) + .minScore(0.5) + .build(); + + AstraDbAssistant ai = AiServices.builder(AstraDbAssistant.class) + .contentRetriever(contentRetriever) + .chatLanguageModel(initChatLanguageModelOpenAi()) + .chatMemory(MessageWindowChatMemory.withMaxMessages(10)) + .build(); + + String response = ai.answer("What vegetable is Happy?"); + Assertions.assertNotNull(response); + } + + private ChatLanguageModel initChatLanguageModelOpenAi() { + return OpenAiChatModel.builder() + .apiKey(System.getenv(VAR_OPENAI_API_KEY)) + .modelName(GPT_3_5_TURBO) + .temperature(0.7) + .timeout(ofSeconds(15)) + .maxRetries(3) + .logResponses(true) + .logRequests(true) + .build(); + } + + private EmbeddingModel initEmbeddingModelOpenAi() { + return OpenAiEmbeddingModel.builder() + .apiKey(System.getenv(VAR_OPENAI_API_KEY)) + .modelName(TEXT_EMBEDDING_ADA_002) + .build(); + } + + private EmbeddingStore initEmbeddingStoreAstraDb() { + return new AstraDbEmbeddingStore( + // Astra Db Client + new DataAPIClient(getAstraToken()) + // Access the 'admin' part + .getAdmin() + // To create a database if it does not exist + .createDatabase("test_langchain4j") + // Select the created db + .getDatabase() + // And create a collection if it does not exist + .createCollection("story_collection", 1536, COSINE)); + } + +} \ No newline at end of file diff --git a/astradb-examples/src/test/resources/logback-test.xml b/astradb-examples/src/test/resources/logback-test.xml new file mode 100644 index 00000000..b176835f --- /dev/null +++ b/astradb-examples/src/test/resources/logback-test.xml @@ -0,0 +1,17 @@ + + + + + %d{HH:mm:ss.SSS} %magenta(%-5level) %cyan(%-20logger) : %msg%n + + + + + + + + + + + + \ No newline at end of file diff --git a/cassandra-example/src/test/resources/story-about-happy-carrot.txt b/astradb-examples/src/test/resources/story-about-happy-carrot.txt similarity index 100% rename from cassandra-example/src/test/resources/story-about-happy-carrot.txt rename to astradb-examples/src/test/resources/story-about-happy-carrot.txt diff --git a/cassandra-example/src/test/java/com/datastax/demo/RagWithAstraAndOpenAiTest.java b/cassandra-example/src/test/java/com/datastax/demo/RagWithAstraAndOpenAiTest.java deleted file mode 100644 index 3b4ad097..00000000 --- a/cassandra-example/src/test/java/com/datastax/demo/RagWithAstraAndOpenAiTest.java +++ /dev/null @@ -1,148 +0,0 @@ -package com.datastax.demo; - -import com.dtsx.astra.sdk.utils.TestUtils; -import dev.langchain4j.data.document.Document; -import dev.langchain4j.data.document.DocumentSplitter; -import dev.langchain4j.data.document.DocumentType; -import dev.langchain4j.data.document.FileSystemDocumentLoader; -import dev.langchain4j.data.document.splitter.DocumentSplitters; -import dev.langchain4j.data.embedding.Embedding; -import dev.langchain4j.data.message.AiMessage; -import dev.langchain4j.data.segment.TextSegment; -import dev.langchain4j.model.chat.ChatLanguageModel; -import dev.langchain4j.model.embedding.EmbeddingModel; -import dev.langchain4j.model.input.Prompt; -import dev.langchain4j.model.input.PromptTemplate; -import dev.langchain4j.model.openai.OpenAiChatModel; -import dev.langchain4j.model.openai.OpenAiEmbeddingModel; -import dev.langchain4j.model.openai.OpenAiTokenizer; -import dev.langchain4j.model.output.Response; -import dev.langchain4j.store.embedding.EmbeddingMatch; -import dev.langchain4j.store.embedding.EmbeddingStore; -import dev.langchain4j.store.embedding.EmbeddingStoreIngestor; -import dev.langchain4j.store.embedding.cassandra.AstraDbEmbeddingStore; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; - -import java.io.File; -import java.nio.file.Path; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import static com.dtsx.astra.sdk.utils.TestUtils.getAstraToken; -import static com.dtsx.astra.sdk.utils.TestUtils.setupVectorDatabase; -import static dev.langchain4j.model.openai.OpenAiModelName.GPT_3_5_TURBO; -import static dev.langchain4j.model.openai.OpenAiModelName.TEXT_EMBEDDING_ADA_002; -import static java.time.Duration.ofSeconds; -import static java.util.stream.Collectors.joining; -import static org.junit.jupiter.api.Assertions.assertNotNull; - -class RagWithAstraAndOpenAiTest { - - final String db = "langchain4j"; - final String vectorStore = "happy_carrot_open_ai"; - final String embeddingsModel = "text-embedding-ada-002"; - final String completionModel = "gpt-3.5-turbo"; - final int vectorDimension = 1536; - - @Test - @EnabledIfEnvironmentVariable(named = "ASTRA_DB_APPLICATION_TOKEN", matches = "Astra.*") - @EnabledIfEnvironmentVariable(named = "OPENAI_API_KEY", matches = "sk.*") - void shouldRagWithOpenAiAndAstra() { - // Initialization - String databaseId = setupVectorDatabase(db, db); - String openAIKey = System.getenv("OPENAI_API_KEY"); - - // Given - assertNotNull(openAIKey); - assertNotNull(databaseId); - - // --- Ingesting documents --- - - // Parsing input file - Path path = new File(getClass().getResource("/story-about-happy-carrot.txt").getFile()).toPath(); - Document document = FileSystemDocumentLoader.loadDocument(path, DocumentType.TXT); - DocumentSplitter splitter = DocumentSplitters - .recursive(100, 10, new OpenAiTokenizer(completionModel)); - - // Embedding model (OpenAI) - EmbeddingModel embeddingModel = OpenAiEmbeddingModel.builder() - .apiKey(openAIKey) - .modelName(embeddingsModel) - .timeout(ofSeconds(15)) - .logRequests(true) - .logResponses(true) - .build(); - - // Embed the document and it in the store - EmbeddingStore embeddingStore = AstraDbEmbeddingStore.builder() - .token(getAstraToken()) - .database(databaseId, TestUtils.TEST_REGION) - .table(db, vectorStore) - .vectorDimension(vectorDimension) - .build(); - - // Ingest method 2 - EmbeddingStoreIngestor ingestor = EmbeddingStoreIngestor.builder() - .documentSplitter(splitter) - .embeddingModel(embeddingModel) - .embeddingStore(embeddingStore) - .build(); - ingestor.ingest(document); - - // --------- RAG ------------- - - // Specify the question you want to ask the model - String question = "Who is Charlie?"; - - // Embed the question - Response questionEmbedding = embeddingModel.embed(question); - - // Find relevant embeddings in embedding store by semantic similarity - // You can play with parameters below to find a sweet spot for your specific use case - int maxResults = 3; - double minScore = 0.8; - List> relevantEmbeddings = - embeddingStore.findRelevant(questionEmbedding.content(), maxResults, minScore); - - // --------- Chat Template ------------- - - // Create a prompt for the model that includes question and relevant embeddings - PromptTemplate promptTemplate = PromptTemplate.from( - "Answer the following question to the best of your ability:\n" - + "\n" - + "Question:\n" - + "{{question}}\n" - + "\n" - + "Base your answer on the following information:\n" - + "{{information}}"); - - String information = relevantEmbeddings.stream() - .map(match -> match.embedded().text()) - .collect(joining("\n\n")); - - Map variables = new HashMap<>(); - variables.put("question", question); - variables.put("information", information); - - Prompt prompt = promptTemplate.apply(variables); - - // Send the prompt to the OpenAI chat model - ChatLanguageModel chatModel = OpenAiChatModel.builder() - .apiKey(openAIKey) - .modelName(completionModel) - .temperature(0.7) - .timeout(ofSeconds(15)) - .maxRetries(3) - .logResponses(true) - .logRequests(true) - .build(); - - Response aiMessage = chatModel.generate(prompt.toUserMessage()); - - // See an answer from the model - String answer = aiMessage.content().text(); - System.out.println(answer); - } -} \ No newline at end of file diff --git a/cassandra-example/src/test/java/com/datastax/demo/RagWithAstraAndVertexAITest.java b/cassandra-example/src/test/java/com/datastax/demo/RagWithAstraAndVertexAITest.java deleted file mode 100644 index 8817383c..00000000 --- a/cassandra-example/src/test/java/com/datastax/demo/RagWithAstraAndVertexAITest.java +++ /dev/null @@ -1,151 +0,0 @@ -package com.datastax.demo; - -import com.dtsx.astra.sdk.utils.TestUtils; -import dev.langchain4j.agent.tool.ToolSpecification; -import dev.langchain4j.data.document.Document; -import dev.langchain4j.data.document.DocumentSplitter; -import dev.langchain4j.data.document.DocumentType; -import dev.langchain4j.data.document.FileSystemDocumentLoader; -import dev.langchain4j.data.document.splitter.DocumentBySentenceSplitter; -import dev.langchain4j.data.document.splitter.DocumentByWordSplitter; -import dev.langchain4j.data.document.splitter.DocumentSplitters; -import dev.langchain4j.data.embedding.Embedding; -import dev.langchain4j.data.message.ChatMessage; -import dev.langchain4j.data.segment.TextSegment; -import dev.langchain4j.model.Tokenizer; -import dev.langchain4j.model.embedding.EmbeddingModel; -import dev.langchain4j.model.input.Prompt; -import dev.langchain4j.model.input.PromptTemplate; -import dev.langchain4j.model.openai.OpenAiTokenizer; -import dev.langchain4j.model.output.Response; -import dev.langchain4j.model.vertexai.VertexAiEmbeddingModel; -import dev.langchain4j.model.vertexai.VextexAiLanguageModel; -import dev.langchain4j.store.embedding.EmbeddingMatch; -import dev.langchain4j.store.embedding.EmbeddingStore; -import dev.langchain4j.store.embedding.EmbeddingStoreIngestor; -import dev.langchain4j.store.embedding.cassandra.AstraDbEmbeddingStore; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; - -import java.io.File; -import java.nio.file.Path; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import static com.dtsx.astra.sdk.utils.TestUtils.getAstraToken; -import static com.dtsx.astra.sdk.utils.TestUtils.setupVectorDatabase; -import static dev.langchain4j.model.openai.OpenAiModelName.GPT_3_5_TURBO; -import static java.util.stream.Collectors.joining; -import static org.junit.jupiter.api.Assertions.assertNotNull; - -class RagWithAstraAndVertexAITest { - - final String db = "langchain4j"; - final String vectorStore = "happy_carrot_vertex_ai"; - final String embeddingsModel = "textembedding-gecko@001"; - final String completionModel = "text-bison@001"; - final int vectorDimension = 384; - - - @Test - @EnabledIfEnvironmentVariable(named = "ASTRA_DB_APPLICATION_TOKEN", matches = "Astra.*") - void shouldRagWithOVertexAIAndAstra() { - - // Create a vector database in Astra if needed - final String databaseId = setupVectorDatabase(db, db); - - // Given - assertNotNull(databaseId); - - // --- Ingesting documents --- - - // Parsing input file - Path path = new File(getClass().getResource("/story-about-happy-carrot.txt").getFile()).toPath(); - Document document = FileSystemDocumentLoader.loadDocument(path, DocumentType.TXT); - DocumentSplitter splitter = new DocumentByWordSplitter(100,10); - - // Embedding model (OpenAI) - EmbeddingModel embeddingModel = VertexAiEmbeddingModel.builder() - .endpoint("us-central1-aiplatform.googleapis.com:443") - .project("integrations-379317") - .location("us-central1") - .publisher("google") - .modelName(embeddingsModel) - .maxRetries(3) - .build(); - - // Embed the document and it in the store - EmbeddingStore embeddingStore = AstraDbEmbeddingStore.builder() - .token(getAstraToken()) - .database(databaseId, TestUtils.TEST_REGION) - .table(db, vectorStore) - .vectorDimension(vectorDimension) // dimension of the gecko model - .build(); - - // Ingest method 2 - EmbeddingStoreIngestor ingestor = EmbeddingStoreIngestor.builder() - .documentSplitter(splitter) - .embeddingModel(embeddingModel) - .embeddingStore(embeddingStore) - .build(); - ingestor.ingest(document); - - // --------- RAG ------------- - - // Specify the question you want to ask the model - String question = "Who is Charlie?"; - - // Embed the question - Response questionEmbedding = embeddingModel.embed(question); - - // Find relevant embeddings in embedding store by semantic similarity - // You can play with parameters below to find a sweet spot for your specific use case - int maxResults = 3; - double minScore = 0.8; - List> relevantEmbeddings = - embeddingStore.findRelevant(questionEmbedding.content(), maxResults, minScore); - - // --------- Chat Template ------------- - - // Create a prompt for the model that includes question and relevant embeddings - PromptTemplate promptTemplate = PromptTemplate.from( - "Answer the following question to the best of your ability:\n" - + "\n" - + "Question:\n" - + "{{question}}\n" - + "\n" - + "Base your answer on the following information:\n" - + "{{information}}"); - - String information = relevantEmbeddings.stream() - .map(match -> match.embedded().text()) - .collect(joining("\n\n")); - - Map variables = new HashMap<>(); - variables.put("question", question); - variables.put("information", information); - - Prompt prompt = promptTemplate.apply(variables); - - // Send the prompt to the OpenAI chat model - VextexAiLanguageModel chatModel = VextexAiLanguageModel.builder() - .endpoint("us-central1-aiplatform.googleapis.com:443") - .project("integrations-379317") - .location("us-central1") - .publisher("google") - .modelName(completionModel) - .temperature(0.2) - .maxOutputTokens(50) - .topK(40) - .topP(0.95) - .maxRetries(3) - .build(); - - Response aiMessage = chatModel.generate(prompt); - - // See an answer from the model - String answer = aiMessage.content(); - System.out.println(answer); - } -} \ No newline at end of file diff --git a/cassandra-example/.idea/.gitignore b/cassandra-examples/.idea/.gitignore similarity index 100% rename from cassandra-example/.idea/.gitignore rename to cassandra-examples/.idea/.gitignore diff --git a/cassandra-example/.idea/compiler.xml b/cassandra-examples/.idea/compiler.xml similarity index 100% rename from cassandra-example/.idea/compiler.xml rename to cassandra-examples/.idea/compiler.xml diff --git a/cassandra-example/.idea/encodings.xml b/cassandra-examples/.idea/encodings.xml similarity index 100% rename from cassandra-example/.idea/encodings.xml rename to cassandra-examples/.idea/encodings.xml diff --git a/cassandra-example/.idea/jarRepositories.xml b/cassandra-examples/.idea/jarRepositories.xml similarity index 100% rename from cassandra-example/.idea/jarRepositories.xml rename to cassandra-examples/.idea/jarRepositories.xml diff --git a/cassandra-example/.idea/misc.xml b/cassandra-examples/.idea/misc.xml similarity index 100% rename from cassandra-example/.idea/misc.xml rename to cassandra-examples/.idea/misc.xml diff --git a/cassandra-example/.idea/uiDesigner.xml b/cassandra-examples/.idea/uiDesigner.xml similarity index 100% rename from cassandra-example/.idea/uiDesigner.xml rename to cassandra-examples/.idea/uiDesigner.xml diff --git a/cassandra-example/.idea/vcs.xml b/cassandra-examples/.idea/vcs.xml similarity index 100% rename from cassandra-example/.idea/vcs.xml rename to cassandra-examples/.idea/vcs.xml diff --git a/cassandra-example/pom.xml b/cassandra-examples/pom.xml similarity index 52% rename from cassandra-example/pom.xml rename to cassandra-examples/pom.xml index 0aa700e1..b2573265 100644 --- a/cassandra-example/pom.xml +++ b/cassandra-examples/pom.xml @@ -3,16 +3,21 @@ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 - - dev.langchain4j cassandra-example - 0.23.0 + + + dev.langchain4j + langchain4j-examples + 0.30.0 + + 11 11 UTF-8 - 0.23.0 + 0.29.1 + 1.19.7 @@ -23,15 +28,27 @@ ${langchain.version} - + + dev.langchain4j + langchain4j + ${langchain.version} + test + + + + dev.langchain4j + langchain4j-core + ${langchain.version} + test + + ch.qos.logback logback-classic - 1.4.11 + 1.5.3 test - dev.langchain4j langchain4j-open-ai @@ -39,9 +56,22 @@ test - dev.langchain4j - langchain4j-vertex-ai - ${langchain.version} + org.junit.jupiter + junit-jupiter-engine + 5.10.2 + test + + + + org.testcontainers + cassandra + ${testcontainers.version} + test + + + org.testcontainers + junit-jupiter + ${testcontainers.version} test diff --git a/cassandra-examples/src/test/java/CassandraAssistant.java b/cassandra-examples/src/test/java/CassandraAssistant.java new file mode 100644 index 00000000..b6a7185a --- /dev/null +++ b/cassandra-examples/src/test/java/CassandraAssistant.java @@ -0,0 +1,3 @@ +interface CassandraAssistant { + String answer(String query); +} diff --git a/cassandra-examples/src/test/java/CassandraNaiveRagTestIT.java b/cassandra-examples/src/test/java/CassandraNaiveRagTestIT.java new file mode 100644 index 00000000..280318a8 --- /dev/null +++ b/cassandra-examples/src/test/java/CassandraNaiveRagTestIT.java @@ -0,0 +1,154 @@ + +import com.datastax.oss.driver.api.core.CqlSession; +import dev.langchain4j.data.document.parser.TextDocumentParser; +import dev.langchain4j.data.segment.TextSegment; +import dev.langchain4j.memory.chat.MessageWindowChatMemory; +import dev.langchain4j.model.chat.ChatLanguageModel; +import dev.langchain4j.model.embedding.EmbeddingModel; +import dev.langchain4j.model.openai.OpenAiChatModel; +import dev.langchain4j.model.openai.OpenAiEmbeddingModel; +import dev.langchain4j.model.openai.OpenAiTokenizer; +import dev.langchain4j.rag.content.retriever.ContentRetriever; +import dev.langchain4j.rag.content.retriever.EmbeddingStoreContentRetriever; +import dev.langchain4j.service.AiServices; +import dev.langchain4j.store.cassio.SimilarityMetric; +import dev.langchain4j.store.embedding.EmbeddingStore; +import dev.langchain4j.store.embedding.EmbeddingStoreIngestor; +import dev.langchain4j.store.embedding.cassandra.CassandraCassioEmbeddingStore; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; +import org.testcontainers.DockerClientFactory; +import org.testcontainers.containers.CassandraContainer; +import org.testcontainers.junit.jupiter.Testcontainers; +import org.testcontainers.utility.DockerImageName; + +import java.io.File; +import java.net.InetSocketAddress; +import java.nio.file.Path; +import java.util.Collections; +import java.util.Objects; + +import static dev.langchain4j.data.document.loader.FileSystemDocumentLoader.loadDocument; +import static dev.langchain4j.data.document.splitter.DocumentSplitters.recursive; +import static dev.langchain4j.model.openai.OpenAiChatModelName.GPT_3_5_TURBO; +import static dev.langchain4j.model.openai.OpenAiEmbeddingModelName.TEXT_EMBEDDING_ADA_002; +import static java.time.Duration.ofSeconds; + +@Testcontainers +class CassandraNaiveRagTestIT { + + static final String VAR_OPENAI_API_KEY = "OPENAI_API_KEY"; + + static final String CASSANDRA_IMAGE = "cassandra:5.0"; + static final String DATACENTER = "datacenter1"; + static final String CLUSTER = "langchain4j"; + static final String VECTOR_STORE = "test_langchain4j"; + + static CassandraContainer cassandraContainer; + + /** + * Check Docker is installed and running on host + */ + @BeforeAll + static void ensureDockerIsRunning() { + DockerClientFactory.instance().client(); + if (cassandraContainer == null) { + cassandraContainer = new CassandraContainer<>( + DockerImageName.parse(CASSANDRA_IMAGE)) + .withEnv("CLUSTER_NAME", CLUSTER) + .withEnv("DC", DATACENTER); + cassandraContainer.start(); + + // Part of Database Creation, creating keyspace + final InetSocketAddress contactPoint = cassandraContainer.getContactPoint(); + CqlSession.builder() + .addContactPoint(contactPoint) + .withLocalDatacenter(DATACENTER) + .build().execute( + "CREATE KEYSPACE IF NOT EXISTS " + CLUSTER + + " WITH replication = {'class':'SimpleStrategy', 'replication_factor':'1'};"); + } + } + + /** + * Stop Cassandra Node + */ + @AfterAll + static void afterTests() throws Exception { + cassandraContainer.stop(); + } + + @Test + @EnabledIfEnvironmentVariable(named = "OPENAI_API_KEY", matches = "sk.*") + void shouldRagWithOpenAiAndAstra() { + // Parsing input file + Path textFile = new File(Objects.requireNonNull(getClass() + .getResource("/story-about-happy-carrot.txt")) + .getFile()) + .toPath(); + + // === INGESTION === + + EmbeddingModel embeddingModel = initEmbeddingModelOpenAi(); + EmbeddingStore embeddingStore = initEmbeddingStoreCassandra(); + EmbeddingStoreIngestor.builder() + .documentSplitter(recursive(100, 10, new OpenAiTokenizer(GPT_3_5_TURBO))) + .embeddingModel(embeddingModel) + .embeddingStore(embeddingStore) + .build() + .ingest(loadDocument(textFile, new TextDocumentParser())); + + // === NAIVE RETRIEVER === + + ContentRetriever contentRetriever = EmbeddingStoreContentRetriever.builder() + .embeddingStore(embeddingStore) + .embeddingModel(embeddingModel) + .maxResults(2) + .minScore(0.5) + .build(); + + CassandraAssistant ai = AiServices.builder(CassandraAssistant.class) + .contentRetriever(contentRetriever) + .chatLanguageModel(initChatLanguageModelOpenAi()) + .chatMemory(MessageWindowChatMemory.withMaxMessages(10)) + .build(); + + String response = ai.answer("What vegetable is Happy?"); + Assertions.assertNotNull(response); + + } + + private EmbeddingStore initEmbeddingStoreCassandra() { + return CassandraCassioEmbeddingStore.builder() + .contactPoints(Collections.singletonList(cassandraContainer.getContactPoint().getHostName())) + .port(cassandraContainer.getContactPoint().getPort()) + .localDataCenter(DATACENTER) + .keyspace(CLUSTER) + .table(VECTOR_STORE) + .dimension(1536) + .metric(SimilarityMetric.COSINE) + .build(); + } + + private ChatLanguageModel initChatLanguageModelOpenAi() { + return OpenAiChatModel.builder() + .apiKey(System.getenv(VAR_OPENAI_API_KEY)) + .modelName(GPT_3_5_TURBO) + .temperature(0.7) + .timeout(ofSeconds(15)) + .maxRetries(3) + .logResponses(true) + .logRequests(true) + .build(); + } + + private EmbeddingModel initEmbeddingModelOpenAi() { + return OpenAiEmbeddingModel.builder() + .apiKey(System.getenv(VAR_OPENAI_API_KEY)) + .modelName(TEXT_EMBEDDING_ADA_002) + .build(); + } +} \ No newline at end of file diff --git a/cassandra-example/src/test/resources/logback-test.xml b/cassandra-examples/src/test/resources/logback-test.xml similarity index 100% rename from cassandra-example/src/test/resources/logback-test.xml rename to cassandra-examples/src/test/resources/logback-test.xml diff --git a/cassandra-examples/src/test/resources/story-about-happy-carrot.txt b/cassandra-examples/src/test/resources/story-about-happy-carrot.txt new file mode 100644 index 00000000..66ae976d --- /dev/null +++ b/cassandra-examples/src/test/resources/story-about-happy-carrot.txt @@ -0,0 +1,28 @@ +Once upon a time in the town of VeggieVille, there lived a cheerful carrot named Charlie. +Charlie was a radiant carrot, always beaming with joy and positivity. +His vibrant orange skin and lush green top were a sight to behold, but it was his infectious laughter and warm personality that really set him apart. + +Charlie had a diverse group of friends, each a vegetable with their own unique characteristics. +There was Bella the blushing beetroot, always ready with a riddle or two; Timmy the timid tomato, a gentle soul with a heart of gold; and Percy the prankster potato, whose jokes always brought a smile to everyone's faces. +Despite their differences, they shared a close bond, their friendship as robust as their natural goodness. + +Their lives were filled with delightful adventures, from playing hide-and-seek amidst the leafy lettuce to swimming in the dewy droplets that pooled on the cabbage leaves. +Their favorite place, though, was the sunlit corner of the vegetable patch, where they would bask in the warmth of the sun, share stories, and have hearty laughs. + +One day, a bunch of pesky caterpillars invaded VeggieVille. +The vegetables were terrified, fearing they would be nibbled to nothingness. +But Charlie, with his usual sunny disposition, had an idea. +He proposed they host a grand feast for the caterpillars, with the juiciest leaves from the outskirts of the town. +Charlie's optimism was contagious, and his friends eagerly joined in to prepare the feast. + +When the caterpillars arrived, they were pleasantly surprised. +They enjoyed the feast and were so impressed with the vegetables' hospitality that they promised not to trouble VeggieVille again. +In return, they agreed to help pollinate the flowers, contributing to a more lush and vibrant VeggieVille. + +Charlie's idea had saved the day, but he humbly attributed the success to their teamwork and friendship. +They celebrated their victory with a grand party, filled with laughter, dance, and merry games. +That night, under the twinkling stars, they made a pact to always stand by each other, come what may. + +From then on, the story of the happy carrot and his friends spread far and wide, a tale of friendship, unity, and positivity. +Charlie, Bella, Timmy, and Percy continued to live their joyful lives, their laughter echoing through VeggieVille. +And so, the tale of the happy carrot and his friends serves as a reminder that no matter the challenge, with optimism, teamwork, and a bit of creativity, anything is possible. \ No newline at end of file diff --git a/pom.xml b/pom.xml index 869f13a4..c3af367a 100644 --- a/pom.xml +++ b/pom.xml @@ -35,6 +35,8 @@ weaviate-example javafx-example quarkus-example + astradb-examples + cassandra-examples \ No newline at end of file From 6cdbb79fc2cdbe4b019b95208a96d5c33b5c14f3 Mon Sep 17 00:00:00 2001 From: Cedrick Lunven Date: Thu, 25 Apr 2024 21:23:54 +0200 Subject: [PATCH 4/4] Add Samples for Astra and Cassandra --- cassandra-examples/.idea/.gitignore | 8 -- cassandra-examples/.idea/compiler.xml | 14 --- cassandra-examples/.idea/encodings.xml | 9 -- cassandra-examples/.idea/jarRepositories.xml | 25 ---- cassandra-examples/.idea/misc.xml | 12 -- cassandra-examples/.idea/uiDesigner.xml | 124 ------------------- cassandra-examples/.idea/vcs.xml | 6 - 7 files changed, 198 deletions(-) delete mode 100644 cassandra-examples/.idea/.gitignore delete mode 100644 cassandra-examples/.idea/compiler.xml delete mode 100644 cassandra-examples/.idea/encodings.xml delete mode 100644 cassandra-examples/.idea/jarRepositories.xml delete mode 100644 cassandra-examples/.idea/misc.xml delete mode 100644 cassandra-examples/.idea/uiDesigner.xml delete mode 100644 cassandra-examples/.idea/vcs.xml diff --git a/cassandra-examples/.idea/.gitignore b/cassandra-examples/.idea/.gitignore deleted file mode 100644 index 13566b81..00000000 --- a/cassandra-examples/.idea/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -# Default ignored files -/shelf/ -/workspace.xml -# Editor-based HTTP Client requests -/httpRequests/ -# Datasource local storage ignored files -/dataSources/ -/dataSources.local.xml diff --git a/cassandra-examples/.idea/compiler.xml b/cassandra-examples/.idea/compiler.xml deleted file mode 100644 index d31fea84..00000000 --- a/cassandra-examples/.idea/compiler.xml +++ /dev/null @@ -1,14 +0,0 @@ - - - - - - - - - - - - - - \ No newline at end of file diff --git a/cassandra-examples/.idea/encodings.xml b/cassandra-examples/.idea/encodings.xml deleted file mode 100644 index d5f2f75e..00000000 --- a/cassandra-examples/.idea/encodings.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - \ No newline at end of file diff --git a/cassandra-examples/.idea/jarRepositories.xml b/cassandra-examples/.idea/jarRepositories.xml deleted file mode 100644 index afced2a7..00000000 --- a/cassandra-examples/.idea/jarRepositories.xml +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/cassandra-examples/.idea/misc.xml b/cassandra-examples/.idea/misc.xml deleted file mode 100644 index 512b2e97..00000000 --- a/cassandra-examples/.idea/misc.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/cassandra-examples/.idea/uiDesigner.xml b/cassandra-examples/.idea/uiDesigner.xml deleted file mode 100644 index 2b63946d..00000000 --- a/cassandra-examples/.idea/uiDesigner.xml +++ /dev/null @@ -1,124 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/cassandra-examples/.idea/vcs.xml b/cassandra-examples/.idea/vcs.xml deleted file mode 100644 index 6c0b8635..00000000 --- a/cassandra-examples/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file