|
| 1 | +# Copyright 2025 Google LLC |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | +# |
| 15 | +# SPDX-License-Identifier: Apache-2.0 |
| 16 | + |
| 17 | +"""Unit tests for Ollama embedders package.""" |
| 18 | + |
| 19 | +import unittest |
| 20 | +from unittest.mock import AsyncMock, MagicMock |
| 21 | + |
| 22 | +import ollama as ollama_api |
| 23 | + |
| 24 | +from genkit.plugins.ollama.embedders import EmbeddingDefinition, OllamaEmbedder |
| 25 | +from genkit.types import ( |
| 26 | + Document, |
| 27 | + Embedding, |
| 28 | + EmbedRequest, |
| 29 | + EmbedResponse, |
| 30 | + TextPart, |
| 31 | +) |
| 32 | + |
| 33 | + |
class TestOllamaEmbedderEmbed(unittest.IsolatedAsyncioTestCase):
    """Exercise ``OllamaEmbedder.embed`` against a mocked Ollama client."""

    async def asyncSetUp(self):
        """Create the mocked client, its factory, and the embedder under test."""
        client = AsyncMock()
        factory = MagicMock(return_value=client)
        definition = EmbeddingDefinition(name='test-embed-model', dimensions=1536)

        self.mock_ollama_client_instance = client
        self.mock_ollama_client_factory = factory
        self.mock_embedding_definition = definition
        self.ollama_embedder = OllamaEmbedder(
            client=factory,
            embedding_definition=definition,
        )

    # -- helpers ---------------------------------------------------------

    @staticmethod
    def _doc(*texts):
        """Build a Document whose content is one TextPart per given string."""
        return Document(content=[TextPart(text=t) for t in texts])

    def _stub_vectors(self, vectors):
        """Make the mocked ``embed`` call resolve to an Ollama response holding ``vectors``."""
        self.mock_ollama_client_instance.embed.return_value = ollama_api.EmbedResponse(embeddings=vectors)

    @staticmethod
    def _genkit_response(vectors):
        """Build the Genkit EmbedResponse expected for the given raw vectors."""
        # Copy each vector so the expectation never shares list objects with the stub.
        return EmbedResponse(embeddings=[Embedding(embedding=list(vec)) for vec in vectors])

    def _assert_embed_called_with(self, texts):
        """Assert the client's ``embed`` was awaited exactly once with the model name and ``texts``."""
        self.mock_ollama_client_instance.embed.assert_awaited_once_with(
            model='test-embed-model',
            input=texts,
        )

    # -- tests -----------------------------------------------------------

    async def test_embed_single_document_single_content(self):
        """One document with one text part yields one embedding."""
        self._stub_vectors([[0.1, 0.2, 0.3]])
        req = EmbedRequest(input=[Document.from_text(text='hello world')])

        result = await self.ollama_embedder.embed(req)

        self._assert_embed_called_with(['hello world'])
        self.assertEqual(result, self._genkit_response([[0.1, 0.2, 0.3]]))

    async def test_embed_multiple_documents_multiple_content(self):
        """Text parts of all documents are flattened, in order, into one client call."""
        self._stub_vectors([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]])
        req = EmbedRequest(
            input=[
                self._doc('doc1_part1', 'doc1_part2'),
                self._doc('doc2_part1'),
            ]
        )

        result = await self.ollama_embedder.embed(req)

        self._assert_embed_called_with(['doc1_part1', 'doc1_part2', 'doc2_part1'])
        self.assertEqual(
            result,
            self._genkit_response([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]),
        )

    async def test_embed_empty_input(self):
        """An empty request is forwarded as an empty input list and yields no embeddings."""
        self._stub_vectors([])
        req = EmbedRequest(input=[])

        result = await self.ollama_embedder.embed(req)

        self._assert_embed_called_with([])
        self.assertEqual(result, EmbedResponse(embeddings=[]))

    async def test_embed_api_raises_exception(self):
        """An exception raised by the client's ``embed`` propagates to the caller."""
        embed_mock = self.mock_ollama_client_instance.embed
        embed_mock.side_effect = Exception('Ollama Embed API Error')
        req = EmbedRequest(input=[self._doc('error text')])

        with self.assertRaisesRegex(Exception, 'Ollama Embed API Error'):
            await self.ollama_embedder.embed(req)

        embed_mock.assert_awaited_once()

    async def test_embed_response_mismatch_input_count(self):
        """Fewer embeddings than input texts are passed through as-is (edge case)."""
        # Simulate Ollama answering two inputs with a single embedding.
        self._stub_vectors([[1.0, 2.0]])
        req = EmbedRequest(input=[self._doc('text1'), self._doc('text2')])

        result = await self.ollama_embedder.embed(req)

        # The embedder is expected to map whatever comes back without raising;
        # deciding whether a count mismatch is an error is left to the caller.
        self.assertEqual(result, self._genkit_response([[1.0, 2.0]]))
        # Confirm only one embedding was processed.
        self.assertEqual(len(result.embeddings), 1)
0 commit comments