Haystack LLM and embedding wrapper #1901

Open · wants to merge 7 commits into base: main
5 changes: 5 additions & 0 deletions pyproject.toml
@@ -27,6 +27,8 @@ all = [
"pandas",
"datacompy",
"llama_index",
"haystack-ai",
"haystack-experimental==0.4.0"
]
docs = [
"mkdocs>=1.6.1",
@@ -59,6 +61,9 @@ dev = [
"rapidfuzz",
"pandas",
"datacompy",
"haystack-ai",
"haystack-experimental==0.4.0",
"sacrebleu",
]
test = [
"pytest",
2 changes: 2 additions & 0 deletions requirements/dev.txt
@@ -16,3 +16,5 @@ nltk
rapidfuzz
pandas
datacompy
+ haystack-ai
+ haystack-experimental==0.4.0
2 changes: 2 additions & 0 deletions requirements/docs.txt
@@ -5,3 +5,5 @@ sphinx_design
astroid<3
myst-nb
llama_index
+ haystack-ai
+ haystack-experimental==0.4.0
2 changes: 2 additions & 0 deletions requirements/test.txt
@@ -4,3 +4,5 @@ pytest-asyncio
llama_index
nbmake
diskcache
+ haystack-ai
+ haystack-experimental==0.4.0
4 changes: 3 additions & 1 deletion src/ragas/embeddings/__init__.py
@@ -5,11 +5,13 @@
LlamaIndexEmbeddingsWrapper,
embedding_factory,
)
+ from ragas.embeddings.haystack_wrapper import HaystackEmbeddingsWrapper

__all__ = [
"BaseRagasEmbeddings",
"HaystackEmbeddingsWrapper",
"HuggingfaceEmbeddings",
"LangchainEmbeddingsWrapper",
"LlamaIndexEmbeddingsWrapper",
"HuggingfaceEmbeddings",
"embedding_factory",
]
23 changes: 11 additions & 12 deletions src/ragas/embeddings/base.py
@@ -4,7 +4,6 @@
import typing as t
from abc import ABC, abstractmethod
from dataclasses import field
- from typing import List

import numpy as np
from langchain_core.embeddings import Embeddings
@@ -51,15 +50,15 @@ def __init__(self, cache: t.Optional[CacheInterface] = None):
self.aembed_documents
)

- async def embed_text(self, text: str, is_async=True) -> List[float]:
+ async def embed_text(self, text: str, is_async=True) -> t.List[float]:
"""
Embed a single text string.
"""
embs = await self.embed_texts([text], is_async=is_async)
return embs[0]

async def embed_texts(
- self, texts: List[str], is_async: bool = True
+ self, texts: t.List[str], is_async: bool = True
) -> t.List[t.List[float]]:
"""
Embed multiple texts.
@@ -77,10 +76,10 @@ async def embed_texts
return await loop.run_in_executor(None, embed_documents_with_retry, texts)

@abstractmethod
- async def aembed_query(self, text: str) -> List[float]: ...
+ async def aembed_query(self, text: str) -> t.List[float]: ...

@abstractmethod
- async def aembed_documents(self, texts: List[str]) -> t.List[t.List[float]]: ...
+ async def aembed_documents(self, texts: t.List[str]) -> t.List[t.List[float]]: ...

def set_run_config(self, run_config: RunConfig):
"""
@@ -117,25 +116,25 @@ def __init__(
run_config = RunConfig()
self.set_run_config(run_config)

- def embed_query(self, text: str) -> List[float]:
+ def embed_query(self, text: str) -> t.List[float]:
"""
Embed a single query text.
"""
return self.embeddings.embed_query(text)

- def embed_documents(self, texts: List[str]) -> List[List[float]]:
+ def embed_documents(self, texts: t.List[str]) -> t.List[t.List[float]]:
"""
Embed multiple documents.
"""
return self.embeddings.embed_documents(texts)

- async def aembed_query(self, text: str) -> List[float]:
+ async def aembed_query(self, text: str) -> t.List[float]:
"""
Asynchronously embed a single query text.
"""
return await self.embeddings.aembed_query(text)

- async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
+ async def aembed_documents(self, texts: t.List[str]) -> t.List[t.List[float]]:
"""
Asynchronously embed multiple documents.
"""
@@ -256,13 +255,13 @@ def __post_init__(self):
if self.cache is not None:
self.predict = cacher(cache_backend=self.cache)(self.predict)

- def embed_query(self, text: str) -> List[float]:
+ def embed_query(self, text: str) -> t.List[float]:
"""
Embed a single query text.
"""
return self.embed_documents([text])[0]

- def embed_documents(self, texts: List[str]) -> List[List[float]]:
+ def embed_documents(self, texts: t.List[str]) -> t.List[t.List[float]]:
"""
Embed multiple documents.
"""
@@ -279,7 +278,7 @@ def embed_documents(self, texts: List[str]) -> List[List[float]]:
assert isinstance(embeddings, Tensor)
return embeddings.tolist()

- def predict(self, texts: List[List[str]]) -> List[List[float]]:
+ def predict(self, texts: t.List[t.List[str]]) -> t.List[t.List[float]]:
"""
Make predictions using a cross-encoder model.
"""
96 changes: 96 additions & 0 deletions src/ragas/embeddings/haystack_wrapper.py
@@ -0,0 +1,96 @@
import asyncio
import typing as t

try:
from haystack_experimental.core import AsyncPipeline
except ImportError:
raise ImportError(
"haystack-experimental is not installed. Please install it using `pip install haystack-experimental==0.4.0`."
)
try:
from haystack.components.embedders import ( # type: ignore
AzureOpenAITextEmbedder,
HuggingFaceAPITextEmbedder,
OpenAITextEmbedder,
SentenceTransformersTextEmbedder,
)
except ImportError:
raise ImportError(
"haystack-ai is not installed. Please install it using `pip install haystack-ai`."
)


from ragas.cache import CacheInterface
from ragas.embeddings.base import BaseRagasEmbeddings
from ragas.run_config import RunConfig


class HaystackEmbeddingsWrapper(BaseRagasEmbeddings):
def __init__(
self,
embedder: t.Union[
OpenAITextEmbedder, # type: ignore
SentenceTransformersTextEmbedder, # type: ignore
HuggingFaceAPITextEmbedder, # type: ignore
AzureOpenAITextEmbedder, # type: ignore
],
run_config: t.Optional[RunConfig] = None,
cache: t.Optional[CacheInterface] = None,
):
super().__init__(cache=cache)
self.embedder = embedder
self.async_pipeline = AsyncPipeline()
self.async_pipeline.add_component("embedder", self.embedder)
if run_config is None:
run_config = RunConfig()
self.set_run_config(run_config)

def embed_query(self, text: str) -> t.List[float]:
"""
Embed a single query text.
"""
return self.embedder.run(text=text)["embedding"]

def embed_documents(self, texts: t.List[str]) -> t.List[t.List[float]]:
"""
Embed multiple documents.
"""
return [self.embed_query(text) for text in texts]

async def aembed_query(self, text: str) -> t.List[float]:
"""
Asynchronously embed a single query text.
"""

async def embedding_pipeline(text: str):
result = []

async for output in self.async_pipeline.run({"embedder": {"text": text}}):
if "embedder" in output and "embedding" in output["embedder"]:
result = output["embedder"]["embedding"]
break

return result

return await embedding_pipeline(text=text)

async def aembed_documents(self, texts: t.List[str]) -> t.List[t.List[float]]:
"""
Asynchronously embed multiple documents.
"""
results = await asyncio.gather(*(self.aembed_query(text) for text in texts))
return results

def __repr__(self) -> str:
if isinstance(
self.embedder, (OpenAITextEmbedder, SentenceTransformersTextEmbedder) # type: ignore
):
model = self.embedder.model
elif isinstance(self.embedder, AzureOpenAITextEmbedder): # type: ignore
model = self.embedder.azure_deployment
elif isinstance(self.embedder, HuggingFaceAPITextEmbedder): # type: ignore
model = self.embedder.api_params
else:
model = "Unknown"

return f"{self.__class__.__name__}(embeddings={model}(...))"
13 changes: 7 additions & 6 deletions src/ragas/integrations/langgraph.py
@@ -7,7 +7,8 @@


def convert_to_ragas_messages(
- messages: List[Union[HumanMessage, SystemMessage, AIMessage, ToolMessage]], metadata: bool = False
+ messages: List[Union[HumanMessage, SystemMessage, AIMessage, ToolMessage]],
+ metadata: bool = False,
) -> List[Union[r.HumanMessage, r.AIMessage, r.ToolMessage]]:
"""
Convert LangChain messages into Ragas messages with metadata for agent evaluation.
@@ -47,16 +48,16 @@ def _validate_string_content(message, message_type: str) -> str:
def _extract_metadata(message) -> dict:

return {k: v for k, v in message.__dict__.items() if k != "content"}

if metadata:
MESSAGE_TYPE_MAP = {
HumanMessage: lambda m: r.HumanMessage(
content=_validate_string_content(m, "HumanMessage"),
- metadata=_extract_metadata(m)
+ metadata=_extract_metadata(m),
),
ToolMessage: lambda m: r.ToolMessage(
content=_validate_string_content(m, "ToolMessage"),
- metadata=_extract_metadata(m)
+ metadata=_extract_metadata(m),
),
}
else:
@@ -85,12 +86,12 @@ def _convert_ai_message(message: AIMessage, metadata: bool) -> r.AIMessage:
return r.AIMessage(
content=_validate_string_content(message, "AIMessage"),
tool_calls=tool_calls,
- metadata=_extract_metadata(message)
+ metadata=_extract_metadata(message),
)
else:
return r.AIMessage(
content=_validate_string_content(message, "AIMessage"),
- tool_calls=tool_calls
+ tool_calls=tool_calls,
)

def _convert_message(message, metadata: bool = False):
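
For context, a short sketch of how the reformatted conversion is called; the sample messages are illustrative:

```python
from langchain_core.messages import AIMessage, HumanMessage

from ragas.integrations.langgraph import convert_to_ragas_messages

# A LangChain-style conversation, e.g. pulled from a LangGraph state.
history = [
    HumanMessage(content="Book a table for two at 7pm."),
    AIMessage(content="Done, your table is booked for 7pm."),
]

# With metadata=True, every non-content attribute (ids,
# response_metadata, ...) is carried over onto the Ragas messages.
ragas_messages = convert_to_ragas_messages(history, metadata=True)
```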
2 changes: 2 additions & 0 deletions src/ragas/llms/__init__.py
@@ -4,9 +4,11 @@
LlamaIndexLLMWrapper,
llm_factory,
)
+ from ragas.llms.haystack_wrapper import HaystackLLMWrapper

__all__ = [
"BaseRagasLLM",
"HaystackLLMWrapper",
"LangchainLLMWrapper",
"LlamaIndexLLMWrapper",
"llm_factory",
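
The LLM wrapper itself lands in `src/ragas/llms/haystack_wrapper.py`, which is not expanded in this view. A hedged usage sketch, assuming the wrapper takes the Haystack generator as its first constructor argument, mirroring `HaystackEmbeddingsWrapper`:

```python
from haystack.components.generators import OpenAIGenerator

from ragas.llms import HaystackLLMWrapper

# Assumption: the generator is passed positionally; the model name
# is illustrative and OPENAI_API_KEY is expected to be set.
generator = OpenAIGenerator(model="gpt-4o-mini")
ragas_llm = HaystackLLMWrapper(generator)
```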
4 changes: 3 additions & 1 deletion src/ragas/llms/base.py
@@ -24,6 +24,7 @@
from langchain_core.prompt_values import PromptValue
from llama_index.core.base.llms.base import BaseLLM


logger = logging.getLogger(__name__)

MULTIPLE_COMPLETION_SUPPORTED = [
@@ -183,7 +184,8 @@ def is_finished(self, response: LLMResult) -> bool:
elif resp_message.response_metadata.get("stop_reason") is not None:
stop_reason = resp_message.response_metadata.get("stop_reason")
is_finished_list.append(
stop_reason in ["end_turn", "stop", "STOP", "MAX_TOKENS", "eos_token"]
stop_reason
in ["end_turn", "stop", "STOP", "MAX_TOKENS", "eos_token"]
)
# default to True
else: