196 changes: 196 additions & 0 deletions src/ragas/metrics/collections/_answer_relevancy_v2.py
Review comment (Member): Why _v2?
@@ -0,0 +1,196 @@
"""Answer Relevancy metric using SimplePydanticPrompt for easy modification and translation."""

import typing as t

import numpy as np
from pydantic import BaseModel

from ragas.metrics.collections.base import BaseMetric
from ragas.metrics.result import MetricResult
from ragas.prompt.simple_mixin import SimplePromptMixin
from ragas.prompt.simple_pydantic_prompt import SimplePydanticPrompt

if t.TYPE_CHECKING:
from ragas.embeddings.base import BaseRagasEmbedding
from ragas.llms.base import InstructorBaseRagasLLM


# Input/Output models for the prompt
class AnswerRelevanceInput(BaseModel):
"""Input model for answer relevance evaluation."""

response: str


class AnswerRelevanceOutput(BaseModel):
"""Output model for answer relevance evaluation."""

question: str
noncommittal: int


# The prompt definition using SimplePydanticPrompt
class AnswerRelevancePrompt(
SimplePydanticPrompt[AnswerRelevanceInput, AnswerRelevanceOutput]
):
"""
Prompt for generating questions from responses and detecting noncommittal answers.

This prompt can be easily modified and translated using the SimplePromptMixin methods.
"""

instruction = """Generate a question for the given answer and identify if the answer is noncommittal.

Give noncommittal as 1 if the answer is noncommittal and 0 if the answer is committal.
A noncommittal answer is one that is evasive, vague, or ambiguous.
For example, "I don't know" or "I'm not sure" are noncommittal answers."""

input_model = AnswerRelevanceInput
output_model = AnswerRelevanceOutput
name = "answer_relevance_prompt"

examples = [
(
AnswerRelevanceInput(response="Albert Einstein was born in Germany."),
AnswerRelevanceOutput(
question="Where was Albert Einstein born?", noncommittal=0
),
),
(
AnswerRelevanceInput(
response="I don't know about the groundbreaking feature of the smartphone invented in 2023 as I am unaware of information beyond 2022."
),
AnswerRelevanceOutput(
question="What was the groundbreaking feature of the smartphone invented in 2023?",
noncommittal=1,
),
),
]


class AnswerRelevancy(BaseMetric, SimplePromptMixin):
"""
    Evaluate answer relevancy by generating questions from the response and comparing them to the original question.

This implementation uses SimplePydanticPrompt which supports:
- Easy modification of prompts via get_prompts()/set_prompts()
- Translation to different languages via adapt_prompts()
- Clean prompt structure without bloat

Usage:
>>> import instructor
>>> from openai import AsyncOpenAI
>>> from ragas.llms.base import instructor_llm_factory
>>> from ragas.embeddings.base import embedding_factory
>>> from ragas.metrics.collections import AnswerRelevancy
>>>
>>> # Setup dependencies
>>> client = AsyncOpenAI()
>>> llm = instructor_llm_factory("openai", client=client, model="gpt-4o-mini")
>>> embeddings = embedding_factory("openai", model="text-embedding-ada-002", client=client, interface="modern")
>>>
>>> # Create metric instance
>>> metric = AnswerRelevancy(llm=llm, embeddings=embeddings, strictness=3)
>>>
>>> # Modify the prompt instruction
>>> metric.modify_prompt("answer_relevance_prompt",
... instruction="Generate questions and detect evasive answers with extra care for technical topics.")
>>>
>>> # Translate prompts to Spanish
>>> adapted_prompts = await metric.adapt_prompts("spanish", llm)
>>> metric.set_adapted_prompts(adapted_prompts)
>>>
>>> # Single evaluation
>>> result = await metric.ascore(
... user_input="What is the capital of France?",
... response="Paris is the capital of France."
... )
>>> print(f"Score: {result.value}")

Attributes:
llm: Modern instructor-based LLM for question generation
embeddings: Modern embeddings model with embed_text() and embed_texts() methods
name: The metric name
strictness: Number of questions to generate per answer (3-5 recommended)
answer_relevance_prompt: The prompt used for evaluation (modifiable)
"""

# Type hints for linter
llm: "InstructorBaseRagasLLM"
embeddings: "BaseRagasEmbedding"

# The prompt attribute - this will be discovered by SimplePromptMixin
answer_relevance_prompt: AnswerRelevancePrompt

def __init__(
self,
llm: "InstructorBaseRagasLLM",
embeddings: "BaseRagasEmbedding",
name: str = "answer_relevancy",
strictness: int = 3,
**kwargs,
):
"""Initialize AnswerRelevancy metric with required components."""
# Set attributes explicitly before calling super()
self.llm = llm
self.embeddings = embeddings
self.strictness = strictness

# Initialize the prompt
self.answer_relevance_prompt = AnswerRelevancePrompt()

# Call super() for validation
super().__init__(name=name, **kwargs)

async def ascore(self, user_input: str, response: str) -> MetricResult:
"""
Calculate answer relevancy score asynchronously.

Args:
user_input: The original question
response: The response to evaluate

Returns:
MetricResult with relevancy score (0.0-1.0)
"""
input_data = AnswerRelevanceInput(response=response)

generated_questions = []
noncommittal_flags = []

        # Generate multiple questions using the current prompt; the prompt text
        # is identical across iterations, so render it once before the loop.
        prompt_text = self.answer_relevance_prompt.to_string(input_data)
        for _ in range(self.strictness):
            result = await self.llm.agenerate(prompt_text, AnswerRelevanceOutput)

if result.question:
generated_questions.append(result.question)
noncommittal_flags.append(result.noncommittal)

if not generated_questions:
return MetricResult(value=0.0)

# Check if all responses were noncommittal
all_noncommittal = np.all(noncommittal_flags)

# Calculate similarity between original question and generated questions
question_vec = np.asarray(self.embeddings.embed_text(user_input)).reshape(1, -1)
gen_question_vec = np.asarray(
self.embeddings.embed_texts(generated_questions)
).reshape(len(generated_questions), -1)

# Calculate cosine similarity
norm = np.linalg.norm(gen_question_vec, axis=1) * np.linalg.norm(
question_vec, axis=1
)
cosine_sim = (
np.dot(gen_question_vec, question_vec.T).reshape(
-1,
)
/ norm
)

# Average similarity, penalized if all answers were noncommittal
score = cosine_sim.mean() * int(not all_noncommittal)

return MetricResult(value=float(score))
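
Review note: the final score is the mean cosine similarity between the embedding of the original question and the embeddings of the generated questions, zeroed out when every generated output is flagged noncommittal. A minimal standalone sketch of the same arithmetic, using hypothetical unit vectors in place of real embeddings:

    import numpy as np

    # Hypothetical embeddings: one original question, three generated questions.
    question_vec = np.array([[1.0, 0.0, 0.0]])  # shape (1, d)
    gen_question_vec = np.array(
        [
            [1.0, 0.0, 0.0],  # identical question  -> similarity 1.0
            [0.8, 0.6, 0.0],  # related question    -> similarity 0.8
            [0.0, 1.0, 0.0],  # unrelated question  -> similarity 0.0
        ]
    )  # shape (n, d)

    norm = np.linalg.norm(gen_question_vec, axis=1) * np.linalg.norm(question_vec, axis=1)
    cosine_sim = np.dot(gen_question_vec, question_vec.T).reshape(-1) / norm

    all_noncommittal = False  # at least one generated answer was committal
    score = cosine_sim.mean() * int(not all_noncommittal)
    print(score)  # 0.6 == (1.0 + 0.8 + 0.0) / 3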
161 changes: 161 additions & 0 deletions src/ragas/prompt/simple_mixin.py
@@ -0,0 +1,161 @@
"""
Simplified PromptMixin that works with SimplePydanticPrompt.
Focuses on core functionality without bloat.
"""

from __future__ import annotations

import inspect
import logging
import typing as t

from .simple_pydantic_prompt import SimplePydanticPrompt

if t.TYPE_CHECKING:
from ragas.llms.base import InstructorBaseRagasLLM

logger = logging.getLogger(__name__)


class SimplePromptMixin:
"""
Simplified mixin class for classes that have prompts.

Provides essential prompt management functionality:
- Get prompts from class attributes
- Set/modify prompts
- Translate prompts to different languages

Works with SimplePydanticPrompt instances.
"""

def get_prompts(self) -> t.Dict[str, SimplePydanticPrompt]:
"""
Get all prompts from this class.

Returns:
Dictionary mapping prompt names to prompt instances
"""
prompts = {}

for attr_name, attr_value in inspect.getmembers(self):
if isinstance(attr_value, SimplePydanticPrompt):
            # Use the prompt's name if it has one, otherwise the attribute name
prompt_name = attr_value.name or attr_name
prompts[prompt_name] = attr_value

return prompts

def set_prompts(self, **prompts: SimplePydanticPrompt) -> None:
"""
Set/update prompts on this class.

Args:
**prompts: Keyword arguments where keys are prompt names and
values are SimplePydanticPrompt instances

Raises:
ValueError: If prompt name doesn't exist or value is not a SimplePydanticPrompt
"""
available_prompts = self.get_prompts()
name_to_attr = self._get_prompt_name_to_attr_mapping()

for prompt_name, new_prompt in prompts.items():
if prompt_name not in available_prompts:
available_names = list(available_prompts.keys())
raise ValueError(
f"Prompt '{prompt_name}' not found. Available prompts: {available_names}"
)

if not isinstance(new_prompt, SimplePydanticPrompt):
raise ValueError(
f"Prompt '{prompt_name}' must be a SimplePydanticPrompt instance"
)

# Set the prompt on the class
attr_name = name_to_attr[prompt_name]
setattr(self, attr_name, new_prompt)

async def adapt_prompts(
self,
target_language: str,
llm: InstructorBaseRagasLLM,
adapt_instruction: bool = False,
) -> t.Dict[str, SimplePydanticPrompt]:
"""
Translate all prompts to the target language.

Args:
target_language: Target language for translation
llm: LLM to use for translation
adapt_instruction: Whether to translate instructions as well as examples

Returns:
Dictionary of translated prompts
"""
prompts = self.get_prompts()
adapted_prompts = {}

for prompt_name, prompt in prompts.items():
try:
adapted_prompt = await prompt.adapt(
target_language, llm, adapt_instruction
)
adapted_prompts[prompt_name] = adapted_prompt
except Exception as e:
logger.warning(f"Failed to adapt prompt '{prompt_name}': {e}")
# Keep original prompt on failure
adapted_prompts[prompt_name] = prompt

return adapted_prompts

def set_adapted_prompts(
self, adapted_prompts: t.Dict[str, SimplePydanticPrompt]
) -> None:
"""
Set adapted/translated prompts on this class.

Args:
adapted_prompts: Dictionary of translated prompts from adapt_prompts()
"""
self.set_prompts(**adapted_prompts)

def modify_prompt(
self,
prompt_name: str,
instruction: t.Optional[str] = None,
examples: t.Optional[t.List] = None,
) -> None:
"""
Modify a specific prompt's instruction or examples.

Args:
prompt_name: Name of the prompt to modify
instruction: New instruction (if provided)
examples: New examples (if provided)
"""
current_prompts = self.get_prompts()

if prompt_name not in current_prompts:
available_names = list(current_prompts.keys())
raise ValueError(
f"Prompt '{prompt_name}' not found. Available prompts: {available_names}"
)

current_prompt = current_prompts[prompt_name]
modified_prompt = current_prompt.copy_with_modifications(
instruction=instruction, examples=examples
        )

        self.set_prompts(**{prompt_name: modified_prompt})

Review comment (Member): Is there a case where both are None? Since both are optional. What's supposed to happen in that case?
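
Reply sketch (not part of the PR): if both arguments are None, copy_with_modifications would presumably return an unchanged copy and set_prompts would re-install it, a silent no-op. Assuming fail-fast is the desired behavior, one possible guard at the top of modify_prompt:

    if instruction is None and examples is None:
        raise ValueError(
            "modify_prompt() requires at least one of 'instruction' or 'examples'"
        )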

def _get_prompt_name_to_attr_mapping(self) -> t.Dict[str, str]:
"""Get mapping from prompt names to attribute names."""
mapping = {}

for attr_name, attr_value in inspect.getmembers(self):
if isinstance(attr_value, SimplePydanticPrompt):
prompt_name = attr_value.name or attr_name
mapping[prompt_name] = attr_name

return mapping
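
Review note: for reference, a minimal end-to-end sketch of the mixin's prompt-management flow as described in the docstrings. Here `metric` and `llm` mirror the setup shown in the AnswerRelevancy usage example above; the instruction string is illustrative:

    # Discover prompts by name (keyed on each prompt's own `name` attribute).
    prompts = metric.get_prompts()
    print(list(prompts.keys()))  # ['answer_relevance_prompt']

    # Tweak just the instruction in place.
    metric.modify_prompt(
        "answer_relevance_prompt",
        instruction="Generate a question for the answer; flag evasive answers as noncommittal.",
    )

    # Translate prompts, then install the translated versions.
    adapted = await metric.adapt_prompts("spanish", llm, adapt_instruction=True)
    metric.set_adapted_prompts(adapted)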