Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,8 @@ def __call__(
def __call__(
self,
*,
query: str,
response: List[dict],
query: str | List[dict],
response: str | List[dict],
tool_definitions: List[dict],
) -> Dict[str, Union[str, float]]:
"""Evaluate groundedness for agent response with tool calls. Only file_search tool is supported.
Expand Down Expand Up @@ -308,6 +308,18 @@ def has_context(self, eval_input: dict) -> bool:
Treats None, empty strings, empty lists, and lists of empty strings as no context.
"""
context = eval_input.get("context", None)
return self._validate_context(context)

def _validate_context(self, context) -> bool:
"""
Validate if the provided context is non-empty and meaningful.

Treats None, empty strings, empty lists, and lists of empty strings as no context.
:param context: The context to validate
:type context: Union[str, List, None]
:return: True if context is valid and non-empty, False otherwise
:rtype: bool
"""
if not context:
return False
if context == "<>": # Special marker for no context
Expand Down Expand Up @@ -363,6 +375,14 @@ async def _real_call(self, **kwargs):
else:
raise ex

def _is_single_entry(self, value):
    """Report whether ``value`` looks like exactly one entry.

    A plain string, or a list containing exactly one element, counts as a
    single entry. Every other input (empty list, multi-element list, or any
    other type) is the "unsure" case and is reported as False.

    :param value: The query or response value to inspect.
    :return: True for a str or a one-element list, False otherwise.
    :rtype: bool
    """
    # Lists are single only when they hold exactly one item.
    if isinstance(value, list):
        return len(value) == 1
    # Any string is a single entry; everything else is "unsure" -> False.
    return isinstance(value, str)

def _convert_kwargs_to_eval_input(self, **kwargs):
if kwargs.get("context") or kwargs.get("conversation"):
return super()._convert_kwargs_to_eval_input(**kwargs)
Expand All @@ -386,7 +406,16 @@ def _convert_kwargs_to_eval_input(self, **kwargs):
)
context = self._get_context_from_agent_response(response, tool_definitions)

filtered_response = self._filter_file_search_results(response)
if not self._validate_context(context) and self._is_single_entry(response) and self._is_single_entry(query):
msg = f"{type(self).__name__}: No valid context provided or could be extracted from the query or response."
raise EvaluationException(
message=msg,
blame=ErrorBlame.USER_ERROR,
category=ErrorCategory.NOT_APPLICABLE,
target=ErrorTarget.GROUNDEDNESS_EVALUATOR,
)

filtered_response = self._filter_file_search_results(response) if self._validate_context(context) else response
return super()._convert_kwargs_to_eval_input(response=filtered_response, context=context, query=query)

def _filter_file_search_results(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
Expand Down
2 changes: 1 addition & 1 deletion assets/evaluators/builtin/groundedness/spec.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
type: "evaluator"
name: "builtin.groundedness"
version: 3
version: 4
displayName: "Groundedness-Evaluator"
description: "Assesses whether the response stays true to the given context in a retrieval-augmented generation scenario. It’s best used for retrieval-augmented generation (RAG) scenarios, including question answering and summarization. Use the groundedness metric when you need to verify that AI-generated responses align with and are validated by the provided context."
evaluatorType: "builtin"
Expand Down
Loading