Azure · ahibrahimm · Nov 14, 2025 · Nov 14, 2025 · Nov 15, 2025 · Nov 16, 2025
@@ -220,8 +220,8 @@ def __call__(
     def __call__(
         self,
         *,
-        query: str,
-        response: List[dict],
+        query: str | List[dict],
+        response: str | List[dict],
         tool_definitions: List[dict],
     ) -> Dict[str, Union[str, float]]:
         """Evaluate groundedness for agent response with tool calls. Only file_search tool is supported.
@@ -308,6 +308,18 @@ def has_context(self, eval_input: dict) -> bool:
         Treats None, empty strings, empty lists, and lists of empty strings as no context.
         """
         context = eval_input.get("context", None)
+        return self._validate_context(context)
+
+    def _validate_context(self, context) -> bool:
+        """
+        Validate if the provided context is non-empty and meaningful.
+
+        Treats None, empty strings, empty lists, and lists of empty strings as no context.
+        :param context: The context to validate
+        :type context: Union[str, List, None]
+        :return: True if context is valid and non-empty, False otherwise
+        :rtype: bool
+        """
         if not context:
             return False
         if context == "<>":  # Special marker for no context
@@ -363,6 +375,14 @@ async def _real_call(self, **kwargs):
             else:
                 raise ex
 
+    def _is_single_entry(self, value):
+        """Return True if value is a string or a one-element list; any other value (the "unsure" case) yields False."""
+        if isinstance(value, str):  # a plain string counts as one entry
+            return True
+        if isinstance(value, list) and len(value) == 1:  # a list holding exactly one item
+            return True
+        return False  # empty lists, longer lists, None and all other types
+
     def _convert_kwargs_to_eval_input(self, **kwargs):
         if kwargs.get("context") or kwargs.get("conversation"):
             return super()._convert_kwargs_to_eval_input(**kwargs)
@@ -386,7 +406,16 @@ def _convert_kwargs_to_eval_input(self, **kwargs):
             )
         context = self._get_context_from_agent_response(response, tool_definitions)
 
-        filtered_response = self._filter_file_search_results(response)
+        if not self._validate_context(context) and self._is_single_entry(response) and self._is_single_entry(query):
+            msg = f"{type(self).__name__}: No valid context provided or could be extracted from the query or response."
+            raise EvaluationException(
+                message=msg,
+                blame=ErrorBlame.USER_ERROR,
+                category=ErrorCategory.NOT_APPLICABLE,
+                target=ErrorTarget.GROUNDEDNESS_EVALUATOR,
+            )
+
+        filtered_response = self._filter_file_search_results(response) if self._validate_context(context) else response
         return super()._convert_kwargs_to_eval_input(response=filtered_response, context=context, query=query)
 
     def _filter_file_search_results(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:

@@ -1,6 +1,6 @@
 type: "evaluator"
 name: "builtin.groundedness"
-version: 3
+version: 4
 displayName: "Groundedness-Evaluator"
 description: "Assesses whether the response stays true to the given context in a retrieval-augmented generation scenario. It’s best used for retrieval-augmented generation (RAG) scenarios, including question and answering and summarization. Use the groundedness metric when you need to verify that ai-generated responses align with and are validated by the provided context."
 evaluatorType: "builtin"