
Commit b8ba171

yuukidach and Dash authored
feat(testset_generator): support error catching in generation process (#368)
I am using the Azure OpenAI API to generate a testset. However, Azure's API is sensitive to content and can easily return a response like the following:

```python
{'id': 'fake_id', 'choices': [{'finish_reason': 'content_filter', 'index': 0, 'message': {'content': None, 'role': 'assistant', 'function_call': None, 'tool_calls': None}}], 'created': 1702175894, 'model': 'gpt-4-32k', 'object': 'chat.completion', 'system_fingerprint': None, 'usage': {'completion_tokens': 21, 'prompt_tokens': 4271, 'total_tokens': 4292}}
```

The `finish_reason` is `content_filter` and the content is `None`. When [`create_llm_result()`](https://github.com/explodinggradients/ragas/blob/main/src/ragas/llms/openai.py#L134) runs, an error is raised because the content is `None`:

```
Error occurred: ('Fatal error occurred while running async tasks.', ValidationError(model='Generation', errors=[{'loc': ('text',), 'msg': 'none is not an allowed value', 'type': 'type_error.none.not_allowed'}]))
```

This error interrupts the entire generation task. In fact, when such an error occurs we can simply skip it and move on to generating another test item.

---------

Co-authored-by: Dash <[email protected]>
1 parent 9b413ac commit b8ba171
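For context, here is a minimal, self-contained sketch of the failure mode described in the commit message. The `Generation` stand-in model and the sample payload below are illustrative only, not ragas or OpenAI client code: a `content_filter` response carries `content: None`, and validating it against a schema whose `text` field is a required string raises the `ValidationError` that previously aborted the whole run.

```python
# Illustrative sketch only: a stand-in for the schema ragas builds from the
# LLM response; names and payload here are hypothetical.
from pydantic import BaseModel, ValidationError


class Generation(BaseModel):
    text: str  # required string field, like the field the real error points at


# Shape of an Azure choice whose completion was blocked by the content filter
filtered_choice = {
    "finish_reason": "content_filter",
    "message": {"content": None, "role": "assistant"},
}

content = filtered_choice["message"]["content"]
try:
    Generation(text=content)  # content is None -> ValidationError
except ValidationError as err:
    # instead of letting this abort the whole testset run, the commit skips
    # the offending item and continues with the next one
    print("skipping this test item:", err.errors()[0]["msg"])
```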

File tree

2 files changed: +88 −57 lines


src/ragas/async_utils.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -44,5 +44,5 @@ async def _gather() -> List[Any]:
         # run the operation w/o tqdm on hitting a fatal
         # may occur in some environments where tqdm.asyncio
         # is not supported
-        raise RuntimeError("Fatal error occurred while running async tasks.", e)
+        raise RuntimeError("Fatal error occurred while running async tasks.", e) from e
     return outputs
```
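The only change in this file is the added `from e`. As a quick illustration (the function below is a hypothetical stand-in, not the ragas helper), explicit chaining stores the original exception on `__cause__`, which is exactly what the test-set generator inspects further down to decide whether a failure can be skipped:

```python
# Hypothetical stand-in for the async runner: it wraps any failure in a
# RuntimeError, and `from e` records the original error as __cause__.
def run_tasks_sketch() -> None:
    try:
        raise ValueError("one generation failed validation")
    except Exception as e:
        raise RuntimeError("Fatal error occurred while running async tasks.", e) from e


try:
    run_tasks_sketch()
except RuntimeError as wrapped:
    print(type(wrapped.__cause__).__name__)  # prints: ValueError
```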

src/ragas/testset/testset_generator.py

Lines changed: 87 additions & 56 deletions
```diff
@@ -16,6 +16,11 @@
         "Please, install it with `pip install llama_index`."
     )
 
+try:
+    from pydantic.v1 import ValidationError
+except ImportError:
+    from pydantic import ValidationError
+
 import numpy as np
 import numpy.testing as npt
 import pandas as pd
@@ -58,6 +63,10 @@
     "conditional": "_condition_question",
 }
 
+retry_errors = (
+    ValidationError,
+)
+
 DataRow = namedtuple(
     "DataRow",
     [
@@ -69,6 +78,8 @@
     ],
 )
 
+Proposal = namedtuple("Proposal", ["question", "text_chunk"])
+
 
 @dataclass
 class TestDataset:
```
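The new `Proposal` namedtuple is just a lightweight return type: it lets `_make_proposal` hand back either `None` (skip this item) or a question paired with its supporting text chunk. A tiny illustrative sketch of how such a namedtuple behaves (the example values are made up):

```python
from collections import namedtuple

# Same shape as the Proposal added above; the values are illustrative.
Proposal = namedtuple("Proposal", ["question", "text_chunk"])

p = Proposal(question="What does the retry tuple control?",
             text_chunk="retry_errors lists the skippable error causes.")
print(p.question)    # fields are read by name, e.g. proposal.question in generate()
print(p.text_chunk)
```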
```diff
@@ -291,6 +302,70 @@ def _embed_nodes(self, nodes: t.List[BaseNode]) -> t.Dict[str, t.List[float]]:
 
         return embeddings
 
+    def _make_proposal(
+        self, cur_node: BaseNode, neighbor_nodes: t.List[BaseNode], evolve_type: str
+    ) -> t.Union[Proposal, None]:
+        # Append multiple nodes randomly to remove chunking bias
+        size = self.rng.integers(1, 3)
+        nodes = (
+            self._get_neighbour_node(cur_node, neighbor_nodes)
+            if size > 1 and evolve_type != "multi_context"
+            else [cur_node]
+        )
+
+        text_chunk = " ".join([node.get_content() for node in nodes])
+        score = self._filter_context(text_chunk)
+        if not score:
+            return None
+        seed_question = self._seed_question(text_chunk)
+        is_valid_question = self._filter_question(seed_question)
+        if not is_valid_question:
+            return None
+
+        if evolve_type == "multi_context":
+            # Find most similar chunk in same document
+            node_embedding = self._embed_nodes([nodes[-1]])
+            neighbor_nodes = self._remove_nodes(neighbor_nodes, nodes)
+            neighbor_emb = self._embed_nodes(neighbor_nodes)
+
+            _, indices = get_top_k_embeddings(
+                list(node_embedding.values())[0],
+                list(neighbor_emb.values()),
+                similarity_cutoff=self.threshold / 10,
+            )
+            if indices:
+                # type cast indices from list[Any] to list[int]
+                indices = t.cast(t.List[int], indices)
+                best_neighbor = neighbor_nodes[indices[0]]
+                question = self._multicontext_question(
+                    question=seed_question,
+                    context1=text_chunk,
+                    context2=best_neighbor.get_content(),
+                )
+                text_chunk = "\n".join([text_chunk, best_neighbor.get_content()])
+            else:
+                return None
+
+        # for reasoning and conditional modes, evolve question with the
+        # functions from question_deep_map
+        else:
+            evolve_fun = question_deep_map.get(evolve_type)
+            question = (
+                getattr(self, evolve_fun)(seed_question, text_chunk)
+                if evolve_fun
+                else seed_question
+            )
+
+        # compress question or convert into conversational questions
+        if evolve_type != "simple":
+            prob = self.rng.uniform(0, 1)
+            if self.chat_qa and prob <= self.chat_qa:
+                question = self._conversational_question(question=question)
+            else:
+                question = self._compress_question(question=question)
+
+        return Proposal(question=question, text_chunk=text_chunk)
+
     def generate(
         self,
         documents: t.List[LlamaindexDocument] | t.List[LangchainDocument],
@@ -339,64 +414,20 @@ def generate(
 
             neighbor_nodes = doc_nodes_map[curr_node.source_node.node_id]
 
-            # Append multiple nodes randomly to remove chunking bias
-            size = self.rng.integers(1, 3)
-            nodes = (
-                self._get_neighbour_node(curr_node, neighbor_nodes)
-                if size > 1 and evolve_type != "multi_context"
-                else [curr_node]
-            )
-
-            text_chunk = " ".join([node.get_content() for node in nodes])
-            score = self._filter_context(text_chunk)
-            if not score:
-                continue
-            seed_question = self._seed_question(text_chunk)
-            is_valid_question = self._filter_question(seed_question)
-            if not is_valid_question:
-                continue
-
-            if evolve_type == "multi_context":
-                # Find most similar chunk in same document
-                node_embedding = self._embed_nodes([nodes[-1]])
-                neighbor_nodes = self._remove_nodes(neighbor_nodes, nodes)
-                neighbor_emb = self._embed_nodes(neighbor_nodes)
-
-                _, indices = get_top_k_embeddings(
-                    list(node_embedding.values())[0],
-                    list(neighbor_emb.values()),
-                    similarity_cutoff=self.threshold / 10,
-                )
-                if indices:
-                    # type cast indices from list[Any] to list[int]
-                    indices = t.cast(t.List[int], indices)
-                    best_neighbor = neighbor_nodes[indices[0]]
-                    question = self._multicontext_question(
-                        question=seed_question,
-                        context1=text_chunk,
-                        context2=best_neighbor.get_content(),
-                    )
-                    text_chunk = "\n".join([text_chunk, best_neighbor.get_content()])
-                else:
-                    continue
-
-            # for reasoning and conditional modes, evolve question with the
-            # functions from question_deep_map
-            else:
-                evolve_fun = question_deep_map.get(evolve_type)
-                question = (
-                    getattr(self, evolve_fun)(seed_question, text_chunk)
-                    if evolve_fun
-                    else seed_question
+            proposal = None
+            try:
+                proposal = self._make_proposal(
+                    curr_node, neighbor_nodes, evolve_type
                 )
+            except Exception as e:
+                err_cause = e.__cause__
+                if not isinstance(err_cause, retry_errors):
+                    raise e
 
-            # compress question or convert into conversational questions
-            if evolve_type != "simple":
-                prob = self.rng.uniform(0, 1)
-                if self.chat_qa and prob <= self.chat_qa:
-                    question = self._conversational_question(question=question)
-                else:
-                    question = self._compress_question(question=question)
+            if proposal is None:
+                continue
+            question = proposal.question
+            text_chunk = proposal.text_chunk
 
             is_valid_question = self._filter_question(question)
             if is_valid_question:
```
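Putting the two changes together, the control flow in `generate` now follows a catch, inspect the cause, skip pattern. Below is a minimal sketch of that pattern in isolation; the names and the simulated failure are illustrative, not ragas code.

```python
# Illustrative sketch of the skip-on-known-cause pattern used above.
retry_errors = (ValueError,)  # stand-in for (ValidationError,)


def make_proposal(i: int) -> str:
    """Pretend every third item trips the content filter and fails validation."""
    if i % 3 == 0:
        cause = ValueError("none is not an allowed value")
        raise RuntimeError("Fatal error occurred while running async tasks.", cause) from cause
    return f"question {i}"


questions = []
for i in range(1, 7):
    try:
        proposal = make_proposal(i)
    except Exception as e:
        # only swallow errors whose root cause is a known, skippable type
        if not isinstance(e.__cause__, retry_errors):
            raise
        continue
    questions.append(proposal)

print(questions)  # items 3 and 6 are skipped; the rest survive
```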
