
Commit b8ba171

yuukidach and Dash authored
feat(testset_generator): support error catching in generation process (#368)
I am using the Azure OpenAI API to generate a testset. However, Azure's API is sensitive to content and can easily return a response like the following:

```python
{'id': 'fake_id', 'choices': [{'finish_reason': 'content_filter', 'index': 0, 'message': {'content': None, 'role': 'assistant', 'function_call': None, 'tool_calls': None}}], 'created': 1702175894, 'model': 'gpt-4-32k', 'object': 'chat.completion', 'system_fingerprint': None, 'usage': {'completion_tokens': 21, 'prompt_tokens': 4271, 'total_tokens': 4292}}
```

The `finish_reason` is `content_filter` and the content is `None`. When [`create_llm_result()`](https://github.com/explodinggradients/ragas/blob/main/src/ragas/llms/openai.py#L134) runs, an error is raised because the content is `None`:

```
Error occurred: ('Fatal error occurred while running async tasks.', ValidationError(model='Generation', errors=[{'loc': ('text',), 'msg': 'none is not an allowed value', 'type': 'type_error.none.not_allowed'}]))
```

This error interrupts the entire generation task. In fact, when such an error occurs we can simply skip it and move on to generating another test item.

---------

Co-authored-by: Dash <[email protected]>
1 parent 9b413ac commit b8ba171
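For context, here is a minimal, self-contained sketch of the failure mode described in the commit message. The `Generation` stand-in model and the sample payload below are illustrative only, not ragas or OpenAI client code: a `content_filter` response carries `content: None`, and validating it against a schema whose `text` field is a required string raises the `ValidationError` that previously aborted the whole run.

```python
# Illustrative sketch only: a stand-in for the schema ragas builds from the
# LLM response; names and payload here are hypothetical.
from pydantic import BaseModel, ValidationError


class Generation(BaseModel):
    text: str  # required string field, like the field the real error points at


# Shape of an Azure choice whose completion was blocked by the content filter
filtered_choice = {
    "finish_reason": "content_filter",
    "message": {"content": None, "role": "assistant"},
}

content = filtered_choice["message"]["content"]
try:
    Generation(text=content)  # content is None -> ValidationError
except ValidationError as err:
    # instead of letting this abort the whole testset run, the commit skips
    # the offending item and continues with the next one
    print("skipping this test item:", err.errors()[0]["msg"])
```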

File tree

2 files changed: +88 −57 lines


src/ragas/async_utils.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -44,5 +44,5 @@ async def _gather() -> List[Any]:
         # run the operation w/o tqdm on hitting a fatal
         # may occur in some environments where tqdm.asyncio
         # is not supported
-        raise RuntimeError("Fatal error occurred while running async tasks.", e)
+        raise RuntimeError("Fatal error occurred while running async tasks.", e) from e
     return outputs
```
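The only change in this file is the added `from e`. As a quick illustration (the function below is a hypothetical stand-in, not the ragas helper), explicit chaining stores the original exception on `__cause__`, which is exactly what the test-set generator inspects further down to decide whether a failure can be skipped:

```python
# Hypothetical stand-in for the async runner: it wraps any failure in a
# RuntimeError, and `from e` records the original error as __cause__.
def run_tasks_sketch() -> None:
    try:
        raise ValueError("one generation failed validation")
    except Exception as e:
        raise RuntimeError("Fatal error occurred while running async tasks.", e) from e


try:
    run_tasks_sketch()
except RuntimeError as wrapped:
    print(type(wrapped.__cause__).__name__)  # prints: ValueError
```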

src/ragas/testset/testset_generator.py

Lines changed: 87 additions & 56 deletions
```diff
@@ -16,6 +16,11 @@
         "Please, install it with `pip install llama_index`."
     )
 
+try:
+    from pydantic.v1 import ValidationError
+except ImportError:
+    from pydantic import ValidationError
+
 import numpy as np
 import numpy.testing as npt
 import pandas as pd
@@ -58,6 +63,10 @@
     "conditional": "_condition_question",
 }
 
+retry_errors = (
+    ValidationError,
+)
+
 DataRow = namedtuple(
     "DataRow",
     [
@@ -69,6 +78,8 @@
     ],
 )
 
+Proposal = namedtuple("Proposal", ["question", "text_chunk"])
+
 
 @dataclass
 class TestDataset:
```
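The new `Proposal` namedtuple is just a lightweight return type: it lets `_make_proposal` hand back either `None` (skip this item) or a question paired with its supporting text chunk. A tiny illustrative sketch of how such a namedtuple behaves (the example values are made up):

```python
from collections import namedtuple

# Same shape as the Proposal added above; the values are illustrative.
Proposal = namedtuple("Proposal", ["question", "text_chunk"])

p = Proposal(question="What does the retry tuple control?",
             text_chunk="retry_errors lists the skippable error causes.")
print(p.question)    # fields are read by name, e.g. proposal.question in generate()
print(p.text_chunk)
```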
```diff
@@ -291,6 +302,70 @@ def _embed_nodes(self, nodes: t.List[BaseNode]) -> t.Dict[str, t.List[float]]:
 
         return embeddings
 
+    def _make_proposal(
+        self, cur_node: BaseNode, neighbor_nodes: t.List[BaseNode], evolve_type: str
+    ) -> t.Union[Proposal, None]:
+        # Append multiple nodes randomly to remove chunking bias
+        size = self.rng.integers(1, 3)
+        nodes = (
+            self._get_neighbour_node(cur_node, neighbor_nodes)
+            if size > 1 and evolve_type != "multi_context"
+            else [cur_node]
+        )
+
+        text_chunk = " ".join([node.get_content() for node in nodes])
+        score = self._filter_context(text_chunk)
+        if not score:
+            return None
+        seed_question = self._seed_question(text_chunk)
+        is_valid_question = self._filter_question(seed_question)
+        if not is_valid_question:
+            return None
+
+        if evolve_type == "multi_context":
+            # Find most similar chunk in same document
+            node_embedding = self._embed_nodes([nodes[-1]])
+            neighbor_nodes = self._remove_nodes(neighbor_nodes, nodes)
+            neighbor_emb = self._embed_nodes(neighbor_nodes)
+
+            _, indices = get_top_k_embeddings(
+                list(node_embedding.values())[0],
+                list(neighbor_emb.values()),
+                similarity_cutoff=self.threshold / 10,
+            )
+            if indices:
+                # type cast indices from list[Any] to list[int]
+                indices = t.cast(t.List[int], indices)
+                best_neighbor = neighbor_nodes[indices[0]]
+                question = self._multicontext_question(
+                    question=seed_question,
+                    context1=text_chunk,
+                    context2=best_neighbor.get_content(),
+                )
+                text_chunk = "\n".join([text_chunk, best_neighbor.get_content()])
+            else:
+                return None
+
+        # for reasoning and conditional modes, evolve question with the
+        # functions from question_deep_map
+        else:
+            evolve_fun = question_deep_map.get(evolve_type)
+            question = (
+                getattr(self, evolve_fun)(seed_question, text_chunk)
+                if evolve_fun
+                else seed_question
+            )
+
+        # compress question or convert into conversational questions
+        if evolve_type != "simple":
+            prob = self.rng.uniform(0, 1)
+            if self.chat_qa and prob <= self.chat_qa:
+                question = self._conversational_question(question=question)
+            else:
+                question = self._compress_question(question=question)
+
+        return Proposal(question=question, text_chunk=text_chunk)
+
     def generate(
         self,
         documents: t.List[LlamaindexDocument] | t.List[LangchainDocument],
@@ -339,64 +414,20 @@ def generate(
 
             neighbor_nodes = doc_nodes_map[curr_node.source_node.node_id]
 
-            # Append multiple nodes randomly to remove chunking bias
-            size = self.rng.integers(1, 3)
-            nodes = (
-                self._get_neighbour_node(curr_node, neighbor_nodes)
-                if size > 1 and evolve_type != "multi_context"
-                else [curr_node]
-            )
-
-            text_chunk = " ".join([node.get_content() for node in nodes])
-            score = self._filter_context(text_chunk)
-            if not score:
-                continue
-            seed_question = self._seed_question(text_chunk)
-            is_valid_question = self._filter_question(seed_question)
-            if not is_valid_question:
-                continue
-
-            if evolve_type == "multi_context":
-                # Find most similar chunk in same document
-                node_embedding = self._embed_nodes([nodes[-1]])
-                neighbor_nodes = self._remove_nodes(neighbor_nodes, nodes)
-                neighbor_emb = self._embed_nodes(neighbor_nodes)
-
-                _, indices = get_top_k_embeddings(
-                    list(node_embedding.values())[0],
-                    list(neighbor_emb.values()),
-                    similarity_cutoff=self.threshold / 10,
-                )
-                if indices:
-                    # type cast indices from list[Any] to list[int]
-                    indices = t.cast(t.List[int], indices)
-                    best_neighbor = neighbor_nodes[indices[0]]
-                    question = self._multicontext_question(
-                        question=seed_question,
-                        context1=text_chunk,
-                        context2=best_neighbor.get_content(),
-                    )
-                    text_chunk = "\n".join([text_chunk, best_neighbor.get_content()])
-                else:
-                    continue
-
-            # for reasoning and conditional modes, evolve question with the
-            # functions from question_deep_map
-            else:
-                evolve_fun = question_deep_map.get(evolve_type)
-                question = (
-                    getattr(self, evolve_fun)(seed_question, text_chunk)
-                    if evolve_fun
-                    else seed_question
+            proposal = None
+            try:
+                proposal = self._make_proposal(
+                    curr_node, neighbor_nodes, evolve_type
                 )
+            except Exception as e:
+                err_cause = e.__cause__
+                if not isinstance(err_cause, retry_errors):
+                    raise e
 
-            # compress question or convert into conversational questions
-            if evolve_type != "simple":
-                prob = self.rng.uniform(0, 1)
-                if self.chat_qa and prob <= self.chat_qa:
-                    question = self._conversational_question(question=question)
-                else:
-                    question = self._compress_question(question=question)
+            if proposal is None:
+                continue
+            question = proposal.question
+            text_chunk = proposal.text_chunk
 
             is_valid_question = self._filter_question(question)
             if is_valid_question:
```
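Putting the two changes together, the control flow in `generate` now follows a catch, inspect the cause, skip pattern. Below is a minimal sketch of that pattern in isolation; the names and the simulated failure are illustrative, not ragas code.

```python
# Illustrative sketch of the skip-on-known-cause pattern used above.
retry_errors = (ValueError,)  # stand-in for (ValidationError,)


def make_proposal(i: int) -> str:
    """Pretend every third item trips the content filter and fails validation."""
    if i % 3 == 0:
        cause = ValueError("none is not an allowed value")
        raise RuntimeError("Fatal error occurred while running async tasks.", cause) from cause
    return f"question {i}"


questions = []
for i in range(1, 7):
    try:
        proposal = make_proposal(i)
    except Exception as e:
        # only swallow errors whose root cause is a known, skippable type
        if not isinstance(e.__cause__, retry_errors):
            raise
        continue
    questions.append(proposal)

print(questions)  # items 3 and 6 are skipped; the rest survive
```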
