diff --git a/pydantic_evals/pydantic_evals/generation.py b/pydantic_evals/pydantic_evals/generation.py
index c1e68a6ea8..ba95b7a4d7 100644
--- a/pydantic_evals/pydantic_evals/generation.py
+++ b/pydantic_evals/pydantic_evals/generation.py
@@ -14,6 +14,7 @@ from typing_extensions import TypeVar
 
 from pydantic_ai import Agent, models
+from pydantic_ai.output import NativeOutput, StructuredDict
 from pydantic_evals import Dataset
 from pydantic_evals.evaluators.evaluator import Evaluator
 
@@ -59,21 +60,16 @@ async def generate_dataset(
     """
     output_schema = dataset_type.model_json_schema_with_evaluators(custom_evaluator_types)
 
-    # TODO(DavidM): Update this once we add better response_format and/or ResultTool support to Pydantic AI
     agent = Agent(
         model,
-        system_prompt=(
-            f'Generate an object that is in compliance with this JSON schema:\n{output_schema}\n\n'
-            f'Include {n_examples} example cases.'
-            ' You must not include any characters in your response before the opening { of the JSON object, or after the closing }.'
-        ),
-        output_type=str,
+        system_prompt=f'Include {n_examples} example cases.',
+        output_type=NativeOutput(StructuredDict(output_schema)),
         retries=1,
     )
 
     result = await agent.run(extra_instructions or 'Please generate the object.')
     try:
-        result = dataset_type.from_text(result.output, fmt='json', custom_evaluator_types=custom_evaluator_types)
+        result = dataset_type.from_dict(result.output, custom_evaluator_types=custom_evaluator_types)
     except ValidationError as e:  # pragma: no cover
         print(f'Raw response from model:\n{result.output}')
         raise e
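
A minimal sketch, outside this patch, of the `NativeOutput(StructuredDict(...))` pattern the change switches to: instead of prompting for raw JSON and re-parsing it from text, the agent asks the model for native structured output matching a JSON schema and returns a dict directly. The schema and model name below are illustrative assumptions, not values from this repo.

```python
# Sketch of the NativeOutput/StructuredDict combination used above;
# the schema and model name are illustrative assumptions.
from pydantic_ai import Agent
from pydantic_ai.output import NativeOutput, StructuredDict

# An object-typed JSON schema, analogous to the dataset schema produced by
# model_json_schema_with_evaluators() in the patched code.
example_schema = {
    'type': 'object',
    'properties': {'name': {'type': 'string'}, 'age': {'type': 'integer'}},
    'required': ['name', 'age'],
}

agent = Agent(
    'openai:gpt-4o',  # any model that supports native structured output
    output_type=NativeOutput(StructuredDict(example_schema)),
    retries=1,
)

result = agent.run_sync('Generate an example person.')
print(result.output)  # a dict already matching the schema, e.g. {'name': 'Ada', 'age': 36}
```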