From fb29cb328b5699e2f154cf8f9ed929cb75900904 Mon Sep 17 00:00:00 2001
From: Valentin
Date: Tue, 18 Mar 2025 14:17:16 -0300
Subject: [PATCH] fix: handle whitespace-only content in structured output parsing

This commit fixes an issue where the API client would crash with JSON
parsing errors when a model returns whitespace-only content (spaces,
newlines, etc.) during structured output parsing.

Changes:
- Add a check in _parse_content that detects empty or whitespace-only
  content and raises a descriptive ValueError before attempting JSON
  parsing
- Improve the streaming parser to skip JSON parsing for whitespace-only
  content
- Update maybe_parse_content to catch the resulting ValueError, log a
  warning, and return None instead of letting the error propagate
- Add the same whitespace check for tool-call argument parsing in the
  streaming parser

This fixes cases where users were getting "EOF while parsing a value"
errors when using client.beta.chat.completions.parse with models that
occasionally return only whitespace instead of structured JSON.

With this change, parsing whitespace-only content now returns None for
the parsed field instead of raising an exception, with an appropriate
warning logged.
--- src/openai/lib/_parsing/_completions.py | 15 ++++++++++++- src/openai/lib/streaming/chat/_completions.py | 22 ++++++++++++------- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/src/openai/lib/_parsing/_completions.py b/src/openai/lib/_parsing/_completions.py index c160070b66..67a687c6f1 100644 --- a/src/openai/lib/_parsing/_completions.py +++ b/src/openai/lib/_parsing/_completions.py @@ -158,7 +158,13 @@ def maybe_parse_content( message: ChatCompletionMessage | ParsedChatCompletionMessage[object], ) -> ResponseFormatT | None: if has_rich_response_format(response_format) and message.content and not message.refusal: - return _parse_content(response_format, message.content) + try: + return _parse_content(response_format, message.content) + except ValueError as e: + # if parsing fails due to whitespace content, log a warning and return None + import logging + logging.warning(f"Failed to parse content: {e}") + return None return None @@ -217,6 +223,13 @@ def is_parseable_tool(input_tool: ChatCompletionToolParam) -> bool: def _parse_content(response_format: type[ResponseFormatT], content: str) -> ResponseFormatT: + # checking here if the content is empty or contains only whitespace + if not content or content.isspace(): + raise ValueError( + f"Cannot parse empty or whitespace-only content as {response_format.__name__}. " + "The model returned content with no valid JSON." 
+ ) + if is_basemodel_type(response_format): return cast(ResponseFormatT, model_parse_json(response_format, content)) diff --git a/src/openai/lib/streaming/chat/_completions.py b/src/openai/lib/streaming/chat/_completions.py index 2146091354..c11a727b68 100644 --- a/src/openai/lib/streaming/chat/_completions.py +++ b/src/openai/lib/streaming/chat/_completions.py @@ -435,10 +435,13 @@ def _accumulate_chunk(self, chunk: ChatCompletionChunk) -> ParsedChatCompletionS and not choice_snapshot.message.refusal and is_given(self._rich_response_format) ): - choice_snapshot.message.parsed = from_json( - bytes(choice_snapshot.message.content, "utf-8"), - partial_mode=True, - ) + # skipping parsing if content is just whitespace + content = choice_snapshot.message.content + if content.strip(): + choice_snapshot.message.parsed = from_json( + bytes(content, "utf-8"), + partial_mode=True, + ) for tool_call_chunk in choice.delta.tool_calls or []: tool_call_snapshot = (choice_snapshot.message.tool_calls or [])[tool_call_chunk.index] @@ -453,10 +456,13 @@ def _accumulate_chunk(self, chunk: ChatCompletionChunk) -> ParsedChatCompletionS and input_tool.get("function", {}).get("strict") and tool_call_snapshot.function.arguments ): - tool_call_snapshot.function.parsed_arguments = from_json( - bytes(tool_call_snapshot.function.arguments, "utf-8"), - partial_mode=True, - ) + arguments = tool_call_snapshot.function.arguments + # skipping parsing if arguments is just whitespace + if arguments.strip(): + tool_call_snapshot.function.parsed_arguments = from_json( + bytes(arguments, "utf-8"), + partial_mode=True, + ) elif TYPE_CHECKING: # type: ignore[unreachable] assert_never(tool_call_snapshot)