From fb29cb328b5699e2f154cf8f9ed929cb75900904 Mon Sep 17 00:00:00 2001
From: Valentin <fernandezradovich@gmail.com>
Date: Tue, 18 Mar 2025 14:17:16 -0300
Subject: [PATCH] fix: handle whitespace-only content in structured output
 parsing

This commit fixes an issue where the API client would crash with JSON parsing
errors when a model returns whitespace-only content (spaces, newlines, etc.)
during structured output parsing.

Changes:
- Add a check in _parse_content that detects empty or whitespace-only
  content and raises a descriptive ValueError before attempting JSON parsing
- Improve streaming parser to skip JSON parsing for whitespace-only content
- Update maybe_parse_content to catch and log parsing errors instead of
  letting them propagate
- Add a similar whitespace-only check to streaming tool argument parsing

This fixes cases where users were getting "EOF while parsing a value" errors
when using client.beta.chat.completions.parse with models that occasionally
return only whitespace instead of structured JSON.

With this change, parsing whitespace-only content now returns None for the
parsed field instead of raising an exception, with an appropriate warning
logged.
---
 src/openai/lib/_parsing/_completions.py       | 15 ++++++++++++-
 src/openai/lib/streaming/chat/_completions.py | 22 ++++++++++++-------
 2 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/src/openai/lib/_parsing/_completions.py b/src/openai/lib/_parsing/_completions.py
index c160070b66..67a687c6f1 100644
--- a/src/openai/lib/_parsing/_completions.py
+++ b/src/openai/lib/_parsing/_completions.py
@@ -158,7 +158,13 @@ def maybe_parse_content(
     message: ChatCompletionMessage | ParsedChatCompletionMessage[object],
 ) -> ResponseFormatT | None:
     if has_rich_response_format(response_format) and message.content and not message.refusal:
-        return _parse_content(response_format, message.content)
+        try:
+            return _parse_content(response_format, message.content)
+        except ValueError as e:
+            # any ValueError raised while parsing (e.g. whitespace-only content) is logged and yields None
+            import logging
+            logging.warning(f"Failed to parse content: {e}")
+            return None
 
     return None
 
@@ -217,6 +223,13 @@ def is_parseable_tool(input_tool: ChatCompletionToolParam) -> bool:
 
 
 def _parse_content(response_format: type[ResponseFormatT], content: str) -> ResponseFormatT:
+    # checking here if the content is empty or contains only whitespace
+    if not content or content.isspace():
+        raise ValueError(
+            f"Cannot parse empty or whitespace-only content as {response_format.__name__}. "
+            "The model returned content with no valid JSON."
+        )
+        
     if is_basemodel_type(response_format):
         return cast(ResponseFormatT, model_parse_json(response_format, content))
 
diff --git a/src/openai/lib/streaming/chat/_completions.py b/src/openai/lib/streaming/chat/_completions.py
index 2146091354..c11a727b68 100644
--- a/src/openai/lib/streaming/chat/_completions.py
+++ b/src/openai/lib/streaming/chat/_completions.py
@@ -435,10 +435,13 @@ def _accumulate_chunk(self, chunk: ChatCompletionChunk) -> ParsedChatCompletionS
                 and not choice_snapshot.message.refusal
                 and is_given(self._rich_response_format)
             ):
-                choice_snapshot.message.parsed = from_json(
-                    bytes(choice_snapshot.message.content, "utf-8"),
-                    partial_mode=True,
-                )
+                # skipping parsing if content is just whitespace
+                content = choice_snapshot.message.content
+                if content.strip():
+                    choice_snapshot.message.parsed = from_json(
+                        bytes(content, "utf-8"),
+                        partial_mode=True,
+                    )
 
             for tool_call_chunk in choice.delta.tool_calls or []:
                 tool_call_snapshot = (choice_snapshot.message.tool_calls or [])[tool_call_chunk.index]
@@ -453,10 +456,13 @@ def _accumulate_chunk(self, chunk: ChatCompletionChunk) -> ParsedChatCompletionS
                         and input_tool.get("function", {}).get("strict")
                         and tool_call_snapshot.function.arguments
                     ):
-                        tool_call_snapshot.function.parsed_arguments = from_json(
-                            bytes(tool_call_snapshot.function.arguments, "utf-8"),
-                            partial_mode=True,
-                        )
+                        arguments = tool_call_snapshot.function.arguments
+                        # skipping parsing if arguments is just whitespace
+                        if arguments.strip():
+                            tool_call_snapshot.function.parsed_arguments = from_json(
+                                bytes(arguments, "utf-8"),
+                                partial_mode=True,
+                            )
                 elif TYPE_CHECKING:  # type: ignore[unreachable]
                     assert_never(tool_call_snapshot)