fix(prompting): Structured output class invalid chars (#74)

dolaameng · web-flow · commit 53b9aa138c7c · 2026-03-16T10:38:09.000-07:00
Fix the error when parsing primitive types in LLM structured output:

```
"Expected the \'name\' field of a(n) \'json_schema\' \'response_format\' to be at most 64 characters containing a-z, A-Z, 0-9, dashes, or underscores; found: \'TypedResponse[int]\'."
```
diff --git a/golden_tests/test_cookbook_examples.py b/golden_tests/test_cookbook_examples.py
@@ -60,6 +60,8 @@
     "anthropic/claude-sonnet-4-5@20250929",
     "deepseek-ai/deepseek-r1-0528",
     "deepseek-ai/deepseek-v3.2",
+    "zai/glm-5",
+    "google/gemini-3.1-flash-lite-preview",
 }
 
 # Models to be used as judges for evaluation.
@@ -406,6 +408,7 @@ def test_dataset_eval(llm, df) -> tuple[float, float]:
         "deepseek-ai/deepseek-v3.2",
         "qwen/qwen3-235b-a22b-instruct-2507",
         "qwen/qwen3-next-80b-a3b-instruct",
+        "zai/glm-5",
     }
 )
 @kbench.task()
@@ -436,6 +439,7 @@ def test_image_url(llm):
         "qwen/qwen3-235b-a22b-instruct-2507",
         "qwen/qwen3-next-80b-a3b-instruct",
         "anthropic/claude-sonnet-4-5@20250929",
+        "zai/glm-5",
     }
 )
 @kbench.task()
@@ -466,6 +470,7 @@ def test_image_base64(llm):
         "deepseek-ai/deepseek-v3.2",
         "qwen/qwen3-235b-a22b-instruct-2507",
         "qwen/qwen3-next-80b-a3b-instruct",
+        "zai/glm-5",
     }
 )
 @kbench.task()
@@ -511,6 +516,7 @@ def run_simple_calculator(a: float, b: float, operator: str) -> float:
         "deepseek-ai/deepseek-r1-0528",
         "deepseek-ai/deepseek-v3.2",
         "google/gemma-3-12b",
+        "google/gemini-3.1-flash-lite-preview",
     }
 )
 @kbench.task()
diff --git a/src/kaggle_benchmarks/prompting.py b/src/kaggle_benchmarks/prompting.py
@@ -63,6 +63,14 @@ def get_payload(self):
 class TypedResponse(RenderablePydanticModel, Generic[T]):
     value: T
 
+    model_config = pydantic.ConfigDict(
+        title="Response",
+        extra="forbid",
+        arbitrary_types_allowed=False,
+    )
+
+    __name__ = "Response"
+
 
 class ResponseParsingError(ValueError):
     """Error raised when a model response cannot be parsed into the desired schema."""
@@ -178,6 +186,7 @@ def root_model_handler(cls):
 @handler(types=(float, int, datetime.datetime, bool))
 def primitive_type_handler(cls):
     model = TypedResponse[cls]
+    model.__name__ = "Response"
     response = yield (
         f"Output JSON using this schema: {json.dumps(model.model_json_schema())}",
         model,

Original file line number	Diff line number	Diff line change
`@@ -60,6 +60,8 @@`
`60`	`60`	`"anthropic/claude-sonnet-4-5@20250929",`
`61`	`61`	`"deepseek-ai/deepseek-r1-0528",`
`62`	`62`	`"deepseek-ai/deepseek-v3.2",`
	`63`	`+ "zai/glm-5",`
	`64`	`+ "google/gemini-3.1-flash-lite-preview",`
`63`	`65`	`}`
`64`	`66`
`65`	`67`	`# Models to be used as judges for evaluation.`
`@@ -406,6 +408,7 @@ def test_dataset_eval(llm, df) -> tuple[float, float]:`
`406`	`408`	`"deepseek-ai/deepseek-v3.2",`
`407`	`409`	`"qwen/qwen3-235b-a22b-instruct-2507",`
`408`	`410`	`"qwen/qwen3-next-80b-a3b-instruct",`
	`411`	`+ "zai/glm-5",`
`409`	`412`	`}`
`410`	`413`	`)`
`411`	`414`	`@kbench.task()`
`@@ -436,6 +439,7 @@ def test_image_url(llm):`
`436`	`439`	`"qwen/qwen3-235b-a22b-instruct-2507",`
`437`	`440`	`"qwen/qwen3-next-80b-a3b-instruct",`
`438`	`441`	`"anthropic/claude-sonnet-4-5@20250929",`
	`442`	`+ "zai/glm-5",`
`439`	`443`	`}`
`440`	`444`	`)`
`441`	`445`	`@kbench.task()`
`@@ -466,6 +470,7 @@ def test_image_base64(llm):`
`466`	`470`	`"deepseek-ai/deepseek-v3.2",`
`467`	`471`	`"qwen/qwen3-235b-a22b-instruct-2507",`
`468`	`472`	`"qwen/qwen3-next-80b-a3b-instruct",`
	`473`	`+ "zai/glm-5",`
`469`	`474`	`}`
`470`	`475`	`)`
`471`	`476`	`@kbench.task()`
`@@ -511,6 +516,7 @@ def run_simple_calculator(a: float, b: float, operator: str) -> float:`
`511`	`516`	`"deepseek-ai/deepseek-r1-0528",`
`512`	`517`	`"deepseek-ai/deepseek-v3.2",`
`513`	`518`	`"google/gemma-3-12b",`
	`519`	`+ "google/gemini-3.1-flash-lite-preview",`
`514`	`520`	`}`
`515`	`521`	`)`
`516`	`522`	`@kbench.task()`