diff --git a/src/openhuman/inference/provider/compatible_tests.rs b/src/openhuman/inference/provider/compatible_tests.rs index 1e123a3336..b6c7214214 100644 --- a/src/openhuman/inference/provider/compatible_tests.rs +++ b/src/openhuman/inference/provider/compatible_tests.rs @@ -2196,6 +2196,36 @@ fn reasoning_alias_captured_in_stream_delta() { ); } +/// Regression for Sentry TAURI-RUST-A5N: a provider that emits BOTH `reasoning` +/// and `reasoning_content` in the same message object must not fail with +/// `duplicate field \`reasoning_content\``. Both keys deserialize and fold into +/// the canonical field, which wins when both are present. +#[test] +fn reasoning_and_reasoning_content_both_present_does_not_error() { + let json = r#"{"choices":[{"message":{"content":null,"reasoning":"alias cot","reasoning_content":"canonical cot"}}]}"#; + let resp: ApiChatResponse = serde_json::from_str(json) + .expect("both reasoning keys must parse without a duplicate-field error"); + assert_eq!( + resp.choices[0].message.reasoning_content.as_deref(), + Some("canonical cot"), + "canonical reasoning_content wins when both keys are present" + ); +} + +/// Same regression on the streaming delta path (TAURI-RUST-A5N also hits the +/// native stream parser at `compatible_stream_native.rs`). +#[test] +fn reasoning_and_reasoning_content_both_present_in_stream_delta_does_not_error() { + let json = r#"{"choices":[{"delta":{"reasoning":"alias cot","reasoning_content":"canonical cot"},"finish_reason":null}]}"#; + let chunk: StreamChunkResponse = serde_json::from_str(json) + .expect("both reasoning keys must parse without a duplicate-field error"); + assert_eq!( + chunk.choices[0].delta.reasoning_content.as_deref(), + Some("canonical cot"), + "canonical reasoning_content wins when both keys are present" + ); +} + /// End-to-end: a tool-call turn whose reasoning arrived under the `reasoning` /// alias must still be surfaced by `parse_native_response` so the agent loop /// can replay it on the follow-up request (the issue #3094 failure path). diff --git a/src/openhuman/inference/provider/compatible_types.rs b/src/openhuman/inference/provider/compatible_types.rs index b5cd83e037..ade2234a71 100644 --- a/src/openhuman/inference/provider/compatible_types.rs +++ b/src/openhuman/inference/provider/compatible_types.rs @@ -4,7 +4,7 @@ //! as appropriate). External code only sees the public API on //! [`super::OpenAiCompatibleProvider`]. -use serde::{Deserialize, Serialize}; +use serde::{Deserialize, Deserializer, Serialize}; // ── Request bodies ──────────────────────────────────────────────────────────── @@ -316,25 +316,58 @@ pub(crate) struct OpenHumanBilling { pub(crate) charged_amount_usd: f64, } -#[derive(Debug, Deserialize, Serialize)] +#[derive(Debug, Serialize)] pub(crate) struct ResponseMessage { - #[serde(default)] pub(crate) content: Option, /// Reasoning/thinking models may return their chain-of-thought in a /// dedicated field instead of (or alongside) `content`. DeepSeek, Qwen3 and /// GLM-4 name it `reasoning_content`; OpenRouter and vLLM/SGLang-backed - /// OpenAI-compatible proxies emit it as `reasoning`. Accept both so the CoT - /// is captured regardless of the (third-party) provider's field name — it + /// OpenAI-compatible proxies emit it as `reasoning`. Both names fold into + /// this single field (see the manual `Deserialize` impl below) — the CoT /// must be echoed back verbatim on tool-call turns or thinking models reject /// the follow-up request with HTTP 400. - #[serde(default, alias = "reasoning")] pub(crate) reasoning_content: Option, - #[serde(default)] pub(crate) tool_calls: Option>, - #[serde(default)] pub(crate) function_call: Option, } +// Manual `Deserialize` so that `reasoning` and `reasoning_content` are accepted +// as DISTINCT wire keys and then folded into the single canonical field. +// +// A serde `alias` maps both names onto one field slot, which makes a provider +// that emits BOTH keys in the same object (some OpenRouter / vLLM-SGLang +// proxies do) fail with `duplicate field \`reasoning_content\``, dropping the +// entire response. Deserializing them as separate optional fields tolerates +// any combination; the canonical `reasoning_content` wins when both are present. +impl<'de> Deserialize<'de> for ResponseMessage { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + #[derive(Deserialize)] + struct Shadow { + #[serde(default)] + content: Option, + #[serde(default)] + reasoning_content: Option, + #[serde(default)] + reasoning: Option, + #[serde(default)] + tool_calls: Option>, + #[serde(default)] + function_call: Option, + } + + let shadow = Shadow::deserialize(deserializer)?; + Ok(ResponseMessage { + content: shadow.content, + reasoning_content: shadow.reasoning_content.or(shadow.reasoning), + tool_calls: shadow.tool_calls, + function_call: shadow.function_call, + }) + } +} + impl ResponseMessage { /// Extract text content, falling back to `reasoning_content` when `content` /// is missing or empty. Reasoning/thinking models (Qwen3, GLM-4, etc.) @@ -426,22 +459,50 @@ pub(crate) struct StreamChoice { pub(crate) finish_reason: Option, } -#[derive(Debug, Deserialize)] +#[derive(Debug)] pub(crate) struct StreamDelta { - #[serde(default)] pub(crate) content: Option, /// Reasoning/thinking models may stream their chain-of-thought via /// `reasoning_content` (DeepSeek/Qwen3/GLM-4) or `reasoning` - /// (OpenRouter, vLLM/SGLang proxies). Accept both delta field names. - #[serde(default, alias = "reasoning")] + /// (OpenRouter, vLLM/SGLang proxies). Both delta field names fold into + /// this single field (see the manual `Deserialize` impl below). pub(crate) reasoning_content: Option, /// Native tool-call chunks. Each entry is keyed by `index`; the first /// chunk for a given index carries `id`/`type`/`function.name`, later /// chunks only carry fragments of `function.arguments`. - #[serde(default)] pub(crate) tool_calls: Option>, } +// Manual `Deserialize` for the same reason as `ResponseMessage`: a streaming +// delta that carries both `reasoning` and `reasoning_content` must not fail +// with `duplicate field`. They deserialize as distinct keys and fold into the +// canonical `reasoning_content` (canonical wins when both are present). +impl<'de> Deserialize<'de> for StreamDelta { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + #[derive(Deserialize)] + struct Shadow { + #[serde(default)] + content: Option, + #[serde(default)] + reasoning_content: Option, + #[serde(default)] + reasoning: Option, + #[serde(default)] + tool_calls: Option>, + } + + let shadow = Shadow::deserialize(deserializer)?; + Ok(StreamDelta { + content: shadow.content, + reasoning_content: shadow.reasoning_content.or(shadow.reasoning), + tool_calls: shadow.tool_calls, + }) + } +} + #[derive(Debug, Deserialize)] pub(crate) struct StreamToolCallDelta { /// Index of this tool call within the assistant message. Multiple