tinyhumansai · senamakel · Jun 1, 2026 · Jun 1, 2026 · sanil-23 · Jun 1, 2026
diff --git a/src/openhuman/inference/local/lm_studio.rs b/src/openhuman/inference/local/lm_studio.rs
@@ -187,7 +187,9 @@ pub(crate) struct LmStudioChatChoice {
 pub(crate) struct LmStudioChatResponseMessage {
     #[serde(default)]
     pub content: Option<String>,
-    #[serde(default)]
+    /// Local reasoning models expose chain-of-thought as `reasoning_content` or `reasoning`,
+    /// so both names are accepted. NOTE(review): serde may reject both keys at once — confirm.
+    #[serde(default, alias = "reasoning")]
     pub reasoning_content: Option<String>,
 }
 
@@ -291,4 +293,13 @@ mod tests {
         };
         assert_eq!(msg.effective_content(), "Visible reply");
     }
+
+    #[test]
+    fn reasoning_content_accepts_reasoning_alias() {
+        // Issue #3094: a runtime that calls the field `reasoning` has to land in
+        // `reasoning_content`, the same slot the canonical key fills.
+        let payload = r#"{"content":null,"reasoning":"local cot"}"#;
+        let parsed = serde_json::from_str::<LmStudioChatResponseMessage>(payload).unwrap();
+        assert_eq!(parsed.reasoning_content.as_deref(), Some("local cot"));
+    }
 }
diff --git a/src/openhuman/inference/provider/compatible.rs b/src/openhuman/inference/provider/compatible.rs
@@ -417,10 +417,18 @@ impl OpenAiCompatibleProvider {
                                         })
                                         .collect::<Vec<_>>();
 
-                                    let content = value
-                                        .get("content")
-                                        .and_then(serde_json::Value::as_str)
-                                        .map(ToString::to_string);
+                                    // Default to empty string (not None) for
+                                    // tool-call assistant messages so the wire
+                                    // emits `"content":""` rather than omitting
+                                    // the key — some providers reject a missing
+                                    // content alongside reasoning_content.
+                                    let content = Some(
+                                        value
+                                            .get("content")
+                                            .and_then(serde_json::Value::as_str)
+                                            .unwrap_or("")
+                                            .to_string(),
+                                    );
 
                                     // Replay the assistant's reasoning so
                                     // DeepSeek thinking mode accepts the
@@ -556,6 +564,14 @@ impl OpenAiCompatibleProvider {
                 pruned_calls += before - kept.len();
                 let kept_ids: HashSet<String> = kept.iter().filter_map(|c| c.id.clone()).collect();
                 msg.tool_calls = if kept.is_empty() { None } else { Some(kept) };
+                // Strip reasoning_content when the message collapses to plain
+                // text (no surviving tool_calls). Thinking-mode providers
+                // (DeepSeek) require reasoning only on tool-call assistant
+                // messages; a stale reasoning_content on a non-tool-call
+                // message is at best ignored and at worst a malformed shape.
+                if msg.tool_calls.is_none() {
+                    msg.reasoning_content = None;
+                }
                 out.push(msg);
 
                 // Emit the run's responses that map to a surviving call; drop the
@@ -1637,6 +1653,13 @@ impl Provider for OpenAiCompatibleProvider {
             })
             .collect::<Vec<_>>();
 
+        tracing::debug!(
+            has_reasoning_content = reasoning_content.is_some(),
+            reasoning_content_chars = reasoning_content.as_ref().map_or(0, |r| r.chars().count()),
+            tool_calls = tool_calls.len(),
+            "[provider:chat] reasoning_content capture (non-streaming)"
+        );
+
         Ok(ProviderChatResponse {
             text,
             tool_calls,

diff --git a/src/openhuman/inference/provider/compatible_tests.rs b/src/openhuman/inference/provider/compatible_tests.rs
@@ -1060,17 +1060,21 @@ fn tool_invariants_drop_orphan_but_keep_following_cycle() {
 /// the wire message.
 #[test]
 fn convert_preserves_reasoning_content_on_tool_call_turn() {
-    let input = vec![ChatMessage::assistant(
-        r#"{"content":null,"reasoning_content":"let me think about this","tool_calls":[{"id":"call_x","name":"shell","arguments":"{}"}]}"#,
-    )];
+    let input = vec![
+        ChatMessage::assistant(
+            r#"{"content":null,"reasoning_content":"let me think about this","tool_calls":[{"id":"call_x","name":"shell","arguments":"{}"}]}"#,
+        ),
+        ChatMessage::tool(r#"{"tool_call_id":"call_x","content":"result"}"#),
+    ];
 
     let converted = OpenAiCompatibleProvider::convert_messages_for_native(&input);
 
-    assert_eq!(converted.len(), 1);
+    // First message is the assistant with tool_calls + reasoning.
     assert_eq!(
         converted[0].reasoning_content.as_deref(),
         Some("let me think about this")
     );
+    assert!(converted[0].tool_calls.is_some());
 
     // The wire payload must actually carry the field for DeepSeek to accept it.
     let wire = serde_json::to_value(&converted[0]).unwrap();
@@ -1098,6 +1102,72 @@ fn convert_omits_reasoning_content_when_absent() {
     );
 }
 
+/// A tool-call assistant turn without narrative text has to serialize with an
+/// explicit `"content":""` key; dropping the key can get the request rejected by
+/// providers that require `content` next to `reasoning_content`.
+#[test]
+fn convert_tool_call_turn_emits_content_key_even_when_empty() {
+    let assistant_turn = ChatMessage::assistant(
+        r#"{"content":null,"reasoning_content":"thinking","tool_calls":[{"id":"call_a","name":"web_fetch","arguments":"{}"}]}"#,
+    );
+    let tool_reply = ChatMessage::tool(r#"{"tool_call_id":"call_a","content":"fetched"}"#);
+    let history = vec![assistant_turn, tool_reply];
+
+    let native = OpenAiCompatibleProvider::convert_messages_for_native(&history);
+    let payload = serde_json::to_value(&native[0]).unwrap();
+
+    // Check presence first so a missing key fails with a descriptive message.
+    assert!(
+        payload.get("content").is_some(),
+        "content key must be present on the wire even when the model emitted null/empty content"
+    );
+    assert_eq!(payload["content"], "");
+    assert_eq!(payload["reasoning_content"], "thinking");
+}
+
+/// If every tool_call on an assistant message is pruned by
+/// `enforce_tool_message_invariants` (no tool response matched), the message
+/// degrades to plain text and its `reasoning_content` has to go as well; the
+/// test treats leftover reasoning on a non-tool assistant turn as malformed.
+#[test]
+fn enforce_invariants_clears_reasoning_when_assistant_collapses_to_text() {
+    let orphan = ToolCall {
+        id: Some("orphan_call".to_string()),
+        kind: Some("function".to_string()),
+        function: Some(Function {
+            name: Some("web_fetch".to_string()),
+            arguments: Some(serde_json::Value::String("{}".to_string())),
+        }),
+    };
+    let assistant_turn = NativeMessage {
+        role: "assistant".to_string(),
+        content: Some("partial thought".to_string()),
+        tool_call_id: None,
+        tool_calls: Some(vec![orphan]),
+        reasoning_content: Some("deep reasoning".to_string()),
+    };
+    // The user speaks next, so the call above never receives a tool result.
+    let user_turn = NativeMessage {
+        role: "user".to_string(),
+        content: Some("next question".to_string()),
+        tool_call_id: None,
+        tool_calls: None,
+        reasoning_content: None,
+    };
+
+    let history = vec![assistant_turn, user_turn];
+    let cleaned = OpenAiCompatibleProvider::enforce_tool_message_invariants(history);
+
+    // Index 0 is the former tool-call turn; its calls must be gone ...
+    let collapsed = &cleaned[0];
+    assert!(collapsed.tool_calls.is_none());
+    // ... and so must the reasoning that accompanied them.
+    assert!(
+        collapsed.reasoning_content.is_none(),
+        "reasoning_content must be stripped when tool_calls are pruned to avoid malformed shape"
+    );
+}
+
 #[test]
 fn chat_message_identity_metadata_is_not_provider_wire_payload() {
     let message = ChatMessage {
@@ -1604,6 +1674,71 @@ fn reasoning_content_ignored_by_normal_models() {
     assert_eq!(msg.effective_content(), "Hello from Venice!");
 }
 
+// ----------------------------------------------------------
+// `reasoning` field-name alias (issue #3094)
+//
+// DeepSeek/Qwen3/GLM-4 emit chain-of-thought as `reasoning_content`, but
+// OpenRouter and vLLM/SGLang-backed OpenAI-compatible proxies emit it as
+// `reasoning`. If we only deserialize `reasoning_content`, a third-party
+// thinking-mode provider that uses `reasoning` is captured as `None`, so the
+// CoT is never replayed on the follow-up tool-call turn and the provider
+// rejects the request with `400 The reasoning_content in the thinking mode
+// must be passed back to the API`. The `#[serde(alias = "reasoning")]` makes
+// both field names map to the same captured value.
+// ----------------------------------------------------------
+
+#[test]
+fn reasoning_alias_captured_from_response_message() {
+    // Non-streaming reply whose chain-of-thought sits under the `reasoning` key.
+    let body = r#"{"choices":[{"message":{"content":null,"reasoning":"weighing the options"}}]}"#;
+    let parsed = serde_json::from_str::<ApiChatResponse>(body).unwrap();
+
+    let first = &parsed.choices[0];
+    let captured = first.message.reasoning_content.as_deref();
+    assert_eq!(captured, Some("weighing the options"));
+}
+
+#[test]
+fn reasoning_content_canonical_field_still_wins_over_alias_absence() {
+    // Regression guard: with no `reasoning` key in the payload, the canonical
+    // `reasoning_content` key must still populate the field.
+    let body = r#"{"choices":[{"message":{"content":null,"reasoning_content":"canonical cot"}}]}"#;
+    let parsed = serde_json::from_str::<ApiChatResponse>(body).unwrap();
+    assert_eq!(parsed.choices[0].message.reasoning_content.as_deref(), Some("canonical cot"));
+}
+
+#[test]
+fn reasoning_alias_captured_in_stream_delta() {
+    // A single stream chunk whose delta carries the text under `reasoning`.
+    let frame = r#"{"choices":[{"delta":{"reasoning":"streamed cot"},"finish_reason":null}]}"#;
+    let parsed = serde_json::from_str::<StreamChunkResponse>(frame).unwrap();
+
+    let delta = &parsed.choices[0].delta;
+    assert_eq!(delta.reasoning_content.as_deref(), Some("streamed cot"));
+}
+
+/// Issue #3094 end to end: reasoning delivered under `reasoning` on a tool-call turn must
+/// be surfaced by `parse_native_response` so the agent loop can replay it afterwards.
+#[test]
+fn parse_native_response_captures_reasoning_from_alias() {
+    let body = r#"{
+        "choices":[{"message":{
+            "content":null,
+            "reasoning":"  let me think about this  ",
+            "tool_calls":[{"id":"call_z","type":"function","function":{"name":"web_fetch","arguments":"{}"}}]
+        }}]
+    }"#;
+    let response = serde_json::from_str::<ApiChatResponse>(body).unwrap();
+    let outcome = OpenAiCompatibleProvider::parse_native_response(response, "deepseek").unwrap();
+    assert_eq!(outcome.tool_calls.len(), 1);
+    // The payload pads the reasoning with spaces; the expected value has none.
+    assert_eq!(
+        outcome.reasoning_content.as_deref(),
+        Some("let me think about this"),
+        "reasoning captured via the `reasoning` alias must be available to replay"
+    );
+}
+
 // ----------------------------------------------------------
 // SSE streaming reasoning_content fallback tests
 // ----------------------------------------------------------

diff --git a/src/openhuman/inference/provider/compatible_types.rs b/src/openhuman/inference/provider/compatible_types.rs
@@ -173,9 +173,14 @@ pub(crate) struct OpenHumanBilling {
 pub(crate) struct ResponseMessage {
     #[serde(default)]
     pub(crate) content: Option<String>,
-    /// Reasoning/thinking models (e.g. Qwen3, GLM-4) may return their output
-    /// in `reasoning_content` instead of `content`. Used as automatic fallback.
-    #[serde(default)]
+    /// Reasoning/thinking models may return their chain-of-thought in a
+    /// dedicated field instead of (or alongside) `content`. DeepSeek, Qwen3 and
+    /// GLM-4 name it `reasoning_content`; OpenRouter and vLLM/SGLang-backed
+    /// OpenAI-compatible proxies emit it as `reasoning`. Accept both so the CoT
+    /// is captured whatever the provider calls it — it must be replayed on
+    /// tool-call turns or thinking models reject the follow-up with HTTP 400.
+    /// NOTE(review): a message carrying both keys may fail to parse — confirm.
+    #[serde(default, alias = "reasoning")]
     pub(crate) reasoning_content: Option<String>,
     #[serde(default)]
     pub(crate) tool_calls: Option<Vec<ToolCall>>,
@@ -278,8 +283,10 @@ pub(crate) struct StreamChoice {
 pub(crate) struct StreamDelta {
     #[serde(default)]
     pub(crate) content: Option<String>,
-    /// Reasoning/thinking models may stream output via `reasoning_content`.
-    #[serde(default)]
+    /// Streamed chain-of-thought: `reasoning_content` (DeepSeek/Qwen3/GLM-4) or
+    /// `reasoning` (OpenRouter, vLLM/SGLang proxies); both delta names are accepted.
+    /// NOTE(review): a delta carrying both keys may fail to parse — confirm.
+    #[serde(default, alias = "reasoning")]
     pub(crate) reasoning_content: Option<String>,
     /// Native tool-call chunks. Each entry is keyed by `index`; the first
     /// chunk for a given index carries `id`/`type`/`function.name`, later