Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion src/openhuman/inference/local/lm_studio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,9 @@ pub(crate) struct LmStudioChatChoice {
pub(crate) struct LmStudioChatResponseMessage {
#[serde(default)]
pub content: Option<String>,
#[serde(default)]
/// Local reasoning models expose chain-of-thought as `reasoning_content`
/// or `reasoning` depending on the runtime — accept both field names.
#[serde(default, alias = "reasoning")]
pub reasoning_content: Option<String>,
}

Expand Down Expand Up @@ -291,4 +293,13 @@ mod tests {
};
assert_eq!(msg.effective_content(), "Visible reply");
}

#[test]
fn reasoning_content_accepts_reasoning_alias() {
    // Issue #3094: a local runtime may name the chain-of-thought field
    // `reasoning` instead of `reasoning_content`. Deserializing such a payload
    // must still populate `reasoning_content` so the reasoning round-trips the
    // same way the canonical field does.
    let payload = r#"{"content":null,"reasoning":"local cot"}"#;
    let parsed: LmStudioChatResponseMessage = serde_json::from_str(payload).unwrap();

    let captured = parsed.reasoning_content.as_deref();
    assert_eq!(captured, Some("local cot"));
}
}
31 changes: 27 additions & 4 deletions src/openhuman/inference/provider/compatible.rs
Original file line number Diff line number Diff line change
Expand Up @@ -417,10 +417,18 @@ impl OpenAiCompatibleProvider {
})
.collect::<Vec<_>>();

let content = value
.get("content")
.and_then(serde_json::Value::as_str)
.map(ToString::to_string);
// Default to empty string (not None) for
// tool-call assistant messages so the wire
// emits `"content":""` rather than omitting
// the key — some providers reject a missing
// content alongside reasoning_content.
let content = Some(
value
.get("content")
.and_then(serde_json::Value::as_str)
.unwrap_or("")
.to_string(),
);

// Replay the assistant's reasoning so
// DeepSeek thinking mode accepts the
Expand Down Expand Up @@ -556,6 +564,14 @@ impl OpenAiCompatibleProvider {
pruned_calls += before - kept.len();
let kept_ids: HashSet<String> = kept.iter().filter_map(|c| c.id.clone()).collect();
msg.tool_calls = if kept.is_empty() { None } else { Some(kept) };
// Strip reasoning_content when the message collapses to plain
// text (no surviving tool_calls). Thinking-mode providers
// (DeepSeek) require reasoning only on tool-call assistant
// messages; a stale reasoning_content on a non-tool-call
// message is at best ignored and at worst a malformed shape.
if msg.tool_calls.is_none() {
msg.reasoning_content = None;
}
out.push(msg);

// Emit the run's responses that map to a surviving call; drop the
Expand Down Expand Up @@ -1637,6 +1653,13 @@ impl Provider for OpenAiCompatibleProvider {
})
.collect::<Vec<_>>();

tracing::debug!(
has_reasoning_content = reasoning_content.is_some(),
reasoning_content_chars = reasoning_content.as_ref().map_or(0, |r| r.chars().count()),
tool_calls = tool_calls.len(),
"[provider:chat] reasoning_content capture (non-streaming)"
);

Ok(ProviderChatResponse {
text,
tool_calls,
Expand Down
143 changes: 139 additions & 4 deletions src/openhuman/inference/provider/compatible_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1060,17 +1060,21 @@ fn tool_invariants_drop_orphan_but_keep_following_cycle() {
/// the wire message.
#[test]
fn convert_preserves_reasoning_content_on_tool_call_turn() {
let input = vec![ChatMessage::assistant(
r#"{"content":null,"reasoning_content":"let me think about this","tool_calls":[{"id":"call_x","name":"shell","arguments":"{}"}]}"#,
)];
let input = vec![
ChatMessage::assistant(
r#"{"content":null,"reasoning_content":"let me think about this","tool_calls":[{"id":"call_x","name":"shell","arguments":"{}"}]}"#,
),
ChatMessage::tool(r#"{"tool_call_id":"call_x","content":"result"}"#),
];

let converted = OpenAiCompatibleProvider::convert_messages_for_native(&input);

assert_eq!(converted.len(), 1);
// First message is the assistant with tool_calls + reasoning.
assert_eq!(
converted[0].reasoning_content.as_deref(),
Some("let me think about this")
);
assert!(converted[0].tool_calls.is_some());

// The wire payload must actually carry the field for DeepSeek to accept it.
let wire = serde_json::to_value(&converted[0]).unwrap();
Expand Down Expand Up @@ -1098,6 +1102,72 @@ fn convert_omits_reasoning_content_when_absent() {
);
}

/// A tool-call assistant turn with no narrative text must still serialize a
/// `content` key (as the empty string) rather than dropping it, so providers
/// that validate the presence of a content field alongside
/// `reasoning_content` don't reject the request.
#[test]
fn convert_tool_call_turn_emits_content_key_even_when_empty() {
    // Assistant turn: null content, reasoning present, one tool call.
    let assistant_turn = ChatMessage::assistant(
        r#"{"content":null,"reasoning_content":"thinking","tool_calls":[{"id":"call_a","name":"web_fetch","arguments":"{}"}]}"#,
    );
    // Matching tool result for the call above.
    let tool_result = ChatMessage::tool(r#"{"tool_call_id":"call_a","content":"fetched"}"#);
    let history = vec![assistant_turn, tool_result];

    let native = OpenAiCompatibleProvider::convert_messages_for_native(&history);
    // Inspect the serialized form of the assistant message, not the struct.
    let wire = serde_json::to_value(&native[0]).unwrap();

    assert!(
        wire.get("content").is_some(),
        "content key must be present on the wire even when the model emitted null/empty content"
    );
    assert_eq!(wire["content"], "");
    assert_eq!(wire["reasoning_content"], "thinking");
}

/// If `enforce_tool_message_invariants` prunes every tool call from an
/// assistant message (no tool response matched any of them), the message
/// collapses to plain text and its `reasoning_content` must be cleared too:
/// stale reasoning on a non-tool assistant message is a malformed shape for
/// thinking-mode providers.
#[test]
fn enforce_invariants_clears_reasoning_when_assistant_collapses_to_text() {
    // The only tool call on the assistant turn; nothing ever answers it.
    let orphan_call = ToolCall {
        id: Some(String::from("orphan_call")),
        kind: Some(String::from("function")),
        function: Some(Function {
            name: Some(String::from("web_fetch")),
            arguments: Some(serde_json::Value::String(String::from("{}"))),
        }),
    };
    let orphaned_assistant = NativeMessage {
        role: String::from("assistant"),
        content: Some(String::from("partial thought")),
        tool_call_id: None,
        tool_calls: Some(vec![orphan_call]),
        reasoning_content: Some(String::from("deep reasoning")),
    };
    // A user turn follows directly, so the tool call above is orphaned.
    let next_user_turn = NativeMessage {
        role: String::from("user"),
        content: Some(String::from("next question")),
        tool_call_id: None,
        tool_calls: None,
        reasoning_content: None,
    };

    let sanitized = OpenAiCompatibleProvider::enforce_tool_message_invariants(vec![
        orphaned_assistant,
        next_user_turn,
    ]);

    // The assistant message collapsed: its tool calls were pruned.
    let collapsed = &sanitized[0];
    assert!(collapsed.tool_calls.is_none());
    // The reasoning must be gone along with the tool calls.
    assert!(
        collapsed.reasoning_content.is_none(),
        "reasoning_content must be stripped when tool_calls are pruned to avoid malformed shape"
    );
}

#[test]
fn chat_message_identity_metadata_is_not_provider_wire_payload() {
let message = ChatMessage {
Expand Down Expand Up @@ -1604,6 +1674,71 @@ fn reasoning_content_ignored_by_normal_models() {
assert_eq!(msg.effective_content(), "Hello from Venice!");
}

// ----------------------------------------------------------
// `reasoning` field-name alias (issue #3094)
//
// DeepSeek/Qwen3/GLM-4 emit chain-of-thought as `reasoning_content`, but
// OpenRouter and vLLM/SGLang-backed OpenAI-compatible proxies emit it as
// `reasoning`. If we only deserialize `reasoning_content`, a third-party
// thinking-mode provider that uses `reasoning` is captured as `None`, so the
// CoT is never replayed on the follow-up tool-call turn and the provider
// rejects the request with `400 The reasoning_content in the thinking mode
// must be passed back to the API`. The `#[serde(alias = "reasoning")]` makes
// both field names map to the same captured value.
// ----------------------------------------------------------

#[test]
fn reasoning_alias_captured_from_response_message() {
    // Non-streaming response whose message uses the `reasoning` key only.
    let body = r#"{"choices":[{"message":{"content":null,"reasoning":"weighing the options"}}]}"#;
    let parsed: ApiChatResponse = serde_json::from_str(body).unwrap();

    let captured = parsed.choices[0].message.reasoning_content.as_deref();
    assert_eq!(captured, Some("weighing the options"));
}

#[test]
fn reasoning_content_canonical_field_still_wins_over_alias_absence() {
    // Regression guard: a payload using only the canonical
    // `reasoning_content` key deserializes exactly as before.
    let body = r#"{"choices":[{"message":{"content":null,"reasoning_content":"canonical cot"}}]}"#;
    let parsed: ApiChatResponse = serde_json::from_str(body).unwrap();

    let captured = parsed.choices[0].message.reasoning_content.as_deref();
    assert_eq!(captured, Some("canonical cot"));
}

#[test]
fn reasoning_alias_captured_in_stream_delta() {
    // Streaming chunk whose delta carries the chain-of-thought under the
    // `reasoning` key.
    let frame = r#"{"choices":[{"delta":{"reasoning":"streamed cot"},"finish_reason":null}]}"#;
    let parsed: StreamChunkResponse = serde_json::from_str(frame).unwrap();

    let captured = parsed.choices[0].delta.reasoning_content.as_deref();
    assert_eq!(captured, Some("streamed cot"));
}

/// End-to-end check for the issue #3094 failure path: when a tool-call turn
/// delivers its reasoning under the `reasoning` alias, `parse_native_response`
/// must still surface it so the agent loop can replay it on the follow-up
/// request.
#[test]
fn parse_native_response_captures_reasoning_from_alias() {
    // The reasoning text is padded with spaces on purpose; the assertion below
    // expects the surfaced value without that padding.
    let body = r#"{
"choices":[{"message":{
"content":null,
"reasoning":" let me think about this ",
"tool_calls":[{"id":"call_z","type":"function","function":{"name":"web_fetch","arguments":"{}"}}]
}}]
}"#;
    let api_response: ApiChatResponse = serde_json::from_str(body).unwrap();

    let outcome = OpenAiCompatibleProvider::parse_native_response(api_response, "deepseek").unwrap();

    assert_eq!(outcome.tool_calls.len(), 1);
    let replayable = outcome.reasoning_content.as_deref();
    assert_eq!(
        replayable,
        Some("let me think about this"),
        "reasoning captured via the `reasoning` alias must be available to replay"
    );
}

// ----------------------------------------------------------
// SSE streaming reasoning_content fallback tests
// ----------------------------------------------------------
Expand Down
17 changes: 12 additions & 5 deletions src/openhuman/inference/provider/compatible_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,14 @@ pub(crate) struct OpenHumanBilling {
pub(crate) struct ResponseMessage {
#[serde(default)]
pub(crate) content: Option<String>,
/// Reasoning/thinking models (e.g. Qwen3, GLM-4) may return their output
/// in `reasoning_content` instead of `content`. Used as automatic fallback.
#[serde(default)]
/// Reasoning/thinking models may return their chain-of-thought in a
/// dedicated field instead of (or alongside) `content`. DeepSeek, Qwen3 and
/// GLM-4 name it `reasoning_content`; OpenRouter and vLLM/SGLang-backed
/// OpenAI-compatible proxies emit it as `reasoning`. Accept both so the CoT
/// is captured regardless of the (third-party) provider's field name — it
/// must be echoed back verbatim on tool-call turns or thinking models reject
/// the follow-up request with HTTP 400.
#[serde(default, alias = "reasoning")]

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor robustness note, non-blocking: `#[serde(alias = "reasoning")]` makes serde treat `reasoning` and `reasoning_content` as the same field. If a proxy ever emits both keys in one message (some pass-through setups echo the canonical field and add their own), serde_json will now hard-error with ``duplicate field `reasoning_content` `` instead of silently ignoring the extra key as it did before — so this could swap one failure mode for another on those (rare) providers. Probably fine to ship given how uncommon that shape is, but if you want to be defensive, a small custom deserializer that prefers `reasoning_content` and falls back to `reasoning` would sidestep the duplicate-field path entirely. The same applies to the `StreamDelta` and `LmStudioChatResponseMessage` aliases.

pub(crate) reasoning_content: Option<String>,
#[serde(default)]
pub(crate) tool_calls: Option<Vec<ToolCall>>,
Expand Down Expand Up @@ -278,8 +283,10 @@ pub(crate) struct StreamChoice {
pub(crate) struct StreamDelta {
#[serde(default)]
pub(crate) content: Option<String>,
/// Reasoning/thinking models may stream output via `reasoning_content`.
#[serde(default)]
/// Reasoning/thinking models may stream their chain-of-thought via
/// `reasoning_content` (DeepSeek/Qwen3/GLM-4) or `reasoning`
/// (OpenRouter, vLLM/SGLang proxies). Accept both delta field names.
#[serde(default, alias = "reasoning")]
pub(crate) reasoning_content: Option<String>,
/// Native tool-call chunks. Each entry is keyed by `index`; the first
/// chunk for a given index carries `id`/`type`/`function.name`, later
Expand Down
Loading