Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 86 additions & 12 deletions src/openhuman/agent/harness/engine/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,33 @@ use super::super::parse::build_native_assistant_history;
use super::super::run_queue::RunQueue;
use super::super::session::transcript::{self, MessageUsage, TurnUsage};
use super::super::token_budget::trim_chat_messages_to_budget;
use super::super::tool_loop::{RepeatFailureGuard, RepeatOutputGuard, STREAM_CHUNK_MIN_CHARS};
use super::super::tool_loop::{
RepeatCallGuard, RepeatFailureGuard, RepeatOutputGuard, STREAM_CHUNK_MIN_CHARS,
};
use super::checkpoint::CheckpointStrategy;
use super::parser::ResponseParser;
use super::progress::ProgressReporter;
use super::state::TurnObserver;
use super::tool_source::ToolSource;

/// Why a turn ended. Both `Halted` and `Cap` produce a summary `text` but mean
/// the task did NOT reach its goal — stateful callers (notably the sub-agent
/// runner) use this to tell a genuine finish apart from a stuck stop, instead of
/// having to sniff the summary prose.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum TurnStop {
/// The model returned a final response with no tool calls — a real finish.
Final,
/// A circuit breaker halted the run (repeated identical call / repeated
/// output / repeated failure). `text` is the no-progress halt summary.
Halted,
/// The iteration cap was reached; `text` is the checkpoint summary.
Cap,
/// An early-exit tool (e.g. `ask_user_clarification`) requested the stop;
/// the tool name is in [`TurnEngineOutcome::early_exit_tool`].
EarlyExit,
}

fn transcript_turn_usage(
provider: &str,
model: &str,
Expand Down Expand Up @@ -78,11 +98,10 @@ pub(crate) struct TurnEngineOutcome {
pub text: String,
pub iterations: u32,
pub cost: TurnCost,
/// True when the turn stopped because it hit the iteration cap (the
/// `CheckpointStrategy` produced `text`), false for a normal final response
/// or an early circuit-breaker halt. `Agent::turn` keys its checkpoint-only
/// history/transcript handling off this.
pub hit_cap: bool,
/// Why the turn ended. `Agent::turn` keys its checkpoint-only
/// history/transcript handling off `Cap`; the sub-agent runner maps
/// `Halted`/`Cap` to an incomplete status.
pub stop: TurnStop,
/// When set, the turn exited early because a specific tool requested
/// it (e.g. `ask_user_clarification` inside a sub-agent). The tool
/// result is in `text`. Callers use this to propagate pause semantics
Expand Down Expand Up @@ -216,6 +235,11 @@ pub(crate) async fn run_turn_engine(
// response + tool call across iterations even when each call "succeeds"
// (the gap left by the failure guard + per-generation frequency_penalty).
let mut repeat_guard = RepeatOutputGuard::new();
// Repeated-CALL breaker — trips when the model re-issues the identical
// `(tool, args)` batch back-to-back even when each call SUCCEEDS (the gap
// left by the failure guard, which resets on success, and the repeat-output
// guard, which also keys on narration text so varied prose evades it).
let mut call_guard = RepeatCallGuard::new();
let mut halt_reason: Option<String> = None;
for iteration in 0..max_iterations {
progress
Expand Down Expand Up @@ -758,17 +782,31 @@ pub(crate) async fn run_turn_engine(
text: final_out,
iterations: (iteration + 1) as u32,
cost: turn_cost,
hit_cap: false,
stop: TurnStop::Final,
early_exit_tool: None,
});
}

// Polling/wait tools (e.g. `wait_subagent`) are contractually re-invoked
// with identical args + narration on each timeout while the work is still
// running, so an all-poll batch is legitimate progress, not a no-progress
// repeat. Exempt them from the no-progress breakers: reset the
// repeat-output guard here and skip its check, and skip the
// post-execution repeat-call guard below (Codex P1 on #4230). The
// iteration cap + cost budget still bound the wait.
let all_poll_exempt = tool_calls
.iter()
.all(|c| super::super::tool_loop::is_repeat_call_exempt(&c.name));
if all_poll_exempt {
repeat_guard.reset();
}

// No-progress narration breaker: if this iteration's assistant output
// (text + tool-call name/args) is byte-identical to the previous N in a
// row, the run is stuck re-issuing the same step. Halt with a summary
// rather than grinding to the iteration cap. Checked BEFORE executing
// the (repeated) tool call so we don't burn another no-op iteration.
{
if !all_poll_exempt {
let mut sig = response_text.trim().to_string();
for call in &tool_calls {
sig.push('\u{1}');
Expand Down Expand Up @@ -813,7 +851,7 @@ pub(crate) async fn run_turn_engine(
text: reason,
iterations: (iteration + 1) as u32,
cost: turn_cost,
hit_cap: false,
stop: TurnStop::Halted,
early_exit_tool: None,
});
}
Expand All @@ -831,6 +869,11 @@ pub(crate) async fn run_turn_engine(
let mut tool_results = String::new();
let mut individual_results: Vec<String> = Vec::new();
let mut early_exit_tool: Option<String> = None;
// Tracks whether every executed call this iteration succeeded — feeds the
// success-gated repeat-call breaker below (#4095). Failures are the
// failure guard's domain (with its per-class thresholds), so a batch with
// any failure resets the repeat-call streak instead of counting it.
let mut all_calls_succeeded = true;
for (call_idx, call) in tool_calls.iter().enumerate() {
// Stable id threaded through the start/complete pair. The fallback
// includes `call_idx` to stay unique when the same tool name
Expand All @@ -846,6 +889,9 @@ pub(crate) async fn run_turn_engine(
.await;

individual_results.push(outcome.text.clone());
if !outcome.success {
all_calls_succeeded = false;
}
let _ = writeln!(
tool_results,
"<tool_result name=\"{}\">\n{}\n</tool_result>",
Expand Down Expand Up @@ -1022,11 +1068,39 @@ pub(crate) async fn run_turn_engine(
text: exit_text,
iterations: (iteration + 1) as u32,
cost: turn_cost,
hit_cap: false,
stop: TurnStop::EarlyExit,
early_exit_tool,
});
}

// Repeat-call breaker for SUCCESSFUL no-op loops (#4095). The failure
// guard above owns repeated *failures* (with per-class thresholds) and
// resets on success, so an identical call that keeps SUCCEEDING with no
// new result slips past it. Count it here — post-execution and gated on
// success — so failing batches stay the failure guard's domain and
// poll/wait tools (`wait_subagent`) stay exempt. Sets `halt_reason`,
// handled just below; tool results are already in `history`.
if halt_reason.is_none() {
if all_poll_exempt || !all_calls_succeeded {
call_guard.reset();
} else {
let mut call_sig = String::new();
for call in &tool_calls {
call_sig.push('\u{1}');
call_sig.push_str(&call.name);
call_sig.push('\u{1}');
call_sig.push_str(&call.arguments.to_string());
}
if let Some(reason) = call_guard.record(&call_sig) {
tracing::warn!(
iteration,
"[agent_loop] repeat-call circuit breaker tripped — identical successful (tool,args) batch repeated; halting with no-progress summary"
);
halt_reason = Some(reason);
}
}
}

// Circuit breaker tripped this iteration: return the root-cause summary
// instead of looping to `max_iterations`. Tool results are already in
// `history`, so the caller still has full context.
Expand All @@ -1038,7 +1112,7 @@ pub(crate) async fn run_turn_engine(
text: reason,
iterations: (iteration + 1) as u32,
cost: turn_cost,
hit_cap: false,
stop: TurnStop::Halted,
early_exit_tool: None,
});
}
Expand Down Expand Up @@ -1067,7 +1141,7 @@ pub(crate) async fn run_turn_engine(
text: co.text,
iterations: max_iterations as u32,
cost: turn_cost,
hit_cap: true,
stop: TurnStop::Cap,
early_exit_tool: None,
})
}
Expand Down
Loading
Loading