diff --git a/prompts/en/fragments/tool_use_enforcement.md.j2 b/prompts/en/fragments/tool_use_enforcement.md.j2 new file mode 100644 index 000000000..c1d7ff2aa --- /dev/null +++ b/prompts/en/fragments/tool_use_enforcement.md.j2 @@ -0,0 +1,7 @@ +## Tool-Use Enforcement + +You MUST use your tools to take action — do not describe what you would do or plan to do without actually doing it. When you say you will perform an action (e.g. "I will run the tests", "Let me check the file", "I will create the project"), you MUST immediately make the corresponding tool call in the same response. Never end your turn with a promise of future action — execute it now. + +Keep working until the task is actually complete. Do not stop with a summary of what you plan to do next time. If you have tools available that can accomplish the task, use them instead of telling the user what you would do. + +Every response should either (a) contain tool calls that make progress, or (b) deliver a final result to the user. Responses that only describe intentions without acting are not acceptable. 
diff --git a/src/agent/channel.rs b/src/agent/channel.rs index ad9932f81..637d959eb 100644 --- a/src/agent/channel.rs +++ b/src/agent/channel.rs @@ -1637,7 +1637,11 @@ impl Channel { } }; - prompt_engine.render_channel_prompt_with_links( + let routing = rc.routing.load(); + let model_name = routing.resolve(ProcessType::Channel, None).to_string(); + let tool_use_enforcement = rc.tool_use_enforcement.load(); + + let system_prompt = prompt_engine.render_channel_prompt_with_links( empty_to_none(identity_context), memory_bulletin_text, empty_to_none(skills_prompt), @@ -1653,6 +1657,12 @@ impl Channel { self.backfill_transcript.clone(), empty_to_none(working_memory), empty_to_none(channel_activity_map), + )?; + + prompt_engine.maybe_append_tool_use_enforcement( + system_prompt, + tool_use_enforcement.as_ref(), + &model_name, ) } @@ -2346,8 +2356,11 @@ impl Channel { }; let empty_to_none = |s: String| if s.is_empty() { None } else { Some(s) }; + let routing = rc.routing.load(); + let model_name = routing.resolve(ProcessType::Channel, None).to_string(); + let tool_use_enforcement = rc.tool_use_enforcement.load(); - prompt_engine.render_channel_prompt_with_links( + let system_prompt = prompt_engine.render_channel_prompt_with_links( empty_to_none(identity_context), memory_bulletin_text, empty_to_none(skills_prompt), @@ -2363,6 +2376,12 @@ impl Channel { self.backfill_transcript.clone(), empty_to_none(working_memory), empty_to_none(channel_activity_map), + )?; + + prompt_engine.maybe_append_tool_use_enforcement( + system_prompt, + tool_use_enforcement.as_ref(), + &model_name, ) } diff --git a/src/agent/channel_dispatch.rs b/src/agent/channel_dispatch.rs index f08cb5a36..b4a884f46 100644 --- a/src/agent/channel_dispatch.rs +++ b/src/agent/channel_dispatch.rs @@ -11,7 +11,7 @@ use crate::agent::worker::Worker; use crate::conversation::settings::{WorkerContextMode, WorkerHistoryMode}; use crate::error::{AgentError, Error as SpacebotError}; use crate::tools::{BranchToolProfile, 
MemoryPersistenceContractState}; -use crate::{AgentDeps, BranchId, ChannelId, ProcessEvent, WorkerId}; +use crate::{AgentDeps, BranchId, ChannelId, ProcessEvent, ProcessType, WorkerId}; use futures::FutureExt as _; use std::sync::Arc; use tokio::sync::broadcast; @@ -127,11 +127,21 @@ pub async fn spawn_branch_from_state( let description = description.into(); let rc = &state.deps.runtime_config; let prompt_engine = rc.prompts.load(); + let routing = rc.routing.load(); + let model_name = routing.resolve(ProcessType::Branch, None).to_string(); + let tool_use_enforcement = rc.tool_use_enforcement.load(); let system_prompt = prompt_engine .render_branch_prompt( &rc.instance_dir.display().to_string(), &rc.workspace_dir.display().to_string(), ) + .and_then(|prompt| { + prompt_engine.maybe_append_tool_use_enforcement( + prompt, + tool_use_enforcement.as_ref(), + &model_name, + ) + }) .map_err(|e| AgentError::Other(anyhow::anyhow!("{e}")))?; spawn_branch( @@ -160,8 +170,18 @@ pub(crate) async fn spawn_memory_persistence_branch( let contract_state = Arc::new(MemoryPersistenceContractState::default()); let prompt_engine = deps.runtime_config.prompts.load(); + let routing = deps.runtime_config.routing.load(); + let model_name = routing.resolve(ProcessType::Branch, None).to_string(); + let tool_use_enforcement = deps.runtime_config.tool_use_enforcement.load(); let system_prompt = prompt_engine .render_static("memory_persistence") + .and_then(|prompt| { + prompt_engine.maybe_append_tool_use_enforcement( + prompt, + tool_use_enforcement.as_ref(), + &model_name, + ) + }) .map_err(|e| AgentError::Other(anyhow::anyhow!("{e}")))?; let prompt = prompt_engine .render_system_memory_persistence() @@ -481,6 +501,9 @@ async fn spawn_worker_inner( }; let browser_config = (**rc.browser_config.load()).clone(); + let routing = rc.routing.load(); + let model_name = routing.resolve(ProcessType::Worker, None).to_string(); + let tool_use_enforcement = rc.tool_use_enforcement.load(); let 
worker_system_prompt = prompt_engine .render_worker_prompt( &rc.instance_dir.display().to_string(), @@ -500,7 +523,7 @@ async fn spawn_worker_inner( // Append skills listing to worker system prompt. Suggested skills are // flagged so the worker knows the channel's intent, but it can read any // skill it decides is relevant via the read_skill tool. - let mut system_prompt = match skills.render_worker_skills(suggested_skills, &prompt_engine) { + let system_prompt = match skills.render_worker_skills(suggested_skills, &prompt_engine) { Ok(skills_prompt) if !skills_prompt.is_empty() => { format!("{worker_system_prompt}\n\n{skills_prompt}") } @@ -511,6 +534,16 @@ async fn spawn_worker_inner( } }; + // Append tool-use enforcement after skills so it's the last instruction + // in the preamble ("last instruction wins"). + let mut system_prompt = prompt_engine + .maybe_append_tool_use_enforcement( + system_prompt, + tool_use_enforcement.as_ref(), + &model_name, + ) + .map_err(|e| AgentError::Other(anyhow::anyhow!("{e}")))?; + // Inject memory context based on worker_context settings if worker_context.memory.ambient_enabled() { // Get knowledge synthesis and working memory @@ -1141,6 +1174,9 @@ pub async fn resume_idle_worker_into_state( None => Vec::new(), }; let browser_config = (**rc.browser_config.load()).clone(); + let routing = rc.routing.load(); + let model_name = routing.resolve(ProcessType::Worker, None).to_string(); + let tool_use_enforcement = rc.tool_use_enforcement.load(); let system_prompt = prompt_engine .render_worker_prompt( &rc.instance_dir.display().to_string(), @@ -1153,6 +1189,13 @@ pub async fn resume_idle_worker_into_state( browser_config.persist_session, worker_status_text, ) + .and_then(|prompt| { + prompt_engine.maybe_append_tool_use_enforcement( + prompt, + tool_use_enforcement.as_ref(), + &model_name, + ) + }) .map_err(|error| format!("failed to render worker prompt: {error}"))?; let brave_search_key = (**rc.brave_search_key.load()).clone(); diff 
--git a/src/agent/compactor.rs b/src/agent/compactor.rs index f6a5e3add..fb44ec86c 100644 --- a/src/agent/compactor.rs +++ b/src/agent/compactor.rs @@ -134,8 +134,10 @@ impl Compactor { let deps = self.deps.clone(); let model_override = self.model_override.clone(); let prompt_engine = deps.runtime_config.prompts.load(); + // The compactor is a toolless agent (summary-only), so tool-use + // enforcement is skipped — there are no tools to enforce. let compactor_prompt = match prompt_engine.render_static("compactor") { - Ok(p) => p, + Ok(prompt) => prompt, Err(error) => { tracing::error!(%error, "failed to render compactor prompt"); let mut flag = is_compacting.write().await; diff --git a/src/agent/cortex.rs b/src/agent/cortex.rs index 4afbf1448..226337dca 100644 --- a/src/agent/cortex.rs +++ b/src/agent/cortex.rs @@ -1497,8 +1497,21 @@ fn handle_cortex_receiver_result( pub fn spawn_cortex_loop(deps: AgentDeps, logger: CortexLogger) -> tokio::task::JoinHandle<()> { tokio::spawn(async move { let prompt_engine = deps.runtime_config.prompts.load(); + let routing = deps.runtime_config.routing.load(); + let model_name = routing.resolve(ProcessType::Cortex, None).to_string(); + let tool_use_enforcement = deps.runtime_config.tool_use_enforcement.load(); let system_prompt = match prompt_engine.render_static("cortex") { - Ok(prompt) => prompt, + Ok(prompt) => match prompt_engine.maybe_append_tool_use_enforcement( + prompt.clone(), + tool_use_enforcement.as_ref(), + &model_name, + ) { + Ok(prompt) => prompt, + Err(error) => { + tracing::warn!(%error, "failed to append tool-use enforcement, using base cortex prompt"); + prompt + } + }, Err(error) => { tracing::warn!(%error, "failed to render cortex prompt, using empty preamble"); String::new() @@ -3276,6 +3289,9 @@ async fn pickup_one_ready_task(deps: &AgentDeps, logger: &CortexLogger) -> anyho let current_time_line = temporal_context.current_time_line(); let worker_status_text = 
Some(system_info.render_for_worker(&current_time_line)); + let routing = deps.runtime_config.routing.load(); + let model_name = routing.resolve(ProcessType::Worker, None).to_string(); + let tool_use_enforcement = deps.runtime_config.tool_use_enforcement.load(); let worker_system_prompt = prompt_engine .render_worker_prompt( &deps.runtime_config.instance_dir.display().to_string(), @@ -3288,6 +3304,13 @@ async fn pickup_one_ready_task(deps: &AgentDeps, logger: &CortexLogger) -> anyho browser_config.persist_session, worker_status_text, ) + .and_then(|prompt| { + prompt_engine.maybe_append_tool_use_enforcement( + prompt, + tool_use_enforcement.as_ref(), + &model_name, + ) + }) .map_err(|error| anyhow::anyhow!("failed to render worker prompt: {error}"))?; let mut task_prompt = format!("Execute task #{}: {}", task.task_number, task.title); diff --git a/src/agent/cortex_chat.rs b/src/agent/cortex_chat.rs index c1118048f..83ea43999 100644 --- a/src/agent/cortex_chat.rs +++ b/src/agent/cortex_chat.rs @@ -821,8 +821,11 @@ impl CortexChatSession { }; let empty_to_none = |s: String| if s.is_empty() { None } else { Some(s) }; + let routing = runtime_config.routing.load(); + let model_name = routing.resolve(ProcessType::Cortex, None).to_string(); + let tool_use_enforcement = runtime_config.tool_use_enforcement.load(); - prompt_engine.render_cortex_chat_prompt( + let system_prompt = prompt_engine.render_cortex_chat_prompt( empty_to_none(identity_context), empty_to_none(memory_bulletin.to_string()), channel_transcript, @@ -831,6 +834,12 @@ impl CortexChatSession { empty_to_none(runtime_config_snapshot), worker_capabilities, self.factory_enabled, + )?; + + prompt_engine.maybe_append_tool_use_enforcement( + system_prompt, + tool_use_enforcement.as_ref(), + &model_name, ) } diff --git a/src/agent/ingestion.rs b/src/agent/ingestion.rs index 7ed5f0c9e..e2c9e69b1 100644 --- a/src/agent/ingestion.rs +++ b/src/agent/ingestion.rs @@ -474,10 +474,14 @@ async fn process_chunk( deps: &AgentDeps, ) 
-> anyhow::Result<()> { let prompt_engine = deps.runtime_config.prompts.load(); - let ingestion_prompt = prompt_engine.render_static("ingestion")?; - let routing = deps.runtime_config.routing.load(); let model_name = routing.resolve(ProcessType::Branch, None).to_string(); + let tool_use_enforcement = deps.runtime_config.tool_use_enforcement.load(); + let ingestion_prompt = prompt_engine.maybe_append_tool_use_enforcement( + prompt_engine.render_static("ingestion")?, + tool_use_enforcement.as_ref(), + &model_name, + )?; let model = SpacebotModel::make(&deps.llm_manager, &model_name) .with_context(&*deps.agent_id, "branch") .with_worker_type("ingestion") @@ -526,11 +530,9 @@ async fn process_chunk( classify_chunk_prompt_result(result, filename, chunk_number, total_chunks)?; if !contract_state.has_terminal_outcome() { - tracing::warn!( - file = %filename, - chunk = %format!("{chunk_number}/{total_chunks}"), - "ingestion chunk completed without memory_persistence_complete signal" - ); + return Err(anyhow::anyhow!( + "ingestion chunk {chunk_number}/{total_chunks} for {filename} completed without memory_persistence_complete signal" + )); } Ok(()) diff --git a/src/api/agents.rs b/src/api/agents.rs index f4547a73d..90d790a56 100644 --- a/src/api/agents.rs +++ b/src/api/agents.rs @@ -709,6 +709,7 @@ pub async fn create_agent_internal( max_turns: None, branch_max_turns: None, context_window: None, + tool_use_enforcement: None, compaction: None, memory_persistence: None, coalesce: None, diff --git a/src/config.rs b/src/config.rs index db0e51b98..eb0891a9a 100644 --- a/src/config.rs +++ b/src/config.rs @@ -2170,4 +2170,47 @@ command = "/usr/bin/test" // The mcp_servers data is silently dropped — verify it's not accessible assert!(parsed.defaults.mcp.is_empty()); } + + #[test] + fn tool_use_enforcement_parses_and_resolves() { + let toml = r#" +[defaults] +tool_use_enforcement = "always" + +[[agents]] +id = "main" +tool_use_enforcement = ["gemini", "deepseek"] +"#; + + let 
parsed: TomlConfig = toml::from_str(toml).expect("failed to parse test TOML"); + let config = Config::from_toml(parsed, PathBuf::from(".")).expect("failed to build Config"); + + assert_eq!( + config.defaults.tool_use_enforcement, + ToolUseEnforcement::Always + ); + assert_eq!( + config.agents[0].tool_use_enforcement, + Some(ToolUseEnforcement::Custom(vec![ + "gemini".to_string(), + "deepseek".to_string(), + ])) + ); + + let resolved = config.resolve_agents(); + assert_eq!( + resolved[0].tool_use_enforcement, + ToolUseEnforcement::Custom(vec!["gemini".to_string(), "deepseek".to_string()]) + ); + assert!( + resolved[0] + .tool_use_enforcement + .should_inject("google/gemini-2.5-pro") + ); + assert!( + !resolved[0] + .tool_use_enforcement + .should_inject("anthropic/claude-sonnet-4") + ); + } } diff --git a/src/config/load.rs b/src/config/load.rs index bf34c991e..e0bbf0b13 100644 --- a/src/config/load.rs +++ b/src/config/load.rs @@ -917,6 +917,7 @@ impl Config { max_turns: None, branch_max_turns: None, context_window: None, + tool_use_enforcement: None, compaction: None, memory_persistence: None, coalesce: None, @@ -1533,6 +1534,10 @@ impl Config { .defaults .context_window .unwrap_or(base_defaults.context_window), + tool_use_enforcement: toml + .defaults + .tool_use_enforcement + .unwrap_or_else(|| base_defaults.tool_use_enforcement.clone()), compaction: toml .defaults .compaction @@ -1781,6 +1786,7 @@ impl Config { max_turns: a.max_turns, branch_max_turns: a.branch_max_turns, context_window: a.context_window, + tool_use_enforcement: a.tool_use_enforcement, compaction: a.compaction.map(|c| CompactionConfig { background_threshold: c .background_threshold @@ -1922,6 +1928,7 @@ impl Config { max_turns: None, branch_max_turns: None, context_window: None, + tool_use_enforcement: None, compaction: None, memory_persistence: None, coalesce: None, diff --git a/src/config/runtime.rs b/src/config/runtime.rs index e91b780d4..065b14d1a 100644 --- a/src/config/runtime.rs +++ 
b/src/config/runtime.rs @@ -6,7 +6,8 @@ use arc_swap::ArcSwap; use super::{ BrowserConfig, ChannelConfig, CoalesceConfig, CompactionConfig, Config, CortexConfig, DefaultsConfig, IngestionConfig, McpServerConfig, MemoryPersistenceConfig, OpenCodeConfig, - ResolvedAgentConfig, WarmupConfig, WarmupStatus, WorkReadiness, evaluate_work_readiness, + ResolvedAgentConfig, ToolUseEnforcement, WarmupConfig, WarmupStatus, WorkReadiness, + evaluate_work_readiness, }; use crate::llm::routing::RoutingConfig; use crate::tools::browser::SharedBrowserHandle; @@ -34,6 +35,7 @@ pub struct RuntimeConfig { pub max_turns: ArcSwap, pub branch_max_turns: ArcSwap, pub context_window: ArcSwap, + pub tool_use_enforcement: ArcSwap<ToolUseEnforcement>, pub max_concurrent_branches: ArcSwap, pub max_concurrent_workers: ArcSwap, pub browser_config: ArcSwap, @@ -125,6 +127,7 @@ impl RuntimeConfig { max_turns: ArcSwap::from_pointee(agent_config.max_turns), branch_max_turns: ArcSwap::from_pointee(agent_config.branch_max_turns), context_window: ArcSwap::from_pointee(agent_config.context_window), + tool_use_enforcement: ArcSwap::from_pointee(agent_config.tool_use_enforcement.clone()), max_concurrent_branches: ArcSwap::from_pointee(agent_config.max_concurrent_branches), max_concurrent_workers: ArcSwap::from_pointee(agent_config.max_concurrent_workers), browser_config: ArcSwap::from_pointee(agent_config.browser.clone()), @@ -248,6 +251,8 @@ impl RuntimeConfig { self.branch_max_turns .store(Arc::new(resolved.branch_max_turns)); self.context_window.store(Arc::new(resolved.context_window)); + self.tool_use_enforcement + .store(Arc::new(resolved.tool_use_enforcement.clone())); self.max_concurrent_branches .store(Arc::new(resolved.max_concurrent_branches)); self.max_concurrent_workers diff --git a/src/config/toml_schema.rs b/src/config/toml_schema.rs index 5e19bb4d1..5ab153d86 100644 --- a/src/config/toml_schema.rs +++ b/src/config/toml_schema.rs @@ -1,5 +1,7 @@ // -- TOML deserialization types -- +use 
super::types::ToolUseEnforcement; + use serde::{Deserialize, Deserializer}; use std::collections::HashMap; @@ -282,6 +284,7 @@ pub(super) struct TomlDefaultsConfig { pub(super) max_turns: Option, pub(super) branch_max_turns: Option, pub(super) context_window: Option, + pub(super) tool_use_enforcement: Option<ToolUseEnforcement>, pub(super) compaction: Option, pub(super) memory_persistence: Option, pub(super) coalesce: Option, @@ -461,6 +464,7 @@ pub(super) struct TomlAgentConfig { pub(super) max_turns: Option, pub(super) branch_max_turns: Option, pub(super) context_window: Option, + pub(super) tool_use_enforcement: Option<ToolUseEnforcement>, pub(super) compaction: Option, pub(super) memory_persistence: Option, pub(super) coalesce: Option, diff --git a/src/config/types.rs b/src/config/types.rs index e4943bbb7..9bd9073e7 100644 --- a/src/config/types.rs +++ b/src/config/types.rs @@ -212,6 +212,78 @@ impl<'de> serde::Deserialize<'de> for ApiType { } } +/// Tool-use enforcement configuration for preventing models from describing +/// actions instead of calling tools. +#[derive(Debug, Clone, PartialEq, Eq, Default)] +pub enum ToolUseEnforcement { + /// Auto-detect based on model name (GPT/Codex models get enforcement). + #[default] + Auto, + /// Always inject tool-use enforcement guidance. + Always, + /// Never inject tool-use enforcement guidance. + Never, + /// Custom list of model name substrings to match (case-insensitive). + Custom(Vec<String>), +} + +impl ToolUseEnforcement { + /// Check if enforcement should be injected for the given model name. 
+ pub fn should_inject(&self, model: &str) -> bool { + let model_lower = model.to_lowercase(); + match self { + Self::Auto => { + // Match GPT and Codex models by default + model_lower.contains("gpt") || model_lower.contains("codex") + } + Self::Always => true, + Self::Never => false, + Self::Custom(patterns) => patterns + .iter() + .any(|p| model_lower.contains(&p.to_lowercase())), + } + } +} + +impl<'de> serde::Deserialize<'de> for ToolUseEnforcement { + fn deserialize<D: serde::Deserializer<'de>>( + deserializer: D, + ) -> std::result::Result<Self, D::Error> { + use serde::de::Error; + let value = toml::Value::deserialize(deserializer)?; + match value { + toml::Value::String(s) => match s.to_lowercase().as_str() { + "auto" => Ok(Self::Auto), + "true" | "always" | "yes" | "on" => Ok(Self::Always), + "false" | "never" | "no" | "off" => Ok(Self::Never), + other => Err(D::Error::invalid_value( + serde::de::Unexpected::Str(other), + &"one of 'auto', 'true', 'false', 'always', 'never'", + )), + }, + toml::Value::Boolean(enabled) => Ok(if enabled { Self::Always } else { Self::Never }), + toml::Value::Array(arr) => { + let patterns: Vec<String> = arr + .into_iter() + .map(|v| { + v.as_str().map(String::from).ok_or_else(|| { + D::Error::invalid_value( + serde::de::Unexpected::Other("non-string array element"), + &"array of strings", + ) + }) + }) + .collect::<std::result::Result<_, _>>()?; + Ok(Self::Custom(patterns)) + } + _ => Err(D::Error::invalid_value( + serde::de::Unexpected::Other("non-string/non-array value"), + &"string or array of strings", + )), + } + } +} + /// Configuration for a single LLM provider. #[derive(Clone, serde::Deserialize)] pub struct ProviderConfig { @@ -556,6 +628,9 @@ pub struct DefaultsConfig { pub user_timezone: Option, pub history_backfill_count: usize, pub cron: Vec, + /// Tool-use enforcement for preventing models from describing actions instead of calling tools. + /// "auto" (default) — matches GPT/Codex models; true — always inject; false — never inject; an array of strings — inject only for models whose name contains one of them (case-insensitive). 
+ pub tool_use_enforcement: ToolUseEnforcement, pub opencode: OpenCodeConfig, /// Worker log mode: "errors_only", "all_separate", or "all_combined". pub worker_log_mode: crate::settings::WorkerLogMode, @@ -589,6 +664,7 @@ impl std::fmt::Debug for DefaultsConfig { .field("user_timezone", &self.user_timezone) .field("history_backfill_count", &self.history_backfill_count) .field("cron", &self.cron) + .field("tool_use_enforcement", &self.tool_use_enforcement) .field("opencode", &self.opencode) .field("worker_log_mode", &self.worker_log_mode) .field("projects", &self.projects) @@ -1195,6 +1271,8 @@ pub struct AgentConfig { pub max_turns: Option, pub branch_max_turns: Option, pub context_window: Option, + /// Tool-use enforcement for preventing models from describing actions instead of calling tools. `None` inherits the value from the defaults. + pub tool_use_enforcement: Option<ToolUseEnforcement>, pub compaction: Option, pub memory_persistence: Option, pub coalesce: Option, @@ -1277,6 +1355,8 @@ pub struct ResolvedAgentConfig { /// Number of messages to fetch from the platform when a new channel is created. pub history_backfill_count: usize, pub cron: Vec, + /// Tool-use enforcement for preventing models from describing actions instead of calling tools. 
+ pub tool_use_enforcement: ToolUseEnforcement, } impl Default for DefaultsConfig { @@ -1302,6 +1382,7 @@ impl Default for DefaultsConfig { user_timezone: None, history_backfill_count: 50, cron: Vec::new(), + tool_use_enforcement: ToolUseEnforcement::default(), opencode: OpenCodeConfig::default(), worker_log_mode: crate::settings::WorkerLogMode::default(), projects: ProjectsConfig::default(), @@ -1378,6 +1459,10 @@ impl AgentConfig { .unwrap_or_else(|| defaults.projects.clone()), history_backfill_count: defaults.history_backfill_count, cron: self.cron.clone(), + tool_use_enforcement: self + .tool_use_enforcement + .clone() + .unwrap_or_else(|| defaults.tool_use_enforcement.clone()), } } } diff --git a/src/prompts/engine.rs b/src/prompts/engine.rs index e6ce5aac5..3a7340ec8 100644 --- a/src/prompts/engine.rs +++ b/src/prompts/engine.rs @@ -164,6 +164,10 @@ impl PromptEngine { "fragments/system/tool_syntax_correction", crate::prompts::text::get("fragments/system/tool_syntax_correction"), )?; + env.add_template( + "fragments/tool_use_enforcement", + crate::prompts::text::get("fragments/tool_use_enforcement"), + )?; env.add_template( "fragments/coalesce_hint", crate::prompts::text::get("fragments/coalesce_hint"), @@ -215,6 +219,35 @@ impl PromptEngine { self.render(template_name, Value::UNDEFINED) } + /// Render the tool-use enforcement fragment. + pub fn render_tool_use_enforcement(&self) -> Result<String> { + self.render_static("fragments/tool_use_enforcement") + } + + /// Append tool-use enforcement guidance when configured for the model. 
+ pub fn maybe_append_tool_use_enforcement( + &self, + mut prompt: String, + tool_use_enforcement: &crate::config::ToolUseEnforcement, + model_name: &str, + ) -> Result<String> { + if !tool_use_enforcement.should_inject(model_name) { + return Ok(prompt); + } + + let guidance = self.render_tool_use_enforcement()?; + let guidance = guidance.trim(); + if guidance.is_empty() { + return Ok(prompt); + } + + if !prompt.trim_end().is_empty() { + prompt.push_str("\n\n"); + } + prompt.push_str(guidance); + Ok(prompt) + } + /// Convenience method for rendering worker capabilities fragment. pub fn render_worker_capabilities( &self, @@ -740,4 +773,39 @@ pub struct ProjectWorktreeContext { } // All templates are now loaded from the centralized text registry (src/prompts/text.rs) // to support multiple languages at compile time. + +#[cfg(test)] +mod tests { + use super::PromptEngine; + use crate::config::ToolUseEnforcement; + + #[test] + fn appends_tool_use_enforcement_for_matching_model() { + let engine = PromptEngine::new("en").expect("prompt engine should build"); + let prompt = engine + .maybe_append_tool_use_enforcement( + "Base prompt".to_string(), + &ToolUseEnforcement::Auto, + "openai/gpt-4.1", + ) + .expect("tool-use guidance should render"); + + assert!(prompt.contains("Base prompt")); + assert!(prompt.contains("Tool-Use Enforcement")); + } + + #[test] + fn skips_tool_use_enforcement_for_non_matching_model() { + let engine = PromptEngine::new("en").expect("prompt engine should build"); + let prompt = engine + .maybe_append_tool_use_enforcement( + "Base prompt".to_string(), + &ToolUseEnforcement::Auto, + "anthropic/claude-sonnet-4", + ) + .expect("tool-use guidance should render"); + + assert_eq!(prompt, "Base prompt"); + } +} 
diff --git a/src/prompts/text.rs b/src/prompts/text.rs index 53122f42d..b05b1b10c 100644 --- a/src/prompts/text.rs +++ b/src/prompts/text.rs @@ -128,6 +128,9 @@ fn lookup(lang: &str, key: &str) -> &'static str { ("en", "fragments/system/tool_syntax_correction") => { include_str!("../../prompts/en/fragments/system/tool_syntax_correction.md.j2") } + ("en", "fragments/tool_use_enforcement") => { + include_str!("../../prompts/en/fragments/tool_use_enforcement.md.j2") + } // Agent Communication Fragments ("en", "fragments/org_context") => { include_str!("../../prompts/en/fragments/org_context.md.j2") diff --git a/src/tools/spawn_worker.rs b/src/tools/spawn_worker.rs index 180831c0a..fbb90a3c8 100644 --- a/src/tools/spawn_worker.rs +++ b/src/tools/spawn_worker.rs @@ -420,6 +420,11 @@ impl Tool for DetachedSpawnWorkerTool { }; let browser_config = (**rc.browser_config.load()).clone(); + let routing = rc.routing.load(); + let model_name = routing + .resolve(crate::ProcessType::Worker, None) + .to_string(); + let tool_use_enforcement = rc.tool_use_enforcement.load(); let worker_system_prompt = prompt_engine .render_worker_prompt( &rc.instance_dir.display().to_string(), @@ -432,6 +437,13 @@ impl Tool for DetachedSpawnWorkerTool { browser_config.persist_session, worker_status_text, ) + .and_then(|prompt| { + prompt_engine.maybe_append_tool_use_enforcement( + prompt, + tool_use_enforcement.as_ref(), + &model_name, + ) + }) .map_err(|error| { SpawnWorkerError(format!("failed to render worker prompt: {error}")) })?;