Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/apps/desktop/src/api/config_api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,13 @@ pub async fn get_mode_configs(state: State<'_, AppState>) -> Result<Value, Strin
needs_save = true;
} else if let Some(config) = mode_configs.get_mut(&mode_id) {
config.default_tools = default_tools.clone();
// Migration: add ComputerUse to available_tools when the mode default includes it.
if default_tools.iter().any(|t| t == "ComputerUse")
&& !config.available_tools.iter().any(|t| t == "ComputerUse")
{
config.available_tools.push("ComputerUse".to_string());
needs_save = true;
}
// Migrate older Claw sessions that only allowlisted "ComputerUse" before split mouse tools existed;
// All desktop automation is now consolidated into ComputerUse.
// Remove any stale split tool names from available_tools.
Expand Down
118 changes: 95 additions & 23 deletions src/apps/desktop/src/computer_use/desktop_host.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1036,6 +1036,85 @@ end tell"#])
Ok(())
}

/// Smoothstep easing curve used for pointer paths.
///
/// The input is clamped to `[0.0, 1.0]` and then mapped through `3t² − 2t³`, so the
/// result starts and ends with zero slope (gentle acceleration and deceleration).
fn smoothstep01(t: f64) -> f64 {
    let clamped = t.clamp(0.0, 1.0);
    let squared = clamped * clamped;
    let blend = 3.0 - 2.0 * clamped;
    squared * blend
}

/// Glide the pointer to the global point `(x1, y1)` through a series of eased move events
/// instead of warping there in one event.
///
/// The path starts at the current Quartz pointer location; when that location is
/// unavailable the target itself is used as the start, which collapses to a single move.
/// Targets closer than `MIN_DIST` are likewise reached with one event.
///
/// Timing: total duration is `70 + 0.28 * distance` ms capped at `MAX_DURATION_MS`, split
/// over `ceil(distance / 5.5)` steps clamped to `MIN_STEPS..=MAX_STEPS`, with at least
/// 1 ms between steps. This blocks the calling thread via `std::thread::sleep`.
///
/// Returns the first error reported while posting a move event.
#[cfg(target_os = "macos")]
fn smooth_mouse_move_cg_global(x1: f64, y1: f64) -> BitFunResult<()> {
    const MIN_DIST: f64 = 2.5;
    const MIN_STEPS: usize = 8;
    const MAX_STEPS: usize = 85;
    const MAX_DURATION_MS: u64 = 400;

    let (start_x, start_y) = macos::quartz_mouse_location().unwrap_or((x1, y1));
    let (delta_x, delta_y) = (x1 - start_x, y1 - start_y);
    let distance = (delta_x * delta_x + delta_y * delta_y).sqrt();

    // Tiny hops are not worth animating: jump straight to the target.
    if distance < MIN_DIST {
        return Self::post_mouse_moved_cg_global(x1, y1);
    }

    let total_ms = (70.0 + distance * 0.28).min(MAX_DURATION_MS as f64) as u64;
    let step_count = ((distance / 5.5).ceil() as usize).clamp(MIN_STEPS, MAX_STEPS);
    let pause = Duration::from_millis((total_ms / step_count as u64).max(1));

    for step in 1..=step_count {
        // Pause between consecutive events only: none before the first, none after the last.
        if step > 1 {
            std::thread::sleep(pause);
        }
        let progress = step as f64 / step_count as f64;
        let eased = Self::smoothstep01(progress);
        Self::post_mouse_moved_cg_global(start_x + delta_x * eased, start_y + delta_y * eased)?;
    }
    Ok(())
}

/// Non-macOS counterpart of the smooth pointer move: glides to `(x1, y1)` with eased
/// absolute `move_mouse` calls, all issued inside a single `run_enigo_job` closure.
///
/// The path starts at the pointer location reported by enigo. Targets closer than
/// `MIN_DIST` are reached with one absolute move. Every intermediate point is rounded to
/// the nearest integer coordinate before being sent.
///
/// Timing: total duration is `70 + 0.28 * distance` ms capped at `MAX_DURATION_MS`, split
/// over `ceil(distance / 5.5)` steps clamped to `MIN_STEPS..=MAX_STEPS`, with at least
/// 1 ms between steps. This blocks the calling thread via `std::thread::sleep`.
///
/// Returns a tool error if the pointer location cannot be read or any move fails.
#[cfg(not(target_os = "macos"))]
fn smooth_mouse_move_enigo_abs(x1: f64, y1: f64) -> BitFunResult<()> {
    const MIN_DIST: f64 = 2.5;
    const MIN_STEPS: usize = 8;
    const MAX_STEPS: usize = 85;
    const MAX_DURATION_MS: u64 = 400;

    Self::run_enigo_job(|enigo| {
        let (cur_x, cur_y) = enigo.location().map_err(|err| {
            BitFunError::tool(format!("smooth_mouse_move: pointer location: {}", err))
        })?;
        let (start_x, start_y) = (cur_x as f64, cur_y as f64);
        let (delta_x, delta_y) = (x1 - start_x, y1 - start_y);
        let distance = (delta_x * delta_x + delta_y * delta_y).sqrt();

        // Tiny hops are not worth animating: jump straight to the target.
        if distance < MIN_DIST {
            return enigo
                .move_mouse(x1.round() as i32, y1.round() as i32, Coordinate::Abs)
                .map_err(|err| BitFunError::tool(format!("mouse_move: {}", err)));
        }

        let total_ms = (70.0 + distance * 0.28).min(MAX_DURATION_MS as f64) as u64;
        let step_count = ((distance / 5.5).ceil() as usize).clamp(MIN_STEPS, MAX_STEPS);
        let pause = Duration::from_millis((total_ms / step_count as u64).max(1));

        for step in 1..=step_count {
            // Pause between consecutive moves only: none before the first, none after the last.
            if step > 1 {
                std::thread::sleep(pause);
            }
            let progress = step as f64 / step_count as f64;
            let eased = Self::smoothstep01(progress);
            let px = start_x + delta_x * eased;
            let py = start_y + delta_y * eased;
            enigo
                .move_mouse(px.round() as i32, py.round() as i32, Coordinate::Abs)
                .map_err(|err| BitFunError::tool(format!("mouse_move: {}", err)))?;
        }
        Ok(())
    })
}

fn map_button(s: &str) -> BitFunResult<Button> {
match s.to_lowercase().as_str() {
"left" => Ok(Button::Left),
Expand Down Expand Up @@ -2313,30 +2392,19 @@ impl ComputerUseHost for DesktopComputerUseHost {
}

async fn mouse_move_global_f64(&self, gx: f64, gy: f64) -> BitFunResult<()> {
#[cfg(target_os = "macos")]
{
tokio::task::spawn_blocking(move || {
Self::run_enigo_job(|_| Self::post_mouse_moved_cg_global(gx, gy))
})
.await
.map_err(|e| BitFunError::tool(e.to_string()))??;
self.clear_vision_pixel_nudge_block();
ComputerUseHost::computer_use_after_pointer_mutation(self);
return Ok(());
}
#[cfg(not(target_os = "macos"))]
{
self.mouse_move(gx.round() as i32, gy.round() as i32).await
}
}

async fn mouse_move(&self, x: i32, y: i32) -> BitFunResult<()> {
debug!("computer_use: mouse_move absolute ({}, {})", x, y);
debug!(
"computer_use: mouse_move_global_f64 smooth target ({:.2}, {:.2})",
gx, gy
);
tokio::task::spawn_blocking(move || {
Self::run_enigo_job(|e| {
e.move_mouse(x, y, Coordinate::Abs)
.map_err(|err| BitFunError::tool(format!("mouse_move: {}", err)))
})
#[cfg(target_os = "macos")]
{
Self::run_enigo_job(|_| Self::smooth_mouse_move_cg_global(gx, gy))
}
#[cfg(not(target_os = "macos"))]
{
Self::smooth_mouse_move_enigo_abs(gx, gy)
}
})
.await
.map_err(|e| BitFunError::tool(e.to_string()))??;
Expand All @@ -2345,6 +2413,10 @@ impl ComputerUseHost for DesktopComputerUseHost {
Ok(())
}

/// Move the pointer to the absolute integer coordinates `(x, y)`.
///
/// Thin wrapper: both coordinates are widened losslessly to `f64` and the call is
/// forwarded to `mouse_move_global_f64`, whose result is returned unchanged.
async fn mouse_move(&self, x: i32, y: i32) -> BitFunResult<()> {
    let (gx, gy) = (f64::from(x), f64::from(y));
    self.mouse_move_global_f64(gx, gy).await
}

async fn pointer_move_relative(&self, dx: i32, dy: i32) -> BitFunResult<()> {
if dx == 0 && dy == 0 {
return Ok(());
Expand Down
1 change: 1 addition & 0 deletions src/crates/core/src/agentic/agents/agentic_mode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ impl AgenticMode {
"AskUserQuestion".to_string(),
"Git".to_string(),
"TerminalControl".to_string(),
"ComputerUse".to_string(),
],
}
}
Expand Down
1 change: 1 addition & 0 deletions src/crates/core/src/agentic/agents/cowork_mode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ impl CoworkMode {
"Bash".to_string(),
"TerminalControl".to_string(),
"WebSearch".to_string(),
"ComputerUse".to_string(),
],
}
}
Expand Down
1 change: 1 addition & 0 deletions src/crates/core/src/agentic/agents/debug_mode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,7 @@ Below is a snapshot of the current workspace's file structure.
"MermaidInteractive".to_string(),
"Log".to_string(),
"TerminalControl".to_string(),
"ComputerUse".to_string(),
]
}

Expand Down
1 change: 1 addition & 0 deletions src/crates/core/src/agentic/agents/init_agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ impl InitAgent {
"Write".to_string(),
"Edit".to_string(),
"Bash".to_string(),
"ComputerUse".to_string(),
],
}
}
Expand Down
1 change: 1 addition & 0 deletions src/crates/core/src/agentic/agents/plan_mode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ impl PlanMode {
"Glob".to_string(),
"AskUserQuestion".to_string(),
"CreatePlan".to_string(),
"ComputerUse".to_string(),
],
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ pub struct PromptBuilderContext {
pub remote_execution: Option<RemoteExecutionHints>,
/// Pre-built tree text for `{PROJECT_LAYOUT}` when the workspace is not on the local disk.
pub remote_project_layout: Option<String>,
/// When `Some(false)`, the system prompt appends text-only Computer use guidance (no screenshot tool output).
pub supports_image_understanding: Option<bool>,
}

impl PromptBuilderContext {
Expand All @@ -56,9 +58,15 @@ impl PromptBuilderContext {
model_name,
remote_execution: None,
remote_project_layout: None,
supports_image_understanding: None,
}
}

/// Builder-style setter: stores `Some(supports)` in `supports_image_understanding`
/// and hands the updated context back to the caller for further chaining.
pub fn with_supports_image_understanding(mut self, supports: bool) -> Self {
    let capability = Some(supports);
    self.supports_image_understanding = capability;
    self
}

pub fn with_remote_prompt_overlay(
mut self,
execution: RemoteExecutionHints,
Expand Down Expand Up @@ -93,10 +101,10 @@ impl PromptBuilder {
let current_date = now.format("%Y-%m-%d").to_string();

let computer_use_keys = match host_os {
"macos" => "Computer use / `key_chord`: the **local BitFun desktop** is **macOS** — use `command`, `option`, `control`, `shift` (not Win/Linux modifier names). **System clipboard (prefer over long type_text):** command+a (select all), command+c (copy), command+x (cut), command+v (paste). Spotlight: command+space; switch app: command+tab.",
"windows" => "Computer use / `key_chord`: the **local BitFun desktop** is **Windows** — use `meta`/`super` for the Windows key, `alt`, `control`, `shift`. **System clipboard:** control+a/c/x/v. Start menu: meta; Alt+Tab for window switch.",
"linux" => "Computer use / `key_chord`: the **local BitFun desktop** is **Linux** — typically `control`, `alt`, `shift`, and sometimes `meta`/`super` depending on the desktop; match the user's session. **System clipboard:** usually control+a/c/x/v (confirm in-app menus if unsure).",
_ => "Computer use / `key_chord`: match modifier names to the **local BitFun desktop** OS below. Prefer standard clipboard chords before retyping long text.",
"macos" => "Computer use / `key_chord`: the **local BitFun desktop** is **macOS** — use `command`, `option`, `control`, `shift` (not Win/Linux modifier names). **ACTION PRIORITY:** 1) Terminal/CLI/system commands (use Bash tool for `osascript`, AppleScript, shell scripts) 2) Keyboard shortcuts: command+a/c/x/v (clipboard), command+space (Spotlight), command+tab (switch app) 3) UI control (AX/OCR/mouse) only when above fail.",
"windows" => "Computer use / `key_chord`: the **local BitFun desktop** is **Windows** — use `meta`/`super` for Windows key, `alt`, `control`, `shift`. **ACTION PRIORITY:** 1) Terminal/CLI/system commands (use Bash tool for PowerShell, cmd, scripts) 2) Keyboard shortcuts: control+a/c/x/v (clipboard), meta (Start menu), Alt+Tab (switch) 3) UI control only when above fail.",
"linux" => "Computer use / `key_chord`: the **local BitFun desktop** is **Linux** — typically `control`, `alt`, `shift`, and sometimes `meta`/`super`. **ACTION PRIORITY:** 1) Terminal/CLI/system commands (use Bash tool for shell scripts, system commands) 2) Keyboard shortcuts: control+a/c/x/v (clipboard) 3) UI control (AX/OCR/mouse) only when above fail.",
_ => "Computer use / `key_chord`: match modifier names to the **local BitFun desktop** OS below. **ACTION PRIORITY:** 1) Terminal/CLI/system commands first 2) Keyboard shortcuts second 3) UI control (mouse/OCR) last resort.",
};

if let Some(remote) = &self.context.remote_execution {
Expand Down Expand Up @@ -466,6 +474,16 @@ Do not read from, modify, create, move, or delete files outside this workspace u
result = result.replace(PLACEHOLDER_VISUAL_MODE, &visual_mode);
}

if self.context.supports_image_understanding == Some(false) {
result.push_str(
"\n\n# Computer use (text-only primary model)\n\n\
The configured **primary model does not accept image inputs**. When using **ComputerUse**:\n\
- **Do not** use **`screenshot`** or **`click_label`**.\n\
- **ACTION PRIORITY:** 1) Terminal/CLI/system commands (Bash tool) 2) Keyboard shortcuts (**`key_chord`**, **`type_text`**) 3) UI control: **`click_element`** (AX) → **`locate`** → **`move_to_text`** (use **`move_to_text_match_index`** when multiple OCR hits listed) → **`mouse_move`** (**`use_screen_coordinates`: true** with coordinates from tool JSON) → **`click`**.\n\
- **Never guess coordinates** — always use precise methods (AX, OCR, system coordinates from tool results).\n",
);
}

Ok(result.trim().to_string())
}
}
12 changes: 10 additions & 2 deletions src/crates/core/src/agentic/agents/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -385,10 +385,18 @@ impl AgentRegistry {
match entry.category {
AgentCategory::Mode => {
let mode_configs = get_mode_configs().await;
mode_configs
let mut tools = mode_configs
.get(agent_type)
.map(|config| config.available_tools.clone())
.unwrap_or_else(|| entry.agent.default_tools())
.unwrap_or_else(|| entry.agent.default_tools());
let defaults = entry.agent.default_tools();
const COMPUTER_USE: &str = "ComputerUse";
if defaults.iter().any(|t| t == COMPUTER_USE)
&& !tools.iter().any(|t| t == COMPUTER_USE)
{
tools.push(COMPUTER_USE.to_string());
}
tools
}
AgentCategory::SubAgent | AgentCategory::Hidden => entry.agent.default_tools(),
}
Expand Down
Loading
Loading