diff --git a/app/src/pages/onboarding/OnboardingLayout.tsx b/app/src/pages/onboarding/OnboardingLayout.tsx index edfd7d6edf..92b514b5bd 100644 --- a/app/src/pages/onboarding/OnboardingLayout.tsx +++ b/app/src/pages/onboarding/OnboardingLayout.tsx @@ -29,12 +29,26 @@ const OnboardingLayout = () => { }); try { + // Preserve a tool preference the user already customized (e.g. via + // Settings → Tools or an earlier onboarding run) rather than resetting + // to catalog defaults on every completion. Re-applying defaults here + // could silently narrow an existing selection. Only seed defaults when + // no preference has been persisted yet. The Rust-side filter + // (`filter_tools_by_user_preference`) is the authoritative guard against + // stale snapshots stripping newer tools (issue #3096); this is + // defense-in-depth on the write path. + const existingEnabledTools = snapshot.localState.onboardingTasks?.enabledTools; + const enabledTools = + existingEnabledTools && existingEnabledTools.length > 0 + ? existingEnabledTools + : getEnabledRustToolNames(getDefaultEnabledTools()); + await setOnboardingTasks({ accessibilityPermissionGranted: snapshot.localState.onboardingTasks?.accessibilityPermissionGranted ?? false, localModelConsentGiven: false, localModelDownloadStarted: false, - enabledTools: getEnabledRustToolNames(getDefaultEnabledTools()), + enabledTools, connectedSources: draft.connectedSources, updatedAtMs: Date.now(), }); diff --git a/app/src/pages/onboarding/__tests__/OnboardingLayout.test.tsx b/app/src/pages/onboarding/__tests__/OnboardingLayout.test.tsx index 9ff114a81e..6b0875e9cf 100644 --- a/app/src/pages/onboarding/__tests__/OnboardingLayout.test.tsx +++ b/app/src/pages/onboarding/__tests__/OnboardingLayout.test.tsx @@ -100,7 +100,7 @@ function buildStore() { }); } -async function setupLayout() { +async function setupLayout(onboardingTasks: unknown = null) { const { useCoreState } = await import('../../../providers/CoreStateProvider'); const mockSetOnboardingCompletedFlag = vi.fn().mockResolvedValue(undefined); @@ -114,7 +114,7 @@ async function setupLayout() { onboardingCompleted: false, chatOnboardingCompleted: false, analyticsEnabled: false, - localState: { encryptionKey: null, onboardingTasks: null, keyringConsent: null }, + localState: { encryptionKey: null, onboardingTasks, keyringConsent: null }, keyringStatus: { available: true, failureReason: null, @@ -254,4 +254,40 @@ describe('OnboardingLayout — Joyride walkthrough integration (#1123)', () => { warnSpy.mockRestore(); }); + + // [#3096] Tool preference persistence on completion. + it('seeds default enabledTools when no preference was persisted', async () => { + const { mockSetOnboardingTasks } = await setupLayout(null); + + await act(async () => { + fireEvent.click(screen.getByTestId('complete-btn')); + }); + + // Mocks: getDefaultEnabledTools() → [], getEnabledRustToolNames(x) → x. + expect(mockSetOnboardingTasks).toHaveBeenCalledWith( + expect.objectContaining({ enabledTools: [] }) + ); + }); + + it('preserves an existing customized enabledTools list instead of resetting to defaults', async () => { + const existing = ['shell', 'cron_add', 'cron_list']; + const { mockSetOnboardingTasks } = await setupLayout({ + accessibilityPermissionGranted: false, + localModelConsentGiven: false, + localModelDownloadStarted: false, + enabledTools: existing, + connectedSources: [], + updatedAtMs: 1, + }); + + await act(async () => { + fireEvent.click(screen.getByTestId('complete-btn')); + }); + + // The user's prior selection must be carried through verbatim — completing + // onboarding must never silently narrow an already-customized tool list. + expect(mockSetOnboardingTasks).toHaveBeenCalledWith( + expect.objectContaining({ enabledTools: existing }) + ); + }); }); diff --git a/src/openhuman/tools/user_filter.rs b/src/openhuman/tools/user_filter.rs index 7110017ded..6af19393a9 100644 --- a/src/openhuman/tools/user_filter.rs +++ b/src/openhuman/tools/user_filter.rs @@ -1,27 +1,108 @@ use std::collections::HashSet; +/// A user-toggleable tool *family*: one UI toggle that controls a set of Rust +/// tool `name()` values, plus whether that family ships enabled by default. +/// +/// `default_enabled` mirrors the frontend tool catalog +/// (`app/src/utils/toolDefinitions.ts` `defaultEnabled`). It is the key to the +/// additive-safe behaviour in [`filter_tools_by_user_preference`]: a default-ON +/// family that is *absent* from a persisted snapshot is treated as a baseline +/// capability and retained (so a stale snapshot written before the family +/// existed cannot silently disable it — issue #3096), whereas a default-OFF +/// family stays opt-in and is stripped unless explicitly enabled. +struct ToolFamily { + /// UI toggle ID stored in app state. + id: &'static str, + /// Rust tool `name()` values this family controls. + rust_names: &'static [&'static str], + /// Whether this family is enabled by default (catalog parity). + default_enabled: bool, +} + /// Maps UI-level tool toggle IDs (stored in app state) to the Rust tool /// `name()` values they control. Tools not covered by any mapping entry /// are always retained — only tools that appear here are filterable. -const TOOL_ID_TO_RUST_NAMES: &[(&str, &[&str])] = &[ - ("shell", &["shell"]), - ("detect_tools", &["detect_tools"]), - ("install_tool", &["install_tool"]), - ("git_operations", &["git_operations"]), - ("file_read", &["file_read", "read_diff", "csv_export"]), - ("file_write", &["file_write", "update_memory_md"]), - ("screenshot", &["screenshot"]), - ("image_info", &["image_info"]), - ("browser_open", &["browser_open"]), - ("browser", &["browser"]), - ("http_request", &["http_request"]), - ("web_search", &["web_search_tool"]), - ("memory_store", &["memory_store"]), - ("memory_recall", &["memory_recall"]), - ("memory_forget", &["memory_forget"]), - ( - "cron", - &[ +const TOOL_FAMILIES: &[ToolFamily] = &[ + ToolFamily { + id: "shell", + rust_names: &["shell"], + default_enabled: true, + }, + // detect_tools / install_tool are filterable but not surfaced in the + // default-ON catalog, so they stay opt-in (default-OFF). + ToolFamily { + id: "detect_tools", + rust_names: &["detect_tools"], + default_enabled: false, + }, + ToolFamily { + id: "install_tool", + rust_names: &["install_tool"], + default_enabled: false, + }, + ToolFamily { + id: "git_operations", + rust_names: &["git_operations"], + default_enabled: true, + }, + ToolFamily { + id: "file_read", + rust_names: &["file_read", "read_diff", "csv_export"], + default_enabled: true, + }, + ToolFamily { + id: "file_write", + rust_names: &["file_write", "update_memory_md"], + default_enabled: true, + }, + ToolFamily { + id: "screenshot", + rust_names: &["screenshot"], + default_enabled: true, + }, + ToolFamily { + id: "image_info", + rust_names: &["image_info"], + default_enabled: true, + }, + ToolFamily { + id: "browser_open", + rust_names: &["browser_open"], + default_enabled: false, + }, + ToolFamily { + id: "browser", + rust_names: &["browser"], + default_enabled: false, + }, + ToolFamily { + id: "http_request", + rust_names: &["http_request"], + default_enabled: false, + }, + ToolFamily { + id: "web_search", + rust_names: &["web_search_tool"], + default_enabled: true, + }, + ToolFamily { + id: "memory_store", + rust_names: &["memory_store"], + default_enabled: true, + }, + ToolFamily { + id: "memory_recall", + rust_names: &["memory_recall"], + default_enabled: true, + }, + ToolFamily { + id: "memory_forget", + rust_names: &["memory_forget"], + default_enabled: true, + }, + ToolFamily { + id: "cron", + rust_names: &[ "cron_add", "cron_list", "cron_remove", @@ -29,28 +110,43 @@ const TOOL_ID_TO_RUST_NAMES: &[(&str, &[&str])] = &[ "cron_run", "cron_runs", ], - ), - ("schedule", &["schedule"]), + default_enabled: true, + }, + ToolFamily { + id: "schedule", + rust_names: &["schedule"], + default_enabled: true, + }, // Self-update tools (issue #1435). Filterable so the onboarding // tool-toggle surface can default them off and let users opt in. // `update_check` is read-only; `update_apply` is gated by both the // tool-level autonomy check and `config.update.rpc_mutations_enabled`. - ("update", &["update_check", "update_apply"]), + ToolFamily { + id: "update", + rust_names: &["update_check", "update_apply"], + default_enabled: false, + }, // Knowledge & memory — overextending tools (agent-tool expansion). Listed // so onboarding can default them OFF; read/bounded-write siblings are not // listed and stay always-retained. - ( - "people_refresh_address_book", - &["people_refresh_address_book"], - ), - ( - "skill_manage", - &["skill_create", "skill_install_from_url", "skill_uninstall"], - ), - ("thread_destructive", &["thread_delete", "thread_purge_all"]), - ( - "billing_writes", - &[ + ToolFamily { + id: "people_refresh_address_book", + rust_names: &["people_refresh_address_book"], + default_enabled: false, + }, + ToolFamily { + id: "skill_manage", + rust_names: &["skill_create", "skill_install_from_url", "skill_uninstall"], + default_enabled: false, + }, + ToolFamily { + id: "thread_destructive", + rust_names: &["thread_delete", "thread_purge_all"], + default_enabled: false, + }, + ToolFamily { + id: "billing_writes", + rust_names: &[ "billing_purchase_plan", "billing_top_up_credits", "billing_create_coinbase_charge", @@ -60,10 +156,11 @@ const TOOL_ID_TO_RUST_NAMES: &[(&str, &[&str])] = &[ "billing_redeem_coupon", "billing_update_auto_recharge", ], - ), - ( - "team_admin", - &[ + default_enabled: false, + }, + ToolFamily { + id: "team_admin", + rust_names: &[ "team_create", "team_update", "team_delete", @@ -75,10 +172,11 @@ const TOOL_ID_TO_RUST_NAMES: &[(&str, &[&str])] = &[ "team_remove_member", "team_change_member_role", ], - ), - ( - "service_lifecycle", - &[ + default_enabled: false, + }, + ToolFamily { + id: "service_lifecycle", + rust_names: &[ "service_start", "service_stop", "service_restart", @@ -87,29 +185,33 @@ const TOOL_ID_TO_RUST_NAMES: &[(&str, &[&str])] = &[ "service_uninstall", "daemon_host_prefs_set", ], - ), - ( - "screen_permissions", - &[ + default_enabled: false, + }, + ToolFamily { + id: "screen_permissions", + rust_names: &[ "screen_intelligence_request_permissions", "screen_intelligence_request_permission", ], - ), - ( - "mcp_manage", - &["mcp_registry_install", "mcp_registry_uninstall"], - ), - ( - "workspace_manage", - &[ + default_enabled: false, + }, + ToolFamily { + id: "mcp_manage", + rust_names: &["mcp_registry_install", "mcp_registry_uninstall"], + default_enabled: false, + }, + ToolFamily { + id: "workspace_manage", + rust_names: &[ "workspace_update_persona", "workspace_reset_persona", "workspace_init", ], - ), - ( - "learning_manage", - &[ + default_enabled: false, + }, + ToolFamily { + id: "learning_manage", + rust_names: &[ "learning_update_facet", "learning_pin_facet", "learning_unpin_facet", @@ -119,38 +221,57 @@ const TOOL_ID_TO_RUST_NAMES: &[(&str, &[&str])] = &[ "learning_save_profile", "learning_enrich_profile", ], - ), + default_enabled: false, + }, // Task & workflow productivity — overextending tools (agent-tool // expansion). Only the destructive/persistent-config mutators are listed // here so the onboarding toggle surface can default them OFF and let users // opt in; the read-only + bounded-write siblings (e.g. `artifact_list`, // `todo_add`, `task_source_fetch`) are intentionally NOT listed, so they // are always-retained infrastructure. Grouped one toggle per risk family. - ("agent_workflow_uninstall", &["agent_workflow_uninstall"]), - ("artifact_delete", &["artifact_delete"]), - ( - "todo_destructive", - &["todo_remove", "todo_replace", "todo_clear"], - ), - ( - "task_source_manage", - &[ + ToolFamily { + id: "agent_workflow_uninstall", + rust_names: &["agent_workflow_uninstall"], + default_enabled: false, + }, + ToolFamily { + id: "artifact_delete", + rust_names: &["artifact_delete"], + default_enabled: false, + }, + ToolFamily { + id: "todo_destructive", + rust_names: &["todo_remove", "todo_replace", "todo_clear"], + default_enabled: false, + }, + ToolFamily { + id: "task_source_manage", + rust_names: &[ "task_source_add", "task_source_update", "task_source_remove", ], - ), + default_enabled: false, + }, ]; /// All Rust tool names that are filterable (union of all mapping values). /// Any tool whose name is NOT in this set is infrastructure and always retained. fn all_filterable_tool_names() -> HashSet<&'static str> { - TOOL_ID_TO_RUST_NAMES + TOOL_FAMILIES .iter() - .flat_map(|(_, names)| names.iter().copied()) + .flat_map(|fam| fam.rust_names.iter().copied()) .collect() } +/// Resolve a Rust tool `name()` to the [`ToolFamily`] that owns it. Returns +/// `None` for infrastructure tools that no mapping entry covers. +fn family_for_rust_name(name: &str) -> Option<&'static ToolFamily> { + TOOL_FAMILIES + .iter() + .find(|fam| fam.rust_names.iter().any(|n| *n == name)) +} + /// Expand persisted tool-preference entries into Rust tool `name()` values. /// /// Accepts both formats we may find in app state: @@ -161,16 +282,16 @@ fn all_filterable_tool_names() -> HashSet<&'static str> { fn expand_enabled_tool_names(enabled_tool_names: &[String]) -> HashSet { let mut expanded = HashSet::new(); for entry in enabled_tool_names { - if let Some((_, rust_names)) = TOOL_ID_TO_RUST_NAMES.iter().find(|(id, _)| id == &entry) { - for name in *rust_names { + if let Some(fam) = TOOL_FAMILIES.iter().find(|fam| fam.id == entry) { + for name in fam.rust_names { expanded.insert((*name).to_string()); } continue; } - if TOOL_ID_TO_RUST_NAMES + if TOOL_FAMILIES .iter() - .flat_map(|(_, names)| names.iter().copied()) + .flat_map(|fam| fam.rust_names.iter().copied()) .any(|name| name == entry) { expanded.insert(entry.clone()); @@ -180,10 +301,31 @@ fn expand_enabled_tool_names(enabled_tool_names: &[String]) -> HashSet { } /// Given the list of enabled tools from app state, retain only tools that are -/// either infrastructure (not filterable) or explicitly enabled. +/// either infrastructure (not filterable), explicitly enabled, or a default-ON +/// capability the snapshot never explicitly opted out of. /// /// An empty `enabled_tool_names` list means "all enabled" (default / not yet /// configured) — the filter is a no-op in that case. +/// +/// ## Additive-safe (issue #3096) +/// +/// The persisted `enabled_tool_names` is a *frozen snapshot* of the user's tool +/// choices, written once by the frontend (onboarding / Settings → Tools) from +/// the catalog that existed at that moment. Treating it as a strict exhaustive +/// allowlist silently disables any **default-ON** family that was added to the +/// catalog *after* the snapshot was written — the #3096 symptom, where the +/// agent reported it lacked `cron_add` because an older snapshot predated the +/// cron family. +/// +/// Fix: an absent filterable tool is stripped **only when its family is +/// default-OFF** (an opt-in capability the user never enabled). Default-ON +/// families are baseline capabilities and are retained even when absent, so a +/// stale snapshot can never silently remove a tool the user never saw. The +/// trade-off is that a default-ON family cannot be turned off purely by its +/// absence from the snapshot — a deliberate decision: silently breaking a +/// baseline capability (scheduled tasks) is far worse than keeping it +/// available. Default-OFF opt-in gating (the ~160 overextending tools from +/// #3050) is unchanged. pub(crate) fn filter_tools_by_user_preference( tools: &mut Vec>, enabled_tool_names: &[String], @@ -210,7 +352,27 @@ pub(crate) fn filter_tools_by_user_preference( if !filterable.contains(name) { return true; } - allowed.contains(name) + // Explicitly enabled by the snapshot. + if allowed.contains(name) { + return true; + } + // Filterable + not explicitly enabled. Default-ON families are baseline + // capabilities: retain them so a stale/partial snapshot can never + // silently disable a tool (#3096). Default-OFF families stay opt-in and + // are stripped. + match family_for_rust_name(name) { + Some(fam) if fam.default_enabled => { + log::debug!( + "[tool-filter] retaining default-ON '{}' (family '{}' absent from persisted allowlist; baseline capability)", + name, + fam.id + ); + true + } + Some(_) => false, + // Defensive: filterable name with no resolvable family — retain. + None => true, + } }); let after = tools.len(); @@ -226,7 +388,9 @@ pub(crate) fn filter_tools_by_user_preference( #[cfg(test)] mod tests { - use super::expand_enabled_tool_names; + use super::{expand_enabled_tool_names, filter_tools_by_user_preference}; + use crate::openhuman::tools::traits::{Tool, ToolResult}; + use async_trait::async_trait; #[test] fn expands_legacy_ui_toggle_ids_to_rust_tool_names() { @@ -249,4 +413,139 @@ mod tests { let allowed = expand_enabled_tool_names(&["totally_unknown".to_string()]); assert!(allowed.is_empty()); } + + /// Minimal name-only tool stub so the filter (which only reads `name()`) + /// can be exercised without constructing real tool implementations. + struct FakeTool(&'static str); + + #[async_trait] + impl Tool for FakeTool { + fn name(&self) -> &str { + self.0 + } + fn description(&self) -> &str { + "fake" + } + fn parameters_schema(&self) -> serde_json::Value { + serde_json::json!({}) + } + async fn execute(&self, _args: serde_json::Value) -> anyhow::Result { + Ok(ToolResult::success("ok")) + } + } + + fn names(tools: &[Box]) -> Vec<&str> { + tools.iter().map(|t| t.name()).collect() + } + + fn tools(names: &[&'static str]) -> Vec> { + names + .iter() + .map(|n| Box::new(FakeTool(n)) as Box) + .collect() + } + + #[test] + fn empty_preference_list_is_a_noop() { + let mut t = tools(&["cron_add", "shell", "file_read"]); + filter_tools_by_user_preference(&mut t, &[]); + assert_eq!(names(&t).len(), 3); + } + + /// Regression for #3096: a non-empty snapshot that never references the + /// cron family (e.g. written by an older build whose catalog predated the + /// cron family) must NOT strip cron tools — cron is default-ON, a baseline + /// capability, so it is retained. + #[test] + fn retains_cron_when_snapshot_predates_cron_family() { + let mut t = tools(&["cron_add", "cron_list", "web_search_tool", "shell"]); + // Snapshot only references web_search; the cron family is absent. + filter_tools_by_user_preference(&mut t, &["web_search_tool".to_string()]); + let kept = names(&t); + assert!( + kept.contains(&"cron_add"), + "cron_add must survive a cron-less snapshot" + ); + assert!( + kept.contains(&"cron_list"), + "cron_list must survive a cron-less snapshot" + ); + assert!(kept.contains(&"web_search_tool")); + // Infrastructure tool (not in any mapping) is always retained. + assert!(kept.contains(&"shell")); + } + + /// A default-ON sibling absent from a cron-aware snapshot is still retained + /// (default-ON families are baseline capabilities, not absence-disabled). + #[test] + fn retains_default_on_cron_sibling_even_when_family_referenced() { + let mut t = tools(&["cron_add", "cron_list", "shell"]); + // Snapshot references the cron family via cron_list but omits cron_add. + filter_tools_by_user_preference(&mut t, &["cron_list".to_string()]); + let kept = names(&t); + assert!(kept.contains(&"cron_list")); + assert!( + kept.contains(&"cron_add"), + "default-ON cron_add is a baseline capability" + ); + assert!(kept.contains(&"shell")); + } + + /// Default-OFF families stay opt-in: absent from the snapshot ⇒ stripped. + /// This is the opt-in gating the overextending tools (#3050) rely on. + #[test] + fn strips_default_off_family_when_not_opted_in() { + let mut t = tools(&["service_start", "service_stop", "file_read", "cron_add"]); + // Snapshot references only file_read (a default-ON family). + filter_tools_by_user_preference(&mut t, &["file_read".to_string()]); + let kept = names(&t); + assert!( + !kept.contains(&"service_start"), + "default-OFF service_start must be stripped" + ); + assert!( + !kept.contains(&"service_stop"), + "default-OFF service_stop must be stripped" + ); + assert!( + kept.contains(&"file_read"), + "explicitly enabled file_read stays" + ); + assert!( + kept.contains(&"cron_add"), + "default-ON cron_add stays even when absent" + ); + } + + /// Explicitly opting into a default-OFF family retains it. + #[test] + fn retains_default_off_family_when_opted_in() { + let mut t = tools(&["service_start", "service_stop", "file_read"]); + filter_tools_by_user_preference(&mut t, &["service_lifecycle".to_string()]); + let kept = names(&t); + assert!(kept.contains(&"service_start")); + assert!(kept.contains(&"service_stop")); + } + + /// The legacy UI toggle ID form expands to the whole family. + #[test] + fn ui_toggle_id_enables_whole_cron_family() { + let mut t = tools(&["cron_add", "cron_list", "cron_remove", "service_start"]); + filter_tools_by_user_preference(&mut t, &["cron".to_string()]); + let kept = names(&t); + assert!(kept.contains(&"cron_add")); + assert!(kept.contains(&"cron_list")); + assert!(kept.contains(&"cron_remove")); + // service_start (default-OFF) not opted in → stripped. + assert!(!kept.contains(&"service_start")); + } + + /// A list whose entries match no known UI ID or tool name yields an empty + /// allowed set, tripping the safety fallback that leaves tools unfiltered. + #[test] + fn unrecognized_only_list_leaves_tools_unfiltered() { + let mut t = tools(&["cron_add", "service_start"]); + filter_tools_by_user_preference(&mut t, &["totally_unknown".to_string()]); + assert_eq!(names(&t).len(), 2); + } }