diff --git a/src/crates/core/src/agentic/tools/implementations/glob_tool.rs b/src/crates/core/src/agentic/tools/implementations/glob_tool.rs index 081d9190..c0487806 100644 --- a/src/crates/core/src/agentic/tools/implementations/glob_tool.rs +++ b/src/crates/core/src/agentic/tools/implementations/glob_tool.rs @@ -3,31 +3,12 @@ use crate::util::errors::{BitFunError, BitFunResult}; use async_trait::async_trait; use globset::{GlobBuilder, GlobMatcher}; use ignore::WalkBuilder; -use log::warn; +use log::{info, warn}; use serde_json::{json, Value}; use std::cmp::Ordering; use std::collections::BinaryHeap; use std::path::{Component, Path, PathBuf}; - -#[derive(Debug, Clone, Eq, PartialEq)] -struct GlobCandidate { - depth: usize, - path: String, -} - -impl Ord for GlobCandidate { - fn cmp(&self, other: &Self) -> Ordering { - self.depth - .cmp(&other.depth) - .then_with(|| self.path.cmp(&other.path)) - } -} - -impl PartialOrd for GlobCandidate { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} +use std::process::Command; fn extract_glob_base_directory(pattern: &str) -> (String, String) { let glob_start = pattern.find(['*', '?', '[', '{']); @@ -78,6 +59,30 @@ fn normalize_path(path: &Path) -> String { dunce::simplified(path).to_string_lossy().replace('\\', "/") } +fn shell_escape(value: &str) -> String { + value.replace('\'', "'\\''") +} + +#[derive(Debug, Clone, Eq, PartialEq)] +struct GlobCandidate { + depth: usize, + path: String, +} + +impl Ord for GlobCandidate { + fn cmp(&self, other: &Self) -> Ordering { + self.depth + .cmp(&other.depth) + .then_with(|| self.path.cmp(&other.path)) + } +} + +impl PartialOrd for GlobCandidate { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + fn is_safe_relative_subpath(path: &Path) -> bool { !path.is_absolute() && path @@ -101,6 +106,48 @@ fn derive_walk_root(search_path_abs: &Path, pattern: &str) -> (PathBuf, String) } } +fn resolve_glob_config(pattern: &str) -> (bool, bool) { + let is_whitelisted = pattern.starts_with(".bitfun") + || pattern.contains("/.bitfun") + || pattern.contains("\\.bitfun"); + + let apply_gitignore = !is_whitelisted; + let ignore_hidden_files = !is_whitelisted; + (apply_gitignore, ignore_hidden_files) +} + +fn build_rg_args( + relative_pattern: &str, + apply_gitignore: bool, + ignore_hidden_files: bool, +) -> Vec { + let mut args = vec![ + "--files".to_string(), + "--glob".to_string(), + relative_pattern.to_string(), + "--sort".to_string(), + "path".to_string(), + ]; + + if !apply_gitignore { + args.push("--no-ignore".to_string()); + } + + if !ignore_hidden_files { + args.push("--hidden".to_string()); + } + + args +} + +fn build_fallback_matcher(relative_pattern: &str) -> Result { + GlobBuilder::new(relative_pattern) + .literal_separator(true) + .build() + .map_err(|err| err.to_string()) + .map(|glob| glob.compile_matcher()) +} + fn match_relative_path(matcher: &GlobMatcher, relative_path: &str, is_dir: bool) -> bool { if is_dir { matcher.is_match(relative_path) || matcher.is_match(&format!("{}/", relative_path)) @@ -109,60 +156,41 @@ fn match_relative_path(matcher: &GlobMatcher, relative_path: &str, is_dir: bool) } } -pub fn glob_with_ignore( - search_path: &str, - pattern: &str, - ignore: bool, - ignore_hidden: bool, +fn collect_with_walk_fallback( + walk_root: &Path, + relative_pattern: &str, + apply_gitignore: bool, + ignore_hidden_files: bool, limit: usize, -) -> Result, Box> { - let path = std::path::Path::new(search_path); - if !path.exists() { - return Err(format!("Search path '{}' does not exist", search_path).into()); - } - if !path.is_dir() { - return Err(format!("Search path '{}' is not a directory", search_path).into()); - } - - let search_path_abs = dunce::canonicalize(Path::new(search_path))?; - let (walk_root, relative_pattern) = derive_walk_root(&search_path_abs, pattern); - - if !walk_root.exists() || !walk_root.is_dir() || limit == 0 { - return Ok(Vec::new()); - } - - let glob = GlobBuilder::new(&relative_pattern) - .literal_separator(true) - .build()? - .compile_matcher(); - - let walker = WalkBuilder::new(&walk_root) - .ignore(ignore) - .git_ignore(ignore) - .git_global(ignore) - .git_exclude(ignore) - .hidden(ignore_hidden) +) -> Result, String> { + let matcher = build_fallback_matcher(relative_pattern)?; + let walker = WalkBuilder::new(walk_root) + .ignore(apply_gitignore) + .git_ignore(apply_gitignore) + .git_global(apply_gitignore) + .git_exclude(apply_gitignore) + .hidden(ignore_hidden_files) .build(); let mut best_matches = BinaryHeap::with_capacity(limit.saturating_add(1)); - for entry in walker { let entry = match entry { Ok(entry) => entry, Err(err) => { - warn!("Glob walker entry error (skipped): {}", err); + warn!("Glob walker fallback entry error (skipped): {}", err); continue; } }; + let path = entry.path().to_path_buf(); - let relative_path = match path.strip_prefix(&walk_root) { + let relative_path = match path.strip_prefix(walk_root) { Ok(relative) => relative, Err(_) => continue, }; let relative_path = normalize_path(relative_path); if match_relative_path( - &glob, + &matcher, &relative_path, entry.file_type().map(|ft| ft.is_dir()).unwrap_or(false), ) { @@ -192,6 +220,100 @@ pub fn glob_with_ignore( Ok(results) } +fn call_rg(search_path: &str, pattern: &str, limit: usize) -> Result, String> { + let path = std::path::Path::new(search_path); + if !path.exists() { + return Err(format!("Search path '{}' does not exist", search_path)); + } + if !path.is_dir() { + return Err(format!("Search path '{}' is not a directory", search_path)); + } + + let search_path_abs = + dunce::canonicalize(Path::new(search_path)).map_err(|err| err.to_string())?; + let (walk_root, relative_pattern) = derive_walk_root(&search_path_abs, pattern); + let (apply_gitignore, ignore_hidden_files) = resolve_glob_config(pattern); + + if !walk_root.exists() || !walk_root.is_dir() || limit == 0 { + return Ok(Vec::new()); + } + + let args = build_rg_args(&relative_pattern, apply_gitignore, ignore_hidden_files); + let output = Command::new("rg") + .current_dir(&walk_root) + .args(&args) + .arg(".") + .output() + .map_err(|err| { + if err.kind() == std::io::ErrorKind::NotFound { + "ripgrep (rg) is required for Glob tool execution but was not found".to_string() + } else { + format!("Failed to execute rg for Glob tool: {}", err) + } + }); + + let output = match output { + Ok(output) => { + info!( + "Glob backend selected: backend=rg, search_root={}, pattern={}", + walk_root.display(), + relative_pattern + ); + output + } + Err(err) if err.contains("ripgrep (rg) is required") => { + info!( + "Glob backend selected: backend=fallback_walk, reason=rg_not_found, search_root={}, pattern={}", + walk_root.display(), + relative_pattern + ); + return collect_with_walk_fallback( + &walk_root, + &relative_pattern, + apply_gitignore, + ignore_hidden_files, + limit, + ); + } + Err(err) => return Err(err), + }; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string(); + let message = if stderr.is_empty() { + format!("rg --files failed with status {}", output.status) + } else { + format!("rg --files failed: {}", stderr) + }; + if stderr.contains("No such file or directory") || stderr.contains("not found") { + info!( + "Glob backend selected: backend=fallback_walk, reason=rg_execution_failed, search_root={}, pattern={}", + walk_root.display(), + relative_pattern + ); + return collect_with_walk_fallback( + &walk_root, + &relative_pattern, + apply_gitignore, + ignore_hidden_files, + limit, + ); + } + return Err(message); + } + + let all_paths = String::from_utf8_lossy(&output.stdout) + .lines() + .filter(|line| !line.is_empty()) + .map(|line| { + let relative_path = line.strip_prefix("./").unwrap_or(line); + let full_path = walk_root.join(relative_path); + normalize_path(&full_path) + }) + .collect::>(); + Ok(limit_paths(&all_paths, limit)) +} + fn limit_paths(paths: &[String], limit: usize) -> Vec { let mut depth_and_paths = paths .iter() @@ -212,22 +334,37 @@ fn limit_paths(paths: &[String], limit: usize) -> Vec { result } -fn call_glob(search_path: &str, pattern: &str, limit: usize) -> Result, String> { - let is_whitelisted = pattern.starts_with(".bitfun") - || pattern.contains("/.bitfun") - || pattern.contains("\\.bitfun"); +fn build_remote_rg_command(search_dir: &str, pattern: &str) -> String { + let search_dir_path = Path::new(search_dir); + let (remote_walk_root, remote_pattern) = derive_walk_root(search_dir_path, pattern); + let (apply_gitignore, ignore_hidden_files) = resolve_glob_config(pattern); + + let mut parts = vec![ + "cd".to_string(), + format!( + "'{}'", + shell_escape(remote_walk_root.to_string_lossy().as_ref()) + ), + "&&".to_string(), + "rg".to_string(), + "--files".to_string(), + "--glob".to_string(), + format!("'{}'", shell_escape(&remote_pattern)), + "--sort".to_string(), + "path".to_string(), + ]; + + if !apply_gitignore { + parts.push("--no-ignore".to_string()); + } - let apply_gitignore = !is_whitelisted; - let ignore_hidden_files = !is_whitelisted; + if !ignore_hidden_files { + parts.push("--hidden".to_string()); + } - glob_with_ignore( - search_path, - pattern, - apply_gitignore, - ignore_hidden_files, - limit, - ) - .map_err(|e| e.to_string()) + parts.push(".".to_string()); + parts.push("2>/dev/null".to_string()); + parts.join(" ") } fn build_remote_find_command(search_dir: &str, pattern: &str, limit: usize) -> String { @@ -272,7 +409,7 @@ impl Tool for GlobTool { - Use this tool when you need to find files by name patterns - You can call multiple tools in a single response. It is always better to speculatively perform multiple searches in parallel if they are potentially useful. -- List files and directories in path: path = "/path/to/search", pattern = "*" +- List files in path: path = "/path/to/search", pattern = "*" - Search all markdown files in path recursively: path = "/path/to/search", pattern = "**/*.md" "#.to_string()) @@ -340,24 +477,44 @@ impl Tool for GlobTool { .map(|v| v as usize) .unwrap_or(100); - // Remote workspace: use `find` via the workspace shell + // Remote workspace: prefer `rg --files --glob`, but fall back to `find` if context.is_remote() { let ws_shell = context .ws_shell() .ok_or_else(|| BitFunError::tool("Workspace shell not available".to_string()))?; let search_dir = resolved_str.clone(); - let find_cmd = build_remote_find_command(&search_dir, pattern, limit); - - let (stdout, _stderr, _exit_code) = ws_shell - .exec(&find_cmd, Some(30_000)) + let (_stdout, _stderr, exit_code) = ws_shell + .exec("command -v rg >/dev/null 2>&1", Some(5_000)) .await - .map_err(|e| BitFunError::tool(format!("Failed to glob on remote: {}", e)))?; + .map_err(|e| BitFunError::tool(format!("Failed to detect rg on remote: {}", e)))?; + + let remote_cmd = if exit_code == 0 { + info!( + "Glob backend selected: backend=remote_rg, search_path={}, pattern={}", + search_dir, pattern + ); + build_remote_rg_command(&search_dir, pattern) + } else { + info!( + "Glob backend selected: backend=remote_find, reason=rg_not_found, search_path={}, pattern={}", + search_dir, pattern + ); + build_remote_find_command(&search_dir, pattern, limit) + }; + + let (stdout, _stderr, _exit_code) = + ws_shell.exec(&remote_cmd, Some(30_000)).await.map_err(|e| { + BitFunError::tool(format!("Failed to glob on remote with rg: {}", e)) + })?; let matches: Vec = stdout .lines() .filter(|l| !l.is_empty()) - .map(|l| l.to_string()) + .map(|line| { + let relative_path = line.strip_prefix("./").unwrap_or(line); + normalize_path(&Path::new(&search_dir).join(relative_path)) + }) .collect(); let limited = limit_paths(&matches, limit); let result_text = if limited.is_empty() { @@ -378,7 +535,14 @@ impl Tool for GlobTool { }]); } - let matches = call_glob(&resolved_str, pattern, limit).map_err(|e| BitFunError::tool(e))?; + let resolved_str_for_rg = resolved_str.clone(); + let pattern_for_rg = pattern.to_string(); + let matches = tokio::task::spawn_blocking(move || { + call_rg(&resolved_str_for_rg, &pattern_for_rg, limit) + }) + .await + .map_err(|err| BitFunError::tool(format!("Glob tool task failed: {}", err)))? + .map_err(BitFunError::tool)?; let result_text = if matches.is_empty() { format!("No files found matching pattern '{}'", pattern) @@ -403,9 +567,10 @@ impl Tool for GlobTool { #[cfg(test)] mod tests { - use super::{call_glob, derive_walk_root, extract_glob_base_directory}; + use super::{call_rg, derive_walk_root, extract_glob_base_directory}; use std::fs; use std::path::PathBuf; + use std::process::Command; use std::time::{SystemTime, UNIX_EPOCH}; fn make_temp_dir(name: &str) -> PathBuf { @@ -444,7 +609,11 @@ mod tests { } #[test] - fn keeps_shallowest_matches_without_collecting_everything() { + fn keeps_shallowest_matches_from_rg_results() { + if Command::new("rg").arg("--version").output().is_err() { + return; + } + let root = make_temp_dir("limit"); fs::create_dir_all(root.join("src/deep")).unwrap(); fs::create_dir_all(root.join("tests")).unwrap(); @@ -453,7 +622,7 @@ mod tests { fs::write(root.join("src/deep/mod.rs"), "").unwrap(); fs::write(root.join("tests/mod.rs"), "").unwrap(); - let matches = call_glob(root.to_string_lossy().as_ref(), "**/*.rs", 2).unwrap(); + let matches = call_rg(root.to_string_lossy().as_ref(), "**/*.rs", 2).unwrap(); assert_eq!(matches.len(), 2); assert!(matches.iter().any(|path| path.ends_with("/src/lib.rs"))); @@ -464,4 +633,22 @@ mod tests { let _ = fs::remove_dir_all(root); } + + #[test] + fn wildcard_search_now_returns_files_only() { + if Command::new("rg").arg("--version").output().is_err() { + return; + } + + let root = make_temp_dir("files-only"); + fs::create_dir_all(root.join("src/nested")).unwrap(); + fs::write(root.join("src/nested/lib.rs"), "").unwrap(); + + let matches = call_rg(root.to_string_lossy().as_ref(), "*", 10).unwrap(); + + assert!(matches.iter().all(|path| !path.ends_with("/src"))); + assert!(!matches.is_empty()); + + let _ = fs::remove_dir_all(root); + } }