Skip to content

Commit 6db032c

Browse files
committed
feat: implement search_files_content tool
cleanup cleanup
1 parent 0c3303a commit 6db032c

File tree

8 files changed

+807
-112
lines changed

8 files changed

+807
-112
lines changed

Cargo.lock

Lines changed: 328 additions & 92 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ eula = false
1818
rust-mcp-sdk = { version = "0.4", default-features = false, features = [
1919
"server",
2020
"macros",
21+
"2025_03_26",
2122
] }
2223

2324
thiserror = { version = "2.0" }
@@ -35,6 +36,7 @@ async-trait = "0.1"
3536
futures = "0.3"
3637
tokio-util = "0.7"
3738
async_zip = { version = "0.0", features = ["full"] }
39+
grep = "0.3"
3840

3941
[dev-dependencies]
4042
tempfile = "3.2"

src/error.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ pub enum ServiceError {
2525
#[error("{0}")]
2626
SerdeJsonError(#[from] serde_json::Error),
2727
#[error("{0}")]
28+
ContentSearchError(#[from] grep::regex::Error),
29+
#[error("{0}")]
2830
McpSdkError(#[from] McpSdkError),
2931
#[error("{0}")]
3032
ZipError(#[from] ZipError),

src/fs_service.rs

Lines changed: 208 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
pub mod file_info;
22
pub mod utils;
3-
43
use file_info::FileInfo;
4+
use grep::{
5+
matcher::{Match, Matcher},
6+
regex::RegexMatcher,
7+
searcher::{sinks::UTF8, BinaryDetection, Searcher},
8+
};
59

610
use std::{
711
env,
@@ -29,10 +33,33 @@ use crate::{
2933
tools::EditOperation,
3034
};
3135

36+
/// Default upper bound on the length of a snippet returned by `extract_snippet`,
/// used when the caller passes `None` for `max_length`. It is applied to byte
/// offsets of the line, so it counts bytes rather than Unicode characters.
const SNIPPET_MAX_LENGTH: usize = 200;
37+
/// Default amount of context kept before the match start by `extract_snippet`,
/// used when the caller passes `None` for `backward_chars`. Like the maximum
/// length, it is applied to byte offsets.
const SNIPPET_BACKWARD_CHARS: usize = 30;
38+
3239
pub struct FileSystemService {
3340
allowed_path: Vec<PathBuf>,
3441
}
3542

43+
/// Represents a single match found in a file's content.
///
/// One value is recorded per matching line by `content_search`.
44+
#[derive(Debug, Clone)]
45+
pub struct ContentMatchResult {
46+
/// The line number where the match occurred (1-based).
47+
pub line_number: u64,
/// Byte offset of the start of the match within the matched line, as reported
/// by the matcher for the untrimmed line.
48+
pub start_pos: usize,
49+
/// A snippet of the line containing the match, produced by `extract_snippet`.
50+
/// The line is trimmed and the snippet is limited to `SNIPPET_MAX_LENGTH` (200) bytes,
/// starting up to `SNIPPET_BACKWARD_CHARS` (30) bytes before the match; `...` marks a cut-off side.
51+
pub line_text: String,
52+
}
53+
54+
/// Represents all matches found in a specific file.
///
/// `content_search` only returns a value of this type when `matches` is non-empty;
/// a file without matches is reported as `None` instead.
55+
#[derive(Debug, Clone)]
56+
pub struct FileSearchResult {
57+
/// The path to the file where matches were found.
58+
pub file_path: PathBuf,
59+
/// All individual match results within the file, in the order they were reported.
60+
pub matches: Vec<ContentMatchResult>,
61+
}
62+
3663
impl FileSystemService {
3764
pub fn try_new(allowed_directories: &[String]) -> ServiceResult<Self> {
3865
let normalized_dirs: Vec<PathBuf> = allowed_directories
@@ -376,19 +403,59 @@ impl FileSystemService {
376403
Ok(())
377404
}
378405

406+
/// Searches for files in the directory tree starting at `root_path` that match the given `pattern`,
407+
/// excluding paths that match any of the `exclude_patterns`.
408+
///
409+
/// # Arguments
410+
/// * `root_path` - The root directory to start the search from.
411+
/// * `pattern` - A glob pattern to match file names (case-insensitive). If no wildcards are provided,
412+
/// the pattern is wrapped in '*' for partial matching.
413+
/// * `exclude_patterns` - A list of glob patterns to exclude paths (case-sensitive).
414+
///
415+
/// # Returns
416+
/// A `ServiceResult` containing a vector of`walkdir::DirEntry` objects for matching files,
417+
/// or a `ServiceError` if an error occurs.
379418
pub fn search_files(
380419
&self,
381-
// root_path: impl Into<PathBuf>,
382420
root_path: &Path,
383421
pattern: String,
384422
exclude_patterns: Vec<String>,
385423
) -> ServiceResult<Vec<walkdir::DirEntry>> {
424+
let result = self.search_files_iter(root_path, pattern, exclude_patterns)?;
425+
Ok(result.collect::<Vec<walkdir::DirEntry>>())
426+
}
427+
428+
/// Returns an iterator over files in the directory tree starting at `root_path` that match
429+
/// the given `pattern`, excluding paths that match any of the `exclude_patterns`.
430+
///
431+
/// # Arguments
432+
/// * `root_path` - The root directory to start the search from.
433+
/// * `pattern` - A glob pattern to match file names. If no wildcards are provided, the pattern is wrapped in `**/*{pattern}*` for partial matching.
434+
/// * `exclude_patterns` - A list of glob patterns to exclude paths (case-sensitive).
435+
///
436+
/// # Returns
437+
/// A `ServiceResult` containing an iterator yielding `walkdir::DirEntry` objects for matching files,
438+
/// or a `ServiceError` if an error occurs.
439+
pub fn search_files_iter<'a>(
440+
&'a self,
441+
// root_path: impl Into<PathBuf>,
442+
root_path: &'a Path,
443+
pattern: String,
444+
exclude_patterns: Vec<String>,
445+
) -> ServiceResult<impl Iterator<Item = walkdir::DirEntry> + 'a> {
386446
let valid_path = self.validate_path(root_path)?;
387447

448+
let updated_pattern = if pattern.contains('*') {
449+
pattern.to_lowercase()
450+
} else {
451+
format!("**/*{}*", &pattern.to_lowercase())
452+
};
453+
let glob_pattern = Pattern::new(&updated_pattern);
454+
388455
let result = WalkDir::new(valid_path)
389456
.follow_links(true)
390457
.into_iter()
391-
.filter_entry(|dir_entry| {
458+
.filter_entry(move |dir_entry| {
392459
let full_path = dir_entry.path();
393460

394461
// Validate each path before processing
@@ -415,18 +482,9 @@ impl FileSystemService {
415482
});
416483

417484
!should_exclude
418-
});
419-
420-
let updated_pattern = if pattern.contains('*') {
421-
pattern.to_lowercase()
422-
} else {
423-
format!("**/*{}*", &pattern.to_lowercase())
424-
};
425-
let glob_pattern = Pattern::new(&updated_pattern);
426-
let final_result = result
427-
.into_iter()
485+
})
428486
.filter_map(|v| v.ok())
429-
.filter(|entry| {
487+
.filter(move |entry| {
430488
if root_path == entry.path() {
431489
return false;
432490
}
@@ -437,11 +495,10 @@ impl FileSystemService {
437495
glob.matches(&entry.file_name().to_str().unwrap_or("").to_lowercase())
438496
})
439497
.unwrap_or(false);
440-
441498
is_match
442-
})
443-
.collect::<Vec<walkdir::DirEntry>>();
444-
Ok(final_result)
499+
});
500+
501+
Ok(result)
445502
}
446503

447504
pub fn create_unified_diff(
@@ -631,4 +688,137 @@ impl FileSystemService {
631688

632689
Ok(formatted_diff)
633690
}
691+
692+
pub fn escape_regex(&self, text: &str) -> String {
693+
// Covers special characters in regex engines (RE2, PCRE, JS, Python)
694+
const SPECIAL_CHARS: &[char] = &[
695+
'.', '^', '$', '*', '+', '?', '(', ')', '[', ']', '{', '}', '\\', '|', '/',
696+
];
697+
698+
let mut escaped = String::with_capacity(text.len());
699+
700+
for ch in text.chars() {
701+
if SPECIAL_CHARS.contains(&ch) {
702+
escaped.push('\\');
703+
}
704+
escaped.push(ch);
705+
}
706+
707+
escaped
708+
}
709+
710+
// Searches the content of a file for occurrences of the given query string.
711+
///
712+
/// This method searches the file specified by `file_path` for lines matching the `query`.
713+
/// The search can be performed as a regular expression or as a literal string,
714+
/// depending on the `is_regex` flag.
715+
///
716+
/// If matched line is larger than 255 characters, a snippet will be extracted around the matched text.
717+
///
718+
pub fn content_search(
719+
&self,
720+
query: &str,
721+
file_path: impl AsRef<Path>,
722+
is_regex: Option<bool>,
723+
) -> ServiceResult<Option<FileSearchResult>> {
724+
let query = if is_regex.unwrap_or_default() {
725+
query.to_string()
726+
} else {
727+
self.escape_regex(query)
728+
};
729+
let matcher = RegexMatcher::new(query.as_str())?;
730+
731+
let mut searcher = Searcher::new();
732+
let mut result = FileSearchResult {
733+
file_path: file_path.as_ref().to_path_buf(),
734+
matches: vec![],
735+
};
736+
737+
searcher.set_binary_detection(BinaryDetection::quit(b'\x00'));
738+
739+
searcher.search_path(
740+
&matcher,
741+
file_path,
742+
UTF8(|line_number, line| {
743+
let actual_match = matcher.find(line.as_bytes())?.unwrap();
744+
745+
result.matches.push(ContentMatchResult {
746+
line_number,
747+
start_pos: actual_match.start(),
748+
line_text: self.extract_snippet(line, actual_match, None, None),
749+
});
750+
Ok(true)
751+
}),
752+
)?;
753+
754+
if result.matches.is_empty() {
755+
return Ok(None);
756+
}
757+
758+
Ok(Some(result))
759+
}
760+
761+
/// Extracts a snippet from a given line of text around a match.
762+
///
763+
/// It extracts a substring starting a fixed number of characters (`SNIPPET_BACKWARD_CHARS`)
764+
/// before the start position of the `match`, and extends up to `max_length` characters
765+
/// If the snippet does not include the beginning or end of the original line, ellipses (`"..."`) are added
766+
/// to indicate the truncation.
767+
pub fn extract_snippet(
768+
&self,
769+
line: &str,
770+
match_result: Match,
771+
max_length: Option<usize>,
772+
backward_chars: Option<usize>,
773+
) -> String {
774+
let max_length = max_length.unwrap_or(SNIPPET_MAX_LENGTH);
775+
let backward_chars = backward_chars.unwrap_or(SNIPPET_BACKWARD_CHARS);
776+
777+
let start_pos = line.len() - line.trim_start().len();
778+
779+
let line = line.trim();
780+
781+
// Start SNIPPET_BACKWARD_CHARS characters before match (or at 0)
782+
let snippet_start = (match_result.start() - start_pos).saturating_sub(backward_chars);
783+
784+
// Get up to SNIPPET_MAX_LENGTH characters from snippet_start
785+
let snippet_end = (snippet_start + max_length).min(line.len());
786+
787+
let snippet = &line[snippet_start..snippet_end];
788+
789+
// Add ellipses if line was truncated
790+
let mut result = String::new();
791+
if snippet_start > 0 {
792+
result.push_str("...");
793+
}
794+
result.push_str(snippet);
795+
if snippet_end < line.len() {
796+
result.push_str("...");
797+
}
798+
result
799+
}
800+
801+
pub fn search_files_content(
802+
&self,
803+
root_path: impl AsRef<Path>,
804+
pattern: &str,
805+
query: &str,
806+
is_regex: bool,
807+
exclude_patterns: Option<Vec<String>>,
808+
) -> ServiceResult<Vec<FileSearchResult>> {
809+
let files_iter = self.search_files_iter(
810+
root_path.as_ref(),
811+
pattern.to_string(),
812+
exclude_patterns.to_owned().unwrap_or_default(),
813+
)?;
814+
815+
let results: Vec<FileSearchResult> = files_iter
816+
.filter_map(|entry| {
817+
self.content_search(query, entry.path(), Some(is_regex))
818+
.ok()
819+
.and_then(|v| v)
820+
})
821+
.collect();
822+
Ok(results)
823+
}
634824
}

src/handler.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,9 @@ impl ServerHandler for MyServerHandler {
147147
FileSystemTools::ZipDirectoryTool(params) => {
148148
ZipDirectoryTool::run_tool(params, &self.fs_service).await
149149
}
150+
FileSystemTools::SearchFilesContentTool(params) => {
151+
SearchFilesContentTool::run_tool(params, &self.fs_service).await
152+
}
150153
}
151154
}
152155
}

src/tools.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ mod move_file;
88
mod read_files;
99
mod read_multiple_files;
1010
mod search_file;
11+
mod search_files_content;
1112
mod write_file;
1213
mod zip_unzip;
1314

@@ -22,6 +23,7 @@ pub use read_files::ReadFileTool;
2223
pub use read_multiple_files::ReadMultipleFilesTool;
2324
pub use rust_mcp_sdk::tool_box;
2425
pub use search_file::SearchFilesTool;
26+
pub use search_files_content::SearchFilesContentTool;
2527
pub use write_file::WriteFileTool;
2628
pub use zip_unzip::{UnzipFileTool, ZipDirectoryTool, ZipFilesTool};
2729

@@ -42,7 +44,8 @@ tool_box!(
4244
WriteFileTool,
4345
ZipFilesTool,
4446
UnzipFileTool,
45-
ZipDirectoryTool
47+
ZipDirectoryTool,
48+
SearchFilesContentTool
4649
]
4750
);
4851

@@ -58,13 +61,13 @@ impl FileSystemTools {
5861
| FileSystemTools::ZipFilesTool(_)
5962
| FileSystemTools::UnzipFileTool(_)
6063
| FileSystemTools::ZipDirectoryTool(_) => true,
61-
6264
FileSystemTools::ReadFileTool(_)
6365
| FileSystemTools::DirectoryTreeTool(_)
6466
| FileSystemTools::GetFileInfoTool(_)
6567
| FileSystemTools::ListAllowedDirectoriesTool(_)
6668
| FileSystemTools::ListDirectoryTool(_)
6769
| FileSystemTools::ReadMultipleFilesTool(_)
70+
| FileSystemTools::SearchFilesContentTool(_)
6871
| FileSystemTools::SearchFilesTool(_) => false,
6972
}
7073
}

0 commit comments

Comments
 (0)