1
1
pub mod file_info;
2
2
pub mod utils;
3
-
4
3
use file_info:: FileInfo ;
4
+ use grep:: {
5
+ matcher:: { Match , Matcher } ,
6
+ regex:: RegexMatcher ,
7
+ searcher:: { sinks:: UTF8 , BinaryDetection , Searcher } ,
8
+ } ;
5
9
6
10
use std:: {
7
11
env,
@@ -29,10 +33,33 @@ use crate::{
29
33
tools:: EditOperation ,
30
34
} ;
31
35
36
/// Default upper bound on the length of a snippet built by `extract_snippet`
/// (used when the caller passes `None` for `max_length`).
const SNIPPET_MAX_LENGTH: usize = 200;
/// Default amount of context kept in front of the match start by `extract_snippet`
/// (used when the caller passes `None` for `backward_chars`).
const SNIPPET_BACKWARD_CHARS: usize = 30;
38
+
32
39
/// Service object that groups the file-system operations implemented in this module
/// (file search, content search, unified diffs, ...).
///
/// NOTE(review): `allowed_path` presumably holds the directories the service is allowed
/// to touch (it is populated from `allowed_directories` in `try_new`) — confirm against
/// `validate_path`.
pub struct FileSystemService {
    allowed_path: Vec<PathBuf>,
}
35
42
43
/// Represents a single match found in a file's content.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ContentMatchResult {
    /// The line number where the match occurred (1-based).
    pub line_number: u64,
    /// Byte offset of the start of the match within the original (untrimmed) line.
    pub start_pos: usize,
    /// The text of the line containing the match, with surrounding whitespace trimmed.
    ///
    /// Long lines are not stored in full: `FileSystemService::extract_snippet` keeps a
    /// window around the match (by default at most `SNIPPET_MAX_LENGTH` long, starting
    /// `SNIPPET_BACKWARD_CHARS` before the match) and marks cut-off ends with `"..."`.
    pub line_text: String,
}
53
+
54
+ /// Represents all matches found in a specific file.
55
+ #[ derive( Debug , Clone ) ]
56
+ pub struct FileSearchResult {
57
+ /// The path to the file where matches were found.
58
+ pub file_path : PathBuf ,
59
+ /// All individual match results within the file.
60
+ pub matches : Vec < ContentMatchResult > ,
61
+ }
62
+
36
63
impl FileSystemService {
37
64
pub fn try_new ( allowed_directories : & [ String ] ) -> ServiceResult < Self > {
38
65
let normalized_dirs: Vec < PathBuf > = allowed_directories
@@ -376,19 +403,59 @@ impl FileSystemService {
376
403
Ok ( ( ) )
377
404
}
378
405
406
+ /// Searches for files in the directory tree starting at `root_path` that match the given `pattern`,
407
+ /// excluding paths that match any of the `exclude_patterns`.
408
+ ///
409
+ /// # Arguments
410
+ /// * `root_path` - The root directory to start the search from.
411
+ /// * `pattern` - A glob pattern to match file names (case-insensitive). If no wildcards are provided,
412
+ /// the pattern is wrapped in '*' for partial matching.
413
+ /// * `exclude_patterns` - A list of glob patterns to exclude paths (case-sensitive).
414
+ ///
415
+ /// # Returns
416
+ /// A `ServiceResult` containing a vector of`walkdir::DirEntry` objects for matching files,
417
+ /// or a `ServiceError` if an error occurs.
379
418
pub fn search_files (
380
419
& self ,
381
- // root_path: impl Into<PathBuf>,
382
420
root_path : & Path ,
383
421
pattern : String ,
384
422
exclude_patterns : Vec < String > ,
385
423
) -> ServiceResult < Vec < walkdir:: DirEntry > > {
424
+ let result = self . search_files_iter ( root_path, pattern, exclude_patterns) ?;
425
+ Ok ( result. collect :: < Vec < walkdir:: DirEntry > > ( ) )
426
+ }
427
+
428
+ /// Returns an iterator over files in the directory tree starting at `root_path` that match
429
+ /// the given `pattern`, excluding paths that match any of the `exclude_patterns`.
430
+ ///
431
+ /// # Arguments
432
+ /// * `root_path` - The root directory to start the search from.
433
+ /// * `pattern` - A glob pattern to match file names. If no wildcards are provided, the pattern is wrapped in `**/*{pattern}*` for partial matching.
434
+ /// * `exclude_patterns` - A list of glob patterns to exclude paths (case-sensitive).
435
+ ///
436
+ /// # Returns
437
+ /// A `ServiceResult` containing an iterator yielding `walkdir::DirEntry` objects for matching files,
438
+ /// or a `ServiceError` if an error occurs.
439
+ pub fn search_files_iter < ' a > (
440
+ & ' a self ,
441
+ // root_path: impl Into<PathBuf>,
442
+ root_path : & ' a Path ,
443
+ pattern : String ,
444
+ exclude_patterns : Vec < String > ,
445
+ ) -> ServiceResult < impl Iterator < Item = walkdir:: DirEntry > + ' a > {
386
446
let valid_path = self . validate_path ( root_path) ?;
387
447
448
+ let updated_pattern = if pattern. contains ( '*' ) {
449
+ pattern. to_lowercase ( )
450
+ } else {
451
+ format ! ( "**/*{}*" , & pattern. to_lowercase( ) )
452
+ } ;
453
+ let glob_pattern = Pattern :: new ( & updated_pattern) ;
454
+
388
455
let result = WalkDir :: new ( valid_path)
389
456
. follow_links ( true )
390
457
. into_iter ( )
391
- . filter_entry ( |dir_entry| {
458
+ . filter_entry ( move |dir_entry| {
392
459
let full_path = dir_entry. path ( ) ;
393
460
394
461
// Validate each path before processing
@@ -415,18 +482,9 @@ impl FileSystemService {
415
482
} ) ;
416
483
417
484
!should_exclude
418
- } ) ;
419
-
420
- let updated_pattern = if pattern. contains ( '*' ) {
421
- pattern. to_lowercase ( )
422
- } else {
423
- format ! ( "**/*{}*" , & pattern. to_lowercase( ) )
424
- } ;
425
- let glob_pattern = Pattern :: new ( & updated_pattern) ;
426
- let final_result = result
427
- . into_iter ( )
485
+ } )
428
486
. filter_map ( |v| v. ok ( ) )
429
- . filter ( |entry| {
487
+ . filter ( move |entry| {
430
488
if root_path == entry. path ( ) {
431
489
return false ;
432
490
}
@@ -437,11 +495,10 @@ impl FileSystemService {
437
495
glob. matches ( & entry. file_name ( ) . to_str ( ) . unwrap_or ( "" ) . to_lowercase ( ) )
438
496
} )
439
497
. unwrap_or ( false ) ;
440
-
441
498
is_match
442
- } )
443
- . collect :: < Vec < walkdir :: DirEntry > > ( ) ;
444
- Ok ( final_result )
499
+ } ) ;
500
+
501
+ Ok ( result )
445
502
}
446
503
447
504
pub fn create_unified_diff (
@@ -631,4 +688,137 @@ impl FileSystemService {
631
688
632
689
Ok ( formatted_diff)
633
690
}
691
+
692
+ pub fn escape_regex ( & self , text : & str ) -> String {
693
+ // Covers special characters in regex engines (RE2, PCRE, JS, Python)
694
+ const SPECIAL_CHARS : & [ char ] = & [
695
+ '.' , '^' , '$' , '*' , '+' , '?' , '(' , ')' , '[' , ']' , '{' , '}' , '\\' , '|' , '/' ,
696
+ ] ;
697
+
698
+ let mut escaped = String :: with_capacity ( text. len ( ) ) ;
699
+
700
+ for ch in text. chars ( ) {
701
+ if SPECIAL_CHARS . contains ( & ch) {
702
+ escaped. push ( '\\' ) ;
703
+ }
704
+ escaped. push ( ch) ;
705
+ }
706
+
707
+ escaped
708
+ }
709
+
710
+ // Searches the content of a file for occurrences of the given query string.
711
+ ///
712
+ /// This method searches the file specified by `file_path` for lines matching the `query`.
713
+ /// The search can be performed as a regular expression or as a literal string,
714
+ /// depending on the `is_regex` flag.
715
+ ///
716
+ /// If matched line is larger than 255 characters, a snippet will be extracted around the matched text.
717
+ ///
718
+ pub fn content_search (
719
+ & self ,
720
+ query : & str ,
721
+ file_path : impl AsRef < Path > ,
722
+ is_regex : Option < bool > ,
723
+ ) -> ServiceResult < Option < FileSearchResult > > {
724
+ let query = if is_regex. unwrap_or_default ( ) {
725
+ query. to_string ( )
726
+ } else {
727
+ self . escape_regex ( query)
728
+ } ;
729
+ let matcher = RegexMatcher :: new ( query. as_str ( ) ) ?;
730
+
731
+ let mut searcher = Searcher :: new ( ) ;
732
+ let mut result = FileSearchResult {
733
+ file_path : file_path. as_ref ( ) . to_path_buf ( ) ,
734
+ matches : vec ! [ ] ,
735
+ } ;
736
+
737
+ searcher. set_binary_detection ( BinaryDetection :: quit ( b'\x00' ) ) ;
738
+
739
+ searcher. search_path (
740
+ & matcher,
741
+ file_path,
742
+ UTF8 ( |line_number, line| {
743
+ let actual_match = matcher. find ( line. as_bytes ( ) ) ?. unwrap ( ) ;
744
+
745
+ result. matches . push ( ContentMatchResult {
746
+ line_number,
747
+ start_pos : actual_match. start ( ) ,
748
+ line_text : self . extract_snippet ( line, actual_match, None , None ) ,
749
+ } ) ;
750
+ Ok ( true )
751
+ } ) ,
752
+ ) ?;
753
+
754
+ if result. matches . is_empty ( ) {
755
+ return Ok ( None ) ;
756
+ }
757
+
758
+ Ok ( Some ( result) )
759
+ }
760
+
761
+ /// Extracts a snippet from a given line of text around a match.
762
+ ///
763
+ /// It extracts a substring starting a fixed number of characters (`SNIPPET_BACKWARD_CHARS`)
764
+ /// before the start position of the `match`, and extends up to `max_length` characters
765
+ /// If the snippet does not include the beginning or end of the original line, ellipses (`"..."`) are added
766
+ /// to indicate the truncation.
767
+ pub fn extract_snippet (
768
+ & self ,
769
+ line : & str ,
770
+ match_result : Match ,
771
+ max_length : Option < usize > ,
772
+ backward_chars : Option < usize > ,
773
+ ) -> String {
774
+ let max_length = max_length. unwrap_or ( SNIPPET_MAX_LENGTH ) ;
775
+ let backward_chars = backward_chars. unwrap_or ( SNIPPET_BACKWARD_CHARS ) ;
776
+
777
+ let start_pos = line. len ( ) - line. trim_start ( ) . len ( ) ;
778
+
779
+ let line = line. trim ( ) ;
780
+
781
+ // Start SNIPPET_BACKWARD_CHARS characters before match (or at 0)
782
+ let snippet_start = ( match_result. start ( ) - start_pos) . saturating_sub ( backward_chars) ;
783
+
784
+ // Get up to SNIPPET_MAX_LENGTH characters from snippet_start
785
+ let snippet_end = ( snippet_start + max_length) . min ( line. len ( ) ) ;
786
+
787
+ let snippet = & line[ snippet_start..snippet_end] ;
788
+
789
+ // Add ellipses if line was truncated
790
+ let mut result = String :: new ( ) ;
791
+ if snippet_start > 0 {
792
+ result. push_str ( "..." ) ;
793
+ }
794
+ result. push_str ( snippet) ;
795
+ if snippet_end < line. len ( ) {
796
+ result. push_str ( "..." ) ;
797
+ }
798
+ result
799
+ }
800
+
801
+ pub fn search_files_content (
802
+ & self ,
803
+ root_path : impl AsRef < Path > ,
804
+ pattern : & str ,
805
+ query : & str ,
806
+ is_regex : bool ,
807
+ exclude_patterns : Option < Vec < String > > ,
808
+ ) -> ServiceResult < Vec < FileSearchResult > > {
809
+ let files_iter = self . search_files_iter (
810
+ root_path. as_ref ( ) ,
811
+ pattern. to_string ( ) ,
812
+ exclude_patterns. to_owned ( ) . unwrap_or_default ( ) ,
813
+ ) ?;
814
+
815
+ let results: Vec < FileSearchResult > = files_iter
816
+ . filter_map ( |entry| {
817
+ self . content_search ( query, entry. path ( ) , Some ( is_regex) )
818
+ . ok ( )
819
+ . and_then ( |v| v)
820
+ } )
821
+ . collect ( ) ;
822
+ Ok ( results)
823
+ }
634
824
}
0 commit comments