diff --git a/dev_kit_mcp_server/tools/explore/search_text.py b/dev_kit_mcp_server/tools/explore/search_text.py index 063bc46..6f8810a 100644 --- a/dev_kit_mcp_server/tools/explore/search_text.py +++ b/dev_kit_mcp_server/tools/explore/search_text.py @@ -1,10 +1,12 @@ """Module for searching text content in files.""" +import asyncio import re from dataclasses import dataclass from pathlib import Path from typing import Any, Dict, List, Optional +import aiofiles import git from ...core import AsyncOperation @@ -16,7 +18,70 @@ class SearchTextOperation(AsyncOperation): name = "search_text" - def _search_text( + async def _process_file_async( + self, + file_path: Path, + compiled_pattern: re.Pattern[str], + context: Optional[int] = None, + ) -> tuple[List[Dict[str, Any]], int]: + """Process a single file asynchronously for pattern matches. + + Args: + file_path: Path to the file to process + compiled_pattern: Compiled regex pattern to search for + context: Number of context lines to include before/after matches + + Returns: + Tuple of (matches found in file, number of lines searched) + + """ + matches = [] + lines_searched = 0 + + try: + async with aiofiles.open(file_path, "r", encoding="utf-8", errors="ignore") as f: + lines = await f.readlines() + + lines_searched = len(lines) + + # Find matching lines + for line_num, line in enumerate(lines, 1): + if compiled_pattern.search(line): + # Get relative path from project root + try: + relative_path = file_path.relative_to(self._root_path) + except ValueError: + relative_path = file_path + + match_data = { + "file": str(relative_path), + "line_number": line_num, + "line": line.rstrip("\n\r"), + } + + # Add context lines if requested + if context is not None and context > 0: + start_line = max(0, line_num - 1 - context) + end_line = min(len(lines), line_num + context) + + context_lines = [] + for i in range(start_line, end_line): + context_lines.append({ + "line_number": i + 1, + "line": lines[i].rstrip("\n\r"), + "is_match": i == line_num - 1, + }) + match_data["context"] = context_lines + + matches.append(match_data) + + except (UnicodeDecodeError, OSError, PermissionError): + # Skip binary files or files with access issues + pass + + return matches, lines_searched + + async def _search_text( self, pattern: str, files: Optional[List[str]] = None, @@ -78,54 +143,33 @@ def _search_text( raise ValueError(f"Path is not a file: {file_str}") search_files.append(file_path) - # Search for matches + # Process files concurrently with limited concurrency + # Use a semaphore to limit concurrent file operations + max_concurrent_files = min(20, len(search_files)) # Limit to 20 concurrent files + semaphore = asyncio.Semaphore(max_concurrent_files) + + async def process_file_with_semaphore(file_path: Path) -> tuple[List[Dict[str, Any]], int]: + async with semaphore: + return await self._process_file_async(file_path, compiled_pattern, context) + + # Process all files concurrently + tasks = [process_file_with_semaphore(file_path) for file_path in search_files] + results = await asyncio.gather(*tasks, return_exceptions=True) + + # Collect results and handle any exceptions matches: List[Dict[str, Any]] = [] total_files_searched = 0 total_lines_searched = 0 - for file_path in search_files: - total_files_searched += 1 - try: - # Try to read as text file - with open(file_path, "r", encoding="utf-8", errors="ignore") as f: - lines = f.readlines() - - total_lines_searched += len(lines) - - # Find matching lines - for line_num, line in enumerate(lines, 1): - if compiled_pattern.search(line): - # Get relative path from project root - try: - relative_path = file_path.relative_to(self._root_path) - except ValueError: - relative_path = file_path - - match_data = { - "file": str(relative_path), - "line_number": line_num, - "line": line.rstrip("\n\r"), - } - - # Add context lines if requested - if context is not None and context > 0: - start_line = max(0, line_num - 1 - context) - end_line = min(len(lines), line_num + context) - - context_lines = [] - for i in range(start_line, end_line): - context_lines.append({ - "line_number": i + 1, - "line": lines[i].rstrip("\n\r"), - "is_match": i == line_num - 1, - }) - match_data["context"] = context_lines - - matches.append(match_data) - - except (UnicodeDecodeError, OSError, PermissionError): - # Skip binary files or files with access issues + for result in results: + if isinstance(result, BaseException): + # Log the exception but continue processing other files continue + else: + file_matches, lines_count = result + matches.extend(file_matches) + total_files_searched += 1 + total_lines_searched += lines_count # Prepare output content_lines = [f"Text search results for pattern '{pattern}':", ""] @@ -182,7 +226,7 @@ async def __call__( """ try: - result = self._search_text(pattern, files, context, max_chars) + result = await self._search_text(pattern, files, context, max_chars) return { "status": "success", "message": ( diff --git a/pyproject.toml b/pyproject.toml index da9716f..e54abf8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,8 @@ dependencies = [ "mcp>=1.8.1", "fastmcp>=2.3.3", "gitpython>=3.1.43", - "toml>=0.10.2" + "toml>=0.10.2", + "aiofiles>=24.1.0" ] [project.optional-dependencies] @@ -51,6 +52,7 @@ dev = [ "pytest-parametrization>=2022", "ruff>=0.8.2", "pytest-asyncio>=0.23.5", + "types-aiofiles>=24.1.0", ] docs = [ "sphinx>=8.2.0; python_version >= '3.11'", diff --git a/tests/tools/explore/test_search_text_performance.py b/tests/tools/explore/test_search_text_performance.py new file mode 100644 index 0000000..886d8bc --- /dev/null +++ b/tests/tools/explore/test_search_text_performance.py @@ -0,0 +1,157 @@ +"""Performance tests for SearchTextOperation async implementation.""" + +import asyncio +import tempfile +import time +from pathlib import Path + +import git +import pytest + +from dev_kit_mcp_server.tools import SearchTextOperation + + +@pytest.fixture +def large_test_setup(): + """Create a test environment with many files for performance testing.""" + with tempfile.TemporaryDirectory() as temp_dir: + # Initialize git repository + git.Repo.init(temp_dir) + + # Create multiple files with varying content + files_created = [] + + # Create 50 files with content to search + for i in range(50): + file_path = Path(temp_dir) / f"test_file_{i:03d}.py" + with open(file_path, "w") as f: + f.write(f"# Test file {i}\n") + f.write("import os\n") + f.write("import sys\n") + f.write(f"def function_{i}():\n") + f.write(f' """This is function {i}"""\n') + f.write(f" return {i}\n") + f.write("\n") + f.write("if __name__ == '__main__':\n") + f.write(f" print(function_{i}())\n") + # Add some random content to make files different sizes + for j in range(i % 10): + f.write(f"# Additional line {j} in file {i}\n") + files_created.append(str(file_path)) + + # Also create some larger files + for i in range(5): + file_path = Path(temp_dir) / f"large_file_{i}.txt" + with open(file_path, "w") as f: + for line_num in range(1000): + if line_num % 100 == 0: + f.write(f"SEARCH_TARGET line {line_num} in large file {i}\n") + else: + f.write(f"Regular line {line_num} in large file {i}\n") + files_created.append(str(file_path)) + + yield temp_dir, files_created + + +@pytest.mark.asyncio +async def test_search_text_performance_many_files(large_test_setup): + """Test that async search performs well with many files.""" + temp_dir, files_created = large_test_setup + + operation = SearchTextOperation(root_dir=temp_dir) + + # Test searching for a pattern that will be found in many files + start_time = time.time() + result = await operation(pattern="import") + end_time = time.time() + + duration = end_time - start_time + + # Verify the search worked correctly + assert result["status"] == "success" + assert result["matches_found"] > 50 # Should find imports in the Python files + assert result["files_searched"] >= len(files_created) # Might find additional files like .git files + + # Performance should be reasonable (this is a basic check) + # With async processing, it should complete within a reasonable time + assert duration < 5.0 # Should complete within 5 seconds + + print(f"Search of {len(files_created)} files completed in {duration:.3f} seconds") + print(f"Found {result['matches_found']} matches in {result['files_searched']} files") + + +@pytest.mark.asyncio +async def test_search_text_performance_specific_files(large_test_setup): + """Test async search performance with specific files.""" + temp_dir, files_created = large_test_setup + + operation = SearchTextOperation(root_dir=temp_dir) + + # Get relative paths for just the large files + large_files = [f"large_file_{i}.txt" for i in range(5)] + + start_time = time.time() + result = await operation(pattern="SEARCH_TARGET", files=large_files) + end_time = time.time() + + duration = end_time - start_time + + # Verify the search worked correctly + assert result["status"] == "success" + assert result["matches_found"] == 50 # 10 matches per file * 5 files + assert result["files_searched"] == 5 + + # Performance check + assert duration < 2.0 # Should complete quickly for just 5 files + + print(f"Search of 5 large files completed in {duration:.3f} seconds") + print(f"Found {result['matches_found']} matches") + + +@pytest.mark.asyncio +async def test_search_text_concurrency_behavior(large_test_setup): + """Test that the async implementation handles concurrent operations properly.""" + temp_dir, files_created = large_test_setup + + operation = SearchTextOperation(root_dir=temp_dir) + + # Run multiple concurrent searches + async def search_task(pattern): + return await operation(pattern=pattern) + + start_time = time.time() + results = await asyncio.gather( + search_task("import"), search_task("def"), search_task("return"), return_exceptions=True + ) + end_time = time.time() + + duration = end_time - start_time + + # Verify all searches completed successfully + for result in results: + assert not isinstance(result, Exception) + assert result["status"] == "success" + assert result["matches_found"] > 0 + + # Concurrent execution should be efficient + assert duration < 10.0 # Should complete within reasonable time + + print(f"3 concurrent searches completed in {duration:.3f} seconds") + + +@pytest.mark.asyncio +async def test_search_text_error_handling_async(large_test_setup): + """Test that async error handling works correctly.""" + temp_dir, files_created = large_test_setup + + operation = SearchTextOperation(root_dir=temp_dir) + + # Test with invalid regex - should handle error gracefully + result = await operation(pattern="[invalid") + assert result["status"] == "error" + assert "Invalid regex pattern" in result["message"] + + # Test with non-existent file + result = await operation(pattern="test", files=["nonexistent.txt"]) + assert result["status"] == "error" + assert "does not exist" in result["message"] diff --git a/uv.lock b/uv.lock index 0519419..1146869 100644 --- a/uv.lock +++ b/uv.lock @@ -18,6 +18,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8d/3f/95338030883d8c8b91223b4e21744b04d11b161a3ef117295d8241f50ab4/accessible_pygments-0.0.5-py3-none-any.whl", hash = "sha256:88ae3211e68a1d0b011504b2ffc1691feafce124b845bd072ab6f9f66f34d4b7", size = 1395903, upload-time = "2024-05-10T11:23:08.421Z" }, ] +[[package]] +name = "aiofiles" +version = "24.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0b/03/a88171e277e8caa88a4c77808c20ebb04ba74cc4681bf1e9416c862de237/aiofiles-24.1.0.tar.gz", hash = "sha256:22a075c9e5a3810f0c2e48f3008c94d68c65d763b9b03857924c99e57355166c", size = 30247, upload-time = "2024-06-24T11:02:03.584Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a5/45/30bb92d442636f570cb5651bc661f52b610e2eec3f891a5dc3a4c3667db0/aiofiles-24.1.0-py3-none-any.whl", hash = "sha256:b4ec55f4195e3eb5d7abd1bf7e061763e864dd4954231fb8539a0ef8bb8260e5", size = 15896, upload-time = "2024-06-24T11:02:01.529Z" }, +] + [[package]] name = "alabaster" version = "1.0.0" @@ -367,6 +376,7 @@ wheels = [ name = "dev-kit-mcp-server" source = { editable = "." } dependencies = [ + { name = "aiofiles" }, { name = "fastmcp" }, { name = "gitpython" }, { name = "mcp" }, @@ -389,6 +399,7 @@ dev = [ { name = "pytest-cov" }, { name = "pytest-parametrization" }, { name = "ruff" }, + { name = "types-aiofiles" }, ] docs = [ { name = "pydata-sphinx-theme" }, @@ -399,6 +410,7 @@ docs = [ [package.metadata] requires-dist = [ + { name = "aiofiles", specifier = ">=24.1.0" }, { name = "fastmcp", specifier = ">=2.3.3" }, { name = "gitpython", specifier = ">=3.1.43" }, { name = "mcp", specifier = ">=1.8.1" }, @@ -418,6 +430,7 @@ dev = [ { name = "pytest-cov", specifier = ">=4.0.0" }, { name = "pytest-parametrization", specifier = ">=2022" }, { name = "ruff", specifier = ">=0.8.2" }, + { name = "types-aiofiles", specifier = ">=24.1.0" }, ] docs = [ { name = "pydata-sphinx-theme" }, @@ -1522,6 +1535,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/48/20/9d953de6f4367163d23ec823200eb3ecb0050a2609691e512c8b95827a9b/typer-0.15.3-py3-none-any.whl", hash = "sha256:c86a65ad77ca531f03de08d1b9cb67cd09ad02ddddf4b34745b5008f43b239bd", size = 45253, upload-time = "2025-04-28T21:40:56.269Z" }, ] +[[package]] +name = "types-aiofiles" +version = "24.1.0.20250606" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/64/6e/fac4ffc896cb3faf2ac5d23747b65dd8bae1d9ee23305d1a3b12111c3989/types_aiofiles-24.1.0.20250606.tar.gz", hash = "sha256:48f9e26d2738a21e0b0f19381f713dcdb852a36727da8414b1ada145d40a18fe", size = 14364, upload-time = "2025-06-06T03:09:26.515Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/71/de/f2fa2ab8a5943898e93d8036941e05bfd1e1f377a675ee52c7c307dccb75/types_aiofiles-24.1.0.20250606-py3-none-any.whl", hash = "sha256:e568c53fb9017c80897a9aa15c74bf43b7ee90e412286ec1e0912b6e79301aee", size = 14276, upload-time = "2025-06-06T03:09:25.662Z" }, +] + [[package]] name = "typing-extensions" version = "4.13.2"