Skip to content

Commit d404964

Browse files
authored
Merge pull request #112 from HKUDS/fix/grep-limit-overrun-merge-main
fix(grep): handle long rg lines without crashing
2 parents a5c2949 + 7a29e66 commit d404964

2 files changed

Lines changed: 50 additions & 4 deletions

File tree

src/openharness/tools/grep_tool.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@ async def _rg_grep(
183183
cwd=str(root),
184184
stdout=asyncio.subprocess.PIPE,
185185
stderr=asyncio.subprocess.PIPE,
186+
limit=8 * 1024 * 1024,
186187
)
187188

188189
matches: list[str] = []
@@ -239,6 +240,7 @@ async def _rg_grep_file(
239240
cwd=str(path.parent),
240241
stdout=asyncio.subprocess.PIPE,
241242
stderr=asyncio.subprocess.PIPE,
243+
limit=8 * 1024 * 1024,
242244
)
243245

244246
matches: list[str] = []
@@ -282,7 +284,10 @@ async def _collect_rg_matches(
282284
) -> None:
283285
assert process.stdout is not None
284286
while len(matches) < limit:
285-
raw = await process.stdout.readline()
287+
try:
288+
raw = await process.stdout.readline()
289+
except ValueError:
290+
continue
286291
if not raw:
287292
break
288293
line = raw.decode("utf-8", errors="replace").rstrip("\n")
@@ -300,7 +305,10 @@ async def _collect_rg_file_matches(
300305
) -> None:
301306
assert process.stdout is not None
302307
while len(matches) < limit:
303-
raw = await process.stdout.readline()
308+
try:
309+
raw = await process.stdout.readline()
310+
except ValueError:
311+
continue
304312
if not raw:
305313
break
306314
line = raw.decode("utf-8", errors="replace").rstrip("\n")

tests/test_tools/test_grep_tool.py

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,20 @@ async def readline(self):
1212
return b""
1313

1414

15-
class _FakeProcess:
15+
class _ValueErrorThenEofStdout:
1616
def __init__(self):
17-
self.stdout = _FakeStdout()
17+
self.calls = 0
18+
19+
async def readline(self):
20+
self.calls += 1
21+
if self.calls == 1:
22+
raise ValueError("Separator is not found, and chunk exceed the limit")
23+
return b""
24+
25+
26+
class _FakeProcess:
27+
def __init__(self, stdout=None):
28+
self.stdout = stdout or _FakeStdout()
1829
self.stderr = None
1930
self.returncode = None
2031
self.terminated = False
@@ -54,3 +65,30 @@ async def fake_create_subprocess_exec(*args, **kwargs):
5465
assert result.is_error is True
5566
assert "grep timed out after 1 seconds" in result.output
5667
assert fake_process.terminated or fake_process.killed
68+
69+
70+
@pytest.mark.asyncio
71+
async def test_grep_tool_uses_large_stream_limit_and_skips_valueerror(monkeypatch, tmp_path: Path):
72+
tool = GrepTool()
73+
monkeypatch.setattr("openharness.tools.grep_tool.shutil.which", lambda _: "/usr/bin/rg")
74+
fake_process = _FakeProcess(stdout=_ValueErrorThenEofStdout())
75+
seen_kwargs = {}
76+
77+
async def fake_create_subprocess_exec(*args, **kwargs):
78+
seen_kwargs.update(kwargs)
79+
fake_process.returncode = 1
80+
return fake_process
81+
82+
monkeypatch.setattr(
83+
"openharness.tools.grep_tool.asyncio.create_subprocess_exec",
84+
fake_create_subprocess_exec,
85+
)
86+
87+
result = await tool.execute(
88+
GrepToolInput(pattern="foo"),
89+
type("Ctx", (), {"cwd": tmp_path})(),
90+
)
91+
92+
assert result.is_error is False
93+
assert result.output == "(no matches)"
94+
assert seen_kwargs["limit"] == 8 * 1024 * 1024

0 commit comments

Comments
 (0)