Skip to content

Commit 15bd35e

Browse files
committed
fix: format Python files with ruff
_Generated with `cmux`_
1 parent 42c8bff; commit 15bd35e

File tree

3 files changed

+10
-18
lines changed

3 files changed

+10
-18
lines changed

benchmarks/terminal_bench/calculate_timeout.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -35,18 +35,18 @@ def main():
3535
default="flag",
3636
help="Output format: 'seconds' (just the number) or 'flag' (--global-agent-timeout-sec VALUE)",
3737
)
38-
38+
3939
args = parser.parse_args()
40-
40+
4141
if args.task_ids:
4242
timeout = get_max_timeout_for_tasks(args.task_ids)
4343
else:
4444
# No specific tasks - use conservative default for full suite
4545
timeout = VERY_SLOW_TIMEOUT
46-
46+
4747
# Apply multiplier
4848
timeout = int(timeout * args.multiplier)
49-
49+
5050
if args.format == "seconds":
5151
print(timeout)
5252
else:

benchmarks/terminal_bench/task_timeouts.py

Lines changed: 2 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
1111
Strategy:
1212
- FAST tasks (< 5 min): Simple file operations, basic commands
13-
- NORMAL tasks (15 min): Default for most tasks
13+
- NORMAL tasks (15 min): Default for most tasks
1414
- SLOW tasks (30 min): Data processing, model training, complex analysis
1515
- VERY_SLOW tasks (60 min): Kernel compilation, large builds
1616
"""
@@ -27,7 +27,6 @@
2727
"build-linux-kernel-qemu": VERY_SLOW_TIMEOUT, # Failed at 763s
2828
"build-initramfs-qemu": VERY_SLOW_TIMEOUT,
2929
"build-tcc-qemu": SLOW_TIMEOUT,
30-
3130
# SLOW: Data processing, ML training, complex analysis
3231
"count-dataset-tokens": SLOW_TIMEOUT, # Anthropic timed out at 808s, OpenAI succeeded at 344s
3332
"train-fasttext": SLOW_TIMEOUT, # Timed out at 900s
@@ -36,46 +35,39 @@
3635
"eval-mteb": SLOW_TIMEOUT,
3736
"eval-mteb.hard": SLOW_TIMEOUT,
3837
"reshard-c4-data": SLOW_TIMEOUT,
39-
4038
# SLOW: QEMU/emulation tasks
4139
"qemu-startup": SLOW_TIMEOUT, # Passed at 838s but hit timeout
4240
"qemu-alpine-ssh": SLOW_TIMEOUT,
4341
"run-pdp11-code": SLOW_TIMEOUT,
44-
4542
# SLOW: Complex algorithmic tasks
4643
"blind-maze-explorer-algorithm": SLOW_TIMEOUT,
4744
"blind-maze-explorer-algorithm.easy": SLOW_TIMEOUT,
4845
"blind-maze-explorer-algorithm.hard": SLOW_TIMEOUT, # Passed at 1200s!
4946
"path-tracing": SLOW_TIMEOUT, # Passed at 660s
5047
"path-tracing-reverse": SLOW_TIMEOUT, # Timed out at 660s
51-
5248
# SLOW: Security/crypto tasks that may need brute force
5349
"crack-7z-hash": SLOW_TIMEOUT,
5450
"crack-7z-hash.hard": SLOW_TIMEOUT,
5551
"password-recovery": SLOW_TIMEOUT,
5652
"security-vulhub-minio": SLOW_TIMEOUT,
57-
5853
# SLOW: Complex git/code analysis
5954
"git-workflow-hack": SLOW_TIMEOUT, # Passed but hit timeout
6055
"pytorch-model-cli": SLOW_TIMEOUT, # Passed at 541s
6156
"swe-bench-astropy-1": SLOW_TIMEOUT,
6257
"swe-bench-astropy-2": SLOW_TIMEOUT,
6358
"swe-bench-fsspec": SLOW_TIMEOUT,
6459
"swe-bench-langcodes": SLOW_TIMEOUT,
65-
6660
# SLOW: Compilation/code generation
6761
"gpt2-codegolf": SLOW_TIMEOUT,
6862
"polyglot-c-py": SLOW_TIMEOUT,
6963
"polyglot-rust-c": SLOW_TIMEOUT,
7064
"write-compressor": SLOW_TIMEOUT,
71-
7265
# SLOW: Complex system tasks
7366
"cron-broken-network": SLOW_TIMEOUT,
7467
"oom": SLOW_TIMEOUT,
7568
"fibonacci-server": SLOW_TIMEOUT,
7669
"incompatible-python-fasttext.base_with_hint": SLOW_TIMEOUT,
7770
"extract-safely": SLOW_TIMEOUT,
78-
7971
# FAST: Simple tasks that should complete quickly
8072
"hello-world": FAST_TIMEOUT,
8173
"fix-permissions": FAST_TIMEOUT,
@@ -99,5 +91,5 @@ def get_max_timeout_for_tasks(task_ids: list[str]) -> int:
9991
"""
10092
if not task_ids:
10193
return VERY_SLOW_TIMEOUT # Conservative default for unknown tasks
102-
94+
10395
return max(get_timeout_for_task(task_id) for task_id in task_ids)

benchmarks/terminal_bench/task_timeouts_test.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -42,15 +42,15 @@ def test_max_timeout_for_tasks():
4242
# Mix of fast and slow
4343
tasks = ["hello-world", "count-dataset-tokens"]
4444
assert get_max_timeout_for_tasks(tasks) == SLOW_TIMEOUT
45-
45+
4646
# Mix of fast, slow, and very slow
4747
tasks = ["hello-world", "count-dataset-tokens", "build-linux-kernel-qemu"]
4848
assert get_max_timeout_for_tasks(tasks) == VERY_SLOW_TIMEOUT
49-
49+
5050
# All fast
5151
tasks = ["hello-world", "simple-web-scraper"]
5252
assert get_max_timeout_for_tasks(tasks) == FAST_TIMEOUT
53-
53+
5454
# Empty list should return conservative default
5555
assert get_max_timeout_for_tasks([]) == VERY_SLOW_TIMEOUT
5656

@@ -61,6 +61,6 @@ def test_timeout_values():
6161
assert NORMAL_TIMEOUT == 900 # 15 minutes
6262
assert SLOW_TIMEOUT == 1800 # 30 minutes
6363
assert VERY_SLOW_TIMEOUT == 3600 # 60 minutes
64-
64+
6565
# Ensure proper ordering
6666
assert FAST_TIMEOUT < NORMAL_TIMEOUT < SLOW_TIMEOUT < VERY_SLOW_TIMEOUT

0 commit comments

Comments
 (0)