Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions apps/backend/runners/github/context_gatherer.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,9 @@ class PRContext:
ai_bot_comments: list[AIBotComment] = field(default_factory=list)
# Flag indicating if full diff was skipped (PR > 20K lines)
diff_truncated: bool = False
# Commit SHAs for worktree creation (PR review isolation)
head_sha: str = "" # Commit SHA of PR head (headRefOid)
base_sha: str = "" # Commit SHA of PR base (baseRefOid)


class PRContextGatherer:
Expand Down Expand Up @@ -291,6 +294,8 @@ async def gather(self) -> PRContext:
total_deletions=pr_data.get("deletions", 0),
ai_bot_comments=ai_bot_comments,
diff_truncated=diff_truncated,
head_sha=pr_data.get("headRefOid", ""),
base_sha=pr_data.get("baseRefOid", ""),
)

async def _fetch_pr_metadata(self) -> dict:
Expand Down
161 changes: 155 additions & 6 deletions apps/backend/runners/github/services/parallel_orchestrator_reviewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
import hashlib
import logging
import os
import shutil
import subprocess
import uuid
from pathlib import Path
from typing import Any

Expand Down Expand Up @@ -60,6 +63,9 @@
# Check if debug mode is enabled
DEBUG_MODE = os.environ.get("DEBUG", "").lower() in ("true", "1", "yes")

# Directory for PR review worktrees (project-local, same filesystem)
PR_WORKTREE_DIR = ".auto-claude/pr-review-worktrees"


class ParallelOrchestratorReviewer:
"""
Expand Down Expand Up @@ -116,6 +122,115 @@ def _load_prompt(self, filename: str) -> str:
logger.warning(f"Prompt file not found: {prompt_file}")
return ""

def _create_pr_worktree(self, head_sha: str, pr_number: int) -> Path:
"""Create a temporary worktree at the PR head commit.

Args:
head_sha: The commit SHA of the PR head
pr_number: The PR number for naming

Returns:
Path to the created worktree

Raises:
RuntimeError: If worktree creation fails
"""
worktree_name = f"pr-{pr_number}-{uuid.uuid4().hex[:8]}"
worktree_dir = self.project_dir / PR_WORKTREE_DIR
worktree_dir.mkdir(parents=True, exist_ok=True)
worktree_path = worktree_dir / worktree_name

# Fetch the commit if not available locally (handles fork PRs)
subprocess.run(
["git", "fetch", "origin", head_sha],
cwd=self.project_dir,
capture_output=True,
timeout=60,
)
Comment on lines +144 to +149
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Add error handling for git fetch command.

The git fetch operation doesn't verify success. If the fetch fails (network issues, invalid SHA, permissions), the subsequent git worktree add will fail with a less informative error message.

🔎 Proposed fix
-        subprocess.run(
+        fetch_result = subprocess.run(
             ["git", "fetch", "origin", head_sha],
             cwd=self.project_dir,
             capture_output=True,
+            text=True,
             timeout=60,
         )
+        
+        if fetch_result.returncode != 0:
+            raise RuntimeError(
+                f"Failed to fetch commit {head_sha}: {fetch_result.stderr}"
+            )
🤖 Prompt for AI Agents
In apps/backend/runners/github/services/parallel_orchestrator_reviewer.py around
lines 144 to 149, the git fetch subprocess call does not check for failure;
change the invocation to detect errors (either pass check=True or inspect the
CompletedProcess.returncode), capture and log stderr/stdout on failure, and
raise or return a clear exception so callers get a meaningful message (include
head_sha and project_dir in the log). Ensure timeout remains and include the
subprocess output in the logged/raised error to aid debugging.


# Create detached worktree at the PR commit
result = subprocess.run(
["git", "worktree", "add", "--detach", str(worktree_path), head_sha],
cwd=self.project_dir,
capture_output=True,
text=True,
)
Comment on lines +152 to +157
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Add timeout to git worktree add command.

The git worktree add command lacks a timeout, which could cause the review process to hang indefinitely if the operation stalls.

🔎 Proposed fix
         result = subprocess.run(
             ["git", "worktree", "add", "--detach", str(worktree_path), head_sha],
             cwd=self.project_dir,
             capture_output=True,
             text=True,
+            timeout=60,
         )
🤖 Prompt for AI Agents
In apps/backend/runners/github/services/parallel_orchestrator_reviewer.py around
lines 152 to 157, the subprocess.run call invoking "git worktree add" has no
timeout and can hang; add a timeout argument (e.g., timeout=60 or a configurable
constant), wrap the call in a try/except catching subprocess.TimeoutExpired, log
an error including the timeout and command output, and abort/return failure in
that branch so the reviewer process does not hang indefinitely.


if result.returncode != 0:
raise RuntimeError(f"Failed to create worktree: {result.stderr}")

logger.info(f"[PRReview] Created worktree at {worktree_path}")
print(f"[PRReview] Created worktree at {worktree_path.name}", flush=True)
return worktree_path

def _cleanup_pr_worktree(self, worktree_path: Path) -> None:
"""Remove a temporary PR review worktree with fallback chain.

Args:
worktree_path: Path to the worktree to remove
"""
if not worktree_path or not worktree_path.exists():
return

# Try 1: git worktree remove
result = subprocess.run(
["git", "worktree", "remove", "--force", str(worktree_path)],
cwd=self.project_dir,
capture_output=True,
)

if result.returncode == 0:
logger.info(f"[PRReview] Cleaned up worktree: {worktree_path.name}")
print(f"[PRReview] Cleaned up worktree: {worktree_path.name}", flush=True)
return

# Try 2: shutil.rmtree fallback
try:
shutil.rmtree(worktree_path, ignore_errors=True)
subprocess.run(
["git", "worktree", "prune"],
cwd=self.project_dir,
capture_output=True,
)
logger.warning(f"[PRReview] Used shutil fallback for: {worktree_path.name}")
except Exception as e:
logger.error(f"[PRReview] Failed to cleanup worktree {worktree_path}: {e}")

def _cleanup_stale_pr_worktrees(self) -> None:
"""Clean up orphaned PR review worktrees on startup."""
worktree_dir = self.project_dir / PR_WORKTREE_DIR
if not worktree_dir.exists():
return

# Get registered worktrees from git
result = subprocess.run(
["git", "worktree", "list", "--porcelain"],
cwd=self.project_dir,
capture_output=True,
text=True,
)
registered = {
Path(line.split(" ", 1)[1])
for line in result.stdout.split("\n")
if line.startswith("worktree ")
}
Comment on lines +212 to +216
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Add defensive parsing for git worktree list output.

Line 213 assumes each line starting with "worktree " has a space and path component. If the git output is malformed or the format changes, this will raise an IndexError.

🔎 Proposed fix
         registered = {
-            Path(line.split(" ", 1)[1])
+            Path(parts[1])
             for line in result.stdout.split("\n")
-            if line.startswith("worktree ")
+            if line.startswith("worktree ") and (parts := line.split(" ", 1)) and len(parts) == 2
         }
🤖 Prompt for AI Agents
In apps/backend/runners/github/services/parallel_orchestrator_reviewer.py around
lines 212 to 216, the current comprehension assumes every line that starts with
"worktree " contains a space and a valid path and may raise IndexError on
malformed git output; change the parsing to be defensive by iterating lines,
trimming whitespace, skipping empty lines, verifying the line starts with
"worktree " and that splitting with maxsplit=1 yields a non-empty path component
(or use str.partition(" ") and check the third part), and only then construct
Path(path); optionally log or ignore malformed entries rather than letting an
exception propagate.


# Remove unregistered directories
stale_count = 0
for item in worktree_dir.iterdir():
if item.is_dir() and item not in registered:
logger.info(f"[PRReview] Removing stale worktree: {item.name}")
shutil.rmtree(item, ignore_errors=True)
stale_count += 1

if stale_count > 0:
subprocess.run(
["git", "worktree", "prune"],
cwd=self.project_dir,
capture_output=True,
)
print(f"[PRReview] Cleaned up {stale_count} stale worktree(s)", flush=True)

def _define_specialist_agents(self) -> dict[str, AgentDefinition]:
"""
Define specialist agents for the SDK.
Expand Down Expand Up @@ -400,6 +515,12 @@ async def review(self, context: PRContext) -> PRReviewResult:
f"[ParallelOrchestrator] Starting review for PR #{context.pr_number}"
)

# Clean up any stale worktrees from previous runs
self._cleanup_stale_pr_worktrees()

# Track worktree for cleanup
worktree_path: Path | None = None

try:
self._report_progress(
"orchestrating",
Expand All @@ -411,12 +532,36 @@ async def review(self, context: PRContext) -> PRReviewResult:
# Build orchestrator prompt
prompt = self._build_orchestrator_prompt(context)

# Get project root
project_root = (
self.project_dir.parent.parent
if self.project_dir.name == "backend"
else self.project_dir
)
# Create temporary worktree at PR head commit for isolated review
# This ensures agents read from the correct PR state, not the current checkout
head_sha = context.head_sha or context.head_branch
if not head_sha:
logger.warning(
"[ParallelOrchestrator] No head_sha available, using current checkout"
)
# Fallback to original behavior if no SHA available
project_root = (
self.project_dir.parent.parent
if self.project_dir.name == "backend"
else self.project_dir
)
else:
try:
worktree_path = self._create_pr_worktree(
head_sha, context.pr_number
)
project_root = worktree_path
except RuntimeError as e:
logger.warning(
f"[ParallelOrchestrator] Worktree creation failed, "
f"using current checkout: {e}"
)
# Fallback to original behavior if worktree creation fails
project_root = (
self.project_dir.parent.parent
if self.project_dir.name == "backend"
else self.project_dir
)
Comment on lines +535 to +564
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial

Refactor duplicate project_root calculation logic.

The project_root fallback calculation appears twice (lines 543-546 and 560-563) with identical logic. This violates the DRY principle and makes the code harder to maintain.

🔎 Proposed refactor
+            # Calculate fallback project root
+            def get_fallback_project_root() -> Path:
+                return (
+                    self.project_dir.parent.parent
+                    if self.project_dir.name == "backend"
+                    else self.project_dir
+                )
+
             # Create temporary worktree at PR head commit for isolated review
             # This ensures agents read from the correct PR state, not the current checkout
             head_sha = context.head_sha or context.head_branch
             if not head_sha:
                 logger.warning(
                     "[ParallelOrchestrator] No head_sha available, using current checkout"
                 )
-                # Fallback to original behavior if no SHA available
-                project_root = (
-                    self.project_dir.parent.parent
-                    if self.project_dir.name == "backend"
-                    else self.project_dir
-                )
+                project_root = get_fallback_project_root()
             else:
                 try:
                     worktree_path = self._create_pr_worktree(
                         head_sha, context.pr_number
                     )
                     project_root = worktree_path
                 except RuntimeError as e:
                     logger.warning(
                         f"[ParallelOrchestrator] Worktree creation failed, "
                         f"using current checkout: {e}"
                     )
-                    # Fallback to original behavior if worktree creation fails
-                    project_root = (
-                        self.project_dir.parent.parent
-                        if self.project_dir.name == "backend"
-                        else self.project_dir
-                    )
+                    project_root = get_fallback_project_root()
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
# Create temporary worktree at PR head commit for isolated review
# This ensures agents read from the correct PR state, not the current checkout
head_sha = context.head_sha or context.head_branch
if not head_sha:
logger.warning(
"[ParallelOrchestrator] No head_sha available, using current checkout"
)
# Fallback to original behavior if no SHA available
project_root = (
self.project_dir.parent.parent
if self.project_dir.name == "backend"
else self.project_dir
)
else:
try:
worktree_path = self._create_pr_worktree(
head_sha, context.pr_number
)
project_root = worktree_path
except RuntimeError as e:
logger.warning(
f"[ParallelOrchestrator] Worktree creation failed, "
f"using current checkout: {e}"
)
# Fallback to original behavior if worktree creation fails
project_root = (
self.project_dir.parent.parent
if self.project_dir.name == "backend"
else self.project_dir
)
# Calculate fallback project root
def get_fallback_project_root() -> Path:
return (
self.project_dir.parent.parent
if self.project_dir.name == "backend"
else self.project_dir
)
# Create temporary worktree at PR head commit for isolated review
# This ensures agents read from the correct PR state, not the current checkout
head_sha = context.head_sha or context.head_branch
if not head_sha:
logger.warning(
"[ParallelOrchestrator] No head_sha available, using current checkout"
)
project_root = get_fallback_project_root()
else:
try:
worktree_path = self._create_pr_worktree(
head_sha, context.pr_number
)
project_root = worktree_path
except RuntimeError as e:
logger.warning(
f"[ParallelOrchestrator] Worktree creation failed, "
f"using current checkout: {e}"
)
project_root = get_fallback_project_root()
🤖 Prompt for AI Agents
In apps/backend/runners/github/services/parallel_orchestrator_reviewer.py around
lines 535 to 564, the fallback project_root calculation is duplicated in both
the head_sha-missing branch and the worktree-creation-exception branch; extract
that fallback computation into a single variable or small helper (e.g.,
compute_fallback_project_root or fallback_project_root =
(self.project_dir.parent.parent if self.project_dir.name == "backend" else
self.project_dir)) defined before the if/else, then use that variable in both
places so the logic is only defined once and behavior remains identical.


# Use model and thinking level from config (user settings)
model = self.config.model or "claude-sonnet-4-5-20250929"
Expand Down Expand Up @@ -559,6 +704,10 @@ async def review(self, context: PRContext) -> PRReviewResult:
success=False,
error=str(e),
)
finally:
# Always cleanup worktree, even on error
if worktree_path:
self._cleanup_pr_worktree(worktree_path)

def _parse_structured_output(
self, structured_output: dict[str, Any]
Expand Down
Loading