diff --git a/.github/workflows/pr-status-gate.yml b/.github/workflows/pr-status-gate.yml index 4220cb644b..f79eb16936 100644 --- a/.github/workflows/pr-status-gate.yml +++ b/.github/workflows/pr-status-gate.yml @@ -2,7 +2,7 @@ name: PR Status Gate on: workflow_run: - workflows: [CI, Lint, Quality Security, CLA Assistant, Quality Commit Lint] + workflows: [CI, Lint, Quality Security] types: [completed] permissions: @@ -36,7 +36,7 @@ jobs: // To find check names: Go to PR → Checks tab → copy exact name // To update: Edit this list when workflow jobs are added/renamed/removed // - // Last validated: 2025-12-26 + // Last validated: 2025-12-31 // ═══════════════════════════════════════════════════════════════════════ const requiredChecks = [ // CI workflow (ci.yml) - 3 checks @@ -49,11 +49,7 @@ jobs: 'Quality Security / CodeQL (javascript-typescript) (pull_request)', 'Quality Security / CodeQL (python) (pull_request)', 'Quality Security / Python Security (Bandit) (pull_request)', - 'Quality Security / Security Summary (pull_request)', - // CLA Assistant workflow (cla.yml) - 1 check - 'CLA Assistant / CLA Check', - // Quality Commit Lint workflow (quality-commit-lint.yml) - 1 check - 'Quality Commit Lint / Conventional Commits (pull_request)' + 'Quality Security / Security Summary (pull_request)' ]; const statusLabels = { diff --git a/.husky/pre-commit b/.husky/pre-commit index 4dca91b56e..05dd657f3c 100755 --- a/.husky/pre-commit +++ b/.husky/pre-commit @@ -36,14 +36,44 @@ if git diff --cached --name-only | grep -q "^package.json$"; then echo " Updated apps/backend/__init__.py to $VERSION" fi - # Sync to README.md + # Sync to README.md - section-aware updates (stable vs beta) if [ -f "README.md" ]; then # Escape hyphens for shields.io badge format (shields.io uses -- for literal hyphens) ESCAPED_VERSION=$(echo "$VERSION" | sed 's/-/--/g') - # Update version badge - match both stable (X.Y.Z) and prerelease (X.Y.Z-prerelease.N or X.Y.Z--prerelease.N) - sed -i.bak "s/version-[0-9]*\.[0-9]*\.[0-9]*\(-\{1,2\}[a-z]*\.[0-9]*\)*-blue/version-$ESCAPED_VERSION-blue/g" README.md - # Update download links - match both stable and prerelease versions - sed -i.bak "s/Auto-Claude-[0-9]*\.[0-9]*\.[0-9]*\(-[a-z]*\.[0-9]*\)*/Auto-Claude-$VERSION/g" README.md + + # Detect if this is a prerelease (contains - after base version, e.g., 2.7.2-beta.10) + if echo "$VERSION" | grep -q '-'; then + # PRERELEASE: Update only beta sections + echo " Detected PRERELEASE version: $VERSION" + + # Update beta version badge (orange) + sed -i.bak "s/beta-[0-9]*\.[0-9]*\.[0-9]*\(--[a-z]*\.[0-9]*\)*-orange/beta-$ESCAPED_VERSION-orange/g" README.md + + # Update beta version badge link (within BETA_VERSION_BADGE section) + sed -i.bak '//,//s|releases/tag/v[0-9.a-z-]*)|releases/tag/v'"$VERSION"')|g' README.md + + # Update beta download links (within BETA_DOWNLOADS section only) + for SUFFIX in "win32-x64.exe" "darwin-arm64.dmg" "darwin-x64.dmg" "linux-x86_64.AppImage" "linux-amd64.deb" "linux-x86_64.flatpak"; do + sed -i.bak '//,//{s|Auto-Claude-[0-9.a-z-]*-'"$SUFFIX"'](https://github.com/AndyMik90/Auto-Claude/releases/download/v[^/]*/Auto-Claude-[^)]*-'"$SUFFIX"')|Auto-Claude-'"$VERSION"'-'"$SUFFIX"'](https://github.com/AndyMik90/Auto-Claude/releases/download/v'"$VERSION"'/Auto-Claude-'"$VERSION"'-'"$SUFFIX"')|g}' README.md + done + else + # STABLE: Update stable sections and top badge + echo " Detected STABLE version: $VERSION" + + # Update top version badge (blue) - within TOP_VERSION_BADGE section + sed -i.bak 
'//,//s/version-[0-9]*\.[0-9]*\.[0-9]*\(--[a-z]*\.[0-9]*\)*-blue/version-'"$ESCAPED_VERSION"'-blue/g' README.md + sed -i.bak '//,//s|releases/tag/v[0-9.a-z-]*)|releases/tag/v'"$VERSION"')|g' README.md + + # Update stable version badge (blue) - within STABLE_VERSION_BADGE section + sed -i.bak '//,//s/stable-[0-9]*\.[0-9]*\.[0-9]*\(--[a-z]*\.[0-9]*\)*-blue/stable-'"$ESCAPED_VERSION"'-blue/g' README.md + sed -i.bak '//,//s|releases/tag/v[0-9.a-z-]*)|releases/tag/v'"$VERSION"')|g' README.md + + # Update stable download links (within STABLE_DOWNLOADS section only) + for SUFFIX in "win32-x64.exe" "darwin-arm64.dmg" "darwin-x64.dmg" "linux-x86_64.AppImage" "linux-amd64.deb"; do + sed -i.bak '//,//{s|Auto-Claude-[0-9.a-z-]*-'"$SUFFIX"'](https://github.com/AndyMik90/Auto-Claude/releases/download/v[^/]*/Auto-Claude-[^)]*-'"$SUFFIX"')|Auto-Claude-'"$VERSION"'-'"$SUFFIX"'](https://github.com/AndyMik90/Auto-Claude/releases/download/v'"$VERSION"'/Auto-Claude-'"$VERSION"'-'"$SUFFIX"')|g}' README.md + done + fi + rm -f README.md.bak git add README.md echo " Updated README.md to $VERSION" @@ -72,20 +102,25 @@ if git diff --cached --name-only | grep -q "^apps/backend/.*\.py$"; then fi if [ -n "$RUFF" ]; then - # Run ruff linting (auto-fix) - echo "Running ruff lint..." - $RUFF check apps/backend/ --fix - if [ $? -ne 0 ]; then - echo "Ruff lint failed. Please fix Python linting errors before committing." - exit 1 + # Get only staged Python files in apps/backend (process only what's being committed) + STAGED_PY_FILES=$(git diff --cached --name-only --diff-filter=ACM | grep "^apps/backend/.*\.py$" || true) + + if [ -n "$STAGED_PY_FILES" ]; then + # Run ruff linting (auto-fix) only on staged files + echo "Running ruff lint on staged files..." + echo "$STAGED_PY_FILES" | xargs $RUFF check --fix + if [ $? -ne 0 ]; then + echo "Ruff lint failed. Please fix Python linting errors before committing." + exit 1 + fi + + # Run ruff format (auto-fix) only on staged files + echo "Running ruff format on staged files..." + echo "$STAGED_PY_FILES" | xargs $RUFF format + + # Re-stage only the files that were originally staged (in case ruff modified them) + echo "$STAGED_PY_FILES" | xargs git add fi - - # Run ruff format (auto-fix) - echo "Running ruff format..." - $RUFF format apps/backend/ - - # Stage any files that were auto-fixed by ruff (POSIX-compliant) - find apps/backend -name "*.py" -type f -exec git add {} + 2>/dev/null || true else echo "Warning: ruff not found, skipping Python linting. 
Install with: uv pip install ruff" fi diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ac44d62a3a..f67b77c813 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,14 +25,41 @@ repos: # Sync to apps/backend/__init__.py sed -i.bak "s/__version__ = \"[^\"]*\"/__version__ = \"$VERSION\"/" apps/backend/__init__.py && rm -f apps/backend/__init__.py.bak - # Sync to README.md - shields.io version badge (text and URL) + # Sync to README.md - section-aware updates (stable vs beta) ESCAPED_VERSION=$(echo "$VERSION" | sed 's/-/--/g') - sed -i.bak -e "s/version-[0-9]*\.[0-9]*\.[0-9]*\(-\{1,2\}[a-z]*\.[0-9]*\)*-blue/version-$ESCAPED_VERSION-blue/g" -e "s|releases/tag/v[0-9.a-z-]*)|releases/tag/v$VERSION)|g" README.md - # Sync to README.md - download links with correct filenames and URLs - for SUFFIX in "win32-x64.exe" "darwin-arm64.dmg" "darwin-x64.dmg" "linux-x86_64.AppImage" "linux-amd64.deb"; do - sed -i.bak "s|Auto-Claude-[0-9.a-z-]*-${SUFFIX}](https://github.com/AndyMik90/Auto-Claude/releases/download/v[^/]*/Auto-Claude-[^)]*-${SUFFIX})|Auto-Claude-${VERSION}-${SUFFIX}](https://github.com/AndyMik90/Auto-Claude/releases/download/v${VERSION}/Auto-Claude-${VERSION}-${SUFFIX})|g" README.md - done + # Detect if this is a prerelease (contains - after base version) + if echo "$VERSION" | grep -q '-'; then + # PRERELEASE: Update only beta sections + echo " Detected PRERELEASE version: $VERSION" + + # Update beta version badge (orange) + sed -i.bak "s/beta-[0-9]*\.[0-9]*\.[0-9]*\(--[a-z]*\.[0-9]*\)*-orange/beta-$ESCAPED_VERSION-orange/g" README.md + + # Update beta version badge link + sed -i.bak '//,//s|releases/tag/v[0-9.a-z-]*)|releases/tag/v'"$VERSION"')|g' README.md + + # Update beta download links (within BETA_DOWNLOADS section only) + for SUFFIX in "win32-x64.exe" "darwin-arm64.dmg" "darwin-x64.dmg" "linux-x86_64.AppImage" "linux-amd64.deb" "linux-x86_64.flatpak"; do + sed -i.bak '//,//{s|Auto-Claude-[0-9.a-z-]*-'"$SUFFIX"'](https://github.com/AndyMik90/Auto-Claude/releases/download/v[^/]*/Auto-Claude-[^)]*-'"$SUFFIX"')|Auto-Claude-'"$VERSION"'-'"$SUFFIX"'](https://github.com/AndyMik90/Auto-Claude/releases/download/v'"$VERSION"'/Auto-Claude-'"$VERSION"'-'"$SUFFIX"')|g}' README.md + done + else + # STABLE: Update stable sections and top badge + echo " Detected STABLE version: $VERSION" + + # Update top version badge (blue) + sed -i.bak '//,//s/version-[0-9]*\.[0-9]*\.[0-9]*\(--[a-z]*\.[0-9]*\)*-blue/version-'"$ESCAPED_VERSION"'-blue/g' README.md + sed -i.bak '//,//s|releases/tag/v[0-9.a-z-]*)|releases/tag/v'"$VERSION"')|g' README.md + + # Update stable version badge (blue) + sed -i.bak '//,//s/stable-[0-9]*\.[0-9]*\.[0-9]*\(--[a-z]*\.[0-9]*\)*-blue/stable-'"$ESCAPED_VERSION"'-blue/g' README.md + sed -i.bak '//,//s|releases/tag/v[0-9.a-z-]*)|releases/tag/v'"$VERSION"')|g' README.md + + # Update stable download links (within STABLE_DOWNLOADS section only) + for SUFFIX in "win32-x64.exe" "darwin-arm64.dmg" "darwin-x64.dmg" "linux-x86_64.AppImage" "linux-amd64.deb"; do + sed -i.bak '//,//{s|Auto-Claude-[0-9.a-z-]*-'"$SUFFIX"'](https://github.com/AndyMik90/Auto-Claude/releases/download/v[^/]*/Auto-Claude-[^)]*-'"$SUFFIX"')|Auto-Claude-'"$VERSION"'-'"$SUFFIX"'](https://github.com/AndyMik90/Auto-Claude/releases/download/v'"$VERSION"'/Auto-Claude-'"$VERSION"'-'"$SUFFIX"')|g}' README.md + done + fi rm -f README.md.bak # Stage changes diff --git a/apps/backend/__init__.py b/apps/backend/__init__.py index 72e597cd0a..106d44d4a2 100644 --- a/apps/backend/__init__.py +++ 
b/apps/backend/__init__.py @@ -19,5 +19,5 @@ See README.md for full documentation. """ -__version__ = "2.7.2-beta.10" +__version__ = "2.7.2-beta.12" __author__ = "Auto Claude Team" diff --git a/apps/backend/agents/session.py b/apps/backend/agents/session.py index 39ae045893..89a5d5d48c 100644 --- a/apps/backend/agents/session.py +++ b/apps/backend/agents/session.py @@ -21,6 +21,7 @@ is_build_complete, ) from recovery import RecoveryManager +from security.tool_input_validator import get_safe_tool_input from task_logger import ( LogEntryType, LogPhase, @@ -386,41 +387,43 @@ async def run_agent_session( ) elif block_type == "ToolUseBlock" and hasattr(block, "name"): tool_name = block.name - tool_input = None + tool_input_display = None tool_count += 1 + # Safely extract tool input (handles None, non-dict, etc.) + inp = get_safe_tool_input(block) + # Extract meaningful tool input for display - if hasattr(block, "input") and block.input: - inp = block.input - if isinstance(inp, dict): - if "pattern" in inp: - tool_input = f"pattern: {inp['pattern']}" - elif "file_path" in inp: - fp = inp["file_path"] - if len(fp) > 50: - fp = "..." + fp[-47:] - tool_input = fp - elif "command" in inp: - cmd = inp["command"] - if len(cmd) > 50: - cmd = cmd[:47] + "..." - tool_input = cmd - elif "path" in inp: - tool_input = inp["path"] + if inp: + if "pattern" in inp: + tool_input_display = f"pattern: {inp['pattern']}" + elif "file_path" in inp: + fp = inp["file_path"] + if len(fp) > 50: + fp = "..." + fp[-47:] + tool_input_display = fp + elif "command" in inp: + cmd = inp["command"] + if len(cmd) > 50: + cmd = cmd[:47] + "..." + tool_input_display = cmd + elif "path" in inp: + tool_input_display = inp["path"] debug( "session", f"Tool call #{tool_count}: {tool_name}", - tool_input=tool_input, - full_input=str(block.input)[:500] - if hasattr(block, "input") - else None, + tool_input=tool_input_display, + full_input=str(inp)[:500] if inp else None, ) # Log tool start (handles printing too) if task_logger: task_logger.tool_start( - tool_name, tool_input, phase, print_to_console=True + tool_name, + tool_input_display, + phase, + print_to_console=True, ) else: print(f"\n[Tool: {tool_name}]", flush=True) diff --git a/apps/backend/agents/tools_pkg/models.py b/apps/backend/agents/tools_pkg/models.py index c1e5df5714..44d780d990 100644 --- a/apps/backend/agents/tools_pkg/models.py +++ b/apps/backend/agents/tools_pkg/models.py @@ -216,8 +216,9 @@ def is_electron_mcp_enabled() -> bool: # QA PHASES (Read + test + browser + Graphiti memory) # ═══════════════════════════════════════════════════════════════════════ "qa_reviewer": { - # Read-only + Bash (for running tests) - reviewer should NOT edit code - "tools": BASE_READ_TOOLS + ["Bash"] + WEB_TOOLS, + # Read + Write/Edit (for QA reports and plan updates) + Bash (for tests) + # Note: Reviewer writes to spec directory only (qa_report.md, implementation_plan.json) + "tools": BASE_READ_TOOLS + BASE_WRITE_TOOLS + WEB_TOOLS, "mcp_servers": ["context7", "graphiti", "auto-claude", "browser"], "mcp_servers_optional": ["linear"], # For updating issue status "auto_claude_tools": [ diff --git a/apps/backend/prompts/github/pr_finding_validator.md b/apps/backend/prompts/github/pr_finding_validator.md new file mode 100644 index 0000000000..b054344ea9 --- /dev/null +++ b/apps/backend/prompts/github/pr_finding_validator.md @@ -0,0 +1,158 @@ +# Finding Validator Agent + +You are a finding re-investigator. 
For each unresolved finding from a previous PR review, you must actively investigate whether it is a REAL issue or a FALSE POSITIVE. + +Your job is to prevent false positives from persisting indefinitely by actually reading the code and verifying the issue exists. + +## Your Mission + +For each finding you receive: +1. **READ** the actual code at the file/line location using the Read tool +2. **ANALYZE** whether the described issue actually exists in the code +3. **PROVIDE** concrete code evidence for your conclusion +4. **RETURN** validation status with evidence + +## Investigation Process + +### Step 1: Fetch the Code + +Use the Read tool to get the actual code at `finding.file` around `finding.line`. +Get sufficient context (±20 lines minimum). + +``` +Read the file: {finding.file} +Focus on lines around: {finding.line} +``` + +### Step 2: Analyze with Fresh Eyes + +**Do NOT assume the original finding is correct.** Ask yourself: +- Does the code ACTUALLY have this issue? +- Is the described vulnerability/bug/problem present? +- Could the original reviewer have misunderstood the code? +- Is there context that makes this NOT an issue (e.g., sanitization elsewhere)? + +Be skeptical. The original review may have hallucinated this finding. + +### Step 3: Document Evidence + +You MUST provide concrete evidence: +- **Exact code snippet** you examined (copy-paste from the file) +- **Line numbers** where you found (or didn't find) the issue +- **Your analysis** of whether the issue exists +- **Confidence level** (0.0-1.0) in your conclusion + +## Validation Statuses + +### `confirmed_valid` +Use when you verify the issue IS real: +- The problematic code pattern exists exactly as described +- The vulnerability/bug is present and exploitable +- The code quality issue genuinely impacts the codebase + +### `dismissed_false_positive` +Use when you verify the issue does NOT exist: +- The described code pattern is not actually present +- The original finding misunderstood the code +- There is mitigating code that prevents the issue (e.g., input validation elsewhere) +- The finding was based on incorrect assumptions + +### `needs_human_review` +Use when you cannot determine with confidence: +- The issue requires runtime analysis to verify +- The code is too complex to analyze statically +- You have conflicting evidence +- Your confidence is below 0.70 + +## Output Format + +Return one result per finding: + +```json +{ + "finding_id": "SEC-001", + "validation_status": "confirmed_valid", + "code_evidence": "const query = `SELECT * FROM users WHERE id = ${userId}`;", + "line_range": [45, 45], + "explanation": "SQL injection vulnerability confirmed. User input 'userId' is directly interpolated into the SQL query at line 45 without any sanitization. The query is executed via db.execute() on line 46.", + "confidence": 0.95 +} +``` + +```json +{ + "finding_id": "QUAL-002", + "validation_status": "dismissed_false_positive", + "code_evidence": "function processInput(data: string): string {\n const sanitized = DOMPurify.sanitize(data);\n return sanitized;\n}", + "line_range": [23, 26], + "explanation": "The original finding claimed XSS vulnerability, but the code uses DOMPurify.sanitize() before output. 
The input is properly sanitized at line 24 before being returned.", + "confidence": 0.88 +} +``` + +```json +{ + "finding_id": "LOGIC-003", + "validation_status": "needs_human_review", + "code_evidence": "async function handleRequest(req) {\n // Complex async logic...\n}", + "line_range": [100, 150], + "explanation": "The original finding claims a race condition, but verifying this requires understanding the runtime behavior and concurrency model. Cannot determine statically.", + "confidence": 0.45 +} +``` + +## Confidence Guidelines + +Rate your confidence based on how certain you are: + +| Confidence | Meaning | +|------------|---------| +| 0.90-1.00 | Definitive evidence - code clearly shows the issue exists/doesn't exist | +| 0.80-0.89 | Strong evidence - high confidence with minor uncertainty | +| 0.70-0.79 | Moderate evidence - likely correct but some ambiguity | +| 0.50-0.69 | Uncertain - use `needs_human_review` | +| Below 0.50 | Insufficient evidence - must use `needs_human_review` | + +**Minimum thresholds:** +- To confirm as `confirmed_valid`: confidence >= 0.70 +- To dismiss as `dismissed_false_positive`: confidence >= 0.80 (higher bar for dismissal) +- If below thresholds: must use `needs_human_review` + +## Common False Positive Patterns + +Watch for these patterns that often indicate false positives: + +1. **Sanitization elsewhere**: Input is validated/sanitized before reaching the flagged code +2. **Internal-only code**: Code only handles trusted internal data, not user input +3. **Framework protection**: Framework provides automatic protection (e.g., ORM parameterization) +4. **Dead code**: The flagged code is never executed in the current codebase +5. **Test code**: The issue is in test files where it's acceptable +6. **Misread syntax**: Original reviewer misunderstood the language syntax + +## Common Valid Issue Patterns + +These patterns often confirm the issue is real: + +1. **Direct string concatenation** in SQL/commands with user input +2. **Missing null checks** where null values can flow through +3. **Hardcoded credentials** that are actually used (not examples) +4. **Missing error handling** in critical paths +5. **Race conditions** with clear concurrent access + +## Critical Rules + +1. **ALWAYS read the actual code** - Never rely on memory or the original finding description +2. **ALWAYS provide code_evidence** - No empty strings. Quote the actual code. +3. **Be skeptical of original findings** - Many AI reviews produce false positives +4. **Higher bar for dismissal** - Need 0.80 confidence to dismiss (vs 0.70 to confirm) +5. **When uncertain, escalate** - Use `needs_human_review` rather than guessing +6. **Look for mitigations** - Check surrounding code for sanitization/validation +7. 
**Check the full context** - Read ±20 lines, not just the flagged line + +## Anti-Patterns to Avoid + +- **Trusting the original finding blindly** - Always verify +- **Dismissing without reading code** - Must provide code_evidence +- **Low confidence dismissals** - Needs 0.80+ confidence to dismiss +- **Vague explanations** - Be specific about what you found +- **Missing line numbers** - Always include line_range diff --git a/apps/backend/prompts/github/pr_followup_orchestrator.md b/apps/backend/prompts/github/pr_followup_orchestrator.md index 39851d864e..da2ee6b97a 100644 --- a/apps/backend/prompts/github/pr_followup_orchestrator.md +++ b/apps/backend/prompts/github/pr_followup_orchestrator.md @@ -35,6 +35,20 @@ You have access to these specialist agents via the Task tool: - Flags concerns that need addressing - **Invoke when**: There are comments or reviews since last review +### 4. finding-validator (CRITICAL - Prevent False Positives) +**Use for**: Re-investigating unresolved findings to validate they are real issues +- Reads the ACTUAL CODE at the finding location with fresh eyes +- Actively investigates whether the described issue truly exists +- Can DISMISS findings as false positives if original review was incorrect +- Can CONFIRM findings as valid if issue is genuine +- Requires concrete CODE EVIDENCE for any conclusion +- **ALWAYS invoke after resolution-verifier for ALL unresolved findings** +- **Invoke when**: There are findings still marked as unresolved + +**Why this is critical**: Initial reviews may produce false positives (hallucinated issues). +Without validation, these persist indefinitely. This agent prevents that by actually +examining the code and determining if the issue is real. + ## Workflow ### Phase 1: Analyze Scope @@ -50,6 +64,9 @@ Based on your analysis, invoke the appropriate agents: **Always invoke** `resolution-verifier` if there are previous findings. +**ALWAYS invoke** `finding-validator` for ALL unresolved findings from resolution-verifier. +This is CRITICAL to prevent false positives from persisting. + **Invoke** `new-code-reviewer` if: - Diff is substantial (>50 lines) - Changes touch security-sensitive areas @@ -61,17 +78,28 @@ Based on your analysis, invoke the appropriate agents: - There are AI tool reviews to triage - Questions remain unanswered -### Phase 3: Synthesize Results -After agents complete: +### Phase 3: Validate Unresolved Findings +After resolution-verifier returns findings marked as unresolved: +1. Pass ALL unresolved findings to finding-validator +2. finding-validator will read the actual code at each location +3. For each finding, it returns: + - `confirmed_valid`: Issue IS real → keep as unresolved + - `dismissed_false_positive`: Original finding was WRONG → remove from findings + - `needs_human_review`: Cannot determine → flag for human + +### Phase 4: Synthesize Results +After all agents complete: 1. Combine resolution verifications -2. Merge new findings (deduplicate if needed) -3. Incorporate comment analysis -4. Generate final verdict +2. Apply validation results (remove dismissed false positives) +3. Merge new findings (deduplicate if needed) +4. Incorporate comment analysis +5. 
Generate final verdict based on VALIDATED findings only ## Verdict Guidelines ### READY_TO_MERGE -- All previous findings verified as resolved +- All previous findings verified as resolved OR dismissed as false positives +- No CONFIRMED_VALID critical/high issues remaining - No new critical/high issues - No blocking concerns from comments - Contributor questions addressed @@ -82,15 +110,17 @@ After agents complete: - Optional polish items can be addressed post-merge ### NEEDS_REVISION (Strict Quality Gates) -- HIGH or MEDIUM severity findings unresolved +- HIGH or MEDIUM severity findings CONFIRMED_VALID (not dismissed as false positive) - New HIGH or MEDIUM severity issues introduced - Important contributor concerns unaddressed - **Note: Both HIGH and MEDIUM block merge** (AI fixes quickly, so be strict) +- **Note: Only count findings that passed validation** (dismissed_false_positive findings don't block) ### BLOCKED -- CRITICAL findings remain unresolved +- CRITICAL findings remain CONFIRMED_VALID (not dismissed as false positive) - New CRITICAL issues introduced - Fundamental problems with the fix approach +- **Note: Only block for findings that passed validation** ## Cross-Validation @@ -106,10 +136,28 @@ Provide your synthesis as a structured response matching the ParallelFollowupRes ```json { "analysis_summary": "Brief summary of what was analyzed", - "agents_invoked": ["resolution-verifier", "new-code-reviewer"], + "agents_invoked": ["resolution-verifier", "finding-validator", "new-code-reviewer"], "commits_analyzed": 5, "files_changed": 12, "resolution_verifications": [...], + "finding_validations": [ + { + "finding_id": "SEC-001", + "validation_status": "confirmed_valid", + "code_evidence": "const query = `SELECT * FROM users WHERE id = ${userId}`;", + "line_range": [45, 45], + "explanation": "SQL injection is present - user input is concatenated...", + "confidence": 0.92 + }, + { + "finding_id": "QUAL-002", + "validation_status": "dismissed_false_positive", + "code_evidence": "const sanitized = DOMPurify.sanitize(data);", + "line_range": [23, 26], + "explanation": "Original finding claimed XSS but code uses DOMPurify...", + "confidence": 0.88 + } + ], "new_findings": [...], "comment_analyses": [...], "comment_findings": [...], @@ -119,7 +167,7 @@ Provide your synthesis as a structured response matching the ParallelFollowupRes "resolution_notes": null }, "verdict": "READY_TO_MERGE", - "verdict_reasoning": "All 3 previous findings verified as resolved..." + "verdict_reasoning": "2 findings resolved, 1 dismissed as false positive, 1 confirmed valid but LOW severity..." } ``` diff --git a/apps/backend/qa/fixer.py b/apps/backend/qa/fixer.py index 0901c9d285..d17f97d11b 100644 --- a/apps/backend/qa/fixer.py +++ b/apps/backend/qa/fixer.py @@ -9,6 +9,7 @@ from claude_agent_sdk import ClaudeSDKClient from debug import debug, debug_detailed, debug_error, debug_section, debug_success +from security.tool_input_validator import get_safe_tool_input from task_logger import ( LogEntryType, LogPhase, @@ -128,34 +129,35 @@ async def run_qa_fixer_session( ) elif block_type == "ToolUseBlock" and hasattr(block, "name"): tool_name = block.name - tool_input = None + tool_input_display = None tool_count += 1 - if hasattr(block, "input") and block.input: - inp = block.input - if isinstance(inp, dict): - if "file_path" in inp: - fp = inp["file_path"] - if len(fp) > 50: - fp = "..." + fp[-47:] - tool_input = fp - elif "command" in inp: - cmd = inp["command"] - if len(cmd) > 50: - cmd = cmd[:47] + "..." 
- tool_input = cmd + # Safely extract tool input (handles None, non-dict, etc.) + inp = get_safe_tool_input(block) + + if inp: + if "file_path" in inp: + fp = inp["file_path"] + if len(fp) > 50: + fp = "..." + fp[-47:] + tool_input_display = fp + elif "command" in inp: + cmd = inp["command"] + if len(cmd) > 50: + cmd = cmd[:47] + "..." + tool_input_display = cmd debug( "qa_fixer", f"Tool call #{tool_count}: {tool_name}", - tool_input=tool_input, + tool_input=tool_input_display, ) # Log tool start (handles printing) if task_logger: task_logger.tool_start( tool_name, - tool_input, + tool_input_display, LogPhase.VALIDATION, print_to_console=True, ) diff --git a/apps/backend/qa/reviewer.py b/apps/backend/qa/reviewer.py index 49de9577fa..3d06a06c03 100644 --- a/apps/backend/qa/reviewer.py +++ b/apps/backend/qa/reviewer.py @@ -11,6 +11,7 @@ from claude_agent_sdk import ClaudeSDKClient from debug import debug, debug_detailed, debug_error, debug_section, debug_success from prompts_pkg import get_qa_reviewer_prompt +from security.tool_input_validator import get_safe_tool_input from task_logger import ( LogEntryType, LogPhase, @@ -195,32 +196,33 @@ async def run_qa_agent_session( ) elif block_type == "ToolUseBlock" and hasattr(block, "name"): tool_name = block.name - tool_input = None + tool_input_display = None tool_count += 1 + # Safely extract tool input (handles None, non-dict, etc.) + inp = get_safe_tool_input(block) + # Extract tool input for display - if hasattr(block, "input") and block.input: - inp = block.input - if isinstance(inp, dict): - if "file_path" in inp: - fp = inp["file_path"] - if len(fp) > 50: - fp = "..." + fp[-47:] - tool_input = fp - elif "pattern" in inp: - tool_input = f"pattern: {inp['pattern']}" + if inp: + if "file_path" in inp: + fp = inp["file_path"] + if len(fp) > 50: + fp = "..." + fp[-47:] + tool_input_display = fp + elif "pattern" in inp: + tool_input_display = f"pattern: {inp['pattern']}" debug( "qa_reviewer", f"Tool call #{tool_count}: {tool_name}", - tool_input=tool_input, + tool_input=tool_input_display, ) # Log tool start (handles printing) if task_logger: task_logger.tool_start( tool_name, - tool_input, + tool_input_display, LogPhase.VALIDATION, print_to_console=True, ) diff --git a/apps/backend/runners/github/gh_client.py b/apps/backend/runners/github/gh_client.py index d6e7ddbdc1..942aefa2b4 100644 --- a/apps/backend/runners/github/gh_client.py +++ b/apps/backend/runners/github/gh_client.py @@ -810,3 +810,65 @@ async def get_pr_head_sha(self, pr_number: int) -> str | None: # Last commit is the HEAD return commits[-1].get("oid") return None + + async def get_pr_checks(self, pr_number: int) -> dict[str, Any]: + """ + Get CI check runs status for a PR. + + Uses `gh pr checks` to get the status of all check runs. 
+ + Args: + pr_number: PR number + + Returns: + Dict with: + - checks: List of check runs with name, status, conclusion + - passing: Number of passing checks + - failing: Number of failing checks + - pending: Number of pending checks + - failed_checks: List of failed check names + """ + try: + args = ["pr", "checks", str(pr_number), "--json", "name,state,conclusion"] + args = self._add_repo_flag(args) + + result = await self.run(args, timeout=30.0) + checks = json.loads(result.stdout) if result.stdout.strip() else [] + + passing = 0 + failing = 0 + pending = 0 + failed_checks = [] + + for check in checks: + state = check.get("state", "").upper() + conclusion = check.get("conclusion", "").upper() + name = check.get("name", "Unknown") + + if state == "COMPLETED": + if conclusion in ("SUCCESS", "NEUTRAL", "SKIPPED"): + passing += 1 + elif conclusion in ("FAILURE", "TIMED_OUT", "CANCELLED"): + failing += 1 + failed_checks.append(name) + else: + # PENDING, QUEUED, IN_PROGRESS, etc. + pending += 1 + + return { + "checks": checks, + "passing": passing, + "failing": failing, + "pending": pending, + "failed_checks": failed_checks, + } + except (GHCommandError, GHTimeoutError, json.JSONDecodeError) as e: + logger.warning(f"Failed to get PR checks for #{pr_number}: {e}") + return { + "checks": [], + "passing": 0, + "failing": 0, + "pending": 0, + "failed_checks": [], + "error": str(e), + } diff --git a/apps/backend/runners/github/models.py b/apps/backend/runners/github/models.py index ed9f83c3e3..cb7dbe22e9 100644 --- a/apps/backend/runners/github/models.py +++ b/apps/backend/runners/github/models.py @@ -221,6 +221,14 @@ class PRReviewFinding: ) redundant_with: str | None = None # Reference to duplicate code (file:line) + # NEW: Finding validation fields (from finding-validator re-investigation) + validation_status: str | None = ( + None # confirmed_valid, dismissed_false_positive, needs_human_review + ) + validation_evidence: str | None = None # Code snippet examined during validation + validation_confidence: float | None = None # Confidence of validation (0.0-1.0) + validation_explanation: str | None = None # Why finding was validated/dismissed + def to_dict(self) -> dict: return { "id": self.id, @@ -237,6 +245,11 @@ def to_dict(self) -> dict: "confidence": self.confidence, "verification_note": self.verification_note, "redundant_with": self.redundant_with, + # Validation fields + "validation_status": self.validation_status, + "validation_evidence": self.validation_evidence, + "validation_confidence": self.validation_confidence, + "validation_explanation": self.validation_explanation, } @classmethod @@ -256,6 +269,11 @@ def from_dict(cls, data: dict) -> PRReviewFinding: confidence=data.get("confidence", 0.85), verification_note=data.get("verification_note"), redundant_with=data.get("redundant_with"), + # Validation fields + validation_status=data.get("validation_status"), + validation_evidence=data.get("validation_evidence"), + validation_confidence=data.get("validation_confidence"), + validation_explanation=data.get("validation_explanation"), ) diff --git a/apps/backend/runners/github/orchestrator.py b/apps/backend/runners/github/orchestrator.py index 5456f5b0f6..0cfb078efe 100644 --- a/apps/backend/runners/github/orchestrator.py +++ b/apps/backend/runners/github/orchestrator.py @@ -389,9 +389,17 @@ async def review_pr( pr_number=pr_number, ) - # Generate verdict + # Check CI status + ci_status = await self.gh_client.get_pr_checks(pr_number) + print( + f"[DEBUG orchestrator] CI status: 
{ci_status.get('passing', 0)} passing, " + f"{ci_status.get('failing', 0)} failing, {ci_status.get('pending', 0)} pending", + flush=True, + ) + + # Generate verdict (now includes CI status) verdict, verdict_reasoning, blockers = self._generate_verdict( - findings, structural_issues, ai_triages + findings, structural_issues, ai_triages, ci_status ) print( f"[DEBUG orchestrator] Verdict: {verdict.value} - {verdict_reasoning}", @@ -661,6 +669,37 @@ async def followup_review_pr(self, pr_number: int) -> PRReviewResult: ) result = await reviewer.review_followup(followup_context) + # Check CI status and override verdict if failing + ci_status = await self.gh_client.get_pr_checks(pr_number) + failed_checks = ci_status.get("failed_checks", []) + if failed_checks: + print( + f"[Followup] CI checks failing: {failed_checks}", + flush=True, + ) + # Override verdict if CI is failing + if result.verdict in ( + MergeVerdict.READY_TO_MERGE, + MergeVerdict.MERGE_WITH_CHANGES, + ): + result.verdict = MergeVerdict.BLOCKED + result.verdict_reasoning = ( + f"Blocked: {len(failed_checks)} CI check(s) failing. " + "Fix CI before merge." + ) + result.overall_status = "request_changes" + # Add CI failures to blockers + for check_name in failed_checks: + if f"CI Failed: {check_name}" not in result.blockers: + result.blockers.append(f"CI Failed: {check_name}") + # Update summary to reflect CI status + ci_warning = ( + f"\n\n**⚠️ CI Status:** {len(failed_checks)} check(s) failing: " + f"{', '.join(failed_checks)}" + ) + if ci_warning not in result.summary: + result.summary += ci_warning + # Save result await result.save(self.github_dir) @@ -690,13 +729,16 @@ def _generate_verdict( findings: list[PRReviewFinding], structural_issues: list[StructuralIssue], ai_triages: list[AICommentTriage], + ci_status: dict | None = None, ) -> tuple[MergeVerdict, str, list[str]]: """ - Generate merge verdict based on all findings. + Generate merge verdict based on all findings and CI status. - NEW: Strengthened to block on verification failures and redundancy issues. + NEW: Strengthened to block on verification failures, redundancy issues, + and failing CI checks. """ blockers = [] + ci_status = ci_status or {} # Count by severity critical = [f for f in findings if f.severity == ReviewSeverity.CRITICAL] @@ -733,6 +775,11 @@ def _generate_verdict( ai_critical = [t for t in ai_triages if t.verdict == AICommentVerdict.CRITICAL] # Build blockers list with NEW categories first + # CI failures block merging + failed_checks = ci_status.get("failed_checks", []) + for check_name in failed_checks: + blockers.append(f"CI Failed: {check_name}") + # NEW: Verification failures block merging for f in verification_failures: note = f" - {f.verification_note}" if f.verification_note else "" @@ -765,10 +812,17 @@ def _generate_verdict( ) blockers.append(f"{t.tool_name}: {summary}") - # Determine verdict with NEW verification and redundancy checks + # Determine verdict with CI, verification and redundancy checks if blockers: + # CI failures are always blockers + if failed_checks: + verdict = MergeVerdict.BLOCKED + reasoning = ( + f"Blocked: {len(failed_checks)} CI check(s) failing. " + "Fix CI before merge." + ) # NEW: Prioritize verification failures - if verification_failures: + elif verification_failures: verdict = MergeVerdict.BLOCKED reasoning = ( f"Blocked: Cannot verify {len(verification_failures)} claim(s) in PR. 
" diff --git a/apps/backend/runners/github/services/parallel_followup_reviewer.py b/apps/backend/runners/github/services/parallel_followup_reviewer.py index 2009301092..fb7a04365b 100644 --- a/apps/backend/runners/github/services/parallel_followup_reviewer.py +++ b/apps/backend/runners/github/services/parallel_followup_reviewer.py @@ -150,6 +150,7 @@ def _define_specialist_agents(self) -> dict[str, AgentDefinition]: resolution_prompt = self._load_prompt("pr_followup_resolution_agent.md") newcode_prompt = self._load_prompt("pr_followup_newcode_agent.md") comment_prompt = self._load_prompt("pr_followup_comment_agent.md") + validator_prompt = self._load_prompt("pr_finding_validator.md") return { "resolution-verifier": AgentDefinition( @@ -186,6 +187,20 @@ def _define_specialist_agents(self) -> dict[str, AgentDefinition]: tools=["Read", "Grep", "Glob"], model="inherit", ), + "finding-validator": AgentDefinition( + description=( + "Finding re-investigation specialist. Re-investigates unresolved findings " + "to validate they are actually real issues, not false positives. " + "Actively reads the code at the finding location with fresh eyes. " + "Can confirm findings as valid OR dismiss them as false positives. " + "CRITICAL: Invoke for ALL unresolved findings after resolution-verifier runs. " + "Invoke when: There are findings marked as unresolved that need validation." + ), + prompt=validator_prompt + or "You validate whether unresolved findings are real issues.", + tools=["Read", "Grep", "Glob"], + model="inherit", + ), } def _format_previous_findings(self, context: FollowupReviewContext) -> str: @@ -444,6 +459,11 @@ async def review(self, context: FollowupReviewContext) -> PRReviewResult: f"{len(resolved_ids)} resolved, {len(unresolved_ids)} unresolved" ) + # Extract validation counts + dismissed_count = len(result_data.get("dismissed_false_positive_ids", [])) + confirmed_count = result_data.get("confirmed_valid_count", 0) + needs_human_count = result_data.get("needs_human_review_count", 0) + # Generate summary summary = self._generate_summary( verdict=verdict, @@ -452,6 +472,9 @@ async def review(self, context: FollowupReviewContext) -> PRReviewResult: unresolved_count=len(unresolved_ids), new_count=len(new_finding_ids), agents_invoked=final_agents, + dismissed_false_positive_count=dismissed_count, + confirmed_valid_count=confirmed_count, + needs_human_review_count=needs_human_count, ) # Map verdict to overall_status @@ -545,10 +568,34 @@ def _parse_structured_output( new_finding_ids = [] # Process resolution verifications + # First, build a map of finding validations (from finding-validator agent) + validation_map = {} + dismissed_ids = [] + for fv in response.finding_validations: + validation_map[fv.finding_id] = fv + if fv.validation_status == "dismissed_false_positive": + dismissed_ids.append(fv.finding_id) + print( + f"[ParallelFollowup] Finding {fv.finding_id} DISMISSED as false positive: {fv.explanation[:100]}", + flush=True, + ) + for rv in response.resolution_verifications: if rv.status == "resolved": resolved_ids.append(rv.finding_id) elif rv.status in ("unresolved", "partially_resolved", "cant_verify"): + # Check if finding was validated and dismissed as false positive + if rv.finding_id in dismissed_ids: + # Finding-validator determined this was a false positive - skip it + print( + f"[ParallelFollowup] Skipping {rv.finding_id} - dismissed as false positive by finding-validator", + flush=True, + ) + resolved_ids.append( + rv.finding_id + ) # Count as resolved (false positive) + 
continue + # Include "cant_verify" as unresolved - if we can't verify, assume not fixed unresolved_ids.append(rv.finding_id) # Add unresolved as a finding @@ -563,6 +610,19 @@ def _parse_structured_output( None, ) if original: + # Check if we have validation evidence + validation = validation_map.get(rv.finding_id) + validation_status = None + validation_evidence = None + validation_confidence = None + validation_explanation = None + + if validation: + validation_status = validation.validation_status + validation_evidence = validation.code_evidence + validation_confidence = validation.confidence + validation_explanation = validation.explanation + findings.append( PRReviewFinding( id=rv.finding_id, @@ -574,6 +634,10 @@ def _parse_structured_output( line=original.line, suggested_fix=original.suggested_fix, fixable=original.fixable, + validation_status=validation_status, + validation_evidence=validation_evidence, + validation_confidence=validation_confidence, + validation_explanation=validation_explanation, ) ) @@ -626,6 +690,18 @@ def _parse_structured_output( } verdict = verdict_map.get(response.verdict, MergeVerdict.NEEDS_REVISION) + # Count validation results + confirmed_valid_count = sum( + 1 + for fv in response.finding_validations + if fv.validation_status == "confirmed_valid" + ) + needs_human_count = sum( + 1 + for fv in response.finding_validations + if fv.validation_status == "needs_human_review" + ) + # Log findings summary for verification print( f"[ParallelFollowup] Parsed {len(findings)} findings, " @@ -633,11 +709,22 @@ def _parse_structured_output( f"{len(new_finding_ids)} new", flush=True, ) + if dismissed_ids: + print( + f"[ParallelFollowup] Validation: {len(dismissed_ids)} findings dismissed as false positives, " + f"{confirmed_valid_count} confirmed valid, {needs_human_count} need human review", + flush=True, + ) if findings: print("[ParallelFollowup] Findings summary:", flush=True) for i, f in enumerate(findings, 1): + validation_note = "" + if f.validation_status == "confirmed_valid": + validation_note = " [VALIDATED]" + elif f.validation_status == "needs_human_review": + validation_note = " [NEEDS HUMAN REVIEW]" print( - f" [{f.severity.value.upper()}] {i}. {f.title} ({f.file}:{f.line})", + f" [{f.severity.value.upper()}] {i}. 
{f.title} ({f.file}:{f.line}){validation_note}", flush=True, ) @@ -646,6 +733,9 @@ def _parse_structured_output( "resolved_ids": resolved_ids, "unresolved_ids": unresolved_ids, "new_finding_ids": new_finding_ids, + "dismissed_false_positive_ids": dismissed_ids, + "confirmed_valid_count": confirmed_valid_count, + "needs_human_review_count": needs_human_count, "verdict": verdict, "verdict_reasoning": response.verdict_reasoning, "agents_invoked": agents_from_output, @@ -719,6 +809,9 @@ def _generate_summary( unresolved_count: int, new_count: int, agents_invoked: list[str], + dismissed_false_positive_count: int = 0, + confirmed_valid_count: int = 0, + needs_human_review_count: int = 0, ) -> str: """Generate a human-readable summary of the follow-up review.""" status_emoji = { @@ -733,13 +826,27 @@ def _generate_summary( ", ".join(agents_invoked) if agents_invoked else "orchestrator only" ) + # Build validation section if there are validation results + validation_section = "" + if ( + dismissed_false_positive_count > 0 + or confirmed_valid_count > 0 + or needs_human_review_count > 0 + ): + validation_section = f""" +### Finding Validation +- 🔍 **Dismissed as False Positives**: {dismissed_false_positive_count} findings were re-investigated and found to be incorrect +- ✓ **Confirmed Valid**: {confirmed_valid_count} findings verified as genuine issues +- 👤 **Needs Human Review**: {needs_human_review_count} findings require manual verification +""" + summary = f"""## {emoji} Follow-up Review: {verdict.value.replace("_", " ").title()} ### Resolution Status - ✅ **Resolved**: {resolved_count} previous findings addressed - ❌ **Unresolved**: {unresolved_count} previous findings remain - 🆕 **New Issues**: {new_count} new findings in recent changes - +{validation_section} ### Verdict {verdict_reasoning} @@ -747,6 +854,6 @@ def _generate_summary( Agents invoked: {agents_str} --- -*This is an AI-generated follow-up review using parallel specialist analysis.* +*This is an AI-generated follow-up review using parallel specialist analysis with finding validation.* """ return summary diff --git a/apps/backend/runners/github/services/pydantic_models.py b/apps/backend/runners/github/services/pydantic_models.py index 1bfa638343..3c91a219eb 100644 --- a/apps/backend/runners/github/services/pydantic_models.py +++ b/apps/backend/runners/github/services/pydantic_models.py @@ -591,6 +591,15 @@ class ParallelFollowupResponse(BaseModel): description="AI-verified resolution status for each previous finding", ) + # Finding validations (from finding-validator agent) + finding_validations: list[FindingValidationResult] = Field( + default_factory=list, + description=( + "Re-investigation results for unresolved findings. " + "Validates whether findings are real issues or false positives." 
+ ), + ) + # New findings (from new-code-reviewer agent) new_findings: list[ParallelFollowupFinding] = Field( default_factory=list, @@ -618,3 +627,77 @@ class ParallelFollowupResponse(BaseModel): "READY_TO_MERGE", "MERGE_WITH_CHANGES", "NEEDS_REVISION", "BLOCKED" ] = Field(description="Overall merge verdict") verdict_reasoning: str = Field(description="Explanation for the verdict") + + +# ============================================================================= +# Finding Validation Response (Re-investigation of unresolved findings) +# ============================================================================= + + +class FindingValidationResult(BaseModel): + """ + Result of re-investigating an unresolved finding to validate it's actually real. + + The finding-validator agent uses this to report whether a previous finding + is a genuine issue or a false positive that should be dismissed. + """ + + finding_id: str = Field(description="ID of the finding being validated") + validation_status: Literal[ + "confirmed_valid", "dismissed_false_positive", "needs_human_review" + ] = Field( + description=( + "Validation result: " + "confirmed_valid = issue IS real, keep as unresolved; " + "dismissed_false_positive = original finding was incorrect, remove; " + "needs_human_review = cannot determine with confidence" + ) + ) + code_evidence: str = Field( + min_length=1, + description=( + "REQUIRED: Exact code snippet examined from the file. " + "Must be actual code, not a description." + ), + ) + line_range: tuple[int, int] = Field( + description="Start and end line numbers of the examined code" + ) + explanation: str = Field( + min_length=20, + description=( + "Detailed explanation of why the finding is valid/invalid. " + "Must reference specific code and explain the reasoning." + ), + ) + confidence: float = Field( + ge=0.0, + le=1.0, + description=( + "Confidence in the validation result (0.0-1.0). " + "Must be >= 0.80 to dismiss as false positive, >= 0.70 to confirm valid." 
+ ), + ) + + @field_validator("confidence", mode="before") + @classmethod + def normalize_confidence(cls, v: int | float) -> float: + """Normalize confidence to 0.0-1.0 range (accepts 0-100 or 0.0-1.0).""" + if v > 1: + return v / 100.0 + return float(v) + + +class FindingValidationResponse(BaseModel): + """Complete response from the finding-validator agent.""" + + validations: list[FindingValidationResult] = Field( + default_factory=list, + description="Validation results for each finding investigated", + ) + summary: str = Field( + description=( + "Brief summary of validation results: how many confirmed, " + "how many dismissed, how many need human review" + ) + ) diff --git a/apps/backend/security/__init__.py b/apps/backend/security/__init__.py index c2e4624e67..9b389373b6 100644 --- a/apps/backend/security/__init__.py +++ b/apps/backend/security/__init__.py @@ -50,6 +50,12 @@ reset_profile_cache, ) +# Tool input validation +from .tool_input_validator import ( + get_safe_tool_input, + validate_tool_input, +) + # Validators (for advanced usage) from .validator import ( VALIDATORS, @@ -100,4 +106,7 @@ "is_command_allowed", "needs_validation", "BASE_COMMANDS", + # Tool input validation + "validate_tool_input", + "get_safe_tool_input", ] diff --git a/apps/backend/security/hooks.py b/apps/backend/security/hooks.py index 6aaa9f8ec8..35152d4433 100644 --- a/apps/backend/security/hooks.py +++ b/apps/backend/security/hooks.py @@ -26,10 +26,11 @@ async def bash_security_hook( Pre-tool-use hook that validates bash commands using dynamic allowlist. This is the main security enforcement point. It: - 1. Extracts command names from the command string - 2. Checks each command against the project's security profile - 3. Runs additional validation for sensitive commands - 4. Blocks disallowed commands with clear error messages + 1. Validates tool_input structure (must be dict with 'command' key) + 2. Extracts command names from the command string + 3. Checks each command against the project's security profile + 4. Runs additional validation for sensitive commands + 5. Blocks disallowed commands with clear error messages Args: input_data: Dict containing tool_name and tool_input @@ -42,7 +43,25 @@ async def bash_security_hook( if input_data.get("tool_name") != "Bash": return {} - command = input_data.get("tool_input", {}).get("command", "") + # Validate tool_input structure before accessing + tool_input = input_data.get("tool_input") + + # Check if tool_input is None (malformed tool call) + if tool_input is None: + return { + "decision": "block", + "reason": "Bash tool_input is None - malformed tool call from SDK", + } + + # Check if tool_input is a dict + if not isinstance(tool_input, dict): + return { + "decision": "block", + "reason": f"Bash tool_input must be dict, got {type(tool_input).__name__}", + } + + # Now safe to access command + command = tool_input.get("command", "") if not command: return {} diff --git a/apps/backend/security/tool_input_validator.py b/apps/backend/security/tool_input_validator.py new file mode 100644 index 0000000000..7c702388a9 --- /dev/null +++ b/apps/backend/security/tool_input_validator.py @@ -0,0 +1,97 @@ +""" +Tool Input Validator +==================== + +Validates tool_input structure before tool execution. +Catches malformed inputs (None, wrong type, missing required keys) early. 
+""" + +from typing import Any + +# Required keys per tool type +TOOL_REQUIRED_KEYS: dict[str, list[str]] = { + "Bash": ["command"], + "Read": ["file_path"], + "Write": ["file_path", "content"], + "Edit": ["file_path", "old_string", "new_string"], + "Glob": ["pattern"], + "Grep": ["pattern"], + "WebFetch": ["url"], + "WebSearch": ["query"], +} + + +def validate_tool_input( + tool_name: str, + tool_input: Any, +) -> tuple[bool, str | None]: + """ + Validate tool input structure. + + Args: + tool_name: Name of the tool being called + tool_input: The tool_input value from the SDK + + Returns: + (is_valid, error_message) where error_message is None if valid + """ + # Must not be None + if tool_input is None: + return False, f"{tool_name}: tool_input is None (malformed tool call)" + + # Must be a dict + if not isinstance(tool_input, dict): + return ( + False, + f"{tool_name}: tool_input must be dict, got {type(tool_input).__name__}", + ) + + # Check required keys for known tools + required_keys = TOOL_REQUIRED_KEYS.get(tool_name, []) + missing_keys = [key for key in required_keys if key not in tool_input] + + if missing_keys: + return ( + False, + f"{tool_name}: missing required keys: {', '.join(missing_keys)}", + ) + + # Additional validation for specific tools + if tool_name == "Bash": + command = tool_input.get("command") + if not isinstance(command, str): + return ( + False, + f"Bash: 'command' must be string, got {type(command).__name__}", + ) + if not command.strip(): + return False, "Bash: 'command' is empty" + + return True, None + + +def get_safe_tool_input(block: Any, default: dict | None = None) -> dict: + """ + Safely extract tool_input from a ToolUseBlock, defaulting to empty dict. + + Args: + block: A ToolUseBlock from Claude SDK + default: Default value if extraction fails (defaults to empty dict) + + Returns: + The tool input as a dict (never None) + """ + if default is None: + default = {} + + if not hasattr(block, "input"): + return default + + tool_input = block.input + if tool_input is None: + return default + + if not isinstance(tool_input, dict): + return default + + return tool_input diff --git a/apps/backend/spec/pipeline/agent_runner.py b/apps/backend/spec/pipeline/agent_runner.py index 57e08f0632..d1ee2a78d7 100644 --- a/apps/backend/spec/pipeline/agent_runner.py +++ b/apps/backend/spec/pipeline/agent_runner.py @@ -14,6 +14,7 @@ from core.client import create_client from debug import debug, debug_detailed, debug_error, debug_section, debug_success +from security.tool_input_validator import get_safe_tool_input from task_logger import ( LogEntryType, LogPhase, @@ -160,25 +161,24 @@ async def run_agent( block, "name" ): tool_name = block.name - tool_input = None tool_count += 1 - # Extract meaningful tool input for display - if hasattr(block, "input") and block.input: - tool_input = self._extract_tool_input_display( - block.input - ) + # Safely extract tool input (handles None, non-dict, etc.) 
+ inp = get_safe_tool_input(block) + tool_input_display = self._extract_tool_input_display( + inp + ) debug( "agent_runner", f"Tool call #{tool_count}: {tool_name}", - tool_input=tool_input, + tool_input=tool_input_display, ) if self.task_logger: self.task_logger.tool_start( tool_name, - tool_input, + tool_input_display, LogPhase.PLANNING, print_to_console=True, ) diff --git a/apps/frontend/package.json b/apps/frontend/package.json index 53e82ed114..f07759f183 100644 --- a/apps/frontend/package.json +++ b/apps/frontend/package.json @@ -1,6 +1,6 @@ { "name": "auto-claude-ui", - "version": "2.7.2-beta.10", + "version": "2.7.2-beta.12", "type": "module", "description": "Desktop UI for Auto Claude autonomous coding framework", "homepage": "https://github.com/AndyMik90/Auto-Claude", diff --git a/apps/frontend/src/main/ipc-handlers/claude-code-handlers.ts b/apps/frontend/src/main/ipc-handlers/claude-code-handlers.ts index 2f93f83a05..2b6a151002 100644 --- a/apps/frontend/src/main/ipc-handlers/claude-code-handlers.ts +++ b/apps/frontend/src/main/ipc-handlers/claude-code-handlers.ts @@ -105,7 +105,9 @@ export function escapePowerShellCommand(str: string): string { .replace(/\(/g, '`(') // Escape opening parentheses .replace(/\)/g, '`)') // Escape closing parentheses .replace(/;/g, '`;') // Escape semicolons (statement separator) - .replace(/&/g, '`&'); // Escape ampersands (call operator) + .replace(/&/g, '`&') // Escape ampersands (call operator) + .replace(/\r/g, '`r') // Escape carriage returns + .replace(/\n/g, '`n'); // Escape newlines } /** diff --git a/apps/frontend/src/main/ipc-handlers/github/pr-handlers.ts b/apps/frontend/src/main/ipc-handlers/github/pr-handlers.ts index ba175abbce..9c42076b93 100644 --- a/apps/frontend/src/main/ipc-handlers/github/pr-handlers.ts +++ b/apps/frontend/src/main/ipc-handlers/github/pr-handlers.ts @@ -405,17 +405,21 @@ function savePRLogs(project: Project, logs: PRLogs): void { /** * Add a log entry to PR logs + * Returns true if the phase status changed (for triggering immediate save) */ -function addLogEntry(logs: PRLogs, entry: PRLogEntry): void { +function addLogEntry(logs: PRLogs, entry: PRLogEntry): boolean { const phase = logs.phases[entry.phase]; + let statusChanged = false; - // Update phase status if needed + // Start the phase if it was pending if (phase.status === 'pending') { phase.status = 'active'; phase.started_at = entry.timestamp; + statusChanged = true; } phase.entries.push(entry); + return statusChanged; } /** @@ -442,14 +446,20 @@ class PRLogCollector { const phase = getPhaseFromSource(parsed.source); - // Track phase transitions - mark previous phases as complete + // Track phase transitions - mark previous phases as complete (only if they were active) if (phase !== this.currentPhase) { // When moving to a new phase, mark the previous phase as complete + // Only mark complete if the phase was actually active (received log entries) + // This prevents marking phases as "completed" if they were skipped if (this.currentPhase === 'context' && (phase === 'analysis' || phase === 'synthesis')) { - this.markPhaseComplete('context', true); + if (this.logs.phases.context.status === 'active') { + this.markPhaseComplete('context', true); + } } if (this.currentPhase === 'analysis' && phase === 'synthesis') { - this.markPhaseComplete('analysis', true); + if (this.logs.phases.analysis.status === 'active') { + this.markPhaseComplete('analysis', true); + } } this.currentPhase = phase; } @@ -462,11 +472,12 @@ class PRLogCollector { source: parsed.source, 
    };
-    addLogEntry(this.logs, entry);
+    const phaseStatusChanged = addLogEntry(this.logs, entry);
     this.entryCount++;
 
-    // Save periodically for real-time streaming (every N entries)
-    if (this.entryCount % this.saveInterval === 0) {
+    // Save immediately if phase status changed (so frontend sees phase activation)
+    // OR save periodically for real-time streaming (every N entries)
+    if (phaseStatusChanged || this.entryCount % this.saveInterval === 0) {
       this.save();
     }
   }
@@ -484,20 +495,15 @@ class PRLogCollector {
   }
 
   finalize(success: boolean): void {
-    // Mark all phases as completed based on success status
-    // For phases with entries: mark based on success
-    // For phases without entries (pending): mark as completed if previous phases completed
-    let previousCompleted = true;
+    // Mark active phases as completed based on success status
+    // Pending phases with no entries should stay pending (they never ran)
     for (const phase of ['context', 'analysis', 'synthesis'] as PRLogPhase[]) {
       const phaseLog = this.logs.phases[phase];
       if (phaseLog.status === 'active') {
         this.markPhaseComplete(phase, success);
-        previousCompleted = success;
-      } else if (phaseLog.status === 'pending' && previousCompleted && success) {
-        // If review succeeded, mark pending phases as completed (they just had no logs)
-        phaseLog.status = 'completed';
-        phaseLog.completed_at = new Date().toISOString();
       }
+      // Note: Pending phases stay pending - they never received any log entries
+      // This is correct behavior for follow-up reviews where some phases may be skipped
     }
     this.save();
   }
diff --git a/apps/frontend/src/renderer/App.tsx b/apps/frontend/src/renderer/App.tsx
index 8ddeb48aa5..b380347b04 100644
--- a/apps/frontend/src/renderer/App.tsx
+++ b/apps/frontend/src/renderer/App.tsx
@@ -755,13 +755,17 @@ export function App() {
             onNavigateToTask={handleGoToTask}
           />
         )}
-        {activeView === 'github-prs' && (activeProjectId || selectedProjectId) && (
-          <GitHubPRs
-            onOpenSettings={() => {
-              setSettingsInitialProjectSection('github');
-              setIsSettingsDialogOpen(true);
-            }}
-          />
+        {/* GitHubPRs is always mounted but hidden when not active to preserve review state */}
+        {(activeProjectId || selectedProjectId) && (
+          <div className={activeView === 'github-prs' ? '' : 'hidden'}>
+            <GitHubPRs
+              onOpenSettings={() => {
+                setSettingsInitialProjectSection('github');
+                setIsSettingsDialogOpen(true);
+              }}
+              isActive={activeView === 'github-prs'}
+            />
+          </div>
        )}
        {activeView === 'gitlab-merge-requests' && (activeProjectId || selectedProjectId) && (
diff --git a/apps/frontend/src/renderer/components/github-prs/GitHubPRs.tsx b/apps/frontend/src/renderer/components/github-prs/GitHubPRs.tsx
--- a/apps/frontend/src/renderer/components/github-prs/GitHubPRs.tsx
+++ b/apps/frontend/src/renderer/components/github-prs/GitHubPRs.tsx
 interface GitHubPRsProps {
   onOpenSettings?: () => void;
+  isActive?: boolean;
 }
 
 function NotConnectedState({
@@ -50,7 +51,7 @@ function EmptyState({ message }: { message: string }) {
   );
 }
 
-export function GitHubPRs({ onOpenSettings }: GitHubPRsProps) {
+export function GitHubPRs({ onOpenSettings, isActive = false }: GitHubPRsProps) {
   const { t } = useTranslation('common');
   const projects = useProjectStore((state) => state.projects);
   const selectedProjectId = useProjectStore((state) => state.selectedProjectId);
@@ -232,6 +233,7 @@ export function GitHubPRs({ onOpenSettings }: GitHubPRsProps) {
           reviewProgress={reviewProgress}
           isReviewing={isReviewing}
           initialNewCommitsCheck={storedNewCommitsCheck}
+          isActive={isActive}
           onRunReview={handleRunReview}
           onRunFollowupReview={handleRunFollowupReview}
           onCheckNewCommits={handleCheckNewCommits}
diff --git a/apps/frontend/src/renderer/components/github-prs/components/PRDetail.tsx b/apps/frontend/src/renderer/components/github-prs/components/PRDetail.tsx
index 0f2499588b..e5421faf47 100644
--- a/apps/frontend/src/renderer/components/github-prs/components/PRDetail.tsx
+++ b/apps/frontend/src/renderer/components/github-prs/components/PRDetail.tsx
@@ -38,6 +38,7 @@ interface PRDetailProps {
   reviewProgress: PRReviewProgress | null;
   isReviewing: boolean;
   initialNewCommitsCheck?: NewCommitsCheck | null;
+  isActive?: boolean;
   onRunReview: () => void;
   onRunFollowupReview: () => void;
   onCheckNewCommits: () => Promise<NewCommitsCheck>;
@@ -67,6 +68,7 @@ export function PRDetail({
   reviewProgress,
   isReviewing,
   initialNewCommitsCheck,
+  isActive = false,
   onRunReview,
   onRunFollowupReview,
   onCheckNewCommits,
diff --git a/apps/frontend/src/renderer/components/github-prs/components/PRLogs.tsx b/apps/frontend/src/renderer/components/github-prs/components/PRLogs.tsx
index 9c3522ae36..d7aa393652 100644
--- a/apps/frontend/src/renderer/components/github-prs/components/PRLogs.tsx
+++ b/apps/frontend/src/renderer/components/github-prs/components/PRLogs.tsx
@@ -167,6 +167,17 @@ function PhaseLogSection({ phase, phaseLog, isExpanded, onToggle, isStreaming =
     );
   }
+
+  // Defensive check: During streaming, if a phase shows "completed" but has no entries,
+  // treat it as pending (this catches edge cases where phases are marked complete incorrectly)
+  if (isStreaming && status === 'completed' && !hasEntries) {
+    return (
+      
+        Pending
+      
+    );
+  }
+
   switch (status) {
     case 'completed':
       return (
diff --git a/apps/frontend/src/renderer/components/github-prs/hooks/useGitHubPRs.ts b/apps/frontend/src/renderer/components/github-prs/hooks/useGitHubPRs.ts
index 24c7d4682c..db39633662 100644
--- a/apps/frontend/src/renderer/components/github-prs/hooks/useGitHubPRs.ts
+++ b/apps/frontend/src/renderer/components/github-prs/hooks/useGitHubPRs.ts
@@ -48,6 +48,7 @@ export function useGitHubPRs(projectId?: string): UseGitHubPRsResult {
   const prReviews = usePRReviewStore((state) => state.prReviews);
   const getPRReviewState = usePRReviewStore((state) => state.getPRReviewState);
   const getActivePRReviews = usePRReviewStore((state) => state.getActivePRReviews);
+  const setNewCommitsCheckAction = usePRReviewStore((state) => state.setNewCommitsCheck);
 
   // Get review state for the selected PR from the store
   const selectedPRReviewState = useMemo(() => {
@@ -137,7 +138,8 @@ export function useGitHubPRs(projectId?: string): UseGitHubPRsResult {
       prsWithReviews.map(async ({ prNumber }) => {
         try {
           const newCommitsResult = await window.electronAPI.github.checkNewCommits(projectId, prNumber);
-
usePRReviewStore.getState().setNewCommitsCheck(projectId, prNumber, newCommitsResult); + // Use the action from the hook subscription to ensure proper React re-renders + setNewCommitsCheckAction(projectId, prNumber, newCommitsResult); } catch (err) { // Silently fail for individual PR checks - don't block the list console.warn(`Failed to check new commits for PR #${prNumber}:`, err); @@ -158,7 +160,7 @@ export function useGitHubPRs(projectId?: string): UseGitHubPRsResult { } finally { setIsLoading(false); } - }, [projectId, getPRReviewState]); + }, [projectId, getPRReviewState, setNewCommitsCheckAction]); useEffect(() => { fetchPRs(); @@ -212,13 +214,14 @@ export function useGitHubPRs(projectId?: string): UseGitHubPRsResult { try { const result = await window.electronAPI.github.checkNewCommits(projectId, prNumber); // Cache the result in the store so the list view can use it - usePRReviewStore.getState().setNewCommitsCheck(projectId, prNumber, result); + // Use the action from the hook subscription to ensure proper React re-renders + setNewCommitsCheckAction(projectId, prNumber, result); return result; } catch (err) { setError(err instanceof Error ? err.message : 'Failed to check for new commits'); return { hasNewCommits: false, newCommitCount: 0 }; } - }, [projectId]); + }, [projectId, setNewCommitsCheckAction]); const cancelReview = useCallback(async (prNumber: number): Promise => { if (!projectId) return false; diff --git a/package.json b/package.json index 29f6ec966b..5d855f8cda 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "auto-claude", - "version": "2.7.2-beta.10", + "version": "2.7.2-beta.12", "description": "Autonomous multi-agent coding framework powered by Claude AI", "license": "AGPL-3.0", "author": "Auto Claude Team", diff --git a/tests/test_agent_configs.py b/tests/test_agent_configs.py index 00af90638a..d24d30bf42 100644 --- a/tests/test_agent_configs.py +++ b/tests/test_agent_configs.py @@ -233,15 +233,15 @@ def test_coder_includes_write_tools(self): assert "Edit" in tools assert "Bash" in tools - def test_qa_reviewer_is_read_only_plus_bash(self): - """QA reviewer should be read-only plus Bash for running tests - no Write/Edit.""" + def test_qa_reviewer_has_write_for_reports(self): + """QA reviewer needs Write/Edit to create qa_report.md and update implementation_plan.json.""" from agents.tools_pkg.permissions import get_allowed_tools tools = get_allowed_tools("qa_reviewer") assert "Read" in tools assert "Bash" in tools # Can run tests - assert "Write" not in tools # Should NOT be able to edit code - assert "Edit" not in tools # Should NOT be able to edit code + assert "Write" in tools # Needs to write qa_report.md + assert "Edit" in tools # Needs to edit implementation_plan.json def test_pr_reviewer_is_read_only(self): """PR reviewer should only have Read tools.""" diff --git a/tests/test_finding_validation.py b/tests/test_finding_validation.py new file mode 100644 index 0000000000..f01b96011f --- /dev/null +++ b/tests/test_finding_validation.py @@ -0,0 +1,414 @@ +""" +Tests for Finding Validation System +==================================== + +Tests the finding-validator agent integration and FindingValidationResult models. +This system prevents false positives from persisting by re-investigating unresolved findings. 
+""" + +import sys +from pathlib import Path + +import pytest +from pydantic import ValidationError + +# Add the backend directory to path +_backend_dir = Path(__file__).parent.parent / "apps" / "backend" +_github_dir = _backend_dir / "runners" / "github" +_services_dir = _github_dir / "services" + +if str(_services_dir) not in sys.path: + sys.path.insert(0, str(_services_dir)) +if str(_github_dir) not in sys.path: + sys.path.insert(0, str(_github_dir)) +if str(_backend_dir) not in sys.path: + sys.path.insert(0, str(_backend_dir)) + +from pydantic_models import ( + FindingValidationResult, + FindingValidationResponse, + ParallelFollowupResponse, + ResolutionVerification, +) +from models import ( + PRReviewFinding, + PRReviewResult, + ReviewSeverity, + ReviewCategory, + MergeVerdict, +) + + +# ============================================================================ +# FindingValidationResult Model Tests +# ============================================================================ + + +class TestFindingValidationResultModel: + """Tests for the FindingValidationResult Pydantic model.""" + + def test_valid_confirmed_valid(self): + """Test creating a confirmed_valid validation result.""" + result = FindingValidationResult( + finding_id="SEC-001", + validation_status="confirmed_valid", + code_evidence="const query = `SELECT * FROM users WHERE id = ${userId}`;", + line_range=(45, 45), + explanation="SQL injection is present - user input is concatenated directly into the query.", + confidence=0.92, + ) + assert result.finding_id == "SEC-001" + assert result.validation_status == "confirmed_valid" + assert "SELECT" in result.code_evidence + assert result.confidence == 0.92 + + def test_valid_dismissed_false_positive(self): + """Test creating a dismissed_false_positive validation result.""" + result = FindingValidationResult( + finding_id="QUAL-002", + validation_status="dismissed_false_positive", + code_evidence="const sanitized = DOMPurify.sanitize(data);", + line_range=(23, 26), + explanation="Original finding claimed XSS but code uses DOMPurify.sanitize() for protection.", + confidence=0.88, + ) + assert result.validation_status == "dismissed_false_positive" + assert result.confidence == 0.88 + + def test_valid_needs_human_review(self): + """Test creating a needs_human_review validation result.""" + result = FindingValidationResult( + finding_id="LOGIC-003", + validation_status="needs_human_review", + code_evidence="async function handleRequest(req) { ... 
}", + line_range=(100, 150), + explanation="Race condition claim requires runtime analysis to verify.", + confidence=0.45, + ) + assert result.validation_status == "needs_human_review" + assert result.confidence == 0.45 + + def test_code_evidence_required(self): + """Test that code_evidence cannot be empty.""" + with pytest.raises(ValidationError) as exc_info: + FindingValidationResult( + finding_id="SEC-001", + validation_status="confirmed_valid", + code_evidence="", # Empty string should fail + line_range=(45, 45), + explanation="This is a detailed explanation of the issue.", + confidence=0.92, + ) + errors = exc_info.value.errors() + assert any("code_evidence" in str(e) for e in errors) + + def test_explanation_min_length(self): + """Test that explanation must be at least 20 characters.""" + with pytest.raises(ValidationError) as exc_info: + FindingValidationResult( + finding_id="SEC-001", + validation_status="confirmed_valid", + code_evidence="const x = 1;", + line_range=(45, 45), + explanation="Too short", # Less than 20 chars + confidence=0.92, + ) + errors = exc_info.value.errors() + assert any("explanation" in str(e) for e in errors) + + def test_confidence_normalized_from_percentage(self): + """Test that confidence 0-100 is normalized to 0.0-1.0.""" + result = FindingValidationResult( + finding_id="SEC-001", + validation_status="confirmed_valid", + code_evidence="const query = `SELECT * FROM users`;", + line_range=(45, 45), + explanation="SQL injection vulnerability found in the query construction.", + confidence=85, # Percentage value + ) + assert result.confidence == 0.85 + + def test_confidence_range_validation(self): + """Test that confidence must be between 0.0 and 1.0 after normalization.""" + with pytest.raises(ValidationError): + FindingValidationResult( + finding_id="SEC-001", + validation_status="confirmed_valid", + code_evidence="const x = 1;", + line_range=(45, 45), + explanation="This is a detailed explanation of the issue.", + confidence=150, # Will normalize to 1.5, which is out of range + ) + + def test_invalid_validation_status(self): + """Test that invalid validation_status values are rejected.""" + with pytest.raises(ValidationError): + FindingValidationResult( + finding_id="SEC-001", + validation_status="invalid_status", # Not a valid status + code_evidence="const x = 1;", + line_range=(45, 45), + explanation="This is a detailed explanation of the issue.", + confidence=0.92, + ) + + +class TestFindingValidationResponse: + """Tests for the FindingValidationResponse container model.""" + + def test_valid_response_with_multiple_validations(self): + """Test creating a response with multiple validation results.""" + response = FindingValidationResponse( + validations=[ + FindingValidationResult( + finding_id="SEC-001", + validation_status="confirmed_valid", + code_evidence="const query = `SELECT * FROM users`;", + line_range=(45, 45), + explanation="SQL injection confirmed in this query.", + confidence=0.92, + ), + FindingValidationResult( + finding_id="QUAL-002", + validation_status="dismissed_false_positive", + code_evidence="const sanitized = DOMPurify.sanitize(data);", + line_range=(23, 26), + explanation="Code uses DOMPurify so XSS claim is false.", + confidence=0.88, + ), + ], + summary="1 finding confirmed valid, 1 dismissed as false positive", + ) + assert len(response.validations) == 2 + assert "1 finding confirmed" in response.summary + + +class TestParallelFollowupResponseWithValidation: + """Tests for ParallelFollowupResponse including 
finding_validations.""" + + def test_response_includes_finding_validations(self): + """Test that ParallelFollowupResponse accepts finding_validations.""" + response = ParallelFollowupResponse( + analysis_summary="Follow-up review with validation", + agents_invoked=["resolution-verifier", "finding-validator"], + commits_analyzed=3, + files_changed=5, + resolution_verifications=[ + ResolutionVerification( + finding_id="SEC-001", + status="unresolved", + confidence=0.85, + evidence="File was not modified", + ) + ], + finding_validations=[ + FindingValidationResult( + finding_id="SEC-001", + validation_status="confirmed_valid", + code_evidence="const query = `SELECT * FROM users`;", + line_range=(45, 45), + explanation="SQL injection confirmed in this query.", + confidence=0.92, + ) + ], + new_findings=[], + comment_analyses=[], + comment_findings=[], + verdict="NEEDS_REVISION", + verdict_reasoning="1 confirmed valid security issue remains", + ) + assert len(response.finding_validations) == 1 + assert response.finding_validations[0].validation_status == "confirmed_valid" + + def test_response_with_dismissed_findings(self): + """Test response where findings are dismissed as false positives.""" + response = ParallelFollowupResponse( + analysis_summary="All findings dismissed as false positives", + agents_invoked=["resolution-verifier", "finding-validator"], + commits_analyzed=3, + files_changed=5, + resolution_verifications=[ + ResolutionVerification( + finding_id="SEC-001", + status="unresolved", + confidence=0.50, + evidence="Line wasn't changed but need to verify", + ) + ], + finding_validations=[ + FindingValidationResult( + finding_id="SEC-001", + validation_status="dismissed_false_positive", + code_evidence="const query = db.prepare('SELECT * FROM users WHERE id = ?').get(userId);", + line_range=(45, 48), + explanation="Original review misread - using parameterized query.", + confidence=0.95, + ) + ], + new_findings=[], + comment_analyses=[], + comment_findings=[], + verdict="READY_TO_MERGE", + verdict_reasoning="Previous finding was a false positive, now dismissed", + ) + assert len(response.finding_validations) == 1 + assert response.finding_validations[0].validation_status == "dismissed_false_positive" + + +# ============================================================================ +# PRReviewFinding Validation Fields Tests +# ============================================================================ + + +class TestPRReviewFindingValidationFields: + """Tests for validation fields on PRReviewFinding model.""" + + def test_finding_with_validation_fields(self): + """Test creating a finding with validation fields populated.""" + finding = PRReviewFinding( + id="SEC-001", + severity=ReviewSeverity.HIGH, + category=ReviewCategory.SECURITY, + title="SQL Injection", + description="User input not sanitized", + file="src/db.py", + line=42, + validation_status="confirmed_valid", + validation_evidence="const query = `SELECT * FROM users`;", + validation_confidence=0.92, + validation_explanation="SQL injection confirmed in the query.", + ) + assert finding.validation_status == "confirmed_valid" + assert finding.validation_confidence == 0.92 + + def test_finding_without_validation_fields(self): + """Test that validation fields are optional.""" + finding = PRReviewFinding( + id="SEC-001", + severity=ReviewSeverity.HIGH, + category=ReviewCategory.SECURITY, + title="SQL Injection", + description="User input not sanitized", + file="src/db.py", + line=42, + ) + assert finding.validation_status is 
None + assert finding.validation_evidence is None + assert finding.validation_confidence is None + assert finding.validation_explanation is None + + def test_finding_to_dict_includes_validation(self): + """Test that to_dict includes validation fields.""" + finding = PRReviewFinding( + id="SEC-001", + severity=ReviewSeverity.HIGH, + category=ReviewCategory.SECURITY, + title="SQL Injection", + description="User input not sanitized", + file="src/db.py", + line=42, + validation_status="confirmed_valid", + validation_evidence="const query = ...;", + validation_confidence=0.92, + validation_explanation="Issue confirmed.", + ) + data = finding.to_dict() + assert data["validation_status"] == "confirmed_valid" + assert data["validation_evidence"] == "const query = ...;" + assert data["validation_confidence"] == 0.92 + assert data["validation_explanation"] == "Issue confirmed." + + def test_finding_from_dict_with_validation(self): + """Test that from_dict loads validation fields.""" + data = { + "id": "SEC-001", + "severity": "high", + "category": "security", + "title": "SQL Injection", + "description": "User input not sanitized", + "file": "src/db.py", + "line": 42, + "validation_status": "dismissed_false_positive", + "validation_evidence": "parameterized query used", + "validation_confidence": 0.88, + "validation_explanation": "False positive - using prepared statements.", + } + finding = PRReviewFinding.from_dict(data) + assert finding.validation_status == "dismissed_false_positive" + assert finding.validation_confidence == 0.88 + + +# ============================================================================ +# Integration Tests +# ============================================================================ + + +class TestValidationIntegration: + """Integration tests for the validation flow.""" + + def test_validation_summary_format(self): + """Test that validation summary format is correct when validation results exist.""" + # Test the expected summary format when validation results are present + # We can't directly import ParallelFollowupReviewer due to complex imports, + # so we verify the Pydantic models work correctly instead + + response = ParallelFollowupResponse( + analysis_summary="Follow-up with validation", + agents_invoked=["resolution-verifier", "finding-validator"], + commits_analyzed=3, + files_changed=5, + resolution_verifications=[], + finding_validations=[ + FindingValidationResult( + finding_id="SEC-001", + validation_status="confirmed_valid", + code_evidence="const query = `SELECT * FROM users`;", + line_range=(45, 45), + explanation="SQL injection confirmed in this query construction.", + confidence=0.92, + ), + FindingValidationResult( + finding_id="QUAL-002", + validation_status="dismissed_false_positive", + code_evidence="const sanitized = DOMPurify.sanitize(data);", + line_range=(23, 26), + explanation="Original XSS claim was incorrect - uses DOMPurify.", + confidence=0.88, + ), + ], + new_findings=[], + comment_analyses=[], + comment_findings=[], + verdict="READY_TO_MERGE", + verdict_reasoning="1 dismissed as false positive, 1 confirmed valid but low severity", + ) + + # Verify validation counts can be computed from the response + confirmed_count = sum( + 1 for fv in response.finding_validations + if fv.validation_status == "confirmed_valid" + ) + dismissed_count = sum( + 1 for fv in response.finding_validations + if fv.validation_status == "dismissed_false_positive" + ) + + assert confirmed_count == 1 + assert dismissed_count == 1 + assert 
len(response.finding_validations) == 2 + assert "finding-validator" in response.agents_invoked + + def test_validation_status_enum_values(self): + """Test all valid validation status values.""" + valid_statuses = ["confirmed_valid", "dismissed_false_positive", "needs_human_review"] + + for status in valid_statuses: + result = FindingValidationResult( + finding_id="TEST-001", + validation_status=status, + code_evidence="const x = 1;", + line_range=(1, 1), + explanation="This is a valid explanation for the finding status.", + confidence=0.85, + ) + assert result.validation_status == status
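
The two confidence tests above pin down a normalization contract for FindingValidationResult that lives in pydantic_models.py, which this diff does not show. Below is a minimal sketch of a Pydantic v2 field validator that would satisfy those assertions; the model name ConfidenceCarrier and its structure are illustrative assumptions, and only the 0-100 to 0.0-1.0 scaling plus the post-normalization range check are inferred from the tests.

# Hypothetical sketch (not from this PR): the confidence handling the tests above assume.
# Assumes Pydantic v2; only percentage scaling and the range check are inferred from the tests.
from pydantic import BaseModel, Field, field_validator


class ConfidenceCarrier(BaseModel):
    """Illustrative stand-in for the confidence field on FindingValidationResult."""

    confidence: float = Field(..., description="Confidence score, 0.0-1.0 after normalization")

    @field_validator("confidence", mode="before")
    @classmethod
    def _normalize(cls, value):
        # Percentage-style inputs (e.g. 85) are scaled to 0.85; 150 becomes 1.5 and is
        # rejected by the range check below, matching test_confidence_range_validation.
        if isinstance(value, (int, float)) and value > 1.0:
            return value / 100.0
        return value

    @field_validator("confidence")
    @classmethod
    def _check_range(cls, value: float) -> float:
        if not 0.0 <= value <= 1.0:
            raise ValueError("confidence must be within 0.0-1.0 after normalization")
        return value


if __name__ == "__main__":
    # Mirrors test_confidence_normalized_from_percentage
    assert ConfidenceCarrier(confidence=85).confidence == 0.85
    # Already-normalized values pass through unchanged
    assert ConfidenceCarrier(confidence=0.92).confidence == 0.92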