diff --git a/.env.example b/.env.example
index 759964fc6..b02159071 100644
--- a/.env.example
+++ b/.env.example
@@ -65,17 +65,17 @@ GITTENSORY_REVIEW_ENRICHMENT=false
 # Current analyzer names:
 # dependency,lockfileDrift,secret,license,installScript,heavyDependency,actionPin,eol,redos
 # provenance,codeowners,secretLog,assetWeight,typosquat,commitSignature,iacMisconfig,nativeBuild
-# history,docCommentDrift
+# history,docCommentDrift,duplication
 #
 # Profile defaults:
 # fast: dependency,lockfileDrift,secret,license,installScript,heavyDependency,actionPin,eol
 # redos,provenance,secretLog,typosquat,iacMisconfig,nativeBuild
 # balanced (default): dependency,lockfileDrift,secret,license,installScript,heavyDependency
 # actionPin,eol,redos,provenance,codeowners,secretLog,assetWeight,typosquat,commitSignature
-# iacMisconfig,nativeBuild,history,docCommentDrift
+# iacMisconfig,nativeBuild,history,docCommentDrift,duplication
 # deep: dependency,lockfileDrift,secret,license,installScript,heavyDependency,actionPin,eol
 # redos,provenance,codeowners,secretLog,assetWeight,typosquat,commitSignature,iacMisconfig
-# nativeBuild,history,docCommentDrift
+# nativeBuild,history,docCommentDrift,duplication
 # END GENERATED REES ANALYZERS
 
 # Submitter-reputation spend control (internal-only): downgrades new/burst/low-rep
diff --git a/apps/gittensory-ui/src/lib/rees-analyzers.ts b/apps/gittensory-ui/src/lib/rees-analyzers.ts
index a73ff5eec..e84023a25 100644
--- a/apps/gittensory-ui/src/lib/rees-analyzers.ts
+++ b/apps/gittensory-ui/src/lib/rees-analyzers.ts
@@ -505,6 +505,34 @@ export const REES_ANALYZERS = [
         "Conservative: only named function declarations with confidently-enumerable params; non-parameter signature edits are not reported.",
     },
   },
+  {
+    name: "duplication",
+    title: "Near-verbatim duplicated code",
+    category: "quality",
+    cost: "github-light",
+    defaultEnabled: true,
+    profiles: ["balanced", "deep"],
+    requires: ["files", "github-token", "head-sha"],
+    limits: {
+      minRun: 8,
+      maxCandidates: 40,
+      maxFetches: 30,
+      maxFindings: 25,
+      maxFileBytes: 500000,
+    },
+    docs: {
+      summary:
+        "Flags added code that is a near-verbatim duplicate of a block already present elsewhere in the repo.",
+      looksAt:
+        "Added diff hunks in changed source files compared against same-extension repo files fetched from the git tree at headSha.",
+      reports:
+        "The head file:line, the existing source file:line it duplicates, and the matched line count.",
+      network:
+        "Calls the GitHub API for the git tree and candidate blobs. Requires headSha and token forwarding for private repos.",
+      notes:
+        "Conservative: trivial/boilerplate lines are dropped and a long contiguous run is required, so incidental overlap is not flagged. Never returns code content.",
+    },
+  },
 ] as const satisfies readonly ReesAnalyzerDoc[];
 
 export const REES_ANALYZER_NAMES = REES_ANALYZERS.map((analyzer) => analyzer.name);
diff --git a/review-enrichment/analyzer-metadata.json b/review-enrichment/analyzer-metadata.json
index 9ad7e58ad..98d05a2e4 100644
--- a/review-enrichment/analyzer-metadata.json
+++ b/review-enrichment/analyzer-metadata.json
@@ -579,6 +579,36 @@
         "network": "Calls the GitHub API for changed file contents. Requires headSha and token forwarding for private repos.",
         "notes": "Conservative: only named function declarations with confidently-enumerable params; non-parameter signature edits are not reported."
       }
+    },
+    {
+      "name": "duplication",
+      "title": "Near-verbatim duplicated code",
+      "category": "quality",
+      "cost": "github-light",
+      "defaultEnabled": true,
+      "profiles": [
+        "balanced",
+        "deep"
+      ],
+      "requires": [
+        "files",
+        "github-token",
+        "head-sha"
+      ],
+      "limits": {
+        "minRun": 8,
+        "maxCandidates": 40,
+        "maxFetches": 30,
+        "maxFindings": 25,
+        "maxFileBytes": 500000
+      },
+      "docs": {
+        "summary": "Flags added code that is a near-verbatim duplicate of a block already present elsewhere in the repo.",
+        "looksAt": "Added diff hunks in changed source files compared against same-extension repo files fetched from the git tree at headSha.",
+        "reports": "The head file:line, the existing source file:line it duplicates, and the matched line count.",
+        "network": "Calls the GitHub API for the git tree and candidate blobs. Requires headSha and token forwarding for private repos.",
+        "notes": "Conservative: trivial/boilerplate lines are dropped and a long contiguous run is required, so incidental overlap is not flagged. Never returns code content."
+      }
     }
   ]
 }
diff --git a/review-enrichment/src/analyzers/duplication-scan.ts b/review-enrichment/src/analyzers/duplication-scan.ts
new file mode 100644
index 000000000..79c15b0bb
--- /dev/null
+++ b/review-enrichment/src/analyzers/duplication-scan.ts
@@ -0,0 +1,533 @@
+// Full-file / near-verbatim duplication scan (#1520). Flags code a PR ADDS that is a near-verbatim duplicate of a
+// block that already exists elsewhere in the repo — i.e. copy-paste instead of importing the existing helper. The
+// no-checkout reviewer sees only the diff, so it cannot know the added lines already live in another file; this
+// analyzer fetches the repo's git tree at headSha (ONE recursive call) plus a bounded set of candidate blobs and
+// looks for a contiguous run of significant added lines that reappears verbatim (after whitespace normalization) in
+// a candidate file. It is deliberately CONSERVATIVE — trivial/boilerplate lines are dropped and a long contiguous
+// run is required — so it does not flag incidental overlap. It reports ONLY locations (`head:path:line` vs
+// `source:path:line`) + the matched line count — never the code content. Fail-safe: returns [] without a token /
+// headSha, on a bad repoFullName, or when the tree fetch fails; a single malformed candidate is skipped, never
+// aborting the scan.
+import type {
+  AnalyzerDiagnostics,
+  EnrichRequest,
+  DuplicationFinding,
+} from "../types.js";
+import type { AnalysisContext } from "../analysis-context.js";
+import { boundedFetchJson } from "../external-fetch.js";
+
+const GITHUB_API = "https://api.github.com";
+const GITHUB_API_VERSION = "2022-11-28";
+
+const MIN_RUN = 8; // a contiguous run of >= this many significant normalized lines is required to flag a duplicate
+const MAX_CANDIDATES = 40; // cap candidate files (closest-by-path first) we consider per scan
+const MAX_FETCHES = 30; // global cap on candidate blob fetches per scan
+const MAX_FINDINGS = 25; // keep the brief bounded
+const MIN_SIGNIFICANT_LEN = 12; // lines shorter than this (after trim) are treated as trivial and dropped
+const MAX_FILE_BYTES = 500_000; // skip an oversized candidate blob so one huge (likely generated) file can't eat the budget
+const MAX_TREE_JSON_BYTES = 4 * 1024 * 1024; // recursive git tree can be large; bound it like asset-weight does
+const MAX_BLOB_JSON_BYTES = 1024 * 1024; // a base64 blob payload is ~4/3 of the file; bound the JSON we read
+
+// Source-code extensions whose copy-paste is meaningful. Text data / config / lockfiles are intentionally excluded.
+const SOURCE_EXTS = new Set([
+  "ts", "tsx", "js", "jsx", "mjs", "cjs",
+  "py", "go", "rb", "java", "kt", "rs",
+  "c", "cc", "cpp", "h", "hpp", "cs", "php", "swift", "scala",
+]);
+
+// A single repo path segment (owner or name): word chars, dot, dash only. Whole-segment `.`/`..` are rejected
+// separately so a hostile repoFullName can't traverse or redirect the token-bearing request to another repository.
+const REPO_SEGMENT = /^[A-Za-z0-9._-]+$/;
+const SHA_RE = /^[0-9a-fA-F]{7,64}$/;
+
+interface ScanOptions {
+  signal?: AbortSignal;
+  analysis?: Pick<AnalysisContext, "fetchJson">;
+  diagnostics?: AnalyzerDiagnostics;
+}
+
+function githubHeaders(token: string): Record<string, string> {
+  return {
+    Authorization: `Bearer ${token}`,
+    Accept: "application/vnd.github+json",
+    "X-GitHub-Api-Version": GITHUB_API_VERSION,
+    "User-Agent": "gittensory-review-enrichment",
+  };
+}
+
+/** Parse `owner/repo`, rejecting anything that isn't exactly two safe segments (no traversal, no extra slashes) so a
+ *  hostile `repoFullName` cannot redirect the token-bearing request elsewhere. Returns null when unsafe. */
+function parseRepo(
+  repoFullName: string,
+): { owner: string; repo: string } | null {
+  const parts = repoFullName.split("/");
+  if (parts.length !== 2) return null;
+  const [owner, repo] = parts;
+  for (const seg of [owner, repo]) {
+    if (!seg || seg === "." || seg === ".." || !REPO_SEGMENT.test(seg)) {
+      return null;
+    }
+  }
+  return { owner: owner!, repo: repo! };
+}
+
+/** Fetch JSON from a GitHub git endpoint through the shared bounded helper. Mirrors asset-weight: when an analysis
+ *  context is supplied its caching/metered `fetchJson` is used, otherwise the bare `boundedFetchJson`. Returns the
+ *  parsed body on a 2xx with valid JSON, or null on any non-OK / malformed / over-budget / network outcome so the
+ *  caller fails safe. */
+async function fetchGithubJson<T>(
+  url: string,
+  token: string,
+  fetchImpl: typeof fetch,
+  options: ScanOptions,
+  endpointCategory: "github-trees" | "github-blobs",
+): Promise<T | null> {
+  const fetchOptions = {
+    endpointCategory,
+    headers: githubHeaders(token),
+    signal: options.signal,
+    fetchImpl,
+    diagnostics: options.diagnostics,
+    phase: "duplication",
+    subcall: endpointCategory,
+    maxBytes:
+      endpointCategory === "github-trees"
+        ? MAX_TREE_JSON_BYTES
+        : MAX_BLOB_JSON_BYTES,
+    maxCallsPerCategory:
+      endpointCategory === "github-blobs" ? MAX_FETCHES : 1,
+  };
+  const response = options.analysis
+    ? await options.analysis.fetchJson<T>(url, fetchOptions)
+    : await boundedFetchJson<T>(url, fetchOptions);
+  return response.ok ? response.data : null;
+}
+
+function extOf(path: string): string | null {
+  const slash = path.lastIndexOf("/");
+  const dot = path.lastIndexOf(".");
+  if (dot <= slash) return null; // no extension, or the dot is in a directory segment
+  return path.slice(dot + 1).toLowerCase();
+}
+
+function isSourceExt(path: string): boolean {
+  const ext = extOf(path);
+  return ext !== null && SOURCE_EXTS.has(ext);
+}
+
+/** Paths that are generated / vendored / minified / type-declaration: copy-paste there is noise, not a defect. */
+function isExcludedPath(path: string): boolean {
+  if (path.endsWith(".d.ts")) return true;
+  if (path.includes(".min.")) return true;
+  const lower = path.toLowerCase();
+  for (const seg of lower.split("/")) {
+    if (
+      seg === "node_modules" ||
+      seg === "dist" ||
+      seg === "build" ||
+      seg === "vendor"
+    ) {
+      return true;
+    }
+  }
+  return false;
+}
+
+/** Normalize a source line for comparison: trim + collapse internal whitespace to single spaces. Returns null when
+ *  the line is blank or "trivial" (too short, pure punctuation/braces, or bare boilerplate like a lone `import`) so
+ *  incidental matches on closing braces / import keywords cannot accumulate into a false-positive run. */
+export function normalizeLine(raw: string): string | null {
+  const collapsed = raw.trim().replace(/\s+/g, " ");
+  if (!collapsed) return null;
+  if (collapsed.length < MIN_SIGNIFICANT_LEN) return null;
+  // Pure punctuation / brackets (e.g. `});`, `} else {`-free closers) carry no copy-paste signal.
+  if (!/[A-Za-z0-9]/.test(collapsed)) return null;
+  // Bare import/include/use boilerplate that legitimately recurs across files. `export` is intentionally NOT dropped
+  // (an `export function`/`export const` declaration is meaningful and part of a copy-pasted block); only re-exports
+  // of the form `export ... from ...` are import-like.
+  if (/^(import|from|using|use|#include|package|require)\b/.test(collapsed)) {
+    return null;
+  }
+  if (/^export\b.*\bfrom\b/.test(collapsed)) return null; // re-export, e.g. `export { x } from "./y"`
+  return collapsed;
+}
+
+interface NormBlock {
+  /** Normalized significant lines, in order. */
+  norm: string[];
+  /** Parallel array: the ORIGINAL new-file line number for each entry of `norm`. */
+  lineNos: number[];
+}
+
+/** Parse a unified-diff `patch` into blocks of ADDED significant lines with their new-file line numbers. Lines are
+ *  grouped into a fresh block whenever the added run is broken (a context/removed line, or a trivial line dropped by
+ *  normalization), so a contiguous run requirement is meaningful. */
+export function extractAddedBlocks(patch: string | undefined): NormBlock[] {
+  if (!patch) return [];
+  const blocks: NormBlock[] = [];
+  let current: NormBlock | null = null;
+  let newLine = 0;
+  let inHunk = false;
+
+  const flush = () => {
+    if (current && current.norm.length) blocks.push(current);
+    current = null;
+  };
+
+  for (const line of patch.split("\n")) {
+    const header = /^@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@/.exec(line);
+    if (header) {
+      flush();
+      newLine = Number(header[1]);
+      inHunk = true;
+      continue;
+    }
+    if (!inHunk) continue;
+    if (line.startsWith("+++")) continue; // file header inside the patch, not an added line
+    if (line.startsWith("+")) {
+      const norm = normalizeLine(line.slice(1));
+      if (norm === null) {
+        // A trivial added line breaks the contiguous significant run.
+        flush();
+      } else {
+        if (!current) current = { norm: [], lineNos: [] };
+        current.norm.push(norm);
+        current.lineNos.push(newLine);
+      }
+      newLine++;
+      continue;
+    }
+    if (line.startsWith("-")) {
+      // Removed line: does not advance the new-file counter, breaks the added run.
+      flush();
+      continue;
+    }
+    if (line.startsWith("\\")) {
+      // "\ No newline at end of file" — not a real line.
+      continue;
+    }
+    // Context line (starts with a space, or an empty line in the patch): advances new-file counter, breaks the run.
+    flush();
+    newLine++;
+  }
+  flush();
+  return blocks;
+}
+
+/** Split a full file's text into blocks of significant lines (with 1-based source line numbers), breaking a block
+ *  at every blank/trivial line — the same gap-aware grouping `extractAddedBlocks` uses. Indexing each block
+ *  separately means a matched run can never bridge across a blank or trivial line in the source file. */
+function normalizeFileBlocks(text: string): NormBlock[] {
+  const blocks: NormBlock[] = [];
+  let current: NormBlock | null = null;
+  const lines = text.split("\n");
+  for (let i = 0; i < lines.length; i += 1) {
+    const n = normalizeLine(lines[i]!);
+    if (n === null) {
+      if (current && current.norm.length) blocks.push(current);
+      current = null;
+    } else {
+      if (!current) current = { norm: [], lineNos: [] };
+      current.norm.push(n);
+      current.lineNos.push(i + 1);
+    }
+  }
+  if (current && current.norm.length) blocks.push(current);
+  return blocks;
+}
+
+interface GramIndex {
+  block: NormBlock;
+  /** Map of joined MIN_RUN-window key → list of window start indices in `block.norm`. */
+  windows: Map<string, number[]>;
+}
+
+/** Build a rolling MIN_RUN-line window index over a candidate file's significant lines, for fast run lookup. */
+function buildGramIndex(block: NormBlock): GramIndex {
+  const windows = new Map<string, number[]>();
+  for (let i = 0; i + MIN_RUN <= block.norm.length; i++) {
+    const key = block.norm.slice(i, i + MIN_RUN).join("\n");
+    const list = windows.get(key);
+    if (list) list.push(i);
+    else windows.set(key, [i]);
+  }
+  return { block, windows };
+}
+
+/** Find the LONGEST contiguous run shared between an added block and an indexed candidate, anchored on a matching
+ *  MIN_RUN window then extended forward. Returns the head + source line numbers of the run start and its length,
+ *  or null when no run of >= MIN_RUN significant lines is shared. */
+function longestSharedRun(
+  added: NormBlock,
+  index: GramIndex,
+): { headLine: number; sourceLine: number; length: number } | null {
+  const cand = index.block;
+  let best: { headLine: number; sourceLine: number; length: number } | null =
+    null;
+  for (let a = 0; a + MIN_RUN <= added.norm.length; a++) {
+    const key = added.norm.slice(a, a + MIN_RUN).join("\n");
+    const starts = index.windows.get(key);
+    if (!starts) continue;
+    for (const c of starts) {
+      // Verify + extend the run forward from the matched window.
+      let len = MIN_RUN;
+      while (
+        a + len < added.norm.length &&
+        c + len < cand.norm.length &&
+        added.norm[a + len] === cand.norm[c + len]
+      ) {
+        len++;
+      }
+      if (!best || len > best.length) {
+        best = {
+          headLine: added.lineNos[a]!,
+          sourceLine: cand.lineNos[c]!,
+          length: len,
+        };
+      }
+    }
+  }
+  return best;
+}
+
+/** Fetch the recursive git tree at `sha`. Returns the blob entries (path + blob sha) or null on a non-OK / malformed
+ *  reply / network error so the caller fails safe. */
+async function fetchTree(
+  owner: string,
+  repo: string,
+  sha: string,
+  token: string,
+  fetchImpl: typeof fetch,
+  options: ScanOptions,
+): Promise<Array<{ path: string; sha: string }> | null> {
+  if (!SHA_RE.test(sha)) return null;
+  const url = `${GITHUB_API}/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}/git/trees/${encodeURIComponent(sha)}?recursive=1`;
+  const json = await fetchGithubJson<{
+    tree?: Array<{ path?: string; type?: string; sha?: string }>;
+    truncated?: boolean;
+  }>(url, token, fetchImpl, options, "github-trees");
+  if (!json || !Array.isArray(json.tree)) return null;
+  // `json.truncated` (very large repos) is intentionally not treated as an error: this analyzer is best-effort and
+  // additive, so a partial tree only means fewer candidate files to compare against. That can at worst MISS a
+  // duplicate (a false negative), never produce a wrong finding, so scanning the partial tree is acceptable.
+  const out: Array<{ path: string; sha: string }> = [];
+  for (const entry of json.tree) {
+    if (
+      entry.type === "blob" &&
+      typeof entry.path === "string" &&
+      typeof entry.sha === "string"
+    ) {
+      out.push({ path: entry.path, sha: entry.sha });
+    }
+  }
+  return out;
+}
+
+/** Fetch a single git blob's decoded UTF-8 content, or null on a non-OK / malformed reply / network error. Only
+ *  base64-encoded string content is decoded — anything else fails safe to null. */
+async function fetchBlob(
+  owner: string,
+  repo: string,
+  blobSha: string,
+  token: string,
+  fetchImpl: typeof fetch,
+  options: ScanOptions,
+): Promise<string | null> {
+  if (!SHA_RE.test(blobSha)) return null;
+  const url = `${GITHUB_API}/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}/git/blobs/${encodeURIComponent(blobSha)}`;
+  const json = await fetchGithubJson<{
+    content?: string;
+    encoding?: string;
+  }>(url, token, fetchImpl, options, "github-blobs");
+  if (!json) return null;
+  if (json.encoding !== "base64" || typeof json.content !== "string") {
+    return null;
+  }
+  // Cap on the encoded size first so a huge blob never burns CPU on decode, then on the true decoded byte length.
+  if (json.content.length > MAX_FILE_BYTES * 2) return null;
+  const decoded = decodeBase64Utf8(json.content);
+  if (!decoded || decoded.byteLength > MAX_FILE_BYTES) return null;
+  return decoded.text;
+}
+
+/** Worker-safe base64 → UTF-8 decode (no Node `Buffer` dependency, so the Cloudflare Worker deployment path works,
+ *  not only Node). GitHub wraps blob `content` with newlines, which `atob` rejects, so whitespace is stripped first.
+ *  Returns the decoded text plus its true UTF-8 byte length, or null on malformed base64 (fail safe, never throws). */
+export function decodeBase64Utf8(content: string): { text: string; byteLength: number } | null {
+  try {
+    const binary = atob(content.replace(/\s+/g, ""));
+    const bytes = new Uint8Array(binary.length);
+    for (let i = 0; i < binary.length; i += 1) bytes[i] = binary.charCodeAt(i);
+    // `fatal: true` throws on invalid UTF-8, so a binary/non-text blob fails safe to null (skipped) instead of being
+    // turned into replacement-character garbage that could be compared as if it were source.
+    return { text: new TextDecoder("utf-8", { fatal: true }).decode(bytes), byteLength: bytes.length };
+  } catch {
+    return null;
+  }
+}
+
+/** Score a candidate path by how close it sits to any changed path: more shared leading directory segments ⇒ higher
+ *  score (a copy-pasted helper most often lives near the file that should have imported it). */
+function proximityScore(candidate: string, changedDirs: string[][]): number {
+  const candSegs = candidate.split("/").slice(0, -1);
+  let best = 0;
+  for (const dir of changedDirs) {
+    let shared = 0;
+    const max = Math.min(candSegs.length, dir.length);
+    while (shared < max && candSegs[shared] === dir[shared]) shared++;
+    if (shared > best) best = shared;
+  }
+  return best;
+}
+
+/** Analyzer entrypoint: flag added code that is a near-verbatim duplicate of an existing block elsewhere in the repo.
+ *  Fail-safe (returns [] without a token/headSha, on a bad repoFullName, or on a failed tree fetch). */
+export async function scanDuplication(
+  req: EnrichRequest,
+  fetchImpl: typeof fetch = fetch,
+  options: ScanOptions = {},
+): Promise<DuplicationFinding[]> {
+  const token = req.githubToken;
+  if (!token || !req.headSha) return [];
+  if (options.signal?.aborted) return [];
+  const repo = parseRepo(req.repoFullName);
+  if (!repo) return [];
+
+  // Changed source files (kept) and their added significant blocks. Test/spec files are KEPT — copy-pasted tests are
+  // a real maintenance smell too — only generated/vendored/minified/declaration files are excluded.
+  const changed = (req.files ?? []).filter(
+    (f) =>
+      f.status !== "removed" &&
+      isSourceExt(f.path) &&
+      !isExcludedPath(f.path),
+  );
+  if (!changed.length) return [];
+
+  const changedPaths = new Set(changed.map((f) => f.path));
+  const changedExts = new Set<string>();
+  for (const f of changed) {
+    const ext = extOf(f.path);
+    if (ext) changedExts.add(ext);
+  }
+
+  const addedByFile: Array<{ path: string; ext: string | null; blocks: NormBlock[] }> = [];
+  for (const f of changed) {
+    const blocks = extractAddedBlocks(f.patch).filter(
+      (b) => b.norm.length >= MIN_RUN,
+    );
+    if (blocks.length) addedByFile.push({ path: f.path, ext: extOf(f.path), blocks });
+  }
+  if (!addedByFile.length) return [];
+
+  const tree = await fetchTree(
+    repo.owner,
+    repo.repo,
+    req.headSha,
+    token,
+    fetchImpl,
+    options,
+  );
+  if (tree === null) return [];
+
+  // Group candidate blobs BY extension, then proximity-sort + cap WITHIN each extension bucket (MAX_CANDIDATES each).
+  // Scanning is then round-robin across buckets under one global fetch budget (below), so no single extension's
+  // candidates can starve another's, while the TOTAL blob fetches stay bounded by MAX_FETCHES.
+  const changedDirs = [...changedPaths].map((p) => p.split("/").slice(0, -1));
+  const candidatesByExt = new Map<string, Array<{ path: string; sha: string }>>();
+  for (const entry of tree) {
+    if (changedPaths.has(entry.path) || isExcludedPath(entry.path)) continue;
+    const ext = extOf(entry.path);
+    if (ext === null || !changedExts.has(ext)) continue;
+    const bucket = candidatesByExt.get(ext);
+    if (bucket) bucket.push(entry);
+    else candidatesByExt.set(ext, [entry]);
+  }
+  for (const [ext, bucket] of candidatesByExt) {
+    bucket.sort((a, b) => {
+      const sa = proximityScore(a.path, changedDirs);
+      const sb = proximityScore(b.path, changedDirs);
+      return sb !== sa ? sb - sa : a.path.localeCompare(b.path);
+    });
+    candidatesByExt.set(ext, bucket.slice(0, MAX_CANDIDATES));
+  }
+
+  // Added blocks grouped by extension, so each extension is scanned only against its own candidate bucket.
+  const addedByExt = new Map<string, Array<{ path: string; blocks: NormBlock[] }>>();
+  for (const f of addedByFile) {
+    if (f.ext === null) continue;
+    const arr = addedByExt.get(f.ext);
+    if (arr) arr.push({ path: f.path, blocks: f.blocks });
+    else addedByExt.set(f.ext, [{ path: f.path, blocks: f.blocks }]);
+  }
+
+  const findings: DuplicationFinding[] = [];
+  const seen = new Set<string>();
+
+  // Scan round-robin across extensions under ONE global MAX_FETCHES budget: fair (each changed extension gets its
+  // turn, so none is starved) AND bounded (total blob fetches never exceed MAX_FETCHES, regardless of how many
+  // extensions changed). One pointer per extension bucket; we cycle until the budget is spent or every bucket drains.
+  const buckets = [...addedByExt.entries()]
+    .map(([ext, addedFiles]) => ({
+      addedFiles,
+      cands: candidatesByExt.get(ext) ?? [],
+      cursor: 0,
+    }))
+    .filter((bk) => bk.cands.length > 0);
+
+  let fetches = 0;
+  let aborted = false;
+  let progressed = true;
+  while (fetches < MAX_FETCHES && progressed && !aborted) {
+    progressed = false;
+    for (const bk of buckets) {
+      if (fetches >= MAX_FETCHES) break;
+      if (bk.cursor >= bk.cands.length) continue;
+      if (options.signal?.aborted) {
+        aborted = true;
+        break;
+      }
+      const cand = bk.cands[bk.cursor];
+      bk.cursor += 1;
+      if (!cand) continue;
+      progressed = true;
+      fetches += 1;
+      const text = await fetchBlob(
+        repo.owner,
+        repo.repo,
+        cand.sha,
+        token,
+        fetchImpl,
+        options,
+      );
+      if (text === null) continue; // bad/empty/oversized candidate — skip, never abort (size capped in fetchBlob)
+      // Index each gap-delimited block of the candidate separately so a run cannot bridge a blank/trivial line.
+      const indices = normalizeFileBlocks(text)
+        .filter((b) => b.norm.length >= MIN_RUN)
+        .map(buildGramIndex);
+      if (!indices.length) continue;
+
+      for (const file of bk.addedFiles) {
+        for (const block of file.blocks) {
+          let best: { headLine: number; sourceLine: number; length: number } | null = null;
+          for (const index of indices) {
+            const run = longestSharedRun(block, index);
+            if (run && (!best || run.length > best.length)) best = run;
+          }
+          if (!best) continue;
+          const key = `${file.path}:${best.headLine}|${cand.path}:${best.sourceLine}`;
+          if (seen.has(key)) continue;
+          seen.add(key);
+          findings.push({
+            file: file.path,
+            line: best.headLine,
+            sourceFile: cand.path,
+            sourceLine: best.sourceLine,
+            lines: best.length,
+          });
+        }
+      }
+    }
+  }
+
+  return findings.sort((a, b) => b.lines - a.lines).slice(0, MAX_FINDINGS);
+}
diff --git a/review-enrichment/src/analyzers/registry.ts b/review-enrichment/src/analyzers/registry.ts
index 9d8967055..33252a53b 100644
--- a/review-enrichment/src/analyzers/registry.ts
+++ b/review-enrichment/src/analyzers/registry.ts
@@ -4,6 +4,7 @@ import { scanCodeowners } from "./codeowners.js";
 import { scanCommitSignature } from "./commit-signature.js";
 import { dependencyAnalyzer } from "./dependency/descriptor.js";
 import { scanDocCommentDrift } from "./doc-comment-drift.js";
+import { scanDuplication } from "./duplication-scan.js";
 import { scanEol } from "./eol-check.js";
 import { scanHeavyDependencies } from "./heavy-dependency.js";
 import { scanHistory } from "./history.js";
@@ -388,6 +389,35 @@ export const ANALYZER_DESCRIPTORS = [
     },
     run: (req, { signal }) => scanDocCommentDrift(req, fetch, { signal }),
   }),
+  descriptor({
+    name: "duplication",
+    title: "Near-verbatim duplicated code",
+    category: "quality",
+    cost: "github-light",
+    defaultEnabled: true,
+    requires: ["files", "github-token", "head-sha"],
+    limits: {
+      minRun: 8,
+      maxCandidates: 40,
+      maxFetches: 30,
+      maxFindings: 25,
+      maxFileBytes: 500_000,
+    },
+    docs: {
+      summary:
+        "Flags added code that is a near-verbatim duplicate of a block already present elsewhere in the repo.",
+      looksAt:
+        "Added diff hunks in changed source files compared against same-extension repo files fetched from the git tree at headSha.",
+      reports:
+        "The head file:line, the existing source file:line it duplicates, and the matched line count.",
+      network:
+        "Calls the GitHub API for the git tree and candidate blobs. Requires headSha and token forwarding for private repos.",
+      notes:
+        "Conservative: trivial/boilerplate lines are dropped and a long contiguous run is required, so incidental overlap is not flagged. Never returns code content.",
+    },
+    run: (req, { signal, analysis, diagnostics }) =>
+      scanDuplication(req, fetch, { signal, analysis, diagnostics }),
+  }),
 ] as const satisfies readonly AnyAnalyzerDescriptor[];
 
 export const ANALYZER_NAMES = ANALYZER_DESCRIPTORS.map(
diff --git a/review-enrichment/src/render.ts b/review-enrichment/src/render.ts
index c90461c43..47ed5ae95 100644
--- a/review-enrichment/src/render.ts
+++ b/review-enrichment/src/render.ts
@@ -353,6 +353,18 @@ export function renderBrief(
     }
   }
 
+  const duplication = findings.duplication ?? [];
+  if (duplication.length) {
+    lines.push(
+      "### Near-verbatim duplicated code (prefer importing the existing implementation)",
+    );
+    for (const item of duplication) {
+      lines.push(
+        `- ${safeCodeSpan(`${item.file}:${item.line}`)} duplicates ${safeCodeSpan(`${item.sourceFile}:${item.sourceLine}`)} (~${item.lines} lines)`,
+      );
+    }
+  }
+
   if (!lines.length) return { promptSection: "", systemSuffix: "" };
 
   const header =
diff --git a/review-enrichment/src/types.ts b/review-enrichment/src/types.ts
index d3e4bd61e..29bd606dc 100644
--- a/review-enrichment/src/types.ts
+++ b/review-enrichment/src/types.ts
@@ -291,6 +291,7 @@ export interface BriefFindings {
   nativeBuild?: NativeBuildFinding[];
   history?: HistoryFinding[];
   docCommentDrift?: DocCommentDriftFinding[];
+  duplication?: DuplicationFinding[];
 }
 
 /** A JSDoc/TSDoc block whose `@param` tags name parameters the adjacent function no longer declares — a
@@ -305,6 +306,22 @@ export interface DocCommentDriftFinding {
   staleParams: string[];
 }
 
+/** Added code that is a near-verbatim duplicate of a contiguous block already present elsewhere in the repo — a
+ *  copy-paste the no-checkout reviewer cannot see, where importing the existing implementation is usually better.
+ *  Reports the head location, the matching source location, and the matched line count only — never the code. (#1520) */
+export interface DuplicationFinding {
+  /** Path of the changed file that ADDED the duplicated block. */
+  file: string;
+  /** New-file line where the duplicated run begins in the changed file. */
+  line: number;
+  /** Path of the existing repo file that already contains the same block. */
+  sourceFile: string;
+  /** 1-based line where the run begins in the existing source file. */
+  sourceLine: number;
+  /** Number of contiguous significant lines that matched verbatim (after whitespace normalization). */
+  lines: number;
+}
+
 export type AnalyzerStatus = "ok" | "degraded" | "skipped" | "capped" | "timeout";
 
 /** Internal, public-safe analyzer diagnostics for Sentry. Never attach request bodies, diffs, tokens, or raw prompts. */
diff --git a/review-enrichment/test/analyzer-registry.test.ts b/review-enrichment/test/analyzer-registry.test.ts
index 6ec736433..f4f44de07 100644
--- a/review-enrichment/test/analyzer-registry.test.ts
+++ b/review-enrichment/test/analyzer-registry.test.ts
@@ -29,6 +29,7 @@ const EXPECTED_ANALYZERS = [
   "nativeBuild",
   "history",
   "docCommentDrift",
+  "duplication",
 ];
 
 test("analyzer descriptors cover the runtime registry in stable order", () => {
diff --git a/review-enrichment/test/duplication-scan.test.ts b/review-enrichment/test/duplication-scan.test.ts
new file mode 100644
index 000000000..2d5be4264
--- /dev/null
+++ b/review-enrichment/test/duplication-scan.test.ts
@@ -0,0 +1,528 @@
+// Units for the near-verbatim duplication-scan analyzer (#1520). Own file (not enrichment.test.ts) so concurrent
+// analyzer PRs don't collide. Runs against the compiled dist/.
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import {
+  scanDuplication,
+  normalizeLine,
+  extractAddedBlocks,
+  decodeBase64Utf8,
+} from "../dist/analyzers/duplication-scan.js";
+import { renderBrief } from "../dist/render.js";
+
+// ── helpers ────────────────────────────────────────────────────────────────────
+
+// A run of >= MIN_RUN (8) significant, non-trivial, non-import lines shared between the patch and a source file.
+const SHARED = [
+  "const totalRewardScaled = baseReward * decayFactor(epochAge)",
+  "const clampedScore = Math.min(maxScore, Math.max(minScore, rawScore))",
+  "const weightedAverage = sumOfWeights === 0 ? 0 : weightedTotal / sumOfWeights",
+  "const normalizedVector = values.map((value) => value / vectorMagnitude)",
+  "const adjustedPenalty = penaltyBase * Math.log2(violationCount + 1)",
+  "const finalEmission = totalRewardScaled - adjustedPenalty + bonusAmount",
+  "const roundedEmission = Math.round(finalEmission * 1000000) / 1000000",
+  "const settledEmission = Number.isFinite(roundedEmission) ? roundedEmission : 0",
+  "const persistedEmission = await store.write(minerHotkey, settledEmission)",
+];
+
+// Build a +-prefixed unified-diff patch that adds the given lines starting at new-file line `start`.
+const addedPatch = (addedLines, start = 10) => {
+  const body = addedLines.map((l) => `+${l}`).join("\n");
+  return `@@ -1,0 +${start},${addedLines.length} @@\n${body}`;
+};
+
+// A source file's text where the SHARED block begins at line `at` (1-based), padded above with filler lines.
+const sourceWithBlock = (block, at = 5) => {
+  const filler = Array.from(
+    { length: at - 1 },
+    (_, i) => `const fillerVariableNumber${i} = computeFillerValueFromIndex(${i})`,
+  );
+  return [...filler, ...block].join("\n");
+};
+
+const b64 = (text) => Buffer.from(text, "utf8").toString("base64");
+const b64Bytes = (arr) => Buffer.from(Uint8Array.from(arr)).toString("base64");
+// Build a real Response so the shared boundedFetchJson body reader (used when no analysis context is injected)
+// parses it exactly as it would a live GitHub reply.
+const jsonResponse = (body, init) => new Response(JSON.stringify(body), init);
+
+// Mock fetch that switches on the URL: git/trees → the tree; git/blobs/{sha} → that blob's content.
+const makeFetch = ({
+  tree,
+  blobs,
+  truncated = false,
+  treeOk = true,
+  treeBadJson = false,
+  blobStatus = {},
+  blobThrow = new Set(),
+  counter,
+}) => {
+  return async (url) => {
+    if (counter) counter.calls.push(url);
+    if (url.includes("/git/trees/")) {
+      if (counter) counter.tree++;
+      if (!treeOk) return jsonResponse({}, { status: 500 });
+      if (treeBadJson) return jsonResponse({ nope: 1 });
+      return jsonResponse({
+        tree: tree.map((t) => ({ path: t.path, type: "blob", sha: t.sha })),
+        truncated,
+      });
+    }
+    const m = /\/git\/blobs\/([0-9a-fA-F]+)/.exec(url);
+    if (m) {
+      const sha = m[1];
+      if (counter) counter.blob++;
+      if (blobThrow.has(sha)) throw new Error("network down");
+      const code = blobStatus[sha];
+      if (code && !(code >= 200 && code < 300)) {
+        return jsonResponse({}, { status: code });
+      }
+      const content = blobs[sha];
+      if (content === undefined) {
+        return jsonResponse({ encoding: "utf8", content: "x" });
+      }
+      return jsonResponse({ encoding: "base64", content: b64(content) });
+    }
+    throw new Error(`unexpected url ${url}`);
+  };
+};
+
+const baseReq = (overrides = {}) => ({
+  repoFullName: "o/r",
+  prNumber: 1,
+  headSha: "a".repeat(40),
+  githubToken: "tok",
+  files: [{ path: "src/new-scorer.ts", status: "added", patch: addedPatch(SHARED) }],
+  ...overrides,
+});
+
+// ── normalizeLine / extractAddedBlocks units ────────────────────────────────────
+
+test("normalizeLine: drops blanks, trivial, punctuation-only and bare imports; keeps significant code", () => {
+  assert.equal(normalizeLine("   "), null);
+  assert.equal(normalizeLine("})"), null); // punctuation only
+  assert.equal(normalizeLine("} else {"), null); // < MIN_SIGNIFICANT_LEN after trim
+  assert.equal(normalizeLine("short"), null); // too short
+  assert.equal(normalizeLine("import { foo } from './bar'"), null); // bare import
+  assert.equal(normalizeLine("   const x = computeWeightedScore(a, b)  "), "const x = computeWeightedScore(a, b)");
+  assert.equal(
+    normalizeLine("const   y    =   1234567890 + somethingLong"),
+    "const y = 1234567890 + somethingLong",
+  ); // internal whitespace collapsed
+});
+
+test("extractAddedBlocks: groups consecutive added lines, breaks on context/removed/trivial, tracks new-file line numbers", () => {
+  const patch = [
+    "@@ -1,2 +5,6 @@",
+    "+const firstSignificantLineHere = makeValue(1)",
+    "+const secondSignificantLineHere = makeValue(2)",
+    " contextLineThatBreaksTheRun",
+    "+const thirdSignificantLineHere = makeValue(3)",
+    "-removedLineDoesNotCount",
+    "+const fourthSignificantLineHere = makeValue(4)",
+  ].join("\n");
+  const blocks = extractAddedBlocks(patch);
+  // block1: lines 5,6 ; block2: line 8 (after one context line at 7) ; block3: line 9 (removed doesn't advance)
+  assert.equal(blocks.length, 3);
+  assert.deepEqual(blocks[0].lineNos, [5, 6]);
+  assert.deepEqual(blocks[1].lineNos, [8]);
+  assert.deepEqual(blocks[2].lineNos, [9]);
+});
+
+test("decodeBase64Utf8: decodes UTF-8 blob content without globalThis.Buffer (Cloudflare Worker deployment path)", () => {
+  // The production decode must not depend on Node's Buffer. Encode while Buffer exists, then remove it and decode.
+  const text = "const computedScore = weight * factor + base\nconst total = computedScore + bonusAmount + café";
+  const encoded = b64(text);
+  const savedBuffer = globalThis.Buffer;
+  globalThis.Buffer = undefined; // simulate a runtime with no Node Buffer (Worker)
+  try {
+    const out = decodeBase64Utf8(encoded);
+    assert.equal(out?.text, text); // atob/TextDecoder path, multi-byte UTF-8 (é) preserved
+    assert.ok(out.byteLength >= text.length); // byte length, not UTF-16 code-unit length
+  } finally {
+    globalThis.Buffer = savedBuffer;
+  }
+});
+
+test("decodeBase64Utf8: malformed base64 fails safe to null (never throws)", () => {
+  assert.equal(decodeBase64Utf8("!!!not base64!!!"), null);
+});
+
+test("decodeBase64Utf8: invalid UTF-8 (binary) content fails safe to null (fatal decode)", () => {
+  // base64 of the bytes 0xFF 0xFE 0xFD — not valid UTF-8; a binary blob must decode to null, not garbage.
+  const invalidUtf8B64 = b64Bytes([0xff, 0xfe, 0xfd]);
+  assert.equal(decodeBase64Utf8(invalidUtf8B64), null);
+});
+
+test("scanDuplication: a match cannot bridge a blank/trivial line in the source (per-block indexing)", async () => {
+  // Added: 8 contiguous significant lines. Candidate: the same 8 lines, but split by a blank line into two 4-line
+  // blocks. With per-block indexing neither candidate block reaches MIN_RUN (8) contiguously, so there is NO match.
+  const eight = SHARED.slice(0, 8);
+  const splitCandidate = [...eight.slice(0, 4), "", ...eight.slice(4)].join("\n");
+  const fetchImpl = makeFetch({
+    tree: [{ path: "src/other.ts", sha: "c".repeat(40) }],
+    blobs: { ["c".repeat(40)]: splitCandidate },
+  });
+  const req = baseReq({ files: [{ path: "src/new.ts", status: "added", patch: addedPatch(eight) }] });
+  assert.deepEqual(await scanDuplication(req, fetchImpl), []);
+});
+
+test("scanDuplication: mixed-extension PR does not cross-match (a .ts block is not compared to a .py candidate)", async () => {
+  // A changed .ts file whose added block lives VERBATIM in a .py candidate file. Cross-extension matching is a bug;
+  // the per-extension guard must prevent any finding here.
+  const req = {
+    repoFullName: "o/r",
+    prNumber: 1,
+    headSha: "a".repeat(40),
+    githubToken: "tok",
+    files: [
+      { path: "src/scorer.ts", status: "added", patch: addedPatch(SHARED) },
+      { path: "src/util.py", status: "added", patch: "@@ -1,0 +1,1 @@\n+def helper(value): return value + 1" },
+    ],
+  };
+  const fetchImpl = makeFetch({
+    tree: [
+      { path: "lib/copy.py", sha: "b".repeat(40) }, // .py file containing the .ts block — must NOT match
+      { path: "lib/other.ts", sha: "c".repeat(40) }, // unrelated .ts
+    ],
+    blobs: {
+      ["b".repeat(40)]: sourceWithBlock(SHARED),
+      ["c".repeat(40)]: "const unrelatedConstantValue = computeSomethingDifferentEntirely(42)",
+    },
+  });
+  assert.deepEqual(await scanDuplication(req, fetchImpl), []);
+});
+
+test("scanDuplication: per-extension budget — a .py duplicate is still found despite >40 unrelated .ts candidates", async () => {
+  // 8 significant Python lines for the .py side.
+  const PY_SHARED = [
+    "total_reward_scaled = base_reward * decay_factor(epoch_age)",
+    "clamped_score = min(max_score, max(min_score, raw_score))",
+    "weighted_average = 0 if sum_of_weights == 0 else weighted_total / sum_of_weights",
+    "normalized_vector = [value / vector_magnitude for value in values]",
+    "adjusted_penalty = penalty_base * math.log2(violation_count + 1)",
+    "final_emission = total_reward_scaled - adjusted_penalty + bonus_amount",
+    "rounded_emission = round(final_emission * 1000000) / 1000000",
+    "settled_emission = rounded_emission if math.isfinite(rounded_emission) else 0",
+  ];
+  // 45 unrelated .ts candidates that would overflow a single global slice(40) and starve the .py side.
+  const tsFillers = Array.from({ length: 45 }, (_, i) => ({
+    path: `lib/ts/filler${i}.ts`,
+    sha: "a".repeat(38) + i.toString().padStart(2, "0"),
+  }));
+  const pyDup = { path: "lib/py/copy.py", sha: "f".repeat(40) };
+  const blobs = { [pyDup.sha]: sourceWithBlock(PY_SHARED) };
+  for (const f of tsFillers) {
+    blobs[f.sha] = `const noMatchFiller${f.sha.slice(-2)} = computeUnrelatedFillerValue(123456)`;
+  }
+  const req = {
+    repoFullName: "o/r",
+    prNumber: 1,
+    headSha: "a".repeat(40),
+    githubToken: "tok",
+    files: [
+      { path: "src/scorer.ts", status: "added", patch: addedPatch(SHARED) },
+      { path: "src/scorer.py", status: "added", patch: addedPatch(PY_SHARED) },
+    ],
+  };
+  const findings = await scanDuplication(req, makeFetch({ tree: [...tsFillers, pyDup], blobs }));
+  assert.ok(
+    findings.some((x) => x.sourceFile === "lib/py/copy.py" && x.file === "src/scorer.py"),
+    JSON.stringify(findings),
+  );
+});
+
+test("scanDuplication: total blob fetches across all changed extensions never exceed the global MAX_FETCHES budget", async () => {
+  // Two extensions, each with 25 candidates (50 total). Round-robin under ONE global budget must cap total fetches.
+  const PY = [
+    "py_total_reward = base_reward * decay_factor_for_epoch(epoch_age)",
+    "py_clamped_score = min(max_score, max(min_score, raw_score_value))",
+    "py_weighted_avg = 0 if total_weight == 0 else weighted_sum / total_weight",
+    "py_normalized = [value / vector_magnitude for value in raw_values]",
+    "py_adjusted_penalty = penalty_base * log2_of(violation_count + 1)",
+    "py_final_emission = py_total_reward - py_adjusted_penalty + bonus_amount",
+    "py_rounded = round(py_final_emission * 1000000) / 1000000.0",
+    "py_settled = py_rounded if is_finite_number(py_rounded) else 0.0",
+  ];
+  const tsFillers = Array.from({ length: 25 }, (_, i) => ({ path: `lib/ts/f${i}.ts`, sha: "a".repeat(38) + i.toString().padStart(2, "0") }));
+  const pyFillers = Array.from({ length: 25 }, (_, i) => ({ path: `lib/py/f${i}.py`, sha: "b".repeat(38) + i.toString().padStart(2, "0") }));
+  const blobs = {};
+  for (const f of [...tsFillers, ...pyFillers]) {
+    blobs[f.sha] = `const unrelatedFiller${f.sha.slice(-2)} = computeUnrelatedFillerValue(987654)`;
+  }
+  const counter = { calls: [], tree: 0, blob: 0 };
+  const req = {
+    repoFullName: "o/r",
+    prNumber: 1,
+    headSha: "a".repeat(40),
+    githubToken: "tok",
+    files: [
+      { path: "src/a.ts", status: "added", patch: addedPatch(SHARED) },
+      { path: "src/b.py", status: "added", patch: addedPatch(PY) },
+    ],
+  };
+  await scanDuplication(req, makeFetch({ tree: [...tsFillers, ...pyFillers], blobs, counter }));
+  assert.ok(counter.blob <= 30, `blob fetches=${counter.blob} exceeded MAX_FETCHES (30)`);
+  assert.ok(counter.blob > 0, "expected some candidate fetches");
+});
+
+test("extractAddedBlocks: empty/undefined patch → no blocks", () => {
+  assert.deepEqual(extractAddedBlocks(undefined), []);
+  assert.deepEqual(extractAddedBlocks(""), []);
+});
+
+test("scanDuplication: an oversized candidate blob is skipped so it cannot eat the budget", async () => {
+  // The SHARED block IS present in this candidate, but the file exceeds MAX_FILE_BYTES → never scanned → no finding.
+  const oversized = sourceWithBlock(SHARED) + "\n" + "x".repeat(500_001);
+  const fetch = makeFetch({
+    tree: [{ path: "src/huge-generated.ts", sha: "b".repeat(40) }],
+    blobs: { ["b".repeat(40)]: oversized },
+  });
+  assert.deepEqual(await scanDuplication(baseReq(), fetch), []);
+});
+
+// ── scanDuplication detection ────────────────────────────────────────────────────
+
+test("scanDuplication: detects a near-verbatim duplicate with correct head vs source location and line count", async () => {
+  const fetchImpl = makeFetch({
+    tree: [{ path: "src/existing-scorer.ts", sha: "b".repeat(40) }],
+    blobs: { ["b".repeat(40)]: sourceWithBlock(SHARED, 5) },
+  });
+  const findings = await scanDuplication(baseReq(), fetchImpl);
+  assert.equal(findings.length, 1);
+  assert.equal(findings[0].file, "src/new-scorer.ts");
+  assert.equal(findings[0].line, 10); // patch starts the added block at new-file line 10
+  assert.equal(findings[0].sourceFile, "src/existing-scorer.ts");
+  assert.equal(findings[0].sourceLine, 5); // SHARED block begins at line 5 in the source
+  assert.equal(findings[0].lines, SHARED.length); // entire 9-line run matched
+});
+
+test("scanDuplication: a run shorter than MIN_RUN is not flagged (no false positive)", async () => {
+  const short = SHARED.slice(0, 7); // 7 < MIN_RUN (8)
+  const req = baseReq({
+    files: [{ path: "src/new-scorer.ts", status: "added", patch: addedPatch(short) }],
+  });
+  const fetchImpl = makeFetch({
+    tree: [{ path: "src/existing-scorer.ts", sha: "b".repeat(40) }],
+    blobs: { ["b".repeat(40)]: sourceWithBlock(short, 5) },
+  });
+  assert.deepEqual(await scanDuplication(req, fetchImpl), []);
+});
+
+test("scanDuplication: boilerplate-only / import-only added lines are never flagged", async () => {
+  const boilerplate = Array.from({ length: 12 }, (_, i) => `import { thing${i} } from './m${i}'`);
+  const req = baseReq({
+    files: [{ path: "src/new-scorer.ts", status: "added", patch: addedPatch(boilerplate) }],
+  });
+  const fetchImpl = makeFetch({
+    tree: [{ path: "src/existing-scorer.ts", sha: "b".repeat(40) }],
+    blobs: { ["b".repeat(40)]: boilerplate.join("\n") },
+  });
+  assert.deepEqual(await scanDuplication(req, fetchImpl), []);
+});
+
+test("scanDuplication: the changed file itself is excluded as a candidate", async () => {
+  // Only candidate in the tree IS the changed file → no other source to match against → no finding.
+  const counter = { calls: [], tree: 0, blob: 0 };
+  const fetchImpl = makeFetch({
+    tree: [{ path: "src/new-scorer.ts", sha: "b".repeat(40) }],
+    blobs: { ["b".repeat(40)]: sourceWithBlock(SHARED, 5) },
+    counter,
+  });
+  assert.deepEqual(await scanDuplication(baseReq(), fetchImpl), []);
+  assert.equal(counter.blob, 0); // no blob fetched — the only tree entry was the changed file
+});
+
+test("scanDuplication: only same-extension candidates are considered", async () => {
+  const counter = { calls: [], tree: 0, blob: 0 };
+  const fetchImpl = makeFetch({
+    tree: [{ path: "src/existing-scorer.py", sha: "b".repeat(40) }], // .py vs changed .ts
+    blobs: { ["b".repeat(40)]: sourceWithBlock(SHARED, 5) },
+    counter,
+  });
+  assert.deepEqual(await scanDuplication(baseReq(), fetchImpl), []);
+  assert.equal(counter.blob, 0); // wrong extension → never fetched
+});
+
+// ── fail-safe guards ─────────────────────────────────────────────────────────────
+
+test("scanDuplication: fails safe with no githubToken", async () => {
+  assert.deepEqual(await scanDuplication(baseReq({ githubToken: undefined }), async () => {
+    throw new Error("should not fetch");
+  }), []);
+});
+
+test("scanDuplication: fails safe with no headSha", async () => {
+  assert.deepEqual(await scanDuplication(baseReq({ headSha: undefined }), async () => {
+    throw new Error("should not fetch");
+  }), []);
+});
+
+test("scanDuplication: fails safe on a bad repoFullName", async () => {
+  assert.deepEqual(await scanDuplication(baseReq({ repoFullName: "../evil" }), async () => {
+    throw new Error("should not fetch");
+  }), []);
+});
+
+test("scanDuplication: fails safe when the tree fetch is non-OK", async () => {
+  const fetchImpl = makeFetch({ tree: [], blobs: {}, treeOk: false });
+  assert.deepEqual(await scanDuplication(baseReq(), fetchImpl), []);
+});
+
+test("scanDuplication: fails safe on malformed tree json (no tree array)", async () => {
+  const fetchImpl = makeFetch({ tree: [], blobs: {}, treeBadJson: true });
+  assert.deepEqual(await scanDuplication(baseReq(), fetchImpl), []);
+});
+
+test("scanDuplication: a candidate whose blob fetch is non-OK is skipped, scan continues", async () => {
+  const fetchImpl = makeFetch({
+    tree: [
+      { path: "src/broken.ts", sha: "b".repeat(40) }, // non-OK blob
+      { path: "src/existing-scorer.ts", sha: "c".repeat(40) }, // good duplicate
+    ],
+    blobs: { ["c".repeat(40)]: sourceWithBlock(SHARED, 5) },
+    blobStatus: { ["b".repeat(40)]: 404 },
+  });
+  const findings = await scanDuplication(baseReq(), fetchImpl);
+  assert.equal(findings.length, 1);
+  assert.equal(findings[0].sourceFile, "src/existing-scorer.ts");
+});
+
+test("scanDuplication: a candidate whose blob fetch throws is skipped, scan continues", async () => {
+  const fetchImpl = makeFetch({
+    tree: [
+      { path: "src/broken.ts", sha: "b".repeat(40) },
+      { path: "src/existing-scorer.ts", sha: "c".repeat(40) },
+    ],
+    blobs: { ["c".repeat(40)]: sourceWithBlock(SHARED, 5) },
+    blobThrow: new Set(["b".repeat(40)]),
+  });
+  const findings = await scanDuplication(baseReq(), fetchImpl);
+  assert.equal(findings.length, 1);
+  assert.equal(findings[0].sourceFile, "src/existing-scorer.ts");
+});
+
+test("scanDuplication: a candidate blob with non-base64 encoding is skipped", async () => {
+  // "src/plain.ts" returns encoding utf8 (not base64) → decoded to null → skipped; the real dup still matches.
+  const fetchImpl = makeFetch({
+    tree: [
+      { path: "src/plain.ts", sha: "b".repeat(40) },
+      { path: "src/existing-scorer.ts", sha: "c".repeat(40) },
+    ],
+    blobs: { ["c".repeat(40)]: sourceWithBlock(SHARED, 5) }, // b-sha has no entry → utf8 fallback in mock
+  });
+  const findings = await scanDuplication(baseReq(), fetchImpl);
+  assert.equal(findings.length, 1);
+  assert.equal(findings[0].sourceFile, "src/existing-scorer.ts");
+});
+
+test("scanDuplication: a truncated tree still works on the returned entries", async () => {
+  const fetchImpl = makeFetch({
+    tree: [{ path: "src/existing-scorer.ts", sha: "b".repeat(40) }],
+    blobs: { ["b".repeat(40)]: sourceWithBlock(SHARED, 5) },
+    truncated: true,
+  });
+  const findings = await scanDuplication(baseReq(), fetchImpl);
+  assert.equal(findings.length, 1);
+});
+
+// ── bounding ──────────────────────────────────────────────────────────────────────
+
+test("scanDuplication: respects MAX_FETCHES (caps candidate blob fetches at 30)", async () => {
+  // 50 same-extension non-matching candidates → only MAX_FETCHES (30) blobs fetched.
+  const tree = Array.from({ length: 50 }, (_, i) => ({
+    path: `src/zzz/cand${String(i).padStart(3, "0")}.ts`,
+    sha: (i + 16).toString(16).padStart(40, "0"),
+  }));
+  const blobs = {};
+  for (const t of tree) blobs[t.sha] = "const irrelevantNonMatchingContentLine = makeUniqueValue()";
+  const counter = { calls: [], tree: 0, blob: 0 };
+  const fetchImpl = makeFetch({ tree, blobs, counter });
+  await scanDuplication(baseReq(), fetchImpl);
+  assert.equal(counter.blob, 30);
+});
+
+test("scanDuplication: respects MAX_CANDIDATES (only the closest 40 candidates are considered)", async () => {
+  // 100 candidates but MAX_CANDIDATES=40 caps the set; combined with MAX_FETCHES=30 only 30 blobs fetched, and the
+  // proximity sort means the in-directory candidate (sharing src/) is preferred and the match is still found.
+  const tree = [
+    { path: "src/existing-scorer.ts", sha: "b".repeat(40) }, // shares src/ → high proximity → fetched first
+  ];
+  for (let i = 0; i < 100; i++) {
+    tree.push({
+      path: `far/away/dir/cand${String(i).padStart(3, "0")}.ts`,
+      sha: (i + 32).toString(16).padStart(40, "0"),
+    });
+  }
+  const blobs = { ["b".repeat(40)]: sourceWithBlock(SHARED, 5) };
+  for (const t of tree) if (!(t.sha in blobs)) blobs[t.sha] = "const unrelatedFillerContentLine = uniqueValue()";
+  const counter = { calls: [], tree: 0, blob: 0 };
+  const fetchImpl = makeFetch({ tree, blobs, counter });
+  const findings = await scanDuplication(baseReq(), fetchImpl);
+  assert.equal(findings.length, 1);
+  assert.equal(findings[0].sourceFile, "src/existing-scorer.ts");
+  assert.ok(counter.blob <= 30); // MAX_FETCHES still bounds the work
+});
+
+test("scanDuplication: an already-aborted signal yields [] without fetching", async () => {
+  const counter = { calls: [], tree: 0, blob: 0 };
+  const fetchImpl = makeFetch({
+    tree: [{ path: "src/existing-scorer.ts", sha: "b".repeat(40) }],
+    blobs: { ["b".repeat(40)]: sourceWithBlock(SHARED, 5) },
+    counter,
+  });
+  const findings = await scanDuplication(baseReq(), fetchImpl, { signal: AbortSignal.abort() });
+  assert.deepEqual(findings, []);
+  assert.equal(counter.tree, 0); // never even fetched the tree
+});
+
+test("scanDuplication: no changed source files → [] without fetching", async () => {
+  const req = baseReq({
+    files: [{ path: "README.md", status: "modified", patch: addedPatch(SHARED) }],
+  });
+  let called = false;
+  await scanDuplication(req, async () => {
+    called = true;
+    throw new Error("nope");
+  });
+  assert.equal(called, false);
+});
+
+test("scanDuplication: removed files and no added-block files are ignored", async () => {
+  const req = baseReq({
+    files: [
+      { path: "src/gone.ts", status: "removed", patch: addedPatch(SHARED) }, // removed → skipped
+    ],
+  });
+  let called = false;
+  await scanDuplication(req, async () => {
+    called = true;
+    throw new Error("nope");
+  });
+  assert.equal(called, false);
+});
+
+// ── render ──────────────────────────────────────────────────────────────────────
+
+test("renderBrief emits a public-safe duplication block with file:line, escaping paths, never the code", () => {
+  const { promptSection } = renderBrief({
+    duplication: [
+      { file: "src/new-scorer.ts", line: 10, sourceFile: "src/existing-scorer.ts", sourceLine: 5, lines: 9 },
+    ],
+  });
+  assert.match(promptSection, /Near-verbatim duplicated code/);
+  assert.match(promptSection, /`src\/new-scorer\.ts:10`/);
+  assert.match(promptSection, /`src\/existing-scorer\.ts:5`/);
+  assert.match(promptSection, /~9 lines/);
+  // No code content from SHARED ever leaks into the rendered brief.
+  assert.ok(!promptSection.includes("totalRewardScaled"));
+});
+
+test("renderBrief escapes a backtick in a duplication path (no code-span breakout)", () => {
+  const { promptSection } = renderBrief({
+    duplication: [
+      { file: "src/we`ird.ts", line: 1, sourceFile: "src/o`ther.ts", sourceLine: 2, lines: 8 },
+    ],
+  });
+  assert.ok(!promptSection.includes("we`ird")); // raw backtick neutralized by safeCodeSpan
+});