diff --git a/.env.example b/.env.example index b02159071..748b79164 100644 --- a/.env.example +++ b/.env.example @@ -65,17 +65,17 @@ GITTENSORY_REVIEW_ENRICHMENT=false # Current analyzer names: # dependency,lockfileDrift,secret,license,installScript,heavyDependency,actionPin,eol,redos # provenance,codeowners,secretLog,assetWeight,typosquat,commitSignature,iacMisconfig,nativeBuild -# history,docCommentDrift,duplication +# history,docCommentDrift,duplication,churnHotspot # # Profile defaults: # fast: dependency,lockfileDrift,secret,license,installScript,heavyDependency,actionPin,eol # redos,provenance,secretLog,typosquat,iacMisconfig,nativeBuild # balanced (default): dependency,lockfileDrift,secret,license,installScript,heavyDependency # actionPin,eol,redos,provenance,codeowners,secretLog,assetWeight,typosquat,commitSignature -# iacMisconfig,nativeBuild,history,docCommentDrift,duplication +# iacMisconfig,nativeBuild,history,docCommentDrift,duplication,churnHotspot # deep: dependency,lockfileDrift,secret,license,installScript,heavyDependency,actionPin,eol # redos,provenance,codeowners,secretLog,assetWeight,typosquat,commitSignature,iacMisconfig -# nativeBuild,history,docCommentDrift,duplication +# nativeBuild,history,docCommentDrift,duplication,churnHotspot # END GENERATED REES ANALYZERS # Submitter-reputation spend control (internal-only): downgrades new/burst/low-rep diff --git a/apps/gittensory-ui/src/lib/rees-analyzers.ts b/apps/gittensory-ui/src/lib/rees-analyzers.ts index e84023a25..6a2f1a18a 100644 --- a/apps/gittensory-ui/src/lib/rees-analyzers.ts +++ b/apps/gittensory-ui/src/lib/rees-analyzers.ts @@ -533,6 +533,32 @@ export const REES_ANALYZERS = [ "Conservative: trivial/boilerplate lines are dropped and a long contiguous run is required, so incidental overlap is not flagged. Never returns code content.", }, }, + { + name: "churnHotspot", + title: "Churn hotspots", + category: "history", + cost: "github-heavy", + defaultEnabled: true, + profiles: ["balanced", "deep"], + requires: ["files", "github-token"], + limits: { + maxFilesProbed: 8, + windowDays: 90, + perPage: 100, + }, + docs: { + summary: + "Flags changed files that are statistical fragility hotspots — high commit frequency and a high fix/revert fraction.", + looksAt: + "Each changed file's recent commit history (a 90-day window), excluding lockfiles, generated output, and binaries.", + reports: + "File, commit count, fix/revert count, and the window — counts only, never file contents.", + network: + "Calls the GitHub commits API once per probed file. Requires GitHub token forwarding for private repos.", + notes: + "Distinct from the history analyzer's author track record; this scores the change AREA's defect density.", + }, + }, ] as const satisfies readonly ReesAnalyzerDoc[]; export const REES_ANALYZER_NAMES = REES_ANALYZERS.map((analyzer) => analyzer.name); diff --git a/review-enrichment/analyzer-metadata.json b/review-enrichment/analyzer-metadata.json index 98d05a2e4..6b4b3c324 100644 --- a/review-enrichment/analyzer-metadata.json +++ b/review-enrichment/analyzer-metadata.json @@ -609,6 +609,33 @@ "network": "Calls the GitHub API for the git tree and candidate blobs. Requires headSha and token forwarding for private repos.", "notes": "Conservative: trivial/boilerplate lines are dropped and a long contiguous run is required, so incidental overlap is not flagged. Never returns code content." } + }, + { + "name": "churnHotspot", + "title": "Churn hotspots", + "category": "history", + "cost": "github-heavy", + "defaultEnabled": true, + "profiles": [ + "balanced", + "deep" + ], + "requires": [ + "files", + "github-token" + ], + "limits": { + "maxFilesProbed": 8, + "windowDays": 90, + "perPage": 100 + }, + "docs": { + "summary": "Flags changed files that are statistical fragility hotspots — high commit frequency and a high fix/revert fraction.", + "looksAt": "Each changed file's recent commit history (a 90-day window), excluding lockfiles, generated output, and binaries.", + "reports": "File, commit count, fix/revert count, and the window — counts only, never file contents.", + "network": "Calls the GitHub commits API once per probed file. Requires GitHub token forwarding for private repos.", + "notes": "Distinct from the history analyzer's author track record; this scores the change AREA's defect density." + } } ] } diff --git a/review-enrichment/src/analyzers/churn-hotspot.ts b/review-enrichment/src/analyzers/churn-hotspot.ts new file mode 100644 index 000000000..5c6f5492b --- /dev/null +++ b/review-enrichment/src/analyzers/churn-hotspot.ts @@ -0,0 +1,133 @@ +// Churn-hotspot analyzer (#1513). For the files a PR changes, reads each file's recent commit history from the +// GitHub API and flags the ones that are statistical fragility hotspots — a high commit frequency AND a high +// fraction of fix/revert commits in the window. These are areas where defects historically cluster, so the +// reviewer should scrutinize the change harder. This is heavy/external/historical analysis the no-checkout +// `claude --print` reviewer cannot do. Surfaces only counts derived from the public commit log — never file +// contents. Distinct from the history analyzer (#1478), which scores the AUTHOR's track record. +import type { + AnalyzerDiagnostics, + EnrichRequest, + ChurnHotspotFinding, +} from "../types.js"; +import type { AnalysisContext } from "../analysis-context.js"; +import { boundedFetchJson } from "../external-fetch.js"; + +const GITHUB_API = "https://api.github.com"; +const SLUG_RE = /^[A-Za-z0-9._-]+$/; +const WINDOW_DAYS = 90; +const PER_PAGE = 100; // one page; a file with a full page of commits in the window is already a clear hotspot +const MAX_FILES_PROBED = 8; // bound the GitHub round-trips, matching the other history-class analyzers +const MIN_COMMITS = 8; // a hotspot must change frequently within the window +const MIN_FIX_FRACTION = 0.3; // and a meaningful share of those changes must be fixes/reverts +// Files whose commit churn is not a useful code-fragility signal — lockfiles, generated output, and binaries. +const SKIP_RE = + /(?:^|\/)(?:package-lock\.json|yarn\.lock|pnpm-lock\.yaml|poetry\.lock|go\.sum)$|\.(?:lock|min\.js|map|snap|png|jpe?g|gif|svg|ico|pdf|zip|gz|woff2?)$|(?:^|\/)(?:dist|build|vendor)\//i; +// Defect-correcting commit subjects: fix/bugfix/hotfix/revert/regression (conventional-commit `fix:` included). +const FIX_RE = /\b(?:fix(?:e[ds]|ing)?|bug ?fix|hotfix|revert(?:ed|s)?|regression)\b/i; + +interface ScanOptions { + signal?: AbortSignal; + analysis?: Pick; + diagnostics?: AnalyzerDiagnostics; +} + +/** The slice of a GitHub commit-list item this analyzer reads. */ +interface CommitItem { + commit?: { message?: string }; +} + +/** True when a commit's SUBJECT line describes a defect correction. Pure. */ +export function isFixCommit(message: string): boolean { + const subject = message.split("\n", 1)[0] ?? ""; + return FIX_RE.test(subject); +} + +/** Reduce a commit list to total + fix counts, capped flag, and the fix fraction. Pure. */ +export function summarizeChurn(commits: CommitItem[]): { + commitCount: number; + fixCount: number; + fixFraction: number; +} { + let fixCount = 0; + for (const item of commits) if (isFixCommit(item.commit?.message ?? "")) fixCount += 1; + const commitCount = commits.length; + return { commitCount, fixCount, fixFraction: commitCount ? fixCount / commitCount : 0 }; +} + +/** True when a file's churn summary meets the hotspot thresholds (enough commits AND enough of them fixes). Pure. */ +export function isHotspot(summary: { commitCount: number; fixFraction: number }): boolean { + return summary.commitCount >= MIN_COMMITS && summary.fixFraction >= MIN_FIX_FRACTION; +} + +function githubHeaders(token: string): Record { + return { + Authorization: `Bearer ${token}`, + Accept: "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + }; +} + +/** Fetch one page of commits touching `path` since `since`. Returns the list, or null on any error / non-200. */ +async function fetchFileCommits( + url: string, + headers: Record, + fetchFn: typeof fetch, + signal: AbortSignal | undefined, + options: Pick, +): Promise { + const fetchOptions = { + endpointCategory: "github-commits", + headers, + signal, + fetchImpl: fetchFn, + diagnostics: options.diagnostics, + phase: "churn-hotspot", + subcall: "github-commits", + maxBytes: 512 * 1024, + maxCallsPerCategory: MAX_FILES_PROBED, + }; + const response = options.analysis + ? await options.analysis.fetchJson(url, fetchOptions) + : await boundedFetchJson(url, fetchOptions); + return response.ok && Array.isArray(response.data) ? response.data : null; +} + +/** Analyzer entrypoint: changed files → per-file recent commit history → fragility hotspots. Fail-safe. */ +export async function scanChurnHotspot( + req: EnrichRequest, + fetchFn: typeof fetch = fetch, + options: ScanOptions = {}, +): Promise { + const { repoFullName, githubToken, files = [] } = req; + if (!githubToken) return []; + const [owner, repo] = repoFullName.split("/"); + if (!owner || !repo || !SLUG_RE.test(owner) || !SLUG_RE.test(repo)) return []; + + const headers = githubHeaders(githubToken); + const since = new Date(Date.now() - WINDOW_DAYS * 86_400_000).toISOString(); + // A newly-added file has no prior history; skip it (and non-code/generated files) before spending a round-trip. + const paths = files + .filter((file) => file.status !== "added" && !SKIP_RE.test(file.path)) + .map((file) => file.path) + .slice(0, MAX_FILES_PROBED); + + const findings: ChurnHotspotFinding[] = []; + for (const path of paths) { + if (options.signal?.aborted) break; + const url = + `${GITHUB_API}/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}/commits` + + `?path=${encodeURIComponent(path)}&since=${encodeURIComponent(since)}&per_page=${PER_PAGE}`; + const commits = await fetchFileCommits(url, headers, fetchFn, options.signal, options); + if (!commits) continue; + const summary = summarizeChurn(commits); + if (!isHotspot(summary)) continue; + findings.push({ + file: path, + commitCount: summary.commitCount, + fixCount: summary.fixCount, + windowDays: WINDOW_DAYS, + capped: summary.commitCount >= PER_PAGE, + }); + } + return findings; +} diff --git a/review-enrichment/src/analyzers/registry.ts b/review-enrichment/src/analyzers/registry.ts index 33252a53b..b21f28f10 100644 --- a/review-enrichment/src/analyzers/registry.ts +++ b/review-enrichment/src/analyzers/registry.ts @@ -1,5 +1,6 @@ import { scanActionPins } from "./actions-pin.js"; import { scanAssetWeight } from "./asset-weight.js"; +import { scanChurnHotspot } from "./churn-hotspot.js"; import { scanCodeowners } from "./codeowners.js"; import { scanCommitSignature } from "./commit-signature.js"; import { dependencyAnalyzer } from "./dependency/descriptor.js"; @@ -418,6 +419,27 @@ export const ANALYZER_DESCRIPTORS = [ run: (req, { signal, analysis, diagnostics }) => scanDuplication(req, fetch, { signal, analysis, diagnostics }), }), + descriptor({ + name: "churnHotspot", + title: "Churn hotspots", + category: "history", + cost: "github-heavy", + defaultEnabled: true, + requires: ["files", "github-token"], + limits: { maxFilesProbed: 8, windowDays: 90, perPage: 100 }, + docs: { + summary: + "Flags changed files that are statistical fragility hotspots — high commit frequency and a high fix/revert fraction.", + looksAt: + "Each changed file's recent commit history (a 90-day window), excluding lockfiles, generated output, and binaries.", + reports: "File, commit count, fix/revert count, and the window — counts only, never file contents.", + network: "Calls the GitHub commits API once per probed file. Requires GitHub token forwarding for private repos.", + notes: + "Distinct from the history analyzer's author track record; this scores the change AREA's defect density.", + }, + run: (req, { signal, analysis, diagnostics }) => + scanChurnHotspot(req, fetch, { signal, analysis, diagnostics }), + }), ] as const satisfies readonly AnyAnalyzerDescriptor[]; export const ANALYZER_NAMES = ANALYZER_DESCRIPTORS.map( diff --git a/review-enrichment/src/render.ts b/review-enrichment/src/render.ts index 47ed5ae95..8c6193127 100644 --- a/review-enrichment/src/render.ts +++ b/review-enrichment/src/render.ts @@ -365,6 +365,20 @@ export function renderBrief( } } + const churnHotspots = findings.churnHotspot ?? []; + if (churnHotspots.length) { + lines.push( + "### Churn hotspots (high commit + fix/revert density — historically fragile, scrutinize)", + ); + for (const item of churnHotspots) { + const pct = item.commitCount ? Math.round((item.fixCount / item.commitCount) * 100) : 0; + const count = `${item.commitCount}${item.capped ? "+" : ""}`; + lines.push( + `- ${safeCodeSpan(item.file)} — ${count} commits in ${item.windowDays}d, ${item.fixCount} fix/revert (${pct}%)`, + ); + } + } + if (!lines.length) return { promptSection: "", systemSuffix: "" }; const header = diff --git a/review-enrichment/src/types.ts b/review-enrichment/src/types.ts index 29bd606dc..07b7ec54b 100644 --- a/review-enrichment/src/types.ts +++ b/review-enrichment/src/types.ts @@ -270,6 +270,21 @@ export interface HistoryFinding { partial: boolean; } +/** A changed file that is a statistical churn hotspot: many recent commits AND a high fraction of fix/revert + * commits, so defects historically cluster there. Counts come from the repository's public commit history within + * a fixed window — never file contents. (#1513) */ +export interface ChurnHotspotFinding { + file: string; + /** Commits touching this file in the window (capped at one page; `capped` marks the cap was hit). */ + commitCount: number; + /** Of those, how many were fix/revert/hotfix/regression commits. */ + fixCount: number; + /** The lookback window in days. */ + windowDays: number; + /** True when `commitCount` reached the per-page cap, so the real count is at least that. */ + capped: boolean; +} + /** Structured analyzer output. Each analyzer fills its own key; more land as analyzers ship (#1477/#1478). */ export interface BriefFindings { dependency?: DependencyFinding[]; @@ -292,6 +307,7 @@ export interface BriefFindings { history?: HistoryFinding[]; docCommentDrift?: DocCommentDriftFinding[]; duplication?: DuplicationFinding[]; + churnHotspot?: ChurnHotspotFinding[]; } /** A JSDoc/TSDoc block whose `@param` tags name parameters the adjacent function no longer declares — a diff --git a/review-enrichment/test/analyzer-registry.test.ts b/review-enrichment/test/analyzer-registry.test.ts index f4f44de07..f471544d1 100644 --- a/review-enrichment/test/analyzer-registry.test.ts +++ b/review-enrichment/test/analyzer-registry.test.ts @@ -30,6 +30,7 @@ const EXPECTED_ANALYZERS = [ "history", "docCommentDrift", "duplication", + "churnHotspot", ]; test("analyzer descriptors cover the runtime registry in stable order", () => { diff --git a/review-enrichment/test/churn-hotspot.test.ts b/review-enrichment/test/churn-hotspot.test.ts new file mode 100644 index 000000000..9d99f3289 --- /dev/null +++ b/review-enrichment/test/churn-hotspot.test.ts @@ -0,0 +1,104 @@ +// Units for the churn-hotspot analyzer (#1513). Own file (not enrichment.test.ts) so concurrent analyzer PRs +// don't collide. All network is mocked. Runs against the compiled dist/. +import { test } from "node:test"; +import assert from "node:assert/strict"; +import { + isFixCommit, + summarizeChurn, + isHotspot, + scanChurnHotspot, +} from "../dist/analyzers/churn-hotspot.js"; +import { renderBrief } from "../dist/render.js"; + +const jsonResponse = (body, code = 200) => new Response(JSON.stringify(body), { status: code }); +// n commit items; every `fixEvery`-th one is a fix commit (fixEvery 0 = none). +const commits = (n, fixEvery = 0) => + Array.from({ length: n }, (_, i) => ({ + commit: { message: fixEvery && i % fixEvery === 0 ? `fix: bug ${i}` : `feat: thing ${i}` }, + })); +const req = (files) => ({ repoFullName: "octo/repo", prNumber: 1, githubToken: "ghp_test", files }); + +test("isFixCommit: matches defect-correcting subjects, not lookalikes", () => { + for (const m of ["fix: crash", "fixed the bug", "fixes #12", "bugfix in parser", "hotfix prod", "revert bad change", "regression in build", "fixing a flake"]) + assert.equal(isFixCommit(m), true, m); + for (const m of ["feat: add x", "refactor prefix handling", "update fixtures", "docs: suffix note"]) + assert.equal(isFixCommit(m), false, m); +}); + +test("isFixCommit: classifies on the subject line only, not the body", () => { + assert.equal(isFixCommit("feat: thing\n\nthis also reverts an idea"), false); +}); + +test("summarizeChurn + isHotspot: counts, fraction, and thresholds", () => { + const s = summarizeChurn(commits(10, 2)); // 5 of 10 are fixes + assert.deepEqual([s.commitCount, s.fixCount, s.fixFraction], [10, 5, 0.5]); + assert.equal(isHotspot({ commitCount: 10, fixFraction: 0.5 }), true); + assert.equal(isHotspot({ commitCount: 4, fixFraction: 0.9 }), false); // too few commits + assert.equal(isHotspot({ commitCount: 20, fixFraction: 0.1 }), false); // too few fixes +}); + +test("scanChurnHotspot: flags a high-churn, high-fix file", async () => { + const findings = await scanChurnHotspot(req([{ path: "src/a.ts" }]), async () => jsonResponse(commits(12, 2))); + assert.equal(findings.length, 1); + assert.equal(findings[0].file, "src/a.ts"); + assert.equal(findings[0].commitCount, 12); + assert.equal(findings[0].fixCount, 6); + assert.equal(findings[0].windowDays, 90); + assert.equal(findings[0].capped, false); +}); + +test("scanChurnHotspot: a low-churn or low-fix file is not flagged", async () => { + assert.deepEqual(await scanChurnHotspot(req([{ path: "src/a.ts" }]), async () => jsonResponse(commits(5, 1))), []); // < 8 commits + assert.deepEqual(await scanChurnHotspot(req([{ path: "src/b.ts" }]), async () => jsonResponse(commits(20, 0))), []); // no fixes +}); + +test("scanChurnHotspot: marks the count capped when the page is full", async () => { + const f = await scanChurnHotspot(req([{ path: "src/a.ts" }]), async () => jsonResponse(commits(100, 2))); + assert.equal(f.length, 1); + assert.equal(f[0].capped, true); +}); + +test("scanChurnHotspot: skips lockfiles, binaries, and newly-added files without fetching", async () => { + let called = false; + const out = await scanChurnHotspot( + req([{ path: "package-lock.json" }, { path: "assets/logo.png" }, { path: "src/new.ts", status: "added" }]), + async () => { + called = true; + return jsonResponse(commits(50, 2)); + }, + ); + assert.deepEqual(out, []); + assert.equal(called, false); +}); + +test("scanChurnHotspot: requires a github token and a valid repo slug", async () => { + assert.deepEqual(await scanChurnHotspot({ repoFullName: "octo/repo", prNumber: 1, files: [{ path: "src/a.ts" }] }, async () => jsonResponse(commits(20, 2))), []); + assert.deepEqual(await scanChurnHotspot({ repoFullName: "bad slug/x!", prNumber: 1, githubToken: "t", files: [{ path: "src/a.ts" }] }, async () => jsonResponse(commits(20, 2))), []); +}); + +test("scanChurnHotspot: fails safe on a non-ok or throwing fetch", async () => { + assert.deepEqual(await scanChurnHotspot(req([{ path: "src/a.ts" }]), async () => jsonResponse({}, 500)), []); + assert.deepEqual( + await scanChurnHotspot(req([{ path: "src/a.ts" }]), async () => { + throw new Error("network"); + }), + [], + ); +}); + +test("scanChurnHotspot: stops on an already-aborted signal", async () => { + assert.deepEqual( + await scanChurnHotspot(req([{ path: "src/a.ts" }]), async () => jsonResponse(commits(20, 2)), { signal: AbortSignal.abort() }), + [], + ); +}); + +test("renderBrief emits a public-safe churn-hotspot block", () => { + const { promptSection } = renderBrief({ + churnHotspot: [{ file: "src/a.ts", commitCount: 100, fixCount: 40, windowDays: 90, capped: true }], + }); + assert.match(promptSection, /Churn hotspots/); + assert.match(promptSection, /src\/a\.ts/); + assert.match(promptSection, /100\+ commits in 90d/); + assert.match(promptSection, /40 fix\/revert/); +});