Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -65,17 +65,17 @@ GITTENSORY_REVIEW_ENRICHMENT=false
# Current analyzer names:
# dependency,lockfileDrift,secret,license,installScript,heavyDependency,actionPin,eol,redos
# provenance,codeowners,secretLog,assetWeight,typosquat,commitSignature,iacMisconfig,nativeBuild
# history,docCommentDrift,duplication
# history,docCommentDrift,duplication,churnHotspot
#
# Profile defaults:
# fast: dependency,lockfileDrift,secret,license,installScript,heavyDependency,actionPin,eol
# redos,provenance,secretLog,typosquat,iacMisconfig,nativeBuild
# balanced (default): dependency,lockfileDrift,secret,license,installScript,heavyDependency
# actionPin,eol,redos,provenance,codeowners,secretLog,assetWeight,typosquat,commitSignature
# iacMisconfig,nativeBuild,history,docCommentDrift,duplication
# iacMisconfig,nativeBuild,history,docCommentDrift,duplication,churnHotspot
# deep: dependency,lockfileDrift,secret,license,installScript,heavyDependency,actionPin,eol
# redos,provenance,codeowners,secretLog,assetWeight,typosquat,commitSignature,iacMisconfig
# nativeBuild,history,docCommentDrift,duplication
# nativeBuild,history,docCommentDrift,duplication,churnHotspot
# END GENERATED REES ANALYZERS

# Submitter-reputation spend control (internal-only): downgrades new/burst/low-rep
Expand Down
26 changes: 26 additions & 0 deletions apps/gittensory-ui/src/lib/rees-analyzers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,32 @@ export const REES_ANALYZERS = [
"Conservative: trivial/boilerplate lines are dropped and a long contiguous run is required, so incidental overlap is not flagged. Never returns code content.",
},
},
{
name: "churnHotspot",
title: "Churn hotspots",
category: "history",
cost: "github-heavy",
defaultEnabled: true,
profiles: ["balanced", "deep"],
requires: ["files", "github-token"],
limits: {
maxFilesProbed: 8,
windowDays: 90,
perPage: 100,
},
docs: {
summary:
"Flags changed files that are statistical fragility hotspots — high commit frequency and a high fix/revert fraction.",
looksAt:
"Each changed file's recent commit history (a 90-day window), excluding lockfiles, generated output, and binaries.",
reports:
"File, commit count, fix/revert count, and the window — counts only, never file contents.",
network:
"Calls the GitHub commits API once per probed file. Requires GitHub token forwarding for private repos.",
notes:
"Distinct from the history analyzer's author track record; this scores the change AREA's defect density.",
},
},
] as const satisfies readonly ReesAnalyzerDoc[];

export const REES_ANALYZER_NAMES = REES_ANALYZERS.map((analyzer) => analyzer.name);
27 changes: 27 additions & 0 deletions review-enrichment/analyzer-metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -609,6 +609,33 @@
"network": "Calls the GitHub API for the git tree and candidate blobs. Requires headSha and token forwarding for private repos.",
"notes": "Conservative: trivial/boilerplate lines are dropped and a long contiguous run is required, so incidental overlap is not flagged. Never returns code content."
}
},
{
"name": "churnHotspot",
"title": "Churn hotspots",
"category": "history",
"cost": "github-heavy",
"defaultEnabled": true,
"profiles": [
"balanced",
"deep"
],
"requires": [
"files",
"github-token"
],
"limits": {
"maxFilesProbed": 8,
"windowDays": 90,
"perPage": 100
},
"docs": {
"summary": "Flags changed files that are statistical fragility hotspots — high commit frequency and a high fix/revert fraction.",
"looksAt": "Each changed file's recent commit history (a 90-day window), excluding lockfiles, generated output, and binaries.",
"reports": "File, commit count, fix/revert count, and the window — counts only, never file contents.",
"network": "Calls the GitHub commits API once per probed file. Requires GitHub token forwarding for private repos.",
"notes": "Distinct from the history analyzer's author track record; this scores the change AREA's defect density."
}
}
]
}
133 changes: 133 additions & 0 deletions review-enrichment/src/analyzers/churn-hotspot.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
// Churn-hotspot analyzer (#1513). For the files a PR changes, reads each file's recent commit history from the
// GitHub API and flags the ones that are statistical fragility hotspots — a high commit frequency AND a high
// fraction of fix/revert commits in the window. These are areas where defects historically cluster, so the
// reviewer should scrutinize the change harder. This is heavy/external/historical analysis the no-checkout
// `claude --print` reviewer cannot do. Surfaces only counts derived from the public commit log — never file
// contents. Distinct from the history analyzer (#1478), which scores the AUTHOR's track record.
import type {
AnalyzerDiagnostics,
EnrichRequest,
ChurnHotspotFinding,
} from "../types.js";
import type { AnalysisContext } from "../analysis-context.js";
import { boundedFetchJson } from "../external-fetch.js";

const GITHUB_API = "https://api.github.com";
const SLUG_RE = /^[A-Za-z0-9._-]+$/;
const WINDOW_DAYS = 90;
const PER_PAGE = 100; // one page; a file with a full page of commits in the window is already a clear hotspot
const MAX_FILES_PROBED = 8; // bound the GitHub round-trips, matching the other history-class analyzers
const MIN_COMMITS = 8; // a hotspot must change frequently within the window
const MIN_FIX_FRACTION = 0.3; // and a meaningful share of those changes must be fixes/reverts
// Files whose commit churn is not a useful code-fragility signal — lockfiles, generated output, and binaries.
const SKIP_RE =
/(?:^|\/)(?:package-lock\.json|yarn\.lock|pnpm-lock\.yaml|poetry\.lock|go\.sum)$|\.(?:lock|min\.js|map|snap|png|jpe?g|gif|svg|ico|pdf|zip|gz|woff2?)$|(?:^|\/)(?:dist|build|vendor)\//i;
// Defect-correcting commit subjects: fix/bugfix/hotfix/revert/regression (conventional-commit `fix:` included).
const FIX_RE = /\b(?:fix(?:e[ds]|ing)?|bug ?fix|hotfix|revert(?:ed|s)?|regression)\b/i;

interface ScanOptions {
signal?: AbortSignal;
analysis?: Pick<AnalysisContext, "fetchJson">;
diagnostics?: AnalyzerDiagnostics;
}

/** The slice of a GitHub commit-list item this analyzer reads. */
interface CommitItem {
commit?: { message?: string };
}

/** True when a commit's SUBJECT line describes a defect correction. Pure. */
export function isFixCommit(message: string): boolean {
const subject = message.split("\n", 1)[0] ?? "";
return FIX_RE.test(subject);
}

/** Reduce a commit list to total + fix counts, capped flag, and the fix fraction. Pure. */
export function summarizeChurn(commits: CommitItem[]): {
commitCount: number;
fixCount: number;
fixFraction: number;
} {
let fixCount = 0;
for (const item of commits) if (isFixCommit(item.commit?.message ?? "")) fixCount += 1;
const commitCount = commits.length;
return { commitCount, fixCount, fixFraction: commitCount ? fixCount / commitCount : 0 };
}

/** True when a file's churn summary meets the hotspot thresholds (enough commits AND enough of them fixes). Pure. */
export function isHotspot(summary: { commitCount: number; fixFraction: number }): boolean {
return summary.commitCount >= MIN_COMMITS && summary.fixFraction >= MIN_FIX_FRACTION;
}

function githubHeaders(token: string): Record<string, string> {
return {
Authorization: `Bearer ${token}`,
Accept: "application/vnd.github+json",
"X-GitHub-Api-Version": "2022-11-28",
};
}

/** Fetch one page of commits touching `path` since `since`. Returns the list, or null on any error / non-200. */
async function fetchFileCommits(
url: string,
headers: Record<string, string>,
fetchFn: typeof fetch,
signal: AbortSignal | undefined,
options: Pick<ScanOptions, "analysis" | "diagnostics">,
): Promise<CommitItem[] | null> {
const fetchOptions = {
endpointCategory: "github-commits",
headers,
signal,
fetchImpl: fetchFn,
diagnostics: options.diagnostics,
phase: "churn-hotspot",
subcall: "github-commits",
maxBytes: 512 * 1024,
maxCallsPerCategory: MAX_FILES_PROBED,
};
const response = options.analysis
? await options.analysis.fetchJson<CommitItem[]>(url, fetchOptions)
: await boundedFetchJson<CommitItem[]>(url, fetchOptions);
return response.ok && Array.isArray(response.data) ? response.data : null;
}

/** Analyzer entrypoint: changed files → per-file recent commit history → fragility hotspots. Fail-safe. */
export async function scanChurnHotspot(
req: EnrichRequest,
fetchFn: typeof fetch = fetch,
options: ScanOptions = {},
): Promise<ChurnHotspotFinding[]> {
const { repoFullName, githubToken, files = [] } = req;
if (!githubToken) return [];
const [owner, repo] = repoFullName.split("/");
if (!owner || !repo || !SLUG_RE.test(owner) || !SLUG_RE.test(repo)) return [];

const headers = githubHeaders(githubToken);
const since = new Date(Date.now() - WINDOW_DAYS * 86_400_000).toISOString();
// A newly-added file has no prior history; skip it (and non-code/generated files) before spending a round-trip.
const paths = files
.filter((file) => file.status !== "added" && !SKIP_RE.test(file.path))
.map((file) => file.path)
.slice(0, MAX_FILES_PROBED);

const findings: ChurnHotspotFinding[] = [];
for (const path of paths) {
if (options.signal?.aborted) break;
const url =
`${GITHUB_API}/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}/commits` +
`?path=${encodeURIComponent(path)}&since=${encodeURIComponent(since)}&per_page=${PER_PAGE}`;
const commits = await fetchFileCommits(url, headers, fetchFn, options.signal, options);
if (!commits) continue;
const summary = summarizeChurn(commits);
if (!isHotspot(summary)) continue;
findings.push({
file: path,
commitCount: summary.commitCount,
fixCount: summary.fixCount,
windowDays: WINDOW_DAYS,
capped: summary.commitCount >= PER_PAGE,
});
}
return findings;
}
22 changes: 22 additions & 0 deletions review-enrichment/src/analyzers/registry.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { scanActionPins } from "./actions-pin.js";
import { scanAssetWeight } from "./asset-weight.js";
import { scanChurnHotspot } from "./churn-hotspot.js";
import { scanCodeowners } from "./codeowners.js";
import { scanCommitSignature } from "./commit-signature.js";
import { dependencyAnalyzer } from "./dependency/descriptor.js";
Expand Down Expand Up @@ -418,6 +419,27 @@ export const ANALYZER_DESCRIPTORS = [
run: (req, { signal, analysis, diagnostics }) =>
scanDuplication(req, fetch, { signal, analysis, diagnostics }),
}),
descriptor({
name: "churnHotspot",
title: "Churn hotspots",
category: "history",
cost: "github-heavy",
defaultEnabled: true,
requires: ["files", "github-token"],
limits: { maxFilesProbed: 8, windowDays: 90, perPage: 100 },
docs: {
summary:
"Flags changed files that are statistical fragility hotspots — high commit frequency and a high fix/revert fraction.",
looksAt:
"Each changed file's recent commit history (a 90-day window), excluding lockfiles, generated output, and binaries.",
reports: "File, commit count, fix/revert count, and the window — counts only, never file contents.",
network: "Calls the GitHub commits API once per probed file. Requires GitHub token forwarding for private repos.",
notes:
"Distinct from the history analyzer's author track record; this scores the change AREA's defect density.",
},
run: (req, { signal, analysis, diagnostics }) =>
scanChurnHotspot(req, fetch, { signal, analysis, diagnostics }),
}),
] as const satisfies readonly AnyAnalyzerDescriptor[];

export const ANALYZER_NAMES = ANALYZER_DESCRIPTORS.map(
Expand Down
14 changes: 14 additions & 0 deletions review-enrichment/src/render.ts
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,20 @@ export function renderBrief(
}
}

const churnHotspots = findings.churnHotspot ?? [];
if (churnHotspots.length) {
lines.push(
"### Churn hotspots (high commit + fix/revert density — historically fragile, scrutinize)",
);
for (const item of churnHotspots) {
const pct = item.commitCount ? Math.round((item.fixCount / item.commitCount) * 100) : 0;
const count = `${item.commitCount}${item.capped ? "+" : ""}`;
lines.push(
`- ${safeCodeSpan(item.file)} — ${count} commits in ${item.windowDays}d, ${item.fixCount} fix/revert (${pct}%)`,
);
}
}

if (!lines.length) return { promptSection: "", systemSuffix: "" };

const header =
Expand Down
16 changes: 16 additions & 0 deletions review-enrichment/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,21 @@ export interface HistoryFinding {
partial: boolean;
}

/** A changed file that is a statistical churn hotspot: many recent commits AND a high fraction of fix/revert
* commits, so defects historically cluster there. Counts come from the repository's public commit history within
* a fixed window — never file contents. (#1513) */
export interface ChurnHotspotFinding {
file: string;
/** Commits touching this file in the window (capped at one page; `capped` marks the cap was hit). */
commitCount: number;
/** Of those, how many were fix/revert/hotfix/regression commits. */
fixCount: number;
/** The lookback window in days. */
windowDays: number;
/** True when `commitCount` reached the per-page cap, so the real count is at least that. */
capped: boolean;
}

/** Structured analyzer output. Each analyzer fills its own key; more land as analyzers ship (#1477/#1478). */
export interface BriefFindings {
dependency?: DependencyFinding[];
Expand All @@ -292,6 +307,7 @@ export interface BriefFindings {
history?: HistoryFinding[];
docCommentDrift?: DocCommentDriftFinding[];
duplication?: DuplicationFinding[];
churnHotspot?: ChurnHotspotFinding[];
}

/** A JSDoc/TSDoc block whose `@param` tags name parameters the adjacent function no longer declares — a
Expand Down
1 change: 1 addition & 0 deletions review-enrichment/test/analyzer-registry.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ const EXPECTED_ANALYZERS = [
"history",
"docCommentDrift",
"duplication",
"churnHotspot",
];

test("analyzer descriptors cover the runtime registry in stable order", () => {
Expand Down
Loading
Loading