Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions DESIGN.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@ BanGD 是一个面向 **BanDB 数据库引擎(Go)垂类**的 AI PR 评审助
**只看 diff 会让评审退化成通用 linter。** 架构级判断需要 diff 之外的上下文:被改函数的调用方/被调用方、相关数据结构定义、并发上下文。BanGD 的上下文策略分三层:

1. **固定领域知识(已实现)**:系统提示词 = 资深内核工程师人设 + 评审 **rubric**(9 个维度,每条带"该追问的架构方向")+ **few-shot 范例**(并发 panic→双表、WAL 顺序→组提交,刻意跨维度防过拟合)。这是"垂类"的来源,约占评审质量的 80%。
2. **PR 上下文(已实现)**:通过 `PrContext` 端口取 diff 与 PR 元信息。
3. **周边代码(端口已就位,循环待实现)**:`PrContext.readFile` 已定义,下一步让 core 按需 agentic 地拉取周边代码——这是准确性与上下文理解的最大增量。
2. **PR 上下文(已实现)**:通过 `PrContext` 端口取 diff 与 PR 元信息;并读取**被改动文件的完整内容**,让评审看到 diff 之外的同文件上下文(其它方法、同文件类型定义)。
3. **周边相关代码(已实现,`src/core/related.ts`)**:在评审前增加一轮**模型主导的上下文规划**——模型依据改动代码里已出现的 import / 标识符,列出需要补读的**未改动**文件(被引用的 struct/interface 定义、持同一把锁的调用方、同 package 承载相同不变量的文件),core 通过 `readFile` 拉取(命中即附入上下文,未命中返回 null,代价极低),再进入评审。这把评审从"看 diff"升级为"看系统",是准确性与上下文理解的最大增量。
- **刻意的取舍**:只做**一轮**规划、复用单次 `generateStructured` 端口(不改成多轮交错 tool-use),并对补读文件数(`MAX_RELATED_FILES`)与字符预算(`RELATED_CHAR_BUDGET`)设硬上限——以一次未缓存的额外调用换取关键上下文,成本可控。补读的文件路径随 `ReviewOutcome.relatedFiles` 暴露并写入评论页脚/日志,让评审"补读了哪些文件来做架构判断"可被直接看到。规划失败永不致命(返回空,评审照常进行)。

**Prompt caching 与速度**:第 1 层(系统提示词+rubric+范例)是大块且稳定的内容,`AnthropicLlmClient` 在该块上打 `cache_control` 断点,使**每个 PR 只有 diff 这条尾巴是未缓存的**,显著降低延迟与成本——这是响应速度的主要手段。

Expand Down Expand Up @@ -72,7 +73,7 @@ BanGD 是一个面向 **BanDB 数据库引擎(Go)垂类**的 AI PR 评审助

按"性价比/对质量的杠杆"排序:

1. **Agentic 周边代码读取**(最大质量杠杆):让 core 通过 `readFile` 主动探索调用链与数据结构定义,从"看 diff"升级到"看系统"
1. **Agentic 周边代码读取**(最大质量杠杆,**已落地第一版**,见 §三.3):当前是单轮规划 + 有界拉取。后续可做**多轮**(看到补读内容后再决定是否继续探索,loop-until-enough)、或升级为模型交错发起 `read_file` 的真正 tool-use 循环;并可引入仓库文件树(`listFiles`,限定到 diff 触及的目录以控 token)提高补读命中率。
2. **评测语料集 + rubric 迭代**:用历史 PR + 专家评审度量 precision/recall,数据驱动地打磨 rubric 与范例。
3. **对抗式验证**:对每条 finding 派多个"反驳者",多数否决则丢弃,系统性压低误报。
4. **RAG 知识库**:索引 BanDB 设计文档、历史评审决策、数据库系统模式,增强上下文理解。
Expand Down
126 changes: 121 additions & 5 deletions dist/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -35733,7 +35733,7 @@ function assembleSystemPrompt(systemPrompt, rubricFragments, examples) {
}
return parts.join('\n\n---\n\n');
}
function assembleUserPrompt(metadata, diff, filesText) {
function assembleUserPrompt(metadata, diff, filesText, relatedText = '') {
const sections = [
`# 待评审的 PR`,
`标题:${metadata.title}`,
Expand All @@ -35746,6 +35746,9 @@ function assembleUserPrompt(metadata, diff, filesText) {
if (filesText) {
sections.push(`# 被改动文件的完整内容(用于理解 diff 之外的上下文)`, filesText);
}
if (relatedText) {
sections.push(`# 周边相关代码(未被改动,按需拉取,用于架构级推理)`, relatedText);
}
sections.push(`请按系统提示词中的输出格式产出:1) PR 变更总结(changeSummary);2) 整体风险等级(overallRisk);3) 逐条风险识别与 Review 建议(findings)。`);
return sections.join('\n\n');
}
Expand Down Expand Up @@ -35808,6 +35811,104 @@ function partitionGroups(groups, existingKeys) {
}


/***/ }),

/***/ 5640:
/***/ ((__unused_webpack_module, exports, __nccwpck_require__) => {


Object.defineProperty(exports, "__esModule", ({ value: true }));
exports.MAX_RELATED_FILES = exports.RELATED_PLAN_SCHEMA = void 0;
exports.planRelatedFiles = planRelatedFiles;
exports.gatherRelatedFiles = gatherRelatedFiles;
const context_js_1 = __nccwpck_require__(8480);
/**
 * JSON Schema for the planner's structured output: a list of file paths.
 * The object must carry exactly one property, `paths` (an array of strings);
 * extra properties are disallowed by the schema.
 */
exports.RELATED_PLAN_SCHEMA = {
type: 'object',
additionalProperties: false,
required: ['paths'],
properties: {
paths: { type: 'array', items: { type: 'string' } },
},
};
/** Hard cap on how many related files one round may fetch. */
exports.MAX_RELATED_FILES = 8;
/** Cap on the changed-file material handed to the planner (chars). */
const PLAN_MATERIAL_CAP = 24_000;
/**
 * System prompt (written in Chinese) for the planning call. It frames the model
 * as a "context planning assistant" for a database-kernel engineer, lists the
 * kinds of untouched files worth reading (definitions of referenced
 * structs/interfaces/constants, callers/callees sharing a lock or data
 * structure, same-package files carrying the same invariants), restricts path
 * inference to import paths and identifiers already present in the changed
 * code, and states the MAX_RELATED_FILES limit. The fragments are joined with
 * a single newline.
 */
const PLANNER_SYSTEM = [
'你是一个资深数据库内核工程师的"上下文规划助手"。',
'下面给你一个 PR 的 diff 和被改动文件的完整内容。',
'请判断:要在架构层面(并发/所有权/锁/内存生命周期/存储/schema 等)正确评审这个改动,',
'还需要阅读哪些**未被改动**的周边源码文件——例如:',
'- 被引用的 struct / interface / 常量的定义所在文件;',
'- 持有同一把锁、或操作同一数据结构的调用方 / 被调用方;',
'- 同一 package 内承载相同不变量的其它文件。',
'只能依据改动代码里**已经出现**的 import 路径与标识符来推断文件路径(Go 惯例:包路径≈目录,类型名≈文件名)。',
'规则:只返回未被改动、且确有助于评审的文件路径;不要返回已给出的被改动文件;不要臆造无依据的路径;',
// The limit is interpolated so the prompt always matches the enforced cap.
`最多返回 ${exports.MAX_RELATED_FILES} 个;若改动是自包含的、无需额外上下文,返回空数组。`,
].join('\n');
/**
 * Ask the model which untouched related files it needs. Returns candidate paths,
 * de-duplicated, with the already-changed files removed, capped to MAX_RELATED_FILES.
 *
 * Planner failures are never fatal: if the call rejects, or resolves to anything
 * other than an object carrying a `paths` array (including `null`/`undefined`),
 * the function returns [] so the review proceeds without related context.
 *
 * @param llm   Client exposing `generateStructured({ system, user, outputSchema })`.
 * @param input `{ diff, changedFiles, loaded }` — the diff text, the paths of the
 *              changed files, and the changed files already read (`{ path, content }`).
 * @returns Normalised candidate paths, in the order the planner listed them.
 */
async function planRelatedFiles(llm, input) {
    const changedSet = new Set(input.changedFiles);
    const material = input.loaded
        .map((f) => `### ${f.path}\n${f.content}`)
        .join('\n\n');
    const user = [
        '# Diff',
        '```diff',
        input.diff,
        '```',
        '# 被改动文件的完整内容',
        // Bound the prompt size: only the first PLAN_MATERIAL_CAP chars of file material are sent.
        (0, context_js_1.truncate)(material, PLAN_MATERIAL_CAP).text,
        '请返回需要补充阅读的未改动文件路径(paths)。',
    ].join('\n\n');
    let raw;
    try {
        raw = await llm.generateStructured({ system: PLANNER_SYSTEM, user, outputSchema: exports.RELATED_PLAN_SCHEMA });
    }
    catch {
        return [];
    }
    // The client returns unvalidated output; a null/undefined/primitive result
    // must not blow up on property access, or the "never fatal" contract breaks.
    if (raw === null || typeof raw !== 'object')
        return [];
    const paths = raw.paths;
    if (!Array.isArray(paths))
        return [];
    const seen = new Set();
    const result = [];
    for (const p of paths) {
        if (typeof p !== 'string')
            continue;
        // Models often echo diff-style paths; drop a leading `a/` or `b/` prefix.
        const path = p.trim().replace(/^[ab]\//, '');
        if (!path || changedSet.has(path) || seen.has(path))
            continue;
        seen.add(path);
        result.push(path);
        if (result.length >= exports.MAX_RELATED_FILES)
            break;
    }
    return result;
}
/**
 * One planning round end-to-end: plan the related paths, then fetch each via
 * `readFile`, dropping misses (null). Returns the successfully read files (each
 * with its content) for inclusion in the review context.
 *
 * The reads are independent of one another, so they are issued together rather
 * than one after another; results keep the planner's order. A rejection from
 * `readFile` still propagates to the caller.
 *
 * @param llm      Client passed through to `planRelatedFiles`.
 * @param readFile `(path) => Promise<string | null>`; `null` marks a miss.
 * @param input    Planner input (`{ diff, changedFiles, loaded }`).
 * @returns `{ path, content }` entries for every path that was found.
 */
async function gatherRelatedFiles(llm, readFile, input) {
    const paths = await planRelatedFiles(llm, input);
    // Promise.all preserves index order, so contents[i] belongs to paths[i].
    const contents = await Promise.all(paths.map((path) => readFile(path)));
    const loaded = [];
    paths.forEach((path, index) => {
        const content = contents[index];
        if (content !== null)
            loaded.push({ path, content });
    });
    return loaded;
}


/***/ }),

/***/ 7253:
Expand Down Expand Up @@ -35849,6 +35950,7 @@ Object.defineProperty(exports, "__esModule", ({ value: true }));
exports.review = review;
const prompt_js_1 = __nccwpck_require__(5301);
const context_js_1 = __nccwpck_require__(8480);
const related_js_1 = __nccwpck_require__(5640);
const router_js_1 = __nccwpck_require__(2086);
const dimensions_js_1 = __nccwpck_require__(8094);
const retry_js_1 = __nccwpck_require__(7253);
Expand All @@ -35860,6 +35962,8 @@ const schema_js_1 = __nccwpck_require__(2032);
const DIFF_CHAR_CAP = 60_000;
const FILES_CHAR_BUDGET = 40_000;
const MAX_FILES_TO_READ = 40;
/** Budget (chars) for the untouched related files pulled by the planner. */
const RELATED_CHAR_BUDGET = 30_000;
const ROUTING_MATERIAL_CAP = 8_000;
/** Total tries for the generate-then-validate step (re-generate on bad output). */
const GENERATE_ATTEMPTS = 2;
Expand Down Expand Up @@ -35887,13 +35991,23 @@ async function review(deps, prompts) {
loaded.push({ path, content });
}
const filesBlock = (0, context_js_1.assembleFilesBlock)(loaded, FILES_CHAR_BUDGET);
const cappedDiff = (0, context_js_1.truncate)(diff, DIFF_CHAR_CAP).text;
// Agentic related-code reading: let the model name the untouched files it needs
// (struct defs, lock-holding callers) and pull them into the review context.
const related = await (0, related_js_1.gatherRelatedFiles)(deps.llm, (path) => deps.pr.readFile(path), {
diff: cappedDiff,
changedFiles,
loaded,
});
const relatedBlock = (0, context_js_1.assembleFilesBlock)(related, RELATED_CHAR_BUDGET);
const dimensions = await selectDimensions(deps.llm, diff, changedFiles, loaded);
const rubricFragments = dimensions.map((id) => prompts.rubric[id]);
const examples = dimensions
.map((id) => prompts.examples[id])
.filter((ex) => ex !== undefined);
const system = (0, prompt_js_1.assembleSystemPrompt)(prompts.systemPrompt, rubricFragments, examples);
const user = (0, prompt_js_1.assembleUserPrompt)(deps.pr.metadata, (0, context_js_1.truncate)(diff, DIFF_CHAR_CAP).text, filesBlock.text);
const user = (0, prompt_js_1.assembleUserPrompt)(deps.pr.metadata, cappedDiff, filesBlock.text, relatedBlock.text);
const relatedFiles = related.map((f) => f.path);
// The client returns the model's output unvalidated; the core owns the
// validation boundary so the result is typed without `any`. Retry the whole
// generate-then-validate step so a single malformed generation isn't fatal.
Expand All @@ -35909,7 +36023,7 @@ async function review(deps, prompts) {
});
return schema_js_1.ReviewResultSchema.parse(lastRaw);
}, GENERATE_ATTEMPTS);
return { result, dimensions };
return { result, dimensions, relatedFiles };
}
catch (error) {
if (error instanceof zod_1.ZodError)
Expand Down Expand Up @@ -36237,13 +36351,15 @@ async function run() {
}
throw error;
}
const { result, dimensions } = reviewed;
const { result, dimensions, relatedFiles } = reviewed;
const usage = llm.usage;
const footer = `本次评审消耗 token:${(0, usage_js_1.formatUsage)(usage)}|维度 [${dimensions.join(', ')}]`;
const relatedNote = relatedFiles.length > 0 ? `|补充阅读周边文件 [${relatedFiles.join(', ')}]` : '';
const footer = `本次评审消耗 token:${(0, usage_js_1.formatUsage)(usage)}|维度 [${dimensions.join(', ')}]${relatedNote}`;
const published = await (0, publish_js_1.publishReview)(publisher, result, pr.number, footer);
core.setOutput('finding_count', result.findings.length);
core.setOutput('total_tokens', (0, usage_js_1.totalTokens)(usage));
core.info(`BanGD 评审完成,维度=[${dimensions.join(', ')}],共 ${result.findings.length} 条 finding;` +
`补充阅读周边文件 ${relatedFiles.length} 个${relatedFiles.length > 0 ? ` [${relatedFiles.join(', ')}]` : ''};` +
`新建 Issue ${published.created} 个,复用 ${published.reused} 个${published.degraded ? '(部分降级为内联)' : ''}。`);
core.info(`Token 用量:${(0, usage_js_1.formatUsage)(usage)}`);
await core.summary
Expand Down
2 changes: 1 addition & 1 deletion dist/index.js.map

Large diffs are not rendered by default.

13 changes: 12 additions & 1 deletion src/core/prompt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,12 @@ export function assembleSystemPrompt(
return parts.join('\n\n---\n\n');
}

export function assembleUserPrompt(metadata: PrMetadata, diff: string, filesText: string): string {
export function assembleUserPrompt(
metadata: PrMetadata,
diff: string,
filesText: string,
relatedText = '',
): string {
const sections = [
`# 待评审的 PR`,
`标题:${metadata.title}`,
Expand All @@ -53,6 +58,12 @@ export function assembleUserPrompt(metadata: PrMetadata, diff: string, filesText
filesText,
);
}
if (relatedText) {
sections.push(
`# 周边相关代码(未被改动,按需拉取,用于架构级推理)`,
relatedText,
);
}
sections.push(
`请按系统提示词中的输出格式产出:1) PR 变更总结(changeSummary);2) 整体风险等级(overallRisk);3) 逐条风险识别与 Review 建议(findings)。`,
);
Expand Down
87 changes: 87 additions & 0 deletions src/core/related.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import { describe, it, expect, vi } from 'vitest';
import type { LlmClient, LlmRequest } from './ports.js';
import {
planRelatedFiles,
gatherRelatedFiles,
RELATED_PLAN_SCHEMA,
MAX_RELATED_FILES,
} from './related.js';

/**
 * Build a stub LLM client whose `generateStructured` always resolves to
 * `{ paths }`. When `capture` is supplied it is invoked synchronously with each
 * request before the promise is returned, so tests can inspect what was sent.
 */
function plannerLlm(paths: unknown, capture?: (r: LlmRequest) => void): LlmClient {
  const generateStructured = (request: LlmRequest) => {
    if (capture) {
      capture(request);
    }
    const reply = { paths };
    return Promise.resolve(reply);
  };
  return { generateStructured };
}

// Shared planner input for every test: a one-file PR touching cache/block.go,
// whose loaded content imports "db/storage".
const input = {
diff: '+++ b/cache/block.go\n+c.hits++',
changedFiles: ['cache/block.go'],
loaded: [{ path: 'cache/block.go', content: 'package cache\nimport "db/storage"' }],
};

// Unit tests for planRelatedFiles: path normalisation (trim, `a/` prefix strip,
// de-duplication), exclusion of already-changed files, filtering of invalid
// entries, the MAX_RELATED_FILES cap, and the fall-back to [] on bad or failed
// planner output.
describe('planRelatedFiles', () => {
it('returns the planner-named paths, trimmed and de-duplicated', async () => {
const llm = plannerLlm(['storage/page.go', ' storage/page.go ', 'a/storage/wal.go']);
const paths = await planRelatedFiles(llm, input);
// duplicate collapsed; the `a/` prefix stripped.
expect(paths).toEqual(['storage/page.go', 'storage/wal.go']);
});

it('excludes files already in the changed set', async () => {
// 'cache/block.go' is listed in input.changedFiles, so it must be filtered out.
const llm = plannerLlm(['cache/block.go', 'storage/page.go']);
const paths = await planRelatedFiles(llm, input);
expect(paths).toEqual(['storage/page.go']);
});

it('drops non-string and empty entries', async () => {
const llm = plannerLlm(['storage/page.go', 42, '', null]);
const paths = await planRelatedFiles(llm, input);
expect(paths).toEqual(['storage/page.go']);
});

it(`caps the result at ${MAX_RELATED_FILES}`, async () => {
// Offer five more distinct paths than the cap allows.
const many = Array.from({ length: MAX_RELATED_FILES + 5 }, (_, i) => `pkg/file${i}.go`);
const paths = await planRelatedFiles(plannerLlm(many), input);
expect(paths).toHaveLength(MAX_RELATED_FILES);
});

it('returns [] when the planner output has no paths array', async () => {
// The stub resolves to { paths: undefined } and { paths: 'nope' } respectively.
expect(await planRelatedFiles(plannerLlm(undefined), input)).toEqual([]);
expect(await planRelatedFiles(plannerLlm('nope'), input)).toEqual([]);
});

it('never throws — returns [] when the planner call fails', async () => {
const llm: LlmClient = { generateStructured: () => Promise.reject(new Error('boom')) };
expect(await planRelatedFiles(llm, input)).toEqual([]);
});

it('calls the planner with the related-files schema', async () => {
// Capture the request to check the schema identity and that the changed file reaches the prompt.
let seen: LlmRequest | undefined;
await planRelatedFiles(plannerLlm(['storage/page.go'], (r) => (seen = r)), input);
expect(seen?.outputSchema).toBe(RELATED_PLAN_SCHEMA);
expect(seen?.user).toContain('cache/block.go');
});
});

// Unit tests for gatherRelatedFiles: every planned path is passed to readFile,
// null results are dropped, and readFile is not called when nothing is planned.
describe('gatherRelatedFiles', () => {
it('fetches each planned path and drops misses (null)', async () => {
const llm = plannerLlm(['storage/page.go', 'storage/gone.go']);
// Only storage/page.go "exists"; any other path resolves to null (a miss).
const readFile = vi.fn((path: string) =>
Promise.resolve(path === 'storage/page.go' ? 'package storage\ntype Page struct{}' : null),
);
const loaded = await gatherRelatedFiles(llm, readFile, input);
expect(readFile).toHaveBeenCalledWith('storage/page.go');
expect(readFile).toHaveBeenCalledWith('storage/gone.go');
expect(loaded).toEqual([{ path: 'storage/page.go', content: 'package storage\ntype Page struct{}' }]);
});

it('returns [] when the planner names nothing', async () => {
const readFile = vi.fn(() => Promise.resolve('x'));
const loaded = await gatherRelatedFiles(plannerLlm([]), readFile, input);
expect(loaded).toEqual([]);
expect(readFile).not.toHaveBeenCalled();
});
});
Loading
Loading