Skip to content

Commit 54da220

Browse files
Copilotdsymepelikhan
authored
Enforce per-type safe-output max count at MCP invocation time (MCE4) (#40348)
* Initial plan * Enforce per-type safe-output max count at MCP invocation time (MCE4) Add session-scoped per-type operation counters inside createHandlers that enforce the user-configured max at tool-invocation time (the MCP-server half of MCE4 dual enforcement). Previously the count cap was only applied downstream in collect_ndjson_output.cjs / safe_output_processor.cjs, so the agent received no in-loop feedback and silently over-produced outputs that were then dropped. Changes: - operationCounts Map tracks how many items of each type have been appended during the current MCP session. - getExplicitMax(type) reads the user's explicit max: setting via getSafeOutputsToolConfig; returns null when no explicit limit is set or when max: -1 (unlimited), so only intentional limits are enforced. - enforcePerTypeMax(type) throws a JSON-RPC -32602 error with E002 code and actionable guidance when the configured limit has already been reached. - appendSafeOutputCounted(entry) wraps appendSafeOutput: checks the limit, delegates to the real append, then increments the counter only on success (mirrors the inlineReviewCommentCount pattern). - All appendSafeOutput(entry) call sites within createHandlers replaced with appendSafeOutputCounted(entry). Downstream enforcement in collect_ndjson_output.cjs and safe_output_processor.cjs is preserved unchanged as the processor-time defence-in-depth layer (MCE4). 10 new tests added covering: limit enforcement, key normalisation, addCommentHandler, independent per-type budgets, unlimited (-1), unconfigured types, error message content, failed-write counter isolation, and fresh counter per createHandlers() call. Closes #40311 Co-authored-by: dsyme <7204669+dsyme@users.noreply.github.com> * fix: run Prettier on safe_outputs_handlers.test.cjs to fix lint-cjs CI failure Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: dsyme <7204669+dsyme@users.noreply.github.com> Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> Co-authored-by: Peli de Halleux <pelikhan@users.noreply.github.com>
1 parent daa889e commit 54da220

2 files changed

Lines changed: 268 additions & 13 deletions

File tree

actions/setup/js/safe_outputs_handlers.cjs

Lines changed: 87 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,80 @@ function resolvePatchWorkspacePath(workspacePath) {
203203
function createHandlers(server, appendSafeOutput, config = {}) {
204204
const TOKEN_THRESHOLD = 16000;
205205

206+
/**
207+
* Session-scoped per-type operation counters.
208+
* Incremented on every successful appendSafeOutput call (MCE4 dual enforcement).
209+
* @type {Map<string, number>}
210+
*/
211+
const operationCounts = new Map();
212+
213+
/**
214+
* Return the explicitly user-configured max for a safe-output type, or null if not set / unlimited.
215+
* Uses getSafeOutputsToolConfig for consistent key-normalisation (hyphens → underscores).
216+
* Does NOT fall back to validation-config defaults: MCP-time enforcement is only
217+
* applied when the user has explicitly set a limit; downstream enforcement covers defaults.
218+
* Per Safe Outputs Specification MCE5: the same config source as the processor.
219+
* @param {string} type - normalised safe-output type name (e.g. "add_comment")
220+
* @returns {number | null}
221+
*/
222+
function getExplicitMax(type) {
223+
const toolConfig = getSafeOutputsToolConfig(config, type);
224+
if (!toolConfig || typeof toolConfig !== "object") return null;
225+
if (!("max" in toolConfig)) return null;
226+
const maxVal = toolConfig.max;
227+
if (maxVal === -1) return null; // -1 means unlimited
228+
if (typeof maxVal === "number" && Number.isInteger(maxVal) && maxVal > 0) {
229+
return maxVal;
230+
}
231+
return null;
232+
}
233+
234+
/**
235+
* Enforce the per-type operation count limit at invocation time.
236+
* Throws a JSON-RPC -32602 error when the configured max has already been reached.
237+
* Per Safe Outputs Specification MCE4: Dual Enforcement — constraints MUST be
238+
* enforced at both invocation time (MCP server) and processing time (safe output
239+
* processor) to provide defence-in-depth.
240+
* @param {string} type - normalised safe-output type name
241+
*/
242+
function enforcePerTypeMax(type) {
243+
const maxAllowed = getExplicitMax(type);
244+
if (maxAllowed === null) return; // no explicit limit configured
245+
const current = operationCounts.get(type) || 0;
246+
if (current >= maxAllowed) {
247+
throw {
248+
code: -32602,
249+
message: `E002: ${type} limit reached — ${current} of ${maxAllowed} already used this run`,
250+
data: {
251+
constraint: "max",
252+
type,
253+
limit: maxAllowed,
254+
guidance:
255+
`You have used all ${maxAllowed} ${type} operations for this run. ` +
256+
`Further ${type} calls will be ignored. Prioritize the most important items ` +
257+
`(e.g. consolidate multiple updates into one), or call noop. ` +
258+
`Note: other safe-output types have independent budgets, so applying one type ` +
259+
`without its companion type can leave inconsistent state.`,
260+
},
261+
};
262+
}
263+
}
264+
265+
/**
266+
* Append a safe-output entry after enforcing the per-type max count.
267+
* Increments the session counter only after a successful write, mirroring the
268+
* approach used by inlineReviewCommentCount so that write errors do not advance
269+
* the counter.
270+
* Per Safe Outputs Specification MCE4: invocation-time half of dual enforcement.
271+
* @param {Record<string, any>} entry
272+
*/
273+
const appendSafeOutputCounted = entry => {
274+
const type = entry?.type;
275+
if (type) enforcePerTypeMax(type);
276+
appendSafeOutput(entry);
277+
if (type) operationCounts.set(type, (operationCounts.get(type) || 0) + 1);
278+
};
279+
206280
/**
207281
* Validate schema-declared explicit target parameters for wildcard-target tools.
208282
* @param {Record<string, any>} entry
@@ -258,7 +332,7 @@ function createHandlers(server, appendSafeOutput, config = {}) {
258332

259333
const fileInfo = writeLargeContentToFile(largeContent);
260334
entry[largeFieldName] = `[Content too large, saved to file: ${fileInfo.filename}]`;
261-
appendSafeOutput(entry);
335+
appendSafeOutputCounted(entry);
262336

263337
return {
264338
content: [
@@ -286,7 +360,7 @@ function createHandlers(server, appendSafeOutput, config = {}) {
286360
if (largeContentResponse) return largeContentResponse;
287361

288362
// Normal case - no large content
289-
appendSafeOutput(entry);
363+
appendSafeOutputCounted(entry);
290364
return {
291365
content: [
292366
{
@@ -416,7 +490,7 @@ function createHandlers(server, appendSafeOutput, config = {}) {
416490
targetFileName: targetFileName,
417491
};
418492

419-
appendSafeOutput(entry);
493+
appendSafeOutputCounted(entry);
420494

421495
return {
422496
content: [
@@ -620,7 +694,7 @@ function createHandlers(server, appendSafeOutput, config = {}) {
620694
if (allowEmpty) {
621695
server.debug(`allow-empty is enabled for create_pull_request - skipping patch generation`);
622696
// Append the safe output entry without generating a patch
623-
appendSafeOutput(entry);
697+
appendSafeOutputCounted(entry);
624698
return {
625699
content: [
626700
{
@@ -834,7 +908,7 @@ function createHandlers(server, appendSafeOutput, config = {}) {
834908
entry.base_commit = bundleResult.baseCommit;
835909
}
836910

837-
appendSafeOutput(entry);
911+
appendSafeOutputCounted(entry);
838912
return {
839913
content: [
840914
{
@@ -856,7 +930,7 @@ function createHandlers(server, appendSafeOutput, config = {}) {
856930
};
857931
}
858932

859-
appendSafeOutput(entry);
933+
appendSafeOutputCounted(entry);
860934
return {
861935
content: [
862936
{
@@ -1277,7 +1351,7 @@ function createHandlers(server, appendSafeOutput, config = {}) {
12771351
entry.base_commit = bundleResult.baseCommit;
12781352
}
12791353

1280-
appendSafeOutput(entry);
1354+
appendSafeOutputCounted(entry);
12811355
return {
12821356
content: [
12831357
{
@@ -1299,7 +1373,7 @@ function createHandlers(server, appendSafeOutput, config = {}) {
12991373
};
13001374
}
13011375

1302-
appendSafeOutput(entry);
1376+
appendSafeOutputCounted(entry);
13031377
return {
13041378
content: [
13051379
{
@@ -1551,7 +1625,7 @@ function createHandlers(server, appendSafeOutput, config = {}) {
15511625
};
15521626
const largeContentResponse = maybeHandleLargeContent(droppedEntry);
15531627
if (!largeContentResponse) {
1554-
appendSafeOutput(droppedEntry);
1628+
appendSafeOutputCounted(droppedEntry);
15551629
}
15561630
return {
15571631
content: [
@@ -1572,7 +1646,7 @@ function createHandlers(server, appendSafeOutput, config = {}) {
15721646
const largeContentResponse = maybeHandleLargeContent(entry);
15731647
if (largeContentResponse) return largeContentResponse;
15741648

1575-
appendSafeOutput(entry);
1649+
appendSafeOutputCounted(entry);
15761650
return {
15771651
content: [
15781652
{
@@ -1603,7 +1677,7 @@ function createHandlers(server, appendSafeOutput, config = {}) {
16031677
server.debug(`temporary_id for create_project: ${entry.temporary_id}`);
16041678

16051679
// Append to safe outputs
1606-
appendSafeOutput(entry);
1680+
appendSafeOutputCounted(entry);
16071681

16081682
// Return the temporary_id to the agent so it can reference this project
16091683
return {
@@ -1714,7 +1788,7 @@ function createHandlers(server, appendSafeOutput, config = {}) {
17141788
server.debug(`temporary_id for add_comment: ${entry.temporary_id}`);
17151789

17161790
// Append to safe outputs
1717-
appendSafeOutput(entry);
1791+
appendSafeOutputCounted(entry);
17181792

17191793
// Return the temporary_id to the agent so it can reference this comment
17201794
return {
@@ -1893,7 +1967,7 @@ function createHandlers(server, appendSafeOutput, config = {}) {
18931967
server.debug(`upload_artifact: staged ${filePath} as ${destName}`);
18941968
}
18951969

1896-
appendSafeOutput(entry);
1970+
appendSafeOutputCounted(entry);
18971971

18981972
const temporaryId = entry.temporary_id || null;
18991973
return {

actions/setup/js/safe_outputs_handlers.test.cjs

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2736,6 +2736,187 @@ describe("safe_outputs_handlers", () => {
27362736
});
27372737
});
27382738

2739+
describe("per-type max enforcement (MCE4 dual enforcement)", () => {
2740+
let mockServer;
2741+
let mockAppendSafeOutput;
2742+
2743+
beforeEach(() => {
2744+
vi.clearAllMocks();
2745+
mockServer = { debug: vi.fn() };
2746+
mockAppendSafeOutput = vi.fn();
2747+
});
2748+
2749+
it("allows calls up to the configured max and rejects the (max+1)th call via defaultHandler", () => {
2750+
const h = createHandlers(mockServer, mockAppendSafeOutput, {
2751+
add_labels: { max: 2 },
2752+
});
2753+
2754+
// First two calls succeed
2755+
expect(h.defaultHandler("add_labels")({ labels: ["approved"] })).not.toHaveProperty("isError");
2756+
expect(h.defaultHandler("add_labels")({ labels: ["approved"] })).not.toHaveProperty("isError");
2757+
expect(mockAppendSafeOutput).toHaveBeenCalledTimes(2);
2758+
2759+
// Third call must throw E002
2760+
expect(() => h.defaultHandler("add_labels")({ labels: ["approved"] })).toThrow(
2761+
expect.objectContaining({
2762+
code: -32602,
2763+
message: expect.stringContaining("E002"),
2764+
})
2765+
);
2766+
// No additional append after limit
2767+
expect(mockAppendSafeOutput).toHaveBeenCalledTimes(2);
2768+
});
2769+
2770+
it("rejects immediately when max is 0 and config uses hyphen-keyed type (key normalisation)", () => {
2771+
// Ensure getSafeOutputsToolConfig's hyphen→underscore lookup works for max checks
2772+
const h = createHandlers(mockServer, mockAppendSafeOutput, {
2773+
"add-labels": { max: 1 },
2774+
});
2775+
2776+
h.defaultHandler("add_labels")({ labels: ["ok"] });
2777+
expect(mockAppendSafeOutput).toHaveBeenCalledTimes(1);
2778+
2779+
expect(() => h.defaultHandler("add_labels")({ labels: ["ok"] })).toThrow(expect.objectContaining({ code: -32602, message: expect.stringContaining("E002") }));
2780+
expect(mockAppendSafeOutput).toHaveBeenCalledTimes(1);
2781+
});
2782+
2783+
it("enforces max for addCommentHandler", () => {
2784+
const h = createHandlers(mockServer, mockAppendSafeOutput, {
2785+
add_comment: { max: 1 },
2786+
});
2787+
2788+
global.context = { repo: { owner: "o", repo: "r" }, eventName: "issues", payload: { issue: { number: 1 } } };
2789+
try {
2790+
const ok = h.addCommentHandler({ body: "first comment", item_number: 1 });
2791+
expect(ok).not.toHaveProperty("isError");
2792+
expect(mockAppendSafeOutput).toHaveBeenCalledTimes(1);
2793+
2794+
expect(() => h.addCommentHandler({ body: "second comment", item_number: 2 })).toThrow(expect.objectContaining({ code: -32602, message: expect.stringContaining("E002") }));
2795+
expect(mockAppendSafeOutput).toHaveBeenCalledTimes(1);
2796+
} finally {
2797+
global.context = { repo: { owner: "test-owner", repo: "test-repo" }, eventName: "push", payload: {} };
2798+
}
2799+
});
2800+
2801+
it("independent per-type budgets: exceeding add_comment limit does not affect add_labels", () => {
2802+
const h = createHandlers(mockServer, mockAppendSafeOutput, {
2803+
add_comment: { max: 1 },
2804+
add_labels: { max: 3 },
2805+
});
2806+
2807+
global.context = { repo: { owner: "o", repo: "r" }, eventName: "issues", payload: { issue: { number: 1 } } };
2808+
try {
2809+
h.addCommentHandler({ body: "first comment", item_number: 1 });
2810+
expect(() => h.addCommentHandler({ body: "second comment", item_number: 2 })).toThrow(expect.objectContaining({ code: -32602, message: expect.stringContaining("E002") }));
2811+
2812+
// add_labels budget is separate — all 3 calls should succeed
2813+
expect(h.defaultHandler("add_labels")({ labels: ["a"] })).not.toHaveProperty("isError");
2814+
expect(h.defaultHandler("add_labels")({ labels: ["b"] })).not.toHaveProperty("isError");
2815+
expect(h.defaultHandler("add_labels")({ labels: ["c"] })).not.toHaveProperty("isError");
2816+
2817+
// 4th add_labels call must fail
2818+
expect(() => h.defaultHandler("add_labels")({ labels: ["d"] })).toThrow(expect.objectContaining({ code: -32602, message: expect.stringContaining("E002") }));
2819+
} finally {
2820+
global.context = { repo: { owner: "test-owner", repo: "test-repo" }, eventName: "push", payload: {} };
2821+
}
2822+
});
2823+
2824+
it("does not enforce when max is -1 (unlimited)", () => {
2825+
const h = createHandlers(mockServer, mockAppendSafeOutput, {
2826+
add_labels: { max: -1 },
2827+
});
2828+
2829+
for (let i = 0; i < 20; i++) {
2830+
expect(h.defaultHandler("add_labels")({ labels: ["ok"] })).not.toHaveProperty("isError");
2831+
}
2832+
expect(mockAppendSafeOutput).toHaveBeenCalledTimes(20);
2833+
});
2834+
2835+
it("does not enforce when max is not explicitly configured", () => {
2836+
// Only target is set — no max → no invocation-time limit
2837+
const h = createHandlers(mockServer, mockAppendSafeOutput, {
2838+
add_labels: { target: "*" },
2839+
});
2840+
2841+
for (let i = 0; i < 5; i++) {
2842+
expect(h.defaultHandler("add_labels")({ labels: ["ok"] })).not.toHaveProperty("isError");
2843+
}
2844+
expect(mockAppendSafeOutput).toHaveBeenCalledTimes(5);
2845+
});
2846+
2847+
it("does not enforce when config is empty (no safe-outputs config)", () => {
2848+
const h = createHandlers(mockServer, mockAppendSafeOutput);
2849+
2850+
for (let i = 0; i < 5; i++) {
2851+
expect(h.defaultHandler("add_labels")({ labels: ["ok"] })).not.toHaveProperty("isError");
2852+
}
2853+
expect(mockAppendSafeOutput).toHaveBeenCalledTimes(5);
2854+
});
2855+
2856+
it("error message includes type, current count, and limit", () => {
2857+
const h = createHandlers(mockServer, mockAppendSafeOutput, {
2858+
add_labels: { max: 3 },
2859+
});
2860+
2861+
h.defaultHandler("add_labels")({ labels: ["a"] });
2862+
h.defaultHandler("add_labels")({ labels: ["b"] });
2863+
h.defaultHandler("add_labels")({ labels: ["c"] });
2864+
2865+
let thrown;
2866+
try {
2867+
h.defaultHandler("add_labels")({ labels: ["d"] });
2868+
} catch (err) {
2869+
thrown = err;
2870+
}
2871+
2872+
expect(thrown).toBeDefined();
2873+
expect(thrown.code).toBe(-32602);
2874+
expect(thrown.message).toContain("add_labels");
2875+
expect(thrown.message).toContain("3 of 3");
2876+
expect(thrown.data).toMatchObject({
2877+
constraint: "max",
2878+
type: "add_labels",
2879+
limit: 3,
2880+
});
2881+
expect(thrown.data.guidance).toContain("add_labels");
2882+
});
2883+
2884+
it("counter does not increment when append throws (write error)", () => {
2885+
const h = createHandlers(mockServer, mockAppendSafeOutput, {
2886+
add_labels: { max: 2 },
2887+
});
2888+
2889+
// First call succeeds
2890+
h.defaultHandler("add_labels")({ labels: ["ok"] });
2891+
expect(mockAppendSafeOutput).toHaveBeenCalledTimes(1);
2892+
2893+
// Second call: append throws a write error
2894+
mockAppendSafeOutput.mockImplementationOnce(() => {
2895+
throw new Error("disk write error");
2896+
});
2897+
expect(() => h.defaultHandler("add_labels")({ labels: ["fail"] })).toThrow("disk write error");
2898+
2899+
// Counter is still 1 (not 2) because the failed write shouldn't count
2900+
// Third call should succeed (not hit limit)
2901+
expect(h.defaultHandler("add_labels")({ labels: ["ok2"] })).not.toHaveProperty("isError");
2902+
expect(mockAppendSafeOutput).toHaveBeenCalledTimes(3); // call 1 + (failed) call 2 + call 3
2903+
});
2904+
2905+
it("each createHandlers() call gets a fresh independent counter", () => {
2906+
const config = { add_labels: { max: 1 } };
2907+
2908+
const h1 = createHandlers(mockServer, mockAppendSafeOutput, config);
2909+
const h2 = createHandlers(mockServer, mockAppendSafeOutput, config);
2910+
2911+
h1.defaultHandler("add_labels")({ labels: ["a"] });
2912+
// h1's budget is now exhausted — must throw
2913+
expect(() => h1.defaultHandler("add_labels")({ labels: ["b"] })).toThrow(expect.objectContaining({ code: -32602 }));
2914+
2915+
// h2 has its own fresh counter — should still allow 1 call
2916+
expect(h2.defaultHandler("add_labels")({ labels: ["a"] })).not.toHaveProperty("isError");
2917+
});
2918+
});
2919+
27392920
describe("hasUpdatePullRequestFields", () => {
27402921
it("returns false for empty object", () => {
27412922
expect(hasUpdatePullRequestFields({})).toBe(false);

0 commit comments

Comments
 (0)