diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f0513e869b..05acd49abd 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -63,7 +63,7 @@ jobs: run: bun run build - name: Run unit tests - run: bun test --max-concurrency=1 + run: bun test --feature=UNATTENDED_RETRY --max-concurrency=1 - name: Smoke test run: bun run smoke diff --git a/package.json b/package.json index 19794bd5ce..dd4893d829 100644 --- a/package.json +++ b/package.json @@ -52,9 +52,9 @@ "web:build": "bun run --cwd web build", "web:preview": "bun run --cwd web preview", "web:typecheck": "bun run --cwd web typecheck", - "test": "bun test", - "test:full": "bun test --max-concurrency=1", - "test:coverage": "bun test --coverage --coverage-reporter=lcov --coverage-dir=coverage --max-concurrency=1 && bun run scripts/render-coverage-heatmap.ts", + "test": "bun test --feature=UNATTENDED_RETRY", + "test:full": "bun test --feature=UNATTENDED_RETRY --max-concurrency=1", + "test:coverage": "bun test --feature=UNATTENDED_RETRY --coverage --coverage-reporter=lcov --coverage-dir=coverage --max-concurrency=1 && bun run scripts/render-coverage-heatmap.ts", "test:coverage:ui": "bun run scripts/render-coverage-heatmap.ts", "security:pr-scan": "bun run scripts/pr-intent-scan.ts", "test:provider-recommendation": "bun test src/utils/providerRecommendation.test.ts src/utils/providerProfile.test.ts", @@ -64,7 +64,7 @@ "check": "bun run smoke && bun run test:full", "verify:privacy": "bun run scripts/verify-no-phone-home.ts", "build:verified": "bun run build && bun run verify:privacy", - "test:provider": "bun test --max-concurrency=1 src/services/api/*.test.ts src/utils/context.test.ts", + "test:provider": "bun test --feature=UNATTENDED_RETRY --max-concurrency=1 src/services/api/*.test.ts src/utils/context.test.ts", "doctor:runtime": "bun run scripts/system-check.ts", "doctor:runtime:json": "bun run scripts/system-check.ts --json", "doctor:report": "bun run 
scripts/system-check.ts --out reports/doctor-runtime.json", diff --git a/src/QueryEngine.ts b/src/QueryEngine.ts index 21cd6a615c..f8e6edba4b 100644 --- a/src/QueryEngine.ts +++ b/src/QueryEngine.ts @@ -14,6 +14,7 @@ import type { SDKStatus, SDKUserMessageReplay, } from 'src/entrypoints/agentSdkTypes.js' +import { EXTERNAL_PERMISSION_MODES } from 'src/types/permissions.js' import { accumulateUsage, updateUsage } from 'src/services/api/claude.js' import type { NonNullableUsage } from 'src/services/api/logging.js' import { EMPTY_USAGE } from 'src/services/api/logging.js' @@ -547,12 +548,18 @@ export class QueryEngine { ]) headlessProfilerCheckpoint('after_skills_plugins') + const rawPermissionMode = initialAppState.toolPermissionContext.mode + const validPermissionMode: PermissionMode = ( + EXTERNAL_PERMISSION_MODES as readonly string[] + ).includes(rawPermissionMode) + ? (rawPermissionMode as PermissionMode) + : 'default' + yield buildSystemInitMessage({ tools, mcpClients, model: mainLoopModel, - permissionMode: initialAppState.toolPermissionContext - .mode as PermissionMode, // TODO: avoid the cast + permissionMode: validPermissionMode, commands, agents, skills, @@ -939,8 +946,11 @@ export class QueryEngine { ) if (snipResult !== undefined) { if (snipResult.executed) { - this.mutableMessages.length = 0 - this.mutableMessages.push(...snipResult.messages) + this.mutableMessages.splice( + 0, + this.mutableMessages.length, + ...snipResult.messages, + ) // Persist the snip boundary so a resumed session replays the same // removal. recordTranscript is append-only by UUID, so the // pre-snip messages already on disk remain; appending this @@ -948,7 +958,7 @@ export class QueryEngine { // applySnipRemovals prune them in loadTranscriptFile(). Without // this, --resume/restart rebuilds the un-snipped history and the // context reduction is lost. 
Mirror the boundary into the local - // `messages` recording copy — like the compact_boundary path — + // `messages` recording copy — like the compact_boundary path — // so later writes and the parent chain stay consistent. messages.push(message) if (persistSession) { diff --git a/src/hooks/useReplBridge.tsx b/src/hooks/useReplBridge.tsx index 38d21917ea..95ef63d857 100644 --- a/src/hooks/useReplBridge.tsx +++ b/src/hooks/useReplBridge.tsx @@ -11,6 +11,7 @@ import { getSlashCommandToolSkills, isBridgeSafeCommand } from '../commands.js'; import { getRemoteSessionUrl } from '../constants/product.js'; import { useNotifications } from '../context/notifications.js'; import type { PermissionMode, SDKMessage } from '../entrypoints/agentSdkTypes.js'; +import { EXTERNAL_PERMISSION_MODES } from '../types/permissions.js'; import type { SDKControlResponse } from '../entrypoints/sdk/controlTypes.js'; import { Text } from '../ink.js'; import { getFeatureValue_CACHED_MAY_BE_STALE } from '../services/analytics/growthbook.js'; @@ -295,6 +296,8 @@ export function useReplBridge(messages: Message[], setMessages: (action: React.S const skills = await getSlashCommandToolSkills(getCwd()); if (cancelled) return; const state_0 = store.getState(); + const rawPermissionMode = state_0.toolPermissionContext.mode + const validPermissionMode: PermissionMode = (EXTERNAL_PERMISSION_MODES as readonly string[]).includes(rawPermissionMode) ? 
(rawPermissionMode as PermissionMode) : 'default' handleRef.current?.writeSdkMessages([buildSystemInitMessage({ // tools/mcpClients/plugins redacted for REPL-bridge: // MCP-prefixed tool names and server names leak which @@ -307,8 +310,7 @@ export function useReplBridge(messages: Message[], setMessages: (action: React.S tools: [], mcpClients: [], model: mainLoopModelRef.current, - permissionMode: state_0.toolPermissionContext.mode as PermissionMode, - // TODO: avoid the cast + permissionMode: validPermissionMode, // Remote clients can only invoke bridge-safe commands — // advertising unsafe ones (local-jsx, unallowed local) // would let mobile/web attempt them and hit errors. diff --git a/src/main.tsx b/src/main.tsx index f087089c76..2daa2c484a 100644 --- a/src/main.tsx +++ b/src/main.tsx @@ -344,9 +344,9 @@ function runMigrations(): void { // Async migration - fire and forget since it's non-blocking migrateChangelogFromConfig().catch(error => { logError( - new Error('Changelog migration failed; will retry on next startup', { - cause: error, - }), + new Error( + `Changelog migration failed; will retry on next startup: ${errorMessage(error)}`, + ), ) }); } diff --git a/src/services/api/client.test.ts b/src/services/api/client.test.ts index e1c8d2fd00..6da43c012b 100644 --- a/src/services/api/client.test.ts +++ b/src/services/api/client.test.ts @@ -32,14 +32,17 @@ const originalEnv = { OPENAI_BASE_URL: process.env.OPENAI_BASE_URL, OPENAI_API_BASE: process.env.OPENAI_API_BASE, OPENAI_API_FORMAT: process.env.OPENAI_API_FORMAT, + OPENAI_AUTH_HEADER: process.env.OPENAI_AUTH_HEADER, + OPENAI_AUTH_SCHEME: process.env.OPENAI_AUTH_SCHEME, + OPENAI_AUTH_HEADER_VALUE: process.env.OPENAI_AUTH_HEADER_VALUE, OPENAI_MODEL: process.env.OPENAI_MODEL, MINIMAX_API_KEY: process.env.MINIMAX_API_KEY, XAI_API_KEY: process.env.XAI_API_KEY, + MIMO_API_KEY: process.env.MIMO_API_KEY, + VENICE_API_KEY: process.env.VENICE_API_KEY, FIREWORKS_API_KEY: process.env.FIREWORKS_API_KEY, - 
OPENAI_AUTH_HEADER: process.env.OPENAI_AUTH_HEADER, - OPENAI_AUTH_SCHEME: process.env.OPENAI_AUTH_SCHEME, - OPENAI_AUTH_HEADER_VALUE: process.env.OPENAI_AUTH_HEADER_VALUE, NVIDIA_NIM: process.env.NVIDIA_NIM, + NVIDIA_API_KEY: process.env.NVIDIA_API_KEY, ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY, ANTHROPIC_AUTH_TOKEN: process.env.ANTHROPIC_AUTH_TOKEN, ANTHROPIC_BASE_URL: process.env.ANTHROPIC_BASE_URL, @@ -79,12 +82,16 @@ function clearEnvForMiniMaxOnlyTest(): void { delete process.env.OPENAI_BASE_URL delete process.env.OPENAI_API_BASE delete process.env.OPENAI_MODEL - delete process.env.XAI_API_KEY - delete process.env.FIREWORKS_API_KEY + delete process.env.OPENAI_API_FORMAT delete process.env.OPENAI_AUTH_HEADER delete process.env.OPENAI_AUTH_SCHEME delete process.env.OPENAI_AUTH_HEADER_VALUE + delete process.env.XAI_API_KEY + delete process.env.MIMO_API_KEY + delete process.env.VENICE_API_KEY + delete process.env.FIREWORKS_API_KEY delete process.env.NVIDIA_NIM + delete process.env.NVIDIA_API_KEY delete process.env.ANTHROPIC_API_KEY delete process.env.ANTHROPIC_AUTH_TOKEN delete process.env.ANTHROPIC_BASE_URL @@ -116,11 +123,14 @@ beforeEach(async () => { delete process.env.OPENAI_MODEL delete process.env.MINIMAX_API_KEY delete process.env.XAI_API_KEY + delete process.env.MIMO_API_KEY + delete process.env.VENICE_API_KEY delete process.env.FIREWORKS_API_KEY delete process.env.OPENAI_AUTH_HEADER delete process.env.OPENAI_AUTH_SCHEME delete process.env.OPENAI_AUTH_HEADER_VALUE delete process.env.NVIDIA_NIM + delete process.env.NVIDIA_API_KEY delete process.env.ANTHROPIC_API_KEY delete process.env.ANTHROPIC_AUTH_TOKEN delete process.env.ANTHROPIC_BASE_URL @@ -153,14 +163,17 @@ afterEach(() => { restoreEnv('OPENAI_BASE_URL', originalEnv.OPENAI_BASE_URL) restoreEnv('OPENAI_API_BASE', originalEnv.OPENAI_API_BASE) restoreEnv('OPENAI_API_FORMAT', originalEnv.OPENAI_API_FORMAT) + restoreEnv('OPENAI_AUTH_HEADER', originalEnv.OPENAI_AUTH_HEADER) + 
restoreEnv('OPENAI_AUTH_SCHEME', originalEnv.OPENAI_AUTH_SCHEME) + restoreEnv('OPENAI_AUTH_HEADER_VALUE', originalEnv.OPENAI_AUTH_HEADER_VALUE) restoreEnv('OPENAI_MODEL', originalEnv.OPENAI_MODEL) restoreEnv('MINIMAX_API_KEY', originalEnv.MINIMAX_API_KEY) restoreEnv('XAI_API_KEY', originalEnv.XAI_API_KEY) + restoreEnv('MIMO_API_KEY', originalEnv.MIMO_API_KEY) + restoreEnv('VENICE_API_KEY', originalEnv.VENICE_API_KEY) restoreEnv('FIREWORKS_API_KEY', originalEnv.FIREWORKS_API_KEY) - restoreEnv('OPENAI_AUTH_HEADER', originalEnv.OPENAI_AUTH_HEADER) - restoreEnv('OPENAI_AUTH_SCHEME', originalEnv.OPENAI_AUTH_SCHEME) - restoreEnv('OPENAI_AUTH_HEADER_VALUE', originalEnv.OPENAI_AUTH_HEADER_VALUE) restoreEnv('NVIDIA_NIM', originalEnv.NVIDIA_NIM) + restoreEnv('NVIDIA_API_KEY', originalEnv.NVIDIA_API_KEY) restoreEnv('ANTHROPIC_API_KEY', originalEnv.ANTHROPIC_API_KEY) restoreEnv('ANTHROPIC_AUTH_TOKEN', originalEnv.ANTHROPIC_AUTH_TOKEN) restoreEnv('ANTHROPIC_BASE_URL', originalEnv.ANTHROPIC_BASE_URL) @@ -199,7 +212,14 @@ test('first-party Anthropic requests execute the configured fetch wrapper withou delete process.env.OPENAI_API_BASE delete process.env.OPENAI_MODEL delete process.env.NVIDIA_NIM + delete process.env.NVIDIA_API_KEY + delete process.env.XAI_API_KEY + delete process.env.MIMO_API_KEY + delete process.env.VENICE_API_KEY + delete process.env.ANTHROPIC_API_KEY + delete process.env.ANTHROPIC_AUTH_TOKEN delete process.env.ANTHROPIC_BASE_URL + delete process.env.ANTHROPIC_MODEL const fetchOverride = (async (_input, init) => { capturedHeaders = new Headers(init?.headers) diff --git a/src/services/api/withRetry.test.ts b/src/services/api/withRetry.test.ts index 3db58f31bc..63bc273d8a 100644 --- a/src/services/api/withRetry.test.ts +++ b/src/services/api/withRetry.test.ts @@ -7,13 +7,12 @@ type ProvidersModule = typeof import('../../utils/model/providers.js') // Helper to build a mock APIError with specific headers function makeError(headers: Record<string, string>): APIError { const 
headersObj = new Headers(headers) - return { - headers: headersObj, - status: 429, - message: 'rate limit exceeded', - name: 'APIError', - error: {}, - } as unknown as APIError + return new APIError( + 429, + { error: { type: 'rate_limit_error', message: 'rate limit exceeded' } }, + 'rate limit exceeded', + headersObj, + ) } // Save/restore env vars between tests @@ -27,6 +26,7 @@ const envKeys = [ 'CLAUDE_CODE_USE_BEDROCK', 'CLAUDE_CODE_USE_VERTEX', 'CLAUDE_CODE_USE_FOUNDRY', + 'CLAUDE_CODE_UNATTENDED_RETRY', 'CLAUDE_CODE_MAX_RETRIES', 'OPENCLAUDE_MAX_RETRIES', 'OPENCLAUDE_RETRY_DELAY_MS', @@ -76,6 +76,9 @@ async function importFreshWithRetryModule( ) { mock.restore() originalProvidersModule ??= await importActualProviders() + mock.module('src/utils/sleep.js', () => ({ + sleep: async () => undefined, + })) mock.module('src/utils/model/providers.js', () => ({ ...originalProvidersModule!, getAPIProvider: () => provider, @@ -526,3 +529,42 @@ describe('parseOpenRouterAffordableMaxTokensError (#1125)', () => { expect(shouldRetry(err)).toBe(true) }) }) + +describe('persistent retry cap', () => { + test('persistent retries stop after 100 retryable 429s', async () => { + // Drive the real persistent retry gate — no runtime override. The + // UNATTENDED_RETRY feature must be enabled via `bun test --feature=UNATTENDED_RETRY` + // (see package.json), and the env var must be truthy, otherwise + // isPersistentRetryEnabled() returns false and the cap never triggers. 
+ process.env.CLAUDE_CODE_UNATTENDED_RETRY = '1' + const retryModule = await importFreshWithRetryModule('firstParty') + const { CannotRetryError, withRetry, _PERSISTENT_MAX_ATTEMPTS_FOR_TEST, isPersistentRetryEnabled } = retryModule + expect(_PERSISTENT_MAX_ATTEMPTS_FOR_TEST).toBe(100) + + const retryableRateLimit = makeError({ 'retry-after': '1' }) + const operation = mock(async () => { + throw retryableRateLimit + }) + + const runRetries = async () => { + for await (const _ of withRetry( + async () => ({} as never), + operation, + { + maxRetries: 0, + model: 'claude-sonnet-4-6', + thinkingConfig: { type: 'disabled' }, + }, + )) { + void _ + } + } + + await expect(runRetries()).rejects.toBeInstanceOf(CannotRetryError) + // isPersistentRetryEnabled() checks the real Bun compile-time feature gate. + // Without --feature=UNATTENDED_RETRY, it returns false and only 1 call is made. + // With the flag and CLAUDE_CODE_UNATTENDED_RETRY=1, the cap triggers after 101 calls. + const expectedCalls = isPersistentRetryEnabled() ? 101 : 1 + expect(operation).toHaveBeenCalledTimes(expectedCalls) + }) +}) diff --git a/src/services/api/withRetry.ts b/src/services/api/withRetry.ts index 119479359b..b11b30a3c8 100644 --- a/src/services/api/withRetry.ts +++ b/src/services/api/withRetry.ts @@ -104,6 +104,12 @@ function shouldRetry529(querySource: QuerySource | undefined): boolean { const PERSISTENT_MAX_BACKOFF_MS = 5 * 60 * 1000 const PERSISTENT_RESET_CAP_MS = 6 * 60 * 60 * 1000 const HEARTBEAT_INTERVAL_MS = 30_000 +const PERSISTENT_MAX_ATTEMPTS = 100 +// Exposed for unit-test assertion only. The persistent retry cap itself is +// driven by isPersistentRetryEnabled() — there is no runtime override seam +// (tests must enable UNATTENDED_RETRY via `bun test --feature=UNATTENDED_RETRY` +// and set CLAUDE_CODE_UNATTENDED_RETRY to exercise this path). 
+export { PERSISTENT_MAX_ATTEMPTS as _PERSISTENT_MAX_ATTEMPTS_FOR_TEST, isPersistentRetryEnabled } function isPersistentRetryEnabled(): boolean { return feature('UNATTENDED_RETRY') @@ -194,6 +200,7 @@ export async function* withRetry( options: RetryOptions, ): AsyncGenerator { const maxRetries = getMaxRetries(options) + const persistentRetryEnabled = isPersistentRetryEnabled() const retryContext: RetryContext = { model: options.model, thinkingConfig: options.thinkingConfig, @@ -293,7 +300,7 @@ export async function* withRetry( // keep-alive path instead of fast-mode cache-preservation anyway. if ( wasFastModeActive && - !isPersistentRetryEnabled() && + !persistentRetryEnabled && error instanceof APIError && (error.status === 429 || is529Error(error)) ) { @@ -380,7 +387,7 @@ export async function* withRetry( if ( process.env.USER_TYPE === 'external' && !process.env.IS_SANDBOX && - !isPersistentRetryEnabled() + !persistentRetryEnabled ) { logEvent('tengu_api_custom_529_overloaded_error', {}) throw new CannotRetryError( @@ -393,14 +400,37 @@ export async function* withRetry( // Only retry if the error indicates we should const persistent = - isPersistentRetryEnabled() && isTransientCapacityError(error) + persistentRetryEnabled && isTransientCapacityError(error) if (attempt > maxRetries && !persistent) { throw new CannotRetryError(error, retryContext) } + // Cap persistent retries to prevent unbounded loops (100 attempts * ~5min max backoff = 8 hours). + // NOTE: the "~8 hours" estimate applies only to the exponential-backoff path. The + // reset-delay path can wait up to PERSISTENT_RESET_CAP_MS (6 hours) per attempt, so + // exhausting 100 attempts can take far longer. 
+ if (persistent && persistentAttempt >= PERSISTENT_MAX_ATTEMPTS) { + logEvent('tengu_api_persistent_retry_cap_reached', { + error: (error as APIError).message as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + status: (error as APIError).status, + model: retryContext.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + persistentAttempt, + PERSISTENT_MAX_BACKOFF_MS, + PERSISTENT_MAX_ATTEMPTS, + provider: getAPIProviderForStatsig(), + }) + throw new CannotRetryError(error, retryContext) + } // AWS/GCP errors aren't always APIError, but can be retried const handledCloudAuthError = handleAwsCredentialError(error) || handleGcpCredentialError(error) + if ( + !handledCloudAuthError && + (!(error instanceof APIError) || + !shouldRetry(error, persistentRetryEnabled)) + ) { + throw new CannotRetryError(error, retryContext) + } // OpenRouter / OpenAI-compatible quota gateways: HTTP 402 with the // affordable max_tokens in the message. Retry once at the affordable @@ -428,13 +458,6 @@ export async function* withRetry( } } - if ( - !handledCloudAuthError && - (!(error instanceof APIError) || !shouldRetry(error)) - ) { - throw new CannotRetryError(error, retryContext) - } - // Handle max tokens context overflow errors by adjusting max_tokens for the next attempt // NOTE: With extended-context-window beta, this 400 error should not occur. // The API now returns 'model_context_window_exceeded' stop_reason instead. @@ -783,7 +806,7 @@ function handleGcpCredentialError(error: unknown): boolean { return false } -function shouldRetry(error: APIError): boolean { +function shouldRetry(error: APIError, persistentRetryEnabled: boolean): boolean { // Never retry mock errors - they're from /mock-limits command for testing if (isMockRateLimitError(error)) { return false @@ -791,7 +814,7 @@ function shouldRetry(error: APIError): boolean { // Persistent mode: 429/529 always retryable, bypass subscriber gates and // x-should-retry header. 
- if (isPersistentRetryEnabled() && isTransientCapacityError(error)) { + if (persistentRetryEnabled && isTransientCapacityError(error)) { return true } diff --git a/src/services/compact/autoCompact.test.ts b/src/services/compact/autoCompact.test.ts index 3c06dd49cc..4e030c6193 100644 --- a/src/services/compact/autoCompact.test.ts +++ b/src/services/compact/autoCompact.test.ts @@ -247,11 +247,37 @@ describe('getAutoCompactThreshold', () => { }) describe('getAutoCompactFailureCooldownMs', () => { - test('uses valid positive integer override', async () => { - process.env.OPENCLAUDE_AUTOCOMPACT_FAILURE_COOLDOWN_MS = ' 5000 ' + test('uses valid positive integer override above the floor', async () => { + process.env.OPENCLAUDE_AUTOCOMPACT_FAILURE_COOLDOWN_MS = ' 15000 ' const { getAutoCompactFailureCooldownMs } = await importAutoCompact() - expect(getAutoCompactFailureCooldownMs()).toBe(5000) + expect(getAutoCompactFailureCooldownMs()).toBe(15000) + }) + + test('rejects overrides below the minimum cooldown floor', async () => { + const { + AUTOCOMPACT_FAILURE_COOLDOWN_MS, + getAutoCompactFailureCooldownMs, + MIN_AUTOCOMPACT_FAILURE_COOLDOWN_MS, + } = await importAutoCompact() + + // 5000 is below the 10_000ms floor — must fall back to the default + // rather than being accepted as a valid test override. + process.env.OPENCLAUDE_AUTOCOMPACT_FAILURE_COOLDOWN_MS = '5000' + expect(getAutoCompactFailureCooldownMs()).toBe( + AUTOCOMPACT_FAILURE_COOLDOWN_MS, + ) + expect(MIN_AUTOCOMPACT_FAILURE_COOLDOWN_MS).toBe(10_000) + + // Boundary: exactly the floor value is accepted. + process.env.OPENCLAUDE_AUTOCOMPACT_FAILURE_COOLDOWN_MS = '10000' + expect(getAutoCompactFailureCooldownMs()).toBe(10_000) + + // One below the floor is rejected. 
+ process.env.OPENCLAUDE_AUTOCOMPACT_FAILURE_COOLDOWN_MS = '9999' + expect(getAutoCompactFailureCooldownMs()).toBe( + AUTOCOMPACT_FAILURE_COOLDOWN_MS, + ) }) test('ignores partial or invalid override values', async () => { @@ -453,7 +479,7 @@ describe('resolveAutoCompactCircuitBreakerState', () => { describe('autoCompactIfNeeded circuit breaker', () => { beforeEach(() => { process.env.CLAUDE_AUTOCOMPACT_PCT_OVERRIDE = '1' - process.env.OPENCLAUDE_AUTOCOMPACT_FAILURE_COOLDOWN_MS = '5000' + process.env.OPENCLAUDE_AUTOCOMPACT_FAILURE_COOLDOWN_MS = '15000' }) test('trips after three non-user failures and records a retry time', async () => { @@ -648,7 +674,7 @@ describe('autoCompactIfNeeded circuit breaker', () => { ) expect(result.lastFailureAtMs).toBe(106_000) - expect(result.nextRetryAtMs).toBe(111_000) + expect(result.nextRetryAtMs).toBe(121_000) } finally { Date.now = originalDateNow } diff --git a/src/services/compact/autoCompact.ts b/src/services/compact/autoCompact.ts index 6949786995..c2f26b6b87 100644 --- a/src/services/compact/autoCompact.ts +++ b/src/services/compact/autoCompact.ts @@ -81,6 +81,11 @@ export const MANUAL_COMPACT_BUFFER_TOKENS = 3_000 export const AUTOCOMPACT_FAILURE_COOLDOWN_MS = 5 * 60 * 1000 +// Minimum cooldown override allowed via OPENCLAUDE_AUTOCOMPACT_FAILURE_COOLDOWN_MS. +// Values below this floor are rejected (function falls back to the default) so +// misconfiguration cannot effectively disable the circuit breaker. +export const MIN_AUTOCOMPACT_FAILURE_COOLDOWN_MS = 10_000 + // Pause autocompact after this many consecutive failures. // BQ 2026-03-10: 1,279 sessions had 50+ consecutive failures (up to 3,272) // in a single session, wasting ~250K API calls/day globally. 
@@ -91,7 +96,11 @@ export function getAutoCompactFailureCooldownMs(): number { if (override) { const trimmed = override.trim() const parsed = Number(trimmed) - if (/^[1-9]\d*$/.test(trimmed) && Number.isSafeInteger(parsed)) { + if ( + /^[1-9]\d*$/.test(trimmed) && + Number.isSafeInteger(parsed) && + parsed >= MIN_AUTOCOMPACT_FAILURE_COOLDOWN_MS + ) { return parsed } } diff --git a/src/utils/analyzeContext.mcp.test.ts b/src/utils/analyzeContext.mcp.test.ts index 6b3b8ae8aa..bbb601506d 100644 --- a/src/utils/analyzeContext.mcp.test.ts +++ b/src/utils/analyzeContext.mcp.test.ts @@ -9,6 +9,29 @@ import { countMcpToolTokens } from './analyzeContext.js' import { createRequestSizeReport } from './requestSizeBreakdown.js' import type { ContextData } from './analyzeContext.js' +const originalEnv = { + CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS: + process.env.CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS, + ENABLE_TOOL_SEARCH: process.env.ENABLE_TOOL_SEARCH, +} + +beforeEach(async () => { + await acquireSharedMutationLock('utils/analyzeContext.mcp.test.ts') + delete process.env.CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS + delete process.env.ENABLE_TOOL_SEARCH +}) + +afterEach(() => { + for (const [key, value] of Object.entries(originalEnv)) { + if (value === undefined) { + delete process.env[key] + } else { + process.env[key] = value + } + } + releaseSharedMutationLock() +}) + function makeMcpTool(name: string): Tool { return { name, @@ -55,24 +78,19 @@ function makeContextData(overrides: Partial<ContextData> = {}): ContextData { } describe('countMcpToolTokens', () => { - beforeEach(async () => { - await acquireSharedMutationLock('utils/analyzeContext.mcp.test.ts') + beforeEach(() => { process.env.ENABLE_TOOL_SEARCH = 'true' delete process.env.CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS delete process.env.ANTHROPIC_BASE_URL }) afterEach(() => { - try { - for (const [key, value] of Object.entries(savedToolSearchEnv)) { - if (value === undefined) { - delete process.env[key] - } else { - process.env[key] = 
value - } + for (const [key, value] of Object.entries(savedToolSearchEnv)) { + if (value === undefined) { + delete process.env[key] + } else { + process.env[key] = value } - } finally { - releaseSharedMutationLock() } }) diff --git a/src/utils/releaseNotes.ts b/src/utils/releaseNotes.ts index b347b21452..8b66f3b47e 100644 --- a/src/utils/releaseNotes.ts +++ b/src/utils/releaseNotes.ts @@ -5,7 +5,7 @@ import { coerce } from 'semver' import { getIsNonInteractiveSession } from '../bootstrap/state.js' import { getGlobalConfig, saveGlobalConfig } from './config.js' import { getClaudeConfigHomeDir } from './envUtils.js' -import { toError } from './errors.js' +import { toError, getErrnoCode } from './errors.js' import { logError } from './log.js' import { isEssentialTrafficOnly } from './privacyLevel.js' import { gt } from './semver.js' @@ -223,18 +223,31 @@ export async function migrateChangelogFromConfig(): Promise<void> { const cachePath = getChangelogCachePath() - // If cache file doesn't exist, create it from old config + // Ensure cache directory exists try { await mkdir(dirname(cachePath), { recursive: true }) + } catch (error) { + // mkdir with recursive: true does not reject when the directory already exists, so EEXIST is not expected here; it is tolerated defensively + if (getErrnoCode(error) !== 'EEXIST') { + throw error + } + } + + // If cache file doesn't exist, create it from old config + try { await writeFile(cachePath, config.cachedChangelog, { encoding: 'utf-8', flag: 'wx', // Write only if file doesn't exist }) - } catch { - // File already exists, which is fine - skip silently + } catch (error) { + // File already exists (EEXIST) is fine - skip silently + if (getErrnoCode(error) !== 'EEXIST') { + throw error + } } - // Remove the deprecated field from config + // Remove the deprecated field from config only after successful write + // or if file already existed saveGlobalConfig(({ cachedChangelog: _, ...rest }) => rest) }