diff --git a/.github/workflows/npm-app-release-staging.yml b/.github/workflows/npm-app-release-staging.yml
index 58c6a1ade..c1407eaf3 100644
--- a/.github/workflows/npm-app-release-staging.yml
+++ b/.github/workflows/npm-app-release-staging.yml
@@ -134,7 +134,7 @@ jobs:
       new-version: ${{ needs.prepare-and-commit-staging.outputs.new_version }}
       artifact-name: updated-staging-package
       checkout-ref: ${{ github.event.pull_request.head.sha }}
-      env-overrides: '{"NEXT_PUBLIC_CB_ENVIRONMENT": "prod", "NEXT_PUBLIC_CODEBUFF_BACKEND_URL": "backend-pr-221-we0m.onrender.com"}'
+      env-overrides: '{"NEXT_PUBLIC_CB_ENVIRONMENT": "prod", "NEXT_PUBLIC_CODEBUFF_BACKEND_URL": "backend-pr-311-2xab.onrender.com"}'
       secrets: inherit
 
   # Create GitHub prerelease with all binaries
diff --git a/backend/package.json b/backend/package.json
index aeac65984..c2c1bfbac 100644
--- a/backend/package.json
+++ b/backend/package.json
@@ -24,6 +24,7 @@
   },
   "dependencies": {
     "@ai-sdk/google-vertex": "3.0.6",
+    "benchify": "^0.1.0-alpha.41",
     "@ai-sdk/openai": "2.0.11",
     "@codebuff/billing": "workspace:*",
     "@codebuff/common": "workspace:*",
diff --git a/backend/src/process-str-replace.ts b/backend/src/process-str-replace.ts
index 16821ac71..bcb7f71f0 100644
--- a/backend/src/process-str-replace.ts
+++ b/backend/src/process-str-replace.ts
@@ -21,8 +21,22 @@ export async function processStrReplace(
     }
   | { tool: 'str_replace'; path: string; error: string }
 > {
+  logger.debug(
+    {
+      path,
+      replacementsCount: replacements.length,
+    },
+    'processStrReplace: Starting to process str_replace',
+  )
+
   const initialContent = await initialContentPromise
   if (initialContent === null) {
+    logger.warn(
+      {
+        path,
+      },
+      'processStrReplace: File does not exist',
+    )
     return {
       tool: 'str_replace',
       path,
@@ -31,6 +45,15 @@
     }
   }
 
+  logger.debug(
+    {
+      path,
+      initialContentLength: initialContent.length,
+      hasContent: !!initialContent,
+    },
+    'processStrReplace: Successfully read initial file content',
+  )
+
   // Process each old/new string pair
   let currentContent = initialContent
   let messages: string[] = []
diff --git a/backend/src/run-agent-step.ts b/backend/src/run-agent-step.ts
index c06f37a54..f285e347a 100644
--- a/backend/src/run-agent-step.ts
+++ b/backend/src/run-agent-step.ts
@@ -289,13 +289,22 @@ export const runAgentStep = async (
 
   const stream = getStream(messagesWithSystem(agentMessages, system))
 
+  logger.info(
+    {
+      agentStepId,
+      userInputId,
+      agentType,
+      agentId: agentState.agentId,
+    },
+    'run-agent-step: About to call processStreamWithTools',
+  )
+
   const {
     toolCalls,
     toolResults: newToolResults,
     state,
     fullResponse: fullResponseAfterStream,
    fullResponseChunks,
-    messageId,
   } = await processStreamWithTools({
     stream,
     ws,
@@ -314,6 +323,17 @@
     onResponseChunk,
     fullResponse,
   })
+
+  logger.info(
+    {
+      agentStepId,
+      userInputId,
+      toolCallsCount: toolCalls.length,
+      toolResultsCount: newToolResults.length,
+      toolCallTypes: toolCalls.map((tc) => tc.toolName),
+    },
+    'run-agent-step: Completed processStreamWithTools',
+  )
   toolResults.push(...newToolResults)
   fullResponse = fullResponseAfterStream
 
@@ -403,7 +423,7 @@
     agentState,
     fullResponse,
     shouldEndTurn,
-    messageId,
+    messageId: null,
   }
 }
 
diff --git a/backend/src/tools/batch-str-replace.ts b/backend/src/tools/batch-str-replace.ts
new file mode 100644
index 000000000..4011ed695
--- /dev/null
+++ b/backend/src/tools/batch-str-replace.ts
@@ -0,0 +1,754 @@
+import { handleStrReplace } from './handlers/tool/str-replace'
+import { getFileProcessingValues } from './handlers/tool/write-file'
+import { logger } from '../util/logger'
+import { Benchify } from 'benchify'
+import { env } from '@codebuff/internal/env'
+import { requestToolCall } from '../websockets/websocket-action'
+import { createPatch } from 'diff'
+import type { CodebuffToolCall } from '@codebuff/common/tools/list'
+import type { ToolResultPart } from '@codebuff/common/types/messages/content-part'
+import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
+import type { AgentTemplate } from '../templates/types'
+import type { ProjectFileContext } from '@codebuff/common/util/file'
+import type { WebSocket } from 'ws'
+import { file } from 'bun'
+
+export type DeferredStrReplace = {
+  toolCall: CodebuffToolCall<'str_replace'>
+}
+
+export type BatchStrReplaceState = {
+  deferredStrReplaces: DeferredStrReplace[]
+  otherToolsQueue: any[]
+  strReplacePhaseComplete: boolean
+  failures: any[]
+}
+
+const BENCHIFY_FILE_TYPES = ['tsx', 'ts', 'jsx', 'js']
+
+// Global Benchify client instance
+let benchifyClient: Benchify | null = null
+
+function getBenchifyClient(): Benchify | null {
+  if (!benchifyClient) {
+    let benchifyApiKey: string | undefined
+    try {
+      // Log available environment variables (partial for debugging)
+      const envKeys = Object.keys(process.env)
+        .filter((key) => key.includes('BENCHIFY') || key.includes('API'))
+        .slice(0, 10) // Limit to first 10 for safety
+
+      benchifyApiKey = env.BENCHIFY_API_KEY
+      logger.info(
+        {
+          hasApiKey: !!benchifyApiKey,
+          apiKeyLength: benchifyApiKey?.length || 0,
+          apiKeyPrefix: benchifyApiKey?.substring(0, 8) || 'none',
+          availableEnvKeys: envKeys,
+          nodeEnv: process.env.NODE_ENV,
+        },
+        'getBenchifyClient: Attempting to access BENCHIFY_API_KEY from environment',
+      )
+    } catch (error) {
+      logger.warn(
+        {
+          error: error instanceof Error ? error.message : String(error),
+          nodeEnv: process.env.NODE_ENV,
+        },
+        'getBenchifyClient: Failed to access BENCHIFY_API_KEY from environment',
+      )
+      return null
+    }
+
+    if (!benchifyApiKey) {
+      logger.warn(
+        'getBenchifyClient: No BENCHIFY_API_KEY found, returning null',
+      )
+      return null
+    }
+
+    try {
+      benchifyClient = new Benchify({
+        apiKey: benchifyApiKey,
+      })
+      logger.info('getBenchifyClient: Successfully created Benchify client')
+    } catch (error) {
+      logger.error(
+        {
+          error: error instanceof Error ? error.message : String(error),
+        },
+        'getBenchifyClient: Failed to create Benchify client',
+      )
+      return null
+    }
+  }
+  return benchifyClient
+}
+
+export async function executeBatchStrReplaces({
+  deferredStrReplaces,
+  toolCalls,
+  toolResults,
+  ws,
+  agentTemplate,
+  fileContext,
+  agentStepId,
+  clientSessionId,
+  userInputId,
+  fullResponse,
+  onResponseChunk,
+  state,
+  userId,
+}: {
+  deferredStrReplaces: DeferredStrReplace[]
+  toolCalls: (CodebuffToolCall | any)[]
+  toolResults: ToolResultPart[]
+  ws: WebSocket
+  agentTemplate: AgentTemplate
+  fileContext: ProjectFileContext
+  agentStepId: string
+  clientSessionId: string
+  userInputId: string
+  fullResponse: string
+  onResponseChunk: (chunk: string | PrintModeEvent) => void
+  state: Record<string, any>
+  userId: string | undefined
+}) {
+  logger.info(
+    {
+      deferredCount: deferredStrReplaces.length,
+      agentStepId,
+      userInputId,
+      userId,
+    },
+    'executeBatchStrReplaces: Starting batch execution',
+  )
+
+  if (deferredStrReplaces.length === 0) {
+    logger.info(
+      'executeBatchStrReplaces: No deferred str_replace operations, returning early',
+    )
+    return
+  }
+
+  const batchPromises: Promise<void>[] = []
+  let previousPromise = Promise.resolve()
+
+  // Track successfully edited files for benchify call
+  const editedFiles: { path: string; contents: string }[] = []
+  // Track intended changes from LLM for benchify call (even if str_replace fails)
+  const intendedChanges: { path: string; contents: string }[] = []
+  // Track original file contents before any modifications
+  const originalContents: Record<string, string> = {}
+
+  // Execute all str_replace calls in sequence to maintain file consistency
+  for (let i = 0; i < deferredStrReplaces.length; i++) {
+    const { toolCall } = deferredStrReplaces[i]
+
+    // Read original content before any modifications (only once per file)
+    const isFileEligibleForBenchify = benchifyCanFixLanguage(
+      toolCall.input.path,
+    )
+
+    if (!isFileEligibleForBenchify) {
+      logger.debug(
+        {
+          path: toolCall.input.path,
+          supportedExtensions: BENCHIFY_FILE_TYPES,
+          agentStepId,
+          userInputId,
+        },
+        'executeBatchStrReplaces: File not eligible for benchify (unsupported file type)',
+      )
+    }
+
+    if (isFileEligibleForBenchify && !originalContents[toolCall.input.path]) {
+      try {
+        const originalContent = await extractOriginalContent(
+          toolCall.input.path,
+          fileContext,
+        )
+        if (originalContent) {
+          originalContents[toolCall.input.path] = originalContent
+        }
+      } catch (error) {
+        logger.warn(
+          {
+            error: error instanceof Error ? error.message : String(error),
+            path: toolCall.input.path,
+          },
+          'Failed to read original content for benchify',
+        )
+      }
+    }
+
+    // Extract intended content from str_replace operation before attempting execution
+    if (
+      benchifyCanFixLanguage(toolCall.input.path) &&
+      originalContents[toolCall.input.path]
+    ) {
+      try {
+        const intendedContent = await extractIntendedContent(
+          toolCall,
+          originalContents[toolCall.input.path],
+        )
+        if (intendedContent) {
+          const existingIndex = intendedChanges.findIndex(
+            (f) => f.path === toolCall.input.path,
+          )
+          if (existingIndex >= 0) {
+            intendedChanges[existingIndex].contents = intendedContent
+          } else {
+            intendedChanges.push({
+              path: toolCall.input.path,
+              contents: intendedContent,
+            })
+          }
+          logger.debug(
+            {
+              path: toolCall.input.path,
+              intendedContentLength: intendedContent.length,
+              agentStepId,
+              userInputId,
+            },
+            'executeBatchStrReplaces: Successfully extracted intended content for benchify',
+          )
+        }
+      } catch (error) {
+        logger.warn(
+          {
+            error: error instanceof Error ? error.message : String(error),
+            path: toolCall.input.path,
+          },
+          'executeBatchStrReplaces: Failed to extract intended content for benchify',
+        )
+      }
+    } else {
+      logger.debug(
+        {
+          path: toolCall.input.path,
+          canFixLanguage: benchifyCanFixLanguage(toolCall.input.path),
+          hasOriginalContent: !!originalContents[toolCall.input.path],
+          agentStepId,
+          userInputId,
+        },
+        'executeBatchStrReplaces: Skipping intended content extraction (not benchify-compatible file or no original content)',
+      )
+    }
+
+    // Chain each str_replace to the previous one to ensure proper ordering
+    const strReplacePromise = previousPromise.then(async () => {
+      try {
+        const { result } = handleStrReplace({
+          previousToolCallFinished: Promise.resolve(),
+          toolCall,
+          requestClientToolCall: async () => {
+            throw new Error('Client tool calls not supported in batch mode')
+          },
+          writeToClient: onResponseChunk,
+          getLatestState: () => getFileProcessingValues(state),
+          state: { ...state, ws },
+        })
+
+        const toolResult = await result
+
+        if (toolResult) {
+          const toolResultPart: ToolResultPart = {
+            type: 'tool-result',
+            toolName: 'str_replace',
+            toolCallId: toolCall.toolCallId,
+            output: toolResult,
+          }
+
+          toolResults.push(toolResultPart)
+
+          onResponseChunk({
+            type: 'tool_result',
+            toolCallId: toolCall.toolCallId,
+            output: toolResult,
+          })
+
+          // Add to message history
+          state.messages.push({
+            role: 'tool' as const,
+            content: toolResultPart,
+          })
+
+          // Track successfully edited files
+          if (
+            Array.isArray(toolResult) &&
+            toolResult.length > 0 &&
+            benchifyCanFixLanguage(toolCall.input.path)
+          ) {
+            const result = toolResult[0]
+            if (
+              result.type === 'json' &&
+              result.value &&
+              'content' in result.value
+            ) {
+              const existingFileIndex = editedFiles.findIndex(
+                (f) => f.path === toolCall.input.path,
+              )
+              const fileContent = result.value.content as string
+
+              if (existingFileIndex >= 0) {
+                // Update existing file with latest content
+                editedFiles[existingFileIndex].contents = fileContent
+              } else {
+                // Add new file to tracking
+                editedFiles.push({
+                  path: toolCall.input.path,
+                  contents: fileContent,
+                })
+              }
+            }
+          }
+        }
+      } catch (error) {
+        logger.error(
+          {
+            error:
+              error instanceof Error
+                ? {
+                    message: error.message,
+                    stack: error.stack,
+                    name: error.name,
+                  }
+                : error,
+            toolCallId: toolCall.toolCallId,
+            toolCallInput: JSON.stringify(toolCall.input, null, 2),
+            agentStepId,
+            userInputId,
+          },
+          `Error executing batched str_replace ${i + 1}/${deferredStrReplaces.length}`,
+        )
+
+        // Create error result
+        const errorResult: ToolResultPart = {
+          type: 'tool-result',
+          toolName: 'str_replace',
+          toolCallId: toolCall.toolCallId,
+          output: [
+            {
+              type: 'json',
+              value: {
+                errorMessage: `Batched str_replace failed: ${error instanceof Error ? error.message : String(error)}`,
+              },
+            },
+          ],
+        }
+
+        toolResults.push(errorResult)
+        onResponseChunk({
+          type: 'tool_result',
+          toolCallId: toolCall.toolCallId,
+          output: errorResult.output,
+        })
+
+        // Add to message history even for errors
+        state.messages.push({
+          role: 'tool' as const,
+          content: errorResult,
+        })
+
+        logger.info(
+          {
+            toolCallId: toolCall.toolCallId,
+            path: toolCall.input.path,
+            errorMessage:
+              error instanceof Error ? error.message : String(error),
+            agentStepId,
+            userInputId,
+          },
+          'executeBatchStrReplaces: str_replace failed, but continuing with batch execution for benchify',
+        )
+      }
+    })
+
+    // Add to toolCalls array
+    toolCalls.push(toolCall)
+    batchPromises.push(strReplacePromise)
+    previousPromise = strReplacePromise
+  }
+
+  // Wait for all batched operations to complete
+  await Promise.all(batchPromises)
+
+  logger.info(
+    {
+      totalOperations: deferredStrReplaces.length,
+      successfulEdits: editedFiles.length,
+      intendedChangesCount: intendedChanges.length,
+      benchifyEligibleFiles: deferredStrReplaces.filter((d) =>
+        benchifyCanFixLanguage(d.toolCall.input.path),
+      ).length,
+      agentStepId,
+      userInputId,
+    },
+    'executeBatchStrReplaces: Batch operations completed, summary before benchify call',
+  )
+
+  // Call benchify with intended changes (even if str_replace operations failed)
+  logger.info(
+    {
+      intendedChangesCount: intendedChanges.length,
+      editedFilesCount: editedFiles.length,
+      intendedChangeFiles: intendedChanges.map((f) => f.path),
+      editedFilesList: editedFiles.map((f) => f.path),
+      agentStepId,
+      userInputId,
+    },
+    'executeBatchStrReplaces: Preparing to call benchify',
+  )
+
+  const client = getBenchifyClient()
+  if (!client) {
+    logger.warn(
+      'executeBatchStrReplaces: No benchify client available, skipping benchify call',
+    )
+    return
+  }
+
+  if (intendedChanges.length === 0) {
+    logger.warn(
+      'executeBatchStrReplaces: No intended changes for benchify, skipping benchify call',
+    )
+    return
+  }
+
+  try {
+    const benchifyResult = await callBenchify(intendedChanges, {
+      agentStepId,
+      clientSessionId,
+      userInputId,
+      userId,
+    })
+
+    if (benchifyResult && benchifyResult.length > 0) {
+      logger.info(
+        {
+          benchifyResultCount: benchifyResult.length,
+          resultFiles: benchifyResult.map((r) => r.path),
+          agentStepId,
+          userInputId,
+        },
+        `executeBatchStrReplaces: Benchify returned ${benchifyResult.length} results, applying them`,
+      )
+
+      // Apply benchify results back to files
+      await applyBenchifyResults(benchifyResult, {
+        ws,
+        onResponseChunk,
+        state: { ...state, originalContents },
+        toolResults,
+        toolCalls: deferredStrReplaces.map((d) => d.toolCall),
+        userInputId,
+      })
+    }
+  } catch (error) {
+    logger.error(
+      {
+        error: error instanceof Error ? error.message : String(error),
+        intendedChangeFiles: intendedChanges.map((f) => f.path),
+        agentStepId,
+        userInputId,
+      },
+      'executeBatchStrReplaces: Failed to call benchify with intended changes',
+    )
+  }
+}
+
+/**
+ * Calls benchify API with the list of edited files
+ */
+async function callBenchify(
+  editedFiles: { path: string; contents: string }[],
+  context: {
+    agentStepId: string
+    clientSessionId: string
+    userInputId: string
+    userId: string | undefined
+  },
+): Promise<{ path: string; contents: string }[] | null> {
+  logger.info(
+    {
+      editedFilesCount: editedFiles.length,
+      editedFilesList: editedFiles.map((f) => f.path),
+      totalContentLength: editedFiles.reduce(
+        (sum, f) => sum + f.contents.length,
+        0,
+      ),
+      ...context,
+    },
+    'callBenchify: Starting benchify API call',
+  )
+
+  const client = getBenchifyClient()
+  if (!client) {
+    logger.error('callBenchify: No benchify client available')
+    return null
+  }
+
+  try {
+    logger.info(
+      {
+        fixTypes: ['string_literals'],
+        ...context,
+      },
+      'callBenchify: Calling client.runFixer',
+    )
+
+    const response = await client.runFixer(editedFiles, {
+      fix_types: ['string_literals'],
+    })
+
+    logger.info(
+      {
+        responseReceived: !!response,
+        responseLength: response?.length || 0,
+        responseFiles: response?.map((r) => r.path) || [],
+        responseContentLengths: response?.map((r) => r.contents.length) || [],
+        ...context,
+      },
+      'callBenchify: Benchify runFixer API response received successfully',
+    )
+
+    return response
+  } catch (error) {
+    logger.error(
+      {
+        error:
+          error instanceof Error
+            ? {
+                message: error.message,
+                stack: error.stack,
+                name: error.name,
+              }
+            : String(error),
+        editedFilesCount: editedFiles.length,
+        ...context,
+      },
+      'callBenchify: Failed to call benchify API',
+    )
+    throw error
+  }
+}
+
+/**
+ * Applies benchify results back to the file system and updates tool results
+ */
+async function applyBenchifyResults(
+  benchifyFiles: { path: string; contents: string }[],
+  context: {
+    ws: WebSocket
+    onResponseChunk: (chunk: string | PrintModeEvent) => void
+    state: Record<string, any>
+    toolResults: ToolResultPart[]
+    toolCalls: CodebuffToolCall<'str_replace'>[]
+    userInputId: string
+  },
+) {
+  logger.info(
+    {
+      benchifyFilesCount: benchifyFiles.length,
+      benchifyFilesList: benchifyFiles.map((f) => f.path),
+      toolCallsCount: context.toolCalls.length,
+      userInputId: context.userInputId,
+    },
+    'applyBenchifyResults: Starting to apply benchify results',
+  )
+
+  for (const benchifyFile of benchifyFiles) {
+    logger.debug(
+      {
+        fileName: benchifyFile.path,
+        contentLength: benchifyFile.contents.length,
+        userInputId: context.userInputId,
+      },
+      'applyBenchifyResults: Processing benchify file',
+    )
+    try {
+      // Find the corresponding tool call for this file
+      const relatedToolCall = context.toolCalls.find(
+        (tc) => tc.input.path === benchifyFile.path,
+      )
+
+      if (!relatedToolCall) {
+        logger.warn(
+          {
+            fileName: benchifyFile.path,
+            availableToolCallPaths: context.toolCalls.map(
+              (tc) => tc.input.path,
+            ),
+            userInputId: context.userInputId,
+          },
+          'applyBenchifyResults: No matching tool call found for benchify result',
+        )
+        continue
+      }
+
+      logger.debug(
+        {
+          fileName: benchifyFile.path,
+          relatedToolCallId: relatedToolCall.toolCallId,
+          userInputId: context.userInputId,
+        },
+        'applyBenchifyResults: Found matching tool call for benchify result',
+      )
+
+      // Get the original file content from our stored contents
+      const originalContent =
+        context.state.originalContents?.[benchifyFile.path]
+
+      if (!originalContent) {
+        logger.error(
+          { path: benchifyFile.path },
+          'Could not find original file content for diff generation',
+        )
+        continue
+      }
+
+      // Generate a proper unified diff patch
+      const patch = createPatch(
+        benchifyFile.path,
+        originalContent,
+        benchifyFile.contents,
+        '',
+        '',
+      )
+
+      // Request the client to apply the benchify changes as a patch
+      const toolCallResult = await requestToolCall(
+        context.ws,
+        context.userInputId,
+        'str_replace',
+        {
+          type: 'patch',
+          path: benchifyFile.path,
+          content: patch,
+        },
+      )
+
+      // Create a tool result indicating benchify was applied
+      const benchifyToolResult: ToolResultPart = {
+        type: 'tool-result',
+        toolName: 'str_replace',
+        toolCallId: relatedToolCall.toolCallId,
+        output: toolCallResult.output,
+      }
+
+      // Update the existing tool result
+      const existingResultIndex = context.toolResults.findIndex(
+        (tr) => tr.toolCallId === relatedToolCall.toolCallId,
+      )
+
+      if (existingResultIndex >= 0) {
+        context.toolResults[existingResultIndex] = benchifyToolResult
+      } else {
+        context.toolResults.push(benchifyToolResult)
+      }
+
+      // Notify client about the benchify update
+      context.onResponseChunk({
+        type: 'tool_result',
+        toolCallId: relatedToolCall.toolCallId,
+        output: benchifyToolResult.output,
+      })
+    } catch (error) {
+      logger.error(
+        { error, fileName: benchifyFile.path },
+        'Failed to apply benchify result to file',
+      )
+    }
+  }
+}
+
+/**
+ * Extracts the original file content before any modifications
+ */
+async function extractOriginalContent(
+  filePath: string,
+  fileContext: ProjectFileContext,
+): Promise<string | null> {
+  try {
+    const absolutePath = `${fileContext.projectRoot}/${filePath}`
+    const currentFile = await file(absolutePath)
+    return await currentFile.text()
+  } catch (error) {
+    logger.warn(
+      {
+        error: error instanceof Error ? error.message : String(error),
+        path: filePath,
+      },
+      'Failed to read original file content',
+    )
+    return null
+  }
+}
+
+/**
+ * Extracts the intended file content by applying str_replace operations to the current file
+ */
+async function extractIntendedContent(
+  toolCall: CodebuffToolCall<'str_replace'>,
+  originalContent: string,
+): Promise<string | null> {
+  try {
+    let currentContent = originalContent
+
+    // Apply all replacements to get the intended content
+    for (const replacement of toolCall.input.replacements) {
+      const { old, new: newStr, allowMultiple } = replacement
+
+      if (allowMultiple) {
+        currentContent = currentContent.replaceAll(old, newStr)
+      } else {
+        // Find the first occurrence and replace it
+        const index = currentContent.indexOf(old)
+        if (index !== -1) {
+          currentContent =
+            currentContent.substring(0, index) +
+            newStr +
+            currentContent.substring(index + old.length)
+          logger.debug(
+            {
+              old: old.substring(0, 100) + (old.length > 100 ? '...' : ''),
+              new:
+                newStr.substring(0, 100) + (newStr.length > 100 ? '...' : ''),
+              path: toolCall.input.path,
+            },
+            'extractIntendedContent: Successfully applied replacement for benchify',
+          )
+        } else {
+          // If we can't find the old string, log it but continue with other replacements
+          logger.warn(
+            {
+              old: old.substring(0, 200) + (old.length > 200 ? '...' : ''),
+              new:
+                newStr.substring(0, 100) + (newStr.length > 100 ? '...' : ''),
+              allowMultiple,
+              path: toolCall.input.path,
+              contentLength: currentContent.length,
+            },
+            'extractIntendedContent: Failed to find old string in currentContent for benchify',
+          )
+        }
+      }
+    }
+
+    return currentContent
+  } catch (error) {
+    logger.warn(
+      {
+        error: error instanceof Error ? error.message : String(error),
+        path: toolCall.input.path,
+      },
+      'Failed to apply replacements for intended content extraction',
+    )
+    return null
+  }
+}
+
+function benchifyCanFixLanguage(path: string): boolean {
+  return BENCHIFY_FILE_TYPES.some((extension) => path.endsWith(`.${extension}`))
+}
diff --git a/backend/src/tools/handlers/tool/write-file.ts b/backend/src/tools/handlers/tool/write-file.ts
index 4b912a061..261cff72d 100644
--- a/backend/src/tools/handlers/tool/write-file.ts
+++ b/backend/src/tools/handlers/tool/write-file.ts
@@ -230,7 +230,7 @@ export async function postStreamProcessing(
   if (errors.length > 0) {
     if (errors.length > 1) {
       throw new Error(
-        `Internal error: Unexpected number of matching errors for ${{ toolCall }}, found ${errors.length}, expected 1`,
+        `Internal error: Unexpected number of matching errors for ${JSON.stringify(toolCall)}, found ${errors.length}, expected 1`,
       )
     }
 
@@ -251,7 +251,7 @@
   )
   if (changes.length !== 1) {
     throw new Error(
-      `Internal error: Unexpected number of matching changes for ${{ toolCall }}, found ${changes.length}, expected 1`,
+      `Internal error: Unexpected number of matching changes for ${JSON.stringify(toolCall)}, found ${changes.length}, expected 1`,
     )
   }
 
diff --git a/backend/src/tools/stream-parser.ts b/backend/src/tools/stream-parser.ts
index 1b0c69b71..be820526a 100644
--- a/backend/src/tools/stream-parser.ts
+++ b/backend/src/tools/stream-parser.ts
@@ -8,9 +8,14 @@ import { buildArray } from '@codebuff/common/util/array'
 import { generateCompactId } from '@codebuff/common/util/string'
 
 import { expireMessages } from '../util/messages'
+import { logger } from '../util/logger'
 import { sendAction } from '../websockets/websocket-action'
 import { processStreamWithTags } from '../xml-stream-parser'
 import { executeCustomToolCall, executeToolCall } from './tool-executor'
+import {
+  executeBatchStrReplaces,
+  BatchStrReplaceState,
+} from './batch-str-replace'
 
 import type { CustomToolCall } from './tool-executor'
 import type { StreamChunk } from '../llm-apis/vercel-ai-sdk/ai-sdk'
@@ -32,7 +37,7 @@ export type ToolCallError = {
 } & Omit
 
 export async function processStreamWithTools(options: {
-  stream: AsyncGenerator<StreamChunk, string | null>
+  stream: AsyncGenerator<StreamChunk>
   ws: WebSocket
   agentStepId: string
   clientSessionId: string
@@ -74,6 +79,15 @@ export async function processStreamWithTools(options: {
   const { promise: streamDonePromise, resolve: resolveStreamDonePromise } =
     Promise.withResolvers<void>()
   let previousToolCallFinished = streamDonePromise
+
+  // Two-phase execution state
+  const batchState: BatchStrReplaceState = {
+    deferredStrReplaces: [],
+    otherToolsQueue: [],
+    strReplacePhaseComplete: false,
+    failures: [],
+  }
+
   const state: Record<string, any> = {
     ws,
     fingerprintId,
@@ -103,24 +117,95 @@
     return {
       onTagStart: () => {},
       onTagEnd: async (_: string, input: Record<string, any>) => {
-        // delegated to reusable helper
-        previousToolCallFinished = executeToolCall({
-          toolName,
-          input,
-          toolCalls,
-          toolResults,
-          previousToolCallFinished,
-          ws,
-          agentTemplate,
-          fileContext,
-          agentStepId,
-          clientSessionId,
-          userInputId,
-          fullResponse: fullResponseChunks.join(''),
-          onResponseChunk,
-          state,
-          userId,
-        })
+        // Two-phase execution: defer str_replace tools, queue others
+        if (toolName === 'str_replace' && !batchState.strReplacePhaseComplete) {
+          // Defer str_replace execution
+          const toolCallId = generateCompactId()
+          const toolCall: CodebuffToolCall<'str_replace'> = {
+            toolName: 'str_replace',
+            input: input as any,
+            toolCallId,
+          }
+
+          batchState.deferredStrReplaces.push({ toolCall })
+
+          logger.debug(
+            {
+              toolCallId,
+              filePath: input.path,
+              replacementsCount: input.replacements?.length || 0,
+              currentDeferredCount: batchState.deferredStrReplaces.length,
+              agentStepId,
+              userInputId,
+            },
+            'stream-parser: Deferring str_replace tool for batch execution',
+          )
+
+          // Still emit the tool call event
+          onResponseChunk({
+            type: 'tool_call',
+            toolCallId,
+            toolName,
+            input,
+          })
+        } else {
+          // First non-str_replace tool marks end of str_replace phase
+          if (
+            !batchState.strReplacePhaseComplete &&
+            batchState.deferredStrReplaces.length > 0
+          ) {
+            logger.info(
+              {
+                triggeringTool: toolName,
+                deferredCount: batchState.deferredStrReplaces.length,
+                agentStepId,
+                userInputId,
+              },
+              `toolCallback: Triggering batch str_replace execution (${batchState.deferredStrReplaces.length} deferred tools) due to ${toolName}`,
+            )
+
+            batchState.strReplacePhaseComplete = true
+
+            // Execute all deferred str_replace tools as a batch
+            previousToolCallFinished = previousToolCallFinished.then(
+              async () => {
+                await executeBatchStrReplaces({
+                  deferredStrReplaces: batchState.deferredStrReplaces,
+                  toolCalls,
+                  toolResults,
+                  ws,
+                  agentTemplate,
+                  fileContext,
+                  agentStepId,
+                  clientSessionId,
+                  userInputId,
+                  fullResponse: fullResponseChunks.join(''),
+                  onResponseChunk,
+                  state,
+                  userId,
+                })
+              },
+            )
+          }
+
+          previousToolCallFinished = executeToolCall({
+            toolName,
+            input,
+            toolCalls,
+            toolResults,
+            previousToolCallFinished,
+            ws,
+            agentTemplate,
+            fileContext,
+            agentStepId,
+            clientSessionId,
+            userInputId,
+            fullResponse: fullResponseChunks.join(''),
+            onResponseChunk,
+            state,
+            userId,
+          })
+        }
       },
     }
   }
@@ -176,14 +261,7 @@
   )
 
   let reasoning = false
-  let messageId: string | null = null
-  while (true) {
-    const { value: chunk, done } = await streamWithTags.next()
-    if (done) {
-      messageId = chunk
-      break
-    }
-
+  for await (const chunk of streamWithTags) {
     if (chunk.type === 'reasoning') {
       if (!reasoning) {
         reasoning = true
@@ -215,16 +293,63 @@
   ])
   resolveStreamDonePromise()
 
-  await previousToolCallFinished
-
-  console.log({
-    toolCalls,
-    toolResults,
-    state,
-    fullResponse: fullResponseChunks.join(''),
-    fullResponseChunks,
-    messageId,
-  })
+  // Handle case where only str_replace tools were generated and stream ended
+  if (
+    !batchState.strReplacePhaseComplete &&
+    batchState.deferredStrReplaces.length > 0
+  ) {
+    logger.info(
+      {
+        triggeringEvent: 'stream_end',
+        deferredCount: batchState.deferredStrReplaces.length,
+        deferredFiles: batchState.deferredStrReplaces.map(
+          (d) => d.toolCall.input.path,
+        ),
+        agentStepId,
+        userInputId,
+      },
+      `stream-parser: Triggering batch str_replace execution (${batchState.deferredStrReplaces.length} deferred tools) due to stream end`,
+    )
+
+    batchState.strReplacePhaseComplete = true
+
+    // Execute all deferred str_replace tools as a batch
+    previousToolCallFinished = previousToolCallFinished.then(async () => {
+      logger.info(
+        {
+          agentStepId,
+          userInputId,
+          deferredCount: batchState.deferredStrReplaces.length,
+        },
+        'stream-parser: About to call executeBatchStrReplaces from stream end handler',
+      )
+      await executeBatchStrReplaces({
+        deferredStrReplaces: batchState.deferredStrReplaces,
+        toolCalls,
+        toolResults,
+        ws,
+        agentTemplate,
+        fileContext,
+        agentStepId,
+        clientSessionId,
+        userInputId,
+        fullResponse: fullResponseChunks.join(''),
+        onResponseChunk,
+        state,
+        userId,
+      })
+      logger.info(
+        {
+          agentStepId,
+          userInputId,
+        },
+        'stream-parser: Completed executeBatchStrReplaces from stream end handler',
+      )
+    })
+  }
+
+  await previousToolCallFinished
 
   return {
     toolCalls,
@@ -232,6 +357,5 @@
     state,
     fullResponse: fullResponseChunks.join(''),
     fullResponseChunks,
-    messageId,
   }
 }
diff --git a/bun.lock b/bun.lock
index 933bdf12b..398f1b60a 100644
--- a/bun.lock
+++ b/bun.lock
@@ -49,6 +49,7 @@
         "@jitl/quickjs-wasmfile-release-sync": "0.31.0",
         "@openrouter/ai-sdk-provider": "1.1.2",
         "ai": "5.0.0",
+        "benchify": "^0.1.0-alpha.41",
         "cors": "^2.8.5",
         "diff": "5.2.0",
         "dotenv": "16.4.5",
@@ -236,7 +237,7 @@
     },
     "sdk": {
       "name": "@codebuff/sdk",
-      "version": "0.2.3",
+      "version": "0.2.4",
      "dependencies": {
        "@vscode/tree-sitter-wasm": "0.1.4",
        "ai": "^5.0.0",
@@ -1627,6 +1628,8 @@
    "basic-ftp": ["basic-ftp@5.0.5", "", {}, "sha512-4Bcg1P8xhUuqcii/S0Z9wiHIrQVPMermM1any+MX5GeGD7faD3/msQUDGLol9wOcz4/jbg/WJnGqoJF6LiBdtg=="],

+    "benchify": ["benchify@0.1.0-alpha.41", "", { "dependencies": { "minimatch": "^9.0.3" }, "peerDependencies": { "react": ">=16.8.0" }, "optionalPeers": ["react"] }, "sha512-iZAH2JFcGld/lruJEZKO9dv7XAU8ozEznPtxNLQj+6s1CQMIohzRLnEbvCWLWkMoqSQlJOIp2gCY6N9gt956yQ=="],
+
    "bidi-js": ["bidi-js@1.0.3", "", { "dependencies": { "require-from-string": "^2.0.2" } }, "sha512-RKshQI1R3YQ+n9YJz2QQ147P66ELpa1FQEg20Dk8oW9t2KgLbpDLLp9aGZ7y8WHSshDknG0bknqGw5/tyCs5tw=="],

    "big.js": ["big.js@6.2.2", "", {}, "sha512-y/ie+Faknx7sZA5MfGA2xKlu0GDv8RWrXGsmlteyJQ2lvoKv9GBK/fpRMc2qlSoBAgNxrixICFCBefIq8WCQpQ=="],
@@ -4201,6 +4204,8 @@
    "babel-plugin-istanbul/istanbul-lib-instrument": ["istanbul-lib-instrument@5.2.1", "", { "dependencies": { "@babel/core": "^7.12.3", "@babel/parser": "^7.14.7", "@istanbuljs/schema": "^0.1.2", "istanbul-lib-coverage": "^3.2.0", "semver": "^6.3.0" } }, "sha512-pzqtp31nLv/XFOzXGuvhCb8qhjmTVo5vjVk19XE4CRlSWz0KoeJ3bw9XsA7nOp9YBf4qHjwBxkDzKcME/J29Yg=="],

+    "benchify/minimatch": ["minimatch@9.0.5", "", { "dependencies": { "brace-expansion": "^2.0.1" } }, "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow=="],
+
    "bl/buffer": ["buffer@5.7.1", "", { "dependencies": { "base64-js": "^1.3.1", "ieee754": "^1.1.13" } }, "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ=="],

    "bl/readable-stream": ["readable-stream@3.6.2", "", { "dependencies": { "inherits": "^2.0.3", "string_decoder": "^1.1.1", "util-deprecate": "^1.0.1" } }, "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA=="],
@@ -4939,6 +4944,8 @@
    "babel-plugin-istanbul/istanbul-lib-instrument/semver": ["semver@6.3.1", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA=="],

+    "benchify/minimatch/brace-expansion": ["brace-expansion@2.0.2", "", { "dependencies": { "balanced-match": "^1.0.0" } }, "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ=="],
+
    "body-parser/debug/ms": ["ms@2.0.0", "", {}, "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="],

    "chalk/ansi-styles/color-convert": ["color-convert@2.0.1", "", { "dependencies": { "color-name": "~1.1.4" } }, "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ=="],
diff --git a/npm-app/src/index.ts b/npm-app/src/index.ts
index 57fbcbe9e..57507e49b 100644
--- a/npm-app/src/index.ts
+++ b/npm-app/src/index.ts
@@ -52,6 +52,7 @@ async function codebuff({
   cwd,
   trace,
 }: CliOptions) {
+  console.log('🚀 Starting Codebuff!')
   enableSquashNewlines()
   const workingDir = getWorkingDirectory()
   const projectRoot = getProjectRoot()
diff --git a/npm-app/src/tool-handlers.ts b/npm-app/src/tool-handlers.ts
index 7e90d8126..cf8c947c8 100644
--- a/npm-app/src/tool-handlers.ts
+++ b/npm-app/src/tool-handlers.ts
@@ -67,16 +67,26 @@ export const handleUpdateFile = async <
     console.log(green(`- Created ${file} ${counts}`))
   }
   for (const file of modified) {
-    // Calculate added/deleted lines from the diff content
+    // Calculate added/deleted lines from the diff content, excluding metadata
     let addedLines = 0
     let deletedLines = 0
-    lines.forEach((line) => {
+
+    for (const line of lines) {
+      // Skip all diff metadata lines (headers, hunk headers, etc.)
+      if (
+        line.startsWith('---') ||
+        line.startsWith('+++') ||
+        line.startsWith('@@')
+      ) {
+        continue
+      }
+
+      // Count actual added/removed code lines
       if (line.startsWith('+')) {
         addedLines++
       } else if (line.startsWith('-')) {
         deletedLines++
       }
-    })
+    }
 
     const counts = `(${green(`+${addedLines}`)}, ${red(`-${deletedLines}`)})`
     result.push([
diff --git a/packages/internal/src/env.ts b/packages/internal/src/env.ts
index ecc451030..44db06004 100644
--- a/packages/internal/src/env.ts
+++ b/packages/internal/src/env.ts
@@ -10,6 +10,7 @@ const envSchema = {
   server: {
     // Backend variables
     CODEBUFF_API_KEY: z.string().optional(),
+    BENCHIFY_API_KEY: z.string().optional(),
    OPEN_ROUTER_API_KEY: z.string().min(1),
    RELACE_API_KEY: z.string().min(1),
    LINKUP_API_KEY: z.string().min(1),
@@ -51,6 +52,7 @@
   runtimeEnv: {
     // Backend variables
     CODEBUFF_API_KEY: process.env.CODEBUFF_API_KEY,
+    BENCHIFY_API_KEY: process.env.BENCHIFY_API_KEY,
    OPEN_ROUTER_API_KEY: process.env.OPEN_ROUTER_API_KEY,
    RELACE_API_KEY: process.env.RELACE_API_KEY,
    LINKUP_API_KEY: process.env.LINKUP_API_KEY,