diff --git a/.github/workflows/npm-app-release-staging.yml b/.github/workflows/npm-app-release-staging.yml
index 58c6a1ade..c1407eaf3 100644
--- a/.github/workflows/npm-app-release-staging.yml
+++ b/.github/workflows/npm-app-release-staging.yml
@@ -134,7 +134,7 @@ jobs:
       new-version: ${{ needs.prepare-and-commit-staging.outputs.new_version }}
       artifact-name: updated-staging-package
       checkout-ref: ${{ github.event.pull_request.head.sha }}
-      env-overrides: '{"NEXT_PUBLIC_CB_ENVIRONMENT": "prod", "NEXT_PUBLIC_CODEBUFF_BACKEND_URL": "backend-pr-221-we0m.onrender.com"}'
+      env-overrides: '{"NEXT_PUBLIC_CB_ENVIRONMENT": "prod", "NEXT_PUBLIC_CODEBUFF_BACKEND_URL": "backend-pr-311-2xab.onrender.com"}'
       secrets: inherit
 
   # Create GitHub prerelease with all binaries
diff --git a/backend/package.json b/backend/package.json
index aeac65984..c2c1bfbac 100644
--- a/backend/package.json
+++ b/backend/package.json
@@ -24,6 +24,7 @@
   },
   "dependencies": {
     "@ai-sdk/google-vertex": "3.0.6",
+    "benchify": "^0.1.0-alpha.41",
     "@ai-sdk/openai": "2.0.11",
     "@codebuff/billing": "workspace:*",
     "@codebuff/common": "workspace:*",
diff --git a/backend/src/process-str-replace.ts b/backend/src/process-str-replace.ts
index 16821ac71..bcb7f71f0 100644
--- a/backend/src/process-str-replace.ts
+++ b/backend/src/process-str-replace.ts
@@ -21,8 +21,22 @@ export async function processStrReplace(
     }
   | { tool: 'str_replace'; path: string; error: string }
 > {
+  logger.debug(
+    {
+      path,
+      replacementsCount: replacements.length,
+    },
+    'processStrReplace: Starting to process str_replace',
+  )
+
   const initialContent = await initialContentPromise
   if (initialContent === null) {
+    logger.warn(
+      {
+        path,
+      },
+      'processStrReplace: File does not exist',
+    )
     return {
       tool: 'str_replace',
       path,
@@ -31,6 +45,15 @@
     }
   }
 
+  logger.debug(
+    {
+      path,
+      initialContentLength: initialContent.length,
+      hasContent: !!initialContent,
+    },
+    'processStrReplace: Successfully read initial file content',
+  )
+
   // Process each old/new string pair
   let currentContent = initialContent
   let messages: string[] = []
diff --git a/backend/src/run-agent-step.ts b/backend/src/run-agent-step.ts
index c06f37a54..f285e347a 100644
--- a/backend/src/run-agent-step.ts
+++ b/backend/src/run-agent-step.ts
@@ -289,13 +289,22 @@ export const runAgentStep = async (
 
   const stream = getStream(messagesWithSystem(agentMessages, system))
 
+  logger.info(
+    {
+      agentStepId,
+      userInputId,
+      agentType,
+      agentId: agentState.agentId,
+    },
+    'run-agent-step: About to call processStreamWithTools',
+  )
+
   const {
     toolCalls,
     toolResults: newToolResults,
     state,
     fullResponse: fullResponseAfterStream,
    fullResponseChunks,
-    messageId,
   } = await processStreamWithTools({
     stream,
     ws,
@@ -314,6 +323,17 @@
     onResponseChunk,
     fullResponse,
   })
+
+  logger.info(
+    {
+      agentStepId,
+      userInputId,
+      toolCallsCount: toolCalls.length,
+      toolResultsCount: newToolResults.length,
+      toolCallTypes: toolCalls.map((tc) => tc.toolName),
+    },
+    'run-agent-step: Completed processStreamWithTools',
+  )
   toolResults.push(...newToolResults)
   fullResponse = fullResponseAfterStream
 
@@ -403,7 +423,7 @@
     agentState,
     fullResponse,
     shouldEndTurn,
-    messageId,
+    messageId: null,
   }
 }
 
diff --git a/backend/src/tools/batch-str-replace.ts b/backend/src/tools/batch-str-replace.ts
new file mode 100644
index 000000000..4011ed695
--- /dev/null
+++ b/backend/src/tools/batch-str-replace.ts
@@ -0,0 +1,754 @@
+import { handleStrReplace } from './handlers/tool/str-replace'
+import { getFileProcessingValues } from './handlers/tool/write-file'
+import { logger } from '../util/logger'
+import { Benchify } from 'benchify'
+import { env } from '@codebuff/internal/env'
+import { requestToolCall } from '../websockets/websocket-action'
+import { createPatch } from 'diff'
+import type { CodebuffToolCall } from '@codebuff/common/tools/list'
+import type { ToolResultPart } from '@codebuff/common/types/messages/content-part'
+import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
+import type { AgentTemplate } from '../templates/types'
+import type { ProjectFileContext } from '@codebuff/common/util/file'
+import type { WebSocket } from 'ws'
+import { file } from 'bun'
+
+export type DeferredStrReplace = {
+  toolCall: CodebuffToolCall<'str_replace'>
+}
+
+export type BatchStrReplaceState = {
+  deferredStrReplaces: DeferredStrReplace[]
+  otherToolsQueue: any[]
+  strReplacePhaseComplete: boolean
+  failures: any[]
+}
+
+const BENCHIFY_FILE_TYPES = ['tsx', 'ts', 'jsx', 'js']
+
+// Global Benchify client instance
+let benchifyClient: Benchify | null = null
+
+function getBenchifyClient(): Benchify | null {
+  if (!benchifyClient) {
+    let benchifyApiKey: string | undefined
+    try {
+      // Log available environment variables (partial for debugging)
+      const envKeys = Object.keys(process.env)
+        .filter((key) => key.includes('BENCHIFY') || key.includes('API'))
+        .slice(0, 10) // Limit to first 10 for safety
+
+      benchifyApiKey = env.BENCHIFY_API_KEY
+      logger.info(
+        {
+          hasApiKey: !!benchifyApiKey,
+          apiKeyLength: benchifyApiKey?.length || 0,
+          apiKeyPrefix: benchifyApiKey?.substring(0, 8) || 'none',
+          availableEnvKeys: envKeys,
+          nodeEnv: process.env.NODE_ENV,
+        },
+        'getBenchifyClient: Attempting to access BENCHIFY_API_KEY from environment',
+      )
+    } catch (error) {
+      logger.warn(
+        {
+          error: error instanceof Error ? error.message : String(error),
+          nodeEnv: process.env.NODE_ENV,
+        },
+        'getBenchifyClient: Failed to access BENCHIFY_API_KEY from environment',
+      )
+      return null
+    }
+
+    if (!benchifyApiKey) {
+      logger.warn(
+        'getBenchifyClient: No BENCHIFY_API_KEY found, returning null',
+      )
+      return null
+    }
+
+    try {
+      benchifyClient = new Benchify({
+        apiKey: benchifyApiKey,
+      })
+      logger.info('getBenchifyClient: Successfully created Benchify client')
+    } catch (error) {
+      logger.error(
+        {
+          error: error instanceof Error ? error.message : String(error),
+        },
+        'getBenchifyClient: Failed to create Benchify client',
+      )
+      return null
+    }
+  }
+  return benchifyClient
+}
+
+export async function executeBatchStrReplaces({
+  deferredStrReplaces,
+  toolCalls,
+  toolResults,
+  ws,
+  agentTemplate,
+  fileContext,
+  agentStepId,
+  clientSessionId,
+  userInputId,
+  fullResponse,
+  onResponseChunk,
+  state,
+  userId,
+}: {
+  deferredStrReplaces: DeferredStrReplace[]
+  toolCalls: (CodebuffToolCall | any)[]
+  toolResults: ToolResultPart[]
+  ws: WebSocket
+  agentTemplate: AgentTemplate
+  fileContext: ProjectFileContext
+  agentStepId: string
+  clientSessionId: string
+  userInputId: string
+  fullResponse: string
+  onResponseChunk: (chunk: string | PrintModeEvent) => void
+  state: Record<string, any>
+  userId: string | undefined
+}) {
+  logger.info(
+    {
+      deferredCount: deferredStrReplaces.length,
+      agentStepId,
+      userInputId,
+      userId,
+    },
+    'executeBatchStrReplaces: Starting batch execution',
+  )
+
+  if (deferredStrReplaces.length === 0) {
+    logger.info(
+      'executeBatchStrReplaces: No deferred str_replace operations, returning early',
+    )
+    return
+  }
+
+  const batchPromises: Promise<void>[] = []
+  let previousPromise = Promise.resolve()
+
+  // Track successfully edited files for benchify call
+  const editedFiles: { path: string; contents: string }[] = []
+  // Track intended changes from LLM for benchify call (even if str_replace fails)
+  const intendedChanges: { path: string; contents: string }[] = []
+  // Track original file contents before any modifications
+  const originalContents: Record<string, string> = {}
+
+  // Execute all str_replace calls in sequence to maintain file consistency
+  for (let i = 0; i < deferredStrReplaces.length; i++) {
+    const { toolCall } = deferredStrReplaces[i]
+
+    // Read original content before any modifications (only once per file)
+    const isFileEligibleForBenchify = benchifyCanFixLanguage(
+      toolCall.input.path,
+    )
+
+    if (!isFileEligibleForBenchify) {
+      logger.debug(
+        {
+          path: toolCall.input.path,
+          supportedExtensions: BENCHIFY_FILE_TYPES,
+          agentStepId,
+          userInputId,
+        },
+        'executeBatchStrReplaces: File not eligible for benchify (unsupported file type)',
+      )
+    }
+
+    if (isFileEligibleForBenchify && !originalContents[toolCall.input.path]) {
+      try {
+        const originalContent = await extractOriginalContent(
+          toolCall.input.path,
+          fileContext,
+        )
+        if (originalContent) {
+          originalContents[toolCall.input.path] = originalContent
+        }
+      } catch (error) {
+        logger.warn(
+          {
+            error: error instanceof Error ? error.message : String(error),
+            path: toolCall.input.path,
+          },
+          'Failed to read original content for benchify',
+        )
+      }
+    }
+
+    // Extract intended content from str_replace operation before attempting execution
+    if (
+      benchifyCanFixLanguage(toolCall.input.path) &&
+      originalContents[toolCall.input.path]
+    ) {
+      try {
+        const intendedContent = await extractIntendedContent(
+          toolCall,
+          originalContents[toolCall.input.path],
+        )
+        if (intendedContent) {
+          const existingIndex = intendedChanges.findIndex(
+            (f) => f.path === toolCall.input.path,
+          )
+          if (existingIndex >= 0) {
+            intendedChanges[existingIndex].contents = intendedContent
+          } else {
+            intendedChanges.push({
+              path: toolCall.input.path,
+              contents: intendedContent,
+            })
+          }
+          logger.debug(
+            {
+              path: toolCall.input.path,
+              intendedContentLength: intendedContent.length,
+              agentStepId,
+              userInputId,
+            },
+            'executeBatchStrReplaces: Successfully extracted intended content for benchify',
+          )
+        }
+      } catch (error) {
+        logger.warn(
+          {
+            error: error instanceof Error ? error.message : String(error),
+            path: toolCall.input.path,
+          },
+          'executeBatchStrReplaces: Failed to extract intended content for benchify',
+        )
+      }
+    } else {
+      logger.debug(
+        {
+          path: toolCall.input.path,
+          canFixLanguage: benchifyCanFixLanguage(toolCall.input.path),
+          hasOriginalContent: !!originalContents[toolCall.input.path],
+          agentStepId,
+          userInputId,
+        },
+        'executeBatchStrReplaces: Skipping intended content extraction (not benchify-compatible file or no original content)',
+      )
+    }
+
+    // Chain each str_replace to the previous one to ensure proper ordering
+    const strReplacePromise = previousPromise.then(async () => {
+      try {
+        const { result } = handleStrReplace({
+          previousToolCallFinished: Promise.resolve(),
+          toolCall,
+          requestClientToolCall: async () => {
+            throw new Error('Client tool calls not supported in batch mode')
+          },
+          writeToClient: onResponseChunk,
+          getLatestState: () => getFileProcessingValues(state),
+          state: { ...state, ws },
+        })
+
+        const toolResult = await result
+
+        if (toolResult) {
+          const toolResultPart: ToolResultPart = {
+            type: 'tool-result',
+            toolName: 'str_replace',
+            toolCallId: toolCall.toolCallId,
+            output: toolResult,
+          }
+
+          toolResults.push(toolResultPart)
+
+          onResponseChunk({
+            type: 'tool_result',
+            toolCallId: toolCall.toolCallId,
+            output: toolResult,
+          })
+
+          // Add to message history
+          state.messages.push({
+            role: 'tool' as const,
+            content: toolResultPart,
+          })
+
+          // Track successfully edited files
+          if (
+            Array.isArray(toolResult) &&
+            toolResult.length > 0 &&
+            benchifyCanFixLanguage(toolCall.input.path)
+          ) {
+            const result = toolResult[0]
+            if (
+              result.type === 'json' &&
+              result.value &&
+              'content' in result.value
+            ) {
+              const existingFileIndex = editedFiles.findIndex(
+                (f) => f.path === toolCall.input.path,
+              )
+              const fileContent = result.value.content as string
+
+              if (existingFileIndex >= 0) {
+                // Update existing file with latest content
+                editedFiles[existingFileIndex].contents = fileContent
+              } else {
+                // Add new file to tracking
+                editedFiles.push({
+                  path: toolCall.input.path,
+                  contents: fileContent,
+                })
+              }
+            }
+          }
+        }
+      } catch (error) {
+        logger.error(
+          {
+            error:
+              error instanceof Error
+                ? {
+                    message: error.message,
+                    stack: error.stack,
+                    name: error.name,
+                  }
+                : error,
+            toolCallId: toolCall.toolCallId,
+            toolCallInput: JSON.stringify(toolCall.input, null, 2),
+            agentStepId,
+            userInputId,
+          },
+          `Error executing batched str_replace ${i + 1}/${deferredStrReplaces.length}`,
+        )
+
+        // Create error result
+        const errorResult: ToolResultPart = {
+          type: 'tool-result',
+          toolName: 'str_replace',
+          toolCallId: toolCall.toolCallId,
+          output: [
+            {
+              type: 'json',
+              value: {
+                errorMessage: `Batched str_replace failed: ${error instanceof Error ? error.message : String(error)}`,
+              },
+            },
+          ],
+        }
+
+        toolResults.push(errorResult)
+        onResponseChunk({
+          type: 'tool_result',
+          toolCallId: toolCall.toolCallId,
+          output: errorResult.output,
+        })
+
+        // Add to message history even for errors
+        state.messages.push({
+          role: 'tool' as const,
+          content: errorResult,
+        })
+
+        logger.info(
+          {
+            toolCallId: toolCall.toolCallId,
+            path: toolCall.input.path,
+            errorMessage:
+              error instanceof Error ? error.message : String(error),
+            agentStepId,
+            userInputId,
+          },
+          'executeBatchStrReplaces: str_replace failed, but continuing with batch execution for benchify',
+        )
+      }
+    })
+
+    // Add to toolCalls array
+    toolCalls.push(toolCall)
+    batchPromises.push(strReplacePromise)
+    previousPromise = strReplacePromise
+  }
+
+  // Wait for all batched operations to complete
+  await Promise.all(batchPromises)
+
+  logger.info(
+    {
+      totalOperations: deferredStrReplaces.length,
+      successfulEdits: editedFiles.length,
+      intendedChangesCount: intendedChanges.length,
+      benchifyEligibleFiles: deferredStrReplaces.filter((d) =>
+        benchifyCanFixLanguage(d.toolCall.input.path),
+      ).length,
+      agentStepId,
+      userInputId,
+    },
+    'executeBatchStrReplaces: Batch operations completed, summary before benchify call',
+  )
+
+  // Call benchify with intended changes (even if str_replace operations failed)
+  logger.info(
+    {
+      intendedChangesCount: intendedChanges.length,
+      editedFilesCount: editedFiles.length,
+      intendedChangeFiles: intendedChanges.map((f) => f.path),
+      editedFilesList: editedFiles.map((f) => f.path),
+      agentStepId,
+      userInputId,
+    },
+    'executeBatchStrReplaces: Preparing to call benchify',
+  )
+
+  const client = getBenchifyClient()
+  if (!client) {
+    logger.warn(
+      'executeBatchStrReplaces: No benchify client available, skipping benchify call',
+    )
+    return
+  }
+
+  if (intendedChanges.length === 0) {
+    logger.warn(
+      'executeBatchStrReplaces: No intended changes for benchify, skipping benchify call',
+    )
+    return
+  }
+
+  try {
+    const benchifyResult = await callBenchify(intendedChanges, {
+      agentStepId,
+      clientSessionId,
+      userInputId,
+      userId,
+    })
+
+    if (benchifyResult && benchifyResult.length > 0) {
+      logger.info(
+        {
+          benchifyResultCount: benchifyResult.length,
+          resultFiles: benchifyResult.map((r) => r.path),
+          agentStepId,
+          userInputId,
+        },
+        `executeBatchStrReplaces: Benchify returned ${benchifyResult.length} results, applying them`,
+      )
+
+      // Apply benchify results back to files
+      await applyBenchifyResults(benchifyResult, {
+        ws,
+        onResponseChunk,
+        state: { ...state, originalContents },
+        toolResults,
+        toolCalls: deferredStrReplaces.map((d) => d.toolCall),
+        userInputId,
+      })
+    }
+  } catch (error) {
+    logger.error(
+      {
+        error: error instanceof Error ? error.message : String(error),
+        intendedChangeFiles: intendedChanges.map((f) => f.path),
+        agentStepId,
+        userInputId,
+      },
+      'executeBatchStrReplaces: Failed to call benchify with intended changes',
+    )
+  }
+}
+
+/**
+ * Calls benchify API with the list of edited files
+ */
+async function callBenchify(
+  editedFiles: { path: string; contents: string }[],
+  context: {
+    agentStepId: string
+    clientSessionId: string
+    userInputId: string
+    userId: string | undefined
+  },
+): Promise<{ path: string; contents: string }[] | null> {
+  logger.info(
+    {
+      editedFilesCount: editedFiles.length,
+      editedFilesList: editedFiles.map((f) => f.path),
+      totalContentLength: editedFiles.reduce(
+        (sum, f) => sum + f.contents.length,
+        0,
+      ),
+      ...context,
+    },
+    'callBenchify: Starting benchify API call',
+  )
+
+  const client = getBenchifyClient()
+  if (!client) {
+    logger.error('callBenchify: No benchify client available')
+    return null
+  }
+
+  try {
+    logger.info(
+      {
+        fixTypes: ['string_literals'],
+        ...context,
+      },
+      'callBenchify: Calling client.runFixer',
+    )
+
+    const response = await client.runFixer(editedFiles, {
+      fix_types: ['string_literals'],
+    })
+
+    logger.info(
+      {
+        responseReceived: !!response,
+        responseLength: response?.length || 0,
+        responseFiles: response?.map((r) => r.path) || [],
+        responseContentLengths: response?.map((r) => r.contents.length) || [],
+        ...context,
+      },
+      'callBenchify: Benchify runFixer API response received successfully',
+    )
+
+    return response
+  } catch (error) {
+    logger.error(
+      {
+        error:
+          error instanceof Error
+            ? {
+                message: error.message,
+                stack: error.stack,
+                name: error.name,
+              }
+            : String(error),
+        editedFilesCount: editedFiles.length,
+        ...context,
+      },
+      'callBenchify: Failed to call benchify API',
+    )
+    throw error
+  }
+}
+
+/**
+ * Applies benchify results back to the file system and updates tool results
+ */
+async function applyBenchifyResults(
+  benchifyFiles: { path: string; contents: string }[],
+  context: {
+    ws: WebSocket
+    onResponseChunk: (chunk: string | PrintModeEvent) => void
+    state: Record<string, any>
+    toolResults: ToolResultPart[]
+    toolCalls: CodebuffToolCall<'str_replace'>[]
+    userInputId: string
+  },
+) {
+  logger.info(
+    {
+      benchifyFilesCount: benchifyFiles.length,
+      benchifyFilesList: benchifyFiles.map((f) => f.path),
+      toolCallsCount: context.toolCalls.length,
+      userInputId: context.userInputId,
+    },
+    'applyBenchifyResults: Starting to apply benchify results',
+  )
+
+  for (const benchifyFile of benchifyFiles) {
+    logger.debug(
+      {
+        fileName: benchifyFile.path,
+        contentLength: benchifyFile.contents.length,
+        userInputId: context.userInputId,
+      },
+      'applyBenchifyResults: Processing benchify file',
+    )
+    try {
+      // Find the corresponding tool call for this file
+      const relatedToolCall = context.toolCalls.find(
+        (tc) => tc.input.path === benchifyFile.path,
+      )
+
+      if (!relatedToolCall) {
+        logger.warn(
+          {
+            fileName: benchifyFile.path,
+            availableToolCallPaths: context.toolCalls.map(
+              (tc) => tc.input.path,
+            ),
+            userInputId: context.userInputId,
+          },
+          'applyBenchifyResults: No matching tool call found for benchify result',
+        )
+        continue
+      }
+
+      logger.debug(
+        {
+          fileName: benchifyFile.path,
+          relatedToolCallId: relatedToolCall.toolCallId,
+          userInputId: context.userInputId,
+        },
+        'applyBenchifyResults: Found matching tool call for benchify result',
+      )
+
+      // Get the original file content from our stored contents
+      const originalContent =
+        context.state.originalContents?.[benchifyFile.path]
+
+      if (!originalContent) {
+        logger.error(
+          { path: benchifyFile.path },
+          'Could not find original file content for diff generation',
+        )
+        continue
+      }
+
+      // Generate a proper unified diff patch
+      const patch = createPatch(
+        benchifyFile.path,
+        originalContent,
+        benchifyFile.contents,
+        '',
+        '',
+      )
+
+      // Request the client to apply the benchify changes as a patch
+      const toolCallResult = await requestToolCall(
+        context.ws,
+        context.userInputId,
+        'str_replace',
+        {
+          type: 'patch',
+          path: benchifyFile.path,
+          content: patch,
+        },
+      )
+
+      // Create a tool result indicating benchify was applied
+      const benchifyToolResult: ToolResultPart = {
+        type: 'tool-result',
+        toolName: 'str_replace',
+        toolCallId: relatedToolCall.toolCallId,
+        output: toolCallResult.output,
+      }
+
+      // Update the existing tool result
+      const existingResultIndex = context.toolResults.findIndex(
+        (tr) => tr.toolCallId === relatedToolCall.toolCallId,
+      )
+
+      if (existingResultIndex >= 0) {
+        context.toolResults[existingResultIndex] = benchifyToolResult
+      } else {
+        context.toolResults.push(benchifyToolResult)
+      }
+
+      // Notify client about the benchify update
+      context.onResponseChunk({
+        type: 'tool_result',
+        toolCallId: relatedToolCall.toolCallId,
+        output: benchifyToolResult.output,
+      })
+    } catch (error) {
+      logger.error(
+        { error, fileName: benchifyFile.path },
+        'Failed to apply benchify result to file',
+      )
+    }
+  }
+}
+
+/**
+ * Extracts the original file content before any modifications
+ */
+async function extractOriginalContent(
+  filePath: string,
+  fileContext: ProjectFileContext,
+): Promise<string | null> {
+  try {
+    const absolutePath = `${fileContext.projectRoot}/${filePath}`
+    const currentFile = await file(absolutePath)
+    return await currentFile.text()
+  } catch (error) {
+    logger.warn(
+      {
+        error: error instanceof Error ? error.message : String(error),
+        path: filePath,
+      },
+      'Failed to read original file content',
+    )
+    return null
+  }
+}
+
+/**
+ * Extracts the intended file content by applying str_replace operations to the current file
+ */
+async function extractIntendedContent(
+  toolCall: CodebuffToolCall<'str_replace'>,
+  originalContent: string,
+): Promise<string | null> {
+  try {
+    let currentContent = originalContent
+
+    // Apply all replacements to get the intended content
+    for (const replacement of toolCall.input.replacements) {
+      const { old, new: newStr, allowMultiple } = replacement
+
+      if (allowMultiple) {
+        currentContent = currentContent.replaceAll(old, newStr)
+      } else {
+        // Find the first occurrence and replace it
+        const index = currentContent.indexOf(old)
+        if (index !== -1) {
+          currentContent =
+            currentContent.substring(0, index) +
+            newStr +
+            currentContent.substring(index + old.length)
+          logger.debug(
+            {
+              old: old.substring(0, 100) + (old.length > 100 ? '...' : ''),
+              new:
+                newStr.substring(0, 100) + (newStr.length > 100 ? '...' : ''),
+              path: toolCall.input.path,
+            },
+            'extractIntendedContent: Successfully applied replacement for benchify',
+          )
+        } else {
+          // If we can't find the old string, log it but continue with other replacements
+          logger.warn(
+            {
+              old: old.substring(0, 200) + (old.length > 200 ? '...' : ''),
+              new:
+                newStr.substring(0, 100) + (newStr.length > 100 ? '...' : ''),
+              allowMultiple,
+              path: toolCall.input.path,
+              contentLength: currentContent.length,
+            },
+            'extractIntendedContent: Failed to find old string in currentContent for benchify',
+          )
+        }
+      }
+    }
+
+    return currentContent
+  } catch (error) {
+    logger.warn(
+      {
+        error: error instanceof Error ? error.message : String(error),
+        path: toolCall.input.path,
+      },
+      'Failed to apply replacements for intended content extraction',
+    )
+    return null
+  }
+}
+
+function benchifyCanFixLanguage(path: string): boolean {
+  return BENCHIFY_FILE_TYPES.some((extension) => path.endsWith(`.${extension}`))
+}
diff --git a/backend/src/tools/handlers/tool/write-file.ts b/backend/src/tools/handlers/tool/write-file.ts
index 4b912a061..261cff72d 100644
--- a/backend/src/tools/handlers/tool/write-file.ts
+++ b/backend/src/tools/handlers/tool/write-file.ts
@@ -230,7 +230,7 @@ export async function postStreamProcessing(
   if (errors.length > 0) {
     if (errors.length > 1) {
       throw new Error(
-        `Internal error: Unexpected number of matching errors for ${{ toolCall }}, found ${errors.length}, expected 1`,
+        `Internal error: Unexpected number of matching errors for ${JSON.stringify(toolCall)}, found ${errors.length}, expected 1`,
       )
     }
 
@@ -251,7 +251,7 @@
   )
   if (changes.length !== 1) {
     throw new Error(
-      `Internal error: Unexpected number of matching changes for ${{ toolCall }}, found ${changes.length}, expected 1`,
+      `Internal error: Unexpected number of matching changes for ${JSON.stringify(toolCall)}, found ${changes.length}, expected 1`,
     )
   }
 
diff --git a/backend/src/tools/stream-parser.ts b/backend/src/tools/stream-parser.ts
index 1b0c69b71..be820526a 100644
--- a/backend/src/tools/stream-parser.ts
+++ b/backend/src/tools/stream-parser.ts
@@ -8,9 +8,14 @@ import { buildArray } from '@codebuff/common/util/array'
 import { generateCompactId } from '@codebuff/common/util/string'
 
 import { expireMessages } from '../util/messages'
+import { logger } from '../util/logger'
 import { sendAction } from '../websockets/websocket-action'
 import { processStreamWithTags } from '../xml-stream-parser'
 import { executeCustomToolCall, executeToolCall } from './tool-executor'
+import {
+  executeBatchStrReplaces,
+  BatchStrReplaceState,
+} from './batch-str-replace'
 
 import type { CustomToolCall } from './tool-executor'
 import type { StreamChunk } from '../llm-apis/vercel-ai-sdk/ai-sdk'
@@ -32,7 +37,7 @@ export type ToolCallError = {
 } & Omit
 
 export async function processStreamWithTools(options: {
-  stream: AsyncGenerator<StreamChunk, string | null>
+  stream: AsyncGenerator<StreamChunk>
   ws: WebSocket
   agentStepId: string
   clientSessionId: string
@@ -74,6 +79,15 @@ export async function processStreamWithTools(options: {
   const { promise: streamDonePromise, resolve: resolveStreamDonePromise } =
     Promise.withResolvers<void>()
   let previousToolCallFinished = streamDonePromise
+
+  // Two-phase execution state
+  const batchState: BatchStrReplaceState = {
+    deferredStrReplaces: [],
+    otherToolsQueue: [],
+    strReplacePhaseComplete: false,
+    failures: [],
+  }
+
   const state: Record<string, any> = {
     ws,
     fingerprintId,
@@ -103,24 +117,95 @@
     return {
       onTagStart: () => {},
       onTagEnd: async (_: string, input: Record<string, any>) => {
-        // delegated to reusable helper
-        previousToolCallFinished = executeToolCall({
-          toolName,
-          input,
-          toolCalls,
-          toolResults,
-          previousToolCallFinished,
-          ws,
-          agentTemplate,
-          fileContext,
-          agentStepId,
-          clientSessionId,
-          userInputId,
-          fullResponse: fullResponseChunks.join(''),
-          onResponseChunk,
-          state,
-          userId,
-        })
+        // Two-phase execution: defer str_replace tools, queue others
+        if (toolName === 'str_replace' && !batchState.strReplacePhaseComplete) {
+          // Defer str_replace execution
+          const toolCallId = generateCompactId()
+          const toolCall: CodebuffToolCall<'str_replace'> = {
+            toolName: 'str_replace',
+            input: input as any,
+            toolCallId,
+          }
+
+          batchState.deferredStrReplaces.push({ toolCall })
+
+          logger.debug(
+            {
+              toolCallId,
+              filePath: input.path,
+              replacementsCount: input.replacements?.length || 0,
+              currentDeferredCount: batchState.deferredStrReplaces.length,
+              agentStepId,
+              userInputId,
+            },
+            'stream-parser: Deferring str_replace tool for batch execution',
+          )
+
+          // Still emit the tool call event
+          onResponseChunk({
+            type: 'tool_call',
+            toolCallId,
+            toolName,
+            input,
+          })
+        } else {
+          // First non-str_replace tool marks end of str_replace phase
+          if (
+            !batchState.strReplacePhaseComplete &&
+            batchState.deferredStrReplaces.length > 0
+          ) {
+            logger.info(
+              {
+                triggeringTool: toolName,
+                deferredCount: batchState.deferredStrReplaces.length,
+                agentStepId,
+                userInputId,
+              },
+              `toolCallback: Triggering batch str_replace execution (${batchState.deferredStrReplaces.length} deferred tools) due to ${toolName}`,
+            )
+
+            batchState.strReplacePhaseComplete = true
+
+            // Execute all deferred str_replace tools as a batch
+            previousToolCallFinished = previousToolCallFinished.then(
+              async () => {
+                await executeBatchStrReplaces({
+                  deferredStrReplaces: batchState.deferredStrReplaces,
+                  toolCalls,
+                  toolResults,
+                  ws,
+                  agentTemplate,
+                  fileContext,
+                  agentStepId,
+                  clientSessionId,
+                  userInputId,
+                  fullResponse: fullResponseChunks.join(''),
+                  onResponseChunk,
+                  state,
+                  userId,
+                })
+              },
+            )
+          }
+
+          previousToolCallFinished = executeToolCall({
+            toolName,
+            input,
+            toolCalls,
+            toolResults,
+            previousToolCallFinished,
+            ws,
+            agentTemplate,
+            fileContext,
+            agentStepId,
+            clientSessionId,
+            userInputId,
+            fullResponse: fullResponseChunks.join(''),
+            onResponseChunk,
+            state,
+            userId,
+          })
+        }
       },
     }
   }
@@ -176,14 +261,7 @@
   )
 
   let reasoning = false
-  let messageId: string | null = null
-  while (true) {
-    const { value: chunk, done } = await streamWithTags.next()
-    if (done) {
-      messageId = chunk
-      break
-    }
-
+  for await (const chunk of streamWithTags) {
     if (chunk.type === 'reasoning') {
       if (!reasoning) {
         reasoning = true
@@ -215,16 +293,63 @@
   ])
   resolveStreamDonePromise()
 
-  await previousToolCallFinished
-
-  console.log({
-    toolCalls,
-    toolResults,
-    state,
-    fullResponse: fullResponseChunks.join(''),
-    fullResponseChunks,
-    messageId,
-  })
+  // Handle case where only str_replace tools were generated and stream ended
+  if (
+    !batchState.strReplacePhaseComplete &&
+    batchState.deferredStrReplaces.length > 0
+  ) {
+    logger.info(
+      {
+        triggeringEvent: 'stream_end',
+        deferredCount: batchState.deferredStrReplaces.length,
+        deferredFiles: batchState.deferredStrReplaces.map(
+          (d) => d.toolCall.input.path,
+        ),
+        agentStepId,
+        userInputId,
+      },
+      `stream-parser: Triggering batch str_replace execution (${batchState.deferredStrReplaces.length} deferred tools) due to stream end`,
+    )
+
+    batchState.strReplacePhaseComplete = true
+
+    // Execute all deferred str_replace tools as a batch
+    previousToolCallFinished = previousToolCallFinished.then(async () => {
+      logger.info(
+        {
+          agentStepId,
+          userInputId,
+          deferredCount: batchState.deferredStrReplaces.length,
+        },
+        'stream-parser: About to call executeBatchStrReplaces from stream end handler',
+      )
+      await executeBatchStrReplaces({
+        deferredStrReplaces: batchState.deferredStrReplaces,
+        toolCalls,
+        toolResults,
+        ws,
+        agentTemplate,
+        fileContext,
+        agentStepId,
+        clientSessionId,
+        userInputId,
+        fullResponse: fullResponseChunks.join(''),
+        onResponseChunk,
+        state,
+        userId,
+      })
+      logger.info(
+        {
+          agentStepId,
+          userInputId,
+        },
+        'stream-parser: Completed executeBatchStrReplaces from stream end handler',
+      )
+    })
+  }
+
+  await previousToolCallFinished
 
   return {
     toolCalls,
@@ -232,6 +357,5 @@
     state,
     fullResponse: fullResponseChunks.join(''),
     fullResponseChunks,
-    messageId,
   }
 }
diff --git a/bun.lock b/bun.lock
index 933bdf12b..398f1b60a 100644
--- a/bun.lock
+++ b/bun.lock
@@ -49,6 +49,7 @@
         "@jitl/quickjs-wasmfile-release-sync": "0.31.0",
         "@openrouter/ai-sdk-provider": "1.1.2",
         "ai": "5.0.0",
+        "benchify": "^0.1.0-alpha.41",
         "cors": "^2.8.5",
         "diff": "5.2.0",
         "dotenv": "16.4.5",
@@ -236,7 +237,7 @@
     },
     "sdk": {
       "name": "@codebuff/sdk",
-      "version": "0.2.3",
+      "version": "0.2.4",
      "dependencies": {
        "@vscode/tree-sitter-wasm": "0.1.4",
        "ai": "^5.0.0",
@@ -1627,6 +1628,8 @@
    "basic-ftp": ["basic-ftp@5.0.5", "", {}, "sha512-4Bcg1P8xhUuqcii/S0Z9wiHIrQVPMermM1any+MX5GeGD7faD3/msQUDGLol9wOcz4/jbg/WJnGqoJF6LiBdtg=="],

+    "benchify": ["benchify@0.1.0-alpha.41", "", { "dependencies": { "minimatch": "^9.0.3" }, "peerDependencies": { "react": ">=16.8.0" }, "optionalPeers": ["react"] }, "sha512-iZAH2JFcGld/lruJEZKO9dv7XAU8ozEznPtxNLQj+6s1CQMIohzRLnEbvCWLWkMoqSQlJOIp2gCY6N9gt956yQ=="],
+
    "bidi-js": ["bidi-js@1.0.3", "", { "dependencies": { "require-from-string": "^2.0.2" } }, "sha512-RKshQI1R3YQ+n9YJz2QQ147P66ELpa1FQEg20Dk8oW9t2KgLbpDLLp9aGZ7y8WHSshDknG0bknqGw5/tyCs5tw=="],

    "big.js": ["big.js@6.2.2", "", {}, "sha512-y/ie+Faknx7sZA5MfGA2xKlu0GDv8RWrXGsmlteyJQ2lvoKv9GBK/fpRMc2qlSoBAgNxrixICFCBefIq8WCQpQ=="],
@@ -4201,6 +4204,8 @@
    "babel-plugin-istanbul/istanbul-lib-instrument": ["istanbul-lib-instrument@5.2.1", "", { "dependencies": { "@babel/core": "^7.12.3", "@babel/parser": "^7.14.7", "@istanbuljs/schema": "^0.1.2", "istanbul-lib-coverage": "^3.2.0", "semver": "^6.3.0" } }, "sha512-pzqtp31nLv/XFOzXGuvhCb8qhjmTVo5vjVk19XE4CRlSWz0KoeJ3bw9XsA7nOp9YBf4qHjwBxkDzKcME/J29Yg=="],

+    "benchify/minimatch": ["minimatch@9.0.5", "", { "dependencies": { "brace-expansion": "^2.0.1" } }, "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow=="],
+
    "bl/buffer": ["buffer@5.7.1", "", { "dependencies": { "base64-js": "^1.3.1", "ieee754": "^1.1.13" } }, "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ=="],

    "bl/readable-stream": ["readable-stream@3.6.2", "", { "dependencies": { "inherits": "^2.0.3", "string_decoder": "^1.1.1", "util-deprecate": "^1.0.1" } }, "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA=="],
@@ -4939,6 +4944,8 @@
    "babel-plugin-istanbul/istanbul-lib-instrument/semver": ["semver@6.3.1", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA=="],

+    "benchify/minimatch/brace-expansion": ["brace-expansion@2.0.2", "", { "dependencies": { "balanced-match": "^1.0.0" } }, "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ=="],
+
    "body-parser/debug/ms": ["ms@2.0.0", "", {}, "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="],

    "chalk/ansi-styles/color-convert": ["color-convert@2.0.1", "", { "dependencies": { "color-name": "~1.1.4" } }, "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ=="],
diff --git a/npm-app/src/index.ts b/npm-app/src/index.ts
index 57fbcbe9e..57507e49b 100644
--- a/npm-app/src/index.ts
+++ b/npm-app/src/index.ts
@@ -52,6 +52,7 @@ async function codebuff({
   cwd,
   trace,
 }: CliOptions) {
+  console.log('🚀 Starting Codebuff!')
   enableSquashNewlines()
   const workingDir = getWorkingDirectory()
   const projectRoot = getProjectRoot()
diff --git a/npm-app/src/tool-handlers.ts b/npm-app/src/tool-handlers.ts
index 7e90d8126..cf8c947c8 100644
--- a/npm-app/src/tool-handlers.ts
+++ b/npm-app/src/tool-handlers.ts
@@ -67,16 +67,26 @@ export const handleUpdateFile = async <
     console.log(green(`- Created ${file} ${counts}`))
   }
   for (const file of modified) {
-    // Calculate added/deleted lines from the diff content
+    // Calculate added/deleted lines from the diff content, excluding metadata
     let addedLines = 0
     let deletedLines = 0
-    lines.forEach((line) => {
+
+    for (const line of lines) {
+      // Skip all diff metadata lines (headers, hunk headers, etc.)
+      if (
+        line.startsWith('---') ||
+        line.startsWith('+++') ||
+        line.startsWith('@@')
+      ) {
+        continue
+      }
+
+      // Count actual added/removed code lines
       if (line.startsWith('+')) {
         addedLines++
       } else if (line.startsWith('-')) {
         deletedLines++
       }
-    })
+    }
 
     const counts = `(${green(`+${addedLines}`)}, ${red(`-${deletedLines}`)})`
     result.push([
diff --git a/packages/internal/src/env.ts b/packages/internal/src/env.ts
index ecc451030..44db06004 100644
--- a/packages/internal/src/env.ts
+++ b/packages/internal/src/env.ts
@@ -10,6 +10,7 @@ const envSchema = {
   server: {
     // Backend variables
     CODEBUFF_API_KEY: z.string().optional(),
+    BENCHIFY_API_KEY: z.string().optional(),
    OPEN_ROUTER_API_KEY: z.string().min(1),
    RELACE_API_KEY: z.string().min(1),
    LINKUP_API_KEY: z.string().min(1),
@@ -51,6 +52,7 @@
   runtimeEnv: {
     // Backend variables
     CODEBUFF_API_KEY: process.env.CODEBUFF_API_KEY,
+    BENCHIFY_API_KEY: process.env.BENCHIFY_API_KEY,
    OPEN_ROUTER_API_KEY: process.env.OPEN_ROUTER_API_KEY,
    RELACE_API_KEY: process.env.RELACE_API_KEY,
    LINKUP_API_KEY: process.env.LINKUP_API_KEY,