Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .github/workflows/e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,6 @@ on:
branches:
- main

permissions:
actions: read
contents: read

# Concurrency: only one E2E job runs at a time
concurrency:
group: e2e-tests
Expand All @@ -50,6 +46,10 @@ jobs:
needs: matrix-setup
runs-on: ubuntu-latest
timeout-minutes: 40
permissions:
actions: read
contents: read
copilot-requests: write
strategy:
fail-fast: false
matrix:
Expand Down Expand Up @@ -95,7 +95,7 @@ jobs:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }}
FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }}
COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }}
COPILOT_GITHUB_TOKEN: ${{ github.token }}
run: go run ./e2e/bootstrap

- name: Run E2E Tests
Expand All @@ -106,7 +106,7 @@ jobs:
E2E_CODEX_MODEL: ${{ matrix.agent == 'codex' && 'gpt-5.1-codex-mini' || '' }}
CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }}
FACTORY_API_KEY: ${{ secrets.FACTORY_API_KEY }}
COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }}
COPILOT_GITHUB_TOKEN: ${{ github.token }}
E2E_CONCURRENT_TEST_LIMIT: ${{ matrix.agent == 'gemini-cli' && '6' || matrix.agent == 'factoryai-droid' && '1' || '' }}
run: mise run test:e2e --agent ${{ matrix.agent }} ${{ matrix.agent == 'roger-roger' && 'TestExternalAgent' || '' }}

Expand Down
8 changes: 4 additions & 4 deletions e2e/agents/copilot-cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func (c *CopilotCLI) IsTransientError(out Output, err error) bool {
"ECONNRESET",
"ETIMEDOUT",
"Too Many Requests",
// gpt-4.1 sometimes calls Copilot's Edit tool without old_str,
// Copilot sometimes calls its Edit tool without old_str,
// resulting in zero code changes despite a successful exit.
"old_str is required",
} {
Expand All @@ -57,7 +57,7 @@ func (c *CopilotCLI) Bootstrap() error {
}

func (c *CopilotCLI) RunPrompt(ctx context.Context, dir string, prompt string, opts ...Option) (Output, error) {
cfg := &runConfig{Model: "gpt-4.1"}
cfg := &runConfig{Model: "claude-haiku-4.5"}
for _, o := range opts {
o(cfg)
}
Expand Down Expand Up @@ -103,7 +103,7 @@ func (c *CopilotCLI) RunPrompt(ctx context.Context, dir string, prompt string, o
ExitCode: exitCode,
}

// gpt-4.1 sometimes calls Copilot's Edit tool without required parameters,
// Copilot sometimes calls its Edit tool without required parameters,
// producing zero code changes despite exit 0. Surface this as an error so
// the transient-error retry mechanism can restart the scenario.
// Only trigger when copilot reports zero changes — it may retry internally.
Expand Down Expand Up @@ -297,7 +297,7 @@ func (c *CopilotCLI) StartSession(ctx context.Context, dir string) (Session, err
}
}
args := append([]string{"env"}, envArgs...)
args = append(args, bin, "--model", "gpt-4.1", "--allow-all")
args = append(args, bin, "--model", "claude-haiku-4.5", "--allow-all")

name := fmt.Sprintf("copilot-test-%d", time.Now().UnixNano())
// Strip CI env vars that may affect interactive mode.
Expand Down
Loading