diff --git a/cli/azd/extensions/azure.ai.agents/cspell.yaml b/cli/azd/extensions/azure.ai.agents/cspell.yaml index 4041a93cd5f..eb8b9cdf86c 100644 --- a/cli/azd/extensions/azure.ai.agents/cspell.yaml +++ b/cli/azd/extensions/azure.ai.agents/cspell.yaml @@ -41,6 +41,7 @@ words: - aoai - authorizationfailed - azdaiagent + - azuresdk - bicepless - CLIENTSECRET - curr @@ -89,5 +90,9 @@ words: - ttfb - Bhadauria - ensurepip + # Live E2E (Tier 2) Go driver + - creack + - elive + - testdir # Test infrastructure - recordproxy diff --git a/cli/azd/extensions/azure.ai.agents/go.mod b/cli/azd/extensions/azure.ai.agents/go.mod index b7a6b685f00..dc8e461f274 100644 --- a/cli/azd/extensions/azure.ai.agents/go.mod +++ b/cli/azd/extensions/azure.ai.agents/go.mod @@ -36,6 +36,9 @@ require ( require github.com/denormal/go-gitignore v0.0.0-20180930084346-ae8ad1d07817 require ( + github.com/Netflix/go-expect v0.0.0-20220104043353-73e0943537d2 + github.com/creack/pty v1.1.24 + github.com/hinshun/vt10x v0.0.0-20220119200601-820417d04eec go.opentelemetry.io/otel v1.43.0 go.opentelemetry.io/otel/trace v1.43.0 golang.org/x/term v0.44.0 diff --git a/cli/azd/extensions/azure.ai.agents/go.sum b/cli/azd/extensions/azure.ai.agents/go.sum index 396d8b1ea21..3a4cc258c9e 100644 --- a/cli/azd/extensions/azure.ai.agents/go.sum +++ b/cli/azd/extensions/azure.ai.agents/go.sum @@ -100,8 +100,9 @@ github.com/clipperhouse/stringish v0.1.1/go.mod h1:v/WhFtE1q0ovMta2+m+UbpZ+2/HEX github.com/clipperhouse/uax29/v2 v2.5.0 h1:x7T0T4eTHDONxFJsL94uKNKPHrclyFI0lm7+w94cO8U= github.com/clipperhouse/uax29/v2 v2.5.0/go.mod h1:Wn1g7MK6OoeDT0vL+Q0SQLDz/KpfsVRgg6W7ihQeh4g= github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= -github.com/creack/pty v1.1.17 h1:QeVUsEDNrLBW4tMgZHvxy18sKtr6VI492kBhUfhDJNI= github.com/creack/pty v1.1.17/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= +github.com/creack/pty v1.1.24 h1:bJrF4RRfyJnbTJqzRLHzcGaZK1NeM5kTC9jGgovnR1s= 
+github.com/creack/pty v1.1.24/go.mod h1:08sCNb52WyoAwi2QDyzUCTgcvVFhUzewun7wtTfvcwE= github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964 h1:y5HC9v93H5EPKqaS1UYVg1uYah5Xf51mBfIoWehClUQ= github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964/go.mod h1:Xd9hchkHSWYkEqJwUGisez3G1QY8Ryz0sdWrLPMGjLk= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= diff --git a/cli/azd/extensions/azure.ai.agents/tests/e2e-live/README.md b/cli/azd/extensions/azure.ai.agents/tests/e2e-live/README.md new file mode 100644 index 00000000000..e085e26c10c --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/tests/e2e-live/README.md @@ -0,0 +1,147 @@ +# azure.ai.agents — Live E2E (Tier 2) + +Full golden-path tests that exercise the real `azd ai agent` CLI against **live +Azure** resources: + +``` +init → provision → deploy → invoke → down +``` + +A Go test driver answers the interactive `azd ai agent init` prompts through a +**pseudo-terminal** — [go-expect] sends keystrokes and [vt10x] renders the CLI's +terminal UI so the test can assert on the on-screen text, with [creack/pty] +providing the PTY. Synchronization is **event-driven**: the driver blocks on +go-expect reads until the survey UI stops emitting — i.e. a prompt is fully +drawn and waiting for input — instead of sleeping a fixed interval, then +dispatches on the rendered prompt text. The deploy mode is chosen up front via +`azd ai agent init --deploy-mode code|container` (it is not an interactive +prompt once a manifest is supplied). The non-interactive phases (`provision`, +`deploy`, `invoke`, `down`) shell out to `azd ... --no-prompt`. 
Both deploy +modes are covered: + +| Mode | What it does | +| ----------- | ------------------------------------------------------- | +| `code` | Source-code (zip) deploy of the agent service | +| `container` | Container (ACR build) deploy of the agent service | + +The two modes run **sequentially** (same subscription → avoids resource races). + +[go-expect]: https://github.com/Netflix/go-expect +[vt10x]: https://github.com/hinshun/vt10x +[creack/pty]: https://github.com/creack/pty + +## How the `init` driver answers prompts + +The interactive sub-flows (Foundry project selection, model/deployment) branch +on live runtime state, so the exact set and order of prompts is not fixed ahead +of time. Rather than a linear expect script, the driver runs a **dispatch +loop**: it waits for output to settle, reads the rendered screen, matches the +active `?` prompt against the verbatim strings the extension prints — each case +in `dispatchPrompt` is annotated with the source `file:line` it mirrors — and +sends the answer. A loop detector bounds any prompt that fails to advance so a +wording change upstream fails fast instead of hanging. + +Because the prompt strings are calibrated against the extension source, changes +there can require updating `dispatchPrompt`. And because a real PTY, Azure auth, +and the installed extension are all required, the **end-to-end interactive +correctness is only exercised by a live Tier 2 run** — it cannot be reproduced +by the platform-agnostic unit tests in this package. 
+ +## Where this fits + +| Tier | Coverage | Where it runs | +| ---- | ----------------------------------------- | ------------------------------------------------------ | +| 0 | Offline CLI validation (no auth) | PR gate — `.github/workflows/lint-ext-azure-ai-agents.yml` | +| 1 | `init` variants (recording/playback) | PR gate — same workflow | +| 2 | **Full live golden path** (this folder) | **`eng/pipelines/ext-azure-ai-agents-live.yml`** | + +Live Azure access is deliberately kept **out** of the automatic PR pipeline (Azure +SDK EngSys / SFI guidance). Tier 2 runs only on demand or on a schedule. + +## Running in CI + +Pipeline: `eng/pipelines/ext-azure-ai-agents-live.yml` (ADO). The Tier 2 step +builds `azd` + the extension and runs `go test -run TestTier2Live` inside an +`AzureCLI@2` task (so the federated az session stays valid for the whole run). + +- **On demand (per PR):** comment `/azp run ext-azure-ai-agents-live` on the PR. + Requires write permission on the repo. +- **Scheduled:** weekly, Monday 07:00 UTC against `main`. +- **Manual:** queue the pipeline and pick `deployModes` = `both` / `code` / + `container`. + +Logs for each run are published as the `tier2-live-logs-` artifact. + +### One-time admin setup + +1. **Register the pipeline** in Azure DevOps pointing at + `eng/pipelines/ext-azure-ai-agents-live.yml`, named `ext-azure-ai-agents-live` + (the name used by `/azp run`). +2. **Service connection** — the `serviceConnection` parameter (default + `azure-sdk-tests`) must map to the shared **TME test subscription** via OIDC / + workload-identity federation. The federated identity needs enough RBAC to + create Foundry projects and deploy models (Contributor + Azure AI Developer + + Cognitive Services Contributor, or equivalent). +3. **GitHub auth** — clones of the starter template use the azure-sdk org secret + `azuresdk-github-pat` (already provided by the Azure SDK ADO project) to avoid + anonymous rate limits, so no extra secret setup is required. 
+ +## Running locally (Linux / WSL) + +The live driver is tagged `//go:build linux` (the platform CI runs on) because +it relies on a real PTY and a controlling terminal. On Windows, run it under WSL. + +Prerequisites: Linux (including WSL), a Go toolchain matching `go.mod` +(`GOTOOLCHAIN=auto` fetches the right version automatically), `azd` (>= 1.25.5) +with the `azure.ai.agents` extension installed, and `az` logged in. + +Run from the extension root (`cli/azd/extensions/azure.ai.agents`): + +```bash +# Use azd's built-in auth locally (NOT az CLI auth — it is slow under WSL). +azd config unset auth.useAzCliAuth +azd auth login + +# Both modes (sequential): +AZURE_AI_AGENTS_E2E_LIVE=1 E2E_DEPLOY_MODES=both \ + go test -run TestTier2Live -count=1 -timeout 130m -v ./tests/e2e-live/ + +# A single golden path: +AZURE_AI_AGENTS_E2E_LIVE=1 E2E_DEPLOY_MODES=code \ + go test -run TestTier2Live -count=1 -timeout 90m -v ./tests/e2e-live/ +``` + +Without `AZURE_AI_AGENTS_E2E_LIVE=1` the test is **skipped**, so the package is +safe to include in a normal `go test ./...`.
+ +### Useful environment variables + +| Variable | Default | Purpose | +| -------------------------- | ------------------------------ | ----------------------------------------------------------- | +| `AZURE_AI_AGENTS_E2E_LIVE` | — | **Required** `=1` gate; unset → the test is skipped | +| `E2E_DEPLOY_MODES` | `both` | `both` / `code` / `container` | +| `E2E_CREATE_PROJECT` | `false` | `true` → always create a fresh Foundry project | +| `E2E_PROJECT` | — | Name of an existing Foundry project to select instead | +| `E2E_LOCATION` | `eastus2` | Region for new projects (needs model quota) | +| `E2E_SUBSCRIPTION` | — | Subscription id (filters the picker) | +| `E2E_TENANT` | — | AAD tenant id (sets `AZURE_TENANT_ID` for azd) | +| `E2E_USE_AZ_CLI_AUTH` | — | `true` → set `auth.useAzCliAuth` (CI; auto-on under ADO/GHA) | +| `E2E_TESTDIR` | `/tmp/e2e-tests/tier2-<mode>` | Scratch dir for the scaffolded project | +| `E2E_KEEP_ARTIFACTS` | — | `true` → keep the per-run `AZD_CONFIG_DIR` copy for debugging | +| `GH_TOKEN` | — | GitHub token for template clone (optional) | + +In CI the driver auto-detects GitHub Actions (`GITHUB_ACTIONS`) and Azure DevOps +(`TF_BUILD`) and switches to `az` CLI auth automatically. Azure resources are +always torn down (`azd down --force --purge`) via `t.Cleanup`, even on failure.
+ +## Files + +| File | Purpose | +| -------------------- | -------------------------------------------------------------------------------- | +| `tier2_live_test.go` | `TestTier2Live` — drives init/provision/deploy/invoke/down per mode (Linux-only) | +| `console_test.go` | PTY + vt10x console helper that renders the interactive CLI (Linux-only) | +| `assert.go` | Pure-logic answer matcher (`responseHasExpectedAnswer`) — builds on any platform | +| `assert_test.go` | Unit tests for the matcher — run anywhere via `go test ./tests/e2e-live/` | + +Each phase has bounded timeouts and best-effort `azd down --force --purge` +teardown so a crash mid-run does not leak billable resources. diff --git a/cli/azd/extensions/azure.ai.agents/tests/e2e-live/assert.go b/cli/azd/extensions/azure.ai.agents/tests/e2e-live/assert.go new file mode 100644 index 00000000000..6026116653b --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/tests/e2e-live/assert.go @@ -0,0 +1,100 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +// Package e2elive contains the Tier 2 live golden-path end-to-end test for the +// azure.ai.agents extension: init -> provision -> deploy -> invoke -> down, +// driven against real Azure resources. See README.md for setup and how to run. +package e2elive + +import ( + "regexp" + "unicode" +) + +// spelledFourRe matches the spelled-out word "four" as a standalone word +// (case-insensitive), e.g. "the answer is four". +var spelledFourRe = regexp.MustCompile(`(?i)\bfour\b`) + +// agentLineRe matches the start of an agent reply line, which invoke prints as +// "[] " (invoke.go printf "[%s] %s"). responseEndRe matches the +// green footer invoke prints after the reply, "Server responded in ..." — the +// region between them is exactly the model's answer, with no surrounding noise. 
+var ( + agentLineRe = regexp.MustCompile(`(?m)^\[[^\]]+\] `) + responseEndRe = regexp.MustCompile(`Server responded in`) +) + +// agentResponseRegion returns just the agent's printed answer, sliced from the +// first "[] " line to the "Server responded in" footer. Scoping the +// 2+2 check to this region keeps stray "4"s from the rest of the CLI output +// (model names, versions, status codes) from passing the test. If either marker +// is missing the format changed, so it returns the full text and lets the +// standalone-digit rules below guard against false positives. +func agentResponseRegion(out string) string { + start := agentLineRe.FindStringIndex(out) + if start == nil { + return out + } + rest := out[start[0]:] + if end := responseEndRe.FindStringIndex(rest); end != nil { + return rest[:end[0]] + } + return out +} + +// responseHasExpectedAnswer reports whether text answers "what is 2+2?" with a +// standalone "4" or the spelled-out word "four". +// +// A live model may answer either, and the captured CLI output also contains +// unrelated digits — model names ("gpt-4o-mini"), versions ("4.1"), or status +// codes ("404") — so a bare substring search would produce false positives. +// The "4" must therefore stand alone: not part of a larger word or number. +// The standalone-"4" rule is the lookaround (?" form is treated as a version/decimal token to +// keep "4.1"-style strings out, and a live model answering "2+2" replies "4" or +// "four", never "4.0". +// +// Go's regexp engine (RE2) has no lookahead/lookbehind, so the standalone-"4" +// rule is implemented by scanning runes instead of with a single expression. +func responseHasExpectedAnswer(text string) bool { + if spelledFourRe.MatchString(text) { + return true + } + return hasStandaloneFour(text) +} + +// hasStandaloneFour reports whether text contains a "4" digit that stands alone, +// reproducing the lookaround in the Python regex (? 0 { + if prev := runes[i-1]; prev == '.' 
|| isWordRune(prev) { + continue + } + } + if i+2 < len(runes) && runes[i+1] == '.' && unicode.IsDigit(runes[i+2]) { + continue + } + if i+1 < len(runes) && isWordRune(runes[i+1]) { + continue + } + return true + } + return false +} + +// isWordRune reports whether r is a word character, matching the Python regex +// \w class (Unicode letters, digits, and underscore). +func isWordRune(r rune) bool { + return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r) +} diff --git a/cli/azd/extensions/azure.ai.agents/tests/e2e-live/assert_test.go b/cli/azd/extensions/azure.ai.agents/tests/e2e-live/assert_test.go new file mode 100644 index 00000000000..d81b7ba309a --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/tests/e2e-live/assert_test.go @@ -0,0 +1,87 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package e2elive + +import "testing" + +func TestResponseHasExpectedAnswer(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + text string + want bool + }{ + {"plain four digit", "The answer is 4.", true}, + {"bare four", "4", true}, + {"equation", "2+2=4", true}, + {"spelled word", "It is four.", true}, + {"spelled upper", "FOUR", true}, + {"parenthesized", "(4)", true}, + {"trailing period mid-sentence", "the value 4. 
is final", true}, + {"model name", "gpt-4o-mini", false}, + {"version", "4.1", false}, + {"decimal four", "4.0", false}, // intentional: see responseHasExpectedAnswer doc + {"status code", "404", false}, + {"price", "$40", false}, + {"ratio", "24/7", false}, + {"fourteen", "fourteen apples", false}, + {"no answer", "I am not sure", false}, + {"empty", "", false}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + if got := responseHasExpectedAnswer(tc.text); got != tc.want { + t.Errorf("responseHasExpectedAnswer(%q) = %v, want %v", tc.text, got, tc.want) + } + }) + } +} + +func TestAgentResponseRegion(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + out string + want bool // responseHasExpectedAnswer over the sliced region + }{ + { + "answer scoped between markers", + "using model gpt-4o-mini\n[agent] The answer is 4.\nServer responded in 2s (first byte: 1s)\n", + true, + }, + { + "stray digits outside region rejected", + "gpt-4o-mini deployed (404 cached)\n[agent] I am not sure.\nServer responded in 4.0s\n", + false, + }, + { + "standalone 4 before agent line excluded by region", + "completed step 4\n[agent] I don't know.\nServer responded in 1s\n", + false, + }, + { + "missing footer falls back to full text", + "using gpt-4o-mini\n[agent] four", + true, + }, + { + "no agent line falls back to full text", + "the answer is four", + true, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + if got := responseHasExpectedAnswer(agentResponseRegion(tc.out)); got != tc.want { + t.Errorf("region(%q) -> %v, want %v", tc.out, got, tc.want) + } + }) + } +} diff --git a/cli/azd/extensions/azure.ai.agents/tests/e2e-live/console_test.go b/cli/azd/extensions/azure.ai.agents/tests/e2e-live/console_test.go new file mode 100644 index 00000000000..2a0153b571a --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/tests/e2e-live/console_test.go @@ -0,0 +1,204 @@ +// Copyright (c) 
Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +//go:build linux + +package e2elive + +import ( + "fmt" + "os" + "strings" + "sync" + "time" + + expect "github.com/Netflix/go-expect" + "github.com/creack/pty" + "github.com/hinshun/vt10x" +) + +// Key sequences sent to the interactive CLI over the pseudo-terminal. +const ( + keyEnter = "\r" + keyDown = "\x1b[B" + keyUp = "\x1b[A" +) + +// tailBytes caps the rolling raw-output buffer kept for failure diagnostics +// (the interactive init screen is otherwise not echoed to the test log). +const tailBytes = 16 << 10 + +// console drives an interactive child process through a pseudo-terminal and +// renders its output with a vt10x virtual terminal so tests can both block on +// expected output (go-expect) and assert on the on-screen text (the role tmux +// capture-pane played in the old driver). +// +// Wiring (mirrors AlecAivazis/survey's posix expect tests): +// +// child stdio ── ec.Tty() (pts) ─┐ +// ├─ go-expect tees child output ─► vt10x screen + tail +// vt10x query replies ─► extSlave ┘ ▲ +// extMaster ─ go-expect feeds back to child stdin +// +// go-expect creates its own internal pty for the child (ec.Tty()). The external +// pty pair (extMaster/extSlave) exists solely so vt10x can answer terminal +// queries (e.g. cursor-position reports) back to the child; it is closed via +// WithCloser when the console is closed. +type console struct { + term vt10x.Terminal + ec *expect.Console + tail *ringBuffer +} + +// newConsole creates a console with a virtual terminal of the given size. 
+func newConsole(cols, rows int) (*console, error) { + extMaster, extSlave, err := pty.Open() + if err != nil { + return nil, fmt.Errorf("open feedback pty: %w", err) + } + + term := vt10x.New(vt10x.WithWriter(extSlave), vt10x.WithSize(cols, rows)) + tail := newRingBuffer(tailBytes) + + // go-expect tees everything it reads to these writers, so every read driven + // by expect()/waitForQuiet() simultaneously renders the screen (term) and + // records the raw bytes (tail) for diagnostics. No WithDefaultTimeout: each + // read's deadline is supplied per call via expect.WithTimeout. + ec, err := expect.NewConsole( + expect.WithStdin(extMaster), + expect.WithStdout(term, tail), + expect.WithCloser(extMaster, extSlave), + ) + if err != nil { + _ = extMaster.Close() + _ = extSlave.Close() + return nil, fmt.Errorf("create expect console: %w", err) + } + + // Match the child tty size to the virtual terminal so line wrapping in the + // rendered screen matches what the CLI actually drew. + //nolint:gosec // cols/rows are small fixed test dimensions; no overflow. + _ = pty.Setsize(ec.Tty(), &pty.Winsize{Cols: uint16(cols), Rows: uint16(rows)}) + + return &console{term: term, ec: ec, tail: tail}, nil +} + +// tty returns the slave pseudo-terminal the child process should attach its +// stdin/stdout/stderr to. +func (c *console) tty() *os.File { + return c.ec.Tty() +} + +// send writes raw bytes (keystrokes) to the child's tty. +func (c *console) send(s string) { + _, _ = c.ec.Send(s) +} + +// expect reads child output (teeing it to the screen and the tail buffer) until +// one of opts matches, idle elapses with no new byte, or the child's tty +// closes. It is the event-driven synchronization primitive that replaces the +// old fixed-interval polling: go-expect only renders output to the screen while +// a read is in flight, so every wait routes through here. 
+// +// Return contract (go-expect's passthrough pipe, see passthrough_pipe.go): +// - a match => (buf, nil) +// - idle of silence => (buf, err) with os.IsTimeout(err) == true +// - child exit / pts close => (buf, err) with a non-timeout error +func (c *console) expect(idle time.Duration, opts ...expect.ExpectOpt) (string, error) { + return c.ec.Expect(append(opts, expect.WithTimeout(idle))...) +} + +// waitForQuiet renders pending output to the screen until the UI stops emitting +// for quiet (a survey prompt fully drawn and now blocking on input) or the +// child exits. It returns exited=true once the child's tty has closed. +// +// It passes no matchers, so go-expect can only return on the idle read deadline +// (os.IsTimeout) or on a terminal read error (EOF / pts closed == child gone). +func (c *console) waitForQuiet(quiet time.Duration) (exited bool) { + _, err := c.expect(quiet) + return err != nil && !os.IsTimeout(err) +} + +// screen returns the current rendered virtual-terminal contents, cleaned of NUL +// padding and trailing whitespace on each line. +func (c *console) screen() string { + return cleanScreen(c.term.String()) +} + +// tailString returns the most recent raw child output captured for diagnostics. +func (c *console) tailString() string { + return c.tail.String() +} + +// close tears down the console and all of its pseudo-terminals. +func (c *console) close() { + _ = c.ec.Close() +} + +// ringBuffer is an io.Writer that retains only the last max bytes written, used +// to keep a bounded tail of raw child output for failure diagnostics. +type ringBuffer struct { + mu sync.Mutex + buf []byte + max int +} + +func newRingBuffer(max int) *ringBuffer { + return &ringBuffer{max: max} +} + +func (r *ringBuffer) Write(p []byte) (int, error) { + r.mu.Lock() + defer r.mu.Unlock() + r.buf = append(r.buf, p...) 
+ if len(r.buf) > r.max { + r.buf = r.buf[len(r.buf)-r.max:] + } + return len(p), nil +} + +func (r *ringBuffer) String() string { + r.mu.Lock() + defer r.mu.Unlock() + return string(r.buf) +} + +// cleanScreen normalizes a vt10x screen dump: empty cells render as NUL, which +// is replaced with spaces, then trailing whitespace is trimmed from each row. +func cleanScreen(s string) string { + s = strings.ReplaceAll(s, "\x00", " ") + lines := strings.Split(s, "\n") + for i, l := range lines { + lines[i] = strings.TrimRight(l, " \t") + } + return strings.Join(lines, "\n") +} + +// nonEmptyLines returns the screen's non-blank lines, trimmed. +func nonEmptyLines(screen string) []string { + var out []string + for l := range strings.SplitSeq(screen, "\n") { + if t := strings.TrimSpace(l); t != "" { + out = append(out, t) + } + } + return out +} + +// activePrompt returns the lowercased text of the last survey "?" prompt line on +// screen, or "" if none is visible. The last "?" line is the one survey is +// currently blocking on (earlier "?" lines are answered prompts it echoed). +func activePrompt(screen string) string { + lines := nonEmptyLines(screen) + for i := len(lines) - 1; i >= 0; i-- { + if strings.HasPrefix(lines[i], "?") { + return strings.ToLower(lines[i]) + } + } + return "" +} + +// screenContains reports whether screen contains sub (case-insensitive). +func screenContains(screen, sub string) bool { + return strings.Contains(strings.ToLower(screen), strings.ToLower(sub)) +} diff --git a/cli/azd/extensions/azure.ai.agents/tests/e2e-live/tier2_live_test.go b/cli/azd/extensions/azure.ai.agents/tests/e2e-live/tier2_live_test.go new file mode 100644 index 00000000000..ad963a1bbe8 --- /dev/null +++ b/cli/azd/extensions/azure.ai.agents/tests/e2e-live/tier2_live_test.go @@ -0,0 +1,954 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +//go:build linux + +package e2elive + +import ( + "bytes" + "context" + "crypto/sha256" + "encoding/hex" + "errors" + "fmt" + "io" + "io/fs" + "os" + "os/exec" + "path/filepath" + "strings" + "syscall" + "testing" + "time" + + "gopkg.in/yaml.v3" +) + +// liveEnvVar gates the live test: it only runs when set to "1". This keeps the +// expensive, Azure-touching test out of the normal `go test ./...` run. +const liveEnvVar = "AZURE_AI_AGENTS_E2E_LIVE" + +// Virtual terminal dimensions for the interactive init phase. +const ( + initCols = 200 + initRows = 50 +) + +// Phase time budgets. The per-mode runTimeout must exceed the sum of the phase +// budgets so a slow-but-healthy run is never preempted (which would also skip +// the teardown and leak resources). Two modes at runTimeout each fit inside the +// `go test -timeout 125m` cap used by the pipeline, whose ADO step adds a small +// margin on top before force-killing the process. +const ( + runTimeout = 60 * time.Minute + initTimeout = 8 * time.Minute + provisionTimeout = 10 * time.Minute + deployTimeout = 10 * time.Minute + invokeTimeout = 3 * time.Minute + monitorTimeout = 60 * time.Second + teardownTimeout = 10 * time.Minute + tagTimeout = 2 * time.Minute + + // deleteAfterRetention is how far ahead the DeleteAfter cleanup tag is set + // on the provisioned resource group. It must exceed a full run so a healthy + // in-flight test is never reclaimed, with margin to inspect a failed run + // before the EngSys garbage collector deletes it. + deleteAfterRetention = 48 * time.Hour + + // Event-driven tuning for the interactive init loop. promptQuiet is how long + // the survey UI must stop emitting before we treat the current prompt as + // "drawn and waiting for input"; listSettle is the shorter pause we let a + // filtered Select list redraw after typing before confirming with Enter. + // Both replace the old fixed 3s poll; the hard init cap is the ctx deadline. 
+ promptQuiet = 800 * time.Millisecond + listSettle = 600 * time.Millisecond +) + +// TestTier2Live exercises the full golden path against live Azure for each +// requested deploy mode, sequentially (concurrent deploys in one subscription +// race on shared resources and exhaust model quota). +func TestTier2Live(t *testing.T) { + if os.Getenv(liveEnvVar) != "1" { + t.Skipf("set %s=1 to run the live Tier 2 golden-path test", liveEnvVar) + } + + for _, mode := range deployModesFromEnv() { + t.Run(mode, func(t *testing.T) { + r := newRunner(t, mode) + ctx, cancel := context.WithTimeout(t.Context(), runTimeout) + defer cancel() + r.run(ctx) + }) + } +} + +// deployModesFromEnv reads E2E_DEPLOY_MODES (code|container|both); default both. +func deployModesFromEnv() []string { + switch strings.ToLower(strings.TrimSpace(os.Getenv("E2E_DEPLOY_MODES"))) { + case "code": + return []string{"code"} + case "container": + return []string{"container"} + default: + return []string{"code", "container"} + } +} + +// runner holds the per-mode state for one golden-path run. +type runner struct { + t *testing.T + mode string + testDir string + agentName string + env []string + projectDir string + c *console +} + +// newRunner prepares an isolated working directory, a private AZD_CONFIG_DIR +// (copied from ~/.azd so the installed extension is available), and a unique +// agent name, then registers teardown so resources are cleaned up even on +// failure. 
+func newRunner(t *testing.T, mode string) *runner { + t.Helper() + + testDir := getenvDefault("E2E_TESTDIR", "/tmp/e2e-tests/tier2-"+mode) + if err := assertSafeTestDir(testDir); err != nil { + t.Fatal(err) + } + if err := os.RemoveAll(testDir); err != nil { + t.Fatalf("clean test dir: %v", err) + } + if err := os.MkdirAll(testDir, 0o700); err != nil { + t.Fatalf("create test dir: %v", err) + } + + configDir := filepath.Join(os.TempDir(), "e2e-azd-config-"+mode) + setupConfigDir(t, configDir) + + env := os.Environ() + env = append(env, "AZD_CONFIG_DIR="+configDir) + if tenant := os.Getenv("E2E_TENANT"); tenant != "" { + env = append(env, "AZURE_TENANT_ID="+tenant) + } + if tok := ghToken(); tok != "" { + env = append(env, "GH_TOKEN="+tok, "GITHUB_TOKEN="+tok) + } + + r := &runner{ + t: t, + mode: mode, + testDir: testDir, + agentName: fmt.Sprintf("e2e-%s-%s", mode, shortHash(mode)), + env: env, + } + + // Cleanups run LIFO, so register the config-dir delete first and teardown + // second: teardown (azd down) runs before the config copy it relies on is + // removed. + if !envTrue("E2E_KEEP_ARTIFACTS") { + t.Cleanup(func() { _ = os.RemoveAll(configDir) }) + } + t.Cleanup(r.teardown) + + // CI (GitHub Actions / Azure DevOps / explicit override) uses the az CLI session for auth; + // local runs keep azd's built-in auth (az CLI auth is slow under WSL). Best-effort: result discarded. + if useAzCliAuth() { + _, _ = r.runAzd(t.Context(), testDir, time.Minute, + "config", "set", "auth.useAzCliAuth", "true") + } + + return r +} + +// setupConfigDir creates configDir as a copy of ~/.azd (so installed extensions +// resolve), or an empty dir if ~/.azd is absent. cp -a preserves the extension +// binary's executable bit.
+func setupConfigDir(t *testing.T, configDir string) {
+	t.Helper()
+
+	home, err := os.UserHomeDir()
+	if err != nil {
+		t.Fatalf("resolve home dir: %v", err)
+	}
+	defaultAzd := filepath.Join(home, ".azd")
+	if info, err := os.Stat(defaultAzd); err == nil && info.IsDir() {
+		// `cp -a SRC DST` nests the copy as DST/.azd when DST still exists, so a
+		// failed removal must abort the test instead of being silently ignored
+		// (azd would otherwise read an empty config dir).
+		if err := os.RemoveAll(configDir); err != nil {
+			t.Fatalf("reset azd config dir: %v", err)
+		}
+		//nolint:gosec // both paths derive from HOME / TempDir, not user input.
+		out, err := exec.Command("cp", "-a", defaultAzd, configDir).CombinedOutput()
+		if err != nil {
+			t.Fatalf("copy azd config dir: %v: %s", err, out)
+		}
+		return
+	}
+	// No default azd config to seed from: start with an empty private dir.
+	if err := os.MkdirAll(configDir, 0o700); err != nil {
+		t.Fatalf("create azd config dir: %v", err)
+	}
+}
+
+// run executes the phases in order, stopping at the first failure. Teardown is
+// registered separately as a cleanup, so it always runs.
+func (r *runner) run(ctx context.Context) {
+	if err := r.phaseInit(ctx); err != nil {
+		r.t.Errorf("init: %v", err)
+		return
+	}
+	if err := r.phaseProvision(ctx); err != nil {
+		r.t.Errorf("provision: %v", err)
+		return
+	}
+	if err := r.phaseDeploy(ctx); err != nil {
+		r.t.Errorf("deploy: %v", err)
+		return
+	}
+	if err := r.phaseInvoke(ctx); err != nil {
+		r.t.Errorf("invoke: %v", err)
+		return
+	}
+}
+
+// phaseInit runs `azd ai agent init` attached to a pseudo-terminal and drives
+// its interactive prompts until the project is scaffolded on disk.
+func (r *runner) phaseInit(ctx context.Context) error {
+	c, err := newConsole(initCols, initRows)
+	if err != nil {
+		return err
+	}
+	defer c.close()
+	r.c = c
+
+	ictx, cancel := context.WithTimeout(ctx, initTimeout)
+	defer cancel()
+
+	// Deploy mode is NOT an interactive prompt in the template/--agent-name
+	// flow: promptDeployMode (init_from_code.go) auto-resolves it to "container"
+	// when a manifest is provided, so it must be chosen via newInitCommand's
+	// --deploy-mode flag (init.go). r.mode is exactly "container" or "code".
+	args := []string{"ai", "agent", "init", "--agent-name", r.agentName, "--deploy-mode", r.mode}
+	//nolint:gosec // azd is a trusted fixed binary; args are test-controlled.
+	cmd := exec.CommandContext(ictx, "azd", args...)
+	cmd.Dir = r.testDir
+	cmd.Env = r.env
+	cmd.Stdin = c.tty()
+	cmd.Stdout = c.tty()
+	cmd.Stderr = c.tty()
+	// Give the child the pts as its controlling terminal (as tmux did), so
+	// survey treats it as a real interactive terminal.
+	cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true, Setctty: true}
+
+	if err := cmd.Start(); err != nil {
+		return fmt.Errorf("start azd ai agent init: %w", err)
+	}
+
+	// No separate render goroutine: go-expect's passthrough pipe drains the
+	// child's pty in the background, and driveInit's expect()/waitForQuiet()
+	// calls do the reading that renders the screen. (A concurrent reader would
+	// race those calls for the same stream.)
+	exited := make(chan struct{})
+	go func() {
+		_ = cmd.Wait()
+		close(exited)
+	}()
+
+	driveErr := r.driveInit(ictx, exited)
+
+	// Make sure the child is gone before returning (it normally exits itself).
+	select {
+	case <-exited:
+	case <-time.After(10 * time.Second):
+		_ = cmd.Process.Kill()
+		<-exited
+	}
+
+	return driveErr
+}
+
+// driveInit is the event-driven prompt loop: it waits (via go-expect) for the
+// survey UI to settle on a prompt, reads the rendered screen, and answers it,
+// until init reports completion (or the process exits, or it times out).
+//
+// Why a screen-dispatch loop and not a linear ExpectString script: the live
+// model/deployment and Foundry-project sub-flows branch on runtime state —
+// whether the just-created project already has the model deployed, region/model
+// availability, existing-name collisions — so the exact set and order of
+// prompts cannot be predetermined. A linear ExpectString sequence would desync
+// at the first conditional prompt. Instead we block on output settling (the
+// go-expect read), then dispatch on the verbatim prompt strings the extension
+// prints (each case annotated with the source function that prints it).
+func (r *runner) driveInit(ctx context.Context, exited <-chan struct{}) error {
+	var lastKey string
+	repeat := 0
+
+	for {
+		select {
+		case <-ctx.Done():
+			return fmt.Errorf("init timed out: %w\n--- tail ---\n%s",
+				ctx.Err(), tail(r.c.tailString(), 2000))
+		case <-exited:
+			return r.finishInit(ctx)
+		default:
+		}
+
+		// Block until the UI stops emitting (prompt fully drawn, awaiting input)
+		// or the child exits. Replaces the old fixed-interval poll.
+		if r.c.waitForQuiet(promptQuiet) {
+			return r.finishInit(ctx)
+		}
+
+		screen := r.c.screen()
+		if isInitComplete(screen) {
+			return r.finishInit(ctx)
+		}
+
+		prompt := activePrompt(screen)
+		if prompt == "" {
+			continue // spinner / transient output, not a survey prompt yet
+		}
+		r.t.Logf("prompt: %s", truncate(prompt, 100))
+
+		// Loop detection: compare the question text before ':' so varying filter
+		// text on the same prompt doesn't reset the counter.
+		key := promptKey(prompt)
+		if key == lastKey {
+			repeat++
+		} else {
+			repeat, lastKey = 1, key
+		}
+		if repeat >= 3 {
+			// Model prompts get "try the next list entry" instead of an abort;
+			// they stay bounded only by the init context deadline above.
+			if strings.Contains(prompt, "model") || strings.Contains(prompt, "is specified") {
+				r.t.Log("loop detected on model prompt; trying next option")
+				r.c.send(keyDown)
+				r.c.waitForQuiet(listSettle)
+				r.c.send(keyEnter)
+				continue
+			}
+			if repeat >= 5 {
+				return fmt.Errorf("init stuck in prompt loop: %q\n--- screen ---\n%s", key, screen)
+			}
+		}
+
+		r.dispatchPrompt(screen, prompt)
+	}
+}
+
+// finishInit confirms init produced the expected artifacts on disk, allowing a
+// brief grace for files to flush after the completion marker or process exit.
+func (r *runner) finishInit(ctx context.Context) error {
+	if r.validateInitOutput() {
+		return nil
+	}
+	// One retry after a short grace; a cancelled ctx just skips the wait.
+	_ = sleepCtx(ctx, 5*time.Second)
+	if r.validateInitOutput() {
+		return nil
+	}
+	return fmt.Errorf(
+		"init finished but expected artifacts are missing on disk\n--- tail ---\n%s",
+		tail(r.c.tailString(), 2000),
+	)
+}
+
+// isInitComplete reports whether the success marker is on screen. Source:
+// runInitFromManifest (init.go) prints "AI agent definition added to your azd
+// project successfully!" in green at the end.
+func isInitComplete(screen string) bool {
+	return screenContains(screen, "added to your azd project") ||
+		screenContains(screen, "agent definition added")
+}
+
+// promptKey reduces a prompt line to its stable question text (before the first
+// ':') for loop detection.
+func promptKey(prompt string) string {
+	if i := strings.Index(prompt, ":"); i > 0 {
+		return strings.TrimSpace(prompt[:i])
+	}
+	return prompt
+}
+
+// dispatchPrompt answers a single survey prompt. Cases are ordered specific →
+// generic and keyed on the verbatim messages the extension prints; the function
+// in each comment points at the source string this matches. The prompt argument
+// is already lowercased (see activePrompt).
+//
+// Only a subset of these fire on the --agent-name template critical path
+// (language, template, Foundry project, subscription, location, the manifest
+// model, deployment name, capacity/sku/version). The rest are kept as defensive
+// handlers because init auto-resolves them under userProvidedManifest=true (so
+// they normally do NOT prompt) or only surfaces them for specific runtime state.
+func (r *runner) dispatchPrompt(screen, prompt string) {
+	has := func(sub string) bool { return strings.Contains(prompt, sub) }
+
+	switch {
+	// Yes/No confirms. "Continue with this existing agent name?"
+	// (resolveExistingAgentNameConflictWithChecker) only fires when the unique
+	// name already exists; decline it to reach the fresh-name input. Any other
+	// confirm: accept.
+	case has("[y/n]") || has("(y/n)") || has("continue with this existing agent name"):
+		if has("continue with this existing agent name") {
+			r.c.send("n")
+		} else {
+			r.c.send("y")
+		}
+		r.c.send(keyEnter)
+
+	// Language select — "Select a language" (promptAgentTemplate).
+	case has("select a language"):
+		r.selectByText("Python")
+
+	// Template select — "Select a starter template" / "Select an agent template"
+	// (promptAgentTemplate).
+	case has("starter template") || has("agent template"):
+		r.selectByText("Basic agent (Invocations")
+
+	// Foundry project hosting — "Select a Foundry project to host your agent..."
+	// (runInitFromManifest); choices "Use an existing..." / "Create a new...".
+	case has("foundry project to host"):
+		if r.createProject() {
+			r.selectByText("Create a new Foundry project")
+		} else {
+			r.selectByText("Use an existing Foundry project")
+		}
+
+	// Existing-project picker — "Select a Foundry project"
+	// (selectFoundryProject); only when reusing a project.
+	case has("select a foundry project"):
+		if p := os.Getenv("E2E_PROJECT"); p != "" {
+			r.selectByText(p)
+		} else {
+			r.enter()
+		}
+
+	// Subscription — the extension prints a descriptive preamble via fmt.Println
+	// (runInitFromManifest), but that line isn't the survey "?" line activePrompt
+	// reads. ensureSubscription passes an empty request, so the picker shows
+	// azd-core's default message "Select subscription" (promptSubscriptionMessage)
+	// — match that, not the preamble.
+	case has("select subscription"):
+		if sub := os.Getenv("E2E_SUBSCRIPTION"); sub != "" {
+			// Type only a short prefix of the id as the list filter.
+			r.selectByText(sub[:min(8, len(sub))])
+		} else {
+			r.enter()
+		}
+
+	// Location — preamble "Select an Azure location..." (ensureLocation) +
+	// azd-core picker.
+	case has("location") || has("region"):
+		r.selectByText(getenvDefault("E2E_LOCATION", "eastus2"))
+
+	// Manifest model decision — "Model '%s' is specified in the agent manifest."
+	// (getModelDetails); keep the manifest model (default first choice).
+	case has("is specified in the agent manifest"):
+		r.enter()
+
+	// Existing deployments / generic proceed — getModelDeploymentDetails.
+	case has("how would you like to proceed") || has("existing deployment"):
+		r.enter()
+
+	// Model deployment name input — getModelDeploymentDetails (default = model name).
+	case has("model deployment name") || (has("deployment name") && has("model")):
+		r.enter()
+
+	// Model select — "Select a model" (promptForAlternativeModel etc.).
+	case has("select a model"):
+		r.selectByText("gpt-4o-mini")
+
+	// Deployment version / SKU / capacity — azd-core's PromptAiDeployment renders
+	// these exact picker messages; accept defaults. Match the full message rather
+	// than the bare keyword so a future prompt merely containing
+	// "version"/"sku"/"capacity" can't match by accident (it would fall through to
+	// the logged default instead).
+	case has("select a version for") || has("select a sku for") ||
+		has("enter deployment capacity for"):
+		r.enter()
+
+	// Code-deploy prompts (promptCodeConfig). Auto-resolved under
+	// userProvidedManifest=true, so kept as defensive handlers only.
+	case has("select the runtime for your agent"):
+		r.enter() // default Python 3.13
+	case has("entry point"):
+		r.enter() // accept detected default
+	case has("how should dependencies be resolved"):
+		r.enter() // default remote build
+
+	// Optional infra (blank => create new): ACR login server
+	// (configureAcrConnection), App Insights (configureAppInsightsConnection).
+	case has("acr login server") || has("container registry"):
+		r.enter()
+	case has("application insights"):
+		r.enter()
+
+	// Startup command (resolveStartupCommandForInit); blank => skip.
+	case has("command to start your agent"):
+		r.enter()
+
+	// Replacement agent name after declining the existing-name confirm
+	// (promptForReplacementAgentName) / the name input (resolveInitAgentName);
+	// accept the default.
+	case has("enter a different name for your agent") || has("enter a name for your agent"):
+		r.enter()
+
+	default:
+		// No specific case matched: send Enter as a safe default, but log the
+		// fall-through so CI can distinguish "matched and answered correctly"
+		// from "hit the catch-all" when a new or changed prompt appears.
+		r.t.Logf("unhandled prompt (default Enter): %s", truncate(prompt, 100))
+		r.enter()
+	}
+}
+
+// phaseProvision finds the scaffolded project and runs `azd provision`.
+func (r *runner) phaseProvision(ctx context.Context) error {
+	dir := r.findProjectDir()
+	if dir == "" {
+		return errors.New("no project directory with azure.yaml found")
+	}
+	r.projectDir = dir
+	r.t.Logf("project dir: %s", dir)
+
+	_, code := r.runAzd(ctx, dir, provisionTimeout, "provision", "--no-prompt")
+
+	// Stamp the resource group with a DeleteAfter cleanup tag as soon as it
+	// exists. The post-run `azd down` teardown is the primary cleanup, but it is
+	// unreliable in CI (the agent can exhaust its post-timeout budget, crash
+	// mid-delete, or lose its network connection); the tag lets the EngSys
+	// garbage collector reclaim the group regardless. Best-effort: never fails.
+	//
+	// This runs BEFORE the exit-code check on purpose: a provision that fails
+	// part-way can still have created the group, and that is exactly the run
+	// most likely to leak it. When the env has no resource group recorded yet
+	// the call just logs and skips.
+	r.tagResourceGroupForCleanup(ctx)
+
+	if code != 0 {
+		return fmt.Errorf("azd provision failed (exit %d)", code)
+	}
+	return nil
+}
+
+// phaseDeploy runs `azd deploy`.
+func (r *runner) phaseDeploy(ctx context.Context) error {
+	_, code := r.runAzd(ctx, r.projectDir, deployTimeout, "deploy", "--no-prompt")
+	if code != 0 {
+		return fmt.Errorf("azd deploy failed (exit %d)", code)
+	}
+	return nil
+}
+
+// phaseInvoke calls the deployed agent and verifies it answers "2+2" with 4.
+func (r *runner) phaseInvoke(ctx context.Context) error {
+	// Fixed startup grace before the first call; container mode gets longer.
+	wait := 30 * time.Second
+	if r.mode == "container" {
+		wait = 60 * time.Second
+	}
+	r.t.Logf("waiting %s for agent startup (%s mode)", wait, r.mode)
+	if err := sleepCtx(ctx, wait); err != nil {
+		return err
+	}
+
+	svc := r.findServiceName()
+	if svc == "" {
+		return errors.New("could not determine service name from azure.yaml")
+	}
+	r.t.Logf("service name: %s", svc)
+
+	// The invocations protocol requires a JSON body via --input-file.
+	payload := filepath.Join(r.testDir, ".invoke-payload.json")
+	if err := os.WriteFile(payload, []byte(`{"message": "Hello, what is 2+2?"}`), 0o600); err != nil {
+		return fmt.Errorf("write invoke payload: %w", err)
+	}
+
+	const maxRetries = 3
+	for attempt := 1; attempt <= maxRetries; attempt++ {
+		r.t.Logf("invoke attempt %d/%d", attempt, maxRetries)
+		out, code := r.runAzd(ctx, r.projectDir, invokeTimeout,
+			"ai", "agent", "invoke", svc, "--new-session", "-f", payload)
+
+		if code != 0 {
+			if attempt == maxRetries {
+				// Out of retries: pull the agent's recent logs for diagnosis.
+				logs, _ := r.runAzd(ctx, r.projectDir, monitorTimeout,
+					"ai", "agent", "monitor", svc, "--tail", "50")
+				r.t.Logf("agent logs (tail):\n%s", tail(logs, 4000))
+				return fmt.Errorf("azd invoke failed (exit %d)", code)
+			}
+			delay := 15 * time.Second
+			if strings.Contains(out, "500") ||
+				strings.Contains(strings.ToLower(out), "internal server error") {
+				delay = 30 * time.Second // container may still be starting
+			}
+			r.t.Logf("invoke failed (exit %d); retrying in %s", code, delay)
+			if err := sleepCtx(ctx, delay); err != nil {
+				return err
+			}
+			continue
+		}
+
+		if !responseHasExpectedAnswer(agentResponseRegion(out)) {
+			if attempt < maxRetries {
+				r.t.Log("response missing expected '4'/'four'; retrying")
+				if err := sleepCtx(ctx, 15*time.Second); err != nil {
+					return err
+				}
+				continue
+			}
+			return fmt.Errorf("invoke response missing expected '4'/'four': %s", truncate(out, 200))
+		}
+
+		r.t.Log("invoke succeeded; response contains the expected answer")
+		return nil
+	}
+	return errors.New("invoke failed after all retries")
+}
+
+// teardown runs `azd down` so a run never leaves billable resources behind. It
+// uses a fresh context because the per-run deadline may already have fired.
+func (r *runner) teardown() {
+	if r.projectDir == "" {
+		r.projectDir = r.findProjectDir()
+	}
+	if r.projectDir == "" {
+		return
+	}
+	r.t.Log("teardown: azd down --force --purge")
+	_, code := r.runAzd(context.Background(), r.projectDir, teardownTimeout,
+		"down", "--force", "--purge", "--no-prompt")
+	if code != 0 {
+		r.t.Errorf("azd down failed (exit %d) — Azure resources may be leaked", code)
+	}
+}
+
+// runAzd runs an azd command in dir with a timeout, streaming combined output to
+// the test log and returning it along with the exit code (0 on success, the
+// process exit code on failure, -1 if it never ran or was killed).
+func (r *runner) runAzd(ctx context.Context, dir string, timeout time.Duration, args ...string) (string, int) {
+	// Upper bound on how long Run may keep draining the output pipe after azd
+	// itself is gone (see WaitDelay below).
+	const pipeDrainGrace = 10 * time.Second
+
+	cctx, cancel := context.WithTimeout(ctx, timeout)
+	defer cancel()
+
+	//nolint:gosec // azd is a trusted fixed binary; args are test-controlled.
+	cmd := exec.CommandContext(cctx, "azd", args...)
+	cmd.Dir = dir
+	cmd.Env = r.env
+	// Stdout is not an *os.File, so os/exec copies through a pipe and Run waits
+	// for that pipe to close. Any descendant that inherited it (azd runs the
+	// extension as a separate binary) keeps it open after the timeout kills
+	// azd, which would block Run far past the deadline and eat the teardown
+	// budget. WaitDelay bounds that wait.
+	cmd.WaitDelay = pipeDrainGrace
+
+	var buf bytes.Buffer
+	lw := &lineLogger{t: r.t}
+	cmd.Stdout = io.MultiWriter(&buf, lw)
+	// Same writer value as Stdout => os/exec uses one pipe and one copier
+	// goroutine, so there is no concurrent write to buf/lw.
+	cmd.Stderr = cmd.Stdout
+
+	err := cmd.Run()
+	lw.flush()
+	if errors.Is(err, exec.ErrWaitDelay) {
+		// azd exited successfully; only a lingering descendant held the pipe.
+		// Keep reporting that as success rather than as exit -1.
+		r.t.Logf("azd %s: output pipe still open %s after exit; continuing",
+			strings.Join(args, " "), pipeDrainGrace)
+		err = nil
+	}
+	return buf.String(), exitCode(err)
+}
+
+// tagResourceGroupForCleanup best-effort stamps a DeleteAfter tag on the
+// provisioned resource group so the EngSys garbage collector can find and
+// delete it even when the explicit `azd down` teardown never runs. Failures are
+// logged and ignored: the tag is a safety net layered on top of teardown, not a
+// gate on the test. See the EngSys resource-management spec for the tag format.
+func (r *runner) tagResourceGroupForCleanup(ctx context.Context) {
+	vals := r.azdEnvValues(ctx)
+	rg := vals["AZURE_RESOURCE_GROUP"]
+	if rg == "" {
+		r.t.Log("skip DeleteAfter tag: AZURE_RESOURCE_GROUP not found in azd env")
+		return
+	}
+	// EngSys expects an RFC 3339 / ISO 8601 UTC instant; the group is reclaimed
+	// once that time has passed. `--set tags.DeleteAfter=` adds just this one
+	// tag, leaving azd's own tags (e.g. azd-env-name) intact.
+	deleteAfter := time.Now().UTC().Add(deleteAfterRetention).Format(time.RFC3339)
+	args := []string{"group", "update", "--name", rg,
+		"--set", "tags.DeleteAfter=" + deleteAfter, "--output", "none"}
+	// Pin the subscription when the env records one, so the update does not
+	// depend on whichever subscription the az session defaults to.
+	if sub := vals["AZURE_SUBSCRIPTION_ID"]; sub != "" {
+		args = append(args, "--subscription", sub)
+	}
+	if out, code := r.runQuiet(ctx, r.projectDir, tagTimeout, "az", args...); code != 0 {
+		r.t.Logf("warning: could not tag resource group %q with DeleteAfter (exit %d): %s",
+			rg, code, truncate(strings.TrimSpace(out), 200))
+		return
+	}
+	r.t.Logf("tagged resource group %q with DeleteAfter=%s", rg, deleteAfter)
+}
+
+// azdEnvValues returns the project's azd environment as a key→value map. Output
+// is captured quietly (never streamed to the test log) because azd env values
+// can include provisioning secrets. A failure yields an empty map.
+func (r *runner) azdEnvValues(ctx context.Context) map[string]string {
+	out, code := r.runQuiet(ctx, r.projectDir, tagTimeout, "azd", "env", "get-values")
+	vals := map[string]string{}
+	if code != 0 {
+		r.t.Logf("warning: azd env get-values failed (exit %d)", code)
+		return vals
+	}
+	// Lines are KEY="value"; Cut on the first '=' so values containing '=' are
+	// preserved, then strip the surrounding quotes azd always emits.
+	for line := range strings.SplitSeq(out, "\n") {
+		key, val, ok := strings.Cut(strings.TrimSpace(line), "=")
+		if !ok {
+			continue
+		}
+		vals[strings.TrimSpace(key)] = strings.Trim(strings.TrimSpace(val), `"`)
+	}
+	return vals
+}
+
+// runQuiet runs name+args in dir with a timeout and returns combined output and
+// exit code WITHOUT streaming to the test log. Used for commands whose output
+// may carry secrets (`azd env get-values`) or is pure side effect (`az group
+// update`).
+func (r *runner) runQuiet(
+	ctx context.Context, dir string, timeout time.Duration, name string, args ...string,
+) (string, int) {
+	// Upper bound on how long CombinedOutput may keep draining the output pipe
+	// after the started process is gone (see WaitDelay below).
+	const pipeDrainGrace = 10 * time.Second
+
+	cctx, cancel := context.WithTimeout(ctx, timeout)
+	defer cancel()
+
+	//nolint:gosec // name and args are fixed, test-controlled values.
+	cmd := exec.CommandContext(cctx, name, args...)
+	cmd.Dir = dir
+	cmd.Env = r.env
+	// CombinedOutput captures through a pipe and waits for it to close. The
+	// timeout only kills the process we started; a child that inherited the
+	// pipe (a CLI wrapper may do its real work in one) would otherwise keep
+	// this call blocked past the deadline. WaitDelay bounds that wait.
+	cmd.WaitDelay = pipeDrainGrace
+	out, err := cmd.CombinedOutput()
+	if errors.Is(err, exec.ErrWaitDelay) {
+		// The command exited successfully; only the pipe was still held open.
+		err = nil
+	}
+	return string(out), exitCode(err)
+}
+
+// selectByText filters a survey list by typing target, waits (event-driven) for
+// the filtered list to stop redrawing, then confirms with Enter. This assumes
+// the survey / azd-core Select supports type-to-filter; that behavior is only
+// verifiable against a live run (documented in README). waitForQuiet's exited
+// result is intentionally ignored: a child that exited mid-select makes the
+// trailing Enter a harmless no-op on the closed pty.
+func (r *runner) selectByText(target string) {
+	r.c.send(target)
+	r.c.waitForQuiet(listSettle)
+	r.c.send(keyEnter)
+}
+
+// enter accepts a prompt's default by pressing Enter.
+func (r *runner) enter() {
+	r.c.send(keyEnter)
+}
+
+// createProject reports whether the run should create a fresh Foundry project.
+func (r *runner) createProject() bool {
+	return envTrue("E2E_CREATE_PROJECT")
+}
+
+// findProjectDir returns the first immediate subdirectory of testDir that
+// contains an azure.yaml (the project scaffolded by init), or "".
+func (r *runner) findProjectDir() string {
+	entries, err := os.ReadDir(r.testDir)
+	if err != nil {
+		return ""
+	}
+	for _, e := range entries {
+		if !e.IsDir() {
+			continue
+		}
+		dir := filepath.Join(r.testDir, e.Name())
+		if _, err := os.Stat(filepath.Join(dir, "azure.yaml")); err == nil {
+			return dir
+		}
+	}
+	return ""
+}
+
+// findServiceName reads the service name from the project's azure.yaml. azd
+// scaffolds exactly one service, so the sole key under services: is the name.
+func (r *runner) findServiceName() string {
+	dir := r.projectDir
+	if dir == "" {
+		dir = r.findProjectDir()
+	}
+	if dir == "" {
+		return ""
+	}
+	//nolint:gosec // azure.yaml path is under the test-controlled testDir.
+	data, err := os.ReadFile(filepath.Join(dir, "azure.yaml"))
+	if err != nil {
+		return ""
+	}
+	// A struct unmarshal is more robust than scanning lines: it tolerates
+	// comments and indentation changes that a naive parser would mishandle.
+	var proj struct {
+		Services map[string]any `yaml:"services"`
+	}
+	if err := yaml.Unmarshal(data, &proj); err != nil || len(proj.Services) == 0 {
+		return ""
+	}
+	// Single-entry map: the first (only) key is the service name.
+	for name := range proj.Services {
+		return name
+	}
+	return ""
+}
+
+// validateInitOutput confirms init produced an agent project on disk: a project
+// dir whose azure.yaml targets the agent host and a nested agent.yaml.
+func (r *runner) validateInitOutput() bool {
+	entries, err := os.ReadDir(r.testDir)
+	if err != nil {
+		return false
+	}
+	for _, e := range entries {
+		if !e.IsDir() {
+			continue
+		}
+		subdir := filepath.Join(r.testDir, e.Name())
+		//nolint:gosec // azure.yaml path is under the test-controlled testDir.
+		data, err := os.ReadFile(filepath.Join(subdir, "azure.yaml"))
+		if err != nil {
+			continue
+		}
+		content := string(data)
+		if strings.Contains(content, "host:") && strings.Contains(content, "azure.ai.agent") &&
+			hasAgentYAML(subdir) {
+			return true
+		}
+	}
+	return false
+}
+
+// hasAgentYAML reports whether an agent.yaml exists anywhere under root.
+func hasAgentYAML(root string) bool {
+	found := false
+	_ = filepath.WalkDir(root, func(_ string, d fs.DirEntry, err error) error {
+		if err != nil {
+			return nil // unreadable entry: keep walking the rest of the tree
+		}
+		if !d.IsDir() && d.Name() == "agent.yaml" {
+			found = true
+			return filepath.SkipAll
+		}
+		return nil
+	})
+	return found
+}
+
+// lineLogger forwards a stream to t.Log one line at a time so long-running azd
+// output is visible live in the CI log.
+type lineLogger struct {
+	t   *testing.T
+	buf []byte
+}
+
+// Write buffers p and logs every complete line, keeping any trailing partial
+// line for the next Write or flush. It always reports all of p as consumed.
+func (l *lineLogger) Write(p []byte) (int, error) {
+	l.buf = append(l.buf, p...)
+	for {
+		i := bytes.IndexByte(l.buf, '\n')
+		if i < 0 {
+			break
+		}
+		l.t.Log(strings.TrimRight(string(l.buf[:i]), "\r"))
+		l.buf = l.buf[i+1:]
+	}
+	return len(p), nil
+}
+
+// flush logs whatever partial line is still buffered (output that did not end
+// with a newline).
+func (l *lineLogger) flush() {
+	if len(l.buf) > 0 {
+		l.t.Log(strings.TrimRight(string(l.buf), "\r"))
+		l.buf = nil
+	}
+}
+
+// exitCode extracts a process exit code from an exec error (-1 if it never ran).
+func exitCode(err error) int {
+	if err == nil {
+		return 0
+	}
+	if ee, ok := errors.AsType[*exec.ExitError](err); ok {
+		return ee.ExitCode()
+	}
+	return -1
+}
+
+// ghToken resolves a GitHub token from the environment, falling back to `gh`.
+func ghToken() string {
+	for _, k := range []string{"GITHUB_TOKEN", "GH_TOKEN"} {
+		if v := os.Getenv(k); v != "" {
+			return v
+		}
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+	//nolint:gosec // gh is a trusted fixed binary; no user input in args.
+	out, err := exec.CommandContext(ctx, "gh", "auth", "token").Output()
+	if err != nil {
+		return ""
+	}
+	return strings.TrimSpace(string(out))
+}
+
+// shortHash returns a short, non-cryptographic uniqueness suffix for the agent
+// name (sha256 only to avoid noise from security scanners).
+func shortHash(mode string) string {
+	sum := sha256.Sum256(fmt.Appendf(nil, "%s-%d", mode, os.Getpid()))
+	return hex.EncodeToString(sum[:])[:6]
+}
+
+// assertSafeTestDir refuses a path that is not clearly a disposable test dir, so
+// a bad E2E_TESTDIR (e.g. "/", "/tmp", "$HOME", "/usr/lib") can never trigger a
+// destructive delete. It rejects: well-known top-level directories and the
+// current user's home, any path fewer than two levels deep, and anything
+// inside an operating-system tree.
+func assertSafeTestDir(path string) error {
+	abs, err := filepath.Abs(path)
+	if err != nil {
+		return fmt.Errorf("resolve test dir: %w", err)
+	}
+	abs = filepath.Clean(abs)
+	protected := map[string]bool{
+		"/": true, "/tmp": true, "/var": true, "/usr": true, "/etc": true,
+		"/bin": true, "/lib": true, "/root": true, "/home": true,
+	}
+	if home, err := os.UserHomeDir(); err == nil && home != "" {
+		protected[filepath.Clean(home)] = true
+	}
+	unsafe := protected[abs] || strings.Count(abs, "/") < 2
+
+	// The depth check alone lets nested system paths through ("/usr/lib",
+	// "/etc/ssh"). No test dir ever belongs inside these trees, so refuse every
+	// descendant. /tmp, /var and /home are deliberately absent: temp dirs and
+	// per-user scratch dirs legitimately live below them.
+	systemTrees := []string{
+		"/usr", "/etc", "/bin", "/sbin", "/lib", "/lib64",
+		"/boot", "/dev", "/proc", "/sys",
+	}
+	for _, root := range systemTrees {
+		if strings.HasPrefix(abs, root+"/") {
+			unsafe = true
+			break
+		}
+	}
+
+	if unsafe {
+		return fmt.Errorf("refusing to delete unsafe test dir %q (resolved %q)", path, abs)
+	}
+	return nil
+}
+
+// useAzCliAuth reports whether to use the az CLI session for azd auth (CI), as
+// opposed to azd's built-in auth (local WSL).
+func useAzCliAuth() bool {
+	return envTrue("E2E_USE_AZ_CLI_AUTH") ||
+		os.Getenv("GITHUB_ACTIONS") != "" ||
+		os.Getenv("TF_BUILD") != ""
+}
+
+// getenvDefault returns the env var value, or def if unset/empty.
+func getenvDefault(key, def string) string {
+	if v := os.Getenv(key); v != "" {
+		return v
+	}
+	return def
+}
+
+// envTrue reports whether an env var is set to a truthy value.
+func envTrue(key string) bool {
+	// Case- and whitespace-insensitive; anything else (including unset) is false.
+	switch strings.ToLower(strings.TrimSpace(os.Getenv(key))) {
+	case "1", "true", "yes":
+		return true
+	default:
+		return false
+	}
+}
+
+// sleepCtx sleeps for d unless ctx is cancelled first, returning ctx.Err() then.
+func sleepCtx(ctx context.Context, d time.Duration) error {
+	timer := time.NewTimer(d)
+	defer timer.Stop()
+	select {
+	case <-ctx.Done():
+		return ctx.Err()
+	case <-timer.C:
+		return nil
+	}
+}
+
+// truncate trims s and caps it to n characters with an ellipsis. The cap counts
+// runes, not bytes, so a multi-byte character (the terminal UI prints several)
+// is never cut in half and the result stays valid UTF-8.
+func truncate(s string, n int) string {
+	s = strings.TrimSpace(s)
+	if len(s) <= n {
+		return s // byte length already within the cap, so the rune count is too
+	}
+	runes := []rune(s)
+	if len(runes) <= n {
+		return s
+	}
+	return string(runes[:n]) + "..."
+}
+
+// tail returns at most the last n bytes of s with a leading ellipsis when
+// truncated. The cut is moved forward to the next rune boundary so the result
+// never starts in the middle of a multi-byte character.
+func tail(s string, n int) string {
+	if len(s) <= n {
+		return s
+	}
+	start := len(s) - n
+	// 10xxxxxx bytes are UTF-8 continuation bytes; skip past them.
+	for start < len(s) && s[start]&0xC0 == 0x80 {
+		start++
+	}
+	return "..." + s[start:]
+}
diff --git a/eng/pipelines/ext-azure-ai-agents-live.yml b/eng/pipelines/ext-azure-ai-agents-live.yml
new file mode 100644
index 00000000000..05b4b1713ae
--- /dev/null
+++ b/eng/pipelines/ext-azure-ai-agents-live.yml
@@ -0,0 +1,239 @@
+# Live E2E: azure.ai.agents extension — Tier 2 golden path
+#
+# Runs the full agent lifecycle (init -> provision -> deploy -> invoke -> down)
+# against LIVE Azure resources. The interactive `azd ai agent init` prompts are
+# driven by the Go pseudo-terminal test driver (go-expect + vt10x); the other
+# phases shell out to azd with --no-prompt. See tests/e2e-live/README.md.
+#
+# This pipeline is the live counterpart to the PR-gate checks in
+# `.github/workflows/lint-ext-azure-ai-agents.yml` (Tier 0 offline + Tier 1
+# recording/playback). Live Azure access is intentionally kept OUT of the
+# automatic PR pipeline (per Azure SDK EngSys / SFI guidance) and runs here only:
+#   - On demand via the PR comment: /azp run ext-azure-ai-agents-live
+#     (requires write permission on the repo)
+#   - On the weekly schedule below.
+#
+# Required ADO setup (one-time, admin) — see tests/e2e-live/README.md:
+#   - Register this YAML as a pipeline named `ext-azure-ai-agents-live`.
+#   - Service connection (parameter `serviceConnection`, default `azure-sdk-tests`)
+#     must map to the shared TME test subscription with Contributor + the RBAC
+#     needed to create Foundry projects and deploy models.
+#   - GitHub clones of the starter template authenticate with the azure-sdk org
+#     secret `azuresdk-github-pat` (already provided by the Azure SDK ADO
+#     project) to avoid anonymous rate limits — no extra secret setup required.
+
+trigger: none
+pr: none
+
+schedules:
+  # 7am UTC Monday (offset from other weekly E2E pipelines to reduce contention).
+  - cron: "0 7 * * 1"
+    displayName: Weekly live golden-path E2E
+    branches:
+      include:
+        - main
+    always: true
+
+parameters:
+  - name: deployModes
+    displayName: "Tier 2 deploy modes"
+    type: string
+    default: both
+    values:
+      - both
+      - code
+      - container
+  - name: serviceConnection
+    displayName: "Azure service connection (TME subscription)"
+    type: string
+    default: azure-sdk-tests
+
+extends:
+  template: /eng/pipelines/templates/stages/1es-redirect.yml
+  parameters:
+    stages:
+      - stage: AiAgentsLiveE2E
+        displayName: AI Agents Live Golden Path
+        variables:
+          - template: /eng/pipelines/templates/variables/image.yml
+        jobs:
+          - job: Tier2
+            displayName: Tier 2 — init/provision/deploy/invoke/down
+            pool:
+              name: $(LINUXPOOL)
+              image: $(LINUXVMIMAGE)
+              os: linux
+            # Two golden paths (code + container) run sequentially (~13-15 min
+            # each in the typical case), plus build/provision overhead. The cap
+            # is sized for the worst case so an ungraceful job timeout never
+            # preempts the in-test teardown: 2x the per-mode 60 min runTimeout
+            # (tier2_live_test.go) + per-run cleanup + build/setup steps.
+            timeoutInMinutes: 150
+            steps:
+              - checkout: self
+
+              - template: /eng/pipelines/templates/steps/setup-go.yml
+
+              # Live build — NO `-tags=record`, so the CLI/extension talk to real
+              # Azure instead of the recording proxy used by the PR-gate tests.
+              - bash: go build -o azd .
+                workingDirectory: cli/azd
+                displayName: Build azd
+
+              - bash: go build -o azure-ai-agents .
+                workingDirectory: cli/azd/extensions/azure.ai.agents
+                displayName: Build azure.ai.agents extension
+
+              - bash: echo "##vso[task.prependpath]$(Build.SourcesDirectory)/cli/azd"
+                displayName: Add azd to PATH
+
+              # Install the freshly built (live, non-record) extension into the
+              # azd config dir: copy the binary where azd expects it and write a
+              # config.json so `azd ai agent` resolves the extension. The config
+              # is generated FROM extension.yaml via yq so the manifest fields
+              # (capabilities, namespace, usage, ...) can never drift from a
+              # hand-maintained copy here; only test-specific fields (path,
+              # source, sentinel version) are injected.
+              - bash: |
+                  set -euo pipefail
+                  # Map the agent architecture to azd's expected binary suffix so
+                  # this keeps working if the pool ever moves off linux/amd64.
+                  # Use backticks (not $(...)) for the command substitution so
+                  # Azure DevOps cannot mistake it for a $(macro) variable; assign
+                  # once and reference the plain shell var ($ARCH) thereafter.
+                  ARCH=`uname -m`
+                  case "$ARCH" in
+                    x86_64|amd64) GOARCH=amd64 ;;
+                    aarch64|arm64) GOARCH=arm64 ;;
+                    *) echo "Unsupported architecture: $ARCH" >&2; exit 1 ;;
+                  esac
+                  BIN_NAME="azure-ai-agents-linux-${GOARCH}"
+                  EXT_DIR="$HOME/.azd/extensions/azure.ai.agents"
+                  mkdir -p "$EXT_DIR"
+                  cp cli/azd/extensions/azure.ai.agents/azure-ai-agents "$EXT_DIR/$BIN_NAME"
+                  chmod +x "$EXT_DIR/$BIN_NAME"
+                  # yq ships on the azure-sdk Linux images; install the pinned
+                  # version as a fallback if a future image drops it.
+                  if ! command -v yq >/dev/null 2>&1; then
+                    go install github.com/mikefarah/yq/v4@v4.44.3
+                    # `go install` writes to GOBIN (or GOPATH/bin when GOBIN is
+                    # unset), which is not guaranteed to be on the agent's PATH;
+                    # add it so the yq call below resolves. Backticks for the
+                    # same $(macro) reason as ARCH above.
+                    GO_BIN=`go env GOBIN`
+                    [ -n "$GO_BIN" ] || GO_BIN="`go env GOPATH`/bin"
+                    export PATH="$GO_BIN:$PATH"
+                  fi
+                  export BIN_NAME
+                  yq -o=json '
+                    .id as $id | {
+                      "extension": {"installed": {$id: {
+                        "id": .id,
+                        "namespace": .namespace,
+                        "capabilities": .capabilities,
+                        "displayName": .displayName,
+                        "description": .description,
+                        "version": "0.0.0-test",
+                        "usage": .usage,
+                        "path": "extensions/azure.ai.agents/" + env(BIN_NAME),
+                        "source": "azd"
+                      }}}
+                    }' cli/azd/extensions/azure.ai.agents/extension.yaml > "$HOME/.azd/config.json"
+                displayName: Install azure.ai.agents extension
+
+              # Run the live golden path INSIDE the AzureCLI@2 task so the az CLI
+              # session (consumed by azd via auth.useAzCliAuth) stays valid for the
+              # whole run. keepAzSessionActive is REQUIRED: the service connection
+              # uses Workload Identity Federation and the test runs well past the
+              # ~10 min default token lifetime. A separate login step would NOT
+              # work — AzureCLI@2 isolates AZURE_CONFIG_DIR to a task-temp dir, so
+              # the session does not persist to later plain bash steps.
+              - task: AzureCLI@2
+                displayName: Run Tier 2 live golden path
+                # Holds BOTH deploy modes run sequentially. `go test -timeout`
+                # (below) self-caps at 125 min — under this 130 min step budget —
+                # so the test process exits before ADO force-kills the step, and
+                # the per-mode 60 min runTimeout drives the graceful azd teardown.
+                timeoutInMinutes: 130
+                inputs:
+                  azureSubscription: ${{ parameters.serviceConnection }}
+                  keepAzSessionActive: true
+                  visibleAzLogin: false
+                  scriptType: bash
+                  scriptLocation: inlineScript
+                  workingDirectory: cli/azd/extensions/azure.ai.agents
+                  inlineScript: |
+                    set -euo pipefail
+                    azd config set auth.useAzCliAuth true
+                    # Assign first (not `export X=$(...)`, which hides command
+                    # substitution failures from set -e), then verify non-empty.
+                    E2E_SUBSCRIPTION="$(az account show --query id -o tsv)"
+                    E2E_TENANT="$(az account show --query tenantId -o tsv)"
+                    if [ -z "$E2E_SUBSCRIPTION" ] || [ -z "$E2E_TENANT" ]; then
+                      echo "ERROR: failed to resolve subscription/tenant from az account show" >&2
+                      exit 1
+                    fi
+                    export E2E_SUBSCRIPTION E2E_TENANT
+                    echo "Using subscription: $E2E_SUBSCRIPTION"
+                    mkdir -p "$(Build.ArtifactStagingDirectory)/logs"
+                    # Drive the live golden path through the Go pseudo-terminal
+                    # test driver. -v streams per-phase logs; -count=1 defeats the
+                    # test cache (a live test must always re-run); -timeout self-
+                    # caps the process under this step's budget so the per-mode
+                    # teardown (t.Cleanup) runs before ADO force-kills the step.
+                    go test -run TestTier2Live -count=1 -timeout 125m -v ./tests/e2e-live/ 2>&1 \
+                      | tee "$(Build.ArtifactStagingDirectory)/logs/tier2.log"
+                env:
+                  # Gate + mode selection consumed by tier2_live_test.go.
+                  AZURE_AI_AGENTS_E2E_LIVE: "1"
+                  E2E_DEPLOY_MODES: ${{ parameters.deployModes }}
+                  E2E_CREATE_PROJECT: "true"
+                  E2E_LOCATION: eastus2
+                  E2E_USE_AZ_CLI_AUTH: "true"
+                  # azure-sdk org PAT (ambient in the ADO project) used only to
+                  # avoid anonymous GitHub rate limits when cloning the template.
+                  GH_TOKEN: $(azuresdk-github-pat)
+
+              - task: PublishPipelineArtifact@1
+                condition: always()
+                inputs:
+                  targetPath: $(Build.ArtifactStagingDirectory)
+                  artifactName: tier2-live-logs-$(Build.BuildId)
+                displayName: Publish test logs
+
+              # Safety net for hard crashes / step timeout: the in-test teardown
+              # runs `azd down` already, but if the run died mid-way, force-purge
+              # any leftover project environments. Must run inside AzureCLI@2 so it
+              # is authenticated — the previous task's az session does not persist.
+              - task: AzureCLI@2
+                condition: always()
+                continueOnError: true
+                displayName: Cleanup leaked Azure resources
+                inputs:
+                  azureSubscription: ${{ parameters.serviceConnection }}
+                  keepAzSessionActive: true
+                  visibleAzLogin: false
+                  scriptType: bash
+                  scriptLocation: inlineScript
+                  inlineScript: |
+                    # Deliberately no `set -e`: every action here is best-effort and
+                    # one failing project must not stop cleanup of the others.
+                    azd config set auth.useAzCliAuth true
+                    echo "Best-effort teardown of any leaked resources..."
+                    # RFC 3339 UTC instant for the EngSys DeleteAfter cleanup tag.
+                    delete_after=$(date -u -d '+2 days' +%Y-%m-%dT%H:%M:%SZ)
+                    for dir in /tmp/e2e-tests/tier2-*/; do
+                      [ -d "$dir" ] || continue
+                      proj=$(find "$dir" -maxdepth 2 -name azure.yaml -exec dirname {} \; | head -1)
+                      [ -n "$proj" ] || continue
+                      (
+                        # Without set -e a failed cd would let the commands below
+                        # (including `azd down --force --purge`) run in whatever
+                        # directory the step started in; bail out of this
+                        # subshell instead.
+                        cd "$proj" || exit 1
+                        # Tag the resource group(s) before attempting `azd down` so
+                        # the EngSys garbage collector can still reclaim them if the
+                        # delete below fails - the cleanup pipeline keys off
+                        # DeleteAfter. Enumerate by azd env name rather than reading
+                        # AZURE_RESOURCE_GROUP from env values, which a provision that
+                        # times out may not have persisted yet; azd creates the group
+                        # as "rg-" plus the env name at the start of provisioning, so
+                        # the name is known even when the run dies mid-deploy.
+                        for d in .azure/*/; do
+                          [ -d "$d" ] || continue
+                          # .azure/NAME/ -> NAME
+                          name=${d#.azure/}
+                          name=${name%/}
+                          rg="rg-$name"
+                          echo "Tagging $rg (env=$name) with DeleteAfter=$delete_after"
+                          az group update --name "$rg" \
+                            --set "tags.DeleteAfter=$delete_after" --output none || true
+                        done
+                        azd down --force --purge --no-prompt
+                      ) 2>&1 || true
+                    done