diff --git a/cli/azd/extensions/azure.ai.agents/cspell.yaml b/cli/azd/extensions/azure.ai.agents/cspell.yaml
index 4041a93cd5f..eb8b9cdf86c 100644
--- a/cli/azd/extensions/azure.ai.agents/cspell.yaml
+++ b/cli/azd/extensions/azure.ai.agents/cspell.yaml
@@ -41,6 +41,7 @@ words:
   - aoai
   - authorizationfailed
   - azdaiagent
+  - azuresdk
   - bicepless
   - CLIENTSECRET
   - curr
@@ -89,5 +90,9 @@ words:
   - ttfb
   - Bhadauria
   - ensurepip
+  # Live E2E (Tier 2) Go driver
+  - creack
+  - elive
+  - testdir
   # Test infrastructure
   - recordproxy
diff --git a/cli/azd/extensions/azure.ai.agents/go.mod b/cli/azd/extensions/azure.ai.agents/go.mod
index b7a6b685f00..dc8e461f274 100644
--- a/cli/azd/extensions/azure.ai.agents/go.mod
+++ b/cli/azd/extensions/azure.ai.agents/go.mod
@@ -36,6 +36,9 @@ require (
 require github.com/denormal/go-gitignore v0.0.0-20180930084346-ae8ad1d07817
 
 require (
+	github.com/Netflix/go-expect v0.0.0-20220104043353-73e0943537d2
+	github.com/creack/pty v1.1.24
+	github.com/hinshun/vt10x v0.0.0-20220119200601-820417d04eec
 	go.opentelemetry.io/otel v1.43.0
 	go.opentelemetry.io/otel/trace v1.43.0
 	golang.org/x/term v0.44.0
diff --git a/cli/azd/extensions/azure.ai.agents/go.sum b/cli/azd/extensions/azure.ai.agents/go.sum
index 396d8b1ea21..3a4cc258c9e 100644
--- a/cli/azd/extensions/azure.ai.agents/go.sum
+++ b/cli/azd/extensions/azure.ai.agents/go.sum
@@ -100,8 +100,9 @@ github.com/clipperhouse/stringish v0.1.1/go.mod h1:v/WhFtE1q0ovMta2+m+UbpZ+2/HEX
 github.com/clipperhouse/uax29/v2 v2.5.0 h1:x7T0T4eTHDONxFJsL94uKNKPHrclyFI0lm7+w94cO8U=
 github.com/clipperhouse/uax29/v2 v2.5.0/go.mod h1:Wn1g7MK6OoeDT0vL+Q0SQLDz/KpfsVRgg6W7ihQeh4g=
 github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
-github.com/creack/pty v1.1.17 h1:QeVUsEDNrLBW4tMgZHvxy18sKtr6VI492kBhUfhDJNI=
 github.com/creack/pty v1.1.17/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4=
+github.com/creack/pty v1.1.24 h1:bJrF4RRfyJnbTJqzRLHzcGaZK1NeM5kTC9jGgovnR1s=
+github.com/creack/pty v1.1.24/go.mod h1:08sCNb52WyoAwi2QDyzUCTgcvVFhUzewun7wtTfvcwE=
 github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964 h1:y5HC9v93H5EPKqaS1UYVg1uYah5Xf51mBfIoWehClUQ=
 github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964/go.mod h1:Xd9hchkHSWYkEqJwUGisez3G1QY8Ryz0sdWrLPMGjLk=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
diff --git a/cli/azd/extensions/azure.ai.agents/tests/e2e-live/README.md b/cli/azd/extensions/azure.ai.agents/tests/e2e-live/README.md
new file mode 100644
index 00000000000..e085e26c10c
--- /dev/null
+++ b/cli/azd/extensions/azure.ai.agents/tests/e2e-live/README.md
@@ -0,0 +1,147 @@
+# azure.ai.agents — Live E2E (Tier 2)
+
+Full golden-path tests that exercise the real `azd ai agent` CLI against **live
+Azure** resources:
+
+```
+init → provision → deploy → invoke → down
+```
+
+A Go test driver answers the interactive `azd ai agent init` prompts through a
+**pseudo-terminal** — [go-expect] sends keystrokes and [vt10x] renders the CLI's
+terminal UI so the test can assert on the on-screen text, with [creack/pty]
+providing the PTY. Synchronization is **event-driven**: the driver blocks on
+go-expect reads until the survey UI stops emitting — i.e. a prompt is fully
+drawn and waiting for input — instead of sleeping a fixed interval, then
+dispatches on the rendered prompt text. The deploy mode is chosen up front via
+`azd ai agent init --deploy-mode code|container` (it is not an interactive
+prompt once a manifest is supplied). The non-interactive phases (`provision`,
+`deploy`, `invoke`, `down`) shell out to `azd ... --no-prompt`. Both deploy
+modes are covered:
+
+| Mode        | What it does                                            |
+| ----------- | ------------------------------------------------------- |
+| `code`      | Source-code (zip) deploy of the agent service           |
+| `container` | Container (ACR build) deploy of the agent service       |
+
+The two modes run **sequentially** (same subscription → avoids resource races).
+
+[go-expect]: https://github.com/Netflix/go-expect
+[vt10x]: https://github.com/hinshun/vt10x
+[creack/pty]: https://github.com/creack/pty
+
+## How the `init` driver answers prompts
+
+The interactive sub-flows (Foundry project selection, model/deployment) branch
+on live runtime state, so the exact set and order of prompts is not fixed ahead
+of time. Rather than a linear expect script, the driver runs a **dispatch
+loop**: it waits for output to settle, reads the rendered screen, matches the
+active `?` prompt against the verbatim strings the extension prints — each case
+in `dispatchPrompt` is annotated with the source `file:line` it mirrors — and
+sends the answer. A loop detector bounds any prompt that fails to advance so a
+wording change upstream fails fast instead of hanging.
+
+Because the prompt strings are calibrated against the extension source, changes
+there can require updating `dispatchPrompt`. And because a real PTY, Azure auth,
+and the installed extension are all required, the **end-to-end interactive
+correctness is only exercised by a live Tier 2 run** — it cannot be reproduced
+by the platform-agnostic unit tests in this package.
+
+## Where this fits
+
+| Tier | Coverage                                  | Where it runs                                          |
+| ---- | ----------------------------------------- | ------------------------------------------------------ |
+| 0    | Offline CLI validation (no auth)          | PR gate — `.github/workflows/lint-ext-azure-ai-agents.yml` |
+| 1    | `init` variants (recording/playback)      | PR gate — same workflow                                |
+| 2    | **Full live golden path** (this folder)   | **`eng/pipelines/ext-azure-ai-agents-live.yml`**       |
+
+Live Azure access is deliberately kept **out** of the automatic PR pipeline (Azure
+SDK EngSys / SFI guidance). Tier 2 runs only on demand or on a schedule.
+
+## Running in CI
+
+Pipeline: `eng/pipelines/ext-azure-ai-agents-live.yml` (ADO). The Tier 2 step
+builds `azd` + the extension and runs `go test -run TestTier2Live` inside an
+`AzureCLI@2` task (so the federated az session stays valid for the whole run).
+
+- **On demand (per PR):** comment `/azp run ext-azure-ai-agents-live` on the PR.
+  Requires write permission on the repo.
+- **Scheduled:** weekly, Monday 07:00 UTC against `main`.
+- **Manual:** queue the pipeline and pick `deployModes` = `both` / `code` /
+  `container`.
+
+Logs for each run are published as the `tier2-live-logs-<BuildId>` artifact.
+
+### One-time admin setup
+
+1. **Register the pipeline** in Azure DevOps pointing at
+   `eng/pipelines/ext-azure-ai-agents-live.yml`, named `ext-azure-ai-agents-live`
+   (the name used by `/azp run`).
+2. **Service connection** — the `serviceConnection` parameter (default
+   `azure-sdk-tests`) must map to the shared **TME test subscription** via OIDC /
+   workload-identity federation. The federated identity needs enough RBAC to
+   create Foundry projects and deploy models (Contributor + Azure AI Developer +
+   Cognitive Services Contributor, or equivalent).
+3. **GitHub auth** — clones of the starter template use the azure-sdk org secret
+   `azuresdk-github-pat` (already provided by the Azure SDK ADO project) to avoid
+   anonymous rate limits, so no extra secret setup is required.
+
+## Running locally (Linux / WSL)
+
+The live driver is tagged `//go:build linux` — it relies on a real PTY and a
+controlling terminal, and Linux is the platform CI runs on. On Windows, use WSL.
+
+Prerequisites: Linux (including WSL), a Go toolchain matching `go.mod`
+(`GOTOOLCHAIN=auto` fetches the right version automatically), `azd` (>= 1.25.5)
+with the `azure.ai.agents` extension installed, and `az` logged in.
+
+Run from the extension root (`cli/azd/extensions/azure.ai.agents`):
+
+```bash
+# Use azd's built-in auth locally (NOT az CLI auth — it is slow under WSL).
+azd config unset auth.useAzCliAuth
+azd auth login
+
+# Both modes (sequential):
+AZURE_AI_AGENTS_E2E_LIVE=1 E2E_DEPLOY_MODES=both \
+  go test -run TestTier2Live -count=1 -timeout 130m -v ./tests/e2e-live/
+
+# A single golden path:
+AZURE_AI_AGENTS_E2E_LIVE=1 E2E_DEPLOY_MODES=code \
+  go test -run TestTier2Live -count=1 -timeout 90m -v ./tests/e2e-live/
+```
+
+Without `AZURE_AI_AGENTS_E2E_LIVE=1` the test is **skipped**, so the package is
+safe to include in a normal `go test ./...`.
+
+### Useful environment variables
+
+| Variable                   | Default                        | Purpose                                                      |
+| -------------------------- | ------------------------------ | ----------------------------------------------------------- |
+| `AZURE_AI_AGENTS_E2E_LIVE` | —                              | **Required** `=1` gate; unset → the test is skipped         |
+| `E2E_DEPLOY_MODES`         | `both`                         | `both` / `code` / `container`                               |
+| `E2E_CREATE_PROJECT`       | `false`                        | `true` → always create a fresh Foundry project              |
+| `E2E_PROJECT`              | —                              | Name of an existing Foundry project to select instead       |
+| `E2E_LOCATION`             | `eastus2`                      | Region for new projects (needs model quota)                 |
+| `E2E_SUBSCRIPTION`         | —                              | Subscription id (filters the picker)                        |
+| `E2E_TENANT`               | —                              | AAD tenant id (sets `AZURE_TENANT_ID` for azd)              |
+| `E2E_USE_AZ_CLI_AUTH`      | —                              | `true` → set `auth.useAzCliAuth` (CI; auto-on under ADO/GHA) |
+| `E2E_TESTDIR`              | `/tmp/e2e-tests/tier2-<mode>`  | Scratch dir for the scaffolded project                      |
+| `E2E_KEEP_ARTIFACTS`       | —                              | `true` → keep the per-run `AZD_CONFIG_DIR` copy for debugging |
+| `GH_TOKEN`                 | —                              | GitHub token for template clone (optional)                  |
+
+In CI the driver auto-detects GitHub Actions (`GITHUB_ACTIONS`) and Azure DevOps
+(`TF_BUILD`) and switches to `az` CLI auth automatically. Teardown of Azure
+resources (`azd down --force --purge`) always runs via `t.Cleanup`, even on failure.
+
+## Files
+
+| File                 | Purpose                                                                          |
+| -------------------- | -------------------------------------------------------------------------------- |
+| `tier2_live_test.go` | `TestTier2Live` — drives init/provision/deploy/invoke/down per mode (Linux-only) |
+| `console_test.go`    | PTY + vt10x console helper that renders the interactive CLI (Linux-only)         |
+| `assert.go`          | Pure-logic answer matcher (`responseHasExpectedAnswer`) — builds on any platform |
+| `assert_test.go`     | Unit tests for the matcher — run anywhere via `go test ./tests/e2e-live/`        |
+
+Each phase has bounded timeouts and best-effort `azd down --force --purge`
+teardown so a crash mid-run does not leak billable resources.
diff --git a/cli/azd/extensions/azure.ai.agents/tests/e2e-live/assert.go b/cli/azd/extensions/azure.ai.agents/tests/e2e-live/assert.go
new file mode 100644
index 00000000000..6026116653b
--- /dev/null
+++ b/cli/azd/extensions/azure.ai.agents/tests/e2e-live/assert.go
@@ -0,0 +1,100 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// Package e2elive contains the Tier 2 live golden-path end-to-end test for the
+// azure.ai.agents extension: init -> provision -> deploy -> invoke -> down,
+// driven against real Azure resources. See README.md for setup and how to run.
+package e2elive
+
+import (
+	"regexp"
+	"unicode"
+)
+
+// spelledFourRe matches the spelled-out word "four" as a standalone word
+// (case-insensitive), e.g. "the answer is four", but not inside "fourteen".
+var spelledFourRe = regexp.MustCompile(`(?i)\bfour\b`)
+
+// agentLineRe matches the start of an agent reply line, which invoke prints as
+// "[<agentName>] <text>" (invoke.go printf "[%s] %s"). responseEndRe matches the
+// green footer invoke prints after the reply, "Server responded in ..." — the
+// region between them is exactly the model's answer, with no surrounding noise.
+var (
+	agentLineRe   = regexp.MustCompile(`(?m)^\[[^\]]+\] `) // (?m): ^ anchors at every line start
+	responseEndRe = regexp.MustCompile(`Server responded in`)
+)
+
+// agentResponseRegion returns just the agent's printed answer, sliced from the
+// first "[<agent>] " line to the "Server responded in" footer. Scoping the
+// 2+2 check to this region keeps stray "4"s from the rest of the CLI output
+// (model names, versions, status codes) from passing the test. If either marker
+// is missing the format changed, so it returns the full text and lets the
+// standalone-digit rules in hasStandaloneFour guard against false positives.
+func agentResponseRegion(out string) string {
+	start := agentLineRe.FindStringIndex(out)
+	if start == nil {
+		return out // no agent line: fall back to the full text
+	}
+	rest := out[start[0]:] // look for the footer only after the agent line
+	if end := responseEndRe.FindStringIndex(rest); end != nil {
+		return rest[:end[0]]
+	}
+	return out // agent line but no footer: full text, not just rest
+}
+
+// responseHasExpectedAnswer reports whether text answers "what is 2+2?" with a
+// standalone "4" or the spelled-out word "four".
+//
+// A live model may answer either, and the captured CLI output also contains
+// unrelated digits — model names ("gpt-4o-mini"), versions ("4.1"), or status
+// codes ("404") — so a bare substring search would produce false positives.
+// The "4" must therefore stand alone: not part of a larger word or number.
+// In Python regex syntax the rule is the lookaround (?<![\w.])4(?!\.\d)(?!\w);
+// the spelled-out "four" is matched case-insensitively as a whole word.
+//
+// A decimal such as "4.0" is deliberately rejected too: although 4.0 == 4
+// mathematically, the "4.<digit>" form is treated as a version/decimal token to
+// keep "4.1"-style strings out, and a live model answering "2+2" replies "4" or
+// "four", never "4.0".
+//
+// Go's regexp engine (RE2) has no lookahead/lookbehind, so the standalone-"4"
+// rule is implemented by scanning runes instead of with a single expression.
+func responseHasExpectedAnswer(text string) bool {
+	if spelledFourRe.MatchString(text) {
+		return true // spelled-out answer found; no digit scan needed
+	}
+	return hasStandaloneFour(text)
+}
+
+// hasStandaloneFour reports whether text contains a "4" digit that stands alone,
+// reproducing the lookaround in the Python regex (?<![\w.])4(?!\.\d)(?!\w):
+//   - not preceded by a word rune or '.'  (rejects "x4", "_4", ".4")
+//   - not followed by '.' then a digit    (rejects "4.1", "4.0")
+//   - not followed by a word rune         (rejects "40", "4o")
+func hasStandaloneFour(text string) bool {
+	runes := []rune(text) // index by rune so neighbours are whole characters
+	for i, r := range runes {
+		if r != '4' {
+			continue
+		}
+		if i > 0 { // lookbehind (?<![\w.]); a '4' at index 0 has nothing before it
+			if prev := runes[i-1]; prev == '.' || isWordRune(prev) {
+				continue
+			}
+		}
+		if i+2 < len(runes) && runes[i+1] == '.' && unicode.IsDigit(runes[i+2]) { // (?!\.\d)
+			continue
+		}
+		if i+1 < len(runes) && isWordRune(runes[i+1]) { // (?!\w)
+			continue
+		}
+		return true
+	}
+	return false
+}
+
+// isWordRune reports whether r is a word character, approximating Python's \w:
+// Unicode letters, decimal digits (category Nd only), and underscore.
+func isWordRune(r rune) bool {
+	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
+}
diff --git a/cli/azd/extensions/azure.ai.agents/tests/e2e-live/assert_test.go b/cli/azd/extensions/azure.ai.agents/tests/e2e-live/assert_test.go
new file mode 100644
index 00000000000..d81b7ba309a
--- /dev/null
+++ b/cli/azd/extensions/azure.ai.agents/tests/e2e-live/assert_test.go
@@ -0,0 +1,87 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package e2elive
+
+import "testing"
+
+func TestResponseHasExpectedAnswer(t *testing.T) {
+	t.Parallel()
+
+	cases := []struct { // accepted answers first, then look-alikes that must be rejected
+		name string
+		text string
+		want bool
+	}{
+		{"plain four digit", "The answer is 4.", true},
+		{"bare four", "4", true},
+		{"equation", "2+2=4", true},
+		{"spelled word", "It is four.", true},
+		{"spelled upper", "FOUR", true},
+		{"parenthesized", "(4)", true},
+		{"trailing period mid-sentence", "the value 4. is final", true},
+		{"model name", "gpt-4o-mini", false},
+		{"version", "4.1", false},
+		{"decimal four", "4.0", false}, // intentional: see responseHasExpectedAnswer doc
+		{"status code", "404", false},
+		{"price", "$40", false},
+		{"ratio", "24/7", false},
+		{"fourteen", "fourteen apples", false},
+		{"no answer", "I am not sure", false},
+		{"empty", "", false},
+	}
+
+	for _, tc := range cases { // tc is per-iteration (Go 1.22+), safe to capture in parallel subtests
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+			if got := responseHasExpectedAnswer(tc.text); got != tc.want {
+				t.Errorf("responseHasExpectedAnswer(%q) = %v, want %v", tc.text, got, tc.want)
+			}
+		})
+	}
+}
+
+func TestAgentResponseRegion(t *testing.T) {
+	t.Parallel()
+
+	cases := []struct { // each case feeds raw invoke output through region slicing + matcher
+		name string
+		out  string
+		want bool // responseHasExpectedAnswer over the sliced region
+	}{
+		{
+			"answer scoped between markers",
+			"using model gpt-4o-mini\n[agent] The answer is 4.\nServer responded in 2s (first byte: 1s)\n",
+			true,
+		},
+		{
+			"stray digits outside region rejected",
+			"gpt-4o-mini deployed (404 cached)\n[agent] I am not sure.\nServer responded in 4.0s\n",
+			false,
+		},
+		{
+			"standalone 4 before agent line excluded by region",
+			"completed step 4\n[agent] I don't know.\nServer responded in 1s\n",
+			false,
+		},
+		{
+			"missing footer falls back to full text",
+			"using gpt-4o-mini\n[agent] four",
+			true,
+		},
+		{
+			"no agent line falls back to full text",
+			"the answer is four",
+			true,
+		},
+	}
+
+	for _, tc := range cases { // tc is per-iteration (Go 1.22+), safe to capture in parallel subtests
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+			if got := responseHasExpectedAnswer(agentResponseRegion(tc.out)); got != tc.want {
+				t.Errorf("region(%q) -> %v, want %v", tc.out, got, tc.want)
+			}
+		})
+	}
+}
diff --git a/cli/azd/extensions/azure.ai.agents/tests/e2e-live/console_test.go b/cli/azd/extensions/azure.ai.agents/tests/e2e-live/console_test.go
new file mode 100644
index 00000000000..2a0153b571a
--- /dev/null
+++ b/cli/azd/extensions/azure.ai.agents/tests/e2e-live/console_test.go
@@ -0,0 +1,204 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+//go:build linux
+
+package e2elive
+
+import (
+	"fmt"
+	"os"
+	"strings"
+	"sync"
+	"time"
+
+	expect "github.com/Netflix/go-expect"
+	"github.com/creack/pty"
+	"github.com/hinshun/vt10x"
+)
+
+// Key sequences sent to the interactive CLI over the pseudo-terminal.
+const (
+	keyEnter = "\r"     // carriage return
+	keyDown  = "\x1b[B" // ANSI CSI B: cursor down
+	keyUp    = "\x1b[A" // ANSI CSI A: cursor up
+)
+
+// tailBytes caps the rolling raw-output buffer kept for failure diagnostics
+// (the interactive init screen is otherwise not echoed to the test log).
+const tailBytes = 16 << 10 // 16 KiB
+
+// console drives an interactive child process through a pseudo-terminal and
+// renders its output with a vt10x virtual terminal so tests can both block on
+// expected output (go-expect) and assert on the on-screen text (the role tmux
+// capture-pane played in the old driver).
+//
+// Wiring (mirrors AlecAivazis/survey's posix expect tests):
+//
+//	child stdio ── ec.Tty() (pts) ─┐
+//	                                ├─ go-expect tees child output ─► vt10x screen + tail
+//	vt10x query replies ─► extSlave ┘             ▲
+//	                       extMaster ─ go-expect feeds back to child stdin
+//
+// go-expect creates its own internal pty for the child (ec.Tty()). The external
+// pty pair (extMaster/extSlave) exists solely so vt10x can answer terminal
+// queries (e.g. cursor-position reports) back to the child; it is closed via
+// WithCloser when the console is closed.
+type console struct {
+	term vt10x.Terminal  // virtual screen; receives the child output teed by ec
+	ec   *expect.Console // go-expect console; ec.Tty() is the child's pts
+	tail *ringBuffer     // last tailBytes of raw output, for diagnostics
+}
+
+// newConsole creates a console whose virtual terminal and child tty are cols x rows.
+func newConsole(cols, rows int) (*console, error) {
+	extMaster, extSlave, err := pty.Open()
+	if err != nil {
+		return nil, fmt.Errorf("open feedback pty: %w", err)
+	}
+
+	term := vt10x.New(vt10x.WithWriter(extSlave), vt10x.WithSize(cols, rows))
+	tail := newRingBuffer(tailBytes)
+
+	// go-expect tees everything it reads to these writers, so every read driven
+	// by expect()/waitForQuiet() simultaneously renders the screen (term) and
+	// records the raw bytes (tail) for diagnostics. No WithDefaultTimeout: each
+	// read's deadline is supplied per call via expect.WithTimeout.
+	ec, err := expect.NewConsole(
+		expect.WithStdin(extMaster),
+		expect.WithStdout(term, tail),
+		expect.WithCloser(extMaster, extSlave),
+	)
+	if err != nil {
+		_ = extMaster.Close() // not yet owned by a console, so close the pair here
+		_ = extSlave.Close()
+		return nil, fmt.Errorf("create expect console: %w", err)
+	}
+
+	// Match the child tty size to the virtual terminal so wrapping in the rendered
+	// screen matches what the CLI drew (best effort: a Setsize error is ignored).
+	//nolint:gosec // cols/rows are small fixed test dimensions; no overflow.
+	_ = pty.Setsize(ec.Tty(), &pty.Winsize{Cols: uint16(cols), Rows: uint16(rows)})
+
+	return &console{term: term, ec: ec, tail: tail}, nil
+}
+
+// tty returns go-expect's slave pseudo-terminal (ec.Tty()), which the child should
+// attach its stdin/stdout/stderr to; it is not the feedback pty from newConsole.
+func (c *console) tty() *os.File {
+	return c.ec.Tty()
+}
+
+// send writes raw bytes (keystrokes) to the child's tty.
+func (c *console) send(s string) {
+	_, _ = c.ec.Send(s) // best effort: the byte count and any write error are dropped
+}
+
+// expect reads child output (teeing it to the screen and the tail buffer) until
+// one of opts matches, idle elapses with no new byte, or the child's tty
+// closes. It is the event-driven wait primitive: go-expect only renders output
+// while a read is in flight, so every wait routes through here. opts is
+// capacity-clamped before appending so a caller's spread slice is not mutated.
+//
+// Return contract (go-expect's passthrough pipe, see passthrough_pipe.go):
+//   - a match               => (buf, nil)
+//   - idle of silence       => (buf, err) with os.IsTimeout(err) == true
+//   - child exit / pts close => (buf, err) with a non-timeout error
+func (c *console) expect(idle time.Duration, opts ...expect.ExpectOpt) (string, error) {
+	return c.ec.Expect(append(opts[:len(opts):len(opts)], expect.WithTimeout(idle))...)
+}
+
+// waitForQuiet renders pending output to the screen until the UI stops emitting
+// for quiet (a survey prompt fully drawn and now blocking on input) or the
+// child exits. It returns exited=true once the child's tty has closed.
+//
+// It passes no matchers, so go-expect can only return on the idle read deadline
+// (os.IsTimeout) or on a terminal read error (EOF / pts closed == child gone).
+func (c *console) waitForQuiet(quiet time.Duration) (exited bool) {
+	_, err := c.expect(quiet) // the buffered text is discarded; callers read screen()
+	return err != nil && !os.IsTimeout(err) // timeout == quiet; any other error == exited
+}
+
+// screen returns the current rendered virtual-terminal contents, cleaned of NUL
+// padding and trailing whitespace on each line.
+func (c *console) screen() string {
+	return cleanScreen(c.term.String()) // reflects only output already read via expect()
+}
+
+// tailString returns the last raw child output (up to tailBytes) for diagnostics.
+func (c *console) tailString() string {
+	return c.tail.String()
+}
+
+// close tears down the console and all of its pseudo-terminals.
+func (c *console) close() {
+	_ = c.ec.Close() // close error ignored; the feedback pty pair was registered via WithCloser
+}
+
+// ringBuffer is an io.Writer that retains only the last max bytes written, used
+// to keep a bounded tail of raw child output for failure diagnostics.
+type ringBuffer struct {
+	mu  sync.Mutex // guards buf
+	buf []byte     // retained tail; trimmed to at most max bytes by Write
+	max int        // maximum number of bytes kept
+}
+
+func newRingBuffer(max int) *ringBuffer {
+	return &ringBuffer{max: max} // buf starts nil; Write grows it on demand
+}
+
+func (r *ringBuffer) Write(p []byte) (int, error) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	r.buf = append(r.buf, p...)
+	if len(r.buf) > r.max {
+		r.buf = r.buf[len(r.buf)-r.max:] // drop the oldest bytes, keep the newest max
+	}
+	return len(p), nil // always reports the full write, even when bytes were dropped
+}
+
+func (r *ringBuffer) String() string {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	return string(r.buf) // the conversion copies, so the result is a stable snapshot
+}
+
+// cleanScreen normalizes a vt10x screen dump: empty cells render as NUL, which
+// is replaced with spaces, then trailing whitespace is trimmed from each row.
+func cleanScreen(s string) string {
+	s = strings.ReplaceAll(s, "\x00", " ")
+	lines := strings.Split(s, "\n")
+	for i, l := range lines {
+		lines[i] = strings.TrimRight(l, " \t") // leading indentation is preserved
+	}
+	return strings.Join(lines, "\n") // row count is unchanged; blank rows stay as empty lines
+}
+
+// nonEmptyLines returns the screen's non-blank lines, trimmed.
+func nonEmptyLines(screen string) []string {
+	var out []string // stays nil when the screen has no non-blank lines
+	for l := range strings.SplitSeq(screen, "\n") {
+		if t := strings.TrimSpace(l); t != "" {
+			out = append(out, t) // trimmed on both sides, so leading indentation is lost
+		}
+	}
+	return out
+}
+
+// activePrompt returns the lowercased text of the last survey "?" prompt line on
+// screen, or "" if none is visible. The last "?" line is the one survey is
+// currently blocking on (earlier "?" lines are answered prompts it echoed).
+func activePrompt(screen string) string {
+	lines := nonEmptyLines(screen) // already trimmed, so "?" is tested at column 0
+	for i := len(lines) - 1; i >= 0; i-- { // scan bottom-up to find the newest prompt
+		if strings.HasPrefix(lines[i], "?") {
+			return strings.ToLower(lines[i])
+		}
+	}
+	return ""
+}
+
+// screenContains reports whether screen contains sub (case-insensitive).
+func screenContains(screen, sub string) bool {
+	return strings.Contains(strings.ToLower(screen), strings.ToLower(sub)) // plain substring, not a pattern
+}
diff --git a/cli/azd/extensions/azure.ai.agents/tests/e2e-live/tier2_live_test.go b/cli/azd/extensions/azure.ai.agents/tests/e2e-live/tier2_live_test.go
new file mode 100644
index 00000000000..ad963a1bbe8
--- /dev/null
+++ b/cli/azd/extensions/azure.ai.agents/tests/e2e-live/tier2_live_test.go
@@ -0,0 +1,954 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+//go:build linux
+
+package e2elive
+
+import (
+	"bytes"
+	"context"
+	"crypto/sha256"
+	"encoding/hex"
+	"errors"
+	"fmt"
+	"io"
+	"io/fs"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"syscall"
+	"testing"
+	"time"
+
+	"gopkg.in/yaml.v3"
+)
+
+// liveEnvVar gates the live test: it runs only when set to exactly "1" (any
+// other value, e.g. "true", skips). This keeps it out of normal `go test ./...`.
+const liveEnvVar = "AZURE_AI_AGENTS_E2E_LIVE"
+
+// Virtual terminal (and child tty) dimensions for the interactive init phase.
+const (
+	initCols = 200
+	initRows = 50
+)
+
+// Phase time budgets. The per-mode runTimeout must exceed the sum of the phase
+// budgets so a slow-but-healthy run is never preempted (which would also skip
+// the teardown and leak resources). Two modes at runTimeout each fit inside the
+// `go test -timeout 125m` cap used by the pipeline, whose ADO step adds a small
+// margin on top before force-killing the process.
+const (
+	runTimeout       = 60 * time.Minute // the budgets listed below total 44m
+	initTimeout      = 8 * time.Minute
+	provisionTimeout = 10 * time.Minute
+	deployTimeout    = 10 * time.Minute
+	invokeTimeout    = 3 * time.Minute
+	monitorTimeout   = 60 * time.Second
+	teardownTimeout  = 10 * time.Minute
+	tagTimeout       = 2 * time.Minute
+
+	// deleteAfterRetention is how far ahead the DeleteAfter cleanup tag is set
+	// on the provisioned resource group. It must exceed a full run so a healthy
+	// in-flight test is never reclaimed, with margin to inspect a failed run
+	// before the EngSys garbage collector deletes it.
+	deleteAfterRetention = 48 * time.Hour
+
+	// Event-driven tuning for the interactive init loop. promptQuiet is how long
+	// the survey UI must stop emitting before we treat the current prompt as
+	// "drawn and waiting for input"; listSettle is the shorter pause we let a
+	// filtered Select list redraw after typing before confirming with Enter.
+	// Both replace the old fixed 3s poll; the hard init cap is the ctx deadline.
+	promptQuiet = 800 * time.Millisecond
+	listSettle  = 600 * time.Millisecond
+)
+
+// TestTier2Live exercises the full golden path against live Azure for each
+// requested deploy mode, sequentially (concurrent deploys in one subscription
+// race on shared resources and exhaust model quota).
+func TestTier2Live(t *testing.T) {
+	if os.Getenv(liveEnvVar) != "1" {
+		t.Skipf("set %s=1 to run the live Tier 2 golden-path test", liveEnvVar)
+	}
+
+	for _, mode := range deployModesFromEnv() {
+		t.Run(mode, func(t *testing.T) { // no t.Parallel: modes run one after another
+			r := newRunner(t, mode) // registers teardown via t.Cleanup before any phase runs
+			ctx, cancel := context.WithTimeout(t.Context(), runTimeout)
+			defer cancel()
+			r.run(ctx)
+		})
+	}
+}
+
+// deployModesFromEnv reads E2E_DEPLOY_MODES (code|container); anything else means both.
+func deployModesFromEnv() []string {
+	switch strings.ToLower(strings.TrimSpace(os.Getenv("E2E_DEPLOY_MODES"))) {
+	case "code":
+		return []string{"code"}
+	case "container":
+		return []string{"container"}
+	default: // "both", unset, or any unrecognized value
+		return []string{"code", "container"}
+	}
+}
+
+// runner holds the per-mode state for one golden-path run.
+type runner struct {
+	t          *testing.T // subtest this run reports to
+	mode       string     // deploy mode: "code" or "container"
+	testDir    string     // scratch dir that `azd ai agent init` runs in
+	agentName  string     // passed to init via --agent-name
+	env        []string   // child process environment, including AZD_CONFIG_DIR
+	projectDir string     // not set by newRunner; NOTE(review): presumably the scaffolded project path
+	c          *console   // PTY console, assigned by phaseInit
+}
+
+// newRunner prepares an isolated working directory, a private AZD_CONFIG_DIR
+// (copied from ~/.azd so the installed extension is available), and a per-mode
+// agent name, then registers teardown so resources are cleaned up even on
+// failure.
+func newRunner(t *testing.T, mode string) *runner {
+	t.Helper()
+
+	testDir := getenvDefault("E2E_TESTDIR", "/tmp/e2e-tests/tier2-"+mode)
+	if err := assertSafeTestDir(testDir); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.RemoveAll(testDir); err != nil { // start from an empty scratch dir
+		t.Fatalf("clean test dir: %v", err)
+	}
+	if err := os.MkdirAll(testDir, 0o700); err != nil {
+		t.Fatalf("create test dir: %v", err)
+	}
+
+	configDir := filepath.Join(os.TempDir(), "e2e-azd-config-"+mode)
+	setupConfigDir(t, configDir)
+
+	env := os.Environ()
+	env = append(env, "AZD_CONFIG_DIR="+configDir)
+	if tenant := os.Getenv("E2E_TENANT"); tenant != "" {
+		env = append(env, "AZURE_TENANT_ID="+tenant)
+	}
+	if tok := ghToken(); tok != "" {
+		env = append(env, "GH_TOKEN="+tok, "GITHUB_TOKEN="+tok)
+	}
+
+	r := &runner{
+		t:         t,
+		mode:      mode,
+		testDir:   testDir,
+		agentName: fmt.Sprintf("e2e-%s-%s", mode, shortHash(mode)),
+		env:       env,
+	}
+
+	// Cleanups run LIFO, so register the config-dir delete first and teardown
+	// second: teardown (azd down) runs before the config copy it relies on is
+	// removed.
+	if !envTrue("E2E_KEEP_ARTIFACTS") {
+		t.Cleanup(func() { _ = os.RemoveAll(configDir) })
+	}
+	t.Cleanup(r.teardown)
+
+	// CI (GitHub Actions / Azure DevOps / explicit override) uses the az CLI
+	// session for auth; local WSL keeps azd's built-in auth (az CLI auth is slow).
+	if useAzCliAuth() {
+		_, _ = r.runAzd(t.Context(), testDir, time.Minute,
+			"config", "set", "auth.useAzCliAuth", "true") // best effort: result and error are discarded
+	}
+
+	return r
+}
+
+// setupConfigDir creates configDir as a copy of ~/.azd (so installed extensions
+// resolve), or an empty dir if ~/.azd is absent. cp -a preserves the extension
+// binary's executable bit.
+func setupConfigDir(t *testing.T, configDir string) {
+	t.Helper()
+
+	home, err := os.UserHomeDir()
+	if err != nil {
+		t.Fatalf("resolve home dir: %v", err)
+	}
+	defaultAzd := filepath.Join(home, ".azd")
+	if info, err := os.Stat(defaultAzd); err == nil && info.IsDir() {
+		_ = os.RemoveAll(configDir) // cp -a into an existing dir would nest .azd inside it
+		//nolint:gosec // both paths derive from HOME / TempDir, not user input.
+		out, err := exec.Command("cp", "-a", defaultAzd, configDir).CombinedOutput()
+		if err != nil {
+			t.Fatalf("copy azd config dir: %v: %s", err, out)
+		}
+		return
+	}
+	if err := os.MkdirAll(configDir, 0o700); err != nil { // an existing configDir is reused as-is here
+		t.Fatalf("create azd config dir: %v", err)
+	}
+}
+
+// run executes init → provision → deploy → invoke, stopping at the first failure
+// (reported via t.Errorf). Teardown is a t.Cleanup from newRunner, so it still runs.
+func (r *runner) run(ctx context.Context) {
+	if err := r.phaseInit(ctx); err != nil {
+		r.t.Errorf("init: %v", err)
+		return
+	}
+	if err := r.phaseProvision(ctx); err != nil {
+		r.t.Errorf("provision: %v", err)
+		return
+	}
+	if err := r.phaseDeploy(ctx); err != nil {
+		r.t.Errorf("deploy: %v", err)
+		return
+	}
+	if err := r.phaseInvoke(ctx); err != nil {
+		r.t.Errorf("invoke: %v", err)
+		return
+	}
+}
+
+// phaseInit starts `azd ai agent init` on a pseudo-terminal, answers its prompts
+// via driveInit, and returns driveInit's error once the child has been reaped.
+func (r *runner) phaseInit(ctx context.Context) error {
+	c, err := newConsole(initCols, initRows)
+	if err != nil {
+		return err
+	}
+	defer c.close()
+	r.c = c
+
+	ictx, cancel := context.WithTimeout(ctx, initTimeout)
+	defer cancel()
+
+	// Deploy mode is NOT an interactive prompt in the template/--agent-name
+	// flow: promptDeployMode (init_from_code.go) auto-resolves it to "container"
+	// when a manifest is provided, so it must be chosen via newInitCommand's
+	// --deploy-mode flag (init.go). r.mode is exactly "container" or "code".
+	args := []string{"ai", "agent", "init", "--agent-name", r.agentName, "--deploy-mode", r.mode}
+	//nolint:gosec // azd is a trusted fixed binary; args are test-controlled.
+	cmd := exec.CommandContext(ictx, "azd", args...)
+	cmd.Dir = r.testDir
+	cmd.Env = r.env
+	cmd.Stdin = c.tty()
+	cmd.Stdout = c.tty()
+	cmd.Stderr = c.tty()
+	// Give the child the pts as its controlling terminal (as tmux did), so
+	// survey treats it as a real interactive terminal.
+	cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true, Setctty: true}
+
+	if err := cmd.Start(); err != nil {
+		return fmt.Errorf("start azd ai agent init: %w", err)
+	}
+
+	// No separate render goroutine: go-expect's passthrough pipe drains the
+	// child's pty in the background, and driveInit's expect()/waitForQuiet()
+	// calls do the reading that renders the screen. (A concurrent reader would
+	// race those calls for the same stream.)
+	exited := make(chan struct{})
+	go func() {
+		_ = cmd.Wait()
+		close(exited)
+	}()
+
+	driveErr := r.driveInit(ictx, exited)
+
+	// Reap the child before returning: give it 10s to exit on its own, then kill it.
+	select {
+	case <-exited:
+	case <-time.After(10 * time.Second):
+		_ = cmd.Process.Kill()
+		<-exited
+	}
+
+	return driveErr
+}
+
+// driveInit is the event-driven prompt loop: it waits (via go-expect) for the
+// survey UI to settle on a prompt, reads the rendered screen, and answers it,
+// until init reports completion (or the process exits, or it times out).
+//
+// Why a screen-dispatch loop and not a linear ExpectString script: the live
+// model/deployment and Foundry-project sub-flows branch on runtime state —
+// whether the just-created project already has the model deployed, region/model
+// availability, existing-name collisions — so the exact set and order of
+// prompts cannot be predetermined. A linear ExpectString sequence would desync
+// at the first conditional prompt. Instead we block on output settling (the
+// go-expect read), then dispatch on the verbatim prompt strings the extension
+// prints (each case annotated with the source function that prints it).
+func (r *runner) driveInit(ctx context.Context, exited <-chan struct{}) error {
+	var lastKey string
+	repeat := 0
+
+	for {
+		select {
+		case <-ctx.Done():
+			return fmt.Errorf("init timed out: %w\n--- tail ---\n%s",
+				ctx.Err(), tail(r.c.tailString(), 2000))
+		case <-exited:
+			return r.finishInit(ctx)
+		default:
+		}
+
+		// Block until the UI stops emitting (prompt fully drawn, awaiting input)
+		// or the child exits (waitForQuiet returns true). Replaces the old poll.
+		if r.c.waitForQuiet(promptQuiet) {
+			return r.finishInit(ctx)
+		}
+
+		screen := r.c.screen()
+		if isInitComplete(screen) {
+			return r.finishInit(ctx)
+		}
+
+		prompt := activePrompt(screen)
+		if prompt == "" {
+			continue // spinner / transient output, not a survey prompt yet
+		}
+		r.t.Logf("prompt: %s", truncate(prompt, 100))
+
+		// Loop detection: compare the question text before ':' so varying filter
+		// text on the same prompt doesn't reset the counter.
+		key := promptKey(prompt)
+		if key == lastKey {
+			repeat++
+		} else {
+			repeat, lastKey = 1, key
+		}
+		if repeat >= 3 {
+			if strings.Contains(prompt, "model") || strings.Contains(prompt, "is specified") {
+				r.t.Log("loop detected on model prompt; trying next option")
+				r.c.send(keyDown)
+				r.c.waitForQuiet(listSettle)
+				r.c.send(keyEnter)
+				continue
+			}
+			if repeat >= 5 { // never reached for model prompts: the branch above continues
+				return fmt.Errorf("init stuck in prompt loop: %q\n--- screen ---\n%s", key, screen)
+			}
+		}
+
+		r.dispatchPrompt(screen, prompt)
+	}
+}
+
+// finishInit checks that init left the expected artifacts on disk. If the first
+// check fails it waits up to 5s (files may still be flushing after the
+// completion marker or process exit) and checks once more before giving up.
+func (r *runner) finishInit(ctx context.Context) error {
+	if !r.validateInitOutput() {
+		_ = sleepCtx(ctx, 5*time.Second)
+		if !r.validateInitOutput() {
+			return fmt.Errorf(
+				"init finished but expected artifacts are missing on disk\n--- tail ---\n%s",
+				tail(r.c.tailString(), 2000),
+			)
+		}
+	}
+	return nil
+}
+
+// isInitComplete reports whether either success-marker fragment is on screen.
+// Source: runInitFromManifest (init.go) prints "AI agent definition added to
+// your azd project successfully!" in green at the end.
+func isInitComplete(screen string) bool {
+	return screenContains(screen, "added to your azd project") ||
+		screenContains(screen, "agent definition added")
+}
+
+// promptKey yields the stable question text of a prompt line for loop detection:
+// the trimmed text before the first ':', or the whole prompt if there is none.
+func promptKey(prompt string) string {
+	question, _, found := strings.Cut(prompt, ":")
+	if !found || question == "" {
+		return prompt
+	}
+	return strings.TrimSpace(question)
+}
+
+// dispatchPrompt answers a single survey prompt. Cases are ordered specific →
+// generic and keyed on the verbatim messages the extension prints; the function
+// in each comment points at the source string this matches. The prompt argument
+// is already lowercased (see activePrompt).
+//
+// Only a subset of these fire on the --agent-name template critical path
+// (language, template, Foundry project, subscription, location, the manifest
+// model, deployment name, capacity/sku/version). The rest are kept as defensive
+// handlers because init auto-resolves them under userProvidedManifest=true (so
+// they normally do NOT prompt) or only surfaces them for specific runtime state.
+func (r *runner) dispatchPrompt(screen, prompt string) {
+	has := func(sub string) bool { return strings.Contains(prompt, sub) }
+
+	switch {
+	// Yes/No confirms. "Continue with this existing agent name?"
+	// (resolveExistingAgentNameConflictWithChecker) only fires when the unique
+	// name already exists; decline it to reach the fresh-name input. Any other
+	// confirm: accept.
+	case has("[y/n]") || has("(y/n)") || has("continue with this existing agent name"):
+		if has("continue with this existing agent name") {
+			r.c.send("n")
+		} else {
+			r.c.send("y")
+		}
+		r.c.send(keyEnter)
+
+	// Language select — "Select a language" (promptAgentTemplate).
+	case has("select a language"):
+		r.selectByText("Python")
+
+	// Template select — "Select a starter template" / "Select an agent template"
+	// (promptAgentTemplate).
+	case has("starter template") || has("agent template"):
+		r.selectByText("Basic agent (Invocations")
+
+	// Foundry project hosting — "Select a Foundry project to host your agent..."
+	// (runInitFromManifest); choices "Use an existing..." / "Create a new...".
+	case has("foundry project to host"):
+		if r.createProject() {
+			r.selectByText("Create a new Foundry project")
+		} else {
+			r.selectByText("Use an existing Foundry project")
+		}
+
+	// Existing-project picker — "Select a Foundry project"
+	// (selectFoundryProject); only when reusing a project.
+	case has("select a foundry project"):
+		if p := os.Getenv("E2E_PROJECT"); p != "" {
+			r.selectByText(p)
+		} else {
+			r.enter()
+		}
+
+	// Subscription — the extension prints a descriptive preamble via fmt.Println
+	// (runInitFromManifest), but that line isn't the survey "?" line activePrompt
+	// reads. ensureSubscription passes an empty request, so the picker shows
+	// azd-core's default message "Select subscription" (promptSubscriptionMessage)
+	// — match that, not the preamble.
+	case has("select subscription"):
+		if sub := os.Getenv("E2E_SUBSCRIPTION"); sub != "" {
+			r.selectByText(sub[:min(8, len(sub))])
+		} else {
+			r.enter()
+		}
+
+	// Location — preamble "Select an Azure location..." (ensureLocation) + azd-core
+	// picker. NOTE(review): bare keywords; any later prompt containing them lands here.
+	case has("location") || has("region"):
+		r.selectByText(getenvDefault("E2E_LOCATION", "eastus2"))
+
+	// Manifest model decision — "Model '%s' is specified in the agent manifest."
+	// (getModelDetails); keep the manifest model (default first choice).
+	case has("is specified in the agent manifest"):
+		r.enter()
+
+	// Existing deployments / generic proceed — getModelDeploymentDetails.
+	case has("how would you like to proceed") || has("existing deployment"):
+		r.enter()
+
+	// Model deployment name input — getModelDeploymentDetails (default = model name).
+	case has("model deployment name") || (has("deployment name") && has("model")):
+		r.enter()
+
+	// Model select — "Select a model" (promptForAlternativeModel etc.).
+	case has("select a model"):
+		r.selectByText("gpt-4o-mini")
+
+	// Deployment version / SKU / capacity — azd-core's PromptAiDeployment renders
+	// these exact picker messages; accept defaults. Match the full message rather
+	// than the bare keyword so a future prompt merely containing
+	// "version"/"sku"/"capacity" can't match by accident (it would fall through to
+	// the logged default instead).
+	case has("select a version for") || has("select a sku for") ||
+		has("enter deployment capacity for"):
+		r.enter()
+
+	// Code-deploy prompts (promptCodeConfig). Auto-resolved under
+	// userProvidedManifest=true, so kept as defensive handlers only.
+	case has("select the runtime for your agent"):
+		r.enter() // default Python 3.13
+	case has("entry point"):
+		r.enter() // accept detected default
+	case has("how should dependencies be resolved"):
+		r.enter() // default remote build
+
+	// Optional infra (blank => create new): ACR login server
+	// (configureAcrConnection), App Insights (configureAppInsightsConnection).
+	case has("acr login server") || has("container registry"):
+		r.enter()
+	case has("application insights"):
+		r.enter()
+
+	// Startup command (resolveStartupCommandForInit); blank => skip.
+	case has("command to start your agent"):
+		r.enter()
+
+	// Replacement agent name after declining the existing-name confirm
+	// (promptForReplacementAgentName) / the name input (resolveInitAgentName);
+	// accept the default.
+	case has("enter a different name for your agent") || has("enter a name for your agent"):
+		r.enter()
+
+	default:
+		// No specific case matched: send Enter as a safe default, but log the
+		// fall-through so CI can distinguish "matched and answered correctly"
+		// from "hit the catch-all" when a new or changed prompt appears.
+		r.t.Logf("unhandled prompt (default Enter): %s", truncate(prompt, 100))
+		r.enter()
+	}
+}
+
+// phaseProvision locates the project that init scaffolded, records it on the
+// runner, and provisions it with `azd provision --no-prompt`.
+func (r *runner) phaseProvision(ctx context.Context) error {
+	projectDir := r.findProjectDir()
+	if projectDir == "" {
+		return errors.New("no project directory with azure.yaml found")
+	}
+	r.projectDir = projectDir
+	r.t.Logf("project dir: %s", projectDir)
+
+	if _, exit := r.runAzd(ctx, projectDir, provisionTimeout, "provision", "--no-prompt"); exit != 0 {
+		return fmt.Errorf("azd provision failed (exit %d)", exit)
+	}
+
+	// Stamp the resource group with a DeleteAfter cleanup tag as soon as it
+	// exists. The post-run `azd down` teardown is the primary cleanup, but it is
+	// unreliable in CI (the agent can exhaust its post-timeout budget, crash
+	// mid-delete, or lose its network connection); the tag lets the EngSys
+	// garbage collector reclaim the group regardless. Best-effort: never fails.
+	r.tagResourceGroupForCleanup(ctx)
+	return nil
+}
+
+// phaseDeploy runs `azd deploy --no-prompt`; a non-zero exit becomes an error.
+func (r *runner) phaseDeploy(ctx context.Context) error {
+	_, exit := r.runAzd(ctx, r.projectDir, deployTimeout, "deploy", "--no-prompt")
+	if exit == 0 {
+		return nil
+	}
+	return fmt.Errorf("azd deploy failed (exit %d)", exit)
+}
+
+// phaseInvoke waits for startup, then invokes the agent (up to 3 tries) expecting "4"/"four".
+func (r *runner) phaseInvoke(ctx context.Context) error {
+	wait := 30 * time.Second
+	if r.mode == "container" {
+		wait = 60 * time.Second
+	}
+	r.t.Logf("waiting %s for agent startup (%s mode)", wait, r.mode)
+	if err := sleepCtx(ctx, wait); err != nil {
+		return err
+	}
+
+	svc := r.findServiceName()
+	if svc == "" {
+		return errors.New("could not determine service name from azure.yaml")
+	}
+	r.t.Logf("service name: %s", svc)
+
+	// The invocations protocol requires a JSON body via --input-file.
+	payload := filepath.Join(r.testDir, ".invoke-payload.json")
+	if err := os.WriteFile(payload, []byte(`{"message": "Hello, what is 2+2?"}`), 0o600); err != nil {
+		return fmt.Errorf("write invoke payload: %w", err)
+	}
+
+	const maxRetries = 3
+	for attempt := 1; attempt <= maxRetries; attempt++ {
+		r.t.Logf("invoke attempt %d/%d", attempt, maxRetries)
+		out, code := r.runAzd(ctx, r.projectDir, invokeTimeout,
+			"ai", "agent", "invoke", svc, "--new-session", "-f", payload)
+
+		if code != 0 {
+			if attempt == maxRetries {
+				logs, _ := r.runAzd(ctx, r.projectDir, monitorTimeout,
+					"ai", "agent", "monitor", svc, "--tail", "50")
+				r.t.Logf("agent logs (tail):\n%s", tail(logs, 4000))
+				return fmt.Errorf("azd invoke failed (exit %d)", code)
+			}
+			delay := 15 * time.Second
+			if strings.Contains(out, "500") ||
+				strings.Contains(strings.ToLower(out), "internal server error") {
+				delay = 30 * time.Second // container may still be starting
+			}
+			r.t.Logf("invoke failed (exit %d); retrying in %s", code, delay)
+			if err := sleepCtx(ctx, delay); err != nil {
+				return err
+			}
+			continue
+		}
+
+		if !responseHasExpectedAnswer(agentResponseRegion(out)) {
+			if attempt < maxRetries {
+				r.t.Log("response missing expected '4'/'four'; retrying")
+				if err := sleepCtx(ctx, 15*time.Second); err != nil {
+					return err
+				}
+				continue
+			}
+			return fmt.Errorf("invoke response missing expected '4'/'four': %s", truncate(out, 200))
+		}
+
+		r.t.Log("invoke succeeded; response contains the expected answer")
+		return nil
+	}
+	return errors.New("invoke failed after all retries") // safety net: the last attempt always returns above
+}
+
+// teardown runs `azd down` so a run never leaves billable resources behind. If
+// no project dir was recorded it looks one up, and does nothing when none
+// exists. It uses a fresh context: the per-run deadline may already have fired.
+func (r *runner) teardown() {
+	if r.projectDir == "" {
+		if r.projectDir = r.findProjectDir(); r.projectDir == "" {
+			return
+		}
+	}
+	r.t.Log("teardown: azd down --force --purge")
+	_, exit := r.runAzd(context.Background(), r.projectDir, teardownTimeout,
+		"down", "--force", "--purge", "--no-prompt")
+	if exit != 0 {
+		r.t.Errorf("azd down failed (exit %d) — Azure resources may be leaked", exit)
+	}
+}
+
+// runAzd runs `azd args...` in dir under a timeout, teeing combined output to the
+// test log line by line; it returns the captured output and exitCode(err).
+func (r *runner) runAzd(ctx context.Context, dir string, timeout time.Duration, args ...string) (string, int) {
+	cctx, cancel := context.WithTimeout(ctx, timeout)
+	defer cancel()
+
+	//nolint:gosec // azd is a trusted fixed binary; args are test-controlled.
+	cmd := exec.CommandContext(cctx, "azd", args...)
+	cmd.Dir = dir
+	cmd.Env = r.env
+
+	var buf bytes.Buffer
+	lw := &lineLogger{t: r.t}
+	cmd.Stdout = io.MultiWriter(&buf, lw)
+	// Same writer value as Stdout => os/exec uses one pipe and one copier
+	// goroutine, so there is no concurrent write to buf/lw.
+	cmd.Stderr = cmd.Stdout
+
+	err := cmd.Run()
+	lw.flush() // emit a trailing line that had no terminating newline
+	return buf.String(), exitCode(err)
+}
+
+// tagResourceGroupForCleanup best-effort stamps a DeleteAfter tag on the
+// provisioned resource group so the EngSys garbage collector can find and
+// delete it even when the explicit `azd down` teardown never runs. Failures are
+// logged and ignored: the tag is a safety net layered on top of teardown, not a
+// gate on the test. See the EngSys resource-management spec for the tag format.
+func (r *runner) tagResourceGroupForCleanup(ctx context.Context) {
+	vals := r.azdEnvValues(ctx)
+	rg := vals["AZURE_RESOURCE_GROUP"]
+	if rg == "" {
+		r.t.Log("skip DeleteAfter tag: AZURE_RESOURCE_GROUP not found in azd env")
+		return
+	}
+	// EngSys expects an RFC 3339 / ISO 8601 UTC instant; the group is reclaimed
+	// once that time has passed. `--set tags.DeleteAfter=` adds just this one
+	// tag, leaving azd's own tags (e.g. azd-env-name) intact.
+	deleteAfter := time.Now().UTC().Add(deleteAfterRetention).Format(time.RFC3339)
+	args := []string{"group", "update", "--name", rg,
+		"--set", "tags.DeleteAfter=" + deleteAfter, "--output", "none"}
+	if sub := vals["AZURE_SUBSCRIPTION_ID"]; sub != "" { // pin the subscription when azd recorded one
+		args = append(args, "--subscription", sub)
+	}
+	if out, code := r.runQuiet(ctx, r.projectDir, tagTimeout, "az", args...); code != 0 {
+		r.t.Logf("warning: could not tag resource group %q with DeleteAfter (exit %d): %s",
+			rg, code, truncate(strings.TrimSpace(out), 200))
+		return
+	}
+	r.t.Logf("tagged resource group %q with DeleteAfter=%s", rg, deleteAfter)
+}
+
+// azdEnvValues returns the project's azd environment as a key→value map. Output
+// is captured quietly (never streamed to the test log) because azd env values
+// can include provisioning secrets. A failure yields an empty map.
+func (r *runner) azdEnvValues(ctx context.Context) map[string]string {
+	out, code := r.runQuiet(ctx, r.projectDir, tagTimeout, "azd", "env", "get-values")
+	vals := map[string]string{}
+	if code != 0 {
+		r.t.Logf("warning: azd env get-values failed (exit %d)", code)
+		return vals
+	}
+	// Lines are KEY="value"; Cut on the first '=' so values containing '=' survive,
+	// then Trim strips every leading/trailing '"' (escape sequences are NOT decoded).
+	for line := range strings.SplitSeq(out, "\n") {
+		key, val, ok := strings.Cut(strings.TrimSpace(line), "=")
+		if !ok {
+			continue
+		}
+		vals[strings.TrimSpace(key)] = strings.Trim(strings.TrimSpace(val), `"`)
+	}
+	return vals
+}
+
+// runQuiet executes name with args in dir, bounded by timeout, and hands back
+// the combined stdout/stderr together with the exit code. Unlike runAzd it
+// never streams to the test log, which makes it suitable for commands whose
+// output may carry secrets (`azd env get-values`) or is pure side effect
+// (`az group update`).
+func (r *runner) runQuiet(
+	ctx context.Context, dir string, timeout time.Duration, name string, args ...string,
+) (string, int) {
+	bounded, stop := context.WithTimeout(ctx, timeout)
+	defer stop()
+
+	//nolint:gosec // name and args are fixed, test-controlled values.
+	proc := exec.CommandContext(bounded, name, args...)
+	proc.Dir, proc.Env = dir, r.env
+	combined, runErr := proc.CombinedOutput()
+	return string(combined), exitCode(runErr)
+}
+
+// selectByText answers a survey list prompt: it types target as filter text,
+// waits (event-driven) for the list to stop redrawing, then presses Enter. This
+// assumes the survey / azd-core Select supports type-to-filter; that behavior is
+// only verifiable against a live run (documented in README). waitForQuiet's
+// exited result is intentionally ignored: a child that exited mid-select makes
+// the trailing Enter a harmless no-op on the closed pty.
+func (r *runner) selectByText(target string) {
+	r.c.send(target)
+	r.c.waitForQuiet(listSettle)
+	r.c.send(keyEnter)
+}
+
+// enter sends a single Enter keypress, accepting whatever default the prompt shows.
+func (r *runner) enter() {
+	r.c.send(keyEnter)
+}
+
+// createProject reports whether E2E_CREATE_PROJECT asks for a fresh Foundry project.
+func (r *runner) createProject() bool {
+	return envTrue("E2E_CREATE_PROJECT")
+}
+
+// findProjectDir scans the immediate subdirectories of testDir and returns the
+// first one holding an azure.yaml (the project scaffolded by init). It returns
+// "" when testDir cannot be read or no such subdirectory exists.
+func (r *runner) findProjectDir() string {
+	children, err := os.ReadDir(r.testDir)
+	if err != nil {
+		return ""
+	}
+	for _, child := range children {
+		if child.IsDir() {
+			candidate := filepath.Join(r.testDir, child.Name())
+			if _, statErr := os.Stat(filepath.Join(candidate, "azure.yaml")); statErr == nil {
+				return candidate
+			}
+		}
+	}
+	return ""
+}
+
+// findServiceName reads the service name from the project's azure.yaml. azd
+// scaffolds exactly one service, so the sole key under services: is the name.
+func (r *runner) findServiceName() string {
+	dir := r.projectDir
+	if dir == "" {
+		dir = r.findProjectDir()
+	}
+	if dir == "" {
+		return ""
+	}
+	//nolint:gosec // azure.yaml path is under the test-controlled testDir.
+	data, err := os.ReadFile(filepath.Join(dir, "azure.yaml"))
+	if err != nil {
+		return ""
+	}
+	// A struct unmarshal is more robust than scanning lines: it tolerates
+	// comments and indentation changes that a naive parser would mishandle.
+	var proj struct {
+		Services map[string]any `yaml:"services"`
+	}
+	if err := yaml.Unmarshal(data, &proj); err != nil || len(proj.Services) == 0 {
+		return ""
+	}
+	for name := range proj.Services { // map order is unspecified; relies on there being one service
+		return name
+	}
+	return ""
+}
+
+// validateInitOutput reports whether some immediate subdirectory of testDir has
+// an azure.yaml that targets the agent host and contains a nested agent.yaml.
+func (r *runner) validateInitOutput() bool {
+	children, err := os.ReadDir(r.testDir)
+	if err != nil {
+		return false
+	}
+	for _, child := range children {
+		if !child.IsDir() {
+			continue
+		}
+		projectDir := filepath.Join(r.testDir, child.Name())
+		//nolint:gosec // azure.yaml path is under the test-controlled testDir.
+		raw, readErr := os.ReadFile(filepath.Join(projectDir, "azure.yaml"))
+		if readErr != nil {
+			continue
+		}
+		manifest := string(raw)
+		agentHost := strings.Contains(manifest, "host:") && strings.Contains(manifest, "azure.ai.agent")
+		if agentHost && hasAgentYAML(projectDir) {
+			return true
+		}
+	}
+	return false
+}
+
+// hasAgentYAML reports whether an agent.yaml exists anywhere under root.
+func hasAgentYAML(root string) bool {
+	found := false
+	_ = filepath.WalkDir(root, func(_ string, d fs.DirEntry, err error) error {
+		if err != nil {
+			return nil
+		}
+		if !d.IsDir() && d.Name() == "agent.yaml" {
+			found = true
+			return filepath.SkipAll
+		}
+		return nil
+	})
+	return found
+}
+
+// lineLogger is an io.Writer that relays a stream to t.Log one complete line at
+// a time, so long-running azd output shows up live in the CI log.
+type lineLogger struct {
+	t   *testing.T
+	buf []byte
+}
+
+func (l *lineLogger) Write(p []byte) (int, error) {
+	l.buf = append(l.buf, p...)
+	for {
+		line, rest, found := bytes.Cut(l.buf, []byte("\n"))
+		if !found {
+			break
+		}
+		l.t.Log(strings.TrimRight(string(line), "\r"))
+		l.buf = rest
+	}
+	return len(p), nil
+}
+
+func (l *lineLogger) flush() {
+	if len(l.buf) > 0 {
+		l.t.Log(strings.TrimRight(string(l.buf), "\r"))
+		l.buf = nil
+	}
+}
+
+// exitCode maps an exec error to a code: 0 for nil, the ExitError's code, otherwise -1.
+func exitCode(err error) int {
+	if err == nil {
+		return 0
+	}
+	if ee, ok := errors.AsType[*exec.ExitError](err); ok {
+		return ee.ExitCode()
+	}
+	return -1
+}
+
+// ghToken returns GITHUB_TOKEN, else GH_TOKEN, else `gh auth token` output ("" on failure).
+func ghToken() string {
+	for _, k := range []string{"GITHUB_TOKEN", "GH_TOKEN"} {
+		if v := os.Getenv(k); v != "" {
+			return v
+		}
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+	//nolint:gosec // gh is a trusted fixed binary; no user input in args.
+	out, err := exec.CommandContext(ctx, "gh", "auth", "token").Output()
+	if err != nil {
+		return ""
+	}
+	return strings.TrimSpace(string(out))
+}
+
+// shortHash returns the first 6 hex digits of sha256("<mode>-<pid>") as an agent
+// name suffix; not security-sensitive (sha256 only to keep security scanners quiet).
+func shortHash(mode string) string {
+	sum := sha256.Sum256(fmt.Appendf(nil, "%s-%d", mode, os.Getpid()))
+	return hex.EncodeToString(sum[:])[:6]
+}
+
+// assertSafeTestDir refuses a path that is not clearly a disposable test dir, so
+// a bad E2E_TESTDIR (e.g. "/", "/tmp", "$HOME") can never trigger a destructive
+// delete. It returns nil when the path is acceptable and an error otherwise.
+func assertSafeTestDir(path string) error {
+	abs, err := filepath.Abs(path)
+	if err != nil {
+		return fmt.Errorf("resolve test dir: %w", err)
+	}
+	abs = filepath.Clean(abs)
+	protected := map[string]bool{
+		"/": true, "/tmp": true, "/var": true, "/usr": true, "/etc": true,
+		"/bin": true, "/lib": true, "/root": true, "/home": true,
+	}
+	if home, err := os.UserHomeDir(); err == nil && home != "" {
+		protected[filepath.Clean(home)] = true
+	}
+	if protected[abs] || strings.Count(abs, "/") < 2 { // depth check also rejects any top-level dir
+		return fmt.Errorf("refusing to delete unsafe test dir %q (resolved %q)", path, abs)
+	}
+	return nil
+}
+
+// useAzCliAuth reports whether azd should reuse the az CLI session: true when
+// E2E_USE_AZ_CLI_AUTH is truthy or GITHUB_ACTIONS / TF_BUILD is set (CI).
+func useAzCliAuth() bool {
+	return envTrue("E2E_USE_AZ_CLI_AUTH") ||
+		os.Getenv("GITHUB_ACTIONS") != "" ||
+		os.Getenv("TF_BUILD") != ""
+}
+
+// getenvDefault looks up key in the environment and falls back to def when it is unset or empty.
+func getenvDefault(key, def string) string {
+	value := os.Getenv(key)
+	if value == "" {
+		value = def
+	}
+	return value
+}
+
+// envTrue reports whether the environment variable key holds a truthy value.
+//
+// The value is trimmed and lowercased first; "1", "true" and "yes" count as
+// true. Anything else — including an unset or empty variable — is false.
+func envTrue(key string) bool {
+	// Normalize once, then compare against the accepted spellings.
+	normalized := strings.ToLower(strings.TrimSpace(os.Getenv(key)))
+	return normalized == "1" || normalized == "true" || normalized == "yes"
+}
+
+// sleepCtx blocks for d and returns nil, or returns ctx.Err() if ctx ends first.
+func sleepCtx(ctx context.Context, d time.Duration) error {
+	wake := time.NewTimer(d)
+	defer wake.Stop()
+	select {
+	case <-wake.C:
+		return nil
+	case <-ctx.Done():
+		return ctx.Err()
+	}
+}
+
+// truncate trims s and caps it to n bytes plus "...", never leaving a split UTF-8 rune.
+func truncate(s string, n int) string {
+	s = strings.TrimSpace(s)
+	if len(s) <= n {
+		return s
+	}
+	return strings.ToValidUTF8(s[:n], "") + "..."
+}
+
+// tail returns the last n bytes of s behind "...", dropping a split leading UTF-8 rune.
+func tail(s string, n int) string {
+	if len(s) <= n {
+		return s
+	}
+	return "..." + strings.ToValidUTF8(s[len(s)-n:], "")
+}
diff --git a/eng/pipelines/ext-azure-ai-agents-live.yml b/eng/pipelines/ext-azure-ai-agents-live.yml
new file mode 100644
index 00000000000..05b4b1713ae
--- /dev/null
+++ b/eng/pipelines/ext-azure-ai-agents-live.yml
@@ -0,0 +1,239 @@
+# Live E2E: azure.ai.agents extension — Tier 2 golden path
+#
+# Runs the full agent lifecycle (init -> provision -> deploy -> invoke -> down)
+# against LIVE Azure resources. The interactive `azd ai agent init` prompts are
+# driven by the Go pseudo-terminal test driver (go-expect + vt10x); the other
+# phases shell out to azd with --no-prompt. See tests/e2e-live/README.md.
+#
+# This pipeline is the live counterpart to the PR-gate checks in
+# `.github/workflows/lint-ext-azure-ai-agents.yml` (Tier 0 offline + Tier 1
+# recording/playback). Live Azure access is intentionally kept OUT of the
+# automatic PR pipeline (per Azure SDK EngSys / SFI guidance) and runs here only:
+#   - On demand via the PR comment:  /azp run ext-azure-ai-agents-live
+#     (requires write permission on the repo)
+#   - On the weekly schedule below.
+#
+# Required ADO setup (one-time, admin) — see tests/e2e-live/README.md:
+#   - Register this YAML as a pipeline named `ext-azure-ai-agents-live`.
+#   - Service connection (parameter `serviceConnection`, default `azure-sdk-tests`)
+#     must map to the shared TME test subscription with Contributor + the RBAC
+#     needed to create Foundry projects and deploy models.
+#   - GitHub clones of the starter template authenticate with the azure-sdk org
+#     secret `azuresdk-github-pat` (already provided by the Azure SDK ADO
+#     project) to avoid anonymous rate limits — no extra secret setup required.
+
+trigger: none
+pr: none
+
+# The only automatic trigger for this pipeline: CI and PR triggers are both
+# disabled above (`trigger: none`, `pr: none`).
+schedules:
+  # 7am UTC Monday (offset from other weekly E2E pipelines to reduce contention).
+  - cron: "0 7 * * 1"
+    displayName: Weekly live golden-path E2E
+    branches:
+      include:
+        - main
+    # Run on schedule even when main has not changed since the previous
+    # scheduled run.
+    always: true
+
+# Queue-time parameters; the defaults are what the weekly schedule runs with.
+parameters:
+  # Which deploy mode(s) to exercise. Passed to the test step as the
+  # E2E_DEPLOY_MODES environment variable.
+  - name: deployModes
+    displayName: "Tier 2 deploy modes"
+    type: string
+    default: both
+    values:
+      - both
+      - code
+      - container
+  # Service connection used as `azureSubscription` by both AzureCLI@2 steps
+  # (the live test run and the leaked-resource cleanup).
+  - name: serviceConnection
+    displayName: "Azure service connection (TME subscription)"
+    type: string
+    default: azure-sdk-tests
+
+extends:
+  template: /eng/pipelines/templates/stages/1es-redirect.yml
+  parameters:
+    stages:
+      - stage: AiAgentsLiveE2E
+        displayName: AI Agents Live Golden Path
+        variables:
+          - template: /eng/pipelines/templates/variables/image.yml
+        jobs:
+          - job: Tier2
+            displayName: Tier 2 — init/provision/deploy/invoke/down
+            pool:
+              name: $(LINUXPOOL)
+              image: $(LINUXVMIMAGE)
+              os: linux
+            # Two golden paths (code + container) run sequentially (~13-15 min
+            # each in the typical case), plus build/provision overhead. The cap
+            # is sized for the worst case so an ungraceful job timeout never
+            # preempts the in-test teardown: 2x the per-mode 60 min runTimeout
+            # (tier2_live_test.go) + per-run cleanup + build/setup steps.
+            timeoutInMinutes: 150
+            steps:
+              - checkout: self
+
+              - template: /eng/pipelines/templates/steps/setup-go.yml
+
+              # Live build — NO `-tags=record`, so the CLI/extension talk to real
+              # Azure instead of the recording proxy used by the PR-gate tests.
+              - bash: go build -o azd .
+                workingDirectory: cli/azd
+                displayName: Build azd
+
+              - bash: go build -o azure-ai-agents .
+                workingDirectory: cli/azd/extensions/azure.ai.agents
+                displayName: Build azure.ai.agents extension
+
+              - bash: echo "##vso[task.prependpath]$(Build.SourcesDirectory)/cli/azd"
+                displayName: Add azd to PATH
+
+              # Install the freshly built (live, non-record) extension into the
+              # azd config dir: copy the binary where azd expects it and write a
+              # config.json so `azd ai agent` resolves the extension. The config
+              # is generated FROM extension.yaml via yq so the manifest fields
+              # (capabilities, namespace, usage, ...) can never drift from a
+              # hand-maintained copy here; only test-specific fields (path,
+              # source, sentinel version) are injected.
+              - bash: |
+                  set -euo pipefail
+                  # Map the agent architecture to azd's expected binary suffix so
+                  # this keeps working if the pool ever moves off linux/amd64.
+                  # Use backticks (not $(...)) for the command substitution so
+                  # Azure DevOps cannot mistake it for a $(macro) variable; assign
+                  # once and reference the plain shell var ($ARCH) thereafter.
+                  ARCH=`uname -m`
+                  case "$ARCH" in
+                    x86_64|amd64) GOARCH=amd64 ;;
+                    aarch64|arm64) GOARCH=arm64 ;;
+                    *) echo "Unsupported architecture: $ARCH" >&2; exit 1 ;;
+                  esac
+                  BIN_NAME="azure-ai-agents-linux-${GOARCH}"
+                  EXT_DIR="$HOME/.azd/extensions/azure.ai.agents"
+                  mkdir -p "$EXT_DIR"
+                  cp cli/azd/extensions/azure.ai.agents/azure-ai-agents "$EXT_DIR/$BIN_NAME"
+                  chmod +x "$EXT_DIR/$BIN_NAME"
+                  # yq ships on the azure-sdk Linux images; install the pinned
+                  # version as a fallback if a future image drops it. `go install`
+                  # writes the binary to GOBIN (or GOPATH/bin when GOBIN is unset),
+                  # which is not guaranteed to be on PATH, so add that directory
+                  # explicitly or the yq call below would still fail.
+                  if ! command -v yq >/dev/null 2>&1; then
+                    go install github.com/mikefarah/yq/v4@v4.44.3
+                    GO_BIN_DIR=`go env GOBIN`
+                    if [ -z "$GO_BIN_DIR" ]; then
+                      GO_BIN_DIR=`go env GOPATH`/bin
+                    fi
+                    export PATH="$GO_BIN_DIR:$PATH"
+                  fi
+                  # Exported so the yq expression can read it through env(BIN_NAME).
+                  export BIN_NAME
+                  yq -o=json '
+                    .id as $id | {
+                      "extension": {"installed": {$id: {
+                        "id": .id,
+                        "namespace": .namespace,
+                        "capabilities": .capabilities,
+                        "displayName": .displayName,
+                        "description": .description,
+                        "version": "0.0.0-test",
+                        "usage": .usage,
+                        "path": "extensions/azure.ai.agents/" + env(BIN_NAME),
+                        "source": "azd"
+                      }}}
+                    }' cli/azd/extensions/azure.ai.agents/extension.yaml > "$HOME/.azd/config.json"
+                displayName: Install azure.ai.agents extension
+
+              # Run the live golden path INSIDE the AzureCLI@2 task so the az CLI
+              # session (consumed by azd via auth.useAzCliAuth) stays valid for the
+              # whole run. keepAzSessionActive is REQUIRED: the service connection
+              # uses Workload Identity Federation and the test runs well past the
+              # ~10 min default token lifetime. A separate login step would NOT
+              # work — AzureCLI@2 isolates AZURE_CONFIG_DIR to a task-temp dir, so
+              # the session does not persist to later plain bash steps.
+              - task: AzureCLI@2
+                displayName: Run Tier 2 live golden path
+                # Holds BOTH deploy modes run sequentially. `go test -timeout`
+                # (below) self-caps at 125 min — under this 130 min step budget —
+                # so the test process exits before ADO force-kills the step, and
+                # the per-mode 60 min runTimeout drives the graceful azd teardown.
+                timeoutInMinutes: 130
+                inputs:
+                  azureSubscription: ${{ parameters.serviceConnection }}
+                  keepAzSessionActive: true
+                  visibleAzLogin: false
+                  scriptType: bash
+                  scriptLocation: inlineScript
+                  workingDirectory: cli/azd/extensions/azure.ai.agents
+                  inlineScript: |
+                    set -euo pipefail
+                    azd config set auth.useAzCliAuth true
+                    # Assign first (not `export X=$(...)`, which hides command
+                    # substitution failures from set -e), then verify non-empty.
+                    E2E_SUBSCRIPTION="$(az account show --query id -o tsv)"
+                    E2E_TENANT="$(az account show --query tenantId -o tsv)"
+                    if [ -z "$E2E_SUBSCRIPTION" ] || [ -z "$E2E_TENANT" ]; then
+                      echo "ERROR: failed to resolve subscription/tenant from az account show" >&2
+                      exit 1
+                    fi
+                    export E2E_SUBSCRIPTION E2E_TENANT
+                    echo "Using subscription: $E2E_SUBSCRIPTION"
+                    mkdir -p "$(Build.ArtifactStagingDirectory)/logs"
+                    # Drive the live golden path through the Go pseudo-terminal
+                    # test driver. -v streams per-phase logs; -count=1 defeats the
+                    # test cache (a live test must always re-run); -timeout is a
+                    # hard backstop kept under this step's budget. When it fires,
+                    # the test binary panics and t.Cleanup does NOT run, so it is
+                    # sized above 2x the per-mode runTimeout that drives teardown.
+                    go test -run TestTier2Live -count=1 -timeout 125m -v ./tests/e2e-live/ 2>&1 \
+                      | tee "$(Build.ArtifactStagingDirectory)/logs/tier2.log"
+                env:
+                  # Gate + mode selection consumed by tier2_live_test.go.
+                  AZURE_AI_AGENTS_E2E_LIVE: "1"
+                  E2E_DEPLOY_MODES: ${{ parameters.deployModes }}
+                  E2E_CREATE_PROJECT: "true"
+                  E2E_LOCATION: eastus2
+                  E2E_USE_AZ_CLI_AUTH: "true"
+                  # azure-sdk org PAT (ambient in the ADO project) used only to
+                  # avoid anonymous GitHub rate limits when cloning the template.
+                  GH_TOKEN: $(azuresdk-github-pat)
+
+              - task: PublishPipelineArtifact@1
+                condition: always()
+                inputs:
+                  targetPath: $(Build.ArtifactStagingDirectory)
+                  artifactName: tier2-live-logs-$(Build.BuildId)
+                displayName: Publish test logs
+
+              # Safety net for hard crashes / step timeout: the in-test teardown
+              # runs `azd down` already, but if the run died mid-way, force-purge
+              # any leftover project environments. Must run inside AzureCLI@2 so it
+              # is authenticated — the previous task's az session does not persist.
+              - task: AzureCLI@2
+                # Runs regardless of the outcome of earlier steps, and a failure
+                # here does not fail the job: this is best-effort garbage collection.
+                condition: always()
+                continueOnError: true
+                displayName: Cleanup leaked Azure resources
+                inputs:
+                  azureSubscription: ${{ parameters.serviceConnection }}
+                  keepAzSessionActive: true
+                  visibleAzLogin: false
+                  scriptType: bash
+                  scriptLocation: inlineScript
+                  inlineScript: |
+                    azd config set auth.useAzCliAuth true
+                    echo "Best-effort teardown of any leaked resources..."
+                    # RFC 3339 UTC instant for the EngSys DeleteAfter cleanup tag.
+                    delete_after=$(date -u -d '+2 days' +%Y-%m-%dT%H:%M:%SZ)
+                    for dir in /tmp/e2e-tests/tier2-*/; do
+                      [ -d "$dir" ] || continue
+                      proj=$(find "$dir" -maxdepth 2 -name azure.yaml -exec dirname {} \; | head -1)
+                      [ -n "$proj" ] || continue
+                      (
+                        # This script deliberately runs without `set -e`, so a failed
+                        # cd would otherwise let the tagging loop and `azd down
+                        # --force --purge` run in whatever directory the step started
+                        # in. Abort just this subshell instead; the `|| true` below
+                        # moves on to the next test dir.
+                        cd "$proj" || exit 1
+                        # Tag the resource group(s) before attempting `azd down` so
+                        # the EngSys garbage collector can still reclaim them if the
+                        # delete below fails - the cleanup pipeline keys off
+                        # DeleteAfter. Enumerate by azd env name rather than reading
+                        # AZURE_RESOURCE_GROUP from env values, which a provision that
+                        # times out may not have persisted yet; azd creates the group
+                        # as rg-<env> at the start of provisioning, so the name is
+                        # known even when the run dies mid-deploy.
+                        for d in .azure/*/; do
+                          [ -d "$d" ] || continue
+                          # Strip the ".azure/" prefix and trailing "/" to get the
+                          # bare environment name.
+                          name=${d#.azure/}
+                          name=${name%/}
+                          rg="rg-$name"
+                          echo "Tagging $rg (env=$name) with DeleteAfter=$delete_after"
+                          az group update --name "$rg" \
+                            --set "tags.DeleteAfter=$delete_after" --output none || true
+                        done
+                        azd down --force --purge --no-prompt
+                      ) 2>&1 || true
+                    done