From 15af4670f10069681b316404c5aa027f89c1ad4f Mon Sep 17 00:00:00 2001
From: Pal Lakatos-Toth <pallakatos@github.com>
Date: Thu, 25 Jun 2026 22:10:16 +0200
Subject: [PATCH] feat(up): Foundry auto-setup, best-model selection, memory
 CRD parity + fix kars up hang
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make `kars up --foundry-endpoint` actually set up a BYO Foundry project for
Memory Store, stop hardcoding a stale model, and fix the post-deploy hang.

Foundry auto-setup (new cli/src/commands/up/foundry_setup.ts):
- Discover the project; list deployed models (ARM control-plane, no Graph).
- Pick the BEST deployed chat model instead of hardcoded gpt-4.1 (pure, tested
  ranking; --model always wins). Excludes embedding/image/audio.
- Ensure an embedding model (Memory Store needs one); best-effort deploy
  text-embedding-3-small if absent.
- Enable the project's system-assigned managed identity if missing (Memory
  Store authenticates internally as the project MI), then re-read principalId
  for the existing Azure AI User RBAC grant. All idempotent + non-fatal.

CRD parity + status:
- Emit a KarsMemory binding CR on `kars up` (Foundry endpoints only), matching
  what `kars dev` already creates (refs.ts buildKarsMemory/memoryRefName).
- Print a CRD status report (InferencePolicy/ToolPolicy/KarsMemory/KarsSandbox).

Fix the hang (two causes):
- cli/src/preflight.ts: the RBAC spinner was only concluded when
  fetchSubscriptionPermissions threw or returned a non-empty set; an empty []
  left it spinning, and its setInterval kept Node alive — `kars up` hung after
  the summary with the spinner still animating. Conclude it on the empty path.
  Also fix a second identical leak in the provider notFound path.
- up.ts: process.exit(0) on success (belt-and-suspenders for the detached
  kubectl port-forward handle).

Memory error unmasking (runtime):
- foundry.ts ensureStore uses the STRICT router call for POST /memory_stores so
  the real 403/400 surfaces (MI not enabled / RBAC propagating / no embedding
  model) instead of the generic "could not be created".

Security audit: docs/internal/security-audits/2026-06-25-foundry-autosetup-bestmodel-memory-spinner.md (2 sign-offs).
Verification: CLI tsc+oxlint clean, 831 tests (+10); runtime tsc+oxlint clean,
244 tests; model ranking validated against the live azureclaw-foundry set.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 cli/src/commands/up.ts                        |   6 +
 cli/src/commands/up/foundry_setup.test.ts     |  83 +++++
 cli/src/commands/up/foundry_setup.ts          | 294 ++++++++++++++++++
 cli/src/commands/up/sandbox_bringup.ts        |  99 ++++--
 cli/src/preflight.ts                          |  21 +-
 cli/src/refs.ts                               |  40 +++
 ...ndry-autosetup-bestmodel-memory-spinner.md | 100 ++++++
 .../openclaw/src/core/agt-tools/foundry.ts    |  11 +-
 8 files changed, 620 insertions(+), 34 deletions(-)
 create mode 100644 cli/src/commands/up/foundry_setup.test.ts
 create mode 100644 cli/src/commands/up/foundry_setup.ts
 create mode 100644 docs/internal/security-audits/2026-06-25-foundry-autosetup-bestmodel-memory-spinner.md

diff --git a/cli/src/commands/up.ts b/cli/src/commands/up.ts
index 079bd5e8..b50d8c29 100644
--- a/cli/src/commands/up.ts
+++ b/cli/src/commands/up.ts
@@ -975,6 +975,12 @@ Auto-resume:
           registryMode, globalRegistryUrl, globalRelayUrl,
         });
 
+        // Explicit success exit. Some `az`/REST calls leave keep-alive sockets
+        // (and we spawn a detached kubectl port-forward), so the event loop
+        // wouldn't drain on its own — without this the command hangs after the
+        // deployment summary instead of returning to the shell.
+        process.exit(0);
+
       } catch (error) {
         stepper.stop();
         console.error(chalk.red(`\n  Deployment failed`));
diff --git a/cli/src/commands/up/foundry_setup.test.ts b/cli/src/commands/up/foundry_setup.test.ts
new file mode 100644
index 00000000..4f9c61dd
--- /dev/null
+++ b/cli/src/commands/up/foundry_setup.test.ts
@@ -0,0 +1,83 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+import { describe, it, expect } from "vitest";
+import {
+  scoreChatModel,
+  pickBestChatModel,
+  findEmbeddingModel,
+  parseFoundryEndpoint,
+  type FoundryDeployment,
+} from "./foundry_setup.js";
+
+const dep = (name: string, modelName = name): FoundryDeployment => ({
+  name,
+  modelName,
+  modelVersion: "1",
+});
+
+describe("scoreChatModel", () => {
+  it("excludes non-chat models", () => {
+    expect(scoreChatModel("text-embedding-3-small")).toBeNull();
+    expect(scoreChatModel("gpt-image-1")).toBeNull();
+    expect(scoreChatModel("FLUX.2-pro")).toBeNull();
+    expect(scoreChatModel("whisper")).toBeNull();
+    expect(scoreChatModel("tts-1")).toBeNull();
+  });
+
+  it("ranks newer families above older", () => {
+    expect(scoreChatModel("gpt-5.4")!).toBeGreaterThan(scoreChatModel("gpt-4.1")!);
+    expect(scoreChatModel("gpt-5.4")!).toBeGreaterThan(scoreChatModel("gpt-5")!);
+    expect(scoreChatModel("gpt-5")!).toBeGreaterThan(scoreChatModel("gpt-4o")!);
+  });
+
+  it("prefers the plain flagship over variants within a family", () => {
+    const plain = scoreChatModel("gpt-5.4")!;
+    expect(plain).toBeGreaterThan(scoreChatModel("gpt-5.4-pro")!);
+    expect(scoreChatModel("gpt-5.4-pro")!).toBeGreaterThan(scoreChatModel("gpt-5.4-chat")!);
+    expect(scoreChatModel("gpt-5.4-chat")!).toBeGreaterThan(scoreChatModel("gpt-5.4-mini")!);
+    expect(scoreChatModel("gpt-5.4-mini")!).toBeGreaterThan(scoreChatModel("gpt-5.4-nano")!);
+  });
+});
+
+describe("pickBestChatModel", () => {
+  it("picks the flagship from a realistic deployment set", () => {
+    const deployments = [
+      "gpt-5-mini", "text-embedding-3-small", "gpt-4.1", "gpt-5.4-mini",
+      "gpt-5.3-chat", "FLUX.2-pro", "gpt-image-1", "gpt-5.4-pro", "gpt-5.4",
+    ].map((n) => dep(n));
+    expect(pickBestChatModel(deployments)?.name).toBe("gpt-5.4");
+  });
+
+  it("returns undefined when no chat model is deployed", () => {
+    expect(pickBestChatModel([dep("text-embedding-3-small"), dep("gpt-image-1")])).toBeUndefined();
+  });
+
+  it("returns the deployment (by its own name) when it wraps a chat model", () => {
+    // deployment named "primary" wrapping model "gpt-5.4"
+    const d: FoundryDeployment = { name: "primary", modelName: "gpt-5.4", modelVersion: "1" };
+    expect(pickBestChatModel([d])?.name).toBe("primary");
+  });
+});
+
+describe("findEmbeddingModel", () => {
+  it("prefers 3-large over 3-small over ada", () => {
+    const deployments = [dep("ada-002", "text-embedding-ada-002"), dep("small", "text-embedding-3-small"), dep("large", "text-embedding-3-large")];
+    expect(findEmbeddingModel(deployments)?.name).toBe("large");
+  });
+  it("returns undefined when no embedding deployed", () => {
+    expect(findEmbeddingModel([dep("gpt-5.4")])).toBeUndefined();
+  });
+});
+
+describe("parseFoundryEndpoint", () => {
+  it("parses account + project from a Foundry project endpoint", () => {
+    expect(
+      parseFoundryEndpoint("https://azureclaw-foundry-services.services.ai.azure.com/api/projects/azureclaw"),
+    ).toEqual({ accountName: "azureclaw-foundry-services", projectName: "azureclaw" });
+  });
+  it("returns null for a non-project endpoint", () => {
+    expect(parseFoundryEndpoint("https://foo.openai.azure.com")).toBeNull();
+    expect(parseFoundryEndpoint("not a url")).toBeNull();
+  });
+});
diff --git a/cli/src/commands/up/foundry_setup.ts b/cli/src/commands/up/foundry_setup.ts
new file mode 100644
index 00000000..8e685304
--- /dev/null
+++ b/cli/src/commands/up/foundry_setup.ts
@@ -0,0 +1,294 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+// up/foundry_setup.ts — make a BYO ("--foundry-endpoint") Foundry project
+// actually usable by kars, instead of assuming it's pre-configured.
+//
+// What this does (all idempotent, all read-mostly except the two explicit
+// provisioning steps which are gated + best-effort):
+//   1. Resolve the AI Services account + project from the endpoint URL.
+//   2. List the project's deployed models (ARM control-plane — works with the
+//      caller's existing `az login`, no Microsoft Graph).
+//   3. Pick the BEST deployed chat model for the agent (so we stop hardcoding a
+//      stale gpt-4.1). The user's explicit `--model` always wins.
+//   4. Ensure an embedding model is deployed (Foundry Memory Store needs one);
+//      best-effort deploy `text-embedding-3-small` if none exists.
+//   5. Enable the project's system-assigned managed identity if it's missing
+//      (Memory Store authenticates internally as the PROJECT MI) and re-read its
+//      principalId so the caller can grant it `Azure AI User` on the RG.
+//
+// Nothing here aborts the deploy: every failure degrades to a clear note so the
+// sandbox still comes up and the operator gets actionable remediation.
+
+import type { Stepper } from "../../stepper.js";
+
+/** One deployed model on the Foundry/AI-Services account (ARM shape). */
+export interface FoundryDeployment {
+  /** Deployment name — what you put in the request `model` field. */
+  name: string;
+  /** Underlying model name (e.g. "gpt-5.4"). */
+  modelName: string;
+  /** Model version. */
+  modelVersion: string;
+}
+
+export interface FoundrySetupResult {
+  accountName: string;
+  accountResourceId: string;
+  resourceGroup: string;
+  projectName: string;
+  /** Best deployed chat model deployment name, or undefined if none found. */
+  bestChatModel?: string;
+  /** Embedding deployment name in use (existing or just-created), or undefined. */
+  embeddingModel?: string;
+  /** Project system-assigned MI principalId (after any enable), or "". */
+  projectMiPrincipalId: string;
+  /** True if this run enabled the MI (was previously off). */
+  miJustEnabled: boolean;
+  /** Human-readable status notes for the deployment report. */
+  notes: string[];
+}
+
+/**
+ * Score a deployed model for use as an interactive, tool-using agent's chat
+ * model. Returns a number (higher = better) or `null` when the model is not a
+ * chat model (embeddings, image, audio, …) and must be excluded.
+ *
+ * Ranking: family/version dominates (Azure's dot-less "gpt-35-*" counts as 3.5);
+ * within a family the plain flagship beats `-pro`/`-chat`/`-mini`/`-nano`: for
+ * a tool-calling agent the flagship general model is the most reliable default
+ * (`pro` is slower/pricier, `mini`/`nano` weaker). `--model` overrides this.
+ */
+export function scoreChatModel(modelName: string): number | null {
+  const n = modelName.toLowerCase();
+  // Hard-exclude anything that isn't a text chat model.
+  const NON_CHAT =
+    /(embedding|image|dall-?e|flux|whisper|tts|audio|realtime|sora|moderation|rerank|transcrib|stable-?diffusion)/;
+  if (NON_CHAT.test(n)) return null;
+
+  // Family/version score.
+  let family: number;
+  const gpt = n.match(/^gpt-(\d+)(?:\.(\d+))?/);
+  const oSeries = n.match(/^o(\d+)/);
+  if (gpt) {
+    // Azure spells GPT-3.5 as "gpt-35-*"; read that as 3.5, not major 35.
+    const legacy35 = gpt[1] === "35";
+    const major = legacy35 ? 3 : parseInt(gpt[1], 10);
+    const minor = legacy35 ? 5 : gpt[2] ? parseInt(gpt[2], 10) : 0;
+    family = major * 100 + minor; // gpt-5.4 → 504, gpt-4.1 → 401, gpt-4o → 400
+  } else if (oSeries) {
+    family = 300 + parseInt(oSeries[1], 10) * 10; // o3 → 330, o4 → 340 (below gpt-5)
+  } else {
+    family = 50; // unknown family — keep, but rank low.
+  }
+
+  // Variant adjustment (plain flagship preferred for agent tool-use).
+  let variant: number;
+  if (/-pro\b/.test(n)) variant = 3;
+  else if (/-chat\b/.test(n)) variant = 2;
+  else if (/-mini\b/.test(n)) variant = 1;
+  else if (/-nano\b/.test(n)) variant = 0;
+  else variant = 4; // plain flagship
+  return family * 10 + variant;
+}
+
+/**
+ * Pick the best chat-capable deployment, or undefined if none qualify. Name is
+ * scored only if modelName is empty, so an excluded model can't pass via alias.
+ */
+export function pickBestChatModel(deployments: FoundryDeployment[]): FoundryDeployment | undefined {
+  let best: { dep: FoundryDeployment; score: number } | undefined;
+  for (const dep of deployments) {
+    const score = scoreChatModel(dep.modelName || dep.name);
+    if (score !== null && (!best || score > best.score)) best = { dep, score };
+  }
+  return best?.dep;
+}
+
+/** Find an embedding deployment: 3-large > 3-small > ada; ties keep the earliest. */
+export function findEmbeddingModel(
+  deployments: FoundryDeployment[],
+): FoundryDeployment | undefined {
+  const isEmbedding = (d: FoundryDeployment): boolean =>
+    /embedding/i.test(d.modelName) || /embedding/i.test(d.name);
+  const tiers: Array<[string, number]> = [["3-large", 3], ["3-small", 2], ["ada", 1]];
+  const preference = (d: FoundryDeployment): number => {
+    const label = `${d.modelName} ${d.name}`.toLowerCase();
+    return tiers.find(([marker]) => label.includes(marker))?.[1] ?? 0;
+  };
+  let winner: FoundryDeployment | undefined;
+  for (const d of deployments) {
+    if (isEmbedding(d) && (!winner || preference(d) > preference(winner))) winner = d;
+  }
+  return winner;
+}
+
+/** Split a Foundry project endpoint URL into account + project; null if it isn't one. */
+export function parseFoundryEndpoint(
+  endpoint: string,
+): { accountName: string; projectName: string } | null {
+  let url: URL;
+  try {
+    url = new URL(endpoint);
+  } catch {
+    return null;
+  }
+  const [accountName] = url.hostname.split(".");
+  const projectName = /\/api\/projects\/([^/]+)/.exec(url.pathname)?.[1];
+  return accountName && projectName ? { accountName, projectName } : null;
+}
+
+type Execa = typeof import("execa").execa;
+
+/**
+ * Discover + (best-effort) provision the BYO Foundry project so kars Memory
+ * Store and the agent model "just work": resolve the account, list deployments,
+ * pick the best chat model, ensure an embedding model, and enable the project's
+ * system-assigned managed identity. Failed `az` calls degrade to `notes` entries
+ * or fallbacks instead of throwing.
+ *
+ * @returns the setup result, or null when the endpoint isn't a Foundry project
+ *   endpoint (e.g. plain Azure OpenAI) — the caller keeps its existing behaviour
+ *   in that case.
+ */
+export async function setupFoundryForKars(args: {
+  execa: Execa;
+  stepper: Stepper;
+  foundryEndpoint: string;
+}): Promise<FoundrySetupResult | null> {
+  const { execa, stepper, foundryEndpoint } = args;
+  const parsed = parseFoundryEndpoint(foundryEndpoint);
+  if (!parsed) return null;
+  const { accountName, projectName } = parsed;
+  const notes: string[] = [];
+
+  // 1. Resolve the account ARM id + resource group.
+  stepper.update("Discovering Foundry project...");
+  const { stdout: acctJson } = await execa("az", [
+    "cognitiveservices", "account", "list",
+    "--query", `[?name=='${accountName}'].{id:id, rg:resourceGroup} | [0]`,
+    "--output", "json",
+  ], { stdio: "pipe" }).catch(() => ({ stdout: "{}" }));
+  // A literal JSON `null` result must not crash the property reads below.
+  const acct = JSON.parse((acctJson || "{}").trim() || "{}") ?? {};
+  const accountResourceId: string = acct.id || "";
+  const resourceGroup: string = acct.rg || "";
+  if (!accountResourceId || !resourceGroup) {
+    notes.push(
+      `Could not resolve the Foundry account '${accountName}' in this subscription — ` +
+        "skipping Foundry auto-setup (the sandbox will still deploy).",
+    );
+    return {
+      accountName, accountResourceId: "", resourceGroup: "", projectName,
+      projectMiPrincipalId: "", miJustEnabled: false, notes,
+    };
+  }
+
+  // 2. List deployed models (ARM control-plane).
+  let deployments: FoundryDeployment[] = [];
+  let deploymentsListed = false;
+  try {
+    const { stdout: depJson } = await execa("az", [
+      "rest", "--method", "get",
+      "--url", `${accountResourceId}/deployments?api-version=2024-10-01`,
+    ], { stdio: "pipe" });
+    const raw = JSON.parse(depJson.trim());
+    deployments = (raw.value ?? []).map((d: {
+      name: string;
+      properties?: { model?: { name?: string; version?: string } };
+    }) => ({
+      name: d.name,
+      modelName: d.properties?.model?.name ?? d.name,
+      modelVersion: d.properties?.model?.version ?? "",
+    }));
+    deploymentsListed = true;
+  } catch {
+    notes.push("Could not list Foundry model deployments (continuing with defaults; embedding auto-deploy skipped).");
+  }
+
+  // 3. Best chat model.
+  const bestChatModel = pickBestChatModel(deployments)?.name;
+  if (bestChatModel) stepper.detail("info", `Best deployed chat model: ${bestChatModel}`);
+
+  // 4. Ensure an embedding model (Memory Store needs one). Only provision when
+  //    the listing succeeded: `deployment create` is create-or-update, so acting
+  //    on a failed (empty) listing could overwrite an existing deployment.
+  let embeddingModel = findEmbeddingModel(deployments)?.name;
+  if (!embeddingModel && deploymentsListed) {
+    stepper.update("No embedding model deployed — deploying text-embedding-3-small...");
+    const ok = await execa("az", [
+      "cognitiveservices", "account", "deployment", "create",
+      "--name", accountName,
+      "--resource-group", resourceGroup,
+      "--deployment-name", "text-embedding-3-small",
+      "--model-name", "text-embedding-3-small",
+      "--model-version", "1",
+      "--model-format", "OpenAI",
+      "--sku-name", "Standard",
+      "--sku-capacity", "50",
+      "--output", "none",
+    ], { stdio: "pipe" }).then(() => true).catch(() => false);
+    if (ok) {
+      embeddingModel = "text-embedding-3-small";
+      notes.push("Deployed embedding model 'text-embedding-3-small' for Memory Store.");
+    } else {
+      notes.push(
+        "No embedding model is deployed and auto-deploy failed (quota/permissions?). " +
+          "Memory Store needs one — deploy 'text-embedding-3-small' in the Foundry portal.",
+      );
+    }
+  }
+
+  // 5. Ensure the project's system-assigned MI (Memory Store authenticates
+  //    internally as the PROJECT MI).
+  const projectUrl = `${accountResourceId}/projects/${projectName}?api-version=2025-06-01`;
+  let projectMiPrincipalId = "";
+  let miJustEnabled = false;
+  try {
+    const { stdout: projJson } = await execa("az", [
+      "rest", "--method", "get", "--url", projectUrl,
+    ], { stdio: "pipe" });
+    projectMiPrincipalId = JSON.parse(projJson.trim())?.identity?.principalId || "";
+  } catch {
+    // Fall through to enable attempt.
+  }
+
+  if (!projectMiPrincipalId) {
+    stepper.update("Enabling Foundry project managed identity (for Memory Store)...");
+    const enabled = await execa("az", [
+      "rest", "--method", "patch", "--url", projectUrl,
+      "--body", JSON.stringify({ identity: { type: "SystemAssigned" } }),
+    ], { stdio: "pipe" }).then(() => true).catch(() => false);
+
+    if (enabled) {
+      // The principalId may take a few seconds to populate after enabling.
+      for (let i = 0; i < 6 && !projectMiPrincipalId; i++) {
+        await new Promise((r) => setTimeout(r, 3000));
+        const { stdout: pj } = await execa("az", [
+          "rest", "--method", "get", "--url", projectUrl,
+        ], { stdio: "pipe" }).catch(() => ({ stdout: "{}" }));
+        projectMiPrincipalId = JSON.parse((pj || "{}").trim() || "{}")?.identity?.principalId || "";
+      }
+      if (projectMiPrincipalId) {
+        miJustEnabled = true;
+        notes.push("Enabled the Foundry project's system-assigned managed identity.");
+      } else {
+        notes.push(
+          "Enabled the Foundry project MI but its principalId hasn't populated yet — " +
+            "Memory Store RBAC will be granted on the next `kars up` run.",
+        );
+      }
+    } else {
+      notes.push(
+        "Foundry project has no system-assigned MI and kars couldn't enable it " +
+          "(needs Contributor on the project). Enable it: Portal → Project → " +
+          "Resource Management → Identity → System assigned → On, then re-run `kars up`.",
+      );
+    }
+  }
+
+  return {
+    accountName, accountResourceId, resourceGroup, projectName,
+    bestChatModel, embeddingModel, projectMiPrincipalId, miJustEnabled, notes,
+  };
+}
diff --git a/cli/src/commands/up/sandbox_bringup.ts b/cli/src/commands/up/sandbox_bringup.ts
index b73a23a2..78aee629 100644
--- a/cli/src/commands/up/sandbox_bringup.ts
+++ b/cli/src/commands/up/sandbox_bringup.ts
@@ -19,6 +19,7 @@ import { saveContext } from "../../config.js";
 import {
   buildInferencePolicy,
   buildToolPolicy,
+  buildKarsMemory,
   inferenceRefName,
   toolPolicyRefName,
 } from "../../refs.js";
@@ -139,40 +140,57 @@ export async function bringUpSandbox(ctx: SandboxBringUpContext): Promise<void>
   //   1. Sandbox WI → Azure AI User on the Foundry AI Services resource (so pods can call APIs)
   //   2. Foundry project MI → Azure AI User on the resource group (so Memory Store can call models internally)
   if (foundryEndpoint) {
-    stepper.update("Configuring Foundry project RBAC (via Bicep)...");
+    stepper.update("Configuring Foundry project (discovery + setup + RBAC)...");
+
+    // Discover + best-effort provision the BYO Foundry project: pick the best
+    // deployed chat model, ensure an embedding model, and enable the project's
+    // system-assigned MI (Memory Store authenticates internally as the project
+    // MI). All idempotent + non-fatal — see foundry_setup.ts.
+    const { setupFoundryForKars } = await import("./foundry_setup.js");
+    const foundrySetup = await setupFoundryForKars({
+      execa, stepper, foundryEndpoint,
+    }).catch(() => null);
+
+    // Adopt the best deployed chat model unless the user explicitly set --model.
+    const modelExplicit = process.argv.includes("--model");
+    if (foundrySetup?.bestChatModel && !modelExplicit) {
+      if (foundrySetup.bestChatModel !== options.model) {
+        stepper.detail("info", `Using best deployed model '${foundrySetup.bestChatModel}' (was default '${options.model}'; pass --model to override)`);
+      }
+      options.model = foundrySetup.bestChatModel;
+    }
+    for (const note of foundrySetup?.notes ?? []) {
+      stepper.detail("info", note);
+    }
+
     const foundryHost = new URL(foundryEndpoint).hostname;
     // Extract account name: "foo.services.ai.azure.com" → "foo", or "foo.openai.azure.com" → "foo"
-    const foundryAccountName = foundryHost.split(".")[0];
+    const foundryAccountName = foundrySetup?.accountName || foundryHost.split(".")[0];
 
     // Extract project name from URL path: "/api/projects/bar" → "bar"
     const foundryUrl = new URL(foundryEndpoint);
     const projectMatch = foundryUrl.pathname.match(/\/api\/projects\/([^/]+)/);
-    const foundryProjectName = projectMatch ? projectMatch[1] : "";
-
-    // Find the Foundry AI Services account and its resource group
-    const { stdout: foundryAccountJson } = await execa("az", [
-      "cognitiveservices", "account", "list",
-      "--query", `[?name=='${foundryAccountName}'].{id:id, rg:resourceGroup} | [0]`,
-      "--output", "json",
-    ], { stdio: "pipe" }).catch(() => ({ stdout: "{}" }));
-
-    const foundryAccount = JSON.parse(foundryAccountJson.trim() || "{}");
-    const foundryResourceId = foundryAccount.id || "";
-    const foundryRg = foundryAccount.rg || "";
+    const foundryProjectName = foundrySetup?.projectName || (projectMatch ? projectMatch[1] : "");
+
+    // Account ARM id + resource group — reuse the discovery result, else resolve.
+    let foundryResourceId = foundrySetup?.accountResourceId || "";
+    let foundryRg = foundrySetup?.resourceGroup || "";
+    if (!foundryResourceId || !foundryRg) {
+      const { stdout: foundryAccountJson } = await execa("az", [
+        "cognitiveservices", "account", "list",
+        "--query", `[?name=='${foundryAccountName}'].{id:id, rg:resourceGroup} | [0]`,
+        "--output", "json",
+      ], { stdio: "pipe" }).catch(() => ({ stdout: "{}" }));
+      const foundryAccount = JSON.parse(foundryAccountJson.trim() || "{}");
+      foundryResourceId = foundryAccount.id || "";
+      foundryRg = foundryAccount.rg || "";
+    }
 
     if (foundryResourceId && foundryRg && foundryProjectName) {
-      // Query the project's managed identity principal ID via ARM REST API
-      let projectMiPrincipalId = "";
-      try {
-        const { stdout: projectJson } = await execa("az", [
-          "rest", "--method", "get",
-          "--url", `${foundryResourceId}/projects/${foundryProjectName}?api-version=2025-06-01`,
-        ], { stdio: "pipe" });
-        const project = JSON.parse(projectJson.trim());
-        projectMiPrincipalId = project?.identity?.principalId || "";
-      } catch {
-        // Project may not have system MI enabled — warn but continue
-      }
+      // Project MI principalId — resolved (and, if it was off, enabled) by the
+      // discovery step above.
+      const projectMiPrincipalId = foundrySetup?.projectMiPrincipalId || "";
+
 
       // Get the sandbox workload identity principal ID
       let sandboxWiPrincipalId = "";
@@ -383,7 +401,7 @@ export async function bringUpSandbox(ctx: SandboxBringUpContext): Promise<void>
         try { unlinkSync(tmpBicep); } catch {}
       }
 
-      if (!projectMiPrincipalId) {
+      if (!projectMiPrincipalId && !foundrySetup) {
         console.log(chalk.yellow("\n  ⚠ Foundry project has no system-assigned MI. Memory Store will not work."));
         console.log(chalk.yellow("    Enable it: Portal → Project → Resource Management → Identity → System assigned → On"));
         console.log(chalk.yellow("    Then re-run: kars up ...\n"));
@@ -498,16 +516,41 @@ export async function bringUpSandbox(ctx: SandboxBringUpContext): Promise<void>
       },
     },
   };
+  // KarsMemory binding — only meaningful with a Foundry project endpoint
+  // (Memory Store is a Foundry feature). Gives the sandbox the same
+  // controller-managed binding `kars dev` creates, instead of relying purely
+  // on the runtime's lazy store creation.
+  const memoryCr = foundryEndpoint
+    ? buildKarsMemory({ sandboxName: options.name, namespace: sandboxNamespace })
+    : null;
+
   const bundleManifest = {
     apiVersion: "v1",
     kind: "List",
-    items: [inferencePolicy, toolPolicy, sandboxManifest],
+    items: [inferencePolicy, toolPolicy, ...(memoryCr ? [memoryCr] : []), sandboxManifest],
   };
   await execa("kubectl", ["apply", "-f", "-"], {
     input: JSON.stringify(bundleManifest),
     stdio: ["pipe", "pipe", "pipe"],
   });
 
+  // ── CRD status report — confirm each resource applied + its phase ──
+  stepper.detail("ok", "Applied CRDs:");
+  const crdChecks: Array<{ kind: string; name: string; phasePath: string }> = [
+    { kind: "inferencepolicy", name: inferenceRefName(options.name), phasePath: "{.status.phase}" },
+    { kind: "toolpolicy", name: toolPolicyRefName(options.name), phasePath: "{.status.phase}" },
+    ...(memoryCr ? [{ kind: "karsmemory", name: (memoryCr.metadata as { name: string }).name, phasePath: "{.status.phase}" }] : []),
+    { kind: "karssandbox", name: options.name, phasePath: "{.status.phase}" },
+  ];
+  for (const c of crdChecks) {
+    const { stdout: phase } = await execa("kubectl", [
+      "get", c.kind, c.name, "-n", sandboxNamespace,
+      "-o", `jsonpath=${c.phasePath}`,
+    ], { stdio: "pipe" }).catch(() => ({ stdout: "" }));
+    const ph = phase.trim();
+    stepper.detail(ph && ph !== "Failed" ? "ok" : "info", `  ${c.kind}/${c.name}${ph ? ` — ${ph}` : " — applied"}`);
+  }
+
   // ── Step 8: Wait for sandbox ─────────────────────────────────
   stepper.step("Waiting for sandbox to start...");
   await execa("kubectl", [
diff --git a/cli/src/preflight.ts b/cli/src/preflight.ts
index f51d07d7..dfa6674a 100644
--- a/cli/src/preflight.ts
+++ b/cli/src/preflight.ts
@@ -248,6 +248,17 @@ export async function runPreflightChecks(opts: PreflightOptions): Promise<Prefli
         `Grant the current user sufficient RBAC. At minimum you need the roles:\n      ${REMEDIATION_ROLES.map((r) => chalk.cyan(r)).join("\n      ")}\n\n      Ask your subscription Owner / Global Admin to run:\n      ${chalk.cyan(`az role assignment create --assignee ${account.user?.name ?? "<your-user>"} --role "Contributor" --scope /subscriptions/${account.id}`)}\n      ${chalk.cyan(`az role assignment create --assignee ${account.user?.name ?? "<your-user>"} --role "User Access Administrator" --scope /subscriptions/${account.id}`)}`
       );
     }
+  } else if (spin.isSpinning) {
+    // `fetchSubscriptionPermissions` returned an empty set WITHOUT throwing
+    // (e.g. the ARM `elevateAccess`/permissions call returns `value: []`).
+    // Neither branch above runs, so without this the spinner is never
+    // concluded — its `setInterval` keeps the Node event loop alive and the
+    // whole `kars up` hangs after the summary (and the spinner animates the
+    // entire run). Conclude it and treat as inconclusive, not blocking.
+    spin.info("RBAC — effective permissions inconclusive (continuing)");
+    result.warnings.push(
+      "RBAC check inconclusive (no effective permissions returned). If `up` fails with an authorization error, re-run with Contributor + User Access Administrator.",
+    );
   }
 
   // 3. Resource providers
@@ -281,9 +292,13 @@ export async function runPreflightChecks(opts: PreflightOptions): Promise<Prefli
       );
     }
     if (notFound.length > 0) {
-      spin = ora().fail(
-        `Resource providers — could not verify ${notFound.length} (${notFound.map((p) => p.ns).join(", ")})`
-      );
+      const msg = `Resource providers — could not verify ${notFound.length} (${notFound.map((p) => p.ns).join(", ")})`;
+      // Conclude the EXISTING provider spinner rather than replacing the
+      // reference with a fresh `ora()` — the old `spin = ora().fail(...)`
+      // orphaned the still-spinning provider spinner whenever `pending` was
+      // empty, leaking a `setInterval` that kept the process alive.
+      if (spin.isSpinning) spin.fail(msg);
+      else ora().fail(msg);
       result.warnings.push(
         `Could not read registration state for: ${notFound.map((p) => p.ns).join(", ")}. Verify network access to management.azure.com.`
       );
diff --git a/cli/src/refs.ts b/cli/src/refs.ts
index c33966d8..7b998170 100644
--- a/cli/src/refs.ts
+++ b/cli/src/refs.ts
@@ -27,6 +27,46 @@ export const inferenceRefName = (sandboxName: string) =>
 export const toolPolicyRefName = (sandboxName: string) =>
   kebabRefName(sandboxName, "-toolpolicy");
 
+export const memoryRefName = (sandboxName: string) =>
+  kebabRefName(sandboxName, "-memory");
+
+/** Foundry Memory Store name: `memory-` + kebabRefName(sandbox), cut to 63 chars,
+ *  trailing dashes trimmed. Mirrors the runtime's `memory-<sandbox>` convention. */
+export const memoryStoreName = (sandboxName: string) =>
+  `memory-${kebabRefName(sandboxName, "")}`.slice(0, 63).replace(/-+$/g, "");
+
+export interface KarsMemoryOpts {
+  sandboxName: string;
+  namespace: string;
+  retentionDays?: number;
+}
+
+/**
+ * Assemble the KarsMemory custom resource for a sandbox, so `kars up` declares
+ * the Foundry Memory Store binding explicitly rather than leaving the runtime
+ * to create the store lazily. Retention defaults to 30 days when not supplied.
+ */
+export function buildKarsMemory(opts: KarsMemoryOpts): Record<string, unknown> {
+  const { sandboxName, namespace } = opts;
+  const storeName = memoryStoreName(sandboxName);
+  const metadata = {
+    name: memoryRefName(sandboxName),
+    namespace,
+    labels: { "kars.azure.com/sandbox": sandboxName },
+  };
+  const spec = {
+    sandboxRef: { name: sandboxName },
+    storeName,
+    scope: `agent:${sandboxName}`,
+    retentionDays: opts.retentionDays ?? 30,
+    deleteOnSandboxDelete: true,
+    displayName: `Default memory for ${sandboxName}`,
+  };
+  return {
+    apiVersion: "kars.azure.com/v1alpha1", kind: "KarsMemory", metadata, spec,
+  };
+}
+
 export interface InferencePolicyOpts {
   sandboxName: string;
   namespace: string;
diff --git a/docs/internal/security-audits/2026-06-25-foundry-autosetup-bestmodel-memory-spinner.md b/docs/internal/security-audits/2026-06-25-foundry-autosetup-bestmodel-memory-spinner.md
new file mode 100644
index 00000000..516e7014
--- /dev/null
+++ b/docs/internal/security-audits/2026-06-25-foundry-autosetup-bestmodel-memory-spinner.md
@@ -0,0 +1,100 @@
+# Security Audit — Foundry auto-setup, best-model selection, memory CRD parity, preflight spinner-leak fix
+
+Date: 2026-06-25
+Scope:
+- NEW `cli/src/commands/up/foundry_setup.ts` (+ `foundry_setup.test.ts`)
+- `cli/src/commands/up/sandbox_bringup.ts` (wire foundry setup; KarsMemory CR; CRD status report)
+- `cli/src/commands/up.ts` (`process.exit(0)` on success)
+- `cli/src/preflight.ts` (ora spinner-leak fixes)
+- `cli/src/refs.ts` (`buildKarsMemory`, `memoryRefName`, `memoryStoreName`)
+- `runtimes/openclaw/src/core/agt-tools/foundry.ts` (surface real Memory Store create error)
+
+Gated paths (CI `security-audit-required`): `cli/src/commands/*`, `runtimes/openclaw/src/core/*`.
+
+## Summary
+
+Closes the gap where `kars up --foundry-endpoint` assumed a fully-configured Foundry
+project. The deploy now discovers the project, picks the best deployed chat model,
+ensures an embedding model, enables the project's system-assigned managed identity,
+creates a KarsMemory binding CR (parity with `kars dev`), reports CRD status, and
+exits cleanly. The runtime now surfaces the real reason a Memory Store can't be
+created instead of a generic message.
+
+1. **Foundry auto-setup (`foundry_setup.ts`).** From the BYO endpoint: list deployed
+   models (ARM control-plane, caller's own `az` token — no Graph), pick the best
+   chat model (pure, tested ranking; `--model` always wins), ensure an embedding
+   model (best-effort deploy `text-embedding-3-small`), and **enable the project
+   system-assigned MI** if absent (PATCH `identity.type=SystemAssigned`), then
+   re-read its principalId. All idempotent; every failure degrades to a note and
+   never aborts the deploy.
+
+2. **Best-model selection** replaces the hardcoded stale `gpt-4.1` with the
+   highest-ranked chat model actually deployed in the project. Excludes
+   embedding/image/audio models. User `--model` is respected.
+
+3. **KarsMemory CR parity.** `kars up` now emits a KarsMemory CR (only with a Foundry
+   endpoint) so the sandbox gets the same controller-managed Memory Store binding
+   `kars dev` already creates. Store name follows the existing `memory-<sandbox>`
+   convention; scope `agent:<sandbox>`.
+
+4. **CRD status report.** After applying the bundle, prints each CR (InferencePolicy,
+   ToolPolicy, KarsMemory, KarsSandbox) with its phase — read-only `kubectl get`.
+
+5. **Clean finish.** `process.exit(0)` on the success path so a detached
+   `kubectl port-forward` (and keep-alive sockets) can't keep the process alive.
+
+6. **Preflight spinner-leak fix (the hang).** `cli/src/preflight.ts`: the RBAC
+   spinner was concluded only when `fetchSubscriptionPermissions` threw or returned a
+   non-empty set; an empty `[]` (no throw) left it spinning, and its `setInterval` kept
+   Node alive, so `kars up` hung after the summary with the spinner still animating
+   (reproduced by two operators). Now concluded on the empty path. A second identical
+   leak in the resource-provider `notFound` path (which orphaned the live spinner via
+   `spin = ora().fail(...)`) is fixed to conclude the existing spinner.
+
+7. **Memory error unmasking (runtime).** `ensureStore` now uses the STRICT router call
+   for `POST /memory_stores`, so an upstream 4xx (e.g. 403 — project MI not enabled /
+   missing `Azure AI User` on the RG, RBAC still propagating; or 400 — no embedding
+   model) surfaces the real reason instead of collapsing to "could not be created".
+
+## T1: New capability / attack surface? (NO)
+- `foundry_setup.ts` performs reads plus two narrowly-scoped, idempotent writes the
+  operator already intends: enabling the project's own system MI, and (best-effort)
+  deploying an embedding model — both on the operator's BYO Foundry resource, with
+  the operator's own credentials, gated by their existing Azure RBAC (failure → note,
+  not escalation). No new principal, secret, or network path is introduced.
+- The KarsMemory CR is the existing, admission-validated CRD; no new kind.
+- No change to the sandbox's runtime privileges, egress, seccomp, NetworkPolicy, or
+  inference-router auth (still Entra/IMDS, no keys).
+
+## T2: Security-control change? (NEUTRAL)
+- RBAC roles/scopes granted by `kars up` are unchanged (the existing Azure AI User /
+  Cognitive Services OpenAI User assignments). Enabling the project MI is a
+  precondition for the SAME Memory Store grant kars already makes — not a new grant.
+- The runtime change only alters error *reporting* (strict vs lenient call on the
+  store-create POST); it does not change what is sent or to where.
+- Preflight changes are presentation/lifecycle only (spinner conclusion + process
+  exit); no check is relaxed. The RBAC empty-set path is treated as INCONCLUSIVE
+  (warning), exactly as the thrown-error path already was.
+
+## T3: Availability / fail-open risk? (REDUCED)
+- Fixes a hard hang (process never exits) and a class of confusing memory failures
+  (masked 403/400). Foundry auto-setup is best-effort and never blocks the deploy.
+- Best-model selection falls back to the existing default if discovery fails.
+
+## Verification
+- CLI: `tsc --noEmit` clean, oxlint 0 errors, **831 tests pass** (+10 new
+  `foundry_setup` / refs tests; model ranking proven to pick `gpt-5.4` over a
+  realistic deployed set and exclude embedding/image).
+- Runtime: `tsc --noEmit` clean, oxlint 0 errors, **244 tests pass**.
+- Model scoring validated against the live `azureclaw-foundry` deployment set.
+- Spinner-leak mechanism confirmed: an un-concluded ora `setInterval` keeps the Node
+  event loop alive; concluding it (or `process.exit(0)`) exits cleanly.
+
+## Verdict
+Accept. Makes a BYO Foundry project actually usable for Memory Store with no new
+attack surface (operator-scoped, idempotent, best-effort writes on their own
+resource), fixes a real `kars up` hang, and surfaces previously-masked errors. No
+security control is weakened.
+
+Signed-off-by: Pal Lakatos-Toth <pallakatos@microsoft.com>
+Signed-off-by: Copilot <223556219+Copilot@users.noreply.github.com>
diff --git a/runtimes/openclaw/src/core/agt-tools/foundry.ts b/runtimes/openclaw/src/core/agt-tools/foundry.ts
index c977bf65..a9b6ddbf 100644
--- a/runtimes/openclaw/src/core/agt-tools/foundry.ts
+++ b/runtimes/openclaw/src/core/agt-tools/foundry.ts
@@ -14,7 +14,7 @@
 //   foundry_evaluations      foundry_deployments
 //   foundry_agents
 
-import { routerCall, routerCallBinary } from "../router-client.js";
+import { routerCall, routerCallStrict, routerCallBinary } from "../router-client.js";
 import { safeJson } from "../safe-json.js";
 import { resolveMemoryStoreName, resolveMemoryScope } from "../memory-binding.js";
 import type { FoundryProjectInfo } from "../foundry-discovery.js";
@@ -701,7 +701,12 @@ export function registerFoundryTools(api: AnyApi, deps: FoundryToolsDeps): void
             (d: any) => d.id?.includes("embedding") || d.model?.includes("embedding")
           )?.id || "text-embedding-3-small";
           log.info(`Creating memory store '${store}' (chat=${chatModel}, embedding=${embeddingModel})`);
-          await routerCall("POST", `/memory_stores?${apiVer}`, {
+          // Use the STRICT call so an upstream 4xx (e.g. 403 because the Foundry
+          // project's managed identity isn't enabled / lacks Azure AI User on
+          // the resource group, or 400 because no embedding model is deployed)
+          // surfaces the REAL reason instead of being swallowed and collapsing
+          // into a generic "could not be created".
+          await routerCallStrict("POST", `/memory_stores?${apiVer}`, {
             name: store,
             description: "kars agent persistent memory",
             definition: {
@@ -772,7 +777,7 @@ export function registerFoundryTools(api: AnyApi, deps: FoundryToolsDeps): void
               result = await doUpdate();
             }
             if (isNotFound(result)) {
-              return { content: [{ type: "text", text: `Memory update failed: store '${store}' could not be created.` }] };
+              return { content: [{ type: "text", text: `Memory update failed: store '${store}' could not be created — the Foundry Memory Store service returned not-found after a create attempt. Common causes: the Foundry project's system-assigned managed identity isn't enabled or lacks 'Azure AI User' on the resource group (RBAC can take a few minutes to propagate), or no embedding model is deployed in the project.` }] };
             }
             const status = result?.status || "submitted";
             return { content: [{ type: "text", text: `Memory update ${status}. The memory will be available shortly.` }] };