Azure · pallakatos · Jun 25, 2026 · Jun 25, 2026
@@ -975,6 +975,12 @@ Auto-resume:
           registryMode, globalRegistryUrl, globalRelayUrl,
         });
 
+        // Explicit success exit. Some `az`/REST calls leave keep-alive sockets
+        // (and we spawn a detached kubectl port-forward), so the event loop
+        // wouldn't drain on its own — without this the command hangs after the
+        // deployment summary instead of returning to the shell.
+        process.exit(0);
+
       } catch (error) {
         stepper.stop();
         console.error(chalk.red(`\n  Deployment failed`));

@@ -0,0 +1,83 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+import { describe, it, expect } from "vitest";
+import {
+  scoreChatModel,
+  pickBestChatModel,
+  findEmbeddingModel,
+  parseFoundryEndpoint,
+  type FoundryDeployment,
+} from "./foundry_setup.js";
+
/**
 * Build a minimal deployment fixture for the tests below.
 * The model name defaults to the deployment name; the version is always "1".
 */
function dep(name: string, modelName: string = name): FoundryDeployment {
  return { name, modelName, modelVersion: "1" };
}
+
describe("scoreChatModel", () => {
  it("excludes non-chat models", () => {
    // Embedding, image and audio models must never be offered as a chat model.
    const nonChat = ["text-embedding-3-small", "gpt-image-1", "FLUX.2-pro", "whisper", "tts-1"];
    for (const model of nonChat) {
      expect(scoreChatModel(model)).toBeNull();
    }
  });

  it("ranks newer families above older", () => {
    // Each pair is [newer, older]; the newer model must score strictly higher.
    const pairs: Array<[string, string]> = [
      ["gpt-5.4", "gpt-4.1"],
      ["gpt-5.4", "gpt-5"],
      ["gpt-5", "gpt-4o"],
    ];
    for (const [newer, older] of pairs) {
      expect(scoreChatModel(newer)!).toBeGreaterThan(scoreChatModel(older)!);
    }
  });

  it("prefers the plain flagship over variants within a family", () => {
    // Ordered best → worst; every entry must outrank the one right after it.
    const ladder = ["gpt-5.4", "gpt-5.4-pro", "gpt-5.4-chat", "gpt-5.4-mini", "gpt-5.4-nano"];
    for (let i = 0; i + 1 < ladder.length; i++) {
      expect(scoreChatModel(ladder[i])!).toBeGreaterThan(scoreChatModel(ladder[i + 1])!);
    }
  });
});
+
describe("pickBestChatModel", () => {
  it("picks the flagship from a realistic deployment set", () => {
    // Mix of chat, embedding and image deployments; names double as model names.
    const deployments = [
      "gpt-5-mini", "text-embedding-3-small", "gpt-4.1", "gpt-5.4-mini",
      "gpt-5.3-chat", "FLUX.2-pro", "gpt-image-1", "gpt-5.4-pro", "gpt-5.4",
    ].map((n) => dep(n));
    expect(pickBestChatModel(deployments)?.name).toBe("gpt-5.4");
  });

  it("returns undefined when no chat model is deployed", () => {
    expect(pickBestChatModel([dep("text-embedding-3-small"), dep("gpt-image-1")])).toBeUndefined();
  });

  it("scores by modelName and returns the deployment name when the deployment name is opaque", () => {
    // Deployment named "primary" wrapping model "gpt-5.4": the score comes from
    // the model name, but callers need the deployment name for requests.
    const d: FoundryDeployment = { name: "primary", modelName: "gpt-5.4", modelVersion: "1" };
    expect(pickBestChatModel([d])?.name).toBe("primary");
  });
});
+
describe("findEmbeddingModel", () => {
  it("prefers 3-large over 3-small over ada", () => {
    // Listed worst → best so the result cannot be an accident of input order.
    const ada = dep("ada-002", "text-embedding-ada-002");
    const small = dep("small", "text-embedding-3-small");
    const large = dep("large", "text-embedding-3-large");
    const chosen = findEmbeddingModel([ada, small, large]);
    expect(chosen?.name).toBe("large");
  });
  it("returns undefined when no embedding deployed", () => {
    const chatOnly = [dep("gpt-5.4")];
    expect(findEmbeddingModel(chatOnly)).toBeUndefined();
  });
});
+
describe("parseFoundryEndpoint", () => {
  it("parses account + project from a Foundry project endpoint", () => {
    const endpoint =
      "https://azureclaw-foundry-services.services.ai.azure.com/api/projects/azureclaw";
    const parts = parseFoundryEndpoint(endpoint);
    expect(parts).toEqual({ accountName: "azureclaw-foundry-services", projectName: "azureclaw" });
  });
  it("returns null for a non-project endpoint", () => {
    // One valid URL without a project path, one string that is not a URL at all.
    for (const endpoint of ["https://foo.openai.azure.com", "not a url"]) {
      expect(parseFoundryEndpoint(endpoint)).toBeNull();
    }
  });
});
@@ -0,0 +1,294 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+// up/foundry_setup.ts — make a BYO ("--foundry-endpoint") Foundry project
+// actually usable by kars, instead of assuming it's pre-configured.
+//
+// What this does (all idempotent, all read-mostly except the two explicit
+// provisioning steps which are gated + best-effort):
+//   1. Resolve the AI Services account + project from the endpoint URL.
+//   2. List the project's deployed models (ARM control-plane — works with the
+//      caller's existing `az login`, no Microsoft Graph).
+//   3. Pick the BEST deployed chat model for the agent (so we stop hardcoding a
+//      stale gpt-4.1). The user's explicit `--model` always wins.
+//   4. Ensure an embedding model is deployed (Foundry Memory Store needs one);
+//      best-effort deploy `text-embedding-3-small` if none exists.
+//   5. Enable the project's system-assigned managed identity if it's missing
+//      (Memory Store authenticates internally as the PROJECT MI) and re-read its
+//      principalId so the caller can grant it `Azure AI User` on the RG.
+//
+// Nothing here aborts the deploy: every failure degrades to a clear note so the
+// sandbox still comes up and the operator gets actionable remediation.
+
+import type { Stepper } from "../../stepper.js";
+
/**
 * One deployed model on the Foundry/AI-Services account (ARM shape).
 *
 * Instances are built by `setupFoundryForKars` from the ARM
 * `<account>/deployments` listing.
 */
export interface FoundryDeployment {
  /** Deployment name — what you put in the request `model` field. */
  name: string;
  /**
   * Underlying model name (e.g. "gpt-5.4"). `setupFoundryForKars` falls back
   * to the deployment name when the ARM payload carries no model name.
   */
  modelName: string;
  /** Model version; `setupFoundryForKars` stores "" when the ARM payload has none. */
  modelVersion: string;
}
+
/**
 * Outcome of `setupFoundryForKars`: what was discovered about the project,
 * what was provisioned, and human-readable notes for the operator.
 */
export interface FoundrySetupResult {
  /** Account name taken from the first label of the endpoint hostname. */
  accountName: string;
  /** ARM resource id of the account, or "" when the account could not be resolved. */
  accountResourceId: string;
  /** Resource group of the account, or "" when the account could not be resolved. */
  resourceGroup: string;
  /** Project name taken from the `/api/projects/<name>` path segment of the endpoint. */
  projectName: string;
  /** Best deployed chat model deployment name, or undefined if none found. */
  bestChatModel?: string;
  /** Embedding deployment name in use (existing or just-created), or undefined. */
  embeddingModel?: string;
  /** Project system-assigned MI principalId (after any enable), or "". */
  projectMiPrincipalId: string;
  /**
   * True if this run enabled the MI (was previously off). Only set once the
   * principalId has actually been read back after enabling.
   */
  miJustEnabled: boolean;
  /** Human-readable status notes for the deployment report. */
  notes: string[];
}
+
/**
 * Score a deployed model for use as an interactive, tool-using agent's chat
 * model. Returns a number (higher = better) or `null` when the model is not a
 * chat model (embeddings, image, audio, …) and must be excluded.
 *
 * Ranking: family/version dominates; within a family the plain flagship beats
 * `-pro`/`-chat`/`-mini`/`-nano`, because for a tool-calling agent the flagship
 * general model is the most reliable default (reasoning-`pro` variants are
 * slower/pricier and `mini`/`nano` are weaker). `--model` overrides all of this.
 *
 * The score is `family * 10 + variant`, where `family` is `major * 100 + minor`
 * for `gpt-<major>[.<minor>]`, `300 + 10 * N` for the `oN` series, and 50 for
 * any other (unknown) family.
 *
 * @param modelName Model (or deployment) name; matched case-insensitively.
 * @returns The score, or `null` when the name looks like a non-chat model.
 */
export function scoreChatModel(modelName: string): number | null {
  const n = modelName.toLowerCase();

  // Hard-exclude anything that isn't a text chat model.
  const NON_CHAT =
    /(embedding|image|dall-?e|flux|whisper|tts|audio|realtime|sora|moderation|rerank|transcrib|stable-?diffusion)/;
  if (NON_CHAT.test(n)) return null;

  // Family/version score.
  let family: number;
  const gpt = n.match(/^gpt-(\d+)(?:\.(\d+))?/);
  const oSeries = n.match(/^o(\d+)/);
  if (gpt) {
    let major = parseInt(gpt[1], 10);
    let minor = gpt[2] ? parseInt(gpt[2], 10) : 0;
    // Azure spells GPT-3.5 without the dot ("gpt-35-turbo"). Read literally
    // that is major version 35, which would outrank every real model, so
    // normalise it to 3.5.
    if (gpt[2] === undefined && major === 35) {
      major = 3;
      minor = 5;
    }
    family = major * 100 + minor; // gpt-5.4 → 504, gpt-4.1 → 401, gpt-4o → 400, gpt-35 → 305
  } else if (oSeries) {
    family = 300 + parseInt(oSeries[1], 10) * 10; // o3 → 330, o4 → 340 (below gpt-5)
  } else {
    family = 50; // unknown family — keep, but rank low.
  }

  // Variant adjustment (plain flagship preferred for agent tool-use).
  let variant: number;
  if (/-pro\b/.test(n)) variant = 3;
  else if (/-chat\b/.test(n)) variant = 2;
  else if (/-mini\b/.test(n)) variant = 1;
  else if (/-nano\b/.test(n)) variant = 0;
  else variant = 4; // plain flagship

  return family * 10 + variant;
}
+
/**
 * Pick the best chat-capable deployment, or undefined if none qualify.
 *
 * Each deployment is scored by its underlying model name; the deployment name
 * is consulted only when the model name is empty. A deployment whose model is
 * recognised as non-chat (embedding, image, audio, …) is always excluded, even
 * if its deployment name is opaque (e.g. "vectors") — otherwise such a
 * deployment would be scored as an "unknown family" chat model and could be
 * returned when no real chat model is deployed.
 *
 * @param deployments Deployments to choose from; not mutated.
 * @returns The highest-scoring deployment (first one wins on ties), or
 *   undefined when no deployment is chat-capable.
 */
export function pickBestChatModel(
  deployments: FoundryDeployment[],
): FoundryDeployment | undefined {
  let best: { dep: FoundryDeployment; score: number } | undefined;
  for (const dep of deployments) {
    const score = scoreChatModel(dep.modelName || dep.name);
    if (score === null) continue;
    if (!best || score > best.score) best = { dep, score };
  }
  return best?.dep;
}
+
/**
 * Locate the preferred embedding deployment.
 *
 * A deployment counts as an embedding deployment when either its model name or
 * its deployment name contains "embedding" (case-insensitive). Preference
 * order: "3-large", then "3-small", then "ada", then anything else; the
 * earliest deployment wins among equals.
 *
 * @param deployments Deployments to search; not mutated.
 * @returns The preferred embedding deployment, or undefined when there is none.
 */
export function findEmbeddingModel(
  deployments: FoundryDeployment[],
): FoundryDeployment | undefined {
  // Ordered best → worst; the first marker found decides the weight.
  const preference: Array<[string, number]> = [
    ["3-large", 3],
    ["3-small", 2],
    ["ada", 1],
  ];
  const weigh = (candidate: FoundryDeployment): number => {
    const haystack = `${candidate.modelName} ${candidate.name}`.toLowerCase();
    for (const [marker, weight] of preference) {
      if (haystack.includes(marker)) return weight;
    }
    return 0;
  };

  let winner: FoundryDeployment | undefined;
  let winnerWeight = -1;
  for (const candidate of deployments) {
    const isEmbedding =
      /embedding/i.test(candidate.modelName) || /embedding/i.test(candidate.name);
    if (!isEmbedding) continue;
    const weight = weigh(candidate);
    // Strictly greater keeps the earliest deployment on ties.
    if (weight > winnerWeight) {
      winner = candidate;
      winnerWeight = weight;
    }
  }
  return winner;
}
+
/**
 * Split a Foundry project endpoint of the form
 * "https://<acct>.services.ai.azure.com/api/projects/<proj>" into its parts.
 *
 * The account name is the first label of the hostname; the project name is the
 * path segment that follows `/api/projects/`.
 *
 * @param endpoint Endpoint URL as supplied by the user.
 * @returns The account and project names, or `null` when `endpoint` is not a
 *   valid URL or has no `/api/projects/<proj>` path.
 */
export function parseFoundryEndpoint(
  endpoint: string,
): { accountName: string; projectName: string } | null {
  let url: URL;
  try {
    url = new URL(endpoint);
  } catch {
    // Not a URL at all.
    return null;
  }

  const [accountName] = url.hostname.split(".");
  const projectMatch = /\/api\/projects\/([^/]+)/.exec(url.pathname);
  if (!accountName || projectMatch === null) return null;

  return { accountName, projectName: projectMatch[1] };
}
+
+type Execa = typeof import("execa").execa;
+
/** Parse `az` stdout as a JSON object; blank, malformed or non-object output yields `{}`. */
function parseAzJsonObject(raw: unknown): Record<string, unknown> {
  if (typeof raw !== "string") return {};
  try {
    const value: unknown = JSON.parse(raw.trim() || "{}");
    return typeof value === "object" && value !== null && !Array.isArray(value)
      ? (value as Record<string, unknown>)
      : {};
  } catch {
    return {};
  }
}

/** Read `obj[key]` as a string, treating a missing or non-string value as "". */
function stringField(obj: Record<string, unknown>, key: string): string {
  const value = obj[key];
  return typeof value === "string" ? value : "";
}

/**
 * Discover + (best-effort) provision the BYO Foundry project so kars Memory
 * Store and the agent model "just work". Returns null when the endpoint isn't a
 * Foundry project endpoint (e.g. plain Azure OpenAI) — the caller keeps its
 * existing behaviour in that case.
 *
 * Steps: resolve the account's ARM id and resource group, list its model
 * deployments, pick the best chat model, make sure an embedding model is
 * deployed, and make sure the project has a system-assigned managed identity.
 *
 * Every `az` failure — including unparseable output — is turned into an entry
 * in `notes` instead of an exception. Two guards keep the provisioning steps
 * from damaging existing configuration:
 *   - the embedding auto-deploy only runs when the deployment listing
 *     succeeded, because `deployment create` is a create-or-update and would
 *     otherwise overwrite a deployment we simply failed to see;
 *   - enabling the system-assigned identity keeps any user-assigned identities
 *     already attached to the project.
 *
 * @param args.execa Process runner used for every `az` invocation.
 * @param args.stepper Progress reporter (`update` / `detail`).
 * @param args.foundryEndpoint Endpoint passed via `--foundry-endpoint`.
 * @returns The discovery/provisioning result, or null for a non-Foundry endpoint.
 */
export async function setupFoundryForKars(args: {
  execa: Execa;
  stepper: Stepper;
  foundryEndpoint: string;
}): Promise<FoundrySetupResult | null> {
  const { execa, stepper, foundryEndpoint } = args;
  const parsed = parseFoundryEndpoint(foundryEndpoint);
  if (!parsed) return null;
  const { accountName, projectName } = parsed;
  const notes: string[] = [];

  // 1. Resolve the account ARM id + resource group.
  stepper.update("Discovering Foundry project...");
  const { stdout: acctJson } = await execa("az", [
    "cognitiveservices", "account", "list",
    "--query", `[?name=='${accountName}'].{id:id, rg:resourceGroup} | [0]`,
    "--output", "json",
  ], { stdio: "pipe" }).catch(() => ({ stdout: "{}" }));
  // Tolerant parse: empty, `null` or garbage output all mean "not found".
  const acct = parseAzJsonObject(acctJson);
  const accountResourceId = stringField(acct, "id");
  const resourceGroup = stringField(acct, "rg");
  if (!accountResourceId || !resourceGroup) {
    notes.push(
      `Could not resolve the Foundry account '${accountName}' in this subscription — ` +
        "skipping Foundry auto-setup (the sandbox will still deploy).",
    );
    return {
      accountName, accountResourceId: "", resourceGroup: "", projectName,
      projectMiPrincipalId: "", miJustEnabled: false, notes,
    };
  }

  // 2. List deployed models (ARM control-plane).
  let deployments: FoundryDeployment[] = [];
  let deploymentsListed = false;
  try {
    const { stdout: depJson } = await execa("az", [
      "rest", "--method", "get",
      "--url", `${accountResourceId}/deployments?api-version=2024-10-01`,
    ], { stdio: "pipe" });
    const raw = JSON.parse(depJson.trim());
    deployments = (raw.value ?? []).map((d: {
      name: string;
      properties?: { model?: { name?: string; version?: string } };
    }) => ({
      name: d.name,
      modelName: d.properties?.model?.name ?? d.name,
      modelVersion: d.properties?.model?.version ?? "",
    }));
    deploymentsListed = true;
  } catch {
    notes.push("Could not list Foundry model deployments (continuing with defaults).");
  }

  // 3. Best chat model.
  const best = pickBestChatModel(deployments);
  const bestChatModel = best?.name;
  if (bestChatModel) {
    stepper.detail("info", `Best deployed chat model: ${bestChatModel}`);
  }

  // 4. Ensure an embedding model (Memory Store needs one).
  let embeddingModel = findEmbeddingModel(deployments)?.name;
  if (!embeddingModel && !deploymentsListed) {
    // An empty list here means "unknown", not "none deployed": do not risk
    // overwriting an existing deployment's SKU/capacity.
    notes.push(
      "Skipped embedding-model auto-deploy because the existing deployments could not be listed.",
    );
  } else if (!embeddingModel) {
    stepper.update("No embedding model deployed — deploying text-embedding-3-small...");
    const ok = await execa("az", [
      "cognitiveservices", "account", "deployment", "create",
      "--name", accountName,
      "--resource-group", resourceGroup,
      "--deployment-name", "text-embedding-3-small",
      "--model-name", "text-embedding-3-small",
      "--model-version", "1",
      "--model-format", "OpenAI",
      "--sku-name", "Standard",
      "--sku-capacity", "50",
      "--output", "none",
    ], { stdio: "pipe" }).then(() => true).catch(() => false);
    if (ok) {
      embeddingModel = "text-embedding-3-small";
      notes.push("Deployed embedding model 'text-embedding-3-small' for Memory Store.");
    } else {
      notes.push(
        "No embedding model is deployed and auto-deploy failed (quota/permissions?). " +
          "Memory Store needs one — deploy 'text-embedding-3-small' in the Foundry portal.",
      );
    }
  }

  // 5. Ensure the project's system-assigned MI (Memory Store authenticates
  //    internally as the PROJECT MI).
  const projectUrl = `${accountResourceId}/projects/${projectName}?api-version=2025-06-01`;

  // Fetch the project's `identity` object; any failure yields `{}` so the
  // caller simply sees "no principalId yet".
  const readIdentity = async (): Promise<Record<string, unknown>> => {
    const { stdout } = await execa("az", [
      "rest", "--method", "get", "--url", projectUrl,
    ], { stdio: "pipe" }).catch(() => ({ stdout: "{}" }));
    const identity = parseAzJsonObject(stdout).identity;
    return typeof identity === "object" && identity !== null
      ? (identity as Record<string, unknown>)
      : {};
  };

  const currentIdentity = await readIdentity();
  let projectMiPrincipalId = stringField(currentIdentity, "principalId");
  let miJustEnabled = false;

  if (!projectMiPrincipalId) {
    stepper.update("Enabling Foundry project managed identity (for Memory Store)...");

    // Keep user-assigned identities: sending a bare `SystemAssigned` type
    // would replace the identity block and detach them.
    // NOTE(review): the combined type string follows the ARM identity schema
    // for Microsoft.CognitiveServices — confirm for the projects resource.
    const identityPatch: Record<string, unknown> = { type: "SystemAssigned" };
    if (/userassigned/i.test(stringField(currentIdentity, "type"))) {
      identityPatch.type = "SystemAssigned, UserAssigned";
      const attached = currentIdentity.userAssignedIdentities;
      if (typeof attached === "object" && attached !== null) {
        // Request shape is id → {}; principalId/clientId are read-only.
        identityPatch.userAssignedIdentities = Object.fromEntries(
          Object.keys(attached).map((id) => [id, {}]),
        );
      }
    }

    const enabled = await execa("az", [
      "rest", "--method", "patch", "--url", projectUrl,
      "--body", JSON.stringify({ identity: identityPatch }),
    ], { stdio: "pipe" }).then(() => true).catch(() => false);

    if (enabled) {
      // The principalId may take a few seconds to populate after enabling.
      for (let attempt = 0; attempt < 6 && !projectMiPrincipalId; attempt++) {
        await new Promise((r) => setTimeout(r, 3000));
        projectMiPrincipalId = stringField(await readIdentity(), "principalId");
      }
      if (projectMiPrincipalId) {
        miJustEnabled = true;
        notes.push("Enabled the Foundry project's system-assigned managed identity.");
      } else {
        notes.push(
          "Enabled the Foundry project MI but its principalId hasn't populated yet — " +
            "Memory Store RBAC will be granted on the next `kars up` run.",
        );
      }
    } else {
      notes.push(
        "Foundry project has no system-assigned MI and kars couldn't enable it " +
          "(needs Contributor on the project). Enable it: Portal → Project → " +
          "Resource Management → Identity → System assigned → On, then re-run `kars up`.",
      );
    }
  }

  return {
    accountName,
    accountResourceId,
    resourceGroup,
    projectName,
    bestChatModel,
    embeddingModel,
    projectMiPrincipalId,
    miJustEnabled,
    notes,
  };
}