diff --git a/cli/src/commands/upgrade.ts b/cli/src/commands/upgrade.ts index 5678c926..c5ecc080 100644 --- a/cli/src/commands/upgrade.ts +++ b/cli/src/commands/upgrade.ts @@ -24,6 +24,10 @@ import { releaseImagePlan, compareVersions, fetchLatestReleaseTag, + fetchRecentReleases, + releasesBetween, + fetchTagMessage, + ghcrManifestDigests, parseVersionTag, } from "../lib/release.js"; @@ -176,11 +180,13 @@ export function upgradeCommand(): Command { .option("--rollback", "Roll the cluster back to the previous Helm revision.", false) .option("--skip-runtime-images", "Skip the 7 multi-runtime adapter images (faster).", false) .option("--force", "Re-run the upgrade even if already at the target version.", false) + .option("--yes", "Skip the confirmation prompt (assume yes).", false) .addHelpText("after", ` Examples: kars upgrade # Upgrade to the latest GitHub release kars upgrade --to v0.1.16 # Pin a specific release - kars upgrade --dry-run # Show what would change + kars upgrade --dry-run # Show changelog + impact + plan, make no changes + kars upgrade --yes # Skip the confirmation prompt kars upgrade --rollback # Revert to the previous Helm revision `) .action(async (options) => { @@ -231,6 +237,24 @@ Examples: } stepper.done(`Connected — kars release at revision ${karsRel.revision}`); + // ── Pre-flight: cluster must be able to run the upgrade ──────── + // Fail fast on a degraded/stopped cluster (e.g. all nodes NotReady) + // BEFORE the image import + Helm --wait that would only time out and + // roll back. Read-only; only hard-blocks when NO node is Ready. The + // existing post-upgrade health gate still guards correctness. 
+ if (!options.rollback) { + const pre = await assertClusterUpgradeable(execa); + if (!pre.ok) { + stepper.stop(); + console.error(chalk.red(`\n ✗ Cluster is not in a state to upgrade — no changes made.`)); + console.error(chalk.red(` ${pre.reason}\n`)); + for (const hint of pre.hints) console.error(chalk.dim(` ${hint}`)); + console.error(); + process.exit(1); + } + for (const hint of pre.hints) stepper.detail("info", hint); + } + // ── Rollback path ───────────────────────────────────────────── if (options.rollback) { stepper.step("Rolling back to the previous Helm revision..."); @@ -276,6 +300,8 @@ Examples: const images = releaseImagePlan(target, { includeRuntimes: !options.skipRuntimeImages }); if (options.dryRun) { stepper.stop(); + await printChangelog(current, target); + await printImpactTable(execa); section("Upgrade plan (dry-run — no changes made)"); kvLine("Cluster", ctx.aksCluster); kvLine("ACR", ctx.acrLoginServer); @@ -289,6 +315,31 @@ Examples: process.exit(0); } + // ── Changelog summary + impact + confirmation ───────────────── + // Show what's about to change and the blast radius, then confirm + // before any write. The dry-run above already exited; this only runs + // for a real upgrade. Auto-proceeds under --yes or a non-TTY stdin. 
+ stepper.stop(); + await printChangelog(current, target); + await printImpactTable(execa); + + const interactive = !options.yes && process.stdin.isTTY === true; + if (interactive) { + const { default: inquirer } = await import("inquirer"); + const { proceed } = await inquirer.prompt([{ + type: "confirm", + name: "proceed", + message: `Upgrade ${ctx.aksCluster} from ${current || "unknown"} to ${target}?`, + default: true, + }]); + if (!proceed) { + console.log(chalk.dim("\n Upgrade cancelled — no changes made.\n")); + process.exit(0); + } + } else { + console.log(chalk.dim(` Non-interactive — proceeding with upgrade to ${target}.\n`)); + } + // ── Step 3: Import target release images into ACR ───────────── stepper.step(`Importing ${target} images into ${acrName}...`); let requiredFailures = 0; @@ -411,7 +462,12 @@ type Execa = typeof import("execa").execa; * a version tag, not `:latest`). Only accepted when it parses as a version * (a `:latest`-era cluster has no version here and falls through). * 2. The `karsRelease` Helm value stamped by `kars up` / `kars upgrade`. - * 3. The chart `appVersion` — but the chart ships a static `0.1.0` sentinel + * 3. **Image-digest match** — the controller's running image digest matched + * against published release digests. Recovers the real version on a cluster + * deployed before the stamp existed and still on `:latest` (where 1 and 2 + * both come up empty), since `az acr import` preserves content-addressed + * digests. Best-effort network call; never overrides 1 or 2. + * 4. The chart `appVersion` — but the chart ships a static `0.1.0` sentinel * that is never bumped, so that exact value is treated as "unknown". 
/**
 * Resolve the deployed version by matching the controller pod's running image
 * digest against the manifest digests of recently published
 * `kars-controller` release tags.
 *
 * Read-only and best-effort: a failed `kubectl` read is treated as "no pods",
 * and the function resolves to `undefined` whenever no digest can be read or
 * matched, so the caller can fall through to its next detection step.
 *
 * @param execa - The `execa` runner used to invoke `kubectl`.
 * @returns The first (newest) release tag whose digests include the running
 *   digest, or `undefined` when nothing matches.
 */
async function detectVersionByImageDigest(execa: Execa): Promise<string | undefined> {
  // One `image|imageID` line per container status. `imageID` looks like
  // `…/kars-controller@sha256:<hex>` once the image has been pulled; pods
  // without a digest simply produce lines the regex below does not match.
  const { stdout: ids } = await execa("kubectl", [
    "get", "pods", "-n", NS, "-l", "app.kubernetes.io/name=kars",
    "-o", "jsonpath={range .items[*]}{range .status.containerStatuses[*]}{.image}{\"|\"}{.imageID}{\"\\n\"}{end}{end}",
  ], { stdio: "pipe" }).catch(() => ({ stdout: "" }));

  let runningDigest: string | undefined;
  for (const line of ids.split("\n")) {
    if (!line.includes("kars-controller")) continue;
    const m = line.match(/@(sha256:[a-f0-9]{64})/);
    if (m) { runningDigest = m[1]; break; }
  }
  if (!runningDigest) return undefined;

  // Releases are walked in the order returned (newest first), one registry
  // lookup at a time, so the scan stops at the first — i.e. newest — match.
  const releases = await fetchRecentReleases(20);
  for (const r of releases) {
    const digests = await ghcrManifestDigests("azure/kars-controller", r.tag);
    if (digests.has(runningDigest)) return r.tag;
  }
  return undefined;
}

/**
 * Print a table of the kars workloads an upgrade restarts: the
 * `kars-controller` Deployment plus every Deployment labelled
 * `kars.azure.com/component=sandbox` in any namespace.
 *
 * Each row shows type, namespace, name, ready/desired replicas and a shortened
 * image reference taken from the Deployment's pod template (not from a running
 * pod). Best-effort: a failed `kubectl` call contributes no rows, and
 * unparsable JSON prints a note and returns instead of throwing.
 *
 * @param execa - The `execa` runner used to invoke `kubectl`.
 */
async function printImpactTable(execa: Execa): Promise<void> {
  section("Impact — workloads that will be restarted");

  interface DeployJson {
    metadata?: { name?: string; namespace?: string };
    spec?: { replicas?: number; template?: { spec?: { containers?: Array<{ name?: string; image?: string }> } } };
    status?: { readyReplicas?: number; replicas?: number };
  }
  interface Row {
    component: string;
    namespace: string;
    name: string;
    ready: string;
    /** True only when at least one replica is desired and all of them are ready. */
    allReady: boolean;
    image: string;
  }

  // ".../openclaw-sandbox:latest" → "openclaw-sandbox:latest"; a trailing
  // "@sha256:…" digest is dropped.
  const shortImage = (img: string): string => {
    if (!img) return "—";
    const noDigest = img.split("@")[0];
    const parts = noDigest.split("/");
    return parts[parts.length - 1] || noDigest;
  };

  // Build one table row; `prefer` picks the container whose name contains it,
  // falling back to the first container in the pod template.
  const toRow = (component: string, namespace: string, name: string, d: DeployJson, prefer: string): Row => {
    const ready = d.status?.readyReplicas ?? 0;
    const desired = d.spec?.replicas ?? d.status?.replicas ?? 0;
    const containers = d.spec?.template?.spec?.containers ?? [];
    const preferred = containers.find((c) => c.name?.includes(prefer));
    return {
      component,
      namespace,
      name,
      ready: `${ready}/${desired}`,
      allReady: desired > 0 && ready === desired,
      image: shortImage((preferred ?? containers[0])?.image ?? ""),
    };
  };

  const rows: Row[] = [];
  try {
    // Controller.
    const { stdout: ctrlJson } = await execa("kubectl", [
      "get", "deployment", "kars-controller", "-n", NS, "-o", "json",
    ], { stdio: "pipe" }).catch(() => ({ stdout: "" }));
    if (ctrlJson.trim()) {
      rows.push(toRow("controller", NS, "kars-controller", JSON.parse(ctrlJson) as DeployJson, "controller"));
    }

    // Sandboxes across all namespaces (the inference-router rides inside these).
    const { stdout: sbJson } = await execa("kubectl", [
      "get", "deployment", "-A", "-l", "kars.azure.com/component=sandbox", "-o", "json",
    ], { stdio: "pipe" }).catch(() => ({ stdout: "" }));
    if (sbJson.trim()) {
      const list = JSON.parse(sbJson) as { items?: DeployJson[] };
      for (const d of list.items ?? []) {
        rows.push(toRow("sandbox", d.metadata?.namespace ?? "?", d.metadata?.name ?? "?", d, "openclaw"));
      }
    }
  } catch {
    // Only JSON.parse can land here — the kubectl calls already swallow failures.
    console.log(chalk.dim("\n (could not read cluster workloads — continuing)\n"));
    return;
  }

  if (rows.length === 0) {
    console.log(chalk.dim("\n (no kars workloads found)\n"));
    return;
  }

  // Render a simple aligned table; the last column (image) is left unpadded.
  const headers = { component: "TYPE", namespace: "NAMESPACE", name: "NAME", ready: "READY", image: "IMAGE" };
  const widthOf = (key: "component" | "namespace" | "name" | "ready"): number =>
    Math.max(headers[key].length, ...rows.map((r) => r[key].length));
  const w = {
    component: widthOf("component"),
    namespace: widthOf("namespace"),
    name: widthOf("name"),
    ready: widthOf("ready"),
  };
  const pad = (s: string, n: number): string => s.padEnd(n);

  console.log();
  console.log(
    " " + chalk.dim(
      `${pad(headers.component, w.component)} ${pad(headers.namespace, w.namespace)} ${pad(headers.name, w.name)} ${pad(headers.ready, w.ready)} ${headers.image}`,
    ),
  );
  for (const r of rows) {
    const readyCell = r.allReady ? chalk.green(pad(r.ready, w.ready)) : chalk.yellow(pad(r.ready, w.ready));
    console.log(
      ` ${pad(r.component, w.component)} ${pad(r.namespace, w.namespace)} ${pad(r.name, w.name)} ${readyCell} ${chalk.dim(r.image)}`,
    );
  }

  const sandboxCount = rows.filter((r) => r.component === "sandbox").length;
  const controllerCount = rows.length - sandboxCount;
  console.log(chalk.dim(`\n ${rows.length} workload(s) will be rolling-restarted (${controllerCount} controller + ${sandboxCount} sandbox(es)).`));
  console.log(chalk.dim(` Each sandbox restarts its agent pod; in-flight agent work is interrupted briefly.\n`));
}
/**
 * Print a concise changelog for the releases an upgrade from `current` to
 * `target` would apply, newest release first.
 *
 * When `current` is empty (version unknown) only the newest release at or
 * below `target` is shown. For each release the annotated tag message is
 * preferred and the release body is the fallback; both are condensed by
 * `summarizeChangelog`.
 *
 * @param current - Currently deployed version tag, or "" when unknown.
 * @param target - Version tag being upgraded to.
 */
async function printChangelog(current: string, target: string): Promise<void> {
  section("What's changing");
  kvLine("From", current || "unknown");
  kvLine("To", target);

  const releases = await fetchRecentReleases(20);
  const between = current
    ? releasesBetween(releases, current, target)
    : releases.filter((r) => compareVersions(r.tag, target) <= 0).slice(0, 1);
  if (between.length === 0) {
    console.log(chalk.dim(`\n (no release notes found between ${current || "?"} and ${target})\n`));
    return;
  }
  console.log();

  // Newest first reads best in a terminal. The tag-message lookups are
  // independent of each other, so they are issued together rather than one
  // awaited round-trip per release; printing still follows `newestFirst` order.
  const newestFirst = [...between].reverse();
  const tagMessages = await Promise.all(newestFirst.map((r) => fetchTagMessage(r.tag)));

  for (const [i, r] of newestFirst.entries()) {
    console.log(` ${chalk.bold(r.tag)}${r.name && r.name !== r.tag ? chalk.dim(` — ${r.name}`) : ""}`);
    // Prefer the annotated tag message over the release body.
    for (const line of summarizeChangelog(tagMessages[i] || r.body)) {
      console.log(chalk.dim(` ${line}`));
    }
  }
  console.log();
}
/**
 * Pull the human-meaningful lines out of an annotated tag message or release
 * body.
 *
 * Bullet lines (`-` / `*`) become "• …" entries and markdown headings are kept
 * as plain entries; every entry is cut to 100 characters. Blank lines, code
 * fence markers and a leading "kars vX.Y.Z" title line are skipped, and
 * scanning stops at the first boilerplate heading (container images, runtime
 * adapter, verification, integrity, install).
 *
 * @param text - Raw tag message or release body.
 * @param maxLines - Maximum number of bullet/heading entries to return. A
 *   trailing "…" is appended only when at least one further entry was dropped.
 * @returns The bullet/heading entries when any exist, otherwise the first
 *   three prose lines (possibly an empty array).
 */
export function summarizeChangelog(text: string, maxLines = 8): string[] {
  const boilerplateHeading = /^#+\s*(container images|runtime adapter|verification|integrity|install)/i;
  const titleLine = /^kars v\d/i;
  const bulletPrefix = /^[-*]\s+/;
  const headingPrefix = /^#+\s+/;

  const bullets: string[] = [];
  const prose: string[] = [];
  for (const raw of text.split("\n")) {
    const l = raw.trim();
    if (!l) continue;
    if (boilerplateHeading.test(l)) break;
    if (l.startsWith("```")) continue;
    if (titleLine.test(l)) continue;

    let entry: string | undefined;
    if (bulletPrefix.test(l)) {
      entry = "• " + l.replace(bulletPrefix, "").slice(0, 100);
    } else if (headingPrefix.test(l)) {
      entry = l.replace(headingPrefix, "").slice(0, 100);
    }

    if (entry === undefined) {
      prose.push(l.slice(0, 100));
      continue;
    }
    // Mark truncation only when there really is another entry being left out;
    // a list of exactly `maxLines` entries is returned without the "…".
    if (bullets.length >= maxLines) {
      bullets.push("…");
      break;
    }
    bullets.push(entry);
  }
  // Prefer bullets; if none, fall back to the first few prose lines.
  if (bullets.length > 0) return bullets;
  return prose.slice(0, 3);
}

/**
 * Pre-flight check: can the cluster accept an upgrade right now?
 *
 * Runs a read-only `kubectl get nodes` and inspects each node's `Ready`
 * condition. The upgrade is blocked only when nodes were listed and none of
 * them is Ready. If the node list cannot be read (the command fails or prints
 * nothing) the check passes, leaving later upgrade steps to surface problems.
 *
 * @param execa - The `execa` runner used to invoke `kubectl`.
 * @returns `ok: false` with a `reason` and remediation `hints` when every node
 *   is NotReady; otherwise `ok: true`, with a single informational hint when
 *   only some nodes are Ready.
 */
export async function assertClusterUpgradeable(
  execa: Execa,
): Promise<{ ok: boolean; reason: string; hints: string[] }> {
  // Emits one `<node-name>|<Ready condition status>` line per node.
  const { stdout } = await execa("kubectl", [
    "get", "nodes",
    "-o", "jsonpath={range .items[*]}{.metadata.name}{\"|\"}{range .status.conditions[?(@.type=='Ready')]}{.status}{end}{\"\\n\"}{end}",
  ], { stdio: "pipe" }).catch(() => ({ stdout: "" }));

  const lines = stdout.split("\n").map((l) => l.trim()).filter(Boolean);
  if (lines.length === 0) {
    // Couldn't read nodes — don't hard-block on an unexpected API shape.
    return { ok: true, reason: "", hints: [] };
  }
  const total = lines.length;
  const ready = lines.filter((l) => l.endsWith("|True")).length;

  if (ready === 0) {
    return {
      ok: false,
      reason: `All ${total} cluster node(s) are NotReady — the upgrade can't schedule new pods and would time out.`,
      hints: [
        "Check node health: kubectl get nodes",
        "If the AKS cluster is stopped, start it: az aks start -g <resource-group> -n <cluster-name>",
        "If nodes are stuck (CNI/kubelet), check: kubectl describe nodes",
        "Re-run `kars upgrade` once nodes are Ready.",
      ],
    };
  }
  // Some-but-not-all Ready is allowed (the upgrade can still proceed) but is
  // worth surfacing to the operator.
  return { ok: true, reason: "", hints: ready < total ? [`Note: ${ready}/${total} nodes Ready.`] : [] };
}
.toEqual(["v0.1.16", "v0.1.17", "v0.1.18"]); + }); + it("excludes the current version and anything above target", () => { + const got = releasesBetween(rels, "v0.1.16", "v0.1.17").map((r) => r.tag); + expect(got).toEqual(["v0.1.17"]); + expect(got).not.toContain("v0.1.16"); + expect(got).not.toContain("v0.1.18"); + }); + it("with no known current, includes everything up to target", () => { + expect(releasesBetween(rels, "", "v0.1.16").map((r) => r.tag)) + .toEqual(["v0.1.14", "v0.1.15", "v0.1.16"]); + }); +}); + +describe("summarizeChangelog", () => { + it("extracts bullet lines and skips the title + boilerplate", () => { + const msg = [ + "kars v0.1.17", + "", + "- First feature", + "* Second feature", + "", + "## Container images", + "- ghcr.io/azure/kars-controller:v0.1.17", + ].join("\n"); + const out = summarizeChangelog(msg); + expect(out).toEqual(["• First feature", "• Second feature"]); + }); + it("falls back to prose when there are no bullets", () => { + const out = summarizeChangelog("kars v0.1.5\n\nJust a prose summary line."); + expect(out).toEqual(["Just a prose summary line."]); + }); + it("caps the number of bullet lines", () => { + const many = ["kars v1.0.0", ...Array.from({ length: 20 }, (_, i) => `- item ${i}`)].join("\n"); + const out = summarizeChangelog(many, 8); + expect(out.length).toBeLessThanOrEqual(9); // 8 + the "…" marker + expect(out[out.length - 1]).toBe("…"); + }); +}); diff --git a/cli/src/lib/release.ts b/cli/src/lib/release.ts index 0243c1b2..aa882269 100644 --- a/cli/src/lib/release.ts +++ b/cli/src/lib/release.ts @@ -128,3 +128,133 @@ export async function fetchLatestReleaseTag( return null; } } + +export interface ReleaseNote { + tag: string; + name: string; + /** Raw release body (markdown). */ + body: string; +} + +/** + * Fetch recent published releases (newest first). Used for the changelog + * summary and the image-digest version fallback. Never throws. 
/**
 * Fetch recent published releases of `Azure/kars` from the GitHub REST API,
 * in the order the API returns them (newest first).
 *
 * Entries without a `tag_name` are dropped; a missing release name falls back
 * to the tag and a missing body to "". Never throws: any HTTP or parsing
 * failure yields an empty array.
 *
 * @param limit - How many releases to request; clamped to GitHub's 1–100
 *   `per_page` range (non-finite values fall back to 20).
 * @param fetchImpl - `fetch` implementation, injectable for tests.
 */
export async function fetchRecentReleases(
  limit = 20,
  fetchImpl: typeof fetch = fetch,
): Promise<ReleaseNote[]> {
  const perPage = Number.isFinite(limit) ? Math.min(100, Math.max(1, Math.trunc(limit))) : 20;
  try {
    const res = await fetchImpl(
      `https://api.github.com/repos/Azure/kars/releases?per_page=${perPage}`,
      { headers: { Accept: "application/vnd.github+json", "User-Agent": "kars-cli" } },
    );
    if (!res.ok) return [];
    const body = (await res.json()) as Array<{ tag_name?: string; name?: string; body?: string }>;
    const notes: ReleaseNote[] = [];
    for (const r of body) {
      if (!r.tag_name) continue;
      notes.push({ tag: r.tag_name, name: r.name || r.tag_name, body: r.body || "" });
    }
    return notes;
  } catch {
    return [];
  }
}

/**
 * The releases strictly newer than `current` and up to (and including)
 * `target`, sorted oldest→newest. An empty `current` removes the lower bound.
 * The input array is not modified.
 *
 * @param releases - Candidate releases, in any order.
 * @param current - Currently deployed version tag, or "" when unknown.
 * @param target - Version tag being upgraded to.
 */
export function releasesBetween(
  releases: ReleaseNote[],
  current: string,
  target: string,
): ReleaseNote[] {
  return releases
    .filter((r) => {
      const gtCurrent = current ? compareVersions(r.tag, current) > 0 : true;
      const leTarget = compareVersions(r.tag, target) <= 0;
      return gtCurrent && leTarget;
    })
    .sort((a, b) => compareVersions(a.tag, b.tag));
}

/**
 * Fetch the annotated tag message for a release tag of `Azure/kars`.
 *
 * Two GitHub REST calls: the tag ref is resolved first, then — only when the
 * ref points at a tag object — the tag object itself is read for its message.
 * Never throws.
 *
 * @param tag - Release tag name, e.g. "v0.1.17".
 * @param fetchImpl - `fetch` implementation, injectable for tests.
 * @returns The tag message, or `null` when the tag is lightweight, has no
 *   message, or either request fails.
 */
export async function fetchTagMessage(
  tag: string,
  fetchImpl: typeof fetch = fetch,
): Promise<string | null> {
  try {
    const refRes = await fetchImpl(
      `https://api.github.com/repos/Azure/kars/git/refs/tags/${tag}`,
      { headers: { Accept: "application/vnd.github+json", "User-Agent": "kars-cli" } },
    );
    if (!refRes.ok) return null;
    const ref = (await refRes.json()) as { object?: { sha?: string; type?: string } };
    // Lightweight tags point straight at a commit (no annotation message).
    if (ref.object?.type !== "tag" || !ref.object.sha) return null;
    const tagRes = await fetchImpl(
      `https://api.github.com/repos/Azure/kars/git/tags/${ref.object.sha}`,
      { headers: { Accept: "application/vnd.github+json", "User-Agent": "kars-cli" } },
    );
    if (!tagRes.ok) return null;
    return ((await tagRes.json()) as { message?: string }).message ?? null;
  } catch {
    return null;
  }
}

/**
 * Request an anonymous GHCR pull token for a repository
 * (e.g. "azure/kars-controller"). Resolves to `null` on any failure.
 */
async function ghcrToken(repo: string, fetchImpl: typeof fetch): Promise<string | null> {
  try {
    const res = await fetchImpl(`https://ghcr.io/token?scope=repository:${repo}:pull`, {
      headers: { "User-Agent": "kars-cli" },
    });
    if (!res.ok) return null;
    return ((await res.json()) as { token?: string }).token ?? null;
  } catch {
    return null;
  }
}

/**
 * Collect the manifest digests published for `ghcr.io/<repo>:<tag>`: the
 * digest reported in the `docker-content-digest` response header plus the
 * digest of every entry in the manifest's `manifests` list (present when the
 * tag resolves to a multi-arch index). Gathering both lets a caller match
 * either the index digest or a per-arch digest.
 *
 * Never throws: a missing token, a non-OK response or a parsing failure all
 * yield whatever was collected so far (possibly an empty set).
 *
 * @param repo - Repository path under ghcr.io, e.g. "azure/kars-controller".
 * @param tag - Image tag to look up.
 * @param fetchImpl - `fetch` implementation, injectable for tests.
 */
export async function ghcrManifestDigests(
  repo: string,
  tag: string,
  fetchImpl: typeof fetch = fetch,
): Promise<Set<string>> {
  const out = new Set<string>();
  const token = await ghcrToken(repo, fetchImpl);
  if (!token) return out;
  // Accept both index/list and single-image manifests, OCI and Docker flavours.
  const accept = [
    "application/vnd.oci.image.index.v1+json",
    "application/vnd.docker.distribution.manifest.list.v2+json",
    "application/vnd.oci.image.manifest.v1+json",
    "application/vnd.docker.distribution.manifest.v2+json",
  ].join(", ");
  try {
    const res = await fetchImpl(`https://ghcr.io/v2/${repo}/manifests/${tag}`, {
      headers: { Authorization: `Bearer ${token}`, Accept: accept, "User-Agent": "kars-cli" },
    });
    if (!res.ok) return out;
    const indexDigest = res.headers.get("docker-content-digest");
    if (indexDigest) out.add(indexDigest);
    const body = (await res.json()) as { manifests?: Array<{ digest?: string }> };
    for (const m of body.manifests ?? []) {
      if (m.digest) out.add(m.digest);
    }
  } catch {
    /* ignore — best-effort */
  }
  return out;
}
**Changelog summary** — before the confirm, prints the annotated tag messages + for the releases between current and target (`fetchRecentReleases`, + `releasesBetween`, `fetchTagMessage`, `summarizeChangelog`). +2. **Impact table** — reads the live cluster (`kubectl get deployment …`) and + lists the controller + sandboxes that will be rolling-restarted, with + readiness and running image. +3. **Y/N confirmation** — an interactive prompt before any write. Auto-proceeds + under `--yes` or a non-TTY stdin, so existing automation is unaffected. +4. **Pre-flight node-readiness gate** — `kubectl get nodes`; hard-blocks (with + guidance, no changes made) only when **every** node is NotReady, where the + upgrade would otherwise burn minutes and time out. + +Plus a **version-detection fallback**: when the controller runs `:latest` and no +`karsRelease` stamp exists (a cluster from before the stamp), match the running +image digest against published release digests to recover the real "Current:" +version. It is inserted **only as a new fallback step** — it never overrides the +existing image-tag or stamped-value detection. + +## T1: New capability / attack surface? (NO) +- No new endpoint, route, privilege, credential, or write path. All additions + are **read-only**: `kubectl get` (nodes/deployments) and anonymous public + GitHub / GHCR REST reads (release notes, tag messages, public manifest + digests). No user credentials or secrets are read or sent; the only token + involved is the anonymous pull token that GHCR issues to any client for a + public repository. No mutating calls. +- The mutating upgrade sequence (`az acr import`, `helm upgrade --atomic`, + rollout restarts, rollback) is **unchanged**. A human confirm now *gates* it; + nothing new *performs* it. + +## T2: Security-control change? (STRENGTHENED) +- Adds a confirmation gate and a fail-fast pre-flight check in front of the + existing controls; removes none. The v0.1.21 post-upgrade health gate, + value preservation (`--reuse-values`), and `--atomic` rollback are untouched. 
+- Version detection is more accurate (digest fallback) but strictly additive — + it can only turn a previous "unknown" into a real version, never change a + correct answer, so the "cluster is NEWER than target" downgrade guard behaves + identically or better. + +## T3: Availability / fail-open risk? (REDUCED) +- Every new call is best-effort and never throws: GitHub/GHCR reads fall back to + "no notes"/"unknown"; `kubectl` read failures print a note and continue; + unreadable nodes do **not** hard-block (only an all-NotReady cluster does, and + that is a true pre-existing outage where the upgrade would fail anyway). +- Non-TTY / `--yes` auto-proceed preserves existing scripted/CI behaviour, so the + new prompt cannot wedge automation. + +## Verification +- CLI typecheck (`tsc --noEmit`) clean; `oxlint` 0 errors (no new warnings in the + changed files); `npm run build` clean. +- `vitest`: 888 pass / 2 skipped (49 files). `release.test.ts` gains 6 tests — + `releasesBetween` (3) and `summarizeChangelog` (3) — covering the changelog + selection + parsing logic. +- The hardened write path, health gate, and rollback in `upgrade.ts` are + byte-for-byte unchanged (additions only). + +## Verdict +Accept. Read-only UX + an extra confirm/pre-flight in front of an unchanged, +already-hardened write path; more accurate version reporting via a strictly +additive fallback. No security control weakened. + +Signed-off-by: Pal Lakatos-Toth +Signed-off-by: Copilot <223556219+Copilot@users.noreply.github.com>