diff --git a/.changeset/session-get-hang.md b/.changeset/session-get-hang.md new file mode 100644 index 000000000..885fef90a --- /dev/null +++ b/.changeset/session-get-hang.md @@ -0,0 +1,5 @@ +--- +"emdash": patch +--- + +Fix logged-in pages hanging indefinitely on Cloudflare Workers (#1274). A request cancelled mid-`session.get("user")` could leave the session-store read as a promise that never settles, poisoning the isolate so every later session-bearing request hung (0-CPU, multi-minute, `canceled`) — reliably reproducible on fresh isolates right after a deploy. Every session read on the request path (the main middleware, the auth middleware, and the preview-snapshot route) now goes through a shared `resolveSessionUser()` helper that anchors the read with `after()` so a cancelled request still drives it to completion (preventing the isolate poisoning), with a fail-closed timeout backstop that degrades a still-stalled read to "unauthenticated for this request" rather than hanging. diff --git a/packages/core/src/astro/middleware.ts b/packages/core/src/astro/middleware.ts index 06bb3b36b..ca6676d87 100644 --- a/packages/core/src/astro/middleware.ts +++ b/packages/core/src/astro/middleware.ts @@ -62,6 +62,7 @@ import type { EmDashConfig } from "./integration/runtime.js"; import { wrapBodyForStreamMetrics } from "./middleware/stream-end-metrics.js"; import { prefetchLayoutData } from "./prefetch.js"; import { createPublicPluginApiRouteHandler } from "./public-plugin-api-routes.js"; +import { resolveSessionUser } from "./session-user.js"; import type { EmDashHandlers } from "./types.js"; /** @@ -416,7 +417,7 @@ export const onRequest = defineMiddleware(async (context, next) => { // turns normal traffic into a flood of KV read misses. See #733. const hasSessionCookie = cookies.get("astro-session") !== undefined; const sessionUser = - context.isPrerendered || !hasSessionCookie ? null : await context.session?.get("user"); + context.isPrerendered || !hasSessionCookie ? null : await resolveSessionUser(context.session); if (!isEmDashRoute && !isPublicRuntimeRoute && !hasEditCookie && !hasPreviewToken) { if (!sessionUser && !playgroundDb) { diff --git a/packages/core/src/astro/middleware/auth.ts b/packages/core/src/astro/middleware/auth.ts index 29bf57f96..cce225e12 100644 --- a/packages/core/src/astro/middleware/auth.ts +++ b/packages/core/src/astro/middleware/auth.ts @@ -32,6 +32,7 @@ import { resolveApiToken, resolveOAuthToken } from "../../api/handlers/api-token import { hasScope } from "../../auth/api-tokens.js"; import { getAuthMode, type ExternalAuthMode } from "../../auth/mode.js"; import type { ExternalAuthConfig } from "../../auth/types.js"; +import { resolveSessionUser } from "../session-user.js"; import type { EmDashHandlers } from "../types.js"; import { buildEmDashCsp } from "./csp.js"; @@ -407,7 +408,7 @@ async function handlePluginRouteAuth( try { // Try session auth (sets locals.user if session exists) const { session } = context; - const sessionUser = await session?.get("user"); + const sessionUser = await resolveSessionUser(session); if (sessionUser?.id && emdash?.db) { const adapter = createKyselyAdapter(emdash.db); const user = await adapter.getUserById(sessionUser.id); @@ -435,7 +436,7 @@ async function handlePublicRouteAuth( const { emdash } = locals; try { - const sessionUser = await session?.get("user"); + const sessionUser = await resolveSessionUser(session); if (sessionUser?.id && emdash?.db) { const adapter = createKyselyAdapter(emdash.db); const user = await adapter.getUserById(sessionUser.id); @@ -650,7 +651,7 @@ async function handlePasskeyAuth( try { // Check session for user (session.get returns a Promise) - const sessionUser = await session?.get("user"); + const sessionUser = await resolveSessionUser(session); if (!sessionUser?.id) { if (isApiRoute) { diff --git a/packages/core/src/astro/routes/api/snapshot.ts b/packages/core/src/astro/routes/api/snapshot.ts index b1141656f..0b5c2f9ec 100644 --- a/packages/core/src/astro/routes/api/snapshot.ts +++ b/packages/core/src/astro/routes/api/snapshot.ts @@ -20,6 +20,8 @@ import { import { getPublicOrigin } from "#api/public-url.js"; import { resolveSecretsCached } from "#config/secrets.js"; +import { resolveSessionUser } from "../../session-user.js"; + export const prerender = false; export const GET: APIRoute = async ({ request, locals, url, session }) => { @@ -31,7 +33,7 @@ export const GET: APIRoute = async ({ request, locals, url, session }) => { if (!user && session && emdash?.db) { try { const { createKyselyAdapter } = await import("@emdash-cms/auth/adapters/kysely"); - const sessionUser = await session.get("user"); + const sessionUser = await resolveSessionUser(session); if (sessionUser?.id) { const adapter = createKyselyAdapter(emdash.db); const resolved = await adapter.getUserById(sessionUser.id); diff --git a/packages/core/src/astro/session-user.ts b/packages/core/src/astro/session-user.ts new file mode 100644 index 000000000..6976b8685 --- /dev/null +++ b/packages/core/src/astro/session-user.ts @@ -0,0 +1,57 @@ +import { after } from "../after.js"; + +/** + * Backstop timeout for resolving the session user. A live session-store read + * settles in a few ms, so this only ever fires on a genuinely stalled read. + */ +export const SESSION_GET_TIMEOUT_MS = 3_000; + +/** + * Resolve the Astro session user without risking an isolate-wide hang. + * + * On Cloudflare Workers, a request cancelled mid-`session.get()` (client + * disconnect, context teardown) can leave the underlying session-store read as + * a promise that never settles — neither resolving nor rejecting. Awaiting it + * directly hangs the request, and because the stalled promise is shared at the + * isolate level, every later session-bearing request hangs too (observed as + * 0-CPU, multi-minute, `canceled` responses; see #1274). A surrounding + * try/catch cannot help: the promise never rejects. + * + * Two layers, mirroring the reclaimable-cache pattern used elsewhere in core: + * 1. `after()` anchors the read so a cancelled request still drives it to + * completion — the promise settles and the isolate is not poisoned for + * subsequent requests (prevents the hang rather than merely surviving it). + * 2. A timeout is a fail-closed backstop: a still-stalled (or rejecting) read + * resolves to `undefined`, and every caller treats the absence of a session + * user as unauthenticated (anonymous on public routes, 401/redirect on + * protected ones). It can only ever drop privileges for that one request, + * never grant them. + * + * Used by every session read on the request path: the main middleware (the + * first read on a session-bearing request), the auth middleware, and the + * preview-snapshot route (which bypasses the auth middleware). + */ +export async function resolveSessionUser( + session: { get(key: "user"): Promise } | undefined, + timeoutMs = SESSION_GET_TIMEOUT_MS, +): Promise { + if (!session) return undefined; + const read: Promise = Promise.resolve(session.get("user")).catch(() => undefined); + // Keep the worker alive past response/cancellation so the read completes and + // the shared promise settles — this is what prevents the isolate poisoning. + after(() => + read.then( + () => undefined, + () => undefined, + ), + ); + let timer: ReturnType | undefined; + const timeout = new Promise((resolve) => { + timer = setTimeout(resolve, timeoutMs, undefined); + }); + try { + return await Promise.race([read, timeout]); + } finally { + clearTimeout(timer); + } +} diff --git a/packages/core/tests/unit/astro/session-user.test.ts b/packages/core/tests/unit/astro/session-user.test.ts new file mode 100644 index 000000000..d83e39a49 --- /dev/null +++ b/packages/core/tests/unit/astro/session-user.test.ts @@ -0,0 +1,34 @@ +import { describe, expect, it, vi } from "vitest"; + +// `after()` lazily imports this virtual module (provided by the Astro +// integration at build time, absent in unit tests). Stub it so the lazy +// import resolves cleanly to a no-op lifetime extender. +vi.mock("virtual:emdash/wait-until", () => ({ waitUntil: undefined })); + +import { resolveSessionUser } from "../../../src/astro/session-user.js"; + +describe("resolveSessionUser", () => { + it("returns the session user when the read resolves", async () => { + const session = { get: vi.fn(async () => ({ id: "user_1" })) }; + await expect(resolveSessionUser(session)).resolves.toEqual({ id: "user_1" }); + expect(session.get).toHaveBeenCalledWith("user"); + }); + + it("returns undefined when there is no session", async () => { + await expect(resolveSessionUser(undefined)).resolves.toBeUndefined(); + }); + + // The core #1274 guarantee: a read that never settles (a workerd context + // cancelled mid-read) must not hang — the timeout backstop resolves it. + it("falls back to undefined instead of hanging when the read never settles", async () => { + const session = { get: () => new Promise<{ id: string }>(() => {}) }; + await expect(resolveSessionUser(session, 20)).resolves.toBeUndefined(); + }); + + // Fail-closed: a rejecting read resolves to undefined (caller treats the + // request as unauthenticated) rather than throwing. + it("fails closed to undefined when the read rejects", async () => { + const session = { get: () => Promise.reject(new Error("boom")) }; + await expect(resolveSessionUser(session, 20)).resolves.toBeUndefined(); + }); +});