diff --git a/CHANGELOG.md b/CHANGELOG.md index 9527eb5c..5c42cafb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,47 @@ # Changelog +## [Unreleased] — security/cookie-auth-hardening (2026-03-30) + +### Security + +#### 🔐 Daemon Bearer-Token 认证(防本地进程横向移动) + +**风险**:Daemon HTTP 端口 `19825` 仅凭 `X-OpenCLI` 自定义头部防御 CSRF。任何本地进程只需加上该头部即可控制浏览器自动化会话,执行任意 JS、读取 Cookie 等高权限操作。 + +**修复**(`src/daemon.ts`、`src/browser/daemon-client.ts`): +- Daemon 启动时用 `crypto.randomBytes(32)` 生成 64 字符十六进制 Token,写入 `~/.opencli/daemon.token`(权限 `0o600`)。 +- 所有非 `/ping` HTTP 端点要求 `Authorization: Bearer ` 头部;验证采用恒时比较防时序攻击。 +- `daemon-client.ts` 懒加载并缓存 token;收到 `401` 时自动刷新缓存以应对 Daemon 重启。 +- Daemon 正常退出时删除 token 文件,避免旧 token 被复用。 + +#### 🔐 Extension ID 固定(可选加固) + +**修复**(`src/daemon.ts`): +- 新增环境变量 `OPENCLI_EXTENSION_ID`:设置后 WebSocket 握手将精确匹配扩展 ID,拒绝其他 `chrome-extension://` 来源。 + +#### 🍪 HttpOnly Cookie 访问警告 + 脱敏 + +**风险**:CDP `Network.getCookies` 可读取 `httpOnly` Cookie(session / auth token),而这些 Cookie 正常情况下对 JS 不可见,存在被意外记录或泄露的风险。 + +**修复**(`src/browser/page.ts`、`src/browser/cdp.ts`、`src/types.ts`): +- 开启 `OPENCLI_VERBOSE=1` 时输出 HttpOnly Cookie 数量警告。 +- 新增 `OPENCLI_REDACT_COOKIES=1` 环境变量和 `getCookies({ redact: true })` 选项,自动将 HttpOnly Cookie 及敏感名称(`session`、`token`、`auth`、`jwt` 等)的值替换为 `[REDACTED]`。 +- 新增 `isSensitiveCookieName()`、`redactCookies()` 工具函数供下游复用。 + +#### 🌐 CDP Endpoint 强制 Localhost 校验 + +**风险**:`OPENCLI_CDP_ENDPOINT` 无主机校验,若指向远程地址将把浏览器 Cookie 和 DOM 数据暴露给第三方。 + +**修复**(`src/browser/cdp.ts`): +- 连接前校验主机名,仅允许 `localhost`、`127.0.0.1`、`::1`。 +- 如需连接远程实例(高级调试),可设置 `OPENCLI_CDP_ALLOW_REMOTE=1` 解除限制。 + +#### ⚠️ Pipeline fetch 步骤 `credentials: "include"` 安全注释 + +**修复**(`src/pipeline/steps/fetch.ts`): +- 添加注释说明 `credentials: "include"` 的预期用途(登录态 API 抓取)及风险(不可传入不受信任的 URL),防止误用导致 CSRF。 + + ## [1.4.1](https://github.com/jackwener/opencli/compare/v1.4.0...v1.4.1) (2026-03-25) diff --git a/src/browser/cdp.test.ts b/src/browser/cdp.test.ts index 480f32ae..3b75358a 100644 --- 
a/src/browser/cdp.test.ts +++ b/src/browser/cdp.test.ts @@ -5,8 +5,11 @@ const { MockWebSocket } = vi.hoisted(() => { static OPEN = 1; readyState = 1; private handlers = new Map void>>(); + sent: any[] = []; + static instances: MockWebSocket[] = []; constructor(_url: string) { + MockWebSocket.instances.push(this); queueMicrotask(() => this.emit('open')); } @@ -16,7 +19,32 @@ const { MockWebSocket } = vi.hoisted(() => { this.handlers.set(event, handlers); } - send(_message: string): void {} + send(message: string): void { + const payload = JSON.parse(message); + this.sent.push(payload); + + if (payload.method === 'Target.createTarget') { + queueMicrotask(() => this.emit('message', JSON.stringify({ + id: payload.id, + result: { targetId: 'target-1' }, + }))); + return; + } + + if (payload.method === 'Target.attachToTarget') { + queueMicrotask(() => this.emit('message', JSON.stringify({ + id: payload.id, + result: { sessionId: 'session-1' }, + }))); + return; + } + + queueMicrotask(() => this.emit('message', JSON.stringify({ + id: payload.id, + result: {}, + sessionId: payload.sessionId, + }))); + } close(): void { this.readyState = 3; @@ -41,6 +69,7 @@ import { CDPBridge } from './cdp.js'; describe('CDPBridge cookies', () => { beforeEach(() => { vi.unstubAllEnvs(); + MockWebSocket.instances.length = 0; }); it('filters cookies by actual domain match instead of substring match', async () => { @@ -63,4 +92,31 @@ describe('CDPBridge cookies', () => { { name: 'exact', value: '2', domain: 'example.com' }, ]); }); + + it('attaches to a browser-level websocket endpoint and scopes page commands to the target session', async () => { + vi.stubEnv('OPENCLI_CDP_ENDPOINT', 'ws://127.0.0.1:9222/devtools/browser/browser-1'); + + const bridge = new CDPBridge(); + await bridge.connect(); + + const sent = MockWebSocket.instances[0]?.sent ?? 
[]; + expect(sent.map((item) => item.method)).toEqual([ + 'Target.createTarget', + 'Target.attachToTarget', + 'Page.enable', + 'Page.addScriptToEvaluateOnNewDocument', + ]); + expect(sent[1]).toMatchObject({ + method: 'Target.attachToTarget', + params: { targetId: 'target-1', flatten: true }, + }); + expect(sent[2]).toMatchObject({ + method: 'Page.enable', + sessionId: 'session-1', + }); + expect(sent[3]).toMatchObject({ + method: 'Page.addScriptToEvaluateOnNewDocument', + sessionId: 'session-1', + }); + }); }); diff --git a/src/browser/cdp.ts b/src/browser/cdp.ts index 72bcf87f..4439de6f 100644 --- a/src/browser/cdp.ts +++ b/src/browser/cdp.ts @@ -12,6 +12,7 @@ import { WebSocket, type RawData } from 'ws'; import { request as httpRequest } from 'node:http'; import { request as httpsRequest } from 'node:https'; import type { BrowserCookie, IPage, ScreenshotOptions, SnapshotOptions, WaitOptions } from '../types.js'; +import { redactCookies } from '../types.js'; import type { IBrowserFactory } from '../runtime.js'; import { wrapForEval } from './utils.js'; import { generateSnapshotJs, scrollToRefJs, getFormStateJs } from './dom-snapshot.js'; @@ -55,6 +56,7 @@ export class CDPBridge implements IBrowserFactory { private _idCounter = 0; private _pending = new Map void; reject: (err: Error) => void; timer: ReturnType }>(); private _eventListeners = new Map void>>(); + private _sessionId: string | null = null; async connect(opts?: { timeout?: number; workspace?: string }): Promise { if (this._ws) throw new Error('CDPBridge is already connected. Call close() before reconnecting.'); @@ -62,7 +64,16 @@ export class CDPBridge implements IBrowserFactory { const endpoint = process.env.OPENCLI_CDP_ENDPOINT; if (!endpoint) throw new Error('OPENCLI_CDP_ENDPOINT is not set'); + // ── Security: enforce localhost-only CDP connections ───────────── + // Connecting to a remote host would expose all browser cookies and DOM + // content to a third party. 
Refuse unless OPENCLI_CDP_ALLOW_REMOTE=1 + // is explicitly set (power-users who need remote debugging). + if (process.env.OPENCLI_CDP_ALLOW_REMOTE !== '1') { + assertLocalhostEndpoint(endpoint); + } + let wsUrl = endpoint; + const isBrowserEndpoint = /^wss?:\/\/.+\/devtools\/browser\//i.test(endpoint); if (endpoint.startsWith('http')) { const targets = await fetchJsonDirect(`${endpoint.replace(/\/$/, '')}/json`) as CDPTarget[]; const target = selectCDPTarget(targets); @@ -81,6 +92,19 @@ export class CDPBridge implements IBrowserFactory { clearTimeout(timeout); this._ws = ws; try { + if (isBrowserEndpoint) { + const target = await this.sendRaw('Target.createTarget', { url: 'about:blank' }) as { targetId?: string }; + const targetId = typeof target?.targetId === 'string' ? target.targetId : ''; + if (!targetId) throw new Error('CDP browser endpoint did not return a targetId'); + + const attached = await this.sendRaw('Target.attachToTarget', { + targetId, + flatten: true, + }) as { sessionId?: string }; + const sessionId = typeof attached?.sessionId === 'string' ? attached.sessionId : ''; + if (!sessionId) throw new Error('CDP browser endpoint did not return a sessionId'); + this._sessionId = sessionId; + } await this.send('Page.enable'); await this.send('Page.addScriptToEvaluateOnNewDocument', { source: generateStealthJs() }); } catch {} @@ -105,7 +129,7 @@ export class CDPBridge implements IBrowserFactory { entry.resolve(msg.result); } } - if (msg.method) { + if (msg.method && (!this._sessionId || !msg.sessionId || msg.sessionId === this._sessionId)) { const listeners = this._eventListeners.get(msg.method); if (listeners) { for (const fn of listeners) fn(msg.params); @@ -130,6 +154,15 @@ export class CDPBridge implements IBrowserFactory { } async send(method: string, params: Record = {}, timeoutMs: number = CDP_SEND_TIMEOUT): Promise { + return this.sendRaw(method, params, timeoutMs, this._sessionId ?? 
undefined); + } + + private async sendRaw( + method: string, + params: Record = {}, + timeoutMs: number = CDP_SEND_TIMEOUT, + sessionId?: string, + ): Promise { if (!this._ws || this._ws.readyState !== WebSocket.OPEN) { throw new Error('CDP connection is not open'); } @@ -140,7 +173,7 @@ export class CDPBridge implements IBrowserFactory { reject(new Error(`CDP command '${method}' timed out after ${timeoutMs / 1000}s`)); }, timeoutMs); this._pending.set(id, { resolve, reject, timer }); - this._ws!.send(JSON.stringify({ id, method, params })); + this._ws!.send(JSON.stringify(sessionId ? { id, method, params, sessionId } : { id, method, params })); }); } @@ -206,13 +239,27 @@ class CDPPage implements IPage { return result.result?.value; } - async getCookies(opts: { domain?: string; url?: string } = {}): Promise { + async getCookies(opts: { domain?: string; url?: string; redact?: boolean } = {}): Promise { const result = await this.bridge.send('Network.getCookies', opts.url ? { urls: [opts.url] } : {}); - const cookies = isRecord(result) && Array.isArray(result.cookies) ? result.cookies : []; + const rawCookies = isRecord(result) && Array.isArray(result.cookies) ? result.cookies : []; const domain = opts.domain; - return domain - ? cookies.filter((cookie): cookie is BrowserCookie => isCookie(cookie) && matchesCookieDomain(cookie.domain, domain)) - : cookies; + const cookies: BrowserCookie[] = domain + ? rawCookies.filter((cookie): cookie is BrowserCookie => isCookie(cookie) && matchesCookieDomain(cookie.domain, domain)) + : rawCookies.filter(isCookie); + + // CDP Network.getCookies exposes HttpOnly cookies — warn operators. 
/**
 * Throw unless `endpoint` names a loopback host.
 *
 * Guards against pointing OPENCLI_CDP_ENDPOINT at a remote machine, which
 * would expose browser cookies and DOM state to that machine. The check is
 * purely lexical: the hostname is compared against loopback literals and is
 * never resolved through DNS.
 *
 * Accepted hosts:
 *   - `localhost` (any letter case)
 *   - any IPv4 address in 127.0.0.0/8 (not only 127.0.0.1)
 *   - the IPv6 loopback, written `::1` or `0:0:0:0:0:0:0:1`, bracketed or not
 *
 * The caller decides whether to run this check at all; it is skipped when
 * OPENCLI_CDP_ALLOW_REMOTE=1.
 *
 * @param endpoint - CDP endpoint, normally an http(s):// or ws(s):// URL.
 * @throws Error when the host is not one of the loopback literals above.
 */
function assertLocalhostEndpoint(endpoint: string): void {
  let rawHost: string;
  try {
    rawHost = new URL(endpoint).hostname;
  } catch {
    // Not parseable as a URL: compare the raw string instead, so only an
    // exact loopback literal can pass.
    rawHost = endpoint;
  }

  // URL#hostname keeps the brackets around IPv6 literals; drop them.
  const host = rawHost.toLowerCase().replace(/^\[|\]$/g, '');

  // The whole 127.0.0.0/8 block is loopback, so accept any valid 127.x.y.z.
  const octets = /^127\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(host);
  const isLoopbackV4 = octets !== null && octets.slice(1).every((octet) => Number(octet) <= 255);

  const isLoopback =
    isLoopbackV4 || host === 'localhost' || host === '::1' || host === '0:0:0:0:0:0:0:1';

  if (!isLoopback) {
    throw new Error(
      `Security: OPENCLI_CDP_ENDPOINT "${endpoint}" points to a non-loopback host.`
      + ' Connecting to a remote CDP endpoint exposes all browser cookies and DOM data.'
      + ' Set OPENCLI_CDP_ALLOW_REMOTE=1 to override (advanced users only).',
    );
  }
}
/**
 * Return the daemon Bearer token, reading it from `TOKEN_FILE` when no token
 * is cached yet.
 *
 * Only a successfully read, non-empty token short-circuits later calls. A
 * missing, unreadable or empty file is re-checked on every call. The CLI can
 * probe the daemon before it has been spawned; if that miss were cached, every
 * later request from this process would omit the Authorization header until
 * something called resetTokenCache().
 *
 * @returns The trimmed token, or `null` when no token is available.
 */
function readDaemonToken(): string | null {
  if (_cachedToken) return _cachedToken;

  let token: string | null;
  try {
    token = fs.readFileSync(TOKEN_FILE, 'utf-8').trim() || null;
  } catch {
    token = null; // file not found or unreadable — non-fatal
  }

  // Record the outcome, but a null here never blocks the next read attempt.
  _cachedToken = token;
  return token;
}

/**
 * Build authentication headers for a daemon HTTP request.
 *
 * `X-OpenCLI` is always present. `Authorization: Bearer <token>` is added
 * only when a token could be read from the token file.
 *
 * @returns A fresh header object that the caller may extend.
 */
export function buildDaemonAuthHeaders(): Record<string, string> {
  const headers: Record<string, string> = { 'X-OpenCLI': '1' };
  const token = readDaemonToken();
  if (token) headers['Authorization'] = `Bearer ${token}`;
  return headers;
}

/**
 * Forget the cached token so the next request re-reads the token file.
 * Call this when the daemon is known to have restarted (for example after a
 * 401 response) and has written a new token.
 */
export function resetTokenCache(): void {
  _cachedToken = undefined;
}
...buildDaemonAuthHeaders() }, body: JSON.stringify(command), signal: controller.signal, }); @@ -117,8 +167,12 @@ export async function sendCommand( || errMsg.includes('Extension not connected') || errMsg.includes('attach failed') || errMsg.includes('no longer exists'); - if (isTransient && attempt < maxRetries) { - // Longer delay for extension recovery (service worker restart) + // 401 means the daemon restarted and generated a new token — clear cache + // so the next attempt re-reads the fresh token file. + if (res.status === 401) { + resetTokenCache(); + } + if ((isTransient || res.status === 401) && attempt < maxRetries) { await sleep(1500); continue; } @@ -144,4 +198,3 @@ export async function listSessions(): Promise { const result = await sendCommand('sessions'); return Array.isArray(result) ? result : []; } - diff --git a/src/browser/discover.test.ts b/src/browser/discover.test.ts new file mode 100644 index 00000000..7311e81f --- /dev/null +++ b/src/browser/discover.test.ts @@ -0,0 +1,47 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +const mockBuildDaemonAuthHeaders = vi.fn(() => ({ 'X-OpenCLI': '1', Authorization: 'Bearer test-token' })); +const mockIsDaemonRunning = vi.fn(); +const mockResetTokenCache = vi.fn(); + +vi.mock('./daemon-client.js', () => ({ + buildDaemonAuthHeaders: mockBuildDaemonAuthHeaders, + isDaemonRunning: mockIsDaemonRunning, + resetTokenCache: mockResetTokenCache, +})); + +describe('checkDaemonStatus', () => { + beforeEach(() => { + vi.clearAllMocks(); + vi.unstubAllGlobals(); + }); + + it('uses daemon auth headers and retries once on 401', async () => { + const fetchMock = vi.fn() + .mockResolvedValueOnce(new Response(JSON.stringify({ ok: false }), { status: 401 })) + .mockResolvedValueOnce(new Response(JSON.stringify({ + ok: true, + extensionConnected: true, + extensionVersion: '1.5.6', + }), { status: 200 })); + + vi.stubGlobal('fetch', fetchMock); + + const { checkDaemonStatus } = await 
import('./discover.js'); + await expect(checkDaemonStatus()).resolves.toEqual({ + running: true, + extensionConnected: true, + extensionVersion: '1.5.6', + }); + + expect(mockBuildDaemonAuthHeaders).toHaveBeenCalledTimes(2); + expect(mockResetTokenCache).toHaveBeenCalledTimes(1); + expect(fetchMock).toHaveBeenNthCalledWith( + 1, + 'http://127.0.0.1:19825/status', + expect.objectContaining({ + headers: { 'X-OpenCLI': '1', Authorization: 'Bearer test-token' }, + }), + ); + }); +}); diff --git a/src/browser/discover.ts b/src/browser/discover.ts index d51b2869..fedad157 100644 --- a/src/browser/discover.ts +++ b/src/browser/discover.ts @@ -6,7 +6,7 @@ */ import { DEFAULT_DAEMON_PORT } from '../constants.js'; -import { isDaemonRunning } from './daemon-client.js'; +import { buildDaemonAuthHeaders, isDaemonRunning, resetTokenCache } from './daemon-client.js'; export { isDaemonRunning }; @@ -20,17 +20,34 @@ export async function checkDaemonStatus(opts?: { timeout?: number }): Promise<{ }> { try { const port = parseInt(process.env.OPENCLI_DAEMON_PORT ?? String(DEFAULT_DAEMON_PORT), 10); - const controller = new AbortController(); - const timer = setTimeout(() => controller.abort(), opts?.timeout ?? 2000); + const requestStatus = async () => { + const controller = new AbortController(); + const timer = setTimeout(() => controller.abort(), opts?.timeout ?? 
2000); + try { + return await fetch(`http://127.0.0.1:${port}/status`, { + headers: buildDaemonAuthHeaders(), + signal: controller.signal, + }); + } finally { + clearTimeout(timer); + } + }; + + let res = await requestStatus(); + if (res.status === 401) { + resetTokenCache(); + res = await requestStatus(); + } + + if (!res.ok) { + return { running: true, extensionConnected: false }; + } + try { - const res = await fetch(`http://127.0.0.1:${port}/status`, { - headers: { 'X-OpenCLI': '1' }, - signal: controller.signal, - }); const data = await res.json() as { ok: boolean; extensionConnected: boolean; extensionVersion?: string }; - return { running: true, extensionConnected: data.extensionConnected, extensionVersion: data.extensionVersion }; - } finally { - clearTimeout(timer); + return { running: true, extensionConnected: !!data.extensionConnected, extensionVersion: data.extensionVersion }; + } catch { + return { running: true, extensionConnected: false }; } } catch { return { running: false, extensionConnected: false }; diff --git a/src/browser/page.ts b/src/browser/page.ts index bfbd8aff..c523bc5a 100644 --- a/src/browser/page.ts +++ b/src/browser/page.ts @@ -12,6 +12,7 @@ import { formatSnapshot } from '../snapshotFormatter.js'; import type { BrowserCookie, IPage, ScreenshotOptions, SnapshotOptions, WaitOptions } from '../types.js'; +import { redactCookies } from '../types.js'; import { sendCommand } from './daemon-client.js'; import { wrapForEval } from './utils.js'; import { saveBase64ToFile } from '../utils.js'; @@ -125,9 +126,28 @@ export class Page implements IPage { return sendCommand('exec', { code, ...this._cmdOpts() }); } - async getCookies(opts: { domain?: string; url?: string } = {}): Promise { - const result = await sendCommand('cookies', { ...this._wsOpt(), ...opts }); - return Array.isArray(result) ? 
result : []; + async getCookies(opts: { domain?: string; url?: string; redact?: boolean } = {}): Promise { + const result = await sendCommand('cookies', { ...this._wsOpt(), domain: opts.domain, url: opts.url }); + const cookies: BrowserCookie[] = Array.isArray(result) ? result : []; + + // Warn when HttpOnly cookies are included — these are session/auth tokens + // that websites intentionally protect from JavaScript access. + // CDP bypasses that protection; callers should handle them with care. + const httpOnlyCount = cookies.filter((c) => c.httpOnly).length; + if (httpOnlyCount > 0 && process.env.OPENCLI_VERBOSE) { + console.error( + `[opencli] Warning: getCookies() returned ${httpOnlyCount} HttpOnly cookie(s).` + + ' These may contain session tokens — avoid logging or storing raw values.', + ); + } + + // Redact by default when OPENCLI_REDACT_COOKIES=1 or opts.redact=true. + // This replaces sensitive / httpOnly cookie values with '[REDACTED]' + // so they are safe to log or include in snapshots. 
+ if (opts.redact || process.env.OPENCLI_REDACT_COOKIES === '1') { + return redactCookies(cookies); + } + return cookies; } async snapshot(opts: SnapshotOptions = {}): Promise { diff --git a/src/clis/xiaohongshu/download.test.ts b/src/clis/xiaohongshu/download.test.ts new file mode 100644 index 00000000..2d88f204 --- /dev/null +++ b/src/clis/xiaohongshu/download.test.ts @@ -0,0 +1,37 @@ +import { describe, expect, it } from 'vitest'; +import { __test__ } from './download.js'; + +describe('xiaohongshu download media normalization', () => { + it('replaces blob video urls with real media urls from performance resources', () => { + const media = __test__.normalizeXhsMedia([ + { type: 'video', url: 'blob:https://www.xiaohongshu.com/abc' }, + { type: 'image', url: 'https://ci.xiaohongshu.com/image.jpg' }, + ], [ + 'https://sns-video-hw.xhscdn.com/stream/1/110/130/example_130.mp4?sign=abc&t=123', + 'https://www.xiaohongshu.com/some-script.js', + ]); + + expect(media).toEqual([ + { + type: 'video', + url: 'https://sns-video-hw.xhscdn.com/stream/1/110/130/example_130.mp4?sign=abc&t=123', + }, + { + type: 'image', + url: 'https://ci.xiaohongshu.com/image.jpg', + }, + ]); + }); + + it('keeps direct video urls unchanged', () => { + const media = __test__.normalizeXhsMedia([ + { type: 'video', url: 'https://sns-video-hw.xhscdn.com/direct.mp4' }, + ], [ + 'https://sns-video-hw.xhscdn.com/stream/1/110/130/other.mp4', + ]); + + expect(media).toEqual([ + { type: 'video', url: 'https://sns-video-hw.xhscdn.com/direct.mp4' }, + ]); + }); +}); diff --git a/src/clis/xiaohongshu/download.ts b/src/clis/xiaohongshu/download.ts index 6fa21389..05175da9 100644 --- a/src/clis/xiaohongshu/download.ts +++ b/src/clis/xiaohongshu/download.ts @@ -9,6 +9,38 @@ import { cli, Strategy } from '../../registry.js'; import { formatCookieHeader } from '../../download/index.js'; import { downloadMedia } from '../../download/media-download.js'; +interface XhsMediaItem { + type: 'image' | 'video'; + url: 
/** One downloadable asset scraped from a Xiaohongshu note page. */
interface XhsMediaItem {
  type: 'image' | 'video';
  url: string;
}

/**
 * Decide whether `url` looks like a directly downloadable Xiaohongshu video.
 *
 * All three conditions must hold: an http(s) scheme, an `.mp4` / `.m3u8`
 * extension followed by `?`, `#` or the end of the string, and one of the
 * host/path keywords. `blob:` URLs therefore never qualify.
 */
function isRealXhsVideoUrl(url: string): boolean {
  return /^https?:\/\//i.test(url)
    && /\.(mp4|m3u8)(?:[?#]|$)/i.test(url)
    && /(xhscdn|xiaohongshu|sns-video|video)/i.test(url);
}

/**
 * Replace video entries whose URL is not directly downloadable (typically
 * `blob:` URLs) with real media URLs taken from the page's resource list.
 *
 * - Non-video entries and videos that already have a real URL are returned
 *   untouched.
 * - Fallbacks are the distinct real video URLs in `performanceResources`,
 *   consumed in order of first appearance.
 * - Entries sharing the same placeholder URL get the same fallback. The
 *   in-page collector can push one `<video>` element once per matching
 *   selector, and those duplicates should not each take a different stream.
 * - When the fallbacks run out, the remaining entries are returned unchanged.
 *
 * @param media - Scraped media entries; not mutated.
 * @param performanceResources - Candidate resource URLs seen by the page.
 * @returns A new array of the same length and order as `media`.
 */
function normalizeXhsMedia(media: XhsMediaItem[], performanceResources: string[] = []): XhsMediaItem[] {
  // A Set keeps first-seen order, so this de-duplicates in linear time.
  const fallbackVideos = [...new Set(performanceResources.filter((url) => isRealXhsVideoUrl(url)))];
  const resolvedByPlaceholder = new Map<string, string>();
  let nextFallback = 0;

  return media.map((item) => {
    if (item.type !== 'video' || isRealXhsVideoUrl(item.url)) return item;

    let replacement = item.url ? resolvedByPlaceholder.get(item.url) : undefined;
    if (replacement === undefined) {
      replacement = fallbackVideos[nextFallback];
      if (replacement === undefined) return item; // nothing left to substitute
      nextFallback += 1;
      if (item.url) resolvedByPlaceholder.set(item.url, replacement);
    }

    return { ...item, url: replacement };
  });
}
(data as any).performanceResources as string[] : [], + ); + // Extract cookies for authenticated downloads const cookies = formatCookieHeader(await page.getCookies({ domain: 'xiaohongshu.com' })); - return downloadMedia(data.media, { + return downloadMedia(media, { output, subdir: noteId, cookies, @@ -111,3 +155,8 @@ cli({ }); }, }); + +export const __test__ = { + normalizeXhsMedia, + isRealXhsVideoUrl, +}; diff --git a/src/daemon.ts b/src/daemon.ts index 1c30f973..d7fd464a 100644 --- a/src/daemon.ts +++ b/src/daemon.ts @@ -12,6 +12,12 @@ * 3. No CORS headers — responses never include Access-Control-Allow-Origin * 4. Body size limit — 1 MB max to prevent OOM * 5. WebSocket verifyClient — reject upgrade before connection is established + * 6. Bearer token auth — random 32-byte token written to ~/.opencli/daemon.token + * (mode 0o600) at startup; all non-/ping requests require + * Authorization: Bearer . Prevents lateral-movement attacks from + * other local processes that happen to know the port. + * 7. Extension ID pinning — optional OPENCLI_EXTENSION_ID env var lets operators + * restrict WebSocket connections to a specific extension build. * * Lifecycle: * - Auto-spawned by opencli on first browser command @@ -21,12 +27,73 @@ import { createServer, type IncomingMessage, type ServerResponse } from 'node:http'; import { WebSocketServer, WebSocket, type RawData } from 'ws'; +import { randomBytes } from 'node:crypto'; +import * as nodefs from 'node:fs'; +import * as nodepath from 'node:path'; +import * as nodeos from 'node:os'; import { DEFAULT_DAEMON_PORT } from './constants.js'; import { EXIT_CODES } from './errors.js'; const PORT = parseInt(process.env.OPENCLI_DAEMON_PORT ?? String(DEFAULT_DAEMON_PORT), 10); const IDLE_TIMEOUT = 5 * 60 * 1000; // 5 minutes +// ─── Token Authentication ───────────────────────────────────────────── +// Generate a cryptographically random 64-hex token per daemon process. 
// Write it to ~/.opencli/daemon.token (mode 0o600) so only the owning
// user can read it. All HTTP endpoints (except /ping) require:
//   Authorization: Bearer <token>
// This stops other local processes from hijacking the browser session
// even if they know the port number and add the X-OpenCLI header.

const TOKEN_DIR = nodepath.join(nodeos.homedir(), '.opencli');
const TOKEN_FILE = nodepath.join(TOKEN_DIR, 'daemon.token');

/**
 * Per-process secret, generated once at module load and persisted to
 * TOKEN_FILE so the CLI client can read it.
 *
 * Persisting is best-effort: if the file cannot be written the token is still
 * enforced in memory and a warning is printed to stderr.
 */
const DAEMON_TOKEN: string = (() => {
  const token = randomBytes(32).toString('hex');
  try {
    nodefs.mkdirSync(TOKEN_DIR, { recursive: true });
    // `mode` is only applied when the file is created. Remove any leftover
    // file first, then create exclusively ('wx'), so the new token can never
    // land in an existing file with looser permissions or follow a symlink.
    nodefs.rmSync(TOKEN_FILE, { force: true });
    nodefs.writeFileSync(TOKEN_FILE, token, { encoding: 'utf-8', mode: 0o600, flag: 'wx' });
  } catch (e) {
    // Non-fatal: token is still used in-memory for header checks.
    // Log to stderr so the operator knows the file wasn't persisted.
    const reason = e instanceof Error ? e.message : String(e);
    console.error('[daemon] Warning: could not write token file:', reason);
  }
  return token;
})();

/**
 * Compare two strings without exiting early on the first differing character,
 * to guard against timing attacks. A length mismatch returns false at once.
 *
 * @returns true only when both strings have the same length and contents.
 */
function safeTokenEqual(a: string, b: string): boolean {
  if (a.length !== b.length) return false;
  let diff = 0;
  for (let i = 0; i < a.length; i++) {
    // OR-accumulate so every position is inspected regardless of mismatches.
    diff |= a.charCodeAt(i) ^ b.charCodeAt(i);
  }
  return diff === 0;
}

/**
 * Check the request's `Authorization` header against DAEMON_TOKEN.
 *
 * The scheme is matched case-insensitively and must be `Bearer`; the token is
 * the second space-separated field of the header.
 *
 * @returns true when the header carries the current daemon token.
 */
function checkBearerToken(req: IncomingMessage): boolean {
  const auth = req.headers['authorization'];
  if (!auth) return false;
  const [scheme, token] = auth.split(' ');
  if (scheme?.toLowerCase() !== 'bearer' || !token) return false;
  return safeTokenEqual(token, DAEMON_TOKEN);
}

// Optional extension ID pinning: set OPENCLI_EXTENSION_ID to restrict WebSocket
// connections to exactly one extension build (e.g. after sideloading).
+const PINNED_EXTENSION_ID = process.env.OPENCLI_EXTENSION_ID?.trim() || null; + +function isAllowedExtensionOrigin(origin: string | undefined): boolean { + if (!origin) return true; // no origin = non-browser client (CLI curl-style), allow + if (!origin.startsWith('chrome-extension://')) return false; + if (PINNED_EXTENSION_ID) { + // origin is chrome-extension:// + const id = origin.slice('chrome-extension://'.length).replace(/\/$/, ''); + return id === PINNED_EXTENSION_ID; + } + return true; +} + // ─── State ─────────────────────────────────────────────────────────── let extensionWs: WebSocket | null = null; @@ -54,7 +121,7 @@ function resetIdleTimer(): void { if (idleTimer) clearTimeout(idleTimer); idleTimer = setTimeout(() => { console.error('[daemon] Idle timeout, shutting down'); - process.exit(EXIT_CODES.SUCCESS); + shutdown(); }, IDLE_TIMEOUT); } @@ -106,13 +173,10 @@ async function handleRequest(req: IncomingMessage, res: ServerResponse): Promise const url = req.url ?? '/'; const pathname = url.split('?')[0]; - // Health-check endpoint — no X-OpenCLI header required. + // Health-check endpoint — no auth required, intentionally minimal. // Used by the extension to silently probe daemon reachability before // attempting a WebSocket connection (avoids uncatchable ERR_CONNECTION_REFUSED). - // Security note: this endpoint is reachable by any client that passes the - // origin check above (chrome-extension:// or no Origin header, e.g. curl). - // Timing side-channels can reveal daemon presence to local processes, which - // is an accepted risk given the daemon is loopback-only and short-lived. + // Returns only {ok:true} — no sensitive data, no state mutation. 
if (req.method === 'GET' && pathname === '/ping') { jsonResponse(res, 200, { ok: true }); return; @@ -127,6 +191,15 @@ async function handleRequest(req: IncomingMessage, res: ServerResponse): Promise return; } + // ─── Bearer Token check (layer-6 auth) ────────────────────────────── + // Reject requests that don't carry the per-process token. This ensures + // no other local process (malware, shared host tenant, etc.) can control + // the daemon even if they discover the port and add the X-OpenCLI header. + if (!checkBearerToken(req)) { + jsonResponse(res, 401, { ok: false, error: 'Unauthorized: invalid or missing Bearer token' }); + return; + } + if (req.method === 'GET' && pathname === '/status') { jsonResponse(res, 200, { ok: true, @@ -203,8 +276,10 @@ const wss = new WebSocketServer({ // enforce CORS on WebSocket, so a malicious webpage could connect to // ws://localhost:19825/ext and impersonate the Extension. Real Chrome // Extensions send origin chrome-extension://. + // + // If OPENCLI_EXTENSION_ID is set, further pin to that exact extension build. 
const origin = req.headers['origin'] as string | undefined; - return !origin || origin.startsWith('chrome-extension://'); + return isAllowedExtensionOrigin(origin); }, }); @@ -320,6 +395,8 @@ function shutdown(): void { pending.clear(); if (extensionWs) extensionWs.close(); httpServer.close(); + // Remove the token file so stale tokens can't be reused after restart + try { nodefs.unlinkSync(TOKEN_FILE); } catch { /* ignore if already gone */ } process.exit(EXIT_CODES.SUCCESS); } diff --git a/src/execution-cdp.test.ts b/src/execution-cdp.test.ts new file mode 100644 index 00000000..7653a47f --- /dev/null +++ b/src/execution-cdp.test.ts @@ -0,0 +1,45 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { cli, Strategy } from './registry.js'; + +const mockCheckDaemonStatus = vi.fn(); +const mockBrowserSession = vi.fn(); + +vi.mock('./browser/discover.js', () => ({ + checkDaemonStatus: mockCheckDaemonStatus, +})); + +vi.mock('./runtime.js', async () => { + const actual = await vi.importActual('./runtime.js'); + return { + ...actual, + browserSession: mockBrowserSession, + }; +}); + +describe('executeCommand with OPENCLI_CDP_ENDPOINT', () => { + beforeEach(() => { + vi.resetModules(); + vi.unstubAllEnvs(); + mockCheckDaemonStatus.mockReset(); + mockBrowserSession.mockReset(); + }); + + it('does not fail fast on Browser Bridge status when CDP mode is enabled', async () => { + vi.stubEnv('OPENCLI_CDP_ENDPOINT', 'ws://127.0.0.1:9222/devtools/browser/browser-1'); + mockCheckDaemonStatus.mockResolvedValue({ running: true, extensionConnected: false }); + mockBrowserSession.mockImplementation(async (_factory, fn) => fn({} as any)); + + const { executeCommand } = await import('./execution.js'); + const cmd = cli({ + site: 'test-execution', + name: 'cdp-browser-command', + description: 'test cdp browser execution path', + strategy: Strategy.COOKIE, + domain: 'www.xiaohongshu.com', + func: async () => 'ok', + }); + + await expect(executeCommand(cmd, 
{})).resolves.toBe('ok'); + expect(mockBrowserSession).toHaveBeenCalledTimes(1); + }); +}); diff --git a/src/execution.ts b/src/execution.ts index ab0b5e1b..917e6c6c 100644 --- a/src/execution.ts +++ b/src/execution.ts @@ -169,21 +169,24 @@ export async function executeCommand( let result: unknown; try { if (shouldUseBrowserSession(cmd)) { + const usesDirectCdp = !!process.env.OPENCLI_CDP_ENDPOINT; // ── Fail-fast: only when daemon is UP but extension is not connected ── // If daemon is not running, let browserSession() handle auto-start as usual. // We only short-circuit when the daemon confirms the extension is missing — // that's a clear setup gap, not a transient startup state. // Use a short timeout: localhost responds in <50ms when running. // 300ms avoids a full 2s wait on cold-start (daemon not yet running). - const status = await checkDaemonStatus({ timeout: 300 }); - if (status.running && !status.extensionConnected) { - throw new BrowserConnectError( - 'Browser Bridge extension not connected', - 'Install the Browser Bridge:\n' + - ' 1. Download: https://github.com/jackwener/opencli/releases\n' + - ' 2. chrome://extensions → Developer Mode → Load unpacked\n' + - ' Then run: opencli doctor', - ); + if (!usesDirectCdp) { + const status = await checkDaemonStatus({ timeout: 300 }); + if (status.running && !status.extensionConnected) { + throw new BrowserConnectError( + 'Browser Bridge extension not connected', + 'Install the Browser Bridge:\n' + + ' 1. Download: https://github.com/jackwener/opencli/releases\n' + + ' 2. 
chrome://extensions → Developer Mode → Load unpacked\n' + + ' Then run: opencli doctor', + ); + } } ensureRequiredEnv(cmd); const BrowserFactory = getBrowserFactory(); diff --git a/src/pipeline/steps/fetch.ts b/src/pipeline/steps/fetch.ts index edecda63..d0a3287b 100644 --- a/src/pipeline/steps/fetch.ts +++ b/src/pipeline/steps/fetch.ts @@ -39,6 +39,15 @@ async function fetchSingle( const headersJs = JSON.stringify(renderedHeaders); const urlJs = JSON.stringify(finalUrl); const methodJs = JSON.stringify(method.toUpperCase()); + // Security note: `credentials: "include"` sends the browser's session cookies + // with every in-page fetch. This is intentional for authenticated scraping + // (e.g. fetching user-specific API endpoints that require the logged-in session), + // but it means the pipeline must only request URLs the user has authorised. + // Never pass user-controlled or untrusted URLs into this step — a malicious + // URL could harvest session cookies via a CSRF-style request. + // To opt out, set `credentials: omit` in the step headers or use a Node-side + // fetch (no page) which never carries browser cookies. + // // Return error status instead of throwing inside evaluate to avoid CDP wrapper // rewriting the message (CDP prepends "Evaluate error: " to thrown errors). const result = await page.evaluate(` @@ -81,6 +90,7 @@ async function fetchBatchInBrowser( const results = new Array(urls.length); let idx = 0; + // credentials:"include" sends session cookies — only use with trusted URLs. async function worker() { while (idx < urls.length) { const i = idx++; diff --git a/src/types.ts b/src/types.ts index f1647e6e..e692acec 100644 --- a/src/types.ts +++ b/src/types.ts @@ -13,6 +13,47 @@ export interface BrowserCookie { secure?: boolean; httpOnly?: boolean; expirationDate?: number; + /** Set to true when value has been redacted for security logging. */ + _redacted?: boolean; +} + +/** + * Names (case-insensitive) whose values should be treated as secrets. 
+ * Used by redactCookies() to replace values with '[REDACTED]' in logs/output. + */ +export const SENSITIVE_COOKIE_NAMES = new Set([ + 'session', 'sess', 'sid', 'auth', 'token', 'access_token', 'refresh_token', + 'jwt', 'bearer', 'api_key', 'apikey', 'secret', 'password', 'passwd', 'pwd', + 'credential', 'credentials', 'authorization', + // Common session cookie names used by popular platforms + 'JSESSIONID', 'PHPSESSID', 'ASP.NET_SessionId', '__Secure-next-auth.session-token', + 'supabase-auth-token', '__session', 'connect.sid', +]); + +/** + * Returns true if the cookie name appears to hold a sensitive credential. + * Matching is case-insensitive and also catches partial matches + * (e.g. "app_session_id" → true). + */ +export function isSensitiveCookieName(name: string): boolean { + const lower = name.toLowerCase(); + for (const s of SENSITIVE_COOKIE_NAMES) { + if (lower === s.toLowerCase() || lower.includes(s.toLowerCase())) return true; + } + return false; +} + +/** + * Return a copy of the cookie list with sensitive values replaced by '[REDACTED]'. + * HttpOnly cookies are always redacted regardless of name, because they are + * typically session/auth cookies that websites intentionally protect from JS. + */ +export function redactCookies(cookies: BrowserCookie[]): BrowserCookie[] { + return cookies.map((c) => { + const shouldRedact = c.httpOnly || isSensitiveCookieName(c.name); + if (!shouldRedact) return c; + return { ...c, value: '[REDACTED]', _redacted: true }; + }); } export interface SnapshotOptions {