diff --git a/src/browser/cdp.ts b/src/browser/cdp.ts index 72bcf87f..915c48b2 100644 --- a/src/browser/cdp.ts +++ b/src/browser/cdp.ts @@ -14,6 +14,7 @@ import { request as httpsRequest } from 'node:https'; import type { BrowserCookie, IPage, ScreenshotOptions, SnapshotOptions, WaitOptions } from '../types.js'; import type { IBrowserFactory } from '../runtime.js'; import { wrapForEval } from './utils.js'; +import { normalizeWaitTimeoutMs } from './wait.js'; import { generateSnapshotJs, scrollToRefJs, getFormStateJs } from './dom-snapshot.js'; import { generateStealthJs } from './stealth.js'; import { @@ -267,12 +268,12 @@ class CDPPage implements IPage { return; } if (options.selector) { - const timeout = (options.timeout ?? 10) * 1000; + const timeout = normalizeWaitTimeoutMs(options.timeout, 10); await this.evaluate(waitForSelectorJs(options.selector, timeout)); return; } if (options.text) { - const timeout = (options.timeout ?? 30) * 1000; + const timeout = normalizeWaitTimeoutMs(options.timeout, 30); await this.evaluate(waitForTextJs(options.text, timeout)); } } diff --git a/src/browser/page.ts b/src/browser/page.ts index a1bd2264..146625b3 100644 --- a/src/browser/page.ts +++ b/src/browser/page.ts @@ -14,6 +14,7 @@ import { formatSnapshot } from '../snapshotFormatter.js'; import type { BrowserCookie, IPage, ScreenshotOptions, SnapshotOptions, WaitOptions } from '../types.js'; import { sendCommand } from './daemon-client.js'; import { wrapForEval } from './utils.js'; +import { normalizeWaitTimeoutMs } from './wait.js'; import { saveBase64ToFile } from '../utils.js'; import { generateSnapshotJs, scrollToRefJs, getFormStateJs } from './dom-snapshot.js'; import { generateStealthJs } from './stealth.js'; @@ -239,13 +240,13 @@ export class Page implements IPage { return; } if (options.selector) { - const timeout = (options.timeout ?? 10) * 1000; + const timeout = normalizeWaitTimeoutMs(options.timeout, 10); const code = waitForSelectorJs(options.selector, timeout); await sendCommand('exec', { code, ...this._cmdOpts() }); return; } if (options.text) { - const timeout = (options.timeout ?? 30) * 1000; + const timeout = normalizeWaitTimeoutMs(options.timeout, 30); const code = waitForTextJs(options.text, timeout); await sendCommand('exec', { code, ...this._cmdOpts() }); } diff --git a/src/browser/wait.test.ts b/src/browser/wait.test.ts new file mode 100644 index 00000000..f63bdca5 --- /dev/null +++ b/src/browser/wait.test.ts @@ -0,0 +1,23 @@ +import { describe, expect, it } from 'vitest'; + +import { normalizeWaitTimeoutMs } from './wait.js'; + +describe('normalizeWaitTimeoutMs', () => { + it('uses the default timeout when no timeout is provided', () => { + expect(normalizeWaitTimeoutMs(undefined, 10)).toBe(10_000); + }); + + it('treats sub-1000 values as seconds', () => { + expect(normalizeWaitTimeoutMs(3, 10)).toBe(3_000); + expect(normalizeWaitTimeoutMs(0.5, 10)).toBe(500); + }); + + it('treats large values as milliseconds for backward compatibility', () => { + expect(normalizeWaitTimeoutMs(10_000, 10)).toBe(10_000); + }); + + it('clamps non-positive values to zero', () => { + expect(normalizeWaitTimeoutMs(0, 10)).toBe(0); + expect(normalizeWaitTimeoutMs(-2, 10)).toBe(0); + }); +}); diff --git a/src/browser/wait.ts b/src/browser/wait.ts new file mode 100644 index 00000000..dd0b98d1 --- /dev/null +++ b/src/browser/wait.ts @@ -0,0 +1,17 @@ +export function normalizeWaitTimeoutMs(timeout: number | undefined, defaultSeconds: number): number { + if (typeof timeout !== 'number' || !Number.isFinite(timeout)) { + return defaultSeconds * 1000; + } + + if (timeout <= 0) { + return 0; + } + + // Older adapters occasionally passed milliseconds here even though + // wait({ timeout }) is documented in seconds. Preserve those callers. + if (timeout >= 1000) { + return Math.round(timeout); + } + + return Math.round(timeout * 1000); +} diff --git a/src/clis/36kr/article.test.ts b/src/clis/36kr/article.test.ts new file mode 100644 index 00000000..00de82d6 --- /dev/null +++ b/src/clis/36kr/article.test.ts @@ -0,0 +1,41 @@ +import { beforeAll, describe, expect, it, vi } from 'vitest'; + +import { getRegistry } from '../../registry.js'; + +beforeAll(async () => { + await import('./article.js'); +}); + +describe('36kr/article', () => { + it('waits for capture before scraping the rendered article', async () => { + const command = getRegistry().get('36kr/article'); + expect(command?.func).toBeTypeOf('function'); + + const page = { + installInterceptor: vi.fn().mockResolvedValue(undefined), + goto: vi.fn().mockResolvedValue(undefined), + waitForCapture: vi.fn().mockResolvedValue(undefined), + wait: vi.fn().mockResolvedValue(undefined), + evaluate: vi.fn().mockResolvedValue({ + title: 'OpenCLI ships faster 36kr article scraping', + author: 'opencli', + date: '2026-03-31', + body: 'A useful article body', + }), + } as any; + + const result = await command!.func!(page, { id: 'https://www.36kr.com/p/321654987' }); + + expect(page.installInterceptor).toHaveBeenCalledWith('36kr.com/api'); + expect(page.goto).toHaveBeenCalledWith('https://www.36kr.com/p/321654987'); + expect(page.waitForCapture).toHaveBeenCalledWith(6); + expect(page.wait).toHaveBeenCalledWith({ selector: '.article-title, h1', timeout: 3 }); + expect(result).toEqual([ + { field: 'title', value: 'OpenCLI ships faster 36kr article scraping' }, + { field: 'author', value: 'opencli' }, + { field: 'date', value: '2026-03-31' }, + { field: 'url', value: 'https://36kr.com/p/321654987' }, + { field: 'body', value: 'A useful article body' }, + ]); + }); +}); diff --git a/src/clis/36kr/article.ts b/src/clis/36kr/article.ts index e8d24ef2..74765591 100644 --- a/src/clis/36kr/article.ts +++ b/src/clis/36kr/article.ts @@ -31,7 +31,8 @@ cli({ await page.installInterceptor('36kr.com/api'); await page.goto(`https://www.36kr.com/p/${articleId}`); - await page.wait(5); + await page.waitForCapture(6); + await page.wait({ selector: '.article-title, h1', timeout: 3 }); const data: any = await page.evaluate(` (() => { diff --git a/src/clis/sinafinance/rolling-news.ts b/src/clis/sinafinance/rolling-news.ts index 9e0cac9d..8f40f7b4 100644 --- a/src/clis/sinafinance/rolling-news.ts +++ b/src/clis/sinafinance/rolling-news.ts @@ -14,7 +14,7 @@ cli({ columns: ['column', 'title', 'date', 'url'], func: async (page, _args) => { await page.goto(`https://finance.sina.com.cn/roll/#pageid=384&lid=2519`); - await page.wait({ selector: '.d_list_txt li', timeout: 10000 }); + await page.wait({ selector: '.d_list_txt li', timeout: 10 }); const payload = await page.evaluate(` (() => { diff --git a/src/types.ts b/src/types.ts index 43ceb983..e3b0c590 100644 --- a/src/types.ts +++ b/src/types.ts @@ -28,7 +28,7 @@ export interface WaitOptions { text?: string; selector?: string; // wait until document.querySelector(selector) matches time?: number; - timeout?: number; + timeout?: number; // seconds by default; values >= 1000 are treated as ms for backward compatibility } export interface ScreenshotOptions {