diff --git a/src/clis/gemini/ask.test.ts b/src/clis/gemini/ask.test.ts new file mode 100644 index 00000000..ce37bb17 --- /dev/null +++ b/src/clis/gemini/ask.test.ts @@ -0,0 +1,116 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import type { IPage } from '../../types.js'; + +const baseline = { + turns: [{ Role: 'Assistant', Text: '旧回答' }], + transcriptLines: ['baseline'], + composerHasText: true, + isGenerating: false, + structuredTurnsTrusted: true, +}; + +const submission = { + snapshot: { + turns: [ + { Role: 'Assistant', Text: '旧回答' }, + { Role: 'User', Text: '请只回复:OK' }, + ], + transcriptLines: ['baseline', '请只回复:OK'], + composerHasText: false, + isGenerating: true, + structuredTurnsTrusted: true, + }, + preSendAssistantCount: 1, + userAnchorTurn: { Role: 'User', Text: '请只回复:OK' }, + reason: 'user_turn' as const, +}; + +const mocks = vi.hoisted(() => ({ + readGeminiSnapshot: vi.fn(), + sendGeminiMessage: vi.fn(), + startNewGeminiChat: vi.fn(), + waitForGeminiSubmission: vi.fn(), + waitForGeminiResponse: vi.fn(), +})); + +vi.mock('./utils.js', async () => { + const actual = await vi.importActual('./utils.js'); + return { + ...actual, + readGeminiSnapshot: mocks.readGeminiSnapshot, + sendGeminiMessage: mocks.sendGeminiMessage, + startNewGeminiChat: mocks.startNewGeminiChat, + waitForGeminiSubmission: mocks.waitForGeminiSubmission, + waitForGeminiResponse: mocks.waitForGeminiResponse, + }; +}); + +import { askCommand } from './ask.js'; + +function createPageMock(): IPage { + return { + goto: vi.fn().mockResolvedValue(undefined), + evaluate: vi.fn(), + getCookies: vi.fn().mockResolvedValue([]), + snapshot: vi.fn().mockResolvedValue(undefined), + click: vi.fn().mockResolvedValue(undefined), + typeText: vi.fn().mockResolvedValue(undefined), + pressKey: vi.fn().mockResolvedValue(undefined), + scrollTo: vi.fn().mockResolvedValue(undefined), + getFormState: vi.fn().mockResolvedValue({}), + wait: vi.fn().mockResolvedValue(undefined), + tabs: 
vi.fn().mockResolvedValue([]), + selectTab: vi.fn().mockResolvedValue(undefined), + networkRequests: vi.fn().mockResolvedValue([]), + consoleMessages: vi.fn().mockResolvedValue([]), + scroll: vi.fn().mockResolvedValue(undefined), + autoScroll: vi.fn().mockResolvedValue(undefined), + installInterceptor: vi.fn().mockResolvedValue(undefined), + getInterceptedRequests: vi.fn().mockResolvedValue([]), + waitForCapture: vi.fn().mockResolvedValue(undefined), + screenshot: vi.fn().mockResolvedValue(''), + nativeType: vi.fn().mockResolvedValue(undefined), + nativeKeyPress: vi.fn().mockResolvedValue(undefined), + } as unknown as IPage; +} + +describe('gemini ask orchestration', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('captures baseline, sends, waits for confirmed submission, then waits with the remaining timeout', async () => { + vi.spyOn(Date, 'now') + .mockReturnValueOnce(0) + .mockReturnValueOnce(2000); + + const page = createPageMock(); + mocks.readGeminiSnapshot.mockResolvedValueOnce(baseline); + mocks.sendGeminiMessage.mockResolvedValueOnce('button'); + mocks.waitForGeminiSubmission.mockResolvedValueOnce(submission); + mocks.waitForGeminiResponse.mockResolvedValueOnce('OK'); + + const result = await askCommand.func!(page, { prompt: '请只回复:OK', timeout: '20', new: 'false' }); + + expect(mocks.readGeminiSnapshot).toHaveBeenCalledWith(page); + expect(mocks.waitForGeminiSubmission).toHaveBeenCalledWith(page, baseline, 20); + expect(mocks.waitForGeminiResponse).toHaveBeenCalledWith(page, submission, '请只回复:OK', 18); + expect(result).toEqual([{ response: '💬 OK' }]); + }); + + it('does not spend extra response wait time after submission has already consumed the full timeout budget', async () => { + vi.spyOn(Date, 'now') + .mockReturnValueOnce(0) + .mockReturnValueOnce(20000); + + const page = createPageMock(); + mocks.readGeminiSnapshot.mockResolvedValueOnce(baseline); + mocks.sendGeminiMessage.mockResolvedValueOnce('button'); + 
mocks.waitForGeminiSubmission.mockResolvedValueOnce(submission); + mocks.waitForGeminiResponse.mockResolvedValueOnce(''); + + await askCommand.func!(page, { prompt: '请只回复:OK', timeout: '20', new: 'false' }); + + expect(mocks.waitForGeminiResponse).toHaveBeenCalledWith(page, submission, '请只回复:OK', 0); + }); +}); diff --git a/src/clis/gemini/ask.ts b/src/clis/gemini/ask.ts index 8451f5fe..522c3b21 100644 --- a/src/clis/gemini/ask.ts +++ b/src/clis/gemini/ask.ts @@ -1,6 +1,6 @@ import { cli, Strategy } from '../../registry.js'; import type { IPage } from '../../types.js'; -import { GEMINI_DOMAIN, getGeminiTranscriptLines, sendGeminiMessage, startNewGeminiChat, waitForGeminiResponse } from './utils.js'; +import { GEMINI_DOMAIN, readGeminiSnapshot, sendGeminiMessage, startNewGeminiChat, waitForGeminiResponse, waitForGeminiSubmission } from './utils.js'; function normalizeBooleanFlag(value: unknown): boolean { if (typeof value === 'boolean') return value; @@ -33,9 +33,16 @@ export const askCommand = cli({ if (startFresh) await startNewGeminiChat(page); - const beforeLines = await getGeminiTranscriptLines(page); + const before = await readGeminiSnapshot(page); await sendGeminiMessage(page, prompt); - const response = await waitForGeminiResponse(page, beforeLines, prompt, timeout); + const submissionStartedAt = Date.now(); + const submitted = await waitForGeminiSubmission(page, before, timeout); + if (!submitted) { + return [{ response: `💬 ${NO_RESPONSE_PREFIX} No Gemini response within ${timeout}s.` }]; + } + + const remainingTimeoutSeconds = Math.max(0, timeout - Math.ceil((Date.now() - submissionStartedAt) / 1000)); + const response = await waitForGeminiResponse(page, submitted, prompt, remainingTimeoutSeconds); if (!response) { return [{ response: `💬 ${NO_RESPONSE_PREFIX} No Gemini response within ${timeout}s.` }]; diff --git a/src/clis/gemini/reply-state.test.ts b/src/clis/gemini/reply-state.test.ts new file mode 100644 index 00000000..2ca6ddd0 --- /dev/null +++ 
b/src/clis/gemini/reply-state.test.ts @@ -0,0 +1,708 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { IPage } from '../../types.js'; +import type { GeminiSnapshot } from './utils.js'; +import { __test__, waitForGeminiResponse, waitForGeminiSubmission } from './utils.js'; + +function snapshot(overrides: Partial<GeminiSnapshot> = {}): GeminiSnapshot { /* Test fixture: an empty, trusted, idle snapshot whose fields are shallowly replaced by `overrides`. */ + return { + turns: [], + transcriptLines: [], + composerHasText: false, + isGenerating: false, + structuredTurnsTrusted: true, + ...overrides, + }; +} + +function createPageMock(): IPage { + return { + goto: vi.fn().mockResolvedValue(undefined), + evaluate: vi.fn(), + getCookies: vi.fn().mockResolvedValue([]), + snapshot: vi.fn().mockResolvedValue(undefined), + click: vi.fn().mockResolvedValue(undefined), + typeText: vi.fn().mockResolvedValue(undefined), + pressKey: vi.fn().mockResolvedValue(undefined), + scrollTo: vi.fn().mockResolvedValue(undefined), + getFormState: vi.fn().mockResolvedValue({}), + wait: vi.fn().mockResolvedValue(undefined), + tabs: vi.fn().mockResolvedValue([]), + selectTab: vi.fn().mockResolvedValue(undefined), + networkRequests: vi.fn().mockResolvedValue([]), + consoleMessages: vi.fn().mockResolvedValue([]), + scroll: vi.fn().mockResolvedValue(undefined), + autoScroll: vi.fn().mockResolvedValue(undefined), + installInterceptor: vi.fn().mockResolvedValue(undefined), + getInterceptedRequests: vi.fn().mockResolvedValue([]), + waitForCapture: vi.fn().mockResolvedValue(undefined), + screenshot: vi.fn().mockResolvedValue(''), + nativeType: vi.fn().mockResolvedValue(undefined), + nativeKeyPress: vi.fn().mockResolvedValue(undefined), + } as unknown as IPage; +} + +describe('Gemini snapshot diff helpers', () => { + it('reports appended trusted turns when the current snapshot extends the baseline', () => { + const before = snapshot({ + turns: [{ Role: 'Assistant', Text: '旧回答' }], + }); + const current = snapshot({ + turns: [ + { Role: 'Assistant', Text: '旧回答' }, + { Role: 'User', Text: '请只回复:OK' }, + 
], + }); + + expect(__test__.diffTrustedStructuredTurns(before, current)).toEqual({ + appendedTurns: [{ Role: 'User', Text: '请只回复:OK' }], + hasTrustedAppend: true, + hasNewUserTurn: true, + hasNewAssistantTurn: false, + }); + }); + + it('treats restored structured turns as untrusted when the pre-send snapshot had no trustworthy turns', () => { + const before = snapshot({ + turns: [], + transcriptLines: ['旧问题', '旧回答'], + structuredTurnsTrusted: false, + }); + const current = snapshot({ + turns: [ + { Role: 'User', Text: '旧问题' }, + { Role: 'Assistant', Text: '旧回答' }, + ], + transcriptLines: ['旧问题', '旧回答'], + structuredTurnsTrusted: true, + }); + + expect(__test__.diffTrustedStructuredTurns(before, current)).toEqual({ + appendedTurns: [], + hasTrustedAppend: false, + hasNewUserTurn: false, + hasNewAssistantTurn: false, + }); + }); + + it('keeps transcript delta lines raw for later conservative fallback checks', () => { + const before = snapshot({ + transcriptLines: ['baseline'], + }); + const current = snapshot({ + transcriptLines: ['baseline', '关于“请只回复:OK”,这里是解释。'], + structuredTurnsTrusted: false, + }); + + expect(__test__.diffTranscriptLines(before, current)).toEqual([ + '关于“请只回复:OK”,这里是解释。', + ]); + }); +}); + +describe('Gemini submission state', () => { + it('confirms submission from a trusted appended user turn', async () => { + const page = createPageMock(); + const evaluate = vi.mocked(page.evaluate); + + evaluate + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [ + { Role: 'Assistant', Text: '旧回答' }, + { Role: 'User', Text: '请只回复:OK' }, + ], + transcriptLines: ['baseline', '请只回复:OK'], + composerHasText: false, + isGenerating: true, + structuredTurnsTrusted: true, + }); + + const result = await waitForGeminiSubmission(page, snapshot({ + turns: [{ Role: 'Assistant', Text: '旧回答' }], + transcriptLines: ['baseline'], + composerHasText: true, + structuredTurnsTrusted: true, + }), 4); + + expect(result).toEqual({ + 
snapshot: { + turns: [ + { Role: 'Assistant', Text: '旧回答' }, + { Role: 'User', Text: '请只回复:OK' }, + ], + transcriptLines: ['baseline', '请只回复:OK'], + composerHasText: false, + isGenerating: true, + structuredTurnsTrusted: true, + }, + preSendAssistantCount: 1, + userAnchorTurn: { Role: 'User', Text: '请只回复:OK' }, + reason: 'user_turn', + }); + }); + + it('confirms submission from composer cleared plus generating even when transcript has not changed yet', async () => { + const page = createPageMock(); + const evaluate = vi.mocked(page.evaluate); + + evaluate + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [], + transcriptLines: ['baseline'], + composerHasText: false, + isGenerating: true, + structuredTurnsTrusted: false, + }); + + const result = await waitForGeminiSubmission(page, snapshot({ + transcriptLines: ['baseline'], + composerHasText: true, + structuredTurnsTrusted: false, + }), 2); + + expect(result).toEqual({ + snapshot: { + turns: [], + transcriptLines: ['baseline'], + composerHasText: false, + isGenerating: true, + structuredTurnsTrusted: false, + }, + preSendAssistantCount: 0, + userAnchorTurn: null, + reason: 'composer_generating', + }); + }); + + it('confirms submission from generating state even when the pre-send baseline composer was empty', async () => { + const page = createPageMock(); + const evaluate = vi.mocked(page.evaluate); + + evaluate + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [ + { Role: 'User', Text: '你说\n\n请只回复:DBG2' }, + { Role: 'User', Text: '请只回复:DBG2' }, + ], + transcriptLines: ['baseline', '请只回复:DBG2'], + composerHasText: false, + isGenerating: true, + structuredTurnsTrusted: true, + }); + + const result = await waitForGeminiSubmission(page, snapshot({ + turns: [{ Role: 'Assistant', Text: '需要我为你做些什么?' 
}], + transcriptLines: ['baseline'], + composerHasText: false, + structuredTurnsTrusted: true, + }), 2); + + expect(result).toEqual({ + snapshot: { + turns: [ + { Role: 'User', Text: '你说\n\n请只回复:DBG2' }, + { Role: 'User', Text: '请只回复:DBG2' }, + ], + transcriptLines: ['baseline', '请只回复:DBG2'], + composerHasText: false, + isGenerating: true, + structuredTurnsTrusted: true, + }, + preSendAssistantCount: 1, + userAnchorTurn: { Role: 'User', Text: '请只回复:DBG2' }, + reason: 'composer_generating', + }); + }); + + it('confirms submission from composer cleared plus transcript growth when generation state is unavailable', async () => { + const page = createPageMock(); + const evaluate = vi.mocked(page.evaluate); + + // This transcript delta may be only a prompt echo. It is allowed to confirm + // submission only because the composer has already cleared, and it must never + // be reused later as reply ownership evidence. + evaluate + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [], + transcriptLines: ['baseline', '请只回复:OK'], + composerHasText: false, + isGenerating: false, + structuredTurnsTrusted: false, + }); + + const result = await waitForGeminiSubmission(page, snapshot({ + transcriptLines: ['baseline'], + composerHasText: true, + structuredTurnsTrusted: false, + }), 2); + + expect(result).toEqual({ + snapshot: { + turns: [], + transcriptLines: ['baseline', '请只回复:OK'], + composerHasText: false, + isGenerating: false, + structuredTurnsTrusted: false, + }, + preSendAssistantCount: 0, + userAnchorTurn: null, + reason: 'composer_transcript', + }); + }); + + it('does not confirm submission when old structured turns only reappear after an untrusted pre-send snapshot', async () => { + const page = createPageMock(); + const evaluate = vi.mocked(page.evaluate); + + evaluate + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [ + { Role: 'User', Text: '旧问题' }, + { Role: 'Assistant', Text: '旧回答' 
}, + ], + transcriptLines: ['旧问题', '旧回答'], + composerHasText: false, + isGenerating: false, + structuredTurnsTrusted: true, + }) + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [ + { Role: 'User', Text: '旧问题' }, + { Role: 'Assistant', Text: '旧回答' }, + ], + transcriptLines: ['旧问题', '旧回答'], + composerHasText: false, + isGenerating: false, + structuredTurnsTrusted: true, + }); + + const result = await waitForGeminiSubmission(page, snapshot({ + turns: [], + transcriptLines: ['旧问题', '旧回答'], + composerHasText: true, + structuredTurnsTrusted: false, + }), 2); + + expect(result).toBeNull(); + }); + + it('does not confirm submission from transcript growth alone when the composer never clears', async () => { + const page = createPageMock(); + const evaluate = vi.mocked(page.evaluate); + + evaluate + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [], + transcriptLines: ['baseline', '请只回复:OK'], + composerHasText: true, + isGenerating: false, + structuredTurnsTrusted: false, + }) + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [], + transcriptLines: ['baseline', '请只回复:OK'], + composerHasText: true, + isGenerating: false, + structuredTurnsTrusted: false, + }); + + const result = await waitForGeminiSubmission(page, snapshot({ + transcriptLines: ['baseline'], + composerHasText: true, + structuredTurnsTrusted: false, + }), 2); + + expect(result).toBeNull(); + }); + + it('keeps polling past ten seconds when the overall timeout budget still allows submission confirmation', async () => { + const page = createPageMock(); + const evaluate = vi.mocked(page.evaluate); + + for (let index = 0; index < 10; index += 1) { + evaluate + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [], + transcriptLines: ['baseline'], + composerHasText: true, + isGenerating: false, + structuredTurnsTrusted: false, + }); + } + + 
evaluate + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [], + transcriptLines: ['baseline', '请只回复:OK'], + composerHasText: false, + isGenerating: false, + structuredTurnsTrusted: false, + }); + + const result = await waitForGeminiSubmission(page, snapshot({ + transcriptLines: ['baseline'], + composerHasText: true, + structuredTurnsTrusted: false, + }), 12); + + expect(result?.reason).toBe('composer_transcript'); + }); +}); + +describe('Gemini reply state', () => { + it('does not reuse an older identical reply when the submission baseline has no structured user anchor', async () => { + const page = createPageMock(); + const evaluate = vi.mocked(page.evaluate); + + evaluate + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [{ Role: 'Assistant', Text: 'OK' }], + transcriptLines: ['baseline', 'OK'], + composerHasText: false, + isGenerating: false, + structuredTurnsTrusted: true, + }) + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [ + { Role: 'Assistant', Text: 'OK' }, + { Role: 'Assistant', Text: 'OK' }, + ], + transcriptLines: ['baseline', 'OK'], + composerHasText: false, + isGenerating: false, + structuredTurnsTrusted: true, + }) + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [ + { Role: 'Assistant', Text: 'OK' }, + { Role: 'Assistant', Text: 'OK' }, + ], + transcriptLines: ['baseline', 'OK'], + composerHasText: false, + isGenerating: false, + structuredTurnsTrusted: true, + }); + + const result = await waitForGeminiResponse(page, { + snapshot: snapshot({ + turns: [{ Role: 'Assistant', Text: 'OK' }], + transcriptLines: ['baseline', 'OK'], + composerHasText: false, + isGenerating: true, + structuredTurnsTrusted: true, + }), + preSendAssistantCount: 1, + userAnchorTurn: null, + reason: 'composer_generating', + }, '请只回复:OK', 6); + + expect(result).toBe('OK'); + }); + + it('does not treat 
prepended older history as the current round reply when reply ownership has no user anchor', async () => { + const page = createPageMock(); + const evaluate = vi.mocked(page.evaluate); + + evaluate + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [ + { Role: 'Assistant', Text: '更早的问题' }, + { Role: 'Assistant', Text: '旧回答' }, + ], + transcriptLines: ['baseline'], + composerHasText: false, + isGenerating: false, + structuredTurnsTrusted: true, + }) + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [ + { Role: 'Assistant', Text: '更早的问题' }, + { Role: 'Assistant', Text: '旧回答' }, + ], + transcriptLines: ['baseline'], + composerHasText: false, + isGenerating: false, + structuredTurnsTrusted: true, + }) + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [ + { Role: 'Assistant', Text: '更早的问题' }, + { Role: 'Assistant', Text: '旧回答' }, + ], + transcriptLines: ['baseline'], + composerHasText: false, + isGenerating: false, + structuredTurnsTrusted: true, + }); + + const result = await waitForGeminiResponse(page, { + snapshot: snapshot({ + turns: [{ Role: 'Assistant', Text: '旧回答' }], + transcriptLines: ['baseline'], + composerHasText: false, + isGenerating: true, + structuredTurnsTrusted: true, + }), + preSendAssistantCount: 1, + userAnchorTurn: null, + reason: 'composer_generating', + }, '请只回复:OK', 6); + + expect(result).toBe(''); + }); + + it('accepts a reply when the submission snapshot contains only the current round user turns and later appends a new assistant', async () => { + const page = createPageMock(); + const evaluate = vi.mocked(page.evaluate); + + evaluate + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [ + { Role: 'User', Text: '你说\n\n请只回复:DBGREG' }, + { Role: 'User', Text: '请只回复:DBGREG' }, + { Role: 'Assistant', Text: 'DBGREG' }, + ], + transcriptLines: ['baseline'], + composerHasText: 
false, + isGenerating: false, + structuredTurnsTrusted: true, + }) + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [ + { Role: 'User', Text: '你说\n\n请只回复:DBGREG' }, + { Role: 'User', Text: '请只回复:DBGREG' }, + { Role: 'Assistant', Text: 'DBGREG' }, + ], + transcriptLines: ['baseline'], + composerHasText: false, + isGenerating: false, + structuredTurnsTrusted: true, + }); + + const result = await waitForGeminiResponse(page, { + snapshot: snapshot({ + turns: [ + { Role: 'User', Text: '你说\n\n请只回复:DBGREG' }, + { Role: 'User', Text: '请只回复:DBGREG' }, + ], + transcriptLines: ['baseline'], + composerHasText: false, + isGenerating: true, + structuredTurnsTrusted: true, + }), + preSendAssistantCount: 1, + userAnchorTurn: { Role: 'User', Text: '请只回复:DBGREG' }, + reason: 'composer_generating', + }, '请只回复:DBGREG', 6); + + expect(result).toBe('DBGREG'); + }); + + it('does not trust an assistant-only submission snapshot without a stable post-submission owner', async () => { + const page = createPageMock(); + const evaluate = vi.mocked(page.evaluate); + + evaluate + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [{ Role: 'Assistant', Text: '完整回答' }], + transcriptLines: ['baseline'], + composerHasText: false, + isGenerating: false, + structuredTurnsTrusted: true, + }) + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [{ Role: 'Assistant', Text: '完整回答' }], + transcriptLines: ['baseline'], + composerHasText: false, + isGenerating: false, + structuredTurnsTrusted: true, + }); + + const result = await waitForGeminiResponse(page, { + snapshot: snapshot({ + turns: [{ Role: 'Assistant', Text: '半截回答' }], + transcriptLines: ['baseline'], + composerHasText: false, + isGenerating: true, + structuredTurnsTrusted: true, + }), + preSendAssistantCount: 0, + userAnchorTurn: null, + reason: 'composer_generating', + }, '请解释', 4); + + expect(result).toBe(''); + }); + + 
it('accepts an assistant reply that appears after a structured user anchor only after it stabilizes and generation stops', async () => { + const page = createPageMock(); + const evaluate = vi.mocked(page.evaluate); + + evaluate + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [ + { Role: 'Assistant', Text: '旧回答' }, + { Role: 'User', Text: '请解释' }, + { Role: 'Assistant', Text: '半截回答' }, + ], + transcriptLines: ['baseline'], + composerHasText: false, + isGenerating: true, + structuredTurnsTrusted: true, + }) + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [ + { Role: 'Assistant', Text: '旧回答' }, + { Role: 'User', Text: '请解释' }, + { Role: 'Assistant', Text: '完整回答' }, + ], + transcriptLines: ['baseline'], + composerHasText: false, + isGenerating: true, + structuredTurnsTrusted: true, + }) + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [ + { Role: 'Assistant', Text: '旧回答' }, + { Role: 'User', Text: '请解释' }, + { Role: 'Assistant', Text: '完整回答' }, + ], + transcriptLines: ['baseline'], + composerHasText: false, + isGenerating: false, + structuredTurnsTrusted: true, + }); + + const result = await waitForGeminiResponse(page, { + snapshot: snapshot({ + turns: [ + { Role: 'Assistant', Text: '旧回答' }, + { Role: 'User', Text: '请解释' }, + ], + transcriptLines: ['baseline'], + composerHasText: false, + isGenerating: true, + structuredTurnsTrusted: true, + }), + preSendAssistantCount: 1, + userAnchorTurn: { Role: 'User', Text: '请解释' }, + reason: 'user_turn', + }, '请解释', 6); + + expect(result).toBe('完整回答'); + }); + + it('uses transcript fallback only after two identical post-submission deltas and after generation stops', async () => { + const page = createPageMock(); + const evaluate = vi.mocked(page.evaluate); + + evaluate + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [{ Role: 'User', Text: '请只回复:OK' 
}], + transcriptLines: ['baseline', 'OK'], + composerHasText: false, + isGenerating: true, + structuredTurnsTrusted: true, + }) + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [{ Role: 'User', Text: '请只回复:OK' }], + transcriptLines: ['baseline', 'OK'], + composerHasText: false, + isGenerating: false, + structuredTurnsTrusted: true, + }) + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [{ Role: 'User', Text: '请只回复:OK' }], + transcriptLines: ['baseline', 'OK'], + composerHasText: false, + isGenerating: false, + structuredTurnsTrusted: true, + }); + + const result = await waitForGeminiResponse(page, { + snapshot: snapshot({ + turns: [{ Role: 'User', Text: '请只回复:OK' }], + transcriptLines: ['baseline'], + composerHasText: false, + isGenerating: true, + structuredTurnsTrusted: true, + }), + preSendAssistantCount: 0, + userAnchorTurn: { Role: 'User', Text: '请只回复:OK' }, + reason: 'user_turn', + }, '请只回复:OK', 6); + + expect(result).toBe('OK'); + }); + + it('ignores transcript lines that appeared before submission confirmation and only accepts post-submission transcript deltas', async () => { + const page = createPageMock(); + const evaluate = vi.mocked(page.evaluate); + + evaluate + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [{ Role: 'User', Text: '请只回复:OK' }], + transcriptLines: ['baseline', '早到的提示词回声', 'OK'], + composerHasText: false, + isGenerating: false, + structuredTurnsTrusted: true, + }) + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [{ Role: 'User', Text: '请只回复:OK' }], + transcriptLines: ['baseline', '早到的提示词回声', 'OK'], + composerHasText: false, + isGenerating: false, + structuredTurnsTrusted: true, + }) + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ + turns: [{ Role: 'User', Text: '请只回复:OK' }], + transcriptLines: ['baseline', '早到的提示词回声', 'OK'], 
+ composerHasText: false, + isGenerating: false, + structuredTurnsTrusted: true, + }); + + const result = await waitForGeminiResponse(page, { + snapshot: snapshot({ + turns: [{ Role: 'User', Text: '请只回复:OK' }], + transcriptLines: ['baseline', '早到的提示词回声'], + composerHasText: false, + isGenerating: true, + structuredTurnsTrusted: true, + }), + preSendAssistantCount: 0, + userAnchorTurn: { Role: 'User', Text: '请只回复:OK' }, + reason: 'composer_transcript', + }, '请只回复:OK', 6); + + expect(result).toBe('OK'); + }); +}); diff --git a/src/clis/gemini/utils.test.ts b/src/clis/gemini/utils.test.ts index 4dae9c9e..342619b8 100644 --- a/src/clis/gemini/utils.test.ts +++ b/src/clis/gemini/utils.test.ts @@ -1,5 +1,39 @@ -import { describe, expect, it } from 'vitest'; -import { collectGeminiTranscriptAdditions, sanitizeGeminiResponseText } from './utils.js'; +import { describe, expect, it, vi } from 'vitest'; +import type { IPage } from '../../types.js'; +import type { GeminiTurn } from './utils.js'; +import { + __test__, + collectGeminiTranscriptAdditions, + sanitizeGeminiResponseText, + sendGeminiMessage, +} from './utils.js'; + +function createPageMock(): IPage { + return { + goto: vi.fn().mockResolvedValue(undefined), + evaluate: vi.fn(), + getCookies: vi.fn().mockResolvedValue([]), + snapshot: vi.fn().mockResolvedValue(undefined), + click: vi.fn().mockResolvedValue(undefined), + typeText: vi.fn().mockResolvedValue(undefined), + pressKey: vi.fn().mockResolvedValue(undefined), + scrollTo: vi.fn().mockResolvedValue(undefined), + getFormState: vi.fn().mockResolvedValue({}), + wait: vi.fn().mockResolvedValue(undefined), + tabs: vi.fn().mockResolvedValue([]), + selectTab: vi.fn().mockResolvedValue(undefined), + networkRequests: vi.fn().mockResolvedValue([]), + consoleMessages: vi.fn().mockResolvedValue([]), + scroll: vi.fn().mockResolvedValue(undefined), + autoScroll: vi.fn().mockResolvedValue(undefined), + installInterceptor: vi.fn().mockResolvedValue(undefined), + 
getInterceptedRequests: vi.fn().mockResolvedValue([]), + waitForCapture: vi.fn().mockResolvedValue(undefined), + screenshot: vi.fn().mockResolvedValue(''), + nativeType: vi.fn().mockResolvedValue(undefined), + nativeKeyPress: vi.fn().mockResolvedValue(undefined), + } as unknown as IPage; +} describe('sanitizeGeminiResponseText', () => { it('strips a prompt echo only when it appears as a prefixed block', () => { @@ -33,4 +67,152 @@ describe('collectGeminiTranscriptAdditions', () => { const current = ['Previous', 'Tell me a haiku', 'Tell me a haiku\n\nSoft spring rain arrives']; expect(collectGeminiTranscriptAdditions(before, current, prompt)).toBe('Soft spring rain arrives'); }); + + it('keeps a reply line that quotes the prompt inside the answer body', () => { + const prompt = '请只回复:OK'; + const before = ['baseline']; + const current = ['baseline', '关于“请只回复:OK”,这里是解释。']; + expect(collectGeminiTranscriptAdditions(before, current, prompt)).toBe('关于“请只回复:OK”,这里是解释。'); + }); +}); + +describe('gemini send strategy', () => { + it('includes structural composer selectors instead of relying only on english aria labels', () => { + expect(__test__.GEMINI_COMPOSER_SELECTORS).toContain('.ql-editor[contenteditable="true"]'); + expect(__test__.GEMINI_COMPOSER_SELECTORS).toContain('.ql-editor[role="textbox"]'); + }); + + it('prefers native text insertion before submitting the composer', async () => { + const page = createPageMock(); + const evaluate = vi.mocked(page.evaluate); + const nativeType = vi.mocked(page.nativeType!); + const nativeKeyPress = vi.mocked(page.nativeKeyPress!); + + evaluate + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ ok: true }) + .mockResolvedValueOnce({ hasText: true }) + .mockResolvedValueOnce('button'); + + const result = await sendGeminiMessage(page, '你好'); + + expect(nativeType).toHaveBeenCalledWith('你好'); + expect(nativeKeyPress).not.toHaveBeenCalled(); + expect(result).toBe('button'); + }); + + it('falls back 
when native insertion does not update the composer', async () => { + const page = createPageMock(); + const evaluate = vi.mocked(page.evaluate); + const nativeType = vi.mocked(page.nativeType!); + const nativeKeyPress = vi.mocked(page.nativeKeyPress!); + + evaluate + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ ok: true }) + .mockResolvedValueOnce({ hasText: false }) + .mockResolvedValueOnce({ hasText: true }) + .mockResolvedValueOnce('enter'); + + const result = await sendGeminiMessage(page, '你好'); + + expect(nativeType).toHaveBeenCalledWith('你好'); + expect(nativeKeyPress).toHaveBeenCalledWith('Enter'); + expect(evaluate).toHaveBeenCalledTimes(5); + expect(result).toBe('enter'); + }); + + it('falls back when native insertion throws', async () => { + const page = createPageMock(); + const evaluate = vi.mocked(page.evaluate); + const nativeType = vi.mocked(page.nativeType!); + + nativeType.mockRejectedValueOnce(new Error('Unknown action: cdp')); + evaluate + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ ok: true }) + .mockResolvedValueOnce({ hasText: true }) + .mockResolvedValueOnce('button'); + + const result = await sendGeminiMessage(page, '你好'); + + expect(nativeType).toHaveBeenCalledWith('你好'); + expect(result).toBe('button'); + }); + + it('retries composer preparation until a slow-loading composer appears', async () => { + const page = createPageMock(); + const evaluate = vi.mocked(page.evaluate); + const wait = vi.mocked(page.wait); + + evaluate + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ ok: false, reason: 'Could not find Gemini composer' }) + .mockResolvedValueOnce({ ok: false, reason: 'Could not find Gemini composer' }) + .mockResolvedValueOnce({ ok: true }) + .mockResolvedValueOnce({ hasText: true }) + .mockResolvedValueOnce('button'); + + const result = await sendGeminiMessage(page, '你好'); + + expect(result).toBe('button'); + 
expect(wait.mock.calls.filter(([value]) => value === 1)).toHaveLength(3); + }); + + it('keeps retrying until a composer that appears on the fourth attempt is ready', async () => { + const page = createPageMock(); + const evaluate = vi.mocked(page.evaluate); + const wait = vi.mocked(page.wait); + + evaluate + .mockResolvedValueOnce('https://gemini.google.com/app') + .mockResolvedValueOnce({ ok: false, reason: 'Could not find Gemini composer' }) + .mockResolvedValueOnce({ ok: false, reason: 'Could not find Gemini composer' }) + .mockResolvedValueOnce({ ok: false, reason: 'Could not find Gemini composer' }) + .mockResolvedValueOnce({ ok: true }) + .mockResolvedValueOnce({ hasText: true }) + .mockResolvedValueOnce('button'); + + const result = await sendGeminiMessage(page, '你好'); + + expect(result).toBe('button'); + expect(wait.mock.calls.filter(([value]) => value === 1)).toHaveLength(4); + }); + + it('avoids innerHTML in the fallback insertion path for trusted types pages', () => { + expect(__test__.insertComposerTextFallbackScript('你好')).not.toContain('innerHTML'); + expect(__test__.insertComposerTextFallbackScript('你好')).toContain('replaceChildren'); + }); + + it('keeps a button submit path in the generated submit script', () => { + expect(__test__.submitComposerScript()).toContain('.click()'); + }); + + it('supports localized new chat labels in the generated new-chat script', () => { + expect(__test__.clickNewChatScript()).toContain('发起新对话'); + }); +}); + +describe('gemini turn normalization', () => { + it('collapses only adjacent duplicate turns so identical replies across rounds remain visible', () => { + const turns: GeminiTurn[] = [ + { Role: 'User', Text: '你说\n\n请只回复:OK' }, + { Role: 'User', Text: '请只回复:OK' }, + { Role: 'Assistant', Text: 'OK' }, + { Role: 'Assistant', Text: 'OK' }, + { Role: 'User', Text: '你说\n\n请只回复:OK' }, + { Role: 'User', Text: '请只回复:OK' }, + { Role: 'Assistant', Text: 'OK' }, + { Role: 'Assistant', Text: 'OK' }, + ]; + + 
expect(__test__.collapseAdjacentGeminiTurns(turns)).toEqual([ + { Role: 'User', Text: '你说\n\n请只回复:OK' }, + { Role: 'User', Text: '请只回复:OK' }, + { Role: 'Assistant', Text: 'OK' }, + { Role: 'User', Text: '你说\n\n请只回复:OK' }, + { Role: 'User', Text: '请只回复:OK' }, + { Role: 'Assistant', Text: 'OK' }, + ]); + }); }); diff --git a/src/clis/gemini/utils.ts b/src/clis/gemini/utils.ts index d02db7e8..41703def 100644 --- a/src/clis/gemini/utils.ts +++ b/src/clis/gemini/utils.ts @@ -1,3 +1,4 @@ +import { CommandExecutionError } from '../../errors.js'; import type { IPage } from '../../types.js'; export const GEMINI_DOMAIN = 'gemini.google.com'; @@ -16,12 +17,88 @@ export interface GeminiTurn { Text: string; } +export interface GeminiSnapshot { + turns: GeminiTurn[]; + transcriptLines: string[]; + composerHasText: boolean; + isGenerating: boolean; + structuredTurnsTrusted: boolean; +} + +export interface GeminiStructuredAppend { + appendedTurns: GeminiTurn[]; + hasTrustedAppend: boolean; + hasNewUserTurn: boolean; + hasNewAssistantTurn: boolean; +} + +export interface GeminiSubmissionBaseline { + snapshot: GeminiSnapshot; + preSendAssistantCount: number; + userAnchorTurn: GeminiTurn | null; + reason: 'user_turn' | 'composer_generating' | 'composer_transcript'; +} + const GEMINI_RESPONSE_NOISE_PATTERNS = [ /Gemini can make mistakes\.?/gi, /Google Terms/gi, /Google Privacy Policy/gi, /Opens in a new window/gi, ]; +const GEMINI_TRANSCRIPT_CHROME_MARKERS = ['gemini', '我的内容', '对话', 'google terms', 'google privacy policy']; + +const GEMINI_COMPOSER_SELECTORS = [ + '.ql-editor[contenteditable="true"]', + '.ql-editor[role="textbox"]', + '.ql-editor[aria-label*="Gemini"]', + '[contenteditable="true"][aria-label*="Gemini"]', + '[aria-label="Enter a prompt for Gemini"]', + '[aria-label*="prompt for Gemini"]', +]; + +const GEMINI_COMPOSER_MARKER_ATTR = 'data-opencli-gemini-composer'; +const GEMINI_COMPOSER_PREPARE_ATTEMPTS = 4; +const GEMINI_COMPOSER_PREPARE_WAIT_SECONDS = 1; + +function 
buildGeminiComposerLocatorScript(): string { + const selectorsJson = JSON.stringify(GEMINI_COMPOSER_SELECTORS); + const markerAttrJson = JSON.stringify(GEMINI_COMPOSER_MARKER_ATTR); + return ` + const isVisible = (el) => { + if (!(el instanceof HTMLElement)) return false; + const style = window.getComputedStyle(el); + if (style.display === 'none' || style.visibility === 'hidden') return false; + const rect = el.getBoundingClientRect(); + return rect.width > 0 && rect.height > 0; + }; + + const markerAttr = ${markerAttrJson}; + const clearComposerMarkers = (active) => { + document.querySelectorAll('[' + markerAttr + ']').forEach((node) => { + if (node !== active) node.removeAttribute(markerAttr); + }); + }; + + const markComposer = (node) => { + if (!(node instanceof HTMLElement)) return null; + clearComposerMarkers(node); + node.setAttribute(markerAttr, '1'); + return node; + }; + + const findComposer = () => { + const marked = document.querySelector('[' + markerAttr + '="1"]'); + if (marked instanceof HTMLElement && isVisible(marked)) return marked; + + const selectors = ${selectorsJson}; + for (const selector of selectors) { + const node = Array.from(document.querySelectorAll(selector)).find((candidate) => candidate instanceof HTMLElement && isVisible(candidate)); + if (node instanceof HTMLElement) return markComposer(node); + } + return null; + }; + `; +} export function sanitizeGeminiResponseText(value: string, promptText: string): string { let sanitized = value; @@ -52,33 +129,160 @@ export function collectGeminiTranscriptAdditions( const beforeSet = new Set(beforeLines); const additions = currentLines .filter((line) => !beforeSet.has(line)) - .map((line) => sanitizeGeminiResponseText(line, promptText)) + .map((line) => extractGeminiTranscriptLineCandidate(line, promptText)) .filter((line) => line && line !== promptText); return additions.join('\n').trim(); } +export function collapseAdjacentGeminiTurns(turns: GeminiTurn[]): GeminiTurn[] { + const collapsed: 
GeminiTurn[] = []; + + for (const turn of turns) { + if (!turn || typeof turn.Role !== 'string' || typeof turn.Text !== 'string') continue; + const previous = collapsed.at(-1); + if (previous?.Role === turn.Role && previous.Text === turn.Text) continue; + collapsed.push(turn); + } + + return collapsed; +} + +function hasGeminiTurnPrefix(before: GeminiTurn[], current: GeminiTurn[]): boolean { + if (before.length > current.length) return false; + return before.every((turn, index) => ( + turn.Role === current[index]?.Role + && turn.Text === current[index]?.Text + )); +} + +function findLastMatchingGeminiTurnIndex(turns: GeminiTurn[], target: GeminiTurn | null): number | null { + if (!target) return null; + for (let index = turns.length - 1; index >= 0; index -= 1) { + const turn = turns[index]; + if (turn?.Role === target.Role && turn.Text === target.Text) { + return index; + } + } + return null; +} + +function diffTrustedStructuredTurns( + before: GeminiSnapshot, + current: GeminiSnapshot, +): GeminiStructuredAppend { + if (!before.structuredTurnsTrusted || !current.structuredTurnsTrusted) { + return { + appendedTurns: [], + hasTrustedAppend: false, + hasNewUserTurn: false, + hasNewAssistantTurn: false, + }; + } + + if (!hasGeminiTurnPrefix(before.turns, current.turns)) { + return { + appendedTurns: [], + hasTrustedAppend: false, + hasNewUserTurn: false, + hasNewAssistantTurn: false, + }; + } + + const appendedTurns = current.turns.slice(before.turns.length); + return { + appendedTurns, + hasTrustedAppend: appendedTurns.length > 0, + hasNewUserTurn: appendedTurns.some((turn) => turn.Role === 'User'), + hasNewAssistantTurn: appendedTurns.some((turn) => turn.Role === 'Assistant'), + }; +} + +function diffTranscriptLines(before: GeminiSnapshot, current: GeminiSnapshot): string[] { + const beforeLines = new Set(before.transcriptLines); + return current.transcriptLines.filter((line) => !beforeLines.has(line)); +} + +function isLikelyGeminiTranscriptChrome(line: string): 
boolean { + const lower = line.toLowerCase(); + const markerHits = GEMINI_TRANSCRIPT_CHROME_MARKERS.filter((marker) => lower.includes(marker)).length; + return markerHits >= 2; +} + +function extractGeminiTranscriptLineCandidate(transcriptLine: string, promptText: string): string { + const candidate = transcriptLine.trim(); + if (!candidate) return ''; + + const prompt = promptText.trim(); + const sanitized = sanitizeGeminiResponseText(candidate, promptText); + + if (!prompt) return sanitized; + if (!candidate.includes(prompt)) return sanitized; + if (sanitized && sanitized !== prompt && sanitized !== candidate) return sanitized; + if (isLikelyGeminiTranscriptChrome(candidate)) return ''; + + // Some transcript snapshots flatten "prompt + answer" into a single line. + // Recover the answer only when the line starts with the current prompt. + if (candidate.startsWith(prompt)) { + const tail = candidate.slice(prompt.length).replace(/^[\s::,,-]+/, '').trim(); + return tail ? sanitizeGeminiResponseText(tail, '') : ''; + } + + return sanitized; +} + function getStateScript(): string { return ` (() => { + ${buildGeminiComposerLocatorScript()} + const signInNode = Array.from(document.querySelectorAll('a, button')).find((node) => { const text = (node.textContent || '').trim().toLowerCase(); const aria = (node.getAttribute('aria-label') || '').trim().toLowerCase(); const href = node.getAttribute('href') || ''; return text === 'sign in' || aria === 'sign in' + || text === '登录' + || aria === '登录' || href.includes('accounts.google.com/ServiceLogin'); }); - const composer = document.querySelector('[aria-label="Enter a prompt for Gemini"], [aria-label*="prompt for Gemini"], .ql-editor[aria-label*="Gemini"], [contenteditable="true"][aria-label*="Gemini"]'); - const sendButton = document.querySelector('button[aria-label="Send message"]'); + const composer = findComposer(); return { url: window.location.href, title: document.title || '', isSignedIn: signInNode ? false : (composer ? 
true : null), composerLabel: composer?.getAttribute('aria-label') || '', - canSend: !!(sendButton && !sendButton.disabled), + canSend: !!composer, + }; + })() + `; +} + +function readGeminiSnapshotScript(): string { + return ` + (() => { + ${buildGeminiComposerLocatorScript()} + const composer = findComposer(); + const composerText = composer?.textContent?.replace(/\\u00a0/g, ' ').trim() || ''; + const isGenerating = !!Array.from(document.querySelectorAll('button, [role="button"]')).find((node) => { + const text = (node.textContent || '').trim().toLowerCase(); + const aria = (node.getAttribute('aria-label') || '').trim().toLowerCase(); + return text === 'stop response' + || aria === 'stop response' + || text === '停止回答' + || aria === '停止回答'; + }); + const turns = ${getTurnsScript().trim()}; + const transcriptLines = ${getTranscriptLinesScript().trim()}; + + return { + turns, + transcriptLines, + composerHasText: composerText.length > 0, + isGenerating, + structuredTurnsTrusted: turns.length > 0 || transcriptLines.length === 0, }; })() `; @@ -186,7 +390,16 @@ function getTurnsScript(): string { ]; const roots = selectors.flatMap((selector) => Array.from(document.querySelectorAll(selector))); - const unique = roots.filter((el, index, all) => all.indexOf(el) === index).filter(isVisible); + const unique = roots + .filter((el, index, all) => all.indexOf(el) === index) + .filter(isVisible) + .sort((left, right) => { + if (left === right) return 0; + const relation = left.compareDocumentPosition(right); + if (relation & Node.DOCUMENT_POSITION_FOLLOWING) return -1; + if (relation & Node.DOCUMENT_POSITION_PRECEDING) return 1; + return 0; + }); const turns = unique.map((el) => { const text = clean(el.innerText || el.textContent || ''); @@ -206,53 +419,189 @@ function getTurnsScript(): string { return role ? 
{ Role: role, Text: text } : null; }).filter(Boolean); - const deduped = []; - const seen = new Set(); - for (const turn of turns) { - const key = turn.Role + '::' + turn.Text; - if (seen.has(key)) continue; - seen.add(key); - deduped.push(turn); - } - return deduped; + return turns; })() `; } -function fillAndSubmitComposerScript(text: string): string { +function prepareComposerScript(): string { return ` - ((inputText) => { - const cleanInsert = (el) => { - if (!(el instanceof HTMLElement)) throw new Error('Composer is not editable'); - el.focus(); + (() => { + ${buildGeminiComposerLocatorScript()} + const composer = findComposer(); + + if (!(composer instanceof HTMLElement)) { + return { ok: false, reason: 'Could not find Gemini composer' }; + } + + try { + composer.focus(); const selection = window.getSelection(); const range = document.createRange(); - range.selectNodeContents(el); + range.selectNodeContents(composer); range.collapse(false); selection?.removeAllRanges(); selection?.addRange(range); - el.textContent = ''; - document.execCommand('insertText', false, inputText); - el.dispatchEvent(new InputEvent('input', { bubbles: true, data: inputText, inputType: 'insertText' })); + composer.textContent = ''; + composer.dispatchEvent(new InputEvent('input', { bubbles: true, data: '', inputType: 'deleteContentBackward' })); + } catch (error) { + return { + ok: false, + reason: error instanceof Error ? 
error.message : String(error), + }; + } + + return { + ok: true, + label: composer.getAttribute('aria-label') || '', + }; + })() + `; +} + +function composerHasTextScript(): string { + return ` + (() => { + ${buildGeminiComposerLocatorScript()} + const composer = findComposer(); + + return { + hasText: !!(composer && ((composer.textContent || '').trim() || (composer.innerText || '').trim())), + }; + })() + `; +} + +function insertComposerTextFallbackScript(text: string): string { + return ` + ((inputText) => { + ${buildGeminiComposerLocatorScript()} + const composer = findComposer(); + + if (!(composer instanceof HTMLElement)) { + return { hasText: false, reason: 'Could not find Gemini composer' }; + } + + const selection = window.getSelection(); + const range = document.createRange(); + range.selectNodeContents(composer); + range.collapse(false); + selection?.removeAllRanges(); + selection?.addRange(range); + + composer.focus(); + composer.textContent = ''; + const execResult = typeof document.execCommand === 'function' + ? 
document.execCommand('insertText', false, inputText) + : false; + + if (!execResult) { + const paragraph = document.createElement('p'); + const lines = String(inputText).split(/\\n/); + for (const [index, line] of lines.entries()) { + if (index > 0) paragraph.appendChild(document.createElement('br')); + paragraph.appendChild(document.createTextNode(line)); + } + composer.replaceChildren(paragraph); + } + + composer.dispatchEvent(new InputEvent('beforeinput', { bubbles: true, data: inputText, inputType: 'insertText' })); + composer.dispatchEvent(new InputEvent('input', { bubbles: true, data: inputText, inputType: 'insertText' })); + composer.dispatchEvent(new Event('change', { bubbles: true })); + + return { + hasText: !!((composer.textContent || '').trim() || (composer.innerText || '').trim()), }; + })(${JSON.stringify(text)}) + `; +} + +function submitComposerScript(): string { + return ` + (() => { + ${buildGeminiComposerLocatorScript()} + const composer = findComposer(); - const composer = document.querySelector('[aria-label="Enter a prompt for Gemini"], [aria-label*="prompt for Gemini"], .ql-editor[aria-label*="Gemini"], [contenteditable="true"][aria-label*="Gemini"]'); if (!(composer instanceof HTMLElement)) { throw new Error('Could not find Gemini composer'); } - cleanInsert(composer); + const composerRect = composer.getBoundingClientRect(); + const rootCandidates = [ + composer.closest('form'), + composer.closest('[role="form"]'), + composer.closest('.input-area-container'), + composer.closest('.textbox-container'), + composer.closest('.input-wrapper'), + composer.parentElement, + composer.parentElement?.parentElement, + ].filter(Boolean); + + const seen = new Set(); + const buttons = []; + for (const root of rootCandidates) { + root.querySelectorAll('button, [role="button"]').forEach((node) => { + if (!(node instanceof HTMLElement)) return; + if (seen.has(node)) return; + seen.add(node); + buttons.push(node); + }); + } + + const excludedPattern = /main 
menu|主菜单|microphone|麦克风|upload|上传|mode|模式|tools|工具|settings|临时对话|new chat|新对话/i; + const submitPattern = /send|发送|submit|提交/i; + let bestButton = null; + let bestScore = -1; + + for (const button of buttons) { + if (!isVisible(button)) continue; + if (button instanceof HTMLButtonElement && button.disabled) continue; + if (button.getAttribute('aria-disabled') === 'true') continue; + + const label = ((button.getAttribute('aria-label') || '') + ' ' + ((button.textContent || '').trim())).trim(); + if (excludedPattern.test(label)) continue; - const sendButton = document.querySelector('button[aria-label="Send message"]'); - if (sendButton instanceof HTMLButtonElement && !sendButton.disabled) { - sendButton.click(); + const rect = button.getBoundingClientRect(); + const verticalDistance = Math.abs((rect.top + rect.bottom) / 2 - (composerRect.top + composerRect.bottom) / 2); + if (verticalDistance > 160) continue; + + let score = 0; + if (submitPattern.test(label)) score += 10; + if (rect.left >= composerRect.right - 160) score += 3; + if (rect.left >= composerRect.left) score += 1; + if (rect.width <= 96 && rect.height <= 96) score += 1; + + if (score > bestScore) { + bestScore = score; + bestButton = button; + } + } + + if (bestButton instanceof HTMLElement && bestScore >= 3) { + bestButton.click(); return 'button'; } - composer.dispatchEvent(new KeyboardEvent('keydown', { key: 'Enter', code: 'Enter', keyCode: 13, bubbles: true })); - composer.dispatchEvent(new KeyboardEvent('keyup', { key: 'Enter', code: 'Enter', keyCode: 13, bubbles: true })); return 'enter'; - })(${JSON.stringify(text)}) + })() + `; +} + +function dispatchComposerEnterScript(): string { + return ` + (() => { + ${buildGeminiComposerLocatorScript()} + const composer = findComposer(); + + if (!(composer instanceof HTMLElement)) { + throw new Error('Could not find Gemini composer'); + } + + composer.focus(); + composer.dispatchEvent(new KeyboardEvent('keydown', { key: 'Enter', code: 'Enter', keyCode: 13, 
which: 13, bubbles: true })); + composer.dispatchEvent(new KeyboardEvent('keyup', { key: 'Enter', code: 'Enter', keyCode: 13, which: 13, bubbles: true })); + return 'enter'; + })() `; } @@ -270,7 +619,14 @@ function clickNewChatScript(): string { const candidates = Array.from(document.querySelectorAll('button, a')).filter((node) => { const text = (node.textContent || '').trim().toLowerCase(); const aria = (node.getAttribute('aria-label') || '').trim().toLowerCase(); - return isVisible(node) && (text === 'new chat' || aria === 'new chat'); + return isVisible(node) && ( + text === 'new chat' + || aria === 'new chat' + || text === '发起新对话' + || aria === '发起新对话' + || text === '新对话' + || aria === '新对话' + ); }); const target = candidates.find((node) => !node.hasAttribute('disabled')) || candidates[0]; @@ -321,27 +677,150 @@ export async function startNewGeminiChat(page: IPage): Promise<'clicked' | 'navi } export async function getGeminiVisibleTurns(page: IPage): Promise { - await ensureGeminiPage(page); - const turns = await page.evaluate(getTurnsScript()) as GeminiTurn[]; + const turns = await getGeminiStructuredTurns(page); if (Array.isArray(turns) && turns.length > 0) return turns; const lines = await getGeminiTranscriptLines(page); return lines.map((line) => ({ Role: 'System', Text: line })); } +async function getGeminiStructuredTurns(page: IPage): Promise { + await ensureGeminiPage(page); + const turns = collapseAdjacentGeminiTurns(await page.evaluate(getTurnsScript()) as GeminiTurn[]); + return Array.isArray(turns) ? 
turns : []; +} + export async function getGeminiTranscriptLines(page: IPage): Promise { await ensureGeminiPage(page); return await page.evaluate(getTranscriptLinesScript()) as string[]; } +export async function readGeminiSnapshot(page: IPage): Promise { + await ensureGeminiPage(page); + return await page.evaluate(readGeminiSnapshotScript()) as GeminiSnapshot; +} + +function findLastUserTurnIndex(turns: GeminiTurn[]): number | null { + for (let index = turns.length - 1; index >= 0; index -= 1) { + if (turns[index]?.Role === 'User') return index; + } + return null; +} + +function findLastUserTurn(turns: GeminiTurn[]): GeminiTurn | null { + const index = findLastUserTurnIndex(turns); + return index === null ? null : turns[index] ?? null; +} + +export async function waitForGeminiSubmission( + page: IPage, + before: GeminiSnapshot, + timeoutSeconds: number, +): Promise { + const preSendAssistantCount = before.turns.filter((turn) => turn.Role === 'Assistant').length; + const maxPolls = Math.max(1, Math.ceil(timeoutSeconds)); + + for (let index = 0; index < maxPolls; index += 1) { + await page.wait(index === 0 ? 
0.5 : 1); + const current = await readGeminiSnapshot(page); + const structuredAppend = diffTrustedStructuredTurns(before, current); + const transcriptDelta = diffTranscriptLines(before, current); + + if (structuredAppend.hasTrustedAppend && structuredAppend.hasNewUserTurn) { + return { + snapshot: current, + preSendAssistantCount, + userAnchorTurn: findLastUserTurn(current.turns), + reason: 'user_turn', + }; + } + + if (!current.composerHasText && current.isGenerating) { + return { + snapshot: current, + preSendAssistantCount, + userAnchorTurn: findLastUserTurn(current.turns), + reason: 'composer_generating', + }; + } + + if (!current.composerHasText && transcriptDelta.length > 0) { + return { + snapshot: current, + preSendAssistantCount, + userAnchorTurn: findLastUserTurn(current.turns), + reason: 'composer_transcript', + }; + } + } + + return null; +} + export async function sendGeminiMessage(page: IPage, text: string): Promise<'button' | 'enter'> { await ensureGeminiPage(page); - const submittedBy = await page.evaluate(fillAndSubmitComposerScript(text)) as 'button' | 'enter'; + let prepared: { ok?: boolean; reason?: string } | undefined; + for (let attempt = 0; attempt < GEMINI_COMPOSER_PREPARE_ATTEMPTS; attempt += 1) { + prepared = await page.evaluate(prepareComposerScript()) as { ok?: boolean; reason?: string }; + if (prepared?.ok) break; + if (attempt < GEMINI_COMPOSER_PREPARE_ATTEMPTS - 1) await page.wait(GEMINI_COMPOSER_PREPARE_WAIT_SECONDS); + } + if (!prepared?.ok) { + throw new CommandExecutionError(prepared?.reason || 'Could not find Gemini composer'); + } + + let hasText = false; + if (page.nativeType) { + try { + await page.nativeType(text); + await page.wait(0.2); + const nativeState = await page.evaluate(composerHasTextScript()) as { hasText?: boolean }; + hasText = !!nativeState?.hasText; + } catch {} + } + + if (!hasText) { + const fallbackState = await page.evaluate(insertComposerTextFallbackScript(text)) as { hasText?: boolean }; + hasText = 
!!fallbackState?.hasText; + } + + if (!hasText) { + throw new CommandExecutionError('Failed to insert text into Gemini composer'); + } + + const submitAction = await page.evaluate(submitComposerScript()) as 'button' | 'enter'; + if (submitAction === 'button') { + await page.wait(1); + return 'button'; + } + + if (page.nativeKeyPress) { + try { + await page.nativeKeyPress('Enter'); + } catch { + await page.evaluate(dispatchComposerEnterScript()); + } + } else { + await page.evaluate(dispatchComposerEnterScript()); + } + await page.wait(1); - return submittedBy; + return 'enter'; } - +export const __test__ = { + GEMINI_COMPOSER_SELECTORS, + GEMINI_COMPOSER_MARKER_ATTR, + collapseAdjacentGeminiTurns, + clickNewChatScript, + diffTranscriptLines, + diffTrustedStructuredTurns, + hasGeminiTurnPrefix, + readGeminiSnapshot, + readGeminiSnapshotScript, + submitComposerScript, + insertComposerTextFallbackScript, +}; export async function getGeminiVisibleImageUrls(page: IPage): Promise { await ensureGeminiPage(page); @@ -484,40 +963,89 @@ export async function exportGeminiImages(page: IPage, urls: string[]): Promise { - const getCandidate = async (): Promise => { - const turns = await getGeminiVisibleTurns(page); - const assistantCandidate = [...turns].reverse().find((turn) => turn.Role === 'Assistant'); - const visibleCandidate = assistantCandidate - ? sanitizeGeminiResponseText(assistantCandidate.Text, promptText) - : ''; - if (visibleCandidate && visibleCandidate !== promptText) return visibleCandidate; - - const lines = await getGeminiTranscriptLines(page); - return collectGeminiTranscriptAdditions(beforeLines, lines, promptText); + if (timeoutSeconds <= 0) return ''; + + // Reply ownership must survive Gemini prepending older history later. + // Re-anchor on the submitted user turn when possible, and otherwise only + // accept assistants that are appended to the exact submission snapshot. 
+ const pickStructuredReplyCandidate = (current: GeminiSnapshot): string => { + if (!current.structuredTurnsTrusted) return ''; + + const userAnchorTurnIndex = findLastMatchingGeminiTurnIndex(current.turns, baseline.userAnchorTurn); + if (userAnchorTurnIndex !== null) { + const candidate = current.turns + .slice(userAnchorTurnIndex + 1) + .filter((turn) => turn.Role === 'Assistant') + .at(-1); + return candidate ? sanitizeGeminiResponseText(candidate.Text, promptText) : ''; + } + + if (hasGeminiTurnPrefix(baseline.snapshot.turns, current.turns)) { + const appendedAssistant = current.turns + .slice(baseline.snapshot.turns.length) + .filter((turn) => turn.Role === 'Assistant') + .at(-1); + if (appendedAssistant) { + return sanitizeGeminiResponseText(appendedAssistant.Text, promptText); + } + } + + return ''; }; - const pollIntervalSeconds = 2; - const maxPolls = Math.max(1, Math.ceil(timeoutSeconds / pollIntervalSeconds)); - let lastCandidate = ''; - let stableCount = 0; + const pickFallbackGeminiTranscriptReply = (current: GeminiSnapshot): string => current.transcriptLines + .filter((line) => !baseline.snapshot.transcriptLines.includes(line)) + .map((line) => extractGeminiTranscriptLineCandidate(line, promptText)) + .filter(Boolean) + .join('\n') + .trim(); + + const maxPolls = Math.max(1, Math.ceil(timeoutSeconds / 2)); + let lastStructured = ''; + let structuredStableCount = 0; + let lastTranscript = ''; + let transcriptStableCount = 0; + let transcriptMissCount = 0; for (let index = 0; index < maxPolls; index += 1) { - await page.wait(index === 0 ? 1.5 : pollIntervalSeconds); - const candidate = await getCandidate(); - if (!candidate) continue; + await page.wait(index === 0 ? 
1 : 2); + const current = await readGeminiSnapshot(page); + const structuredCandidate = pickStructuredReplyCandidate(current); + + if (structuredCandidate) { + if (structuredCandidate === lastStructured) structuredStableCount += 1; + else { + lastStructured = structuredCandidate; + structuredStableCount = 1; + } + + if (!current.isGenerating && structuredStableCount >= 2) { + return structuredCandidate; + } - if (candidate === lastCandidate) stableCount += 1; + continue; + } + + transcriptMissCount += 1; + if (transcriptMissCount < 2) continue; + + const transcriptCandidate = pickFallbackGeminiTranscriptReply(current); + if (!transcriptCandidate) continue; + + if (transcriptCandidate === lastTranscript) transcriptStableCount += 1; else { - lastCandidate = candidate; - stableCount = 1; + lastTranscript = transcriptCandidate; + transcriptStableCount = 1; } - if (stableCount >= 2 || index === maxPolls - 1) return candidate; + if (!current.isGenerating && transcriptStableCount >= 2) { + return transcriptCandidate; + } } - return lastCandidate; + return ''; }