Skip to content

Commit e10f200

Browse files
steipete and MrKipler committed
fix(browser): resolve aria snapshot refs via DOM markers
Co-authored-by: MrKipler <mrkipler@kiphausen.com>
1 parent 207f034 commit e10f200

12 files changed

Lines changed: 434 additions & 15 deletions

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ Docs: https://docs.openclaw.ai
1515
### Fixes
1616

1717
- Browser/Playwright: ignore benign already-handled route races during guarded navigation so browser-page tasks no longer fail when Playwright tears down a route mid-flight. (#68708) Thanks @Steady-ai.
18+
- Browser/aria snapshots: bind `format=aria` `axN` refs to live DOM nodes through backend DOM ids when Playwright is available, so follow-up browser actions can use those refs without timing out. (#62434) Thanks @MrKipler.
1819
- Telegram: prevent duplicate in-process long pollers for the same bot token and add clearer `getUpdates` conflict diagnostics for external duplicate pollers. Fixes #56230.
1920
- Browser/Linux: detect Chromium-based installs under `/opt/google`, `/opt/brave.com`, `/usr/lib/chromium`, and `/usr/lib/chromium-browser` before asking users to set `browser.executablePath`. (#48563) Thanks @lupuletic.
2021
- Sessions/browser: close tracked browser tabs when idle, daily, `/new`, or `/reset` session rollover archives the previous transcript, preventing tabs from leaking past the old session. Thanks @jakozloski.

extensions/browser/src/browser/pw-ai.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ export {
5353
snapshotAiViaPlaywright,
5454
snapshotAriaViaPlaywright,
5555
snapshotRoleViaPlaywright,
56+
storeAriaSnapshotRefsViaPlaywright,
5657
screenshotWithLabelsViaPlaywright,
5758
storageClearViaPlaywright,
5859
storageGetViaPlaywright,

extensions/browser/src/browser/pw-session.page-cdp.test.ts

Lines changed: 108 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
import { beforeEach, describe, expect, it, vi } from "vitest";
2-
import { withPageScopedCdpClient } from "./pw-session.page-cdp.js";
2+
import {
3+
BROWSER_REF_MARKER_ATTRIBUTE,
4+
markBackendDomRefsOnPage,
5+
withPageScopedCdpClient,
6+
} from "./pw-session.page-cdp.js";
37

48
describe("pw-session page-scoped CDP client", () => {
59
beforeEach(() => {
@@ -32,4 +36,107 @@ describe("pw-session page-scoped CDP client", () => {
3236
expect(sessionSend).toHaveBeenCalledWith("Emulation.setLocaleOverride", { locale: "en-US" });
3337
expect(sessionDetach).toHaveBeenCalledTimes(1);
3438
});
39+
40+
it("marks backend DOM refs on the page", async () => {
41+
const sessionSend = vi.fn(async (method: string, params?: Record<string, unknown>) => {
42+
if (method === "DOM.pushNodesByBackendIdsToFrontend") {
43+
expect(params).toEqual({ backendNodeIds: [42, 84] });
44+
return { nodeIds: [101, 202] };
45+
}
46+
return {};
47+
});
48+
const sessionDetach = vi.fn(async () => {});
49+
const newCDPSession = vi.fn(async () => ({
50+
send: sessionSend,
51+
detach: sessionDetach,
52+
}));
53+
const evaluateAll = vi.fn(async () => {});
54+
const page = {
55+
context: () => ({
56+
newCDPSession,
57+
}),
58+
locator: vi.fn(() => ({ evaluateAll })),
59+
};
60+
61+
const marked = await markBackendDomRefsOnPage({
62+
page: page as never,
63+
refs: [
64+
{ ref: "ax1", backendDOMNodeId: 42 },
65+
{ ref: "ax2", backendDOMNodeId: 84 },
66+
],
67+
});
68+
69+
expect(page.locator).toHaveBeenCalledWith(`[${BROWSER_REF_MARKER_ATTRIBUTE}]`);
70+
expect(evaluateAll).toHaveBeenCalledTimes(1);
71+
expect(sessionSend).toHaveBeenNthCalledWith(1, "DOM.enable", undefined);
72+
expect(sessionSend).toHaveBeenNthCalledWith(2, "DOM.pushNodesByBackendIdsToFrontend", {
73+
backendNodeIds: [42, 84],
74+
});
75+
expect(sessionSend).toHaveBeenNthCalledWith(3, "DOM.setAttributeValue", {
76+
nodeId: 101,
77+
name: BROWSER_REF_MARKER_ATTRIBUTE,
78+
value: "ax1",
79+
});
80+
expect(sessionSend).toHaveBeenNthCalledWith(4, "DOM.setAttributeValue", {
81+
nodeId: 202,
82+
name: BROWSER_REF_MARKER_ATTRIBUTE,
83+
value: "ax2",
84+
});
85+
expect(marked).toEqual(new Set(["ax1", "ax2"]));
86+
expect(sessionDetach).toHaveBeenCalledTimes(1);
87+
});
88+
89+
it("clears stale markers even when no backend refs are valid", async () => {
90+
const newCDPSession = vi.fn();
91+
const evaluateAll = vi.fn(async () => {});
92+
const page = {
93+
context: () => ({
94+
newCDPSession,
95+
}),
96+
locator: vi.fn(() => ({ evaluateAll })),
97+
};
98+
99+
const marked = await markBackendDomRefsOnPage({
100+
page: page as never,
101+
refs: [{ ref: "e1", backendDOMNodeId: 0 }],
102+
});
103+
104+
expect(page.locator).toHaveBeenCalledWith(`[${BROWSER_REF_MARKER_ATTRIBUTE}]`);
105+
expect(evaluateAll).toHaveBeenCalledTimes(1);
106+
expect(newCDPSession).not.toHaveBeenCalled();
107+
expect(marked).toEqual(new Set());
108+
});
109+
110+
it("keeps unmarked refs out of the marked set when marker writes fail", async () => {
111+
const sessionSend = vi.fn(async (method: string) => {
112+
if (method === "DOM.pushNodesByBackendIdsToFrontend") {
113+
return { nodeIds: [101, 202] };
114+
}
115+
if (method === "DOM.setAttributeValue") {
116+
throw new Error("detached");
117+
}
118+
return {};
119+
});
120+
const sessionDetach = vi.fn(async () => {});
121+
const page = {
122+
context: () => ({
123+
newCDPSession: vi.fn(async () => ({
124+
send: sessionSend,
125+
detach: sessionDetach,
126+
})),
127+
}),
128+
locator: vi.fn(() => ({ evaluateAll: vi.fn(async () => {}) })),
129+
};
130+
131+
const marked = await markBackendDomRefsOnPage({
132+
page: page as never,
133+
refs: [
134+
{ ref: "ax1", backendDOMNodeId: 42 },
135+
{ ref: "ax2", backendDOMNodeId: 84 },
136+
],
137+
});
138+
139+
expect(marked).toEqual(new Set());
140+
expect(sessionDetach).toHaveBeenCalledTimes(1);
141+
});
35142
});

extensions/browser/src/browser/pw-session.page-cdp.ts

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
import type { CDPSession, Page } from "playwright-core";
22

33
type PageCdpSend = (method: string, params?: Record<string, unknown>) => Promise<unknown>;
4+
type MarkBackendDomRef = { ref: string; backendDOMNodeId: number };
5+
6+
export const BROWSER_REF_MARKER_ATTRIBUTE = "data-openclaw-browser-ref";
47

58
async function withPlaywrightPageCdpSession<T>(
69
page: Page,
@@ -31,3 +34,75 @@ export async function withPageScopedCdpClient<T>(opts: {
3134
);
3235
});
3336
}
37+
38+
/**
 * Stamps live DOM nodes with a `data-openclaw-browser-ref="<ref>"` attribute so
 * that `axN` refs can later be resolved with a plain attribute selector.
 *
 * Steps:
 *  1. Best-effort removal of every existing marker attribute reachable through
 *     `page.locator(...)`, so markers from a previous call do not linger.
 *  2. Keep only entries whose ref looks like `axN` and whose backend DOM node id
 *     is a finite number that floors to a positive integer.
 *  3. Inside a page CDP session, translate backend node ids to frontend node ids
 *     and write the marker attribute on each resolved node.
 *
 * Every CDP step is best-effort: a failed `DOM.enable`, push, or attribute write
 * never rejects; the affected refs are simply left out of the result.
 *
 * @param opts.page - Playwright page whose DOM should be marked.
 * @param opts.refs - Candidate `{ ref, backendDOMNodeId }` pairs.
 * @returns The set of refs whose marker attribute was actually written and is
 *   still the value carried by its node. Refs missing from the set have no
 *   usable marker.
 */
export async function markBackendDomRefsOnPage(opts: {
  page: Page;
  refs: MarkBackendDomRef[];
}): Promise<Set<string>> {
  // Drop stale markers first; this runs even when no incoming ref is valid.
  await opts.page
    .locator(`[${BROWSER_REF_MARKER_ATTRIBUTE}]`)
    .evaluateAll((elements, attr) => {
      for (const element of elements) {
        if (element instanceof Element) {
          element.removeAttribute(attr);
        }
      }
    }, BROWSER_REF_MARKER_ATTRIBUTE)
    .catch(() => {});

  const refs = opts.refs.filter(
    (entry) =>
      /^ax\d+$/.test(entry.ref) &&
      Number.isFinite(entry.backendDOMNodeId) &&
      Math.floor(entry.backendDOMNodeId) > 0,
  );
  const marked = new Set<string>();
  if (!refs.length) {
    // Nothing to mark: skip opening a CDP session altogether.
    return marked;
  }

  return await withPlaywrightPageCdpSession(opts.page, async (session) => {
    // Playwright types `send` per CDP method; widen it so methods can be
    // addressed by plain string name.
    const send = async (method: string, params?: Record<string, unknown>) =>
      await (
        session.send as unknown as (
          method: string,
          params?: Record<string, unknown>,
        ) => Promise<unknown>
      )(method, params);

    await send("DOM.enable").catch(() => {});

    // De-duplicate so the positional backendNodeIds[i] -> nodeIds[i] mapping
    // below stays one-to-one.
    const backendNodeIds = [...new Set(refs.map((entry) => Math.floor(entry.backendDOMNodeId)))];
    const pushed = (await send("DOM.pushNodesByBackendIdsToFrontend", {
      backendNodeIds,
    }).catch(() => ({}))) as { nodeIds?: number[] };
    const nodeIds = Array.isArray(pushed.nodeIds) ? pushed.nodeIds : [];
    const nodeIdByBackendId = new Map<number, number>();
    for (let index = 0; index < backendNodeIds.length; index += 1) {
      const backendNodeId = backendNodeIds[index];
      const nodeId = nodeIds[index];
      // Only keep ids that resolved to a positive frontend node id.
      if (backendNodeId && typeof nodeId === "number" && nodeId > 0) {
        nodeIdByBackendId.set(backendNodeId, nodeId);
      }
    }

    // An element carries a single value per attribute, so each node can hold
    // exactly one ref. Track claimed nodes so a later ref that shares the same
    // backend node cannot overwrite an earlier marker while the earlier ref is
    // still reported as marked.
    const claimedNodeIds = new Set<number>();
    for (const entry of refs) {
      const nodeId = nodeIdByBackendId.get(Math.floor(entry.backendDOMNodeId));
      if (!nodeId) {
        continue;
      }
      if (claimedNodeIds.has(nodeId) || marked.has(entry.ref)) {
        // Either the node already carries another ref's marker, or this ref
        // string already marks a different node (stamping a second node would
        // make the marker selector ambiguous). Leave it unmarked.
        continue;
      }
      try {
        await send("DOM.setAttributeValue", {
          nodeId,
          name: BROWSER_REF_MARKER_ATTRIBUTE,
          value: entry.ref,
        });
        claimedNodeIds.add(nodeId);
        marked.add(entry.ref);
      } catch {
        // Best-effort marker write. Unmarked refs fall back to role metadata.
      }
    }

    return marked;
  });
}

extensions/browser/src/browser/pw-session.test.ts

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import {
66
rememberRoleRefsForTarget,
77
restoreRoleRefsForTarget,
88
} from "./pw-session.js";
9+
import { BROWSER_REF_MARKER_ATTRIBUTE } from "./pw-session.page-cdp.js";
910

1011
function fakePage(): {
1112
page: Page;
@@ -27,6 +28,7 @@ function fakePage(): {
2728
const getByRole = vi.fn(() => ({ nth: vi.fn(() => ({ ok: true })) }));
2829
const frameLocator = vi.fn(() => ({
2930
getByRole: vi.fn(() => ({ nth: vi.fn(() => ({ ok: true })) })),
31+
locator: vi.fn(() => ({ nth: vi.fn(() => ({ ok: true })) })),
3032
}));
3133
const locator = vi.fn(() => ({ nth: vi.fn(() => ({ ok: true })) }));
3234

@@ -72,10 +74,30 @@ describe("pw-session refLocator", () => {
7274
expect(mocks.locator).toHaveBeenCalledWith("aria-ref=e1");
7375
});
7476

75-
it("rejects axN refs from format=aria snapshots instead of timing out", () => {
77+
it("uses backend-marked DOM locators for ax refs", () => {
78+
const { page, mocks } = fakePage();
79+
const state = ensurePageState(page);
80+
state.roleRefs = { ax12: { role: "button", name: "OK", domMarker: true } };
81+
82+
refLocator(page, "ax12");
83+
84+
expect(mocks.locator).toHaveBeenCalledWith(`[${BROWSER_REF_MARKER_ATTRIBUTE}="ax12"]`);
85+
});
86+
87+
it("falls back to role heuristics for ax refs without backend markers", () => {
88+
const { page, mocks } = fakePage();
89+
const state = ensurePageState(page);
90+
state.roleRefs = { ax12: { role: "button", name: "OK" } };
91+
92+
refLocator(page, "ax12");
93+
94+
expect(mocks.getByRole).toHaveBeenCalledWith("button", { name: "OK", exact: true });
95+
});
96+
97+
it("rejects unknown ax refs instead of timing out on aria-ref locators", () => {
7698
const { page, mocks } = fakePage();
7799

78-
expect(() => refLocator(page, "ax12")).toThrow(/format=aria snapshot/);
100+
expect(() => refLocator(page, "ax12")).toThrow(/Unknown ref/);
79101
expect(mocks.locator).not.toHaveBeenCalled();
80102
});
81103
});

extensions/browser/src/browser/pw-session.ts

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ import {
3131
InvalidBrowserNavigationUrlError,
3232
withBrowserNavigationPolicy,
3333
} from "./navigation-guard.js";
34-
import { withPageScopedCdpClient } from "./pw-session.page-cdp.js";
34+
import { BROWSER_REF_MARKER_ATTRIBUTE, withPageScopedCdpClient } from "./pw-session.page-cdp.js";
3535

3636
export type BrowserConsoleMessage = {
3737
type: string;
@@ -84,7 +84,7 @@ type PageState = {
8484
* Mode "role" refs are generated from ariaSnapshot and resolved via getByRole.
8585
* Mode "aria" refs are Playwright aria-ref ids and resolved via `aria-ref=...`.
8686
*/
87-
roleRefs?: Record<string, { role: string; name?: string; nth?: number }>;
87+
roleRefs?: Record<string, { role: string; name?: string; nth?: number; domMarker?: boolean }>;
8888
roleRefsMode?: "role" | "aria";
8989
roleRefsFrameSelector?: string;
9090
};
@@ -935,10 +935,29 @@ export function refLocator(page: Page, ref: string) {
935935
}
936936

937937
if (AX_REF_PATTERN.test(normalized)) {
938-
throw new Error(
939-
`Ref "${normalized}" comes from a format=aria snapshot and cannot be used with act. ` +
940-
`Re-snapshot with format=ai and use the eN refs from that snapshot.`,
941-
);
938+
const state = pageStates.get(page);
939+
const info = state?.roleRefs?.[normalized];
940+
if (!info) {
941+
throw new Error(
942+
`Unknown ref "${normalized}". Run a new snapshot and use a ref from that snapshot.`,
943+
);
944+
}
945+
const scope = state.roleRefsFrameSelector
946+
? page.frameLocator(state.roleRefsFrameSelector)
947+
: page;
948+
if (info.domMarker) {
949+
return scope.locator(`[${BROWSER_REF_MARKER_ATTRIBUTE}="${normalized}"]`);
950+
}
951+
const locAny = scope as unknown as {
952+
getByRole: (
953+
role: never,
954+
opts?: { name?: string; exact?: boolean },
955+
) => ReturnType<Page["getByRole"]>;
956+
};
957+
const locator = info.name
958+
? locAny.getByRole(info.role as never, { name: info.name, exact: true })
959+
: locAny.getByRole(info.role as never);
960+
return info.nth !== undefined ? locator.nth(info.nth) : locator;
942961
}
943962

944963
return page.locator(`aria-ref=${normalized}`);

extensions/browser/src/browser/pw-tools-core.browser-ssrf-guard.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ const sessionMocks = vi.hoisted(() => ({
2727
}));
2828

2929
const pageCdpMocks = vi.hoisted(() => ({
30+
markBackendDomRefsOnPage: vi.fn(async () => new Set<string>()),
3031
withPageScopedCdpClient: vi.fn(
3132
async ({ fn }: { fn: (send: () => Promise<unknown>) => unknown }) =>
3233
await fn(async () => ({ nodes: [] })),

0 commit comments

Comments
 (0)