Skip to content

Commit fa23e35

Browse files
web-flow and claude committed
Add viewport-aware element tracking, screen change detection, and bounding box fixes
- Add screen change detection via frame comparison in web UI (Image + Canvas thumbnail diff) - Auto-refresh element tree when screen navigation detected (>15% pixel change) - Filter elements by active route (SceneView.isFocused) to exclude frozen screens - Add screenId to annotations for screen-scoped display - Add scroll-aware annotation pin repositioning - Fix bounding box misalignment: remove incorrect viewport-to-screen normalization - Add iPhone 17 series to iOS screen size lookup table - Fix device dimensions for iPhone 17 Pro (402x874 instead of default 393x852) - Add accessibility merge utility for cross-source element enrichment - Add MCP annotation tool and bridge-core getScreenId interface - Improve Flutter bounding box extraction (fix early-return bug) - Add iOS multi-strategy accessibility fallback Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 17b6b3a commit fa23e35

23 files changed

Lines changed: 1321 additions & 312 deletions

File tree

Lines changed: 334 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,334 @@
1+
import type { MobileElement, Platform } from "@agentation-mobile/core";
2+
3+
/**
 * Lookup table from iOS accessibility (AX) role identifiers to the unified
 * semantic role names shared across platforms. Several AX roles intentionally
 * collapse onto the same semantic role (e.g. all text-entry roles map to
 * "textfield").
 */
const IOS_ROLE_MAP: Record<string, string> = {
  // Interactive controls
  AXButton: "button",
  AXCheckBox: "checkbox",
  AXRadioButton: "radio",
  AXSwitch: "switch",
  AXSlider: "slider",
  AXPopUpButton: "combobox",
  AXLink: "link",

  // Static text and text entry
  AXStaticText: "text",
  AXTextField: "textfield",
  AXSecureTextField: "textfield",
  AXTextView: "textfield",

  // Media, progress and embedded content
  AXImage: "image",
  AXProgressIndicator: "progressbar",
  AXWebView: "web",

  // Collections and scrolling
  AXTable: "list",
  AXCollectionView: "list",
  AXCell: "cell",
  AXScrollView: "scrollbar",

  // Navigation chrome
  AXTabBar: "tablist",
  AXTabButton: "tab",
  AXNavigationBar: "navigation",
  AXToolbar: "toolbar",

  // Structural containers
  AXGroup: "group",
  AXWindow: "window",
  AXApplication: "application",
};
33+
34+
/**
 * Lookup table from fully-qualified Android widget class names to the unified
 * semantic role names shared across platforms. Keys must match the class name
 * exactly, including the package prefix.
 */
const ANDROID_ROLE_MAP: Record<string, string> = {
  // Interactive controls
  "android.widget.Button": "button",
  "android.widget.ImageButton": "button",
  "android.widget.CheckBox": "checkbox",
  "android.widget.RadioButton": "radio",
  "android.widget.Switch": "switch",
  "android.widget.ToggleButton": "switch",
  "android.widget.SeekBar": "slider",
  "android.widget.Spinner": "combobox",

  // Static text and text entry
  "android.widget.TextView": "text",
  "android.widget.EditText": "textfield",

  // Media, progress and embedded content
  "android.widget.ImageView": "image",
  "android.widget.ProgressBar": "progressbar",
  "android.webkit.WebView": "web",

  // Collections and scrolling
  "android.widget.ListView": "list",
  "android.widget.RecyclerView": "list",
  "android.widget.ScrollView": "scrollbar",
  "android.widget.HorizontalScrollView": "scrollbar",

  // Tabs
  "android.widget.TabHost": "tablist",
  "android.widget.TabWidget": "tablist",

  // Layout containers and the generic base view
  "android.widget.LinearLayout": "group",
  "android.widget.RelativeLayout": "group",
  "android.widget.FrameLayout": "group",
  "android.view.ViewGroup": "group",
  "android.view.View": "view",
};
63+
64+
/**
 * Maps a platform-specific role string to a unified semantic role.
 * Handles both iOS AX roles and Android widget class names.
 *
 * Resolution order:
 *  1. exact entry in the iOS role table,
 *  2. exact entry in the Android role table,
 *  3. unknown `AX…` role: the `AX` prefix is stripped and the rest lower-cased,
 *  4. unknown dotted class name: the last segment, lower-cased,
 *  5. anything else: the input lower-cased.
 *
 * @param role - Raw role identifier reported by the platform.
 * @returns The semantic role name; always a string.
 */
export function mapRole(role: string): string {
  // Own-property lookups only: the `in` operator also walks the prototype
  // chain, so inputs such as "toString" or "constructor" would otherwise
  // return an inherited Object.prototype member instead of a role string.
  const hasOwn = Object.prototype.hasOwnProperty;

  const iosRole = hasOwn.call(IOS_ROLE_MAP, role) ? IOS_ROLE_MAP[role] : undefined;
  if (iosRole !== undefined) return iosRole;

  const androidRole = hasOwn.call(ANDROID_ROLE_MAP, role) ? ANDROID_ROLE_MAP[role] : undefined;
  if (androidRole !== undefined) return androidRole;

  // Fallback: strip AX prefix for iOS, extract class name for Android
  if (role.startsWith("AX")) return role.replace(/^AX/, "").toLowerCase();
  if (role.includes(".")) return role.split(".").pop()?.toLowerCase() ?? role.toLowerCase();

  return role.toLowerCase();
}
81+
82+
/**
 * Finds the best SDK element match for a given native element using bounding box
 * overlap. Returns null if no sufficient overlap is found.
 *
 * A candidate qualifies when the intersection covers more than 50% of the
 * smaller of the two boxes. Among qualifying candidates the one with the
 * largest intersection area wins; when several candidates tie (for example
 * every ancestor container that fully encloses the target), the candidate with
 * the smallest own area - the tightest fit - is preferred, so the result does
 * not depend on the order of `sdkElements`.
 *
 * If nothing qualifies and the target's bounding box has zero area, the first
 * SDK element whose trimmed, lower-cased text content equals the target's is
 * returned instead.
 *
 * @param target - Native element to find a counterpart for.
 * @param sdkElements - Candidate SDK elements.
 * @returns The best matching SDK element, or null when none matches.
 */
export function findBestMatch(
  target: MobileElement,
  sdkElements: MobileElement[],
): MobileElement | null {
  let bestMatch: MobileElement | null = null;
  let bestOverlap = 0;
  let bestArea = Number.POSITIVE_INFINITY;

  const tb = target.boundingBox;
  const targetArea = tb.width * tb.height;

  for (const sdk of sdkElements) {
    const sb = sdk.boundingBox;

    const overlapX = Math.max(0, Math.min(tb.x + tb.width, sb.x + sb.width) - Math.max(tb.x, sb.x));
    const overlapY = Math.max(
      0,
      Math.min(tb.y + tb.height, sb.y + sb.height) - Math.max(tb.y, sb.y),
    );
    const overlapArea = overlapX * overlapY;

    const sdkArea = sb.width * sb.height;
    const minArea = Math.min(targetArea, sdkArea);

    // Needs a non-degenerate box pair and >50% coverage of the smaller box.
    if (!(minArea > 0 && overlapArea / minArea > 0.5)) continue;

    // Larger intersection wins; on an exact tie prefer the tighter SDK box so
    // that an enclosing container listed earlier does not shadow the element
    // that actually corresponds to the target.
    const isBetter =
      overlapArea > bestOverlap || (overlapArea === bestOverlap && sdkArea < bestArea);

    if (isBetter) {
      bestOverlap = overlapArea;
      bestArea = sdkArea;
      bestMatch = sdk;
    }
  }

  // Fallback: when the target has a zero-area bounding box (so no overlap
  // ratio can be computed), try exact text-content matching instead.
  if (!bestMatch && targetArea === 0 && target.textContent) {
    const targetText = target.textContent.trim().toLowerCase();
    if (targetText.length > 0) {
      for (const sdk of sdkElements) {
        const sdkText = sdk.textContent?.trim().toLowerCase();
        if (sdkText && sdkText === targetText) {
          return sdk;
        }
      }
    }
  }

  return bestMatch;
}
132+
133+
/**
 * Combines two views of the same screen: "native" elements (accessibility tree /
 * UIAutomator) and "SDK" elements (fiber tree / DevTools).
 *
 * Each native element is paired with its best SDK counterpart via
 * `findBestMatch`. When a counterpart exists, the native element is copied and
 * its `sourceLocation`, `componentFile`, `componentName` and `animations` are
 * taken from the SDK element, falling back to the native values when the SDK
 * element lacks them. Everything else, including the bounding box, stays native.
 *
 * @param nativeElements - Elements whose geometry is kept.
 * @param sdkElements - Elements supplying component/source metadata.
 * @returns The enriched list; the input array itself when `sdkElements` is empty.
 */
export function mergeElements(
  nativeElements: MobileElement[],
  sdkElements: MobileElement[],
): MobileElement[] {
  // Nothing to enrich with: hand the native list back untouched.
  if (sdkElements.length === 0) {
    return nativeElements;
  }

  const enrich = (original: MobileElement): MobileElement => {
    const counterpart = findBestMatch(original, sdkElements);
    if (!counterpart) {
      return original;
    }

    const merged: MobileElement = {
      ...original,
      sourceLocation: counterpart.sourceLocation ?? original.sourceLocation,
      componentFile: counterpart.componentFile ?? original.componentFile,
      // `||` on purpose: an empty SDK component name also falls back.
      componentName: counterpart.componentName || original.componentName,
      animations: counterpart.animations ?? original.animations,
    };
    return merged;
  };

  return nativeElements.map((nativeEl) => enrich(nativeEl));
}
159+
160+
/**
 * One node of a parsed iOS accessibility dump, as produced by
 * `parseAccessibilityOutput`.
 */
export interface AccessibilityNode {
  /** Text of the node's `Label:` line; empty string when absent. */
  label: string;
  /** Role captured from the element line; "Unknown" when none was captured. */
  role: string;
  /** Text of the node's `Value:` line; empty string when absent. */
  value: string;
  /** Entries of the comma-separated `Trait(s):` line; empty when absent. */
  traits: string[];
  /** Rectangle from the `Frame:` line, or null when no frame was parsed. */
  frame: {
    x: number;
    y: number;
    width: number;
    height: number;
  } | null;
  /** Nesting level derived from the element line's indentation. */
  depth: number;
}
168+
169+
/**
 * Turns a partially populated node into a complete `AccessibilityNode` by
 * substituting defaults for every missing field: empty strings for `label` and
 * `value`, "Unknown" for `role`, an empty array for `traits` and null for
 * `frame`. The supplied `depth` is always used as-is.
 */
function normalizeAccessibilityNode(
  partial: Partial<AccessibilityNode>,
  depth: number,
): AccessibilityNode {
  const { label, role, value, traits, frame } = partial;

  const complete: AccessibilityNode = {
    label: label ?? "",
    role: role ?? "Unknown",
    value: value ?? "",
    traits: traits ?? [],
    frame: frame ?? null,
    depth,
  };

  return complete;
}
182+
183+
/**
 * Parses the raw text output from `xcrun simctl ui <deviceId> accessibility`
 * into structured accessibility nodes.
 *
 * The parser is line-oriented:
 *  - a line starting with `Element:`, `SBElement:` or `AX<Name>:` begins a new
 *    node; its role is the identifier inside an optional `<...>` that follows,
 *    or "Unknown" when there is none;
 *  - subsequent `Label:`, `Value:`, `Trait(s):` and `Frame:` lines fill in the
 *    node currently being built; property lines seen before the first element
 *    line are ignored;
 *  - a node's depth is the element line's leading whitespace length divided by
 *    two, rounded down.
 *
 * Frame origins may be negative (content scrolled or positioned off-screen);
 * such frames are parsed rather than discarded.
 *
 * @param output - Raw multi-line dump text.
 * @returns Nodes in the order their element lines appear.
 */
export function parseAccessibilityOutput(output: string): AccessibilityNode[] {
  // Patterns are hoisted so they are created once per call, not once per line.
  const elementPattern = /^\s*(?:Element|SBElement|AX\w+):\s*(?:<(\w+)>)?/;
  const labelPattern = /^\s*Label:\s*"?([^"]*)"?/;
  const valuePattern = /^\s*Value:\s*"?([^"]*)"?/;
  const traitsPattern = /^\s*Traits?:\s*(.*)/;
  // Frame: {{x, y}, {width, height}} - x and y accept a leading minus sign.
  const framePattern =
    /^\s*Frame:\s*\{\{(-?[\d.]+),\s*(-?[\d.]+)\},\s*\{([\d.]+),\s*([\d.]+)\}\}/;

  const nodes: AccessibilityNode[] = [];
  const lines = output.split("\n");

  let currentNode: Partial<AccessibilityNode> | null = null;
  let currentDepth = 0;

  for (const line of lines) {
    // trimEnd also removes a trailing "\r" from CRLF input.
    const trimmed = line.trimEnd();
    if (!trimmed) continue;

    // Measure indentation depth (two characters of indent per level)
    const indent = line.length - line.trimStart().length;
    const depth = Math.floor(indent / 2);

    // Check for element/component start
    const elementMatch = trimmed.match(elementPattern);
    if (elementMatch) {
      // Save previous node if it exists
      if (currentNode) {
        nodes.push(normalizeAccessibilityNode(currentNode, currentDepth));
      }

      currentNode = {
        role: elementMatch[1] ?? "Unknown",
      };
      currentDepth = depth;
      continue;
    }

    // Property lines only make sense once a node has been started.
    if (!currentNode) continue;

    const labelMatch = trimmed.match(labelPattern);
    if (labelMatch) {
      currentNode.label = labelMatch[1];
      continue;
    }

    const valueMatch = trimmed.match(valuePattern);
    if (valueMatch) {
      currentNode.value = valueMatch[1];
      continue;
    }

    const traitsMatch = trimmed.match(traitsPattern);
    if (traitsMatch) {
      currentNode.traits = (traitsMatch[1] ?? "")
        .split(",")
        .map((t) => t.trim())
        .filter(Boolean);
      continue;
    }

    const frameMatch = trimmed.match(framePattern);
    if (frameMatch) {
      currentNode.frame = {
        x: Number.parseFloat(frameMatch[1] ?? ""),
        y: Number.parseFloat(frameMatch[2] ?? ""),
        width: Number.parseFloat(frameMatch[3] ?? ""),
        height: Number.parseFloat(frameMatch[4] ?? ""),
      };
    }
  }

  // Don't forget the last node
  if (currentNode) {
    nodes.push(normalizeAccessibilityNode(currentNode, currentDepth));
  }

  return nodes;
}
262+
263+
/**
 * Builds `MobileElement`s from parsed iOS accessibility nodes.
 *
 * Nodes without a frame are skipped entirely. For every remaining node:
 *  - `componentName` is the node's role with a leading "AX" removed
 *    ("Unknown" when the role is empty);
 *  - `componentPath` is the "/"-joined chain of component names kept on a
 *    stack that is trimmed to the node's depth before the node is pushed;
 *  - `id` is `ios:<componentName>:<label or "x,y">:<index in nodes>`;
 *  - `accessibility` is attached only when at least one of label, mapped
 *    role (via the shared `mapRole`), value or traits is present;
 *  - `textContent` is set to the label for `AXStaticText` nodes that have one.
 *
 * @param nodes - Parsed accessibility nodes, in document order.
 * @param platform - Platform tag stored on every produced element.
 * @returns One element per node that has a frame.
 */
export function accessibilityNodesToElements(
  nodes: AccessibilityNode[],
  platform: Platform,
): MobileElement[] {
  const result: MobileElement[] = [];
  const ancestry: string[] = [];

  for (const [index, node] of nodes.entries()) {
    const frame = node.frame;
    if (!frame) {
      continue;
    }

    const componentName = node.role ? node.role.replace(/^AX/, "") : "Unknown";

    // Drop stack entries at or below this node's level, then record this node.
    while (ancestry.length > node.depth) {
      ancestry.pop();
    }
    ancestry.push(componentName);

    // Label when available, otherwise the frame origin, disambiguates the id.
    const idDetail = node.label ? node.label : `${frame.x},${frame.y}`;

    const element: MobileElement = {
      id: `ios:${componentName}:${idDetail}:${index}`,
      platform,
      componentPath: ancestry.join("/"),
      componentName,
      boundingBox: frame,
    };

    const semanticRole = mapRole(node.role);
    const accessibility: MobileElement["accessibility"] = {};

    if (node.label) {
      accessibility.label = node.label;
    }
    if (semanticRole) {
      accessibility.role = semanticRole;
    }
    if (node.value) {
      accessibility.value = node.value;
    }
    const hasTraits = node.traits.length > 0;
    if (hasTraits) {
      accessibility.traits = node.traits;
    }

    const hasAccessibility =
      Boolean(node.label) || Boolean(semanticRole) || Boolean(node.value) || hasTraits;
    if (hasAccessibility) {
      element.accessibility = accessibility;
    }

    if (node.label && node.role === "AXStaticText") {
      element.textContent = node.label;
    }

    result.push(element);
  }

  return result;
}

packages/bridges/bridge-core/src/index.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,14 @@ import type { AnimationInfo, MobileElement, Platform } from "@agentation-mobile/
22

33
export { parseUiAutomatorXml, hitTestElement } from "./uiautomator";
44
export { parseWmSize } from "./android-utils";
5+
export {
6+
mergeElements,
7+
findBestMatch,
8+
mapRole,
9+
parseAccessibilityOutput,
10+
accessibilityNodesToElements,
11+
} from "./accessibility-merge";
12+
export type { AccessibilityNode } from "./accessibility-merge";
513
export {
614
IOS_UDID_REGEX,
715
isIosSimulatorId,
@@ -33,6 +41,9 @@ export interface IPlatformBridge {
3341
/** Get the UI element tree for the current screen */
3442
getElementTree(deviceId: string): Promise<MobileElement[]>;
3543

44+
/** Get the current screen/route identifier (optional) */
45+
getScreenId?(deviceId: string): Promise<string | null>;
46+
3647
/** Inspect a specific element at screen coordinates */
3748
inspectElement(deviceId: string, x: number, y: number): Promise<MobileElement | null>;
3849

packages/bridges/bridge-core/src/ios-devices.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@ export function isIosSimulatorId(deviceId: string): boolean {
1515
* Used as a fallback when dynamic resolution detection is not available.
1616
*/
1717
export const IOS_SCREEN_SIZES: Record<string, { width: number; height: number }> = {
18+
"iPhone 17 Pro Max": { width: 440, height: 956 },
19+
"iPhone 17 Pro": { width: 402, height: 874 },
20+
"iPhone 17 Plus": { width: 430, height: 932 },
21+
"iPhone 17 Air": { width: 402, height: 874 },
22+
"iPhone 17": { width: 393, height: 852 },
1823
"iPhone 16 Pro Max": { width: 440, height: 956 },
1924
"iPhone 16 Pro": { width: 402, height: 874 },
2025
"iPhone 16 Plus": { width: 430, height: 932 },

0 commit comments

Comments
 (0)