diff --git a/lambda/tools/local-browser-agent/examples/03_bt_broadband_bournemouth.json b/lambda/tools/local-browser-agent/examples/03_bt_broadband_bournemouth.json index 4baa62b7..b63c5aac 100644 --- a/lambda/tools/local-browser-agent/examples/03_bt_broadband_bournemouth.json +++ b/lambda/tools/local-browser-agent/examples/03_bt_broadband_bournemouth.json @@ -459,8 +459,8 @@ "action": "wait", "description": "Random delay before search to avoid bot detection", "duration_range": [ - 1500, - 3500 + 2000, + 5000 ] }, { @@ -550,73 +550,47 @@ "description": "Progressive address matching: regex -> fuzzy -> vision with scroll", "strategies": [ { - "name": "Strategy 1: Regex - building (with range) + first word only", + "name": "Strategy 1: DOM extract + LLM text matching (select_option)", "steps": [ { - "action": "click", - "locator": { - "strategy": "selector", - "value": "text=/^\\s*(?:\\d+-)?143(?:-\\d+)?[A-Za-z]?\\s+\\w+/i", - "nth": 0 - } + "action": "select_by_llm", + "description": "Extract all options from address dropdown, send to LLM for text matching, use select_option() to properly trigger Angular change event", + "select_selector": "select.custom-select", + "target": "143 Belle Vue Road, Bournemouth BH6 3EN", + "context_hint": "Building number: 143, Street: Belle Vue Road, Postcode: BH6 3EN" } ] }, { - "name": "Strategy 2: Regex - building (with range) + street flexible", + "name": "Strategy 2: Wait and retry select_by_llm (page load fallback)", "steps": [ { - "action": "click", - "locator": { - "strategy": "selector", - "value": "text=/(?:\\d+-)?143(?:-\\d+)?[A-Za-z]?[\\s,]+Belle Vue Road/i", - "nth": 0 - } - } - ] - }, - { - "name": "Strategy 3: Regex - building (with range) + street anywhere", - "steps": [ + "action": "wait", + "description": "Wait for page to fully load before retry", + "duration": 3000 + }, { - "action": "click", - "locator": { - "strategy": "selector", - "value": "text=/(?:\\d+-)?143(?:-\\d+)?[A-Za-z]?.*Belle Vue Road/i", - "nth": 0 - } + "action": "select_by_llm", + "description": "Retry address selection after wait - page may not have loaded initially", + "select_selector": "select.custom-select", + "target": "143 Belle Vue Road, Bournemouth BH6 3EN", + "context_hint": "Building number: 143, Street: Belle Vue Road, Postcode: BH6 3EN" } ] - }, - { - "name": "Strategy 4: Exact text match", - "steps": [ - { - "action": "click", - "locator": { - "strategy": "text", - "value": "143 Belle Vue Road" - } - } - ] - }, - { - "name": "Strategy 5: Vision LLM with scroll capability", - "escalate": { - "type": "vision_llm", - "prompt": "Find and click the address matching '143 Belle Vue Road, Bournemouth BH6 3EN' in the address list. CRITICAL INSTRUCTIONS:\n\n1. FIRST SEARCH: Look carefully at ALL visible addresses for one containing:\n - Building number: '143'\n - Street name: 'Belle Vue Road'\n - Format may vary (case, commas, word order)\n\n2. IF FOUND: Click it immediately\n\n3. IF NOT VISIBLE:\n a) The list is likely SCROLLABLE - look for:\n - A dropdown/select element that can be scrolled\n - Scroll arrows (up/down)\n - A scrollbar on the address list\n b) SCROLL DOWN within the list (not the whole page)\n c) After scrolling, SEARCH AGAIN for the address\n d) Repeat: scroll -> search -> scroll -> search (up to 3 scroll attempts)\n\n4. MATCHING RULES:\n - Accept variations: '143 Belle Vue Road', '143, Belle Vue Road', '143 Belle Vue Road, City'\n - Case-insensitive matching\n - Must have BOTH building number AND street name\n\n5. MAX ACTIONS: 8 actions total (including scrolls and clicks)\n\n6. IF STILL NOT FOUND: Report failure.", - "timeout": 30000, - "max_actions": 8 - } } ] }, + { + "action": "execute_js", + "description": "Force Angular change event on select element - required when Vision LLM fallback clicks option text instead of using select_option()", + "script": "(() => { const sel = document.querySelector('select.custom-select'); if (sel && sel.selectedIndex > 0) { sel.dispatchEvent(new Event('change', { bubbles: true })); return 'change_event_dispatched_index_' + sel.selectedIndex; } return 'no_select_or_default'; })()" + }, { "action": "wait", "description": "Random delay before submit to avoid bot detection", "duration_range": [ - 1500, - 3500 + 2000, + 5000 ] }, { @@ -752,54 +726,18 @@ "description": "Capture address details page for cloud extraction" }, { - "action": "extract_dom", - "description": "Extract Exchange Code, L2SID, and ONT Details from page HTML", - "extractions": [ - { - "name": "exchange_code", - "selector": ".ExhangeCodeSetup > span:last-child", - "fallback_js": "(() => { const wrapper = document.querySelector('.ExhangeCodeSetup'); if (wrapper) { const spans = wrapper.querySelectorAll('span'); for (const s of spans) { if (!s.classList.contains('ExhangeCodeSetupTwo') && s.textContent.trim() && !s.textContent.includes('Exchange Code')) { return s.textContent.trim(); } } } return null; })()" - }, - { - "name": "l2sid_new_ont", - "selector": "tr:has(th:has-text('L2SID (New ONT)')) td", - "fallback_js": "(() => { const rows = document.querySelectorAll('tr'); for (const r of rows) { const th = r.querySelector('th'); if (th && th.textContent.includes('L2SID (New ONT)')) { const td = r.querySelector('td'); return td ? td.textContent.trim() : null; } } return null; })()" - }, - { - "name": "fttp_existing_ont", - "selector": "tr:has(th:has-text('FTTP Existing ONT Available')) td", - "fallback_js": "(() => { const rows = document.querySelectorAll('tr'); for (const r of rows) { const th = r.querySelector('th'); if (th && th.textContent.includes('FTTP Existing ONT Available')) { const td = r.querySelector('td'); return td ? td.textContent.trim() : null; } } return null; })()" - }, - { - "name": "fttp_new_ont", - "selector": "tr:has(th:has-text('FTTP New ONT Available')) td", - "fallback_js": "(() => { const rows = document.querySelectorAll('tr'); for (const r of rows) { const th = r.querySelector('th'); if (th && th.textContent.includes('FTTP New ONT Available')) { const td = r.querySelector('td'); return td ? td.textContent.trim() : null; } } return null; })()" - }, - { - "name": "ont_reference", - "selector": "table:has(th:has-text('ONT Details')) tr:has(td:has-text('Working')) td:nth-child(2)", - "fallback_js": "(() => { const tables = document.querySelectorAll('table'); for (const t of tables) { const header = t.querySelector('th'); if (header && header.textContent.includes('ONT Details')) { const rows = t.querySelectorAll('tr'); for (const r of rows) { if (r.textContent.includes('Working')) { const cells = r.querySelectorAll('td'); return cells[1]?.textContent?.trim() || null; } } } } return null; })()" - }, - { - "name": "ont_serial_no", - "selector": "table:has(th:has-text('ONT Details')) tr:has(td:has-text('Working')) td:nth-child(3)", - "fallback_js": "(() => { const tables = document.querySelectorAll('table'); for (const t of tables) { const header = t.querySelector('th'); if (header && header.textContent.includes('ONT Details')) { const rows = t.querySelectorAll('tr'); for (const r of rows) { if (r.textContent.includes('Working')) { const cells = r.querySelectorAll('td'); return cells[2]?.textContent?.trim() || null; } } } } return null; })()" - }, - { - "name": "port_service_id", - "selector": "table:has(th:has-text('ONT Details')) tr:has(td:has-text('Working')) td:nth-child(4)", - "fallback_js": "(() => { const tables = document.querySelectorAll('table'); for (const t of tables) { const header = t.querySelector('th'); if (header && header.textContent.includes('ONT Details')) { const rows = t.querySelectorAll('tr'); for (const r of rows) { if (r.textContent.includes('Working')) { const cells = r.querySelectorAll('td'); return cells[3]?.textContent?.trim() || null; } } } } return null; })()" - }, - { - "name": "wbc_fttp_rag", - "selector": "tr:has(th:has-text('WBC FTTP')) td:first-of-type", - "fallback_js": "(() => { const rows = document.querySelectorAll('tr'); for (const r of rows) { const th = r.querySelector('th'); if (th && th.textContent.includes('WBC FTTP')) { const td = r.querySelector('td'); return td ? td.textContent.trim() : null; } } return null; })()" - }, - { - "name": "premise_type", - "selector": "tr:has(th:has-text('Premise Type')) td", - "fallback_js": "(() => { const rows = document.querySelectorAll('tr'); for (const r of rows) { const th = r.querySelector('th'); if (th && th.textContent.includes('Premise Type')) { const td = r.querySelector('td'); return td ? td.textContent.trim() : null; } } return null; })()" - } + "action": "extract_by_llm", + "description": "Use LLM to extract values from page HTML - more resilient than CSS selectors", + "fields": [ + {"name": "exchange_code", "description": "Exchange Code value (alphanumeric, e.g. 'LNBOU')"}, + {"name": "l2sid_new_ont", "description": "L2SID (New ONT) value - long alphanumeric string"}, + {"name": "fttp_existing_ont", "description": "FTTP Existing ONT Available - Yes/No or similar"}, + {"name": "fttp_new_ont", "description": "FTTP New ONT Available - Yes/No or similar"}, + {"name": "ont_reference", "description": "ONT Reference for Working status row - alphanumeric ID"}, + {"name": "ont_serial_no", "description": "ONT Serial Number for Working status row - alphanumeric"}, + {"name": "port_service_id", "description": "Port Service ID for Working status row - alphanumeric"}, + {"name": "wbc_fttp_rag", "description": "WBC FTTP RAG status (Green/Amber/Red)"}, + {"name": "premise_type", "description": "Premise Type value"} ] }, { diff --git a/lambda/tools/local-browser-agent/examples/04_bt_broadband_bolton.json b/lambda/tools/local-browser-agent/examples/04_bt_broadband_bolton.json index 5fe09029..9676e0c7 100644 --- a/lambda/tools/local-browser-agent/examples/04_bt_broadband_bolton.json +++ b/lambda/tools/local-browser-agent/examples/04_bt_broadband_bolton.json @@ -459,8 +459,8 @@ "action": "wait", "description": "Random delay before search to avoid bot detection", "duration_range": [ - 1500, - 3500 + 2000, + 5000 ] }, { @@ -550,73 +550,47 @@ "description": "Progressive address matching: regex -> fuzzy -> vision with scroll", "strategies": [ { - "name": "Strategy 1: Regex - building (with range) + first word only", + "name": "Strategy 1: DOM extract + LLM text matching (select_option)", "steps": [ { - "action": "click", - "locator": { - "strategy": "selector", - "value": "text=/^\\s*(?:\\d+-)?34(?:-\\d+)?[A-Za-z]?\\s+\\w+/i", - "nth": 0 - } + "action": "select_by_llm", + "description": "Extract all options from address dropdown, send to LLM for text matching, use select_option() to properly trigger Angular change event", + "select_selector": "select.custom-select", + "target": "34-40 Market Street, Westhoughton, Bolton BL5 3AN", + "context_hint": "Building number: 34, Street: Market Street, Postcode: BL5 3AN" } ] }, { - "name": "Strategy 2: Regex - building (with range) + street flexible", + "name": "Strategy 2: Wait and retry select_by_llm (page load fallback)", "steps": [ { - "action": "click", - "locator": { - "strategy": "selector", - "value": "text=/(?:\\d+-)?34(?:-\\d+)?[A-Za-z]?[\\s,]+Market Street/i", - "nth": 0 - } - } - ] - }, - { - "name": "Strategy 3: Regex - building (with range) + street anywhere", - "steps": [ + "action": "wait", + "description": "Wait for page to fully load before retry", + "duration": 3000 + }, { - "action": "click", - "locator": { - "strategy": "selector", - "value": "text=/(?:\\d+-)?34(?:-\\d+)?[A-Za-z]?.*Market Street/i", - "nth": 0 - } + "action": "select_by_llm", + "description": "Retry address selection after wait - page may not have loaded initially", + "select_selector": "select.custom-select", + "target": "34-40 Market Street, Westhoughton, Bolton BL5 3AN", + "context_hint": "Building number: 34, Street: Market Street, Postcode: BL5 3AN" } ] - }, - { - "name": "Strategy 4: Exact text match", - "steps": [ - { - "action": "click", - "locator": { - "strategy": "text", - "value": "34 Market Street" - } - } - ] - }, - { - "name": "Strategy 5: Vision LLM with scroll capability", - "escalate": { - "type": "vision_llm", - "prompt": "Find and click the address matching '34-40 Market Street, Westhoughton, Bolton BL5 3AN' in the address list. CRITICAL INSTRUCTIONS:\n\n1. FIRST SEARCH: Look carefully at ALL visible addresses for one containing:\n - Building number: '34'\n - Street name: 'Market Street'\n - Format may vary (case, commas, word order)\n\n2. IF FOUND: Click it immediately\n\n3. IF NOT VISIBLE:\n a) The list is likely SCROLLABLE - look for:\n - A dropdown/select element that can be scrolled\n - Scroll arrows (up/down)\n - A scrollbar on the address list\n b) SCROLL DOWN within the list (not the whole page)\n c) After scrolling, SEARCH AGAIN for the address\n d) Repeat: scroll -> search -> scroll -> search (up to 3 scroll attempts)\n\n4. MATCHING RULES:\n - Accept variations: '34 Market Street', '34, Market Street', '34 Market Street, City'\n - Case-insensitive matching\n - Must have BOTH building number AND street name\n\n5. MAX ACTIONS: 8 actions total (including scrolls and clicks)\n\n6. IF STILL NOT FOUND: Report failure.", - "timeout": 30000, - "max_actions": 8 - } } ] }, + { + "action": "execute_js", + "description": "Force Angular change event on select element - required when Vision LLM fallback clicks option text instead of using select_option()", + "script": "(() => { const sel = document.querySelector('select.custom-select'); if (sel && sel.selectedIndex > 0) { sel.dispatchEvent(new Event('change', { bubbles: true })); return 'change_event_dispatched_index_' + sel.selectedIndex; } return 'no_select_or_default'; })()" + }, { "action": "wait", "description": "Random delay before submit to avoid bot detection", "duration_range": [ - 1500, - 3500 + 2000, + 5000 ] }, { @@ -752,54 +726,18 @@ "description": "Capture address details page for cloud extraction" }, { - "action": "extract_dom", - "description": "Extract Exchange Code, L2SID, and ONT Details from page HTML", - "extractions": [ - { - "name": "exchange_code", - "selector": ".ExhangeCodeSetup > span:last-child", - "fallback_js": "(() => { const wrapper = document.querySelector('.ExhangeCodeSetup'); if (wrapper) { const spans = wrapper.querySelectorAll('span'); for (const s of spans) { if (!s.classList.contains('ExhangeCodeSetupTwo') && s.textContent.trim() && !s.textContent.includes('Exchange Code')) { return s.textContent.trim(); } } } return null; })()" - }, - { - "name": "l2sid_new_ont", - "selector": "tr:has(th:has-text('L2SID (New ONT)')) td", - "fallback_js": "(() => { const rows = document.querySelectorAll('tr'); for (const r of rows) { const th = r.querySelector('th'); if (th && th.textContent.includes('L2SID (New ONT)')) { const td = r.querySelector('td'); return td ? td.textContent.trim() : null; } } return null; })()" - }, - { - "name": "fttp_existing_ont", - "selector": "tr:has(th:has-text('FTTP Existing ONT Available')) td", - "fallback_js": "(() => { const rows = document.querySelectorAll('tr'); for (const r of rows) { const th = r.querySelector('th'); if (th && th.textContent.includes('FTTP Existing ONT Available')) { const td = r.querySelector('td'); return td ? td.textContent.trim() : null; } } return null; })()" - }, - { - "name": "fttp_new_ont", - "selector": "tr:has(th:has-text('FTTP New ONT Available')) td", - "fallback_js": "(() => { const rows = document.querySelectorAll('tr'); for (const r of rows) { const th = r.querySelector('th'); if (th && th.textContent.includes('FTTP New ONT Available')) { const td = r.querySelector('td'); return td ? td.textContent.trim() : null; } } return null; })()" - }, - { - "name": "ont_reference", - "selector": "table:has(th:has-text('ONT Details')) tr:has(td:has-text('Working')) td:nth-child(2)", - "fallback_js": "(() => { const tables = document.querySelectorAll('table'); for (const t of tables) { const header = t.querySelector('th'); if (header && header.textContent.includes('ONT Details')) { const rows = t.querySelectorAll('tr'); for (const r of rows) { if (r.textContent.includes('Working')) { const cells = r.querySelectorAll('td'); return cells[1]?.textContent?.trim() || null; } } } } return null; })()" - }, - { - "name": "ont_serial_no", - "selector": "table:has(th:has-text('ONT Details')) tr:has(td:has-text('Working')) td:nth-child(3)", - "fallback_js": "(() => { const tables = document.querySelectorAll('table'); for (const t of tables) { const header = t.querySelector('th'); if (header && header.textContent.includes('ONT Details')) { const rows = t.querySelectorAll('tr'); for (const r of rows) { if (r.textContent.includes('Working')) { const cells = r.querySelectorAll('td'); return cells[2]?.textContent?.trim() || null; } } } } return null; })()" - }, - { - "name": "port_service_id", - "selector": "table:has(th:has-text('ONT Details')) tr:has(td:has-text('Working')) td:nth-child(4)", - "fallback_js": "(() => { const tables = document.querySelectorAll('table'); for (const t of tables) { const header = t.querySelector('th'); if (header && header.textContent.includes('ONT Details')) { const rows = t.querySelectorAll('tr'); for (const r of rows) { if (r.textContent.includes('Working')) { const cells = r.querySelectorAll('td'); return cells[3]?.textContent?.trim() || null; } } } } return null; })()" - }, - { - "name": "wbc_fttp_rag", - "selector": "tr:has(th:has-text('WBC FTTP')) td:first-of-type", - "fallback_js": "(() => { const rows = document.querySelectorAll('tr'); for (const r of rows) { const th = r.querySelector('th'); if (th && th.textContent.includes('WBC FTTP')) { const td = r.querySelector('td'); return td ? td.textContent.trim() : null; } } return null; })()" - }, - { - "name": "premise_type", - "selector": "tr:has(th:has-text('Premise Type')) td", - "fallback_js": "(() => { const rows = document.querySelectorAll('tr'); for (const r of rows) { const th = r.querySelector('th'); if (th && th.textContent.includes('Premise Type')) { const td = r.querySelector('td'); return td ? td.textContent.trim() : null; } } return null; })()" - } + "action": "extract_by_llm", + "description": "Use LLM to extract values from page HTML - more resilient than CSS selectors", + "fields": [ + {"name": "exchange_code", "description": "Exchange Code value (alphanumeric, e.g. 'LNBOU')"}, + {"name": "l2sid_new_ont", "description": "L2SID (New ONT) value - long alphanumeric string"}, + {"name": "fttp_existing_ont", "description": "FTTP Existing ONT Available - Yes/No or similar"}, + {"name": "fttp_new_ont", "description": "FTTP New ONT Available - Yes/No or similar"}, + {"name": "ont_reference", "description": "ONT Reference for Working status row - alphanumeric ID"}, + {"name": "ont_serial_no", "description": "ONT Serial Number for Working status row - alphanumeric"}, + {"name": "port_service_id", "description": "Port Service ID for Working status row - alphanumeric"}, + {"name": "wbc_fttp_rag", "description": "WBC FTTP RAG status (Green/Amber/Red)"}, + {"name": "premise_type", "description": "Premise Type value"} ] }, { diff --git a/lambda/tools/local-browser-agent/examples/05_bt_broadband_peacehaven.json b/lambda/tools/local-browser-agent/examples/05_bt_broadband_peacehaven.json index 4220fc3d..6ff80632 100644 --- a/lambda/tools/local-browser-agent/examples/05_bt_broadband_peacehaven.json +++ b/lambda/tools/local-browser-agent/examples/05_bt_broadband_peacehaven.json @@ -459,8 +459,8 @@ "action": "wait", "description": "Random delay before search to avoid bot detection", "duration_range": [ - 1500, - 3500 + 2000, + 5000 ] }, { @@ -550,73 +550,47 @@ "description": "Progressive address matching: regex -> fuzzy -> vision with scroll", "strategies": [ { - "name": "Strategy 1: Regex - building (with range) + first word only", + "name": "Strategy 1: DOM extract + LLM text matching (select_option)", "steps": [ { - "action": "click", - "locator": { - "strategy": "selector", - "value": "text=/^\\s*(?:\\d+-)?282(?:-\\d+)?[A-Za-z]?\\s+\\w+/i", - "nth": 0 - } + "action": "select_by_llm", + "description": "Extract all options from address dropdown, send to LLM for text matching, use select_option() to properly trigger Angular change event", + "select_selector": "select.custom-select", + "target": "282 South Coast Road, Peacehaven BN10 8LA", + "context_hint": "Building number: 282, Street: South Coast Road, Postcode: BN10 8LA" } ] }, { - "name": "Strategy 2: Regex - building (with range) + street flexible", + "name": "Strategy 2: Wait and retry select_by_llm (page load fallback)", "steps": [ { - "action": "click", - "locator": { - "strategy": "selector", - "value": "text=/(?:\\d+-)?282(?:-\\d+)?[A-Za-z]?[\\s,]+South Coast Road/i", - "nth": 0 - } - } - ] - }, - { - "name": "Strategy 3: Regex - building (with range) + street anywhere", - "steps": [ + "action": "wait", + "description": "Wait for page to fully load before retry", + "duration": 3000 + }, { - "action": "click", - "locator": { - "strategy": "selector", - "value": "text=/(?:\\d+-)?282(?:-\\d+)?[A-Za-z]?.*South Coast Road/i", - "nth": 0 - } + "action": "select_by_llm", + "description": "Retry address selection after wait - page may not have loaded initially", + "select_selector": "select.custom-select", + "target": "282 South Coast Road, Peacehaven BN10 8LA", + "context_hint": "Building number: 282, Street: South Coast Road, Postcode: BN10 8LA" } ] - }, - { - "name": "Strategy 4: Exact text match", - "steps": [ - { - "action": "click", - "locator": { - "strategy": "text", - "value": "282 South Coast Road" - } - } - ] - }, - { - "name": "Strategy 5: Vision LLM with scroll capability", - "escalate": { - "type": "vision_llm", - "prompt": "Find and click the address matching '282 South Coast Road, Peacehaven BN10 8LA' in the address list. CRITICAL INSTRUCTIONS:\n\n1. FIRST SEARCH: Look carefully at ALL visible addresses for one containing:\n - Building number: '282'\n - Street name: 'South Coast Road'\n - Format may vary (case, commas, word order)\n\n2. IF FOUND: Click it immediately\n\n3. IF NOT VISIBLE:\n a) The list is likely SCROLLABLE - look for:\n - A dropdown/select element that can be scrolled\n - Scroll arrows (up/down)\n - A scrollbar on the address list\n b) SCROLL DOWN within the list (not the whole page)\n c) After scrolling, SEARCH AGAIN for the address\n d) Repeat: scroll -> search -> scroll -> search (up to 3 scroll attempts)\n\n4. MATCHING RULES:\n - Accept variations: '282 South Coast Road', '282, South Coast Road', '282 South Coast Road, City'\n - Case-insensitive matching\n - Must have BOTH building number AND street name\n\n5. MAX ACTIONS: 8 actions total (including scrolls and clicks)\n\n6. IF STILL NOT FOUND: Report failure.", - "timeout": 30000, - "max_actions": 8 - } } ] }, + { + "action": "execute_js", + "description": "Force Angular change event on select element - required when Vision LLM fallback clicks option text instead of using select_option()", + "script": "(() => { const sel = document.querySelector('select.custom-select'); if (sel && sel.selectedIndex > 0) { sel.dispatchEvent(new Event('change', { bubbles: true })); return 'change_event_dispatched_index_' + sel.selectedIndex; } return 'no_select_or_default'; })()" + }, { "action": "wait", "description": "Random delay before submit to avoid bot detection", "duration_range": [ - 1500, - 3500 + 2000, + 5000 ] }, { @@ -752,54 +726,18 @@ "description": "Capture address details page for cloud extraction" }, { - "action": "extract_dom", - "description": "Extract Exchange Code, L2SID, and ONT Details from page HTML", - "extractions": [ - { - "name": "exchange_code", - "selector": ".ExhangeCodeSetup > span:last-child", - "fallback_js": "(() => { const wrapper = document.querySelector('.ExhangeCodeSetup'); if (wrapper) { const spans = wrapper.querySelectorAll('span'); for (const s of spans) { if (!s.classList.contains('ExhangeCodeSetupTwo') && s.textContent.trim() && !s.textContent.includes('Exchange Code')) { return s.textContent.trim(); } } } return null; })()" - }, - { - "name": "l2sid_new_ont", - "selector": "tr:has(th:has-text('L2SID (New ONT)')) td", - "fallback_js": "(() => { const rows = document.querySelectorAll('tr'); for (const r of rows) { const th = r.querySelector('th'); if (th && th.textContent.includes('L2SID (New ONT)')) { const td = r.querySelector('td'); return td ? td.textContent.trim() : null; } } return null; })()" - }, - { - "name": "fttp_existing_ont", - "selector": "tr:has(th:has-text('FTTP Existing ONT Available')) td", - "fallback_js": "(() => { const rows = document.querySelectorAll('tr'); for (const r of rows) { const th = r.querySelector('th'); if (th && th.textContent.includes('FTTP Existing ONT Available')) { const td = r.querySelector('td'); return td ? td.textContent.trim() : null; } } return null; })()" - }, - { - "name": "fttp_new_ont", - "selector": "tr:has(th:has-text('FTTP New ONT Available')) td", - "fallback_js": "(() => { const rows = document.querySelectorAll('tr'); for (const r of rows) { const th = r.querySelector('th'); if (th && th.textContent.includes('FTTP New ONT Available')) { const td = r.querySelector('td'); return td ? td.textContent.trim() : null; } } return null; })()" - }, - { - "name": "ont_reference", - "selector": "table:has(th:has-text('ONT Details')) tr:has(td:has-text('Working')) td:nth-child(2)", - "fallback_js": "(() => { const tables = document.querySelectorAll('table'); for (const t of tables) { const header = t.querySelector('th'); if (header && header.textContent.includes('ONT Details')) { const rows = t.querySelectorAll('tr'); for (const r of rows) { if (r.textContent.includes('Working')) { const cells = r.querySelectorAll('td'); return cells[1]?.textContent?.trim() || null; } } } } return null; })()" - }, - { - "name": "ont_serial_no", - "selector": "table:has(th:has-text('ONT Details')) tr:has(td:has-text('Working')) td:nth-child(3)", - "fallback_js": "(() => { const tables = document.querySelectorAll('table'); for (const t of tables) { const header = t.querySelector('th'); if (header && header.textContent.includes('ONT Details')) { const rows = t.querySelectorAll('tr'); for (const r of rows) { if (r.textContent.includes('Working')) { const cells = r.querySelectorAll('td'); return cells[2]?.textContent?.trim() || null; } } } } return null; })()" - }, - { - "name": "port_service_id", - "selector": "table:has(th:has-text('ONT Details')) tr:has(td:has-text('Working')) td:nth-child(4)", - "fallback_js": "(() => { const tables = document.querySelectorAll('table'); for (const t of tables) { const header = t.querySelector('th'); if (header && header.textContent.includes('ONT Details')) { const rows = t.querySelectorAll('tr'); for (const r of rows) { if (r.textContent.includes('Working')) { const cells = r.querySelectorAll('td'); return cells[3]?.textContent?.trim() || null; } } } } return null; })()" - }, - { - "name": "wbc_fttp_rag", - "selector": "tr:has(th:has-text('WBC FTTP')) td:first-of-type", - "fallback_js": "(() => { const rows = document.querySelectorAll('tr'); for (const r of rows) { const th = r.querySelector('th'); if (th && th.textContent.includes('WBC FTTP')) { const td = r.querySelector('td'); return td ? td.textContent.trim() : null; } } return null; })()" - }, - { - "name": "premise_type", - "selector": "tr:has(th:has-text('Premise Type')) td", - "fallback_js": "(() => { const rows = document.querySelectorAll('tr'); for (const r of rows) { const th = r.querySelector('th'); if (th && th.textContent.includes('Premise Type')) { const td = r.querySelector('td'); return td ? td.textContent.trim() : null; } } return null; })()" - } + "action": "extract_by_llm", + "description": "Use LLM to extract values from page HTML - more resilient than CSS selectors", + "fields": [ + {"name": "exchange_code", "description": "Exchange Code value (alphanumeric, e.g. 'LNBOU')"}, + {"name": "l2sid_new_ont", "description": "L2SID (New ONT) value - long alphanumeric string"}, + {"name": "fttp_existing_ont", "description": "FTTP Existing ONT Available - Yes/No or similar"}, + {"name": "fttp_new_ont", "description": "FTTP New ONT Available - Yes/No or similar"}, + {"name": "ont_reference", "description": "ONT Reference for Working status row - alphanumeric ID"}, + {"name": "ont_serial_no", "description": "ONT Serial Number for Working status row - alphanumeric"}, + {"name": "port_service_id", "description": "Port Service ID for Working status row - alphanumeric"}, + {"name": "wbc_fttp_rag", "description": "WBC FTTP RAG status (Green/Amber/Red)"}, + {"name": "premise_type", "description": "Premise Type value"} ] }, { diff --git a/lambda/tools/local-browser-agent/examples/06_bt_broadband_template_test.json b/lambda/tools/local-browser-agent/examples/06_bt_broadband_template_test.json index e723fc41..60c9e248 100644 --- a/lambda/tools/local-browser-agent/examples/06_bt_broadband_template_test.json +++ b/lambda/tools/local-browser-agent/examples/06_bt_broadband_template_test.json @@ -459,8 +459,8 @@ "action": "wait", "description": "Random delay before search to avoid bot detection", "duration_range": [ - 1500, - 3500 + 2000, + 5000 ] }, { @@ -550,73 +550,47 @@ "description": "Progressive address matching: regex -> fuzzy -> vision with scroll", "strategies": [ { - "name": "Strategy 1: Regex - building (with range) + first word only", + "name": "Strategy 1: DOM extract + LLM text matching (select_option)", "steps": [ { - "action": "click", - "locator": { - "strategy": "selector", - "value": "text=/^\\s*(?:\\d+-)?396(?:-\\d+)?[A-Za-z]?\\s+\\w+/i", - "nth": 0 - } + "action": "select_by_llm", + "description": "Extract all options from address dropdown, send to LLM for text matching, use select_option() to properly trigger Angular change event", + "select_selector": "select.custom-select", + "target": "93 Wimborne Road, Bournemouth BH8 8BN", + "context_hint": "Building number: 93, Street: Wimborne Road, Postcode: BH8 8BN" } ] }, { - "name": "Strategy 2: Regex - building (with range) + street flexible", + "name": "Strategy 2: Wait and retry select_by_llm (page load fallback)", "steps": [ { - "action": "click", - "locator": { - "strategy": "selector", - "value": "text=/(?:\\d+-)?396(?:-\\d+)?[A-Za-z]?[\\s,]+Wimborne Road/i", - "nth": 0 - } - } - ] - }, - { - "name": "Strategy 3: Regex - building (with range) + street anywhere", - "steps": [ + "action": "wait", + "description": "Wait for page to fully load before retry", + "duration": 3000 + }, { - "action": "click", - "locator": { - "strategy": "selector", - "value": "text=/(?:\\d+-)?396(?:-\\d+)?[A-Za-z]?.*Wimborne Road/i", - "nth": 0 - } + "action": "select_by_llm", + "description": "Retry address selection after wait - page may not have loaded initially", + "select_selector": "select.custom-select", + "target": "93 Wimborne Road, Bournemouth BH8 8BN", + "context_hint": "Building number: 93, Street: Wimborne Road, Postcode: BH8 8BN" } ] - }, - { - "name": "Strategy 4: Exact text match", - "steps": [ - { - "action": "click", - "locator": { - "strategy": "text", - "value": "93 Wimborne Road" - } - } - ] - }, - { - "name": "Strategy 5: Vision LLM with scroll capability", - "escalate": { - "type": "vision_llm", - "prompt": "Find and click the address matching '93 Wimborne Road, Bournemouth BH8 8BN' in the address list. CRITICAL INSTRUCTIONS:\n\n1. FIRST SEARCH: Look carefully at ALL visible addresses for one containing:\n - Building number: '93'\n - Street name: 'Wimborne Road'\n - Format may vary (case, commas, word order)\n\n2. IF FOUND: Click it immediately\n\n3. IF NOT VISIBLE:\n a) The list is likely SCROLLABLE - look for:\n - A dropdown/select element that can be scrolled\n - Scroll arrows (up/down)\n - A scrollbar on the address list\n b) SCROLL DOWN within the list (not the whole page)\n c) After scrolling, SEARCH AGAIN for the address\n d) Repeat: scroll -> search -> scroll -> search (up to 3 scroll attempts)\n\n4. MATCHING RULES:\n - Accept variations: '93 Wimborne Road', '93, Wimborne Road', '93 Wimborne Road, City'\n - Case-insensitive matching\n - Must have BOTH building number AND street name\n\n5. MAX ACTIONS: 8 actions total (including scrolls and clicks)\n\n6. IF STILL NOT FOUND: Report failure.", - "timeout": 30000, - "max_actions": 8 - } } ] }, + { + "action": "execute_js", + "description": "Force Angular change event on select element - required when Vision LLM fallback clicks option text instead of using select_option()", + "script": "(() => { const sel = document.querySelector('select.custom-select'); if (sel && sel.selectedIndex > 0) { sel.dispatchEvent(new Event('change', { bubbles: true })); return 'change_event_dispatched_index_' + sel.selectedIndex; } return 'no_select_or_default'; })()" + }, { "action": "wait", "description": "Random delay before submit to avoid bot detection", "duration_range": [ - 1500, - 3500 + 2000, + 5000 ] }, { @@ -752,54 +726,18 @@ "description": "Capture address details page for cloud extraction" }, { - "action": "extract_dom", - "description": "Extract Exchange Code, L2SID, and ONT Details from page HTML", - "extractions": [ - { - "name": "exchange_code", - "selector": ".ExhangeCodeSetup > span:last-child", - "fallback_js": "(() => { const wrapper = document.querySelector('.ExhangeCodeSetup'); if (wrapper) { const spans = wrapper.querySelectorAll('span'); for (const s of spans) { if (!s.classList.contains('ExhangeCodeSetupTwo') && s.textContent.trim() && !s.textContent.includes('Exchange Code')) { return s.textContent.trim(); } } } return null; })()" - }, - { - "name": "l2sid_new_ont", - "selector": "tr:has(th:has-text('L2SID (New ONT)')) td", - "fallback_js": "(() => { const rows = document.querySelectorAll('tr'); for (const r of rows) { const th = r.querySelector('th'); if (th && th.textContent.includes('L2SID (New ONT)')) { const td = r.querySelector('td'); return td ? td.textContent.trim() : null; } } return null; })()" - }, - { - "name": "fttp_existing_ont", - "selector": "tr:has(th:has-text('FTTP Existing ONT Available')) td", - "fallback_js": "(() => { const rows = document.querySelectorAll('tr'); for (const r of rows) { const th = r.querySelector('th'); if (th && th.textContent.includes('FTTP Existing ONT Available')) { const td = r.querySelector('td'); return td ? td.textContent.trim() : null; } } return null; })()" - }, - { - "name": "fttp_new_ont", - "selector": "tr:has(th:has-text('FTTP New ONT Available')) td", - "fallback_js": "(() => { const rows = document.querySelectorAll('tr'); for (const r of rows) { const th = r.querySelector('th'); if (th && th.textContent.includes('FTTP New ONT Available')) { const td = r.querySelector('td'); return td ? td.textContent.trim() : null; } } return null; })()" - }, - { - "name": "ont_reference", - "selector": "table:has(th:has-text('ONT Details')) tr:has(td:has-text('Working')) td:nth-child(2)", - "fallback_js": "(() => { const tables = document.querySelectorAll('table'); for (const t of tables) { const header = t.querySelector('th'); if (header && header.textContent.includes('ONT Details')) { const rows = t.querySelectorAll('tr'); for (const r of rows) { if (r.textContent.includes('Working')) { const cells = r.querySelectorAll('td'); return cells[1]?.textContent?.trim() || null; } } } } return null; })()" - }, - { - "name": "ont_serial_no", - "selector": "table:has(th:has-text('ONT Details')) tr:has(td:has-text('Working')) td:nth-child(3)", - "fallback_js": "(() => { const tables = document.querySelectorAll('table'); for (const t of tables) { const header = t.querySelector('th'); if (header && header.textContent.includes('ONT Details')) { const rows = t.querySelectorAll('tr'); for (const r of rows) { if (r.textContent.includes('Working')) { const cells = r.querySelectorAll('td'); return cells[2]?.textContent?.trim() || null; } } } } return null; })()" - }, - { - "name": "port_service_id", - "selector": "table:has(th:has-text('ONT Details')) tr:has(td:has-text('Working')) td:nth-child(4)", - "fallback_js": "(() => { const tables = document.querySelectorAll('table'); for (const t of tables) { const header = t.querySelector('th'); if (header && header.textContent.includes('ONT Details')) { const rows = t.querySelectorAll('tr'); for (const r of rows) { if (r.textContent.includes('Working')) { const cells = r.querySelectorAll('td'); return cells[3]?.textContent?.trim() || null; } } } } return null; })()" - }, - { - "name": "wbc_fttp_rag", - "selector": "tr:has(th:has-text('WBC FTTP')) td:first-of-type", - "fallback_js": "(() => { const rows = document.querySelectorAll('tr'); for (const r of rows) { const th = r.querySelector('th'); if (th && th.textContent.includes('WBC FTTP')) { const td = r.querySelector('td'); return td ? td.textContent.trim() : null; } } return null; })()" - }, - { - "name": "premise_type", - "selector": "tr:has(th:has-text('Premise Type')) td", - "fallback_js": "(() => { const rows = document.querySelectorAll('tr'); for (const r of rows) { const th = r.querySelector('th'); if (th && th.textContent.includes('Premise Type')) { const td = r.querySelector('td'); return td ? td.textContent.trim() : null; } } return null; })()" - } + "action": "extract_by_llm", + "description": "Use LLM to extract values from page HTML - more resilient than CSS selectors", + "fields": [ + {"name": "exchange_code", "description": "Exchange Code value (alphanumeric, e.g. 'LNBOU')"}, + {"name": "l2sid_new_ont", "description": "L2SID (New ONT) value - long alphanumeric string"}, + {"name": "fttp_existing_ont", "description": "FTTP Existing ONT Available - Yes/No or similar"}, + {"name": "fttp_new_ont", "description": "FTTP New ONT Available - Yes/No or similar"}, + {"name": "ont_reference", "description": "ONT Reference for Working status row - alphanumeric ID"}, + {"name": "ont_serial_no", "description": "ONT Serial Number for Working status row - alphanumeric"}, + {"name": "port_service_id", "description": "Port Service ID for Working status row - alphanumeric"}, + {"name": "wbc_fttp_rag", "description": "WBC FTTP RAG status (Green/Amber/Red)"}, + {"name": "premise_type", "description": "Premise Type value"} ] }, { diff --git a/lambda/tools/local-browser-agent/python/computer_agent_wrapper.py b/lambda/tools/local-browser-agent/python/computer_agent_wrapper.py index c0508df3..f9d66b16 100644 --- a/lambda/tools/local-browser-agent/python/computer_agent_wrapper.py +++ b/lambda/tools/local-browser-agent/python/computer_agent_wrapper.py @@ -655,9 +655,34 @@ def main(): parser.add_argument("--browser-channel", help="Browser channel (chrome, msedge, chromium)") parser.add_argument("--navigation-timeout", type=int, default=60000, help="Navigation timeout in ms") parser.add_argument("--user-data-dir", help="User data directory for browser profile") + parser.add_argument("--server-mode", action="store_true", help="Run in persistent server mode (NDJSON over stdin/stdout)") args = parser.parse_args() + # Server mode: delegate to OpenAIPlaywrightExecutor's run_server_mode + if args.server_mode: + import asyncio + from openai_playwright_executor import OpenAIPlaywrightExecutor + + browser_channel = args.browser_channel + if not browser_channel: + browser_channel = 'msedge' if platform.system() == 'Windows' else 'chrome' + + executor = OpenAIPlaywrightExecutor( + llm_provider='openai', + llm_model=os.environ.get('OPENAI_MODEL', 'gpt-4o-mini'), + llm_api_key=os.environ.get('OPENAI_API_KEY'), + s3_bucket=args.s3_bucket, + aws_profile=args.aws_profile, + headless=args.headless, + browser_channel=browser_channel, + user_data_dir=args.user_data_dir, + navigation_timeout=args.navigation_timeout, + ) + + asyncio.run(executor.run_server_mode()) + sys.exit(0) + try: # Determine input mode if args.script: diff --git a/lambda/tools/local-browser-agent/python/openai_playwright_executor.py b/lambda/tools/local-browser-agent/python/openai_playwright_executor.py index 220eaec0..8ad3a6e9 100644 --- a/lambda/tools/local-browser-agent/python/openai_playwright_executor.py +++ b/lambda/tools/local-browser-agent/python/openai_playwright_executor.py @@ -781,6 +781,13 @@ async def run_server_mode(self): - Logs/progress go to stderr (unchanged) - EOF on stdin = shutdown signal → close browser and exit """ + # On Windows, ensure stdin/stdout use UTF-8 encoding for NDJSON protocol. + # Without this, cp1252 encoding causes OSError on pipe writes. + if sys.platform == 'win32': + import io + sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', newline='\n') + sys.stdin = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8', newline='\n') + print(f"\n{'='*60}", file=sys.stderr) print(f"Server Mode Starting", file=sys.stderr) print(f"{'='*60}", file=sys.stderr) @@ -833,11 +840,56 @@ async def run_server_mode(self): # Signal readiness print(json.dumps({"status": "ready"}), flush=True) - # Execute the script with browser lifecycle skipped - result = await self.execute_script(script, skip_browser_lifecycle=True) + # Check browser health before executing. If the page/context + # has died (e.g. crash, navigation error), try to recover. + try: + if self.page and not self.page.is_closed(): + # Quick health check - evaluate a trivial expression + await self.page.evaluate("1") + else: + raise Exception("Page is closed") + except Exception as health_err: + print(f" ⚠ Browser health check failed: {health_err}", file=sys.stderr) + print(f" ⚠ Attempting browser recovery...", file=sys.stderr) + try: + await self._cleanup_browser() + await self._init_browser() + print(f" ✓ Browser recovered successfully", file=sys.stderr) + except Exception as recovery_err: + print(f" ✗ Browser recovery failed: {recovery_err}", file=sys.stderr) + traceback.print_exc(file=sys.stderr) + result = {"success": False, "error": f"Browser recovery failed: {recovery_err}"} + try: + print(json.dumps(result, default=str), flush=True) + except Exception: + print(json.dumps({"success": False, "error": "Browser dead, recovery failed"}), flush=True) + continue - # Write result as single JSON line to stdout - print(json.dumps(result), flush=True) + # Execute the script with browser lifecycle skipped. + # Wrap in try/except to prevent any unhandled error from killing + # the persistent server process. The loop must continue. + try: + result = await self.execute_script(script, skip_browser_lifecycle=True) + except Exception as e: + print(f"\n✗ Unhandled error during script execution: {e}", file=sys.stderr) + traceback.print_exc(file=sys.stderr) + result = { + "success": False, + "error": f"Unhandled execution error: {e}", + "script_name": script.get("name", "Unknown"), + } + + # Write result as single JSON line to stdout. + # Use default=str to handle any non-serializable objects + # (Playwright elements, Path objects, etc.) that may leak + # into the result dict from step execution. + try: + print(json.dumps(result, default=str), flush=True) + except Exception as e: + print(f"\n✗ Failed to serialize result: {e}", file=sys.stderr) + traceback.print_exc(file=sys.stderr) + fallback = {"success": False, "error": f"Result serialization error: {e}"} + print(json.dumps(fallback), flush=True) except KeyboardInterrupt: print(f"Server mode interrupted by user", file=sys.stderr) @@ -1090,8 +1142,10 @@ async def _execute_step(self, step: Dict[str, Any], step_num: int) -> Dict[str, "screenshot": self._step_screenshot, "extract": self._step_extract, "extract_dom": self._step_extract_dom, + "extract_by_llm": self._step_extract_by_llm, "execute_js": self._step_execute_js, "press": self._step_press, + "select_by_llm": self._step_select_by_llm, "error": self._step_error, } @@ -1792,7 +1846,13 @@ async def _step_extract_dom(self, step: Dict[str, Any], step_num: int) -> Dict[s "error": "extract_dom requires 'extractions' array" } - print(f" Extracting {len(extractions)} fields from DOM...", file=sys.stderr) + # Log current URL for debugging extraction failures + try: + current_url = self.page.url + print(f" Extracting {len(extractions)} fields from DOM (URL: {current_url})...", file=sys.stderr) + except Exception: + current_url = "unknown" + print(f" Extracting {len(extractions)} fields from DOM (URL: unknown)...", file=sys.stderr) results = {} errors = [] @@ -1948,6 +2008,32 @@ async def _step_extract_dom(self, step: Dict[str, Any], step_num: int) -> Dict[s except Exception: page_url = None + # Diagnostic: if most fields are missing, log page state to help debug + if len(results) <= 1 and len(extractions) > 3: + print(f" ⚠ DOM extraction mostly empty ({len(results)}/{len(extractions)} fields). Diagnosing page state...", file=sys.stderr) + try: + # Check for key page markers + markers = await self.page.evaluate(""" + (() => { + const markers = {}; + markers.title = document.title; + markers.hasExchangeCode = !!document.querySelector('.ExhangeCodeSetup'); + markers.hasFeaturedProducts = !!document.querySelector('th'); + markers.tableCount = document.querySelectorAll('table').length; + markers.hasError = !!(document.body.textContent.match(/unavailable|error|sorry/i)); + markers.bodyLength = document.body.textContent.length; + // Get first 500 chars of visible text for context + markers.bodyPreview = document.body.innerText.substring(0, 500).replace(/\\s+/g, ' '); + return markers; + })() + """) + print(f" ⚠ Page markers: title='{markers.get('title')}', exchangeCode={markers.get('hasExchangeCode')}, " + f"tables={markers.get('tableCount')}, hasError={markers.get('hasError')}, bodyLen={markers.get('bodyLength')}", file=sys.stderr) + if markers.get('bodyPreview'): + print(f" ⚠ Page preview: {markers['bodyPreview'][:200]}", file=sys.stderr) + except Exception as diag_err: + print(f" ⚠ Diagnostic failed: {diag_err}", file=sys.stderr) + return { "success": True, "action": "extract_dom", @@ -1958,6 +2044,149 @@ async def _step_extract_dom(self, step: Dict[str, Any], step_num: int) -> Dict[s "page_url": page_url, } + async def _step_extract_by_llm(self, step: Dict[str, Any], step_num: int) -> Dict[str, Any]: + """ + Extract values from page HTML using LLM text analysis. + + More resilient than CSS-selector-based extract_dom when page structure + varies or doesn't fully load. Sends cleaned HTML to the LLM with a list + of fields to extract. + + Usage in script: + { + "action": "extract_by_llm", + "description": "Extract key values from results page", + "fields": [ + {"name": "exchange_code", "description": "Exchange Code value"}, + {"name": "ont_reference", "description": "ONT Reference for Working status row"} + ], + "selector": "body", + "max_html_length": 50000 + } + """ + fields = step.get("fields", []) + if not fields: + return { + "success": False, + "action": "extract_by_llm", + "error": "extract_by_llm requires 'fields' array", + } + + container_selector = step.get("selector", "body") + max_html_length = step.get("max_html_length", 50000) + + # 1. Get page HTML, scoped to container if specified, with non-content elements stripped + try: + html = await self.page.evaluate(""" + (selector) => { + const el = document.querySelector(selector) || document.body; + const clone = el.cloneNode(true); + clone.querySelectorAll('script, style, svg, link, meta, noscript').forEach(e => e.remove()); + return clone.innerHTML; + } + """, container_selector) + except Exception as e: + return { + "success": False, + "action": "extract_by_llm", + "error": f"Failed to get page HTML: {e}", + } + + if not html or not html.strip(): + return { + "success": False, + "action": "extract_by_llm", + "error": "Page HTML is empty", + } + + # 2. Truncate if too large + if len(html) > max_html_length: + html = html[:max_html_length] + "\n... [truncated]" + + print(f" Extracting {len(fields)} fields via LLM ({len(html)} chars of HTML)...", file=sys.stderr) + + # 3. Build prompt + field_descriptions = "\n".join( + f"- {f['name']}: {f.get('description', f['name'])}" + for f in fields + ) + + prompt = f"""Extract the following values from this HTML page content. +Return a JSON object with each field name as key and the extracted value as string, or null if not found. + +FIELDS TO EXTRACT: +{field_descriptions} + +IMPORTANT: +- Extract exact values including all characters (these are IDs/codes, not descriptions) +- Values are typically in