From 4feb475c609937a464291eb6f64c34bc8b9855d9 Mon Sep 17 00:00:00 2001 From: zzjchen Date: Thu, 24 Oct 2024 16:26:21 +0800 Subject: [PATCH] use coord to help Element path exact match --- evaluate/evaluate_utils.py | 36 ++++++++++++++--------------- evaluate/step_score.py | 46 +++++++++++++++++++++----------------- 2 files changed, 43 insertions(+), 39 deletions(-) diff --git a/evaluate/evaluate_utils.py b/evaluate/evaluate_utils.py index 195dfc3..0b7e205 100644 --- a/evaluate/evaluate_utils.py +++ b/evaluate/evaluate_utils.py @@ -118,7 +118,9 @@ def get_netloc(url: str) -> str: return netloc -async def step_evaluate(page: Page, evaluate_steps=[], input_path=None, element_value=None, text_content=None): +async def step_evaluate(page: Page, evaluate_steps=[], input_path=None, element_value=None, text_content=None,input_coords=None): + # input_coords should be (x,y) in pixels, if not None + # and will be used in ElementEvaluator.path_exact_match() """Evaluate step score""" step_score = 0 match_result = [] @@ -135,15 +137,12 @@ async def step_evaluate(page: Page, evaluate_steps=[], input_path=None, element_ elif match_function == "url_semantic_match": score = await URLEvaluator.url_semantic_match( page.url, evaluate["reference_answer"], evaluate["key"]) - # print(score, "url_semantic_match") elif match_function == "element_path_exactly_match": input_netloc = get_netloc(page.url) method = evaluate["method"] - score = ElementEvaluator.path_exact_match( - input_path, evaluate["reference_answer"], method, await page.content(), input_netloc, - evaluate["netloc"]) - # print(score, "path_exact_match:", input_path, - # "***", evaluate["reference_answer"]) + score = await ElementEvaluator.path_exact_match( + input_path, evaluate["reference_answer"], method, page, input_netloc, + evaluate["netloc"],input_coords=input_coords) elif match_function == "element_path_included_match": pass # * Temporarily not doing @@ -155,9 +154,9 @@ async def step_evaluate(page: Page, 
evaluate_steps=[], input_path=None, element_ # print(element_value) # print(await page.locator(input_path).input_value()) if "path" in evaluate.keys(): - path_score = ElementEvaluator.path_exact_match(input_path, evaluate["path"], "selector", - await page.content(), input_netloc, - evaluate["netloc"]) + path_score = await ElementEvaluator.path_exact_match(input_path, evaluate["path"], "selector", + page, input_netloc, + evaluate["netloc"],input_coords=input_coords) if path_score == 0: # print("Path mismatch in value evaluation") score = 0 @@ -172,12 +171,12 @@ async def step_evaluate(page: Page, evaluate_steps=[], input_path=None, element_ else: score = 0 elif match_function == "element_value_included_match": - if input_path is not None and element_value is not None: + if (input_path is not None or input_coords is not None) and element_value is not None: input_netloc = get_netloc(page.url) if "path" in evaluate.keys(): - path_score = ElementEvaluator.path_exact_match(input_path, evaluate["path"], "selector", - await page.content(), input_netloc, - evaluate["netloc"]) + path_score = await ElementEvaluator.path_exact_match(input_path, evaluate["path"], "selector", + page, input_netloc, + evaluate["netloc"],input_coords=input_coords) if path_score == 0: # print("Path mismatch in value evaluation") score = 0 @@ -192,14 +191,14 @@ async def step_evaluate(page: Page, evaluate_steps=[], input_path=None, element_ else: score = 0 elif match_function == "element_value_semantic_match": - if input_path is not None and element_value is not None: + if (input_path is not None or input_coords is not None) and element_value is not None: input_netloc = get_netloc(page.url) if len(element_value) > 0: if "path" in evaluate.keys(): - path_score = ElementEvaluator.path_exact_match(input_path, evaluate["path"], "selector", - await page.content(), input_netloc, - evaluate["netloc"]) + path_score = await ElementEvaluator.path_exact_match(input_path, evaluate["path"], "selector", + page, 
input_netloc, + evaluate["netloc"],input_coords=input_coords) if path_score == 0: # print("Path mismatch in value evaluation") score = 0 @@ -247,6 +246,7 @@ async def step_evaluate(page: Page, evaluate_steps=[], input_path=None, element_ return evaluate_steps, match_result + def parse_current_trace(response: dict, env: AsyncHTMLEnvironment, step_reward: dict): thought = response["description"].get("thought") action_type = response.get( diff --git a/evaluate/step_score.py b/evaluate/step_score.py index a20f77e..36f2414 100644 --- a/evaluate/step_score.py +++ b/evaluate/step_score.py @@ -77,7 +77,26 @@ async def url_semantic_match(input_url, semantic_method, key=False): class ElementEvaluator(StepEvaluator): '''Element evaluation and scoring''' @staticmethod - def path_exact_match(input_answer, reference_answer, method, html_content, input_netloc, reference_netloc): + async def is_same_element(page, input_coord, reference_element_handle): + x,y=input_coord + # Locator.bounding_box() is a coroutine in the async API and must be awaited; 2s is usually enough, 5s is used to be safe + bounding_box = await reference_element_handle.bounding_box(timeout=5000) + if bounding_box: + element_x = bounding_box['x'] + element_y = bounding_box['y'] + element_width = bounding_box['width'] + element_height = bounding_box['height'] + # Check if the given (x, y) is within the bounding box + if (element_x <= x <= element_x + element_width and + element_y <= y <= element_y + element_height): + return True + return False + + + @staticmethod + async def path_exact_match(input_answer, reference_answer, method, page, input_netloc, reference_netloc,input_coords=None): + # input_coords should be (x,y) in pixels, if not None + # and is checked against the reference element's bounding box in the "selector" branch score = 0 if method == "xpath": if reference_netloc != input_netloc: @@ -85,6 +104,7 @@ def path_exact_match(input_answer, reference_answer, method, html_content, input # "input_netloc:", input_netloc) return 0 try: + html_content = await page.content() tree = 
html.fromstring(html_content) input_elements = tree.xpath(input_answer) reference_elements = tree.xpath(reference_answer) @@ -105,32 +125,16 @@ def path_exact_match(input_answer, reference_answer, method, html_content, input pass else: score = 0 - elif method == "selector": + elif method == "selector": # modified to use coords if reference_netloc != input_netloc: # print("reference_netloc:", reference_netloc, # "input_netloc:", input_netloc) return 0 try: - soup = BeautifulSoup(html_content, 'html.parser') - input_element = soup.select_one(input_answer) - reference_element = soup.select_one(reference_answer) + input_element = input_coords # the input element is identified by its (x, y) coordinates + reference_element = page.locator(reference_answer) if (input_element is not None) and (reference_element is not None): - score = input_element is reference_element - - try: - if reference_element.name in MapTagNameList: - # parent_elements = reference_element.parent - # score_parent = input_element is parent_elements - # score = max(score, score_parent) - trace_up_count = 0 - current_element = reference_element - while trace_up_count < 3 and score == 0: - trace_up_count += 1 - current_element = current_element.parent - score_parent = input_element is current_element - score = max(score, score_parent) - except: - pass + score = await ElementEvaluator.is_same_element(page, input_coord=input_element, reference_element_handle=reference_element) except: score = 0 # result_score = MatchFunction.include_match(