diff --git a/Brain/AI/src/constants.py b/Brain/AI/src/constants.py index d900365a..e7e5a05b 100644 --- a/Brain/AI/src/constants.py +++ b/Brain/AI/src/constants.py @@ -1,3 +1,4 @@ +import logging import os SRC_PATH = os.path.dirname(__file__) # Where your .py file is located @@ -5,12 +6,25 @@ SCREENSHOT_PATH = os.path.join(RESOURCES_PATH, 'screenshot') VALIDATION_PATH = os.path.join(SRC_PATH, 'validation') BENCHMARK_PATH = os.path.join(SRC_PATH, 'benchmark') +MOTION_MODULE = os.path.join(SRC_PATH, 'motion_module') +MOTION_CALIBRATION_PATH = os.path.join(MOTION_MODULE, 'calibrations') SCREENSHOT_FILENAME = 'screenshot.png' SCREENSHOT_FULLPATH = os.path.join(SCREENSHOT_PATH, SCREENSHOT_FILENAME) CLIENT_PATH = os.path.join(SRC_PATH, '../../../tappy-client/clients/python') DLV_PATH = os.path.join(RESOURCES_PATH, 'dlv') # change IP addresses to your needs. -SCREENSHOT_SERVER_IP = '192.168.0.30' # IP of the mobile phone with Screenshotserver on board -TAPPY_ORIGINAL_SERVER_IP = '127.0.0.1' # IP of the server where the robot is attached to -USE_ADB = True # True if you want to use adb to get the screenshot, False if you want to use the Screenshotserver +SCREENSHOT_SERVER_IP = '192.168.0.30' # IP of the mobile phone with Screenshotserver on board +TAPPY_ORIGINAL_SERVER_IP = 'http://127.0.0.1:8000' # IP of the server where the robot is attached to +USE_ADB = True # True if you want to use adb to get the screenshot, False if you want to use the Screenshotserver + + +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) +if not logger.handlers: + ch = logging.StreamHandler() + ch.setLevel(logging.DEBUG) + # Niente, colori, non sono riuscito a farli funzionare bene... 
class App(customtkinter.CTk):
    """Desktop control panel for manually driving a ``MotionModule``.

    The window has three columns: a live phone screenshot (left), the
    stitched "desk" map (middle, pannable) and a control panel (right).
    Two daemon threads refresh the screenshot and the desk image at the
    intervals configured from the control panel.
    """

    def __init__(self):
        super().__init__()

        self.title("Controllable Motion Module")
        self.geometry("1600x900")
        customtkinter.set_default_color_theme("dark-blue")
        customtkinter.set_appearance_mode("dark")  # dark theme supremacy

        # Fully opaque white placeholder mask until the user selects one.
        dummy_mask = np.ones((2340, 1080, 4), dtype=np.uint8) * 255
        self.motion_module = MotionModule(
            ui_mask=dummy_mask, swipe_calibrator=SwipeCalibrator(), force_headless=True
        )

        # Configure grid layout: only the middle (desk) column stretches.
        self.grid_columnconfigure(0, weight=0)
        self.grid_columnconfigure(1, weight=1)
        self.grid_columnconfigure(2, weight=0)
        self.grid_rowconfigure(0, weight=1)

        # Left frame for phone screen
        self.left_frame = customtkinter.CTkFrame(self)
        self.left_frame.grid(row=0, column=0, padx=10, pady=10, sticky="nsew")

        self.screen_label = customtkinter.CTkLabel(self.left_frame, text="Live Feed")
        self.screen_label.pack(padx=10, pady=5)

        self.image_label = customtkinter.CTkLabel(self.left_frame, text="")
        self.image_label.pack(padx=10, pady=5)
        # An empty event sequence is rejected by Tk; a left click is what
        # on_screen_click handles.
        self.image_label.bind("<Button-1>", self.on_screen_click)

        # Middle frame for Desk
        self.desk_frame = customtkinter.CTkFrame(self)
        self.desk_frame.grid(row=0, column=1, padx=10, pady=10, sticky="nsew")

        self.desk_title_label = customtkinter.CTkLabel(
            self.desk_frame, text="Desk (Map)"
        )
        self.desk_title_label.pack(padx=10, pady=2)

        self.desk_canvas = tk.Canvas(
            self.desk_frame, bg="#212121", highlightthickness=0
        )
        self.desk_canvas.pack(fill="both", expand=True)

        # Press / drag / release of the left button implement pan-or-click.
        self.desk_canvas.bind("<ButtonPress-1>", self.on_desk_press)
        self.desk_canvas.bind("<B1-Motion>", self.on_desk_drag)
        self.desk_canvas.bind("<ButtonRelease-1>", self.on_desk_release)

        self.desk_image_item = None
        self.desk_pil_image = None
        # Kept on self so Tk's image is not garbage collected while shown.
        self.desk_tk_image = None
        self.pan_start_x = 0
        self.pan_start_y = 0
        self.is_panning = False
        # The desk image is shown shrunk by this integer factor.
        self.desk_scale = 2

        # Refresh periods, in seconds, of the two background loops.
        self.screen_update_interval = 1.0
        self.desk_update_interval = 2.0

        # Right frame for controls
        self.right_frame = customtkinter.CTkFrame(self)
        self.right_frame.grid(row=0, column=2, padx=10, pady=10, sticky="ns")

        self.control_label = customtkinter.CTkLabel(
            self.right_frame,
            text="Controls",
            font=customtkinter.CTkFont(size=20, weight="bold"),
        )
        self.control_label.pack(padx=10, pady=10)

        self.__standard_input_group()
        self.__movement_group()
        self.__position_and_desk_group()
        self.__configuration_group()
        self.__extra_group()

        self.position_label = customtkinter.CTkLabel(
            self.right_frame,
            text="Position: (0, 0)",
            font=customtkinter.CTkFont(size=16, weight="bold"),
        )
        self.position_label.pack(side="bottom", padx=10, pady=10)

        self.status_label = customtkinter.CTkLabel(
            self.right_frame, text="Status: Idle", wraplength=200
        )
        self.status_label.pack(side="bottom", padx=10, pady=10)

        # Start live feed thread
        self.stop_event = threading.Event()
        self.thread = threading.Thread(target=self.live_feed_loop, daemon=True)
        self.thread.start()

        # Start desk update thread
        self.desk_thread = threading.Thread(target=self.desk_update_loop, daemon=True)
        self.desk_thread.start()

    def __standard_input_group(self):
        """Build the two numeric entries shared by every (x, y) command."""
        self.input_frame = customtkinter.CTkFrame(self.right_frame)
        self.input_frame.pack(padx=10, pady=10, fill="x")

        self.label_x = customtkinter.CTkLabel(self.input_frame, text="X, dX:")
        self.label_x.grid(row=0, column=0, padx=5, pady=5)
        self.entry_x = customtkinter.CTkEntry(self.input_frame, width=100)
        self.entry_x.grid(row=0, column=1, padx=5, pady=5)
        self.entry_x.insert(0, "0")

        self.label_y = customtkinter.CTkLabel(self.input_frame, text="Y, dY:")
        self.label_y.grid(row=1, column=0, padx=5, pady=5)
        self.entry_y = customtkinter.CTkEntry(self.input_frame, width=100)
        self.entry_y.grid(row=1, column=1, padx=5, pady=5)
        self.entry_y.insert(0, "0")

    def __movement_group(self):
        """Build the "Movement" buttons (goto, goto-for-action, move)."""
        self.group1_label = customtkinter.CTkLabel(
            self.right_frame,
            text="Movement",
            font=customtkinter.CTkFont(size=14, weight="bold"),
        )
        self.group1_label.pack(padx=10, pady=(15, 5))

        self.btn_goto = customtkinter.CTkButton(
            self.right_frame, text="Goto(x, y)", command=self.cmd_goto
        )
        self.btn_goto.pack(padx=20, pady=5, fill="x")

        self.btn_goto_action = customtkinter.CTkButton(
            self.right_frame, text="Goto For Action(x, y)", command=self.cmd_goto_action
        )
        self.btn_goto_action.pack(padx=20, pady=5, fill="x")

        self.btn_move = customtkinter.CTkButton(
            self.right_frame, text="Move(dx, dy)", command=self.cmd_move
        )
        self.btn_move.pack(padx=20, pady=5, fill="x")

    def __position_and_desk_group(self):
        """Build the "Position & Desk" buttons."""
        self.group2_label = customtkinter.CTkLabel(
            self.right_frame,
            text="Position & Desk",
            font=customtkinter.CTkFont(size=14, weight="bold"),
        )
        self.group2_label.pack(padx=10, pady=(15, 5))

        self.btn_check_pos = customtkinter.CTkButton(
            self.right_frame,
            text="Check Position Changed",
            command=self.cmd_check_position,
        )
        self.btn_check_pos.pack(padx=20, pady=5, fill="x")

        self.btn_set_pos = customtkinter.CTkButton(
            self.right_frame, text="Set Position(x, y)", command=self.cmd_set_position
        )
        self.btn_set_pos.pack(padx=20, pady=5, fill="x")

        self.btn_collapse = customtkinter.CTkButton(
            self.right_frame, text="Collapse Desk", command=self.cmd_collapse_desk
        )
        self.btn_collapse.pack(padx=20, pady=5, fill="x")

        self.btn_reset_desk = customtkinter.CTkButton(
            self.right_frame, text="Reset Desk", command=self.cmd_reset_desk
        )
        self.btn_reset_desk.pack(padx=20, pady=5, fill="x")

    def __configuration_group(self):
        """Build the "Configuration" controls (mask, calibration, method)."""
        self.group3_label = customtkinter.CTkLabel(
            self.right_frame,
            text="Configuration",
            font=customtkinter.CTkFont(size=14, weight="bold"),
        )
        self.group3_label.pack(padx=10, pady=(15, 5))

        self.btn_select_mask = customtkinter.CTkButton(
            self.right_frame, text="Select Mask", command=self.cmd_select_mask
        )
        self.btn_select_mask.pack(padx=20, pady=5, fill="x")

        self.btn_select_calib = customtkinter.CTkButton(
            self.right_frame,
            text="Select Calibration",
            command=self.cmd_select_calibration,
        )
        self.btn_select_calib.pack(padx=20, pady=5, fill="x")

        # Method name handed to SwipeCalibrator when a calibration is loaded.
        self.calib_method_var = customtkinter.StringVar(value="linear_regression")
        self.calib_dropdown = customtkinter.CTkOptionMenu(
            self.right_frame,
            values=[
                "linear_regression",
                "linear_interpolation",
                "ransac_regression",
                "huber_regression",
            ],
            variable=self.calib_method_var,
        )
        self.calib_dropdown.pack(padx=20, pady=5, fill="x")

    def __extra_group(self):
        """Build the "Extra" controls (save desk, refresh intervals)."""
        self.group4_label = customtkinter.CTkLabel(
            self.right_frame,
            text="Extra",
            font=customtkinter.CTkFont(size=14, weight="bold"),
        )
        self.group4_label.pack(padx=10, pady=(15, 5))

        self.btn_save_desk = customtkinter.CTkButton(
            self.right_frame, text="Save Desk", command=self.cmd_save_desk
        )
        self.btn_save_desk.pack(padx=20, pady=5, fill="x")

        self.screen_interval_frame = customtkinter.CTkFrame(self.right_frame)
        self.screen_interval_frame.pack(padx=10, pady=5, fill="x")

        self.entry_screen_interval = customtkinter.CTkEntry(
            self.screen_interval_frame, width=60
        )
        self.entry_screen_interval.pack(side="left", padx=5)
        self.entry_screen_interval.insert(0, str(self.screen_update_interval))

        self.btn_set_screen_interval = customtkinter.CTkButton(
            self.screen_interval_frame,
            text="Set Screen Interval",
            command=self.cmd_set_screen_interval,
        )
        self.btn_set_screen_interval.pack(side="left", padx=5, fill="x", expand=True)

        self.desk_interval_frame = customtkinter.CTkFrame(self.right_frame)
        self.desk_interval_frame.pack(padx=10, pady=5, fill="x")

        self.entry_desk_interval = customtkinter.CTkEntry(
            self.desk_interval_frame, width=60
        )
        self.entry_desk_interval.pack(side="left", padx=5)
        self.entry_desk_interval.insert(0, str(self.desk_update_interval))

        self.btn_set_desk_interval = customtkinter.CTkButton(
            self.desk_interval_frame,
            text="Set Desk Interval",
            command=self.cmd_set_desk_interval,
        )
        self.btn_set_desk_interval.pack(side="left", padx=5, fill="x", expand=True)

    def _populate_inputs(self, pos_x, pos_y):
        """Overwrite the X and Y entries with the given values."""
        self.entry_x.delete(0, "end")
        self.entry_x.insert(0, str(pos_x))
        self.entry_y.delete(0, "end")
        self.entry_y.insert(0, str(pos_y))

    def get_inputs(self):
        """Return the X and Y entries as ``(int, int)``.

        Returns ``(None, None)`` and reports it in the status label when
        either entry is not an integer.
        """
        try:
            return int(self.entry_x.get()), int(self.entry_y.get())
        except ValueError:
            self.status_label.configure(text="Status: Invalid Input")
            return None, None

    def cmd_select_mask(self):
        """Ask for a mask image and rebuild the motion module around it."""
        filepath = filedialog.askopenfilename(
            title="Select a Mask File",
            initialdir=os.path.join(MOTION_MODULE, "resources"),
            filetypes=(("PNG files", "*.png"), ("All files", "*.*")),
        )
        if not filepath:
            return

        try:
            # NOTE(review): __init__ passes a NumPy array as ui_mask while
            # this passes a PIL image; confirm MotionModule accepts both.
            new_mask = Image.open(filepath).convert("RGBA")
            self.motion_module = MotionModule(
                ui_mask=new_mask,
                swipe_calibrator=self.motion_module.swipe_calibrator,
                force_headless=self.motion_module._force_headless,
            )
            self.status_label.configure(text="Status: Loaded new mask.")
        except Exception as e:
            self.status_label.configure(text=f"Status: Error loading mask: {e}")

    def cmd_select_calibration(self):
        """Ask for a calibration file and rebuild the motion module with it.

        The calibration method is the one currently selected in the dropdown.
        """
        filepath = filedialog.askopenfilename(
            title="Select a Calibration File",
            initialdir=os.path.join(MOTION_MODULE, "resources"),
            filetypes=(("JSON files", "*.json"), ("All files", "*.*")),
        )
        if not filepath:
            return

        try:
            method = self.calib_method_var.get()
            calibrator = SwipeCalibrator(filepath, method=method)
            self.motion_module = MotionModule(
                ui_mask=self.motion_module.get_mask_copy(),
                swipe_calibrator=calibrator,
                force_headless=self.motion_module._force_headless,
            )
            self.status_label.configure(text=f"Status: Loaded calibration ({method}).")
        except Exception as e:
            self.status_label.configure(text=f"Status: Error loading calibration: {e}")

    def cmd_save_desk(self):
        """Save the current desk image to a file chosen by the user."""
        desk = self.motion_module.get_desk_copy()
        if desk is not None:
            try:
                filepath = filedialog.asksaveasfilename(
                    title="Save Desk Image",
                    initialdir=os.path.join(MOTION_MODULE, "resources"),
                    defaultextension=".png",
                    filetypes=(("PNG files", "*.png"), ("All files", "*.*")),
                )
                if filepath:
                    Image.fromarray(desk).save(filepath)
                    self.status_label.configure(
                        text=f"Status: Desk saved to {os.path.basename(filepath)}"
                    )
            except Exception as e:
                self.status_label.configure(text=f"Status: Error saving desk: {e}")
        else:
            self.status_label.configure(text="Status: No desk to save")

    def cmd_set_screen_interval(self):
        """Apply the screenshot refresh period (seconds, must be > 0)."""
        try:
            val = float(self.entry_screen_interval.get())
            if val > 0:
                self.screen_update_interval = val
                self.status_label.configure(
                    text=f"Status: Screen interval set to {val}s"
                )
            else:
                self.status_label.configure(text="Status: Interval must be > 0")
        except ValueError:
            self.status_label.configure(text="Status: Invalid screen interval")

    def cmd_set_desk_interval(self):
        """Apply the desk refresh period (seconds, must be > 0)."""
        try:
            val = float(self.entry_desk_interval.get())
            if val > 0:
                self.desk_update_interval = val
                self.status_label.configure(text=f"Status: Desk interval set to {val}s")
            else:
                self.status_label.configure(text="Status: Interval must be > 0")
        except ValueError:
            self.status_label.configure(text="Status: Invalid desk interval")

    def cmd_goto(self):
        """Run ``motion_module.goto((x, y))`` on a worker thread."""
        x, y = self.get_inputs()
        if x is not None and y is not None:
            self.status_label.configure(text=f"Status: Executing Goto({x}, {y})")
            threading.Thread(
                target=lambda: self.motion_module.goto((x, y)), daemon=True
            ).start()

    def cmd_goto_action(self):
        """Run ``motion_module.goto_for_action((x, y))`` on a worker thread."""
        x, y = self.get_inputs()
        if x is not None and y is not None:
            self.status_label.configure(text=f"Status: Executing GotoAction({x}, {y})")
            threading.Thread(
                target=lambda: self.motion_module.goto_for_action((x, y)), daemon=True
            ).start()

    def cmd_move(self):
        """Run ``motion_module.move_with_offset((dx, dy))`` on a worker thread."""
        dx, dy = self.get_inputs()
        if dx is not None and dy is not None:
            self.status_label.configure(text=f"Status: Executing Move({dx}, {dy})")
            threading.Thread(
                target=lambda: self.motion_module.move_with_offset((dx, dy)),
                daemon=True,
            ).start()

    def cmd_check_position(self):
        """Check on a worker thread whether the position changed and report it."""
        self.status_label.configure(text="Status: Checking position...")

        def run():
            try:
                changed = self.motion_module.check_is_position_changed()
                self.after(
                    0,
                    lambda: self.status_label.configure(
                        text=f"Status: Position Changed? {changed}"
                    ),
                )
            except Exception as e:
                # Python unbinds ``e`` when the except block ends, but the
                # lambda runs later on the Tk loop, so keep our own reference.
                err = e
                self.after(
                    0, lambda: self.status_label.configure(text=f"Status: Error {err}")
                )

        threading.Thread(target=run, daemon=True).start()

    def cmd_set_position(self):
        """Tell the motion module that the current position is (x, y)."""
        x, y = self.get_inputs()
        if x is not None and y is not None:
            self.motion_module.set_current_position((x, y))
            self.status_label.configure(text=f"Status: Set Position to ({x}, {y})")

    def cmd_collapse_desk(self):
        """Call ``motion_module.collapse_desk()`` and report it."""
        self.motion_module.collapse_desk()
        self.status_label.configure(text="Status: Desk Collapsed")

    def cmd_reset_desk(self):
        """Clear the desk and refresh the desk view immediately."""
        self.motion_module.clear_desk()
        self.status_label.configure(text="Status: Desk Cleared")
        self.update_desk()

    def on_desk_press(self, event):
        """Remember where the button went down; a pan has not started yet."""
        self.pan_start_x = event.x
        self.pan_start_y = event.y
        self.is_panning = False

    def on_desk_drag(self, event):
        """Pan the desk canvas once the pointer moved more than 2 px."""
        dx = event.x - self.pan_start_x
        dy = event.y - self.pan_start_y
        if abs(dx) > 2 or abs(dy) > 2:
            self.is_panning = True
            self.desk_canvas.move("all", dx, dy)
            self.pan_start_x = event.x
            self.pan_start_y = event.y

    def on_desk_release(self, event):
        """Treat a release without panning as a click on the desk."""
        if not self.is_panning:
            self.process_desk_click(event)
        self.is_panning = False

    def process_desk_click(self, event):
        """Translate a click on the desk image into desk/position coordinates.

        Clicks outside the image or on a fully transparent pixel are
        ignored. With Shift held, the resulting position is copied into the
        X/Y entries.
        """
        if self.desk_pil_image is None or self.desk_image_item is None:
            return

        try:
            coords = self.desk_canvas.coords(self.desk_image_item)
            img_x_offset = coords[0]
            img_y_offset = coords[1]
        except Exception:
            return

        click_x = int(event.x - img_x_offset)
        click_y = int(event.y - img_y_offset)

        w, h = self.desk_pil_image.size

        if click_x < 0 or click_x >= w or click_y < 0 or click_y >= h:
            return

        try:
            pixel = self.desk_pil_image.getpixel((click_x, click_y))
            if len(pixel) == 4 and pixel[3] == 0:
                return
        except Exception as e:
            print(f"Error checking pixel: {e}")
            return

        # Undo the display down-scaling to get coordinates on the real desk.
        original_x = click_x * self.desk_scale
        original_y = click_y * self.desk_scale

        print(f"Desk Click: {original_x}, {original_y}")

        desk_x = original_x
        desk_y = original_y

        pos_x, pos_y = self.motion_module.desk_to_position((desk_x, desk_y))
        self.status_label.configure(
            text=f"Desk: ({desk_x}, {desk_y}) | Pos: ({pos_x}, {pos_y})"
        )

        # Populate inputs on Shift+Click
        if event.state & 1:
            self._populate_inputs(pos_x, pos_y)

    def on_screen_click(self, event):
        """Translate a click on the screenshot into position/desk coordinates.

        Does nothing until a screenshot has been displayed. With Shift held,
        the resulting position is copied into the X/Y entries.
        """
        if not hasattr(self.image_label, "_original_size") or not hasattr(
            self.image_label, "_display_size"
        ):
            return

        widget_w = self.image_label.winfo_width()
        widget_h = self.image_label.winfo_height()

        img_w, img_h = self.image_label._display_size
        orig_w, orig_h = self.image_label._original_size

        # The image is treated as centred inside the (larger) label widget.
        pad_x = (widget_w - img_w) // 2
        pad_y = (widget_h - img_h) // 2

        x_on_img = event.x - pad_x
        y_on_img = event.y - pad_y

        if x_on_img < 0 or x_on_img >= img_w or y_on_img < 0 or y_on_img >= img_h:
            return

        scale_x = orig_w / img_w
        scale_y = orig_h / img_h

        screen_x = int(x_on_img * scale_x)
        screen_y = int(y_on_img * scale_y)

        current_pos = self.motion_module.position()
        pos_x = screen_x + current_pos[0]
        pos_y = screen_y + current_pos[1]

        desk_coord = self.motion_module.position_to_desk((pos_x, pos_y))

        print(
            f"Debug Screen Click: widget={widget_w}x{widget_h} img={img_w}x{img_h} event={event.x},{event.y} calc={x_on_img},{y_on_img}"
        )
        self.status_label.configure(
            text=f"Desk: {desk_coord} | Screen: ({screen_x}, {screen_y}) | Pos: ({pos_x}, {pos_y})"
        )

        # Populate inputs on Shift+Click
        if event.state & 1:
            self._populate_inputs(pos_x, pos_y)

    def update_image(self):
        """Fetch a screenshot and schedule its display on the Tk loop."""
        screenshot = get_screenshot(to_memory=True)
        if screenshot is not None and not isinstance(screenshot, bool):
            try:
                img = Image.fromarray(screenshot).convert("RGBA")
                self.after(0, lambda: self._display_screenshot(img))
            except Exception as e:
                print(f"Error processing image: {e}")

    def _display_screenshot(self, img):
        """Show ``img`` at a fixed height of 600 px, keeping its aspect ratio.

        Also records the displayed and original sizes on the label so that
        on_screen_click can map widget coordinates back to screen pixels.
        """
        try:
            target_h = 600
            img_w, img_h = img.size

            ratio = target_h / img_h
            target_w = int(img_w * ratio)

            ctk_image = customtkinter.CTkImage(
                light_image=img, dark_image=img, size=(target_w, target_h)
            )
            self.image_label.configure(image=ctk_image)
            self.image_label._image_ref = ctk_image
            self.image_label._display_size = (target_w, target_h)
            self.image_label._original_size = img.size
        except Exception as e:
            print(f"Error displaying screenshot: {e}")

    def update_desk(self):
        """Fetch the desk image and schedule its display on the Tk loop."""
        desk = self.motion_module.get_desk_copy()
        if desk is not None:
            try:
                img = Image.fromarray(desk).convert("RGBA")
                self.after(0, lambda: self._display_desk(img))
            except Exception as e:
                print(f"Error processing desk: {e}")

    def _display_desk(self, img):
        """Draw ``img`` on the desk canvas, shrunk by ``desk_scale``.

        The canvas item is created centred on first use; afterwards only its
        image is swapped, so any panning done by the user is preserved.
        """
        try:
            new_width = img.width // self.desk_scale
            new_height = img.height // self.desk_scale
            scaled_img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)

            self.desk_pil_image = scaled_img
            self.desk_tk_image = ImageTk.PhotoImage(scaled_img)

            if self.desk_image_item is None:
                cw = self.desk_canvas.winfo_width()
                ch = self.desk_canvas.winfo_height()

                x = (cw - scaled_img.width) // 2
                y = (ch - scaled_img.height) // 2
                self.desk_image_item = self.desk_canvas.create_image(
                    x, y, image=self.desk_tk_image, anchor="nw"
                )
            else:
                self.desk_canvas.itemconfig(
                    self.desk_image_item, image=self.desk_tk_image
                )
        except Exception as e:
            print(f"Error displaying desk: {e}")

    def update_position_label(self):
        """Refresh the position label; failures are ignored (best effort)."""
        try:
            pos = self.motion_module.position()
            self.position_label.configure(text=f"Position: {pos}")
        except Exception:
            # Best-effort refresh, but do not swallow KeyboardInterrupt or
            # SystemExit the way a bare ``except:`` would.
            pass

    def live_feed_loop(self):
        """Worker loop: refresh screenshot and position until stopped."""
        while not self.stop_event.is_set():
            self.update_image()
            self.after(0, self.update_position_label)
            time.sleep(self.screen_update_interval)

    def desk_update_loop(self):
        """Worker loop: refresh the desk image until stopped."""
        while not self.stop_event.is_set():
            self.update_desk()
            time.sleep(self.desk_update_interval)

    def on_closing(self):
        """Signal the worker loops to stop and destroy the window."""
        self.stop_event.set()
        self.destroy()
+adb exec-out getevent -lt | perl -ne ' + if (/\[\s*([\d.]+)\].*ABS_MT_POSITION_X\s+([0-9a-f]+)/) { + $t=$1; $x=hex($2); + } + if (/\[\s*([\d.]+)\].*ABS_MT_POSITION_Y\s+([0-9a-f]+)/) { + $t=$1; $y=hex($2); + if (defined $x) { + printf("[%s] X: %d, Y: %d\n", $t, $x, $y); + undef $x; # Reset for next touch point + } + }' +#!Spero non ci abbiate creduto, lho fatto scrivere a una AI a caso... col ***** che uso perl! diff --git a/Brain/AI/src/motion_module/gesture_utils/gesture_tracker.py b/Brain/AI/src/motion_module/gesture_utils/gesture_tracker.py new file mode 100644 index 00000000..5525e1f4 --- /dev/null +++ b/Brain/AI/src/motion_module/gesture_utils/gesture_tracker.py @@ -0,0 +1,221 @@ +import math +import queue +import re +import subprocess +import threading +import time +from contextlib import AbstractContextManager +from dataclasses import dataclass, field +from typing import List, Tuple + +from AI.src.constants import logger +from AI.src.motion_module.gesture_utils.processed_phone_screen_data import ( + extract_phone_screen_info, +) + + +@dataclass +class Gesture: + start_time: float + end_time: float + start_x: int | float + start_y: int | float + end_x: int | float + end_y: int | float + points: List[Tuple[int | float, int | float]] = field(default_factory=list) + is_finished: bool = True + + def __str__(self): + dx = self.end_x - self.start_x + dy = self.end_y - self.start_y + dist = math.hypot(dx, dy) + duration = (self.end_time - self.start_time) * 1000 + + points_str = f"{self.points}" + if len(self.points) > 10: + points_str = ( + f"[{self.points[0]}, ..., {self.points[-1]}] ({len(self.points)} items)" + ) + + return ( + f"[⌚] {time.ctime(self.start_time)}\n" + f"[⏱️] Duration: \t{duration:.0f} ms\n" + f"[🏁] Start: \t[{self.start_x}, {self.start_y}]\n" + f"[🏅] Finish:\t[{self.end_x}, {self.end_y}]\n" + f"[📍] Points:\t{points_str}\n" + f"[📐] Measured:\t[{dx}, {dy}] Dist: {dist:.2f}" + ) + + +class GestureTracker(threading.Thread, AbstractContextManager): 
+ """ + A class that tracks what happen on the connected device. + + Parameters: + output_queue: a queue to put the gesture objects into. + live_feed: if True, will also put live gestures into the output queue. + """ + + def __init__( + self, output_queue: queue.Queue = queue.Queue(), live_feed: bool = False + ): + super().__init__(daemon=True) + self.output_queue = output_queue + self.live_feed = live_feed + self.__stop_event = threading.Event() + self.__process = None + + self.__re_tracking = re.compile(r"ABS_MT_TRACKING_ID\s+([0-9a-f]+)") + self.__re_btn = re.compile(r"BTN_TOUCH\s+(DOWN|UP)") + self.__re_x = re.compile(r"ABS_MT_POSITION_X\s+([0-9a-f]+)") + self.__re_y = re.compile(r"ABS_MT_POSITION_Y\s+([0-9a-f]+)") + + self.__data_x, self.__data_y = extract_phone_screen_info() + self.__scale_x: float = self.__data_x[2] + self.__scale_y: float = self.__data_y[2] + + def __enter__(self): + self.start() + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.stop() + self.join() + + def run(self): + cmd = ["adb", "exec-out", "getevent -lt"] + try: + self.__process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + stdin=subprocess.DEVNULL, + ) + + active_touch = False + curr_x, curr_y = None, None + last_valid_x, last_valid_y = 0, 0 + gesture_points = [] + start_ts = 0.0 + + assert self.__process.stdout is not None + for raw_line in iter(self.__process.stdout.readline, b""): + if self.__stop_event.is_set(): + break + + line = raw_line.decode("utf-8", errors="ignore").strip() + if not line: + continue + + now = time.time() + update_coords = False + + match_x = self.__re_x.search(line) + if match_x: + curr_x = int(match_x.group(1), 16) // self.__scale_x + last_valid_x = curr_x + update_coords = True + + match_y = self.__re_y.search(line) + if match_y: + curr_y = int(match_y.group(1), 16) // self.__scale_y + last_valid_y = curr_y + update_coords = True + + if active_touch and update_coords: + if not gesture_points or 
gesture_points[-1] != ( + last_valid_x, + last_valid_y, + ): + gesture_points.append((last_valid_x, last_valid_y)) + + if self.live_feed: + s_x, s_y = gesture_points[0] + live_g = Gesture( + start_time=start_ts, + end_time=now, + start_x=s_x, + start_y=s_y, + end_x=last_valid_x, + end_y=last_valid_y, + points=list(gesture_points), + is_finished=False, + ) + self.output_queue.put(live_g) + + is_start, is_end = False, False + match_track = self.__re_tracking.search(line) + if match_track: + if match_track.group(1) != "ffffffff": + is_start = True + else: + is_end = True + + match_btn = self.__re_btn.search(line) + if match_btn: + if match_btn.group(1) == "DOWN": + is_start = True + elif match_btn.group(1) == "UP": + is_end = True + + if is_start and not active_touch: + active_touch = True + start_ts = now + gesture_points = [(last_valid_x, last_valid_y)] + + elif is_end and active_touch: + active_touch = False + if not gesture_points: + gesture_points.append((last_valid_x, last_valid_y)) + + if gesture_points: + s_x, s_y = gesture_points[0] + e_x, e_y = gesture_points[-1] + + g = Gesture( + start_time=start_ts, + end_time=now, + start_x=s_x, + start_y=s_y, + end_x=e_x, + end_y=e_y, + points=list(gesture_points), + is_finished=True, + ) + self.output_queue.put(g) + curr_x, curr_y = None, None + + except Exception as e: + pass + finally: + self.stop() + + def stop(self): + self.__stop_event.set() + if self.__process: + try: + self.__process.terminate() + except Exception: + pass + + +if __name__ == "__main__": + gesture_queue = queue.Queue() + + print("🤖 Tracker Active... 
class LiveGesturePlotter:
    """
    Will display in graphical form what's happening on the device.

    Parameters:
        gesture_queue: a queue that will be populated with Gesture objects
        max_history: maximum number of past gestures to keep (only used
            when base_alpha is False)
        base_alpha: if False, the last max_history gestures are drawn with
            an alpha that fades with age. Otherwise every finished gesture
            is kept and drawn with float(base_alpha), and the most recent
            finished one is drawn fully opaque.
        log_console: if True, prints gesture details to the console
        live_screen: if True, overlays the gestures on top of a "live" ADB screen mirror
        width: width of the plot
        height: height of the plot
    """

    # A gesture whose start and end are closer than this is drawn as a tap.
    TAP_MAX_DISTANCE = 50

    def __init__(
        self,
        gesture_queue: queue.Queue,
        max_history: int = 8,
        base_alpha: Union[bool, float] = False,
        log_console: bool = False,
        live_screen: bool = False,
        width: int = 1080,
        height: int = 2400,
    ):
        self.queue = gesture_queue
        self.width = width
        self.height = height
        self.base_alpha = base_alpha
        self.log_console = log_console
        self.live_screen = live_screen

        # Fading mode: bounded history. Permanent mode: unbounded list.
        self.history = deque(maxlen=max_history)
        self.permanent_gestures = []

        self.latest_finished_gesture = None
        self.active_live_gesture = None

        # latest_frame is written by the screen thread and consumed by the
        # plotting loop; last_frame_drawn is reused until a new one arrives.
        self.latest_frame = None
        self.last_frame_drawn = None
        self.screen_thread_running = False

    def _screen_updater(self):
        """
        Background thread fetching screenshots directly into memory.

        Runs until screen_thread_running is cleared. Errors are logged and
        followed by a 0.5 s pause before retrying.
        """
        while self.screen_thread_running:
            try:
                img_array = get_screenshot(to_memory=True)

                # get_screenshot may hand back a bool instead of image data;
                # such a value must never reach imshow.
                if img_array is not None and not isinstance(img_array, bool):
                    self.latest_frame = img_array

            except Exception as e:
                logger.error(f"Screen updater error: {e}")
                time.sleep(0.5)

    def _consume_queue(self):
        """Drain the gesture queue; return True if anything was received."""
        received = False
        while not self.queue.empty():
            try:
                new_gesture = self.queue.get_nowait()
            except queue.Empty:
                break
            received = True

            if not new_gesture.is_finished:
                self.active_live_gesture = new_gesture
                continue

            if self.base_alpha is not False:
                # The previous "latest" gesture becomes a permanent one.
                if self.latest_finished_gesture is not None:
                    self.permanent_gestures.append(self.latest_finished_gesture)
                self.latest_finished_gesture = new_gesture
            else:
                self.history.append(new_gesture)

            self.active_live_gesture = None
            if self.log_console:
                print(f"\n{new_gesture}\n" + "-" * 40)
        return received

    def run(self):
        """Open the plot window and redraw it until it is closed.

        Blocks the calling thread. Ctrl+C ends the loop quietly; the screen
        thread is stopped and the figure closed on exit.
        """
        logger.info("📊 Starting Live Plotter...")
        plt.ion()
        self.fig, self.ax = plt.subplots(figsize=(5, 10))
        manager = self.fig.canvas.manager
        if manager is not None:  # some backends provide no window manager
            manager.set_window_title("Live ADB Gestures")
        self._setup_axes()

        if self.live_screen:
            logger.info("🎥 Starting live screen capture...")
            self.screen_thread_running = True
            threading.Thread(target=self._screen_updater, daemon=True).start()

        plt.show()

        try:
            while plt.fignum_exists(self.fig.number):
                needs_redraw = self._consume_queue()

                if self.live_screen and self.latest_frame is not None:
                    needs_redraw = True

                if needs_redraw:
                    self._update_plot()

                self.fig.canvas.flush_events()
                time.sleep(0.016)

        except KeyboardInterrupt:
            pass
        finally:
            self.screen_thread_running = False
            plt.ioff()
            plt.close()

    def _setup_axes(self):
        """Clear the axes and re-apply limits, title and dark colours."""
        self.ax.clear()
        self.ax.set_xlim(0, self.width)
        # Inverted Y axis: the origin is the top-left corner.
        self.ax.set_ylim(self.height, 0)
        self.ax.set_title("Live Gesture Feed")
        self.ax.set_facecolor("#1e1e1e")
        self.fig.patch.set_facecolor("#121212")
        self.ax.tick_params(colors="white")
        self.ax.title.set_color("white")

    def _draw_gesture(self, gesture: Gesture, alpha: float):
        """
        Draws a gesture as a tap marker or as a path with a red end marker.

        A gesture whose start-to-end distance is below TAP_MAX_DISTANCE is
        drawn as a single cyan dot at its start point; otherwise its points
        are joined by one line. A non-tap gesture without points draws
        nothing.

        Parameters:
            gesture: The gesture to draw.
            alpha: The transparency of the gesture.
        """
        dx = gesture.end_x - gesture.start_x
        dy = gesture.end_y - gesture.start_y
        is_tap = (dx**2 + dy**2) ** 0.5 < self.TAP_MAX_DISTANCE

        if is_tap:
            self.ax.scatter(
                gesture.start_x,
                gesture.start_y,
                color="cyan",
                s=100,
                alpha=alpha,
                edgecolors="white" if alpha > 0.5 else "none",
            )
        elif gesture.points:
            # NOTE: an earlier attempt broke the line with NaNs whenever two
            # consecutive points were more than 150 apart; it is disabled,
            # so the path is drawn exactly as recorded.
            xs = [x for x, _ in gesture.points]
            ys = [y for _, y in gesture.points]

            self.ax.plot(xs, ys, color="springgreen", linewidth=3, alpha=alpha)

            last_x, last_y = gesture.points[-1]
            self.ax.scatter(last_x, last_y, color="red", s=40, alpha=alpha)

    def _update_plot(self):
        """Redraw the background frame (if enabled) and all gestures."""
        self._setup_axes()

        if self.live_screen:
            # Read the shared frame once: the screen thread may replace it
            # at any moment.
            frame = self.latest_frame
            if frame is not None:
                self.last_frame_drawn = frame
                self.latest_frame = None
            else:
                frame = self.last_frame_drawn

            if frame is not None:
                self.ax.imshow(
                    frame,
                    extent=[0, self.width, self.height, 0],
                    aspect="auto",
                    zorder=-1,
                )

        if self.base_alpha is not False:
            for gesture in self.permanent_gestures:
                self._draw_gesture(gesture, alpha=float(self.base_alpha))
            if self.latest_finished_gesture and not self.active_live_gesture:
                self._draw_gesture(self.latest_finished_gesture, alpha=1.0)
        else:
            # Oldest gesture is the most transparent, newest is opaque.
            total = len(self.history)
            for i, gesture in enumerate(self.history):
                alpha = (i + 1) / total
                self._draw_gesture(gesture, alpha)

        if self.active_live_gesture:
            self._draw_gesture(self.active_live_gesture, alpha=1.0)

        self.fig.canvas.draw_idle()
argparse.ArgumentParser( + description="Track and plot Android touches in real-time." + ) + parser.add_argument( + "--subplots", type=int, default=8, help="Number of past gestures to keep" + ) + parser.add_argument( + "--keep-subplots", + nargs="?", + type=float, + const=0.15, + default=False, + help="Keep infinite history. Defaults to 0.15 opacity", + ) + parser.add_argument( + "--live-draw", + action="store_true", + help="Stream gestures as they happen", + ) + parser.add_argument( + "--live-screen", + action="store_true", + help="Overlay the gestures on top of a live ADB screen mirror", + ) + parser.add_argument( + "--log-to-console", action="store_true", help="Print gesture details" + ) + args = parser.parse_args() + + data_x, data_y = extract_phone_screen_info() + dev_width = int(data_x[1]) + dev_height = int(data_y[1]) + gesture_queue = queue.Queue() + + tracker = GestureTracker(gesture_queue, live_feed=args.live_draw) + tracker.start() + + plotter = LiveGesturePlotter( + gesture_queue=gesture_queue, + max_history=args.subplots, + base_alpha=args.keep_subplots, + log_console=args.log_to_console, + live_screen=args.live_screen, + width=dev_width, + height=dev_height, + ) + + try: + plotter.run() + finally: + tracker.stop() + tracker.join() diff --git a/Brain/AI/src/motion_module/gesture_utils/processed_phone_screen_data.py b/Brain/AI/src/motion_module/gesture_utils/processed_phone_screen_data.py new file mode 100644 index 00000000..f195a671 --- /dev/null +++ b/Brain/AI/src/motion_module/gesture_utils/processed_phone_screen_data.py @@ -0,0 +1,39 @@ +import re +import subprocess + + +def extract_phone_screen_info() -> tuple[ + tuple[float, float, float], tuple[float, float, float] +]: + """ + Parses the output of `adb shell dumpsys input` to compare raw and logical touch input dimensions. + + Returns: a tuple of (raw_x, logical_x, ratio_x) and (raw_y, logical_y, ratio_y). + Returns: a tuple of (raw_x, logical_x, ratio_x) and (raw_y, logical_y, ratio_y). 
+ """ + data = subprocess.check_output(["adb", "shell", "dumpsys", "input"]).decode() + + logical_x = re.search(r"X:.*?max=([\d.]+)", data) + logical_y = re.search(r"Y:.*?max=([\d.]+)", data) + + raw_x = re.search(r"Touch Input Mapper.*?X:.*?max=(\d+)", data, re.DOTALL) + raw_y = re.search(r"Touch Input Mapper.*?Y:.*?max=(\d+)", data, re.DOTALL) + + assert logical_x and logical_y and raw_x and raw_y, ( + "Error: Could not find all X/Y pairs in the input." + ) + + lx, ly = ( + round(float(logical_x.group(1)) + 0.01), + round(float(logical_y.group(1)) + 0.01), + ) + rx, ry = int(raw_x.group(1)), int(raw_y.group(1)) + + return (rx, lx, round(rx / lx, 2)), (ry, ly, round(ry / ly, 2)) + + +if __name__ == "__main__": + print(f"{'Axis':<10} | {'Raw':<10} | {'Pixel':<15} | {'Ratio'}") + print("-" * 55) + for axis, (raw, logical, ratio) in zip(["X", "Y"], extract_phone_screen_info()): + print(f"{axis:<10} | {raw:<10} | {logical:<15} | {ratio}x") diff --git a/Brain/AI/src/motion_module/gesture_utils/raw_phone_data.bash b/Brain/AI/src/motion_module/gesture_utils/raw_phone_data.bash new file mode 100755 index 00000000..b1cc8e4e --- /dev/null +++ b/Brain/AI/src/motion_module/gesture_utils/raw_phone_data.bash @@ -0,0 +1,3 @@ +#!/bin/bash + +adb shell dumpsys input | grep -E "Touch Input Mapper|X:|Y:" diff --git a/Brain/AI/src/motion_module/gesture_utils/simple_gesture_capture.py b/Brain/AI/src/motion_module/gesture_utils/simple_gesture_capture.py new file mode 100644 index 00000000..83022b72 --- /dev/null +++ b/Brain/AI/src/motion_module/gesture_utils/simple_gesture_capture.py @@ -0,0 +1,83 @@ +import re +import subprocess +from datetime import datetime +from tkinter.constants import N + +from AI.src.constants import logger +from processed_phone_screen_data import extract_phone_screen_info + + +def get_wall_clock(): + return datetime.now().strftime("%H:%M:%S.%f")[:-3] + + +def run_monitor(): + curr_x = None + curr_y = None + active_touch = False + scale_x, scale_y = 
extract_phone_screen_info() + scale_x = scale_x[2] + scale_y = scale_y[2] + + logger.info("Listening for ordered touch events... (Ctrl+C to stop)") + + cmd = ["adb", "exec-out", "getevent", "-lt"] + try: + process = subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True + ) + except FileNotFoundError: + logger.error("adb not found. Please install it and try again.") + raise FileNotFoundError + + # Regex patterns for speed + re_x = re.compile(r"ABS_MT_POSITION_X\s+([0-9a-f]+)") + re_y = re.compile(r"ABS_MT_POSITION_Y\s+([0-9a-f]+)") + re_id = re.compile(r"ABS_MT_TRACKING_ID\s+([0-9a-f]+)") + re_btn = re.compile(r"BTN_TOUCH\s+(DOWN|UP)") + + try: + for line in process.stdout: + time_str = get_wall_clock() + + match_x = re_x.search(line) + if match_x: + curr_x = int(match_x.group(1), 16) / scale_x + + match_y = re_y.search(line) + if match_y: + curr_y = int(match_y.group(1), 16) / scale_y + + match_id = re_id.search(line) + match_btn = re_btn.search(line) + + is_start = (match_id and match_id.group(1) != "ffffffff") or ( + match_btn and match_btn.group(1) == "DOWN" + ) + + is_end = (match_id and match_id.group(1) == "ffffffff") or ( + match_btn and match_btn.group(1) == "UP" + ) + + if is_start and not active_touch: + print(f"[{time_str}] 🟢 START TAP") + active_touch = True + + if active_touch and curr_x is not None and curr_y is not None: + print(f"[{time_str}] X:{round(curr_x):-4d} Y:{round(curr_y):-4d}") + curr_x, curr_y = None, None + + if is_end and active_touch: + print(f"[{time_str}] 🔴 END TAP") + print("-" * 48) + active_touch = False + curr_x = curr_y = None + + except KeyboardInterrupt: + print("\nStopping...") + finally: + process.terminate() + + +if __name__ == "__main__": + run_monitor() diff --git a/Brain/AI/src/motion_module/motion_module.py b/Brain/AI/src/motion_module/motion_module.py new file mode 100644 index 00000000..1fddf42d --- /dev/null +++ b/Brain/AI/src/motion_module/motion_module.py @@ -0,0 +1,651 @@ +import 
threading +from collections import Counter + +# External libraries +import mahotas +import numpy as np +from PIL import Image, ImageDraw + +# Internal modules +from AI.src.constants import logger +from AI.src.motion_module.enums import MotionType, Towards +from AI.src.motion_module.swipe_calibrator import SwipeCalibrator +from AI.src.motion_module.utils.image_processing_utility import ( + apply_mask_make_transparent, + calculate_offset, + to_int32, +) +from AI.src.webservices.helpers import get_screenshot, swipe + + +class Worker(threading.Thread): + def __init__(self, target, args=()): + super().__init__() + self.target = target + self.args = args + + def run(self): + self.target(*self.args) + + +class MotionModule: + """ + Parameters: + ui_mask: an image or a ndArray representing the mask of the UI you want to interact with. + swipe_calibrator: Used to correct some error of the pointing device, you can omit it. + The swipe calibrator (if provided) need to be already trained. + motion_type: the type of motion you want to use, default is swipe. + force_headless: if True the module will use adb for movements. 
+ start_position: where you start (will also expand the desk to fit your position) + How to use: + - To properly use the motion module you should implement in you application a set of actions + - When the motion module moves it fills the desk with the (screenshot - the mask) at your position + - When you want to scan it with cv2 or other method you can request the desk with 'get_desk_copy' or 'get_pil_desk' + - If you want to clear the whole map and scan it from scratch you can call 'clear_desk' + - If your position no longer matches the one accounted for by the module you can reset it with 'set_current_position' but you may want to 'collapse_desk' after + - When you want to (tap 1000, 1000) do not go to (1000, 1000) use 'goto_for_action' and execute the action at the returned value + - If you use the desk to campionate for the position where to tap you should call 'desk_to_position' and use those coordinated for the movement + because the desk (0,0) does not match the position (0,0) if you moved backwards + """ + + def __init__( + self, + ui_mask: Image.Image | np.ndarray, + swipe_calibrator: SwipeCalibrator, + motion_type: MotionType = MotionType.SWIPE, + image_history_size: int = 10, + force_headless: bool = False, + start_position: tuple[int, int] = (0, 0), + ): + self._ui_mask: np.ndarray + if isinstance(ui_mask, Image.Image): + self._ui_mask = to_int32(ui_mask) + else: + self._ui_mask = ui_mask + + height, width = self._ui_mask.shape[:2] + # array RGBA trasparente (0 = trasparente) + self._desk = np.zeros((height, width, 4), dtype=np.uint8) + self._desk_offset = start_position + self._expand_desk(start_position, (width, height)) + self.swipe_calibrator = swipe_calibrator + self._position: tuple[int, int] = start_position + self._positions_history: list[tuple[int, int]] = [] + self._frames_history: list[np.ndarray] = [] + self._frame_history_size: int = image_history_size + self._motion_type: MotionType = motion_type + self._motion_area: tuple[tuple[int, int], 
tuple[int, int]] | None = None + if motion_type == MotionType.SWIPE: + self.calculate_motion_area() + self._force_headless = force_headless + + # TODO: add a way to auto-train the swipe calibrator + # TODO: add confidence threshold to have some kind of security net for wrong movement, and a correction for them + # TODO: a function to check and explore the map boundaries + + @staticmethod + def _angle_to_offset(distance: float, angle: float): + return np.cos(angle) * distance, np.sin(angle) * distance + + def _expand_desk(self, coordinates: tuple[int, int], size: tuple[int, int]): + """ + Expands the desk to include the area defined by the coordinates (x, y) and the dimensions (width, height). + Parameters: + coordinates: x, y of the top-left corner of the area. + size: width, height of the area. + """ + current_pos = np.array(self._desk_offset) + # shape is (h, w) but we all know that (w, h) is superior so flip it + current_size = np.array(self._desk.shape[:2][::-1]) + + new_pos = np.array(coordinates) + new_size = np.array(size) + + min_coords = np.minimum(current_pos, new_pos) + max_coords = np.maximum(current_pos + current_size, new_pos + new_size) + + offset = current_pos - min_coords + + if np.any(offset > 0) or np.any(max_coords > current_pos + current_size): + new_dims = max_coords - min_coords + new_w, new_h = new_dims + new_desk = np.zeros((new_h, new_w, 4), dtype=np.uint8) + + # For readability... if you prefer replace everything with [0] and [1] and talk to me later + offset_x, offset_y = offset + current_w, current_h = current_size + + new_desk[ + offset_y : offset_y + current_h, offset_x : offset_x + current_w + ] = self._desk + + self._desk = new_desk + self._desk_offset = tuple(min_coords) + + def _add_frame_to_desk(self, frame: np.ndarray, coordinates: tuple[int, int]): + """ + Paste a frame on the desk, ignoring irrelevant areas. + Parameters: + frame: the frame to paste, as a numpy array. 
+ coordinates: the coordinates of the top-left corner of the frame, in pixels (x, y). + """ + masked_frame = apply_mask_make_transparent(frame, self._ui_mask).astype( + np.uint8 + ) + w, h = masked_frame.shape[:2][::-1] + x, y = coordinates + + self._expand_desk((x, y), (w, h)) + + desk_x = x - self._desk_offset[0] + desk_y = y - self._desk_offset[1] + + mask = masked_frame[:, :, 3] > 0 + self._desk[desk_y : desk_y + h, desk_x : desk_x + w][mask] = masked_frame[mask] + + def _add_last_frame_to_desk(self): + """ + Paste the last frame from the history on the desk. + """ + if not self._frames_history: + return + self._add_frame_to_desk(self._frames_history[-1], coordinates=self._position) + + def _majority_offset_calculation( + self, f1: np.ndarray | None = None, f2: np.ndarray | None = None + ): + """ + Calculates the offset between the last two frames in the history, using the UI mask to ignore irrelevant areas. + This variant execute different algorithm and takes the best / most common result. + + Parameters: + f1 (np.ndarray): First frame to compare. If None, uses the second-to-last frame in the history. + f2 (np.ndarray): Second frame to compare. If None, uses the last frame in the history. 
def _calculate_offset(self, detector: int = 1) -> tuple[float, float, float]:
    """
    Measures the offset between the last two frames of the history with a single detector,
    passing the UI mask to calculate_offset.

    Parameters:
        detector: forwarded to calculate_offset as used_detector.
    Returns:
        dx, dy, confidence as reported by calculate_offset.
    """
    previous_frame = self._frames_history[-2]
    current_frame = self._frames_history[-1]
    measured = calculate_offset(
        previous_frame,
        current_frame,
        self._ui_mask,
        used_detector=detector,
    )
    dx, dy, confidence = measured
    logger.debug(f"dx = {dx}, dy = {dy}, confidence = {confidence}")
    return dx, dy, confidence
heights[stack.pop()] + width = c - stack[-1] - 1 + area = height * width + if area > max_area: + max_area = area + best_rect = ((stack[-1] + 1, r - height + 1), (c, r + 1)) + stack.append(c) + + self._motion_area = best_rect + + def _offset_to_swipe( + self, offset: tuple[int, int] + ) -> tuple[int, int, int, int] | None: + """ + Converts a desired offset into swipe coordinates based on the motion area. + Parameters: + offset: the desired offset, in pixels (x, y). + Returns: + start_x, start_y, end_x, end_y || None if the offset is out of bounds. + """ + if self._motion_area is None: + self.calculate_motion_area() + assert self._motion_area is not None, "Motion area calculation failed." + + (min_x, min_y), (max_x, max_y) = self._motion_area + + area_width = max_x - min_x + area_height = max_y - min_y + + target_x, target_y = offset + + if abs(target_x) > area_width or abs(target_y) > area_height: + logger.info( + f"Desired offset {offset} is out of bounds for the motion area." + ) + return None + + # Center the swipe vector in the available space (kinda works) + # start_x = (min_x + max_x - target_x) // 2 # <- +x ; -> -x + start_x = (min_x + max_x + target_x) // 2 # <- -x ; -> +x + start_y = (min_y + max_y + target_y) // 2 + + # end_x = start_x + target_x # <- +x ; -> -x + end_x = start_x - target_x # <- -x ; -> +x + end_y = start_y - target_y + + return int(start_x), int(start_y), int(end_x), int(end_y) + + def _clamp_frame_history(self, keep: int | None = None): + if keep is None: + keep = self._frame_history_size + self._frames_history = self._frames_history[-keep:] + + def _ensure_history(self): + """ + Make sure there are at least one frame in the history, so we can calculate the offset. 
+ """ + if len(self._frames_history) < 2: + self._positions_history.append(self._position) + screenshot = get_screenshot(to_memory=True) + if screenshot is not None and not isinstance(screenshot, bool): + self._frames_history.append(screenshot) + self._add_last_frame_to_desk() + else: + logger.warning("Failed to get screenshot for history") + + @staticmethod + def distance(point_a: tuple[int, int], point_b: tuple[int, int]) -> float: + return ((point_a[0] - point_b[0]) ** 2 + (point_a[1] - point_b[1]) ** 2) ** 0.5 + + def move_with_offset(self, offset: tuple[int, int]) -> tuple[float, float] | None: + """ + Tries to move the map ONCE by swiping + Parameters: + offset: int(x), int(y), how much you want to move, DO NOT pre-calibrate these values + Returns: + offset_x, offset_y || None if something went wrong + """ + self._ensure_history() + command = self.swipe_calibrator.get_calibrated_command(*offset) + command = int(command[0]), int(command[1]) + swipe_cmd = self._offset_to_swipe(command) + if swipe_cmd is None: + # Note: here you should NOT retry with multiple swipes command, the move command should only do ONE action + return None + + swipe(*swipe_cmd) + + screenshot = get_screenshot(to_memory=True) + if screenshot is not None and not isinstance(screenshot, bool): + self._frames_history.append(screenshot) + # dx, dy, confidence = self._calculate_offset() + dx, dy, confidence = self._majority_offset_calculation() + self._position = (self._position[0] - int(dx), self._position[1] - int(dy)) + self._positions_history.append(self._position) + self._add_last_frame_to_desk() + self._clamp_frame_history() + return dx, dy + else: + logger.warning("Failed to get screenshot after move") + return None + + def move_with_angle( + self, distance: float, angle: float + ) -> tuple[float, float] | None: + """ + Tries to move the map ONCE by swiping + Parameters: + distance: how far you want to move, in pixels + angle: the angle you want to move, in radians + Returns: + offset_x, 
def goto(
    self,
    destination: tuple[int, int],
    acceptable_distance: float = 50,
    cutoff_distance: float = 10.0,
) -> tuple[int, int]:
    """
    Goes to an absolute destination, one clamped move at a time
    Parameters:
        destination: the absolute destination, in pixels (x, y)
        acceptable_distance: the maximum distance from the destination we are willing to accept
        cutoff_distance: the minimum average length, in pixels, of the last 3 measured moves;
                         at or below it we consider ourselves stuck and stop trying,
                         to avoid infinite loops
    Returns:
        The sum of the (rounded) offsets measured after each move, in pixels (x, y);
        (0, 0) when no move was performed
    """

    if self._motion_area is None:
        # It's redundant because at this point it should be already instantiated, but it's safer
        self.calculate_motion_area()
    assert self._motion_area is not None, "Motion area calculation failed."

    (min_x, min_y), (max_x, max_y) = self._motion_area
    # A single move is limited to half of the motion area on each axis
    half_x = abs(min_x - max_x) // 2
    half_y = abs(min_y - max_y) // 2

    movement_history: list[float] = []
    delta_history: list[tuple[float, float]] = []
    while (
        (dist := self.distance(self._position, destination)) > acceptable_distance
    ) and (
        len(movement_history) < 3
        or np.mean(movement_history[-3:]) > cutoff_distance
    ):
        logger.debug(f"Distance to destination: {dist}")

        x_distance = destination[0] - self._position[0]
        y_distance = destination[1] - self._position[1]

        movement = (
            int(max(-half_x, min(half_x, x_distance))),
            int(max(-half_y, min(half_y, y_distance))),
        )

        result = self.move_with_offset(movement)
        if result is None:
            logger.warning("Move failed during goto")
            break

        dx, dy = result
        delta_history.append((dx, dy))
        # Length of the measured move. |dx - dy| is NOT a length: it reads a healthy
        # diagonal move (dx == dy) as 0 and would trip the stuck detection
        movement_history.append(float(np.hypot(dx, dy)))

    return (
        sum(round(dx) for dx, _ in delta_history),
        sum(round(dy) for _, dy in delta_history),
    )
def clear_desk(self):
    """
    Replaces the desk with a fully transparent one of the same size
    Use this when nothing of the old scan should survive
    ⚠️The position is not reset, goto(0,0) before calling this
    ⚠️The frame_history is emptied as well
    """
    blank_desk = np.zeros_like(self._desk)
    self._desk = blank_desk
    del self._frames_history[:]
def collapse_desk(self):
    """
    Throws the desk away and starts a new, transparent one with the size of the UI mask,
    anchored at the current position
    """
    mask_h, mask_w = self._ui_mask.shape[:2]
    anchor = self._position
    self._desk = np.zeros((mask_h, mask_w, 4), dtype=np.uint8)
    self._desk_offset = anchor
    self._expand_desk(anchor, (mask_w, mask_h))
def position_to_desk(self, position: tuple[int, int]) -> tuple[int, int]:
    """
    Translates a global position (x, y) into indices of the desk array
    by subtracting the desk offset.
    """
    origin = self._desk_offset
    desk_x = position[0] - origin[0]
    desk_y = position[1] - origin[1]
    return desk_x, desk_y
screen_size=(1080, 2340) +# ) +# desk = Image.new("RGBA", (3000, 3000), (255, 0, 0)) +# # logger.debug( +# # md.move( +# # step_number = (12, 5), +# # destination_offset= (1200, 500), +# # desk = desk +# # ) +# # ) +# # ) +# logger.debug(md.calculate_map_size()) +# plt.imshow(desk) +# plt.show() diff --git a/Brain/AI/src/motion_module/resources/test_material/isandempire_mask.png b/Brain/AI/src/motion_module/resources/test_material/isandempire_mask.png new file mode 100644 index 00000000..86ca4260 Binary files /dev/null and b/Brain/AI/src/motion_module/resources/test_material/isandempire_mask.png differ diff --git a/Brain/AI/src/motion_module/resources/test_material/islandempire_mask_alpha.png b/Brain/AI/src/motion_module/resources/test_material/islandempire_mask_alpha.png new file mode 100644 index 00000000..8acb6048 Binary files /dev/null and b/Brain/AI/src/motion_module/resources/test_material/islandempire_mask_alpha.png differ diff --git a/Brain/AI/src/motion_module/resources/test_material/minesweeper_mask.png b/Brain/AI/src/motion_module/resources/test_material/minesweeper_mask.png new file mode 100644 index 00000000..b9cdca61 Binary files /dev/null and b/Brain/AI/src/motion_module/resources/test_material/minesweeper_mask.png differ diff --git a/Brain/AI/src/motion_module/resources/test_material/minesweeper_mask_alpha.png b/Brain/AI/src/motion_module/resources/test_material/minesweeper_mask_alpha.png new file mode 100644 index 00000000..61955414 Binary files /dev/null and b/Brain/AI/src/motion_module/resources/test_material/minesweeper_mask_alpha.png differ diff --git a/Brain/AI/src/motion_module/swipe_calibrator.py b/Brain/AI/src/motion_module/swipe_calibrator.py new file mode 100644 index 00000000..6ce421f0 --- /dev/null +++ b/Brain/AI/src/motion_module/swipe_calibrator.py @@ -0,0 +1,781 @@ +import datetime +import os +import pickle +import queue +from time import sleep + +# External libraries +import matplotlib.pyplot as plt +import numpy as np +from 
scipy.interpolate import LinearNDInterpolator, NearestNDInterpolator +from sklearn.linear_model import HuberRegressor, LinearRegression, RANSACRegressor +from sklearn.multioutput import MultiOutputRegressor + +# Internal modules +from AI.src.constants import ( + CLIENT_PATH, + MOTION_CALIBRATION_PATH, + SCREENSHOT_PATH, + TAPPY_ORIGINAL_SERVER_IP, + logger, +) +from AI.src.motion_module.gesture_utils.gesture_tracker import GestureTracker +from AI.src.motion_module.utils.resources_utility import DoStuffElsewhere + + +class SwipeCalibrator: + """ + A set of functions to generate a correction map for swipes to account for pen friction + + Parameters: + method: "linear_regression" (default), "linear_interpolation", "ransac_regression", "huber_regression". Can be changed later. + How to use: + - Before calibrating the robot should be connected and on a screen that does not block adb captures (you can use npm calibration display) + - The simplest calibration you can do simply implies running the program from a terminal with --save_to_file. 
+ - The --deep calibration will take a lot longer 11~ minutes but if the robot does not explode it will be way more precise + - If you use the --deep calibration you also need --max_iterations and --target_error + - You can add point to the calibration by using the --load_from_file and you method of calibration + The manual calibration cannot be used to add point to a calibration since it's discarded if load_from_file is used + - You can test the end result with --test and plot the graphs with --plot_{type} + - ⚠️Remember to pass --save_to_file if you want to save the calibration, it will NOT do it by default + - It's suggested to add a suffix with the name of the phone used for the calibration and to avoid sharing calibrations between devices + """ + + def __init__(self, method: str = "linear_regression"): + self.is_trained = False + self.cmds = [] + self.acts = [] + + self.method = method + self.model = None + self.fallback_model = None + + def train( + self, + commanded_swipes: list[list[int]] | None = None, + measured_swipes: list[list[int]] | None = None, + method: str | None = None, + drop_first: bool = False, + outlier_zscore_threshold: float | None = None, + ): + """ + Train the model + + Parameters: + commanded_swipes: A list of pairs (dx, dy) that the pen should have performed + measured_swipes: A list of pairs (dx, dy) that the pen actually performed + method: None (default, will use the one used in the class creation), + "linear_regression", "linear_interpolation", "ransac_regression", "huber_regression" + drop_first: if True, drop the first sample (useful if the first capture is noisy) + example: the first we set [0, 0] = (0, 0) and we want to skip this + outlier_zscore_threshold: remove measured samples with z-score > thresh (on measured magnitude) + this tries to remove from the calibration garbage data + Notes: + - drop_first could be transformed in drop_selected: list[index] + """ + if commanded_swipes is None: + commanded_swipes = self.cmds + 
assert commanded_swipes is not None, "Commanded swipes not provided." + if measured_swipes is None: + measured_swipes = self.acts + assert measured_swipes is not None, "Measured swipes not provided." + + if len(commanded_swipes) == 0 or len(measured_swipes) == 0: + raise ValueError("No training data provided") + + measured = np.array(measured_swipes) + commanded = np.array(commanded_swipes) + + if measured.shape[0] != commanded.shape[0]: + raise ValueError("Measured and commanded lists must have same length") + + if drop_first: + if measured.shape[0] <= 1: + raise ValueError("Not enough points to drop first sample") + measured = measured[1:] + commanded = commanded[1:] + + if outlier_zscore_threshold is not None and measured.shape[0] >= 3: + mags = np.linalg.norm(measured, axis=1) + m_mean = np.mean(mags) + m_std = np.std(mags) + if m_std > 0: + z = (mags - m_mean) / m_std + mask = np.abs(z) <= outlier_zscore_threshold + if not np.all(mask): + logger.info( + f"Removing {np.sum(~mask)} outlier(s) from calibration data by z-score" + ) + measured = measured[mask] + commanded = commanded[mask] + + if method is None: + method = self.method + + if method == "linear_regression": + self.model = LinearRegression() + self.model.fit(measured, commanded) + elif method == "huber_regression": + self.model = MultiOutputRegressor(HuberRegressor()) + self.model.fit(measured, commanded) + elif method == "ransac_regression": + self.model = RANSACRegressor(LinearRegression(), random_state=0) + self.model.fit(measured, commanded) + elif method == "linear_interpolation": + self.model = LinearNDInterpolator(measured, commanded) + self.fallback_model = NearestNDInterpolator(measured, commanded) + else: + raise ValueError(f"Unknown calibration method: {method}") + + self.is_trained = True + logger.info(f"Calibration complete using {self.method}.") + + def _predict(self, inputs): + if self.method == "linear_interpolation": + res = self.model(inputs) + if np.any(np.isnan(res)): + nans = 
def get_calibrated_command(self, target_dx, target_dy) -> tuple[float, float]:
    """
    Transform the target displacement into the command needed to achieve it.

    When the calibrator has not been trained the target is returned unchanged.

    Parameters:
        target_dx: desired displacement on the x axis
        target_dy: desired displacement on the y axis

    Returns:
        the (x, y) command needed to achieve the target swipe
    """
    if not self.is_trained:
        return target_dx, target_dy

    prediction = self._predict(np.array([[target_dx, target_dy]]))
    # A single (dx, dy) query is sent, so only the first row is relevant.
    # Unpack it into plain floats so both branches return a tuple, as the
    # annotation promises, instead of a numpy row in the trained case.
    command_x, command_y = prediction[0]
    return float(command_x), float(command_y)
def deep_calibration(
    self,
    target: tuple[int, int],
    target_error: float = 0.1,
    max_iterations: int = 5,
):
    """
    Calibrate by setting an objective (600,0) and perform multiple actions until you get close to (600,0)
    Collects a lot of data, and it's slow

    Every swipe that is detected is appended to the calibration data
    (command in self.cmds, measured displacement in self.acts).

    Parameters:
        target: the target displacement [dx, dy]; must not be (0, 0)
        target_error: the accepted error as a fraction of the target magnitude
            (0.1 means 10%)
        max_iterations: the maximum number of corrections to perform for this
            target; swipes that are not detected consume the same budget

    Raises:
        ValueError: if the target has zero magnitude
    """
    target_mag = np.linalg.norm(target)
    if target_mag == 0:
        # The relative error is undefined for a null displacement.
        raise ValueError("Target displacement must be non-zero")

    with GestureTracker() as tracker:
        current_cmd = [target[0], target[1]]

        local_iterations = 0

        while True:
            os.system(
                f"python3 {CLIENT_PATH}/client3.py --url {TAPPY_ORIGINAL_SERVER_IP} --light 'swipe {200} {500} {200 + int(current_cmd[0])} {500 + int(current_cmd[1])}'"
            )
            sleep(0.5)

            try:
                gesture = tracker.output_queue.get(timeout=1)
            except queue.Empty:
                logger.warning("No gesture detected!")
                # A missed gesture must spend the iteration budget too,
                # otherwise an undetectable swipe retries forever.
                if local_iterations >= max_iterations:
                    logger.info("Max iterations reached for this target.")
                    break
                local_iterations += 1
                continue

            actual_dx = gesture.end_x - gesture.start_x
            actual_dy = gesture.end_y - gesture.start_y

            # Calculate error
            error_x = actual_dx - target[0]
            error_y = actual_dy - target[1]
            total_error = np.linalg.norm([error_x, error_y])
            error_percent = total_error / target_mag * 100
            logger.info(
                f"\n{gesture}\n"
                f"[🧪] Target:\t{target}\n"
                f"[🚂] Command:\t{current_cmd}\n"
                f"[⚠️] Error: \t[{error_x}, {error_y}] {total_error:.2f}px {error_percent:.2f}%"
            )

            self.cmds.append(list(current_cmd))
            self.acts.append([actual_dx, actual_dy])

            if total_error <= target_error * target_mag:
                logger.info("Target reached within tolerance!")
                break

            if local_iterations >= max_iterations:
                logger.info("Max iterations reached for this target.")
                break

            # Gain applied to the correction step:
            # you can make it hover around the point with values > 1
            # and you can make it more cautious with values < 1
            # ⚠️ With values < 1 and > 1 it (most likely) will not get between tolerances
            # so avoid setting max_iterations too high
            variant = 1.0
            current_cmd[0] += variant * (target[0] - actual_dx)
            current_cmd[1] += variant * (target[1] - actual_dy)

            local_iterations += 1
os.system( + f"python3 {CLIENT_PATH}/client3.py --url {TAPPY_ORIGINAL_SERVER_IP} --light 'swipe {200} {500} {200 + int(taget_x)} {500 + int(taget_y)}'" + ) + sleep(1) + + gesture = tracker.output_queue.get(timeout=2) + self.acts.append( + [ + gesture.end_x - gesture.start_x, + gesture.end_y - gesture.start_y, + ] + ) + + logger.info(f"\n{gesture}\n[🧪]: {taget_x}, {taget_y}") + except KeyboardInterrupt: + pass # redundant + finally: + pass + + def clear_calibration(self): + self.cmds = [] + self.acts = [] + + @staticmethod + def _assemble_file_string_prefix( + test_n: int | None = None, + robot: str = "brainybot1", + pen: str = "pinkyThing", + suffix: str | None = None, + ) -> str: + match (test_n is None, suffix is None): + case (True, True): + return f"{robot}_{pen}" + case (True, False): + return f"{robot}_{pen}_{suffix}" + case (False, True): + return f"{test_n}_{robot}_{pen}" + case (False, False): + return f"{test_n}_{robot}_{pen}_{suffix}" + + def load_from_file( + self, + filename: str | None = None, + test_n: int | None = None, + robot: str = "brainybot1", + pen: str = "pinkyThing", + suffix: str | None = None, + ): + + prefix = None + if filename is None: + prefix = self._assemble_file_string_prefix(test_n, robot, pen, suffix) + else: + if not filename.endswith(".pkl"): + raise ValueError("File must be a pickle file") + + with DoStuffElsewhere(MOTION_CALIBRATION_PATH): + if prefix is not None: + for f in os.listdir(): + if ( + os.path.isfile(f) + and f.endswith(".pkl") + and f.startswith(prefix) + ): + filename = f + break + assert filename is not None, "No valid file found matching prefix." 
+ with open(filename, "rb") as f: + caricato = pickle.load(f) + + self.cmds = caricato["cmds"] + self.acts = caricato["acts"] + + def save_to_file( + self, + filename: str | None = None, + test_n: int | None = None, + robot: str = "brainybot1", + pen: str = "pinkyThing", + suffix: str | None = None, + ): + if filename is None: + filename = ( + f"{self._assemble_file_string_prefix(test_n, robot, pen, suffix)}.pkl" + ) + + if not filename.endswith(".pkl"): + raise ValueError("File must be a pickle file") + + data_to_save = {"cmds": self.cmds, "acts": self.acts} + + with DoStuffElsewhere(MOTION_CALIBRATION_PATH): + with open(filename, "wb") as f: + pickle.dump(data_to_save, f) + + def plot_vector_calibration( + self, max_range: int = 1000, plot_filename: str | None = None + ): + """ + Generates the visualization of the calibration map with vectors + + Parameters: + max_range: the max offset + plot_filename: name of the plot + """ + if not self.is_trained: + raise ValueError("Train the model first!") + + grid_size = 10 + x, y = np.meshgrid( + np.linspace(0, max_range, grid_size), np.linspace(0, max_range, grid_size) + ) + + flat_x, flat_y = x.flatten(), y.flatten() + targets = np.column_stack((flat_x, flat_y)) + commands = self._predict(targets) + + plt.figure(figsize=(8, 6)) + # Plot the "push" vectors: (Command - Target) + plt.quiver( + flat_x, + flat_y, + commands[:, 0] - flat_x, + commands[:, 1] - flat_y, + color="blue", + alpha=0.6, + label="Correction Vector", + ) + + plt.title( + f"Calibration Map: Required 'Push' to overcome Friction ({self.method})" + ) + plt.xlabel("Desired X Displacement") + plt.ylabel("Desired Y Displacement") + plt.legend() + plt.grid(True) + if plot_filename is not None: + with DoStuffElsewhere(SCREENSHOT_PATH): + plt.savefig(plot_filename) + logger.info(f"Plot saved to {plot_filename}") + plt.show() + + def plot_heatmap_calibration( + self, max_range: int = 1000, save_path: str | None = None + ): + """ + Generates a heatmap visualization 
of the calibration map showing the magnitude of correction. + + Parameters: + max_range: the max offset + save_path: path to save the plot + """ + if not self.is_trained: + raise ValueError("Train the model first!") + + grid_size = 50 + x_linspace = np.linspace(0, max_range, grid_size) + y_linspace = np.linspace(0, max_range, grid_size) + x, y = np.meshgrid(x_linspace, y_linspace) + + flat_x, flat_y = x.flatten(), y.flatten() + targets = np.column_stack((flat_x, flat_y)) + commands = self._predict(targets) + + # Calculate the magnitude of the correction vector (Command - Target) + correction_vectors = commands - targets + correction_magnitudes = np.linalg.norm(correction_vectors, axis=1) + z = correction_magnitudes.reshape(x.shape) + + plt.figure(figsize=(10, 8)) + plt.pcolormesh(x, y, z, shading="auto", cmap="viridis") + plt.colorbar(label="Correction Magnitude (pixels)") + + # Plot original training points if available (measured acts) + if len(self.acts) > 0: + acts_arr = np.array(self.acts) + plt.plot( + acts_arr[:, 0], + acts_arr[:, 1], + "ok", + markersize=4, + label="Measured Points", + ) + plt.legend(loc="upper right") + + plt.title(f"Calibration Heatmap: Correction Magnitude ({self.method})") + plt.xlabel("Desired X Displacement") + plt.ylabel("Desired Y Displacement") + plt.axis("equal") + + if save_path is not None: + plt.savefig(save_path) + logger.info(f"Heatmap saved to {save_path}") + plt.show() + + def plot_connected_pairs(self, save_path: str | None = None): + """ + Plots the commanded vs measured swipes connected by lines. 
+ This helps visualize the error for each specific training points + + Parameters: + save_path: path to save the plot + """ + if len(self.cmds) == 0 or len(self.acts) == 0: + raise ValueError("No training data available to plot pairs") + + cmds_arr = np.array(self.cmds) + acts_arr = np.array(self.acts) + + if len(cmds_arr) != len(acts_arr): + logger.warning( + "Commands and Acts arrays have different lengths, truncating to minimum" + ) + min_len = min(len(cmds_arr), len(acts_arr)) + cmds_arr = cmds_arr[:min_len] + acts_arr = acts_arr[:min_len] + + plt.figure(figsize=(10, 8)) + + # What we wanted + plt.scatter( + cmds_arr[:, 0], + cmds_arr[:, 1], + c="blue", + label="Commanded (Desired)", + marker="o", + ) + # What we got + plt.scatter( + acts_arr[:, 0], + acts_arr[:, 1], + c="red", + label="Measured (Actual)", + marker="x", + ) + + for i in range(len(cmds_arr)): + plt.plot( + [cmds_arr[i, 0], acts_arr[i, 0]], + [cmds_arr[i, 1], acts_arr[i, 1]], + "k-", + alpha=0.3, + ) + + plt.title("Calibration: Commanded vs Measured Pairs") + plt.xlabel("X Displacement") + plt.ylabel("Y Displacement") + plt.legend() + plt.grid(True) + plt.axis("equal") + + if save_path is not None: + plt.savefig(save_path) + logger.info(f"Pairs plot saved to {save_path}") + plt.show() + + +if __name__ == "__main__": + import argparse + import sys + + # TAPPY_ORIGINAL_SERVER_IP = "http://127.0.0.1:8000" + # CLIENT_PATH = "/home/wip/tesi/BrainyBot/tappy-client/clients/python" + DESCRIPTION = """Run the calibration script +How to use: + - Before calibrating the robot should be connected and on a screen that does not block adb captures (you can use npm calibration display) + - The simplest calibration you can do simply implies running the program from a terminal with [--save_to_file]. 
+ - The [--deep] calibration will take a lot longer 11~ minutes but if the robot does not explode it will be way more precise + - If you use the [--deep] calibration you also need [--max_iterations] and [--target_error] + - You can add point to the calibration by using the [--load_from_file] and you method of calibration + The manual calibration cannot be used to add point to a calibration since it's discarded if [--load_from_file] is used + - You can test the end result with [--test] and plot the graphs with [--plot_{type}] + - ⚠️ Remember to pass [--save_to_file] if you want to save the calibration, it will NOT do it by default + - It's suggested to add a suffix with the name of the phone used for the calibration and to avoid sharing calibrations between devices +""" + + parser = argparse.ArgumentParser(description=DESCRIPTION) + parser.add_argument( + "--method", + type=str, + default="linear_regression", + help="Method to use for the calibration: linear_regression (default), linear_interpolation, ransac_regression, huber_regression", + ) + parser.add_argument( + "--manual", action="store_true", help="Choose the calibration swipes manually" + ) + parser.add_argument( + "--deep", + action="store_true", + help="Run the deep calibration (slow but more accurate)", + ) + + deep_required = "--deep" in sys.argv + parser.add_argument( + "--target_error", + type=float, + default=0.03, + help="Target error for the deep calibration", + required=deep_required, + ) + parser.add_argument( + "--max_iterations", + type=int, + default=5, + help="Max iterations for the deep calibration", + required=deep_required, + ) + + parser.add_argument( + "--test", + action="store_true", + help="At the end of the execution let the user test the calibration", + ) + + parser.add_argument( + "--load_from_file", action="store_true", help="Load the calibration from a file" + ) + parser.add_argument( + "--save_to_file", action="store_true", help="Save the calibration to a file" + ) + 
parser.add_argument( + "--plot_vector_calibration", + action="store_true", + help="Plot the vector calibration", + ) + parser.add_argument( + "--plot_heatmap_calibration", + action="store_true", + help="Plot the heatmap calibration", + ) + parser.add_argument( + "--plot_connected_pairs", + action="store_true", + help="Plot the commanded vs measured swipes connected by lines", + ) + parser.add_argument( + "--suffix", + type=str, + default=None, + help="A suffix to append to the calibration file name.", + ) + parser.add_argument( + "--robot", type=str, default="brainybot1", help="Name of the robot" + ) + parser.add_argument("--pen", type=str, default="pinkyThing", help="Name of the pen") + parser.add_argument( + "--test_number", type=int, default=None, help="Test number prefix" + ) + + args = parser.parse_args() + + __cal = SwipeCalibrator(method=args.method) + + if args.load_from_file: + try: + __cal.load_from_file( + suffix=args.suffix, + robot=args.robot, + pen=args.pen, + test_n=args.test_number, + ) + logger.info("Loaded calibration from file.") + except Exception as e: + logger.error(f"Failed to load calibration: {e}") + sys.exit(1) + + if args.manual: + logger.info("Starting manual calibration...") + __cal.manual_calibration() + elif args.deep: + logger.info( + f"Starting deep calibration (target_error={args.target_error}, max_iterations={args.max_iterations})..." 
+ ) + __cal.automatic_deep_calibration( + target_error=args.target_error, max_iterations=args.max_iterations + ) + elif not args.load_from_file: + # Default to automatic calibration if no data loaded and no specific method selected + logger.info("Starting automatic calibration...") + __cal.automatic_calibration() + + if args.save_to_file: + __cal.save_to_file( + suffix=args.suffix, robot=args.robot, pen=args.pen, test_n=args.test_number + ) + logger.info("Saved calibration to file.") + + if len(__cal.cmds) > 0 and len(__cal.acts) > 0: + __cal.train() + else: + logger.warning("No calibration data available. Skipping training.") + + if args.plot_vector_calibration: + __cal.plot_vector_calibration() + + if args.plot_heatmap_calibration: + __cal.plot_heatmap_calibration() + + if args.plot_connected_pairs: + __cal.plot_connected_pairs() + + if args.test: + logger.info("Starting test mode...") + with GestureTracker() as __tracker: + try: + while True: + try: + input_str = input("Enter target x,y (or q to quit): ") + if input_str.lower() == "q": + break + parts = input_str.replace(",", " ").split() + if len(parts) != 2: + logger.warning("Invalid input. 
Format: x,y") + continue + + __taget_x = int(parts[0]) + __taget_y = int(parts[1]) + except ValueError: + logger.warning("Invalid numbers.") + continue + + __cmd_needed = __cal.get_calibrated_command(__taget_x, __taget_y) + + # Test swipe + os.system( + f"python3 {CLIENT_PATH}/client3.py --url {TAPPY_ORIGINAL_SERVER_IP} --light 'swipe {200} {500} {200 + int(__cmd_needed[0])} {500 + int(__cmd_needed[1])}'" + ) + sleep(0.5) + + try: + __gesture = __tracker.output_queue.get(timeout=2) + logger.info( + f"\n{__gesture}\n" + f"[🧪] Target:\t{__taget_x}, {__taget_y}\n" + f"[🚂] Command:\tX={__cmd_needed[0]:.2f}, Y={__cmd_needed[1]:.2f}" + ) + except queue.Empty: + logger.warning("No gesture detected.") + + except KeyboardInterrupt: + pass + finally: + logger.info("Exiting test mode...") diff --git a/Brain/AI/src/motion_module/tests/test_coordinates.py b/Brain/AI/src/motion_module/tests/test_coordinates.py new file mode 100644 index 00000000..0193db8d --- /dev/null +++ b/Brain/AI/src/motion_module/tests/test_coordinates.py @@ -0,0 +1,27 @@ +import os +import subprocess + +import matplotlib.pyplot as plt +from PIL import Image, ImageDraw + +if __name__ == "__main__": + os.chdir("../resources") + with open("../screenshot.png", "wb") as f: + subprocess.run(["adb", "exec-out", "screencap", "-p"], stdout=f, check=True) + + with Image.open("../screenshot.png") as im: + size = (im.size[0], im.size[1]) + + ox, dx = int(size[0] / 2 * 1), int(size[0] / 2 * 1) + oy, dy = int(size[1] / 2 * 1.5), int(size[1] / 2 * 0.5) + + # Display swipe from point 1 to point b (origin to destination) + + draw = ImageDraw.Draw(im) + draw.line((ox, oy, dx, dy), fill="red", width=4) + draw.circle((ox, oy), radius=14, fill="green", width=4) + draw.circle((ox, oy), radius=10, fill="blue", width=4) + draw.circle((dx, dy), radius=24, fill="green", width=4) + draw.circle((dx, dy), radius=20, fill="blue", width=4) + plt.imshow(im) + plt.show() diff --git 
"""Manual check of the ORB offset recognition on a set of screenshots.

Loads STAGES + 1 screenshots from resources/test_material/multiple_axis,
matches each consecutive pair and logs the accumulated offset.
"""
import matplotlib.pyplot as plt
from AI.src.constants import logger
from AI.src.motion_module.enums import *
from AI.src.motion_module.utils.image_processing_utility import *
from AI.src.motion_module.utils.resources_utility import *

logger.propagate = False

TOTAL = 400  # px
STAGES = 4  # The number of moves we want to make
STEP = TOTAL // STAGES  # px
CHECK_RANGE = 30  # px
CHECK_FREQUENCY = 1  # px
VERTICAL = False
HORIZONTAL = True
ORIENTATION = Orientation.DESCENDING  # 0 = descending, 1 = ascending (0->100, 100->0)
PERFECT = True
CAPTURE = False


if __name__ == "__main__":
    # NOTE: the relative path assumes the script is launched from its own folder.
    os.chdir("../resources/test_material")
    os.makedirs("multiple_axis", exist_ok=True)

    if CAPTURE:
        get_image_set(
            perfect=PERFECT,
            vertical=VERTICAL,
            horizontal=HORIZONTAL,
            orientation=ORIENTATION,
            save_location="multiple_axis/",
            step_size=200,
            start_x=900,
            start_y=1000,
        )

    image_prefix = f"{'i_' if not PERFECT else ''}{'v_' if VERTICAL else ''}{'h_' if HORIZONTAL else ''}{'r_' if ORIENTATION != Orientation.DESCENDING else ''}"

    # One screenshot before the first move plus one after each of the STAGES moves.
    images = [
        Image.open(f"multiple_axis/{image_prefix}screenshot_{index}.png")
        for index in range(STAGES + 1)
    ]
    mask = make_alpha_mask_from_bw(Image.open("../p10lite/islandempire_mask_alpha.png"))

    # TOTAL*2 and TOTAL inside the desk gives us additional margin to paste the
    # images without worrying about going out of bounds, while CHECK_RANGE gives
    # us the necessary space to check for offsets in both directions.
    desk = Image.new(
        "RGBA",
        (
            images[0].width + TOTAL * 2 + CHECK_RANGE,
            images[0].height + TOTAL * 2 + CHECK_RANGE,
        ),
        (255, 0, 0, 255),
    )

    desk.paste(
        images[0].convert("RGBA"),
        (0, 0)
        if ORIENTATION == Orientation.DESCENDING
        else (
            desk.width - images[0].width - TOTAL,
            desk.height - images[0].height - TOTAL,
        ),
        mask,
    )

    # Invert the mask once, outside the loop: re-inverting it on every pair
    # flipped its polarity back and forth between consecutive iterations.
    match_mask = invert_mask_alpha_channel(mask)

    offsets = {"x": 0, "y": 0}

    for i in range(len(images) - 1):
        img_a = to_uint8(images[i])
        img_b = to_uint8(images[i + 1])

        img, (dx, dy, confidence) = visualize_orb_matches(img_a, img_b, match_mask)

        plt.figure()
        plt.imshow(img)
        plt.title(f"Pair {i}-{i + 1}: dx={dx}, dy={dy}, conf={confidence}")
        plt.show()

        offsets["x"] -= round(dx)
        offsets["y"] -= round(dy)

        logger.info(f"Pair {i}-{i + 1}: dx={dx}, dy={dy}, conf={confidence}")
        logger.info(f"Total offset: x={offsets['x']} y={offsets['y']}")
class Runner(Thread):
    """Worker thread that scores one candidate shift.

    After run() completes, score and n_valid hold the two values returned by
    np_absolute_distance_image_comparison for (img0, crop1).
    """

    def __init__(self, img0, crop1, shift):
        super().__init__()
        self.img0 = img0
        self.crop1 = crop1
        self.shift = shift
        self.score = None
        self.n_valid = None

    def run(self):
        self.score, self.n_valid = np_absolute_distance_image_comparison(
            self.img0, self.crop1
        )
        logger.log(DEBUG, f"Score at shift {self.shift}: {self.score!r}")


def find_best_offset(
    _img0: Image.Image,
    _img1: Image.Image,
    _mask: Image.Image,
    base_shift: int,
    check_range: tuple[int, int],  # min -> max
    check_step: int,
    direction: Direction = Direction.HORIZONTAL,  # 0 = horizontal, 1 = vertical
    orientation: Orientation = Orientation.DESCENDING,
):
    """
    Find the shift of _img1 that best matches _img0 (lowest comparison score).

    _img0: image to match
    _img1: image to check
    _mask: mask for both images (only check alpha values, can be generated from a black and white image with make_alpha_mask_from_bw)
    base_shift: how much the check image is shifted from the first image (can be negative)
    check_range: range of offset to check (a, b) a->b (a can be negative)
    check_step: how many pixels between each check (1 = every pixel)
    direction: change the direction of the shift (0 = horizontal, 1 = vertical)
    orientation: change the orientation of the shift (basically if the delta is positive or negative)

    Returns:
        (best_shift, best_score); (base_shift, inf) when no candidate produced a score
    """
    mask = np.array(_mask, dtype=np.int32)
    img0 = apply_mask_make_transparent(np.array(_img0, dtype=np.int32), mask)
    img1 = apply_mask_make_transparent(np.array(_img1, dtype=np.int32), mask)

    axis = 1 if direction == Direction.HORIZONTAL else 0
    sign = 1 if orientation == Orientation.DESCENDING else -1
    deltas = range(check_range[0], check_range[1] + 1, check_step)

    workers = []
    for delta in deltas:
        shift = base_shift + sign * delta
        crop1 = np.roll(img1, shift, axis=axis)

        # np.roll wraps pixels around to the opposite edge; blank that region.
        # A positive shift wraps into the first `shift` rows/columns, a
        # negative one into the last `-shift`.
        if shift > 0:
            wrapped = slice(None, shift)
        elif shift < 0:
            wrapped = slice(shift, None)
        else:
            wrapped = None
        if wrapped is not None:
            if axis == 1:
                crop1[:, wrapped] = 0
            else:
                crop1[wrapped, :] = 0

        # Runner is the scoring thread defined in this module.
        worker = Runner(img0, crop1, shift)
        workers.append((delta, worker))
        worker.start()

    for _, worker in workers:
        worker.join()

    best_score = float("inf")
    best_shift = base_shift
    best_delta = None
    for delta, worker in workers:
        if worker.score is None:
            # run() did not complete, so there is nothing to compare.
            logger.log(
                logging.WARNING,
                f"No score computed for shift {worker.shift}, skipping it",
            )
            continue
        if worker.score < best_score:
            best_score = worker.score
            best_shift = worker.shift
            best_delta = delta

    # The border test is on the delta, not on the shift: the shift also
    # contains base_shift and would never equal a check_range endpoint.
    if best_delta is not None and best_delta in (deltas[0], deltas[-1]):
        logger.log(
            logging.WARNING,
            "The shift is at the border of the check_range, probably the search was a failure",
        )

    logger.log(DEBUG, f"result: {best_shift} {best_score}")

    return best_shift, best_score
+ for i in range(STAGES): + best_shift, best_score = find_best_offset( + _img0=images[i], + _img1=images[i + 1], + _mask=ignoreZone, + base_shift=STEP if ORIENTATION == Orientation.DESCENDING else -STEP, + check_range=(-CHECK_RANGE, CHECK_RANGE), + check_step=CHECK_FREQUENCY, + direction=DIRECTION, + orientation=ORIENTATION, + ) + delta = best_shift + (-STEP if ORIENTATION == Orientation.DESCENDING else STEP) + logger.log( + INFO, + f"Best shift: {best_shift} px (delta vs STEP: {delta} px), score: {best_score:.4f}", + ) + offsets.append(best_shift) + + for i in range(STAGES): + prev_offset += offsets[i] + desk.paste( + images[i + 1], + (prev_offset, 0) if DIRECTION == Direction.HORIZONTAL else (0, prev_offset), + mask=ignoreZone, + ) + + logger.log( + INFO, + f"Total Distance x:{sum(offsets) if DIRECTION == Direction.HORIZONTAL else 0} y:{0 if DIRECTION == Direction.HORIZONTAL else sum(offsets)}", + ) + + w, h = desk.size + dpi = 100 + fig = plt.figure(figsize=(w / dpi, h / dpi), dpi=dpi) + ax = fig.add_axes([0, 0, 1, 1]) + ax.imshow(np.asarray(desk)) + plt.show() diff --git a/Brain/AI/src/motion_module/tests/test_round_trip.py b/Brain/AI/src/motion_module/tests/test_round_trip.py new file mode 100644 index 00000000..47aca408 --- /dev/null +++ b/Brain/AI/src/motion_module/tests/test_round_trip.py @@ -0,0 +1,98 @@ +import os +import subprocess +import time +from enum import Enum + +import matplotlib.pyplot as plt +from AI.src.constants import CLIENT_PATH, TAPPY_ORIGINAL_SERVER_IP +from matplotlib.transforms import offset_copy +from PIL import Image, ImageDraw + +# north, east, south, west +# ox, dx, oy, dy +actions_coefficient = ( + (1, 1, 0.25, 1.25), + (1.25, 0.25, 1, 1), + (1, 1, 1.25, 0.25), + (0.25, 1.25, 1, 1), +) +ac = actions_coefficient + + +actions_direction = ( + (0, 100), + (-100, 0), + (0, -100), + (100, 0), +) +ad = actions_direction + + +offsets = 0, 0 + + +class Direction(Enum): + NORTH = 0 + EAST = 1 + SOUTH = 2 + WEST = 3 + + +direction = 0 + +if 
__name__ == "__main__": + os.chdir("../resources") + + for x in range(4): + direction = (x + 1) % 4 + + with open("../screenshot.png", "wb") as f: + subprocess.run(["adb", "exec-out", "screencap", "-p"], stdout=f, check=True) + + with Image.open("../screenshot.png") as im: + size = (im.size[0], im.size[1]) + # ox, dx, oy, dy = im.width//2*ac[direction][0]+offsets[0], im.width//2*ac[direction][1]+offsets[0], im.height//2*ac[direction][2]+offsets[1], im.height//2*ac[direction][3]+offsets[1] + ox, dx, oy, dy = ( + im.width // 2 + offsets[0], + im.width // 2 + ad[direction][0] + offsets[0], + im.height // 2 + offsets[1], + im.height // 2 + ad[direction][1] + offsets[1], + ) + + # Display + draw = ImageDraw.Draw(im) + draw.line((ox, oy, dx, dy), fill="red", width=4) + draw.circle((ox, oy), radius=14, fill="green", width=4) + draw.circle((ox, oy), radius=10, fill="blue", width=4) + draw.circle((dx, dy), radius=24, fill="green", width=4) + draw.circle((dx, dy), radius=20, fill="blue", width=4) + plt.imshow(im) + plt.show() + + # Action + os.chdir(CLIENT_PATH) + + movements = [ + f"adb shell input motionevent DOWN {ox} {oy}", + f"adb shell input motionevent MOVE {dx} {dy}", + "sleep 0.10\n", + f"adb shell input motionevent UP {dx} {dy}", + ] + movementsClient = [ + f"python3 client3.py --url http://{TAPPY_ORIGINAL_SERVER_IP}:8000 --light 'down {ox} {oy}'", + f"python3 client3.py --url http://{TAPPY_ORIGINAL_SERVER_IP}:8000 --light 'move {dx} {dy}'", + f"python3 client3.py --url http://{TAPPY_ORIGINAL_SERVER_IP}:8000 --light 'up {dx} {dy}'", + ] + + for move in movements: + print(move) + subprocess.run(move, shell=True, check=True) + # subprocess.run(f"python3 client3.py --url http://{TAPPY_ORIGINAL_SERVER_IP}:8000 --light 'swipe {int(ox)} {int(oy)} {int(dx)} {int(dy)}'") + # subprocess.run(f"python3 client3.py --url http://{TAPPY_ORIGINAL_SERVER_IP}:8000 --light 'tap {int(ox)} {int(oy)} {int(dx)} {int(dy)}'") + time.sleep(1) + with open("../screenshot.png", "wb") as f: + 
if __name__ == "__main__":
    # Manual smoke test: performs a horizontal drag on the attached device
    # through adb "motionevent" commands. Requires adb on PATH and a device.

    # Reference screen size and a vertical swipe derived from it; only used by
    # the optional robot "swipe" command documented at the bottom.
    size = (1080, 2400)
    ox, dx = int(size[0] / 2 * 1), int(size[0] / 2 * 1)
    oy, dy = int(size[1] / 2 * 1.5), int(size[1] / 2 * 0.5)

    os.chdir(CLIENT_PATH)

    # Commands are argument lists: subprocess.run() without shell=True treats
    # a plain string as the *program name*, which fails on POSIX systems.
    adb_event = ["adb", "shell", "input", "motionevent"]
    movements = [
        [*adb_event, "DOWN", "600", "1700"],
        [*adb_event, "MOVE", "500", "1700"],
        [*adb_event, "MOVE", "400", "1700"],
        [*adb_event, "MOVE", "300", "1700"],
        [*adb_event, "MOVE", "200", "1700"],
        [*adb_event, "UP", "200", "1700"],
    ]

    # Equivalent gesture through the robot client (not executed by default).
    # TAPPY_ORIGINAL_SERVER_IP already contains scheme and port, so it is
    # passed as-is instead of being wrapped in "http://...:8000" again.
    client = ["python3", "client3.py", "--url", TAPPY_ORIGINAL_SERVER_IP, "--light"]
    movementsClient = [
        [*client, "down 675 1700"],
        [*client, "move 135 1700"],
        [*client, "up 135 1700"],
    ]

    for move in movements:
        # check=True: stop at the first failing command instead of continuing
        # a half-performed gesture.
        subprocess.run(move, check=True)

    # Single-command alternative through the robot client:
    # subprocess.run([*client, f"swipe {ox} {oy} {dx} {dy}"], check=True)
def sanitize_mask(arr: np.ndarray) -> np.ndarray:
    """
    Builds the boolean "keep" mask for a mask image.

    Parameters:
        arr: mask as (H,W,4) RGBA, (H,W,3) RGB, or any other array (treated as grayscale)
    Returns:
        a boolean numpy array, True where the pixel must be kept:
        - RGBA: alpha channel greater than 0.
        - RGB: all three channels equal to 255.
        - anything else: element different from 0.
    """
    # Channel count is only meaningful for 3-dimensional inputs.
    channels = arr.shape[2] if arr.ndim == 3 else None

    if channels == 4:
        alpha = arr[:, :, 3]
        return alpha > 0
    if channels == 3:
        return (arr == 255).all(axis=2)
    return arr != 0
def invert_mask_alpha_channel(img: np.ndarray | Image.Image) -> np.ndarray:
    """
    Flips the alpha channel of an RGBA image: 255 (opaque) becomes 0
    (transparent) and vice versa.

    Parameters:
        img: the input image, either a numpy array or a PIL Image.
    Returns:
        a new numpy array. Inputs without an alpha channel (2-D arrays or
        arrays whose third dimension is not 4) are returned as a plain copy;
        RGBA inputs are returned as int32 with the alpha channel inverted.
    """
    # Always work on a fresh array so the caller's data is never modified.
    res = to_int32(img) if isinstance(img, Image.Image) else img.copy()

    # Grayscale or non-RGBA data carries no alpha channel: nothing to invert.
    has_alpha = res.ndim != 2 and res.shape[2] == 4
    if not has_alpha:
        return res

    # Signed integers keep the subtraction below free of wrap-around.
    if res.dtype != np.int32:
        res = res.astype(np.int32)

    alpha = res[:, :, 3]
    res[:, :, 3] = 255 - alpha
    return res
def to_int32(arr: np.ndarray | Image.Image) -> np.ndarray:
    """
    Converts an image or array to a new int32 numpy array.

    Parameters:
        arr: The input image (PIL) or numpy array (any array-like is accepted).
    Returns:
        A numpy array with dtype int32. The result is always a copy, so
        callers may modify it in place without touching the input.
    Raises:
        ValueError: if the input cannot be converted to an int32 array.
    """
    if isinstance(arr, Image.Image):
        return np.array(arr, dtype=np.int32)
    try:
        # astype() always copies, which callers that mutate the result rely on.
        return np.asanyarray(arr).astype(np.int32)
    except (TypeError, ValueError) as err:
        raise ValueError("Unsupported type") from err
def np_cosine_similarity(img0: np.ndarray, img1: np.ndarray) -> float:
    """
    Calculates the cosine similarity between two images.
    https://en.wikipedia.org/wiki/Cosine_similarity
    https://www.geeksforgeeks.org/dbms/cosine-similarity/
    Pretty fast but not ideal for offset detection

    Parameters:
        img0: The first image as a numpy array.
        img1: The second image as a numpy array (same shape as img0).
    Returns:
        The cosine similarity between the two images, treating each image as
        one flat vector. Returns 0.0 when either image is entirely zero,
        because the similarity is undefined for a zero vector.
    """
    # Work in float64: squaring in the input dtype wraps around for small
    # integer types (e.g. 200 ** 2 does not fit in uint8).
    a = np.asarray(img0, dtype=np.float64)
    b = np.asarray(img1, dtype=np.float64)

    denominator = np.sqrt(np.sum(a * a)) * np.sqrt(np.sum(b * b))
    if denominator == 0:
        return 0.0
    return float(np.sum(a * b) / denominator)
def cv2_match_template(
    template_img: np.ndarray, image: np.ndarray, method=cv2.TM_SQDIFF_NORMED
) -> float:
    """
    Scores how well two images match using OpenCV template matching.
    https://docs.opencv.org/4.11.0/de/da9/tutorial_template_matching.html
    [WARNING] only TM_SQDIFF and TM_CCORR support mask from my understanding
    SQDIFF is more sensitive to small differences, noise included, so avoid it for noisy images.
    Since we're working with screens and we want the most accuracy possible, SQDIFF wins.
    CCORR is less sensitive, so it is less likely to break on photorealistic environments.
    The normed variant is the default; do some research/testing before changing it,
    template matching is pretty prone to breaking in edge cases.

    Parameters:
        template_img: first image (H,W), (H,W,3) or (H,W,4); passed to cv2.matchTemplate as the search image.
        image: second image (H,W), (H,W,3) or (H,W,4); passed to cv2.matchTemplate as the template.
        method: The template matching method to use. Defaults to cv2.TM_SQDIFF_NORMED.
    Returns:
        A float clipped to [0, 1], where 1 is a perfect match. Only the normalised
        methods produce values that are naturally within that range.
    """
    first_gray = to_grayscale(template_img)
    second_gray = to_grayscale(image)
    res = cv2.matchTemplate(first_gray, second_gray, method)
    min_val, max_val, _, _ = cv2.minMaxLoc(res)

    if method in (cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED):
        # Squared-difference methods: the best match is the minimum (0 = identical).
        similarity = 1.0 - min_val
    else:
        # Correlation methods: the best match is the maximum.
        similarity = max_val
    return float(np.clip(similarity, 0.0, 1.0))
def check_benchmarked_similarity_algorithms(threads: list[SimilarityThread]) -> None:
    """
    Logs, for every benchmarked method, the shift with the lowest score, that
    score, and the average run time across all threads.

    Parameters:
        threads: A list of SimilarityThread instances that have run benchmarks
            (their `times` and `scores` lists are filled, one entry per method).
    """
    if not threads:
        return

    def sortable(score):
        # NaN never compares as smaller, so rank it last instead of letting it win.
        return float("inf") if score != score else score

    # Iterate over the methods (entries of times/scores), not over the threads.
    method_count = min(min(len(t.scores), len(t.times)) for t in threads)

    for i in range(method_count):
        average_time = sum((t.times[i] for t in threads), datetime.timedelta()) / len(
            threads
        )
        # The best thread is selected on this method's own score column.
        best = min(threads, key=lambda t: sortable(t.scores[i]))
        logger.debug(f"Method {i}:\t{best.shift},\t{best.scores[i]},\t{average_time}")
+ img1: The second image as a numpy array. + Returns: + A tuple (dx, dy, response) representing x/y offsets and the peak response strength. + """ + # Not compatible with masks = useless + # Could try with "Windowing" (Hann Window) basically fading out the edges but the mask still need to be a rectangle + first_gray = to_grayscale(img0) + second_gray = to_grayscale(img1) + + img0_32 = np.float32(first_gray) + img1_32 = np.float32(second_gray) + + # This returns ( (x, y), response_strength ) + (dx, dy), response = cv2.phaseCorrelate(img0_32, img1_32) + + return dx, dy, response + + +def cv2_match_template_multi_axis( + img0: np.ndarray, img1: np.ndarray, method=cv2.TM_SQDIFF_NORMED +) -> tuple[int, int, float]: + """ + https://docs.opencv.org/4.11.0/de/da9/tutorial_template_matching.html + Need adjustment, since we want to find the offset this does not work as intented for now + To make this work we need to check for section of the second image + + Parameters: + img0: The first image as a numpy array. + img1: The second image as a numpy array. + method: The template matching method to use. Defaults to cv2.TM_SQDIFF_NORMED. + Returns: + A tuple (offset_x, offset_y, confidence) where: + - offset_x: The estimated horizontal offset between img0 and img1. + - offset_y: The estimated vertical offset between img0 and img1. + - confidence: A float between 0 and 1 indicating the confidence of the match, where 1 is a perfect match and 0 is no match. + """ + first_gray = to_grayscale(img0) + second_gray = to_grayscale(img1) + res = cv2.matchTemplate(first_gray, second_gray, method) + min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res) + + if method in [cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED]: + offset_x, offset_y = min_loc + # For SQDIFF/SQDIFF_NORMED, 0 is perfect, 1 is terrible. + confidence = 1.0 - min_val if method == cv2.TM_SQDIFF_NORMED else min_val + else: + offset_x, offset_y = max_loc + # For CCORR/COEFF_NORMED, 1 is perfect, 0 is terrible. 
def _roll_and_blank(img: np.ndarray, shift: int, axis: int) -> np.ndarray:
    """Roll img by shift along axis and zero the pixels that wrapped around."""
    rolled = np.roll(img, shift, axis=axis)
    if shift == 0:
        # Nothing wrapped; note that a "-0:" slice would select the whole axis.
        return rolled
    region = [slice(None)] * rolled.ndim
    # A positive roll wraps data into the leading edge, a negative one into the trailing edge.
    region[axis] = slice(0, shift) if shift > 0 else slice(shift, None)
    rolled[tuple(region)] = 0
    return rolled


def find_best_offset(
    _img0: Image.Image | np.ndarray,
    _img1: Image.Image | np.ndarray,
    _mask: Image.Image | np.ndarray,
    base_shift: int,
    check_range: tuple[int, int],  # min -> max
    check_step: int,
    direction: Direction = Direction.HORIZONTAL,
    orientation: Orientation = Orientation.DESCENDING,
):
    """
    Checks a range of offsets to find the most similar match between two images.

    Every candidate shift is evaluated in its own SimilarityThread; the shift
    with the lowest thread score wins.

    Parameters:
        _img0: image to match.
        _img1: image to check (rolled by each candidate shift).
        _mask: mask applied to both images.
        base_shift: starting shift.
        check_range: range of relative offsets to check (min, max), both included.
        check_step: step size between checks.
        direction: shift direction; HORIZONTAL rolls along axis 1, anything else along axis 0.
        orientation: DESCENDING tests base_shift + delta, otherwise base_shift - delta.
    Returns:
        A tuple (best_shift, best_score). If no candidate scores below
        infinity the result is (base_shift, inf).
    """
    if isinstance(_mask, Image.Image):
        _mask = np.array(_mask, dtype=np.int32)
    if isinstance(_img0, Image.Image):
        _img0 = np.array(_img0, dtype=np.int32)
    if isinstance(_img1, Image.Image):
        _img1 = np.array(_img1, dtype=np.int32)
    img0 = apply_mask_make_transparent(_img0, _mask)
    img1 = apply_mask_make_transparent(_img1, _mask)

    axis = 1 if direction == Direction.HORIZONTAL else 0
    sign = 1 if orientation == Orientation.DESCENDING else -1

    threads = []
    now = datetime.datetime.now()

    for delta in range(check_range[0], check_range[1] + 1, check_step):
        shift = base_shift + sign * delta
        t = SimilarityThread(img0, _roll_and_blank(img1, shift, axis), shift)
        threads.append(t)
        t.start()

    for t in threads:
        t.join()

    logger.log(level=logging.DEBUG, msg=f"Total time{datetime.datetime.now() - now}")

    best_score = float("inf")
    best_shift = base_shift
    best_thread = None
    for t in threads:
        if t.score < best_score:
            best_score = t.score
            best_shift = t.shift
            best_thread = t

    # Warn once, after the search, when the winner is the first or last tested
    # candidate: the real optimum may lie outside the checked range.
    if len(threads) > 1 and (best_thread is threads[0] or best_thread is threads[-1]):
        logger.log(
            logging.WARNING,
            "The shift is at the border of the check_range, probably the search was a failure",
        )

    logger.log(DEBUG, f"result: {best_shift} {best_score}")

    return best_shift, best_score
+ + Notes: + - I hate python indentation + - another viable detector is fast(brief) but it's not a drop in replacement like the one supported + """ + + # This is the implementation of fast, but it's more code to allow for this to be switchable + # so it's not worth it, if you need this it's easy to implement + # it's this way because fast does not have a descriptor included so we have to provide it ourself + # fast = cv2.FastFeatureDetector_create() + # brief = cv2.xfeatures2d.BriefDescriptorExtractor_create() + # kp0 = fast.detect(img0, mask_cv) + # kp1 = fast.detect(img1, mask_cv) + # kp0, des0 = brief.compute(img0_gray, kp0) + # kp1, des1 = brief.compute(img1_gray, kp1) + + match used_detector: + case 0: + detector = cv2.ORB_create() # nfeatures=2000 + case 1: + detector = cv2.AKAZE_create() + case 2: + detector = cv2.SIFT_create() # nfeatures=2000 + case _: + detector = cv2.ORB_create() # nfeatures=2000 + + img0_gray = to_grayscale(img0) + img1_gray = to_grayscale(img1) + + # Preprocess mask + mask_cv = None + if mask is not None: + m = np.array(mask) + h, w = img0_gray.shape + if m.shape[:2] != (h, w): + m = cv2.resize(to_uint8(m), (w, h), interpolation=cv2.INTER_NEAREST) + if m.ndim == 3 and m.shape[2] == 4: + mask_keep = (np.all(m[:, :, :3] == 255, axis=2)) & (m[:, :, 3] == 255) + elif m.ndim == 3 and m.shape[2] == 3: + mask_keep = np.all(m == 255, axis=2) + else: + mask_keep = m != 0 + mask_cv = mask_keep.astype(np.uint8) * 255 + + kp0, des0 = detector.detectAndCompute(img0_gray, mask_cv) + kp1, des1 = detector.detectAndCompute(img1_gray, mask_cv) + + if des0 is None or des1 is None: + return 0, 0, 0 + + bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True) + + match used_detector: + case 0, 1, _: + bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True) + case 2: + bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck=True) + matches = bf.match(des0, des1) + + if len(matches) < 4: + logger.warning("Not enough matches found to calculate offset.") + return 0, 0, 0 + + 
def visualize_orb_matches(img0, img1, mask=None, used_detector: int = 0):
    """
    Visualizes keypoint matches between two images and estimates the offset.

    Parameters:
        img0: The first image.
        img1: The second image.
        mask: Optional region-of-interest mask; non-matching sizes are resized
            with nearest-neighbour interpolation.
        used_detector: Integer mapping to ORB (0), AKAZE (1), or SIFT (2);
            any other value falls back to ORB.
    Returns:
        A tuple (vis_img, (dx, dy, confidence)). The offset and confidence are
        all 0 when no descriptors, no matches or no transform could be found.
    """
    if used_detector == 1:
        detector = cv2.AKAZE_create()
    elif used_detector == 2:
        detector = cv2.SIFT_create()
    else:
        detector = cv2.ORB_create()

    img0_gray = to_grayscale(img0)
    img1_gray = to_grayscale(img1)

    # Preprocess mask into the 0/255 uint8 form expected by detectAndCompute
    mask_cv = None
    if mask is not None:
        m = np.array(mask)
        h, w = img0_gray.shape
        if m.shape[:2] != (h, w):
            m = cv2.resize(to_uint8(m), (w, h), interpolation=cv2.INTER_NEAREST)
        if m.ndim == 3 and m.shape[2] == 4:
            mask_keep = (np.all(m[:, :, :3] == 255, axis=2)) & (m[:, :, 3] == 255)
        elif m.ndim == 3 and m.shape[2] == 3:
            mask_keep = np.all(m == 255, axis=2)
        else:
            mask_keep = m != 0
        mask_cv = mask_keep.astype(np.uint8) * 255

    kp0, des0 = detector.detectAndCompute(img0_gray, mask_cv)
    kp1, des1 = detector.detectAndCompute(img1_gray, mask_cv)

    if des0 is None or des1 is None:
        return cv2.drawMatches(img0, [], img1, [], [], None), (0, 0, 0)

    # SIFT is matched with the L2 norm; ORB and AKAZE with the Hamming norm.
    norm = cv2.NORM_L2 if used_detector == 2 else cv2.NORM_HAMMING
    bf = cv2.BFMatcher(norm, crossCheck=True)

    matches = sorted(bf.match(des0, des1), key=lambda x: x.distance)

    if not matches:
        return cv2.drawMatches(img0, kp0, img1, kp1, [], None), (0, 0, 0)

    src_pts = np.float32([kp0[m.queryIdx].pt for m in matches]).reshape(-1, 1, 2)
    dst_pts = np.float32([kp1[m.trainIdx].pt for m in matches]).reshape(-1, 1, 2)

    matrix, inliers = cv2.estimateAffinePartial2D(src_pts, dst_pts, method=cv2.RANSAC)

    if inliers is not None:
        matches_mask = inliers.ravel().tolist()
    else:
        matches_mask = [0] * len(matches)

    # Only the 50 best matches are drawn to keep the picture readable.
    matches_to_draw = matches[:50]
    mask_to_draw = matches_mask[:50]

    draw_params = dict(
        matchColor=(0, 255, 0), singlePointColor=None, matchesMask=mask_to_draw, flags=2
    )

    vis_img = cv2.drawMatches(
        img0, kp0, img1, kp1, matches_to_draw, None, **draw_params
    )

    # Without a transform (or without inliers) there is no estimate: report
    # zeros, like calculate_offset does, instead of failing on np.sum(None).
    if matrix is None or inliers is None:
        return vis_img, (0, 0, 0)

    dx, dy = matrix[0, 2], matrix[1, 2]
    confidence = np.sum(inliers) / len(matches)
    return vis_img, (dx, dy, confidence)
+ + Parameters: + img: PIL.Image.Image or numpy.ndarray + title: optional window title + dpi: DPI used for the matplotlib figure + max_inches: maximum inches allowed for the longest figure side + """ + + if isinstance(img, Image.Image): + arr = np.array(img) + else: + arr = img + + if arr is None: + raise ValueError("No image provided") + + if arr.ndim == 2: + h, w = arr.shape + else: + h, w = arr.shape[:2] + + max_px_allowed = int(dpi * max_inches) + scale = 1.0 + if max(h, w) > max_px_allowed: + scale = max_px_allowed / max(h, w) + + display_w = max(1, int(w * scale)) + display_h = max(1, int(h * scale)) + + figsize = (display_w / dpi, display_h / dpi) + + fig = plt.figure(figsize=figsize, dpi=dpi) + ax = fig.add_subplot(1, 1, 1) + ax.imshow(arr, interpolation="nearest") + ax.axis("off") + if title: + ax.set_title(title) + plt.subplots_adjust(left=0, right=1, top=1, bottom=0) + plt.show() + + +if __name__ == "__main__": + import os + + import matplotlib.pyplot as plt + + os.chdir("../resources/") + print(os.getcwd()) + local_mask = np.array(Image.open("p10lite/islandempire_mask_alpha.png")) + + # a = np.array(Image.open("cache/screenshot_0.png")) + # b = np.array(Image.open("cache/screenshot_0.png")) + # print(cv2_match_template(a, b)) + # + # a = np.array(Image.open("cache/screenshot_1.png")) + # b = np.array(Image.open("cache/screenshot_1.png")) + # a = apply_mask_make_transparent(a, mask) + # b = apply_mask_make_transparent(b, mask) + # print(cv2_match_template(a, b)) + + # a = np.array(Image.open("test_material/test_0.png")) + # b = np.array(Image.open("test_material/test_1.png")) + + a = np.array(Image.open("../utils/test_2.png")) + b = np.array(Image.open("../utils/test_3.png")) + # a = apply_mask_make_transparent(a, mask) + # b = apply_mask_make_transparent(b, mask) + # b = np.roll(b, 77, axis=1) + # b[:, :77] = 0 + # x_movement = 130 + # y_movement = 130 + # b = np.roll(b, x_movement, axis=1) + # b = np.roll(b, y_movement, axis=0) + # b[:, :x_movement] = 
def _run_motionevent(parts: List[str] | str) -> None:
    """
    Execute a single motion command.

    Parameters:
        parts: either a ready-made shell command line (str) or an argv list.

    A string is handed to the system shell through ``os.system`` and its exit
    status is ignored. Anything else is passed to ``subprocess.run`` with
    ``check=True``, so a non-zero exit raises ``subprocess.CalledProcessError``.
    """
    if not isinstance(parts, str):
        subprocess.run(parts, check=True)
        return
    os.system(parts)
def _tappy_server_url() -> str:
    """Return the robot server base URL to pass to ``client3.py --url``."""
    server = TAPPY_ORIGINAL_SERVER_IP
    # The constant may hold a complete URL (scheme and port included) or only a
    # host/IP. Blindly wrapping it as "http://{server}:8000" turns a complete
    # URL into "http://http://host:8000:8000".
    if "://" in server:
        return server.rstrip("/")
    return f"http://{server}:8000"


def get_image_set(
    perfect: bool = True,
    vertical: bool = False,
    horizontal: bool = False,
    orientation: Orientation = Orientation.DESCENDING,
    save_location: str = "",
    step_size: int = 100,
    start_x: int | None = None,
    start_y: int | None = None,
) -> None:
    """
    Capture a set of screenshots while swiping the screen step by step.

    One screenshot is taken before any movement, then one after each of the
    five swipes. Swipes are sent with ``client3.py`` (executed from
    CLIENT_PATH); afterwards a final swipe goes from the last position back to
    the first one.

    Files are named ``{prefix}screenshot_{n}.png`` where the prefix is built
    from ``i_`` (not perfect), ``v_`` (vertical), ``h_`` (horizontal) and
    ``r_`` (orientation other than DESCENDING).

    Parameters:
        perfect: if False, generate_steps adds a small random noise to every step
        vertical: move along the y axis
        horizontal: move along the x axis
        orientation: passed to generate_steps to pick the direction of travel
        save_location: directory for the screenshots; when empty the current
            working directory is used
        step_size: nominal distance in pixels between consecutive positions
        start_x: first x coordinate; defaults to 200 for a horizontal ASCENDING
            run, otherwise 600
        start_y: first y coordinate; defaults to 1300 for a vertical ASCENDING
            run, otherwise 1700
    Raises:
        ValueError: if neither vertical nor horizontal is True
    """
    if not vertical and not horizontal:
        raise ValueError("At least one of vertical or horizontal must be True")

    if start_x is None:
        start_x = 200 if horizontal and orientation == Orientation.ASCENDING else 600
    if start_y is None:
        start_y = 1300 if vertical and orientation == Orientation.ASCENDING else 1700

    count = 5  # number of forward swipes

    # generate_steps returns count + 1 positions (the start included), so a
    # fixed axis must be just as long; otherwise zip() below silently drops
    # the last swipe.
    y_steps = [start_y] * (count + 1)
    x_steps = [start_x] * (count + 1)
    if vertical:
        y_steps = generate_steps(start_y, step_size, count, perfect, orientation)
    if horizontal:
        x_steps = generate_steps(start_x, step_size, count, perfect, orientation)

    server_url = _tappy_server_url()

    def swipe_command(from_x: int, from_y: int, to_x: int, to_y: int) -> str:
        return (
            f"python3 client3.py --url {server_url} "
            f"--light 'swipe {from_x} {from_y} {to_x} {to_y}'"
        )

    forward_swipes = [
        swipe_command(from_x, from_y, to_x, to_y)
        for from_x, from_y, to_x, to_y in zip(
            x_steps, y_steps, x_steps[1:], y_steps[1:]
        )
    ]
    return_swipe = swipe_command(x_steps[-1], y_steps[-1], x_steps[0], y_steps[0])

    image_prefix = f"{'i_' if not perfect else ''}{'v_' if vertical else ''}{'h_' if horizontal else ''}{'r_' if orientation != Orientation.DESCENDING else ''}"

    # get_screenshot indexes the last character of save_path, so an empty
    # string must never reach it. Resolve the directory once, before any
    # temporary chdir into CLIENT_PATH.
    save_path = save_location or os.getcwd()

    get_screenshot(save_path=save_path, filename=f"{image_prefix}screenshot_0.png")
    for index, action in enumerate(forward_swipes, start=1):
        # client3.py is addressed by a relative path, hence the chdir.
        with DoStuffElsewhere(CLIENT_PATH):
            _run_motionevent(action)
        get_screenshot(
            save_path=save_path,
            filename=f"{image_prefix}screenshot_{index}.png",
        )

    # Go back to the starting point exactly once, again from CLIENT_PATH.
    with DoStuffElsewhere(CLIENT_PATH):
        _run_motionevent(return_swipe)
#!/bin/bash
# Times one `adb exec-out screencap` transfer per encoding: PNG, raw, and raw
# piped through gzip at compression levels 1 to 9. The captured data is
# discarded, so only the elapsed time printed by `time` matters.

echo -n "Png: "
time adb exec-out "screencap -p" >/dev/null

echo -n "Raw: "
time adb exec-out "screencap" >/dev/null

for lv in {1..9}; do
    echo -n "Level -$lv: "
    time adb exec-out "screencap | gzip -$lv" >/dev/null
done
class ScrcpyStream:
    """
    Keeps the most recent frame of a scrcpy recording piped through stdout.

    scrcpy is launched with ``--record=-`` and ``--record-format=mkv``; a
    daemon thread decodes the process's stdout with PyAV and stores every
    decoded frame as a BGR array in ``latest_frame``.
    """

    def __init__(self):
        self.latest_frame = None
        self.running = True

        print("Starting pure-RAM Scrcpy process...")

        self.process = subprocess.Popen(
            [
                "scrcpy",
                "--no-playback",
                "--no-audio",
                "--max-fps=15",
                "--verbosity=error",
                "--record-format=mkv",
                "--record=-",
            ],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
        )

        self.thread = threading.Thread(target=self._stream_loop, daemon=True)
        self.thread.start()

        # Give scrcpy a moment to connect before the caller asks for frames.
        time.sleep(2)

    def _stream_loop(self):
        """Decode frames until the stream ends or stop() clears ``running``."""
        try:
            # The context manager closes the container (and its decoder) on
            # every exit path, including errors raised while decoding.
            with av.open(self.process.stdout) as container:
                stream = container.streams.video[0]

                for frame in container.decode(stream):
                    if not self.running:
                        break
                    self.latest_frame = frame.to_ndarray(format="bgr24")

        except Exception as e:
            # Errors after stop() are expected (the pipe is torn down), so
            # only report them while the stream is supposed to be alive.
            if self.running:
                print(f"\n[Stream Error] Connection lost or failed: {e}")

    def get_frame(self):
        """Return the last decoded frame, or None if nothing was decoded yet."""
        return self.latest_frame

    def stop(self):
        """Stop decoding and shut the scrcpy process down."""
        self.running = False
        if self.process:
            self.process.terminate()
            try:
                self.process.wait(timeout=5)
            except subprocess.TimeoutExpired:
                # scrcpy ignored the terminate request: force it so that
                # shutdown can never block forever.
                self.process.kill()
                self.process.wait()

        if hasattr(self, "thread") and self.thread.is_alive():
            self.thread.join(timeout=2)
Is the device connected?") + return + + print("Scrcpy stream connected and decoding entirely in memory!") + + try: + while True: + cmd = input("\nPress [ENTER] to capture a frame (or type 'q' to quit): ") + + if cmd.lower() == 'q': + break + + frame = stream.get_frame() + + if frame is not None: + print("Frame captured! Focus the image window and press any key to close it.") + + # height, width = frame.shape[:2] + # if height > 1080: + # scale = 1080 / height + # frame = cv2.resize(frame, (int(width * scale), int(height * scale))) + # + cv2.imshow("Captured Frame", frame) + cv2.waitKey(0) + cv2.destroyWindow("Captured Frame") + else: + print("Stream is still buffering or device is off, try again in a second.") + + except KeyboardInterrupt: + print("\nExiting...") + finally: + print("Shutting down scrcpy stream...") + stream.stop() + cv2.destroyAllWindows() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/Brain/AI/src/vision/scrcpy/tests/test_usb_speed.bash b/Brain/AI/src/vision/scrcpy/tests/test_usb_speed.bash new file mode 100755 index 00000000..c65ff100 --- /dev/null +++ b/Brain/AI/src/vision/scrcpy/tests/test_usb_speed.bash @@ -0,0 +1,6 @@ +#!/bin/bash +# 1. Create a 100MB dummy file +dd if=/dev/urandom of=test_file.bin bs=1M count=100 + +# 2. 
class ScrcpyStream:
    """
    Keeps the most recent frame of a scrcpy recording written to a named pipe.

    A FIFO called ``scrcpy_video.mkv`` is created in the system temp directory
    and given to scrcpy as its ``--record`` target; a daemon thread opens the
    same path with PyAV and stores every decoded frame as a BGR array in
    ``latest_frame``.
    """

    def __init__(self):
        self.latest_frame = None
        self.running = True

        self.fifo_path = os.path.join(tempfile.gettempdir(), "scrcpy_video.mkv")

        # Start from a fresh FIFO: a leftover from a previous run would make
        # os.mkfifo fail.
        if os.path.exists(self.fifo_path):
            os.remove(self.fifo_path)
        os.mkfifo(self.fifo_path)

        print("Starting headless Scrcpy process (v3.x compatible)...")

        self.process = subprocess.Popen(
            [
                "scrcpy",
                "--no-playback",
                "--no-audio",
                "--max-fps=15",
                "--record-format=mkv",
                f"--record={self.fifo_path}",
            ],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )

        self.thread = threading.Thread(target=self._stream_loop, daemon=True)
        self.thread.start()

        # Give scrcpy a moment to connect before the caller asks for frames.
        time.sleep(2)

    def _stream_loop(self):
        """Decode frames until the stream ends or stop() clears ``running``."""
        try:
            # The context manager closes the container (and the FIFO handle)
            # on every exit path, including errors raised while decoding.
            with av.open(self.fifo_path) as container:
                stream = container.streams.video[0]

                for frame in container.decode(stream):
                    if not self.running:
                        break
                    self.latest_frame = frame.to_ndarray(format="bgr24")

        except Exception as e:
            # Errors after stop() are expected, so only report them while the
            # stream is supposed to be alive.
            if self.running:
                print(f"\n[Stream Error] Connection lost or failed: {e}")

    def get_frame(self):
        """Return the last decoded frame, or None if nothing was decoded yet."""
        return self.latest_frame

    def stop(self):
        """Stop decoding, shut scrcpy down and remove the FIFO."""
        self.running = False
        if self.process:
            self.process.terminate()
            try:
                self.process.wait(timeout=5)
            except subprocess.TimeoutExpired:
                # scrcpy ignored the terminate request: force it so that
                # shutdown can never block forever.
                self.process.kill()
                self.process.wait()

        try:
            os.remove(self.fifo_path)
        except FileNotFoundError:
            pass  # already gone, nothing to clean up

        if hasattr(self, "thread") and self.thread.is_alive():
            self.thread.join(timeout=2)
def get_screenshot(
    url=None,
    port=None,
    to_memory: bool = False,
    save_path: str = None,
    filename: str = "screenshot.png",
) -> bool | np.ndarray:
    """
    Retrieves a screenshot from the device using either adb or a web request.

    The backend is selected by the USE_ADB constant: adb when it is True, the
    screenshot server reachable at ``url``/``port`` otherwise.

    Parameters:
        url: the url of the screenshot server (only used when USE_ADB is False)
        port: the port of the screenshot server (only used when USE_ADB is False)
        to_memory: if True, the image is requested in memory instead of being
            written to a file
        save_path: directory for the file when to_memory is False; None or an
            empty string selects the default SCREENSHOT_PATH. A trailing "/"
            is appended when missing
        filename: name of the file when to_memory is False
    Returns:
        Whatever the selected backend returns (image data for the in-memory
        variants, a success flag for the file variants), or False if the
        backend raised an exception.
    """
    # An empty string counts as "not provided": indexing its last character
    # would raise IndexError before the error handling below is even reached.
    if save_path:
        target_dir = save_path if save_path.endswith("/") else save_path + "/"
    else:
        target_dir = SCREENSHOT_PATH

    try:
        if USE_ADB:
            if to_memory:
                return run_adb_screencap_to_memory()
            return run_adb_screencap_to_memory(
                file_path=target_dir,
                filename=filename,
            )
        if to_memory:
            return require_image_from_url_to_memory(url=url, port=port)
        # Honour the requested directory here as well, like the adb branch.
        return require_image_from_url(
            url=url,
            port=port,
            save_path=target_dir,
            filename=filename,
        )
    except Exception as e:
        # A failed capture is an error, not routine information; keep the
        # traceback so the failing backend can be identified.
        logger.exception("Error getting screenshot: %s", e)
        return False
def require_image_from_url_to_memory(url: str, port: str) -> np.ndarray | bool:
    """
    Takes a screenshot from the device using a web request [⚠️ UNTESTED]

    The response body is the encoded image file (the file-based variant writes
    the same bytes straight to ``screenshot.png``), so it is decoded here
    instead of being wrapped as raw bytes.

    Parameters:
        url: the url of the server
        port: the port of the server
    Returns:
        The decoded image as an RGBA array, matching the format documented for
        the adb in-memory capture, or False when the server does not answer
        with status 200.
    """
    import io  # local import: only this function needs it

    response = requests.get(f"http://{url}:{port}/?name=requestimage")
    if response.status_code != 200:
        print(
            "The screenshot can not be taken: is the ScreenshotServer running on the device?"
        )
        return False
    # np.array(response.content) would produce a 0-dimensional bytes array,
    # not pixel data; decode the image first.
    with Image.open(io.BytesIO(response.content)) as image:
        return np.array(image.convert("RGBA"))
otherwise uses adb exec-out screencap to save cpu cycles + file_path: the path to save the screenshot + filename: the name of the screenshot file + Returns: + np.ndarray: image data in RGBA format + """ + + if slow_usb: + cmd = [ + "adb", + "exec-out", + 'sh -c "screencap | gzip -2"', + ] # can be changed for each device, I found -2 to be the best + else: + cmd = ["adb", "exec-out", "screencap"] + + process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + if slow_usb: + stream = gzip.GzipFile(fileobj=process.stdout, mode="rb") + else: + stream = process.stdout + + header = stream.read(12) + if len(header) < 12: + raise Exception("Failed to read image header") + + width, height, pixel_format = struct.unpack("