add test against YOLO

cflinto · cflinto · commit d2a80dc59cb8 · 2025-06-02T15:27:14.000+02:00
diff --git a/fps_demo/viewer.py b/fps_demo/viewer.py
@@ -165,34 +165,62 @@ def main():
             rect_pos[1] -= 1
         if keys[pygame.K_SPACE]:
             # compute the prediction, we get the same array but with different estimated w,h in cm (two last columns)
-            prediction = np.zeros((YOLO_data.shape[0], 7))
+            # prediction = np.zeros((YOLO_data.shape[0], 7))
+            # for i, line in enumerate(YOLO_data):
+            #     class_id, x_c, y_c, w, h, true_w, true_h = line
+            #     prediction[i, 0] = class_id
+            #     prediction[i, 1] = x_c
+            #     prediction[i, 2] = y_c
+
+            #     if class_id != 0:
+            #         print(f"Skipping class {class_id} at line {i}")
+            #         continue
+
+            #     # Convert to pixel coordinates
+            #     rect_x_estimation = int((x_c-w/2) * camera.resolution_px[0])
+            #     rect_y_estimation = int((y_c-h/2) * camera.resolution_px[1])
+            #     rect_width_estimation = int(w * camera.resolution_px[0])
+            #     rect_height_estimation = int(h * camera.resolution_px[1])
+
+            #     # Estimate physical size in cm
+            #     largest_x, largest_y = get_largest_diameters(camera, rect_x_estimation, rect_y_estimation, rect_width_estimation, rect_height_estimation)
+            #     width_cm = largest_x * 100
+            #     height_cm = largest_y * 100
+
+            #     prediction[i, 3] = width_cm
+            #     prediction[i, 4] = height_cm
+            #     prediction[i, 5] = true_w
+            #     prediction[i, 6] = true_h
+
+            #     print(f'line {i}: predicted w={width_cm:.1f} cm, h={height_cm:.1f} cm, actual w={true_w:.1f} cm, h={true_h:.1f} cm')
+            
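+            # Above: old version, wrong because rows of skipped classes stayed in the preallocated array with zeroed sizes and distorted the error stats computed below.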
+            
+            prediction = []
             for i, line in enumerate(YOLO_data):
                 class_id, x_c, y_c, w, h, true_w, true_h = line
-                prediction[i, 0] = class_id
-                prediction[i, 1] = x_c
-                prediction[i, 2] = y_c
-
                 if class_id != 0:
                     print(f"Skipping class {class_id} at line {i}")
                     continue
 
-                # Convert to pixel coordinates
-                rect_x_estimation = int((x_c-w/2) * camera.resolution_px[0])
-                rect_y_estimation = int((y_c-h/2) * camera.resolution_px[1])
+                rect_x_estimation = int((x_c - w / 2) * camera.resolution_px[0])
+                rect_y_estimation = int((y_c - h / 2) * camera.resolution_px[1])
                 rect_width_estimation = int(w * camera.resolution_px[0])
                 rect_height_estimation = int(h * camera.resolution_px[1])
 
-                # Estimate physical size in cm
-                largest_x, largest_y = get_largest_diameters(camera, rect_x_estimation, rect_y_estimation, rect_width_estimation, rect_height_estimation)
+                largest_x, largest_y = get_largest_diameters(
+                    camera, rect_x_estimation, rect_y_estimation,
+                    rect_width_estimation, rect_height_estimation
+                )
                 width_cm = largest_x * 100
                 height_cm = largest_y * 100
 
-                prediction[i, 3] = width_cm
-                prediction[i, 4] = height_cm
-                prediction[i, 5] = true_w
-                prediction[i, 6] = true_h
+                prediction.append([
+                    class_id, x_c, y_c,
+                    width_cm, height_cm,
+                    true_w, true_h
+                ])
 
-                print(f'line {i}: predicted w={width_cm:.1f} cm, h={height_cm:.1f} cm, actual w={true_w:.1f} cm, h={true_h:.1f} cm')
 
             # Now compute error stats
             df = pd.DataFrame(prediction, columns=[
diff --git a/tests/test_against_YOLO.py b/tests/test_against_YOLO.py
@@ -0,0 +1,74 @@
+import numpy as np
+import pandas as pd
+from core_geometry.camera_model import Camera
+from core_geometry.intersection import get_largest_diameters
+
+def test_physical_size_estimation_accuracy(include_non_plastic=False):
+    df = pd.read_csv("data/yolo_groundtruth_data.csv")  # NOTE(review): path is relative to the current working directory
+    # Purpose: compare get_largest_diameters() size estimates with the ground-truth sizes stored in the CSV.
+    if not include_non_plastic:  # flag name suggests class 0 is "plastic" - confirm
+        df = df[df["class"] == 0]  # default: evaluate class 0 rows only
+
+    WIDTH, HEIGHT = 192 * 4, 108 * 4  # 768 x 432; first Camera argument, read back below as camera.resolution_px
+    f_mm = 4.6  # focal length in mm
+    sensor_w_mm = 5.6  # sensor width in mm
+    sensor_h_mm = 3.2  # sensor height in mm
+    CAMERA_HEIGHT = 2.12  # y component of the camera position; presumably metres - confirm
+
+    camera = Camera((WIDTH, HEIGHT), position=[0.0, CAMERA_HEIGHT, 0.0])
+    camera.set_focal_length_and_sensor(f_mm / 1000, (sensor_w_mm / 1000, sensor_h_mm / 1000))  # mm / 1000 -> m
+    camera.yaw_deg = 0.0
+    camera.pitch_deg = -25.5  # presumably tilts the camera downwards - confirm sign convention
+
+    predictions = []
+    # Each row holds a box centre/size (x_c, y_c, w, h; presumably normalised to the image size) and the true size in cm.
+    for _, row in df.iterrows():
+        x_c, y_c, w, h, true_w, true_h = row[["x_c", "y_c", "w", "h", "true_width_cm", "true_height_cm"]]
+        rect_x = int((x_c - w / 2) * camera.resolution_px[0])  # left edge of the box, scaled by the horizontal resolution
+        rect_y = int((y_c - h / 2) * camera.resolution_px[1])  # top edge of the box, scaled by the vertical resolution
+        rect_width = int(w * camera.resolution_px[0])
+        rect_height = int(h * camera.resolution_px[1])
+        # The estimated extents are scaled by 100 to compare with the cm ground truth (presumably m -> cm - confirm).
+        largest_x, largest_y = get_largest_diameters(camera, rect_x, rect_y, rect_width, rect_height)
+        pred_w = largest_x * 100
+        pred_h = largest_y * 100
+
+        predictions.append((true_w, true_h, pred_w, pred_h))
+
+        # Baseline: always predicting (0, 0), i.e. predictions.append((true_w, true_h, 0, 0)), gives:
+        #   set 1 (presumably class 0 only - confirm): MAE Width (cm): 28.31, MAE Height (cm): 7.97
+        #     Standard Deviation Width (cm): 4.06, Standard Deviation Height (cm): 1.24
+        #     Average Relative Error Width: 1.00, Average Relative Error Height: 1.00
+        #   set 2 (presumably all classes - confirm): MAE Width (cm): 83.32, MAE Height (cm): 18.61
+        #     Standard Deviation Width (cm): 92.00, Standard Deviation Height (cm): 17.82
+        #     Average Relative Error Width: 1.00, Average Relative Error Height: 1.00
+        # Hence, the assertions below should at least enforce values lower than these.
+        # NOTE(review): the std thresholds asserted below (15 / 5) are looser than the first baseline set (4.06 / 1.24).
+
+    arr = np.array(predictions)  # columns: true_w, true_h, pred_w, pred_h
+    err_w = arr[:, 2] - arr[:, 0]  # signed error: predicted - true
+    err_h = arr[:, 3] - arr[:, 1]
+
+    mae_w = np.mean(np.abs(err_w))
+    mae_h = np.mean(np.abs(err_h))
+    std_err_h = np.std(err_h, ddof=1)  # ddof=1: sample standard deviation of the signed error
+    std_err_w = np.std(err_w, ddof=1)
+    avg_rel_err_w = np.mean(np.abs(err_w / arr[:, 0]))  # relative to the true size; a true size of 0 would give inf/nan
+    avg_rel_err_h = np.mean(np.abs(err_h / arr[:, 1]))
+
+    # Uncomment the prints below to see the computed metrics:
+    # print(f"MAE Width (cm): {mae_w:.2f}, MAE Height (cm): {mae_h:.2f}")
+    # print(f"Standard Deviation Width (cm): {std_err_w:.2f}, Standard Deviation Height (cm): {std_err_h:.2f}")
+    # print(f"Average Relative Error Width: {avg_rel_err_w:.2f}, Average Relative Error Height: {avg_rel_err_h:.2f}")
+    # Thresholds: MAE and std are in cm; the relative error is a fraction (0.15 = 15 %).
+    assert mae_w < 10, "MAE for width too high"
+    assert mae_h < 5, "MAE for height too high"
+    assert std_err_w < 15, "Standard deviation for width too high"
+    assert std_err_h < 5, "Standard deviation for height too high"
+    assert avg_rel_err_w < 0.15, "Average relative error for width too high"
+    assert avg_rel_err_h < 0.15, "Average relative error for height too high"
+
+if __name__ == "__main__":  # direct script execution
+    test_physical_size_estimation_accuracy()  # default: class 0 rows only
+    test_physical_size_estimation_accuracy(include_non_plastic=True)  # every class in the CSV, same thresholds
+    print("Test passed successfully, results do not diverge too much from YOLO ground truth data.")