Unify DataFrame structure for single- and multi-model runs

marwan37 · marwan37 · commit b729a95e5e51 · 2025-04-09T14:39:14.000-05:00
diff --git a/omni-reader/steps/evaluate_models.py b/omni-reader/steps/evaluate_models.py
@@ -46,16 +46,20 @@ def evaluate_models(
     Returns:
         HTML visualization of the evaluation results
     """
-    if model_results is None or len(model_results.columns) == 0:
-        raise ValueError("At least one model is required for evaluation")
+    if model_results is None or model_results.is_empty():
+        raise ValueError("Model results are required for evaluation")
 
     if ground_truth_df is None or ground_truth_df.is_empty():
         raise ValueError("Ground truth data is required for evaluation")
 
     gt_df = ground_truth_df
 
+    # --- 1. Extract unique model names from the flat DataFrame structure ---
+    model_keys = model_results["model_name"].unique().to_list()
+    if not model_keys:
+        raise ValueError("No model names found in model_results")
+
     # --- 2. Build model info for evaluation models ---
-    model_keys = list(model_results.columns)
     model_info = {}
     model_displays = []
     model_prefixes = {}
@@ -65,11 +69,11 @@ def evaluate_models(
         model_displays.append(display)
         model_prefixes[display] = prefix
 
-    # --- 3. Convert DataFrame rows to dictionaries ---
+    # --- 3. Split model results by model ---
     model_results_dict = {}
     for model_name in model_keys:
-        model_data = model_results[model_name].to_dicts()
-        model_results_dict[model_name] = pl.DataFrame(model_data)
+        model_data = model_results.filter(pl.col("model_name") == model_name)
+        model_results_dict[model_name] = model_data
 
     # --- 4. Merge evaluation models' results ---
     base_model = model_keys[0]
@@ -113,7 +117,7 @@ def evaluate_models(
 
     # Check if we have ground truth data in our joined dataset
     if gt_text_col not in merged_results.columns and "raw_text_gt" in merged_results.columns:
-        gt_text_col = "raw_text_gt"  # Fall back to legacy ground truth model format
+        gt_text_col = "raw_text_gt"
 
     for row in merged_results.iter_rows(named=True):
         if gt_text_col not in row:
diff --git a/omni-reader/utils/visualizations.py b/omni-reader/utils/visualizations.py
@@ -385,10 +385,16 @@ def create_summary_visualization(
 
 
 def create_ocr_batch_visualization(df: pl.DataFrame) -> HTMLString:
-    """Create an HTML visualization of batch OCR processing results."""
-    # Extract metrics
+    """Create an HTML visualization of batch OCR processing results.
+
+    Args:
+        df: DataFrame containing OCR results (flattened for single/multi-model runs)
+
+    Returns:
+        HTMLString: HTML visualization of batch OCR processing results
+    """
+    # Calculate overall metrics
     total_results = len(df)
-    # Ensure all raw_text values are strings
     raw_texts = []
     for txt in df["raw_text"].to_list():
         if isinstance(txt, list):
@@ -400,14 +406,13 @@ def create_ocr_batch_visualization(df: pl.DataFrame) -> HTMLString:
     total_proc_time = df["processing_time"].sum() if "processing_time" in df.columns else 0
     avg_proc_time = df["processing_time"].mean() if "processing_time" in df.columns else 0
 
-    # Get model-specific metrics
+    # Calculate model-specific metrics
     model_metrics = {}
     model_displays = []
 
     if "model_name" in df.columns:
         for model in df["model_name"].unique().to_list():
             mdf = df.filter(pl.col("model_name") == model)
-            # Ensure all model-specific raw_text values are strings
             m_raw_texts = []
             for txt in mdf["raw_text"].to_list():
                 if isinstance(txt, list):
@@ -523,8 +528,6 @@ def create_ocr_batch_visualization(df: pl.DataFrame) -> HTMLString:
         if isinstance(raw_text, list):
             raw_text = "\n".join(raw_text)
         text_preview = str(raw_text)[:100] + ("..." if len(str(raw_text)) > 100 else "")
-
-        # Calculate the length properly
         text_length = len(str(raw_text)) if raw_text is not None else 0
 
         table_rows += f"""