edge case for pandas geodataframe concat
bw4sz committed Mar 4, 2025
1 parent c4485a3 commit 31277ef
Showing 6 changed files with 21 additions and 30 deletions.
2 changes: 0 additions & 2 deletions src/classification.py
@@ -97,8 +97,6 @@ def train(model, train_dir, val_dir, comet_logger=None, fast_dev_run=False, max_
                 comet_logger.experiment.log_image(image_path, name=f"{label_name}_{image_name}")
                 label_count[label_name] += 1
 
-    #with comet_logger.experiment.context_manager("classification"):
-
     model.trainer.fit(model)
 
     # Compute confusion matrix and upload to cometml
4 changes: 4 additions & 0 deletions src/pipeline.py
@@ -26,6 +26,10 @@ def __init__(self, cfg: DictConfig):
 
         self.comet_logger = CometLogger(project_name=self.config.comet.project, workspace=self.config.comet.workspace)
         self.comet_logger.experiment.add_tag("pipeline")
+        flight_name = os.path.basename(self.config.label_studio.images_to_annotate_dir)
+        self.comet_logger.experiment.add_tag(flight_name)
+        self.comet_logger.experiment.log_parameters(self.config)
+        self.comet_logger.experiment.log_parameter("flight_name", flight_name)
 
     def save_model(self, model, directory):
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
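The tagging pattern added above, as a standalone sketch: tagging each run with the flight name (the basename of the annotation image directory) makes pipeline runs filterable per flight in the Comet UI. The project, workspace, and path below are hypothetical placeholders.

import os
from comet_ml import Experiment

experiment = Experiment(project_name="boem", workspace="my-workspace")  # hypothetical names
flight_name = os.path.basename("/data/flights/flight_20250304")  # hypothetical path -> "flight_20250304"
experiment.add_tag(flight_name)
experiment.log_parameter("flight_name", flight_name)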
21 changes: 7 additions & 14 deletions src/pipeline_evaluation.py
@@ -8,6 +8,7 @@
 import torch
 from torchvision.ops.boxes import box_iou
 from torchvision.models.detection._utils import Matcher
+import geopandas as gpd
 
 import os
 
@@ -63,11 +64,7 @@ def __init__(self, model, crop_model, image_dir, detect_ground_truth_dir, classi
                 (self.classification_annotations.xmax == 0) &
                 (self.classification_annotations.ymax == 0)
             )
-        ]
-
-        # Prediction container
-        self.predictions = []
-
+        ]
         self.confident_predictions, self.uncertain_predictions = self.predict_classification()
         self.num_classes = len(self.classification_annotations["label"].unique())
 
@@ -214,7 +211,6 @@ def _evaluate_classification(self, predictions, accuracy_metric):
             if len(pred["labels"]) == 0:
                 continue
             matches = self.match_predictions_and_targets(pred, target)
-            self.predictions.append(matches)
             if matches.empty:
                 continue
             accuracy_metric.update(preds=torch.tensor(matches["pred"].values), target=torch.tensor(matches["target"].values))
 
@@ -243,17 +239,14 @@ def evaluate_detection(self):
             patch_overlap=self.patch_overlap,
         )
 
-        combined_predictions = pd.concat(predictions)
-        combined_predictions["workflow"] = "detection"
-        self.predictions.append(combined_predictions)
-
-        # Remove empty predictions, needs to be confirmed for edge cases
+        combined_predictions = gpd.GeoDataFrame(pd.concat(predictions))
+
+        # When you concat geodataframes, you get pandas dataframes
        combined_predictions = combined_predictions[~combined_predictions["score"].isna()]
 
         # Check if geometry is string or polygon
         combined_predictions = read_file(combined_predictions, self.image_dir)
-        ground_truth = self.detection_annotations
-        if "geometry" not in ground_truth.columns:
-            ground_truth = read_file(ground_truth, self.image_dir)
+        ground_truth = read_file(self.detection_annotations, self.image_dir)
 
         iou_results = evaluate_boxes(
             combined_predictions,
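The gpd.GeoDataFrame(pd.concat(...)) wrap above guards the edge case named in the commit message: when the frames being concatenated are not uniformly GeoDataFrames (for example, a per-image prediction frame that comes back as a plain pandas DataFrame), pd.concat can hand back a plain DataFrame, and downstream geometry operations then fail. A minimal sketch of the defensive pattern, with made-up boxes and scores:

import geopandas as gpd
import pandas as pd
from shapely.geometry import box

# Hypothetical per-image prediction frames; in the pipeline these come from predict_tile
preds_a = gpd.GeoDataFrame({"score": [0.9]}, geometry=[box(0, 0, 10, 10)])
preds_b = pd.DataFrame({"score": [0.7], "geometry": [box(5, 5, 20, 20)]})  # plain DataFrame

combined = pd.concat([preds_a, preds_b])
# Re-wrap so the result is guaranteed to be a GeoDataFrame, mirroring the fix above
combined = gpd.GeoDataFrame(combined)
assert isinstance(combined, gpd.GeoDataFrame)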
2 changes: 1 addition & 1 deletion submit.sh
@@ -15,4 +15,4 @@
 source activate BOEM
 
 cd ~/BOEM/
-srun python main.py check_annotations=True active_learning.pool_limit=10000 active_testing.n_images=100 active_learning.n_images=200
+srun python main.py check_annotations=True active_learning.pool_limit=10 active_testing.n_images=1 active_learning.n_images=1 ++classification_model.trainer.fast_dev_run=True ++detection_model.trainer.train.fast_dev_run=True debug=True
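The new invocation turns the batch job into a smoke test: pool and image counts drop to single digits, and the ++ prefix is Hydra's add-or-override syntax for keys that may not exist in the base config. The fast_dev_run overrides map to PyTorch Lightning's trainer flag, which pushes a single batch through fit and validation with no checkpointing. A minimal sketch of that flag, assuming the pipeline builds standard Lightning trainers:

import pytorch_lightning as pl

# One batch through train and validation, no checkpoints: a quick smoke test of the loop
trainer = pl.Trainer(fast_dev_run=True)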
1 change: 1 addition & 0 deletions tests/test_pipeline.py
@@ -3,6 +3,7 @@
 import pytest
 import os
 import torch
+from tests.conftest import config
 
 # Local imports
 from src import label_studio
21 changes: 8 additions & 13 deletions tests/test_pipeline_evaluation.py
@@ -1,6 +1,7 @@
 from src.pipeline_evaluation import PipelineEvaluation
 from deepforest import main
 from deepforest.model import CropModel
+from deepforest.utilities import read_file
 import pytest
 import pandas as pd
 import numpy as np
 
@@ -17,21 +18,13 @@ def __init__(self, label_dict, random=False):
 
     def predict_tile(self, raster_path, patch_size=450, patch_overlap=0, return_plot=False, crop_model=None):
         # Return realistic predictions based on image name
-        if "empty" in raster_path.lower():
-            return pd.DataFrame({
-                'xmin': [None],
-                'ymin': [None],
-                'xmax': [None],
-                'ymax': [None],
-                'label': [None],
-                'score': [None],
-                "image_path": [os.path.basename(raster_path)]
-            })
+        if "empty" in raster_path:
+            return None
 
         # If random, Generate 1-3 random predictions for non-empty images
         if self.random:
             num_predictions = np.random.randint(1, 4)
-            return pd.DataFrame({
+            df = pd.DataFrame({
                 'xmin': np.random.randint(0, 800, num_predictions),
                 'ymin': np.random.randint(0, 600, num_predictions),
                 'xmax': np.random.randint(800, 1000, num_predictions),
 
@@ -41,10 +34,13 @@ def predict_tile(self, raster_path, patch_size=450, patch_overlap=0, return_plot
                 'score': np.random.uniform(0.1, 0.99, num_predictions),
                 'image_path': [os.path.basename(raster_path)] * num_predictions
             })
+            df = read_file(df)
+            return df
         else:
             # Return the validation data for perfect performance
             val_csv_path = os.path.join(config.label_studio.csv_dir_validation, 'detection_annotations.csv')
-            validation_df = pd.read_csv(val_csv_path)
+            validation_df = read_file(val_csv_path)
 
             # Drop the empty image
             validation_df = validation_df[validation_df['image_path'] != 'empty.jpg']
 
@@ -61,7 +57,6 @@ def random_crop_model():
     m.label_dict = {"Bird": 0,"Mammal":1}
     return m
 
-
 def test_check_success(config, mock_deepforest_model, random_crop_model, comet_logger):
     """Test check success with mock model and perfect performance."""
     pipeline_evaluation = PipelineEvaluation(model=mock_deepforest_model, crop_model=random_crop_model, comet_logger=comet_logger, **config.pipeline_evaluation)
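The mock now routes its fabricated predictions through deepforest's read_file, the same helper the pipeline code above uses: given a frame with xmin/ymin/xmax/ymax columns, it attaches box geometries so the mock's output matches the GeoDataFrame shape the evaluation code expects. A minimal sketch with made-up values:

import pandas as pd
from deepforest.utilities import read_file

# Hypothetical single-box prediction frame
df = pd.DataFrame({
    "xmin": [10], "ymin": [20], "xmax": [110], "ymax": [220],
    "label": ["Bird"], "score": [0.9], "image_path": ["flight_001.jpg"],
})
gdf = read_file(df)  # the box columns become a shapely geometry column
print(gdf.geometry.iloc[0].bounds)  # (10.0, 20.0, 110.0, 220.0)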
