Commit

reporting merged with metadata
bw4sz committed Feb 6, 2025
1 parent 1156dd2 commit d1fa2c6
Showing 4 changed files with 39 additions and 44 deletions.
conf/config.yaml: 3 changes (2 additions, 1 deletion)

@@ -76,9 +76,10 @@ pipeline_evaluation:
 
 reporting:
   report_dir: /blue/ewhite/b.weinstein/BOEM/reporting/reports
-  metadata: /blue/ewhite/b.weinstein/BOEM/reporting/metadata.csv
+  metadata: /blue/ewhite/b.weinstein/BOEM/reporting/metadata/metadata.csv
   thin_factor: 500
 
+
 active_learning:
   image_dir: /blue/ewhite/b.weinstein/BOEM/sample_flight/JPG_2024_Jan27
   strategy: 'target-labels'
src/pipeline.py: 1 change (1 addition, 0 deletions)

@@ -197,6 +197,7 @@ def run(self):
             patch_size=self.config.active_learning.patch_size,
             confident_predictions=confident_predictions,
             uncertain_predictions=uncertain_predictions,
+            metadata_csv=self.config.reporting.metadata,
             pipeline_monitor=pipeline_monitor)
 
         reporter.generate_report(create_video=False)
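For context, a minimal sketch of how the new reporting.metadata key can be read from conf/config.yaml. The attribute-style access (self.config.reporting.metadata) suggests an OmegaConf/Hydra-style config, though the loader itself is not part of this commit; the standalone load below is an assumption for illustration.

```python
# Hypothetical standalone loader; the pipeline's real config object is built elsewhere.
from omegaconf import OmegaConf

config = OmegaConf.load("conf/config.yaml")

# Attribute-style access mirrors self.config.reporting.metadata in src/pipeline.py.
print(config.reporting.metadata)
# /blue/ewhite/b.weinstein/BOEM/reporting/metadata/metadata.csv
```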
src/pipeline_evaluation.py: 53 changes (16 additions, 37 deletions)

@@ -203,57 +203,36 @@ def match_predictions_and_targets(self, pred, target):
         matches = pd.DataFrame({"pred": matched_pred, "target": matched_target})
 
         # Remove the None values for predicted, can't get class scores if the box doesn't match
-        matches = matches.dropna(subset=["pred"])
+        matches = matches.dropna(subset=["target"])
 
         return matches
 
     def evaluate_confident_classification(self):
         """Evaluate confident classification performance"""
-        targets = []
-        preds = []
-        for image_path in self.confident_predictions.drop_duplicates("image_path").image_path.tolist():
-            # Min score for predictions
-            image_targets = self.classification_annotations.loc[self.classification_annotations.image_path == os.path.basename(image_path)]
-            image_predictions = self.confident_predictions.loc[self.confident_predictions.image_path == os.path.basename(image_path)]
-            image_predictions = image_predictions[image_predictions.score > self.min_score]
-            target = self._format_targets(image_targets)
-            pred = self._format_targets(image_predictions)
-            if len(pred["labels"]) == 0:
-                continue
-
-            matches = self.match_predictions_and_targets(pred, target)
-            if len(matches) == 0:
-                continue
-            else:
-                self.confident_classification_accuracy.update(preds=torch.tensor(matches["pred"].values), target=torch.tensor(matches["target"].values))
-
-        results = {"confident_classification_accuracy": self.confident_classification_accuracy.compute()}
-
-        return results
+        return self._evaluate_classification(self.confident_predictions, self.confident_classification_accuracy)
 
     def evaluate_uncertain_classification(self):
         """Evaluate uncertain classification performance"""
+        return self._evaluate_classification(self.uncertain_predictions, self.uncertain_classification_accuracy)
 
-        targets = []
-        preds = []
-        for image_path in self.uncertain_predictions.drop_duplicates("image_path").image_path.tolist():
+    def _evaluate_classification(self, predictions, accuracy_metric):
+        """Helper function to evaluate classification performance"""
+        for image_path in predictions.drop_duplicates("image_path").image_path.tolist():
             image_targets = self.classification_annotations.loc[self.classification_annotations.image_path == os.path.basename(image_path)]
-            image_predictions = self.uncertain_predictions.loc[self.uncertain_predictions.image_path == os.path.basename(image_path)]
+            image_predictions = predictions.loc[predictions.image_path == os.path.basename(image_path)]
             image_predictions = image_predictions[image_predictions.score > self.min_score]
-            if image_predictions.empty:
-                continue
-            targets = self._format_targets(image_targets)
-            preds = self._format_targets(image_predictions)
-            matches = self.match_predictions_and_targets(preds, targets)
-            if len(matches) == 0:
-                continue
-            else:
-                self.uncertain_classification_accuracy.update(preds=torch.tensor(matches["pred"].values), target=torch.tensor(matches["target"].values))
+            target = self._format_targets(image_targets)
+            pred = self._format_targets(image_predictions)
+            if len(pred["labels"]) == 0:
+                continue
+            matches = self.match_predictions_and_targets(pred, target)
+            if matches.empty:
+                continue
+            accuracy_metric.update(preds=torch.tensor(matches["pred"].values), target=torch.tensor(matches["target"].values))
 
-        results = {"uncertain_classification_accuracy": self.uncertain_classification_accuracy.compute()}
-
+        results = {f"{accuracy_metric.__class__.__name__.lower()}": accuracy_metric.compute()}
        return results
 
    def evaluate(self):
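The refactored helper relies on the accumulate-then-compute pattern of the accuracy metrics: update() is called once per image and compute() aggregates at the end. A minimal sketch, assuming the metrics are torchmetrics MulticlassAccuracy instances (consistent with the lowercased class name used as the new results key); the labels below are illustrative.

```python
import torch
from torchmetrics.classification import MulticlassAccuracy

accuracy_metric = MulticlassAccuracy(num_classes=3)

# One update() per image; internal state accumulates across calls.
accuracy_metric.update(preds=torch.tensor([0, 1]), target=torch.tensor([0, 1]))
accuracy_metric.update(preds=torch.tensor([2, 2]), target=torch.tensor([2, 1]))

# compute() aggregates over all updates seen so far.
print(accuracy_metric.compute())  # tensor(0.8333), macro-averaged by default
```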
src/reporting.py: 26 changes (20 additions, 6 deletions)

@@ -6,7 +6,7 @@
 import glob
 
 class Reporting:
-    def __init__(self, report_dir, image_dir, pipeline_monitor=None, model=None, classification_model=None, confident_predictions=None, uncertain_predictions=None, thin_factor=10,patch_overlap=0.2, patch_size=300, min_score=0.3):
+    def __init__(self, report_dir, image_dir, metadata_csv, pipeline_monitor=None, model=None, classification_model=None, confident_predictions=None, uncertain_predictions=None, thin_factor=10,patch_overlap=0.2, patch_size=300, min_score=0.3):
         """Initialize reporting class
 
         Args:
@@ -19,14 +19,17 @@ def __init__(self, report_dir, image_dir, pipeline_monitor=None, model=None, cla
             patch_size: Patch size for detection model
             min_score: Minimum score for detection model
             thin_factor: Factor to thin images by for video creation
+            metadata_csv: Path to metadata csv for image location
             confident_predictions: Dataframe containing confident predictions
             uncertain_predictions: Dataframe containing uncertain predictions
         """
 
-        self.report_dir = report_dir
-        self.report_file = f"{report_dir}/report.csv"
+        # Create timestamped report directory
+        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+        self.report_dir = os.path.join(report_dir, timestamp)
+        self.report_file = f"{self.report_dir}/report.csv"
         self.image_dir = image_dir
-        self.sample_prediction_dir = f"{report_dir}/samples"
+        self.sample_prediction_dir = f"{self.report_dir}/samples"
         self.model = model
         self.classification_model = classification_model
         self.patch_overlap = patch_overlap
@@ -35,6 +38,10 @@ def __init__(self, report_dir, image_dir, pipeline_monitor=None, model=None, cla
         self.thin_factor = thin_factor
         self.uncertain_predictions = uncertain_predictions
         self.confident_predictions = confident_predictions
+        self.metadata = metadata_csv
+
+        self.detection_experiment = model.trainer.logger.experiment
+        self.classification_experiment = classification_model.trainer.logger.experiment
 
         # Check the dirs exist
         os.makedirs(self.report_dir, exist_ok=True)
@@ -64,6 +71,13 @@ def write_predictions(self):
         """Write predictions to a csv file"""
         self.concat_predictions()
         self.all_predictions.to_csv(f"{self.report_dir}/predictions.csv", index=False)
+        self.all_predictions['timestamp'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        self.all_predictions["unique_image"] = self.all_predictions["image_path"].apply(lambda x: os.path.splitext(os.path.basename(x))[0])
+
+        # Connect with metadata on location
+        metadata_df = pd.read_csv(self.metadata)
+        merged_predictions = self.all_predictions.merge(metadata_df[["unique_image", "flight_name","date","lat","long"]], on='unique_image')
+        merged_predictions.to_csv(f"{self.report_dir}/predictions.csv", index=False)
 
         return f"{self.report_dir}/predictions.csv"
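A minimal sketch of the new metadata join, assuming metadata.csv carries the columns named in the diff (unique_image, flight_name, date, lat, long); the dataframes below are illustrative. Note that pandas merge() defaults to an inner join, so predictions whose unique_image has no row in the metadata file are dropped from the written csv.

```python
import os
import pandas as pd

all_predictions = pd.DataFrame({
    "image_path": ["flight1/IMG_0001.JPG"],  # hypothetical example row
    "label": ["Bird"],
    "score": [0.9],
})
metadata_df = pd.DataFrame({
    "unique_image": ["IMG_0001"],
    "flight_name": ["flight1"],
    "date": ["2024-01-27"],
    "lat": [29.6],
    "long": [-84.3],
})

# Strip directory and extension to build the join key, as in write_predictions.
all_predictions["unique_image"] = all_predictions["image_path"].apply(
    lambda x: os.path.splitext(os.path.basename(x))[0])

merged = all_predictions.merge(
    metadata_df[["unique_image", "flight_name", "date", "lat", "long"]],
    on="unique_image")
print(merged)
```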

@@ -118,8 +132,8 @@ def write_metrics(self):
 
         # Extract key metrics
         detection_map = performance['detection']['mAP']['map']
-        confident_acc = performance['confident_classification']["confident_classification_accuracy"]
-        uncertain_acc = performance['uncertain_classification']["uncertain_classification_accuracy"]
+        confident_acc = performance['confident_classification']["multiclassaccuracy"]
+        uncertain_acc = performance['uncertain_classification']["multiclassaccuracy"]
 
         # Get annotation counts and completion rate
         human_reviewed_images = len(self.all_predictions['image_path'].unique())
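The hard-coded "multiclassaccuracy" key works because pipeline_evaluation.py now derives the results key from the metric's class name, so the confident and uncertain results report under the same name. A sketch assuming torchmetrics MulticlassAccuracy, as above.

```python
from torchmetrics.classification import MulticlassAccuracy

metric = MulticlassAccuracy(num_classes=2)
# Matches the f-string key built in _evaluate_classification.
print(metric.__class__.__name__.lower())  # "multiclassaccuracy"
```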
