edge case for pandas geodataframe concat
bw4sz committed Mar 4, 2025
1 parent c4485a3 commit 31277ef
Showing 6 changed files with 21 additions and 30 deletions.
2 changes: 0 additions & 2 deletions src/classification.py
@@ -97,8 +97,6 @@ def train(model, train_dir, val_dir, comet_logger=None, fast_dev_run=False, max_
                 comet_logger.experiment.log_image(image_path, name=f"{label_name}_{image_name}")
                 label_count[label_name] += 1
 
-    #with comet_logger.experiment.context_manager("classification"):
-
     model.trainer.fit(model)
 
     # Compute confusion matrix and upload to cometml
4 changes: 4 additions & 0 deletions src/pipeline.py
@@ -26,6 +26,10 @@ def __init__(self, cfg: DictConfig):
 
         self.comet_logger = CometLogger(project_name=self.config.comet.project, workspace=self.config.comet.workspace)
         self.comet_logger.experiment.add_tag("pipeline")
+        flight_name = os.path.basename(self.config.label_studio.images_to_annotate_dir)
+        self.comet_logger.experiment.add_tag(flight_name)
+        self.comet_logger.experiment.log_parameters(self.config)
+        self.comet_logger.experiment.log_parameter("flight_name", flight_name)
 
     def save_model(self, model, directory):
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
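The tagging pattern added above, as a standalone sketch: tagging each run with the flight name (the basename of the annotation image directory) makes pipeline runs filterable per flight in the Comet UI. The project, workspace, and path below are hypothetical placeholders.

import os
from comet_ml import Experiment

experiment = Experiment(project_name="boem", workspace="my-workspace")  # hypothetical names
flight_name = os.path.basename("/data/flights/flight_20250304")  # hypothetical path -> "flight_20250304"
experiment.add_tag(flight_name)
experiment.log_parameter("flight_name", flight_name)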
21 changes: 7 additions & 14 deletions src/pipeline_evaluation.py
@@ -8,6 +8,7 @@
 import torch
 from torchvision.ops.boxes import box_iou
 from torchvision.models.detection._utils import Matcher
+import geopandas as gpd
 
 import os
 
@@ -63,11 +64,7 @@ def __init__(self, model, crop_model, image_dir, detect_ground_truth_dir, classi
                 (self.classification_annotations.xmax == 0) &
                 (self.classification_annotations.ymax == 0)
             )
-        ]
-
-        # Prediction container
-        self.predictions = []
-
+        ]
         self.confident_predictions, self.uncertain_predictions = self.predict_classification()
         self.num_classes = len(self.classification_annotations["label"].unique())
 
@@ -214,7 +211,6 @@ def _evaluate_classification(self, predictions, accuracy_metric):
             if len(pred["labels"]) == 0:
                 continue
             matches = self.match_predictions_and_targets(pred, target)
-            self.predictions.append(matches)
             if matches.empty:
                 continue
             accuracy_metric.update(preds=torch.tensor(matches["pred"].values), target=torch.tensor(matches["target"].values))
 
@@ -243,17 +239,14 @@ def evaluate_detection(self):
             patch_overlap=self.patch_overlap,
         )
 
-        combined_predictions = pd.concat(predictions)
-        combined_predictions["workflow"] = "detection"
-        self.predictions.append(combined_predictions)
-
-        # Remove empty predictions, needs to be confirmed for edge cases
+        combined_predictions = gpd.GeoDataFrame(pd.concat(predictions))
+
+        # When you concat geodataframes, you get pandas dataframes
        combined_predictions = combined_predictions[~combined_predictions["score"].isna()]
 
         # Check if geometry is string or polygon
         combined_predictions = read_file(combined_predictions, self.image_dir)
-        ground_truth = self.detection_annotations
-        if "geometry" not in ground_truth.columns:
-            ground_truth = read_file(ground_truth, self.image_dir)
+        ground_truth = read_file(self.detection_annotations, self.image_dir)
 
         iou_results = evaluate_boxes(
             combined_predictions,
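The gpd.GeoDataFrame(pd.concat(...)) wrap above guards the edge case named in the commit message: when the frames being concatenated are not uniformly GeoDataFrames (for example, a per-image prediction frame that comes back as a plain pandas DataFrame), pd.concat can hand back a plain DataFrame, and downstream geometry operations then fail. A minimal sketch of the defensive pattern, with made-up boxes and scores:

import geopandas as gpd
import pandas as pd
from shapely.geometry import box

# Hypothetical per-image prediction frames; in the pipeline these come from predict_tile
preds_a = gpd.GeoDataFrame({"score": [0.9]}, geometry=[box(0, 0, 10, 10)])
preds_b = pd.DataFrame({"score": [0.7], "geometry": [box(5, 5, 20, 20)]})  # plain DataFrame

combined = pd.concat([preds_a, preds_b])
# Re-wrap so the result is guaranteed to be a GeoDataFrame, mirroring the fix above
combined = gpd.GeoDataFrame(combined)
assert isinstance(combined, gpd.GeoDataFrame)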
2 changes: 1 addition & 1 deletion submit.sh
@@ -15,4 +15,4 @@
 source activate BOEM
 
 cd ~/BOEM/
-srun python main.py check_annotations=True active_learning.pool_limit=10000 active_testing.n_images=100 active_learning.n_images=200
+srun python main.py check_annotations=True active_learning.pool_limit=10 active_testing.n_images=1 active_learning.n_images=1 ++classification_model.trainer.fast_dev_run=True ++detection_model.trainer.train.fast_dev_run=True debug=True
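The new invocation turns the batch job into a smoke test: pool and image counts drop to single digits, and the ++ prefix is Hydra's add-or-override syntax for keys that may not exist in the base config. The fast_dev_run overrides map to PyTorch Lightning's trainer flag, which pushes a single batch through fit and validation with no checkpointing. A minimal sketch of that flag, assuming the pipeline builds standard Lightning trainers:

import pytorch_lightning as pl

# One batch through train and validation, no checkpoints: a quick smoke test of the loop
trainer = pl.Trainer(fast_dev_run=True)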
1 change: 1 addition & 0 deletions tests/test_pipeline.py
@@ -3,6 +3,7 @@
 import pytest
 import os
 import torch
+from tests.conftest import config
 
 # Local imports
 from src import label_studio
21 changes: 8 additions & 13 deletions tests/test_pipeline_evaluation.py
@@ -1,6 +1,7 @@
 from src.pipeline_evaluation import PipelineEvaluation
 from deepforest import main
 from deepforest.model import CropModel
+from deepforest.utilities import read_file
 import pytest
 import pandas as pd
 import numpy as np
 
@@ -17,21 +18,13 @@ def __init__(self, label_dict, random=False):
 
     def predict_tile(self, raster_path, patch_size=450, patch_overlap=0, return_plot=False, crop_model=None):
         # Return realistic predictions based on image name
-        if "empty" in raster_path.lower():
-            return pd.DataFrame({
-                'xmin': [None],
-                'ymin': [None],
-                'xmax': [None],
-                'ymax': [None],
-                'label': [None],
-                'score': [None],
-                "image_path": [os.path.basename(raster_path)]
-            })
+        if "empty" in raster_path:
+            return None
 
         # If random, Generate 1-3 random predictions for non-empty images
         if self.random:
             num_predictions = np.random.randint(1, 4)
-            return pd.DataFrame({
+            df = pd.DataFrame({
                 'xmin': np.random.randint(0, 800, num_predictions),
                 'ymin': np.random.randint(0, 600, num_predictions),
                 'xmax': np.random.randint(800, 1000, num_predictions),
 
@@ -41,10 +34,13 @@ def predict_tile(self, raster_path, patch_size=450, patch_overlap=0, return_plot
                 'score': np.random.uniform(0.1, 0.99, num_predictions),
                 'image_path': [os.path.basename(raster_path)] * num_predictions
             })
+            df = read_file(df)
+            return df
         else:
             # Return the validation data for perfect performance
             val_csv_path = os.path.join(config.label_studio.csv_dir_validation, 'detection_annotations.csv')
-            validation_df = pd.read_csv(val_csv_path)
+            validation_df = read_file(val_csv_path)
 
             # Drop the empty image
             validation_df = validation_df[validation_df['image_path'] != 'empty.jpg']
 
@@ -61,7 +57,6 @@ def random_crop_model():
     m.label_dict = {"Bird": 0,"Mammal":1}
     return m
 
-
 def test_check_success(config, mock_deepforest_model, random_crop_model, comet_logger):
     """Test check success with mock model and perfect performance."""
     pipeline_evaluation = PipelineEvaluation(model=mock_deepforest_model, crop_model=random_crop_model, comet_logger=comet_logger, **config.pipeline_evaluation)
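The mock now routes its fabricated predictions through deepforest's read_file, the same helper the pipeline code above uses: given a frame with xmin/ymin/xmax/ymax columns, it attaches box geometries so the mock's output matches the GeoDataFrame shape the evaluation code expects. A minimal sketch with made-up values:

import pandas as pd
from deepforest.utilities import read_file

# Hypothetical single-box prediction frame
df = pd.DataFrame({
    "xmin": [10], "ymin": [20], "xmax": [110], "ymax": [220],
    "label": ["Bird"], "score": [0.9], "image_path": ["flight_001.jpg"],
})
gdf = read_file(df)  # the box columns become a shapely geometry column
print(gdf.geometry.iloc[0].bounds)  # (10.0, 20.0, 110.0, 220.0)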
