human review had duplicate images
bw4sz committed Mar 4, 2025
1 parent 3660d7b commit c4485a3
Showing 7 changed files with 37 additions and 204 deletions.
105 changes: 2 additions & 103 deletions src/active_learning.py
@@ -5,107 +5,6 @@
import dask.array as da
import pandas as pd

def choose_train_images(evaluation, image_dir, strategy, n=10, patch_size=512, patch_overlap=0.1, min_score=0.1, model=None, model_path=None, classification_model=None, dask_client=None, target_labels=None, pool_limit=1000, batch_size=16, selected_test_images=[], comet_logger=None):
"""
Choose images to annotate.
Args:
evaluation (dict): A dictionary of evaluation metrics.
image_dir (str): The path to a directory of images.
strategy (str): The strategy for choosing images. Available strategies are:
- "random": Choose images randomly from the pool.
- "most-detections": Choose images with the most detections based on predictions.
- "target-labels": Choose images with target labels.
n (int, optional): The number of images to choose. Defaults to 10.
dask_client (dask.distributed.Client, optional): A Dask client for parallel processing. Defaults to None.
patch_size (int, optional): The size of the image patches to predict on. Defaults to 512.
patch_overlap (float, optional): The amount of overlap between image patches. Defaults to 0.1.
min_score (float, optional): The minimum score for a prediction to be included. Defaults to 0.1.
model (main.deepforest, optional): A trained deepforest model. Defaults to None.
classification_model (main.deepforest, optional): A trained deepforest model for classification. Defaults to None.
model_path (str, optional): The path to the model checkpoint file. Defaults to None. Only used in combination with dask.
target_labels (list, optional): A list of target labels to filter images by. Defaults to None.
pool_limit (int, optional): The maximum number of images to consider. Defaults to 1000.
batch_size (int, optional): The batch size for prediction. Defaults to 16.
selected_test_images (list, optional): A list of test images that have already been selected. Defaults to [].
comet_logger (CometLogger, optional): A CometLogger object. Defaults to None.
Returns:
list: A list of image paths.
pd.DataFrame: A DataFrame of preannotations.
"""
pool = glob.glob(os.path.join(image_dir,"*.jpg")) # Get all images in the data directory

# Remove .csv files from the pool
pool = [image for image in pool if not image.endswith('.csv')]

# Remove crop dir
try:
pool.remove(os.path.join(image_dir,"crops"))
except ValueError:
pass

#subsample
if len(pool) > pool_limit:
pool = random.sample(pool, pool_limit)

# Don't allow any test images that have just been selected to be chosen
pool = [x for x in pool if not x in selected_test_images]

if strategy=="random":
chosen_images = random.sample(pool, n)
return chosen_images
elif strategy in ["most-detections","target-labels"]:
# Predict all images
if model_path is None and model is None:
raise ValueError("A model is required for the 'most-detections' or 'target-labels' strategy. Either pass a model or a model_path.")
if dask_client:
# load model on each client
def update_sys_path():
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
dask_client.run(update_sys_path)

# Load model on each client
dask_pool = da.from_array(pool, chunks=len(pool)//len(dask_client.ncores()))
blocks = dask_pool.to_delayed().ravel()
block_futures = []
for block in blocks:
block_future = dask_client.submit(detection.predict,image_paths=block.compute(), patch_size=patch_size, patch_overlap=patch_overlap, model_path=model_path)
block_futures.append(block_future)
# Get results
dask_results = []
for block_result in block_futures:
block_result = block_result.result()
dask_results.append(pd.concat(block_result))
preannotations = pd.concat(dask_results)
else:
preannotations = detection.predict(m=model, image_paths=pool, patch_size=patch_size, patch_overlap=patch_overlap, batch_size=batch_size)
preannotations = pd.concat(preannotations)

if comet_logger:
comet_logger.log_table("active_training_pool", preannotations)

# Filter out predictions below the minimum score
preannotations = preannotations[preannotations["score"] >= min_score]

if strategy == "most-detections":
# Sort images by total number of predictions
chosen_images = preannotations.groupby("image_path").size().sort_values(ascending=False).head(n).index.tolist()
elif strategy == "target-labels":
if target_labels is None:
raise ValueError("Target labels are required for the 'target-labels' strategy.")
# Filter images by target labels
chosen_images = preannotations[preannotations.label.isin(target_labels)].groupby("image_path")["score"].mean().sort_values(ascending=False).head(n).index.tolist()
else:
raise ValueError("Invalid strategy. Must be one of 'random', 'most-detections', or 'target-labels'.")
# Get full path
chosen_images = [os.path.join(image_dir, image) for image in chosen_images]
else:
raise ValueError("Invalid strategy. Must be one of 'random', 'most-detections', or 'target-labels'.")

# Get preannotations for chosen images
chosen_preannotations = preannotations[preannotations["image_path"].isin(chosen_images)]

return chosen_images, chosen_preannotations

def choose_test_images(image_dir, strategy, n=10, patch_size=512, patch_overlap=0, min_score=0.5, model=None, model_path=None, dask_client=None, target_labels=None, pool_limit=1000, batch_size=1, comet_logger=None):
"""Choose images to annotate.
@@ -223,7 +122,7 @@ def human_review(predictions, min_score=0.1, confident_threshold=0.5):

return confident_predictions, uncertain_predictions
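
The body of human_review is collapsed in this view. From the visible signature and return values, here is a minimal sketch of the split it presumably performs; the thresholding logic is an assumption, not the committed implementation:

import pandas as pd

def human_review_sketch(predictions: pd.DataFrame, min_score=0.1, confident_threshold=0.5):
    # Hypothetical reconstruction: drop low-scoring boxes, then split the
    # remainder into confident and uncertain sets by score.
    kept = predictions[predictions["score"] >= min_score]
    confident_predictions = kept[kept["score"] >= confident_threshold]
    uncertain_predictions = kept[kept["score"] < confident_threshold]
    return confident_predictions, uncertain_predictions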

def generate_training_pool_predictions(image_dir, patch_size=512, patch_overlap=0.1, min_score=0.1, model=None, model_path=None, dask_client=None, batch_size=16, comet_logger=None, pool_limit=1000):
def generate_pool_predictions(image_dir, patch_size=512, patch_overlap=0.1, min_score=0.1, model=None, model_path=None, dask_client=None, batch_size=16, comet_logger=None, pool_limit=1000):
"""
Generate predictions for the training pool.
@@ -289,7 +188,7 @@ def update_sys_path():

return preannotations

def select_train_images(preannotations, strategy, n=10, target_labels=None):
def select_images(preannotations, strategy, n=10, target_labels=None):
"""
Select images to annotate based on the strategy.
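Together, these renames settle on a two-step API: generate_pool_predictions builds one pool of predictions over the unlabeled images, and select_images draws from that pool under a given strategy. A usage sketch under the new names follows; the directory, model construction, and argument values are illustrative assumptions, not taken from this commit:

from deepforest import main
from src.active_learning import generate_pool_predictions, select_images

model = main.deepforest()  # stand-in for the project's trained detector

# Step 1: predict once over the unlabeled pool.
pool_predictions = generate_pool_predictions(
    image_dir="data/images",   # hypothetical directory of .jpg tiles
    patch_size=512,
    patch_overlap=0.1,
    min_score=0.1,
    model=model,
)

# Step 2: reuse the same pool for any selection strategy.
train_images_to_annotate, preannotations = select_images(
    preannotations=pool_predictions,
    strategy="most-detections",
    n=10,
)
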
8 changes: 4 additions & 4 deletions src/pipeline.py
@@ -4,7 +4,7 @@

from omegaconf import DictConfig

from src.active_learning import generate_training_pool_predictions, select_train_images, choose_test_images, human_review
from src.active_learning import generate_pool_predictions, select_images, choose_test_images, human_review
from src import label_studio
from src import detection
from src import classification
@@ -148,7 +148,7 @@ def run(self):
preannotations=None)

# Generate predictions for the training pool
training_pool_predictions = generate_training_pool_predictions(
training_pool_predictions = generate_pool_predictions(
image_dir=self.config.active_learning.image_dir,
pool_limit=self.config.active_learning.pool_limit,
patch_size=self.config.active_learning.patch_size,
@@ -163,7 +163,7 @@
self.comet_logger.experiment.log_table(tabular_data=training_pool_predictions, filename="training_pool_predictions.csv")

# Select images to annotate based on the strategy
train_images_to_annotate, preannotations = select_train_images(
train_images_to_annotate, preannotations = select_images(
preannotations=training_pool_predictions,
strategy=self.config.active_learning.strategy,
n=self.config.active_learning.n_images,
@@ -191,7 +191,7 @@ def run(self):
self.comet_logger.experiment.log_table(tabular_data=uncertain_predictions, filename="uncertain_predictions.csv")

# Human review - to be replaced by AWS for NJ Audubon
chosen_uncertain_images = uncertain_predictions.sort_values(by="score", ascending=False).head(self.config.human_review.n)["image_path"].tolist()
chosen_uncertain_images = uncertain_predictions.sort_values(by="score", ascending=False).head(self.config.human_review.n)["image_path"].unique()
chosen_preannotations = uncertain_predictions[uncertain_predictions.image_path.isin(chosen_uncertain_images)]
chosen_preannotations = [group for _, group in chosen_preannotations.groupby("image_path")]
full_image_paths = [os.path.join(self.config.active_learning.image_dir, image) for image in chosen_uncertain_images]
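
The one-line change above is the fix named in the commit title: predictions carry one row per detection, so after sorting by score, image_path repeats for images with several boxes, and .tolist() could queue the same image for human review more than once. .unique() keeps each image at most once while preserving the score ordering. A minimal pandas illustration with synthetic data:

import pandas as pd

# One row per detection, so an image with several boxes appears on several rows.
preds = pd.DataFrame({
    "image_path": ["a.jpg", "a.jpg", "b.jpg", "c.jpg"],
    "score": [0.45, 0.40, 0.35, 0.30],
})

top = preds.sort_values(by="score", ascending=False).head(3)["image_path"]
print(top.tolist())   # ['a.jpg', 'a.jpg', 'b.jpg']  <- same image queued twice
print(top.unique())   # ['a.jpg' 'b.jpg']            <- each image at most once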
4 changes: 3 additions & 1 deletion tests/conftest.py
@@ -19,6 +19,7 @@ def config(tmpdir_factory):
cfg.detection_model.train_image_dir = tmpdir_factory.mktemp("images").strpath
cfg.detection_model.crop_image_dir = tmpdir_factory.mktemp("crops").strpath
cfg.pipeline_evaluation.image_dir = cfg.detection_model.train_image_dir
cfg.detection_model.trainer.train.fast_dev_run = True

# Classification model
cfg.classification_model.train_csv_folder = tmpdir_factory.mktemp("csvs").strpath
@@ -104,8 +105,9 @@ def config(tmpdir_factory):
# Create classification annotations
cfg.pipeline_evaluation.classify_ground_truth_dir = tmpdir_factory.mktemp("classification_annotations").strpath
csv_path = os.path.join(cfg.pipeline_evaluation.classify_ground_truth_dir, 'classification_annotations.csv')
val_df.to_csv(csv_path, index=False)
classification_val_df.to_csv(csv_path, index=False)

cfg.pipeline_evaluation.detection_true_positive_threshold = 0.4
# Active learning
cfg.active_learning.image_dir = cfg.detection_model.train_image_dir
cfg.active_testing.image_dir = cfg.detection_model.train_image_dir
27 changes: 21 additions & 6 deletions tests/test_active_learning.py
@@ -1,5 +1,5 @@
import pytest
from src.active_learning import choose_train_images, choose_test_images
from src.active_learning import choose_test_images, generate_pool_predictions, select_images

@pytest.fixture
def performance():
@@ -15,18 +15,33 @@ def classification_model():
from deepforest.model import CropModel
return CropModel()

def test_choose_train_images(performance, detection_model, config):
train_images_to_annotate = choose_train_images(
def test_generate_train_image_pool(performance, detection_model, config):
train_image_pool = generate_pool_predictions(
evaluation=performance,
image_dir=config.active_learning.image_dir,
model=detection_model,
strategy='random',
n=config.active_learning.n_images,
patch_size=config.active_learning.patch_size,
patch_overlap=config.active_learning.patch_overlap,
min_score=config.active_learning.min_score,
target_labels=None
)
)
assert len(train_image_pool) > 0

def test_select_train_images(performance, config):
train_image_pool = generate_pool_predictions(
evaluation=performance,
image_dir=config.active_learning.image_dir,
model=None, # Assuming model is not needed for selection
patch_size=config.active_learning.patch_size,
patch_overlap=config.active_learning.patch_overlap,
min_score=config.active_learning.min_score,
target_labels=None
)
train_images_to_annotate = select_images(
image_pool=train_image_pool,
strategy='random',
n=config.active_learning.n_images
)
assert len(train_images_to_annotate) > 0

def test_choose_test_images(detection_model, config):
22 changes: 3 additions & 19 deletions tests/test_pipeline.py
@@ -65,25 +65,9 @@ def cleanup_label_studio(label_studio_client, request):
yield

@pytest.mark.integration
def test_pipeline_run(config, comet_logger):
def test_pipeline_run(config, label_studio_client):
pipeline = Pipeline(config)
pipeline.run(comet_logger=comet_logger)

# Add assertions to verify the pipeline run
assert pipeline is not None

@pytest.mark.integration
def test_first_phase(config, label_studio_client):
"""Test init phase with no data"""
# Set validation csv paths to None
pipeline = Pipeline(cfg=config)
pipeline.run()


#@pytest.mark.skipif(not torch.cuda.is_available(), reason="Test requires GPU")
#def test_multiple_gpu(config, label_studio_client):
# """Test init phase with no data"""
# # Set validation csv paths to None
# config["active_learning"]["gpus"] = 2
# pipeline = Pipeline(cfg=config)
# pipeline.run()
# Add assertions to verify the pipeline run
assert pipeline is not None
21 changes: 3 additions & 18 deletions tests/test_pipeline_evaluation.py
@@ -55,11 +55,6 @@ def predict_tile(self, raster_path, patch_size=450, patch_overlap=0, return_plot

return MockDeepForest(label_dict={"Object": 0})

@pytest.fixture
def random_model():
m = main.deepforest(label_dict={"Object": 0}, num_classes=1)
return m

@pytest.fixture
def random_crop_model():
m = CropModel()
@@ -79,8 +74,8 @@ def test_evaluate_detection(config, mock_deepforest_model, random_crop_model, co
pipeline_evaluation = PipelineEvaluation(model=mock_deepforest_model, crop_model=random_crop_model, comet_logger=comet_logger, **config.pipeline_evaluation)
detection_results = pipeline_evaluation.evaluate_detection()

# Detection results are mocked, so the mAP should be 1
assert detection_results["mAP"]["map"] == 1
# Detection results are mocked, one image is correct, the other is not.
assert detection_results["recall"] == 0.5

def test_confident_classification_accuracy(config, mock_deepforest_model, random_crop_model, comet_logger):
"""Test confident classification accuracy with mock model and perfect performance."""
@@ -96,14 +91,4 @@ def test_uncertain_classification_accuracy(config, mock_deepforest_model, random
pipeline_evaluation = PipelineEvaluation(model=mock_deepforest_model, crop_model=random_crop_model, comet_logger=comet_logger, **config.pipeline_evaluation)
uncertain_classification_accuracy = pipeline_evaluation.evaluate_uncertain_classification()

assert uncertain_classification_accuracy["multiclassaccuracy"] == 0

def test_evaluate(config, random_model, random_crop_model, comet_logger):
"""Test evaluate with mock model."""
pipeline_evaluation = PipelineEvaluation(model=random_model, crop_model=random_crop_model, comet_logger=comet_logger, **config.pipeline_evaluation)
pipeline_evaluation.evaluate()

# All the metrics should be undefined
assert pipeline_evaluation.results["detection"]["mAP"]["map"] == -1
assert pipeline_evaluation.results["confident_classification"]["multiclassaccuracy"] == 0
assert pipeline_evaluation.results["uncertain_classification"]["multiclassaccuracy"] == 0
assert uncertain_classification_accuracy["multiclassaccuracy"] == 0
54 changes: 1 addition & 53 deletions tests/test_visualization.py
@@ -39,12 +39,6 @@ def test_predictions():
})


def test_visualizer_initialization(mock_model, tmp_path, test_predictions):
"""Test PredictionVisualizer initialization."""
visualizer = PredictionVisualizer(test_predictions, tmp_path)
assert visualizer.output_dir == tmp_path
assert visualizer.fps == 30

def test_draw_predictions(mock_model, tmp_path, test_image, test_predictions):
"""Test drawing predictions on image."""
visualizer = PredictionVisualizer(test_predictions, tmp_path)
@@ -53,50 +47,4 @@ def test_draw_predictions(mock_model, tmp_path, test_image, test_predictions):
assert isinstance(result, np.ndarray)
assert result.shape == test_image.shape
# Image should be different from original due to drawn boxes
assert not np.array_equal(result, test_image)

def test_create_visualization(mock_model, tmp_path, test_predictions):
"""Test video creation from image sequence."""
# Create test images
image_dir = tmp_path / "images"
image_dir.mkdir()

for i in range(5):
img = np.ones((600, 800, 3), dtype=np.uint8) * 255
cv2.imwrite(str(image_dir / f"image_{i:03d}.jpg"), img)

visualizer = PredictionVisualizer(test_predictions, tmp_path)
output_path = visualizer.create_visualization(list(image_dir.glob("*.jpg")))

assert Path(output_path).exists()
assert output_path.endswith('.mp4')

def test_create_summary_image(mock_model, tmp_path, test_predictions):
"""Test creation of summary statistics image."""
visualizer = PredictionVisualizer(test_predictions, tmp_path)

predictions_list = [
pd.DataFrame({
'label': ['Bird', 'Bird'],
'score': [0.9, 0.8]
}),
pd.DataFrame({
'label': ['Bird'],
'score': [0.95]
})
]

summary = visualizer.create_summary_image(predictions_list)
assert isinstance(summary, np.ndarray)
assert summary.shape == (600, 800, 3)

@pytest.mark.parametrize("confidence_threshold", [0.3, 0.7, 0.9])
def test_confidence_thresholds(mock_model, tmp_path, test_image, test_predictions, confidence_threshold):
"""Test different confidence thresholds."""
visualizer = PredictionVisualizer(test_predictions, tmp_path)
result = visualizer.draw_predictions(
test_image,
test_predictions,
confidence_threshold=confidence_threshold
)
assert isinstance(result, np.ndarray)
assert not np.array_equal(result, test_image)
