Commit e17de33
refactor active learning code, multi gpu fails
bw4sz committed Dec 10, 2024 · 1 parent a3019bf
Showing 17 changed files with 233 additions and 240 deletions.
27 changes: 12 additions & 15 deletions conf/config.yaml
@@ -34,41 +34,45 @@ propagate:
detection_model:
checkpoint: bird
checkpoint_dir: /blue/ewhite/b.weinstein/BOEM/detection/checkpoints
validation_csv_path:
train_csv_folder: /blue/ewhite/b.weinstein/BOEM/annotations/train/
train_image_dir: /blue/ewhite/b.weinstein/BOEM/sample_flight/JPG_2024_Jan27/annotated
crop_image_dir: /blue/ewhite/b.weinstein/BOEM/detection/crops/
limit_empty_frac: 0.05
fast_dev_run: false
labels:
- "Bird"
trainer:
train:
fast_dev_run: False
epochs: 3
lr: 0.00001
workers: 0

classification_model:
checkpoint:
checkpoint_dir: /blue/ewhite/b.weinstein/BOEM/classification/checkpoints
validation_csv_path:
train_csv_folder: /blue/ewhite/b.weinstein/BOEM/annotations/train
train_image_dir: /blue/ewhite/b.weinstein/BOEM/sample_flight/JPG_2024_Jan27/annotated
crop_image_dir: /blue/ewhite/b.weinstein/BOEM/classification/crops/
under_sample_ratio: 0
trainer:
fast_dev_run: false
max_epochs: 1
fast_dev_run: False
max_epochs: 4
lr: 0.001

pipeline_evaluation:
detect_ground_truth_dir: /blue/ewhite/b.weinstein/BOEM/annotations/validation
classify_confident_ground_truth_dir: /blue/ewhite/b.weinstein/BOEM/annotations/validation
classify_uncertain_ground_truth_dir: /blue/ewhite/b.weinstein/BOEM/annotations/validation
classify_ground_truth_dir: /blue/ewhite/b.weinstein/BOEM/annotations/validation
# This is an average mAP threshold for now, but we may want to add a per-iou threshold in the future
detection_true_positive_threshold: 0.8
detection_false_positive_threshold: 0.5
classification_avg_score: 0.5
image_dir: /blue/ewhite/b.weinstein/BOEM/sample_flight/JPG_2024_Jan27/annotated
debug: False

reporting:
report_dir: /blue/ewhite/b.weinstein/BOEM/reporting/reports
metadata: /blue/ewhite/b.weinstein/BOEM/reporting/metadata.csv
thin_factor: 500

active_learning:
image_dir: /blue/ewhite/b.weinstein/BOEM/sample_flight/JPG_2024_Jan27
@@ -84,7 +88,7 @@ active_learning:
# Optional parameters:
evaluation:
dask_client:
pool_limit: 5000
pool_limit: 500
gpus: 1

active_testing:
@@ -95,10 +99,3 @@ active_testing:
patch_size: 2000
patch_overlap: 0
min_score: 0.2

deepforest:
train:
fast_dev_run: False
epochs: 10
lr: 0.00001
workers: 0
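
The net effect of this config change: the old top-level `deepforest:` block (deleted above) moves under `detection_model.trainer`, matching the `config_args=config.detection_model.trainer` change in src/detection.py below. A minimal sketch of reading the relocated keys, assuming Hydra's standard compose API and the key layout shown above:

```python
# Minimal sketch, assuming Hydra's compose API; version_base handling
# may vary by Hydra release.
from hydra import compose, initialize

with initialize(version_base=None, config_path="conf"):
    cfg = compose(config_name="config")

# Trainer settings now live under each model's own section rather than
# a shared top-level `deepforest:` block.
print(cfg.detection_model.trainer.train.epochs)      # 3
print(cfg.classification_model.trainer.max_epochs)   # 4
print(cfg.active_learning.pool_limit)                # 500
```
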
27 changes: 27 additions & 0 deletions environment.yml
@@ -0,0 +1,27 @@
name: BOEM
channels:
- conda-forge
dependencies:
- python=3.10
- mamba
- geopandas
- hydra-core
- numpy
- pandas
- scikit-learn
- rasterio
- pytest
- label-studio-sdk
- dask
- bokeh
- pytorch
- torchvision
- paramiko
- omegaconf
- matplotlib
- torchmetrics
- pip:
- dask_jobqueue
- label-studio-converter
- comet-ml
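
The new environment.yml replaces the deleted requirements.txt below. Assuming standard conda tooling, the environment can be recreated with `conda env create -f environment.yml` (or `mamba env create -f environment.yml`) followed by `conda activate BOEM`; pip-only dependencies such as comet-ml install from the `pip:` block.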

31 changes: 0 additions & 31 deletions requirements.txt

This file was deleted.

3 changes: 2 additions & 1 deletion src/active_learning.py
@@ -5,7 +5,7 @@
import dask.array as da
import pandas as pd

def choose_train_images(evaluation, image_dir, strategy, n=10, patch_size=512, patch_overlap=0.1, min_score=0.1, model=None, model_path=None, dask_client=None, target_labels=None, pool_limit=1000):
def choose_train_images(evaluation, image_dir, strategy, n=10, patch_size=512, patch_overlap=0.1, min_score=0.1, model=None, model_path=None, classification_model=None, dask_client=None, target_labels=None, pool_limit=1000):
"""Choose images to annotate.
Args:
evaluation (dict): A dictionary of evaluation metrics.
@@ -20,6 +20,7 @@ def choose_train_images(evaluation, image_dir, strategy, n=10, patch_size=512, p
patch_overlap (float, optional): The amount of overlap between image patches. Defaults to 0.1.
min_score (float, optional): The minimum score for a prediction to be included. Defaults to 0.1.
model (main.deepforest, optional): A trained deepforest model. Defaults to None.
classification_model (main.deepforest, optional): A trained deepforest model for classification. Defaults to None.
model_path (str, optional): The path to the model checkpoint file. Defaults to None. Only used in combination with dask.
target_labels (list, optional): A list of target labels to filter images by. Defaults to None.
pool_limit (int, optional): The maximum number of images to consider. Defaults to 1000.
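For context, a hedged usage sketch of the widened signature; the import path, checkpoint path, image directory, and strategy value are illustrative assumptions, not taken from this commit:

```python
# Sketch only: paths and the strategy value are hypothetical.
from deepforest import main
from src.active_learning import choose_train_images

detection_model = main.deepforest.load_from_checkpoint("detection.ckpt")

train_images = choose_train_images(
    evaluation=None,               # no prior evaluation on a first iteration
    image_dir="/path/to/images",
    strategy="random",             # hypothetical strategy name
    n=10,
    model=detection_model,
    classification_model=None,     # new optional argument in this commit
    target_labels=["Bird"],
    pool_limit=1000,
)
```
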
5 changes: 4 additions & 1 deletion src/classification.py
@@ -57,6 +57,7 @@ def train(model, train_dir, val_dir, comet_workspace=None, comet_project=None, f

if comet_project:
comet_logger = CometLogger(project_name=comet_project, workspace=comet_workspace)
comet_logger.experiment.add_tags(["classification"])
else:
comet_logger = None

@@ -66,6 +67,9 @@
model.load_from_disk(train_dir=train_dir, val_dir=val_dir)
model.trainer.fit(model)

model.trainer.logger.experiment.end()
comet_logger.experiment.end()

return model

def preprocess_images(model, annotations, root_dir, save_dir):
Expand All @@ -82,7 +86,6 @@ def preprocess_and_train_classification(config, validation_df=None):
Args:
config: Configuration object containing training parameters
validation_df (pd.DataFrame): A DataFrame containing validation annotations.
comet_logger (CometLogger): A CometLogger object.
Returns:
trained_model: Trained model object
"""
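The new lines above tag the Comet experiment at creation and end it explicitly after fitting. A minimal sketch of that lifecycle, assuming pytorch_lightning's CometLogger and a configured Comet API key; workspace and project names are placeholders:

```python
# Sketch of the logger lifecycle; names are placeholders.
from pytorch_lightning.loggers import CometLogger

comet_logger = CometLogger(project_name="my-project", workspace="my-workspace")
comet_logger.experiment.add_tags(["classification"])

# ... trainer.fit(model) runs here ...

comet_logger.experiment.end()  # flush and close the run explicitly
```
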
1 change: 0 additions & 1 deletion src/data_processing.py
@@ -140,7 +140,6 @@ def process_image(
crop_csv = "{}.csv".format(os.path.join(save_dir, image_name))

if os.path.exists(crop_csv):
warn("Crops for {} already exist in {}. Skipping.".format(crop_csv, save_dir))
return pd.read_csv(crop_csv)

full_path = os.path.join(root_dir, image_path)
22 changes: 9 additions & 13 deletions src/detection.py
@@ -191,25 +191,24 @@ def train(model, train_annotations, test_annotations, train_image_dir, comet_pro
visualize.plot_results(prediction, savedir=tmpdir)
comet_logger.experiment.log_image(os.path.join(tmpdir, image_path))

comet_logger.experiment.end()
model.trainer.logger.experiment.end()

return model

def preprocess_and_train(config, validation_df=None, model_type="detection"):
def preprocess_and_train(config, model_type="detection"):
"""Preprocess data and train model.
Args:
config: Configuration object containing training parameters
validation_df (pd.DataFrame): A DataFrame containing validation annotations.
model_type (str): The type of model to train. Defaults to "detection".
Returns:
trained_model: Trained model object
"""
# Get and split annotations
annotations = gather_data(config.detection_model.train_csv_folder)

if validation_df is None:
train_df, validation_df = create_train_test(annotations)
else:
train_df = annotations[~annotations["image_path"].isin(validation_df["image_path"])]
train_df = gather_data(config.detection_model.train_csv_folder)
validation_df = gather_data(config.label_studio.csv_dir_validation)
validation_df.loc[validation_df.label==0,"label"] = "Bird"

# Preprocess train and validation data
train_df = data_processing.preprocess_images(train_df,
@@ -247,7 +246,7 @@ def preprocess_and_train(config, validation_df=None, model_type="detection"):
model=loaded_model,
comet_project=config.comet.project,
comet_workspace=config.comet.workspace,
config_args=config.deepforest)
config_args=config.detection_model.trainer)

return trained_model

@@ -277,9 +276,7 @@ def _predict_list_(image_paths, patch_size, patch_overlap, model_path, m=None, c
if m is None:
raise ValueError("A model or model_path is required for prediction.")

# if no trainer, create one
if m.trainer is None:
m.create_trainer()
m.create_trainer(fast_dev_run=False)

predictions = []
for image_path in image_paths:
@@ -318,7 +315,6 @@ def update_sys_path():
patch_size=patch_size,
patch_overlap=patch_overlap,
model_path=model_path,
m=m,
crop_model=crop_model)
block_futures.append(block_future)
# Get results
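Two related changes here: `m.create_trainer(fast_dev_run=False)` is now called unconditionally inside the worker, and the live model (`m=m`) is no longer shipped to dask blocks, only `model_path`. A sketch of the per-worker pattern this implies, assuming deepforest's checkpoint loading; function and variable names are illustrative:

```python
# Sketch, not the commit's exact code: each dask worker reloads the model
# from a checkpoint path instead of receiving a pickled model object.
from dask.distributed import Client
from deepforest import main

def predict_block(image_paths, model_path, patch_size, patch_overlap):
    m = main.deepforest.load_from_checkpoint(model_path)  # load inside the worker
    m.create_trainer(fast_dev_run=False)
    return [
        m.predict_tile(path, patch_size=patch_size, patch_overlap=patch_overlap)
        for path in image_paths
    ]

client = Client()  # or the pipeline's multi-GPU dask client
future = client.submit(predict_block, ["flight_001.jpg"], "detection.ckpt", 2000, 0)
predictions = future.result()
```
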
60 changes: 35 additions & 25 deletions src/pipeline.py
@@ -36,10 +36,12 @@ def save_model(self, model, directory):
checkpoint_path = os.path.join(directory, f"model_{timestamp}.ckpt")
model.trainer.save_checkpoint(checkpoint_path)

return checkpoint_path

def run(self):
# Check for new annotations if the check_annotations flag is set
if self.config.check_annotations:
new_annotations = label_studio.check_for_new_annotations(
new_train_annotations = label_studio.check_for_new_annotations(
sftp_client=self.sftp_client,
url=self.config.label_studio.url,
csv_dir=self.config.label_studio.csv_dir_train,
@@ -50,7 +52,7 @@ def run(self):
)

# Validation
new_annotations = label_studio.check_for_new_annotations(
new_val_annotations = label_studio.check_for_new_annotations(
sftp_client=self.sftp_client,
url=self.config.label_studio.url,
csv_dir=self.config.label_studio.csv_dir_validation,
@@ -59,15 +61,20 @@ def run(self):
images_to_annotate_dir=self.config.label_studio.images_to_annotate_dir,
annotated_images_dir=self.config.label_studio.annotated_images_dir,
)
if new_annotations is None:
if new_val_annotations is None:
if self.config.force_upload:
print("No new annotations, but force_upload is set to True, continuing")
self.skip_training = True
else:
print("No new annotations, exiting")
return None
else:
print(f"New annotations found: {len(new_annotations)}")
try:
print(f"New train annotations found: {len(new_train_annotations)}")
except:
pass
print(f"New val annotations found: {len(new_val_annotations)}")

self.skip_training = False

# Given new annotations, propagate labels to nearby images
@@ -76,37 +83,28 @@ def run(self):
# label_propagator.through_time(new_annotations)
else:
self.skip_training = False

if self.config.detection_model.validation_csv_path is not None:
validation_df = pd.read_csv(self.config.detection_model.validation_csv_path)
else:
validation_df = None

reporter = Reporting(report_dir=self.config.reporting.report_dir, image_dir=self.config.active_learning.image_dir)

if not self.skip_training:
trained_detection_model = detection.preprocess_and_train(
self.config, validation_df=validation_df)
self.config)

trained_classification_model = classification.preprocess_and_train_classification(
self.config, validation_df=validation_df)
self.config)

self.save_model(trained_detection_model,
detection_checkpoint_path = self.save_model(trained_detection_model,
self.config.detection_model.checkpoint_dir)
self.save_model(trained_classification_model,
classification_checkpoint_path = self.save_model(trained_classification_model,
self.config.classification_model.checkpoint_dir)

pipeline_monitor = PipelineEvaluation(
model=trained_detection_model,
crop_model=trained_classification_model,
**self.config.pipeline_evaluation)
model=trained_detection_model,
crop_model=trained_classification_model,
**self.config.pipeline_evaluation)

performance = pipeline_monitor.evaluate()
reporter.pipeline_monitor = pipeline_monitor

if pipeline_monitor.check_success():
print("Pipeline performance is satisfactory, exiting")
reporter.generate_report()
return None
else:
trained_detection_model = detection.load(
@@ -119,6 +117,8 @@ def run(self):
trained_classification_model = None

performance = None
pipeline_monitor = None
detection_checkpoint_path = None

if self.config.active_learning.gpus > 1:
dask_client = start(gpus=self.config.active_learning.gpus, mem_size="70GB")
@@ -145,6 +145,7 @@ def run(self):
train_images_to_annotate = choose_train_images(
evaluation=performance,
image_dir=self.config.active_learning.image_dir,
model_path=detection_checkpoint_path,
model=trained_detection_model,
strategy=self.config.active_learning.strategy,
n=self.config.active_learning.n_images,
@@ -167,9 +168,6 @@ def run(self):
min_score=self.config.active_learning.min_score
)

reporter.confident_predictions = confident_predictions
reporter.uncertain_predictions = uncertain_predictions

print(f"Images requiring human review: {len(confident_predictions)}")
print(f"Images auto-annotated: {len(uncertain_predictions)}")

@@ -191,6 +189,18 @@ def run(self):
preannotations=preannotations)


if reporter.pipeline_monitor is not None:
reporter.generate_report()
if pipeline_monitor:
reporter = Reporting(
report_dir=self.config.reporting.report_dir,
image_dir=self.config.active_learning.image_dir,
model=trained_detection_model,
classification_model=trained_classification_model,
thin_factor=self.config.reporting.thin_factor,
patch_overlap=self.config.active_learning.patch_overlap,
patch_size=self.config.active_learning.patch_size,
confident_predictions=confident_predictions,
uncertain_predictions=uncertain_predictions,
pipeline_monitor=pipeline_monitor)

reporter.generate_report()
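
The small `save_model` change is load-bearing for the multi-GPU path: it now returns the checkpoint path, which `run()` forwards to `choose_train_images` as `model_path` so dask workers can reload the model from disk. A sketch of the resulting contract; the timestamp construction is an assumption, since it sits outside the hunk shown above:

```python
# Sketch of save_model()'s new contract; timestamp construction assumed.
import os
from datetime import datetime

def save_model(model, directory):
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    checkpoint_path = os.path.join(directory, f"model_{timestamp}.ckpt")
    model.trainer.save_checkpoint(checkpoint_path)
    return checkpoint_path  # new: run() forwards this as model_path
```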
