Skip to content

Commit

Permalink
add label studio compliant taxonomy
Browse files Browse the repository at this point in the history
  • Loading branch information
bw4sz committed Mar 5, 2025
1 parent 9f20a60 commit 934a58c
Show file tree
Hide file tree
Showing 8 changed files with 3,466 additions and 38 deletions.
2 changes: 1 addition & 1 deletion conf/classification_model/finetune.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ crop_image_dir: /blue/ewhite/b.weinstein/BOEM/classification/crops/
under_sample_ratio: 0
trainer:
fast_dev_run: False
max_epochs: 1
max_epochs: 10
lr: 0.00001
batch_size: 16
workers: 10
8 changes: 4 additions & 4 deletions conf/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ predict:
patch_size: 1000
patch_overlap: 0
min_score: 0.4
batch_size: 32
batch_size: 48

pipeline:
confidence_threshold: 0.9
Expand All @@ -54,10 +54,10 @@ detection_model:
labels:
- "Object"
trainer:
batch_size: 12
batch_size: 16
train:
fast_dev_run: False
epochs: 20
epochs: 10
lr: 0.00001
workers: 10
validation:
Expand Down Expand Up @@ -86,7 +86,7 @@ active_learning:
evaluation:
dask_client:
pool_limit: 500
gpus: 1
gpus: 2

active_testing:
image_dir: /blue/ewhite/b.weinstein/BOEM/sample_flight/JPG_2024_Jan27
Expand Down
7 changes: 4 additions & 3 deletions src/active_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def human_review(predictions, min_score=0.1, confident_threshold=0.5):

return confident_predictions, uncertain_predictions

def generate_pool_predictions(image_dir, patch_size=512, patch_overlap=0.1, min_score=0.1, model=None, model_path=None, dask_client=None, batch_size=16, comet_logger=None, pool_limit=1000):
def generate_pool_predictions(image_dir, patch_size=512, patch_overlap=0.1, min_score=0.1, model=None, model_path=None, dask_client=None, batch_size=16, comet_logger=None, pool_limit=1000, crop_model=None):
"""
Generate predictions for the training pool.
Expand All @@ -136,6 +136,7 @@ def generate_pool_predictions(image_dir, patch_size=512, patch_overlap=0.1, min_
dask_client (dask.distributed.Client, optional): A Dask client for parallel processing. Defaults to None.
batch_size (int, optional): The batch size for prediction. Defaults to 16.
comet_logger (CometLogger, optional): A CometLogger object. Defaults to None.
crop_model (deepforest.model.CropModel, optional): A deepforest.model.CropModel object that is forwarded to detection.predict. Defaults to None.
pool_limit (int, optional): The maximum number of images to consider. Defaults to 1000.
Returns:
Expand Down Expand Up @@ -168,7 +169,7 @@ def update_sys_path():
blocks = dask_pool.to_delayed().ravel()
block_futures = []
for block in blocks:
block_future = dask_client.submit(detection.predict, image_paths=block.compute(), patch_size=patch_size, patch_overlap=patch_overlap, model_path=model_path)
block_future = dask_client.submit(detection.predict, image_paths=block.compute(), patch_size=patch_size, patch_overlap=patch_overlap, model_path=model_path, crop_model=crop_model)
block_futures.append(block_future)
# Get results
dask_results = []
Expand All @@ -177,7 +178,7 @@ def update_sys_path():
dask_results.append(pd.concat(block_result))
preannotations = pd.concat(dask_results)
else:
preannotations = detection.predict(m=model, image_paths=pool, patch_size=patch_size, patch_overlap=patch_overlap, batch_size=batch_size)
preannotations = detection.predict(m=model, image_paths=pool, patch_size=patch_size, patch_overlap=patch_overlap, batch_size=batch_size, crop_model=crop_model)
preannotations = pd.concat(preannotations)

if comet_logger:
Expand Down
8 changes: 8 additions & 0 deletions src/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,16 @@ def preprocess_images(model, annotations, root_dir, save_dir):
# Remove any negative values
annotations = annotations[(annotations['xmin'] >= 0) & (annotations['ymin'] >= 0) & (annotations['xmax'] >= 0) & (annotations['ymax'] >= 0)]
boxes = annotations[['xmin', 'ymin', 'xmax', 'ymax']].values.tolist()

# Expand by 20 pixels on all sides
boxes = [[box[0]-20, box[1]-20, box[2]+20, box[3]+20] for box in boxes]

# Make sure no negative values
boxes = [[max(0, box[0]), max(0, box[1]), max(0, box[2]), max(0, box[3])] for box in boxes]

images = annotations["image_path"].values
labels = annotations["label"].values

model.write_crops(boxes=boxes, root_dir=root_dir, images=images, labels=labels, savedir=save_dir)

def preprocess_and_train_classification(config, train_df=None, validation_df=None, comet_logger=None):
Expand Down
51 changes: 24 additions & 27 deletions src/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ def evaluate(model, test_csv, image_root_dir):
"""
# create trainer
devices = torch.cuda.device_count()
model.create_trainer(num_nodes=1, devices=devices)
strategy = "ddp" if devices > 1 else None
model.create_trainer(num_nodes=1, devices=devices, strategy=strategy)
model.config["validation"]["csv_file"] = test_csv
model.config["validation"]["root_dir"] = image_root_dir
results = model.trainer.validate(model)
Expand Down Expand Up @@ -163,32 +164,28 @@ def train(model, train_annotations, test_annotations, train_image_dir, comet_log
model.config[key] = value

devices = torch.cuda.device_count()
if comet_logger:
comet_logger.experiment.log_parameters(model.config)
comet_logger.experiment.log_table("train.csv", train_annotations)
comet_logger.experiment.log_table("test.csv", test_annotations)
model.create_trainer(logger=comet_logger, num_nodes=1, devices=devices)
else:
model.create_trainer(num_nodes=1, devices=devices)

with comet_logger.experiment.context_manager("train_images"):
non_empty_train_annotations = read_file(model.config["train"]["csv_file"], root_dir=train_image_dir)
# Sanity check for debug
n = 5 if non_empty_train_annotations.shape[0] > 5 else non_empty_train_annotations.shape[0]
for filename in non_empty_train_annotations.image_path.sample(n=n).unique():
sample_train_annotations_for_image = non_empty_train_annotations[non_empty_train_annotations.image_path == filename]
sample_train_annotations_for_image.root_dir = train_image_dir
visualize.plot_annotations(sample_train_annotations_for_image, savedir=tmpdir)
comet_logger.experiment.log_image(os.path.join(tmpdir, filename))

with comet_logger.experiment.context_manager("test_images"):
non_empty_validation_annotations = read_file(model.config["validation"]["csv_file"], root_dir=train_image_dir)
n = 5 if non_empty_validation_annotations.shape[0] > 5 else non_empty_validation_annotations.shape[0]
for filename in non_empty_validation_annotations.image_path.sample(n=n).unique():
sample_validation_annotations_for_image = non_empty_validation_annotations[non_empty_validation_annotations.image_path == filename]
sample_validation_annotations_for_image.root_dir = train_image_dir
visualize.plot_annotations(sample_validation_annotations_for_image, savedir=tmpdir)
comet_logger.experiment.log_image(os.path.join(tmpdir, filename))
strategy = "ddp" if devices > 1 else None
comet_logger.experiment.log_parameters(model.config)
comet_logger.experiment.log_table("train.csv", train_annotations)
comet_logger.experiment.log_table("test.csv", test_annotations)
model.create_trainer(logger=comet_logger, num_nodes=1, accelerator="gpu", strategy="ddp", devices=2)

non_empty_train_annotations = read_file(model.config["train"]["csv_file"], root_dir=train_image_dir)
# Sanity check for debug
n = 5 if non_empty_train_annotations.shape[0] > 5 else non_empty_train_annotations.shape[0]
for filename in non_empty_train_annotations.image_path.sample(n=n).unique():
sample_train_annotations_for_image = non_empty_train_annotations[non_empty_train_annotations.image_path == filename]
sample_train_annotations_for_image.root_dir = train_image_dir
visualize.plot_annotations(sample_train_annotations_for_image, savedir=tmpdir)
comet_logger.experiment.log_image(os.path.join(tmpdir, filename),metadata={"name":filename,"context":'train_images'})

non_empty_validation_annotations = read_file(model.config["validation"]["csv_file"], root_dir=train_image_dir)
n = 5 if non_empty_validation_annotations.shape[0] > 5 else non_empty_validation_annotations.shape[0]
for filename in non_empty_validation_annotations.image_path.sample(n=n).unique():
sample_validation_annotations_for_image = non_empty_validation_annotations[non_empty_validation_annotations.image_path == filename]
sample_validation_annotations_for_image.root_dir = train_image_dir
visualize.plot_annotations(sample_validation_annotations_for_image, savedir=tmpdir)
comet_logger.experiment.log_image(os.path.join(tmpdir, filename),metadata={"name":filename,"context":'validation_images'})

with comet_logger.experiment.context_manager("detection"):
model.trainer.fit(model)
Expand Down
3 changes: 2 additions & 1 deletion src/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,8 @@ def run(self):
model_path=detection_checkpoint_path,
dask_client=dask_client,
batch_size=self.config.predict.batch_size,
comet_logger=self.comet_logger
comet_logger=self.comet_logger,
crop_model=trained_classification_model
)
self.comet_logger.experiment.log_table(tabular_data=training_pool_predictions, filename="training_pool_predictions.csv")

Expand Down
5 changes: 3 additions & 2 deletions submit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@
#SBATCH --output=/home/b.weinstein/logs/BOEM%j.out # Standard output and error log
#SBATCH --error=/home/b.weinstein/logs/BOEM%j.err
#SBATCH --partition=gpu
#SBATCH --gpus=1
#SBATCH --ntasks-per-node=2
#SBATCH --gpus=2

source activate BOEM

cd ~/BOEM/
srun python main.py check_annotations=True active_learning.pool_limit=10 active_testing.n_images=1 active_learning.n_images=1 ++classification_model.trainer.fast_dev_run=True ++detection_model.trainer.train.fast_dev_run=True debug=True
srun python main.py check_annotations=True active_learning.pool_limit=10 active_testing.n_images=1 active_learning.n_images=1 debug=True
Loading

0 comments on commit 934a58c

Please sign in to comment.