refactor evaluate detection
bw4sz committed Feb 25, 2025
1 parent b62a380 commit 3c7188f
Showing 19 changed files with 465 additions and 426 deletions.
29 changes: 0 additions & 29 deletions BOEM.qmd

This file was deleted.

22 changes: 0 additions & 22 deletions Dockerfile

This file was deleted.

101 changes: 57 additions & 44 deletions USGS_backbone.py
@@ -1,64 +1,77 @@
from deepforest import main
import pandas as pd
import os
import tempfile
import comet_ml
from pytorch_lightning.loggers import CometLogger
from pytorch_lightning.profilers.simple import SimpleProfiler
import torch
import argparse
from deepforest.callbacks import images_callback

df = pd.read_csv("/blue/ewhite/b.weinstein/BOEM/UBFAI Annotations/20231118/20231116_cropped_annotations.csv")
df.wat_label.value_counts()
df = df[df.wat_label.isin(["Bird","Cartilaginous Fish","Bony Fish","Mammal","Reptile"])]
# Parse arguments
parser = argparse.ArgumentParser(description="Train DeepForest model")
parser.add_argument("--batch_size", type=int, default=12, help="Batch size for training")
parser.add_argument("--workers", type=int, default=0, help="Number of workers for data loading")
args = parser.parse_args()

# Combine Fish classes
df.loc[df.wat_label.isin(["Cartilaginous Fish","Bony Fish"]),"wat_label"] = "Fish"
# Use parsed arguments
batch_size = args.batch_size
workers = args.workers

# Construct padded crop name
df["image_path"] = df["bname_parent"] +"_" + df["tile_xtl"].astype(str) + "_" + df["tile_ytl"].astype(str) + "_" + df["tile_xbr"].astype(str) + "_" + df["tile_ybr"].astype(str) + ".JPG"
savedir = "/blue/ewhite/b.weinstein/BOEM/UBFAI Images with Detection Data/crops"
train = pd.read_csv(os.path.join(savedir,"train.csv"))
test = pd.read_csv(os.path.join(savedir,"test.csv"))

# Check if all images exist
df["image_exists"] = df["image_path"].apply(lambda x: os.path.exists(os.path.join("/blue/ewhite/b.weinstein/BOEM/UBFAI Annotations/20231118/padded",x)))
# Initialize DeepForest model
m = main.deepforest()
m.load_model("weecology/deepforest-bird")
m.label_dict = {"Object":0}
m.numeric_to_label_dict = {0:"Object"}

df["xmin"] = df["xtl"]
df["ymin"] = df["ytl"]
df["xmax"] = df["xbr"]
df["ymax"] = df["ybr"]
df["label"] = df["wat_label"]
m.config["train"]["csv_file"] = os.path.join(savedir,"train.csv")
m.config["train"]["root_dir"] = "/blue/ewhite/b.weinstein/BOEM/UBFAI Images with Detection Data/crops"
m.config["train"]["fast_dev_run"] = False
m.config["validation"]["csv_file"] = os.path.join(savedir,"test.csv")
m.config["validation"]["root_dir"] = "/blue/ewhite/b.weinstein/BOEM/UBFAI Images with Detection Data/crops"
m.config["batch_size"] = batch_size
m.config["train"]["epochs"] = 100
m.config["workers"] = workers
m.config["validation"]["val_accuracy_interval"] = 10
m.config["train"]["scheduler"]["params"]["eps"] = 0
m.config["train"]["lr"] = 0.0005

# Randomly split 85 - 15 for each class
train = df.groupby("wat_label").sample(frac=0.85)
test = df.drop(train.index)
comet_logger = CometLogger(project_name="BOEM", workspace="bw4sz")

# Write to tmp data directory
tmpdir = tempfile.mkdtemp()
train.to_csv(os.path.join(tmpdir,"train.csv"),index=False)
test.to_csv(os.path.join(tmpdir,"test.csv"),index=False)
im = images_callback(n=20, every_n_epochs=25, savedir=os.path.join(savedir,"images"))

# Initialize the new DeepForest model (the model that you will train) with your classes
m = main.deepforest(config_args={"num_classes":4}, label_dict={"Bird":0,"Fish":1,"Mammal":2,"Reptile":3})
# Log the training and test sets
comet_logger.experiment.log_table("train.csv", train)
comet_logger.experiment.log_table("test.csv", test)

# Initialize the release DeepForest model (the model whose regression head will be reused)
deepforest_release_model = main.deepforest()
deepforest_release_model.load_model("weecology/deepforest-bird")
# Pytorch lightning save checkpoint
#simple_profiler = SimpleProfiler(dirpath=os.path.join(tmpdir,"profiler"), filename="profiler.txt", extended=True)

# Copy the single-class backbone weights, whose features are useful for multi-class classification
m.model.backbone.load_state_dict(deepforest_release_model.model.backbone.state_dict())
# Log the devices
devices = torch.cuda.device_count()
comet_logger.experiment.log_parameter("devices", devices)
comet_logger.experiment.log_parameter("workers", m.config["workers"])
comet_logger.experiment.log_parameter("batch_size", m.config["batch_size"])

# Load the regression head into the new model
m.model.head.regression_head.load_state_dict(deepforest_release_model.model.head.regression_head.state_dict())
# Log data sizes
comet_logger.experiment.log_parameter("train_size", train.shape[0])
comet_logger.experiment.log_parameter("test_size", test.shape[0])

m.config["train"]["csv_file"] = os.path.join(tmpdir,"train.csv")
m.config["train"]["root_dir"] = "/blue/ewhite/b.weinstein/BOEM/UBFAI Annotations/20231118/padded"
m.config["train"]["fast_dev_run"] = False
m.config["validation"]["csv_file"] = os.path.join(tmpdir,"test.csv")
m.config["validation"]["root_dir"] = "/blue/ewhite/b.weinstein/BOEM/UBFAI Annotations/20231118/padded"
m.config["batch_size"] = 6
m.config["train"]["epochs"] = 25
m.config["validation"]["val_accuracy_interval"] = 5
m.config["train"]["scheduler"]["params"]["eps"] = 0
comet_logger = CometLogger(project_name="BOEM", workspace="bw4sz")

m.create_trainer(logger=comet_logger)
m.create_trainer(callbacks=[im], logger=comet_logger, accelerator="gpu", strategy="ddp", num_nodes=1, devices=devices)
m.trainer.fit(m)
results = m.evaluate(m.config["validation"]["csv_file"],m.config["validation"]["root_dir"])
print(results)

# Gather the number of steps taken from all GPUs
global_steps = torch.tensor(m.trainer.global_step, dtype=torch.int32, device=m.device)
comet_logger.experiment.log_metric("global_steps", global_steps)

# Save profiler to comet
#comet_logger.experiment.log_asset(os.path.join(tmpdir,"profiler","profiler.txt"))

# Save the model
m.trainer.save_checkpoint("/blue/ewhite/b.weinstein/BOEM/UBFAI Annotations/checkpoints/{}.pl".format(comet_logger.experiment.id))
m.trainer.save_checkpoint("/blue/ewhite/b.weinstein/BOEM/UBFAI Images with Detection Data/checkpoints/{}.pl".format(comet_logger.experiment.id))
9 changes: 4 additions & 5 deletions conf/config.yaml
@@ -24,6 +24,7 @@ predict:
patch_size: 2000
patch_overlap: 0
min_score: 0.4
+ batch_size: 48

pipeline:
confidence_threshold: 0.9
@@ -34,14 +35,14 @@ propagate:
distance_threshold_pixels: 50

detection_model:
checkpoint: "/blue/ewhite/b.weinstein/BOEM/UBFAI Annotations/checkpoints/5420a9c3f27d4299992094a7b9b49cb7.pl"
checkpoint: "/blue/ewhite/b.weinstein/BOEM/UBFAI Images with Detection Data/checkpoints/900710af16d9431aaef84bf74034a48c.pl"
checkpoint_dir: /blue/ewhite/b.weinstein/BOEM/detection/checkpoints
train_csv_folder: /blue/ewhite/b.weinstein/BOEM/annotations/train/
train_image_dir: /blue/ewhite/b.weinstein/BOEM/sample_flight/JPG_2024_Jan27/annotated
crop_image_dir: /blue/ewhite/b.weinstein/BOEM/detection/crops/
limit_empty_frac: 0.01
labels:
- "Bird"
- "Object"
trainer:
batch_size: 4
train:
@@ -69,7 +70,6 @@ pipeline_evaluation:
classify_ground_truth_dir: /blue/ewhite/b.weinstein/BOEM/annotations/validation
# This is an average mAP threshold for now, but we may want to add a per-iou threshold in the future
detection_true_positive_threshold: 0.8
- detection_false_positive_threshold: 0.5
classification_avg_score: 0.5
image_dir: /blue/ewhite/b.weinstein/BOEM/sample_flight/JPG_2024_Jan27/annotated
debug: False
@@ -79,7 +79,6 @@ reporting:
metadata: /blue/ewhite/b.weinstein/BOEM/reporting/metadata/metadata.csv
thin_factor: 100


active_learning:
image_dir: /blue/ewhite/b.weinstein/BOEM/sample_flight/JPG_2024_Jan27
strategy: 'target-labels'
@@ -89,7 +88,7 @@ active_learning:
min_score: 0.1
model_checkpoint:
target_labels:
- "Bird"
- "Object"

# Optional parameters:
evaluation:
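
For context, a minimal sketch of how these fields might be consumed downstream; the conf/ layout suggests the pipeline reads this file with OmegaConf or Hydra, but that is an assumption rather than something shown in this commit.

from omegaconf import OmegaConf
from deepforest import main

cfg = OmegaConf.load("conf/config.yaml")

# The refactor points detection at the new single-class checkpoint and renames the label from "Bird" to "Object".
m = main.deepforest.load_from_checkpoint(cfg.detection_model.checkpoint)
m.label_dict = {label: i for i, label in enumerate(cfg.detection_model.labels)}
m.numeric_to_label_dict = {v: k for k, v in m.label_dict.items()}

# Prediction settings from the predict block, including the newly added batch_size.
m.config["batch_size"] = cfg.predict.batch_size
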
25 changes: 0 additions & 25 deletions container.sh

This file was deleted.

118 changes: 118 additions & 0 deletions prepare_USGS.py
@@ -0,0 +1,118 @@
# Prepare USGS backbone
import pandas as pd
import os
import glob
from deepforest.preprocess import split_raster
import torch
import argparse
import random
import numpy as np
from src.cluster import start
from dask.distributed import as_completed

# Parse arguments
parser = argparse.ArgumentParser(description="Train DeepForest model")
parser.add_argument("--batch_size", type=int, default=12, help="Batch size for training")
parser.add_argument("--workers", type=int, default=0, help="Number of workers for data loading")
args = parser.parse_args()

# Use parsed arguments
batch_size = args.batch_size
workers = args.workers

# Set random seeds for reproducibility
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

df = pd.read_csv("/blue/ewhite/b.weinstein/BOEM/UBFAI Images with Detection Data/20250203_total.csv")
df.label.value_counts()

# Construct padded crop name
df["image_path"] = df["bname_parent"] + ".JPG"

# Check that all images exist and drop any that do not
df["image_exists"] = df["image_path"].apply(lambda x: os.path.exists(os.path.join("/blue/ewhite/b.weinstein/BOEM/UBFAI Images with Detection Data/images_parent",x)))
df = df[df["image_exists"]]

df["xmin"] = df["left"]
df["ymin"] = df["top"]
df["xmax"] = df["left"] + df["width"]
df["ymax"] = df["top"] + df["height"]

os.makedirs("/blue/ewhite/b.weinstein/BOEM/UBFAI Images with Detection Data/crops", exist_ok=True)
crop_annotations =[]
regenerate_crops = True
if regenerate_crops:
client = start(cpus=5, mem_size="40GB")
futures = []

def process_image(image_annotations):
x = image_annotations.image_path.unique()[0]
filename = os.path.join("/blue/ewhite/b.weinstein/BOEM/UBFAI Images with Detection Data/crops", x.replace(".JPG", ".csv"))
if os.path.exists(filename):
return pd.read_csv(filename)
try:
split_raster(
annotations_file=image_annotations,
patch_size=1000,
patch_overlap=0,
path_to_raster=os.path.join("/blue/ewhite/b.weinstein/BOEM/UBFAI Images with Detection Data/images_parent", x),
root_dir="/blue/ewhite/b.weinstein/BOEM/UBFAI Images with Detection Data/images_parent",
base_dir="/blue/ewhite/b.weinstein/BOEM/UBFAI Images with Detection Data/crops",
allow_empty=False)
return filename
except Exception as e:
print(f"Error processing {x}: {e}")
return None

for x in df.image_path.unique():
image_annotations = df[df["image_path"] == x]
futures.append(client.submit(process_image, image_annotations))

for future in as_completed(futures):
result = future.result()
if result is not None:
crop_annotations.append(result)

crop_annotations = glob.glob("/blue/ewhite/b.weinstein/BOEM/UBFAI Images with Detection Data/crops/*.csv")
crop_annotations = [pd.read_csv(x) for x in crop_annotations]

crop_annotations = pd.concat(crop_annotations)
# Background classes (Algae, Boat, Buoy) become empty-frame negatives: zeroed boxes labeled "Object"
crop_annotations.loc[crop_annotations['label'].isin(["Algae", "Boat", "Buoy"]), ['xmin', 'xmax', 'ymin', 'ymax', 'label']] = [0, 0, 0, 0, "Object"]

# Temporarily mark all other labels as "FalsePositive" for de-duplication below
crop_annotations.loc[~crop_annotations['label'].isin(["Algae", "Boat", "Buoy"]), 'label'] = "FalsePositive"

# Drop duplicates for False Positives only
falsepositives = crop_annotations[crop_annotations['label'] == "FalsePositive"]
falsepositives = falsepositives.drop_duplicates(subset=['xmin', 'xmax', 'ymin', 'ymax'])

# Drop any false-positive rows whose image_path also contains a true positive
true_positives = crop_annotations[crop_annotations['label'] != "FalsePositive"]
falsepositives = falsepositives[~falsepositives['image_path'].isin(true_positives['image_path'])]
crop_annotations = pd.concat([crop_annotations[crop_annotations['label'] != "FalsePositive"], falsepositives])
crop_annotations["label"] = "Object"

# Randomly split by image_path
images = crop_annotations.image_path.unique()
random.shuffle(images)
train_images = images[:int(len(images)*0.90)]
test_images = images[int(len(images)*0.90):]

train = crop_annotations[crop_annotations["image_path"].isin(train_images)]
test = crop_annotations[crop_annotations["image_path"].isin(test_images)]

# Write the train/test splits to the crops directory
savedir = "/blue/ewhite/b.weinstein/BOEM/UBFAI Images with Detection Data/crops"
# create images directory
os.makedirs(os.path.join(savedir,"images"), exist_ok=True)

train.to_csv(os.path.join(savedir,"train.csv"),index=False)
test.to_csv(os.path.join(savedir,"test.csv"),index=False)
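
A small sanity check on the split written above may be useful; this sketch only uses paths and columns already defined in the script.

import os
import pandas as pd

savedir = "/blue/ewhite/b.weinstein/BOEM/UBFAI Images with Detection Data/crops"
train = pd.read_csv(os.path.join(savedir, "train.csv"))
test = pd.read_csv(os.path.join(savedir, "test.csv"))

# The split is made by image_path, so no crop image should appear in both sets.
assert set(train.image_path).isdisjoint(set(test.image_path))
print(f"{train.image_path.nunique()} train images, {test.image_path.nunique()} test images")
print(train.label.value_counts())  # every remaining row should be labeled "Object"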
