-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
19 changed files
with
465 additions
and
426 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,64 +1,77 @@ | ||
from deepforest import main | ||
import pandas as pd | ||
import os | ||
import tempfile | ||
import comet_ml | ||
from pytorch_lightning.loggers import CometLogger | ||
from pytorch_lightning.profilers.simple import SimpleProfiler | ||
import torch | ||
import argparse | ||
from deepforest.callbacks import images_callback | ||
|
||
df = pd.read_csv("/blue/ewhite/b.weinstein/BOEM/UBFAI Annotations/20231118/20231116_cropped_annotations.csv") | ||
df.wat_label.value_counts() | ||
df = df[df.wat_label.isin(["Bird","Cartilaginous Fish","Bony Fish","Mammal","Reptile"])] | ||
# Parse arguments | ||
parser = argparse.ArgumentParser(description="Train DeepForest model") | ||
parser.add_argument("--batch_size", type=int, default=12, help="Batch size for training") | ||
parser.add_argument("--workers", type=int, default=0, help="Number of workers for data loading") | ||
args = parser.parse_args() | ||
|
||
# Combine Fish classes | ||
df.loc[df.wat_label.isin(["Cartilaginous Fish","Bony Fish"]),"wat_label"] = "Fish" | ||
# Use parsed arguments | ||
batch_size = args.batch_size | ||
workers = args.workers | ||
|
||
# Construct padded crop name | ||
df["image_path"] = df["bname_parent"] +"_" + df["tile_xtl"].astype(str) + "_" + df["tile_ytl"].astype(str) + "_" + df["tile_xbr"].astype(str) + "_" + df["tile_ybr"].astype(str) + ".JPG" | ||
savedir = "/blue/ewhite/b.weinstein/BOEM/UBFAI Images with Detection Data/crops" | ||
train = pd.read_csv(os.path.join(savedir,"train.csv")) | ||
test = pd.read_csv(os.path.join(savedir,"test.csv")) | ||
|
||
# Check if all images exist | ||
df["image_exists"] = df["image_path"].apply(lambda x: os.path.exists(os.path.join("/blue/ewhite/b.weinstein/BOEM/UBFAI Annotations/20231118/padded",x))) | ||
# Initialize DeepForest model | |
m = main.deepforest() | ||
m.load_model("weecology/deepforest-bird") | ||
m.label_dict = {"Object":0} | ||
m.numeric_to_label_dict = {0:"Object"} | ||
|
||
df["xmin"] = df["xtl"] | ||
df["ymin"] = df["ytl"] | ||
df["xmax"] = df["xbr"] | ||
df["ymax"] = df["ybr"] | ||
df["label"] = df["wat_label"] | ||
m.config["train"]["csv_file"] = os.path.join(savedir,"train.csv") | ||
m.config["train"]["root_dir"] = "/blue/ewhite/b.weinstein/BOEM/UBFAI Images with Detection Data/crops" | ||
m.config["train"]["fast_dev_run"] = False | ||
m.config["validation"]["csv_file"] = os.path.join(savedir,"test.csv") | ||
m.config["validation"]["root_dir"] = "/blue/ewhite/b.weinstein/BOEM/UBFAI Images with Detection Data/crops" | ||
m.config["batch_size"] = batch_size | ||
m.config["train"]["epochs"] = 100 | ||
m.config["workers"] = workers | ||
m.config["validation"]["val_accuracy_interval"] = 10 | ||
m.config["train"]["scheduler"]["params"]["eps"] = 0 | ||
m.config["train"]["lr"] = 0.0005 | ||
|
||
# Randomly split 85 - 15 for each class | |
train = df.groupby("wat_label").sample(frac=0.85) | ||
test = df.drop(train.index) | ||
comet_logger = CometLogger(project_name="BOEM", workspace="bw4sz") | ||
|
||
# Write to tmp data directory | ||
tmpdir = tempfile.mkdtemp() | ||
train.to_csv(os.path.join(tmpdir,"train.csv"),index=False) | ||
test.to_csv(os.path.join(tmpdir,"test.csv"),index=False) | ||
im = images_callback(n=20, every_n_epochs=25, savedir=os.path.join(savedir,"images")) | ||
|
||
# Initialize new Deepforest model ( the model that you will train ) with your classes | ||
m = main.deepforest(config_args={"num_classes":4}, label_dict={"Bird":0,"Fish":1,"Mammal":2,"Reptile":3}) | ||
# Log the training and test sets | ||
comet_logger.experiment.log_table("train.csv", train) | ||
comet_logger.experiment.log_table("test.csv", test) | ||
|
||
# Initialize DeepForest model (the model whose regression head you will modify) | |
deepforest_release_model = main.deepforest() | ||
deepforest_release_model.load_model("weecology/deepforest-bird") # or load_model('weecology/deepforest-bird') | ||
# Pytorch lightning save checkpoint | ||
#simple_profiler = SimpleProfiler(dirpath=os.path.join(tmpdir,"profiler"), filename="profiler.txt", extended=True) | ||
|
||
# Extract single class backbone that will have useful features for multi-class classification | ||
m.model.backbone.load_state_dict(deepforest_release_model.model.backbone.state_dict()) | ||
# Log the devices | ||
devices = torch.cuda.device_count() | ||
comet_logger.experiment.log_parameter("devices", devices) | ||
comet_logger.experiment.log_parameter("workers", m.config["workers"]) | ||
comet_logger.experiment.log_parameter("batch_size", m.config["batch_size"]) | ||
|
||
# load regression head in the new model | ||
m.model.head.regression_head.load_state_dict(deepforest_release_model.model.head.regression_head.state_dict()) | ||
# Log data sizes | ||
comet_logger.experiment.log_parameter("train_size", train.shape[0]) | ||
comet_logger.experiment.log_parameter("test_size", test.shape[0]) | ||
|
||
m.config["train"]["csv_file"] = os.path.join(tmpdir,"train.csv") | ||
m.config["train"]["root_dir"] = "/blue/ewhite/b.weinstein/BOEM/UBFAI Annotations/20231118/padded" | ||
m.config["train"]["fast_dev_run"] = False | ||
m.config["validation"]["csv_file"] = os.path.join(tmpdir,"test.csv") | ||
m.config["validation"]["root_dir"] = "/blue/ewhite/b.weinstein/BOEM/UBFAI Annotations/20231118/padded" | ||
m.config["batch_size"] = 6 | ||
m.config["train"]["epochs"] = 25 | ||
m.config["validation"]["val_accuracy_interval"] = 5 | ||
m.config["train"]["scheduler"]["params"]["eps"] = 0 | ||
comet_logger = CometLogger(project_name="BOEM", workspace="bw4sz") | ||
|
||
m.create_trainer(logger=comet_logger) | ||
m.create_trainer(callbacks=[im], logger=comet_logger, accelerator="gpu", strategy="ddp", num_nodes=1, devices=devices) | ||
m.trainer.fit(m) | ||
results = m.evaluate(m.config["validation"]["csv_file"],m.config["validation"]["root_dir"]) | ||
print(results) | ||
|
||
# Gather the number of steps taken from all GPUs | ||
global_steps = torch.tensor(m.trainer.global_step, dtype=torch.int32, device=m.device) | ||
comet_logger.experiment.log_metric("global_steps", global_steps) | ||
|
||
# Save profiler to comet | ||
#comet_logger.experiment.log_asset(os.path.join(tmpdir,"profiler","profiler.txt")) | ||
|
||
# Save the model | ||
m.trainer.save_checkpoint("/blue/ewhite/b.weinstein/BOEM/UBFAI Annotations/checkpoints/{}.pl".format(comet_logger.experiment.id)) | ||
m.trainer.save_checkpoint("/blue/ewhite/b.weinstein/BOEM/UBFAI Images with Detection Data/checkpoints/{}.pl".format(comet_logger.experiment.id)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
# Prepare USGS backbone | ||
import pandas as pd | ||
import os | ||
import glob | ||
from deepforest.preprocess import split_raster | ||
import torch | ||
import argparse | ||
import random | ||
import numpy as np | ||
from src.cluster import start | ||
from dask.distributed import as_completed | ||
|
||
# Command-line interface: two integer options, both optional.
parser = argparse.ArgumentParser(description="Train DeepForest model")
parser.add_argument(
    "--batch_size",
    type=int,
    default=12,
    help="Batch size for training",
)
parser.add_argument(
    "--workers",
    type=int,
    default=0,
    help="Number of workers for data loading",
)
args = parser.parse_args()

# Expose the parsed values as module-level names.
# NOTE(review): neither name is referenced again in the visible part of this
# script -- confirm whether they are still needed.
batch_size, workers = args.batch_size, args.workers
|
||
# Seed every random number generator this script touches so that the
# shuffle-based train/test split is repeatable between runs.
seed = 42
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed):
    _seed_rng(seed)

if torch.cuda.is_available():
    # Seed the current CUDA device, then every visible CUDA device.
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
|
||
# Load the full detection table and report how many rows each label has.
df = pd.read_csv("/blue/ewhite/b.weinstein/BOEM/UBFAI Images with Detection Data/20250203_total.csv")
print(df["label"].value_counts())

# File name of the parent image each annotation belongs to
df["image_path"] = df["bname_parent"] + ".JPG"

# Keep only annotations whose parent image is on disk. Each distinct image is
# checked once and the answer mapped back onto its rows, instead of hitting
# the filesystem once per annotation.
image_exists = {
    name: os.path.exists(
        os.path.join("/blue/ewhite/b.weinstein/BOEM/UBFAI Images with Detection Data/images_parent", name)
    )
    for name in df["image_path"].unique()
}
df["image_exists"] = df["image_path"].map(image_exists).astype(bool)
# .copy() makes the filtered frame independent, so the column assignments
# below do not write into a slice of the unfiltered table.
df = df[df["image_exists"]].copy()

# Convert left/top/width/height into corner coordinates
df["xmin"] = df["left"]
df["ymin"] = df["top"]
df["xmax"] = df["left"] + df["width"]
df["ymax"] = df["top"] + df["height"]
|
||
# Directories used while cutting the parent images into crops
IMAGE_DIR = "/blue/ewhite/b.weinstein/BOEM/UBFAI Images with Detection Data/images_parent"
CROP_DIR = "/blue/ewhite/b.weinstein/BOEM/UBFAI Images with Detection Data/crops"

os.makedirs(CROP_DIR, exist_ok=True)
crop_annotations = []
regenerate_crops = True
if regenerate_crops:
    client = start(cpus=5, mem_size="40GB")

    def process_image(image_annotations, image_dir=IMAGE_DIR, crop_dir=CROP_DIR):
        """Split one parent image into crops and return its annotation CSV path.

        Args:
            image_annotations: rows of ``df`` that all share one ``image_path``.
            image_dir: directory containing the parent image; also passed to
                ``split_raster`` as ``root_dir``.
            crop_dir: directory passed to ``split_raster`` as ``base_dir``.

        Returns:
            The path ``<crop_dir>/<image name>.csv``, or ``None`` when
            ``split_raster`` raised. The same type is returned whether the CSV
            already existed or the image was just processed.
        """
        x = image_annotations.image_path.unique()[0]
        filename = os.path.join(crop_dir, x.replace(".JPG", ".csv"))
        # An existing CSV means this image is skipped.
        # NOTE(review): presumably the CSV was written by an earlier
        # split_raster run -- confirm against the deepforest version in use.
        if os.path.exists(filename):
            return filename
        try:
            split_raster(
                annotations_file=image_annotations,
                patch_size=1000,
                patch_overlap=0,
                path_to_raster=os.path.join(image_dir, x),
                root_dir=image_dir,
                base_dir=crop_dir,
                allow_empty=False)
        except Exception as e:
            # Best effort: one unreadable image must not abort the whole batch.
            print(f"Error processing {x}: {e}")
            return None
        return filename

    # One task per parent image. groupby hands over each image's rows in a
    # single pass; sort=False keeps first-appearance order.
    futures = [
        client.submit(process_image, image_annotations)
        for _, image_annotations in df.groupby("image_path", sort=False)
    ]

    # Wait for every task to finish and keep the paths of those that succeeded
    for future in as_completed(futures):
        result = future.result()
        if result is not None:
            crop_annotations.append(result)
|
||
# Read the per-image crop annotation files. train.csv and test.csv are written
# into this same directory at the end of the script, so they are skipped here;
# otherwise a rerun would read its own previous output back in as annotations.
crop_dir = "/blue/ewhite/b.weinstein/BOEM/UBFAI Images with Detection Data/crops"
split_outputs = {"train.csv", "test.csv"}
crop_csvs = [
    path
    for path in glob.glob(os.path.join(crop_dir, "*.csv"))
    if os.path.basename(path) not in split_outputs
]
crop_annotations = pd.concat([pd.read_csv(path) for path in crop_csvs])

# Decide which rows are background once, before any label is rewritten, so the
# second relabelling step cannot be affected by the first.
background_labels = ["Algae", "Boat", "Buoy"]
is_background = crop_annotations["label"].isin(background_labels)

# Background classes as negatives: zero-sized box, temporary marker label
crop_annotations.loc[is_background, ["xmin", "xmax", "ymin", "ymax"]] = 0
crop_annotations.loc[is_background, "label"] = "FalsePositive"

# All other as "Object"
crop_annotations.loc[~is_background, "label"] = "Object"

# Drop duplicates for false positives only. image_path is part of the key so
# that each crop keeps one negative row; keying on the (all-zero) coordinates
# alone would collapse every negative into a single row.
falsepositives = crop_annotations[crop_annotations["label"] == "FalsePositive"]
falsepositives = falsepositives.drop_duplicates(
    subset=["image_path", "xmin", "xmax", "ymin", "ymax"]
)

# A crop that contains a real object must not also carry a negative row
true_positives = crop_annotations[crop_annotations["label"] != "FalsePositive"]
falsepositives = falsepositives[
    ~falsepositives["image_path"].isin(true_positives["image_path"])
]

# Single-class output: the marker label is only needed for the filtering above
crop_annotations = pd.concat([true_positives, falsepositives])
crop_annotations["label"] = "Object"
|
||
# Shuffle the distinct crop names and cut the list at 90%, so that every
# annotation of a given crop ends up on the same side of the split.
images = crop_annotations.image_path.unique()
random.shuffle(images)
n_train = int(len(images) * 0.90)
train_images, test_images = images[:n_train], images[n_train:]

# Select the annotation rows belonging to each side
train = crop_annotations.loc[crop_annotations["image_path"].isin(train_images)]
test = crop_annotations.loc[crop_annotations["image_path"].isin(test_images)]
|
||
# Output location: the crops directory, plus an images/ subdirectory
savedir = "/blue/ewhite/b.weinstein/BOEM/UBFAI Images with Detection Data/crops"
os.makedirs(os.path.join(savedir, "images"), exist_ok=True)

# Write both splits without the DataFrame index
for _csv_name, _split in (("train.csv", train), ("test.csv", test)):
    _split.to_csv(os.path.join(savedir, _csv_name), index=False)
|
Oops, something went wrong.