Skip to content

Commit

Permalink
folders
Browse files Browse the repository at this point in the history
  • Loading branch information
bw4sz committed Dec 9, 2024
1 parent c7de761 commit a3019bf
Show file tree
Hide file tree
Showing 8 changed files with 195 additions and 132 deletions.
37 changes: 21 additions & 16 deletions conf/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@ check_annotations: true
# Force upload bypasses the pipeline, useful for debugging and starting a new project
force_upload: true
label_studio:
project_name: "Bureau of Ocean Energy Management"
project_name_train: "Bureau of Ocean Energy Management - Training"
project_name_validation: "Bureau of Ocean Energy Management - Validation"
url: "https://labelstudio.naturecast.org/"
folder_name: "/pgsql/retrieverdash/everglades-label-studio/everglades-data"
images_to_annotate_dir: /blue/ewhite/b.weinstein/BOEM/sample_flight/JPG_2024_Jan27
annotated_images_dir: /blue/ewhite/b.weinstein/BOEM/sample_flight/JPG_2024_Jan27/annotated
csv_dir: /blue/ewhite/b.weinstein/BOEM/annotations
csv_dir_train: /blue/ewhite/b.weinstein/BOEM/annotations/train
csv_dir_validation: /blue/ewhite/b.weinstein/BOEM/annotations/validation

predict:
patch_size: 2000
Expand All @@ -23,7 +25,7 @@ predict:

pipeline:
confidence_threshold: 0.5
limit_empty_frac: 0.1
limit_empty_frac: 0.01

propagate:
time_threshold_seconds: 5
Expand All @@ -33,10 +35,10 @@ detection_model:
checkpoint: bird
checkpoint_dir: /blue/ewhite/b.weinstein/BOEM/detection/checkpoints
validation_csv_path:
train_csv_folder: /blue/ewhite/b.weinstein/BOEM/annotations/
train_csv_folder: /blue/ewhite/b.weinstein/BOEM/annotations/train/
train_image_dir: /blue/ewhite/b.weinstein/BOEM/sample_flight/JPG_2024_Jan27/annotated
crop_image_dir: /blue/ewhite/b.weinstein/BOEM/detection/crops/
limit_empty_frac: 0
limit_empty_frac: 0.05
fast_dev_run: false
labels:
- "Bird"
Expand All @@ -45,31 +47,33 @@ classification_model:
checkpoint:
checkpoint_dir: /blue/ewhite/b.weinstein/BOEM/classification/checkpoints
validation_csv_path:
train_csv_folder: /blue/ewhite/b.weinstein/BOEM/annotations/
train_csv_folder: /blue/ewhite/b.weinstein/BOEM/annotations/train
train_image_dir: /blue/ewhite/b.weinstein/BOEM/sample_flight/JPG_2024_Jan27/annotated
crop_image_dir: /blue/ewhite/b.weinstein/BOEM/classification/crops/
under_sample_ratio: 0
fast_dev_run: True
trainer:
fast_dev_run: false
max_epochs: 1
lr: 0.001

pipeline_evaluation:
detect_ground_truth_dir:
classify_confident_ground_truth_dir:
classify_uncertain_ground_truth_dir:
detect_ground_truth_dir: /blue/ewhite/b.weinstein/BOEM/annotations/validation
classify_confident_ground_truth_dir: /blue/ewhite/b.weinstein/BOEM/annotations/validation
classify_uncertain_ground_truth_dir: /blue/ewhite/b.weinstein/BOEM/annotations/validation
# This is an average mAP threshold for now, but we may want to add a per-iou threshold in the future
detection_true_positive_threshold: 0.8
detection_false_positive_threshold: 0.5
classification_avg_score: 0.5
image_dir:
image_dir: /blue/ewhite/b.weinstein/BOEM/sample_flight/JPG_2024_Jan27/annotated

reporting:
image_dir: /blue/ewhite/b.weinstein/BOEM/reporting/samples
report_dir: /blue/ewhite/b.weinstein/BOEM/reporting/reports
metadata: /blue/ewhite/b.weinstein/BOEM/reporting/metadata.csv

active_learning:
image_dir: /blue/ewhite/b.weinstein/BOEM/sample_flight/JPG_2024_Jan27
strategy: 'target-labels'
n_images: 1
n_images: 50
patch_size: 2000
patch_overlap: 0
min_score: 0.3
Expand All @@ -80,7 +84,7 @@ active_learning:
# Optional parameters:
evaluation:
dask_client:
pool_limit: 10
pool_limit: 5000
gpus: 1

active_testing:
Expand All @@ -94,6 +98,7 @@ active_testing:

deepforest:
train:
fast_dev_run: True
epochs: 1
fast_dev_run: False
epochs: 10
lr: 0.00001
workers: 0
33 changes: 18 additions & 15 deletions src/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,11 @@
from src.label_studio import gather_data
from pytorch_lightning.loggers import CometLogger


def create_train_test(annotations):
    """Split annotations into a reproducible 80/20 train/test pair.

    Args:
        annotations (pd.DataFrame): Annotation rows to split.

    Returns:
        tuple: ``(train, test)``. ``train`` is a fixed-seed 80% sample of
        ``annotations``; ``test`` is ``annotations`` with the sampled index
        labels dropped.
    """
    # Sample once and reuse the result instead of drawing the identical
    # sample a second time just to obtain its index. The fixed seed keeps
    # the split stable between runs.
    train = annotations.sample(frac=0.8, random_state=1)
    # Rows are removed by index label, so any rows sharing a sampled label
    # are excluded from the test split as well.
    test = annotations.drop(train.index)
    return train, test

def get_latest_checkpoint(checkpoint_dir, annotations):
def get_latest_checkpoint(checkpoint_dir, annotations, lr=0.0001):
#Get model with latest checkpoint dir, if none exist make a new model
if os.path.exists(checkpoint_dir):
checkpoints = glob.glob(os.path.join(checkpoint_dir,"*.ckpt"))
Expand All @@ -25,16 +24,16 @@ def get_latest_checkpoint(checkpoint_dir, annotations):
m = CropModel.load_from_checkpoint(checkpoint)
else:
warnings.warn("No checkpoints found in {}".format(checkpoint_dir))
m = CropModel(num_classes=len(annotations["label"].unique()))
m = CropModel(num_classes=len(annotations["label"].unique()), lr=lr)
else:
os.makedirs(checkpoint_dir)
m = CropModel(num_classes=len(annotations["label"].unique()))
m = CropModel(num_classes=len(annotations["label"].unique()), lr=lr)

return m

def load(checkpoint=None, annotations=None, checkpoint_dir=None):
def load(checkpoint=None, annotations=None, checkpoint_dir=None, lr=0.0001):
if checkpoint:
loaded_model = CropModel(checkpoint, num_classes=len(annotations["label"].unique()))
loaded_model = CropModel(checkpoint, num_classes=len(annotations["label"].unique()), lr=lr)
elif checkpoint_dir:
loaded_model = get_latest_checkpoint(
checkpoint_dir, annotations)
Expand All @@ -43,24 +42,25 @@ def load(checkpoint=None, annotations=None, checkpoint_dir=None):

return loaded_model

def train(model, train_dir, val_dir, comet_project=None, comet_workspace=None, fast_dev_run=False):
def train(model, train_dir, val_dir, comet_workspace=None, comet_project=None, fast_dev_run=False, max_epochs=10):
"""Train a model on labeled images.
Args:
model (CropModel): A CropModel object.
train_dir (str): The directory containing the training images.
val_dir (str): The directory containing the validation images.
comet_project (str): The comet project name for logging. Defaults to None.
comet_workspace (str): The comet workspace for logging. Defaults to None.
fast_dev_run (bool): Whether to run a fast development run.
max_epochs (int): The maximum number of epochs to train for.
Returns:
main.deepforest: A trained deepforest model.
"""
# Update

if comet_project:
comet_logger = CometLogger(project_name=comet_project, workspace=comet_workspace)
model.create_trainer(logger=comet_logger, fast_dev_run=fast_dev_run)
else:
model.create_trainer(fast_dev_run=fast_dev_run)
comet_logger = None

model.create_trainer(logger=comet_logger, fast_dev_run=fast_dev_run, max_epochs=max_epochs)

# Get the data stored from the write_crops step above.
model.load_from_disk(train_dir=train_dir, val_dir=val_dir)
Expand All @@ -82,6 +82,7 @@ def preprocess_and_train_classification(config, validation_df=None):
Args:
config: Configuration object containing training parameters
validation_df (pd.DataFrame): A DataFrame containing validation annotations.
comet_logger (CometLogger): A CometLogger object.
Returns:
trained_model: Trained model object
"""
Expand All @@ -95,7 +96,7 @@ def preprocess_and_train_classification(config, validation_df=None):
isin(validation_df["image_path"])]

# Load existing model
loaded_model = load(checkpoint=config.classification_model.checkpoint, checkpoint_dir=config.classification_model.checkpoint_dir, annotations=annotations)
loaded_model = load(checkpoint=config.classification_model.checkpoint, checkpoint_dir=config.classification_model.checkpoint_dir, annotations=annotations, lr=config.classification_model.trainer.lr)

# Preprocess train and validation data
preprocess_images(
Expand All @@ -114,8 +115,10 @@ def preprocess_and_train_classification(config, validation_df=None):
train_dir=config.classification_model.crop_image_dir,
val_dir=config.classification_model.crop_image_dir,
model=loaded_model,
comet_project=config.comet.project,
comet_workspace=config.comet.workspace,
fast_dev_run=config.classification_model.fast_dev_run)
comet_project=config.comet.project,
fast_dev_run=config.classification_model.trainer.fast_dev_run,
max_epochs=config.classification_model.trainer.max_epochs,
)

return trained_model
43 changes: 24 additions & 19 deletions src/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ def train(model, train_annotations, test_annotations, train_image_dir, comet_pro

if comet_project:
comet_logger = CometLogger(project_name=comet_project, workspace=comet_workspace)
comet_logger.experiment.add_tags(["detection"])
comet_logger.experiment.log_parameters(model.config)
comet_logger.experiment.log_table("train.csv", train_annotations)
comet_logger.experiment.log_table("test.csv", test_annotations)
Expand All @@ -161,24 +162,24 @@ def train(model, train_annotations, test_annotations, train_image_dir, comet_pro
else:
model.create_trainer()

with comet_logger.experiment.context_manager("train_images"):
non_empty_train_annotations = train_annotations[~(train_annotations.xmax==0)]
try:
non_empty_train_annotations= gpd.GeoDataFrame(non_empty_train_annotations, geometry=non_empty_train_annotations["geometry"])
non_empty_train_annotations.root_dir = train_image_dir
non_empty_train_annotations = read_file(non_empty_train_annotations)
except:
non_empty_train_annotations = read_file(non_empty_train_annotations, root_dir=train_image_dir)

if non_empty_train_annotations.empty:
pass
else:
sample_train_annotations = non_empty_train_annotations[non_empty_train_annotations.image_path.isin(non_empty_train_annotations.image_path.head(5))]
for filename in sample_train_annotations.image_path:
sample_train_annotations_for_image = sample_train_annotations[sample_train_annotations.image_path == filename]
sample_train_annotations_for_image.root_dir = train_image_dir
visualize.plot_results(sample_train_annotations_for_image, savedir=tmpdir)
comet_logger.experiment.log_image(os.path.join(tmpdir, filename))
# with comet_logger.experiment.context_manager("train_images"):
# non_empty_train_annotations = train_annotations[~(train_annotations.xmax==0)]
# try:
# non_empty_train_annotations= gpd.GeoDataFrame(non_empty_train_annotations, geometry=non_empty_train_annotations["geometry"])
# non_empty_train_annotations.root_dir = train_image_dir
# non_empty_train_annotations = read_file(non_empty_train_annotations)
# except:
# non_empty_train_annotations = read_file(non_empty_train_annotations, root_dir=train_image_dir)

# if non_empty_train_annotations.empty:
# pass
# else:
# sample_train_annotations = non_empty_train_annotations[non_empty_train_annotations.image_path.isin(non_empty_train_annotations.image_path.head(5))]
# for filename in sample_train_annotations.image_path:
# sample_train_annotations_for_image = sample_train_annotations[sample_train_annotations.image_path == filename]
# sample_train_annotations_for_image.root_dir = train_image_dir
# visualize.plot_results(sample_train_annotations_for_image, savedir=tmpdir)
# comet_logger.experiment.log_image(os.path.join(tmpdir, filename))

model.trainer.fit(model)

Expand Down Expand Up @@ -214,11 +215,16 @@ def preprocess_and_train(config, validation_df=None, model_type="detection"):
train_df = data_processing.preprocess_images(train_df,
root_dir=config.detection_model.train_image_dir,
save_dir=config.detection_model.crop_image_dir)

non_empty = train_df[train_df.xmin!=0]
train_df.loc[train_df.label==0,"label"] = "Bird"

if not validation_df.empty:
validation_df = data_processing.preprocess_images(validation_df,
root_dir=config.detection_model.train_image_dir,
save_dir=config.detection_model.crop_image_dir)
non_empty = validation_df[validation_df.xmin!=0]
validation_df.loc[validation_df.label==0,"label"] = "Bird"

# Limit empty frames
if config.detection_model.limit_empty_frac > 0:
Expand Down Expand Up @@ -295,7 +301,6 @@ def predict(image_paths, patch_size, patch_overlap, m=None, model_path=None, das
Returns:
list: A list of image predictions.
"""

if dask_client:
# load model on each client
def update_sys_path():
Expand Down
Loading

0 comments on commit a3019bf

Please sign in to comment.