Skip to content

Commit

Permalink
refactor reporting code
Browse files Browse the repository at this point in the history
  • Loading branch information
bw4sz committed Feb 20, 2025
1 parent 71adaf8 commit b62a380
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 32 deletions.
4 changes: 2 additions & 2 deletions conf/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,9 @@ pipeline_evaluation:
debug: False

reporting:
report_dir: /blue/ewhite/b.weinstein/BOEM/reporting/reports
report_dir: /orange/ewhite/web/public/BOEM
metadata: /blue/ewhite/b.weinstein/BOEM/reporting/metadata/metadata.csv
thin_factor: 500
thin_factor: 100


active_learning:
Expand Down
37 changes: 18 additions & 19 deletions src/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,32 +81,31 @@ def run(self):
detection_checkpoint_path = self.save_model(trained_detection_model,
self.config.detection_model.checkpoint_dir)
classification_checkpoint_path = self.save_model(trained_classification_model,
self.config.classification_model.checkpoint_dir)

pipeline_monitor = PipelineEvaluation(
model=trained_detection_model,
crop_model=trained_classification_model,
**self.config.pipeline_evaluation)

performance = pipeline_monitor.evaluate()

if pipeline_monitor.check_success():
print("Pipeline performance is satisfactory, exiting")
return None
self.config.classification_model.checkpoint_dir)

else:
detection_checkpoint_path = self.config.detection_model.checkpoint
trained_detection_model = detection.load(
checkpoint = self.config.detection_model.checkpoint)

if self.config.classification_model.checkpoint:
if self.config.classification_model.checkpoint is not None:
trained_classification_model = classification.load(
self.config.classification_model.checkpoint, checkpoint_dir=self.config.classification_model.checkpoint_dir, annotations=None)
self.config.classification_model.checkpoint, checkpoint_dir=self.config.classification_model.checkpoint_dir, annotations=None)
else:
trained_classification_model = None

performance = None
pipeline_monitor = None
detection_checkpoint_path = None
annotations = label_studio.gather_data(self.config.classification_model.train_csv_folder)
trained_classification_model = classification.load(
checkpoint = None, checkpoint_dir=self.config.classification_model.checkpoint_dir, annotations=annotations)

pipeline_monitor = PipelineEvaluation(
model=trained_detection_model,
crop_model=trained_classification_model,
**self.config.pipeline_evaluation)

performance = pipeline_monitor.evaluate()

if pipeline_monitor.check_success():
print("Pipeline performance is satisfactory, exiting")
return None
if self.config.active_learning.gpus > 1:
dask_client = start(gpus=self.config.active_learning.gpus, mem_size="70GB")
else:
Expand Down
54 changes: 47 additions & 7 deletions src/reporting.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ def __init__(self, report_dir, image_dir, metadata_csv, pipeline_monitor=None, m
self.report_dir = os.path.join(report_dir, timestamp)
self.report_file = f"{self.report_dir}/report.csv"
self.image_dir = image_dir
self.sample_prediction_dir = f"{self.report_dir}/samples"
self.model = model
self.classification_model = classification_model
self.patch_overlap = patch_overlap
Expand All @@ -47,7 +46,6 @@ def __init__(self, report_dir, image_dir, metadata_csv, pipeline_monitor=None, m

# Check the dirs exist
os.makedirs(self.report_dir, exist_ok=True)
os.makedirs(self.sample_prediction_dir, exist_ok=True)

self.pipeline_monitor = pipeline_monitor

Expand All @@ -60,31 +58,64 @@ def concat_predictions(self):
self.all_predictions = pd.concat(self.pipeline_monitor.predictions, ignore_index=True)

def generate_report(self, create_video=False):
    """Generate the report files and zip the contents of the report directory.

    When a pipeline monitor is attached, the predictions are concatenated
    and written out together with the metrics. Every file found under
    ``self.report_dir`` is then added to ``<report_dir>.zip``.

    Files are stored in the archive under their basename. If a basename is
    already taken by a file from another subdirectory, the later file is
    stored under its path relative to ``self.report_dir`` instead, so no
    archive member shadows another one.

    Args:
        create_video (bool): Whether to create visualization video

    Returns:
        str: Path to the zipped report file
    """
    import zipfile
    import os

    # Generate report contents
    if self.pipeline_monitor:
        self.concat_predictions()
        self.write_predictions()
        self.write_metrics()
    # NOTE(review): indentation was not recoverable from the reviewed source;
    # this check is assumed to sit at function level -- confirm.
    if create_video:
        self.generate_video()

    # The archive lives next to the report directory, not inside it, so it
    # is never picked up by the walk below.
    zip_path = f"{self.report_dir}.zip"

    used_names = set()
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, dirs, files in os.walk(self.report_dir):
            # Sort in place so traversal order (and therefore which file
            # keeps the bare basename) is deterministic.
            dirs.sort()
            for file in sorted(files):
                file_path = os.path.join(root, file)
                arcname = os.path.basename(file_path)
                if arcname in used_names:
                    # Basename collision: keep both files by falling back
                    # to the path relative to the report directory.
                    arcname = os.path.relpath(file_path, self.report_dir)
                used_names.add(arcname)
                zipf.write(file_path, arcname=arcname)

    return zip_path

def write_predictions(self):
    """Write the pipeline predictions, joined with flight metadata, to the report directory.

    Steps performed, in order:
      1. Rebuild ``self.all_predictions`` via ``self.concat_predictions()``.
      2. Write the raw predictions to ``predictions.csv``.
      3. Add a ``timestamp`` column (current local time) and a
         ``unique_image`` column (basename of ``image_path`` without its
         extension) to ``self.all_predictions`` in place.
      4. Merge with the metadata CSV at ``self.metadata`` on
         ``unique_image``, bringing in ``flight_name``, ``date``, ``lat``
         and ``long``.
      5. Write the merged table to CSV and to a point shapefile
         (``predictions.shp``) built from the ``long``/``lat`` columns.

    NOTE(review): this span was reviewed from a rendered diff in which
    removed and added lines appear side by side without markers. As written
    it holds two consecutive ``return`` statements and writes
    ``predictions.csv`` twice -- confirm which lines belong to the current
    version of the file.

    Returns:
        str: Path of the written predictions CSV.
    """
    self.concat_predictions()
    # Raw (un-merged) predictions; the merged write further down targets the
    # same file name and would overwrite this one.
    self.all_predictions.to_csv(f"{self.report_dir}/predictions.csv", index=False)
    # One timestamp string is assigned to every row.
    self.all_predictions['timestamp'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # Join key: file name of the image without directory or extension.
    self.all_predictions["unique_image"] = self.all_predictions["image_path"].apply(lambda x: os.path.splitext(os.path.basename(x))[0])

    # Connect with metadata on location
    metadata_df = pd.read_csv(self.metadata)
    # No ``how`` is given, so pandas performs an inner join: predictions whose
    # unique_image has no row in the metadata are dropped from the output.
    merged_predictions = self.all_predictions.merge(metadata_df[["unique_image", "flight_name","date","lat","long"]], on='unique_image')
    merged_predictions.to_csv(f"{self.report_dir}/predictions.csv", index=False)
    merged_predictions.to_csv(f"{self.report_dir}/validation_predictions.csv", index=False)

    # Create shapefile
    # Points are built as (x=long, y=lat).
    gpd.GeoDataFrame(merged_predictions, geometry=gpd.points_from_xy(merged_predictions.long, merged_predictions.lat)).to_file(f"{self.report_dir}/predictions.shp")

    return f"{self.report_dir}/predictions.csv"
    # NOTE(review): unreachable as written, because the return above exits first.
    return f"{self.report_dir}/validation_predictions.csv"

def crop_images(self, CropModel, annotations, root_dir, save_dir):
    """Write one image crop per valid annotated box via ``CropModel.write_crops``.

    This is a regular instance method. The previous ``@staticmethod``
    decorator was combined with an explicit ``self`` parameter, so the
    instance call ``self.crop_images(CropModel=..., ...)`` failed with a
    missing-argument ``TypeError``.

    Args:
        CropModel: Object exposing ``write_crops(boxes, root_dir, images,
            labels, savedir)``.
        annotations: pandas DataFrame with ``xmin``, ``ymin``, ``xmax``,
            ``ymax``, ``image_path`` and ``label`` columns.
        root_dir: Directory forwarded to ``write_crops`` as ``root_dir``.
        save_dir: Directory forwarded to ``write_crops`` as ``savedir``.

    Returns:
        None
    """
    box_columns = ['xmin', 'ymin', 'xmax', 'ymax']
    coords = annotations[box_columns]

    # Remove any annotations with empty boxes. Only a box whose four
    # coordinates are all zero is treated as empty; a box that merely touches
    # the top or left image edge (xmin == 0 or ymin == 0) is kept.
    is_empty = (coords == 0).all(axis=1)
    # Remove any negative values (rows with missing coordinates fail this
    # comparison as well and are dropped).
    non_negative = (coords >= 0).all(axis=1)
    annotations = annotations[non_negative & ~is_empty]

    # Nothing left to crop: skip the call instead of passing empty inputs.
    if annotations.empty:
        return None

    boxes = annotations[box_columns].values.tolist()
    images = annotations["image_path"].values
    labels = annotations["label"].values
    CropModel.write_crops(boxes=boxes, root_dir=root_dir, images=images, labels=labels, savedir=save_dir)

def select_images_for_video(self):
all_images = glob.glob(self.image_dir + "/*.jpg")
Expand All @@ -108,6 +139,12 @@ def predict_video_images(self, images):

predictions = predictions[predictions.score > self.min_score]

# Save predictions
predictions.to_csv(f"{self.report_dir}/video_predictions.csv", index=False)

# Crop the images to the predictions
self.crop_images(CropModel=self.classification_model, annotations=predictions, root_dir=self.image_dir, save_dir=self.report_dir)

return predictions

def get_coco_datasets(self):
Expand All @@ -119,7 +156,10 @@ def generate_video(self):
images = self.select_images_for_video()
video_predictions = self.predict_video_images(images)
visualizer = PredictionVisualizer(video_predictions, self.report_dir)
output_path = f"{self.report_dir}/predictions.mp4"

# Give the flightname as the video name
flightname = self.image_dir.split("/")[-1]
output_path = f"{self.report_dir}/{flightname}.mp4"
output_path = visualizer.create_visualization(images=images)

return output_path
Expand Down
8 changes: 5 additions & 3 deletions src/visualization.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,13 @@ def create_visualization(

height, width = first_image.shape[:2]

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
# Use H.264 codec and lower frame rate for better compatibility
fourcc = cv2.VideoWriter_fourcc(*'avc1') # Changed from mp4v to avc1
fps = 5 # Reduced from 30 to 5 for slower playback
video_writer = cv2.VideoWriter(
output_path,
fourcc,
self.fps,
fps,
(width, height)
)

Expand Down Expand Up @@ -226,4 +228,4 @@ def create_summary_image(
2
)

return summary
return summary
2 changes: 1 addition & 1 deletion submit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@
source activate BOEM

cd ~/BOEM/
python main.py check_annotations=True active_learning.pool_limit=10000 active_testing.n_images=1 active_learning.n_images=100 pipeline_evaluation.debug=False
python main.py check_annotations=True active_learning.pool_limit=10 active_testing.n_images=1 active_learning.n_images=1 pipeline_evaluation.debug=True force_training=False

0 comments on commit b62a380

Please sign in to comment.