diff --git a/config/bbox_pipeline_config.yaml b/config/bbox_pipeline_config.yaml
new file mode 100644
index 0000000..3daf750
--- /dev/null
+++ b/config/bbox_pipeline_config.yaml
@@ -0,0 +1,25 @@
+DataLoader:
+  type: BoundingBoxDataLoader
+  params:
+    dataset_dir: "./data/raw_data/STARCOP_train_easy"
+
+Processor:
+  type: BoundingBoxProcessor
+  params:
+    config:
+      get_normalization_constants: True
+      resize: True
+      normalize_dataset: True
+      augment_dataset: True
+      normalize_bbox: True
+    input_shape: [512, 512, 16]
+    normalize: True
+    augmentations:
+      - horizontal_flip
+      - rotate
+
+Model:
+  type: BoundingBoxModel
+  params:
+    input_shape: [512, 512, 16]
+    max_boxes: 1
\ No newline at end of file
diff --git a/config/constants.py b/config/constants.py
index f9a1a1b..b60d166 100644
--- a/config/constants.py
+++ b/config/constants.py
@@ -1,18 +1,27 @@
+from enum import Enum
+
 IMAGE_FILE_NAMES = (
-    "TOA_AVIRIS_460nm.tif",
-    "TOA_AVIRIS_550nm.tif",
-    "TOA_AVIRIS_640nm.tif",
-    "TOA_AVIRIS_2004nm.tif",
-    "TOA_AVIRIS_2109nm.tif",
-    "TOA_AVIRIS_2310nm.tif",
-    "TOA_AVIRIS_2350nm.tif",
-    "TOA_AVIRIS_2360nm.tif",
-    "TOA_WV3_SWIR1.tif",
-    "TOA_WV3_SWIR2.tif",
-    "TOA_WV3_SWIR3.tif",
-    "TOA_WV3_SWIR4.tif",
-    "TOA_WV3_SWIR5.tif",
-    "TOA_WV3_SWIR6.tif",
-    "TOA_WV3_SWIR7.tif",
-    "TOA_WV3_SWIR8.tif",
-)
+    "TOA_AVIRIS_460nm.tif",
+    "TOA_AVIRIS_550nm.tif",
+    "TOA_AVIRIS_640nm.tif",
+    "TOA_AVIRIS_2004nm.tif",
+    "TOA_AVIRIS_2109nm.tif",
+    "TOA_AVIRIS_2310nm.tif",
+    "TOA_AVIRIS_2350nm.tif",
+    "TOA_AVIRIS_2360nm.tif",
+    "TOA_WV3_SWIR1.tif",
+    "TOA_WV3_SWIR2.tif",
+    "TOA_WV3_SWIR3.tif",
+    "TOA_WV3_SWIR4.tif",
+    "TOA_WV3_SWIR5.tif",
+    "TOA_WV3_SWIR6.tif",
+    "TOA_WV3_SWIR7.tif",
+    "TOA_WV3_SWIR8.tif")
+
+class PipelineType(Enum):
+    TRAINING = "training"
+    INFERENCE = "inference"
+
+class DatasetType(Enum):
+    SEGMENTATION = "segmentation"
+    BOUNDING_BOX = "bounding_box"
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..17be0d7
--- /dev/null
+++ b/main.py
@@ -0,0 +1,18 @@
+from pipeline.pipeline_manager import PipelineManager
+from config.constants import PipelineType
+
+if __name__ == "__main__":
+    config_path = "./config/bbox_pipeline_config.yaml"
+
+    print("Initializing pipeline manager...")
+    pipeline = PipelineManager(PipelineType.TRAINING, config_path)
+
+    print("Loading dataset...")
+    pipeline.data_loader.create_dataset()
+    data = pipeline.data_loader.get_dataset()
+
+    print("Processing dataset...")
+    data = pipeline.processor.preprocess(data)
+
+    print("Creating model...")
+    pipeline.model.compile()
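For reference, a minimal sketch of how this YAML is consumed: PipelineManager passes each component's `params` mapping to the constructor as keyword arguments, so the keys above must match constructor parameter names. The snippet below only parses the file and prints what would be passed; it assumes the config path used in main.py.

import yaml

with open("./config/bbox_pipeline_config.yaml") as f:
    cfg = yaml.safe_load(f)

params = cfg["Processor"].get("params", {})
# Equivalent to BoundingBoxProcessor(**params), i.e.:
#   config={'get_normalization_constants': True, ...},
#   input_shape=[512, 512, 16], normalize=True,
#   augmentations=['horizontal_flip', 'rotate']
print(params)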
+ """ + + def __init__(self): + pass + + @abstractmethod + def build_model(self): + pass + + @abstractmethod + def compile(self): + pass + + @abstractmethod + def load_model(self): + pass + + @abstractmethod + def save_model(self): + pass + + @abstractmethod + def train(self): + pass + + @abstractmethod + def predict(self): + pass + + @abstractmethod + def evaluate(self): + pass + diff --git a/models/bounding_box_model.py b/models/bounding_box_model.py new file mode 100644 index 0000000..7165423 --- /dev/null +++ b/models/bounding_box_model.py @@ -0,0 +1,85 @@ +import yaml +import tensorflow as tf +from datetime import datetime +import ast + +from tensorflow.keras.models import load_model +from tensorflow.keras.optimizers import Adam +from models.base_model import BaseModel +from src.losses import iou_loss, modified_mean_squared_error + +class BoundingBoxModel(BaseModel): + + def __init__(self, input_shape, max_boxes, model_fn=None, model_filepath=None): + super().__init__() + self.input_shape = tuple(ast.literal_eval(input_shape)) if isinstance(input_shape, str) else input_shape + self.max_boxes = max_boxes + self.unique_id = datetime.now().strftime("%Y%m%d%H%M%S") + + if model_filepath: + self.model = BoundingBoxModel.load(model_filepath) + else: + self.model = model_fn(input_shape, max_boxes) if model_fn else self.build_model(self.input_shape, max_boxes) + + def build_model(self, img_shape, max_boxes): + model = tf.keras.Sequential([ + tf.keras.layers.Input(shape=img_shape), + + # Encoder: Convolutional layers + tf.keras.layers.Conv2D(64, (3, 3), padding="same"), + tf.keras.layers.ELU(), + tf.keras.layers.MaxPooling2D((2, 2)), + + tf.keras.layers.Conv2D(128, (3, 3), padding="same"), + tf.keras.layers.ELU(), + tf.keras.layers.MaxPooling2D((2, 2)), + + tf.keras.layers.Conv2D(256, (3, 3), padding="same"), + tf.keras.layers.ELU(), + tf.keras.layers.MaxPooling2D((2, 2)), + + # Decoder: Convolution for bounding box regression + tf.keras.layers.Conv2D(512, (3, 3), padding="same"), + tf.keras.layers.ELU(), + + # Final convolutional layer for predicting bounding boxes + tf.keras.layers.Conv2D(4 * max_boxes, (1, 1), padding="same"), + tf.keras.layers.ELU(), + + # Global Average Pooling to reduce spatial dimensions + tf.keras.layers.GlobalAveragePooling2D(), + + # Reshape to (batch_size, max_boxes, 4) + tf.keras.layers.Reshape((max_boxes, 4)) # We want a fixed number of bounding boxes per image + ]) + return model + + def compile( + self, + optimizer=Adam(learning_rate=0.0001), + loss=modified_mean_squared_error, + metrics=["mae", "accuracy"], + ): + self.model.compile(optimizer=optimizer, loss=loss, metrics=metrics) + + def train(self, train_dataset, epochs=10, batch_size=8): + train_dataset = train_dataset.batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE) + return self.model.fit(train_dataset, epochs=epochs) + + def evaluate(self, test_data): + return self.model.evaluate(test_data) + + def predict(self, x): + return self.model.predict(x) + + def save_model(self, output_dir): + self.model.save(f"{output_dir}/{self.unique_id}_bbox_model.h5") + attrs_dict = {k: self.__dict__[k] for k in self.__dict__ if k != "model"} + + with open(f"{output_dir}/{self.unique_id}_attrs.yaml", "w") as attrs_file: + yaml.safe_dump(attrs_dict, attrs_file) + + @staticmethod + def load_model(filepath): + model = load_model(filepath, custom_objects={"iou_loss": iou_loss, "modified_mean_squared_error": modified_mean_squared_error}) + return model \ No newline at end of file diff --git a/pipeline/pipeline_manager.py 
diff --git a/pipeline/pipeline_manager.py b/pipeline/pipeline_manager.py
new file mode 100644
index 0000000..b666a51
--- /dev/null
+++ b/pipeline/pipeline_manager.py
@@ -0,0 +1,44 @@
+import yaml
+import importlib
+
+from config.constants import PipelineType
+
+CLASS_MAPPING = {
+    "BoundingBoxDataLoader": "src.data_loader.bounding_box_data_loader.BoundingBoxDataLoader",
+    "SegmentationDataLoader": "src.data_loader.segmentation_data_loader.SegmentationDataLoader",
+    "BoundingBoxProcessor": "src.processor.bounding_box_processor.BoundingBoxProcessor",
+    "BoundingBoxModel": "models.bounding_box_model.BoundingBoxModel",
+}
+
+class PipelineManager:
+
+    def __init__(self, pipeline_type, config_path):
+        if not isinstance(pipeline_type, PipelineType):
+            raise ValueError(f"Invalid pipeline type: {pipeline_type}")
+        self.type = pipeline_type
+        self.config = self.load_config(config_path)
+        self.data_loader = self._load_component("DataLoader")
+        self.processor = self._load_component("Processor")
+        self.model = self._load_component("Model")
+
+    def _load_component(self, key):
+        """Dynamically loads a class from CLASS_MAPPING based on the YAML config."""
+        if key not in self.config:
+            raise ValueError(f"Missing '{key}' section in config file.")
+
+        key_type = self.config[key]["type"]
+        class_path = CLASS_MAPPING.get(key_type)  # Get class path from mapping
+
+        if not class_path:
+            raise ValueError(f"Unknown type '{key_type}' for '{key}' component.")
+
+        params = self.config[key].get("params", {})  # Extract parameters
+        module_name, class_name = class_path.rsplit(".", 1)  # Split module & class
+        module = importlib.import_module(module_name)  # Import module dynamically
+        cls = getattr(module, class_name)  # Get class from module
+
+        return cls(**params)  # Instantiate and return instance
+
+    def load_config(self, path):
+        with open(path, 'r') as file:
+            return yaml.safe_load(file)
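With this pattern, adding a component is a config-plus-mapping change. A hypothetical SegmentationModel (class path illustrative, not part of this diff) would be wired in like so:

from pipeline.pipeline_manager import CLASS_MAPPING

# Register the class path; _load_component resolves it via importlib.
CLASS_MAPPING["SegmentationModel"] = "models.segmentation_model.SegmentationModel"

# Then reference it in the pipeline config:
# Model:
#   type: SegmentationModel
#   params:
#     input_shape: [512, 512, 16]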
diff --git a/src/data_loader.py b/src/data_loader.py
index 62693ac..fbf4c41 100644
--- a/src/data_loader.py
+++ b/src/data_loader.py
@@ -1,5 +1,7 @@
 import os
 import tensorflow as tf
+from src.data_loader.bounding_box_data_loader import BoundingBoxDataLoader
+from src.data_loader.segmentation_data_loader import SegmentationDataLoader
 
 from src.image_utils import data_generator, bbox_data_generator, is_valid_bbox
 
@@ -112,25 +114,33 @@ def augment_dataset(image, bbox, augmentations=["none", "horizontal_flip", "vert
 
 if __name__ == "__main__":
-    # testing the shapes of the images and bboxes
-    dataset = create_bbox_dataset(data_dir="./data/raw_data/STARCOP_train_easy")
-
+    # Test data loader for bounding box dataset
+    bbox_loader = BoundingBoxDataLoader(
+        dataset_dir='./data/raw_data/STARCOP_train_easy',
+        max_boxes=1
+    )
+    bbox_loader.create_dataset()
+    dataset = bbox_loader.get_dataset()
+    # Testing the shapes of images and bounding boxes
     for image, bbox in dataset.take(3):
-        print(f"original bounding box: {bbox}")
+        print(f"Original bounding box: {bbox}")
         print(f"Original Image Shape: {image.shape}, Original Bbox Shape: {bbox.shape}")
 
+    # Apply augmentation
     augmented_dataset = dataset.flat_map(augment_dataset)
-
     for image, bbox in augmented_dataset.take(3):
-        print(f"augmented bounding box: {bbox}")
+        print(f"Augmented bounding box: {bbox}")
         print(f"Augmented Image Shape: {image.shape}, Augmented Bbox Shape: {bbox.shape}")
 
-    # Test the create_dataset function
-    train_data_path = "./data/raw_data/STARCOP_train_easy"
-    dataset = create_dataset(train_data_path)
-    # Fetch a few samples from the dataset
-    for i, data_point in enumerate(dataset.take(3)):  # Verify first 3 samples
+    # Test data loader for segmentation dataset
+    segmentation_loader = SegmentationDataLoader(
+        dataset_dir='./data/raw_data/STARCOP_train_easy'
+    )
+    segmentation_loader.create_dataset()
+    dataset = segmentation_loader.get_dataset()
+    # Fetch and verify a few samples from the dataset
+    for i, data_point in enumerate(dataset.take(3)):
         print(f"Sample {i + 1}:")
         print("Keys:", data_point.keys())
         print("Image shape:", data_point["image"].shape)
diff --git a/src/data_loader/__init__.py b/src/data_loader/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/data_loader/base_data_loader.py b/src/data_loader/base_data_loader.py
new file mode 100644
index 0000000..c596a11
--- /dev/null
+++ b/src/data_loader/base_data_loader.py
@@ -0,0 +1,16 @@
+from abc import ABC, abstractmethod
+
+class BaseDataLoader(ABC):
+    def __init__(self, dataset_dir, batch_size=32, exclude_dirs=None):
+        self.dataset_dir = dataset_dir
+        self.batch_size = batch_size
+        self.exclude_dirs = exclude_dirs if exclude_dirs is not None else []  # Avoid a shared mutable default
+        self.dataset = None
+
+    @abstractmethod
+    def create_dataset(self):
+        pass
+
+    def get_dataset(self):
+        return self.dataset
+
diff --git a/src/data_loader/bounding_box_data_loader.py b/src/data_loader/bounding_box_data_loader.py
new file mode 100644
index 0000000..e5cf04e
--- /dev/null
+++ b/src/data_loader/bounding_box_data_loader.py
@@ -0,0 +1,30 @@
+import tensorflow as tf
+
+from src.data_loader.base_data_loader import BaseDataLoader
+from src.image_utils import bbox_data_generator
+
+class BoundingBoxDataLoader(BaseDataLoader):
+    def __init__(self, dataset_dir, batch_size=32, max_boxes=10, exclude_dirs=None):
+        super().__init__(dataset_dir, batch_size, exclude_dirs)
+        self.max_boxes = max_boxes
+
+    def create_dataset(self):
+        """
+        Builds a TensorFlow dataset of images and their bounding box labels
+        and stores it in self.dataset (retrieve it with get_dataset()).
+
+        Dataset elements:
+            - Images: (512, 512, 16)
+            - Labels: (max_boxes, 4)
+        """
+        output_sig = (
+            tf.TensorSpec(shape=(512, 512, 16), dtype=tf.float32),      # Images
+            tf.TensorSpec(shape=(self.max_boxes, 4), dtype=tf.float32)  # Bounding boxes
+        )
+
+        dataset = tf.data.Dataset.from_generator(
+            lambda: bbox_data_generator(self.dataset_dir, self.max_boxes, self.exclude_dirs),
+            output_signature=output_sig
+        )
+
+        self.dataset = dataset
\ No newline at end of file
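A short usage sketch for the loader above (dataset path matches the config; batch size is illustrative). Note that create_dataset() stores the dataset on the instance rather than returning it:

import tensorflow as tf
from src.data_loader.bounding_box_data_loader import BoundingBoxDataLoader

loader = BoundingBoxDataLoader(dataset_dir="./data/raw_data/STARCOP_train_easy", max_boxes=1)
loader.create_dataset()
dataset = loader.get_dataset()

# Elements are unbatched (image, bbox) pairs; batch before training.
for image, bbox in dataset.batch(8).take(1):
    print(image.shape)  # (8, 512, 512, 16)
    print(bbox.shape)   # (8, 1, 4)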
+ """ + output_sig = ( + tf.TensorSpec(shape=(512, 512, 16), dtype=tf.float32), # Images + tf.TensorSpec(shape=(512, 512, 1), dtype=tf.float32) # Segmentation Masks + ) + + dataset = tf.data.Dataset.from_generator( + lambda: data_generator(self.dataset_dir), + output_signature=output_sig + ) + + # Transform dataset to dictionary format + dataset = dataset.map( + lambda img, lbl: {"image": img, "segmentation_mask": lbl}, + num_parallel_calls=tf.data.AUTOTUNE + ) + + self.dataset = dataset diff --git a/src/processor/__init__.py b/src/processor/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/processor/base_processor.py b/src/processor/base_processor.py new file mode 100644 index 0000000..333afb2 --- /dev/null +++ b/src/processor/base_processor.py @@ -0,0 +1,31 @@ + +class BaseProcessor: + + def __init__(self, config): + """ + Initialize the preprocessor with a config specifying which steps to apply. + + :param config: Dictionary where keys are preprocessing step names + and values are booleans (True = Apply, False = Skip). + """ + self.config = config + self.pipeline = self.build_pipeline() + + def build_pipeline(self): + """ + Constructs the preprocessing pipeline dynamically based on the config. + """ + pipeline = [] + for step_name in self.config: + if self.config[step_name] and hasattr(self, step_name): + pipeline.append(getattr(self, step_name)) + return pipeline + + def preprocess(self, data): + """ + Applies the dynamically built preprocessing pipeline to the dataset. + """ + for step in self.pipeline: + data = step(data) + return data + diff --git a/src/processor/bounding_box_processor.py b/src/processor/bounding_box_processor.py new file mode 100644 index 0000000..27e6963 --- /dev/null +++ b/src/processor/bounding_box_processor.py @@ -0,0 +1,116 @@ +import tensorflow as tf + +from src.processor.base_processor import BaseProcessor +from src.image_utils import is_valid_bbox + +class BoundingBoxProcessor(BaseProcessor): + + def __init__(self, config, input_shape, normalize=True, augmentations=None): + super().__init__(config) + self.input_shape = input_shape + self.normalize = normalize + self.augmentations = augmentations if augmentations else [] + self.norm_mean = None + self.norm_std = None + + def get_normalization_constants(self, dataset): + if not self.normalize: + return (0.0, 1.0) + + if self.norm_mean is not None and self.norm_std is not None: + return tf.constant(self.norm_mean, dtype=tf.float32), tf.constant(self.norm_std, dtype=tf.float32) + + sum_pixels = tf.zeros((16,), dtype=tf.float32) + sum_squares = tf.zeros((16,), dtype=tf.float32) + num_pixels = tf.Variable(0, dtype=tf.int32) + + for image_batch, _ in dataset: + num_pixels.assign_add(tf.reduce_prod(tf.shape(image_batch)[:-1])) # Total pixels across batch + sum_pixels += tf.reduce_sum(image_batch, axis=[0, 1, 2]) # Sum across height & width + sum_squares += tf.reduce_sum(tf.square(image_batch), axis=[0, 1, 2]) # Sum of squares + + self.norm_mean = sum_pixels / tf.cast(num_pixels, tf.float32) + variance = (sum_squares / tf.cast(num_pixels, tf.float32)) - tf.square(self.norm_mean) + self.norm_std = tf.sqrt(variance) + + print(f"Normalization constants: mean={self.norm_mean}, stddev={self.norm_std}") + return dataset + + def resize(self, dataset): + return dataset.map(lambda img, lab: (tf.image.resize(img, self.input_shape[:-1]), lab)) + + def normalize_dataset(self, dataset): + if self.normalize: + return dataset.map(lambda img, lab: ((img - self.norm_mean) / self.norm_std, lab)) + return 
diff --git a/src/processor/bounding_box_processor.py b/src/processor/bounding_box_processor.py
new file mode 100644
index 0000000..27e6963
--- /dev/null
+++ b/src/processor/bounding_box_processor.py
@@ -0,0 +1,116 @@
+import tensorflow as tf
+
+from src.processor.base_processor import BaseProcessor
+from src.image_utils import is_valid_bbox
+
+class BoundingBoxProcessor(BaseProcessor):
+
+    def __init__(self, config, input_shape, normalize=True, augmentations=None):
+        super().__init__(config)
+        self.input_shape = tuple(input_shape)
+        self.normalize = normalize
+        self.augmentations = augmentations if augmentations else []
+        self.norm_mean = None
+        self.norm_std = None
+
+    def get_normalization_constants(self, dataset):
+        # Like every pipeline step, this takes a dataset and returns a dataset;
+        # the computed constants are stored on the instance for normalize_dataset.
+        if not self.normalize:
+            return dataset
+
+        if self.norm_mean is not None and self.norm_std is not None:
+            return dataset  # Constants already computed
+
+        sum_pixels = tf.zeros((16,), dtype=tf.float32)
+        sum_squares = tf.zeros((16,), dtype=tf.float32)
+        num_pixels = tf.Variable(0, dtype=tf.int32)
+
+        for image, _ in dataset:  # Elements are unbatched (H, W, C) images
+            num_pixels.assign_add(tf.reduce_prod(tf.shape(image)[:-1]))    # Pixels per image
+            sum_pixels += tf.reduce_sum(image, axis=[0, 1])                # Per-channel sum over height & width
+            sum_squares += tf.reduce_sum(tf.square(image), axis=[0, 1])    # Per-channel sum of squares
+
+        self.norm_mean = sum_pixels / tf.cast(num_pixels, tf.float32)
+        variance = (sum_squares / tf.cast(num_pixels, tf.float32)) - tf.square(self.norm_mean)
+        self.norm_std = tf.sqrt(variance)
+
+        print(f"Normalization constants: mean={self.norm_mean}, stddev={self.norm_std}")
+        return dataset
+
+    def resize(self, dataset):
+        return dataset.map(lambda img, lab: (tf.image.resize(img, self.input_shape[:-1]), lab))
+
+    def normalize_dataset(self, dataset):
+        if self.normalize:
+            return dataset.map(lambda img, lab: ((img - self.norm_mean) / self.norm_std, lab))
+        return dataset
+
+    def augment_dataset(self, dataset):
+        def _augment(img, bbox):
+            pairs = [self.augment_image(img, bbox, augmentation) for augmentation in self.augmentations]
+            # Stack per component so from_tensor_slices yields one (image, bbox) pair per augmentation
+            images = tf.stack([p[0] for p in pairs])
+            bboxes = tf.stack([p[1] for p in pairs])
+            return tf.data.Dataset.from_tensor_slices((images, bboxes))
+        return dataset.flat_map(_augment)
+
+    def augment_image(self, image, bboxes, transformation):
+        augmented_bboxes = []
+        valid_mask = tf.map_fn(is_valid_bbox, bboxes, fn_output_signature=tf.bool)
+        valid_mask = tf.expand_dims(valid_mask, axis=-1)
+        valid_mask = tf.broadcast_to(valid_mask, tf.shape(bboxes))
+        image_shape = tf.cast(tf.shape(image), tf.float32)
+
+        if transformation == "horizontal_flip":
+            image = tf.image.flip_left_right(image)
+            augmented_bboxes = tf.where(
+                valid_mask,
+                tf.stack(
+                    [
+                        image_shape[1] - bboxes[:, 1] - 1,
+                        image_shape[1] - bboxes[:, 0] - 1,
+                        bboxes[:, 2],
+                        bboxes[:, 3],
+                    ],
+                    axis=1,
+                ),
+                tf.fill(tf.shape(bboxes), -1.0),
+            )
+        elif transformation == "vertical_flip":
+            image = tf.image.flip_up_down(image)
+            augmented_bboxes = tf.where(
+                valid_mask,
+                tf.stack(
+                    [
+                        bboxes[:, 0],
+                        bboxes[:, 1],
+                        image_shape[0] - bboxes[:, 3] - 1,
+                        image_shape[0] - bboxes[:, 2] - 1,
+                    ],
+                    axis=1,
+                ),
+                tf.fill(tf.shape(bboxes), -1.0),
+            )
+        elif transformation == "rotate":
+            image = tf.image.rot90(image)
+            augmented_bboxes = tf.where(
+                valid_mask,
+                tf.stack(
+                    [
+                        bboxes[:, 2],
+                        bboxes[:, 3],
+                        image_shape[1] - bboxes[:, 1] - 1,
+                        image_shape[1] - bboxes[:, 0] - 1,
+                    ],
+                    axis=1,
+                ),
+                tf.fill(tf.shape(bboxes), -1.0),
+            )
+        return image, augmented_bboxes
+
+    def normalize_bbox(self, dataset):
+        height = tf.cast(self.input_shape[0], tf.float32)
+        width = tf.cast(self.input_shape[1], tf.float32)
+
+        # Normalize bounding box coordinates (format: [x-left, x-right, y-top, y-bottom])
+        return dataset.map(lambda img, bbox: (img, tf.stack([
+            bbox[..., 0] / width,   # x-left
+            bbox[..., 1] / width,   # x-right
+            bbox[..., 2] / height,  # y-top
+            bbox[..., 3] / height   # y-bottom
+        ], axis=-1)))
diff --git a/src/test_data_utils.py b/tests/test_data_utils.py
similarity index 97%
rename from src/test_data_utils.py
rename to tests/test_data_utils.py
index a39f901..1cc4d08 100644
--- a/src/test_data_utils.py
+++ b/tests/test_data_utils.py
@@ -1,7 +1,7 @@
 import unittest
-from data_utils import get_easy_ids
 import os
+from src.data_utils import get_easy_ids
 
 
 class TestDataUtils(unittest.TestCase):
     def setUp(self):
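Putting the pieces together, an end-to-end training sketch under the assumptions above (the output directory and epoch/batch values are illustrative; train() batches the dataset internally):

from pipeline.pipeline_manager import PipelineManager
from config.constants import PipelineType

pipeline = PipelineManager(PipelineType.TRAINING, "./config/bbox_pipeline_config.yaml")

pipeline.data_loader.create_dataset()
data = pipeline.data_loader.get_dataset()

# Runs the config-enabled steps in order: normalization constants,
# resize, normalize_dataset, augment_dataset, normalize_bbox.
data = pipeline.processor.preprocess(data)

pipeline.model.compile()
history = pipeline.model.train(data, epochs=10, batch_size=8)
pipeline.model.save_model("./artifacts")  # Illustrative output directory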