diff --git a/src/image_utils.py b/src/image_utils.py index 45ce810..9ab70c6 100644 --- a/src/image_utils.py +++ b/src/image_utils.py @@ -1,12 +1,14 @@ import os import numpy as np -import tensorflow as tf import matplotlib.pyplot as plt from PIL import Image from skimage import measure from typing import Tuple, Generator, Optional, Union from ipywidgets import interact +from typing import List, Tuple, Generator +import tensorflow as tf +import math from keras_cv import losses from config.constants import IMAGE_FILE_NAMES @@ -342,7 +344,23 @@ def compare_bbox(true_bbox: tuple|list, pred_bbox: tuple|list, metric: str = "io return iou_value -def varon_iteration(dir_path: str, c_threshold: float, num_bands: int, output_file: Optional[str]=None, images: Optional[list]=None, pixels: Optional[int]= 255) -> np.ndarray: +def createTestMatrix(): + + data = np.array([ + [[0, -0.2104005415, -0.4138471552], + [-0.2104005415, 0, -0.2586695576], + [-0.4138471552, -0.2586695576, 0]], + [[0, 0, 0], + [0, 0, 0], + [0, 0, 0]], + [[0, -0.2246186874, -0.3063050874], + [-0.2246186874, 0, -0.1064981483], + [-0.3063050874, -0.1064981483, 0]] + ]) + + np.save("tests/varon_correct.npy", data) + +def varon_iteration(dir_path: str, output_file: str, c_threshold: float, num_bands: int, images: Optional[np.ndarray]=None, pixels: Optional[int]= 512): """ consumes a path to a directory (easy training), and name of the output file. 
For each folder of images, it computes the varon ratio between each image creating @@ -411,36 +429,33 @@ def varon_iteration(dir_path: str, c_threshold: float, num_bands: int, output_fi return final_matrix - -def get_global_normalization_mean_std(data): - """Function to calculate the global mean and standard deviation of the data - - Args: - data (np.ndarray): numpy array containing image data - - Returns: - tuple: (np.ndarray, np.ndarray) containing the mean and standard deviation of the data +""" +Gets normalization constants using data loader: +Find mean and standard deviation for each of the 16 channels of image dataset +arguments: takes in tf.data.Dataset +returns: +""" +def find_normalization_constants(dataset, num_channels: Optional[int]= 16, pixels: Optional[int]= 255): """ - mean_global = np.mean(data, axis=(0, 1, 2), keepdims=True) - std_global = np.std(data, axis=(0, 1, 2), keepdims=True) - - std_global[std_global == 0] = 1.0 - return mean_global, std_global - + Returns a list holding one (mean, std dev) tuple per channel, num_channels tuples in total, computed over every element of dataset. NOTE(review): pixels is never read in this function; confirm it is still needed. + """ + final_array = [] + + # Loop over every channel index in range(num_channels) + for i in range(num_channels): + channel_array = [] # Collects the flattened i-th channel of every dataset element + for images, _ in dataset: + current_channel = images[:, :, i] # Extract the i-th channel (assumes images is an unbatched H x W x C tensor; TODO confirm) + """current channel is currently a 512x512 array, we want to flatten it into an array length 262144, then append it to channel_array""" + current_channel_flat =tf.reshape(current_channel, [-1]) + channel_array.append(current_channel_flat) -def resize_data_and_labels(x, y, reshape_size): - """function to resize the data and labels to a specified size + all_pixels = tf.concat(channel_array, axis=0) - Args: - x (np.ndarray or tf.tensor): image array - y (np.ndarray or tf.tensor): label array - reshape_size (tuple or list): shape to resize image and labels to + mean = tf.reduce_mean(all_pixels) + stddev = tf.math.reduce_std(all_pixels) - Returns: - tuple: 
(tf.tensor, tf.tensor): resized image and label arrays - """ - x_resized = tf.image.resize(x, reshape_size) - y_resized = tf.image.resize(y, reshape_size) + final_array.append((mean.numpy(), stddev.numpy())) - return x_resized, y_resized \ No newline at end of file + return final_array \ No newline at end of file diff --git a/tests/test_image_utils.py b/tests/test_image_utils.py index e644fc2..36f747f 100644 --- a/tests/test_image_utils.py +++ b/tests/test_image_utils.py @@ -1,7 +1,7 @@ import unittest import numpy as np from pathlib import Path - +import tensorflow as tf from src import image_utils def get_expected_varon_matrix(): @@ -20,6 +20,7 @@ def get_expected_varon_matrix(): return data class TestImageUtils(unittest.TestCase): + def setUp(self): self.files_to_remove = [] return super().setUp() @@ -57,6 +58,7 @@ def test_differentDenom(self): Here's the link to the google sheet used to create the correct_matrix: https://docs.google.com/spreadsheets/d/1ibQIVitjaxNGXof7cjM9m5XKFG8H756YdaG6zqICUpI/edit?usp=sharing """ + def test_varon_iteration_easy(self): images = ["ang20190927t184620_r7541_c401_w151_h151", "ang20190927t153023_r7101_c126_w151_h151", @@ -66,6 +68,23 @@ def test_varon_iteration_easy(self): correct_matrix = get_expected_varon_matrix() np.testing.assert_almost_equal(correct_matrix, compute_matrix, decimal=6) + + def test_find_normalization_constants(self): + # Build a dataset by mapping three image names through tf.io.read_file. NOTE(review): find_normalization_constants unpacks each element as (images, _); confirm the element structure matches + images = ["ang20190927t184620_r7541_c401_w151_h151", + "ang20190927t153023_r7101_c126_w151_h151", + "ang20191019t175004_r8192_c256_w512_h512"] + + dataset = tf.data.Dataset.from_tensor_slices(images) + dataset = dataset.map(lambda x: tf.io.read_file(x)) + computed_constants = image_utils.find_normalization_constants(dataset, 3, 5) + + # https://docs.google.com/spreadsheets/d/1lbjrDmN354iAQZzkFJaPDEINOGFKBeBwbdE54z4O34I/edit?usp=sharing + correct_contstants = np.array([(19.8782136, 14.91234357), + (22.53969195, 17.19600461), + (24.9107772, 
18.98169895)]) + np.testing.assert_almost_equal(correct_contstants, computed_constants, decimal=6) + def test_binary_bbox(self): binary_image = np.array([