From 523632abf59edb84d81947b80cdc74fecc1c2c7f Mon Sep 17 00:00:00 2001 From: Calista Besseling Date: Thu, 16 Jan 2025 21:57:34 -0500 Subject: [PATCH 1/3] first draft of normalization_constants function --- src/image_utils.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/image_utils.py b/src/image_utils.py index 2be7068..de9aa08 100644 --- a/src/image_utils.py +++ b/src/image_utils.py @@ -6,6 +6,7 @@ import matplotlib.pyplot as plt from ipywidgets import interact from typing import List, Tuple, Generator +import tensorflow as tf import math from keras_cv import losses @@ -383,3 +384,27 @@ def varon_iteration(dir_path: str, output_file: str, c_threshold: float, num_ban np.save(output_file, final_matrix) return final_matrix + +""" +Gets normalization constants using data loader: +Find mean and standard deviation for each of the 16 channels of image dataset +arguments: takes in tf.data.Dataset +returns: +""" +def find_normalization_constants(dataset): + final_array = [] + + # Loop through each of the 16 channels + for i in range(16): + channel_total = np.zeros((512, 512), dtype=np.float32) # Initialize a zero matrix for channel summation + + for images, _ in dataset: + channel = images[:, :, i] # Extract the i-th channel + channel_total += channel + + mean = np.mean(channel_total) + stddev = np.std(channel_total) + + final_array.append((mean, stddev)) + + return final_array \ No newline at end of file From 168993ba39398e9561eaa7871c8e6cdbe89437a7 Mon Sep 17 00:00:00 2001 From: Calista Besseling Date: Mon, 27 Jan 2025 21:08:29 -0500 Subject: [PATCH 2/3] Changed how mean and stddev was calculated --- src/image_utils.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/image_utils.py b/src/image_utils.py index de9aa08..2a55060 100644 --- a/src/image_utils.py +++ b/src/image_utils.py @@ -392,18 +392,25 @@ def varon_iteration(dir_path: str, output_file: str, c_threshold: float, num_ban returns: """ def find_normalization_constants(dataset): + """ + final array is an array of 16 tuples with mean and std dev for each of the channels + """ final_array = [] # Loop through each of the 16 channels for i in range(16): - channel_total = np.zeros((512, 512), dtype=np.float32) # Initialize a zero matrix for channel summation + channel_array = [] # Initialize a zero matrix for channel summation for images, _ in dataset: - channel = images[:, :, i] # Extract the i-th channel - channel_total += channel + current_channel = images[:, :, i] # Extract the i-th channel + """current channel is currently a 512x512 array, we want to flatten it into an array length 262144, then append it to channel_array""" + current_channel_flat =tf.reshape(current_channel, [-1]) + channel_array.append(current_channel_flat) + + all_pixels = tf.concat(channel_array, axis=0) - mean = np.mean(channel_total) - stddev = np.std(channel_total) + mean = tf.reduce_mean(all_pixels) + stddev = tf.math.reduce_std(all_pixels) final_array.append((mean, stddev)) From 1dd634e4e40fe9d0c4e456f95230be33e8672ccc Mon Sep 17 00:00:00 2001 From: Calista Besseling Date: Mon, 3 Mar 2025 19:44:42 -0500 Subject: [PATCH 3/3] Added a unittest in test_image_utils, I haven't been able to run it yet --- src/image_utils.py | 8 ++++---- tests/test_image_utils.py | 21 ++++++++++++++++++++- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/image_utils.py b/src/image_utils.py index 2a55060..7fbbce2 100644 --- a/src/image_utils.py +++ b/src/image_utils.py @@ -299,7 +299,7 @@ def createTestMatrix(): np.save("tests/varon_correct.npy", data) -def varon_iteration(dir_path: str, output_file: str, c_threshold: float, num_bands: int, images: Optional[np.ndarray]=None, pixels: Optional[int]= 255): +def varon_iteration(dir_path: str, output_file: str, c_threshold: float, num_bands: int, images: Optional[np.ndarray]=None, pixels: Optional[int]= 512): """ consumes a path to a directory (easy training), and name of the output file. For each folder of images, it computes the varon ratio between each image creating @@ -391,14 +391,14 @@ def varon_iteration(dir_path: str, output_file: str, c_threshold: float, num_ban arguments: takes in tf.data.Dataset returns: """ -def find_normalization_constants(dataset): +def find_normalization_constants(dataset, num_channels: Optional[int]= 16, pixels: Optional[int]= 255): """ final array is an array of 16 tuples with mean and std dev for each of the channels """ final_array = [] # Loop through each of the 16 channels - for i in range(16): + for i in range(num_channels): channel_array = [] # Initialize a zero matrix for channel summation for images, _ in dataset: @@ -412,6 +412,6 @@ def find_normalization_constants(dataset): mean = tf.reduce_mean(all_pixels) stddev = tf.math.reduce_std(all_pixels) - final_array.append((mean, stddev)) + final_array.append((mean.numpy(), stddev.numpy())) return final_array \ No newline at end of file diff --git a/tests/test_image_utils.py b/tests/test_image_utils.py index 16e3313..f396437 100644 --- a/tests/test_image_utils.py +++ b/tests/test_image_utils.py @@ -1,10 +1,11 @@ import unittest import numpy as np from pathlib import Path - +import tensorflow as tf from src import image_utils class TestImageUtils(unittest.TestCase): + def setUp(self): self.files_to_remove = [] return super().setUp() @@ -42,6 +43,7 @@ def test_differentDenom(self): Here's the link to the google sheet used to create the correct_matrix: https://docs.google.com/spreadsheets/d/1ibQIVitjaxNGXof7cjM9m5XKFG8H756YdaG6zqICUpI/edit?usp=sharing """ + def test_varon_iteration_easy(self): images = ["ang20190927t184620_r7541_c401_w151_h151", "ang20190927t153023_r7101_c126_w151_h151", @@ -54,6 +56,23 @@ def test_varon_iteration_easy(self): self.files_to_remove.append('tests/varon_correct.npy') np.testing.assert_almost_equal(correct_matrix, compute_matrix, decimal=6) + + def test_find_normalization_constants(self): + # Create a dummy dataset with 16 channels + images = ["ang20190927t184620_r7541_c401_w151_h151", + "ang20190927t153023_r7101_c126_w151_h151", + "ang20191019t175004_r8192_c256_w512_h512"] + + dataset = tf.data.Dataset.from_tensor_slices(images) + dataset = dataset.map(lambda x: tf.io.read_file(x)) + computed_constants = image_utils.find_normalization_constants(dataset, 3, 5) + + # https://docs.google.com/spreadsheets/d/1lbjrDmN354iAQZzkFJaPDEINOGFKBeBwbdE54z4O34I/edit?usp=sharing + correct_contstants = np.array([(19.8782136, 14.91234357), + (22.53969195, 17.19600461), + (24.9107772, 18.98169895)]) + np.testing.assert_almost_equal(correct_contstants, computed_constants, decimal=6) + def test_binary_bbox(self): binary_image = np.array([