From 523632abf59edb84d81947b80cdc74fecc1c2c7f Mon Sep 17 00:00:00 2001
From: Calista Besseling <calista.besseling@gmail.com>
Date: Thu, 16 Jan 2025 21:57:34 -0500
Subject: [PATCH 1/3] first draft of normalization_constants function

---
 src/image_utils.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/src/image_utils.py b/src/image_utils.py
index 2be7068..de9aa08 100644
--- a/src/image_utils.py
+++ b/src/image_utils.py
@@ -6,6 +6,7 @@
 import matplotlib.pyplot as plt
 from ipywidgets import interact
 from typing import List, Tuple, Generator
+import tensorflow as tf
 import math
 from keras_cv import losses
 
@@ -383,3 +384,27 @@ def varon_iteration(dir_path: str, output_file: str, c_threshold: float, num_ban
     np.save(output_file, final_matrix)
 
     return final_matrix
+
+"""
+Gets normalization constants using data loader:
+Find mean and standard deviation for each of the 16 channels of image dataset
+arguments: takes in tf.data.Dataset
+returns:
+"""
+def find_normalization_constants(dataset):
+    final_array = [] 
+    
+    # Loop through each of the 16 channels
+    for i in range(16):
+        channel_total = np.zeros((512, 512), dtype=np.float32)  # Initialize a zero matrix for channel summation
+
+        for images, _ in dataset:
+            channel = images[:, :, i]  # Extract the i-th channel
+            channel_total += channel  
+
+        mean = np.mean(channel_total)
+        stddev = np.std(channel_total)
+
+        final_array.append((mean, stddev))
+
+    return final_array
\ No newline at end of file

From 168993ba39398e9561eaa7871c8e6cdbe89437a7 Mon Sep 17 00:00:00 2001
From: Calista Besseling <calista.besseling@gmail.com>
Date: Mon, 27 Jan 2025 21:08:29 -0500
Subject: [PATCH 2/3] Changed how mean and stddev were calculated

---
 src/image_utils.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/src/image_utils.py b/src/image_utils.py
index de9aa08..2a55060 100644
--- a/src/image_utils.py
+++ b/src/image_utils.py
@@ -392,18 +392,25 @@ def varon_iteration(dir_path: str, output_file: str, c_threshold: float, num_ban
 returns:
 """
 def find_normalization_constants(dataset):
+    """
+    final array is an array of 16 tuples with mean and std dev for each of the channels
+    """
     final_array = [] 
     
     # Loop through each of the 16 channels
     for i in range(16):
-        channel_total = np.zeros((512, 512), dtype=np.float32)  # Initialize a zero matrix for channel summation
+        channel_array = []  # Initialize a zero matrix for channel summation
 
         for images, _ in dataset:
-            channel = images[:, :, i]  # Extract the i-th channel
-            channel_total += channel  
+            current_channel = images[:, :, i]  # Extract the i-th channel
+            """current channel is currently a 512x512 array, we want to flatten it into an array length 262144, then append it to channel_array"""
+            current_channel_flat =tf.reshape(current_channel, [-1])
+            channel_array.append(current_channel_flat)
+
+        all_pixels = tf.concat(channel_array, axis=0)
 
-        mean = np.mean(channel_total)
-        stddev = np.std(channel_total)
+        mean = tf.reduce_mean(all_pixels)
+        stddev = tf.math.reduce_std(all_pixels)
 
         final_array.append((mean, stddev))
 

From 1dd634e4e40fe9d0c4e456f95230be33e8672ccc Mon Sep 17 00:00:00 2001
From: Calista Besseling <calista.besseling@gmail.com>
Date: Mon, 3 Mar 2025 19:44:42 -0500
Subject: [PATCH 3/3] Added a unit test in test_image_utils; I haven't been
 able to run it yet

---
 src/image_utils.py        |  8 ++++----
 tests/test_image_utils.py | 21 ++++++++++++++++++++-
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/src/image_utils.py b/src/image_utils.py
index 2a55060..7fbbce2 100644
--- a/src/image_utils.py
+++ b/src/image_utils.py
@@ -299,7 +299,7 @@ def createTestMatrix():
 
         np.save("tests/varon_correct.npy", data)
 
-def varon_iteration(dir_path: str, output_file: str, c_threshold: float, num_bands: int, images: Optional[np.ndarray]=None, pixels: Optional[int]= 255):
+def varon_iteration(dir_path: str, output_file: str, c_threshold: float, num_bands: int, images: Optional[np.ndarray]=None, pixels: Optional[int]= 512):
     """
     consumes a path to a directory (easy training), and name of the output file. For each
     folder of images, it computes the varon ratio between each image creating
@@ -391,14 +391,14 @@ def varon_iteration(dir_path: str, output_file: str, c_threshold: float, num_ban
 arguments: takes in tf.data.Dataset
 returns:
 """
-def find_normalization_constants(dataset):
+def find_normalization_constants(dataset, num_channels: Optional[int]= 16, pixels: Optional[int]= 255):
     """
     final array is an array of 16 tuples with mean and std dev for each of the channels
     """
     final_array = [] 
     
     # Loop through each of the 16 channels
-    for i in range(16):
+    for i in range(num_channels):
         channel_array = []  # Initialize a zero matrix for channel summation
 
         for images, _ in dataset:
@@ -412,6 +412,6 @@ def find_normalization_constants(dataset):
         mean = tf.reduce_mean(all_pixels)
         stddev = tf.math.reduce_std(all_pixels)
 
-        final_array.append((mean, stddev))
+        final_array.append((mean.numpy(), stddev.numpy()))
 
     return final_array
\ No newline at end of file
diff --git a/tests/test_image_utils.py b/tests/test_image_utils.py
index 16e3313..f396437 100644
--- a/tests/test_image_utils.py
+++ b/tests/test_image_utils.py
@@ -1,10 +1,11 @@
 import unittest
 import numpy as np
 from pathlib import Path
-
+import tensorflow as tf
 from src import image_utils
 
 class TestImageUtils(unittest.TestCase):
+    
     def setUp(self):
         self.files_to_remove = []
         return super().setUp()
@@ -42,6 +43,7 @@ def test_differentDenom(self):
     Here's the link to the google sheet used to create the correct_matrix:
     https://docs.google.com/spreadsheets/d/1ibQIVitjaxNGXof7cjM9m5XKFG8H756YdaG6zqICUpI/edit?usp=sharing
     """
+    
     def test_varon_iteration_easy(self):
         images = ["ang20190927t184620_r7541_c401_w151_h151",
                   "ang20190927t153023_r7101_c126_w151_h151",
@@ -54,6 +56,23 @@ def test_varon_iteration_easy(self):
         self.files_to_remove.append('tests/varon_correct.npy') 
         np.testing.assert_almost_equal(correct_matrix, compute_matrix, decimal=6) 
     
+
+    def test_find_normalization_constants(self):
+        # Build a dataset from three image file names; 3 channels are requested below
+        images = ["ang20190927t184620_r7541_c401_w151_h151",
+                  "ang20190927t153023_r7101_c126_w151_h151",
+                  "ang20191019t175004_r8192_c256_w512_h512"]
+        
+        dataset = tf.data.Dataset.from_tensor_slices(images)
+        dataset = dataset.map(lambda x: tf.io.read_file(x))
+        computed_constants = image_utils.find_normalization_constants(dataset, 3, 5)
+        
+        # https://docs.google.com/spreadsheets/d/1lbjrDmN354iAQZzkFJaPDEINOGFKBeBwbdE54z4O34I/edit?usp=sharing
+        correct_contstants = np.array([(19.8782136, 14.91234357), 
+                                       (22.53969195, 17.19600461), 
+                                       (24.9107772, 18.98169895)])
+        np.testing.assert_almost_equal(correct_contstants, computed_constants, decimal=6) 
+    
     
     def test_binary_bbox(self):
         binary_image = np.array([