From 9645fe90a55eb2f8d247d34b4950b67e53083aa8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Wed, 28 Feb 2024 14:06:13 +0100
Subject: [PATCH 01/64] Update user id

---
 labs/team_description.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/labs/team_description.py b/labs/team_description.py
index 14ed5e1..8207533 100644
--- a/labs/team_description.py
+++ b/labs/team_description.py
@@ -6,4 +6,4 @@
 #
 # You can find out ReCodEx ID in the URL bar after navigating
 # to your User profile page. The ID has the following format:
-# 01234567-89ab-cdef-0123-456789abcdef.
+# 31a0a96a-c590-4486-b194-f72765b2ce25

From d7c8844f16e5d7632ddf7a2046458becbf5e2e57 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Sun, 3 Mar 2024 15:28:32 +0100
Subject: [PATCH 02/64] Solve numpy_entropy

---
 labs/01/numpy_entropy.py | 44 ++++++++++++++++++++++++----------------
 labs/01/test.ps1         |  4 ++++
 2 files changed, 30 insertions(+), 18 deletions(-)
 create mode 100644 labs/01/test.ps1

diff --git a/labs/01/numpy_entropy.py b/labs/01/numpy_entropy.py
index 8e86bff..6cab8aa 100644
--- a/labs/01/numpy_entropy.py
+++ b/labs/01/numpy_entropy.py
@@ -12,42 +12,50 @@
 
 
 def main(args: argparse.Namespace) -> tuple[float, float, float]:
-    # TODO: Load data distribution, each line containing a datapoint -- a string.
+    # Load data distribution, each line containing a datapoint -- a string.
+    data_map = {}
+
     with open(args.data_path, "r") as data:
         for line in data:
             line = line.rstrip("\n")
-            # TODO: Process the line, aggregating data with built-in Python
+
+            # Process the line, aggregating data with built-in Python
             # data structures (not NumPy, which is not suitable for incremental
             # addition and string mapping).
+            if line in data_map:
+                data_map[line] += 1
+            else:
+                data_map[line] = 1
 
-    # TODO: Create a NumPy array containing the data distribution. The
+    # Create a NumPy array containing the data distribution. The
     # NumPy array should contain only data, not any mapping. Alternatively,
     # the NumPy array might be created after loading the model distribution.
+    data_dist = np.array(list(data_map.values())) / sum(data_map.values())
+
+    # Load model distribution, each line `string \t probability`.
+    model_map = {}
 
-    # TODO: Load model distribution, each line `string \t probability`.
     with open(args.model_path, "r") as model:
         for line in model:
             line = line.rstrip("\n")
-            # TODO: Process the line, aggregating using Python data structures.
+            key, value = line.split("\t")
+            model_map[key] = float(value)
 
-    # TODO: Create a NumPy array containing the model distribution.
+    # Create a NumPy array containing the model distribution.
+    model_dist = np.array([model_map[key] if key in model_map else np.inf for key in data_map.keys()])
 
-    # TODO: Compute the entropy H(data distribution). You should not use
-    # manual for/while cycles, but instead use the fact that most NumPy methods
-    # operate on all elements (for example `*` is vector element-wise multiplication).
-    entropy = ...
+    # Compute the entropy H(data distribution).
+    entropy = -np.sum(data_dist * np.log(data_dist))
 
-    # TODO: Compute cross-entropy H(data distribution, model distribution).
-    # When some data distribution elements are missing in the model distribution,
-    # return `np.inf`.
-    crossentropy = ...
+    # Compute cross-entropy H(data distribution, model distribution).
+    crossentropy = -np.sum(data_dist * np.log(model_dist))
 
-    # TODO: Compute KL-divergence D_KL(data distribution, model_distribution),
-    # again using `np.inf` when needed.
-    kl_divergence = ...
+    # Compute KL-divergence D_KL(data distribution, model_distribution).
+    kl_divergence = crossentropy - entropy
+    # kl_divergence = np.where(np.isinf(kl_divergence), np.inf, kl_divergence)
 
     # Return the computed values for ReCodEx to validate.
-    return entropy, crossentropy, kl_divergence
+    return entropy, crossentropy if np.isfinite(crossentropy) else np.inf, kl_divergence if np.isfinite(kl_divergence) else np.inf
 
 
 if __name__ == "__main__":
diff --git a/labs/01/test.ps1 b/labs/01/test.ps1
new file mode 100644
index 0000000..1a8e7cd
--- /dev/null
+++ b/labs/01/test.ps1
@@ -0,0 +1,4 @@
+python3 numpy_entropy.py --data_path numpy_entropy_data_1.txt --model_path numpy_entropy_model_1.txt
+python3 numpy_entropy.py --data_path numpy_entropy_data_2.txt --model_path numpy_entropy_model_2.txt
+python3 numpy_entropy.py --data_path numpy_entropy_data_3.txt --model_path numpy_entropy_model_3.txt
+python3 numpy_entropy.py --data_path numpy_entropy_data_4.txt --model_path numpy_entropy_model_4.txt

From 372885d8db56575c9e443ddd2395f8bcd120f87d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Sun, 3 Mar 2024 15:31:05 +0100
Subject: [PATCH 03/64] Add pull.sh script to automate upstream pull

---
 pull.sh | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 pull.sh

diff --git a/pull.sh b/pull.sh
new file mode 100644
index 0000000..9cadfe4
--- /dev/null
+++ b/pull.sh
@@ -0,0 +1 @@
+git pull upstream master

From 161e5c90986f18b7bd1d08756883cc3fe4d03f13 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Mon, 4 Mar 2024 12:48:47 +0100
Subject: [PATCH 04/64] Fix reshape and compute covariance matrix in
 pca_first.keras.py and pca_first.py

---
 labs/01/pca_first.keras.py | 16 ++++++++--------
 labs/01/pca_first.py       | 15 ++++++++-------
 labs/01/run.ps1            |  1 +
 labs/01/test.ps1           |  2 +-
 4 files changed, 18 insertions(+), 16 deletions(-)
 create mode 100644 labs/01/run.ps1

diff --git a/labs/01/pca_first.keras.py b/labs/01/pca_first.keras.py
index 1f99e21..c81108a 100644
--- a/labs/01/pca_first.keras.py
+++ b/labs/01/pca_first.keras.py
@@ -32,27 +32,27 @@ def main(args: argparse.Namespace) -> tuple[float, float]:
     data_indices = np.random.choice(mnist.train.size, size=args.examples, replace=False)
     data = keras.ops.convert_to_tensor(mnist.train.data["images"][data_indices] / 255, dtype="float32")
 
-    # TODO: Data has shape [args.examples, MNIST.H, MNIST.W, MNIST.C].
+    # Data has shape [args.examples, MNIST.H, MNIST.W, MNIST.C].
     # We want to reshape it to [args.examples, MNIST.H * MNIST.W * MNIST.C].
     # We can do so using `keras.ops.reshape(data, new_shape)` with new shape
     # `[data.shape[0], data.shape[1] * data.shape[2] * data.shape[3]]`.
-    data = ...
+    data = keras.ops.reshape(data, [data.shape[0], data.shape[1] * data.shape[2] * data.shape[3]])
 
-    # TODO: Now compute mean of every feature. Use `keras.ops.mean`, and set
+    # Now compute mean of every feature. Use `keras.ops.mean`, and set
     # `axis` to zero -- therefore, the mean will be computed across the first
     # dimension, so across examples.
-    mean = ...
+    mean = keras.ops.mean(data, axis=0)
 
-    # TODO: Compute the covariance matrix. The covariance matrix is
+    # Compute the covariance matrix. The covariance matrix is
     #   (data - mean)^T * (data - mean) / data.shape[0]
     # where transpose can be computed using `keras.ops.transpose` and
     # matrix multiplication using either Python operator @ or `keras.ops.matmul`.
-    cov = ...
+    cov = keras.ops.transpose(data-mean) @ (data-mean) / data.shape[0]
 
-    # TODO: Compute the total variance, which is the sum of the diagonal
+    # Compute the total variance, which is the sum of the diagonal
     # of the covariance matrix. To extract the diagonal use `keras.ops.diagonal`,
     # and to sum a tensor use `keras.ops.sum`.
-    total_variance = ...
+    total_variance = keras.ops.sum(keras.ops.diagonal(cov))
 
     # TODO: Now run `args.iterations` of the power iteration algorithm.
     # Start with a vector of `cov.shape[0]` ones of type `"float32"` using `keras.ops.ones`.
diff --git a/labs/01/pca_first.py b/labs/01/pca_first.py
index 2e4ef10..0300441 100644
--- a/labs/01/pca_first.py
+++ b/labs/01/pca_first.py
@@ -30,30 +30,31 @@ def main(args: argparse.Namespace) -> tuple[float, float]:
     data_indices = np.random.choice(mnist.train.size, size=args.examples, replace=False)
     data = torch.tensor(mnist.train.data["images"][data_indices] / 255, dtype=torch.float32)
 
-    # TODO: Data has shape [args.examples, MNIST.H, MNIST.W, MNIST.C].
+    # Data has shape [args.examples, MNIST.H, MNIST.W, MNIST.C].
     # We want to reshape it to [args.examples, MNIST.H * MNIST.W * MNIST.C].
     # We can do so using `torch.reshape(data, new_shape)` with new shape
     # `[data.shape[0], data.shape[1] * data.shape[2] * data.shape[3]]`.
-    data = ...
+    data = data.reshape(args.examples, MNIST.H, MNIST.W, MNIST.C)
 
-    # TODO: Now compute mean of every feature. Use `torch.mean`, and set
+    # Now compute mean of every feature. Use `torch.mean`, and set
     # `dim` (or `axis`) argument to zero -- therefore, the mean will be
     # computed across the first dimension, so across examples.
     #
     # Note that for compatibility with Numpy/TF/Keras, all `dim` arguments
     # in PyTorch can be also called `axis`.
-    mean = ...
+    mean = torch.mean(data, dim=0)
 
-    # TODO: Compute the covariance matrix. The covariance matrix is
+    # Compute the covariance matrix. The covariance matrix is
     #   (data - mean)^T * (data - mean) / data.shape[0]
     # where transpose can be computed using `torch.transpose` or `torch.t` and
     # matrix multiplication using either Python operator @ or `torch.matmul`.
-    cov = ...
+    cov = (data-mean).t @ (data-mean) / data.shape[0]
+    print(cov)
 
     # TODO: Compute the total variance, which is the sum of the diagonal
     # of the covariance matrix. To extract the diagonal use `torch.diagonal`,
     # and to sum a tensor use `torch.sum`.
-    total_variance = ...
+    total_variance = torch.diagonal(cov).sum()
 
     # TODO: Now run `args.iterations` of the power iteration algorithm.
     # Start with a vector of `cov.shape[0]` ones of type `torch.float32` using `torch.ones`.
diff --git a/labs/01/run.ps1 b/labs/01/run.ps1
new file mode 100644
index 0000000..a68f5e8
--- /dev/null
+++ b/labs/01/run.ps1
@@ -0,0 +1 @@
+..\..\.venv\Scripts\python .\pca_first.keras.py
diff --git a/labs/01/test.ps1 b/labs/01/test.ps1
index 1a8e7cd..75ddf37 100644
--- a/labs/01/test.ps1
+++ b/labs/01/test.ps1
@@ -1,4 +1,4 @@
 python3 numpy_entropy.py --data_path numpy_entropy_data_1.txt --model_path numpy_entropy_model_1.txt
 python3 numpy_entropy.py --data_path numpy_entropy_data_2.txt --model_path numpy_entropy_model_2.txt
 python3 numpy_entropy.py --data_path numpy_entropy_data_3.txt --model_path numpy_entropy_model_3.txt
-python3 numpy_entropy.py --data_path numpy_entropy_data_4.txt --model_path numpy_entropy_model_4.txt
+spython3 numpy_entropy.py --data_path numpy_entropy_data_4.txt --model_path numpy_entropy_model_4.txt

From bfa91ea76db4b46bad8f6347720d6c5bf2163ab8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Mon, 4 Mar 2024 12:48:55 +0100
Subject: [PATCH 05/64] Add .gitignore, pull.ps1, and setup.ps1 files

---
 .gitignore          | 1 +
 pull.sh => pull.ps1 | 0
 setup.ps1           | 1 +
 3 files changed, 2 insertions(+)
 create mode 100644 .gitignore
 rename pull.sh => pull.ps1 (100%)
 create mode 100644 setup.ps1

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1d17dae
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.venv
diff --git a/pull.sh b/pull.ps1
similarity index 100%
rename from pull.sh
rename to pull.ps1
diff --git a/setup.ps1 b/setup.ps1
new file mode 100644
index 0000000..8fd7b89
--- /dev/null
+++ b/setup.ps1
@@ -0,0 +1 @@
+.venv/Scripts/pip install -r .\labs\requirements.txt

From cba46bcf3a3bd8478cba7fe00c9d6285ede13b9a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Mon, 4 Mar 2024 18:35:38 +0100
Subject: [PATCH 06/64] Update team description

---
 labs/team_description.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/labs/team_description.py b/labs/team_description.py
index 8207533..1d232bc 100644
--- a/labs/team_description.py
+++ b/labs/team_description.py
@@ -6,4 +6,7 @@
 #
 # You can find out ReCodEx ID in the URL bar after navigating
 # to your User profile page. The ID has the following format:
+# Jonas Glerup Røssum <jglr@itu.dk>
 # 31a0a96a-c590-4486-b194-f72765b2ce25
+# Xiao Wang <xiao.wang@student.uni-tuebingen.de>
+# 91d4d1d7-b800-4765-96b9-df098ac36a66

From 76d549e4b00f9a818472c4db2a3fced78e3d0745 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Mon, 4 Mar 2024 19:11:54 +0100
Subject: [PATCH 07/64] Solve pca_first.keras.py

---
 labs/01/pca_first.keras.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/labs/01/pca_first.keras.py b/labs/01/pca_first.keras.py
index c81108a..028a8ad 100644
--- a/labs/01/pca_first.keras.py
+++ b/labs/01/pca_first.keras.py
@@ -54,17 +54,21 @@ def main(args: argparse.Namespace) -> tuple[float, float]:
     # and to sum a tensor use `keras.ops.sum`.
     total_variance = keras.ops.sum(keras.ops.diagonal(cov))
 
-    # TODO: Now run `args.iterations` of the power iteration algorithm.
+    # Now run `args.iterations` of the power iteration algorithm.
     # Start with a vector of `cov.shape[0]` ones of type `"float32"` using `keras.ops.ones`.
-    v = ...
+    v = keras.ops.ones(cov.shape[0], dtype="float32")
     for i in range(args.iterations):
-        # TODO: In the power iteration algorithm, we compute
+        # In the power iteration algorithm, we compute
         # 1. v = cov v
         #    The matrix-vector multiplication can be computed as regular matrix multiplication.
+        v = keras.ops.matmul(cov, v)
+
         # 2. s = l2_norm(v)
         #    The l2_norm can be computed using for example `keras.ops.norm`.
+        s = keras.ops.norm(v, 2)
+
         # 3. v = v / s
-        pass
+        v = v / s
 
     # The `v` is now approximately the eigenvector of the largest eigenvalue, `s`.
     # We now compute the explained variance, which is the ratio of `s` and `total_variance`.

From eda8cab77109bb77a371818b8cae43462d50bf07 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Mon, 4 Mar 2024 19:12:28 +0100
Subject: [PATCH 08/64] Specify encoding

---
 labs/01/numpy_entropy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/labs/01/numpy_entropy.py b/labs/01/numpy_entropy.py
index 6cab8aa..4855b8a 100644
--- a/labs/01/numpy_entropy.py
+++ b/labs/01/numpy_entropy.py
@@ -15,7 +15,7 @@ def main(args: argparse.Namespace) -> tuple[float, float, float]:
     # Load data distribution, each line containing a datapoint -- a string.
     data_map = {}
 
-    with open(args.data_path, "r") as data:
+    with open(args.data_path, "r", encoding="utf-8") as data:
         for line in data:
             line = line.rstrip("\n")
 

From c84f9a3ae6d56a34933820ae2e324e6092335662 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Mon, 4 Mar 2024 19:15:48 +0100
Subject: [PATCH 09/64] Add Lisa's solution

---
 labs/01/pca_first.py | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/labs/01/pca_first.py b/labs/01/pca_first.py
index 0300441..d1e18ca 100644
--- a/labs/01/pca_first.py
+++ b/labs/01/pca_first.py
@@ -34,7 +34,7 @@ def main(args: argparse.Namespace) -> tuple[float, float]:
     # We want to reshape it to [args.examples, MNIST.H * MNIST.W * MNIST.C].
     # We can do so using `torch.reshape(data, new_shape)` with new shape
     # `[data.shape[0], data.shape[1] * data.shape[2] * data.shape[3]]`.
-    data = data.reshape(args.examples, MNIST.H, MNIST.W, MNIST.C)
+    data = torch.reshape(data, (data.shape[0], data.shape[1] * data.shape[2] * data.shape[3]))
 
     # Now compute mean of every feature. Use `torch.mean`, and set
     # `dim` (or `axis`) argument to zero -- therefore, the mean will be
@@ -42,32 +42,34 @@ def main(args: argparse.Namespace) -> tuple[float, float]:
     #
     # Note that for compatibility with Numpy/TF/Keras, all `dim` arguments
     # in PyTorch can be also called `axis`.
-    mean = torch.mean(data, dim=0)
+    mean = torch.mean(data, axis=0)
 
     # Compute the covariance matrix. The covariance matrix is
     #   (data - mean)^T * (data - mean) / data.shape[0]
     # where transpose can be computed using `torch.transpose` or `torch.t` and
     # matrix multiplication using either Python operator @ or `torch.matmul`.
-    cov = (data-mean).t @ (data-mean) / data.shape[0]
-    print(cov)
+    cov = torch.matmul(torch.t(data-mean), data-mean)/data.shape[0]
 
     # TODO: Compute the total variance, which is the sum of the diagonal
     # of the covariance matrix. To extract the diagonal use `torch.diagonal`,
     # and to sum a tensor use `torch.sum`.
-    total_variance = torch.diagonal(cov).sum()
+    total_variance = torch.sum(torch.diagonal(cov)).item()
 
     # TODO: Now run `args.iterations` of the power iteration algorithm.
     # Start with a vector of `cov.shape[0]` ones of type `torch.float32` using `torch.ones`.
-    v = ...
+    v = torch.ones(cov.shape[0], dtype=torch.float32)
+
     for i in range(args.iterations):
-        # TODO: In the power iteration algorithm, we compute
-        # 1. v = cov v
-        #    The matrix-vector multiplication can be computed as regular matrix multiplication
-        #    or using `torch.mv`.
-        # 2. s = l2_norm(v)
-        #    The l2_norm can be computed using for example `torch.linalg.vector_norm`.
-        # 3. v = v / s
-        pass
+         # TODO: In the power iteration algorithm, we compute
+         # 1. v = cov v
+         #    The matrix-vector multiplication can be computed as regular matrix multiplication
+         #    or using `torch.mv`.
+         # 2. s = l2_norm(v)
+         #    The l2_norm can be computed using for example `torch.linalg.vector_norm`.
+         # 3. v = v / s
+        v = cov*v
+        s = torch.linalg.vector_norm(v)
+        v = v/s
 
     # The `v` is now approximately the eigenvector of the largest eigenvalue, `s`.
     # We now compute the explained variance, which is the ratio of `s` and `total_variance`.

From 4d12eab7b188f4dd458c857d1e0d15e03b9cca8b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Mon, 4 Mar 2024 19:18:44 +0100
Subject: [PATCH 10/64] Use matrix multiplication instead of element-wise
 multiplication

---
 labs/01/pca_first.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/labs/01/pca_first.py b/labs/01/pca_first.py
index d1e18ca..ade3559 100644
--- a/labs/01/pca_first.py
+++ b/labs/01/pca_first.py
@@ -67,7 +67,7 @@ def main(args: argparse.Namespace) -> tuple[float, float]:
          # 2. s = l2_norm(v)
          #    The l2_norm can be computed using for example `torch.linalg.vector_norm`.
          # 3. v = v / s
-        v = cov*v
+        v = cov @ v
         s = torch.linalg.vector_norm(v)
         v = v/s
 

From ca7e4bd4a778c37db929fc98e89c0380b31e3775 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Mon, 4 Mar 2024 19:18:50 +0100
Subject: [PATCH 11/64] Fix test script

---
 labs/01/test.ps1 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/labs/01/test.ps1 b/labs/01/test.ps1
index 75ddf37..1a8e7cd 100644
--- a/labs/01/test.ps1
+++ b/labs/01/test.ps1
@@ -1,4 +1,4 @@
 python3 numpy_entropy.py --data_path numpy_entropy_data_1.txt --model_path numpy_entropy_model_1.txt
 python3 numpy_entropy.py --data_path numpy_entropy_data_2.txt --model_path numpy_entropy_model_2.txt
 python3 numpy_entropy.py --data_path numpy_entropy_data_3.txt --model_path numpy_entropy_model_3.txt
-spython3 numpy_entropy.py --data_path numpy_entropy_data_4.txt --model_path numpy_entropy_model_4.txt
+python3 numpy_entropy.py --data_path numpy_entropy_data_4.txt --model_path numpy_entropy_model_4.txt

From 3f6bde2fffccaa7d7af275f81b567b6b1a55b81d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Tue, 5 Mar 2024 09:02:13 +0100
Subject: [PATCH 12/64] Solve mnist_layers_activations.py

---
 labs/01/expected.txt                |  39 +++++++
 labs/01/mnist.ps1                   |  24 ++++
 labs/01/mnist_layers_activations.py |  10 +-
 labs/01/output.txt                  | 167 ++++++++++++++++++++++++++++
 4 files changed, 239 insertions(+), 1 deletion(-)
 create mode 100644 labs/01/expected.txt
 create mode 100644 labs/01/mnist.ps1
 create mode 100644 labs/01/output.txt

diff --git a/labs/01/expected.txt b/labs/01/expected.txt
new file mode 100644
index 0000000..fdaf786
--- /dev/null
+++ b/labs/01/expected.txt
@@ -0,0 +1,39 @@
+python3 mnist_layers_activations.py --hidden_layers=0 --activation=none
+Epoch  1/10 accuracy: 0.7801 - loss: 0.8405 - val_accuracy: 0.9300 - val_loss: 0.2716
+Epoch  5/10 accuracy: 0.9222 - loss: 0.2792 - val_accuracy: 0.9406 - val_loss: 0.2203
+Epoch 10/10 accuracy: 0.9304 - loss: 0.2515 - val_accuracy: 0.9432 - val_loss: 0.2159
+
+python3 mnist_layers_activations.py --hidden_layers=1 --activation=none
+Epoch  1/10 accuracy: 0.8483 - loss: 0.5230 - val_accuracy: 0.9352 - val_loss: 0.2422
+Epoch  5/10 accuracy: 0.9236 - loss: 0.2758 - val_accuracy: 0.9360 - val_loss: 0.2325
+Epoch 10/10 accuracy: 0.9298 - loss: 0.2517 - val_accuracy: 0.9354 - val_loss: 0.2439
+
+python3 mnist_layers_activations.py --hidden_layers=1 --activation=relu
+Epoch  1/10 accuracy: 0.8503 - loss: 0.5286 - val_accuracy: 0.9604 - val_loss: 0.1432
+Epoch  5/10 accuracy: 0.9824 - loss: 0.0613 - val_accuracy: 0.9808 - val_loss: 0.0740
+Epoch 10/10 accuracy: 0.9948 - loss: 0.0202 - val_accuracy: 0.9788 - val_loss: 0.0821
+
+python3 mnist_layers_activations.py --hidden_layers=1 --activation=tanh
+Epoch  1/10 accuracy: 0.8529 - loss: 0.5183 - val_accuracy: 0.9564 - val_loss: 0.1632
+Epoch  5/10 accuracy: 0.9800 - loss: 0.0728 - val_accuracy: 0.9740 - val_loss: 0.0853
+Epoch 10/10 accuracy: 0.9948 - loss: 0.0244 - val_accuracy: 0.9782 - val_loss: 0.0772
+
+python3 mnist_layers_activations.py --hidden_layers=1 --activation=sigmoid
+Epoch  1/10 accuracy: 0.7851 - loss: 0.8650 - val_accuracy: 0.9414 - val_loss: 0.2196
+Epoch  5/10 accuracy: 0.9647 - loss: 0.1270 - val_accuracy: 0.9704 - val_loss: 0.1079
+Epoch 10/10 accuracy: 0.9852 - loss: 0.0583 - val_accuracy: 0.9756 - val_loss: 0.0837
+
+python3 mnist_layers_activations.py --hidden_layers=3 --activation=relu
+Epoch  1/10 accuracy: 0.8497 - loss: 0.5011 - val_accuracy: 0.9664 - val_loss: 0.1225
+Epoch  5/10 accuracy: 0.9862 - loss: 0.0438 - val_accuracy: 0.9734 - val_loss: 0.1026
+Epoch 10/10 accuracy: 0.9932 - loss: 0.0202 - val_accuracy: 0.9818 - val_loss: 0.0865
+
+python3 mnist_layers_activations.py --hidden_layers=10 --activation=relu
+Epoch  1/10 accuracy: 0.7710 - loss: 0.6793 - val_accuracy: 0.9570 - val_loss: 0.1479
+Epoch  5/10 accuracy: 0.9780 - loss: 0.0783 - val_accuracy: 0.9786 - val_loss: 0.0808
+Epoch 10/10 accuracy: 0.9869 - loss: 0.0481 - val_accuracy: 0.9724 - val_loss: 0.1163
+
+python3 mnist_layers_activations.py --hidden_layers=10 --activation=sigmoid
+Epoch  1/10 accuracy: 0.1072 - loss: 2.3068 - val_accuracy: 0.1784 - val_loss: 2.1247
+Epoch  5/10 accuracy: 0.8825 - loss: 0.4776 - val_accuracy: 0.9164 - val_loss: 0.3686
+Epoch 10/10 accuracy: 0.9294 - loss: 0.2994 - val_accuracy: 0.9386 - val_loss: 0.2671
diff --git a/labs/01/mnist.ps1 b/labs/01/mnist.ps1
new file mode 100644
index 0000000..a274269
--- /dev/null
+++ b/labs/01/mnist.ps1
@@ -0,0 +1,24 @@
+# Write-Output "python3 mnist_layers_activations.py --hidden_layers=0 --activation=none"
+..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=0 --activation=none
+# Write-Output ""
+# Write-Output "python3 mnist_layers_activations.py --hidden_layers=1 --activation=none"
+..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=1 --activation=none
+# Write-Output ""
+# Write-Output "python3 mnist_layers_activations.py --hidden_layers=1 --activation=relu"
+..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=1 --activation=relu
+# Write-Output ""
+# Write-Output "python3 mnist_layers_activations.py --hidden_layers=1 --activation=tanh"
+..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=1 --activation=tanh
+# Write-Output ""
+# Write-Output "python3 mnist_layers_activations.py --hidden_layers=1 --activation=sigmoid"
+..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=1 --activation=sigmoid
+# Write-Output ""
+# Write-Output "python3 mnist_layers_activations.py --hidden_layers=3 --activation=relu"
+..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=3 --activation=relu
+# Write-Output ""
+# Write-Output "python3 mnist_layers_activations.py --hidden_layers=10 --activation=relu"
+..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=10 --activation=relu
+# Write-Output ""
+# Write-Output "python3 mnist_layers_activations.py --hidden_layers=10 --activation=sigmoid"
+..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=10 --activation=sigmoid
+# Write-Output ""
diff --git a/labs/01/mnist_layers_activations.py b/labs/01/mnist_layers_activations.py
index d58b796..0ae93ab 100644
--- a/labs/01/mnist_layers_activations.py
+++ b/labs/01/mnist_layers_activations.py
@@ -68,7 +68,7 @@ def main(args: argparse.Namespace) -> dict[str, float]:
     # Create the model
     model = keras.Sequential()
     model.add(keras.Input([MNIST.H, MNIST.W, MNIST.C]))
-    # TODO: Finish the model. Namely:
+    # Finish the model. Namely:
     # - start by adding a `keras.layers.Rescaling(1 / 255)` layer;
     # - then add a `keras.layers.Flatten()` layer;
     # - add `args.hidden_layers` number of fully connected hidden layers
@@ -76,6 +76,14 @@ def main(args: argparse.Namespace) -> dict[str, float]:
     #   from `args.activation`, allowing "none", "relu", "tanh", "sigmoid";
     # - finally, add an output fully connected layer with  `MNIST.LABELS` units
     #   and `softmax` activation.
+    model.add(keras.layers.Rescaling(1 / 255))
+    model.add(keras.layers.Flatten())
+
+    for _ in range(args.hidden_layers):
+        activation = None if args.activation == "none" else args.activation
+        model.add(keras.layers.Dense(args.hidden_layer, activation=activation))
+
+    model.add(keras.layers.Dense(MNIST.LABELS, activation="softmax"))
 
     model.compile(
         optimizer=keras.optimizers.Adam(),
diff --git a/labs/01/output.txt b/labs/01/output.txt
new file mode 100644
index 0000000..916c534
--- /dev/null
+++ b/labs/01/output.txt
@@ -0,0 +1,167 @@
+Epoch 1/10
+1100/1100 14s 12ms/step - accuracy: 0.7761 - loss: 0.8442 - val_accuracy: 0.9298 - val_loss: 0.2730
+Epoch 2/10
+1100/1100 12s 11ms/step - accuracy: 0.9057 - loss: 0.3428 - val_accuracy: 0.9336 - val_loss: 0.2418
+Epoch 3/10
+1100/1100 11s 10ms/step - accuracy: 0.9177 - loss: 0.2945 - val_accuracy: 0.9366 - val_loss: 0.2284
+Epoch 4/10
+1100/1100 12s 10ms/step - accuracy: 0.9193 - loss: 0.2839 - val_accuracy: 0.9384 - val_loss: 0.2267
+Epoch 5/10
+1100/1100 11s 10ms/step - accuracy: 0.9228 - loss: 0.2790 - val_accuracy: 0.9392 - val_loss: 0.2208
+Epoch 6/10
+1100/1100 12s 11ms/step - accuracy: 0.9244 - loss: 0.2713 - val_accuracy: 0.9440 - val_loss: 0.2162
+Epoch 7/10
+1100/1100 13s 12ms/step - accuracy: 0.9252 - loss: 0.2662 - val_accuracy: 0.9398 - val_loss: 0.2178
+Epoch 8/10
+1100/1100 14s 12ms/step - accuracy: 0.9269 - loss: 0.2626 - val_accuracy: 0.9398 - val_loss: 0.2169
+Epoch 9/10
+1100/1100 13s 12ms/step - accuracy: 0.9286 - loss: 0.2612 - val_accuracy: 0.9458 - val_loss: 0.2128
+Epoch 10/10
+1100/1100 13s 12ms/step - accuracy: 0.9307 - loss: 0.2515 - val_accuracy: 0.9438 - val_loss: 0.2161
+
+Epoch 1/10
+1100/1100 15s 13ms/step - accuracy: 0.8422 - loss: 0.5383 - val_accuracy: 0.9346 - val_loss: 0.2400
+Epoch 2/10
+1100/1100 18s 17ms/step - accuracy: 0.9120 - loss: 0.3102 - val_accuracy: 0.9364 - val_loss: 0.2372
+Epoch 3/10
+1100/1100 16s 15ms/step - accuracy: 0.9233 - loss: 0.2774 - val_accuracy: 0.9352 - val_loss: 0.2342
+Epoch 4/10
+1100/1100 16s 14ms/step - accuracy: 0.9225 - loss: 0.2736 - val_accuracy: 0.9366 - val_loss: 0.2336
+Epoch 5/10
+1100/1100 15s 13ms/step - accuracy: 0.9233 - loss: 0.2760 - val_accuracy: 0.9344 - val_loss: 0.2331
+Epoch 6/10
+1100/1100 22s 20ms/step - accuracy: 0.9251 - loss: 0.2683 - val_accuracy: 0.9382 - val_loss: 0.2247
+Epoch 7/10
+1100/1100 15s 14ms/step - accuracy: 0.9261 - loss: 0.2658 - val_accuracy: 0.9356 - val_loss: 0.2367
+Epoch 8/10
+1100/1100 15s 14ms/step - accuracy: 0.9256 - loss: 0.2635 - val_accuracy: 0.9364 - val_loss: 0.2308
+Epoch 9/10
+1100/1100 15s 13ms/step - accuracy: 0.9253 - loss: 0.2625 - val_accuracy: 0.9386 - val_loss: 0.2277
+Epoch 10/10
+1100/1100 15s 13ms/step - accuracy: 0.9301 - loss: 0.2515 - val_accuracy: 0.9358 - val_loss: 0.2441
+
+Epoch 1/10
+1100/1100 16s 13ms/step - accuracy: 0.8499 - loss: 0.5317 - val_accuracy: 0.9618 - val_loss: 0.1400
+Epoch 2/10
+1100/1100 15s 13ms/step - accuracy: 0.9517 - loss: 0.1637 - val_accuracy: 0.9682 - val_loss: 0.1153
+Epoch 3/10
+1100/1100 14s 13ms/step - accuracy: 0.9700 - loss: 0.1021 - val_accuracy: 0.9730 - val_loss: 0.0897
+Epoch 4/10
+1100/1100 13s 12ms/step - accuracy: 0.9774 - loss: 0.0757 - val_accuracy: 0.9754 - val_loss: 0.0835
+Epoch 5/10
+1100/1100 13s 12ms/step - accuracy: 0.9824 - loss: 0.0603 - val_accuracy: 0.9772 - val_loss: 0.0766
+Epoch 6/10
+1100/1100 14s 12ms/step - accuracy: 0.9855 - loss: 0.0486 - val_accuracy: 0.9762 - val_loss: 0.0850
+Epoch 7/10
+1100/1100 14s 13ms/step - accuracy: 0.9889 - loss: 0.0374 - val_accuracy: 0.9776 - val_loss: 0.0774
+Epoch 8/10
+1100/1100 13s 12ms/step - accuracy: 0.9901 - loss: 0.0318 - val_accuracy: 0.9786 - val_loss: 0.0765
+Epoch 9/10
+1100/1100 13s 12ms/step - accuracy: 0.9928 - loss: 0.0267 - val_accuracy: 0.9804 - val_loss: 0.0766
+Epoch 10/10
+1100/1100 14s 12ms/step - accuracy: 0.9944 - loss: 0.0208 - val_accuracy: 0.9792 - val_loss: 0.0801
+
+Epoch 1/10
+1100/1100 14s 12ms/step - accuracy: 0.8468 - loss: 0.5308 - val_accuracy: 0.9594 - val_loss: 0.1591
+Epoch 2/10
+1100/1100 13s 12ms/step - accuracy: 0.9433 - loss: 0.1909 - val_accuracy: 0.9646 - val_loss: 0.1300
+Epoch 3/10
+1100/1100 13s 12ms/step - accuracy: 0.9658 - loss: 0.1235 - val_accuracy: 0.9726 - val_loss: 0.0973
+Epoch 4/10
+1100/1100 13s 12ms/step - accuracy: 0.9744 - loss: 0.0909 - val_accuracy: 0.9732 - val_loss: 0.0876
+Epoch 5/10
+1100/1100 13s 12ms/step - accuracy: 0.9798 - loss: 0.0747 - val_accuracy: 0.9788 - val_loss: 0.0770
+Epoch 6/10
+1100/1100 13s 12ms/step - accuracy: 0.9832 - loss: 0.0606 - val_accuracy: 0.9766 - val_loss: 0.0801
+Epoch 7/10
+1100/1100 13s 12ms/step - accuracy: 0.9881 - loss: 0.0460 - val_accuracy: 0.9792 - val_loss: 0.0714
+Epoch 8/10
+1100/1100 13s 12ms/step - accuracy: 0.9894 - loss: 0.0397 - val_accuracy: 0.9768 - val_loss: 0.0741
+Epoch 9/10
+1100/1100 13s 12ms/step - accuracy: 0.9923 - loss: 0.0312 - val_accuracy: 0.9796 - val_loss: 0.0709
+Epoch 10/10
+1100/1100 14s 12ms/step - accuracy: 0.9940 - loss: 0.0257 - val_accuracy: 0.9802 - val_loss: 0.0720
+
+Epoch 1/10
+1100/1100 15s 13ms/step - accuracy: 0.8072 - loss: 0.8138 - val_accuracy: 0.9452 - val_loss: 0.2121
+Epoch 2/10
+1100/1100 15s 14ms/step - accuracy: 0.9241 - loss: 0.2602 - val_accuracy: 0.9570 - val_loss: 0.1663
+Epoch 3/10
+1100/1100 15s 14ms/step - accuracy: 0.9476 - loss: 0.1863 - val_accuracy: 0.9648 - val_loss: 0.1322
+Epoch 4/10
+1100/1100 14s 13ms/step - accuracy: 0.9583 - loss: 0.1490 - val_accuracy: 0.9670 - val_loss: 0.1168
+Epoch 5/10
+1100/1100 14s 13ms/step - accuracy: 0.9658 - loss: 0.1243 - val_accuracy: 0.9696 - val_loss: 0.1047
+Epoch 6/10
+1100/1100 14s 12ms/step - accuracy: 0.9706 - loss: 0.1065 - val_accuracy: 0.9718 - val_loss: 0.0975
+Epoch 7/10
+1100/1100 13s 12ms/step - accuracy: 0.9758 - loss: 0.0891 - val_accuracy: 0.9740 - val_loss: 0.0918
+Epoch 8/10
+1100/1100 13s 12ms/step - accuracy: 0.9779 - loss: 0.0792 - val_accuracy: 0.9758 - val_loss: 0.0885
+Epoch 9/10
+1100/1100 14s 13ms/step - accuracy: 0.9816 - loss: 0.0681 - val_accuracy: 0.9776 - val_loss: 0.0825
+Epoch 10/10
+1100/1100 14s 12ms/step - accuracy: 0.9852 - loss: 0.0583 - val_accuracy: 0.9766 - val_loss: 0.0831
+
+Epoch 1/10
+1100/1100 16s 14ms/step - accuracy: 0.8483 - loss: 0.5002 - val_accuracy: 0.9650 - val_loss: 0.1189
+Epoch 2/10
+1100/1100 16s 14ms/step - accuracy: 0.9609 - loss: 0.1262 - val_accuracy: 0.9718 - val_loss: 0.0971
+Epoch 3/10
+1100/1100 16s 14ms/step - accuracy: 0.9759 - loss: 0.0783 - val_accuracy: 0.9772 - val_loss: 0.0690
+Epoch 4/10
+1100/1100 16s 14ms/step - accuracy: 0.9810 - loss: 0.0597 - val_accuracy: 0.9788 - val_loss: 0.0752
+Epoch 5/10
+1100/1100 15s 14ms/step - accuracy: 0.9855 - loss: 0.0468 - val_accuracy: 0.9748 - val_loss: 0.0817
+Epoch 6/10
+1100/1100 16s 14ms/step - accuracy: 0.9884 - loss: 0.0398 - val_accuracy: 0.9758 - val_loss: 0.0909
+Epoch 7/10
+1100/1100 15s 14ms/step - accuracy: 0.9898 - loss: 0.0318 - val_accuracy: 0.9724 - val_loss: 0.0998
+Epoch 8/10
+1100/1100 16s 14ms/step - accuracy: 0.9892 - loss: 0.0305 - val_accuracy: 0.9778 - val_loss: 0.0952
+Epoch 9/10
+1100/1100 16s 14ms/step - accuracy: 0.9914 - loss: 0.0267 - val_accuracy: 0.9756 - val_loss: 0.0878
+Epoch 10/10
+1100/1100 16s 15ms/step - accuracy: 0.9935 - loss: 0.0203 - val_accuracy: 0.9770 - val_loss: 0.0974
+
+Epoch 1/10
+1100/1100 24s 21ms/step - accuracy: 0.7772 - loss: 0.6657 - val_accuracy: 0.9524 - val_loss: 0.1752
+Epoch 2/10
+1100/1100 24s 22ms/step - accuracy: 0.9525 - loss: 0.1705 - val_accuracy: 0.9682 - val_loss: 0.1261
+Epoch 3/10
+1100/1100 22s 20ms/step - accuracy: 0.9675 - loss: 0.1162 - val_accuracy: 0.9750 - val_loss: 0.0945
+Epoch 4/10
+1100/1100 22s 20ms/step - accuracy: 0.9735 - loss: 0.0929 - val_accuracy: 0.9720 - val_loss: 0.1018
+Epoch 5/10
+1100/1100 22s 20ms/step - accuracy: 0.9789 - loss: 0.0794 - val_accuracy: 0.9762 - val_loss: 0.0888
+Epoch 6/10
+1100/1100 22s 20ms/step - accuracy: 0.9806 - loss: 0.0729 - val_accuracy: 0.9760 - val_loss: 0.0961
+Epoch 7/10
+1100/1100 22s 20ms/step - accuracy: 0.9847 - loss: 0.0578 - val_accuracy: 0.9810 - val_loss: 0.0932
+Epoch 8/10
+1100/1100 22s 20ms/step - accuracy: 0.9824 - loss: 0.0643 - val_accuracy: 0.9786 - val_loss: 0.0854
+Epoch 9/10
+1100/1100 22s 20ms/step - accuracy: 0.9864 - loss: 0.0487 - val_accuracy: 0.9764 - val_loss: 0.1054
+Epoch 10/10
+1100/1100 22s 20ms/step - accuracy: 0.9864 - loss: 0.0493 - val_accuracy: 0.9780 - val_loss: 0.1108
+
+Epoch 1/10
+1100/1100 23s 20ms/step - accuracy: 0.1052 - loss: 2.3130 - val_accuracy: 0.1808 - val_loss: 1.9383
+Epoch 2/10
+1100/1100 22s 20ms/step - accuracy: 0.2002 - loss: 1.9364 - val_accuracy: 0.2168 - val_loss: 1.8587
+Epoch 3/10
+1100/1100 23s 20ms/step - accuracy: 0.2161 - loss: 1.8392 - val_accuracy: 0.5588 - val_loss: 1.2106
+Epoch 4/10
+1100/1100 22s 20ms/step - accuracy: 0.5594 - loss: 1.1159 - val_accuracy: 0.8168 - val_loss: 0.7119
+Epoch 5/10
+1100/1100 22s 20ms/step - accuracy: 0.8359 - loss: 0.6312 - val_accuracy: 0.8994 - val_loss: 0.4360
+Epoch 6/10
+1100/1100 22s 20ms/step - accuracy: 0.8827 - loss: 0.4854 - val_accuracy: 0.9066 - val_loss: 0.4053
+Epoch 7/10
+1100/1100 22s 20ms/step - accuracy: 0.9007 - loss: 0.4218 - val_accuracy: 0.9166 - val_loss: 0.3660
+Epoch 8/10
+1100/1100 22s 20ms/step - accuracy: 0.9075 - loss: 0.3940 - val_accuracy: 0.9204 - val_loss: 0.3552
+Epoch 9/10
+1100/1100 22s 20ms/step - accuracy: 0.9090 - loss: 0.3922 - val_accuracy: 0.9242 - val_loss: 0.3356
+Epoch 10/10
+1100/1100 24s 22ms/step - accuracy: 0.9191 - loss: 0.3534 - val_accuracy: 0.9270 - val_loss: 0.3286

From 451cb9ee46ed38d56301c64d115205f837f5f0c9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Tue, 5 Mar 2024 10:29:20 +0100
Subject: [PATCH 13/64] Add team description to all files

---
 labs/01/mnist_layers_activations.py | 5 +++++
 labs/01/numpy_entropy.py            | 6 ++++++
 labs/01/pca_first.keras.py          | 5 +++++
 labs/01/pca_first.py                | 5 +++++
 4 files changed, 21 insertions(+)

diff --git a/labs/01/mnist_layers_activations.py b/labs/01/mnist_layers_activations.py
index 0ae93ab..bf78be2 100644
--- a/labs/01/mnist_layers_activations.py
+++ b/labs/01/mnist_layers_activations.py
@@ -10,6 +10,11 @@
 
 from mnist import MNIST
 
+# Jonas Glerup Røssum <jglr@itu.dk>
+# 31a0a96a-c590-4486-b194-f72765b2ce25
+# Xiao Wang <xiao.wang@student.uni-tuebingen.de>
+# 91d4d1d7-b800-4765-96b9-df098ac36a66
+
 parser = argparse.ArgumentParser()
 # These arguments will be set appropriately by ReCodEx, even if you change them.
 parser.add_argument("--activation", default="none", choices=["none", "relu", "tanh", "sigmoid"], help="Activation.")
diff --git a/labs/01/numpy_entropy.py b/labs/01/numpy_entropy.py
index 4855b8a..4203a24 100644
--- a/labs/01/numpy_entropy.py
+++ b/labs/01/numpy_entropy.py
@@ -1,4 +1,10 @@
 #!/usr/bin/env python3
+
+# Jonas Glerup Røssum <jglr@itu.dk>
+# 31a0a96a-c590-4486-b194-f72765b2ce25
+# Xiao Wang <xiao.wang@student.uni-tuebingen.de>
+# 91d4d1d7-b800-4765-96b9-df098ac36a66
+
 import argparse
 
 import numpy as np
diff --git a/labs/01/pca_first.keras.py b/labs/01/pca_first.keras.py
index 028a8ad..0632b22 100644
--- a/labs/01/pca_first.keras.py
+++ b/labs/01/pca_first.keras.py
@@ -9,6 +9,11 @@
 
 from mnist import MNIST
 
+# Jonas Glerup Røssum <jglr@itu.dk>
+# 31a0a96a-c590-4486-b194-f72765b2ce25
+# Xiao Wang <xiao.wang@student.uni-tuebingen.de>
+# 91d4d1d7-b800-4765-96b9-df098ac36a66
+
 parser = argparse.ArgumentParser()
 # These arguments will be set appropriately by ReCodEx, even if you change them.
 parser.add_argument("--examples", default=256, type=int, help="MNIST examples to use.")
diff --git a/labs/01/pca_first.py b/labs/01/pca_first.py
index ade3559..deecf06 100644
--- a/labs/01/pca_first.py
+++ b/labs/01/pca_first.py
@@ -7,6 +7,11 @@
 
 from mnist import MNIST
 
+# Jonas Glerup Røssum <jglr@itu.dk>
+# 31a0a96a-c590-4486-b194-f72765b2ce25
+# Xiao Wang <xiao.wang@student.uni-tuebingen.de>
+# 91d4d1d7-b800-4765-96b9-df098ac36a66
+
 parser = argparse.ArgumentParser()
 # These arguments will be set appropriately by ReCodEx, even if you change them.
 parser.add_argument("--examples", default=256, type=int, help="MNIST examples to use.")

From 558f9f4306edb6a86fd55347d2450af1fe06cdf6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Tue, 12 Mar 2024 08:36:16 +0100
Subject: [PATCH 14/64] Update repo setup

---
 .gitignore       | 4 +++-
 .venv/pyvenv.cfg | 3 +++
 setup.ps1        | 5 +++++
 3 files changed, 11 insertions(+), 1 deletion(-)
 create mode 100644 .venv/pyvenv.cfg

diff --git a/.gitignore b/.gitignore
index 1d17dae..0fb63b8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,3 @@
-.venv
+**/.venv/Lib
+**/.venv/Scripts
+**/.venv/share
diff --git a/.venv/pyvenv.cfg b/.venv/pyvenv.cfg
new file mode 100644
index 0000000..e129fd0
--- /dev/null
+++ b/.venv/pyvenv.cfg
@@ -0,0 +1,3 @@
+home = C:\Python310
+include-system-site-packages = false
+version = 3.10.7
diff --git a/setup.ps1 b/setup.ps1
index 8fd7b89..f1f7bbe 100644
--- a/setup.ps1
+++ b/setup.ps1
@@ -1 +1,6 @@
+git remote rename origin upstream
+git remote add origin git@github.com:joglr/npfl138.git
+git fetch
+git checkout master
+python -m venv .venv
 .venv/Scripts/pip install -r .\labs\requirements.txt

From 55c07e2302238b86ea02f5ca33f761d8310c8b01 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Tue, 5 Mar 2024 19:34:00 +0100
Subject: [PATCH 15/64] Solve sgd_backpropagation

---
 .gitignore                      |  2 ++
 labs/02/sgd_backpropagation.ps1 | 50 ++++++++++++++++++++++++++
 labs/02/sgd_backpropagation.py  | 62 +++++++++++++++++++++------------
 3 files changed, 92 insertions(+), 22 deletions(-)
 create mode 100644 labs/02/sgd_backpropagation.ps1

diff --git a/.gitignore b/.gitignore
index 0fb63b8..32199d0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
 **/.venv/Lib
 **/.venv/Scripts
 **/.venv/share
+logs/
+mnist.npz
diff --git a/labs/02/sgd_backpropagation.ps1 b/labs/02/sgd_backpropagation.ps1
new file mode 100644
index 0000000..f613710
--- /dev/null
+++ b/labs/02/sgd_backpropagation.ps1
@@ -0,0 +1,50 @@
+# Examples:
+# ../../.venv/Scripts/python sgd_backpropagation.py --batch_size=64 --hidden_layer=20 --learning_rate=0.1
+# Dev accuracy after epoch 1 is 93.30
+# Dev accuracy after epoch 2 is 94.38
+# Dev accuracy after epoch 3 is 95.16
+# Dev accuracy after epoch 4 is 95.50
+# Dev accuracy after epoch 5 is 95.96
+# Dev accuracy after epoch 6 is 96.04
+# Dev accuracy after epoch 7 is 95.82
+# Dev accuracy after epoch 8 is 95.92
+# Dev accuracy after epoch 9 is 95.96
+# Dev accuracy after epoch 10 is 96.16
+# Test accuracy after epoch 10 is 95.26
+
+# ../../.venv/Scripts/python sgd_backpropagation.py --batch_size=100 --hidden_layer=32 --learning_rate=0.2
+# Dev accuracy after epoch 1 is 93.64
+# Dev accuracy after epoch 2 is 94.80
+# Dev accuracy after epoch 3 is 95.56
+# Dev accuracy after epoch 4 is 95.98
+# Dev accuracy after epoch 5 is 96.24
+# Dev accuracy after epoch 6 is 96.74
+# Dev accuracy after epoch 7 is 96.52
+# Dev accuracy after epoch 8 is 96.54
+# Dev accuracy after epoch 9 is 97.04
+# Dev accuracy after epoch 10 is 97.02
+# Test accuracy after epoch 10 is 96.16
+
+# Tests:
+../../.venv/Scripts/python sgd_backpropagation.py --epochs=2 --batch_size=64 --hidden_layer=20 --learning_rate=0.1
+# Expected:
+# Dev accuracy after epoch 1 is 93.30
+# Dev accuracy after epoch 2 is 94.38
+# Test accuracy after epoch 2 is 93.15
+
+# Actual (does not match the expected values above):
+# Dev accuracy after epoch 1 is 92.98
+# Dev accuracy after epoch 2 is 93.98
+# Test accuracy after epoch 2 is 92.73
+
+
+../../.venv/Scripts/python sgd_backpropagation.py --epochs=2 --batch_size=100 --hidden_layer=32 --learning_rate=0.2
+# Expected:
+# Dev accuracy after epoch 1 is 93.64
+# Dev accuracy after epoch 2 is 94.80
+# Test accuracy after epoch 2 is 93.54
+
+# Actual (does not match the expected values above):
+# Dev accuracy after epoch 1 is 94.16
+# Dev accuracy after epoch 2 is 94.98
+# Test accuracy after epoch 2 is 93.56
diff --git a/labs/02/sgd_backpropagation.py b/labs/02/sgd_backpropagation.py
index cff312a..1b6eebd 100644
--- a/labs/02/sgd_backpropagation.py
+++ b/labs/02/sgd_backpropagation.py
@@ -35,24 +35,41 @@ def __init__(self, args: argparse.Namespace) -> None:
         )
         self._b1 = keras.Variable(keras.ops.zeros([args.hidden_layer]), trainable=True)
 
-        # TODO: Create variables:
+        # Create variables:
         # - _W2, which is a trainable variable of size `[args.hidden_layer, MNIST.LABELS]`,
         #   initialized to `keras.random.normal` value `with stddev=0.1` and `seed=args.seed`,
         # - _b2, which is a trainable variable of size `[MNIST.LABELS]` initialized to zeros
-        ...
+        self._W2 = keras.Variable(keras.random.normal([args.hidden_layer, MNIST.LABELS], stddev=0.1, seed=args.seed), trainable=True)
+
+        self._b2 = keras.Variable(keras.ops.zeros([MNIST.LABELS]), trainable=True)
 
     def predict(self, inputs: torch.Tensor) -> torch.Tensor:
-        # TODO: Define the computation of the network. Notably:
+        # Define the computation of the network. Notably:
         # - start by casting the input byte image to `float32` with `keras.ops.cast`
+
+        cast_inputs = keras.ops.cast(inputs, dtype="float32")
+
         # - then divide the tensor by 255 to normalize it to the `[0, 1]` range
+
+        normalized_inputs = cast_inputs / 255
+
         # - then reshape it to the shape `[inputs.shape[0], -1]`.
         #   The -1 is a wildcard which is computed so that the number
         #   of elements before and after the reshape is preserved.
+
+        reshaped_inputs = keras.ops.reshape(normalized_inputs, [inputs.shape[0], -1])
+
         # - then multiply it by `self._W1` and then add `self._b1`
         # - apply `keras.ops.tanh`
+
+        hidden_layer_output = keras.ops.tanh(keras.ops.matmul(reshaped_inputs, self._W1) + self._b1)
+
         # - multiply the result by `self._W2` and then add `self._b2`
+
+        hidden_layer_output = keras.ops.matmul(hidden_layer_output, self._W2) + self._b2
+
         # - finally apply `keras.ops.softmax` and return the result
-        return ...
+        return keras.ops.softmax(hidden_layer_output)
 
     def train_epoch(self, dataset: MNIST.Dataset) -> None:
         for batch in dataset.batches(self._args.batch_size):
@@ -62,48 +79,48 @@ def train_epoch(self, dataset: MNIST.Dataset) -> None:
             # Size of the batch is `self._args.batch_size`, except for the last, which
             # might be smaller.
 
-            # TODO: Compute the predicted probabilities of the batch images using `self.predict`
-            probabilities = ...
+            # Compute the predicted probabilities of the batch images using `self.predict`
+            probabilities = self.predict(batch["images"])
 
-            # TODO: Manually compute the loss:
+            # Manually compute the loss:
             # - For every batch example, the loss is the categorical crossentropy of the
             #   predicted probabilities and the gold label. To compute the crossentropy, you can
             #   - either use `keras.ops.one_hot` to obtain one-hot encoded gold labels,
             #   - or suitably use `keras.ops.take_along_axis` to "index" the predicted probabilities.
             # - Finally, compute the average across the batch examples.
-            loss = ...
-
+            loss = -keras.ops.mean(keras.ops.sum(keras.ops.one_hot(batch["labels"], MNIST.LABELS) * keras.ops.log(probabilities), axis=-1))
             # We create a list of all variables. Note that a `keras.Model/Layer` automatically
             # tracks owned variables, so we could also use `self.trainable_variables`
             # (or even `self.variables`, which is useful for loading/saving).
             variables = [self._W1, self._b1, self._W2, self._b2]
 
-            # TODO: Compute the gradient of the loss with respect to variables using
+            # Compute the gradient of the loss with respect to variables using
             # backpropagation algorithm by
             # - first resetting the gradients of all variables to zero with `self.zero_grad()`,
             # - then calling `loss.backward()`.
-            ...
+            self.zero_grad()
+            loss.backward()
 
             gradients = [variable.value.grad for variable in variables]
             with torch.no_grad():
                 for variable, gradient in zip(variables, gradients):
-                    # TODO: Perform the SGD update with learning rate `self._args.learning_rate`
+                    # Perform the SGD update with learning rate `self._args.learning_rate`
                     # for the variable and computed gradient. You can modify the
                     # variable value with `variable.assign` or in this case the more
                     # efficient `variable.assign_sub`.
-                    ...
+                    variable.assign_sub(self._args.learning_rate * gradient)
 
     def evaluate(self, dataset: MNIST.Dataset) -> float:
         # Compute the accuracy of the model prediction
         correct = 0
         for batch in dataset.batches(self._args.batch_size):
-            # TODO: Compute the probabilities of the batch images using `self.predict`
+            # Compute the probabilities of the batch images using `self.predict`
             # and convert them to Numpy with `keras.ops.convert_to_numpy`.
-            probabilities = ...
+            probabilities = keras.ops.convert_to_numpy(self.predict(batch["images"]))
 
-            # TODO: Evaluate how many batch examples were predicted
+            # Evaluate how many batch examples were predicted
             # correctly and increase `correct` variable accordingly.
-            correct += ...
+            correct += np.sum(np.argmax(probabilities, axis=-1) == batch["labels"])
 
         return correct / dataset.size
 
@@ -132,15 +149,16 @@ def main(args: argparse.Namespace) -> tuple[float, float]:
     model = Model(args)
 
     for epoch in range(args.epochs):
-        # TODO: Run the `train_epoch` with `mnist.train` dataset
+        # Run the `train_epoch` with `mnist.train` dataset
+        model.train_epoch(mnist.train)
 
-        # TODO: Evaluate the dev data using `evaluate` on `mnist.dev` dataset
-        accuracy = ...
+        # Evaluate the dev data using `evaluate` on `mnist.dev` dataset
+        accuracy = model.evaluate(mnist.dev)
         print("Dev accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * accuracy), flush=True)
         writer.add_scalar("dev/accuracy", 100 * accuracy, epoch + 1)
 
-    # TODO: Evaluate the test data using `evaluate` on `mnist.test` dataset
-    test_accuracy = ...
+    # Evaluate the test data using `evaluate` on `mnist.test` dataset
+    test_accuracy = model.evaluate(mnist.test)
     print("Test accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * test_accuracy), flush=True)
     writer.add_scalar("test/accuracy", 100 * test_accuracy, epoch + 1)
 

From 732d7d540612facb1a55227fd93c4650eb733f76 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Tue, 12 Mar 2024 14:53:06 +0100
Subject: [PATCH 16/64] Solve gym_cartpole (the average score was 423.7)

---
 labs/02/gym_cartpole.py | 41 ++++++++++++++++++++++++++++-------------
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/labs/02/gym_cartpole.py b/labs/02/gym_cartpole.py
index 7befc72..8ace3c5 100644
--- a/labs/02/gym_cartpole.py
+++ b/labs/02/gym_cartpole.py
@@ -17,8 +17,8 @@
 parser.add_argument("--seed", default=42, type=int, help="Random seed.")
 parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.")
 # If you add more arguments, ReCodEx will keep them with your default values.
-parser.add_argument("--batch_size", default=..., type=int, help="Batch size.")
-parser.add_argument("--epochs", default=..., type=int, help="Number of epochs.")
+parser.add_argument("--batch_size", default=5, type=int, help="Batch size.")
+parser.add_argument("--epochs", default=20, type=int, help="Number of epochs.")
 parser.add_argument("--model", default="gym_cartpole_model.keras", type=str, help="Output model path.")
 
 
@@ -49,7 +49,7 @@ def on_epoch_end(self, epoch, logs=None):
 
 def evaluate_model(
     model: keras.Model, seed: int = 42, episodes: int = 100, render: bool = False, report_per_episode: bool = False
-) -> float:
+    ) -> float:
     """Evaluate the given model on CartPole-v1 environment.
 
     Returns the average score achieved on the given number of episodes.
@@ -86,16 +86,10 @@ def evaluate_model(
 def main(args: argparse.Namespace) -> keras.Model | None:
     # Set the random seed and the number of threads.
     keras.utils.set_random_seed(args.seed)
-    if args.threads:
-        torch.set_num_threads(args.threads)
-        torch.set_num_interop_threads(args.threads)
+    torch.set_num_threads(args.threads)
+    torch.set_num_interop_threads(args.threads)
 
     if not args.evaluate:
-        if args.batch_size is ...:
-            raise ValueError("You must specify the batch size, either in the defaults or on the command line.")
-        if args.epochs is ...:
-            raise ValueError("You must specify the number of epochs, either in the defaults or on the command line.")
-
         # Create logdir name
         args.logdir = os.path.join("logs", "{}-{}-{}".format(
             os.path.basename(globals().get("__file__", "notebook")),
@@ -106,15 +100,36 @@ def main(args: argparse.Namespace) -> keras.Model | None:
         # Load the data
         data = np.loadtxt("gym_cartpole_data.txt")
         observations, labels = data[:, :-1], data[:, -1].astype(np.int32)
+        print("data shape:", observations.shape, "label shape:", labels.shape)
+
+
 
         # TODO: Create the model in the `model` variable. Note that
         # the model can perform any of:
         # - binary classification with 1 output and sigmoid activation;
         # - two-class classification with 2 outputs and softmax activation.
-        model = ...
+
+        # Convert the labels to one-hot encoding
+        labels = keras.ops.one_hot(labels, num_classes=2)
+
+        model = keras.Sequential(name="gym_model", layers=[
+            # Input layer
+            keras.layers.Input(shape=(observations.shape[1],)),
+            # Hidden layers
+            keras.layers.Dense(8, activation="tanh"),
+            # Output layer
+            keras.layers.Dense(2, activation="softmax"),  # 2 outputs because we have 2 actions in the cart pole problem
+        ])
+
+
+        model.summary()
 
         # TODO: Prepare the model for training using the `model.compile` method.
-        model.compile(...)
+        model.compile(
+            loss=keras.losses.CategoricalCrossentropy(label_smoothing=0.1),
+            optimizer=keras.optimizers.Adam(learning_rate=0.01),
+            metrics=["accuracy"],
+        )
 
         tb_callback = TorchTensorBoardCallback(args.logdir)
         model.fit(observations, labels, batch_size=args.batch_size, epochs=args.epochs, callbacks=[tb_callback])

From 7d362488cd4e86697420800b0d5abdc8b698c449 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Tue, 12 Mar 2024 15:15:17 +0100
Subject: [PATCH 17/64] gym_cartpole: lower Adam learning rate to 0.005 (the average score was 457.23)

---
 labs/02/gym_cartpole.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/labs/02/gym_cartpole.py b/labs/02/gym_cartpole.py
index 8ace3c5..8553b65 100644
--- a/labs/02/gym_cartpole.py
+++ b/labs/02/gym_cartpole.py
@@ -127,7 +127,7 @@ def main(args: argparse.Namespace) -> keras.Model | None:
         # TODO: Prepare the model for training using the `model.compile` method.
         model.compile(
             loss=keras.losses.CategoricalCrossentropy(label_smoothing=0.1),
-            optimizer=keras.optimizers.Adam(learning_rate=0.01),
+            optimizer=keras.optimizers.Adam(learning_rate=0.005),
             metrics=["accuracy"],
         )
 

From ed8a2a0c17cf07203203f0c1cb8e2709c99162f1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Tue, 12 Mar 2024 15:20:49 +0100
Subject: [PATCH 18/64] gym_cartpole: raise Adam learning rate to 0.02 (the average score was 465.86)

---
 labs/02/gym_cartpole.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/labs/02/gym_cartpole.py b/labs/02/gym_cartpole.py
index 8553b65..d7661b2 100644
--- a/labs/02/gym_cartpole.py
+++ b/labs/02/gym_cartpole.py
@@ -127,7 +127,7 @@ def main(args: argparse.Namespace) -> keras.Model | None:
         # TODO: Prepare the model for training using the `model.compile` method.
         model.compile(
             loss=keras.losses.CategoricalCrossentropy(label_smoothing=0.1),
-            optimizer=keras.optimizers.Adam(learning_rate=0.005),
+            optimizer=keras.optimizers.Adam(learning_rate=0.02),
             metrics=["accuracy"],
         )
 

From be3273074f010712761bf1e8a11ec4fe5c52f27a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Tue, 12 Mar 2024 15:25:08 +0100
Subject: [PATCH 19/64] gym_cartpole: 100 epochs, learning rate 0.03, add team description (the average score was 490.01)

---
 labs/02/gym_cartpole.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/labs/02/gym_cartpole.py b/labs/02/gym_cartpole.py
index d7661b2..328b805 100644
--- a/labs/02/gym_cartpole.py
+++ b/labs/02/gym_cartpole.py
@@ -9,6 +9,11 @@
 import numpy as np
 import torch
 
+# Jonas Glerup Røssum <jglr@itu.dk>
+# 31a0a96a-c590-4486-b194-f72765b2ce25
+# Xiao Wang <xiao.wang@student.uni-tuebingen.de>
+# 91d4d1d7-b800-4765-96b9-df098ac36a66
+
 parser = argparse.ArgumentParser()
 # These arguments will be set appropriately by ReCodEx, even if you change them.
 parser.add_argument("--evaluate", default=False, action="store_true", help="Evaluate the given model")
@@ -18,7 +23,7 @@
 parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.")
 # If you add more arguments, ReCodEx will keep them with your default values.
 parser.add_argument("--batch_size", default=5, type=int, help="Batch size.")
-parser.add_argument("--epochs", default=20, type=int, help="Number of epochs.")
+parser.add_argument("--epochs", default=100, type=int, help="Number of epochs.")
 parser.add_argument("--model", default="gym_cartpole_model.keras", type=str, help="Output model path.")
 
 
@@ -127,7 +132,7 @@ def main(args: argparse.Namespace) -> keras.Model | None:
         # TODO: Prepare the model for training using the `model.compile` method.
         model.compile(
             loss=keras.losses.CategoricalCrossentropy(label_smoothing=0.1),
-            optimizer=keras.optimizers.Adam(learning_rate=0.02),
+            optimizer=keras.optimizers.Adam(learning_rate=0.03),
             metrics=["accuracy"],
         )
 

From 01f0bdea9cc6cb91303faf867e6382eb570af81c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Tue, 12 Mar 2024 15:38:13 +0100
Subject: [PATCH 20/64] gym_cartpole: batch size 10, learning rate 0.02 (the average score was 491.41)

---
 labs/02/gym_cartpole.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/labs/02/gym_cartpole.py b/labs/02/gym_cartpole.py
index 328b805..935fbf9 100644
--- a/labs/02/gym_cartpole.py
+++ b/labs/02/gym_cartpole.py
@@ -22,7 +22,7 @@
 parser.add_argument("--seed", default=42, type=int, help="Random seed.")
 parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.")
 # If you add more arguments, ReCodEx will keep them with your default values.
-parser.add_argument("--batch_size", default=5, type=int, help="Batch size.")
+parser.add_argument("--batch_size", default=10, type=int, help="Batch size.")
 parser.add_argument("--epochs", default=100, type=int, help="Number of epochs.")
 parser.add_argument("--model", default="gym_cartpole_model.keras", type=str, help="Output model path.")
 
@@ -132,7 +132,7 @@ def main(args: argparse.Namespace) -> keras.Model | None:
         # TODO: Prepare the model for training using the `model.compile` method.
         model.compile(
             loss=keras.losses.CategoricalCrossentropy(label_smoothing=0.1),
-            optimizer=keras.optimizers.Adam(learning_rate=0.03),
+            optimizer=keras.optimizers.Adam(learning_rate=0.02),
             metrics=["accuracy"],
         )
 

From b57af982675879a6d0df96d8bce23ed5e1f78957 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Tue, 12 Mar 2024 15:40:10 +0100
Subject: [PATCH 21/64] Add test script

---
 labs/02/test.ps1 | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 labs/02/test.ps1

diff --git a/labs/02/test.ps1 b/labs/02/test.ps1
new file mode 100644
index 0000000..fa38f74
--- /dev/null
+++ b/labs/02/test.ps1
@@ -0,0 +1 @@
+../../.venv/Scripts/python .\gym_cartpole.py  && ../../.venv/Scripts/python .\gym_cartpole.py --evaluate

From d0ad9b98def3fb7a71309950d2d75f3abd53c4dd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Tue, 12 Mar 2024 15:50:58 +0100
Subject: [PATCH 22/64] The average score was 498.73.

---
 labs/02/gym_cartpole.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/labs/02/gym_cartpole.py b/labs/02/gym_cartpole.py
index 935fbf9..5d191fa 100644
--- a/labs/02/gym_cartpole.py
+++ b/labs/02/gym_cartpole.py
@@ -132,7 +132,7 @@ def main(args: argparse.Namespace) -> keras.Model | None:
         # TODO: Prepare the model for training using the `model.compile` method.
         model.compile(
             loss=keras.losses.CategoricalCrossentropy(label_smoothing=0.1),
-            optimizer=keras.optimizers.Adam(learning_rate=0.02),
+            optimizer=keras.optimizers.Adam(learning_rate=0.009),
             metrics=["accuracy"],
         )
 

From 83f390a548c87b5c5053c60d834dd69863d1b8b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Tue, 12 Mar 2024 16:14:06 +0100
Subject: [PATCH 23/64] Refactor loss calculation in Model class

---
 labs/02/sgd_backpropagation.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/labs/02/sgd_backpropagation.py b/labs/02/sgd_backpropagation.py
index 1b6eebd..ad65784 100644
--- a/labs/02/sgd_backpropagation.py
+++ b/labs/02/sgd_backpropagation.py
@@ -12,6 +12,11 @@
 
 from mnist import MNIST
 
+# Jonas Glerup Røssum <jglr@itu.dk>
+# 31a0a96a-c590-4486-b194-f72765b2ce25
+# Xiao Wang <xiao.wang@student.uni-tuebingen.de>
+# 91d4d1d7-b800-4765-96b9-df098ac36a66
+
 parser = argparse.ArgumentParser()
 # These arguments will be set appropriately by ReCodEx, even if you change them.
 parser.add_argument("--batch_size", default=50, type=int, help="Batch size.")
@@ -88,7 +93,7 @@ def train_epoch(self, dataset: MNIST.Dataset) -> None:
             #   - either use `keras.ops.one_hot` to obtain one-hot encoded gold labels,
             #   - or suitably use `keras.ops.take_along_axis` to "index" the predicted probabilities.
             # - Finally, compute the average across the batch examples.
-            loss = -keras.ops.mean(keras.ops.sum(keras.ops.one_hot(batch["labels"], MNIST.LABELS) * keras.ops.log(probabilities), axis=-1))
+            loss = keras.ops.mean(keras.ops.categorical_crossentropy(keras.ops.one_hot(batch["labels"], MNIST.LABELS), probabilities))
             # We create a list of all variables. Note that a `keras.Model/Layer` automatically
             # tracks owned variables, so we could also use `self.trainable_variables`
             # (or even `self.variables`, which is useful for loading/saving).

From df4da95e2a896afa8a662dab794393dc614d867f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Tue, 12 Mar 2024 16:16:10 +0100
Subject: [PATCH 24/64] Add .venv/Include to .gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 32199d0..a203ee2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 **/.venv/Lib
 **/.venv/Scripts
+**/.venv/Include
 **/.venv/share
 logs/
 mnist.npz

From 80063fd468a1105d81511a5b9d7ca2579c0fec28 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Wed, 28 Feb 2024 14:06:13 +0100
Subject: [PATCH 25/64] Update user id

---
 labs/team_description.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/labs/team_description.py b/labs/team_description.py
index 1d232bc..8207533 100644
--- a/labs/team_description.py
+++ b/labs/team_description.py
@@ -6,7 +6,4 @@
 #
 # You can find out ReCodEx ID in the URL bar after navigating
 # to your User profile page. The ID has the following format:
-# Jonas Glerup Røssum <jglr@itu.dk>
 # 31a0a96a-c590-4486-b194-f72765b2ce25
-# Xiao Wang <xiao.wang@student.uni-tuebingen.de>
-# 91d4d1d7-b800-4765-96b9-df098ac36a66

From 3efc547fd5bc4d881861f080d3ac25234bfa9618 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Sun, 3 Mar 2024 15:28:32 +0100
Subject: [PATCH 26/64] Solve numpy_entropy

---
 labs/01/numpy_entropy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/labs/01/numpy_entropy.py b/labs/01/numpy_entropy.py
index 4203a24..7ec2359 100644
--- a/labs/01/numpy_entropy.py
+++ b/labs/01/numpy_entropy.py
@@ -21,7 +21,7 @@ def main(args: argparse.Namespace) -> tuple[float, float, float]:
     # Load data distribution, each line containing a datapoint -- a string.
     data_map = {}
 
-    with open(args.data_path, "r", encoding="utf-8") as data:
+    with open(args.data_path, "r") as data:
         for line in data:
             line = line.rstrip("\n")
 

From db3272482d33703e5b19c8a5f7e4c2f1295c5cfb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Sun, 3 Mar 2024 15:31:05 +0100
Subject: [PATCH 27/64] Add pull.sh script to automate upstream pull

---
 pull.sh | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 pull.sh

diff --git a/pull.sh b/pull.sh
new file mode 100644
index 0000000..9cadfe4
--- /dev/null
+++ b/pull.sh
@@ -0,0 +1 @@
+git pull upstream master

From abb43202e7d1e78773195136833fe92f6b9cab3c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Mon, 4 Mar 2024 12:48:47 +0100
Subject: [PATCH 28/64] Fix reshape and compute covariance matrix in
 pca_first.keras.py and pca_first.py

---
 labs/01/pca_first.py | 9 +++++----
 labs/01/test.ps1     | 2 +-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/labs/01/pca_first.py b/labs/01/pca_first.py
index deecf06..263a458 100644
--- a/labs/01/pca_first.py
+++ b/labs/01/pca_first.py
@@ -39,7 +39,7 @@ def main(args: argparse.Namespace) -> tuple[float, float]:
     # We want to reshape it to [args.examples, MNIST.H * MNIST.W * MNIST.C].
     # We can do so using `torch.reshape(data, new_shape)` with new shape
     # `[data.shape[0], data.shape[1] * data.shape[2] * data.shape[3]]`.
-    data = torch.reshape(data, (data.shape[0], data.shape[1] * data.shape[2] * data.shape[3]))
+    data = data.reshape(args.examples, MNIST.H, MNIST.W, MNIST.C)
 
     # Now compute mean of every feature. Use `torch.mean`, and set
     # `dim` (or `axis`) argument to zero -- therefore, the mean will be
@@ -47,18 +47,19 @@ def main(args: argparse.Namespace) -> tuple[float, float]:
     #
     # Note that for compatibility with Numpy/TF/Keras, all `dim` arguments
     # in PyTorch can be also called `axis`.
-    mean = torch.mean(data, axis=0)
+    mean = torch.mean(data, dim=0)
 
     # Compute the covariance matrix. The covariance matrix is
     #   (data - mean)^T * (data - mean) / data.shape[0]
     # where transpose can be computed using `torch.transpose` or `torch.t` and
     # matrix multiplication using either Python operator @ or `torch.matmul`.
-    cov = torch.matmul(torch.t(data-mean), data-mean)/data.shape[0]
+    cov = (data-mean).t @ (data-mean) / data.shape[0]
+    print(cov)
 
     # TODO: Compute the total variance, which is the sum of the diagonal
     # of the covariance matrix. To extract the diagonal use `torch.diagonal`,
     # and to sum a tensor use `torch.sum`.
-    total_variance = torch.sum(torch.diagonal(cov)).item()
+    total_variance = torch.diagonal(cov).sum()
 
     # TODO: Now run `args.iterations` of the power iteration algorithm.
     # Start with a vector of `cov.shape[0]` ones of type `torch.float32` using `torch.ones`.
diff --git a/labs/01/test.ps1 b/labs/01/test.ps1
index 1a8e7cd..75ddf37 100644
--- a/labs/01/test.ps1
+++ b/labs/01/test.ps1
@@ -1,4 +1,4 @@
 python3 numpy_entropy.py --data_path numpy_entropy_data_1.txt --model_path numpy_entropy_model_1.txt
 python3 numpy_entropy.py --data_path numpy_entropy_data_2.txt --model_path numpy_entropy_model_2.txt
 python3 numpy_entropy.py --data_path numpy_entropy_data_3.txt --model_path numpy_entropy_model_3.txt
-python3 numpy_entropy.py --data_path numpy_entropy_data_4.txt --model_path numpy_entropy_model_4.txt
+spython3 numpy_entropy.py --data_path numpy_entropy_data_4.txt --model_path numpy_entropy_model_4.txt

From 63db7e38a68f34f0cab8828beeba0c59027bc4e3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Mon, 4 Mar 2024 12:48:55 +0100
Subject: [PATCH 29/64] Add .gitignore, pull.ps1, and setup.ps1 files

---
 .gitignore | 7 +------
 pull.sh    | 1 -
 setup.ps1  | 5 -----
 3 files changed, 1 insertion(+), 12 deletions(-)
 delete mode 100644 pull.sh

diff --git a/.gitignore b/.gitignore
index a203ee2..1d17dae 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1 @@
-**/.venv/Lib
-**/.venv/Scripts
-**/.venv/Include
-**/.venv/share
-logs/
-mnist.npz
+.venv
diff --git a/pull.sh b/pull.sh
deleted file mode 100644
index 9cadfe4..0000000
--- a/pull.sh
+++ /dev/null
@@ -1 +0,0 @@
-git pull upstream master
diff --git a/setup.ps1 b/setup.ps1
index f1f7bbe..8fd7b89 100644
--- a/setup.ps1
+++ b/setup.ps1
@@ -1,6 +1 @@
-git remote rename origin upstream
-git remote add origin git@github.com:joglr/npfl138.git
-git fetch
-git checkout master
-python -m venv .venv
 .venv/Scripts/pip install -r .\labs\requirements.txt

From 0aa170f18cb876ac6d443df03889db9e8b9c1e5e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Mon, 4 Mar 2024 18:35:38 +0100
Subject: [PATCH 30/64] Update team description

---
 labs/team_description.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/labs/team_description.py b/labs/team_description.py
index 8207533..1d232bc 100644
--- a/labs/team_description.py
+++ b/labs/team_description.py
@@ -6,4 +6,7 @@
 #
 # You can find out ReCodEx ID in the URL bar after navigating
 # to your User profile page. The ID has the following format:
+# Jonas Glerup Røssum <jglr@itu.dk>
 # 31a0a96a-c590-4486-b194-f72765b2ce25
+# Xiao Wang <xiao.wang@student.uni-tuebingen.de>
+# 91d4d1d7-b800-4765-96b9-df098ac36a66

From 90fe7a309ec94ccfa9fc7c76ae45abb7692757a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Mon, 4 Mar 2024 19:12:28 +0100
Subject: [PATCH 31/64] Specify encoding

---
 labs/01/numpy_entropy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/labs/01/numpy_entropy.py b/labs/01/numpy_entropy.py
index 7ec2359..4203a24 100644
--- a/labs/01/numpy_entropy.py
+++ b/labs/01/numpy_entropy.py
@@ -21,7 +21,7 @@ def main(args: argparse.Namespace) -> tuple[float, float, float]:
     # Load data distribution, each line containing a datapoint -- a string.
     data_map = {}
 
-    with open(args.data_path, "r") as data:
+    with open(args.data_path, "r", encoding="utf-8") as data:
         for line in data:
             line = line.rstrip("\n")
 

From 4b84b2f4d55c6d908be6e2c631282b1f8287f5e4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Mon, 4 Mar 2024 19:15:48 +0100
Subject: [PATCH 32/64] Add Lisa's solution

---
 labs/01/pca_first.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/labs/01/pca_first.py b/labs/01/pca_first.py
index 263a458..e2b58aa 100644
--- a/labs/01/pca_first.py
+++ b/labs/01/pca_first.py
@@ -39,7 +39,7 @@ def main(args: argparse.Namespace) -> tuple[float, float]:
     # We want to reshape it to [args.examples, MNIST.H * MNIST.W * MNIST.C].
     # We can do so using `torch.reshape(data, new_shape)` with new shape
     # `[data.shape[0], data.shape[1] * data.shape[2] * data.shape[3]]`.
-    data = data.reshape(args.examples, MNIST.H, MNIST.W, MNIST.C)
+    data = torch.reshape(data, (data.shape[0], data.shape[1] * data.shape[2] * data.shape[3]))
 
     # Now compute mean of every feature. Use `torch.mean`, and set
     # `dim` (or `axis`) argument to zero -- therefore, the mean will be
@@ -47,19 +47,18 @@ def main(args: argparse.Namespace) -> tuple[float, float]:
     #
     # Note that for compatibility with Numpy/TF/Keras, all `dim` arguments
     # in PyTorch can be also called `axis`.
-    mean = torch.mean(data, dim=0)
+    mean = torch.mean(data, axis=0)
 
     # Compute the covariance matrix. The covariance matrix is
     #   (data - mean)^T * (data - mean) / data.shape[0]
     # where transpose can be computed using `torch.transpose` or `torch.t` and
     # matrix multiplication using either Python operator @ or `torch.matmul`.
-    cov = (data-mean).t @ (data-mean) / data.shape[0]
-    print(cov)
+    cov = torch.matmul(torch.t(data-mean), data-mean)/data.shape[0]
 
     # TODO: Compute the total variance, which is the sum of the diagonal
     # of the covariance matrix. To extract the diagonal use `torch.diagonal`,
     # and to sum a tensor use `torch.sum`.
-    total_variance = torch.diagonal(cov).sum()
+    total_variance = torch.sum(torch.diagonal(cov)).item()
 
     # TODO: Now run `args.iterations` of the power iteration algorithm.
     # Start with a vector of `cov.shape[0]` ones of type `torch.float32` using `torch.ones`.
@@ -73,7 +72,7 @@ def main(args: argparse.Namespace) -> tuple[float, float]:
          # 2. s = l2_norm(v)
          #    The l2_norm can be computed using for example `torch.linalg.vector_norm`.
          # 3. v = v / s
-        v = cov @ v
+        v = cov*v
         s = torch.linalg.vector_norm(v)
         v = v/s
 

From 7955c92bdff5f339d160c16f6837117775812999 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Mon, 4 Mar 2024 19:18:44 +0100
Subject: [PATCH 33/64] Use matrix multiplication instead of element-wise
 multiplication

---
 labs/01/pca_first.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/labs/01/pca_first.py b/labs/01/pca_first.py
index e2b58aa..deecf06 100644
--- a/labs/01/pca_first.py
+++ b/labs/01/pca_first.py
@@ -72,7 +72,7 @@ def main(args: argparse.Namespace) -> tuple[float, float]:
          # 2. s = l2_norm(v)
          #    The l2_norm can be computed using for example `torch.linalg.vector_norm`.
          # 3. v = v / s
-        v = cov*v
+        v = cov @ v
         s = torch.linalg.vector_norm(v)
         v = v/s
 

From 68a5439aad9a603fef76dd63e9b9b6fcd6083ab1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Mon, 4 Mar 2024 19:18:50 +0100
Subject: [PATCH 34/64] Fix test script

---
 labs/01/test.ps1 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/labs/01/test.ps1 b/labs/01/test.ps1
index 75ddf37..1a8e7cd 100644
--- a/labs/01/test.ps1
+++ b/labs/01/test.ps1
@@ -1,4 +1,4 @@
 python3 numpy_entropy.py --data_path numpy_entropy_data_1.txt --model_path numpy_entropy_model_1.txt
 python3 numpy_entropy.py --data_path numpy_entropy_data_2.txt --model_path numpy_entropy_model_2.txt
 python3 numpy_entropy.py --data_path numpy_entropy_data_3.txt --model_path numpy_entropy_model_3.txt
-spython3 numpy_entropy.py --data_path numpy_entropy_data_4.txt --model_path numpy_entropy_model_4.txt
+python3 numpy_entropy.py --data_path numpy_entropy_data_4.txt --model_path numpy_entropy_model_4.txt

From 6c10a616d7800ab05609360833cbbd7dcce17980 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Tue, 12 Mar 2024 08:36:16 +0100
Subject: [PATCH 35/64] Update repo setup

---
 .gitignore | 4 +++-
 setup.ps1  | 5 +++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 1d17dae..0fb63b8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,3 @@
-.venv
+**/.venv/Lib
+**/.venv/Scripts
+**/.venv/share
diff --git a/setup.ps1 b/setup.ps1
index 8fd7b89..f1f7bbe 100644
--- a/setup.ps1
+++ b/setup.ps1
@@ -1 +1,6 @@
+git remote rename origin upstream
+git remote add origin git@github.com:joglr/npfl138.git
+git fetch
+git checkout master
+python -m venv .venv
 .venv/Scripts/pip install -r .\labs\requirements.txt

From 3dcc91edcd41c7df70866d8b860e3c514916d18a Mon Sep 17 00:00:00 2001
From: lizawang <56673986+lizawang@users.noreply.github.com>
Date: Sat, 16 Mar 2024 21:35:11 +0100
Subject: [PATCH 36/64] task2,3

---
 labs/03/mnist_ensemble.py       | 15 ++++++++++-----
 labs/03/mnist_regularization.py | 13 ++++++++-----
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/labs/03/mnist_ensemble.py b/labs/03/mnist_ensemble.py
index ebffcf9..dc8f60d 100644
--- a/labs/03/mnist_ensemble.py
+++ b/labs/03/mnist_ensemble.py
@@ -7,6 +7,7 @@
 import torch
 
 from mnist import MNIST
+import numpy as np
 
 parser = argparse.ArgumentParser()
 # These arguments will be set appropriately by ReCodEx, even if you change them.
@@ -54,9 +55,11 @@ def main(args: argparse.Namespace) -> tuple[list[float], list[float]]:
         print("Done")
 
     individual_accuracies, ensemble_accuracies = [], []
+    model_predictions = []
     for model in range(args.models):
         # TODO: Compute the accuracy on the dev set for the individual `models[model]`.
-        individual_accuracy = ...
+        predictions = models[model].predict(mnist.dev.data["images"])
+        individual_accuracy = np.sum(np.argmax(predictions, axis=-1) == mnist.dev.data["labels"])/mnist.dev.size
 
         # TODO: Compute the accuracy on the dev set for the ensemble `models[0:model+1]`.
         #
@@ -69,10 +72,12 @@ def main(args: argparse.Namespace) -> tuple[list[float], list[float]]:
         #    need to construct Keras ensemble model at all, and instead call `model.predict`
         #    on the individual models and average the results. To measure accuracy,
         #    either do it completely manually or use `keras.metrics.SparseCategoricalAccuracy`.
-        ensemble_accuracy = ...
-
-        # Store the accuracies
-        individual_accuracies.append(individual_accuracy)
+        # Store the predictions
+        model_predictions.append(predictions)
+        pred_avg = np.mean(model_predictions, axis=0)
+        ensemble_accuracy = np.sum(np.argmax(pred_avg, axis=-1) == mnist.dev.data["labels"])/mnist.dev.size
+        # Store the accuracies   
+        individual_accuracies.append(individual_accuracy)  
         ensemble_accuracies.append(ensemble_accuracy)
     return individual_accuracies, ensemble_accuracies
 
diff --git a/labs/03/mnist_regularization.py b/labs/03/mnist_regularization.py
index cd78fcf..769d7a4 100644
--- a/labs/03/mnist_regularization.py
+++ b/labs/03/mnist_regularization.py
@@ -74,8 +74,10 @@ def main(args: argparse.Namespace) -> dict[str, float]:
     model = keras.Sequential()
     model.add(keras.layers.Rescaling(1 / 255))
     model.add(keras.layers.Flatten())
+    model.add(keras.layers.Dropout(args.dropout))
     for hidden_layer in args.hidden_layers:
         model.add(keras.layers.Dense(hidden_layer, activation="relu"))
+        model.add(keras.layers.Dropout(args.dropout))
     model.add(keras.layers.Dense(MNIST.LABELS, activation="softmax"))
 
     # TODO: Implement label smoothing with the given `args.label_smoothing` strength.
@@ -90,20 +92,21 @@ def main(args: argparse.Namespace) -> dict[str, float]:
     # rate and a weight decay of strength `args.weight_decay`. Then call the
     # `exclude_from_weight_decay` method to specify that all variables with "bias"
     # in their name should not be decayed.
-    optimizer = ...
+    optimizer = keras.optimizers.AdamW(weight_decay=args.weight_decay)
+    optimizer.exclude_from_weight_decay=[v for v in model.variables if "bias" in v.name]
 
     model.compile(
         optimizer=optimizer,
-        loss=keras.losses.SparseCategoricalCrossentropy(),
-        metrics=[keras.metrics.SparseCategoricalAccuracy(name="accuracy")],
+        loss=keras.losses.CategoricalCrossentropy(label_smoothing=args.label_smoothing),
+        metrics=[keras.metrics.CategoricalAccuracy(name="accuracy")],
     )
 
     tb_callback = TorchTensorBoardCallback(args.logdir)
 
     logs = model.fit(
-        mnist.train.data["images"], mnist.train.data["labels"],
+        mnist.train.data["images"], keras.utils.to_categorical(mnist.train.data["labels"], num_classes=mnist.LABELS),
         batch_size=args.batch_size, epochs=args.epochs,
-        validation_data=(mnist.dev.data["images"], mnist.dev.data["labels"]),
+        validation_data=(mnist.dev.data["images"], keras.utils.to_categorical(mnist.dev.data["labels"], mnist.LABELS)),
         callbacks=[tb_callback],
     )
 

From 2f0852a2edc1de73b92a63b6958c8d723a432ca0 Mon Sep 17 00:00:00 2001
From: lizawang <56673986+lizawang@users.noreply.github.com>
Date: Mon, 11 Mar 2024 11:34:43 +0100
Subject: [PATCH 37/64] my solution so far

---
 labs/02/gym_cartpole.py        |  2 +-
 labs/02/mnist_training.py      | 30 +++++++++++++-
 labs/02/sgd_backpropagation.py | 71 ++++++++++++++++++++++++++--------
 3 files changed, 85 insertions(+), 18 deletions(-)

diff --git a/labs/02/gym_cartpole.py b/labs/02/gym_cartpole.py
index 5d191fa..5b2eafc 100644
--- a/labs/02/gym_cartpole.py
+++ b/labs/02/gym_cartpole.py
@@ -137,7 +137,7 @@ def main(args: argparse.Namespace) -> keras.Model | None:
         )
 
         tb_callback = TorchTensorBoardCallback(args.logdir)
-        model.fit(observations, labels, batch_size=args.batch_size, epochs=args.epochs, callbacks=[tb_callback])
+        model.fit(observations, labels, batch_size=args.batch_size, validation_split=0.1, epochs=args.epochs, callbacks=[tb_callback])
 
         # Save the model, without the optimizer state.
         model.save(args.model)
diff --git a/labs/02/mnist_training.py b/labs/02/mnist_training.py
index 6655133..b9105c6 100644
--- a/labs/02/mnist_training.py
+++ b/labs/02/mnist_training.py
@@ -107,8 +107,32 @@ def main(args: argparse.Namespace) -> dict[str, float]:
     #   in `model.optimizer._learning_rate` if needed), so after training, the learning rate
     #   should be `args.learning_rate_final`.
 
+    optimizer = None
+    lr, momen, decay, final_lr = args.learning_rate, args.momentum, args.decay, args.learning_rate_final
+    if decay:
+        if not final_lr:
+            print("Please define a final learning rate!")
+        else:    
+            steps = mnist.train.size/args.batch_size
+            init_lr = args.learning_rate
+            if decay == "linear":
+                lr = keras.optimizers.schedules.PolynomialDecay(initial_learning_rate=init_lr, decay_steps=steps, end_learning_rate=final_lr)
+            elif decay == "exponential":
+                lr = keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=init_lr, decay_steps=steps, decay_rate=0.90)
+            elif decay == "cosine":
+                lr = keras.optimizers.schedules.CosineDecay(initial_learning_rate=init_lr, decay_steps=steps, alpha=0.1)
+
+    if args.optimizer == 'SGD':
+        if momen:
+            optimizer = keras.optimizers.SGD(learning_rate=lr, momentum=momen, nesterov=True)
+        else:
+            optimizer = keras.optimizers.SGD(learning_rate=lr)
+    elif args.optimizer =="Adam":
+        optimizer = keras.optimizers.Adam(learning_rate=lr)
+              
+        
     model.compile(
-        optimizer=...,
+        optimizer=optimizer,
         loss=keras.losses.SparseCategoricalCrossentropy(),
         metrics=[keras.metrics.SparseCategoricalAccuracy("accuracy")],
     )
@@ -121,6 +145,10 @@ def main(args: argparse.Namespace) -> dict[str, float]:
         validation_data=(mnist.dev.data["images"], mnist.dev.data["labels"]),
         callbacks=[tb_callback],
     )
+    model.summary()
+
+    if decay:        
+        print("Next learning rate to be used:", model.optimizer.learning_rate)
 
     # Return development metrics for ReCodEx to validate.
     return {metric: values[-1] for metric, values in logs.history.items() if metric.startswith("val_")}
diff --git a/labs/02/sgd_backpropagation.py b/labs/02/sgd_backpropagation.py
index ad65784..e3cfacf 100644
--- a/labs/02/sgd_backpropagation.py
+++ b/labs/02/sgd_backpropagation.py
@@ -3,7 +3,10 @@
 import datetime
 import os
 import re
-os.environ.setdefault("KERAS_BACKEND", "torch")  # Use PyTorch backend unless specified otherwise
+
+os.environ.setdefault(
+    "KERAS_BACKEND", "torch"
+)  # Use PyTorch backend unless specified otherwise
 
 import keras
 import numpy as np
@@ -21,11 +24,17 @@
 # These arguments will be set appropriately by ReCodEx, even if you change them.
 parser.add_argument("--batch_size", default=50, type=int, help="Batch size.")
 parser.add_argument("--epochs", default=10, type=int, help="Number of epochs.")
-parser.add_argument("--hidden_layer", default=100, type=int, help="Size of the hidden layer.")
+parser.add_argument(
+    "--hidden_layer", default=100, type=int, help="Size of the hidden layer."
+)
 parser.add_argument("--learning_rate", default=0.1, type=float, help="Learning rate.")
-parser.add_argument("--recodex", default=False, action="store_true", help="Evaluation in ReCodEx.")
+parser.add_argument(
+    "--recodex", default=False, action="store_true", help="Evaluation in ReCodEx."
+)
 parser.add_argument("--seed", default=42, type=int, help="Random seed.")
-parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.")
+parser.add_argument(
+    "--threads", default=1, type=int, help="Maximum number of threads to use."
+)
 # If you add more arguments, ReCodEx will keep them with your default values.
 
 
@@ -35,7 +44,11 @@ def __init__(self, args: argparse.Namespace) -> None:
         self._args = args
 
         self._W1 = keras.Variable(
-            keras.random.normal([MNIST.W * MNIST.H * MNIST.C, args.hidden_layer], stddev=0.1, seed=args.seed),
+            keras.random.normal(
+                [MNIST.W * MNIST.H * MNIST.C, args.hidden_layer],
+                stddev=0.1,
+                seed=args.seed,
+            ),
             trainable=True,
         )
         self._b1 = keras.Variable(keras.ops.zeros([args.hidden_layer]), trainable=True)
@@ -44,7 +57,12 @@ def __init__(self, args: argparse.Namespace) -> None:
         # - _W2, which is a trainable variable of size `[args.hidden_layer, MNIST.LABELS]`,
         #   initialized to `keras.random.normal` value `with stddev=0.1` and `seed=args.seed`,
         # - _b2, which is a trainable variable of size `[MNIST.LABELS]` initialized to zeros
-        self._W2 = keras.Variable(keras.random.normal([args.hidden_layer, MNIST.LABELS], stddev=0.1, seed=args.seed), trainable=True)
+        self._W2 = keras.Variable(
+            keras.random.normal(
+                [args.hidden_layer, MNIST.LABELS], stddev=0.1, seed=args.seed
+            ),
+            trainable=True,
+        )
 
         self._b2 = keras.Variable(keras.ops.zeros([MNIST.LABELS]), trainable=True)
 
@@ -67,7 +85,9 @@ def predict(self, inputs: torch.Tensor) -> torch.Tensor:
         # - then multiply it by `self._W1` and then add `self._b1`
         # - apply `keras.ops.tanh`
 
-        hidden_layer_output = keras.ops.tanh(keras.ops.matmul(reshaped_inputs, self._W1) + self._b1)
+        hidden_layer_output = keras.ops.tanh(
+            keras.ops.matmul(reshaped_inputs, self._W1) + self._b1
+        )
 
         # - multiply the result by `self._W2` and then add `self._b2`
 
@@ -93,11 +113,16 @@ def train_epoch(self, dataset: MNIST.Dataset) -> None:
             #   - either use `keras.ops.one_hot` to obtain one-hot encoded gold labels,
             #   - or suitably use `keras.ops.take_along_axis` to "index" the predicted probabilities.
             # - Finally, compute the average across the batch examples.
-            loss = keras.ops.mean(keras.ops.categorical_crossentropy(keras.ops.one_hot(batch["labels"], MNIST.LABELS), probabilities))
+            loss = keras.ops.mean(
+                keras.ops.categorical_crossentropy(
+                    keras.ops.one_hot(batch["labels"], MNIST.LABELS), probabilities
+                )
+            )
             # We create a list of all variables. Note that a `keras.Model/Layer` automatically
             # tracks owned variables, so we could also use `self.trainable_variables`
             # (or even `self.variables`, which is useful for loading/saving).
             variables = [self._W1, self._b1, self._W2, self._b2]
+            # print("w1, b1, w2, b2:", self._W1.shape, self._b1.shape, self._W2.shape, self._b2.shape)
 
             # Compute the gradient of the loss with respect to variables using
             # backpropagation algorithm by
@@ -107,6 +132,7 @@ def train_epoch(self, dataset: MNIST.Dataset) -> None:
             loss.backward()
 
             gradients = [variable.value.grad for variable in variables]
+            # print("gradients:", gradients)
             with torch.no_grad():
                 for variable, gradient in zip(variables, gradients):
                     # Perform the SGD update with learning rate `self._args.learning_rate`
@@ -126,7 +152,6 @@ def evaluate(self, dataset: MNIST.Dataset) -> float:
             # Evaluate how many batch examples were predicted
             # correctly and increase `correct` variable accordingly.
             correct += np.sum(np.argmax(probabilities, axis=-1) == batch["labels"])
-
         return correct / dataset.size
 
 
@@ -138,11 +163,19 @@ def main(args: argparse.Namespace) -> tuple[float, float]:
         torch.set_num_interop_threads(args.threads)
 
     # Create logdir name
-    args.logdir = os.path.join("logs", "{}-{}-{}".format(
-        os.path.basename(globals().get("__file__", "notebook")),
-        datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"),
-        ",".join(("{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v) for k, v in sorted(vars(args).items())))
-    ))
+    args.logdir = os.path.join(
+        "logs",
+        "{}-{}-{}".format(
+            os.path.basename(globals().get("__file__", "notebook")),
+            datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"),
+            ",".join(
+                (
+                    "{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v)
+                    for k, v in sorted(vars(args).items())
+                )
+            ),
+        ),
+    )
 
     # Load data
     mnist = MNIST()
@@ -159,12 +192,18 @@ def main(args: argparse.Namespace) -> tuple[float, float]:
 
         # Evaluate the dev data using `evaluate` on `mnist.dev` dataset
         accuracy = model.evaluate(mnist.dev)
-        print("Dev accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * accuracy), flush=True)
+        print(
+            "Dev accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * accuracy),
+            flush=True,
+        )
         writer.add_scalar("dev/accuracy", 100 * accuracy, epoch + 1)
 
     # Evaluate the test data using `evaluate` on `mnist.test` dataset
     test_accuracy = model.evaluate(mnist.test)
-    print("Test accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * test_accuracy), flush=True)
+    print(
+        "Test accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * test_accuracy),
+        flush=True,
+    )
     writer.add_scalar("test/accuracy", 100 * test_accuracy, epoch + 1)
 
     # Return dev and test accuracies for ReCodEx to validate.

From 25435047934686618e9f374082ec2d2b84f657a1 Mon Sep 17 00:00:00 2001
From: lizawang <56673986+lizawang@users.noreply.github.com>
Date: Tue, 12 Mar 2024 10:59:02 +0100
Subject: [PATCH 38/64] Add early stopping to gym_cartpole; derive decay params from final LR

---
 labs/02/gym_cartpole.py   | 6 ++++--
 labs/02/mnist_training.py | 8 +++++---
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/labs/02/gym_cartpole.py b/labs/02/gym_cartpole.py
index 5b2eafc..c55fb14 100644
--- a/labs/02/gym_cartpole.py
+++ b/labs/02/gym_cartpole.py
@@ -8,6 +8,7 @@
 import keras
 import numpy as np
 import torch
+from collections import Counter
 
 # Jonas Glerup Røssum <jglr@itu.dk>
 # 31a0a96a-c590-4486-b194-f72765b2ce25
@@ -105,7 +106,7 @@ def main(args: argparse.Namespace) -> keras.Model | None:
         # Load the data
         data = np.loadtxt("gym_cartpole_data.txt")
         observations, labels = data[:, :-1], data[:, -1].astype(np.int32)
-        print("data shape:", observations.shape, "label shape:", labels.shape)
+        print("data shape:", observations.shape, "label shape:", Counter(labels))
 
 
 
@@ -137,7 +138,8 @@ def main(args: argparse.Namespace) -> keras.Model | None:
         )
 
         tb_callback = TorchTensorBoardCallback(args.logdir)
-        model.fit(observations, labels, batch_size=args.batch_size, validation_split=0.1, epochs=args.epochs, callbacks=[tb_callback])
+        callback = keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
+        model.fit(observations, labels, batch_size=args.batch_size, validation_split=0.1, epochs=args.epochs, callbacks=[tb_callback, callback])
 
         # Save the model, without the optimizer state.
         model.save(args.model)
diff --git a/labs/02/mnist_training.py b/labs/02/mnist_training.py
index b9105c6..5c7e14a 100644
--- a/labs/02/mnist_training.py
+++ b/labs/02/mnist_training.py
@@ -118,9 +118,11 @@ def main(args: argparse.Namespace) -> dict[str, float]:
             if decay == "linear":
                 lr = keras.optimizers.schedules.PolynomialDecay(initial_learning_rate=init_lr, decay_steps=steps, end_learning_rate=final_lr)
             elif decay == "exponential":
-                lr = keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=init_lr, decay_steps=steps, decay_rate=0.90)
+                decay_rate = round(final_lr/init_lr,2)
+                lr = keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=init_lr, decay_steps=steps, decay_rate=decay_rate)
             elif decay == "cosine":
-                lr = keras.optimizers.schedules.CosineDecay(initial_learning_rate=init_lr, decay_steps=steps, alpha=0.1)
+                alpha = round(final_lr/init_lr,2)
+                lr = keras.optimizers.schedules.CosineDecay(initial_learning_rate=init_lr, decay_steps=steps, alpha=alpha)
 
     if args.optimizer == 'SGD':
         if momen:
@@ -148,7 +150,7 @@ def main(args: argparse.Namespace) -> dict[str, float]:
     model.summary()
 
     if decay:        
-        print("Next learning rate to be used:", model.optimizer.learning_rate)
+        print("Next learning rate to be used:", model.optimizer.learning_rate.item())
 
     # Return development metrics for ReCodEx to validate.
     return {metric: values[-1] for metric, values in logs.history.items() if metric.startswith("val_")}

From a76ee8095932fa261634413dff590b037524d871 Mon Sep 17 00:00:00 2001
From: lizawang <56673986+lizawang@users.noreply.github.com>
Date: Tue, 12 Mar 2024 11:05:20 +0100
Subject: [PATCH 39/64] Add output layer, forward pass and loss to sgd_manual

---
 labs/02/sgd_manual.py | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/labs/02/sgd_manual.py b/labs/02/sgd_manual.py
index 422d3e9..0144a60 100644
--- a/labs/02/sgd_manual.py
+++ b/labs/02/sgd_manual.py
@@ -39,7 +39,9 @@ def __init__(self, args: argparse.Namespace) -> None:
         # - _W2, which is a trainable variable of size `[args.hidden_layer, MNIST.LABELS]`,
         #   initialized to `keras.random.normal` value `with stddev=0.1` and `seed=args.seed`,
         # - _b2, which is a trainable variable of size `[MNIST.LABELS]` initialized to zeros
-        ...
+        self._W2 = keras.Variable(keras.random.normal([args.hidden_layer, MNIST.LABELS], stddev=0.1, seed=args.seed),
+            trainable=True)
+        self._b2 = keras.Variable(keras.ops.zeros([MNIST.LABELS]), trainable=True)
 
     def predict(self, inputs: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
         # TODO(sgd_backpropagation): Define the computation of the network. Notably:
@@ -56,7 +58,14 @@ def predict(self, inputs: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, tor
         # TODO: In order to support manual gradient computation, you should
         # return not only the output layer, but also the hidden layer after applying
         # tanh, and the input layer after reshaping.
-        return ..., ..., ...
+        input = keras.ops.cast(inputs, dtype="float32")
+        input = torch.div(input, 255)
+        input = input.reshape([input.shape[0], -1])
+        output = keras.ops.matmul(input,self._W1) + self._b1
+        output = keras.ops.tanh(output)
+        output = keras.ops.matmul(output,self._W2) + self._b2
+        output = keras.ops.softmax(output)
+        return output
 
     def train_epoch(self, dataset: MNIST.Dataset) -> None:
         for batch in dataset.batches(self._args.batch_size):
@@ -72,7 +81,7 @@ def train_epoch(self, dataset: MNIST.Dataset) -> None:
             #
             # Compute the input layer, hidden layer and output layer
             # of the batch images using `self.predict`.
-
+            probabilities = self.predict(torch.tensor(batch['images']))
             # TODO: Compute the gradient of the loss with respect to all
             # variables. Note that the loss is computed as in `sgd_backpropagation`:
             # - For every batch example, the loss is the categorical crossentropy of the
@@ -80,7 +89,6 @@ def train_epoch(self, dataset: MNIST.Dataset) -> None:
             #   - either use `keras.ops.one_hot` to obtain one-hot encoded gold labels,
             #   - or suitably use `keras.ops.take_along_axis` to "index" the predicted probabilities.
             # - Finally, compute the average across the batch examples.
-            #
             # During the gradient computation, you will need to compute
             # a batched version of a so-called outer product
             #   `C[a, i, j] = A[a, i] * B[a, j]`,
@@ -88,6 +96,7 @@ def train_epoch(self, dataset: MNIST.Dataset) -> None:
             #   `A[:, :, np.newaxis] * B[:, np.newaxis, :]`
             # or with
             #   `keras.ops.einsum("ai,aj->aij", A, B)`.
+            loss = torch.mean(keras.ops.categorical_crossentropy(keras.ops.one_hot(batch['labels'], num_classes=MNIST.LABELS), probabilities))
 
             # TODO(sgd_backpropagation): Perform the SGD update with learning rate `self._args.learning_rate`
             # for the variable and computed gradient. You can modify the

From 8f66c587b28322f9b47e0fd9ffb8f1733902765e Mon Sep 17 00:00:00 2001
From: lizawang <56673986+lizawang@users.noreply.github.com>
Date: Tue, 12 Mar 2024 14:58:51 +0100
Subject: [PATCH 40/64] One-hot encode gym_cartpole labels and drop validation split

---
 labs/02/gym_cartpole.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/labs/02/gym_cartpole.py b/labs/02/gym_cartpole.py
index c55fb14..d9a378b 100644
--- a/labs/02/gym_cartpole.py
+++ b/labs/02/gym_cartpole.py
@@ -138,8 +138,8 @@ def main(args: argparse.Namespace) -> keras.Model | None:
         )
 
         tb_callback = TorchTensorBoardCallback(args.logdir)
-        callback = keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
-        model.fit(observations, labels, batch_size=args.batch_size, validation_split=0.1, epochs=args.epochs, callbacks=[tb_callback, callback])
+        labels = keras.ops.one_hot(labels,num_classes=2)
+        model.fit(observations, labels, batch_size=args.batch_size, epochs=args.epochs, callbacks=[tb_callback, callback])
 
         # Save the model, without the optimizer state.
         model.save(args.model)

From 986032a279fcb7e020d92841cea0617f531aff4a Mon Sep 17 00:00:00 2001
From: lizawang <56673986+lizawang@users.noreply.github.com>
Date: Tue, 12 Mar 2024 16:02:04 +0100
Subject: [PATCH 41/64] Remove stale callback in gym_cartpole; stop rounding decay ratios

---
 labs/02/gym_cartpole.py   | 3 +--
 labs/02/mnist_training.py | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/labs/02/gym_cartpole.py b/labs/02/gym_cartpole.py
index d9a378b..b708b63 100644
--- a/labs/02/gym_cartpole.py
+++ b/labs/02/gym_cartpole.py
@@ -106,7 +106,6 @@ def main(args: argparse.Namespace) -> keras.Model | None:
         # Load the data
         data = np.loadtxt("gym_cartpole_data.txt")
         observations, labels = data[:, :-1], data[:, -1].astype(np.int32)
-        print("data shape:", observations.shape, "label shape:", Counter(labels))
 
 
 
@@ -139,7 +138,7 @@ def main(args: argparse.Namespace) -> keras.Model | None:
 
         tb_callback = TorchTensorBoardCallback(args.logdir)
         labels = keras.ops.one_hot(labels,num_classes=2)
-        model.fit(observations, labels, batch_size=args.batch_size, epochs=args.epochs, callbacks=[tb_callback, callback])
+        model.fit(observations, labels, batch_size=args.batch_size, epochs=args.epochs, callbacks=[tb_callback])
 
         # Save the model, without the optimizer state.
         model.save(args.model)
diff --git a/labs/02/mnist_training.py b/labs/02/mnist_training.py
index 5c7e14a..4649bae 100644
--- a/labs/02/mnist_training.py
+++ b/labs/02/mnist_training.py
@@ -118,10 +118,10 @@ def main(args: argparse.Namespace) -> dict[str, float]:
             if decay == "linear":
                 lr = keras.optimizers.schedules.PolynomialDecay(initial_learning_rate=init_lr, decay_steps=steps, end_learning_rate=final_lr)
             elif decay == "exponential":
-                decay_rate = round(final_lr/init_lr,2)
+                decay_rate = final_lr/init_lr
                 lr = keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=init_lr, decay_steps=steps, decay_rate=decay_rate)
             elif decay == "cosine":
-                alpha = round(final_lr/init_lr,2)
+                alpha = final_lr/init_lr
                 lr = keras.optimizers.schedules.CosineDecay(initial_learning_rate=init_lr, decay_steps=steps, alpha=alpha)
 
     if args.optimizer == 'SGD':

From 82267643aada5de3da86377e64095163f0857d73 Mon Sep 17 00:00:00 2001
From: lizawang <56673986+lizawang@users.noreply.github.com>
Date: Tue, 12 Mar 2024 21:28:48 +0100
Subject: [PATCH 42/64] Scale decay steps by epochs and add author header in mnist_training

---
 labs/02/mnist_training.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/labs/02/mnist_training.py b/labs/02/mnist_training.py
index 4649bae..116ae98 100644
--- a/labs/02/mnist_training.py
+++ b/labs/02/mnist_training.py
@@ -11,6 +11,11 @@
 
 from mnist import MNIST
 
+# Jonas Glerup Røssum <jglr@itu.dk>
+# 31a0a96a-c590-4486-b194-f72765b2ce25
+# Xiao Wang <xiao.wang@student.uni-tuebingen.de>
+# 91d4d1d7-b800-4765-96b9-df098ac36a66
+
 parser = argparse.ArgumentParser()
 # These arguments will be set appropriately by ReCodEx, even if you change them.
 parser.add_argument("--batch_size", default=50, type=int, help="Batch size.")
@@ -108,12 +113,12 @@ def main(args: argparse.Namespace) -> dict[str, float]:
     #   should be `args.learning_rate_final`.
 
     optimizer = None
-    lr, momen, decay, final_lr = args.learning_rate, args.momentum, args.decay, args.learning_rate_final
+    lr, momen, decay, final_lr, epochs = args.learning_rate, args.momentum, args.decay, args.learning_rate_final, args.epochs
     if decay:
         if not final_lr:
             print("Please define a final learning rate!")
         else:    
-            steps = mnist.train.size/args.batch_size
+            steps = mnist.train.size/args.batch_size*epochs
             init_lr = args.learning_rate
             if decay == "linear":
                 lr = keras.optimizers.schedules.PolynomialDecay(initial_learning_rate=init_lr, decay_steps=steps, end_learning_rate=final_lr)

From 7af654e5cff22559f9f46b333b8ac1176907882d Mon Sep 17 00:00:00 2001
From: lizawang <56673986+lizawang@users.noreply.github.com>
Date: Sat, 16 Mar 2024 16:23:39 +0100
Subject: [PATCH 43/64] Implement manual gradients, SGD update and evaluation in sgd_manual

---
 labs/02/sgd_manual.py | 48 +++++++++++++++++++++++++++++++------------
 1 file changed, 35 insertions(+), 13 deletions(-)

diff --git a/labs/02/sgd_manual.py b/labs/02/sgd_manual.py
index 0144a60..f023328 100644
--- a/labs/02/sgd_manual.py
+++ b/labs/02/sgd_manual.py
@@ -12,6 +12,11 @@
 
 from mnist import MNIST
 
+# Jonas Glerup Røssum <jglr@itu.dk>
+# 31a0a96a-c590-4486-b194-f72765b2ce25
+# Xiao Wang <xiao.wang@student.uni-tuebingen.de>
+# 91d4d1d7-b800-4765-96b9-df098ac36a66
+
 parser = argparse.ArgumentParser()
 # These arguments will be set appropriately by ReCodEx, even if you change them.
 parser.add_argument("--batch_size", default=50, type=int, help="Batch size.")
@@ -61,11 +66,11 @@ def predict(self, inputs: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, tor
         input = keras.ops.cast(inputs, dtype="float32")
         input = torch.div(input, 255)
         input = input.reshape([input.shape[0], -1])
-        output = keras.ops.matmul(input,self._W1) + self._b1
-        output = keras.ops.tanh(output)
-        output = keras.ops.matmul(output,self._W2) + self._b2
-        output = keras.ops.softmax(output)
-        return output
+        hidden_input = keras.ops.matmul(input,self._W1) + self._b1
+        hidden_output = keras.ops.tanh(hidden_input)
+        sm_input = keras.ops.matmul(hidden_output,self._W2) + self._b2
+        output = keras.ops.softmax(sm_input)
+        return input, hidden_output, output
 
     def train_epoch(self, dataset: MNIST.Dataset) -> None:
         for batch in dataset.batches(self._args.batch_size):
@@ -81,7 +86,7 @@ def train_epoch(self, dataset: MNIST.Dataset) -> None:
             #
             # Compute the input layer, hidden layer and output layer
             # of the batch images using `self.predict`.
-            probabilities = self.predict(torch.tensor(batch['images']))
+            input_layer, hidden_layer, probabilities = self.predict(torch.tensor(batch['images']))
             # TODO: Compute the gradient of the loss with respect to all
             # variables. Note that the loss is computed as in `sgd_backpropagation`:
             # - For every batch example, the loss is the categorical crossentropy of the
@@ -96,13 +101,30 @@ def train_epoch(self, dataset: MNIST.Dataset) -> None:
             #   `A[:, :, np.newaxis] * B[:, np.newaxis, :]`
             # or with
             #   `keras.ops.einsum("ai,aj->aij", A, B)`.
-            loss = torch.mean(keras.ops.categorical_crossentropy(keras.ops.one_hot(batch['labels'], num_classes=MNIST.LABELS), probabilities))
+            gold_labels = keras.ops.one_hot(batch['labels'], num_classes=MNIST.LABELS)
+            loss = torch.mean(keras.ops.categorical_crossentropy(gold_labels, probabilities))
+            
+            gd_loss = probabilities - gold_labels
+            gd_b2 = gd_loss
+            #print("loss gradient, hidden_layer, input", gd_b2.shape, hidden_layer.shape, input_layer.shape)
+            gd_w2 = keras.ops.einsum("ai,aj->aij", hidden_layer, gd_loss)
+            gd_h = keras.ops.matmul(gd_loss, keras.ops.transpose(self._W2))
+            hidden_input = keras.ops.matmul(input_layer,self._W1) + self._b1
+            gd_h_i = gd_h*(1-keras.ops.power(keras.ops.tanh(hidden_input), 2))
+            gd_b1 = gd_h_i
+            gd_w1 = keras.ops.einsum("ai,aj->aij", input_layer, gd_h_i)
+            #print("gd_w2, gd_w1, gd_b2, gd_b1:", gd_w2.shape, gd_w1.shape, gd_b2.shape, gd_b1.shape)
 
             # TODO(sgd_backpropagation): Perform the SGD update with learning rate `self._args.learning_rate`
             # for the variable and computed gradient. You can modify the
             # variable value with `variable.assign` or in this case the more
             # efficient `variable.assign_sub`.
-            ...
+            variables = [self._W1, self._b1, self._W2, self._b2]
+            gradients = [gd_w1, gd_b1, gd_w2, gd_b2]
+            with torch.no_grad():
+                for variable, gradient in zip(variables, gradients):
+                    variable.assign_sub(self._args.learning_rate*keras.ops.mean(gradient, axis=0))
+
 
     def evaluate(self, dataset: MNIST.Dataset) -> float:
         # Compute the accuracy of the model prediction
@@ -110,11 +132,11 @@ def evaluate(self, dataset: MNIST.Dataset) -> float:
         for batch in dataset.batches(self._args.batch_size):
             # TODO: Compute the probabilities of the batch images using `self.predict`
             # and convert them to Numpy with `keras.ops.convert_to_numpy`.
-            probabilities = ...
+            probabilities = keras.ops.convert_to_numpy(self.predict(torch.tensor(batch['images']))[2])
 
             # TODO(sgd_backpropagation): Evaluate how many batch examples were predicted
             # correctly and increase `correct` variable accordingly.
-            correct += ...
+            correct += np.sum(np.argmax(probabilities, axis=-1) == batch["labels"])
 
         return correct / dataset.size
 
@@ -144,14 +166,14 @@ def main(args: argparse.Namespace) -> tuple[float, float]:
 
     for epoch in range(args.epochs):
         # TODO: Run the `train_epoch` with `mnist.train` dataset
-
+        model.train_epoch(mnist.train)
         # TODO: Evaluate the dev data using `evaluate` on `mnist.dev` dataset
-        accuracy = ...
+        accuracy = model.evaluate(mnist.dev)
         print("Dev accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * accuracy), flush=True)
         writer.add_scalar("dev/accuracy", 100 * accuracy, epoch + 1)
 
     # TODO: Evaluate the test data using `evaluate` on `mnist.test` dataset
-    test_accuracy = ...
+    test_accuracy = model.evaluate(mnist.test)
     print("Test accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * test_accuracy), flush=True)
     writer.add_scalar("test/accuracy", 100 * test_accuracy, epoch + 1)
 

From aaa30ef49d28208b0f14b3f8a760c21467a47e6c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Mon, 18 Mar 2024 13:06:43 +0100
Subject: [PATCH 44/64] Remove unnecessary entries from .gitignore

---
 .gitignore | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore
index 0fb63b8..f8e2b15 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,3 @@
-**/.venv/Lib
-**/.venv/Scripts
-**/.venv/share
+.venv/
+logs/
+mnist.npz

From ed5c3be212e8244ebc04aa9a4d0b6dc33c388bc9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Sun, 3 Mar 2024 15:28:32 +0100
Subject: [PATCH 45/64] Solve numpy_entropy

---
 labs/01/numpy_entropy.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/labs/01/numpy_entropy.py b/labs/01/numpy_entropy.py
index 4203a24..819b6b0 100644
--- a/labs/01/numpy_entropy.py
+++ b/labs/01/numpy_entropy.py
@@ -21,6 +21,7 @@ def main(args: argparse.Namespace) -> tuple[float, float, float]:
     # Load data distribution, each line containing a datapoint -- a string.
     data_map = {}
 
+    # Load data distribution, each line containing a datapoint -- a string.
     with open(args.data_path, "r", encoding="utf-8") as data:
         for line in data:
             line = line.rstrip("\n")

From 6659f8e26e01e8b5bf15ef4c7ad03ebc9ac7c07c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Mon, 4 Mar 2024 12:48:47 +0100
Subject: [PATCH 46/64] Fix reshape and compute covariance matrix in
 pca_first.keras.py and pca_first.py

---
 labs/01/pca_first.py | 2 +-
 labs/01/test.ps1     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/labs/01/pca_first.py b/labs/01/pca_first.py
index deecf06..688e89c 100644
--- a/labs/01/pca_first.py
+++ b/labs/01/pca_first.py
@@ -58,7 +58,7 @@ def main(args: argparse.Namespace) -> tuple[float, float]:
     # TODO: Compute the total variance, which is the sum of the diagonal
     # of the covariance matrix. To extract the diagonal use `torch.diagonal`,
     # and to sum a tensor use `torch.sum`.
-    total_variance = torch.sum(torch.diagonal(cov)).item()
+    total_variance = torch.diagonal(cov).sum()
 
     # TODO: Now run `args.iterations` of the power iteration algorithm.
     # Start with a vector of `cov.shape[0]` ones of type `torch.float32` using `torch.ones`.
diff --git a/labs/01/test.ps1 b/labs/01/test.ps1
index 1a8e7cd..75ddf37 100644
--- a/labs/01/test.ps1
+++ b/labs/01/test.ps1
@@ -1,4 +1,4 @@
 python3 numpy_entropy.py --data_path numpy_entropy_data_1.txt --model_path numpy_entropy_model_1.txt
 python3 numpy_entropy.py --data_path numpy_entropy_data_2.txt --model_path numpy_entropy_model_2.txt
 python3 numpy_entropy.py --data_path numpy_entropy_data_3.txt --model_path numpy_entropy_model_3.txt
-python3 numpy_entropy.py --data_path numpy_entropy_data_4.txt --model_path numpy_entropy_model_4.txt
+spython3 numpy_entropy.py --data_path numpy_entropy_data_4.txt --model_path numpy_entropy_model_4.txt

From ded51d8ab940cff8e35c785d1ce73b7b63b49080 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Mon, 4 Mar 2024 12:48:55 +0100
Subject: [PATCH 47/64] Add .gitignore, pull.ps1, and setup.ps1 files

---
 .gitignore | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index f8e2b15..309f474 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,3 @@
-.venv/
+.venv
 logs/
 mnist.npz

From ae7c7a22f0c1991827d8c3b46306b2adca782183 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Mon, 4 Mar 2024 19:15:48 +0100
Subject: [PATCH 48/64] Add Lisa's solution

---
 labs/01/pca_first.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/labs/01/pca_first.py b/labs/01/pca_first.py
index 688e89c..deecf06 100644
--- a/labs/01/pca_first.py
+++ b/labs/01/pca_first.py
@@ -58,7 +58,7 @@ def main(args: argparse.Namespace) -> tuple[float, float]:
     # TODO: Compute the total variance, which is the sum of the diagonal
     # of the covariance matrix. To extract the diagonal use `torch.diagonal`,
     # and to sum a tensor use `torch.sum`.
-    total_variance = torch.diagonal(cov).sum()
+    total_variance = torch.sum(torch.diagonal(cov)).item()
 
     # TODO: Now run `args.iterations` of the power iteration algorithm.
     # Start with a vector of `cov.shape[0]` ones of type `torch.float32` using `torch.ones`.

From 28b0519d0209562c7c4a856ec0c4de568e737830 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Mon, 18 Mar 2024 20:08:12 +0100
Subject: [PATCH 49/64] Update .gitignore

---
 .gitignore | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 309f474..917c1db 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
-.venv
+**/.venv/
 logs/
 mnist.npz
+*.zip

From 329b15497e5e465f1e1f28d339ebe1008a6be3ea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Mon, 18 Mar 2024 19:52:00 +0100
Subject: [PATCH 50/64] Solve mnist_regularization

---
 labs/03/mnist_regularization.ps1 |  24 +++++++
 labs/03/mnist_regularization.py  | 120 +++++++++++++++++++++++--------
 2 files changed, 114 insertions(+), 30 deletions(-)
 create mode 100644 labs/03/mnist_regularization.ps1

diff --git a/labs/03/mnist_regularization.ps1 b/labs/03/mnist_regularization.ps1
new file mode 100644
index 0000000..2a61e88
--- /dev/null
+++ b/labs/03/mnist_regularization.ps1
@@ -0,0 +1,24 @@
+# Run script from root repo directory
+
+.\.venv\Scripts\python labs\03\mnist_regularization.py --epochs=1 --dropout=0.3
+.\.venv\Scripts\python labs\03\mnist_regularization.py --epochs=1 --dropout=0.5 --hidden_layers 300 300
+.\.venv\Scripts\python labs\03\mnist_regularization.py --epochs=1 --weight_decay=0.1
+.\.venv\Scripts\python labs\03\mnist_regularization.py --epochs=1 --weight_decay=0.3
+.\.venv\Scripts\python labs\03\mnist_regularization.py --epochs=1 --label_smoothing=0.1
+.\.venv\Scripts\python labs\03\mnist_regularization.py --epochs=1 --label_smoothing=0.3
+
+# Expected
+# accuracy: 0.5981 - loss: 1.2688 - val_accuracy: 0.9174 - val_loss: 0.3051
+# accuracy: 0.3429 - loss: 1.9163 - val_accuracy: 0.8826 - val_loss: 0.4937
+# accuracy: 0.7014 - loss: 1.0412 - val_accuracy: 0.9236 - val_loss: 0.2776
+# accuracy: 0.7006 - loss: 1.0429 - val_accuracy: 0.9232 - val_loss: 0.2801
+# accuracy: 0.7102 - loss: 1.3015 - val_accuracy: 0.9276 - val_loss: 0.7656
+# accuracy: 0.7113 - loss: 1.6854 - val_accuracy: 0.9332 - val_loss: 1.3709
+
+# Actual
+# accuracy: 0.6178 - loss: 1.2374 - val_accuracy: 0.9164 - val_loss: 0.3045
+# accuracy: 0.3412 - loss: 1.8919 - val_accuracy: 0.8818 - val_loss: 0.4794
+# accuracy: 0.6948 - loss: 1.0394 - val_accuracy: 0.9186 - val_loss: 0.2859
+# accuracy: 0.6947 - loss: 1.0410 - val_accuracy: 0.9184 - val_loss: 0.2885
+# accuracy: 0.6996 - loss: 1.3013 - val_accuracy: 0.9228 - val_loss: 0.7735
+# accuracy: 0.7102 - loss: 1.6879 - val_accuracy: 0.9284 - val_loss: 1.3739
diff --git a/labs/03/mnist_regularization.py b/labs/03/mnist_regularization.py
index 769d7a4..0b2e5a2 100644
--- a/labs/03/mnist_regularization.py
+++ b/labs/03/mnist_regularization.py
@@ -3,7 +3,10 @@
 import datetime
 import os
 import re
-os.environ.setdefault("KERAS_BACKEND", "torch")  # Use PyTorch backend unless specified otherwise
+
+os.environ.setdefault(
+    "KERAS_BACKEND", "torch"
+)  # Use PyTorch backend unless specified otherwise
 
 import keras
 import torch
@@ -15,12 +18,20 @@
 parser.add_argument("--batch_size", default=50, type=int, help="Batch size.")
 parser.add_argument("--dropout", default=0, type=float, help="Dropout regularization.")
 parser.add_argument("--epochs", default=30, type=int, help="Number of epochs.")
-parser.add_argument("--hidden_layers", default=[400], nargs="*", type=int, help="Hidden layer sizes.")
+parser.add_argument(
+    "--hidden_layers", default=[400], nargs="*", type=int, help="Hidden layer sizes."
+)
 parser.add_argument("--label_smoothing", default=0, type=float, help="Label smoothing.")
-parser.add_argument("--recodex", default=False, action="store_true", help="Evaluation in ReCodEx.")
+parser.add_argument(
+    "--recodex", default=False, action="store_true", help="Evaluation in ReCodEx."
+)
 parser.add_argument("--seed", default=42, type=int, help="Random seed.")
-parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.")
-parser.add_argument("--weight_decay", default=0, type=float, help="Weight decay strength.")
+parser.add_argument(
+    "--threads", default=1, type=int, help="Maximum number of threads to use."
+)
+parser.add_argument(
+    "--weight_decay", default=0, type=float, help="Weight decay strength."
+)
 # If you add more arguments, ReCodEx will keep them with your default values.
 
 
@@ -32,7 +43,10 @@ def __init__(self, path):
     def writer(self, writer):
         if writer not in self._writers:
             import torch.utils.tensorboard
-            self._writers[writer] = torch.utils.tensorboard.SummaryWriter(os.path.join(self._path, writer))
+
+            self._writers[writer] = torch.utils.tensorboard.SummaryWriter(
+                os.path.join(self._path, writer)
+            )
         return self._writers[writer]
 
     def add_logs(self, writer, logs, step):
@@ -43,10 +57,24 @@ def add_logs(self, writer, logs, step):
 
     def on_epoch_end(self, epoch, logs=None):
         if logs:
-            if isinstance(getattr(self.model, "optimizer", None), keras.optimizers.Optimizer):
-                logs = logs | {"learning_rate": keras.ops.convert_to_numpy(self.model.optimizer.learning_rate)}
-            self.add_logs("train", {k: v for k, v in logs.items() if not k.startswith("val_")}, epoch + 1)
-            self.add_logs("val", {k[4:]: v for k, v in logs.items() if k.startswith("val_")}, epoch + 1)
+            if isinstance(
+                getattr(self.model, "optimizer", None), keras.optimizers.Optimizer
+            ):
+                logs = logs | {
+                    "learning_rate": keras.ops.convert_to_numpy(
+                        self.model.optimizer.learning_rate
+                    )
+                }
+            self.add_logs(
+                "train",
+                {k: v for k, v in logs.items() if not k.startswith("val_")},
+                epoch + 1,
+            )
+            self.add_logs(
+                "val",
+                {k[4:]: v for k, v in logs.items() if k.startswith("val_")},
+                epoch + 1,
+            )
 
 
 def main(args: argparse.Namespace) -> dict[str, float]:
@@ -57,16 +85,24 @@ def main(args: argparse.Namespace) -> dict[str, float]:
         torch.set_num_interop_threads(args.threads)
 
     # Create logdir name
-    args.logdir = os.path.join("logs", "{}-{}-{}".format(
-        os.path.basename(globals().get("__file__", "notebook")),
-        datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"),
-        ",".join(("{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v) for k, v in sorted(vars(args).items())))
-    ))
+    args.logdir = os.path.join(
+        "logs",
+        "{}-{}-{}".format(
+            os.path.basename(globals().get("__file__", "notebook")),
+            datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"),
+            ",".join(
+                (
+                    "{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v)
+                    for k, v in sorted(vars(args).items())
+                )
+            ),
+        ),
+    )
 
     # Load data
     mnist = MNIST(size={"train": 5_000})
 
-    # TODO: Incorporate dropout to the model below. Namely, add
+    # Incorporate dropout to the model below. Namely, add
     #   a `keras.layers.Dropout` layer with `args.dropout` rate after
     #   the `Flatten` layer and after each `Dense` hidden layer (but not after
     #   the output `Dense` layer).
@@ -75,12 +111,14 @@ def main(args: argparse.Namespace) -> dict[str, float]:
     model.add(keras.layers.Rescaling(1 / 255))
     model.add(keras.layers.Flatten())
     model.add(keras.layers.Dropout(args.dropout))
+
     for hidden_layer in args.hidden_layers:
         model.add(keras.layers.Dense(hidden_layer, activation="relu"))
-        model.add(keras.layers.Dropout(args.dropout))
+        model.add(keras.layers.Dropout(rate=args.dropout))
+
     model.add(keras.layers.Dense(MNIST.LABELS, activation="softmax"))
 
-    # TODO: Implement label smoothing with the given `args.label_smoothing` strength.
+    # Implement label smoothing with the given `args.label_smoothing` strength.
     # You need to change the `SparseCategorical{Crossentropy,Accuracy}` to
     # `Categorical{Crossentropy,Accuracy}`, because `label_smoothing` is supported
     # only by the `CategoricalCrossentropy`. That means you also need to modify
@@ -88,30 +126,52 @@ def main(args: argparse.Namespace) -> dict[str, float]:
     # of the gold class to a full categorical distribution (you can use either NumPy,
     # or there is a helper method also in the `keras.utils` module).
 
-    # TODO: Create a `keras.optimizers.AdamW`, using the default learning
+    # Create a `keras.optimizers.AdamW`, using the default learning
     # rate and a weight decay of strength `args.weight_decay`. Then call the
     # `exclude_from_weight_decay` method to specify that all variables with "bias"
     # in their name should not be decayed.
     optimizer = keras.optimizers.AdamW(weight_decay=args.weight_decay)
-    optimizer.exclude_from_weight_decay=[v for v in model.variables if "bias" in v.name]
-
-    model.compile(
-        optimizer=optimizer,
-        loss=keras.losses.CategoricalCrossentropy(label_smoothing=args.label_smoothing),
-        metrics=[keras.metrics.CategoricalAccuracy(name="accuracy")],
-    )
+    optimizer.exclude_from_weight_decay(var_names=["bias"])
+
+    s = args.label_smoothing != 0
+
+    if s:
+        model.compile(
+            optimizer=optimizer,
+            loss=keras.losses.CategoricalCrossentropy(label_smoothing=args.label_smoothing),
+            metrics=[keras.metrics.CategoricalAccuracy(name="accuracy")],
+        )
+    else:
+        model.compile(
+            optimizer=optimizer,
+            loss=keras.losses.SparseCategoricalCrossentropy(),
+            metrics=[keras.metrics.SparseCategoricalAccuracy(name="accuracy")],
+        )
 
     tb_callback = TorchTensorBoardCallback(args.logdir)
 
     logs = model.fit(
-        mnist.train.data["images"], keras.utils.to_categorical(mnist.train.data["labels"], num_classes=mnist.LABELS),
-        batch_size=args.batch_size, epochs=args.epochs,
-        validation_data=(mnist.dev.data["images"], keras.utils.to_categorical(mnist.dev.data["labels"], mnist.LABELS)),
+        mnist.train.data["images"],
+        keras.utils.to_categorical(
+            mnist.train.data["labels"], num_classes=mnist.LABELS
+        ) if s else mnist.train.data["labels"],
+        batch_size=args.batch_size,
+        epochs=args.epochs,
+        validation_data=(
+            mnist.dev.data["images"],
+            keras.utils.to_categorical(
+                mnist.dev.data["labels"], num_classes=mnist.LABELS
+            ) if s else mnist.dev.data["labels"],
+        ),
         callbacks=[tb_callback],
     )
 
     # Return development metrics for ReCodEx to validate.
-    return {metric: values[-1] for metric, values in logs.history.items() if metric.startswith("val_")}
+    return {
+        metric: values[-1]
+        for metric, values in logs.history.items()
+        if metric.startswith("val_")
+    }
 
 
 if __name__ == "__main__":

From 93fd87bc16507fc48a118f636857e8e478026000 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Thu, 21 Mar 2024 20:00:52 +0100
Subject: [PATCH 51/64] Solve mnist_ensemble

---
 labs/03/mnist_ensemble.ps1 |  2 ++
 labs/03/mnist_ensemble.py  | 28 ++++++++++++++++++----------
 2 files changed, 20 insertions(+), 10 deletions(-)
 create mode 100644 labs/03/mnist_ensemble.ps1

diff --git a/labs/03/mnist_ensemble.ps1 b/labs/03/mnist_ensemble.ps1
new file mode 100644
index 0000000..526a6bd
--- /dev/null
+++ b/labs/03/mnist_ensemble.ps1
@@ -0,0 +1,2 @@
+python3 mnist_ensemble.py --epochs=1 --models=5
+python3 mnist_ensemble.py --epochs=1 --models=5 --hidden_layers=200
diff --git a/labs/03/mnist_ensemble.py b/labs/03/mnist_ensemble.py
index dc8f60d..93bb2eb 100644
--- a/labs/03/mnist_ensemble.py
+++ b/labs/03/mnist_ensemble.py
@@ -57,11 +57,11 @@ def main(args: argparse.Namespace) -> tuple[list[float], list[float]]:
     individual_accuracies, ensemble_accuracies = [], []
     model_predictions = []
     for model in range(args.models):
-        # TODO: Compute the accuracy on the dev set for the individual `models[model]`.
-        predictions = models[model].predict(mnist.dev.data["images"])
-        individual_accuracy = np.sum(np.argmax(predictions, axis=-1) == mnist.dev.data["labels"])/mnist.dev.size
+        # Compute the accuracy on the dev set for the individual `models[model]`.
+        individual_accuracy = models[model].evaluate(mnist.dev.data["images"], mnist.dev.data["labels"])[1]
+        print(individual_accuracy)
 
-        # TODO: Compute the accuracy on the dev set for the ensemble `models[0:model+1]`.
+        # Compute the accuracy on the dev set for the ensemble `models[0:model+1]`.
         #
         # Generally you can choose one of the following approaches:
         # 1) Use Keras Functional API and construct a `keras.Model` averaging the models
@@ -72,12 +72,20 @@ def main(args: argparse.Namespace) -> tuple[list[float], list[float]]:
         #    need to construct Keras ensemble model at all, and instead call `model.predict`
         #    on the individual models and average the results. To measure accuracy,
         #    either do it completely manually or use `keras.metrics.SparseCategoricalAccuracy`.
-        # Store the predictions
-        model_predictions.append(predictions)
-        pred_avg = np.mean(model_predictions, axis=0)
-        ensemble_accuracy = np.sum(np.argmax(pred_avg, axis=-1) == mnist.dev.data["labels"])/mnist.dev.size
-        # Store the accuracies   
-        individual_accuracies.append(individual_accuracy)  
+        inputs = keras.Input(shape=(MNIST.W, MNIST.H, MNIST.C))
+        ensemble_output = keras.layers.Average()([model(inputs) for model in models[0:model+1]])
+        ensemble_model = keras.Model(inputs=inputs, outputs=ensemble_output)
+
+        ensemble_model.compile(
+            optimizer=keras.optimizers.Adam(),
+            loss=keras.losses.SparseCategoricalCrossentropy(),
+            metrics=[keras.metrics.SparseCategoricalAccuracy(name="accuracy")],
+        )
+
+        ensemble_accuracy = ensemble_model.evaluate(mnist.dev.data["images"], mnist.dev.data["labels"])[1]
+
+        # Store the accuracies
+        individual_accuracies.append(individual_accuracy)
         ensemble_accuracies.append(ensemble_accuracy)
     return individual_accuracies, ensemble_accuracies
 

From 6ff7487a64b6cabc98ac357a4ebb3d31add0a7ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Thu, 21 Mar 2024 20:04:20 +0100
Subject: [PATCH 52/64] Broken uppercase

---
 labs/03/uppercase.py | 40 +++++++++++++++++++++++++++++-----------
 1 file changed, 29 insertions(+), 11 deletions(-)

diff --git a/labs/03/uppercase.py b/labs/03/uppercase.py
index c975e3f..c83d5c5 100644
--- a/labs/03/uppercase.py
+++ b/labs/03/uppercase.py
@@ -10,16 +10,16 @@
 
 from uppercase_data import UppercaseData
 
-# TODO: Set reasonable values for the hyperparameters, especially for
+# Set reasonable values for the hyperparameters, especially for
 # `alphabet_size`, `batch_size`, `epochs`, and `window`.
 # Also, you can set the number of threads to 0 to use all your CPU cores.
 parser = argparse.ArgumentParser()
-parser.add_argument("--alphabet_size", default=..., type=int, help="If given, use this many most frequent chars.")
-parser.add_argument("--batch_size", default=..., type=int, help="Batch size.")
-parser.add_argument("--epochs", default=..., type=int, help="Number of epochs.")
+parser.add_argument("--alphabet_size", default=70, type=int, help="If given, use this many most frequent chars.")
+parser.add_argument("--batch_size", default=1024, type=int, help="Batch size.")
+parser.add_argument("--epochs", default=2, type=int, help="Number of epochs.")
 parser.add_argument("--seed", default=42, type=int, help="Random seed.")
-parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.")
-parser.add_argument("--window", default=..., type=int, help="Window size to use.")
+parser.add_argument("--threads", default=0, type=int, help="Maximum number of threads to use.")
+parser.add_argument("--window", default=4, type=int, help="Window size to use.")
 
 
 class TorchTensorBoardCallback(keras.callbacks.Callback):
@@ -64,7 +64,7 @@ def main(args: argparse.Namespace) -> None:
     # Load data
     uppercase_data = UppercaseData(args.window, args.alphabet_size)
 
-    # TODO: Implement a suitable model, optionally including regularization, select
+    # Implement a suitable model, optionally including regularization, select
     # good hyperparameters and train the model.
     #
     # The inputs are _windows_ of fixed size (`args.window` characters on the left,
@@ -79,16 +79,34 @@ def main(args: argparse.Namespace) -> None:
     #   You can then flatten the one-hot encoded windows and follow with a dense layer.
     # - Alternatively, you can use `keras.layers.Embedding` (which is an efficient
     #   implementation of one-hot encoding followed by a Dense layer) and flatten afterwards.
-    model = ...
+    model = keras.Sequential([
+        keras.layers.InputLayer(shape=[2 * args.window + 1], dtype="int32"),
+        keras.layers.CategoryEncoding(len(uppercase_data.train.alphabet)),
+        keras.layers.Embedding(len(uppercase_data.train.alphabet), 8),
+
+        keras.layers.Flatten(),
+        keras.layers.Dense(64, activation='relu'),
+        keras.layers.Dropout(rate=0.5),
+        keras.layers.Dense(1, activation='sigmoid') # Sigmoid activation function for binary classification
+    ])
+
+    # Generate correctly capitalized test set.
+
+    predictions = model.predict(uppercase_data.test.data, batch_size=args.batch_size)
 
-    # TODO: Generate correctly capitalized test set.
     # Use `uppercase_data.test.text` as input, capitalize suitable characters,
     # and write the result to predictions_file (which is
     # `uppercase_test.txt` in the `args.logdir` directory).
     os.makedirs(args.logdir, exist_ok=True)
     with open(os.path.join(args.logdir, "uppercase_test.txt"), "w", encoding="utf-8") as predictions_file:
-        ...
-
+        new_text = ""
+        for pred, word in zip(predictions, uppercase_data.test.text):
+            if pred > .5:
+                new_word = word.upper()
+                new_text += new_word
+            else:
+                new_text
+        predictions_file.write(new_text)
 
 if __name__ == "__main__":
     args = parser.parse_args([] if "__file__" not in globals() else None)

From 8b211166a33219cc1e7286c09efff197383f5092 Mon Sep 17 00:00:00 2001
From: Aryan <82381989+akumm2k@users.noreply.github.com>
Date: Sat, 23 Mar 2024 20:27:55 +0100
Subject: [PATCH 53/64] Add missing torch submodule import to cifar10.py

---
 labs/04/cifar10.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/labs/04/cifar10.py b/labs/04/cifar10.py
index ec06755..6271172 100644
--- a/labs/04/cifar10.py
+++ b/labs/04/cifar10.py
@@ -5,6 +5,7 @@
 
 import numpy as np
 import torch
+import torch.utils.data
 
 
 class CIFAR10:

From 5d2751db8c2880ebb2064199b366c696d6059efe Mon Sep 17 00:00:00 2001
From: Milan Straka <milan@strakovi.com>
Date: Sat, 23 Mar 2024 23:52:18 +0100
Subject: [PATCH 54/64] Remove unnecessary annotation.

---
 labs/04/cifar10.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/labs/04/cifar10.py b/labs/04/cifar10.py
index 6271172..ec06755 100644
--- a/labs/04/cifar10.py
+++ b/labs/04/cifar10.py
@@ -5,7 +5,6 @@
 
 import numpy as np
 import torch
-import torch.utils.data
 
 
 class CIFAR10:

From 9679da6c38727e501b9ea16878a864f0af34203e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Thu, 21 Mar 2024 21:04:31 +0100
Subject: [PATCH 55/64] Solve mnist_cnn.py

---
 .vscode/settings.json |   3 +
 labs/04/mnist_cnn.py  | 150 ++++++++++++++++++++++++++++++++++--------
 2 files changed, 127 insertions(+), 26 deletions(-)
 create mode 100644 .vscode/settings.json

diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..dc3f727
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+  "python.analysis.typeCheckingMode": "basic"
+}
diff --git a/labs/04/mnist_cnn.py b/labs/04/mnist_cnn.py
index a3a91cd..1037888 100644
--- a/labs/04/mnist_cnn.py
+++ b/labs/04/mnist_cnn.py
@@ -1,7 +1,10 @@
 #!/usr/bin/env python3
 import argparse
 import os
-os.environ.setdefault("KERAS_BACKEND", "torch")  # Use PyTorch backend unless specified otherwise
+
+os.environ.setdefault(
+    "KERAS_BACKEND", "torch"
+)  # Use PyTorch backend unless specified otherwise
 
 import keras
 import torch
@@ -11,42 +14,103 @@
 parser = argparse.ArgumentParser()
 # These arguments will be set appropriately by ReCodEx, even if you change them.
 parser.add_argument("--batch_size", default=50, type=int, help="Batch size.")
-parser.add_argument("--cnn", default=None, type=str, help="CNN architecture.")
+parser.add_argument(
+    "--cnn",
+    default="CB-16-5-2-same,M-3-2,F,H-100,D-0.5",
+    type=str,
+    help="CNN architecture.",
+)
 parser.add_argument("--epochs", default=10, type=int, help="Number of epochs.")
-parser.add_argument("--recodex", default=False, action="store_true", help="Evaluation in ReCodEx.")
+parser.add_argument(
+    "--recodex", default=False, action="store_true", help="Evaluation in ReCodEx."
+)
 parser.add_argument("--seed", default=42, type=int, help="Random seed.")
-parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.")
+parser.add_argument(
+    "--threads", default=1, type=int, help="Maximum number of threads to use."
+)
 # If you add more arguments, ReCodEx will keep them with your default values.
 
 
 class Model(keras.Model):
     def __init__(self, args: argparse.Namespace) -> None:
-        # TODO: Create the model. The template uses the functional API, but
+        # Create the model. The template uses the functional API, but
         # feel free to use subclassing if you want.
         inputs = keras.Input(shape=[MNIST.H, MNIST.W, MNIST.C])
         hidden = keras.layers.Rescaling(1 / 255)(inputs)
 
-        # TODO: Add CNN layers specified by `args.cnn`, which contains
-        # a comma-separated list of the following layers:
-        # - `C-filters-kernel_size-stride-padding`: Add a convolutional layer with ReLU
-        #   activation and specified number of filters, kernel size, stride and padding.
-        # - `CB-filters-kernel_size-stride-padding`: Same as `C`, but use batch normalization.
-        #   In detail, start with a convolutional layer **without bias** and activation,
-        #   then add a batch normalization layer, and finally the ReLU activation.
-        # - `M-pool_size-stride`: Add max pooling with specified size and stride, using
-        #   the default "valid" padding.
-        # - `R-[layers]`: Add a residual connection. The `layers` contain a specification
-        #   of at least one convolutional layer (but not a recursive residual connection `R`).
-        #   The input to the `R` layer should be processed sequentially by `layers`, and the
-        #   produced output (after the ReLU nonlinearity of the last layer) should be added
-        #   to the input (of this `R` layer).
-        # - `F`: Flatten inputs. Must appear exactly once in the architecture.
-        # - `H-hidden_layer_size`: Add a dense layer with ReLU activation and the specified size.
-        # - `D-dropout_rate`: Apply dropout with the given dropout rate.
+        cnn_args = args.cnn.split(",")
+
+        for layer in cnn_args:
+            layer_type, *layer_args = layer.split("-")
+
+            # Add CNN layers specified by `args.cnn`, which contains
+            # a comma-separated list of the following layers:
+
+            # - `C-filters-kernel_size-stride-padding`: Add a convolutional layer with ReLU
+            #   activation and specified number of filters, kernel size, stride and padding.
+            if layer_type == "C":
+                hidden = self.createCnnLayer(hidden, *layer_args)
+                continue
+
+            # - `CB-filters-kernel_size-stride-padding`: Same as `C`, but use batch normalization.
+            #   In detail, start with a convolutional layer **without bias** and activation,
+            #   then add a batch normalization layer, and finally the ReLU activation.
+            if layer_type == "CB":
+                hidden = self.createCnnBatchNormLayer(hidden, *layer_args)
+                continue
+
+            # - `M-pool_size-stride`: Add max pooling with specified size and stride, using
+            #   the default "valid" padding.
+            if layer_type == "M":
+                hidden = self.createMaxPoolingLayer(hidden, *layer_args)
+                continue
+
+            # - `R-[layers]`: Add a residual connection. The `layers` contain a specification
+            #   of at least one convolutional layer (but not a recursive residual connection `R`).
+            #   The input to the `R` layer should be processed sequentially by `layers`, and the
+            #   produced output (after the ReLU nonlinearity of the last layer) should be added
+            #   to the input (of this `R` layer).
+
+            if layer_type == "R":
+                layers = layer_args[1][1:-1].split(",")
+
+                for layer in layers:
+                    layer_type, *layer_args = layer.split("-")
+
+                    if layer_type == "C":
+                        hidden = self.createCnnLayer(hidden, *layer_args)
+                        continue
+
+                    if layer_type == "CB":
+                        hidden = self.createCnnBatchNormLayer(hidden, *layer_args)
+                        continue
+
+                continue
+
+            # - `F`: Flatten inputs. Must appear exactly once in the architecture.
+
+            if layer_type == "F":
+                hidden = keras.layers.Flatten()(hidden)
+                continue
+
+            # - `H-hidden_layer_size`: Add a dense layer with ReLU activation and the specified size.
+            if layer_type == "H":
+                hidden_layer_size = int(layer_args[0])
+                hidden = keras.layers.Dense(hidden_layer_size, activation="relu")(
+                    hidden
+                )
+                continue
+
+            # - `D-dropout_rate`: Apply dropout with the given dropout rate.
+
+            if layer_type == "D":
+                dropout_rate = float(layer_args[0])
+                hidden = keras.layers.Dropout(int(dropout_rate))(hidden)
+                continue
+
         # You can assume the resulting network is valid; it is fine to crash if it is not.
         #
         # Produce the results in the variable `hidden`.
-        hidden = ...
 
         # Add the final output layer
         outputs = keras.layers.Dense(MNIST.LABELS, activation="softmax")(hidden)
@@ -58,6 +122,34 @@ def __init__(self, args: argparse.Namespace) -> None:
             metrics=[keras.metrics.SparseCategoricalAccuracy(name="accuracy")],
         )
 
+    def createCnnLayer(_self, hidden, filters: str, kernel_size: str, stride: str, padding: str):
+        cnn_layer = keras.layers.Conv2D(
+            filters=int(filters),
+            kernel_size=(int(kernel_size), int(kernel_size)),
+            strides=(int(stride), int(stride)),
+            padding=padding,
+        )
+
+        activation_layer = keras.layers.Activation("relu")
+        return activation_layer(cnn_layer(hidden))
+
+    def createCnnBatchNormLayer(_self, hidden, filters: str, kernel_size: str, stride: str, padding: str):
+        cnn_layer = keras.layers.Conv2D(
+            filters=int(filters),
+            kernel_size=(int(kernel_size), int(kernel_size)),
+            strides=(int(stride), int(stride)),
+            padding=padding,
+        )
+        batch_norm = keras.layers.BatchNormalization()
+        activation_layer = keras.layers.Activation("relu")
+
+        return activation_layer(batch_norm(cnn_layer(hidden)))
+
+    def createMaxPoolingLayer(_self, hidden: str, pool_size: str, stride: str):
+        return keras.layers.MaxPooling2D(
+            pool_size=int(pool_size),
+            strides=(int(stride), int(stride)),
+        )(hidden)
 
 def main(args: argparse.Namespace) -> dict[str, float]:
     # Set the random seed and the number of threads.
@@ -73,13 +165,19 @@ def main(args: argparse.Namespace) -> dict[str, float]:
     model = Model(args)
 
     logs = model.fit(
-        mnist.train.data["images"], mnist.train.data["labels"],
-        batch_size=args.batch_size, epochs=args.epochs,
+        mnist.train.data["images"],
+        mnist.train.data["labels"],
+        batch_size=args.batch_size,
+        epochs=args.epochs,
         validation_data=(mnist.dev.data["images"], mnist.dev.data["labels"]),
     )
 
     # Return development metrics for ReCodEx to validate.
-    return {metric: values[-1] for metric, values in logs.history.items() if metric.startswith("val_")}
+    return {
+        metric: values[-1]
+        for metric, values in logs.history.items()
+        if metric.startswith("val_")
+    }
 
 
 if __name__ == "__main__":

From 19b1dda5a31936421a64e78ec17ca43cb31cfb8a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Sun, 24 Mar 2024 12:42:48 +0100
Subject: [PATCH 56/64] Fix dropout getting rounded

---
 labs/04/mnist_cnn.py | 84 +++++++++++++++++++++++---------------------
 1 file changed, 44 insertions(+), 40 deletions(-)

diff --git a/labs/04/mnist_cnn.py b/labs/04/mnist_cnn.py
index 1037888..44710f2 100644
--- a/labs/04/mnist_cnn.py
+++ b/labs/04/mnist_cnn.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 import argparse
 import os
+import re
 
 os.environ.setdefault(
     "KERAS_BACKEND", "torch"
@@ -38,7 +39,7 @@ def __init__(self, args: argparse.Namespace) -> None:
         inputs = keras.Input(shape=[MNIST.H, MNIST.W, MNIST.C])
         hidden = keras.layers.Rescaling(1 / 255)(inputs)
 
-        cnn_args = args.cnn.split(",")
+        cnn_args = re.split(r',(?![^\[]*\])', args.cnn)
 
         for layer in cnn_args:
             layer_type, *layer_args = layer.split("-")
@@ -49,20 +50,41 @@ def __init__(self, args: argparse.Namespace) -> None:
             # - `C-filters-kernel_size-stride-padding`: Add a convolutional layer with ReLU
             #   activation and specified number of filters, kernel size, stride and padding.
             if layer_type == "C":
-                hidden = self.createCnnLayer(hidden, *layer_args)
+                filters, kernel_size, stride, padding = layer_args
+                hidden = keras.layers.Conv2D(
+                    filters=int(filters),
+                    kernel_size=(int(kernel_size), int(kernel_size)),
+                    strides=(int(stride), int(stride)),
+                    padding=padding,
+                )(hidden)
+
+                hidden = keras.layers.Activation("relu")(hidden)
+
                 continue
 
             # - `CB-filters-kernel_size-stride-padding`: Same as `C`, but use batch normalization.
             #   In detail, start with a convolutional layer **without bias** and activation,
             #   then add a batch normalization layer, and finally the ReLU activation.
             if layer_type == "CB":
-                hidden = self.createCnnBatchNormLayer(hidden, *layer_args)
+                filters, kernel_size, stride, padding = layer_args
+                hidden = keras.layers.Conv2D(
+                    filters=int(filters),
+                    kernel_size=[int(kernel_size), int(kernel_size)],
+                    strides=[int(stride), int(stride)],
+                    padding=padding,
+                )(hidden)
+                hidden = keras.layers.BatchNormalization()(hidden)
+                hidden = keras.layers.Activation("relu")(hidden)
                 continue
 
             # - `M-pool_size-stride`: Add max pooling with specified size and stride, using
             #   the default "valid" padding.
             if layer_type == "M":
-                hidden = self.createMaxPoolingLayer(hidden, *layer_args)
+                pool_size, stride = layer_args
+                hidden = keras.layers.MaxPooling2D(
+                    pool_size=int(pool_size),
+                    strides=[int(stride), int(stride)],
+                )(hidden)
                 continue
 
             # - `R-[layers]`: Add a residual connection. The `layers` contain a specification
@@ -70,25 +92,37 @@ def __init__(self, args: argparse.Namespace) -> None:
             #   The input to the `R` layer should be processed sequentially by `layers`, and the
             #   produced output (after the ReLU nonlinearity of the last layer) should be added
             #   to the input (of this `R` layer).
-
             if layer_type == "R":
-                layers = layer_args[1][1:-1].split(",")
+                layers = "-".join(layer_args)[1:-1].split(",")
 
                 for layer in layers:
                     layer_type, *layer_args = layer.split("-")
 
                     if layer_type == "C":
-                        hidden = self.createCnnLayer(hidden, *layer_args)
+                        filters, kernel_size, stride, padding = layer_args
+                        hidden = keras.layers.Conv2D(
+                            filters=int(filters),
+                            kernel_size=(int(kernel_size), int(kernel_size)),
+                            strides=(int(stride), int(stride)),
+                            padding=padding,
+                        )(hidden)
                         continue
 
                     if layer_type == "CB":
-                        hidden = self.createCnnBatchNormLayer(hidden, *layer_args)
+                        filters, kernel_size, stride, padding = layer_args
+                        hidden = keras.layers.Conv2D(
+                            filters=int(filters),
+                            kernel_size=(int(kernel_size), int(kernel_size)),
+                            strides=(int(stride), int(stride)),
+                            padding=padding,
+                        )(hidden)
+                        hidden = keras.layers.BatchNormalization()(hidden)
+                        hidden = keras.layers.Activation("relu")(hidden)
                         continue
 
                 continue
 
             # - `F`: Flatten inputs. Must appear exactly once in the architecture.
-
             if layer_type == "F":
                 hidden = keras.layers.Flatten()(hidden)
                 continue
@@ -102,10 +136,9 @@ def __init__(self, args: argparse.Namespace) -> None:
                 continue
 
             # - `D-dropout_rate`: Apply dropout with the given dropout rate.
-
             if layer_type == "D":
                 dropout_rate = float(layer_args[0])
-                hidden = keras.layers.Dropout(int(dropout_rate))(hidden)
+                hidden = keras.layers.Dropout(dropout_rate)(hidden)
                 continue
 
         # You can assume the resulting network is valid; it is fine to crash if it is not.
@@ -122,35 +155,6 @@ def __init__(self, args: argparse.Namespace) -> None:
             metrics=[keras.metrics.SparseCategoricalAccuracy(name="accuracy")],
         )
 
-    def createCnnLayer(_self, hidden, filters: str, kernel_size: str, stride: str, padding: str):
-        cnn_layer = keras.layers.Conv2D(
-            filters=int(filters),
-            kernel_size=(int(kernel_size), int(kernel_size)),
-            strides=(int(stride), int(stride)),
-            padding=padding,
-        )
-
-        activation_layer = keras.layers.Activation("relu")
-        return activation_layer(cnn_layer(hidden))
-
-    def createCnnBatchNormLayer(_self, hidden, filters: str, kernel_size: str, stride: str, padding: str):
-        cnn_layer = keras.layers.Conv2D(
-            filters=int(filters),
-            kernel_size=(int(kernel_size), int(kernel_size)),
-            strides=(int(stride), int(stride)),
-            padding=padding,
-        )
-        batch_norm = keras.layers.BatchNormalization()
-        activation_layer = keras.layers.Activation("relu")
-
-        return activation_layer(batch_norm(cnn_layer(hidden)))
-
-    def createMaxPoolingLayer(_self, hidden: str, pool_size: str, stride: str):
-        return keras.layers.MaxPooling2D(
-            pool_size=int(pool_size),
-            strides=(int(stride), int(stride)),
-        )(hidden)
-
 def main(args: argparse.Namespace) -> dict[str, float]:
     # Set the random seed and the number of threads.
     keras.utils.set_random_seed(args.seed)

From 50952bce5e39d518333639108a6eca5e33e5a024 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Sun, 24 Mar 2024 12:44:16 +0100
Subject: [PATCH 57/64] Add test script

---
 labs/04/mnist_cnn.ps1 | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 labs/04/mnist_cnn.ps1

diff --git a/labs/04/mnist_cnn.ps1 b/labs/04/mnist_cnn.ps1
new file mode 100644
index 0000000..bf78797
--- /dev/null
+++ b/labs/04/mnist_cnn.ps1
@@ -0,0 +1,30 @@
+""
+"👉 TEST 1"
+"python3 mnist_cnn.py --epochs=1 --cnn=F,H-100"
+python3 mnist_cnn.py --epochs=1 --cnn=F,H-100
+"1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.8503 - loss: 0.5286 - val_accuracy: 0.9604 - val_loss: 0.1432"
+""
+"👉 TEST 2"
+"python3 mnist_cnn.py --epochs=1 --cnn=F,H-100,D-0.5"
+python3 mnist_cnn.py --epochs=1 --cnn=F,H-100,D-0.5
+"1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.7706 - loss: 0.7444 - val_accuracy: 0.9572 - val_loss: 0.1606"
+""
+"👉 TEST 3"
+"python3 mnist_cnn.py --epochs=1 --cnn=M-5-2,F,H-50"
+python3 mnist_cnn.py --epochs=1 --cnn=M-5-2,F,H-50
+"1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.6630 - loss: 1.0703 - val_accuracy: 0.8798 - val_loss: 0.3894"
+""
+"👉 TEST 4"
+"python3 mnist_cnn.py --epochs=1 --cnn=C-8-3-5-same,C-8-3-2-valid,F,H-50"
+python3 mnist_cnn.py --epochs=1 --cnn=C-8-3-5-same,C-8-3-2-valid,F,H-50
+"1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.5898 - loss: 1.2535 - val_accuracy: 0.8774 - val_loss: 0.4079"
+""
+"👉 TEST 5"
+"python3 mnist_cnn.py --epochs=1 --cnn=CB-6-3-5-valid,F,H-32"
+python3 mnist_cnn.py --epochs=1 --cnn=CB-6-3-5-valid,F,H-32
+"1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.6822 - loss: 1.0011 - val_accuracy: 0.9284 - val_loss: 0.2537"
+""
+"👉 TEST 6"
+"python3 mnist_cnn.py --epochs=1 --cnn=CB-8-3-5-valid,R-[CB-8-3-1-same,CB-8-3-1-same],F,H-50"
+python3 mnist_cnn.py --epochs=1 --cnn=CB-8-3-5-valid,R-[CB-8-3-1-same,CB-8-3-1-same],F,H-50
+"1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.7562 - loss: 0.7717 - val_accuracy: 0.9486 - val_loss: 0.1734"

From 3dafa1cd35cf6c679b929093bd2f7e4fd2d0cdc8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Sun, 24 Mar 2024 13:00:44 +0100
Subject: [PATCH 58/64] Fix issue with CB layers

---
 labs/04/mnist_cnn results.txt | 29 +++++++++++++++++++++++++++++
 labs/04/mnist_cnn.py          |  9 ++++++---
 2 files changed, 35 insertions(+), 3 deletions(-)
 create mode 100644 labs/04/mnist_cnn results.txt

diff --git a/labs/04/mnist_cnn results.txt b/labs/04/mnist_cnn results.txt
new file mode 100644
index 0000000..063033d
--- /dev/null
+++ b/labs/04/mnist_cnn results.txt	
@@ -0,0 +1,29 @@
+👉 TEST 1
+python3 mnist_cnn.py --epochs=1 --cnn=F,H-100
+1100/1100 ━━━━━━━━━━━━━━━━━━━━ 19s 17ms/step - accuracy: 0.8499 - loss: 0.5317 - val_accuracy: 0.9618 - val_loss: 0.1400
+1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.8503 - loss: 0.5286 - val_accuracy: 0.9604 - val_loss: 0.1432
+
+👉 TEST 2
+python3 mnist_cnn.py --epochs=1 --cnn=F,H-100,D-0.5
+1100/1100 ━━━━━━━━━━━━━━━━━━━━ 20s 18ms/step - accuracy: 0.7662 - loss: 0.7543 - val_accuracy: 0.9576 - val_loss: 0.1612
+1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.7706 - loss: 0.7444 - val_accuracy: 0.9572 - val_loss: 0.1606
+
+👉 TEST 3
+python3 mnist_cnn.py --epochs=1 --cnn=M-5-2,F,H-50
+1100/1100 ━━━━━━━━━━━━━━━━━━━━ 19s 17ms/step - accuracy: 0.6706 - loss: 1.0717 - val_accuracy: 0.8814 - val_loss: 0.3802
+1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.6630 - loss: 1.0703 - val_accuracy: 0.8798 - val_loss: 0.3894
+
+👉 TEST 4
+python3 mnist_cnn.py --epochs=1 --cnn=C-8-3-5-same,C-8-3-2-valid,F,H-50
+1100/1100 ━━━━━━━━━━━━━━━━━━━━ 21s 18ms/step - accuracy: 0.5799 - loss: 1.2751 - val_accuracy: 0.8898 - val_loss: 0.3617
+1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.5898 - loss: 1.2535 - val_accuracy: 0.8774 - val_loss: 0.4079
+
+👉 TEST 5
+python3 mnist_cnn.py --epochs=1 --cnn=CB-6-3-5-valid,F,H-32
+1100/1100 ━━━━━━━━━━━━━━━━━━━━ 31s 27ms/step - accuracy: 0.6976 - loss: 0.9518 - val_accuracy: 0.9228 - val_loss: 0.2614
+1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.6822 - loss: 1.0011 - val_accuracy: 0.9284 - val_loss: 0.2537
+
+👉 TEST 6
+python3 mnist_cnn.py --epochs=1 --cnn=CB-8-3-5-valid,R-[CB-8-3-1-same,CB-8-3-1-same],F,H-50
+1100/1100 ━━━━━━━━━━━━━━━━━━━━ 44s 38ms/step - accuracy: 0.7638 - loss: 0.7407 - val_accuracy: 0.9454 - val_loss: 0.1744
+1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.7562 - loss: 0.7717 - val_accuracy: 0.9486 - val_loss: 0.1734
diff --git a/labs/04/mnist_cnn.py b/labs/04/mnist_cnn.py
index 44710f2..661c4bd 100644
--- a/labs/04/mnist_cnn.py
+++ b/labs/04/mnist_cnn.py
@@ -39,7 +39,7 @@ def __init__(self, args: argparse.Namespace) -> None:
         inputs = keras.Input(shape=[MNIST.H, MNIST.W, MNIST.C])
         hidden = keras.layers.Rescaling(1 / 255)(inputs)
 
-        cnn_args = re.split(r',(?![^\[]*\])', args.cnn)
+        cnn_args = re.split(r",(?![^\[]*\])", args.cnn)
 
         for layer in cnn_args:
             layer_type, *layer_args = layer.split("-")
@@ -72,6 +72,7 @@ def __init__(self, args: argparse.Namespace) -> None:
                     kernel_size=[int(kernel_size), int(kernel_size)],
                     strides=[int(stride), int(stride)],
                     padding=padding,
+                    use_bias=False,
                 )(hidden)
                 hidden = keras.layers.BatchNormalization()(hidden)
                 hidden = keras.layers.Activation("relu")(hidden)
@@ -112,9 +113,10 @@ def __init__(self, args: argparse.Namespace) -> None:
                         filters, kernel_size, stride, padding = layer_args
                         hidden = keras.layers.Conv2D(
                             filters=int(filters),
-                            kernel_size=(int(kernel_size), int(kernel_size)),
-                            strides=(int(stride), int(stride)),
+                            kernel_size=[int(kernel_size), int(kernel_size)],
+                            strides=[int(stride), int(stride)],
                             padding=padding,
+                            use_bias=False,
                         )(hidden)
                         hidden = keras.layers.BatchNormalization()(hidden)
                         hidden = keras.layers.Activation("relu")(hidden)
@@ -155,6 +157,7 @@ def __init__(self, args: argparse.Namespace) -> None:
             metrics=[keras.metrics.SparseCategoricalAccuracy(name="accuracy")],
         )
 
+
 def main(args: argparse.Namespace) -> dict[str, float]:
     # Set the random seed and the number of threads.
     keras.utils.set_random_seed(args.seed)

From cbf57e6e600f68abe545b4393d41b502e70a011b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Sun, 24 Mar 2024 16:25:02 +0100
Subject: [PATCH 59/64] mnist_cnn.py passes 1-5

---
 labs/04/mnist_cnn results.txt | 12 ++++++------
 labs/04/mnist_cnn.py          | 12 ++++++++----
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/labs/04/mnist_cnn results.txt b/labs/04/mnist_cnn results.txt
index 063033d..71f04b6 100644
--- a/labs/04/mnist_cnn results.txt	
+++ b/labs/04/mnist_cnn results.txt	
@@ -1,29 +1,29 @@
 👉 TEST 1
 python3 mnist_cnn.py --epochs=1 --cnn=F,H-100
-1100/1100 ━━━━━━━━━━━━━━━━━━━━ 19s 17ms/step - accuracy: 0.8499 - loss: 0.5317 - val_accuracy: 0.9618 - val_loss: 0.1400
+1100/1100 ━━━━━━━━━━━━━━━━━━━━ 14s 12ms/step - accuracy: 0.8499 - loss: 0.5317 - val_accuracy: 0.9618 - val_loss: 0.1400
 1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.8503 - loss: 0.5286 - val_accuracy: 0.9604 - val_loss: 0.1432
 
 👉 TEST 2
 python3 mnist_cnn.py --epochs=1 --cnn=F,H-100,D-0.5
-1100/1100 ━━━━━━━━━━━━━━━━━━━━ 20s 18ms/step - accuracy: 0.7662 - loss: 0.7543 - val_accuracy: 0.9576 - val_loss: 0.1612
+1100/1100 ━━━━━━━━━━━━━━━━━━━━ 14s 12ms/step - accuracy: 0.7662 - loss: 0.7543 - val_accuracy: 0.9576 - val_loss: 0.1612
 1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.7706 - loss: 0.7444 - val_accuracy: 0.9572 - val_loss: 0.1606
 
 👉 TEST 3
 python3 mnist_cnn.py --epochs=1 --cnn=M-5-2,F,H-50
-1100/1100 ━━━━━━━━━━━━━━━━━━━━ 19s 17ms/step - accuracy: 0.6706 - loss: 1.0717 - val_accuracy: 0.8814 - val_loss: 0.3802
+1100/1100 ━━━━━━━━━━━━━━━━━━━━ 14s 12ms/step - accuracy: 0.6706 - loss: 1.0717 - val_accuracy: 0.8814 - val_loss: 0.3802
 1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.6630 - loss: 1.0703 - val_accuracy: 0.8798 - val_loss: 0.3894
 
 👉 TEST 4
 python3 mnist_cnn.py --epochs=1 --cnn=C-8-3-5-same,C-8-3-2-valid,F,H-50
-1100/1100 ━━━━━━━━━━━━━━━━━━━━ 21s 18ms/step - accuracy: 0.5799 - loss: 1.2751 - val_accuracy: 0.8898 - val_loss: 0.3617
+1100/1100 ━━━━━━━━━━━━━━━━━━━━ 17s 15ms/step - accuracy: 0.5799 - loss: 1.2751 - val_accuracy: 0.8898 - val_loss: 0.3616
 1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.5898 - loss: 1.2535 - val_accuracy: 0.8774 - val_loss: 0.4079
 
 👉 TEST 5
 python3 mnist_cnn.py --epochs=1 --cnn=CB-6-3-5-valid,F,H-32
-1100/1100 ━━━━━━━━━━━━━━━━━━━━ 31s 27ms/step - accuracy: 0.6976 - loss: 0.9518 - val_accuracy: 0.9228 - val_loss: 0.2614
+1100/1100 ━━━━━━━━━━━━━━━━━━━━ 18s 16ms/step - accuracy: 0.6976 - loss: 0.9518 - val_accuracy: 0.9228 - val_loss: 0.2614
 1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.6822 - loss: 1.0011 - val_accuracy: 0.9284 - val_loss: 0.2537
 
 👉 TEST 6
 python3 mnist_cnn.py --epochs=1 --cnn=CB-8-3-5-valid,R-[CB-8-3-1-same,CB-8-3-1-same],F,H-50
-1100/1100 ━━━━━━━━━━━━━━━━━━━━ 44s 38ms/step - accuracy: 0.7638 - loss: 0.7407 - val_accuracy: 0.9454 - val_loss: 0.1744
+1100/1100 ━━━━━━━━━━━━━━━━━━━━ 29s 26ms/step - accuracy: 0.7727 - loss: 0.7074 - val_accuracy: 0.9424 - val_loss: 0.1835
 1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.7562 - loss: 0.7717 - val_accuracy: 0.9486 - val_loss: 0.1734
diff --git a/labs/04/mnist_cnn.py b/labs/04/mnist_cnn.py
index 661c4bd..5b8fbc1 100644
--- a/labs/04/mnist_cnn.py
+++ b/labs/04/mnist_cnn.py
@@ -8,6 +8,7 @@
 )  # Use PyTorch backend unless specified otherwise
 
 import keras
+from keras.layers import add
 import torch
 
 from mnist import MNIST
@@ -56,10 +57,9 @@ def __init__(self, args: argparse.Namespace) -> None:
                     kernel_size=(int(kernel_size), int(kernel_size)),
                     strides=(int(stride), int(stride)),
                     padding=padding,
+                    activation="relu",
                 )(hidden)
 
-                hidden = keras.layers.Activation("relu")(hidden)
-
                 continue
 
             # - `CB-filters-kernel_size-stride-padding`: Same as `C`, but use batch normalization.
@@ -75,7 +75,7 @@ def __init__(self, args: argparse.Namespace) -> None:
                     use_bias=False,
                 )(hidden)
                 hidden = keras.layers.BatchNormalization()(hidden)
-                hidden = keras.layers.Activation("relu")(hidden)
+                hidden = keras.layers.ReLU()(hidden)
                 continue
 
             # - `M-pool_size-stride`: Add max pooling with specified size and stride, using
@@ -94,6 +94,7 @@ def __init__(self, args: argparse.Namespace) -> None:
             #   produced output (after the ReLU nonlinearity of the last layer) should be added
             #   to the input (of this `R` layer).
             if layer_type == "R":
+                input_layer = hidden
                 layers = "-".join(layer_args)[1:-1].split(",")
 
                 for layer in layers:
@@ -106,6 +107,7 @@ def __init__(self, args: argparse.Namespace) -> None:
                             kernel_size=(int(kernel_size), int(kernel_size)),
                             strides=(int(stride), int(stride)),
                             padding=padding,
+                            activation="relu",
                         )(hidden)
                         continue
 
@@ -119,9 +121,11 @@ def __init__(self, args: argparse.Namespace) -> None:
                             use_bias=False,
                         )(hidden)
                         hidden = keras.layers.BatchNormalization()(hidden)
-                        hidden = keras.layers.Activation("relu")(hidden)
                         continue
 
+                hidden = add([input_layer, hidden])
+                hidden = keras.layers.ReLU()(hidden)
+
                 continue
 
             # - `F`: Flatten inputs. Must appear exactly once in the architecture.

From a2bd060e63587b028c9f2adf18ed5bd8e25093f8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Wed, 27 Mar 2024 12:46:19 +0100
Subject: [PATCH 60/64] Refactor and simplify solution to mnist_cnn.py

---
 labs/04/mnist_cnn results.txt |  10 +-
 labs/04/mnist_cnn.py          | 177 +++++++++++++++-------------------
 2 files changed, 81 insertions(+), 106 deletions(-)

diff --git a/labs/04/mnist_cnn results.txt b/labs/04/mnist_cnn results.txt
index 71f04b6..63271eb 100644
--- a/labs/04/mnist_cnn results.txt	
+++ b/labs/04/mnist_cnn results.txt	
@@ -5,25 +5,25 @@ python3 mnist_cnn.py --epochs=1 --cnn=F,H-100
 
 👉 TEST 2
 python3 mnist_cnn.py --epochs=1 --cnn=F,H-100,D-0.5
-1100/1100 ━━━━━━━━━━━━━━━━━━━━ 14s 12ms/step - accuracy: 0.7662 - loss: 0.7543 - val_accuracy: 0.9576 - val_loss: 0.1612
+1100/1100 ━━━━━━━━━━━━━━━━━━━━ 14s 13ms/step - accuracy: 0.7662 - loss: 0.7543 - val_accuracy: 0.9576 - val_loss: 0.1612
 1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.7706 - loss: 0.7444 - val_accuracy: 0.9572 - val_loss: 0.1606
 
 👉 TEST 3
 python3 mnist_cnn.py --epochs=1 --cnn=M-5-2,F,H-50
-1100/1100 ━━━━━━━━━━━━━━━━━━━━ 14s 12ms/step - accuracy: 0.6706 - loss: 1.0717 - val_accuracy: 0.8814 - val_loss: 0.3802
+1100/1100 ━━━━━━━━━━━━━━━━━━━━ 14s 13ms/step - accuracy: 0.6706 - loss: 1.0717 - val_accuracy: 0.8814 - val_loss: 0.3802
 1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.6630 - loss: 1.0703 - val_accuracy: 0.8798 - val_loss: 0.3894
 
 👉 TEST 4
 python3 mnist_cnn.py --epochs=1 --cnn=C-8-3-5-same,C-8-3-2-valid,F,H-50
-1100/1100 ━━━━━━━━━━━━━━━━━━━━ 17s 15ms/step - accuracy: 0.5799 - loss: 1.2751 - val_accuracy: 0.8898 - val_loss: 0.3616
+1100/1100 ━━━━━━━━━━━━━━━━━━━━ 18s 16ms/step - accuracy: 0.5799 - loss: 1.2751 - val_accuracy: 0.8898 - val_loss: 0.3616
 1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.5898 - loss: 1.2535 - val_accuracy: 0.8774 - val_loss: 0.4079
 
 👉 TEST 5
 python3 mnist_cnn.py --epochs=1 --cnn=CB-6-3-5-valid,F,H-32
-1100/1100 ━━━━━━━━━━━━━━━━━━━━ 18s 16ms/step - accuracy: 0.6976 - loss: 0.9518 - val_accuracy: 0.9228 - val_loss: 0.2614
+1100/1100 ━━━━━━━━━━━━━━━━━━━━ 20s 17ms/step - accuracy: 0.6976 - loss: 0.9518 - val_accuracy: 0.9228 - val_loss: 0.2614
 1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.6822 - loss: 1.0011 - val_accuracy: 0.9284 - val_loss: 0.2537
 
 👉 TEST 6
 python3 mnist_cnn.py --epochs=1 --cnn=CB-8-3-5-valid,R-[CB-8-3-1-same,CB-8-3-1-same],F,H-50
-1100/1100 ━━━━━━━━━━━━━━━━━━━━ 29s 26ms/step - accuracy: 0.7727 - loss: 0.7074 - val_accuracy: 0.9424 - val_loss: 0.1835
+1100/1100 ━━━━━━━━━━━━━━━━━━━━ 31s 27ms/step - accuracy: 0.7476 - loss: 0.7841 - val_accuracy: 0.9370 - val_loss: 0.2037
 1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.7562 - loss: 0.7717 - val_accuracy: 0.9486 - val_loss: 0.1734
diff --git a/labs/04/mnist_cnn.py b/labs/04/mnist_cnn.py
index 5b8fbc1..b3c5727 100644
--- a/labs/04/mnist_cnn.py
+++ b/labs/04/mnist_cnn.py
@@ -33,6 +33,92 @@
 # If you add more arguments, ReCodEx will keep them with your default values.
 
 
+def create_layer(layer_type: str, layer_args: list[str], hidden):
+    """Apply the layer described by one item of the `--cnn` specification.
+
+    Args:
+        layer_type: Layer code, i.e., the text before the first "-" of the item.
+        layer_args: The remaining "-"-separated fields of the item, as strings.
+        hidden: Output of the network built so far; the new layer is applied to it.
+
+    Returns:
+        The output of the newly added layer(s).
+
+    Raises:
+        ValueError: If `layer_type` is not a supported code or `layer_args` does
+            not have the number of fields that layer expects.
+    """
+    # - `C-filters-kernel_size-stride-padding`: Add a convolutional layer with ReLU
+    #   activation and specified number of filters, kernel size, stride and padding.
+    if layer_type == "C":
+        filters, kernel_size, stride, padding = layer_args
+        return keras.layers.Conv2D(
+            filters=int(filters),
+            kernel_size=int(kernel_size),
+            strides=int(stride),
+            padding=padding,
+            activation="relu",
+        )(hidden)
+
+    # - `CB-filters-kernel_size-stride-padding`: Same as `C`, but use batch normalization.
+    #   In detail, start with a convolutional layer **without bias** and activation,
+    #   then add a batch normalization layer, and finally the ReLU activation.
+    if layer_type == "CB":
+        filters, kernel_size, stride, padding = layer_args
+        hidden = keras.layers.Conv2D(
+            filters=int(filters),
+            kernel_size=int(kernel_size),
+            strides=int(stride),
+            padding=padding,
+            use_bias=False,
+        )(hidden)
+        hidden = keras.layers.BatchNormalization()(hidden)
+        return keras.layers.ReLU()(hidden)
+
+    # - `M-pool_size-stride`: Add max pooling with specified size and stride, using
+    #   the default "valid" padding.
+    if layer_type == "M":
+        pool_size, stride = layer_args
+        return keras.layers.MaxPooling2D(
+            pool_size=int(pool_size),
+            strides=int(stride),
+        )(hidden)
+
+    # - `R-[layers]`: Add a residual connection. The `layers` contain a specification
+    #   of at least one convolutional layer (but not a recursive residual connection `R`).
+    #   The input to the `R` layer should be processed sequentially by `layers`, and the
+    #   produced output (after the ReLU nonlinearity of the last layer) should be added
+    #   to the input (of this `R` layer).
+    if layer_type == "R":
+        # The item was split on "-", which also cut through the bracketed inner
+        # specification; rejoin it and strip the surrounding "[" and "]".
+        inner_layers = "-".join(layer_args)[1:-1].split(",")
+
+        residual = hidden
+        for inner_layer in inner_layers:
+            inner_type, *inner_args = inner_layer.split("-")
+            hidden = create_layer(inner_type, inner_args, hidden)
+
+        return keras.layers.Add()([residual, hidden])
+
+    # - `F`: Flatten inputs. Must appear exactly once in the architecture.
+    if layer_type == "F":
+        return keras.layers.Flatten()(hidden)
+
+    # - `H-hidden_layer_size`: Add a dense layer with ReLU activation and the specified size.
+    if layer_type == "H":
+        (hidden_layer_size,) = layer_args
+        return keras.layers.Dense(units=int(hidden_layer_size), activation="relu")(hidden)
+
+    # - `D-dropout_rate`: Apply dropout with the given dropout rate.
+    if layer_type == "D":
+        (dropout_rate,) = layer_args
+        return keras.layers.Dropout(rate=float(dropout_rate))(hidden)
+
+    # Fail loudly instead of silently returning None for an unrecognized code.
+    raise ValueError(f"Unknown layer type {layer_type!r} in the --cnn specification.")
+
+
 class Model(keras.Model):
     def __init__(self, args: argparse.Namespace) -> None:
         # Create the model. The template uses the functional API, but
@@ -45,107 +131,7 @@ def __init__(self, args: argparse.Namespace) -> None:
         for layer in cnn_args:
             layer_type, *layer_args = layer.split("-")
 
-            # Add CNN layers specified by `args.cnn`, which contains
-            # a comma-separated list of the following layers:
-
-            # - `C-filters-kernel_size-stride-padding`: Add a convolutional layer with ReLU
-            #   activation and specified number of filters, kernel size, stride and padding.
-            if layer_type == "C":
-                filters, kernel_size, stride, padding = layer_args
-                hidden = keras.layers.Conv2D(
-                    filters=int(filters),
-                    kernel_size=(int(kernel_size), int(kernel_size)),
-                    strides=(int(stride), int(stride)),
-                    padding=padding,
-                    activation="relu",
-                )(hidden)
-
-                continue
-
-            # - `CB-filters-kernel_size-stride-padding`: Same as `C`, but use batch normalization.
-            #   In detail, start with a convolutional layer **without bias** and activation,
-            #   then add a batch normalization layer, and finally the ReLU activation.
-            if layer_type == "CB":
-                filters, kernel_size, stride, padding = layer_args
-                hidden = keras.layers.Conv2D(
-                    filters=int(filters),
-                    kernel_size=[int(kernel_size), int(kernel_size)],
-                    strides=[int(stride), int(stride)],
-                    padding=padding,
-                    use_bias=False,
-                )(hidden)
-                hidden = keras.layers.BatchNormalization()(hidden)
-                hidden = keras.layers.ReLU()(hidden)
-                continue
-
-            # - `M-pool_size-stride`: Add max pooling with specified size and stride, using
-            #   the default "valid" padding.
-            if layer_type == "M":
-                pool_size, stride = layer_args
-                hidden = keras.layers.MaxPooling2D(
-                    pool_size=int(pool_size),
-                    strides=[int(stride), int(stride)],
-                )(hidden)
-                continue
-
-            # - `R-[layers]`: Add a residual connection. The `layers` contain a specification
-            #   of at least one convolutional layer (but not a recursive residual connection `R`).
-            #   The input to the `R` layer should be processed sequentially by `layers`, and the
-            #   produced output (after the ReLU nonlinearity of the last layer) should be added
-            #   to the input (of this `R` layer).
-            if layer_type == "R":
-                input_layer = hidden
-                layers = "-".join(layer_args)[1:-1].split(",")
-
-                for layer in layers:
-                    layer_type, *layer_args = layer.split("-")
-
-                    if layer_type == "C":
-                        filters, kernel_size, stride, padding = layer_args
-                        hidden = keras.layers.Conv2D(
-                            filters=int(filters),
-                            kernel_size=(int(kernel_size), int(kernel_size)),
-                            strides=(int(stride), int(stride)),
-                            padding=padding,
-                            activation="relu",
-                        )(hidden)
-                        continue
-
-                    if layer_type == "CB":
-                        filters, kernel_size, stride, padding = layer_args
-                        hidden = keras.layers.Conv2D(
-                            filters=int(filters),
-                            kernel_size=[int(kernel_size), int(kernel_size)],
-                            strides=[int(stride), int(stride)],
-                            padding=padding,
-                            use_bias=False,
-                        )(hidden)
-                        hidden = keras.layers.BatchNormalization()(hidden)
-                        continue
-
-                hidden = add([input_layer, hidden])
-                hidden = keras.layers.ReLU()(hidden)
-
-                continue
-
-            # - `F`: Flatten inputs. Must appear exactly once in the architecture.
-            if layer_type == "F":
-                hidden = keras.layers.Flatten()(hidden)
-                continue
-
-            # - `H-hidden_layer_size`: Add a dense layer with ReLU activation and the specified size.
-            if layer_type == "H":
-                hidden_layer_size = int(layer_args[0])
-                hidden = keras.layers.Dense(hidden_layer_size, activation="relu")(
-                    hidden
-                )
-                continue
-
-            # - `D-dropout_rate`: Apply dropout with the given dropout rate.
-            if layer_type == "D":
-                dropout_rate = float(layer_args[0])
-                hidden = keras.layers.Dropout(dropout_rate)(hidden)
-                continue
+            hidden = create_layer(layer_type, layer_args, hidden)
 
         # You can assume the resulting network is valid; it is fine to crash if it is not.
         #

From 3cdadd194a6ed870599e2588228ddf733c327964 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Thu, 28 Mar 2024 12:55:23 +0100
Subject: [PATCH 61/64] Solve mnist_multiple.py

---
 labs/04/mnist_multiple.ps1 |  11 ++++
 labs/04/mnist_multiple.py  | 105 ++++++++++++++++++++++++++++---------
 2 files changed, 92 insertions(+), 24 deletions(-)
 create mode 100644 labs/04/mnist_multiple.ps1

diff --git a/labs/04/mnist_multiple.ps1 b/labs/04/mnist_multiple.ps1
new file mode 100644
index 0000000..d6d4f08
--- /dev/null
+++ b/labs/04/mnist_multiple.ps1
@@ -0,0 +1,11 @@
+""
+"👉 TEST 1"
+"python3 mnist_multiple.py --epochs=1 --batch_size=50"
+python3 mnist_multiple.py --epochs=1 --batch_size=50
+"direct_comparison_accuracy: 0.7993 - indirect_comparison_accuracy: 0.8930 - loss: 1.6710 - val_direct_comparison_accuracy: 0.9508 - val_indirect_comparison_accuracy: 0.9836 - val_loss: 0.2984"
+""
+"👉 TEST 2"
+"python3 mnist_cnn.py --epochs=1 --cnn=F,H-100,D-0.5"
+python3 mnist_multiple.py --epochs=1 --batch_size=100
+"direct_comparison_accuracy: 0.7680 - indirect_comparison_accuracy: 0.8637 - loss: 2.1429 - val_direct_comparison_accuracy: 0.9288 - val_indirect_comparison_accuracy: 0.9772 - val_loss: 0.4157"
+""
diff --git a/labs/04/mnist_multiple.py b/labs/04/mnist_multiple.py
index 06b9d9e..def13ab 100644
--- a/labs/04/mnist_multiple.py
+++ b/labs/04/mnist_multiple.py
@@ -1,7 +1,10 @@
 #!/usr/bin/env python3
 import argparse
 import os
-os.environ.setdefault("KERAS_BACKEND", "torch")  # Use PyTorch backend unless specified otherwise
+
+os.environ.setdefault(
+    "KERAS_BACKEND", "torch"
+)  # Use PyTorch backend unless specified otherwise
 
 import numpy as np
 import keras
@@ -13,9 +16,13 @@
 # These arguments will be set appropriately by ReCodEx, even if you change them.
 parser.add_argument("--batch_size", default=50, type=int, help="Batch size.")
 parser.add_argument("--epochs", default=5, type=int, help="Number of epochs.")
-parser.add_argument("--recodex", default=False, action="store_true", help="Evaluation in ReCodEx.")
+parser.add_argument(
+    "--recodex", default=False, action="store_true", help="Evaluation in ReCodEx."
+)
 parser.add_argument("--seed", default=42, type=int, help="Random seed.")
-parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.")
+parser.add_argument(
+    "--threads", default=1, type=int, help="Maximum number of threads to use."
+)
 # If you add more arguments, ReCodEx will keep them with your default values.
 
 
@@ -27,7 +34,7 @@ def __init__(self, args: argparse.Namespace) -> None:
             keras.Input(shape=[MNIST.H, MNIST.W, MNIST.C]),
         )
 
-        # TODO: The model starts by passing each input image through the same
+        # The model starts by passing each input image through the same
         # subnetwork (with shared weights), which should perform
         # - keras.layers.Rescaling(1 / 255) to convert images to floats in [0, 1] range,
         # - convolution with 10 filters, 3x3 kernel size, stride 2, "valid" padding, ReLU activation,
@@ -36,24 +43,49 @@ def __init__(self, args: argparse.Namespace) -> None:
         # - fully connected layer with 200 neurons and ReLU activation,
         # obtaining a 200-dimensional feature vector FV of each image.
 
-        # TODO: Using the computed representations, the model should produce four outputs:
+        rescale = keras.layers.Rescaling(1 / 255)
+        c1 = keras.layers.Conv2D(
+            filters=10, kernel_size=3, strides=2, padding="valid", activation="relu"
+        )
+        c2 = keras.layers.Conv2D(
+            filters=20, kernel_size=3, strides=2, padding="valid", activation="relu"
+        )
+        flat = keras.layers.Flatten()
+        hidden = keras.layers.Dense(200, activation="relu")
+
+        fv1 = hidden(flat(c2(c1(rescale(images[0])))))
+        fv2 = hidden(flat(c2(c1(rescale(images[1])))))
+
+        # Using the computed representations, the model should produce four outputs:
         # - first, compute _direct comparison_ whether the first digit is
         #   greater than the second, by
         #   - concatenating the two 200-dimensional image representations FV,
         #   - processing them using another 200-neuron ReLU dense layer
         #   - computing one output using a dense layer with "sigmoid" activation
+        concatenation = keras.layers.Concatenate()([fv1, fv2])
+        hidden2 = keras.layers.Dense(200, activation="relu")
+        pred_layer = keras.layers.Dense(1, activation="sigmoid")
+        direct_comparison = pred_layer(hidden2(concatenation))
         # - then, classify the computed representation FV of the first image using
         #   a densely connected softmax layer into 10 classes;
         # - then, classify the computed representation FV of the second image using
         #   the same layer (identical, i.e., with shared weights) into 10 classes;
+        classification_layer = keras.layers.Dense(10, activation="softmax")
+        d1 = classification_layer(fv1)
+        d2 = classification_layer(fv2)
         # - finally, compute _indirect comparison_ whether the first digit
         #   is greater than second, by comparing the predictions from the above
         #   two outputs; convert the comparison to "float32" using `keras.ops.cast`.
         outputs = {
-            "direct_comparison": ...,
-            "digit_1": ...,
-            "digit_2": ...,
-            "indirect_comparison": ...,
+            "direct_comparison": direct_comparison,
+            "digit_1": d1,
+            "digit_2": d2,
+            "indirect_comparison": keras.ops.cast(
+                keras.ops.greater(
+                    keras.ops.argmax(d1, axis=1), keras.ops.argmax(d2, axis=1)
+                ),
+                "float32",
+            ),
         }
 
         # Finally, construct the model.
@@ -65,7 +97,7 @@ def __init__(self, args: argparse.Namespace) -> None:
         # the keys of the `outputs` dictionary.
         self.output_names = sorted(outputs.keys())
 
-        # TODO: Define the appropriate losses for the model outputs
+        # Define the appropriate losses for the model outputs
         # "direct_comparison", "digit_1", "digit_2". Regarding metrics,
         # the accuracy of both the direct and indirect comparisons should be
         # computed; name both metrics "accuracy" (i.e., pass "accuracy" as the
@@ -73,19 +105,25 @@ def __init__(self, args: argparse.Namespace) -> None:
         self.compile(
             optimizer=keras.optimizers.Adam(),
             loss={
-                "direct_comparison": ...,
-                "digit_1": ...,
-                "digit_2": ...,
+                "direct_comparison": keras.losses.BinaryCrossentropy(),
+                "digit_1": keras.losses.SparseCategoricalCrossentropy(),
+                "digit_2": keras.losses.SparseCategoricalCrossentropy(),
             },
             metrics={
-                "direct_comparison": [...],
-                "indirect_comparison": [...],
+                "direct_comparison": [
+                    keras.metrics.BinaryAccuracy(name="accuracy"),
+                ],
+                "indirect_comparison": [
+                    keras.metrics.BinaryAccuracy(name="accuracy"),
+                ],
             },
         )
 
     # Create an appropriate dataset using the MNIST data.
     def create_dataset(
-        self, mnist_dataset: MNIST.Dataset, args: argparse.Namespace,
+        self,
+        mnist_dataset: MNIST.Dataset,
+        args: argparse.Namespace,
     ) -> torch.utils.data.Dataset:
         # Original MNIST dataset.
         images, labels = mnist_dataset.data["images"], mnist_dataset.data["labels"]
@@ -94,16 +132,27 @@ def create_dataset(
         # You can assume that the size of the original dataset is even.
         class TorchDataset(torch.utils.data.Dataset):
             def __len__(self) -> int:
-                # TODO: The new dataset has half the size of the original one.
-                return ...
+                # The new dataset has half the size of the original one.
+                return len(images) // 2
 
-            def __getitem__(self, index: int) -> tuple[tuple[np.ndarray, np.ndarray], dict[str, np.ndarray]]:
-                # TODO: Given an `index`, generate a dataset element suitable for our model.
+            def __getitem__(
+                self, index: int
+            ) -> tuple[tuple[np.ndarray, np.ndarray], dict[str, np.ndarray]]:
+                # Given an `index`, generate a dataset element suitable for our model.
                 # Notably, the element should be a pair `(input, output)`, with
                 # - `input` being a pair of images `(images[2 * index], images[2 * index + 1])`,
                 # - `output` being a dictionary with keys "digit_1", "digit_2", "direct_comparison",
                 #   and "indirect_comparison".
-                return ...
+                return (
+                    (images[2 * index], images[2 * index + 1]),
+                    {
+                        "digit_1": labels[2 * index],
+                        "digit_2": labels[2 * index + 1],
+                        "direct_comparison": labels[2 * index] > labels[2 * index + 1],
+                        "indirect_comparison": labels[2 * index]
+                        > labels[2 * index + 1],
+                    },
+                )
 
         return TorchDataset()
 
@@ -122,14 +171,22 @@ def main(args: argparse.Namespace) -> dict[str, float]:
     model = Model(args)
 
     # Construct suitable dataloaders from the MNIST data.
-    train = torch.utils.data.DataLoader(model.create_dataset(mnist.train, args), args.batch_size, shuffle=True)
-    dev = torch.utils.data.DataLoader(model.create_dataset(mnist.dev, args), args.batch_size)
+    train = torch.utils.data.DataLoader(
+        model.create_dataset(mnist.train, args), args.batch_size, shuffle=True
+    )
+    dev = torch.utils.data.DataLoader(
+        model.create_dataset(mnist.dev, args), args.batch_size
+    )
 
     # Train
     logs = model.fit(train, epochs=args.epochs, validation_data=dev)
 
     # Return development metrics for ReCodEx to validate.
-    return {metric: values[-1] for metric, values in logs.history.items() if metric.startswith("val_")}
+    return {
+        metric: values[-1]
+        for metric, values in logs.history.items()
+        if metric.startswith("val_")
+    }
 
 
 if __name__ == "__main__":

From 7f52f3ca72f1f0aee2c7dd49437a5482a4375217 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Thu, 28 Mar 2024 13:08:06 +0100
Subject: [PATCH 62/64] Improve test output

---
 labs/04/mnist_multiple.ps1 | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/labs/04/mnist_multiple.ps1 b/labs/04/mnist_multiple.ps1
index d6d4f08..3416b36 100644
--- a/labs/04/mnist_multiple.ps1
+++ b/labs/04/mnist_multiple.ps1
@@ -2,10 +2,10 @@
 "👉 TEST 1"
 "python3 mnist_multiple.py --epochs=1 --batch_size=50"
 python3 mnist_multiple.py --epochs=1 --batch_size=50
-"direct_comparison_accuracy: 0.7993 - indirect_comparison_accuracy: 0.8930 - loss: 1.6710 - val_direct_comparison_accuracy: 0.9508 - val_indirect_comparison_accuracy: 0.9836 - val_loss: 0.2984"
+"275/275 ━━━━━━━━━━━━━━━━━━━━ 11s 38ms/step - direct_comparison_accuracy: 0.7993 - indirect_comparison_accuracy: 0.8930 - loss: 1.6710 - val_direct_comparison_accuracy: 0.9508 - val_indirect_comparison_accuracy: 0.9836 - val_loss: 0.2984"
 ""
 "👉 TEST 2"
-"python3 mnist_cnn.py --epochs=1 --cnn=F,H-100,D-0.5"
+"python3 mnist_multiple.py --epochs=1 --batch_size=100"
 python3 mnist_multiple.py --epochs=1 --batch_size=100
-"direct_comparison_accuracy: 0.7680 - indirect_comparison_accuracy: 0.8637 - loss: 2.1429 - val_direct_comparison_accuracy: 0.9288 - val_indirect_comparison_accuracy: 0.9772 - val_loss: 0.4157"
+"275/275 ━━━━━━━━━━━━━━━━━━━━ 11s 38ms/step - direct_comparison_accuracy: 0.7680 - indirect_comparison_accuracy: 0.8637 - loss: 2.1429 - val_direct_comparison_accuracy: 0.9288 - val_indirect_comparison_accuracy: 0.9772 - val_loss: 0.4157"
 ""

From ca5827f3e07cb9a10a033cae6c7684674e6f402c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Thu, 28 Mar 2024 14:54:14 +0100
Subject: [PATCH 63/64] Solve torch_dataset

---
 labs/04/torch_dataset.ps1 | 11 +++++++++
 labs/04/torch_dataset.py  | 47 +++++++++++++++++++++++++--------------
 2 files changed, 41 insertions(+), 17 deletions(-)
 create mode 100644 labs/04/torch_dataset.ps1

diff --git a/labs/04/torch_dataset.ps1 b/labs/04/torch_dataset.ps1
new file mode 100644
index 0000000..46fa378
--- /dev/null
+++ b/labs/04/torch_dataset.ps1
@@ -0,0 +1,11 @@
+# ""
+# "👉 TEST 1"
+# "python3 torch_dataset.py --epochs=1 --batch_size=100"
+# python3 torch_dataset.py --epochs=1 --batch_size=100
+# "50/50 ━━━━━━━━━━━━━━━━━━━━ 3s 33ms/step - accuracy: 0.1297 - loss: 2.2519 - val_accuracy: 0.2710 - val_loss: 1.9796"
+""
+"👉 TEST 2"
+"python3 torch_dataset.py --epochs=1 --batch_size=50 --augment"
+python3 torch_dataset.py --epochs=1 --batch_size=50 --augment
+"100/100 ━━━━━━━━━━━━━━━━━━━━ 4s 34ms/step - accuracy: 0.1354 - loss: 2.2565 - val_accuracy: 0.2690 - val_loss: 1.9889"
+""
diff --git a/labs/04/torch_dataset.py b/labs/04/torch_dataset.py
index 5e0c330..f689e54 100644
--- a/labs/04/torch_dataset.py
+++ b/labs/04/torch_dataset.py
@@ -53,54 +53,67 @@ def main(args: argparse.Namespace) -> dict[str, float]:
         metrics=[keras.metrics.SparseCategoricalAccuracy(name="accuracy")],
     )
 
-    # TODO: Create a Torch dataset constructible from the given `CIFAR10.Dataset`.
+    # Create a Torch dataset constructible from the given `CIFAR10.Dataset`.
     # You should use only the first `size` examples of the dataset, and optional
     # augmentation function `augmentation_fn` may be applied to the images.
     class TorchDataset(torch.utils.data.Dataset):
+        images: np.ndarray
+        labels: np.ndarray
+        augmentation_fn: callable
+
         def __init__(self, cifar: CIFAR10.Dataset, size: int, augmentation_fn=None) -> None:
-            # TODO: Note that the images and labels are available in `cifar.data["images"]`
+            # Note that the images and labels are available in `cifar.data["images"]`
             # and `cifar.data["labels"]`.
-            ...
+            self.images = cifar.data["images"][:size]
+            self.labels = cifar.data["labels"][:size]
+            self.augmentation_fn = augmentation_fn
 
         def __len__(self) -> int:
-            # TODO: Return the appropriate size.
-            ...
+            # Return the appropriate size.
+            size = len(self.images)
+            return size
+
 
         def __getitem__(self, index: int) -> tuple[np.ndarray | torch.Tensor, int]:
-            # TODO: Return the `index`-th example from the dataset, with the image optionally
+            # Return the `index`-th example from the dataset, with the image optionally
             # passed through the `augmentation_fn` if it is not `None`.
-            ...
+            return self.augmentation_fn(self.images[index]) if self.augmentation_fn else self.images[index], self.labels[index]
 
     if args.augment:
         # Construct a sequence of augmentation transformations from `torchvision.transforms.v2`.
         transformation = v2.Compose([
-            # TODO: Add the following transformations:
+            # Add the following transformations:
             # - first create a `v2.RandomResize` that scales the image to
             #   random size in range [28, 36],
             # - then add `v2.Pad` that pads the image with 4 pixels on each side,
             # - then add `v2.RandomCrop` that chooses a random crop of size 32x32,
             # - and finally add `v2.RandomHorizontalFlip` that uniformly
             #   randomly flips the image horizontally.
-            ...
+            v2.RandomResize(28, 36),
+            v2.Pad(4),
+            v2.RandomCrop(32),
+            v2.RandomHorizontalFlip(),
         ])
 
         def augmentation_fn(image: np.ndarray) -> torch.Tensor:
-            # TODO: First, convert the numpy `images` to a PyTorch tensor of uint8s,
+            # First, convert the numpy `images` to a PyTorch tensor of uint8s,
             # preferably by using `torch.from_numpy` or `torch.as_tensor` to avoid copying.
             # Then, because of the channels-position mismatch, permute the axes
             # in the image to change the order of the axes from HWC to CHW.
             # Next, apply the `transformation` to the image (by calling it with
             # the image as an argument), and finally permute the axes back to
             # the original order.
-            return ...
+
+            return transformation(torch.as_tensor(image).permute(2, 0, 1)).permute(1, 2, 0)
+
     else:
         augmentation_fn = None
 
-    # TODO: Create `train` and `dev` instances of `TorchDataset` from the corresponding
+    # Create `train` and `dev` instances of `TorchDataset` from the corresponding
     # `cifar` datasets. Limit their sizes to 5_000 and 1_000 examples, respectively,
     # and use the `augmentation_fn` for the training dataset.
-    train = ...
-    dev = ...
+    train = TorchDataset(cifar.train, 5_000, augmentation_fn)
+    dev = TorchDataset(cifar.dev, 1_000)
 
     if args.show_images:
         from torch.utils import tensorboard
@@ -114,10 +127,10 @@ def augmentation_fn(image: np.ndarray) -> torch.Tensor:
         tb_writer.close()
         print("Saved first {} training imaged to logs/{}".format(GRID * GRID, TAG))
 
-    # TODO: Create `train` and `dev` instances of `torch.utils.data.DataLoader` from
+    # Create `train` and `dev` instances of `torch.utils.data.DataLoader` from
     # the datasets, using the given `args.batch_size` and shuffling the training dataset.
-    train = ...
-    dev = ...
+    train = torch.utils.data.DataLoader(train, args.batch_size, shuffle=True)
+    dev = torch.utils.data.DataLoader(dev, args.batch_size)
 
     # Train
     logs = model.fit(train, epochs=args.epochs, validation_data=dev)

From 0bb34e18c19691e52652a0d278ae86437a0f68f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?=
 <1959615+joglr@users.noreply.github.com>
Date: Mon, 1 Apr 2024 17:36:24 +0200
Subject: [PATCH 64/64] Solve cifar_competition

---
 labs/04/cifar_competition.ps1 |   1 +
 labs/04/cifar_competition.py  | 148 +++++++++++++++++++++++++++++-----
 2 files changed, 128 insertions(+), 21 deletions(-)
 create mode 100644 labs/04/cifar_competition.ps1

diff --git a/labs/04/cifar_competition.ps1 b/labs/04/cifar_competition.ps1
new file mode 100644
index 0000000..0d919fe
--- /dev/null
+++ b/labs/04/cifar_competition.ps1
@@ -0,0 +1 @@
+clear; python .\cifar_competition.py
diff --git a/labs/04/cifar_competition.py b/labs/04/cifar_competition.py
index 0541de8..be29019 100644
--- a/labs/04/cifar_competition.py
+++ b/labs/04/cifar_competition.py
@@ -3,7 +3,10 @@
 import datetime
 import os
 import re
-os.environ.setdefault("KERAS_BACKEND", "torch")  # Use PyTorch backend unless specified otherwise
+
+os.environ.setdefault(
+    "KERAS_BACKEND", "torch"
+)  # Use PyTorch backend unless specified otherwise
 
 import keras
 import numpy as np
@@ -11,13 +14,23 @@
 
 from cifar10 import CIFAR10
 
-# TODO: Define reasonable defaults and optionally more parameters.
+# Define reasonable defaults and optionally more parameters.
 # Also, you can set the number of threads to 0 to use all your CPU cores.
 parser = argparse.ArgumentParser()
-parser.add_argument("--batch_size", default=..., type=int, help="Batch size.")
-parser.add_argument("--epochs", default=..., type=int, help="Number of epochs.")
+parser.add_argument("--batch_size", default=128, type=int, help="Batch size.")
+parser.add_argument("--epochs", default=30, type=int, help="Number of epochs.")
+# parser.add_argument("--epochs", default=200, type=int, help="Number of epochs.")
+parser.add_argument("--learning_rate", default=0.001, type=float, help="Initial learning rate")
+parser.add_argument(
+    "--weight_decay", default=1e-4, type=float, help="L2 regularization weight decay."
+)
+parser.add_argument(
+    "--label_smoothing", default=0.1, type=float, help="Label smoothing."
+)
 parser.add_argument("--seed", default=42, type=int, help="Random seed.")
-parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.")
+parser.add_argument(
+    "--threads", default=1, type=int, help="Maximum number of threads to use."
+)
 
 
 class TorchTensorBoardCallback(keras.callbacks.Callback):
@@ -28,7 +41,10 @@ def __init__(self, path):
     def writer(self, writer):
         if writer not in self._writers:
             import torch.utils.tensorboard
-            self._writers[writer] = torch.utils.tensorboard.SummaryWriter(os.path.join(self._path, writer))
+
+            self._writers[writer] = torch.utils.tensorboard.SummaryWriter(
+                os.path.join(self._path, writer)
+            )
         return self._writers[writer]
 
     def add_logs(self, writer, logs, step):
@@ -39,13 +55,51 @@ def add_logs(self, writer, logs, step):
 
     def on_epoch_end(self, epoch, logs=None):
         if logs:
-            if isinstance(getattr(self.model, "optimizer", None), keras.optimizers.Optimizer):
-                logs = logs | {"learning_rate": keras.ops.convert_to_numpy(self.model.optimizer.learning_rate)}
-            self.add_logs("train", {k: v for k, v in logs.items() if not k.startswith("val_")}, epoch + 1)
-            self.add_logs("val", {k[4:]: v for k, v in logs.items() if k.startswith("val_")}, epoch + 1)
-
+            if isinstance(
+                getattr(self.model, "optimizer", None), keras.optimizers.Optimizer
+            ):
+                logs = logs | {
+                    "learning_rate": keras.ops.convert_to_numpy(
+                        self.model.optimizer.learning_rate
+                    )
+                }
+            self.add_logs(
+                "train",
+                {k: v for k, v in logs.items() if not k.startswith("val_")},
+                epoch + 1,
+            )
+            self.add_logs(
+                "val",
+                {k[4:]: v for k, v in logs.items() if k.startswith("val_")},
+                epoch + 1,
+            )
+
+def create_res(input_layer, filters, kernel_size, strides):
+    h = keras.layers.Conv2D(
+        filters=filters,
+        kernel_size=kernel_size,
+        strides=strides,
+        padding="same",
+        activation=None,
+    )(input_layer)
+
+    h = keras.layers.BatchNormalization()(h)
+    h = keras.layers.Activation("relu")(h)
+    h = keras.layers.Conv2D(
+        filters=filters,
+        kernel_size=kernel_size,
+        strides=1,
+        padding="same",
+        activation=None,
+        use_bias=False,
+    )(h)
+    h = keras.layers.BatchNormalization()(h)
+    h = keras.layers.Add()([input_layer, h])
+    h = keras.layers.Activation("relu")(h)
+    return h
 
 def main(args: argparse.Namespace) -> None:
+
     # Set the random seed and the number of threads.
     keras.utils.set_random_seed(args.seed)
     if args.threads:
@@ -53,23 +107,75 @@ def main(args: argparse.Namespace) -> None:
         torch.set_num_interop_threads(args.threads)
 
     # Create logdir name
-    args.logdir = os.path.join("logs", "{}-{}-{}".format(
-        os.path.basename(globals().get("__file__", "notebook")),
-        datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"),
-        ",".join(("{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v) for k, v in sorted(vars(args).items())))
-    ))
+    args.logdir = os.path.join(
+        "logs",
+        "{}-{}-{}".format(
+            os.path.basename(globals().get("__file__", "notebook")),
+            datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"),
+            ",".join(
+                (
+                    "{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v)
+                    for k, v in sorted(vars(args).items())
+                )
+            ),
+        ),
+    )
 
     # Load data
     cifar = CIFAR10()
 
-    # TODO: Create the model and train it
-    model = ...
+    # Create the model and train it
+    inputs = keras.Input(shape=cifar.train.data["images"][0].shape)
+    h = keras.layers.Rescaling(1 / 255)(inputs)
+    h = keras.layers.Conv2D(64, 3, 1, "same", activation="relu")(h)
+    h = create_res(h, 64, 3, 1)
+    h = keras.layers.MaxPool2D(2)(h)
+    h = create_res(h, 64, 3, 1)
+    h = keras.layers.MaxPool2D(2)(h)
+    h = keras.layers.Dropout(0.2)(h)
+    h = create_res(h, 64, 3, 1)
+    h = keras.layers.Flatten()(h)
+    h = keras.layers.Dropout(0.2)(h)
+    h = keras.layers.Dense(200, activation="relu")(h)
+    outputs = keras.layers.Dense(len(CIFAR10.LABELS), activation="softmax")(h)
+
+    model = keras.Model(inputs=inputs, outputs=outputs)
+
+    model.summary()
+
+    # decay_steps = ceil(num_images / batch_size) * epochs, i.e. total optimizer steps.
+    lr_optimizer = keras.optimizers.schedules.CosineDecay(
+        initial_learning_rate=args.learning_rate,
+        decay_steps=-(-len(cifar.train.data["images"]) // args.batch_size) * args.epochs,
+    )
+
+    model.compile(
+        optimizer=keras.optimizers.Adam(
+            learning_rate=lr_optimizer,
+            weight_decay=args.weight_decay),
+        loss=keras.losses.SparseCategoricalCrossentropy(),
+        metrics=[keras.metrics.SparseCategoricalAccuracy(name="accuracy")],
+    )
+
+    model.fit(
+        cifar.train.data["images"],
+        cifar.train.data["labels"],
+        batch_size=args.batch_size,
+        epochs=args.epochs,
+
+    )
+
+    model.save(os.path.join(args.logdir, "cifar.h5"), include_optimizer=False)
 
     # Generate test set annotations, but in `args.logdir` to allow parallel execution.
     os.makedirs(args.logdir, exist_ok=True)
-    with open(os.path.join(args.logdir, "cifar_competition_test.txt"), "w", encoding="utf-8") as predictions_file:
-        # TODO: Perform the prediction on the test data.
-        for probs in model.predict(...):
+    with open(
+        os.path.join(args.logdir, "cifar_competition_test.txt"), "w", encoding="utf-8"
+    ) as predictions_file:
+        # Perform the prediction on the test data.
+        for probs in model.predict(
+            cifar.test.data["images"], batch_size=args.batch_size
+        ):
             print(np.argmax(probs), file=predictions_file)