From 9645fe90a55eb2f8d247d34b4950b67e53083aa8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Wed, 28 Feb 2024 14:06:13 +0100 Subject: [PATCH 01/64] Update user id --- labs/team_description.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/labs/team_description.py b/labs/team_description.py index 14ed5e1..8207533 100644 --- a/labs/team_description.py +++ b/labs/team_description.py @@ -6,4 +6,4 @@ # # You can find out ReCodEx ID in the URL bar after navigating # to your User profile page. The ID has the following format: -# 01234567-89ab-cdef-0123-456789abcdef. +# 31a0a96a-c590-4486-b194-f72765b2ce25 From d7c8844f16e5d7632ddf7a2046458becbf5e2e57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Sun, 3 Mar 2024 15:28:32 +0100 Subject: [PATCH 02/64] Solve numpy_entropy --- labs/01/numpy_entropy.py | 44 ++++++++++++++++++++++++---------------- labs/01/test.ps1 | 4 ++++ 2 files changed, 30 insertions(+), 18 deletions(-) create mode 100644 labs/01/test.ps1 diff --git a/labs/01/numpy_entropy.py b/labs/01/numpy_entropy.py index 8e86bff..6cab8aa 100644 --- a/labs/01/numpy_entropy.py +++ b/labs/01/numpy_entropy.py @@ -12,42 +12,50 @@ def main(args: argparse.Namespace) -> tuple[float, float, float]: - # TODO: Load data distribution, each line containing a datapoint -- a string. + # Load data distribution, each line containing a datapoint -- a string. + data_map = {} + with open(args.data_path, "r") as data: for line in data: line = line.rstrip("\n") - # TODO: Process the line, aggregating data with built-in Python + + # Process the line, aggregating data with built-in Python # data structures (not NumPy, which is not suitable for incremental # addition and string mapping). + if line in data_map: + data_map[line] += 1 + else: + data_map[line] = 1 - # TODO: Create a NumPy array containing the data distribution. The + # Create a NumPy array containing the data distribution. The # NumPy array should contain only data, not any mapping. Alternatively, # the NumPy array might be created after loading the model distribution. + data_dist = np.array(list(data_map.values())) / sum(data_map.values()) + + # Load model distribution, each line `string \t probability`. + model_map = {} - # TODO: Load model distribution, each line `string \t probability`. with open(args.model_path, "r") as model: for line in model: line = line.rstrip("\n") - # TODO: Process the line, aggregating using Python data structures. + key, value = line.split("\t") + model_map[key] = float(value) - # TODO: Create a NumPy array containing the model distribution. + # Create a NumPy array containing the model distribution. + model_dist = np.array([model_map[key] if key in model_map else np.inf for key in data_map.keys()]) - # TODO: Compute the entropy H(data distribution). You should not use - # manual for/while cycles, but instead use the fact that most NumPy methods - # operate on all elements (for example `*` is vector element-wise multiplication). - entropy = ... + # Compute the entropy H(data distribution). + entropy = -np.sum(data_dist * np.log(data_dist)) - # TODO: Compute cross-entropy H(data distribution, model distribution). - # When some data distribution elements are missing in the model distribution, - # return `np.inf`. - crossentropy = ... + # Compute cross-entropy H(data distribution, model distribution). + crossentropy = -np.sum(data_dist * np.log(model_dist)) - # TODO: Compute KL-divergence D_KL(data distribution, model_distribution), - # again using `np.inf` when needed. - kl_divergence = ... + # Compute KL-divergence D_KL(data distribution, model_distribution). + kl_divergence = crossentropy - entropy + # kl_divergence = np.where(np.isinf(kl_divergence), np.inf, kl_divergence) # Return the computed values for ReCodEx to validate. - return entropy, crossentropy, kl_divergence + return entropy, crossentropy if np.isfinite(crossentropy) else np.inf, kl_divergence if np.isfinite(kl_divergence) else np.inf if __name__ == "__main__": diff --git a/labs/01/test.ps1 b/labs/01/test.ps1 new file mode 100644 index 0000000..1a8e7cd --- /dev/null +++ b/labs/01/test.ps1 @@ -0,0 +1,4 @@ +python3 numpy_entropy.py --data_path numpy_entropy_data_1.txt --model_path numpy_entropy_model_1.txt +python3 numpy_entropy.py --data_path numpy_entropy_data_2.txt --model_path numpy_entropy_model_2.txt +python3 numpy_entropy.py --data_path numpy_entropy_data_3.txt --model_path numpy_entropy_model_3.txt +python3 numpy_entropy.py --data_path numpy_entropy_data_4.txt --model_path numpy_entropy_model_4.txt From 372885d8db56575c9e443ddd2395f8bcd120f87d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Sun, 3 Mar 2024 15:31:05 +0100 Subject: [PATCH 03/64] Add pull.sh script to automate upstream pull --- pull.sh | 1 + 1 file changed, 1 insertion(+) create mode 100644 pull.sh diff --git a/pull.sh b/pull.sh new file mode 100644 index 0000000..9cadfe4 --- /dev/null +++ b/pull.sh @@ -0,0 +1 @@ +git pull upstream master From 161e5c90986f18b7bd1d08756883cc3fe4d03f13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Mon, 4 Mar 2024 12:48:47 +0100 Subject: [PATCH 04/64] Fix reshape and compute covariance matrix in pca_first.keras.py and pca_first.py --- labs/01/pca_first.keras.py | 16 ++++++++-------- labs/01/pca_first.py | 15 ++++++++------- labs/01/run.ps1 | 1 + labs/01/test.ps1 | 2 +- 4 files changed, 18 insertions(+), 16 deletions(-) create mode 100644 labs/01/run.ps1 diff --git a/labs/01/pca_first.keras.py b/labs/01/pca_first.keras.py index 1f99e21..c81108a 100644 --- a/labs/01/pca_first.keras.py +++ b/labs/01/pca_first.keras.py @@ -32,27 +32,27 @@ def main(args: argparse.Namespace) -> tuple[float, float]: data_indices = np.random.choice(mnist.train.size, size=args.examples, replace=False) data = keras.ops.convert_to_tensor(mnist.train.data["images"][data_indices] / 255, dtype="float32") - # TODO: Data has shape [args.examples, MNIST.H, MNIST.W, MNIST.C]. + # Data has shape [args.examples, MNIST.H, MNIST.W, MNIST.C]. # We want to reshape it to [args.examples, MNIST.H * MNIST.W * MNIST.C]. # We can do so using `keras.ops.reshape(data, new_shape)` with new shape # `[data.shape[0], data.shape[1] * data.shape[2] * data.shape[3]]`. - data = ... + data = keras.ops.reshape(data, [data.shape[0], data.shape[1] * data.shape[2] * data.shape[3]]) - # TODO: Now compute mean of every feature. Use `keras.ops.mean`, and set + # Now compute mean of every feature. Use `keras.ops.mean`, and set # `axis` to zero -- therefore, the mean will be computed across the first # dimension, so across examples. - mean = ... + mean = keras.ops.mean(data, axis=0) - # TODO: Compute the covariance matrix. The covariance matrix is + # Compute the covariance matrix. The covariance matrix is # (data - mean)^T * (data - mean) / data.shape[0] # where transpose can be computed using `keras.ops.transpose` and # matrix multiplication using either Python operator @ or `keras.ops.matmul`. - cov = ... + cov = keras.ops.transpose(data-mean) @ (data-mean) / data.shape[0] - # TODO: Compute the total variance, which is the sum of the diagonal + # Compute the total variance, which is the sum of the diagonal # of the covariance matrix. To extract the diagonal use `keras.ops.diagonal`, # and to sum a tensor use `keras.ops.sum`. - total_variance = ... + total_variance = keras.ops.sum(keras.ops.diagonal(cov)) # TODO: Now run `args.iterations` of the power iteration algorithm. # Start with a vector of `cov.shape[0]` ones of type `"float32"` using `keras.ops.ones`. diff --git a/labs/01/pca_first.py b/labs/01/pca_first.py index 2e4ef10..0300441 100644 --- a/labs/01/pca_first.py +++ b/labs/01/pca_first.py @@ -30,30 +30,31 @@ def main(args: argparse.Namespace) -> tuple[float, float]: data_indices = np.random.choice(mnist.train.size, size=args.examples, replace=False) data = torch.tensor(mnist.train.data["images"][data_indices] / 255, dtype=torch.float32) - # TODO: Data has shape [args.examples, MNIST.H, MNIST.W, MNIST.C]. + # Data has shape [args.examples, MNIST.H, MNIST.W, MNIST.C]. # We want to reshape it to [args.examples, MNIST.H * MNIST.W * MNIST.C]. # We can do so using `torch.reshape(data, new_shape)` with new shape # `[data.shape[0], data.shape[1] * data.shape[2] * data.shape[3]]`. - data = ... + data = data.reshape(args.examples, MNIST.H, MNIST.W, MNIST.C) - # TODO: Now compute mean of every feature. Use `torch.mean`, and set + # Now compute mean of every feature. Use `torch.mean`, and set # `dim` (or `axis`) argument to zero -- therefore, the mean will be # computed across the first dimension, so across examples. # # Note that for compatibility with Numpy/TF/Keras, all `dim` arguments # in PyTorch can be also called `axis`. - mean = ... + mean = torch.mean(data, dim=0) - # TODO: Compute the covariance matrix. The covariance matrix is + # Compute the covariance matrix. The covariance matrix is # (data - mean)^T * (data - mean) / data.shape[0] # where transpose can be computed using `torch.transpose` or `torch.t` and # matrix multiplication using either Python operator @ or `torch.matmul`. - cov = ... + cov = (data-mean).t @ (data-mean) / data.shape[0] + print(cov) # TODO: Compute the total variance, which is the sum of the diagonal # of the covariance matrix. To extract the diagonal use `torch.diagonal`, # and to sum a tensor use `torch.sum`. - total_variance = ... + total_variance = torch.diagonal(cov).sum() # TODO: Now run `args.iterations` of the power iteration algorithm. # Start with a vector of `cov.shape[0]` ones of type `torch.float32` using `torch.ones`. diff --git a/labs/01/run.ps1 b/labs/01/run.ps1 new file mode 100644 index 0000000..a68f5e8 --- /dev/null +++ b/labs/01/run.ps1 @@ -0,0 +1 @@ +..\..\.venv\Scripts\python .\pca_first.keras.py diff --git a/labs/01/test.ps1 b/labs/01/test.ps1 index 1a8e7cd..75ddf37 100644 --- a/labs/01/test.ps1 +++ b/labs/01/test.ps1 @@ -1,4 +1,4 @@ python3 numpy_entropy.py --data_path numpy_entropy_data_1.txt --model_path numpy_entropy_model_1.txt python3 numpy_entropy.py --data_path numpy_entropy_data_2.txt --model_path numpy_entropy_model_2.txt python3 numpy_entropy.py --data_path numpy_entropy_data_3.txt --model_path numpy_entropy_model_3.txt -python3 numpy_entropy.py --data_path numpy_entropy_data_4.txt --model_path numpy_entropy_model_4.txt +spython3 numpy_entropy.py --data_path numpy_entropy_data_4.txt --model_path numpy_entropy_model_4.txt From bfa91ea76db4b46bad8f6347720d6c5bf2163ab8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Mon, 4 Mar 2024 12:48:55 +0100 Subject: [PATCH 05/64] Add .gitignore, pull.ps1, and setup.ps1 files --- .gitignore | 1 + pull.sh => pull.ps1 | 0 setup.ps1 | 1 + 3 files changed, 2 insertions(+) create mode 100644 .gitignore rename pull.sh => pull.ps1 (100%) create mode 100644 setup.ps1 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1d17dae --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.venv diff --git a/pull.sh b/pull.ps1 similarity index 100% rename from pull.sh rename to pull.ps1 diff --git a/setup.ps1 b/setup.ps1 new file mode 100644 index 0000000..8fd7b89 --- /dev/null +++ b/setup.ps1 @@ -0,0 +1 @@ +.venv/Scripts/pip install -r .\labs\requirements.txt From cba46bcf3a3bd8478cba7fe00c9d6285ede13b9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Mon, 4 Mar 2024 18:35:38 +0100 Subject: [PATCH 06/64] Update team description --- labs/team_description.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/labs/team_description.py b/labs/team_description.py index 8207533..1d232bc 100644 --- a/labs/team_description.py +++ b/labs/team_description.py @@ -6,4 +6,7 @@ # # You can find out ReCodEx ID in the URL bar after navigating # to your User profile page. The ID has the following format: +# Jonas Glerup Røssum # 31a0a96a-c590-4486-b194-f72765b2ce25 +# Xiao Wang +# 91d4d1d7-b800-4765-96b9-df098ac36a66 From 76d549e4b00f9a818472c4db2a3fced78e3d0745 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Mon, 4 Mar 2024 19:11:54 +0100 Subject: [PATCH 07/64] Solve pca_first.keras.py --- labs/01/pca_first.keras.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/labs/01/pca_first.keras.py b/labs/01/pca_first.keras.py index c81108a..028a8ad 100644 --- a/labs/01/pca_first.keras.py +++ b/labs/01/pca_first.keras.py @@ -54,17 +54,21 @@ def main(args: argparse.Namespace) -> tuple[float, float]: # and to sum a tensor use `keras.ops.sum`. total_variance = keras.ops.sum(keras.ops.diagonal(cov)) - # TODO: Now run `args.iterations` of the power iteration algorithm. + # Now run `args.iterations` of the power iteration algorithm. # Start with a vector of `cov.shape[0]` ones of type `"float32"` using `keras.ops.ones`. - v = ... + v = keras.ops.ones(cov.shape[0], dtype="float32") for i in range(args.iterations): - # TODO: In the power iteration algorithm, we compute + # In the power iteration algorithm, we compute # 1. v = cov v # The matrix-vector multiplication can be computed as regular matrix multiplication. + v = keras.ops.matmul(cov, v) + # 2. s = l2_norm(v) # The l2_norm can be computed using for example `keras.ops.norm`. + s = keras.ops.norm(v, 2) + # 3. v = v / s - pass + v = v / s # The `v` is now approximately the eigenvector of the largest eigenvalue, `s`. # We now compute the explained variance, which is the ratio of `s` and `total_variance`. From eda8cab77109bb77a371818b8cae43462d50bf07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Mon, 4 Mar 2024 19:12:28 +0100 Subject: [PATCH 08/64] Specify encoding --- labs/01/numpy_entropy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/labs/01/numpy_entropy.py b/labs/01/numpy_entropy.py index 6cab8aa..4855b8a 100644 --- a/labs/01/numpy_entropy.py +++ b/labs/01/numpy_entropy.py @@ -15,7 +15,7 @@ def main(args: argparse.Namespace) -> tuple[float, float, float]: # Load data distribution, each line containing a datapoint -- a string. data_map = {} - with open(args.data_path, "r") as data: + with open(args.data_path, "r", encoding="utf-8") as data: for line in data: line = line.rstrip("\n") From c84f9a3ae6d56a34933820ae2e324e6092335662 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Mon, 4 Mar 2024 19:15:48 +0100 Subject: [PATCH 09/64] Add Lisa's solution --- labs/01/pca_first.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/labs/01/pca_first.py b/labs/01/pca_first.py index 0300441..d1e18ca 100644 --- a/labs/01/pca_first.py +++ b/labs/01/pca_first.py @@ -34,7 +34,7 @@ def main(args: argparse.Namespace) -> tuple[float, float]: # We want to reshape it to [args.examples, MNIST.H * MNIST.W * MNIST.C]. # We can do so using `torch.reshape(data, new_shape)` with new shape # `[data.shape[0], data.shape[1] * data.shape[2] * data.shape[3]]`. - data = data.reshape(args.examples, MNIST.H, MNIST.W, MNIST.C) + data = torch.reshape(data, (data.shape[0], data.shape[1] * data.shape[2] * data.shape[3])) # Now compute mean of every feature. Use `torch.mean`, and set # `dim` (or `axis`) argument to zero -- therefore, the mean will be @@ -42,32 +42,34 @@ def main(args: argparse.Namespace) -> tuple[float, float]: # # Note that for compatibility with Numpy/TF/Keras, all `dim` arguments # in PyTorch can be also called `axis`. - mean = torch.mean(data, dim=0) + mean = torch.mean(data, axis=0) # Compute the covariance matrix. The covariance matrix is # (data - mean)^T * (data - mean) / data.shape[0] # where transpose can be computed using `torch.transpose` or `torch.t` and # matrix multiplication using either Python operator @ or `torch.matmul`. - cov = (data-mean).t @ (data-mean) / data.shape[0] - print(cov) + cov = torch.matmul(torch.t(data-mean), data-mean)/data.shape[0] # TODO: Compute the total variance, which is the sum of the diagonal # of the covariance matrix. To extract the diagonal use `torch.diagonal`, # and to sum a tensor use `torch.sum`. - total_variance = torch.diagonal(cov).sum() + total_variance = torch.sum(torch.diagonal(cov)).item() # TODO: Now run `args.iterations` of the power iteration algorithm. # Start with a vector of `cov.shape[0]` ones of type `torch.float32` using `torch.ones`. - v = ... + v = torch.ones(cov.shape[0], dtype=torch.float32) + for i in range(args.iterations): - # TODO: In the power iteration algorithm, we compute - # 1. v = cov v - # The matrix-vector multiplication can be computed as regular matrix multiplication - # or using `torch.mv`. - # 2. s = l2_norm(v) - # The l2_norm can be computed using for example `torch.linalg.vector_norm`. - # 3. v = v / s - pass + # TODO: In the power iteration algorithm, we compute + # 1. v = cov v + # The matrix-vector multiplication can be computed as regular matrix multiplication + # or using `torch.mv`. + # 2. s = l2_norm(v) + # The l2_norm can be computed using for example `torch.linalg.vector_norm`. + # 3. v = v / s + v = cov*v + s = torch.linalg.vector_norm(v) + v = v/s # The `v` is now approximately the eigenvector of the largest eigenvalue, `s`. # We now compute the explained variance, which is the ratio of `s` and `total_variance`. From 4d12eab7b188f4dd458c857d1e0d15e03b9cca8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Mon, 4 Mar 2024 19:18:44 +0100 Subject: [PATCH 10/64] Use matrix multiplication instead of element-wise multiplication --- labs/01/pca_first.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/labs/01/pca_first.py b/labs/01/pca_first.py index d1e18ca..ade3559 100644 --- a/labs/01/pca_first.py +++ b/labs/01/pca_first.py @@ -67,7 +67,7 @@ def main(args: argparse.Namespace) -> tuple[float, float]: # 2. s = l2_norm(v) # The l2_norm can be computed using for example `torch.linalg.vector_norm`. # 3. v = v / s - v = cov*v + v = cov @ v s = torch.linalg.vector_norm(v) v = v/s From ca7e4bd4a778c37db929fc98e89c0380b31e3775 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Mon, 4 Mar 2024 19:18:50 +0100 Subject: [PATCH 11/64] Fix test script --- labs/01/test.ps1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/labs/01/test.ps1 b/labs/01/test.ps1 index 75ddf37..1a8e7cd 100644 --- a/labs/01/test.ps1 +++ b/labs/01/test.ps1 @@ -1,4 +1,4 @@ python3 numpy_entropy.py --data_path numpy_entropy_data_1.txt --model_path numpy_entropy_model_1.txt python3 numpy_entropy.py --data_path numpy_entropy_data_2.txt --model_path numpy_entropy_model_2.txt python3 numpy_entropy.py --data_path numpy_entropy_data_3.txt --model_path numpy_entropy_model_3.txt -spython3 numpy_entropy.py --data_path numpy_entropy_data_4.txt --model_path numpy_entropy_model_4.txt +python3 numpy_entropy.py --data_path numpy_entropy_data_4.txt --model_path numpy_entropy_model_4.txt From 3f6bde2fffccaa7d7af275f81b567b6b1a55b81d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Tue, 5 Mar 2024 09:02:13 +0100 Subject: [PATCH 12/64] Solve mnist_layers_activations.py --- labs/01/expected.txt | 39 +++++++ labs/01/mnist.ps1 | 24 ++++ labs/01/mnist_layers_activations.py | 10 +- labs/01/output.txt | 167 ++++++++++++++++++++++++++++ 4 files changed, 239 insertions(+), 1 deletion(-) create mode 100644 labs/01/expected.txt create mode 100644 labs/01/mnist.ps1 create mode 100644 labs/01/output.txt diff --git a/labs/01/expected.txt b/labs/01/expected.txt new file mode 100644 index 0000000..fdaf786 --- /dev/null +++ b/labs/01/expected.txt @@ -0,0 +1,39 @@ +python3 mnist_layers_activations.py --hidden_layers=0 --activation=none +Epoch 1/10 accuracy: 0.7801 - loss: 0.8405 - val_accuracy: 0.9300 - val_loss: 0.2716 +Epoch 5/10 accuracy: 0.9222 - loss: 0.2792 - val_accuracy: 0.9406 - val_loss: 0.2203 +Epoch 10/10 accuracy: 0.9304 - loss: 0.2515 - val_accuracy: 0.9432 - val_loss: 0.2159 + +python3 mnist_layers_activations.py --hidden_layers=1 --activation=none +Epoch 1/10 accuracy: 0.8483 - loss: 0.5230 - val_accuracy: 0.9352 - val_loss: 0.2422 +Epoch 5/10 accuracy: 0.9236 - loss: 0.2758 - val_accuracy: 0.9360 - val_loss: 0.2325 +Epoch 10/10 accuracy: 0.9298 - loss: 0.2517 - val_accuracy: 0.9354 - val_loss: 0.2439 + +python3 mnist_layers_activations.py --hidden_layers=1 --activation=relu +Epoch 1/10 accuracy: 0.8503 - loss: 0.5286 - val_accuracy: 0.9604 - val_loss: 0.1432 +Epoch 5/10 accuracy: 0.9824 - loss: 0.0613 - val_accuracy: 0.9808 - val_loss: 0.0740 +Epoch 10/10 accuracy: 0.9948 - loss: 0.0202 - val_accuracy: 0.9788 - val_loss: 0.0821 + +python3 mnist_layers_activations.py --hidden_layers=1 --activation=tanh +Epoch 1/10 accuracy: 0.8529 - loss: 0.5183 - val_accuracy: 0.9564 - val_loss: 0.1632 +Epoch 5/10 accuracy: 0.9800 - loss: 0.0728 - val_accuracy: 0.9740 - val_loss: 0.0853 +Epoch 10/10 accuracy: 0.9948 - loss: 0.0244 - val_accuracy: 0.9782 - val_loss: 0.0772 + +python3 mnist_layers_activations.py --hidden_layers=1 --activation=sigmoid +Epoch 1/10 accuracy: 0.7851 - loss: 0.8650 - val_accuracy: 0.9414 - val_loss: 0.2196 +Epoch 5/10 accuracy: 0.9647 - loss: 0.1270 - val_accuracy: 0.9704 - val_loss: 0.1079 +Epoch 10/10 accuracy: 0.9852 - loss: 0.0583 - val_accuracy: 0.9756 - val_loss: 0.0837 + +python3 mnist_layers_activations.py --hidden_layers=3 --activation=relu +Epoch 1/10 accuracy: 0.8497 - loss: 0.5011 - val_accuracy: 0.9664 - val_loss: 0.1225 +Epoch 5/10 accuracy: 0.9862 - loss: 0.0438 - val_accuracy: 0.9734 - val_loss: 0.1026 +Epoch 10/10 accuracy: 0.9932 - loss: 0.0202 - val_accuracy: 0.9818 - val_loss: 0.0865 + +python3 mnist_layers_activations.py --hidden_layers=10 --activation=relu +Epoch 1/10 accuracy: 0.7710 - loss: 0.6793 - val_accuracy: 0.9570 - val_loss: 0.1479 +Epoch 5/10 accuracy: 0.9780 - loss: 0.0783 - val_accuracy: 0.9786 - val_loss: 0.0808 +Epoch 10/10 accuracy: 0.9869 - loss: 0.0481 - val_accuracy: 0.9724 - val_loss: 0.1163 + +python3 mnist_layers_activations.py --hidden_layers=10 --activation=sigmoid +Epoch 1/10 accuracy: 0.1072 - loss: 2.3068 - val_accuracy: 0.1784 - val_loss: 2.1247 +Epoch 5/10 accuracy: 0.8825 - loss: 0.4776 - val_accuracy: 0.9164 - val_loss: 0.3686 +Epoch 10/10 accuracy: 0.9294 - loss: 0.2994 - val_accuracy: 0.9386 - val_loss: 0.2671 diff --git a/labs/01/mnist.ps1 b/labs/01/mnist.ps1 new file mode 100644 index 0000000..a274269 --- /dev/null +++ b/labs/01/mnist.ps1 @@ -0,0 +1,24 @@ +# Write-Output "python3 mnist_layers_activations.py --hidden_layers=0 --activation=none" +..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=0 --activation=none +# Write-Output "" +# Write-Output "python3 mnist_layers_activations.py --hidden_layers=1 --activation=none" +..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=1 --activation=none +# Write-Output "" +# Write-Output "python3 mnist_layers_activations.py --hidden_layers=1 --activation=relu" +..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=1 --activation=relu +# Write-Output "" +# Write-Output "python3 mnist_layers_activations.py --hidden_layers=1 --activation=tanh" +..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=1 --activation=tanh +# Write-Output "" +# Write-Output "python3 mnist_layers_activations.py --hidden_layers=1 --activation=sigmoid" +..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=1 --activation=sigmoid +# Write-Output "" +# Write-Output "python3 mnist_layers_activations.py --hidden_layers=3 --activation=relu" +..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=3 --activation=relu +# Write-Output "" +# Write-Output "python3 mnist_layers_activations.py --hidden_layers=10 --activation=relu" +..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=10 --activation=relu +# Write-Output "" +# Write-Output "python3 mnist_layers_activations.py --hidden_layers=10 --activation=sigmoid" +..\..\.venv\Scripts\python mnist_layers_activations.py --hidden_layers=10 --activation=sigmoid +# Write-Output "" diff --git a/labs/01/mnist_layers_activations.py b/labs/01/mnist_layers_activations.py index d58b796..0ae93ab 100644 --- a/labs/01/mnist_layers_activations.py +++ b/labs/01/mnist_layers_activations.py @@ -68,7 +68,7 @@ def main(args: argparse.Namespace) -> dict[str, float]: # Create the model model = keras.Sequential() model.add(keras.Input([MNIST.H, MNIST.W, MNIST.C])) - # TODO: Finish the model. Namely: + # Finish the model. Namely: # - start by adding a `keras.layers.Rescaling(1 / 255)` layer; # - then add a `keras.layers.Flatten()` layer; # - add `args.hidden_layers` number of fully connected hidden layers @@ -76,6 +76,14 @@ def main(args: argparse.Namespace) -> dict[str, float]: # from `args.activation`, allowing "none", "relu", "tanh", "sigmoid"; # - finally, add an output fully connected layer with `MNIST.LABELS` units # and `softmax` activation. + model.add(keras.layers.Rescaling(1 / 255)) + model.add(keras.layers.Flatten()) + + for _ in range(args.hidden_layers): + activation = None if args.activation == "none" else args.activation + model.add(keras.layers.Dense(args.hidden_layer, activation=activation)) + + model.add(keras.layers.Dense(MNIST.LABELS, activation="softmax")) model.compile( optimizer=keras.optimizers.Adam(), diff --git a/labs/01/output.txt b/labs/01/output.txt new file mode 100644 index 0000000..916c534 --- /dev/null +++ b/labs/01/output.txt @@ -0,0 +1,167 @@ +Epoch 1/10 +1100/1100 14s 12ms/step - accuracy: 0.7761 - loss: 0.8442 - val_accuracy: 0.9298 - val_loss: 0.2730 +Epoch 2/10 +1100/1100 12s 11ms/step - accuracy: 0.9057 - loss: 0.3428 - val_accuracy: 0.9336 - val_loss: 0.2418 +Epoch 3/10 +1100/1100 11s 10ms/step - accuracy: 0.9177 - loss: 0.2945 - val_accuracy: 0.9366 - val_loss: 0.2284 +Epoch 4/10 +1100/1100 12s 10ms/step - accuracy: 0.9193 - loss: 0.2839 - val_accuracy: 0.9384 - val_loss: 0.2267 +Epoch 5/10 +1100/1100 11s 10ms/step - accuracy: 0.9228 - loss: 0.2790 - val_accuracy: 0.9392 - val_loss: 0.2208 +Epoch 6/10 +1100/1100 12s 11ms/step - accuracy: 0.9244 - loss: 0.2713 - val_accuracy: 0.9440 - val_loss: 0.2162 +Epoch 7/10 +1100/1100 13s 12ms/step - accuracy: 0.9252 - loss: 0.2662 - val_accuracy: 0.9398 - val_loss: 0.2178 +Epoch 8/10 +1100/1100 14s 12ms/step - accuracy: 0.9269 - loss: 0.2626 - val_accuracy: 0.9398 - val_loss: 0.2169 +Epoch 9/10 +1100/1100 13s 12ms/step - accuracy: 0.9286 - loss: 0.2612 - val_accuracy: 0.9458 - val_loss: 0.2128 +Epoch 10/10 +1100/1100 13s 12ms/step - accuracy: 0.9307 - loss: 0.2515 - val_accuracy: 0.9438 - val_loss: 0.2161 + +Epoch 1/10 +1100/1100 15s 13ms/step - accuracy: 0.8422 - loss: 0.5383 - val_accuracy: 0.9346 - val_loss: 0.2400 +Epoch 2/10 +1100/1100 18s 17ms/step - accuracy: 0.9120 - loss: 0.3102 - val_accuracy: 0.9364 - val_loss: 0.2372 +Epoch 3/10 +1100/1100 16s 15ms/step - accuracy: 0.9233 - loss: 0.2774 - val_accuracy: 0.9352 - val_loss: 0.2342 +Epoch 4/10 +1100/1100 16s 14ms/step - accuracy: 0.9225 - loss: 0.2736 - val_accuracy: 0.9366 - val_loss: 0.2336 +Epoch 5/10 +1100/1100 15s 13ms/step - accuracy: 0.9233 - loss: 0.2760 - val_accuracy: 0.9344 - val_loss: 0.2331 +Epoch 6/10 +1100/1100 22s 20ms/step - accuracy: 0.9251 - loss: 0.2683 - val_accuracy: 0.9382 - val_loss: 0.2247 +Epoch 7/10 +1100/1100 15s 14ms/step - accuracy: 0.9261 - loss: 0.2658 - val_accuracy: 0.9356 - val_loss: 0.2367 +Epoch 8/10 +1100/1100 15s 14ms/step - accuracy: 0.9256 - loss: 0.2635 - val_accuracy: 0.9364 - val_loss: 0.2308 +Epoch 9/10 +1100/1100 15s 13ms/step - accuracy: 0.9253 - loss: 0.2625 - val_accuracy: 0.9386 - val_loss: 0.2277 +Epoch 10/10 +1100/1100 15s 13ms/step - accuracy: 0.9301 - loss: 0.2515 - val_accuracy: 0.9358 - val_loss: 0.2441 + +Epoch 1/10 +1100/1100 16s 13ms/step - accuracy: 0.8499 - loss: 0.5317 - val_accuracy: 0.9618 - val_loss: 0.1400 +Epoch 2/10 +1100/1100 15s 13ms/step - accuracy: 0.9517 - loss: 0.1637 - val_accuracy: 0.9682 - val_loss: 0.1153 +Epoch 3/10 +1100/1100 14s 13ms/step - accuracy: 0.9700 - loss: 0.1021 - val_accuracy: 0.9730 - val_loss: 0.0897 +Epoch 4/10 +1100/1100 13s 12ms/step - accuracy: 0.9774 - loss: 0.0757 - val_accuracy: 0.9754 - val_loss: 0.0835 +Epoch 5/10 +1100/1100 13s 12ms/step - accuracy: 0.9824 - loss: 0.0603 - val_accuracy: 0.9772 - val_loss: 0.0766 +Epoch 6/10 +1100/1100 14s 12ms/step - accuracy: 0.9855 - loss: 0.0486 - val_accuracy: 0.9762 - val_loss: 0.0850 +Epoch 7/10 +1100/1100 14s 13ms/step - accuracy: 0.9889 - loss: 0.0374 - val_accuracy: 0.9776 - val_loss: 0.0774 +Epoch 8/10 +1100/1100 13s 12ms/step - accuracy: 0.9901 - loss: 0.0318 - val_accuracy: 0.9786 - val_loss: 0.0765 +Epoch 9/10 +1100/1100 13s 12ms/step - accuracy: 0.9928 - loss: 0.0267 - val_accuracy: 0.9804 - val_loss: 0.0766 +Epoch 10/10 +1100/1100 14s 12ms/step - accuracy: 0.9944 - loss: 0.0208 - val_accuracy: 0.9792 - val_loss: 0.0801 + +Epoch 1/10 +1100/1100 14s 12ms/step - accuracy: 0.8468 - loss: 0.5308 - val_accuracy: 0.9594 - val_loss: 0.1591 +Epoch 2/10 +1100/1100 13s 12ms/step - accuracy: 0.9433 - loss: 0.1909 - val_accuracy: 0.9646 - val_loss: 0.1300 +Epoch 3/10 +1100/1100 13s 12ms/step - accuracy: 0.9658 - loss: 0.1235 - val_accuracy: 0.9726 - val_loss: 0.0973 +Epoch 4/10 +1100/1100 13s 12ms/step - accuracy: 0.9744 - loss: 0.0909 - val_accuracy: 0.9732 - val_loss: 0.0876 +Epoch 5/10 +1100/1100 13s 12ms/step - accuracy: 0.9798 - loss: 0.0747 - val_accuracy: 0.9788 - val_loss: 0.0770 +Epoch 6/10 +1100/1100 13s 12ms/step - accuracy: 0.9832 - loss: 0.0606 - val_accuracy: 0.9766 - val_loss: 0.0801 +Epoch 7/10 +1100/1100 13s 12ms/step - accuracy: 0.9881 - loss: 0.0460 - val_accuracy: 0.9792 - val_loss: 0.0714 +Epoch 8/10 +1100/1100 13s 12ms/step - accuracy: 0.9894 - loss: 0.0397 - val_accuracy: 0.9768 - val_loss: 0.0741 +Epoch 9/10 +1100/1100 13s 12ms/step - accuracy: 0.9923 - loss: 0.0312 - val_accuracy: 0.9796 - val_loss: 0.0709 +Epoch 10/10 +1100/1100 14s 12ms/step - accuracy: 0.9940 - loss: 0.0257 - val_accuracy: 0.9802 - val_loss: 0.0720 + +Epoch 1/10 +1100/1100 15s 13ms/step - accuracy: 0.8072 - loss: 0.8138 - val_accuracy: 0.9452 - val_loss: 0.2121 +Epoch 2/10 +1100/1100 15s 14ms/step - accuracy: 0.9241 - loss: 0.2602 - val_accuracy: 0.9570 - val_loss: 0.1663 +Epoch 3/10 +1100/1100 15s 14ms/step - accuracy: 0.9476 - loss: 0.1863 - val_accuracy: 0.9648 - val_loss: 0.1322 +Epoch 4/10 +1100/1100 14s 13ms/step - accuracy: 0.9583 - loss: 0.1490 - val_accuracy: 0.9670 - val_loss: 0.1168 +Epoch 5/10 +1100/1100 14s 13ms/step - accuracy: 0.9658 - loss: 0.1243 - val_accuracy: 0.9696 - val_loss: 0.1047 +Epoch 6/10 +1100/1100 14s 12ms/step - accuracy: 0.9706 - loss: 0.1065 - val_accuracy: 0.9718 - val_loss: 0.0975 +Epoch 7/10 +1100/1100 13s 12ms/step - accuracy: 0.9758 - loss: 0.0891 - val_accuracy: 0.9740 - val_loss: 0.0918 +Epoch 8/10 +1100/1100 13s 12ms/step - accuracy: 0.9779 - loss: 0.0792 - val_accuracy: 0.9758 - val_loss: 0.0885 +Epoch 9/10 +1100/1100 14s 13ms/step - accuracy: 0.9816 - loss: 0.0681 - val_accuracy: 0.9776 - val_loss: 0.0825 +Epoch 10/10 +1100/1100 14s 12ms/step - accuracy: 0.9852 - loss: 0.0583 - val_accuracy: 0.9766 - val_loss: 0.0831 + +Epoch 1/10 +1100/1100 16s 14ms/step - accuracy: 0.8483 - loss: 0.5002 - val_accuracy: 0.9650 - val_loss: 0.1189 +Epoch 2/10 +1100/1100 16s 14ms/step - accuracy: 0.9609 - loss: 0.1262 - val_accuracy: 0.9718 - val_loss: 0.0971 +Epoch 3/10 +1100/1100 16s 14ms/step - accuracy: 0.9759 - loss: 0.0783 - val_accuracy: 0.9772 - val_loss: 0.0690 +Epoch 4/10 +1100/1100 16s 14ms/step - accuracy: 0.9810 - loss: 0.0597 - val_accuracy: 0.9788 - val_loss: 0.0752 +Epoch 5/10 +1100/1100 15s 14ms/step - accuracy: 0.9855 - loss: 0.0468 - val_accuracy: 0.9748 - val_loss: 0.0817 +Epoch 6/10 +1100/1100 16s 14ms/step - accuracy: 0.9884 - loss: 0.0398 - val_accuracy: 0.9758 - val_loss: 0.0909 +Epoch 7/10 +1100/1100 15s 14ms/step - accuracy: 0.9898 - loss: 0.0318 - val_accuracy: 0.9724 - val_loss: 0.0998 +Epoch 8/10 +1100/1100 16s 14ms/step - accuracy: 0.9892 - loss: 0.0305 - val_accuracy: 0.9778 - val_loss: 0.0952 +Epoch 9/10 +1100/1100 16s 14ms/step - accuracy: 0.9914 - loss: 0.0267 - val_accuracy: 0.9756 - val_loss: 0.0878 +Epoch 10/10 +1100/1100 16s 15ms/step - accuracy: 0.9935 - loss: 0.0203 - val_accuracy: 0.9770 - val_loss: 0.0974 + +Epoch 1/10 +1100/1100 24s 21ms/step - accuracy: 0.7772 - loss: 0.6657 - val_accuracy: 0.9524 - val_loss: 0.1752 +Epoch 2/10 +1100/1100 24s 22ms/step - accuracy: 0.9525 - loss: 0.1705 - val_accuracy: 0.9682 - val_loss: 0.1261 +Epoch 3/10 +1100/1100 22s 20ms/step - accuracy: 0.9675 - loss: 0.1162 - val_accuracy: 0.9750 - val_loss: 0.0945 +Epoch 4/10 +1100/1100 22s 20ms/step - accuracy: 0.9735 - loss: 0.0929 - val_accuracy: 0.9720 - val_loss: 0.1018 +Epoch 5/10 +1100/1100 22s 20ms/step - accuracy: 0.9789 - loss: 0.0794 - val_accuracy: 0.9762 - val_loss: 0.0888 +Epoch 6/10 +1100/1100 22s 20ms/step - accuracy: 0.9806 - loss: 0.0729 - val_accuracy: 0.9760 - val_loss: 0.0961 +Epoch 7/10 +1100/1100 22s 20ms/step - accuracy: 0.9847 - loss: 0.0578 - val_accuracy: 0.9810 - val_loss: 0.0932 +Epoch 8/10 +1100/1100 22s 20ms/step - accuracy: 0.9824 - loss: 0.0643 - val_accuracy: 0.9786 - val_loss: 0.0854 +Epoch 9/10 +1100/1100 22s 20ms/step - accuracy: 0.9864 - loss: 0.0487 - val_accuracy: 0.9764 - val_loss: 0.1054 +Epoch 10/10 +1100/1100 22s 20ms/step - accuracy: 0.9864 - loss: 0.0493 - val_accuracy: 0.9780 - val_loss: 0.1108 + +Epoch 1/10 +1100/1100 23s 20ms/step - accuracy: 0.1052 - loss: 2.3130 - val_accuracy: 0.1808 - val_loss: 1.9383 +Epoch 2/10 +1100/1100 22s 20ms/step - accuracy: 0.2002 - loss: 1.9364 - val_accuracy: 0.2168 - val_loss: 1.8587 +Epoch 3/10 +1100/1100 23s 20ms/step - accuracy: 0.2161 - loss: 1.8392 - val_accuracy: 0.5588 - val_loss: 1.2106 +Epoch 4/10 +1100/1100 22s 20ms/step - accuracy: 0.5594 - loss: 1.1159 - val_accuracy: 0.8168 - val_loss: 0.7119 +Epoch 5/10 +1100/1100 22s 20ms/step - accuracy: 0.8359 - loss: 0.6312 - val_accuracy: 0.8994 - val_loss: 0.4360 +Epoch 6/10 +1100/1100 22s 20ms/step - accuracy: 0.8827 - loss: 0.4854 - val_accuracy: 0.9066 - val_loss: 0.4053 +Epoch 7/10 +1100/1100 22s 20ms/step - accuracy: 0.9007 - loss: 0.4218 - val_accuracy: 0.9166 - val_loss: 0.3660 +Epoch 8/10 +1100/1100 22s 20ms/step - accuracy: 0.9075 - loss: 0.3940 - val_accuracy: 0.9204 - val_loss: 0.3552 +Epoch 9/10 +1100/1100 22s 20ms/step - accuracy: 0.9090 - loss: 0.3922 - val_accuracy: 0.9242 - val_loss: 0.3356 +Epoch 10/10 +1100/1100 24s 22ms/step - accuracy: 0.9191 - loss: 0.3534 - val_accuracy: 0.9270 - val_loss: 0.3286 From 451cb9ee46ed38d56301c64d115205f837f5f0c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Tue, 5 Mar 2024 10:29:20 +0100 Subject: [PATCH 13/64] Add team description to all files --- labs/01/mnist_layers_activations.py | 5 +++++ labs/01/numpy_entropy.py | 6 ++++++ labs/01/pca_first.keras.py | 5 +++++ labs/01/pca_first.py | 5 +++++ 4 files changed, 21 insertions(+) diff --git a/labs/01/mnist_layers_activations.py b/labs/01/mnist_layers_activations.py index 0ae93ab..bf78be2 100644 --- a/labs/01/mnist_layers_activations.py +++ b/labs/01/mnist_layers_activations.py @@ -10,6 +10,11 @@ from mnist import MNIST +# Jonas Glerup Røssum +# 31a0a96a-c590-4486-b194-f72765b2ce25 +# Xiao Wang +# 91d4d1d7-b800-4765-96b9-df098ac36a66 + parser = argparse.ArgumentParser() # These arguments will be set appropriately by ReCodEx, even if you change them. parser.add_argument("--activation", default="none", choices=["none", "relu", "tanh", "sigmoid"], help="Activation.") diff --git a/labs/01/numpy_entropy.py b/labs/01/numpy_entropy.py index 4855b8a..4203a24 100644 --- a/labs/01/numpy_entropy.py +++ b/labs/01/numpy_entropy.py @@ -1,4 +1,10 @@ #!/usr/bin/env python3 + +# Jonas Glerup Røssum +# 31a0a96a-c590-4486-b194-f72765b2ce25 +# Xiao Wang +# 91d4d1d7-b800-4765-96b9-df098ac36a66 + import argparse import numpy as np diff --git a/labs/01/pca_first.keras.py b/labs/01/pca_first.keras.py index 028a8ad..0632b22 100644 --- a/labs/01/pca_first.keras.py +++ b/labs/01/pca_first.keras.py @@ -9,6 +9,11 @@ from mnist import MNIST +# Jonas Glerup Røssum +# 31a0a96a-c590-4486-b194-f72765b2ce25 +# Xiao Wang +# 91d4d1d7-b800-4765-96b9-df098ac36a66 + parser = argparse.ArgumentParser() # These arguments will be set appropriately by ReCodEx, even if you change them. parser.add_argument("--examples", default=256, type=int, help="MNIST examples to use.") diff --git a/labs/01/pca_first.py b/labs/01/pca_first.py index ade3559..deecf06 100644 --- a/labs/01/pca_first.py +++ b/labs/01/pca_first.py @@ -7,6 +7,11 @@ from mnist import MNIST +# Jonas Glerup Røssum +# 31a0a96a-c590-4486-b194-f72765b2ce25 +# Xiao Wang +# 91d4d1d7-b800-4765-96b9-df098ac36a66 + parser = argparse.ArgumentParser() # These arguments will be set appropriately by ReCodEx, even if you change them. parser.add_argument("--examples", default=256, type=int, help="MNIST examples to use.") From 558f9f4306edb6a86fd55347d2450af1fe06cdf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Tue, 12 Mar 2024 08:36:16 +0100 Subject: [PATCH 14/64] Update repo setup --- .gitignore | 4 +++- .venv/pyvenv.cfg | 3 +++ setup.ps1 | 5 +++++ 3 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 .venv/pyvenv.cfg diff --git a/.gitignore b/.gitignore index 1d17dae..0fb63b8 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ -.venv +**/.venv/Lib +**/.venv/Scripts +**/.venv/share diff --git a/.venv/pyvenv.cfg b/.venv/pyvenv.cfg new file mode 100644 index 0000000..e129fd0 --- /dev/null +++ b/.venv/pyvenv.cfg @@ -0,0 +1,3 @@ +home = C:\Python310 +include-system-site-packages = false +version = 3.10.7 diff --git a/setup.ps1 b/setup.ps1 index 8fd7b89..f1f7bbe 100644 --- a/setup.ps1 +++ b/setup.ps1 @@ -1 +1,6 @@ +git remote rename origin upstream +git remote add origin git@github.com:joglr/npfl138.git +git fetch +git checkout master +python -m venv .venv .venv/Scripts/pip install -r .\labs\requirements.txt From 55c07e2302238b86ea02f5ca33f761d8310c8b01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Tue, 5 Mar 2024 19:34:00 +0100 Subject: [PATCH 15/64] Solve sgd_backpropagation --- .gitignore | 2 ++ labs/02/sgd_backpropagation.ps1 | 50 ++++++++++++++++++++++++++ labs/02/sgd_backpropagation.py | 62 +++++++++++++++++++++------------ 3 files changed, 92 insertions(+), 22 deletions(-) create mode 100644 labs/02/sgd_backpropagation.ps1 diff --git a/.gitignore b/.gitignore index 0fb63b8..32199d0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ **/.venv/Lib **/.venv/Scripts **/.venv/share +logs/ +mnist.npz diff --git a/labs/02/sgd_backpropagation.ps1 b/labs/02/sgd_backpropagation.ps1 new file mode 100644 index 0000000..f613710 --- /dev/null +++ b/labs/02/sgd_backpropagation.ps1 @@ -0,0 +1,50 @@ +# Examples: +# ../../.venv/Scripts/python sgd_backpropagation.py --batch_size=64 --hidden_layer=20 --learning_rate=0.1 +# Dev accuracy after epoch 1 is 93.30 +# Dev accuracy after epoch 2 is 94.38 +# Dev accuracy after epoch 3 is 95.16 +# Dev accuracy after epoch 4 is 95.50 +# Dev accuracy after epoch 5 is 95.96 +# Dev accuracy after epoch 6 is 96.04 +# Dev accuracy after epoch 7 is 95.82 +# Dev accuracy after epoch 8 is 95.92 +# Dev accuracy after epoch 9 is 95.96 +# Dev accuracy after epoch 10 is 96.16 +# Test accuracy after epoch 10 is 95.26 + +# ../../.venv/Scripts/python sgd_backpropagation.py --batch_size=100 --hidden_layer=32 --learning_rate=0.2 +# Dev accuracy after epoch 1 is 93.64 +# Dev accuracy after epoch 2 is 94.80 +# Dev accuracy after epoch 3 is 95.56 +# Dev accuracy after epoch 4 is 95.98 +# Dev accuracy after epoch 5 is 96.24 +# Dev accuracy after epoch 6 is 96.74 +# Dev accuracy after epoch 7 is 96.52 +# Dev accuracy after epoch 8 is 96.54 +# Dev accuracy after epoch 9 is 97.04 +# Dev accuracy after epoch 10 is 97.02 +# Test accuracy after epoch 10 is 96.16 + +# Tests: +../../.venv/Scripts/python sgd_backpropagation.py --epochs=2 --batch_size=64 --hidden_layer=20 --learning_rate=0.1 +# Expected +# Dev accuracy after epoch 1 is 93.30 +# Dev accuracy after epoch 2 is 94.38 +# Test accuracy after epoch 2 is 93.15 + +# Actual +# Dev accuracy after epoch 1 is 92.98 +# Dev accuracy after epoch 2 is 93.98 +# Test accuracy after epoch 2 is 92.73 + + +../../.venv/Scripts/python sgd_backpropagation.py --epochs=2 --batch_size=100 --hidden_layer=32 --learning_rate=0.2 +# Expected: +# Dev accuracy after epoch 1 is 93.64 +# Dev accuracy after epoch 2 is 94.80 +# Test accuracy after epoch 2 is 93.54 + +# Actual: +# Dev accuracy after epoch 1 is 94.16 +# Dev accuracy after epoch 2 is 94.98 +# Test accuracy after epoch 2 is 93.56 diff --git a/labs/02/sgd_backpropagation.py b/labs/02/sgd_backpropagation.py index cff312a..1b6eebd 100644 --- a/labs/02/sgd_backpropagation.py +++ b/labs/02/sgd_backpropagation.py @@ -35,24 +35,41 @@ def __init__(self, args: argparse.Namespace) -> None: ) self._b1 = keras.Variable(keras.ops.zeros([args.hidden_layer]), trainable=True) - # TODO: Create variables: + # Create variables: # - _W2, which is a trainable variable of size `[args.hidden_layer, MNIST.LABELS]`, # initialized to `keras.random.normal` value `with stddev=0.1` and `seed=args.seed`, # - _b2, which is a trainable variable of size `[MNIST.LABELS]` initialized to zeros - ... + self._W2 = keras.Variable(keras.random.normal([args.hidden_layer, MNIST.LABELS], stddev=0.1, seed=args.seed), trainable=True) + + self._b2 = keras.Variable(keras.ops.zeros([MNIST.LABELS]), trainable=True) def predict(self, inputs: torch.Tensor) -> torch.Tensor: - # TODO: Define the computation of the network. Notably: + # Define the computation of the network. Notably: # - start by casting the input byte image to `float32` with `keras.ops.cast` + + cast_inputs = keras.ops.cast(inputs, dtype="float32") + # - then divide the tensor by 255 to normalize it to the `[0, 1]` range + + normalized_inputs = cast_inputs / 255 + # - then reshape it to the shape `[inputs.shape[0], -1]`. # The -1 is a wildcard which is computed so that the number # of elements before and after the reshape is preserved. + + reshaped_inputs = keras.ops.reshape(normalized_inputs, [inputs.shape[0], -1]) + # - then multiply it by `self._W1` and then add `self._b1` # - apply `keras.ops.tanh` + + hidden_layer_output = keras.ops.tanh(keras.ops.matmul(reshaped_inputs, self._W1) + self._b1) + # - multiply the result by `self._W2` and then add `self._b2` + + hidden_layer_output = keras.ops.matmul(hidden_layer_output, self._W2) + self._b2 + # - finally apply `keras.ops.softmax` and return the result - return ... + return keras.ops.softmax(hidden_layer_output) def train_epoch(self, dataset: MNIST.Dataset) -> None: for batch in dataset.batches(self._args.batch_size): @@ -62,48 +79,48 @@ def train_epoch(self, dataset: MNIST.Dataset) -> None: # Size of the batch is `self._args.batch_size`, except for the last, which # might be smaller. - # TODO: Compute the predicted probabilities of the batch images using `self.predict` - probabilities = ... + # Compute the predicted probabilities of the batch images using `self.predict` + probabilities = self.predict(batch["images"]) - # TODO: Manually compute the loss: + # Manually compute the loss: # - For every batch example, the loss is the categorical crossentropy of the # predicted probabilities and the gold label. To compute the crossentropy, you can # - either use `keras.ops.one_hot` to obtain one-hot encoded gold labels, # - or suitably use `keras.ops.take_along_axis` to "index" the predicted probabilities. # - Finally, compute the average across the batch examples. - loss = ... - + loss = -keras.ops.mean(keras.ops.sum(keras.ops.one_hot(batch["labels"], MNIST.LABELS) * keras.ops.log(probabilities), axis=-1)) # We create a list of all variables. Note that a `keras.Model/Layer` automatically # tracks owned variables, so we could also use `self.trainable_variables` # (or even `self.variables`, which is useful for loading/saving). variables = [self._W1, self._b1, self._W2, self._b2] - # TODO: Compute the gradient of the loss with respect to variables using + # Compute the gradient of the loss with respect to variables using # backpropagation algorithm by # - first resetting the gradients of all variables to zero with `self.zero_grad()`, # - then calling `loss.backward()`. - ... + self.zero_grad() + loss.backward() gradients = [variable.value.grad for variable in variables] with torch.no_grad(): for variable, gradient in zip(variables, gradients): - # TODO: Perform the SGD update with learning rate `self._args.learning_rate` + # Perform the SGD update with learning rate `self._args.learning_rate` # for the variable and computed gradient. You can modify the # variable value with `variable.assign` or in this case the more # efficient `variable.assign_sub`. - ... + variable.assign_sub(self._args.learning_rate * gradient) def evaluate(self, dataset: MNIST.Dataset) -> float: # Compute the accuracy of the model prediction correct = 0 for batch in dataset.batches(self._args.batch_size): - # TODO: Compute the probabilities of the batch images using `self.predict` + # Compute the probabilities of the batch images using `self.predict` # and convert them to Numpy with `keras.ops.convert_to_numpy`. - probabilities = ... + probabilities = keras.ops.convert_to_numpy(self.predict(batch["images"])) - # TODO: Evaluate how many batch examples were predicted + # Evaluate how many batch examples were predicted # correctly and increase `correct` variable accordingly. - correct += ... + correct += np.sum(np.argmax(probabilities, axis=-1) == batch["labels"]) return correct / dataset.size @@ -132,15 +149,16 @@ def main(args: argparse.Namespace) -> tuple[float, float]: model = Model(args) for epoch in range(args.epochs): - # TODO: Run the `train_epoch` with `mnist.train` dataset + # Run the `train_epoch` with `mnist.train` dataset + model.train_epoch(mnist.train) - # TODO: Evaluate the dev data using `evaluate` on `mnist.dev` dataset - accuracy = ... + # Evaluate the dev data using `evaluate` on `mnist.dev` dataset + accuracy = model.evaluate(mnist.dev) print("Dev accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * accuracy), flush=True) writer.add_scalar("dev/accuracy", 100 * accuracy, epoch + 1) - # TODO: Evaluate the test data using `evaluate` on `mnist.test` dataset - test_accuracy = ... + # Evaluate the test data using `evaluate` on `mnist.test` dataset + test_accuracy = model.evaluate(mnist.test) print("Test accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * test_accuracy), flush=True) writer.add_scalar("test/accuracy", 100 * test_accuracy, epoch + 1) From 732d7d540612facb1a55227fd93c4650eb733f76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Tue, 12 Mar 2024 14:53:06 +0100 Subject: [PATCH 16/64] The average score was 423.7. --- labs/02/gym_cartpole.py | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/labs/02/gym_cartpole.py b/labs/02/gym_cartpole.py index 7befc72..8ace3c5 100644 --- a/labs/02/gym_cartpole.py +++ b/labs/02/gym_cartpole.py @@ -17,8 +17,8 @@ parser.add_argument("--seed", default=42, type=int, help="Random seed.") parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.") # If you add more arguments, ReCodEx will keep them with your default values. -parser.add_argument("--batch_size", default=..., type=int, help="Batch size.") -parser.add_argument("--epochs", default=..., type=int, help="Number of epochs.") +parser.add_argument("--batch_size", default=5, type=int, help="Batch size.") +parser.add_argument("--epochs", default=20, type=int, help="Number of epochs.") parser.add_argument("--model", default="gym_cartpole_model.keras", type=str, help="Output model path.") @@ -49,7 +49,7 @@ def on_epoch_end(self, epoch, logs=None): def evaluate_model( model: keras.Model, seed: int = 42, episodes: int = 100, render: bool = False, report_per_episode: bool = False -) -> float: + ) -> float: """Evaluate the given model on CartPole-v1 environment. Returns the average score achieved on the given number of episodes. @@ -86,16 +86,10 @@ def evaluate_model( def main(args: argparse.Namespace) -> keras.Model | None: # Set the random seed and the number of threads. keras.utils.set_random_seed(args.seed) - if args.threads: - torch.set_num_threads(args.threads) - torch.set_num_interop_threads(args.threads) + torch.set_num_threads(args.threads) + torch.set_num_interop_threads(args.threads) if not args.evaluate: - if args.batch_size is ...: - raise ValueError("You must specify the batch size, either in the defaults or on the command line.") - if args.epochs is ...: - raise ValueError("You must specify the number of epochs, either in the defaults or on the command line.") - # Create logdir name args.logdir = os.path.join("logs", "{}-{}-{}".format( os.path.basename(globals().get("__file__", "notebook")), @@ -106,15 +100,36 @@ def main(args: argparse.Namespace) -> keras.Model | None: # Load the data data = np.loadtxt("gym_cartpole_data.txt") observations, labels = data[:, :-1], data[:, -1].astype(np.int32) + print("data shape:", observations.shape, "label shape:", labels.shape) + + # TODO: Create the model in the `model` variable. Note that # the model can perform any of: # - binary classification with 1 output and sigmoid activation; # - two-class classification with 2 outputs and softmax activation. - model = ... + + # Convert the labels to one-hot encoding + labels = keras.ops.one_hot(labels, num_classes=2) + + model = keras.Sequential(name="gym_model", layers=[ + # Input layer + keras.layers.Input(shape=(observations.shape[1],)), + # Hidden layers + keras.layers.Dense(8, activation="tanh"), + # Output layer + keras.layers.Dense(2, activation="softmax"), # 2 outputs because we have 2 actions in the cart pole problem + ]) + + + model.summary() # TODO: Prepare the model for training using the `model.compile` method. - model.compile(...) + model.compile( + loss=keras.losses.CategoricalCrossentropy(label_smoothing=0.1), + optimizer=keras.optimizers.Adam(learning_rate=0.01), + metrics=["accuracy"], + ) tb_callback = TorchTensorBoardCallback(args.logdir) model.fit(observations, labels, batch_size=args.batch_size, epochs=args.epochs, callbacks=[tb_callback]) From 7d362488cd4e86697420800b0d5abdc8b698c449 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Tue, 12 Mar 2024 15:15:17 +0100 Subject: [PATCH 17/64] The average score was 457.23. --- labs/02/gym_cartpole.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/labs/02/gym_cartpole.py b/labs/02/gym_cartpole.py index 8ace3c5..8553b65 100644 --- a/labs/02/gym_cartpole.py +++ b/labs/02/gym_cartpole.py @@ -127,7 +127,7 @@ def main(args: argparse.Namespace) -> keras.Model | None: # TODO: Prepare the model for training using the `model.compile` method. model.compile( loss=keras.losses.CategoricalCrossentropy(label_smoothing=0.1), - optimizer=keras.optimizers.Adam(learning_rate=0.01), + optimizer=keras.optimizers.Adam(learning_rate=0.005), metrics=["accuracy"], ) From ed8a2a0c17cf07203203f0c1cb8e2709c99162f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Tue, 12 Mar 2024 15:20:49 +0100 Subject: [PATCH 18/64] The average score was 465.86. --- labs/02/gym_cartpole.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/labs/02/gym_cartpole.py b/labs/02/gym_cartpole.py index 8553b65..d7661b2 100644 --- a/labs/02/gym_cartpole.py +++ b/labs/02/gym_cartpole.py @@ -127,7 +127,7 @@ def main(args: argparse.Namespace) -> keras.Model | None: # TODO: Prepare the model for training using the `model.compile` method. model.compile( loss=keras.losses.CategoricalCrossentropy(label_smoothing=0.1), - optimizer=keras.optimizers.Adam(learning_rate=0.005), + optimizer=keras.optimizers.Adam(learning_rate=0.02), metrics=["accuracy"], ) From be3273074f010712761bf1e8a11ec4fe5c52f27a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Tue, 12 Mar 2024 15:25:08 +0100 Subject: [PATCH 19/64] The average score was 490.01. --- labs/02/gym_cartpole.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/labs/02/gym_cartpole.py b/labs/02/gym_cartpole.py index d7661b2..328b805 100644 --- a/labs/02/gym_cartpole.py +++ b/labs/02/gym_cartpole.py @@ -9,6 +9,11 @@ import numpy as np import torch +# Jonas Glerup Røssum +# 31a0a96a-c590-4486-b194-f72765b2ce25 +# Xiao Wang +# 91d4d1d7-b800-4765-96b9-df098ac36a66 + parser = argparse.ArgumentParser() # These arguments will be set appropriately by ReCodEx, even if you change them. parser.add_argument("--evaluate", default=False, action="store_true", help="Evaluate the given model") @@ -18,7 +23,7 @@ parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.") # If you add more arguments, ReCodEx will keep them with your default values. parser.add_argument("--batch_size", default=5, type=int, help="Batch size.") -parser.add_argument("--epochs", default=20, type=int, help="Number of epochs.") +parser.add_argument("--epochs", default=100, type=int, help="Number of epochs.") parser.add_argument("--model", default="gym_cartpole_model.keras", type=str, help="Output model path.") @@ -127,7 +132,7 @@ def main(args: argparse.Namespace) -> keras.Model | None: # TODO: Prepare the model for training using the `model.compile` method. model.compile( loss=keras.losses.CategoricalCrossentropy(label_smoothing=0.1), - optimizer=keras.optimizers.Adam(learning_rate=0.02), + optimizer=keras.optimizers.Adam(learning_rate=0.03), metrics=["accuracy"], ) From 01f0bdea9cc6cb91303faf867e6382eb570af81c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Tue, 12 Mar 2024 15:38:13 +0100 Subject: [PATCH 20/64] The average score was 491.41. --- labs/02/gym_cartpole.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/labs/02/gym_cartpole.py b/labs/02/gym_cartpole.py index 328b805..935fbf9 100644 --- a/labs/02/gym_cartpole.py +++ b/labs/02/gym_cartpole.py @@ -22,7 +22,7 @@ parser.add_argument("--seed", default=42, type=int, help="Random seed.") parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.") # If you add more arguments, ReCodEx will keep them with your default values. -parser.add_argument("--batch_size", default=5, type=int, help="Batch size.") +parser.add_argument("--batch_size", default=10, type=int, help="Batch size.") parser.add_argument("--epochs", default=100, type=int, help="Number of epochs.") parser.add_argument("--model", default="gym_cartpole_model.keras", type=str, help="Output model path.") @@ -132,7 +132,7 @@ def main(args: argparse.Namespace) -> keras.Model | None: # TODO: Prepare the model for training using the `model.compile` method. model.compile( loss=keras.losses.CategoricalCrossentropy(label_smoothing=0.1), - optimizer=keras.optimizers.Adam(learning_rate=0.03), + optimizer=keras.optimizers.Adam(learning_rate=0.02), metrics=["accuracy"], ) From b57af982675879a6d0df96d8bce23ed5e1f78957 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Tue, 12 Mar 2024 15:40:10 +0100 Subject: [PATCH 21/64] Add test script --- labs/02/test.ps1 | 1 + 1 file changed, 1 insertion(+) create mode 100644 labs/02/test.ps1 diff --git a/labs/02/test.ps1 b/labs/02/test.ps1 new file mode 100644 index 0000000..fa38f74 --- /dev/null +++ b/labs/02/test.ps1 @@ -0,0 +1 @@ +../../.venv/Scripts/python .\gym_cartpole.py && ../../.venv/Scripts/python .\gym_cartpole.py --evaluate From d0ad9b98def3fb7a71309950d2d75f3abd53c4dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Tue, 12 Mar 2024 15:50:58 +0100 Subject: [PATCH 22/64] The average score was 498.73. --- labs/02/gym_cartpole.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/labs/02/gym_cartpole.py b/labs/02/gym_cartpole.py index 935fbf9..5d191fa 100644 --- a/labs/02/gym_cartpole.py +++ b/labs/02/gym_cartpole.py @@ -132,7 +132,7 @@ def main(args: argparse.Namespace) -> keras.Model | None: # TODO: Prepare the model for training using the `model.compile` method. model.compile( loss=keras.losses.CategoricalCrossentropy(label_smoothing=0.1), - optimizer=keras.optimizers.Adam(learning_rate=0.02), + optimizer=keras.optimizers.Adam(learning_rate=0.009), metrics=["accuracy"], ) From 83f390a548c87b5c5053c60d834dd69863d1b8b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Tue, 12 Mar 2024 16:14:06 +0100 Subject: [PATCH 23/64] Refactor loss calculation in Model class --- labs/02/sgd_backpropagation.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/labs/02/sgd_backpropagation.py b/labs/02/sgd_backpropagation.py index 1b6eebd..ad65784 100644 --- a/labs/02/sgd_backpropagation.py +++ b/labs/02/sgd_backpropagation.py @@ -12,6 +12,11 @@ from mnist import MNIST +# Jonas Glerup Røssum +# 31a0a96a-c590-4486-b194-f72765b2ce25 +# Xiao Wang +# 91d4d1d7-b800-4765-96b9-df098ac36a66 + parser = argparse.ArgumentParser() # These arguments will be set appropriately by ReCodEx, even if you change them. parser.add_argument("--batch_size", default=50, type=int, help="Batch size.") @@ -88,7 +93,7 @@ def train_epoch(self, dataset: MNIST.Dataset) -> None: # - either use `keras.ops.one_hot` to obtain one-hot encoded gold labels, # - or suitably use `keras.ops.take_along_axis` to "index" the predicted probabilities. # - Finally, compute the average across the batch examples. - loss = -keras.ops.mean(keras.ops.sum(keras.ops.one_hot(batch["labels"], MNIST.LABELS) * keras.ops.log(probabilities), axis=-1)) + loss = keras.ops.mean(keras.ops.categorical_crossentropy(keras.ops.one_hot(batch["labels"], MNIST.LABELS), probabilities)) # We create a list of all variables. Note that a `keras.Model/Layer` automatically # tracks owned variables, so we could also use `self.trainable_variables` # (or even `self.variables`, which is useful for loading/saving). From df4da95e2a896afa8a662dab794393dc614d867f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Tue, 12 Mar 2024 16:16:10 +0100 Subject: [PATCH 24/64] Add .venv/Include to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 32199d0..a203ee2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ **/.venv/Lib **/.venv/Scripts +**/.venv/Include **/.venv/share logs/ mnist.npz From 80063fd468a1105d81511a5b9d7ca2579c0fec28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Wed, 28 Feb 2024 14:06:13 +0100 Subject: [PATCH 25/64] Update user id --- labs/team_description.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/labs/team_description.py b/labs/team_description.py index 1d232bc..8207533 100644 --- a/labs/team_description.py +++ b/labs/team_description.py @@ -6,7 +6,4 @@ # # You can find out ReCodEx ID in the URL bar after navigating # to your User profile page. The ID has the following format: -# Jonas Glerup Røssum # 31a0a96a-c590-4486-b194-f72765b2ce25 -# Xiao Wang -# 91d4d1d7-b800-4765-96b9-df098ac36a66 From 3efc547fd5bc4d881861f080d3ac25234bfa9618 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Sun, 3 Mar 2024 15:28:32 +0100 Subject: [PATCH 26/64] Solve numpy_entropy --- labs/01/numpy_entropy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/labs/01/numpy_entropy.py b/labs/01/numpy_entropy.py index 4203a24..7ec2359 100644 --- a/labs/01/numpy_entropy.py +++ b/labs/01/numpy_entropy.py @@ -21,7 +21,7 @@ def main(args: argparse.Namespace) -> tuple[float, float, float]: # Load data distribution, each line containing a datapoint -- a string. data_map = {} - with open(args.data_path, "r", encoding="utf-8") as data: + with open(args.data_path, "r") as data: for line in data: line = line.rstrip("\n") From db3272482d33703e5b19c8a5f7e4c2f1295c5cfb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Sun, 3 Mar 2024 15:31:05 +0100 Subject: [PATCH 27/64] Add pull.sh script to automate upstream pull --- pull.sh | 1 + 1 file changed, 1 insertion(+) create mode 100644 pull.sh diff --git a/pull.sh b/pull.sh new file mode 100644 index 0000000..9cadfe4 --- /dev/null +++ b/pull.sh @@ -0,0 +1 @@ +git pull upstream master From abb43202e7d1e78773195136833fe92f6b9cab3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Mon, 4 Mar 2024 12:48:47 +0100 Subject: [PATCH 28/64] Fix reshape and compute covariance matrix in pca_first.keras.py and pca_first.py --- labs/01/pca_first.py | 9 +++++---- labs/01/test.ps1 | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/labs/01/pca_first.py b/labs/01/pca_first.py index deecf06..263a458 100644 --- a/labs/01/pca_first.py +++ b/labs/01/pca_first.py @@ -39,7 +39,7 @@ def main(args: argparse.Namespace) -> tuple[float, float]: # We want to reshape it to [args.examples, MNIST.H * MNIST.W * MNIST.C]. # We can do so using `torch.reshape(data, new_shape)` with new shape # `[data.shape[0], data.shape[1] * data.shape[2] * data.shape[3]]`. - data = torch.reshape(data, (data.shape[0], data.shape[1] * data.shape[2] * data.shape[3])) + data = data.reshape(args.examples, MNIST.H, MNIST.W, MNIST.C) # Now compute mean of every feature. Use `torch.mean`, and set # `dim` (or `axis`) argument to zero -- therefore, the mean will be @@ -47,18 +47,19 @@ def main(args: argparse.Namespace) -> tuple[float, float]: # # Note that for compatibility with Numpy/TF/Keras, all `dim` arguments # in PyTorch can be also called `axis`. - mean = torch.mean(data, axis=0) + mean = torch.mean(data, dim=0) # Compute the covariance matrix. The covariance matrix is # (data - mean)^T * (data - mean) / data.shape[0] # where transpose can be computed using `torch.transpose` or `torch.t` and # matrix multiplication using either Python operator @ or `torch.matmul`. - cov = torch.matmul(torch.t(data-mean), data-mean)/data.shape[0] + cov = (data-mean).t @ (data-mean) / data.shape[0] + print(cov) # TODO: Compute the total variance, which is the sum of the diagonal # of the covariance matrix. To extract the diagonal use `torch.diagonal`, # and to sum a tensor use `torch.sum`. - total_variance = torch.sum(torch.diagonal(cov)).item() + total_variance = torch.diagonal(cov).sum() # TODO: Now run `args.iterations` of the power iteration algorithm. # Start with a vector of `cov.shape[0]` ones of type `torch.float32` using `torch.ones`. diff --git a/labs/01/test.ps1 b/labs/01/test.ps1 index 1a8e7cd..75ddf37 100644 --- a/labs/01/test.ps1 +++ b/labs/01/test.ps1 @@ -1,4 +1,4 @@ python3 numpy_entropy.py --data_path numpy_entropy_data_1.txt --model_path numpy_entropy_model_1.txt python3 numpy_entropy.py --data_path numpy_entropy_data_2.txt --model_path numpy_entropy_model_2.txt python3 numpy_entropy.py --data_path numpy_entropy_data_3.txt --model_path numpy_entropy_model_3.txt -python3 numpy_entropy.py --data_path numpy_entropy_data_4.txt --model_path numpy_entropy_model_4.txt +spython3 numpy_entropy.py --data_path numpy_entropy_data_4.txt --model_path numpy_entropy_model_4.txt From 63db7e38a68f34f0cab8828beeba0c59027bc4e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Mon, 4 Mar 2024 12:48:55 +0100 Subject: [PATCH 29/64] Add .gitignore, pull.ps1, and setup.ps1 files --- .gitignore | 7 +------ pull.sh | 1 - setup.ps1 | 5 ----- 3 files changed, 1 insertion(+), 12 deletions(-) delete mode 100644 pull.sh diff --git a/.gitignore b/.gitignore index a203ee2..1d17dae 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1 @@ -**/.venv/Lib -**/.venv/Scripts -**/.venv/Include -**/.venv/share -logs/ -mnist.npz +.venv diff --git a/pull.sh b/pull.sh deleted file mode 100644 index 9cadfe4..0000000 --- a/pull.sh +++ /dev/null @@ -1 +0,0 @@ -git pull upstream master diff --git a/setup.ps1 b/setup.ps1 index f1f7bbe..8fd7b89 100644 --- a/setup.ps1 +++ b/setup.ps1 @@ -1,6 +1 @@ -git remote rename origin upstream -git remote add origin git@github.com:joglr/npfl138.git -git fetch -git checkout master -python -m venv .venv .venv/Scripts/pip install -r .\labs\requirements.txt From 0aa170f18cb876ac6d443df03889db9e8b9c1e5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Mon, 4 Mar 2024 18:35:38 +0100 Subject: [PATCH 30/64] Update team description --- labs/team_description.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/labs/team_description.py b/labs/team_description.py index 8207533..1d232bc 100644 --- a/labs/team_description.py +++ b/labs/team_description.py @@ -6,4 +6,7 @@ # # You can find out ReCodEx ID in the URL bar after navigating # to your User profile page. The ID has the following format: +# Jonas Glerup Røssum # 31a0a96a-c590-4486-b194-f72765b2ce25 +# Xiao Wang +# 91d4d1d7-b800-4765-96b9-df098ac36a66 From 90fe7a309ec94ccfa9fc7c76ae45abb7692757a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Mon, 4 Mar 2024 19:12:28 +0100 Subject: [PATCH 31/64] Specify encoding --- labs/01/numpy_entropy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/labs/01/numpy_entropy.py b/labs/01/numpy_entropy.py index 7ec2359..4203a24 100644 --- a/labs/01/numpy_entropy.py +++ b/labs/01/numpy_entropy.py @@ -21,7 +21,7 @@ def main(args: argparse.Namespace) -> tuple[float, float, float]: # Load data distribution, each line containing a datapoint -- a string. data_map = {} - with open(args.data_path, "r") as data: + with open(args.data_path, "r", encoding="utf-8") as data: for line in data: line = line.rstrip("\n") From 4b84b2f4d55c6d908be6e2c631282b1f8287f5e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Mon, 4 Mar 2024 19:15:48 +0100 Subject: [PATCH 32/64] Add Lisa's solution --- labs/01/pca_first.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/labs/01/pca_first.py b/labs/01/pca_first.py index 263a458..e2b58aa 100644 --- a/labs/01/pca_first.py +++ b/labs/01/pca_first.py @@ -39,7 +39,7 @@ def main(args: argparse.Namespace) -> tuple[float, float]: # We want to reshape it to [args.examples, MNIST.H * MNIST.W * MNIST.C]. # We can do so using `torch.reshape(data, new_shape)` with new shape # `[data.shape[0], data.shape[1] * data.shape[2] * data.shape[3]]`. - data = data.reshape(args.examples, MNIST.H, MNIST.W, MNIST.C) + data = torch.reshape(data, (data.shape[0], data.shape[1] * data.shape[2] * data.shape[3])) # Now compute mean of every feature. Use `torch.mean`, and set # `dim` (or `axis`) argument to zero -- therefore, the mean will be @@ -47,19 +47,18 @@ def main(args: argparse.Namespace) -> tuple[float, float]: # # Note that for compatibility with Numpy/TF/Keras, all `dim` arguments # in PyTorch can be also called `axis`. - mean = torch.mean(data, dim=0) + mean = torch.mean(data, axis=0) # Compute the covariance matrix. The covariance matrix is # (data - mean)^T * (data - mean) / data.shape[0] # where transpose can be computed using `torch.transpose` or `torch.t` and # matrix multiplication using either Python operator @ or `torch.matmul`. - cov = (data-mean).t @ (data-mean) / data.shape[0] - print(cov) + cov = torch.matmul(torch.t(data-mean), data-mean)/data.shape[0] # TODO: Compute the total variance, which is the sum of the diagonal # of the covariance matrix. To extract the diagonal use `torch.diagonal`, # and to sum a tensor use `torch.sum`. - total_variance = torch.diagonal(cov).sum() + total_variance = torch.sum(torch.diagonal(cov)).item() # TODO: Now run `args.iterations` of the power iteration algorithm. # Start with a vector of `cov.shape[0]` ones of type `torch.float32` using `torch.ones`. @@ -73,7 +72,7 @@ def main(args: argparse.Namespace) -> tuple[float, float]: # 2. s = l2_norm(v) # The l2_norm can be computed using for example `torch.linalg.vector_norm`. # 3. v = v / s - v = cov @ v + v = cov*v s = torch.linalg.vector_norm(v) v = v/s From 7955c92bdff5f339d160c16f6837117775812999 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Mon, 4 Mar 2024 19:18:44 +0100 Subject: [PATCH 33/64] Use matrix multiplication instead of element-wise multiplication --- labs/01/pca_first.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/labs/01/pca_first.py b/labs/01/pca_first.py index e2b58aa..deecf06 100644 --- a/labs/01/pca_first.py +++ b/labs/01/pca_first.py @@ -72,7 +72,7 @@ def main(args: argparse.Namespace) -> tuple[float, float]: # 2. s = l2_norm(v) # The l2_norm can be computed using for example `torch.linalg.vector_norm`. # 3. v = v / s - v = cov*v + v = cov @ v s = torch.linalg.vector_norm(v) v = v/s From 68a5439aad9a603fef76dd63e9b9b6fcd6083ab1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Mon, 4 Mar 2024 19:18:50 +0100 Subject: [PATCH 34/64] Fix test script --- labs/01/test.ps1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/labs/01/test.ps1 b/labs/01/test.ps1 index 75ddf37..1a8e7cd 100644 --- a/labs/01/test.ps1 +++ b/labs/01/test.ps1 @@ -1,4 +1,4 @@ python3 numpy_entropy.py --data_path numpy_entropy_data_1.txt --model_path numpy_entropy_model_1.txt python3 numpy_entropy.py --data_path numpy_entropy_data_2.txt --model_path numpy_entropy_model_2.txt python3 numpy_entropy.py --data_path numpy_entropy_data_3.txt --model_path numpy_entropy_model_3.txt -spython3 numpy_entropy.py --data_path numpy_entropy_data_4.txt --model_path numpy_entropy_model_4.txt +python3 numpy_entropy.py --data_path numpy_entropy_data_4.txt --model_path numpy_entropy_model_4.txt From 6c10a616d7800ab05609360833cbbd7dcce17980 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Tue, 12 Mar 2024 08:36:16 +0100 Subject: [PATCH 35/64] Update repo setup --- .gitignore | 4 +++- setup.ps1 | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 1d17dae..0fb63b8 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ -.venv +**/.venv/Lib +**/.venv/Scripts +**/.venv/share diff --git a/setup.ps1 b/setup.ps1 index 8fd7b89..f1f7bbe 100644 --- a/setup.ps1 +++ b/setup.ps1 @@ -1 +1,6 @@ +git remote rename origin upstream +git remote add origin git@github.com:joglr/npfl138.git +git fetch +git checkout master +python -m venv .venv .venv/Scripts/pip install -r .\labs\requirements.txt From 3dcc91edcd41c7df70866d8b860e3c514916d18a Mon Sep 17 00:00:00 2001 From: lizawang <56673986+lizawang@users.noreply.github.com> Date: Sat, 16 Mar 2024 21:35:11 +0100 Subject: [PATCH 36/64] task2,3 --- labs/03/mnist_ensemble.py | 15 ++++++++++----- labs/03/mnist_regularization.py | 13 ++++++++----- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/labs/03/mnist_ensemble.py b/labs/03/mnist_ensemble.py index ebffcf9..dc8f60d 100644 --- a/labs/03/mnist_ensemble.py +++ b/labs/03/mnist_ensemble.py @@ -7,6 +7,7 @@ import torch from mnist import MNIST +import numpy as np parser = argparse.ArgumentParser() # These arguments will be set appropriately by ReCodEx, even if you change them. @@ -54,9 +55,11 @@ def main(args: argparse.Namespace) -> tuple[list[float], list[float]]: print("Done") individual_accuracies, ensemble_accuracies = [], [] + model_predictions = [] for model in range(args.models): # TODO: Compute the accuracy on the dev set for the individual `models[model]`. - individual_accuracy = ... + predictions = models[model].predict(mnist.dev.data["images"]) + individual_accuracy = np.sum(np.argmax(predictions, axis=-1) == mnist.dev.data["labels"])/mnist.dev.size # TODO: Compute the accuracy on the dev set for the ensemble `models[0:model+1]`. # @@ -69,10 +72,12 @@ def main(args: argparse.Namespace) -> tuple[list[float], list[float]]: # need to construct Keras ensemble model at all, and instead call `model.predict` # on the individual models and average the results. To measure accuracy, # either do it completely manually or use `keras.metrics.SparseCategoricalAccuracy`. - ensemble_accuracy = ... - - # Store the accuracies - individual_accuracies.append(individual_accuracy) + # Store the predictions + model_predictions.append(predictions) + pred_avg = np.mean(model_predictions, axis=0) + ensemble_accuracy = np.sum(np.argmax(pred_avg, axis=-1) == mnist.dev.data["labels"])/mnist.dev.size + # Store the accuracies + individual_accuracies.append(individual_accuracy) ensemble_accuracies.append(ensemble_accuracy) return individual_accuracies, ensemble_accuracies diff --git a/labs/03/mnist_regularization.py b/labs/03/mnist_regularization.py index cd78fcf..769d7a4 100644 --- a/labs/03/mnist_regularization.py +++ b/labs/03/mnist_regularization.py @@ -74,8 +74,10 @@ def main(args: argparse.Namespace) -> dict[str, float]: model = keras.Sequential() model.add(keras.layers.Rescaling(1 / 255)) model.add(keras.layers.Flatten()) + model.add(keras.layers.Dropout(args.dropout)) for hidden_layer in args.hidden_layers: model.add(keras.layers.Dense(hidden_layer, activation="relu")) + model.add(keras.layers.Dropout(args.dropout)) model.add(keras.layers.Dense(MNIST.LABELS, activation="softmax")) # TODO: Implement label smoothing with the given `args.label_smoothing` strength. @@ -90,20 +92,21 @@ def main(args: argparse.Namespace) -> dict[str, float]: # rate and a weight decay of strength `args.weight_decay`. Then call the # `exclude_from_weight_decay` method to specify that all variables with "bias" # in their name should not be decayed. - optimizer = ... + optimizer = keras.optimizers.AdamW(weight_decay=args.weight_decay) + optimizer.exclude_from_weight_decay=[v for v in model.variables if "bias" in v.name] model.compile( optimizer=optimizer, - loss=keras.losses.SparseCategoricalCrossentropy(), - metrics=[keras.metrics.SparseCategoricalAccuracy(name="accuracy")], + loss=keras.losses.CategoricalCrossentropy(label_smoothing=args.label_smoothing), + metrics=[keras.metrics.CategoricalAccuracy(name="accuracy")], ) tb_callback = TorchTensorBoardCallback(args.logdir) logs = model.fit( - mnist.train.data["images"], mnist.train.data["labels"], + mnist.train.data["images"], keras.utils.to_categorical(mnist.train.data["labels"], num_classes=mnist.LABELS), batch_size=args.batch_size, epochs=args.epochs, - validation_data=(mnist.dev.data["images"], mnist.dev.data["labels"]), + validation_data=(mnist.dev.data["images"], keras.utils.to_categorical(mnist.dev.data["labels"], mnist.LABELS)), callbacks=[tb_callback], ) From 2f0852a2edc1de73b92a63b6958c8d723a432ca0 Mon Sep 17 00:00:00 2001 From: lizawang <56673986+lizawang@users.noreply.github.com> Date: Mon, 11 Mar 2024 11:34:43 +0100 Subject: [PATCH 37/64] my solution so far --- labs/02/gym_cartpole.py | 2 +- labs/02/mnist_training.py | 30 +++++++++++++- labs/02/sgd_backpropagation.py | 71 ++++++++++++++++++++++++++-------- 3 files changed, 85 insertions(+), 18 deletions(-) diff --git a/labs/02/gym_cartpole.py b/labs/02/gym_cartpole.py index 5d191fa..5b2eafc 100644 --- a/labs/02/gym_cartpole.py +++ b/labs/02/gym_cartpole.py @@ -137,7 +137,7 @@ def main(args: argparse.Namespace) -> keras.Model | None: ) tb_callback = TorchTensorBoardCallback(args.logdir) - model.fit(observations, labels, batch_size=args.batch_size, epochs=args.epochs, callbacks=[tb_callback]) + model.fit(observations, labels, batch_size=args.batch_size, validation_split=0.1, epochs=args.epochs, callbacks=[tb_callback]) # Save the model, without the optimizer state. model.save(args.model) diff --git a/labs/02/mnist_training.py b/labs/02/mnist_training.py index 6655133..b9105c6 100644 --- a/labs/02/mnist_training.py +++ b/labs/02/mnist_training.py @@ -107,8 +107,32 @@ def main(args: argparse.Namespace) -> dict[str, float]: # in `model.optimizer._learning_rate` if needed), so after training, the learning rate # should be `args.learning_rate_final`. + optimizer = None + lr, momen, decay, final_lr = args.learning_rate, args.momentum, args.decay, args.learning_rate_final + if decay: + if not final_lr: + print("Please define a final learning rate!") + else: + steps = mnist.train.size/args.batch_size + init_lr = args.learning_rate + if decay == "linear": + lr = keras.optimizers.schedules.PolynomialDecay(initial_learning_rate=init_lr, decay_steps=steps, end_learning_rate=final_lr) + elif decay == "exponential": + lr = keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=init_lr, decay_steps=steps, decay_rate=0.90) + elif decay == "cosine": + lr = keras.optimizers.schedules.CosineDecay(initial_learning_rate=init_lr, decay_steps=steps, alpha=0.1) + + if args.optimizer == 'SGD': + if momen: + optimizer = keras.optimizers.SGD(learning_rate=lr, momentum=momen, nesterov=True) + else: + optimizer = keras.optimizers.SGD(learning_rate=lr) + elif args.optimizer =="Adam": + optimizer = keras.optimizers.Adam(learning_rate=lr) + + model.compile( - optimizer=..., + optimizer=optimizer, loss=keras.losses.SparseCategoricalCrossentropy(), metrics=[keras.metrics.SparseCategoricalAccuracy("accuracy")], ) @@ -121,6 +145,10 @@ def main(args: argparse.Namespace) -> dict[str, float]: validation_data=(mnist.dev.data["images"], mnist.dev.data["labels"]), callbacks=[tb_callback], ) + model.summary() + + if decay: + print("Next learning rate to be used:", model.optimizer.learning_rate) # Return development metrics for ReCodEx to validate. return {metric: values[-1] for metric, values in logs.history.items() if metric.startswith("val_")} diff --git a/labs/02/sgd_backpropagation.py b/labs/02/sgd_backpropagation.py index ad65784..e3cfacf 100644 --- a/labs/02/sgd_backpropagation.py +++ b/labs/02/sgd_backpropagation.py @@ -3,7 +3,10 @@ import datetime import os import re -os.environ.setdefault("KERAS_BACKEND", "torch") # Use PyTorch backend unless specified otherwise + +os.environ.setdefault( + "KERAS_BACKEND", "torch" +) # Use PyTorch backend unless specified otherwise import keras import numpy as np @@ -21,11 +24,17 @@ # These arguments will be set appropriately by ReCodEx, even if you change them. parser.add_argument("--batch_size", default=50, type=int, help="Batch size.") parser.add_argument("--epochs", default=10, type=int, help="Number of epochs.") -parser.add_argument("--hidden_layer", default=100, type=int, help="Size of the hidden layer.") +parser.add_argument( + "--hidden_layer", default=100, type=int, help="Size of the hidden layer." +) parser.add_argument("--learning_rate", default=0.1, type=float, help="Learning rate.") -parser.add_argument("--recodex", default=False, action="store_true", help="Evaluation in ReCodEx.") +parser.add_argument( + "--recodex", default=False, action="store_true", help="Evaluation in ReCodEx." +) parser.add_argument("--seed", default=42, type=int, help="Random seed.") -parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.") +parser.add_argument( + "--threads", default=1, type=int, help="Maximum number of threads to use." +) # If you add more arguments, ReCodEx will keep them with your default values. @@ -35,7 +44,11 @@ def __init__(self, args: argparse.Namespace) -> None: self._args = args self._W1 = keras.Variable( - keras.random.normal([MNIST.W * MNIST.H * MNIST.C, args.hidden_layer], stddev=0.1, seed=args.seed), + keras.random.normal( + [MNIST.W * MNIST.H * MNIST.C, args.hidden_layer], + stddev=0.1, + seed=args.seed, + ), trainable=True, ) self._b1 = keras.Variable(keras.ops.zeros([args.hidden_layer]), trainable=True) @@ -44,7 +57,12 @@ def __init__(self, args: argparse.Namespace) -> None: # - _W2, which is a trainable variable of size `[args.hidden_layer, MNIST.LABELS]`, # initialized to `keras.random.normal` value `with stddev=0.1` and `seed=args.seed`, # - _b2, which is a trainable variable of size `[MNIST.LABELS]` initialized to zeros - self._W2 = keras.Variable(keras.random.normal([args.hidden_layer, MNIST.LABELS], stddev=0.1, seed=args.seed), trainable=True) + self._W2 = keras.Variable( + keras.random.normal( + [args.hidden_layer, MNIST.LABELS], stddev=0.1, seed=args.seed + ), + trainable=True, + ) self._b2 = keras.Variable(keras.ops.zeros([MNIST.LABELS]), trainable=True) @@ -67,7 +85,9 @@ def predict(self, inputs: torch.Tensor) -> torch.Tensor: # - then multiply it by `self._W1` and then add `self._b1` # - apply `keras.ops.tanh` - hidden_layer_output = keras.ops.tanh(keras.ops.matmul(reshaped_inputs, self._W1) + self._b1) + hidden_layer_output = keras.ops.tanh( + keras.ops.matmul(reshaped_inputs, self._W1) + self._b1 + ) # - multiply the result by `self._W2` and then add `self._b2` @@ -93,11 +113,16 @@ def train_epoch(self, dataset: MNIST.Dataset) -> None: # - either use `keras.ops.one_hot` to obtain one-hot encoded gold labels, # - or suitably use `keras.ops.take_along_axis` to "index" the predicted probabilities. # - Finally, compute the average across the batch examples. - loss = keras.ops.mean(keras.ops.categorical_crossentropy(keras.ops.one_hot(batch["labels"], MNIST.LABELS), probabilities)) + loss = keras.ops.mean( + keras.ops.categorical_crossentropy( + keras.ops.one_hot(batch["labels"], MNIST.LABELS), probabilities + ) + ) # We create a list of all variables. Note that a `keras.Model/Layer` automatically # tracks owned variables, so we could also use `self.trainable_variables` # (or even `self.variables`, which is useful for loading/saving). variables = [self._W1, self._b1, self._W2, self._b2] + # print("w1, b1, w2, b2:", self._W1.shape, self._b1.shape, self._W2.shape, self._b2.shape) # Compute the gradient of the loss with respect to variables using # backpropagation algorithm by @@ -107,6 +132,7 @@ def train_epoch(self, dataset: MNIST.Dataset) -> None: loss.backward() gradients = [variable.value.grad for variable in variables] + # print("gradients:", gradients) with torch.no_grad(): for variable, gradient in zip(variables, gradients): # Perform the SGD update with learning rate `self._args.learning_rate` @@ -126,7 +152,6 @@ def evaluate(self, dataset: MNIST.Dataset) -> float: # Evaluate how many batch examples were predicted # correctly and increase `correct` variable accordingly. correct += np.sum(np.argmax(probabilities, axis=-1) == batch["labels"]) - return correct / dataset.size @@ -138,11 +163,19 @@ def main(args: argparse.Namespace) -> tuple[float, float]: torch.set_num_interop_threads(args.threads) # Create logdir name - args.logdir = os.path.join("logs", "{}-{}-{}".format( - os.path.basename(globals().get("__file__", "notebook")), - datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"), - ",".join(("{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v) for k, v in sorted(vars(args).items()))) - )) + args.logdir = os.path.join( + "logs", + "{}-{}-{}".format( + os.path.basename(globals().get("__file__", "notebook")), + datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"), + ",".join( + ( + "{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v) + for k, v in sorted(vars(args).items()) + ) + ), + ), + ) # Load data mnist = MNIST() @@ -159,12 +192,18 @@ def main(args: argparse.Namespace) -> tuple[float, float]: # Evaluate the dev data using `evaluate` on `mnist.dev` dataset accuracy = model.evaluate(mnist.dev) - print("Dev accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * accuracy), flush=True) + print( + "Dev accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * accuracy), + flush=True, + ) writer.add_scalar("dev/accuracy", 100 * accuracy, epoch + 1) # Evaluate the test data using `evaluate` on `mnist.test` dataset test_accuracy = model.evaluate(mnist.test) - print("Test accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * test_accuracy), flush=True) + print( + "Test accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * test_accuracy), + flush=True, + ) writer.add_scalar("test/accuracy", 100 * test_accuracy, epoch + 1) # Return dev and test accuracies for ReCodEx to validate. From 25435047934686618e9f374082ec2d2b84f657a1 Mon Sep 17 00:00:00 2001 From: lizawang <56673986+lizawang@users.noreply.github.com> Date: Tue, 12 Mar 2024 10:59:02 +0100 Subject: [PATCH 38/64] update --- labs/02/gym_cartpole.py | 6 ++++-- labs/02/mnist_training.py | 8 +++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/labs/02/gym_cartpole.py b/labs/02/gym_cartpole.py index 5b2eafc..c55fb14 100644 --- a/labs/02/gym_cartpole.py +++ b/labs/02/gym_cartpole.py @@ -8,6 +8,7 @@ import keras import numpy as np import torch +from collections import Counter # Jonas Glerup Røssum # 31a0a96a-c590-4486-b194-f72765b2ce25 @@ -105,7 +106,7 @@ def main(args: argparse.Namespace) -> keras.Model | None: # Load the data data = np.loadtxt("gym_cartpole_data.txt") observations, labels = data[:, :-1], data[:, -1].astype(np.int32) - print("data shape:", observations.shape, "label shape:", labels.shape) + print("data shape:", observations.shape, "label shape:", Counter(labels)) @@ -137,7 +138,8 @@ def main(args: argparse.Namespace) -> keras.Model | None: ) tb_callback = TorchTensorBoardCallback(args.logdir) - model.fit(observations, labels, batch_size=args.batch_size, validation_split=0.1, epochs=args.epochs, callbacks=[tb_callback]) + callback = keras.callbacks.EarlyStopping(monitor='val_loss', patience=3) + model.fit(observations, labels, batch_size=args.batch_size, validation_split=0.1, epochs=args.epochs, callbacks=[tb_callback, callback]) # Save the model, without the optimizer state. model.save(args.model) diff --git a/labs/02/mnist_training.py b/labs/02/mnist_training.py index b9105c6..5c7e14a 100644 --- a/labs/02/mnist_training.py +++ b/labs/02/mnist_training.py @@ -118,9 +118,11 @@ def main(args: argparse.Namespace) -> dict[str, float]: if decay == "linear": lr = keras.optimizers.schedules.PolynomialDecay(initial_learning_rate=init_lr, decay_steps=steps, end_learning_rate=final_lr) elif decay == "exponential": - lr = keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=init_lr, decay_steps=steps, decay_rate=0.90) + decay_rate = round(final_lr/init_lr,2) + lr = keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=init_lr, decay_steps=steps, decay_rate=decay_rate) elif decay == "cosine": - lr = keras.optimizers.schedules.CosineDecay(initial_learning_rate=init_lr, decay_steps=steps, alpha=0.1) + alpha = round(final_lr/init_lr,2) + lr = keras.optimizers.schedules.CosineDecay(initial_learning_rate=init_lr, decay_steps=steps, alpha=alpha) if args.optimizer == 'SGD': if momen: @@ -148,7 +150,7 @@ def main(args: argparse.Namespace) -> dict[str, float]: model.summary() if decay: - print("Next learning rate to be used:", model.optimizer.learning_rate) + print("Next learning rate to be used:", model.optimizer.learning_rate.item()) # Return development metrics for ReCodEx to validate. return {metric: values[-1] for metric, values in logs.history.items() if metric.startswith("val_")} From a76ee8095932fa261634413dff590b037524d871 Mon Sep 17 00:00:00 2001 From: lizawang <56673986+lizawang@users.noreply.github.com> Date: Tue, 12 Mar 2024 11:05:20 +0100 Subject: [PATCH 39/64] update --- labs/02/sgd_manual.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/labs/02/sgd_manual.py b/labs/02/sgd_manual.py index 422d3e9..0144a60 100644 --- a/labs/02/sgd_manual.py +++ b/labs/02/sgd_manual.py @@ -39,7 +39,9 @@ def __init__(self, args: argparse.Namespace) -> None: # - _W2, which is a trainable variable of size `[args.hidden_layer, MNIST.LABELS]`, # initialized to `keras.random.normal` value `with stddev=0.1` and `seed=args.seed`, # - _b2, which is a trainable variable of size `[MNIST.LABELS]` initialized to zeros - ... + self._W2 = keras.Variable(keras.random.normal([args.hidden_layer, MNIST.LABELS], stddev=0.1, seed=args.seed), + trainable=True) + self._b2 = keras.Variable(keras.ops.zeros([MNIST.LABELS]), trainable=True) def predict(self, inputs: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: # TODO(sgd_backpropagation): Define the computation of the network. Notably: @@ -56,7 +58,14 @@ def predict(self, inputs: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, tor # TODO: In order to support manual gradient computation, you should # return not only the output layer, but also the hidden layer after applying # tanh, and the input layer after reshaping. - return ..., ..., ... + input = keras.ops.cast(inputs, dtype="float32") + input = torch.div(input, 255) + input = input.reshape([input.shape[0], -1]) + output = keras.ops.matmul(input,self._W1) + self._b1 + output = keras.ops.tanh(output) + output = keras.ops.matmul(output,self._W2) + self._b2 + output = keras.ops.softmax(output) + return output def train_epoch(self, dataset: MNIST.Dataset) -> None: for batch in dataset.batches(self._args.batch_size): @@ -72,7 +81,7 @@ def train_epoch(self, dataset: MNIST.Dataset) -> None: # # Compute the input layer, hidden layer and output layer # of the batch images using `self.predict`. - + probabilities = self.predict(torch.tensor(batch['images'])) # TODO: Compute the gradient of the loss with respect to all # variables. Note that the loss is computed as in `sgd_backpropagation`: # - For every batch example, the loss is the categorical crossentropy of the @@ -80,7 +89,6 @@ def train_epoch(self, dataset: MNIST.Dataset) -> None: # - either use `keras.ops.one_hot` to obtain one-hot encoded gold labels, # - or suitably use `keras.ops.take_along_axis` to "index" the predicted probabilities. # - Finally, compute the average across the batch examples. - # # During the gradient computation, you will need to compute # a batched version of a so-called outer product # `C[a, i, j] = A[a, i] * B[a, j]`, @@ -88,6 +96,7 @@ def train_epoch(self, dataset: MNIST.Dataset) -> None: # `A[:, :, np.newaxis] * B[:, np.newaxis, :]` # or with # `keras.ops.einsum("ai,aj->aij", A, B)`. + loss = torch.mean(keras.ops.categorical_crossentropy(keras.ops.one_hot(batch['labels'], num_classes=MNIST.LABELS), probabilities)) # TODO(sgd_backpropagation): Perform the SGD update with learning rate `self._args.learning_rate` # for the variable and computed gradient. You can modify the From 8f66c587b28322f9b47e0fd9ffb8f1733902765e Mon Sep 17 00:00:00 2001 From: lizawang <56673986+lizawang@users.noreply.github.com> Date: Tue, 12 Mar 2024 14:58:51 +0100 Subject: [PATCH 40/64] third commit --- labs/02/gym_cartpole.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/labs/02/gym_cartpole.py b/labs/02/gym_cartpole.py index c55fb14..d9a378b 100644 --- a/labs/02/gym_cartpole.py +++ b/labs/02/gym_cartpole.py @@ -138,8 +138,8 @@ def main(args: argparse.Namespace) -> keras.Model | None: ) tb_callback = TorchTensorBoardCallback(args.logdir) - callback = keras.callbacks.EarlyStopping(monitor='val_loss', patience=3) - model.fit(observations, labels, batch_size=args.batch_size, validation_split=0.1, epochs=args.epochs, callbacks=[tb_callback, callback]) + labels = keras.ops.one_hot(labels,num_classes=2) + model.fit(observations, labels, batch_size=args.batch_size, epochs=args.epochs, callbacks=[tb_callback, callback]) # Save the model, without the optimizer state. model.save(args.model) From 986032a279fcb7e020d92841cea0617f531aff4a Mon Sep 17 00:00:00 2001 From: lizawang <56673986+lizawang@users.noreply.github.com> Date: Tue, 12 Mar 2024 16:02:04 +0100 Subject: [PATCH 41/64] final --- labs/02/gym_cartpole.py | 3 +-- labs/02/mnist_training.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/labs/02/gym_cartpole.py b/labs/02/gym_cartpole.py index d9a378b..b708b63 100644 --- a/labs/02/gym_cartpole.py +++ b/labs/02/gym_cartpole.py @@ -106,7 +106,6 @@ def main(args: argparse.Namespace) -> keras.Model | None: # Load the data data = np.loadtxt("gym_cartpole_data.txt") observations, labels = data[:, :-1], data[:, -1].astype(np.int32) - print("data shape:", observations.shape, "label shape:", Counter(labels)) @@ -139,7 +138,7 @@ def main(args: argparse.Namespace) -> keras.Model | None: tb_callback = TorchTensorBoardCallback(args.logdir) labels = keras.ops.one_hot(labels,num_classes=2) - model.fit(observations, labels, batch_size=args.batch_size, epochs=args.epochs, callbacks=[tb_callback, callback]) + model.fit(observations, labels, batch_size=args.batch_size, epochs=args.epochs, callbacks=[tb_callback]) # Save the model, without the optimizer state. model.save(args.model) diff --git a/labs/02/mnist_training.py b/labs/02/mnist_training.py index 5c7e14a..4649bae 100644 --- a/labs/02/mnist_training.py +++ b/labs/02/mnist_training.py @@ -118,10 +118,10 @@ def main(args: argparse.Namespace) -> dict[str, float]: if decay == "linear": lr = keras.optimizers.schedules.PolynomialDecay(initial_learning_rate=init_lr, decay_steps=steps, end_learning_rate=final_lr) elif decay == "exponential": - decay_rate = round(final_lr/init_lr,2) + decay_rate = final_lr/init_lr lr = keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=init_lr, decay_steps=steps, decay_rate=decay_rate) elif decay == "cosine": - alpha = round(final_lr/init_lr,2) + alpha = final_lr/init_lr lr = keras.optimizers.schedules.CosineDecay(initial_learning_rate=init_lr, decay_steps=steps, alpha=alpha) if args.optimizer == 'SGD': From 82267643aada5de3da86377e64095163f0857d73 Mon Sep 17 00:00:00 2001 From: lizawang <56673986+lizawang@users.noreply.github.com> Date: Tue, 12 Mar 2024 21:28:48 +0100 Subject: [PATCH 42/64] final --- labs/02/mnist_training.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/labs/02/mnist_training.py b/labs/02/mnist_training.py index 4649bae..116ae98 100644 --- a/labs/02/mnist_training.py +++ b/labs/02/mnist_training.py @@ -11,6 +11,11 @@ from mnist import MNIST +# Jonas Glerup Røssum +# 31a0a96a-c590-4486-b194-f72765b2ce25 +# Xiao Wang +# 91d4d1d7-b800-4765-96b9-df098ac36a66 + parser = argparse.ArgumentParser() # These arguments will be set appropriately by ReCodEx, even if you change them. parser.add_argument("--batch_size", default=50, type=int, help="Batch size.") @@ -108,12 +113,12 @@ def main(args: argparse.Namespace) -> dict[str, float]: # should be `args.learning_rate_final`. optimizer = None - lr, momen, decay, final_lr = args.learning_rate, args.momentum, args.decay, args.learning_rate_final + lr, momen, decay, final_lr, epochs = args.learning_rate, args.momentum, args.decay, args.learning_rate_final, args.epochs if decay: if not final_lr: print("Please define a final learning rate!") else: - steps = mnist.train.size/args.batch_size + steps = mnist.train.size/args.batch_size*epochs init_lr = args.learning_rate if decay == "linear": lr = keras.optimizers.schedules.PolynomialDecay(initial_learning_rate=init_lr, decay_steps=steps, end_learning_rate=final_lr) From 7af654e5cff22559f9f46b333b8ac1176907882d Mon Sep 17 00:00:00 2001 From: lizawang <56673986+lizawang@users.noreply.github.com> Date: Sat, 16 Mar 2024 16:23:39 +0100 Subject: [PATCH 43/64] fixed --- labs/02/sgd_manual.py | 48 +++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/labs/02/sgd_manual.py b/labs/02/sgd_manual.py index 0144a60..f023328 100644 --- a/labs/02/sgd_manual.py +++ b/labs/02/sgd_manual.py @@ -12,6 +12,11 @@ from mnist import MNIST +# Jonas Glerup Røssum +# 31a0a96a-c590-4486-b194-f72765b2ce25 +# Xiao Wang +# 91d4d1d7-b800-4765-96b9-df098ac36a66 + parser = argparse.ArgumentParser() # These arguments will be set appropriately by ReCodEx, even if you change them. parser.add_argument("--batch_size", default=50, type=int, help="Batch size.") @@ -61,11 +66,11 @@ def predict(self, inputs: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, tor input = keras.ops.cast(inputs, dtype="float32") input = torch.div(input, 255) input = input.reshape([input.shape[0], -1]) - output = keras.ops.matmul(input,self._W1) + self._b1 - output = keras.ops.tanh(output) - output = keras.ops.matmul(output,self._W2) + self._b2 - output = keras.ops.softmax(output) - return output + hidden_input = keras.ops.matmul(input,self._W1) + self._b1 + hidden_output = keras.ops.tanh(hidden_input) + sm_input = keras.ops.matmul(hidden_output,self._W2) + self._b2 + output = keras.ops.softmax(sm_input) + return input, hidden_output, output def train_epoch(self, dataset: MNIST.Dataset) -> None: for batch in dataset.batches(self._args.batch_size): @@ -81,7 +86,7 @@ def train_epoch(self, dataset: MNIST.Dataset) -> None: # # Compute the input layer, hidden layer and output layer # of the batch images using `self.predict`. - probabilities = self.predict(torch.tensor(batch['images'])) + input_layer, hidden_layer, probabilities = self.predict(torch.tensor(batch['images'])) # TODO: Compute the gradient of the loss with respect to all # variables. Note that the loss is computed as in `sgd_backpropagation`: # - For every batch example, the loss is the categorical crossentropy of the @@ -96,13 +101,30 @@ def train_epoch(self, dataset: MNIST.Dataset) -> None: # `A[:, :, np.newaxis] * B[:, np.newaxis, :]` # or with # `keras.ops.einsum("ai,aj->aij", A, B)`. - loss = torch.mean(keras.ops.categorical_crossentropy(keras.ops.one_hot(batch['labels'], num_classes=MNIST.LABELS), probabilities)) + gold_labels = keras.ops.one_hot(batch['labels'], num_classes=MNIST.LABELS) + loss = torch.mean(keras.ops.categorical_crossentropy(gold_labels, probabilities)) + + gd_loss = probabilities - gold_labels + gd_b2 = gd_loss + #print("loss gradient, hidden_layer, input", gd_b2.shape, hidden_layer.shape, input_layer.shape) + gd_w2 = keras.ops.einsum("ai,aj->aij", hidden_layer, gd_loss) + gd_h = keras.ops.matmul(gd_loss, keras.ops.transpose(self._W2)) + hidden_input = keras.ops.matmul(input_layer,self._W1) + self._b1 + gd_h_i = gd_h*(1-keras.ops.power(keras.ops.tanh(hidden_input), 2)) + gd_b1 = gd_h_i + gd_w1 = keras.ops.einsum("ai,aj->aij", input_layer, gd_h_i) + #print("gd_w2, gd_w1, gd_b2, gd_b1:", gd_w2.shape, gd_w1.shape, gd_b2.shape, gd_b1.shape) # TODO(sgd_backpropagation): Perform the SGD update with learning rate `self._args.learning_rate` # for the variable and computed gradient. You can modify the # variable value with `variable.assign` or in this case the more # efficient `variable.assign_sub`. - ... + variables = [self._W1, self._b1, self._W2, self._b2] + gradients = [gd_w1, gd_b1, gd_w2, gd_b2] + with torch.no_grad(): + for variable, gradient in zip(variables, gradients): + variable.assign_sub(self._args.learning_rate*keras.ops.mean(gradient, axis=0)) + def evaluate(self, dataset: MNIST.Dataset) -> float: # Compute the accuracy of the model prediction @@ -110,11 +132,11 @@ def evaluate(self, dataset: MNIST.Dataset) -> float: for batch in dataset.batches(self._args.batch_size): # TODO: Compute the probabilities of the batch images using `self.predict` # and convert them to Numpy with `keras.ops.convert_to_numpy`. - probabilities = ... + probabilities = keras.ops.convert_to_numpy(self.predict(torch.tensor(batch['images']))[2]) # TODO(sgd_backpropagation): Evaluate how many batch examples were predicted # correctly and increase `correct` variable accordingly. - correct += ... + correct += np.sum(np.argmax(probabilities, axis=-1) == batch["labels"]) return correct / dataset.size @@ -144,14 +166,14 @@ def main(args: argparse.Namespace) -> tuple[float, float]: for epoch in range(args.epochs): # TODO: Run the `train_epoch` with `mnist.train` dataset - + model.train_epoch(mnist.train) # TODO: Evaluate the dev data using `evaluate` on `mnist.dev` dataset - accuracy = ... + accuracy = model.evaluate(mnist.dev) print("Dev accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * accuracy), flush=True) writer.add_scalar("dev/accuracy", 100 * accuracy, epoch + 1) # TODO: Evaluate the test data using `evaluate` on `mnist.test` dataset - test_accuracy = ... + test_accuracy = model.evaluate(mnist.test) print("Test accuracy after epoch {} is {:.2f}".format(epoch + 1, 100 * test_accuracy), flush=True) writer.add_scalar("test/accuracy", 100 * test_accuracy, epoch + 1) From aaa30ef49d28208b0f14b3f8a760c21467a47e6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Mon, 18 Mar 2024 13:06:43 +0100 Subject: [PATCH 44/64] Remove unnecessary entries from .gitignore --- .gitignore | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 0fb63b8..f8e2b15 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ -**/.venv/Lib -**/.venv/Scripts -**/.venv/share +.venv/ +logs/ +mnist.npz From ed5c3be212e8244ebc04aa9a4d0b6dc33c388bc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Sun, 3 Mar 2024 15:28:32 +0100 Subject: [PATCH 45/64] Solve numpy_entropy --- labs/01/numpy_entropy.py | 1 + 1 file changed, 1 insertion(+) diff --git a/labs/01/numpy_entropy.py b/labs/01/numpy_entropy.py index 4203a24..819b6b0 100644 --- a/labs/01/numpy_entropy.py +++ b/labs/01/numpy_entropy.py @@ -21,6 +21,7 @@ def main(args: argparse.Namespace) -> tuple[float, float, float]: # Load data distribution, each line containing a datapoint -- a string. data_map = {} + # Load data distribution, each line containing a datapoint -- a string. with open(args.data_path, "r", encoding="utf-8") as data: for line in data: line = line.rstrip("\n") From 6659f8e26e01e8b5bf15ef4c7ad03ebc9ac7c07c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Mon, 4 Mar 2024 12:48:47 +0100 Subject: [PATCH 46/64] Fix reshape and compute covariance matrix in pca_first.keras.py and pca_first.py --- labs/01/pca_first.py | 2 +- labs/01/test.ps1 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/labs/01/pca_first.py b/labs/01/pca_first.py index deecf06..688e89c 100644 --- a/labs/01/pca_first.py +++ b/labs/01/pca_first.py @@ -58,7 +58,7 @@ def main(args: argparse.Namespace) -> tuple[float, float]: # TODO: Compute the total variance, which is the sum of the diagonal # of the covariance matrix. To extract the diagonal use `torch.diagonal`, # and to sum a tensor use `torch.sum`. - total_variance = torch.sum(torch.diagonal(cov)).item() + total_variance = torch.diagonal(cov).sum() # TODO: Now run `args.iterations` of the power iteration algorithm. # Start with a vector of `cov.shape[0]` ones of type `torch.float32` using `torch.ones`. diff --git a/labs/01/test.ps1 b/labs/01/test.ps1 index 1a8e7cd..75ddf37 100644 --- a/labs/01/test.ps1 +++ b/labs/01/test.ps1 @@ -1,4 +1,4 @@ python3 numpy_entropy.py --data_path numpy_entropy_data_1.txt --model_path numpy_entropy_model_1.txt python3 numpy_entropy.py --data_path numpy_entropy_data_2.txt --model_path numpy_entropy_model_2.txt python3 numpy_entropy.py --data_path numpy_entropy_data_3.txt --model_path numpy_entropy_model_3.txt -python3 numpy_entropy.py --data_path numpy_entropy_data_4.txt --model_path numpy_entropy_model_4.txt +spython3 numpy_entropy.py --data_path numpy_entropy_data_4.txt --model_path numpy_entropy_model_4.txt From ded51d8ab940cff8e35c785d1ce73b7b63b49080 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Mon, 4 Mar 2024 12:48:55 +0100 Subject: [PATCH 47/64] Add .gitignore, pull.ps1, and setup.ps1 files --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index f8e2b15..309f474 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ -.venv/ +.venv logs/ mnist.npz From ae7c7a22f0c1991827d8c3b46306b2adca782183 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Mon, 4 Mar 2024 19:15:48 +0100 Subject: [PATCH 48/64] Add Lisa's solution --- labs/01/pca_first.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/labs/01/pca_first.py b/labs/01/pca_first.py index 688e89c..deecf06 100644 --- a/labs/01/pca_first.py +++ b/labs/01/pca_first.py @@ -58,7 +58,7 @@ def main(args: argparse.Namespace) -> tuple[float, float]: # TODO: Compute the total variance, which is the sum of the diagonal # of the covariance matrix. To extract the diagonal use `torch.diagonal`, # and to sum a tensor use `torch.sum`. - total_variance = torch.diagonal(cov).sum() + total_variance = torch.sum(torch.diagonal(cov)).item() # TODO: Now run `args.iterations` of the power iteration algorithm. # Start with a vector of `cov.shape[0]` ones of type `torch.float32` using `torch.ones`. From 28b0519d0209562c7c4a856ec0c4de568e737830 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Mon, 18 Mar 2024 20:08:12 +0100 Subject: [PATCH 49/64] Update .gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 309f474..917c1db 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ -.venv +**/.venv/ logs/ mnist.npz +*.zip From 329b15497e5e465f1e1f28d339ebe1008a6be3ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Mon, 18 Mar 2024 19:52:00 +0100 Subject: [PATCH 50/64] Solve mnist_regularization --- labs/03/mnist_regularization.ps1 | 24 +++++++ labs/03/mnist_regularization.py | 120 +++++++++++++++++++++++-------- 2 files changed, 114 insertions(+), 30 deletions(-) create mode 100644 labs/03/mnist_regularization.ps1 diff --git a/labs/03/mnist_regularization.ps1 b/labs/03/mnist_regularization.ps1 new file mode 100644 index 0000000..2a61e88 --- /dev/null +++ b/labs/03/mnist_regularization.ps1 @@ -0,0 +1,24 @@ +# Run script from root repo directory + +.\.venv\Scripts\python labs\03\mnist_regularization.py --epochs=1 --dropout=0.3 +.\.venv\Scripts\python labs\03\mnist_regularization.py --epochs=1 --dropout=0.5 --hidden_layers 300 300 +.\.venv\Scripts\python labs\03\mnist_regularization.py --epochs=1 --weight_decay=0.1 +.\.venv\Scripts\python labs\03\mnist_regularization.py --epochs=1 --weight_decay=0.3 +.\.venv\Scripts\python labs\03\mnist_regularization.py --epochs=1 --label_smoothing=0.1 +.\.venv\Scripts\python labs\03\mnist_regularization.py --epochs=1 --label_smoothing=0.3 + +# Expected +# accuracy: 0.5981 - loss: 1.2688 - val_accuracy: 0.9174 - val_loss: 0.3051 +# accuracy: 0.3429 - loss: 1.9163 - val_accuracy: 0.8826 - val_loss: 0.4937 +# accuracy: 0.7014 - loss: 1.0412 - val_accuracy: 0.9236 - val_loss: 0.2776 +# accuracy: 0.7006 - loss: 1.0429 - val_accuracy: 0.9232 - val_loss: 0.2801 +# accuracy: 0.7102 - loss: 1.3015 - val_accuracy: 0.9276 - val_loss: 0.7656 +# accuracy: 0.7113 - loss: 1.6854 - val_accuracy: 0.9332 - val_loss: 1.3709 + +# Actual +# accuracy: 0.6178 - loss: 1.2374 - val_accuracy: 0.9164 - val_loss: 0.3045 +# accuracy: 0.3412 - loss: 1.8919 - val_accuracy: 0.8818 - val_loss: 0.4794 +# accuracy: 0.6948 - loss: 1.0394 - val_accuracy: 0.9186 - val_loss: 0.2859 +# accuracy: 0.6947 - loss: 1.0410 - val_accuracy: 0.9184 - val_loss: 0.2885 +# accuracy: 0.6996 - loss: 1.3013 - val_accuracy: 0.9228 - val_loss: 0.7735 +# accuracy: 0.7102 - loss: 1.6879 - val_accuracy: 0.9284 - val_loss: 1.3739 diff --git a/labs/03/mnist_regularization.py b/labs/03/mnist_regularization.py index 769d7a4..0b2e5a2 100644 --- a/labs/03/mnist_regularization.py +++ b/labs/03/mnist_regularization.py @@ -3,7 +3,10 @@ import datetime import os import re -os.environ.setdefault("KERAS_BACKEND", "torch") # Use PyTorch backend unless specified otherwise + +os.environ.setdefault( + "KERAS_BACKEND", "torch" +) # Use PyTorch backend unless specified otherwise import keras import torch @@ -15,12 +18,20 @@ parser.add_argument("--batch_size", default=50, type=int, help="Batch size.") parser.add_argument("--dropout", default=0, type=float, help="Dropout regularization.") parser.add_argument("--epochs", default=30, type=int, help="Number of epochs.") -parser.add_argument("--hidden_layers", default=[400], nargs="*", type=int, help="Hidden layer sizes.") +parser.add_argument( + "--hidden_layers", default=[400], nargs="*", type=int, help="Hidden layer sizes." +) parser.add_argument("--label_smoothing", default=0, type=float, help="Label smoothing.") -parser.add_argument("--recodex", default=False, action="store_true", help="Evaluation in ReCodEx.") +parser.add_argument( + "--recodex", default=False, action="store_true", help="Evaluation in ReCodEx." +) parser.add_argument("--seed", default=42, type=int, help="Random seed.") -parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.") -parser.add_argument("--weight_decay", default=0, type=float, help="Weight decay strength.") +parser.add_argument( + "--threads", default=1, type=int, help="Maximum number of threads to use." +) +parser.add_argument( + "--weight_decay", default=0, type=float, help="Weight decay strength." +) # If you add more arguments, ReCodEx will keep them with your default values. @@ -32,7 +43,10 @@ def __init__(self, path): def writer(self, writer): if writer not in self._writers: import torch.utils.tensorboard - self._writers[writer] = torch.utils.tensorboard.SummaryWriter(os.path.join(self._path, writer)) + + self._writers[writer] = torch.utils.tensorboard.SummaryWriter( + os.path.join(self._path, writer) + ) return self._writers[writer] def add_logs(self, writer, logs, step): @@ -43,10 +57,24 @@ def add_logs(self, writer, logs, step): def on_epoch_end(self, epoch, logs=None): if logs: - if isinstance(getattr(self.model, "optimizer", None), keras.optimizers.Optimizer): - logs = logs | {"learning_rate": keras.ops.convert_to_numpy(self.model.optimizer.learning_rate)} - self.add_logs("train", {k: v for k, v in logs.items() if not k.startswith("val_")}, epoch + 1) - self.add_logs("val", {k[4:]: v for k, v in logs.items() if k.startswith("val_")}, epoch + 1) + if isinstance( + getattr(self.model, "optimizer", None), keras.optimizers.Optimizer + ): + logs = logs | { + "learning_rate": keras.ops.convert_to_numpy( + self.model.optimizer.learning_rate + ) + } + self.add_logs( + "train", + {k: v for k, v in logs.items() if not k.startswith("val_")}, + epoch + 1, + ) + self.add_logs( + "val", + {k[4:]: v for k, v in logs.items() if k.startswith("val_")}, + epoch + 1, + ) def main(args: argparse.Namespace) -> dict[str, float]: @@ -57,16 +85,24 @@ def main(args: argparse.Namespace) -> dict[str, float]: torch.set_num_interop_threads(args.threads) # Create logdir name - args.logdir = os.path.join("logs", "{}-{}-{}".format( - os.path.basename(globals().get("__file__", "notebook")), - datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"), - ",".join(("{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v) for k, v in sorted(vars(args).items()))) - )) + args.logdir = os.path.join( + "logs", + "{}-{}-{}".format( + os.path.basename(globals().get("__file__", "notebook")), + datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"), + ",".join( + ( + "{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v) + for k, v in sorted(vars(args).items()) + ) + ), + ), + ) # Load data mnist = MNIST(size={"train": 5_000}) - # TODO: Incorporate dropout to the model below. Namely, add + # Incorporate dropout to the model below. Namely, add # a `keras.layers.Dropout` layer with `args.dropout` rate after # the `Flatten` layer and after each `Dense` hidden layer (but not after # the output `Dense` layer). @@ -75,12 +111,14 @@ def main(args: argparse.Namespace) -> dict[str, float]: model.add(keras.layers.Rescaling(1 / 255)) model.add(keras.layers.Flatten()) model.add(keras.layers.Dropout(args.dropout)) + for hidden_layer in args.hidden_layers: model.add(keras.layers.Dense(hidden_layer, activation="relu")) - model.add(keras.layers.Dropout(args.dropout)) + model.add(keras.layers.Dropout(rate=args.dropout)) + model.add(keras.layers.Dense(MNIST.LABELS, activation="softmax")) - # TODO: Implement label smoothing with the given `args.label_smoothing` strength. + # Implement label smoothing with the given `args.label_smoothing` strength. # You need to change the `SparseCategorical{Crossentropy,Accuracy}` to # `Categorical{Crossentropy,Accuracy}`, because `label_smoothing` is supported # only by the `CategoricalCrossentropy`. That means you also need to modify @@ -88,30 +126,52 @@ def main(args: argparse.Namespace) -> dict[str, float]: # of the gold class to a full categorical distribution (you can use either NumPy, # or there is a helper method also in the `keras.utils` module). - # TODO: Create a `keras.optimizers.AdamW`, using the default learning + # Create a `keras.optimizers.AdamW`, using the default learning # rate and a weight decay of strength `args.weight_decay`. Then call the # `exclude_from_weight_decay` method to specify that all variables with "bias" # in their name should not be decayed. optimizer = keras.optimizers.AdamW(weight_decay=args.weight_decay) - optimizer.exclude_from_weight_decay=[v for v in model.variables if "bias" in v.name] - - model.compile( - optimizer=optimizer, - loss=keras.losses.CategoricalCrossentropy(label_smoothing=args.label_smoothing), - metrics=[keras.metrics.CategoricalAccuracy(name="accuracy")], - ) + optimizer.exclude_from_weight_decay(var_names=["bias"]) + + s = args.label_smoothing != 0 + + if s: + model.compile( + optimizer=optimizer, + loss=keras.losses.CategoricalCrossentropy(label_smoothing=args.label_smoothing), + metrics=[keras.metrics.CategoricalAccuracy(name="accuracy")], + ) + else: + model.compile( + optimizer=optimizer, + loss=keras.losses.SparseCategoricalCrossentropy(), + metrics=[keras.metrics.SparseCategoricalAccuracy(name="accuracy")], + ) tb_callback = TorchTensorBoardCallback(args.logdir) logs = model.fit( - mnist.train.data["images"], keras.utils.to_categorical(mnist.train.data["labels"], num_classes=mnist.LABELS), - batch_size=args.batch_size, epochs=args.epochs, - validation_data=(mnist.dev.data["images"], keras.utils.to_categorical(mnist.dev.data["labels"], mnist.LABELS)), + mnist.train.data["images"], + keras.utils.to_categorical( + mnist.train.data["labels"], num_classes=mnist.LABELS + ) if s else mnist.train.data["labels"], + batch_size=args.batch_size, + epochs=args.epochs, + validation_data=( + mnist.dev.data["images"], + keras.utils.to_categorical( + mnist.dev.data["labels"], num_classes=mnist.LABELS + ) if s else mnist.dev.data["labels"], + ), callbacks=[tb_callback], ) # Return development metrics for ReCodEx to validate. - return {metric: values[-1] for metric, values in logs.history.items() if metric.startswith("val_")} + return { + metric: values[-1] + for metric, values in logs.history.items() + if metric.startswith("val_") + } if __name__ == "__main__": From 93fd87bc16507fc48a118f636857e8e478026000 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Thu, 21 Mar 2024 20:00:52 +0100 Subject: [PATCH 51/64] Solve mnist_ensemble --- labs/03/mnist_ensemble.ps1 | 2 ++ labs/03/mnist_ensemble.py | 28 ++++++++++++++++++---------- 2 files changed, 20 insertions(+), 10 deletions(-) create mode 100644 labs/03/mnist_ensemble.ps1 diff --git a/labs/03/mnist_ensemble.ps1 b/labs/03/mnist_ensemble.ps1 new file mode 100644 index 0000000..526a6bd --- /dev/null +++ b/labs/03/mnist_ensemble.ps1 @@ -0,0 +1,2 @@ +python3 mnist_ensemble.py --epochs=1 --models=5 +python3 mnist_ensemble.py --epochs=1 --models=5 --hidden_layers=200 diff --git a/labs/03/mnist_ensemble.py b/labs/03/mnist_ensemble.py index dc8f60d..93bb2eb 100644 --- a/labs/03/mnist_ensemble.py +++ b/labs/03/mnist_ensemble.py @@ -57,11 +57,11 @@ def main(args: argparse.Namespace) -> tuple[list[float], list[float]]: individual_accuracies, ensemble_accuracies = [], [] model_predictions = [] for model in range(args.models): - # TODO: Compute the accuracy on the dev set for the individual `models[model]`. - predictions = models[model].predict(mnist.dev.data["images"]) - individual_accuracy = np.sum(np.argmax(predictions, axis=-1) == mnist.dev.data["labels"])/mnist.dev.size + # Compute the accuracy on the dev set for the individual `models[model]`. + individual_accuracy = models[model].evaluate(mnist.dev.data["images"], mnist.dev.data["labels"])[1] + print(individual_accuracy) - # TODO: Compute the accuracy on the dev set for the ensemble `models[0:model+1]`. + # Compute the accuracy on the dev set for the ensemble `models[0:model+1]`. # # Generally you can choose one of the following approaches: # 1) Use Keras Functional API and construct a `keras.Model` averaging the models @@ -72,12 +72,20 @@ def main(args: argparse.Namespace) -> tuple[list[float], list[float]]: # need to construct Keras ensemble model at all, and instead call `model.predict` # on the individual models and average the results. To measure accuracy, # either do it completely manually or use `keras.metrics.SparseCategoricalAccuracy`. - # Store the predictions - model_predictions.append(predictions) - pred_avg = np.mean(model_predictions, axis=0) - ensemble_accuracy = np.sum(np.argmax(pred_avg, axis=-1) == mnist.dev.data["labels"])/mnist.dev.size - # Store the accuracies - individual_accuracies.append(individual_accuracy) + inputs = keras.Input(shape=(MNIST.W, MNIST.H, MNIST.C)) + ensemble_output = keras.layers.Average()([model(inputs) for model in models[0:model+1]]) + ensemble_model = keras.Model(inputs=inputs, outputs=ensemble_output) + + ensemble_model.compile( + optimizer=keras.optimizers.Adam(), + loss=keras.losses.SparseCategoricalCrossentropy(), + metrics=[keras.metrics.SparseCategoricalAccuracy(name="accuracy")], + ) + + ensemble_accuracy = ensemble_model.evaluate(mnist.dev.data["images"], mnist.dev.data["labels"])[1] + + # Store the accuracies + individual_accuracies.append(individual_accuracy) ensemble_accuracies.append(ensemble_accuracy) return individual_accuracies, ensemble_accuracies From 6ff7487a64b6cabc98ac357a4ebb3d31add0a7ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Thu, 21 Mar 2024 20:04:20 +0100 Subject: [PATCH 52/64] Broken uppercase --- labs/03/uppercase.py | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/labs/03/uppercase.py b/labs/03/uppercase.py index c975e3f..c83d5c5 100644 --- a/labs/03/uppercase.py +++ b/labs/03/uppercase.py @@ -10,16 +10,16 @@ from uppercase_data import UppercaseData -# TODO: Set reasonable values for the hyperparameters, especially for +# Set reasonable values for the hyperparameters, especially for # `alphabet_size`, `batch_size`, `epochs`, and `window`. # Also, you can set the number of threads to 0 to use all your CPU cores. parser = argparse.ArgumentParser() -parser.add_argument("--alphabet_size", default=..., type=int, help="If given, use this many most frequent chars.") -parser.add_argument("--batch_size", default=..., type=int, help="Batch size.") -parser.add_argument("--epochs", default=..., type=int, help="Number of epochs.") +parser.add_argument("--alphabet_size", default=70, type=int, help="If given, use this many most frequent chars.") +parser.add_argument("--batch_size", default=1024, type=int, help="Batch size.") +parser.add_argument("--epochs", default=2, type=int, help="Number of epochs.") parser.add_argument("--seed", default=42, type=int, help="Random seed.") -parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.") -parser.add_argument("--window", default=..., type=int, help="Window size to use.") +parser.add_argument("--threads", default=0, type=int, help="Maximum number of threads to use.") +parser.add_argument("--window", default=4, type=int, help="Window size to use.") class TorchTensorBoardCallback(keras.callbacks.Callback): @@ -64,7 +64,7 @@ def main(args: argparse.Namespace) -> None: # Load data uppercase_data = UppercaseData(args.window, args.alphabet_size) - # TODO: Implement a suitable model, optionally including regularization, select + # Implement a suitable model, optionally including regularization, select # good hyperparameters and train the model. # # The inputs are _windows_ of fixed size (`args.window` characters on the left, @@ -79,16 +79,34 @@ def main(args: argparse.Namespace) -> None: # You can then flatten the one-hot encoded windows and follow with a dense layer. # - Alternatively, you can use `keras.layers.Embedding` (which is an efficient # implementation of one-hot encoding followed by a Dense layer) and flatten afterwards. - model = ... + model = keras.Sequential([ + keras.layers.InputLayer(shape=[2 * args.window + 1], dtype="int32"), + keras.layers.CategoryEncoding(len(uppercase_data.train.alphabet)), + keras.layers.Embedding(len(uppercase_data.train.alphabet), 8), + + keras.layers.Flatten(), + keras.layers.Dense(64, activation='relu'), + keras.layers.Dropout(rate=0.5), + keras.layers.Dense(1, activation='sigmoid') # Sigmoid activation function for binary classification + ]) + + # Generate correctly capitalized test set. + + predictions = model.predict(uppercase_data.test.data, batch_size=args.batch_size) - # TODO: Generate correctly capitalized test set. # Use `uppercase_data.test.text` as input, capitalize suitable characters, # and write the result to predictions_file (which is # `uppercase_test.txt` in the `args.logdir` directory). os.makedirs(args.logdir, exist_ok=True) with open(os.path.join(args.logdir, "uppercase_test.txt"), "w", encoding="utf-8") as predictions_file: - ... - + new_text = "" + for pred, word in zip(predictions, uppercase_data.test.text): + if pred > .5: + new_word = word.upper() + new_text += new_word + else: + new_text + predictions_file.write(new_text) if __name__ == "__main__": args = parser.parse_args([] if "__file__" not in globals() else None) From 8b211166a33219cc1e7286c09efff197383f5092 Mon Sep 17 00:00:00 2001 From: Aryan <82381989+akumm2k@users.noreply.github.com> Date: Sat, 23 Mar 2024 20:27:55 +0100 Subject: [PATCH 53/64] Add missing torch suubmodule import to cifar10.py --- labs/04/cifar10.py | 1 + 1 file changed, 1 insertion(+) diff --git a/labs/04/cifar10.py b/labs/04/cifar10.py index ec06755..6271172 100644 --- a/labs/04/cifar10.py +++ b/labs/04/cifar10.py @@ -5,6 +5,7 @@ import numpy as np import torch +import torch.utils.data class CIFAR10: From 5d2751db8c2880ebb2064199b366c696d6059efe Mon Sep 17 00:00:00 2001 From: Milan Straka Date: Sat, 23 Mar 2024 23:52:18 +0100 Subject: [PATCH 54/64] Remove unnecessary annotation. --- labs/04/cifar10.py | 1 - 1 file changed, 1 deletion(-) diff --git a/labs/04/cifar10.py b/labs/04/cifar10.py index 6271172..ec06755 100644 --- a/labs/04/cifar10.py +++ b/labs/04/cifar10.py @@ -5,7 +5,6 @@ import numpy as np import torch -import torch.utils.data class CIFAR10: From 9679da6c38727e501b9ea16878a864f0af34203e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Thu, 21 Mar 2024 21:04:31 +0100 Subject: [PATCH 55/64] Solve mnist_cnn.py --- .vscode/settings.json | 3 + labs/04/mnist_cnn.py | 150 ++++++++++++++++++++++++++++++++++-------- 2 files changed, 127 insertions(+), 26 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..dc3f727 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.analysis.typeCheckingMode": "basic" +} diff --git a/labs/04/mnist_cnn.py b/labs/04/mnist_cnn.py index a3a91cd..1037888 100644 --- a/labs/04/mnist_cnn.py +++ b/labs/04/mnist_cnn.py @@ -1,7 +1,10 @@ #!/usr/bin/env python3 import argparse import os -os.environ.setdefault("KERAS_BACKEND", "torch") # Use PyTorch backend unless specified otherwise + +os.environ.setdefault( + "KERAS_BACKEND", "torch" +) # Use PyTorch backend unless specified otherwise import keras import torch @@ -11,42 +14,103 @@ parser = argparse.ArgumentParser() # These arguments will be set appropriately by ReCodEx, even if you change them. parser.add_argument("--batch_size", default=50, type=int, help="Batch size.") -parser.add_argument("--cnn", default=None, type=str, help="CNN architecture.") +parser.add_argument( + "--cnn", + default="CB-16-5-2-same,M-3-2,F,H-100,D-0.5", + type=str, + help="CNN architecture.", +) parser.add_argument("--epochs", default=10, type=int, help="Number of epochs.") -parser.add_argument("--recodex", default=False, action="store_true", help="Evaluation in ReCodEx.") +parser.add_argument( + "--recodex", default=False, action="store_true", help="Evaluation in ReCodEx." +) parser.add_argument("--seed", default=42, type=int, help="Random seed.") -parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.") +parser.add_argument( + "--threads", default=1, type=int, help="Maximum number of threads to use." +) # If you add more arguments, ReCodEx will keep them with your default values. class Model(keras.Model): def __init__(self, args: argparse.Namespace) -> None: - # TODO: Create the model. The template uses the functional API, but + # Create the model. The template uses the functional API, but # feel free to use subclassing if you want. inputs = keras.Input(shape=[MNIST.H, MNIST.W, MNIST.C]) hidden = keras.layers.Rescaling(1 / 255)(inputs) - # TODO: Add CNN layers specified by `args.cnn`, which contains - # a comma-separated list of the following layers: - # - `C-filters-kernel_size-stride-padding`: Add a convolutional layer with ReLU - # activation and specified number of filters, kernel size, stride and padding. - # - `CB-filters-kernel_size-stride-padding`: Same as `C`, but use batch normalization. - # In detail, start with a convolutional layer **without bias** and activation, - # then add a batch normalization layer, and finally the ReLU activation. - # - `M-pool_size-stride`: Add max pooling with specified size and stride, using - # the default "valid" padding. - # - `R-[layers]`: Add a residual connection. The `layers` contain a specification - # of at least one convolutional layer (but not a recursive residual connection `R`). - # The input to the `R` layer should be processed sequentially by `layers`, and the - # produced output (after the ReLU nonlinearity of the last layer) should be added - # to the input (of this `R` layer). - # - `F`: Flatten inputs. Must appear exactly once in the architecture. - # - `H-hidden_layer_size`: Add a dense layer with ReLU activation and the specified size. - # - `D-dropout_rate`: Apply dropout with the given dropout rate. + cnn_args = args.cnn.split(",") + + for layer in cnn_args: + layer_type, *layer_args = layer.split("-") + + # Add CNN layers specified by `args.cnn`, which contains + # a comma-separated list of the following layers: + + # - `C-filters-kernel_size-stride-padding`: Add a convolutional layer with ReLU + # activation and specified number of filters, kernel size, stride and padding. + if layer_type == "C": + hidden = self.createCnnLayer(hidden, *layer_args) + continue + + # - `CB-filters-kernel_size-stride-padding`: Same as `C`, but use batch normalization. + # In detail, start with a convolutional layer **without bias** and activation, + # then add a batch normalization layer, and finally the ReLU activation. + if layer_type == "CB": + hidden = self.createCnnBatchNormLayer(hidden, *layer_args) + continue + + # - `M-pool_size-stride`: Add max pooling with specified size and stride, using + # the default "valid" padding. + if layer_type == "M": + hidden = self.createMaxPoolingLayer(hidden, *layer_args) + continue + + # - `R-[layers]`: Add a residual connection. The `layers` contain a specification + # of at least one convolutional layer (but not a recursive residual connection `R`). + # The input to the `R` layer should be processed sequentially by `layers`, and the + # produced output (after the ReLU nonlinearity of the last layer) should be added + # to the input (of this `R` layer). + + if layer_type == "R": + layers = layer_args[1][1:-1].split(",") + + for layer in layers: + layer_type, *layer_args = layer.split("-") + + if layer_type == "C": + hidden = self.createCnnLayer(hidden, *layer_args) + continue + + if layer_type == "CB": + hidden = self.createCnnBatchNormLayer(hidden, *layer_args) + continue + + continue + + # - `F`: Flatten inputs. Must appear exactly once in the architecture. + + if layer_type == "F": + hidden = keras.layers.Flatten()(hidden) + continue + + # - `H-hidden_layer_size`: Add a dense layer with ReLU activation and the specified size. + if layer_type == "H": + hidden_layer_size = int(layer_args[0]) + hidden = keras.layers.Dense(hidden_layer_size, activation="relu")( + hidden + ) + continue + + # - `D-dropout_rate`: Apply dropout with the given dropout rate. + + if layer_type == "D": + dropout_rate = float(layer_args[0]) + hidden = keras.layers.Dropout(int(dropout_rate))(hidden) + continue + # You can assume the resulting network is valid; it is fine to crash if it is not. # # Produce the results in the variable `hidden`. - hidden = ... # Add the final output layer outputs = keras.layers.Dense(MNIST.LABELS, activation="softmax")(hidden) @@ -58,6 +122,34 @@ def __init__(self, args: argparse.Namespace) -> None: metrics=[keras.metrics.SparseCategoricalAccuracy(name="accuracy")], ) + def createCnnLayer(_self, hidden, filters: str, kernel_size: str, stride: str, padding: str): + cnn_layer = keras.layers.Conv2D( + filters=int(filters), + kernel_size=(int(kernel_size), int(kernel_size)), + strides=(int(stride), int(stride)), + padding=padding, + ) + + activation_layer = keras.layers.Activation("relu") + return activation_layer(cnn_layer(hidden)) + + def createCnnBatchNormLayer(_self, hidden, filters: str, kernel_size: str, stride: str, padding: str): + cnn_layer = keras.layers.Conv2D( + filters=int(filters), + kernel_size=(int(kernel_size), int(kernel_size)), + strides=(int(stride), int(stride)), + padding=padding, + ) + batch_norm = keras.layers.BatchNormalization() + activation_layer = keras.layers.Activation("relu") + + return activation_layer(batch_norm(cnn_layer(hidden))) + + def createMaxPoolingLayer(_self, hidden: str, pool_size: str, stride: str): + return keras.layers.MaxPooling2D( + pool_size=int(pool_size), + strides=(int(stride), int(stride)), + )(hidden) def main(args: argparse.Namespace) -> dict[str, float]: # Set the random seed and the number of threads. @@ -73,13 +165,19 @@ def main(args: argparse.Namespace) -> dict[str, float]: model = Model(args) logs = model.fit( - mnist.train.data["images"], mnist.train.data["labels"], - batch_size=args.batch_size, epochs=args.epochs, + mnist.train.data["images"], + mnist.train.data["labels"], + batch_size=args.batch_size, + epochs=args.epochs, validation_data=(mnist.dev.data["images"], mnist.dev.data["labels"]), ) # Return development metrics for ReCodEx to validate. - return {metric: values[-1] for metric, values in logs.history.items() if metric.startswith("val_")} + return { + metric: values[-1] + for metric, values in logs.history.items() + if metric.startswith("val_") + } if __name__ == "__main__": From 19b1dda5a31936421a64e78ec17ca43cb31cfb8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Sun, 24 Mar 2024 12:42:48 +0100 Subject: [PATCH 56/64] Fix dropout getting rounded --- labs/04/mnist_cnn.py | 84 +++++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 40 deletions(-) diff --git a/labs/04/mnist_cnn.py b/labs/04/mnist_cnn.py index 1037888..44710f2 100644 --- a/labs/04/mnist_cnn.py +++ b/labs/04/mnist_cnn.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import argparse import os +import re os.environ.setdefault( "KERAS_BACKEND", "torch" @@ -38,7 +39,7 @@ def __init__(self, args: argparse.Namespace) -> None: inputs = keras.Input(shape=[MNIST.H, MNIST.W, MNIST.C]) hidden = keras.layers.Rescaling(1 / 255)(inputs) - cnn_args = args.cnn.split(",") + cnn_args = re.split(r',(?![^\[]*\])', args.cnn) for layer in cnn_args: layer_type, *layer_args = layer.split("-") @@ -49,20 +50,41 @@ def __init__(self, args: argparse.Namespace) -> None: # - `C-filters-kernel_size-stride-padding`: Add a convolutional layer with ReLU # activation and specified number of filters, kernel size, stride and padding. if layer_type == "C": - hidden = self.createCnnLayer(hidden, *layer_args) + filters, kernel_size, stride, padding = layer_args + hidden = keras.layers.Conv2D( + filters=int(filters), + kernel_size=(int(kernel_size), int(kernel_size)), + strides=(int(stride), int(stride)), + padding=padding, + )(hidden) + + hidden = keras.layers.Activation("relu")(hidden) + continue # - `CB-filters-kernel_size-stride-padding`: Same as `C`, but use batch normalization. # In detail, start with a convolutional layer **without bias** and activation, # then add a batch normalization layer, and finally the ReLU activation. if layer_type == "CB": - hidden = self.createCnnBatchNormLayer(hidden, *layer_args) + filters, kernel_size, stride, padding = layer_args + hidden = keras.layers.Conv2D( + filters=int(filters), + kernel_size=[int(kernel_size), int(kernel_size)], + strides=[int(stride), int(stride)], + padding=padding, + )(hidden) + hidden = keras.layers.BatchNormalization()(hidden) + hidden = keras.layers.Activation("relu")(hidden) continue # - `M-pool_size-stride`: Add max pooling with specified size and stride, using # the default "valid" padding. if layer_type == "M": - hidden = self.createMaxPoolingLayer(hidden, *layer_args) + pool_size, stride = layer_args + hidden = keras.layers.MaxPooling2D( + pool_size=int(pool_size), + strides=[int(stride), int(stride)], + )(hidden) continue # - `R-[layers]`: Add a residual connection. The `layers` contain a specification @@ -70,25 +92,37 @@ def __init__(self, args: argparse.Namespace) -> None: # The input to the `R` layer should be processed sequentially by `layers`, and the # produced output (after the ReLU nonlinearity of the last layer) should be added # to the input (of this `R` layer). - if layer_type == "R": - layers = layer_args[1][1:-1].split(",") + layers = "-".join(layer_args)[1:-1].split(",") for layer in layers: layer_type, *layer_args = layer.split("-") if layer_type == "C": - hidden = self.createCnnLayer(hidden, *layer_args) + filters, kernel_size, stride, padding = layer_args + hidden = keras.layers.Conv2D( + filters=int(filters), + kernel_size=(int(kernel_size), int(kernel_size)), + strides=(int(stride), int(stride)), + padding=padding, + )(hidden) continue if layer_type == "CB": - hidden = self.createCnnBatchNormLayer(hidden, *layer_args) + filters, kernel_size, stride, padding = layer_args + hidden = keras.layers.Conv2D( + filters=int(filters), + kernel_size=(int(kernel_size), int(kernel_size)), + strides=(int(stride), int(stride)), + padding=padding, + )(hidden) + hidden = keras.layers.BatchNormalization()(hidden) + hidden = keras.layers.Activation("relu")(hidden) continue continue # - `F`: Flatten inputs. Must appear exactly once in the architecture. - if layer_type == "F": hidden = keras.layers.Flatten()(hidden) continue @@ -102,10 +136,9 @@ def __init__(self, args: argparse.Namespace) -> None: continue # - `D-dropout_rate`: Apply dropout with the given dropout rate. - if layer_type == "D": dropout_rate = float(layer_args[0]) - hidden = keras.layers.Dropout(int(dropout_rate))(hidden) + hidden = keras.layers.Dropout(dropout_rate)(hidden) continue # You can assume the resulting network is valid; it is fine to crash if it is not. @@ -122,35 +155,6 @@ def __init__(self, args: argparse.Namespace) -> None: metrics=[keras.metrics.SparseCategoricalAccuracy(name="accuracy")], ) - def createCnnLayer(_self, hidden, filters: str, kernel_size: str, stride: str, padding: str): - cnn_layer = keras.layers.Conv2D( - filters=int(filters), - kernel_size=(int(kernel_size), int(kernel_size)), - strides=(int(stride), int(stride)), - padding=padding, - ) - - activation_layer = keras.layers.Activation("relu") - return activation_layer(cnn_layer(hidden)) - - def createCnnBatchNormLayer(_self, hidden, filters: str, kernel_size: str, stride: str, padding: str): - cnn_layer = keras.layers.Conv2D( - filters=int(filters), - kernel_size=(int(kernel_size), int(kernel_size)), - strides=(int(stride), int(stride)), - padding=padding, - ) - batch_norm = keras.layers.BatchNormalization() - activation_layer = keras.layers.Activation("relu") - - return activation_layer(batch_norm(cnn_layer(hidden))) - - def createMaxPoolingLayer(_self, hidden: str, pool_size: str, stride: str): - return keras.layers.MaxPooling2D( - pool_size=int(pool_size), - strides=(int(stride), int(stride)), - )(hidden) - def main(args: argparse.Namespace) -> dict[str, float]: # Set the random seed and the number of threads. keras.utils.set_random_seed(args.seed) From 50952bce5e39d518333639108a6eca5e33e5a024 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Sun, 24 Mar 2024 12:44:16 +0100 Subject: [PATCH 57/64] Add test script --- labs/04/mnist_cnn.ps1 | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 labs/04/mnist_cnn.ps1 diff --git a/labs/04/mnist_cnn.ps1 b/labs/04/mnist_cnn.ps1 new file mode 100644 index 0000000..bf78797 --- /dev/null +++ b/labs/04/mnist_cnn.ps1 @@ -0,0 +1,30 @@ +"" +"👉 TEST 1" +"python3 mnist_cnn.py --epochs=1 --cnn=F,H-100" +python3 mnist_cnn.py --epochs=1 --cnn=F,H-100 +"1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.8503 - loss: 0.5286 - val_accuracy: 0.9604 - val_loss: 0.1432" +"" +"👉 TEST 2" +"python3 mnist_cnn.py --epochs=1 --cnn=F,H-100,D-0.5" +python3 mnist_cnn.py --epochs=1 --cnn=F,H-100,D-0.5 +"1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.7706 - loss: 0.7444 - val_accuracy: 0.9572 - val_loss: 0.1606" +"" +"👉 TEST 3" +"python3 mnist_cnn.py --epochs=1 --cnn=M-5-2,F,H-50" +python3 mnist_cnn.py --epochs=1 --cnn=M-5-2,F,H-50 +"1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.6630 - loss: 1.0703 - val_accuracy: 0.8798 - val_loss: 0.3894" +"" +"👉 TEST 4" +"python3 mnist_cnn.py --epochs=1 --cnn=C-8-3-5-same,C-8-3-2-valid,F,H-50" +python3 mnist_cnn.py --epochs=1 --cnn=C-8-3-5-same,C-8-3-2-valid,F,H-50 +"1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.5898 - loss: 1.2535 - val_accuracy: 0.8774 - val_loss: 0.4079" +"" +"👉 TEST 5" +"python3 mnist_cnn.py --epochs=1 --cnn=CB-6-3-5-valid,F,H-32" +python3 mnist_cnn.py --epochs=1 --cnn=CB-6-3-5-valid,F,H-32 +"1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.6822 - loss: 1.0011 - val_accuracy: 0.9284 - val_loss: 0.2537" +"" +"👉 TEST 6" +"python3 mnist_cnn.py --epochs=1 --cnn=CB-8-3-5-valid,R-[CB-8-3-1-same,CB-8-3-1-same],F,H-50" +python3 mnist_cnn.py --epochs=1 --cnn=CB-8-3-5-valid,R-[CB-8-3-1-same,CB-8-3-1-same],F,H-50 +"1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.7562 - loss: 0.7717 - val_accuracy: 0.9486 - val_loss: 0.1734" From 3dafa1cd35cf6c679b929093bd2f7e4fd2d0cdc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Sun, 24 Mar 2024 13:00:44 +0100 Subject: [PATCH 58/64] Fix issue with CB layers --- labs/04/mnist_cnn results.txt | 29 +++++++++++++++++++++++++++++ labs/04/mnist_cnn.py | 9 ++++++--- 2 files changed, 35 insertions(+), 3 deletions(-) create mode 100644 labs/04/mnist_cnn results.txt diff --git a/labs/04/mnist_cnn results.txt b/labs/04/mnist_cnn results.txt new file mode 100644 index 0000000..063033d --- /dev/null +++ b/labs/04/mnist_cnn results.txt @@ -0,0 +1,29 @@ +👉 TEST 1 +python3 mnist_cnn.py --epochs=1 --cnn=F,H-100 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 19s 17ms/step - accuracy: 0.8499 - loss: 0.5317 - val_accuracy: 0.9618 - val_loss: 0.1400 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.8503 - loss: 0.5286 - val_accuracy: 0.9604 - val_loss: 0.1432 + +👉 TEST 2 +python3 mnist_cnn.py --epochs=1 --cnn=F,H-100,D-0.5 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 20s 18ms/step - accuracy: 0.7662 - loss: 0.7543 - val_accuracy: 0.9576 - val_loss: 0.1612 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.7706 - loss: 0.7444 - val_accuracy: 0.9572 - val_loss: 0.1606 + +👉 TEST 3 +python3 mnist_cnn.py --epochs=1 --cnn=M-5-2,F,H-50 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 19s 17ms/step - accuracy: 0.6706 - loss: 1.0717 - val_accuracy: 0.8814 - val_loss: 0.3802 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.6630 - loss: 1.0703 - val_accuracy: 0.8798 - val_loss: 0.3894 + +👉 TEST 4 +python3 mnist_cnn.py --epochs=1 --cnn=C-8-3-5-same,C-8-3-2-valid,F,H-50 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 21s 18ms/step - accuracy: 0.5799 - loss: 1.2751 - val_accuracy: 0.8898 - val_loss: 0.3617 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.5898 - loss: 1.2535 - val_accuracy: 0.8774 - val_loss: 0.4079 + +👉 TEST 5 +python3 mnist_cnn.py --epochs=1 --cnn=CB-6-3-5-valid,F,H-32 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 31s 27ms/step - accuracy: 0.6976 - loss: 0.9518 - val_accuracy: 0.9228 - val_loss: 0.2614 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.6822 - loss: 1.0011 - val_accuracy: 0.9284 - val_loss: 0.2537 + +👉 TEST 6 +python3 mnist_cnn.py --epochs=1 --cnn=CB-8-3-5-valid,R-[CB-8-3-1-same,CB-8-3-1-same],F,H-50 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 44s 38ms/step - accuracy: 0.7638 - loss: 0.7407 - val_accuracy: 0.9454 - val_loss: 0.1744 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.7562 - loss: 0.7717 - val_accuracy: 0.9486 - val_loss: 0.1734 diff --git a/labs/04/mnist_cnn.py b/labs/04/mnist_cnn.py index 44710f2..661c4bd 100644 --- a/labs/04/mnist_cnn.py +++ b/labs/04/mnist_cnn.py @@ -39,7 +39,7 @@ def __init__(self, args: argparse.Namespace) -> None: inputs = keras.Input(shape=[MNIST.H, MNIST.W, MNIST.C]) hidden = keras.layers.Rescaling(1 / 255)(inputs) - cnn_args = re.split(r',(?![^\[]*\])', args.cnn) + cnn_args = re.split(r",(?![^\[]*\])", args.cnn) for layer in cnn_args: layer_type, *layer_args = layer.split("-") @@ -72,6 +72,7 @@ def __init__(self, args: argparse.Namespace) -> None: kernel_size=[int(kernel_size), int(kernel_size)], strides=[int(stride), int(stride)], padding=padding, + use_bias=False, )(hidden) hidden = keras.layers.BatchNormalization()(hidden) hidden = keras.layers.Activation("relu")(hidden) @@ -112,9 +113,10 @@ def __init__(self, args: argparse.Namespace) -> None: filters, kernel_size, stride, padding = layer_args hidden = keras.layers.Conv2D( filters=int(filters), - kernel_size=(int(kernel_size), int(kernel_size)), - strides=(int(stride), int(stride)), + kernel_size=[int(kernel_size), int(kernel_size)], + strides=[int(stride), int(stride)], padding=padding, + use_bias=False, )(hidden) hidden = keras.layers.BatchNormalization()(hidden) hidden = keras.layers.Activation("relu")(hidden) @@ -155,6 +157,7 @@ def __init__(self, args: argparse.Namespace) -> None: metrics=[keras.metrics.SparseCategoricalAccuracy(name="accuracy")], ) + def main(args: argparse.Namespace) -> dict[str, float]: # Set the random seed and the number of threads. keras.utils.set_random_seed(args.seed) From cbf57e6e600f68abe545b4393d41b502e70a011b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Sun, 24 Mar 2024 16:25:02 +0100 Subject: [PATCH 59/64] mnist_cnn.py passes 1-5 --- labs/04/mnist_cnn results.txt | 12 ++++++------ labs/04/mnist_cnn.py | 12 ++++++++---- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/labs/04/mnist_cnn results.txt b/labs/04/mnist_cnn results.txt index 063033d..71f04b6 100644 --- a/labs/04/mnist_cnn results.txt +++ b/labs/04/mnist_cnn results.txt @@ -1,29 +1,29 @@ 👉 TEST 1 python3 mnist_cnn.py --epochs=1 --cnn=F,H-100 -1100/1100 ━━━━━━━━━━━━━━━━━━━━ 19s 17ms/step - accuracy: 0.8499 - loss: 0.5317 - val_accuracy: 0.9618 - val_loss: 0.1400 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 14s 12ms/step - accuracy: 0.8499 - loss: 0.5317 - val_accuracy: 0.9618 - val_loss: 0.1400 1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.8503 - loss: 0.5286 - val_accuracy: 0.9604 - val_loss: 0.1432 👉 TEST 2 python3 mnist_cnn.py --epochs=1 --cnn=F,H-100,D-0.5 -1100/1100 ━━━━━━━━━━━━━━━━━━━━ 20s 18ms/step - accuracy: 0.7662 - loss: 0.7543 - val_accuracy: 0.9576 - val_loss: 0.1612 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 14s 12ms/step - accuracy: 0.7662 - loss: 0.7543 - val_accuracy: 0.9576 - val_loss: 0.1612 1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.7706 - loss: 0.7444 - val_accuracy: 0.9572 - val_loss: 0.1606 👉 TEST 3 python3 mnist_cnn.py --epochs=1 --cnn=M-5-2,F,H-50 -1100/1100 ━━━━━━━━━━━━━━━━━━━━ 19s 17ms/step - accuracy: 0.6706 - loss: 1.0717 - val_accuracy: 0.8814 - val_loss: 0.3802 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 14s 12ms/step - accuracy: 0.6706 - loss: 1.0717 - val_accuracy: 0.8814 - val_loss: 0.3802 1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.6630 - loss: 1.0703 - val_accuracy: 0.8798 - val_loss: 0.3894 👉 TEST 4 python3 mnist_cnn.py --epochs=1 --cnn=C-8-3-5-same,C-8-3-2-valid,F,H-50 -1100/1100 ━━━━━━━━━━━━━━━━━━━━ 21s 18ms/step - accuracy: 0.5799 - loss: 1.2751 - val_accuracy: 0.8898 - val_loss: 0.3617 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 17s 15ms/step - accuracy: 0.5799 - loss: 1.2751 - val_accuracy: 0.8898 - val_loss: 0.3616 1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.5898 - loss: 1.2535 - val_accuracy: 0.8774 - val_loss: 0.4079 👉 TEST 5 python3 mnist_cnn.py --epochs=1 --cnn=CB-6-3-5-valid,F,H-32 -1100/1100 ━━━━━━━━━━━━━━━━━━━━ 31s 27ms/step - accuracy: 0.6976 - loss: 0.9518 - val_accuracy: 0.9228 - val_loss: 0.2614 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 18s 16ms/step - accuracy: 0.6976 - loss: 0.9518 - val_accuracy: 0.9228 - val_loss: 0.2614 1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.6822 - loss: 1.0011 - val_accuracy: 0.9284 - val_loss: 0.2537 👉 TEST 6 python3 mnist_cnn.py --epochs=1 --cnn=CB-8-3-5-valid,R-[CB-8-3-1-same,CB-8-3-1-same],F,H-50 -1100/1100 ━━━━━━━━━━━━━━━━━━━━ 44s 38ms/step - accuracy: 0.7638 - loss: 0.7407 - val_accuracy: 0.9454 - val_loss: 0.1744 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 29s 26ms/step - accuracy: 0.7727 - loss: 0.7074 - val_accuracy: 0.9424 - val_loss: 0.1835 1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.7562 - loss: 0.7717 - val_accuracy: 0.9486 - val_loss: 0.1734 diff --git a/labs/04/mnist_cnn.py b/labs/04/mnist_cnn.py index 661c4bd..5b8fbc1 100644 --- a/labs/04/mnist_cnn.py +++ b/labs/04/mnist_cnn.py @@ -8,6 +8,7 @@ ) # Use PyTorch backend unless specified otherwise import keras +from keras.layers import add import torch from mnist import MNIST @@ -56,10 +57,9 @@ def __init__(self, args: argparse.Namespace) -> None: kernel_size=(int(kernel_size), int(kernel_size)), strides=(int(stride), int(stride)), padding=padding, + activation="relu", )(hidden) - hidden = keras.layers.Activation("relu")(hidden) - continue # - `CB-filters-kernel_size-stride-padding`: Same as `C`, but use batch normalization. @@ -75,7 +75,7 @@ def __init__(self, args: argparse.Namespace) -> None: use_bias=False, )(hidden) hidden = keras.layers.BatchNormalization()(hidden) - hidden = keras.layers.Activation("relu")(hidden) + hidden = keras.layers.ReLU()(hidden) continue # - `M-pool_size-stride`: Add max pooling with specified size and stride, using @@ -94,6 +94,7 @@ def __init__(self, args: argparse.Namespace) -> None: # produced output (after the ReLU nonlinearity of the last layer) should be added # to the input (of this `R` layer). if layer_type == "R": + input_layer = hidden layers = "-".join(layer_args)[1:-1].split(",") for layer in layers: @@ -106,6 +107,7 @@ def __init__(self, args: argparse.Namespace) -> None: kernel_size=(int(kernel_size), int(kernel_size)), strides=(int(stride), int(stride)), padding=padding, + activation="relu", )(hidden) continue @@ -119,9 +121,11 @@ def __init__(self, args: argparse.Namespace) -> None: use_bias=False, )(hidden) hidden = keras.layers.BatchNormalization()(hidden) - hidden = keras.layers.Activation("relu")(hidden) continue + hidden = add([input_layer, hidden]) + hidden = keras.layers.ReLU()(hidden) + continue # - `F`: Flatten inputs. Must appear exactly once in the architecture. From a2bd060e63587b028c9f2adf18ed5bd8e25093f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Wed, 27 Mar 2024 12:46:19 +0100 Subject: [PATCH 60/64] Refactor and simplify solution to mnist_cnn.py --- labs/04/mnist_cnn results.txt | 10 +- labs/04/mnist_cnn.py | 177 +++++++++++++++------------------- 2 files changed, 81 insertions(+), 106 deletions(-) diff --git a/labs/04/mnist_cnn results.txt b/labs/04/mnist_cnn results.txt index 71f04b6..63271eb 100644 --- a/labs/04/mnist_cnn results.txt +++ b/labs/04/mnist_cnn results.txt @@ -5,25 +5,25 @@ python3 mnist_cnn.py --epochs=1 --cnn=F,H-100 👉 TEST 2 python3 mnist_cnn.py --epochs=1 --cnn=F,H-100,D-0.5 -1100/1100 ━━━━━━━━━━━━━━━━━━━━ 14s 12ms/step - accuracy: 0.7662 - loss: 0.7543 - val_accuracy: 0.9576 - val_loss: 0.1612 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 14s 13ms/step - accuracy: 0.7662 - loss: 0.7543 - val_accuracy: 0.9576 - val_loss: 0.1612 1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.7706 - loss: 0.7444 - val_accuracy: 0.9572 - val_loss: 0.1606 👉 TEST 3 python3 mnist_cnn.py --epochs=1 --cnn=M-5-2,F,H-50 -1100/1100 ━━━━━━━━━━━━━━━━━━━━ 14s 12ms/step - accuracy: 0.6706 - loss: 1.0717 - val_accuracy: 0.8814 - val_loss: 0.3802 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 14s 13ms/step - accuracy: 0.6706 - loss: 1.0717 - val_accuracy: 0.8814 - val_loss: 0.3802 1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.6630 - loss: 1.0703 - val_accuracy: 0.8798 - val_loss: 0.3894 👉 TEST 4 python3 mnist_cnn.py --epochs=1 --cnn=C-8-3-5-same,C-8-3-2-valid,F,H-50 -1100/1100 ━━━━━━━━━━━━━━━━━━━━ 17s 15ms/step - accuracy: 0.5799 - loss: 1.2751 - val_accuracy: 0.8898 - val_loss: 0.3616 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 18s 16ms/step - accuracy: 0.5799 - loss: 1.2751 - val_accuracy: 0.8898 - val_loss: 0.3616 1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.5898 - loss: 1.2535 - val_accuracy: 0.8774 - val_loss: 0.4079 👉 TEST 5 python3 mnist_cnn.py --epochs=1 --cnn=CB-6-3-5-valid,F,H-32 -1100/1100 ━━━━━━━━━━━━━━━━━━━━ 18s 16ms/step - accuracy: 0.6976 - loss: 0.9518 - val_accuracy: 0.9228 - val_loss: 0.2614 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 20s 17ms/step - accuracy: 0.6976 - loss: 0.9518 - val_accuracy: 0.9228 - val_loss: 0.2614 1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.6822 - loss: 1.0011 - val_accuracy: 0.9284 - val_loss: 0.2537 👉 TEST 6 python3 mnist_cnn.py --epochs=1 --cnn=CB-8-3-5-valid,R-[CB-8-3-1-same,CB-8-3-1-same],F,H-50 -1100/1100 ━━━━━━━━━━━━━━━━━━━━ 29s 26ms/step - accuracy: 0.7727 - loss: 0.7074 - val_accuracy: 0.9424 - val_loss: 0.1835 +1100/1100 ━━━━━━━━━━━━━━━━━━━━ 31s 27ms/step - accuracy: 0.7476 - loss: 0.7841 - val_accuracy: 0.9370 - val_loss: 0.2037 1100/1100 ━━━━━━━━━━━━━━━━━━━━ 15s 14ms/step - accuracy: 0.7562 - loss: 0.7717 - val_accuracy: 0.9486 - val_loss: 0.1734 diff --git a/labs/04/mnist_cnn.py b/labs/04/mnist_cnn.py index 5b8fbc1..b3c5727 100644 --- a/labs/04/mnist_cnn.py +++ b/labs/04/mnist_cnn.py @@ -33,6 +33,81 @@ # If you add more arguments, ReCodEx will keep them with your default values. +def create_layer(layer_type, layer_args, hidden): + if layer_type == "C": + filters, kernel_size, stride, padding = layer_args + hidden = keras.layers.Conv2D( + filters=int(filters), + kernel_size=(int(kernel_size)), + strides=(int(stride)), + padding=padding, + activation="relu", + )(hidden) + + return hidden + + # - `CB-filters-kernel_size-stride-padding`: Same as `C`, but use batch normalization. + # In detail, start with a convolutional layer **without bias** and activation, + # then add a batch normalization layer, and finally the ReLU activation. + if layer_type == "CB": + filters, kernel_size, stride, padding = layer_args + hidden = keras.layers.Conv2D( + filters=int(filters), + kernel_size=(int(kernel_size)), + strides=(int(stride)), + padding=padding, + use_bias=False, + )(hidden) + hidden = keras.layers.BatchNormalization()(hidden) + hidden = keras.layers.ReLU()(hidden) + return hidden + + # - `M-pool_size-stride`: Add max pooling with specified size and stride, using + # the default "valid" padding. + if layer_type == "M": + pool_size, stride = layer_args + hidden = keras.layers.MaxPooling2D( + pool_size=int(pool_size), + strides=(int(stride)), + )(hidden) + return hidden + + # - `R-[layers]`: Add a residual connection. The `layers` contain a specification + # of at least one convolutional layer (but not a recursive residual connection `R`). + # The input to the `R` layer should be processed sequentially by `layers`, and the + # produced output (after the ReLU nonlinearity of the last layer) should be added + # to the input (of this `R` layer). + if layer_type == "R": + input_layer = hidden + layers = "-".join(layer_args)[1:-1].split(",") + + for layer in layers: + layer_type, *layer_args = layer.split("-") + + hidden = create_layer(layer_type, layer_args, hidden) + + hidden = keras.layers.Add()([input_layer, hidden]) + + return hidden + + # - `F`: Flatten inputs. Must appear exactly once in the architecture. + if layer_type == "F": + hidden = keras.layers.Flatten()(hidden) + return hidden + + # - `H-hidden_layer_size`: Add a dense layer with ReLU activation and the specified size. + if layer_type == "H": + hidden_layer_size, = layer_args + hidden = keras.layers.Dense(units=int(hidden_layer_size), activation="relu")(hidden) + return hidden + + # - `D-dropout_rate`: Apply dropout with the given dropout rate. + if layer_type == "D": + dropout_rate, = layer_args + hidden = keras.layers.Dropout(rate=float(dropout_rate))(hidden) + return hidden + + class Model(keras.Model): def __init__(self, args: argparse.Namespace) -> None: # Create the model. The template uses the functional API, but @@ -45,107 +120,7 @@ def __init__(self, args: argparse.Namespace) -> None: for layer in cnn_args: layer_type, *layer_args = layer.split("-") - # Add CNN layers specified by `args.cnn`, which contains - # a comma-separated list of the following layers: - - # - `C-filters-kernel_size-stride-padding`: Add a convolutional layer with ReLU - # activation and specified number of filters, kernel size, stride and padding. - if layer_type == "C": - filters, kernel_size, stride, padding = layer_args - hidden = keras.layers.Conv2D( - filters=int(filters), - kernel_size=(int(kernel_size), int(kernel_size)), - strides=(int(stride), int(stride)), - padding=padding, - activation="relu", - )(hidden) - - continue - - # - `CB-filters-kernel_size-stride-padding`: Same as `C`, but use batch normalization. - # In detail, start with a convolutional layer **without bias** and activation, - # then add a batch normalization layer, and finally the ReLU activation. - if layer_type == "CB": - filters, kernel_size, stride, padding = layer_args - hidden = keras.layers.Conv2D( - filters=int(filters), - kernel_size=[int(kernel_size), int(kernel_size)], - strides=[int(stride), int(stride)], - padding=padding, - use_bias=False, - )(hidden) - hidden = keras.layers.BatchNormalization()(hidden) - hidden = keras.layers.ReLU()(hidden) - continue - - # - `M-pool_size-stride`: Add max pooling with specified size and stride, using - # the default "valid" padding. - if layer_type == "M": - pool_size, stride = layer_args - hidden = keras.layers.MaxPooling2D( - pool_size=int(pool_size), - strides=[int(stride), int(stride)], - )(hidden) - continue - - # - `R-[layers]`: Add a residual connection. The `layers` contain a specification - # of at least one convolutional layer (but not a recursive residual connection `R`). - # The input to the `R` layer should be processed sequentially by `layers`, and the - # produced output (after the ReLU nonlinearity of the last layer) should be added - # to the input (of this `R` layer). - if layer_type == "R": - input_layer = hidden - layers = "-".join(layer_args)[1:-1].split(",") - - for layer in layers: - layer_type, *layer_args = layer.split("-") - - if layer_type == "C": - filters, kernel_size, stride, padding = layer_args - hidden = keras.layers.Conv2D( - filters=int(filters), - kernel_size=(int(kernel_size), int(kernel_size)), - strides=(int(stride), int(stride)), - padding=padding, - activation="relu", - )(hidden) - continue - - if layer_type == "CB": - filters, kernel_size, stride, padding = layer_args - hidden = keras.layers.Conv2D( - filters=int(filters), - kernel_size=[int(kernel_size), int(kernel_size)], - strides=[int(stride), int(stride)], - padding=padding, - use_bias=False, - )(hidden) - hidden = keras.layers.BatchNormalization()(hidden) - continue - - hidden = add([input_layer, hidden]) - hidden = keras.layers.ReLU()(hidden) - - continue - - # - `F`: Flatten inputs. Must appear exactly once in the architecture. - if layer_type == "F": - hidden = keras.layers.Flatten()(hidden) - continue - - # - `H-hidden_layer_size`: Add a dense layer with ReLU activation and the specified size. - if layer_type == "H": - hidden_layer_size = int(layer_args[0]) - hidden = keras.layers.Dense(hidden_layer_size, activation="relu")( - hidden - ) - continue - - # - `D-dropout_rate`: Apply dropout with the given dropout rate. - if layer_type == "D": - dropout_rate = float(layer_args[0]) - hidden = keras.layers.Dropout(dropout_rate)(hidden) - continue + hidden = create_layer(layer_type, layer_args, hidden) # You can assume the resulting network is valid; it is fine to crash if it is not. # From 3cdadd194a6ed870599e2588228ddf733c327964 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Thu, 28 Mar 2024 12:55:23 +0100 Subject: [PATCH 61/64] Solve mnist_multiple.py --- labs/04/mnist_multiple.ps1 | 11 ++++ labs/04/mnist_multiple.py | 105 ++++++++++++++++++++++++++++--------- 2 files changed, 92 insertions(+), 24 deletions(-) create mode 100644 labs/04/mnist_multiple.ps1 diff --git a/labs/04/mnist_multiple.ps1 b/labs/04/mnist_multiple.ps1 new file mode 100644 index 0000000..d6d4f08 --- /dev/null +++ b/labs/04/mnist_multiple.ps1 @@ -0,0 +1,11 @@ +"" +"👉 TEST 1" +"python3 mnist_multiple.py --epochs=1 --batch_size=50" +python3 mnist_multiple.py --epochs=1 --batch_size=50 +"direct_comparison_accuracy: 0.7993 - indirect_comparison_accuracy: 0.8930 - loss: 1.6710 - val_direct_comparison_accuracy: 0.9508 - val_indirect_comparison_accuracy: 0.9836 - val_loss: 0.2984" +"" +"👉 TEST 2" +"python3 mnist_cnn.py --epochs=1 --cnn=F,H-100,D-0.5" +python3 mnist_multiple.py --epochs=1 --batch_size=100 +"direct_comparison_accuracy: 0.7680 - indirect_comparison_accuracy: 0.8637 - loss: 2.1429 - val_direct_comparison_accuracy: 0.9288 - val_indirect_comparison_accuracy: 0.9772 - val_loss: 0.4157" +"" diff --git a/labs/04/mnist_multiple.py b/labs/04/mnist_multiple.py index 06b9d9e..def13ab 100644 --- a/labs/04/mnist_multiple.py +++ b/labs/04/mnist_multiple.py @@ -1,7 +1,10 @@ #!/usr/bin/env python3 import argparse import os -os.environ.setdefault("KERAS_BACKEND", "torch") # Use PyTorch backend unless specified otherwise + +os.environ.setdefault( + "KERAS_BACKEND", "torch" +) # Use PyTorch backend unless specified otherwise import numpy as np import keras @@ -13,9 +16,13 @@ # These arguments will be set appropriately by ReCodEx, even if you change them. parser.add_argument("--batch_size", default=50, type=int, help="Batch size.") parser.add_argument("--epochs", default=5, type=int, help="Number of epochs.") -parser.add_argument("--recodex", default=False, action="store_true", help="Evaluation in ReCodEx.") +parser.add_argument( + "--recodex", default=False, action="store_true", help="Evaluation in ReCodEx." +) parser.add_argument("--seed", default=42, type=int, help="Random seed.") -parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.") +parser.add_argument( + "--threads", default=1, type=int, help="Maximum number of threads to use." +) # If you add more arguments, ReCodEx will keep them with your default values. @@ -27,7 +34,7 @@ def __init__(self, args: argparse.Namespace) -> None: keras.Input(shape=[MNIST.H, MNIST.W, MNIST.C]), ) - # TODO: The model starts by passing each input image through the same + # The model starts by passing each input image through the same # subnetwork (with shared weights), which should perform # - keras.layers.Rescaling(1 / 255) to convert images to floats in [0, 1] range, # - convolution with 10 filters, 3x3 kernel size, stride 2, "valid" padding, ReLU activation, @@ -36,24 +43,49 @@ def __init__(self, args: argparse.Namespace) -> None: # - fully connected layer with 200 neurons and ReLU activation, # obtaining a 200-dimensional feature vector FV of each image. - # TODO: Using the computed representations, the model should produce four outputs: + rescale = keras.layers.Rescaling(1 / 255) + c1 = keras.layers.Conv2D( + filters=10, kernel_size=3, strides=2, padding="valid", activation="relu" + ) + c2 = keras.layers.Conv2D( + filters=20, kernel_size=3, strides=2, padding="valid", activation="relu" + ) + flat = keras.layers.Flatten() + hidden = keras.layers.Dense(200, activation="relu") + + fv1 = hidden(flat(c2(c1(rescale(images[0]))))) + fv2 = hidden(flat(c2(c1(rescale(images[1]))))) + + # Using the computed representations, the model should produce four outputs: # - first, compute _direct comparison_ whether the first digit is # greater than the second, by # - concatenating the two 200-dimensional image representations FV, # - processing them using another 200-neuron ReLU dense layer # - computing one output using a dense layer with "sigmoid" activation + concatenation = keras.layers.Concatenate()([fv1, fv2]) + hidden2 = keras.layers.Dense(200, activation="relu") + pred_layer = keras.layers.Dense(1, activation="sigmoid") + direct_comparison = pred_layer(hidden2(concatenation)) # - then, classify the computed representation FV of the first image using # a densely connected softmax layer into 10 classes; # - then, classify the computed representation FV of the second image using # the same layer (identical, i.e., with shared weights) into 10 classes; + classification_layer = keras.layers.Dense(10, activation="softmax") + d1 = classification_layer(fv1) + d2 = classification_layer(fv2) # - finally, compute _indirect comparison_ whether the first digit # is greater than second, by comparing the predictions from the above # two outputs; convert the comparison to "float32" using `keras.ops.cast`. outputs = { - "direct_comparison": ..., - "digit_1": ..., - "digit_2": ..., - "indirect_comparison": ..., + "direct_comparison": direct_comparison, + "digit_1": d1, + "digit_2": d2, + "indirect_comparison": keras.ops.cast( + keras.ops.greater( + keras.ops.argmax(d1, axis=1), keras.ops.argmax(d2, axis=1) + ), + "float32", + ), } # Finally, construct the model. @@ -65,7 +97,7 @@ def __init__(self, args: argparse.Namespace) -> None: # the keys of the `outputs` dictionary. self.output_names = sorted(outputs.keys()) - # TODO: Define the appropriate losses for the model outputs + # Define the appropriate losses for the model outputs # "direct_comparison", "digit_1", "digit_2". Regarding metrics, # the accuracy of both the direct and indirect comparisons should be # computed; name both metrics "accuracy" (i.e., pass "accuracy" as the @@ -73,19 +105,25 @@ def __init__(self, args: argparse.Namespace) -> None: self.compile( optimizer=keras.optimizers.Adam(), loss={ - "direct_comparison": ..., - "digit_1": ..., - "digit_2": ..., + "direct_comparison": keras.losses.BinaryCrossentropy(), + "digit_1": keras.losses.SparseCategoricalCrossentropy(), + "digit_2": keras.losses.SparseCategoricalCrossentropy(), }, metrics={ - "direct_comparison": [...], - "indirect_comparison": [...], + "direct_comparison": [ + keras.metrics.BinaryAccuracy(name="accuracy"), + ], + "indirect_comparison": [ + keras.metrics.BinaryAccuracy(name="accuracy"), + ], }, ) # Create an appropriate dataset using the MNIST data. def create_dataset( - self, mnist_dataset: MNIST.Dataset, args: argparse.Namespace, + self, + mnist_dataset: MNIST.Dataset, + args: argparse.Namespace, ) -> torch.utils.data.Dataset: # Original MNIST dataset. images, labels = mnist_dataset.data["images"], mnist_dataset.data["labels"] @@ -94,16 +132,27 @@ def create_dataset( # You can assume that the size of the original dataset is even. class TorchDataset(torch.utils.data.Dataset): def __len__(self) -> int: - # TODO: The new dataset has half the size of the original one. - return ... + # The new dataset has half the size of the original one. + return len(images) // 2 - def __getitem__(self, index: int) -> tuple[tuple[np.ndarray, np.ndarray], dict[str, np.ndarray]]: - # TODO: Given an `index`, generate a dataset element suitable for our model. + def __getitem__( + self, index: int + ) -> tuple[tuple[np.ndarray, np.ndarray], dict[str, np.ndarray]]: + # Given an `index`, generate a dataset element suitable for our model. # Notably, the element should be a pair `(input, output)`, with # - `input` being a pair of images `(images[2 * index], images[2 * index + 1])`, # - `output` being a dictionary with keys "digit_1", "digit_2", "direct_comparison", # and "indirect_comparison". - return ... + return ( + (images[2 * index], images[2 * index + 1]), + { + "digit_1": labels[2 * index], + "digit_2": labels[2 * index + 1], + "direct_comparison": labels[2 * index] > labels[2 * index + 1], + "indirect_comparison": labels[2 * index] + > labels[2 * index + 1], + }, + ) return TorchDataset() @@ -122,14 +171,22 @@ def main(args: argparse.Namespace) -> dict[str, float]: model = Model(args) # Construct suitable dataloaders from the MNIST data. - train = torch.utils.data.DataLoader(model.create_dataset(mnist.train, args), args.batch_size, shuffle=True) - dev = torch.utils.data.DataLoader(model.create_dataset(mnist.dev, args), args.batch_size) + train = torch.utils.data.DataLoader( + model.create_dataset(mnist.train, args), args.batch_size, shuffle=True + ) + dev = torch.utils.data.DataLoader( + model.create_dataset(mnist.dev, args), args.batch_size + ) # Train logs = model.fit(train, epochs=args.epochs, validation_data=dev) # Return development metrics for ReCodEx to validate. - return {metric: values[-1] for metric, values in logs.history.items() if metric.startswith("val_")} + return { + metric: values[-1] + for metric, values in logs.history.items() + if metric.startswith("val_") + } if __name__ == "__main__": From 7f52f3ca72f1f0aee2c7dd49437a5482a4375217 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Thu, 28 Mar 2024 13:08:06 +0100 Subject: [PATCH 62/64] Improve test output --- labs/04/mnist_multiple.ps1 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/labs/04/mnist_multiple.ps1 b/labs/04/mnist_multiple.ps1 index d6d4f08..3416b36 100644 --- a/labs/04/mnist_multiple.ps1 +++ b/labs/04/mnist_multiple.ps1 @@ -2,10 +2,10 @@ "👉 TEST 1" "python3 mnist_multiple.py --epochs=1 --batch_size=50" python3 mnist_multiple.py --epochs=1 --batch_size=50 -"direct_comparison_accuracy: 0.7993 - indirect_comparison_accuracy: 0.8930 - loss: 1.6710 - val_direct_comparison_accuracy: 0.9508 - val_indirect_comparison_accuracy: 0.9836 - val_loss: 0.2984" +"275/275 ━━━━━━━━━━━━━━━━━━━━ 11s 38ms/step - direct_comparison_accuracy: 0.7993 - indirect_comparison_accuracy: 0.8930 - loss: 1.6710 - val_direct_comparison_accuracy: 0.9508 - val_indirect_comparison_accuracy: 0.9836 - val_loss: 0.2984" "" "👉 TEST 2" "python3 mnist_cnn.py --epochs=1 --cnn=F,H-100,D-0.5" python3 mnist_multiple.py --epochs=1 --batch_size=100 -"direct_comparison_accuracy: 0.7680 - indirect_comparison_accuracy: 0.8637 - loss: 2.1429 - val_direct_comparison_accuracy: 0.9288 - val_indirect_comparison_accuracy: 0.9772 - val_loss: 0.4157" +"275/275 ━━━━━━━━━━━━━━━━━━━━ 11s 38ms/step - direct_comparison_accuracy: 0.7680 - indirect_comparison_accuracy: 0.8637 - loss: 2.1429 - val_direct_comparison_accuracy: 0.9288 - val_indirect_comparison_accuracy: 0.9772 - val_loss: 0.4157" "" From ca5827f3e07cb9a10a033cae6c7684674e6f402c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Thu, 28 Mar 2024 14:54:14 +0100 Subject: [PATCH 63/64] Solve torch_dataset --- labs/04/torch_dataset.ps1 | 11 +++++++++ labs/04/torch_dataset.py | 47 +++++++++++++++++++++++++-------------- 2 files changed, 41 insertions(+), 17 deletions(-) create mode 100644 labs/04/torch_dataset.ps1 diff --git a/labs/04/torch_dataset.ps1 b/labs/04/torch_dataset.ps1 new file mode 100644 index 0000000..46fa378 --- /dev/null +++ b/labs/04/torch_dataset.ps1 @@ -0,0 +1,11 @@ +# "" +# "👉 TEST 1" +# "python3 torch_dataset.py --epochs=1 --batch_size=100" +# python3 torch_dataset.py --epochs=1 --batch_size=100 +# "50/50 ━━━━━━━━━━━━━━━━━━━━ 3s 33ms/step - accuracy: 0.1297 - loss: 2.2519 - val_accuracy: 0.2710 - val_loss: 1.9796" +"" +"👉 TEST 2" +"python3 torch_dataset.py --epochs=1 --batch_size=50 --augment" +python3 torch_dataset.py --epochs=1 --batch_size=50 --augment +"100/100 ━━━━━━━━━━━━━━━━━━━━ 4s 34ms/step - accuracy: 0.1354 - loss: 2.2565 - val_accuracy: 0.2690 - val_loss: 1.9889" +"" diff --git a/labs/04/torch_dataset.py b/labs/04/torch_dataset.py index 5e0c330..f689e54 100644 --- a/labs/04/torch_dataset.py +++ b/labs/04/torch_dataset.py @@ -53,54 +53,67 @@ def main(args: argparse.Namespace) -> dict[str, float]: metrics=[keras.metrics.SparseCategoricalAccuracy(name="accuracy")], ) - # TODO: Create a Torch dataset constructible from the given `CIFAR10.Dataset`. + # Create a Torch dataset constructible from the given `CIFAR10.Dataset`. # You should use only the first `size` examples of the dataset, and optional # augmentation function `augmentation_fn` may be applied to the images. class TorchDataset(torch.utils.data.Dataset): + images: np.ndarray + labels: np.ndarray + augmentation_fn: callable + def __init__(self, cifar: CIFAR10.Dataset, size: int, augmentation_fn=None) -> None: - # TODO: Note that the images and labels are available in `cifar.data["images"]` + # Note that the images and labels are available in `cifar.data["images"]` # and `cifar.data["labels"]`. - ... + self.images = cifar.data["images"][:size] + self.labels = cifar.data["labels"][:size] + self.augmentation_fn = augmentation_fn def __len__(self) -> int: - # TODO: Return the appropriate size. - ... + # Return the appropriate size. + size = len(self.images) + return size + def __getitem__(self, index: int) -> tuple[np.ndarray | torch.Tensor, int]: - # TODO: Return the `index`-th example from the dataset, with the image optionally + # Return the `index`-th example from the dataset, with the image optionally # passed through the `augmentation_fn` if it is not `None`. - ... + return self.augmentation_fn(self.images[index]) if self.augmentation_fn else self.images[index], self.labels[index] if args.augment: # Construct a sequence of augmentation transformations from `torchvision.transforms.v2`. transformation = v2.Compose([ - # TODO: Add the following transformations: + # Add the following transformations: # - first create a `v2.RandomResize` that scales the image to # random size in range [28, 36], # - then add `v2.Pad` that pads the image with 4 pixels on each side, # - then add `v2.RandomCrop` that chooses a random crop of size 32x32, # - and finally add `v2.RandomHorizontalFlip` that uniformly # randomly flips the image horizontally. - ... + v2.RandomResize(28, 36), + v2.Pad(4), + v2.RandomCrop(32), + v2.RandomHorizontalFlip(), ]) def augmentation_fn(image: np.ndarray) -> torch.Tensor: - # TODO: First, convert the numpy `images` to a PyTorch tensor of uint8s, + # First, convert the numpy `images` to a PyTorch tensor of uint8s, # preferably by using `torch.from_numpy` or `torch.as_tensor` to avoid copying. # Then, because of the channels-position mismatch, permute the axes # in the image to change the order of the axes from HWC to CHW. # Next, apply the `transformation` to the image (by calling it with # the image as an argument), and finally permute the axes back to # the original order. - return ... + + return transformation(torch.as_tensor(image).permute(2, 0, 1)).permute(1, 2, 0) + else: augmentation_fn = None - # TODO: Create `train` and `dev` instances of `TorchDataset` from the corresponding + # Create `train` and `dev` instances of `TorchDataset` from the corresponding # `cifar` datasets. Limit their sizes to 5_000 and 1_000 examples, respectively, # and use the `augmentation_fn` for the training dataset. - train = ... - dev = ... + train = TorchDataset(cifar.train, 5_000, augmentation_fn) + dev = TorchDataset(cifar.dev, 1_000) if args.show_images: from torch.utils import tensorboard @@ -114,10 +127,10 @@ def augmentation_fn(image: np.ndarray) -> torch.Tensor: tb_writer.close() print("Saved first {} training imaged to logs/{}".format(GRID * GRID, TAG)) - # TODO: Create `train` and `dev` instances of `torch.utils.data.DataLoader` from + # Create `train` and `dev` instances of `torch.utils.data.DataLoader` from # the datasets, using the given `args.batch_size` and shuffling the training dataset. - train = ... - dev = ... + train = torch.utils.data.DataLoader(train, args.batch_size, shuffle=True) + dev = torch.utils.data.DataLoader(dev, args.batch_size) # Train logs = model.fit(train, epochs=args.epochs, validation_data=dev) From 0bb34e18c19691e52652a0d278ae86437a0f68f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Glerup=20R=C3=B8ssum?= <1959615+joglr@users.noreply.github.com> Date: Mon, 1 Apr 2024 17:36:24 +0200 Subject: [PATCH 64/64] Solve cifar_competition --- labs/04/cifar_competition.ps1 | 1 + labs/04/cifar_competition.py | 148 +++++++++++++++++++++++++++++----- 2 files changed, 128 insertions(+), 21 deletions(-) create mode 100644 labs/04/cifar_competition.ps1 diff --git a/labs/04/cifar_competition.ps1 b/labs/04/cifar_competition.ps1 new file mode 100644 index 0000000..0d919fe --- /dev/null +++ b/labs/04/cifar_competition.ps1 @@ -0,0 +1 @@ +clear && python .\cifar_competition.py diff --git a/labs/04/cifar_competition.py b/labs/04/cifar_competition.py index 0541de8..be29019 100644 --- a/labs/04/cifar_competition.py +++ b/labs/04/cifar_competition.py @@ -3,7 +3,10 @@ import datetime import os import re -os.environ.setdefault("KERAS_BACKEND", "torch") # Use PyTorch backend unless specified otherwise + +os.environ.setdefault( + "KERAS_BACKEND", "torch" +) # Use PyTorch backend unless specified otherwise import keras import numpy as np @@ -11,13 +14,23 @@ from cifar10 import CIFAR10 -# TODO: Define reasonable defaults and optionally more parameters. +# Define reasonable defaults and optionally more parameters. # Also, you can set the number of threads to 0 to use all your CPU cores. parser = argparse.ArgumentParser() -parser.add_argument("--batch_size", default=..., type=int, help="Batch size.") -parser.add_argument("--epochs", default=..., type=int, help="Number of epochs.") +parser.add_argument("--batch_size", default=128, type=int, help="Batch size.") +parser.add_argument("--epochs", default=30, type=int, help="Number of epochs.") +# parser.add_argument("--epochs", default=200, type=int, help="Number of epochs.") +parser.add_argument("--learning_rate", default=0.001, help="Initial learning rate") +parser.add_argument( + "--weight_decay", default=1e-4, type=float, help="L2 regularization weight decay." +) +parser.add_argument( + "--label_smoothing", default=0.1, type=float, help="Label smoothing." +) parser.add_argument("--seed", default=42, type=int, help="Random seed.") -parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.") +parser.add_argument( + "--threads", default=1, type=int, help="Maximum number of threads to use." +) class TorchTensorBoardCallback(keras.callbacks.Callback): @@ -28,7 +41,10 @@ def __init__(self, path): def writer(self, writer): if writer not in self._writers: import torch.utils.tensorboard - self._writers[writer] = torch.utils.tensorboard.SummaryWriter(os.path.join(self._path, writer)) + + self._writers[writer] = torch.utils.tensorboard.SummaryWriter( + os.path.join(self._path, writer) + ) return self._writers[writer] def add_logs(self, writer, logs, step): @@ -39,13 +55,51 @@ def add_logs(self, writer, logs, step): def on_epoch_end(self, epoch, logs=None): if logs: - if isinstance(getattr(self.model, "optimizer", None), keras.optimizers.Optimizer): - logs = logs | {"learning_rate": keras.ops.convert_to_numpy(self.model.optimizer.learning_rate)} - self.add_logs("train", {k: v for k, v in logs.items() if not k.startswith("val_")}, epoch + 1) - self.add_logs("val", {k[4:]: v for k, v in logs.items() if k.startswith("val_")}, epoch + 1) - + if isinstance( + getattr(self.model, "optimizer", None), keras.optimizers.Optimizer + ): + logs = logs | { + "learning_rate": keras.ops.convert_to_numpy( + self.model.optimizer.learning_rate + ) + } + self.add_logs( + "train", + {k: v for k, v in logs.items() if not k.startswith("val_")}, + epoch + 1, + ) + self.add_logs( + "val", + {k[4:]: v for k, v in logs.items() if k.startswith("val_")}, + epoch + 1, + ) + +def create_res(input_layer, filters, kernel_size, strides): + h = keras.layers.Conv2D( + filters=filters, + kernel_size=kernel_size, + strides=strides, + padding="same", + activation=None, + )(input_layer) + + h = keras.layers.BatchNormalization()(h) + h = keras.layers.Activation("relu")(h) + h = keras.layers.Conv2D( + filters=filters, + kernel_size=kernel_size, + strides=1, + padding="same", + activation=None, + use_bias=False, + )(h) + h = keras.layers.BatchNormalization()(h) + h = keras.layers.Add()([input_layer, h]) + h = keras.layers.Activation("relu")(h) + return h def main(args: argparse.Namespace) -> None: + # Set the random seed and the number of threads. keras.utils.set_random_seed(args.seed) if args.threads: @@ -53,23 +107,75 @@ def main(args: argparse.Namespace) -> None: torch.set_num_interop_threads(args.threads) # Create logdir name - args.logdir = os.path.join("logs", "{}-{}-{}".format( - os.path.basename(globals().get("__file__", "notebook")), - datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"), - ",".join(("{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v) for k, v in sorted(vars(args).items()))) - )) + args.logdir = os.path.join( + "logs", + "{}-{}-{}".format( + os.path.basename(globals().get("__file__", "notebook")), + datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"), + ",".join( + ( + "{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v) + for k, v in sorted(vars(args).items()) + ) + ), + ), + ) # Load data cifar = CIFAR10() - # TODO: Create the model and train it - model = ... + # Create the model and train it + inputs = keras.Input(shape=cifar.train.data["images"][0].shape) + h = keras.layers.Rescaling(1 / 255)(inputs) + h = keras.layers.Conv2D(64, 3, 1, "same", activation="relu")(h) + h = create_res(h, 64, 3, 1) + h = keras.layers.MaxPool2D(2)(h) + h = create_res(h, 64, 3, 1) + h = keras.layers.MaxPool2D(2)(h) + h = keras.layers.Dropout(0.2)(h) + h = create_res(h, 64, 3, 1) + h = keras.layers.Flatten()(h) + h = keras.layers.Dropout(0.2)(h) + h = keras.layers.Dense(200, activation="relu")(h) + outputs = keras.layers.Dense(len(CIFAR10.LABELS), activation="softmax")(h) + + model = keras.Model(inputs=inputs, outputs=outputs) + + model.summary() + + + lr_optimizer = keras.optimizers.schedules.CosineDecay( + initial_learning_rate=args.learning_rate, + decay_steps=len(cifar.train.data["images"] / args.batch_size * args.epochs) + ) + + model.compile( + optimizer=keras.optimizers.Adam( + learning_rate=lr_optimizer, + weight_decay=args.weight_decay), + loss=keras.losses.SparseCategoricalCrossentropy(), + metrics=[keras.metrics.SparseCategoricalAccuracy(name="accuracy")], + ) + + model.fit( + cifar.train.data["images"], + cifar.train.data["labels"], + batch_size=args.batch_size, + epochs=args.epochs, + + ) + + model.save(os.path.join(args.logdir, "cifar.h5"), include_optimizer=False) # Generate test set annotations, but in `args.logdir` to allow parallel execution. os.makedirs(args.logdir, exist_ok=True) - with open(os.path.join(args.logdir, "cifar_competition_test.txt"), "w", encoding="utf-8") as predictions_file: - # TODO: Perform the prediction on the test data. - for probs in model.predict(...): + with open( + os.path.join(args.logdir, "cifar_competition_test.txt"), "w", encoding="utf-8" + ) as predictions_file: + # Perform the prediction on the test data. + for probs in model.predict( + cifar.test.data["images"], batch_size=args.batch_size + ): print(np.argmax(probs), file=predictions_file)