From ed0912e6947248a9aa11713b392c07cd699ae73c Mon Sep 17 00:00:00 2001 From: Gerhard Reinerth Date: Wed, 8 Nov 2023 19:25:51 +0000 Subject: [PATCH 1/7] Fixing redundant SVD computation --- pydmd/utils.py | 93 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 73 insertions(+), 20 deletions(-) diff --git a/pydmd/utils.py b/pydmd/utils.py index 047df3131..098e52893 100644 --- a/pydmd/utils.py +++ b/pydmd/utils.py @@ -1,12 +1,51 @@ """Utilities module.""" import warnings +from typing import Union import numpy as np from numpy.lib.stride_tricks import sliding_window_view -def compute_rank(X, svd_rank=0): +def __svht(sigma_svd: np.ndarray, rows: int, cols: int) -> int: + """ + Singular Value Hard Threshold. + + :param sigma_svd: Singual values computed by SVD + :type sigma_svd: np.ndarray + :param rows: Number of rows of original data matrix. + :type rows: int + :param cols: Number of columns of original data matrix. + :type cols: int + :return: Computed rank. + :rtype: int + + References: + Gavish, Matan, and David L. Donoho, The optimal hard threshold for + singular values is, IEEE Transactions on Information Theory 60.8 + (2014): 5040-5053. + https://ieeexplore.ieee.org/document/6846297 + """ + beta = np.divide(*sorted((rows, cols))) + beta_square = beta * beta + omega = 0.56 * beta_square * beta - 0.95 * beta_square + 1.82 * beta + 1.43 + tau = np.median(sigma_svd) * omega + rank = np.sum(sigma_svd > tau) + + if rank == 0: + warnings.warn( + "SVD optimal rank is 0. The largest singular values are " + "indistinguishable from noise. Setting rank truncation to 1.", + RuntimeWarning, + ) + rank = 1 + + return rank + + +def __compute_rank( + sigma_svd: np.ndarray, rows: int, cols: int, svd_rank: Union[float, int] +) -> int: """ Rank computation for the truncated Singular Value Decomposition. :param numpy.ndarray X: the matrix to decompose. @@ -24,33 +63,47 @@ def compute_rank(X, svd_rank=0): singular values is, IEEE Transactions on Information Theory 60.8 (2014): 5040-5053. """ - U, s, _ = np.linalg.svd(X, full_matrices=False) - - def omega(x): - return 0.56 * x**3 - 0.95 * x**2 + 1.82 * x + 1.43 - if svd_rank == 0: - beta = np.divide(*sorted(X.shape)) - tau = np.median(s) * omega(beta) - rank = np.sum(s > tau) - if rank == 0: - warnings.warn( - "SVD optimal rank is 0. The largest singular values are " - "indistinguishable from noise. Setting rank truncation to 1.", - RuntimeWarning, - ) - rank = 1 + rank = __svht(sigma_svd, rows, cols) + elif 0 < svd_rank < 1: - cumulative_energy = np.cumsum(s**2 / (s**2).sum()) + sigma_svd_squared = np.square(sigma_svd) + cumulative_energy = np.cumsum( + sigma_svd_squared / sigma_svd_squared.sum() + ) rank = np.searchsorted(cumulative_energy, svd_rank) + 1 + elif svd_rank >= 1 and isinstance(svd_rank, int): - rank = min(svd_rank, U.shape[1]) + rank = min(svd_rank, sigma_svd.size) + else: - rank = min(X.shape) + rank = min(rows, cols) return rank +def compute_rank(X: np.ndarray, svd_rank=0): + """ + Rank computation for the truncated Singular Value Decomposition. + :param numpy.ndarray X: the matrix to decompose. + :param svd_rank: the rank for the truncation; If 0, the method computes + the optimal rank and uses it for truncation; if positive interger, + the method uses the argument for the truncation; if float between 0 + and 1, the rank is the number of the biggest singular values that + are needed to reach the 'energy' specified by `svd_rank`; if -1, + the method does not compute truncation. Default is 0. + :type svd_rank: int or float + :return: the computed rank truncation. + :rtype: int + References: + Gavish, Matan, and David L. Donoho, The optimal hard threshold for + singular values is, IEEE Transactions on Information Theory 60.8 + (2014): 5040-5053. + """ + _, s, _ = np.linalg.svd(X, full_matrices=False) + return __compute_rank(s, X.shape[0], X.shape[1], svd_rank) + + def compute_tlsq(X, Y, tlsq_rank): """ Compute Total Least Square. @@ -100,8 +153,8 @@ def compute_svd(X, svd_rank=0): singular values is, IEEE Transactions on Information Theory 60.8 (2014): 5040-5053. """ - rank = compute_rank(X, svd_rank) U, s, V = np.linalg.svd(X, full_matrices=False) + rank = __compute_rank(s, X.shape[0], X.shape[1], svd_rank) V = V.conj().T U = U[:, :rank] From b09930979de43c937b977e2aaff216632aa6a505 Mon Sep 17 00:00:00 2001 From: Gerhard Reinerth Date: Thu, 9 Nov 2023 14:14:21 +0000 Subject: [PATCH 2/7] Improving codestyle, updating documentation. --- pydmd/utils.py | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/pydmd/utils.py b/pydmd/utils.py index 098e52893..00885e384 100644 --- a/pydmd/utils.py +++ b/pydmd/utils.py @@ -7,7 +7,7 @@ from numpy.lib.stride_tricks import sliding_window_view -def __svht(sigma_svd: np.ndarray, rows: int, cols: int) -> int: +def _svht(sigma_svd: np.ndarray, rows: int, cols: int) -> int: """ Singular Value Hard Threshold. @@ -27,8 +27,7 @@ def __svht(sigma_svd: np.ndarray, rows: int, cols: int) -> int: https://ieeexplore.ieee.org/document/6846297 """ beta = np.divide(*sorted((rows, cols))) - beta_square = beta * beta - omega = 0.56 * beta_square * beta - 0.95 * beta_square + 1.82 * beta + 1.43 + omega = 0.56 * beta**3 - 0.95 * beta**2 + 1.82 * beta + 1.43 tau = np.median(sigma_svd) * omega rank = np.sum(sigma_svd > tau) @@ -43,12 +42,13 @@ def __svht(sigma_svd: np.ndarray, rows: int, cols: int) -> int: return rank -def __compute_rank( +def _compute_rank( sigma_svd: np.ndarray, rows: int, cols: int, svd_rank: Union[float, int] ) -> int: """ Rank computation for the truncated Singular Value Decomposition. - :param numpy.ndarray X: the matrix to decompose. + :param sigma_svd: 1D singular values of SVD. + :type sigma_svd: np.ndarray :param svd_rank: the rank for the truncation; If 0, the method computes the optimal rank and uses it for truncation; if positive interger, the method uses the argument for the truncation; if float between 0 @@ -64,18 +64,12 @@ def __compute_rank( (2014): 5040-5053. """ if svd_rank == 0: - rank = __svht(sigma_svd, rows, cols) - + rank = _svht(sigma_svd, rows, cols) elif 0 < svd_rank < 1: - sigma_svd_squared = np.square(sigma_svd) - cumulative_energy = np.cumsum( - sigma_svd_squared / sigma_svd_squared.sum() - ) + cumulative_energy = np.cumsum(sigma_svd**2 / (sigma_svd**2).sum()) rank = np.searchsorted(cumulative_energy, svd_rank) + 1 - elif svd_rank >= 1 and isinstance(svd_rank, int): rank = min(svd_rank, sigma_svd.size) - else: rank = min(rows, cols) @@ -101,7 +95,7 @@ def compute_rank(X: np.ndarray, svd_rank=0): (2014): 5040-5053. """ _, s, _ = np.linalg.svd(X, full_matrices=False) - return __compute_rank(s, X.shape[0], X.shape[1], svd_rank) + return _compute_rank(s, X.shape[0], X.shape[1], svd_rank) def compute_tlsq(X, Y, tlsq_rank): @@ -154,7 +148,7 @@ def compute_svd(X, svd_rank=0): (2014): 5040-5053. """ U, s, V = np.linalg.svd(X, full_matrices=False) - rank = __compute_rank(s, X.shape[0], X.shape[1], svd_rank) + rank = _compute_rank(s, X.shape[0], X.shape[1], svd_rank) V = V.conj().T U = U[:, :rank] From 0f0d14bed0d879dd325b0bf01a5efbadb8cd0459 Mon Sep 17 00:00:00 2001 From: Gerhard Reinerth Date: Thu, 9 Nov 2023 14:19:09 +0000 Subject: [PATCH 3/7] Updated docu --- pydmd/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pydmd/utils.py b/pydmd/utils.py index 00885e384..28e9162a5 100644 --- a/pydmd/utils.py +++ b/pydmd/utils.py @@ -49,6 +49,10 @@ def _compute_rank( Rank computation for the truncated Singular Value Decomposition. :param sigma_svd: 1D singular values of SVD. :type sigma_svd: np.ndarray + :param rows: Number of rows of original matrix. + :type rows: int + :param cols: Number of columns of original matrix. + :type cols: int :param svd_rank: the rank for the truncation; If 0, the method computes the optimal rank and uses it for truncation; if positive interger, the method uses the argument for the truncation; if float between 0 From ba61fd1c2060c5f826bdf56d8d4fa821209cf90a Mon Sep 17 00:00:00 2001 From: Gerhard Reinerth Date: Thu, 9 Nov 2023 17:48:04 +0000 Subject: [PATCH 4/7] Improving code style, including typehints, updating docu. --- pydmd/utils.py | 40 +++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/pydmd/utils.py b/pydmd/utils.py index 28e9162a5..ca2d8ebfa 100644 --- a/pydmd/utils.py +++ b/pydmd/utils.py @@ -1,7 +1,8 @@ """Utilities module.""" import warnings -from typing import Union +from numbers import Number +from typing import Tuple import numpy as np from numpy.lib.stride_tricks import sliding_window_view @@ -43,10 +44,11 @@ def _svht(sigma_svd: np.ndarray, rows: int, cols: int) -> int: def _compute_rank( - sigma_svd: np.ndarray, rows: int, cols: int, svd_rank: Union[float, int] + sigma_svd: np.ndarray, rows: int, cols: int, svd_rank: Number ) -> int: """ Rank computation for the truncated Singular Value Decomposition. + :param sigma_svd: 1D singular values of SVD. :type sigma_svd: np.ndarray :param rows: Number of rows of original matrix. @@ -62,6 +64,7 @@ def _compute_rank( :type svd_rank: int or float :return: the computed rank truncation. :rtype: int + References: Gavish, Matan, and David L. Donoho, The optimal hard threshold for singular values is, IEEE Transactions on Information Theory 60.8 @@ -80,10 +83,12 @@ def _compute_rank( return rank -def compute_rank(X: np.ndarray, svd_rank=0): +def compute_rank(X: np.ndarray, svd_rank: Number = 0) -> int: """ Rank computation for the truncated Singular Value Decomposition. - :param numpy.ndarray X: the matrix to decompose. + + :param X: the matrix to decompose. + :type X: np.ndarray :param svd_rank: the rank for the truncation; If 0, the method computes the optimal rank and uses it for truncation; if positive interger, the method uses the argument for the truncation; if float between 0 @@ -93,6 +98,7 @@ def compute_rank(X: np.ndarray, svd_rank=0): :type svd_rank: int or float :return: the computed rank truncation. :rtype: int + References: Gavish, Matan, and David L. Donoho, The optimal hard threshold for singular values is, IEEE Transactions on Information Theory 60.8 @@ -102,18 +108,23 @@ def compute_rank(X: np.ndarray, svd_rank=0): return _compute_rank(s, X.shape[0], X.shape[1], svd_rank) -def compute_tlsq(X, Y, tlsq_rank): +def compute_tlsq( + X: np.ndarray, Y: np.ndarray, tlsq_rank: int +) -> Tuple[np.ndarray, np.ndarray]: """ Compute Total Least Square. - :param numpy.ndarray X: the first matrix; - :param numpy.ndarray Y: the second matrix; - :param int tlsq_rank: the rank for the truncation; If 0, the method + :param X: the first matrix; + :type X: np.ndarray + :param Y: the second matrix; + :type Y: np.ndarray + :param tlsq_rank: the rank for the truncation; If 0, the method does not compute any noise reduction; if positive number, the method uses the argument for the SVD truncation used in the TLSQ method. + :type tlsq_rank: int :return: the denoised matrix X, the denoised matrix Y - :rtype: numpy.ndarray, numpy.ndarray + :rtype: Tuple[np.ndarray, np.ndarray] References: https://arxiv.org/pdf/1703.11004.pdf @@ -130,11 +141,14 @@ def compute_tlsq(X, Y, tlsq_rank): return X.dot(VV), Y.dot(VV) -def compute_svd(X, svd_rank=0): +def compute_svd( + X: np.ndarray, svd_rank: Number = 0 +) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: """ Truncated Singular Value Decomposition. - :param numpy.ndarray X: the matrix to decompose. + :param X: the matrix to decompose. + :type X: np.ndarray :param svd_rank: the rank for the truncation; If 0, the method computes the optimal rank and uses it for truncation; if positive interger, the method uses the argument for the truncation; if float between 0 @@ -144,7 +158,7 @@ def compute_svd(X, svd_rank=0): :type svd_rank: int or float :return: the truncated left-singular vectors matrix, the truncated singular values array, the truncated right-singular vectors matrix. - :rtype: numpy.ndarray, numpy.ndarray, numpy.ndarray + :rtype: Tuple[np.ndarray, np.ndarray, np.ndarray] References: Gavish, Matan, and David L. Donoho, The optimal hard threshold for @@ -162,7 +176,7 @@ def compute_svd(X, svd_rank=0): return U, s, V -def pseudo_hankel_matrix(X: np.ndarray, d: int): +def pseudo_hankel_matrix(X: np.ndarray, d: int) -> np.ndarray: """ Arrange the snapshot in the matrix `X` into the (pseudo) Hankel matrix. The attribute `d` controls the number of snapshot from `X` in From e6b5203e0203370ac2cd10f7879ddd0bb80b17e0 Mon Sep 17 00:00:00 2001 From: Gerhard Reinerth Date: Sat, 11 Nov 2023 10:28:38 +0000 Subject: [PATCH 5/7] Reformatting to pacify pylint --- pydmd/utils.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/pydmd/utils.py b/pydmd/utils.py index ca2d8ebfa..0d4a62eb1 100644 --- a/pydmd/utils.py +++ b/pydmd/utils.py @@ -2,8 +2,8 @@ import warnings from numbers import Number -from typing import Tuple - +from typing import NamedTuple +from collections import namedtuple import numpy as np from numpy.lib.stride_tricks import sliding_window_view @@ -110,7 +110,9 @@ def compute_rank(X: np.ndarray, svd_rank: Number = 0) -> int: def compute_tlsq( X: np.ndarray, Y: np.ndarray, tlsq_rank: int -) -> Tuple[np.ndarray, np.ndarray]: +) -> NamedTuple( + "TLSQ", [("X_denoised", np.ndarray), ("Y_denoised", np.ndarray)] +): """ Compute Total Least Square. @@ -124,7 +126,8 @@ def compute_tlsq( method. :type tlsq_rank: int :return: the denoised matrix X, the denoised matrix Y - :rtype: Tuple[np.ndarray, np.ndarray] + :rtype: NamedTuple("TLSQ", [('X_denoised', np.ndarray), + ('Y_denoised', np.ndarray)]) References: https://arxiv.org/pdf/1703.11004.pdf @@ -137,13 +140,15 @@ def compute_tlsq( V = np.linalg.svd(np.append(X, Y, axis=0), full_matrices=False)[-1] rank = min(tlsq_rank, V.shape[0]) VV = V[:rank, :].conj().T.dot(V[:rank, :]) - - return X.dot(VV), Y.dot(VV) + TLSQ = namedtuple("TLSQ", ["X_denoised", "Y_denoised"]) + return TLSQ(X.dot(VV), Y.dot(VV)) def compute_svd( X: np.ndarray, svd_rank: Number = 0 -) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: +) -> NamedTuple( + "SVD", [("U", np.ndarray), ("s", np.ndarray), ("V", np.ndarray)] +): """ Truncated Singular Value Decomposition. @@ -158,7 +163,9 @@ def compute_svd( :type svd_rank: int or float :return: the truncated left-singular vectors matrix, the truncated singular values array, the truncated right-singular vectors matrix. - :rtype: Tuple[np.ndarray, np.ndarray, np.ndarray] + :rtype: NamedTuple("SVD", [('U', np.ndarray), + ('s', np.ndarray), + ('V', np.ndarray)]) References: Gavish, Matan, and David L. Donoho, The optimal hard threshold for @@ -172,8 +179,9 @@ def compute_svd( U = U[:, :rank] V = V[:, :rank] s = s[:rank] + SVD = namedtuple("SVD", ["U", "s", "V"]) - return U, s, V + return SVD(U, s, V) def pseudo_hankel_matrix(X: np.ndarray, d: int) -> np.ndarray: From 7c16a144ddb082c82d64528eec548401f3a554c3 Mon Sep 17 00:00:00 2001 From: Gerhard Reinerth Date: Sat, 11 Nov 2023 11:17:47 +0000 Subject: [PATCH 6/7] Moving named tuples definition to top of module utils.py --- pydmd/utils.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pydmd/utils.py b/pydmd/utils.py index 0d4a62eb1..a4316a0e8 100644 --- a/pydmd/utils.py +++ b/pydmd/utils.py @@ -7,6 +7,12 @@ import numpy as np from numpy.lib.stride_tricks import sliding_window_view +# Named tuples used functions. +# compute_svd uses "SVD", +# compute_tlsq uses "TLSQ". +SVD = namedtuple("SVD", ["U", "s", "V"]) +TLSQ = namedtuple("TLSQ", ["X_denoised", "Y_denoised"]) + def _svht(sigma_svd: np.ndarray, rows: int, cols: int) -> int: """ @@ -140,7 +146,7 @@ def compute_tlsq( V = np.linalg.svd(np.append(X, Y, axis=0), full_matrices=False)[-1] rank = min(tlsq_rank, V.shape[0]) VV = V[:rank, :].conj().T.dot(V[:rank, :]) - TLSQ = namedtuple("TLSQ", ["X_denoised", "Y_denoised"]) + return TLSQ(X.dot(VV), Y.dot(VV)) @@ -179,7 +185,6 @@ def compute_svd( U = U[:, :rank] V = V[:, :rank] s = s[:rank] - SVD = namedtuple("SVD", ["U", "s", "V"]) return SVD(U, s, V) From fd2ac4e934bd2b6d67dcdca88d7e65bbfc6552b0 Mon Sep 17 00:00:00 2001 From: Gerhard Reinerth Date: Sat, 11 Nov 2023 11:19:08 +0000 Subject: [PATCH 7/7] Updating comments on named tuples on top of module utils.py --- pydmd/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydmd/utils.py b/pydmd/utils.py index a4316a0e8..21b59ab44 100644 --- a/pydmd/utils.py +++ b/pydmd/utils.py @@ -7,7 +7,7 @@ import numpy as np from numpy.lib.stride_tricks import sliding_window_view -# Named tuples used functions. +# Named tuples used in functions. # compute_svd uses "SVD", # compute_tlsq uses "TLSQ". SVD = namedtuple("SVD", ["U", "s", "V"])