diff --git a/pydmd/utils.py b/pydmd/utils.py index 047df3131..21b59ab44 100644 --- a/pydmd/utils.py +++ b/pydmd/utils.py @@ -1,15 +1,66 @@ """Utilities module.""" import warnings - +from numbers import Number +from typing import NamedTuple +from collections import namedtuple import numpy as np from numpy.lib.stride_tricks import sliding_window_view +# Named tuples used in functions. +# compute_svd uses "SVD", +# compute_tlsq uses "TLSQ". +SVD = namedtuple("SVD", ["U", "s", "V"]) +TLSQ = namedtuple("TLSQ", ["X_denoised", "Y_denoised"]) + + +def _svht(sigma_svd: np.ndarray, rows: int, cols: int) -> int: + """ + Singular Value Hard Threshold. + + :param sigma_svd: Singual values computed by SVD + :type sigma_svd: np.ndarray + :param rows: Number of rows of original data matrix. + :type rows: int + :param cols: Number of columns of original data matrix. + :type cols: int + :return: Computed rank. + :rtype: int -def compute_rank(X, svd_rank=0): + References: + Gavish, Matan, and David L. Donoho, The optimal hard threshold for + singular values is, IEEE Transactions on Information Theory 60.8 + (2014): 5040-5053. + https://ieeexplore.ieee.org/document/6846297 + """ + beta = np.divide(*sorted((rows, cols))) + omega = 0.56 * beta**3 - 0.95 * beta**2 + 1.82 * beta + 1.43 + tau = np.median(sigma_svd) * omega + rank = np.sum(sigma_svd > tau) + + if rank == 0: + warnings.warn( + "SVD optimal rank is 0. The largest singular values are " + "indistinguishable from noise. Setting rank truncation to 1.", + RuntimeWarning, + ) + rank = 1 + + return rank + + +def _compute_rank( + sigma_svd: np.ndarray, rows: int, cols: int, svd_rank: Number +) -> int: """ Rank computation for the truncated Singular Value Decomposition. - :param numpy.ndarray X: the matrix to decompose. + + :param sigma_svd: 1D singular values of SVD. + :type sigma_svd: np.ndarray + :param rows: Number of rows of original matrix. + :type rows: int + :param cols: Number of columns of original matrix. + :type cols: int :param svd_rank: the rank for the truncation; If 0, the method computes the optimal rank and uses it for truncation; if positive interger, the method uses the argument for the truncation; if float between 0 @@ -19,50 +70,70 @@ def compute_rank(X, svd_rank=0): :type svd_rank: int or float :return: the computed rank truncation. :rtype: int + References: Gavish, Matan, and David L. Donoho, The optimal hard threshold for singular values is, IEEE Transactions on Information Theory 60.8 (2014): 5040-5053. """ - U, s, _ = np.linalg.svd(X, full_matrices=False) - - def omega(x): - return 0.56 * x**3 - 0.95 * x**2 + 1.82 * x + 1.43 - if svd_rank == 0: - beta = np.divide(*sorted(X.shape)) - tau = np.median(s) * omega(beta) - rank = np.sum(s > tau) - if rank == 0: - warnings.warn( - "SVD optimal rank is 0. The largest singular values are " - "indistinguishable from noise. Setting rank truncation to 1.", - RuntimeWarning, - ) - rank = 1 + rank = _svht(sigma_svd, rows, cols) elif 0 < svd_rank < 1: - cumulative_energy = np.cumsum(s**2 / (s**2).sum()) + cumulative_energy = np.cumsum(sigma_svd**2 / (sigma_svd**2).sum()) rank = np.searchsorted(cumulative_energy, svd_rank) + 1 elif svd_rank >= 1 and isinstance(svd_rank, int): - rank = min(svd_rank, U.shape[1]) + rank = min(svd_rank, sigma_svd.size) else: - rank = min(X.shape) + rank = min(rows, cols) return rank -def compute_tlsq(X, Y, tlsq_rank): +def compute_rank(X: np.ndarray, svd_rank: Number = 0) -> int: + """ + Rank computation for the truncated Singular Value Decomposition. + + :param X: the matrix to decompose. + :type X: np.ndarray + :param svd_rank: the rank for the truncation; If 0, the method computes + the optimal rank and uses it for truncation; if positive interger, + the method uses the argument for the truncation; if float between 0 + and 1, the rank is the number of the biggest singular values that + are needed to reach the 'energy' specified by `svd_rank`; if -1, + the method does not compute truncation. Default is 0. + :type svd_rank: int or float + :return: the computed rank truncation. + :rtype: int + + References: + Gavish, Matan, and David L. Donoho, The optimal hard threshold for + singular values is, IEEE Transactions on Information Theory 60.8 + (2014): 5040-5053. + """ + _, s, _ = np.linalg.svd(X, full_matrices=False) + return _compute_rank(s, X.shape[0], X.shape[1], svd_rank) + + +def compute_tlsq( + X: np.ndarray, Y: np.ndarray, tlsq_rank: int +) -> NamedTuple( + "TLSQ", [("X_denoised", np.ndarray), ("Y_denoised", np.ndarray)] +): """ Compute Total Least Square. - :param numpy.ndarray X: the first matrix; - :param numpy.ndarray Y: the second matrix; - :param int tlsq_rank: the rank for the truncation; If 0, the method + :param X: the first matrix; + :type X: np.ndarray + :param Y: the second matrix; + :type Y: np.ndarray + :param tlsq_rank: the rank for the truncation; If 0, the method does not compute any noise reduction; if positive number, the method uses the argument for the SVD truncation used in the TLSQ method. + :type tlsq_rank: int :return: the denoised matrix X, the denoised matrix Y - :rtype: numpy.ndarray, numpy.ndarray + :rtype: NamedTuple("TLSQ", [('X_denoised', np.ndarray), + ('Y_denoised', np.ndarray)]) References: https://arxiv.org/pdf/1703.11004.pdf @@ -76,14 +147,19 @@ def compute_tlsq(X, Y, tlsq_rank): rank = min(tlsq_rank, V.shape[0]) VV = V[:rank, :].conj().T.dot(V[:rank, :]) - return X.dot(VV), Y.dot(VV) + return TLSQ(X.dot(VV), Y.dot(VV)) -def compute_svd(X, svd_rank=0): +def compute_svd( + X: np.ndarray, svd_rank: Number = 0 +) -> NamedTuple( + "SVD", [("U", np.ndarray), ("s", np.ndarray), ("V", np.ndarray)] +): """ Truncated Singular Value Decomposition. - :param numpy.ndarray X: the matrix to decompose. + :param X: the matrix to decompose. + :type X: np.ndarray :param svd_rank: the rank for the truncation; If 0, the method computes the optimal rank and uses it for truncation; if positive interger, the method uses the argument for the truncation; if float between 0 @@ -93,25 +169,27 @@ def compute_svd(X, svd_rank=0): :type svd_rank: int or float :return: the truncated left-singular vectors matrix, the truncated singular values array, the truncated right-singular vectors matrix. - :rtype: numpy.ndarray, numpy.ndarray, numpy.ndarray + :rtype: NamedTuple("SVD", [('U', np.ndarray), + ('s', np.ndarray), + ('V', np.ndarray)]) References: Gavish, Matan, and David L. Donoho, The optimal hard threshold for singular values is, IEEE Transactions on Information Theory 60.8 (2014): 5040-5053. """ - rank = compute_rank(X, svd_rank) U, s, V = np.linalg.svd(X, full_matrices=False) + rank = _compute_rank(s, X.shape[0], X.shape[1], svd_rank) V = V.conj().T U = U[:, :rank] V = V[:, :rank] s = s[:rank] - return U, s, V + return SVD(U, s, V) -def pseudo_hankel_matrix(X: np.ndarray, d: int): +def pseudo_hankel_matrix(X: np.ndarray, d: int) -> np.ndarray: """ Arrange the snapshot in the matrix `X` into the (pseudo) Hankel matrix. The attribute `d` controls the number of snapshot from `X` in