diff --git a/pyproject.toml b/pyproject.toml index 37a71b3..820f60c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "pyspi" -version = "2.0.0" +version = "2.0.1" authors = [ { name ="Oliver M. Cliff", email="oliver.m.cliff@gmail.com"}, ] diff --git a/pyspi/calculator.py b/pyspi/calculator.py index 752197d..e8c1e7c 100644 --- a/pyspi/calculator.py +++ b/pyspi/calculator.py @@ -16,7 +16,7 @@ class Calculator: """Compute all pairwise interactions. - The calculator takes in a multivariate time-series dataset, computes and stores all pairwise interactions for the dataset. + The calculator takes in a multivariate time-series dataset (MTS), computes and stores all pairwise interactions for the dataset. It uses a YAML configuration file that can be modified in order to compute a reduced set of pairwise methods. Example: @@ -27,26 +27,25 @@ class Calculator: Args: dataset (:class:`~pyspi.data.Data`, array_like, optional): - The multivariate time series of M processes and T observations, defaults to None. + The multivariate time series of M processes and T observations, default=None. name (str, optional): - The name of the calculator. Mainly used for printing the results but can be useful if you have multiple instances, defaults to None. + The name of the calculator. Mainly used for printing the results but can be useful if you have multiple instances, default=None. labels (array_like, optional): - Any set of strings by which you want to label the calculator. This can be useful later for classification purposes, defaults to None. + Any set of strings by which you want to label the calculator. This can be useful later for classification purposes, default=None. subset (str, optional): - A pre-configured subset of SPIs to use. Options are "all", "fast", "sonnet", or "fabfour", defaults to "all". + A pre-configured subset of SPIs to use. Options are "all", "fast", "sonnet", or "fabfour", default="all". configfile (str, optional): The location of the YAML configuration file for a user-defined subset. See :ref:`Using a reduced SPI set`, defaults to :code:`'/pyspi/config.yaml'` detrend (bool, optional): - If True, detrend the dataset along the time axis before normalising (if enabled), defaults to True. + If True, detrend each time series in the MTS dataset individually along the time axis, default=False. normalise (bool, optional): - If True, z-score normalise the dataset along the time axis before computing SPIs, defaults to True. - Detrending (if enabled) is always applied before normalisation. + If True, z-score normalise each time series in the MTS dataset individually along the time axis, default=True. """ _optional_dependencies = None def __init__( self, dataset=None, name=None, labels=None, subset="all", configfile=None, - detrend=True, normalise=True + detrend=False, normalise=True ): self._spis = {} self._excluded_spis = list() @@ -511,7 +510,7 @@ def init_from_list(self, datasets, names, labels, **kwargs): self.add_calculator(calc) def init_from_yaml( - self, document, detrend=True, normalise=True, n_processes=None, n_observations=None, **kwargs + self, document, detrend=False, normalise=True, n_processes=None, n_observations=None, **kwargs ): datasets = [] names = [] diff --git a/pyspi/data.py b/pyspi/data.py index f8ca3b3..ad50e7e 100644 --- a/pyspi/data.py +++ b/pyspi/data.py @@ -38,22 +38,21 @@ class Data: Args: data (array_like, optional): - 2-dimensional array with raw data, defaults to None. + 2-dimensional array with raw data, default=None. dim_order (str, optional): - Order of dimensions, accepts two combinations of the characters 'p', and 's' for processes and observations, defaults to 'ps'. + Order of dimensions, accepts two combinations of the characters 'p', and 's' for processes and observations, default='ps'. detrend (bool, optional): - If True, detrend the dataset along the time axis before normalising (if enabled), defaults to True. + If True, detrend each time series in the MTS dataset individually along the time axis, default=False. normalise (bool, optional): - If True, z-score normalise the dataset along the time axis before computing SPIs, defaults to True. - Detrending (if enabled) is always applied before normalisation. + If True, z-score normalise each time series in the MTS dataset individually along the time axis, default=True. name (str, optional): Name of the dataset procnames (list, optional): - List of process names with length the number of processes, defaults to None. + List of process names with length the number of processes, default=None. n_processes (int, optional): - Truncates data to this many processes, defaults to None. + Truncates data to this many processes, default=None. n_observations (int, optional): - Truncates data to this many observations, defaults to None. + Truncates data to this many observations, default=None. """ @@ -61,7 +60,7 @@ def __init__( self, data=None, dim_order="ps", - detrend=True, + detrend=False, normalise=True, name=None, procnames=None, @@ -183,19 +182,19 @@ def set_data( data = data[:, :n_observations] if self.detrend: - print(Fore.GREEN + "[1/2] De-trending the dataset...") + print(Fore.GREEN + "[1/2] Detrending time series in the dataset...") try: data = detrend(data, axis=1) except ValueError as err: print(f"Could not detrend data: {err}") else: - print(Fore.RED + "[1/2] Skipping detrending of the dataset...") + print(Fore.RED + "[1/2] Skipping detrending of time series in the dataset...") if self.normalise: - print(Fore.GREEN + "[2/2] Normalising (z-scoring) the dataset...\n") + print(Fore.GREEN + "[2/2] Normalising (z-scoring) each time series in the dataset...\n") data = zscore(data, axis=1, nan_policy="omit", ddof=1) else: - print(Fore.RED + "[2/2] Skipping normalisation of the dataset...\n") + print(Fore.RED + "[2/2] Skipping normalisation of time series in the dataset...\n") nans = np.isnan(data) if nans.any(): diff --git a/setup.py b/setup.py index bf0a694..89b80e6 100644 --- a/setup.py +++ b/setup.py @@ -64,7 +64,7 @@ 'data/standard_normal.npy', 'data/cml7.npy']}, include_package_data=True, - version='2.0.0', + version='2.0.1', description='Library for pairwise analysis of time series data.', author='Oliver M. Cliff', author_email='oliver.m.cliff@gmail.com', diff --git a/tests/CML7_benchmark_tables.pkl b/tests/CML7_benchmark_tables.pkl index ddd5b96..f23b1df 100644 Binary files a/tests/CML7_benchmark_tables.pkl and b/tests/CML7_benchmark_tables.pkl differ diff --git a/tests/generate_benchmark_tables.py b/tests/generate_benchmark_tables.py index a5cb433..b8383c0 100644 --- a/tests/generate_benchmark_tables.py +++ b/tests/generate_benchmark_tables.py @@ -29,7 +29,7 @@ def get_benchmark_tables(calc_list): for i in range(10): np.random.seed(42) - calc = Calculator(dataset=dataset, detrend=True, normalise=True) + calc = Calculator(dataset=dataset) calc.compute() store_calcs.append(calc) diff --git a/tests/test_SPIs.py b/tests/test_SPIs.py index ea28593..30eed2e 100644 --- a/tests/test_SPIs.py +++ b/tests/test_SPIs.py @@ -24,7 +24,7 @@ def compute_new_tables(): benchmark_dataset = load_benchmark_dataset() # Compute new tables on the benchmark dataset np.random.seed(42) - calc = Calculator(dataset=benchmark_dataset, normalise=True, detrend=True) + calc = Calculator(dataset=benchmark_dataset) calc.compute() table_dict = dict() for spi in calc.spis: