Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions pyhctsa/calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from itertools import product
from pathlib import Path
from typing import Union, Any, Callable
import logging

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -38,8 +39,8 @@ def classify_output(res) -> int:
out = 4
return out

def _apply_selection_wrapper(func:Callable, filter_keys:Union[str, list[str]],
keep:bool=True) -> Callable:
def _apply_selection_wrapper(func: Callable, filter_keys: Union[str, list[str]],
keep: bool = True) -> Callable:
"""
Wraps a function to selectively filter keys from its dict output.

Expand Down Expand Up @@ -69,7 +70,7 @@ def wrapper(*args, **kwargs):
if isinstance(result, dict):
missing = [k for k in keys if k not in result] # log all of the missing keys
if missing:
print(f'Warning: time-series features for func {func} not found {missing}')
logging.info(f'Warning: time-series features for func {func} not found {missing}')
if keep:
return {k: result[k] for k in keys if k in result}
else:
Expand All @@ -86,7 +87,7 @@ def _standardise_inputs(data) -> list[np.ndarray]:
elif data.ndim == 2:
if data.shape[0] > data.shape[1]:
# notify the user to check that the shapes make sense
print(f"Check that the shape of the 2D input is such "
logging.warning(f"Check that the shape of the 2D input is such "
f"that (n_series, n_samples). Got shape: {data.shape}")
return [np.asarray(row, dtype=float) for row in data]
else:
Expand Down Expand Up @@ -216,7 +217,7 @@ def _check_deps(self, module_key, feature_name, config):
missing = [dep for dep in deps_to_check if not _check_optional_deps(dep)]
if missing:
full_name = f"{module_key}.{feature_name}"
print(f"Skipping function '{full_name}' - missing dependencies: {', '.join(missing)}")
logging.info(f"Skipping function '{full_name}' - missing dependencies: {', '.join(missing)}")
self._skipped_functions.append((full_name, missing))
return False
return True
Expand All @@ -229,7 +230,7 @@ def _build_feature_funcs(self):
try:
module = importlib.import_module(f"{self._operations_package}.{module_key}")
except ImportError as e:
print(f"Failed to import module '{module_key}': {e}")
logging.warning(f"Failed to import module '{module_key}': {e}")
# Skip all functions in this module since we can't import it
for feature_name in self.config[module_key].keys():
skipped_functions.append((f"{module_key}.{feature_name}", ["import_error"]))
Expand Down Expand Up @@ -273,7 +274,7 @@ def _build_feature_funcs(self):
# store information about skipped functions for later reference
self._skipped_functions = skipped_functions
if skipped_functions:
print(f"Total functions skipped due to missing dependencies: {len(skipped_functions)}")
logging.info(f"Total functions skipped due to missing dependencies: {len(skipped_functions)}")

return feature_funcs

Expand Down
2 changes: 1 addition & 1 deletion pyhctsa/distribute.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class LocalDistributor(BaseDistributor):
Number of worker processes to use. If ``None``, defaults to
``multiprocessing.cpu_count()``.
"""
def __init__(self, n_workers : Union[int, None] = None):
def __init__(self, n_workers: Union[int, None] = None):
self.n_workers = n_workers or mp.cpu_count()
try:
pathos_mp.set_start_method('spawn', force=True)
Expand Down
12 changes: 6 additions & 6 deletions pyhctsa/operations/correlation.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ def add_noise(y: ArrayLike, tau: Union[int, str] = 1, ami_method: str = 'even',

return out

def first_under_fn(x : ArrayLike, m : ArrayLike, p : ArrayLike) -> float:
def first_under_fn(x: ArrayLike, m: ArrayLike, p: ArrayLike) -> float:
"""
Find the value of m for the first time p goes under the threshold, x.
p and m are vectors of the same length.
Expand Down Expand Up @@ -916,7 +916,7 @@ def stick_angles(y: ArrayLike) -> dict:

return out

def _sub_statav(x: ArrayLike, n : int) -> tuple:
def _sub_statav(x: ArrayLike, n: int) -> tuple:
# helper function
nn = len(x)
if nn < 2 * n: # not long enough
Expand Down Expand Up @@ -1280,7 +1280,7 @@ def embed2_shapes(y: ArrayLike, tau: Union[str, int, None] = 'tau',
counts -= 1 # ignore self counts

if np.all(counts == 0):
print("No counts detected!")
logging.warning("embed2_shapes: no counts detected!")
return np.nan

# Return basic statistics on the counts
Expand Down Expand Up @@ -1535,7 +1535,7 @@ def acf_y(t):
for i, t in enumerate(tau):
if np.any(np.isnan(y)):
good_r = (~np.isnan(y[:N-t])) & (~np.isnan(y[t:]))
print(f'NaNs in time series, computing for {np.sum(good_r)}/{len(good_r)} pairs of points')
logging.info(f'NaNs in time series, computing for {np.sum(good_r)}/{len(good_r)} pairs of points.')
y1 = y[:N-t]
y1n = y1[good_r] - np.mean(y1[good_r])
y2 = y[t:]
Expand Down Expand Up @@ -1719,7 +1719,7 @@ def _stat_av(y: ArrayLike, window_stat: str = 'mean', num_seg: int = 5, inc_move
logging.warning(f"Time-series of length {len(y)} is too short for {num_seg} windows")
return np.nan
inc = np.floor(win_length/inc_move) # increment to move at each step
# if incrment rounded down to zero, prop it up
# if increment rounded down to zero, prop it up
if inc == 0:
inc = 1

Expand Down Expand Up @@ -1829,7 +1829,7 @@ def autocorr_shape(y: ArrayLike, stop_when: Union[int, str] = 'pos_drown') -> di

# Check for good behavior
if np.any(np.isnan(acf)):
# This is an anomalous time series (e.g., all constant, or conatining NaNs)
# This is an anomalous time series (e.g., all constant, or containing NaNs)
out = np.nan

out = {}
Expand Down
8 changes: 4 additions & 4 deletions pyhctsa/operations/distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,11 @@ def compare_ks_fit(x: ArrayLike, what_distn: str) -> dict:
elif what_distn == 'exp':
# Check positivity
if np.any(x < 0):
print("The data contains negative values, but Exponential is a positive-only distribution.")
logging.warning("The data contains negative values, but Exponential is a positive-only distribution.")
return np.nan
# Check constant
if np.all(x == x[0]):
print("Data are a constant")
logging.warning("Data are a constant.")
return np.nan
# Fit Exponential distribution (equivalent to expfit in MATLAB)
_, lam = expon.fit(x, floc=0) # force support at 0
Expand All @@ -91,7 +91,7 @@ def compare_ks_fit(x: ArrayLike, what_distn: str) -> dict:
elif what_distn == 'logn':
# Check positivity
if np.any(x <= 0):
print("The data are not positive, but Log-Normal is a positive-only distribution.")
logging.warning("The data are not positive, but Log-Normal is a positive-only distribution.")
return np.nan
# Fit log-normal distribution
shape, loc, scale = lognorm.fit(x, floc=0) # sigma, 0, exp(mu)
Expand All @@ -107,7 +107,7 @@ def compare_ks_fit(x: ArrayLike, what_distn: str) -> dict:
ffit_func = lambda xi: lognorm.pdf(xi, s=sigma, loc=0, scale=np.exp(mu))

else:
raise ValueError(f"Unknown distribution: {what_distn}.")
raise ValueError(f"Unknown distribution: {what_distn}.")

# ----------------------------
# Estimate smoothed empirical distribution
Expand Down
3 changes: 2 additions & 1 deletion pyhctsa/operations/entropy.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from math import factorial
from typing import Optional, Union
import logging

import numpy as np
from numpy.typing import ArrayLike
Expand Down Expand Up @@ -265,7 +266,7 @@ def multi_scale_entropy(
pp_text = f"after {pre_process_how} pre-processing"
else:
pp_text = ""
print(f"Warning: Not enough samples ({len(y)} {pp_text}) to compute SampEn at multiple scales")
logging.warning(f"Not enough samples ({len(y)} {pp_text}) to compute sample entropy at multiple scales")
return {'out': np.nan}

# Output raw values
Expand Down
3 changes: 2 additions & 1 deletion pyhctsa/operations/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import scipy
from scipy.stats import expon, norm
from ts2vg import NaturalVG
import logging

from pyhctsa.operations.correlation import autocorr, first_crossing
from pyhctsa.operations.entropy import distribution_entropy
Expand Down Expand Up @@ -87,7 +88,7 @@ def visibility_graph(y: ArrayLike, meth: str = 'horiz', max_l: int = 5000) -> di
N = len(y)
if N > max_l:
# too long to store in memory
print(f"Time series ({N} > {max_l}) is too long for visibility graph."
logging.info(f"Time series ({N} > {max_l}) is too long for visibility graph."
f"Analyzing the first {max_l} samples.")
y = y[:max_l]
N = len(y)
Expand Down
40 changes: 20 additions & 20 deletions pyhctsa/operations/information.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ def _mi_bin(v1: ArrayLike, v2: ArrayLike, r1: Union[str, list] = 'range',
if np.any(mask):
mi = np.sum(p_ij[mask] * np.log(p_ij[mask] / p_ixp_j[mask]))
else:
print("The histograms aren't catching any points. Perhaps due to an inappropriate custom range for binning the data.")
logging.warning("The histograms aren't catching any points. Perhaps due to an inappropriate custom range for binning the data.")
mi = np.nan

return mi
Expand Down Expand Up @@ -403,7 +403,7 @@ def automutual_info(
for k, delay in enumerate(time_delay):
# check enough samples to compute automutual info
if delay > n - min_samples:
# time sereis too short - keep the remaining values as NaNs
# time series too short - keep the remaining values as NaNs
break

# form the time-delay vectors y1 and y2
Expand All @@ -424,8 +424,8 @@ def automutual_info(
amis[k] = mi_calc.computeAverageLocalOfObservations()

if np.isnan(amis).any():
print(
f"Warning: Time series (n={n}) is too short for automutual information calculations "
logging.warning(
f"Time series (n={n}) is too short for automutual information calculations "
f"up to lags of {max(time_delay)}"
)

Expand Down Expand Up @@ -457,9 +457,9 @@ def mutual_info(

Parameters
----------
y1 : ArrayLike
y1 : array-like
First input time series.
y2 : ArrayLike
y2 : array-like
Second input time series.
est_method : str, optional
Estimation method to use:
Expand Down Expand Up @@ -679,7 +679,7 @@ def _rm_info(*args):
return

# some initial tests on the input arguments
x = np.array(args[0]) # make sure the imputs are in numpy array form
x = np.array(args[0]) # make sure the inputs are in numpy array form
y = np.array(args[1])

x_shape = x.shape
Expand All @@ -689,23 +689,23 @@ def _rm_info(*args):
len_y = y_shape[0]

if len(x_shape) != 1: # makes sure x is one-dimensional
print("Error: invalid dimension of x")
logging.warning("Invalid dimension of x")
return

if len(y_shape) != 1:
print("Error: invalid dimension of y")
logging.warning("Invalid dimension of y")
return

if len_x != len_y: # makes sure x and y have the same number of elements
print("Error: unequal length of x and y")
logging.warning("Unequal length of x and y")
return

if n_args > 5:
print("Error: too many arguments")
logging.warning("Too many arguments")
return

if n_args < 2:
print("Error: not enough arguments")
logging.warning("Not enough arguments")
return

# setting up variables depending on the number of inputs
Expand Down Expand Up @@ -869,19 +869,19 @@ def _rm_histogram_2(*args):
leny = yshape[0]

if len(xshape) != 1: # makes sure x is one-dimensional
print("Error: invalid dimension of x")
logging.warning("Invalid dimension of x")
return

if len(yshape) != 1:
print("Error: invalid dimension of y")
logging.warning("Invalid dimension of y")
return

if lenx != leny: # makes sure x and y have the same number of elements
print("Error: unequal length of x and y")
logging.warning("Unequal length of x and y")
return

if nargin > 3:
print("Error: too many arguments")
logging.warning("Too many arguments")
return

if nargin == 2:
Expand Down Expand Up @@ -909,16 +909,16 @@ def _rm_histogram_2(*args):
# check that the descriptor is valid; a problem is reported, but none of these checks returns early

if ncellx < 1:
print("Error: invalid number of cells in X dimension")
logging.warning("Invalid number of cells in X dimension")

if ncelly < 1:
print("Error: invalid number of cells in Y dimension")
logging.warning("Invalid number of cells in Y dimension")

if upperx <= lowerx:
print("Error: invalid bounds in X dimension")
logging.warning("Invalid bounds in X dimension")

if uppery <= lowery:
print("Error: invalid bounds in Y dimension")
logging.warning("Invalid bounds in Y dimension")

result = np.zeros([int(ncellx), int(ncelly)],
dtype=int) # should do the same thing as matlab: result(1:ncellx,1:ncelly) = 0;
Expand Down
1 change: 0 additions & 1 deletion pyhctsa/operations/medical.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,6 @@ def pol_var(x: ArrayLike, d: float = 1, D: int = 6) -> float:
i = 0
pc = 0

# seqcnt = 0
while i <= (N-D):
x_seq = x_sym[i:(i+D)]
if np.array_equal(x_seq, z_seq) or np.array_equal(x_seq, o_seq):
Expand Down
Loading
Loading