diff --git a/pymc_extras/statespace/models/DFM.py b/pymc_extras/statespace/models/DFM.py index 64933d45..a3f7a7b6 100644 --- a/pymc_extras/statespace/models/DFM.py +++ b/pymc_extras/statespace/models/DFM.py @@ -5,7 +5,7 @@ import pytensor.tensor as pt from pymc_extras.statespace.core.statespace import PyMCStateSpace -from pymc_extras.statespace.models.utilities import make_default_coords +from pymc_extras.statespace.models.utilities import make_default_coords, validate_names from pymc_extras.statespace.utils.constants import ( ALL_STATE_AUX_DIM, ALL_STATE_DIM, @@ -224,9 +224,7 @@ def __init__( self, k_factors: int, factor_order: int, - k_endog: int | None = None, endog_names: Sequence[str] | None = None, - k_exog: int | None = None, exog_names: Sequence[str] | None = None, shared_exog_states: bool = False, exog_innovations: bool = False, @@ -249,19 +247,11 @@ def __init__( and are modeled as a white noise process, i.e., :math:`f_t = \varepsilon_{f,t}`. Therefore, the state vector will include one state per factor and "factor_ar" will not exist. - k_endog : int, optional - Number of observed time series. If not provided, the number of observed series will be inferred from `endog_names`. - At least one of `k_endog` or `endog_names` must be provided. - endog_names : list of str, optional - Names of the observed time series. If not provided, default names will be generated as `endog_1`, `endog_2`, ..., `endog_k` based on `k_endog`. - At least one of `k_endog` or `endog_names` must be provided. - - k_exog : int, optional - Number of exogenous variables. If not provided, the model will not have exogenous variables. + Names of the observed time series. exog_names : Sequence[str], optional - Names of the exogenous variables. If not provided, but `k_exog` is specified, default names will be generated as `exog_1`, `exog_2`, ..., `exog_k`. + Names of the exogenous variables. shared_exog_states: bool, optional Whether exogenous latent states are shared across the observed states. If True, there will be only one set of exogenous latent @@ -289,13 +279,7 @@ def __init__( """ - if k_endog is None and endog_names is None: - raise ValueError("Either k_endog or endog_names must be provided.") - if k_endog is None: - k_endog = len(endog_names) - if endog_names is None: - endog_names = [f"endog_{i}" for i in range(k_endog)] - + k_endog = validate_names(endog_names, var_name="endog_names", optional=False) self.endog_names = endog_names self.k_endog = k_endog self.k_factors = k_factors @@ -304,17 +288,14 @@ def __init__( self.error_var = error_var self.error_cov_type = error_cov_type - if k_exog is None and exog_names is None: - self.k_exog = 0 - else: + if exog_names is not None: self.shared_exog_states = shared_exog_states self.exog_innovations = exog_innovations - if k_exog is None: - k_exog = len(exog_names) if exog_names is not None else 0 - elif exog_names is None: - exog_names = [f"exog_{i}" for i in range(k_exog)] if k_exog > 0 else None + k_exog = validate_names(exog_names, var_name="exog_names", optional=True) or 0 self.k_exog = k_exog self.exog_names = exog_names + else: + self.k_exog = 0 self.k_exog_states = self.k_exog * self.k_endog if not shared_exog_states else self.k_exog self.exog_flag = self.k_exog > 0 diff --git a/pymc_extras/statespace/models/ETS.py b/pymc_extras/statespace/models/ETS.py index 351e8b94..d674c0a9 100644 --- a/pymc_extras/statespace/models/ETS.py +++ b/pymc_extras/statespace/models/ETS.py @@ -9,7 +9,7 @@ from pytensor.tensor.slinalg import solve_discrete_lyapunov from pymc_extras.statespace.core.statespace import PyMCStateSpace, floatX -from pymc_extras.statespace.models.utilities import make_default_coords +from pymc_extras.statespace.models.utilities import make_default_coords, validate_names from pymc_extras.statespace.utils.constants import ( ALL_STATE_AUX_DIM, ALL_STATE_DIM, @@ -138,12 +138,9 @@ class BayesianETS(PyMCStateSpace): or 'N'. If provided, the model will be initialized from the given order, and the `trend`, `damped_trend`, and `seasonal` arguments will be ignored. - endog_names: str or list of str, Optional + endog_names: str or list of str Names associated with observed states. If a list, the length should be equal to the number of time series to be estimated. - k_endog: int, Optional - Number of time series to estimate. If endog_names are provided, this is ignored and len(endog_names) is - used instead. trend: bool Whether to include a trend component. Setting ``trend=True`` is equivalent to ``order[1] == 'A'``. damped_trend: bool @@ -213,7 +210,6 @@ def __init__( self, order: tuple[str, str, str] | None = None, endog_names: str | list[str] | None = None, - k_endog: int = 1, trend: bool = True, damped_trend: bool = False, seasonal: bool = False, @@ -265,13 +261,8 @@ def __init__( if self.seasonal and self.seasonal_periods is None: raise ValueError("If seasonal is True, seasonal_periods must be provided.") - if endog_names is not None: - endog_names = list(endog_names) - k_endog = len(endog_names) - else: - endog_names = [f"data_{i}" for i in range(k_endog)] if k_endog > 1 else ["data"] - - self.endog_names = endog_names + k_endog = validate_names(endog_names, var_name="endog_names", optional=False) + self.endog_names = list(endog_names) if dense_innovation_covariance and k_endog == 1: dense_innovation_covariance = False diff --git a/pymc_extras/statespace/models/SARIMAX.py b/pymc_extras/statespace/models/SARIMAX.py index 408a8269..869c1580 100644 --- a/pymc_extras/statespace/models/SARIMAX.py +++ b/pymc_extras/statespace/models/SARIMAX.py @@ -12,6 +12,7 @@ make_default_coords, make_harvey_state_names, make_SARIMA_transition_matrix, + validate_names, ) from pymc_extras.statespace.utils.constants import ( ALL_STATE_AUX_DIM, @@ -132,7 +133,6 @@ def __init__( order: tuple[int, int, int], seasonal_order: tuple[int, int, int, int] | None = None, exog_state_names: list[str] | None = None, - k_exog: int | None = None, stationary_initialization: bool = True, filter_type: str = "standard", state_structure: str = "fast", @@ -166,10 +166,6 @@ def __init__( exog_state_names : list[str], optional Names of the exogenous state variables. - k_exog : int, optional - Number of exogenous variables. If provided, must match the length of - `exog_state_names`. - stationary_initialization : bool, default True If true, the initial state and initial state covariance will not be assigned priors. Instead, their steady state values will be used. @@ -212,18 +208,7 @@ def __init__( if seasonal_order is None: seasonal_order = (0, 0, 0, 0) - if exog_state_names is None and k_exog is not None: - exog_state_names = [f"exogenous_{i}" for i in range(k_exog)] - elif exog_state_names is not None and k_exog is None: - k_exog = len(exog_state_names) - elif exog_state_names is not None and k_exog is not None: - if len(exog_state_names) != k_exog: - raise ValueError( - f"Based on provided inputs, expected exog_state_names to have {k_exog} elements, but " - f"found {len(exog_state_names)}" - ) - else: - k_exog = 0 + k_exog = validate_names(exog_state_names, var_name="exog_state_names", optional=True) or 0 self.exog_state_names = exog_state_names self.k_exog = k_exog diff --git a/pymc_extras/statespace/models/VARMAX.py b/pymc_extras/statespace/models/VARMAX.py index eac35a24..c831fa8c 100644 --- a/pymc_extras/statespace/models/VARMAX.py +++ b/pymc_extras/statespace/models/VARMAX.py @@ -9,7 +9,7 @@ from pytensor.tensor.slinalg import solve_discrete_lyapunov from pymc_extras.statespace.core.statespace import PyMCStateSpace -from pymc_extras.statespace.models.utilities import make_default_coords +from pymc_extras.statespace.models.utilities import make_default_coords, validate_names from pymc_extras.statespace.utils.constants import ( ALL_STATE_AUX_DIM, ALL_STATE_DIM, @@ -99,9 +99,7 @@ def __init__( self, order: tuple[int, int], endog_names: list[str] | None = None, - k_endog: int | None = None, exog_state_names: list[str] | dict[str, list[str]] | None = None, - k_exog: int | dict[str, int] | None = None, stationary_initialization: bool = False, filter_type: str = "standard", measurement_error: bool = False, @@ -118,10 +116,7 @@ def __init__( specified order are included. For restricted models, set zeros directly on the priors. endog_names: list of str, optional - Names of the endogenous variables being modeled. Used to generate names for the state and shock coords. If - None, the state names will simply be numbered. - - Exactly one of either ``endog_names`` or ``k_endog`` must be specified. + Names of the endogenous variables being modeled. Used to generate names for the state and shock coords. exog_state_names : list[str] or dict[str, list[str]], optional Names of the exogenous state variables. If a list, all endogenous variables will share the same exogenous @@ -129,12 +124,6 @@ def __init__( exogenous variable names for that endogenous variable. Endogenous variables not included in the dict will be assumed to have no exogenous variables. If None, no exogenous variables will be included. - k_exog : int or dict[str, int], optional - Number of exogenous variables. If an int, all endogenous variables will share the same number of exogenous - variables. If a dict, keys should be the names of the endogenous variables, and values should be the number of - exogenous variables for that endogenous variable. Endogenous variables not included in the dict will be - assumed to have no exogenous variables. If None, no exogenous variables will be included. - stationary_initialization: bool, default False If true, the initial state and initial state covariance will not be assigned priors. Instead, their steady state values will be used. If False, the user is responsible for setting priors on the initial state and @@ -162,62 +151,22 @@ def __init__( to all sampling methods. """ - if (endog_names is None) and (k_endog is None): - raise ValueError("Must specify either endog_names or k_endog") - if (endog_names is not None) and (k_endog is None): - k_endog = len(endog_names) - if (endog_names is None) and (k_endog is not None): - endog_names = [f"observed_{i}" for i in range(k_endog)] - if (endog_names is not None) and (k_endog is not None): - if len(endog_names) != k_endog: - raise ValueError("Length of provided endog_names does not match provided k_endog") + + k_endog = validate_names(endog_names, var_name="endog_names", optional=False) needs_exog_data = False - if k_exog is not None and not isinstance(k_exog, int | dict): - raise ValueError("If not None, k_exog must be either an int or a dict") if exog_state_names is not None and not isinstance(exog_state_names, list | dict): raise ValueError("If not None, exog_state_names must be either a list or a dict") - if k_exog is not None and exog_state_names is not None: - if isinstance(k_exog, int) and isinstance(exog_state_names, list): - if len(exog_state_names) != k_exog: - raise ValueError("Length of exog_state_names does not match provided k_exog") - elif isinstance(k_exog, int) and isinstance(exog_state_names, dict): - raise ValueError( - "If k_exog is an int, exog_state_names must be a list of the same length (or None)" - ) - elif isinstance(k_exog, dict) and isinstance(exog_state_names, list): - raise ValueError( - "If k_exog is a dict, exog_state_names must be a dict as well (or None)" - ) - elif isinstance(k_exog, dict) and isinstance(exog_state_names, dict): - if set(k_exog.keys()) != set(exog_state_names.keys()): - raise ValueError("Keys of k_exog and exog_state_names dicts must match") - if not all( - len(names) == k for names, k in zip(exog_state_names.values(), k_exog.values()) - ): - raise ValueError( - "If both k_endog and exog_state_names are provided, lengths of exog_state_names " - "lists must match corresponding values in k_exog" - ) - needs_exog_data = True - - if k_exog is not None and exog_state_names is None: - if isinstance(k_exog, int): - exog_state_names = [f"exogenous_{i}" for i in range(k_exog)] - elif isinstance(k_exog, dict): - exog_state_names = { - name: [f"{name}_exogenous_{i}" for i in range(k)] for name, k in k_exog.items() - } - needs_exog_data = True - - if k_exog is None and exog_state_names is not None: + if exog_state_names is not None: if isinstance(exog_state_names, list): k_exog = len(exog_state_names) elif isinstance(exog_state_names, dict): k_exog = {name: len(names) for name, names in exog_state_names.items()} needs_exog_data = True + else: + k_exog = None # If exog_state_names is a dict but 1) all endog variables are among the keys, and 2) all values are the same # then we can drop back to the list case. diff --git a/pymc_extras/statespace/models/structural/components/regression.py b/pymc_extras/statespace/models/structural/components/regression.py index 5620b1ea..288a045e 100644 --- a/pymc_extras/statespace/models/structural/components/regression.py +++ b/pymc_extras/statespace/models/structural/components/regression.py @@ -3,6 +3,7 @@ from pytensor import tensor as pt from pymc_extras.statespace.models.structural.core import Component +from pymc_extras.statespace.models.utilities import validate_names from pymc_extras.statespace.utils.constants import TIME_DIM @@ -107,7 +108,6 @@ class RegressionComponent(Component): def __init__( self, - k_exog: int | None = None, name: str | None = "regression", state_names: list[str] | None = None, observed_state_names: list[str] | None = None, @@ -120,7 +120,7 @@ def __init__( observed_state_names = ["data"] self.innovations = innovations - k_exog = self._handle_input_data(k_exog, state_names, name) + k_exog = self._handle_input_data(state_names) k_states = k_exog k_endog = len(observed_state_names) @@ -140,22 +140,8 @@ def __init__( obs_state_idxs=np.ones(k_states), ) - @staticmethod - def _get_state_names(k_exog: int | None, state_names: list[str] | None, name: str): - if k_exog is None and state_names is None: - raise ValueError("Must specify at least one of k_exog or state_names") - if state_names is not None and k_exog is not None: - if len(state_names) != k_exog: - raise ValueError(f"Expected {k_exog} state names, found {len(state_names)}") - elif k_exog is None: - k_exog = len(state_names) - else: - state_names = [f"{name}_{i + 1}" for i in range(k_exog)] - - return k_exog, state_names - - def _handle_input_data(self, k_exog: int, state_names: list[str] | None, name) -> int: - k_exog, state_names = self._get_state_names(k_exog, state_names, name) + def _handle_input_data(self, state_names: list[str] | None) -> int: + k_exog = validate_names(state_names, var_name="state_names", optional=False) self.state_names = state_names return k_exog diff --git a/pymc_extras/statespace/models/utilities.py b/pymc_extras/statespace/models/utilities.py index 33be8d47..7c302d8f 100644 --- a/pymc_extras/statespace/models/utilities.py +++ b/pymc_extras/statespace/models/utilities.py @@ -670,3 +670,11 @@ def get_exog_dims_from_idata(exog_name, idata): exog_dims = None return exog_dims + + +def validate_names(names: list[str], var_name: str, optional: bool = True) -> int | None: + if names is None: + if optional: + return None + raise ValueError(f"Must specify {var_name}") + return len(names) diff --git a/tests/statespace/core/test_statespace.py b/tests/statespace/core/test_statespace.py index bf27868b..8bf8a203 100644 --- a/tests/statespace/core/test_statespace.py +++ b/tests/statespace/core/test_statespace.py @@ -182,9 +182,8 @@ def exog_ss_mod(exog_data): level_trend = st.LevelTrendComponent(name="trend", order=1, innovations_order=[0]) exog = st.RegressionComponent( name="exog", # Name of this exogenous variable component - k_exog=1, # Only one exogenous variable now innovations=False, # Typically fixed effect (no stochastic evolution) - state_names=exog_data[["x1"]].columns.tolist(), + state_names=exog_data[["x1"]].columns.tolist(), # Only one exogenous variable now ) combined_model = level_trend + exog @@ -198,9 +197,8 @@ def exog_ss_mod_mv(exog_data_mv): ) exog = st.RegressionComponent( name="exog", # Name of this exogenous variable component - k_exog=1, # Only one exogenous variable now innovations=False, # Typically fixed effect (no stochastic evolution) - state_names=exog_data_mv[["x1"]].columns.tolist(), + state_names=exog_data_mv[["x1"]].columns.tolist(), # Only one exogenous variable now observed_state_names=["y1", "y2"], ) diff --git a/tests/statespace/filters/test_distributions.py b/tests/statespace/filters/test_distributions.py index ac1be1cb..cba37bf3 100644 --- a/tests/statespace/filters/test_distributions.py +++ b/tests/statespace/filters/test_distributions.py @@ -193,7 +193,7 @@ def test_lgss_distribution_with_dims(output_name, ss_mod_me, pymc_model_2): def test_lgss_with_time_varying_inputs(output_name, rng): X = rng.random(size=(10, 3), dtype=floatX) ss_mod = structural.LevelTrendComponent() + structural.RegressionComponent( - name="exog", k_exog=3 + name="exog", state_names=["exog_0", "exog_1", "exog_2"] ) mod = ss_mod.build("data", verbose=False) diff --git a/tests/statespace/models/test_DFM.py b/tests/statespace/models/test_DFM.py index 8294c3aa..dc7e6f08 100644 --- a/tests/statespace/models/test_DFM.py +++ b/tests/statespace/models/test_DFM.py @@ -131,7 +131,7 @@ def test_DFM_update_matches_statsmodels(data, k_factors, factor_order, error_ord k_factors=k_factors, factor_order=factor_order, error_order=error_order, - k_endog=data.shape[1], + endog_names=data.columns.to_list(), measurement_error=False, error_var=error_var, verbose=False, @@ -252,10 +252,10 @@ def test_DFM_exog_betas_random_walk(n_obs, n_runs): dfm_mod = BayesianDynamicFactor( k_factors=1, factor_order=1, - k_endog=2, + endog_names=["endogenous_0", "endogenous_1"], error_order=1, error_var=False, - k_exog=2, + exog_names=["exogenous_0", "exogenous_1"], shared_exog_states=False, exog_innovations=True, error_cov_type="diagonal", @@ -310,9 +310,9 @@ def test_DFM_exog_shared_vs_not(shared): dfm_mod = BayesianDynamicFactor( k_factors=1, factor_order=1, - k_endog=k_endog, + endog_names=["endogenous_0", "endogenous_1"], error_order=1, - k_exog=k_exog, + exog_names=["exogenous_0", "exogenous_1"], shared_exog_states=shared, exog_innovations=False, error_cov_type="diagonal", @@ -382,7 +382,6 @@ def test_static_factor_no_ar_no_exog_diagonal_error(self): mod = BayesianDynamicFactor( k_factors=1, factor_order=0, - k_endog=3, endog_names=["y0", "y1", "y2"], error_order=0, error_var=False, @@ -423,7 +422,6 @@ def test_dynamic_factor_ar1_error_diagonal_error(self): mod = BayesianDynamicFactor( k_factors=k_factors, factor_order=factor_order, - k_endog=k_endog, endog_names=["y0", "y1", "y2"], error_order=error_order, error_var=error_var, @@ -493,7 +491,6 @@ def test_dynamic_factor_ar2_error_var_unstructured(self): mod = BayesianDynamicFactor( k_factors=k_factors, factor_order=factor_order, - k_endog=k_endog, endog_names=["y0", "y1", "y2"], error_order=error_order, error_var=error_var, @@ -565,11 +562,9 @@ def test_exog_shared_exog_states_exog_innovations(self): mod = BayesianDynamicFactor( k_factors=k_factors, factor_order=factor_order, - k_endog=k_endog, endog_names=["y0", "y1", "y2"], error_order=error_order, error_var=error_var, - k_exog=2, exog_names=["x0", "x1"], shared_exog_states=shared_exog_states, exog_innovations=True, @@ -652,11 +647,9 @@ def test_exog_not_shared_no_exog_innovations(self): mod = BayesianDynamicFactor( k_factors=k_factors, factor_order=factor_order, - k_endog=k_endog, endog_names=["y0", "y1", "y2"], error_order=error_order, error_var=error_var, - k_exog=k_exog, exog_names=["x0"], shared_exog_states=shared_exog_states, exog_innovations=False, diff --git a/tests/statespace/models/test_ETS.py b/tests/statespace/models/test_ETS.py index 6e2c63c5..0bd899cf 100644 --- a/tests/statespace/models/test_ETS.py +++ b/tests/statespace/models/test_ETS.py @@ -21,27 +21,27 @@ def data(): def test_invalid_order_raises(): # Order must be length 3 with pytest.raises(ValueError, match="Order must be a tuple of three strings"): - BayesianETS(order=("A", "N")) + BayesianETS(order=("A", "N"), endog_names=["y"]) # Order must be strings with pytest.raises(ValueError, match="Order must be a tuple of three strings"): - BayesianETS(order=(2, 1, 1)) + BayesianETS(order=(2, 1, 1), endog_names=["y"]) # Only additive errors allowed with pytest.raises(ValueError, match="Only additive errors are supported"): - BayesianETS(order=("M", "N", "N")) + BayesianETS(order=("M", "N", "N"), endog_names=["y"]) # Trend must be A or Ad with pytest.raises(ValueError, match="Invalid trend specification"): - BayesianETS(order=("A", "P", "N")) + BayesianETS(order=("A", "P", "N"), endog_names=["y"]) # Seasonal must be A or N with pytest.raises(ValueError, match="Invalid seasonal specification"): - BayesianETS(order=("A", "Ad", "M")) + BayesianETS(order=("A", "Ad", "M"), endog_names=["y"]) # seasonal_periods must be provided if seasonal is requested with pytest.raises(ValueError, match="If seasonal is True, seasonal_periods must be provided."): - BayesianETS(order=("A", "Ad", "A")) + BayesianETS(order=("A", "Ad", "A"), endog_names=["y"]) orders = ( @@ -84,20 +84,20 @@ def test_invalid_order_raises(): "order, expected_flags", zip(orders, order_expected_flags), ids=order_names ) def test_order_flags(order, expected_flags): - mod = BayesianETS(order=order, seasonal_periods=4) + mod = BayesianETS(order=order, endog_names=["y"], seasonal_periods=4) for key, value in expected_flags.items(): assert getattr(mod, key) == value def test_mode_argument(): # Mode argument should be passed to the parent class - mod = BayesianETS(order=("A", "N", "N"), mode="FAST_RUN") + mod = BayesianETS(order=("A", "N", "N"), endog_names=["y"], mode="FAST_RUN") assert mod.mode == "FAST_RUN" @pytest.mark.parametrize("order, expected_params", zip(orders, order_params), ids=order_names) def test_param_info(order: tuple[str, str, str], expected_params): - mod = BayesianETS(order=order, seasonal_periods=4) + mod = BayesianETS(order=order, endog_names=["y"], seasonal_periods=4) all_expected_params = [*expected_params, "sigma_state", "P0"] assert all(param in mod.param_names for param in all_expected_params) @@ -117,6 +117,7 @@ def test_statespace_matrices( seasonal_periods = np.random.randint(3, 12) mod = BayesianETS( order=order, + endog_names=["y"], seasonal_periods=seasonal_periods, measurement_error=True, use_transformed_parameterization=use_transformed, @@ -211,6 +212,7 @@ def test_statespace_matches_statsmodels(rng, order: tuple[str, str, str], params data = rng.normal(size=(100,)) mod = BayesianETS( order=order, + endog_names=["y"], seasonal_periods=seasonal_periods, measurement_error=False, use_transformed_parameterization=True, @@ -281,6 +283,7 @@ def test_ETS_with_multiple_endog(rng, order, params, dense_cov): single_mod = BayesianETS( order=order, + endog_names=["y"], seasonal_periods=seasonal_periods, measurement_error=False, use_transformed_parameterization=True, @@ -381,6 +384,7 @@ def test_ETS_with_multiple_endog(rng, order, params, dense_cov): def test_ETS_stationary_initialization(): mod = BayesianETS( order=("A", "Ad", "A"), + endog_names=["y"], seasonal_periods=4, stationary_initialization=True, initialization_dampening=0.66, diff --git a/tests/statespace/models/test_SARIMAX.py b/tests/statespace/models/test_SARIMAX.py index c54fa18d..6b03b710 100644 --- a/tests/statespace/models/test_SARIMAX.py +++ b/tests/statespace/models/test_SARIMAX.py @@ -423,7 +423,13 @@ def test_invalid_order_raises(order, name): def test_SARIMA_with_exogenous(rng, mock_sample): - ss_mod = BayesianSARIMAX(order=(3, 0, 1), seasonal_order=(1, 0, 0, 12), k_exog=2) + # Note something odd is happening with stationary_initialization where the matrix is not singular + ss_mod = BayesianSARIMAX( + order=(3, 0, 1), + seasonal_order=(1, 0, 0, 12), + stationary_initialization=False, + exog_state_names=["exogenous_0", "exogenous_1"], + ) assert ss_mod.param_dims["beta_exog"] == ("exogenous",) assert ss_mod.data_names == ["exogenous_data"] @@ -442,6 +448,9 @@ def test_SARIMA_with_exogenous(rng, mock_sample): with pm.Model(coords=ss_mod.coords) as pymc_mod: pm.Data("exogenous_data", data_val, dims=["time", "exogenous"]) + x0 = pm.Deterministic("x0", pt.ones(15), dims=["state"]) + P0 = pm.Deterministic("P0", pt.eye(15), dims=["state", "state_aux"]) + ar_params = pm.Normal("ar_params", dims=["lag_ar"]) ma_params = pm.Normal("ma_params", dims=["lag_ma"]) seasonal_ar_params = pm.Normal("seasonal_ar_params", dims=["seasonal_lag_ar"]) diff --git a/tests/statespace/models/test_VARMAX.py b/tests/statespace/models/test_VARMAX.py index e16fd5ba..19178cc8 100644 --- a/tests/statespace/models/test_VARMAX.py +++ b/tests/statespace/models/test_VARMAX.py @@ -80,7 +80,7 @@ def idata(pymc_mod, rng): def test_mode_argument(): # Mode argument should be passed to the parent class - mod = BayesianVARMAX(k_endog=2, order=(3, 0), mode="FAST_RUN", verbose=False) + mod = BayesianVARMAX(endog_names=["y1", "y2"], order=(3, 0), mode="FAST_RUN", verbose=False) assert mod.mode == "FAST_RUN" @@ -90,7 +90,9 @@ def test_mode_argument(): def test_VARMAX_param_counts_match_statsmodels(data, order, var): p, q = order - mod = BayesianVARMAX(k_endog=data.shape[1], order=(p, q), verbose=False) + mod = BayesianVARMAX( + endog_names=["realgdp", "realcons", "realinv"], order=(p, q), verbose=False + ) sm_var = sm.tsa.VARMAX(data, order=(p, q)) count = mod.param_counts[var] @@ -122,7 +124,7 @@ def test_VARMAX_update_matches_statsmodels(data, order, rng): res = sm_var.fit_constrained(param_d) mod = BayesianVARMAX( - k_endog=data.shape[1], + endog_names=["realgdp", "realcons", "realinv"], order=(p, q), verbose=False, measurement_error=False, @@ -199,26 +201,9 @@ def test_forecast(varma_mod, idata, rng): class TestVARMAXWithExogenous: - def test_create_varmax_with_exogenous_k_exog_int(self, data): - mod = BayesianVARMAX( - k_endog=data.shape[1], - order=(1, 0), - k_exog=2, - verbose=False, - measurement_error=False, - stationary_initialization=False, - ) - assert mod.k_exog == 2 - assert mod.exog_state_names == ["exogenous_0", "exogenous_1"] - assert mod.data_names == ["exogenous_data"] - assert mod.param_dims["beta_exog"] == ("observed_state", "exogenous") - assert mod.coords["exogenous"] == ["exogenous_0", "exogenous_1"] - assert mod.param_info["beta_exog"]["shape"] == (mod.k_endog, 2) - assert mod.param_info["beta_exog"]["dims"] == ("observed_state", "exogenous") - def test_create_varmax_with_exogenous_list_of_names(self, data): mod = BayesianVARMAX( - k_endog=data.shape[1], + endog_names=["realgdp", "realcons", "realinv"], order=(1, 0), exog_state_names=["foo", "bar"], verbose=False, @@ -235,9 +220,8 @@ def test_create_varmax_with_exogenous_list_of_names(self, data): def test_create_varmax_with_exogenous_both_defined_correctly(self, data): mod = BayesianVARMAX( - k_endog=data.shape[1], + endog_names=["realgdp", "realcons", "realinv"], order=(1, 0), - k_exog=2, exog_state_names=["a", "b"], verbose=False, measurement_error=False, @@ -251,47 +235,6 @@ def test_create_varmax_with_exogenous_both_defined_correctly(self, data): assert mod.param_info["beta_exog"]["shape"] == (mod.k_endog, 2) assert mod.param_info["beta_exog"]["dims"] == ("observed_state", "exogenous") - def test_create_varmax_with_exogenous_k_exog_dict(self, data): - k_exog = {"observed_0": 2, "observed_1": 1, "observed_2": 0} - mod = BayesianVARMAX( - endog_names=["observed_0", "observed_1", "observed_2"], - order=(1, 0), - k_exog=k_exog, - verbose=False, - measurement_error=False, - stationary_initialization=False, - ) - assert mod.k_exog == k_exog - assert mod.exog_state_names == { - "observed_0": ["observed_0_exogenous_0", "observed_0_exogenous_1"], - "observed_1": ["observed_1_exogenous_0"], - "observed_2": [], - } - assert mod.data_names == [ - "observed_0_exogenous_data", - "observed_1_exogenous_data", - "observed_2_exogenous_data", - ] - assert mod.param_dims["beta_observed_0"] == ("exogenous_observed_0",) - assert mod.param_dims["beta_observed_1"] == ("exogenous_observed_1",) - assert ( - "beta_observed_2" not in mod.param_dims - or mod.param_info.get("beta_observed_2") is None - or mod.param_info.get("beta_observed_2", {}).get("shape", (0,))[0] == 0 - ) - - assert mod.coords["exogenous_observed_0"] == [ - "observed_0_exogenous_0", - "observed_0_exogenous_1", - ] - assert mod.coords["exogenous_observed_1"] == ["observed_1_exogenous_0"] - assert "exogenous_observed_2" in mod.coords and mod.coords["exogenous_observed_2"] == [] - - assert mod.param_info["beta_observed_0"]["shape"] == (2,) - assert mod.param_info["beta_observed_0"]["dims"] == ("exogenous_observed_0",) - assert mod.param_info["beta_observed_1"]["shape"] == (1,) - assert mod.param_info["beta_observed_1"]["dims"] == ("exogenous_observed_1",) - def test_create_varmax_with_exogenous_exog_names_dict(self, data): exog_state_names = {"observed_0": ["a", "b"], "observed_1": ["c"], "observed_2": []} mod = BayesianVARMAX( @@ -326,30 +269,6 @@ def test_create_varmax_with_exogenous_exog_names_dict(self, data): assert mod.param_info["beta_observed_1"]["shape"] == (1,) assert mod.param_info["beta_observed_1"]["dims"] == ("exogenous_observed_1",) - def test_create_varmax_with_exogenous_both_dict_correct(self, data): - k_exog = {"observed_0": 2, "observed_1": 1} - exog_state_names = {"observed_0": ["a", "b"], "observed_1": ["c"]} - mod = BayesianVARMAX( - endog_names=["observed_0", "observed_1"], - order=(1, 0), - k_exog=k_exog, - exog_state_names=exog_state_names, - verbose=False, - measurement_error=False, - stationary_initialization=False, - ) - assert mod.k_exog == k_exog - assert mod.exog_state_names == exog_state_names - assert mod.data_names == ["observed_0_exogenous_data", "observed_1_exogenous_data"] - assert mod.param_dims["beta_observed_0"] == ("exogenous_observed_0",) - assert mod.param_dims["beta_observed_1"] == ("exogenous_observed_1",) - assert mod.coords["exogenous_observed_0"] == ["a", "b"] - assert mod.coords["exogenous_observed_1"] == ["c"] - assert mod.param_info["beta_observed_0"]["shape"] == (2,) - assert mod.param_info["beta_observed_0"]["dims"] == ("exogenous_observed_0",) - assert mod.param_info["beta_observed_1"]["shape"] == (1,) - assert mod.param_info["beta_observed_1"]["dims"] == ("exogenous_observed_1",) - def test_create_varmax_with_exogenous_dict_converts_to_list(self, data): exog_state_names = { "observed_0": ["a", "b"], @@ -373,83 +292,12 @@ def test_create_varmax_with_exogenous_dict_converts_to_list(self, data): assert mod.param_info["beta_exog"]["shape"] == (mod.k_endog, 2) assert mod.param_info["beta_exog"]["dims"] == ("observed_state", "exogenous") - def test_create_varmax_with_exogenous_raises_if_args_disagree(self, data): - # List case - with pytest.raises( - ValueError, match="Length of exog_state_names does not match provided k_exog" - ): - BayesianVARMAX( - k_endog=2, - order=(1, 0), - k_exog=3, - exog_state_names=["a", "b"], - verbose=False, - measurement_error=False, - stationary_initialization=False, - ) - - # Dict case - with pytest.raises( - ValueError, - match="If k_exog is an int, exog_state_names must be a list of the same length", - ): - BayesianVARMAX( - k_endog=2, - order=(1, 0), - k_exog=2, - exog_state_names={"observed_0": ["a"], "observed_1": ["b"]}, - verbose=False, - measurement_error=False, - stationary_initialization=False, - ) - - # dict + list - with pytest.raises( - ValueError, match="If k_exog is a dict, exog_state_names must be a dict as well" - ): - BayesianVARMAX( - endog_names=["observed_0", "observed_1"], - order=(1, 0), - k_exog={"observed_0": 1, "observed_1": 1}, - exog_state_names=["a", "b"], - verbose=False, - measurement_error=False, - stationary_initialization=False, - ) - - # Dict/dict, key mismatch - with pytest.raises( - ValueError, match="Keys of k_exog and exog_state_names dicts must match" - ): - BayesianVARMAX( - endog_names=["observed_0", "observed_1"], - order=(1, 0), - k_exog={"observed_0": 1, "observed_1": 1}, - exog_state_names={"observed_0": ["a"], "observed_2": ["b"]}, - verbose=False, - measurement_error=False, - stationary_initialization=False, - ) - - # Dict/dict, length mismatch - with pytest.raises(ValueError, match="lengths of exog_state_names lists must match"): - BayesianVARMAX( - endog_names=["observed_0", "observed_1"], - order=(1, 0), - k_exog={"observed_0": 2, "observed_1": 1}, - exog_state_names={"observed_0": ["a"], "observed_1": ["b"]}, - verbose=False, - measurement_error=False, - stationary_initialization=False, - ) - - def _build_varmax(self, df, k_exog, exog_state_names, exog_data): + def _build_varmax(self, df, exog_state_names, exog_data): endog_names = df.columns.values.tolist() mod = BayesianVARMAX( endog_names=endog_names, order=(1, 0), - k_exog=k_exog, exog_state_names=exog_state_names, verbose=False, measurement_error=False, @@ -488,16 +336,15 @@ def _build_varmax(self, df, k_exog, exog_state_names, exog_data): return mod, m @pytest.mark.parametrize( - "k_exog, exog_state_names", + "exog_state_names", [ - (2, None), - (None, ["foo", "bar"]), - (None, {"y1": ["a", "b"], "y2": ["c"]}), + (["foo", "bar"]), + ({"y1": ["a", "b"], "y2": ["c"]}), ], - ids=["k_exog_int", "exog_state_names_list", "exog_state_names_dict"], + ids=["exog_state_names_list", "exog_state_names_dict"], ) @pytest.mark.filterwarnings("ignore::UserWarning") - def test_varmax_with_exog(self, rng, k_exog, exog_state_names): + def test_varmax_with_exog(self, rng, exog_state_names): endog_names = ["y1", "y2", "y3"] n_obs = 50 time_idx = pd.date_range(start="2020-01-01", periods=n_obs, freq="D") @@ -515,16 +362,15 @@ def test_varmax_with_exog(self, rng, k_exog, exog_state_names): for name, exog_names in exog_state_names.items() } else: - exog_names = exog_state_names or [f"exogenous_{i}" for i in range(k_exog)] exog_data = { "exogenous_data": pd.DataFrame( - rng.normal(size=(n_obs, k_exog or len(exog_state_names))).astype(floatX), - columns=exog_names, + rng.normal(size=(n_obs, len(exog_state_names))).astype(floatX), + columns=exog_state_names, index=time_idx, ) } - mod, m = self._build_varmax(df, k_exog, exog_state_names, exog_data) + mod, m = self._build_varmax(df, exog_state_names, exog_data) with freeze_dims_and_data(m): prior = pm.sample_prior_predictive( @@ -534,7 +380,7 @@ def test_varmax_with_exog(self, rng, k_exog, exog_state_names): prior_cond = mod.sample_conditional_prior(prior, mvn_method="eigh") beta_dot_data = prior_cond.filtered_prior_observed.values - prior_cond.filtered_prior.values - if isinstance(exog_state_names, list) or k_exog is not None: + if isinstance(exog_state_names, list): beta = prior.prior.beta_exog assert beta.shape == (1, 10, 3, 2) @@ -569,8 +415,7 @@ def test_forecast_with_exog(self, rng): mod, m = self._build_varmax( df, - k_exog=2, - exog_state_names=None, + exog_state_names=["exogenous_0", "exogenous_1"], exog_data={ "exogenous_data": pd.DataFrame( rng.normal(size=(n_obs, 2)).astype(floatX),