Skip to content
Open
35 changes: 8 additions & 27 deletions pymc_extras/statespace/models/DFM.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pytensor.tensor as pt

from pymc_extras.statespace.core.statespace import PyMCStateSpace
from pymc_extras.statespace.models.utilities import make_default_coords
from pymc_extras.statespace.models.utilities import make_default_coords, validate_names
from pymc_extras.statespace.utils.constants import (
ALL_STATE_AUX_DIM,
ALL_STATE_DIM,
Expand Down Expand Up @@ -224,9 +224,7 @@ def __init__(
self,
k_factors: int,
factor_order: int,
k_endog: int | None = None,
endog_names: Sequence[str] | None = None,
k_exog: int | None = None,
exog_names: Sequence[str] | None = None,
shared_exog_states: bool = False,
exog_innovations: bool = False,
Expand All @@ -249,19 +247,11 @@ def __init__(
and are modeled as a white noise process, i.e., :math:`f_t = \varepsilon_{f,t}`.
Therefore, the state vector will include one state per factor and "factor_ar" will not exist.

k_endog : int, optional
Number of observed time series. If not provided, the number of observed series will be inferred from `endog_names`.
At least one of `k_endog` or `endog_names` must be provided.

endog_names : list of str, optional
Names of the observed time series. If not provided, default names will be generated as `endog_1`, `endog_2`, ..., `endog_k` based on `k_endog`.
At least one of `k_endog` or `endog_names` must be provided.

k_exog : int, optional
Number of exogenous variables. If not provided, the model will not have exogenous variables.
Names of the observed time series. Required: although the argument defaults to None, a ValueError is raised if it is not provided.

exog_names : Sequence[str], optional
Names of the exogenous variables. If not provided, but `k_exog` is specified, default names will be generated as `exog_1`, `exog_2`, ..., `exog_k`.
Names of the exogenous variables.

shared_exog_states: bool, optional
Whether exogenous latent states are shared across the observed states. If True, there will be only one set of exogenous latent
Expand Down Expand Up @@ -289,13 +279,7 @@ def __init__(

"""

if k_endog is None and endog_names is None:
raise ValueError("Either k_endog or endog_names must be provided.")
if k_endog is None:
k_endog = len(endog_names)
if endog_names is None:
endog_names = [f"endog_{i}" for i in range(k_endog)]

k_endog = validate_names(endog_names, var_name="endog_names", optional=False)
self.endog_names = endog_names
self.k_endog = k_endog
self.k_factors = k_factors
Expand All @@ -304,17 +288,14 @@ def __init__(
self.error_var = error_var
self.error_cov_type = error_cov_type

if k_exog is None and exog_names is None:
self.k_exog = 0
else:
if exog_names is not None:
self.shared_exog_states = shared_exog_states
self.exog_innovations = exog_innovations
if k_exog is None:
k_exog = len(exog_names) if exog_names is not None else 0
elif exog_names is None:
exog_names = [f"exog_{i}" for i in range(k_exog)] if k_exog > 0 else None
k_exog = validate_names(exog_names, var_name="exog_names", optional=True) or 0
self.k_exog = k_exog
self.exog_names = exog_names
else:
self.k_exog = 0

self.k_exog_states = self.k_exog * self.k_endog if not shared_exog_states else self.k_exog
self.exog_flag = self.k_exog > 0
Expand Down
17 changes: 4 additions & 13 deletions pymc_extras/statespace/models/ETS.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pytensor.tensor.slinalg import solve_discrete_lyapunov

from pymc_extras.statespace.core.statespace import PyMCStateSpace, floatX
from pymc_extras.statespace.models.utilities import make_default_coords
from pymc_extras.statespace.models.utilities import make_default_coords, validate_names
from pymc_extras.statespace.utils.constants import (
ALL_STATE_AUX_DIM,
ALL_STATE_DIM,
Expand Down Expand Up @@ -138,12 +138,9 @@ class BayesianETS(PyMCStateSpace):
or 'N'.
If provided, the model will be initialized from the given order, and the `trend`, `damped_trend`, and `seasonal`
arguments will be ignored.
endog_names: str or list of str, Optional
endog_names: str or list of str
Names associated with observed states. If a list, the length should be equal to the number of time series
to be estimated.
k_endog: int, Optional
Number of time series to estimate. If endog_names are provided, this is ignored and len(endog_names) is
used instead.
trend: bool
Whether to include a trend component. Setting ``trend=True`` is equivalent to ``order[1] == 'A'``.
damped_trend: bool
Expand Down Expand Up @@ -213,7 +210,6 @@ def __init__(
self,
order: tuple[str, str, str] | None = None,
endog_names: str | list[str] | None = None,
k_endog: int = 1,
trend: bool = True,
damped_trend: bool = False,
seasonal: bool = False,
Expand Down Expand Up @@ -265,13 +261,8 @@ def __init__(
if self.seasonal and self.seasonal_periods is None:
raise ValueError("If seasonal is True, seasonal_periods must be provided.")

if endog_names is not None:
endog_names = list(endog_names)
k_endog = len(endog_names)
else:
endog_names = [f"data_{i}" for i in range(k_endog)] if k_endog > 1 else ["data"]

self.endog_names = endog_names
k_endog = validate_names(endog_names, var_name="endog_names", optional=False)
self.endog_names = list(endog_names)

if dense_innovation_covariance and k_endog == 1:
dense_innovation_covariance = False
Expand Down
19 changes: 2 additions & 17 deletions pymc_extras/statespace/models/SARIMAX.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
make_default_coords,
make_harvey_state_names,
make_SARIMA_transition_matrix,
validate_names,
)
from pymc_extras.statespace.utils.constants import (
ALL_STATE_AUX_DIM,
Expand Down Expand Up @@ -132,7 +133,6 @@ def __init__(
order: tuple[int, int, int],
seasonal_order: tuple[int, int, int, int] | None = None,
exog_state_names: list[str] | None = None,
k_exog: int | None = None,
stationary_initialization: bool = True,
filter_type: str = "standard",
state_structure: str = "fast",
Expand Down Expand Up @@ -166,10 +166,6 @@ def __init__(
exog_state_names : list[str], optional
Names of the exogenous state variables.

k_exog : int, optional
Number of exogenous variables. If provided, must match the length of
`exog_state_names`.

stationary_initialization : bool, default True
If true, the initial state and initial state covariance will not be assigned priors. Instead, their steady
state values will be used.
Expand Down Expand Up @@ -212,18 +208,7 @@ def __init__(
if seasonal_order is None:
seasonal_order = (0, 0, 0, 0)

if exog_state_names is None and k_exog is not None:
exog_state_names = [f"exogenous_{i}" for i in range(k_exog)]
elif exog_state_names is not None and k_exog is None:
k_exog = len(exog_state_names)
elif exog_state_names is not None and k_exog is not None:
if len(exog_state_names) != k_exog:
raise ValueError(
f"Based on provided inputs, expected exog_state_names to have {k_exog} elements, but "
f"found {len(exog_state_names)}"
)
else:
k_exog = 0
k_exog = validate_names(exog_state_names, var_name="exog_state_names", optional=True) or 0

self.exog_state_names = exog_state_names
self.k_exog = k_exog
Expand Down
65 changes: 7 additions & 58 deletions pymc_extras/statespace/models/VARMAX.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pytensor.tensor.slinalg import solve_discrete_lyapunov

from pymc_extras.statespace.core.statespace import PyMCStateSpace
from pymc_extras.statespace.models.utilities import make_default_coords
from pymc_extras.statespace.models.utilities import make_default_coords, validate_names
from pymc_extras.statespace.utils.constants import (
ALL_STATE_AUX_DIM,
ALL_STATE_DIM,
Expand Down Expand Up @@ -99,9 +99,7 @@ def __init__(
self,
order: tuple[int, int],
endog_names: list[str] | None = None,
k_endog: int | None = None,
exog_state_names: list[str] | dict[str, list[str]] | None = None,
k_exog: int | dict[str, int] | None = None,
stationary_initialization: bool = False,
filter_type: str = "standard",
measurement_error: bool = False,
Expand All @@ -118,23 +116,14 @@ def __init__(
specified order are included. For restricted models, set zeros directly on the priors.

endog_names: list of str, optional
Names of the endogenous variables being modeled. Used to generate names for the state and shock coords. If
None, the state names will simply be numbered.

Exactly one of either ``endog_names`` or ``k_endog`` must be specified.
Names of the endogenous variables being modeled. Used to generate names for the state and shock coords. Required: although the argument defaults to None, a ValueError is raised if it is not provided.

exog_state_names : list[str] or dict[str, list[str]], optional
Names of the exogenous state variables. If a list, all endogenous variables will share the same exogenous
variables. If a dict, keys should be the names of the endogenous variables, and values should be lists of the
exogenous variable names for that endogenous variable. Endogenous variables not included in the dict will
be assumed to have no exogenous variables. If None, no exogenous variables will be included.

k_exog : int or dict[str, int], optional
Number of exogenous variables. If an int, all endogenous variables will share the same number of exogenous
variables. If a dict, keys should be the names of the endogenous variables, and values should be the number of
exogenous variables for that endogenous variable. Endogenous variables not included in the dict will be
assumed to have no exogenous variables. If None, no exogenous variables will be included.

stationary_initialization: bool, default False
If true, the initial state and initial state covariance will not be assigned priors. Instead, their steady
state values will be used. If False, the user is responsible for setting priors on the initial state and
Expand Down Expand Up @@ -162,62 +151,22 @@ def __init__(
to all sampling methods.

"""
if (endog_names is None) and (k_endog is None):
raise ValueError("Must specify either endog_names or k_endog")
if (endog_names is not None) and (k_endog is None):
k_endog = len(endog_names)
if (endog_names is None) and (k_endog is not None):
endog_names = [f"observed_{i}" for i in range(k_endog)]
if (endog_names is not None) and (k_endog is not None):
if len(endog_names) != k_endog:
raise ValueError("Length of provided endog_names does not match provided k_endog")

k_endog = validate_names(endog_names, var_name="endog_names", optional=False)

needs_exog_data = False

if k_exog is not None and not isinstance(k_exog, int | dict):
raise ValueError("If not None, k_exog must be either an int or a dict")
if exog_state_names is not None and not isinstance(exog_state_names, list | dict):
raise ValueError("If not None, exog_state_names must be either a list or a dict")

if k_exog is not None and exog_state_names is not None:
if isinstance(k_exog, int) and isinstance(exog_state_names, list):
if len(exog_state_names) != k_exog:
raise ValueError("Length of exog_state_names does not match provided k_exog")
elif isinstance(k_exog, int) and isinstance(exog_state_names, dict):
raise ValueError(
"If k_exog is an int, exog_state_names must be a list of the same length (or None)"
)
elif isinstance(k_exog, dict) and isinstance(exog_state_names, list):
raise ValueError(
"If k_exog is a dict, exog_state_names must be a dict as well (or None)"
)
elif isinstance(k_exog, dict) and isinstance(exog_state_names, dict):
if set(k_exog.keys()) != set(exog_state_names.keys()):
raise ValueError("Keys of k_exog and exog_state_names dicts must match")
if not all(
len(names) == k for names, k in zip(exog_state_names.values(), k_exog.values())
):
raise ValueError(
"If both k_endog and exog_state_names are provided, lengths of exog_state_names "
"lists must match corresponding values in k_exog"
)
needs_exog_data = True

if k_exog is not None and exog_state_names is None:
if isinstance(k_exog, int):
exog_state_names = [f"exogenous_{i}" for i in range(k_exog)]
elif isinstance(k_exog, dict):
exog_state_names = {
name: [f"{name}_exogenous_{i}" for i in range(k)] for name, k in k_exog.items()
}
needs_exog_data = True

if k_exog is None and exog_state_names is not None:
if exog_state_names is not None:
if isinstance(exog_state_names, list):
k_exog = len(exog_state_names)
elif isinstance(exog_state_names, dict):
k_exog = {name: len(names) for name, names in exog_state_names.items()}
needs_exog_data = True
else:
k_exog = None

# If exog_state_names is a dict but 1) all endog variables are among the keys, and 2) all values are the same
# then we can drop back to the list case.
Expand Down
22 changes: 4 additions & 18 deletions pymc_extras/statespace/models/structural/components/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from pytensor import tensor as pt

from pymc_extras.statespace.models.structural.core import Component
from pymc_extras.statespace.models.utilities import validate_names
from pymc_extras.statespace.utils.constants import TIME_DIM


Expand Down Expand Up @@ -107,7 +108,6 @@ class RegressionComponent(Component):

def __init__(
self,
k_exog: int | None = None,
name: str | None = "regression",
state_names: list[str] | None = None,
observed_state_names: list[str] | None = None,
Expand All @@ -120,7 +120,7 @@ def __init__(
observed_state_names = ["data"]

self.innovations = innovations
k_exog = self._handle_input_data(k_exog, state_names, name)
k_exog = self._handle_input_data(state_names)

k_states = k_exog
k_endog = len(observed_state_names)
Expand All @@ -140,22 +140,8 @@ def __init__(
obs_state_idxs=np.ones(k_states),
)

@staticmethod
def _get_state_names(k_exog: int | None, state_names: list[str] | None, name: str):
if k_exog is None and state_names is None:
raise ValueError("Must specify at least one of k_exog or state_names")
if state_names is not None and k_exog is not None:
if len(state_names) != k_exog:
raise ValueError(f"Expected {k_exog} state names, found {len(state_names)}")
elif k_exog is None:
k_exog = len(state_names)
else:
state_names = [f"{name}_{i + 1}" for i in range(k_exog)]

return k_exog, state_names

def _handle_input_data(self, k_exog: int, state_names: list[str] | None, name) -> int:
k_exog, state_names = self._get_state_names(k_exog, state_names, name)
def _handle_input_data(self, state_names: list[str] | None) -> int:
k_exog = validate_names(state_names, var_name="state_names", optional=False)
self.state_names = state_names

return k_exog
Expand Down
8 changes: 8 additions & 0 deletions pymc_extras/statespace/models/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -670,3 +670,11 @@ def get_exog_dims_from_idata(exog_name, idata):
exog_dims = None

return exog_dims


def validate_names(names: list[str], var_name: str, optional: bool = True) -> int | None:
if names is None:
if optional:
return None
raise ValueError(f"Must specify {var_name}")
return len(names)
6 changes: 2 additions & 4 deletions tests/statespace/core/test_statespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,8 @@ def exog_ss_mod(exog_data):
level_trend = st.LevelTrendComponent(name="trend", order=1, innovations_order=[0])
exog = st.RegressionComponent(
name="exog", # Name of this exogenous variable component
k_exog=1, # Only one exogenous variable now
innovations=False, # Typically fixed effect (no stochastic evolution)
state_names=exog_data[["x1"]].columns.tolist(),
state_names=exog_data[["x1"]].columns.tolist(), # Only one exogenous variable now
)

combined_model = level_trend + exog
Expand All @@ -198,9 +197,8 @@ def exog_ss_mod_mv(exog_data_mv):
)
exog = st.RegressionComponent(
name="exog", # Name of this exogenous variable component
k_exog=1, # Only one exogenous variable now
innovations=False, # Typically fixed effect (no stochastic evolution)
state_names=exog_data_mv[["x1"]].columns.tolist(),
state_names=exog_data_mv[["x1"]].columns.tolist(), # Only one exogenous variable now
observed_state_names=["y1", "y2"],
)

Expand Down
2 changes: 1 addition & 1 deletion tests/statespace/filters/test_distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ def test_lgss_distribution_with_dims(output_name, ss_mod_me, pymc_model_2):
def test_lgss_with_time_varying_inputs(output_name, rng):
X = rng.random(size=(10, 3), dtype=floatX)
ss_mod = structural.LevelTrendComponent() + structural.RegressionComponent(
name="exog", k_exog=3
name="exog", state_names=["exog_0", "exog_1", "exog_2"]
)
mod = ss_mod.build("data", verbose=False)

Expand Down
Loading