Skip to content

ENH: Add Hausman specification test #249

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
132 changes: 130 additions & 2 deletions linearmodels/panel/results.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from linearmodels.compat.statsmodels import Summary

import datetime as dt
from typing import Dict, List, Optional, Union
from typing import Dict, List, Optional, Union, Tuple
import warnings

import numpy as np
from pandas import DataFrame, Series, concat
Expand All @@ -11,7 +12,12 @@

from linearmodels.iv.results import default_txt_fmt, stub_concat, table_concat
from linearmodels.shared.base import _ModelComparison, _SummaryStr
from linearmodels.shared.hypotheses import WaldTestStatistic, quadratic_form_test
from linearmodels.shared.hypotheses import (
WaldTestStatistic,
InvalidTestStatistic,
quadratic_form_test,
)
from linearmodels.panel.covariance import ClusteredCovariance, HeteroskedasticCovariance
from linearmodels.shared.io import _str, pval_format
from linearmodels.shared.utility import AttrDict
from linearmodels.typing import NDArray, OptionalArrayLike
Expand Down Expand Up @@ -790,6 +796,128 @@ def theta(self) -> DataFrame:
"""Values used in generalized demeaning"""
return self._theta

def wu_hausman(
self,
other: PanelResults,
include_constant: bool = False,
sigmamore: bool = False,
sigmaless: bool = False,
) -> Tuple[Union[InvalidTestStatistic, WaldTestStatistic], DataFrame]:
r"""
Perform Hausman specification test against other regression results.

Parameters
----------
other : PanelResults
Result from panel regression known to be consistent. Typically
fixed effects regression.
include_constant : bool, optional
Flag indicating whether to include the constant term in the comparison.
sigmamore : bool, optional
Flag indicating whether to base the test on the estimated parameter
covariance from the efficient model.
sigmaless : bool, optional
Flag indicating whether to base the test on the estimated parameter
covariance from the consistent model.

Returns
-------
WaldTestStatistic
Object containing test statistic, p-value, distribution and null
DataFrame
Overview of coefficients used in the test, and their differences and standard errors

Notes
-----
The test is computed by
.. math::
H=(b_{1}-b_{0})'\big(\operatorname{Var}(b_{0})-\operatorname{Var}(b_{1})\big)^{-1}(b_{1}-b_{0})

where :math:`b_{1}` is the array of coefficients from the model known to be consistent,
and :math:`b_{1}` is the array of coefficients from the model known to be efficient
(this model).

"""

def alt_cov(res: PanelResults, s2: float) -> DataFrame:
"""
Calculate covariance using the supplied error variance. Based on
https://github.com/bashtage/linearmodels/blob/4.17/linearmodels/panel/covariance.py#L119
"""
cov_obj = res._deferred_cov.__self__
x = cov_obj._x
out = s2 * np.linalg.inv(x.T @ x)
out = (out + out.T) / 2
return DataFrame(
out, columns=res.model.exog.vars, index=res.model.exog.vars
)

def matrix_positive_definite(mat: Union[NDArray, DataFrame]) -> bool:
"""
Check if matrix is positive definite.
"""
if np.array_equal(mat, mat.T):
try:
np.linalg.cholesky(mat)
return True
except np.linalg.LinAlgError:
pass
return False

if sigmamore and sigmaless:
raise ValueError("Conflicting test parameters")

invalid_cov = (ClusteredCovariance, HeteroskedasticCovariance)
if any(
isinstance(res._deferred_cov.__self__, invalid_cov) for res in (other, self)
):
raise TypeError(
"Hausman test cannot be used with clustered or robust covariance"
)

common_cols = set(other.params.index) & set(self.params.index)
if not include_constant:
if other.model.has_constant:
common_cols.discard(other.model.exog.vars[other.model._constant_index])
if self.model.has_constant:
common_cols.discard(self.model.exog.vars[self.model._constant_index])

b0 = other.params[common_cols]
b1 = self.params[common_cols]
var0 = (other.cov if not sigmamore else alt_cov(other, s2=self.s2)).loc[
common_cols, common_cols
]
var1 = (self.cov if not sigmaless else alt_cov(self, s2=other.s2)).loc[
common_cols, common_cols
]

var_diff = var0 - var1
b_diff = b0 - b1
std_errors = Series(np.sqrt(np.diagonal(var_diff)), index=var0.index)
estimates = DataFrame(
data={"b0": b0, "b1": b1, "b0-b1": b_diff, "Std. Err.": std_errors}
)
if not matrix_positive_definite(var_diff):
warnings.warn("(Var(b0) - Var(b1) is not positive definite)")
inv = np.linalg.inv
else:
inv = np.linalg.pinv
test_stat = b_diff.T @ inv(var_diff) @ b_diff

test: Union[InvalidTestStatistic, WaldTestStatistic]
if test_stat >= 0:
test = WaldTestStatistic(
test_stat,
null="No systematic difference in coefficients between models",
df=b0.size,
name="Hausman specification test",
)
else:
test = InvalidTestStatistic(
"chi2<0. Model does not meet the assumptions of the Hausman test."
)
return test, estimates


PanelModelResults = Union[PanelEffectsResults, PanelResults, RandomEffectsResults]

Expand Down
2 changes: 1 addition & 1 deletion linearmodels/tests/panel/_utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def assert_frame_similar(result, expected):
def access_attributes(result):
d = dir(result)
for key in d:
if not key.startswith("_") and key not in ("wald_test",):
if not key.startswith("_") and key not in ("wald_test", "wu_hausman"):
val = getattr(result, key)
if callable(val):
val()
37 changes: 37 additions & 0 deletions linearmodels/tests/panel/test_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,3 +170,40 @@ def test_wald_test(data):

with pytest.raises(ValueError):
res.wald_test(restriction, np.zeros(2), formula=formula)


@pytest.mark.parametrize("constant", (False, True), ids=("", "constant"))
@pytest.mark.parametrize("sigmamore", (False, True), ids=("", "sigmamore"))
@pytest.mark.parametrize("sigmaless", (False, True), ids=("", "sigmaless"))
def test_wu_hausman(recwarn, data, constant, sigmamore, sigmaless):
if sigmamore and sigmaless:
return
data = data.set_index(["nr", "year"])
dependent = data["hours"]
exog = add_constant(data[["exper", "expersq"]])
re_res = RandomEffects(dependent, exog).fit()
fe_res = PanelOLS(dependent, exog, entity_effects=True).fit()
opts = {
"include_constant": constant,
"sigmamore": sigmamore,
"sigmaless": sigmaless,
}
wald, estimates = re_res.wu_hausman(other=fe_res, **opts)
if constant:
warnings = {str(warn.message) for warn in recwarn}
assert "(Var(b0) - Var(b1) is not positive definite)" in warnings
assert estimates.shape == (3, 4)
else:
assert estimates.shape == (2, 4)
stata_results = {
"": (7.190854126884934, 0.0274489582259534),
"include_constant": (7.190854126885962, 0.0660570908629669),
"sigmaless": (6.953506564342694, 0.0309075965524561),
"include_constant-sigmaless": (6.953506564340507, 0.0733945334224529),
"sigmamore": (6.945610047252053, 0.0310298689573541),
"include_constant-sigmamore": (6.94561004725098, 0.0736517192483979),
}
test_id = "-".join([key for key, val in opts.items() if val is True])
expected_stat, expected_pval = stata_results[test_id]
assert wald.stat == pytest.approx(expected_stat, abs=1e-9)
assert wald.pval == pytest.approx(expected_pval, abs=1e-9)