bashtage · eirki · Feb 27, 2020 · Feb 27, 2020 · Feb 27, 2020 · Feb 28, 2020
diff --git a/linearmodels/panel/results.py b/linearmodels/panel/results.py
@@ -1,7 +1,8 @@
 from linearmodels.compat.statsmodels import Summary
 
 import datetime as dt
-from typing import Dict, List, Optional, Union
+from typing import Dict, List, Optional, Union, Tuple
+import warnings
 
 import numpy as np
 from pandas import DataFrame, Series, concat
@@ -11,7 +12,12 @@
 
 from linearmodels.iv.results import default_txt_fmt, stub_concat, table_concat
 from linearmodels.shared.base import _ModelComparison, _SummaryStr
-from linearmodels.shared.hypotheses import WaldTestStatistic, quadratic_form_test
+from linearmodels.shared.hypotheses import (
+    WaldTestStatistic,
+    InvalidTestStatistic,
+    quadratic_form_test,
+)
+from linearmodels.panel.covariance import ClusteredCovariance, HeteroskedasticCovariance
 from linearmodels.shared.io import _str, pval_format
 from linearmodels.shared.utility import AttrDict
 from linearmodels.typing import NDArray, OptionalArrayLike
@@ -790,6 +796,128 @@ def theta(self) -> DataFrame:
         """Values used in generalized demeaning"""
         return self._theta
 
+    def wu_hausman(
+        self,
+        other: PanelResults,
+        include_constant: bool = False,
+        sigmamore: bool = False,
+        sigmaless: bool = False,
+    ) -> Tuple[Union[InvalidTestStatistic, WaldTestStatistic], DataFrame]:
+        r"""
+        Perform Hausman specification test against other regression results.
+
+        Parameters
+        ----------
+        other : PanelResults
+            Result from panel regression known to be consistent. Typically
+            fixed effects regression.
+        include_constant : bool, optional
+            Flag indicating whether to include the constant term in the comparison.
+        sigmamore : bool, optional
+            Flag indicating whether to base the test on the estimated parameter
+            covariance from the efficient model.
+        sigmaless : bool, optional
+            Flag indicating whether to base the test on the estimated parameter
+            covariance from the consistent model.
+
+        Returns
+        -------
+        WaldTestStatistic
+            Object containing test statistic, p-value, distribution and null
+        DataFrame
+            Overview of coefficients used in the test, and their differences and standard errors
+
+        Notes
+        -----
+        The test is computed by
+        .. math::
+            H=(b_{1}-b_{0})'\big(\operatorname{Var}(b_{0})-\operatorname{Var}(b_{1})\big)^{-1}(b_{1}-b_{0})
+
+            where :math:`b_{1}` is the array of coefficients from the model known to be consistent,
+            and :math:`b_{1}` is the array of coefficients from the model known to be efficient
+            (this model).
+
+        """
+
+        def alt_cov(res: PanelResults, s2: float) -> DataFrame:
+            """
+            Calculate covariance using the supplied error variance. Based on
+            https://github.com/bashtage/linearmodels/blob/4.17/linearmodels/panel/covariance.py#L119
+            """
+            cov_obj = res._deferred_cov.__self__
+            x = cov_obj._x
+            out = s2 * np.linalg.inv(x.T @ x)
+            out = (out + out.T) / 2
+            return DataFrame(
+                out, columns=res.model.exog.vars, index=res.model.exog.vars
+            )
+
+        def matrix_positive_definite(mat: Union[NDArray, DataFrame]) -> bool:
+            """
+            Check if matrix is positive definite.
+            """
+            if np.array_equal(mat, mat.T):
+                try:
+                    np.linalg.cholesky(mat)
+                    return True
+                except np.linalg.LinAlgError:
+                    pass
+            return False
+
+        if sigmamore and sigmaless:
+            raise ValueError("Conflicting test parameters")
+
+        invalid_cov = (ClusteredCovariance, HeteroskedasticCovariance)
+        if any(
+            isinstance(res._deferred_cov.__self__, invalid_cov) for res in (other, self)
+        ):
+            raise TypeError(
+                "Hausman test cannot be used with clustered or robust covariance"
+            )
+
+        common_cols = set(other.params.index) & set(self.params.index)
+        if not include_constant:
+            if other.model.has_constant:
+                common_cols.discard(other.model.exog.vars[other.model._constant_index])
+            if self.model.has_constant:
+                common_cols.discard(self.model.exog.vars[self.model._constant_index])
+
+        b0 = other.params[common_cols]
+        b1 = self.params[common_cols]
+        var0 = (other.cov if not sigmamore else alt_cov(other, s2=self.s2)).loc[
+            common_cols, common_cols
+        ]
+        var1 = (self.cov if not sigmaless else alt_cov(self, s2=other.s2)).loc[
+            common_cols, common_cols
+        ]
+
+        var_diff = var0 - var1
+        b_diff = b0 - b1
+        std_errors = Series(np.sqrt(np.diagonal(var_diff)), index=var0.index)
+        estimates = DataFrame(
+            data={"b0": b0, "b1": b1, "b0-b1": b_diff, "Std. Err.": std_errors}
+        )
+        if not matrix_positive_definite(var_diff):
+            warnings.warn("(Var(b0) - Var(b1) is not positive definite)")
+            inv = np.linalg.inv
+        else:
+            inv = np.linalg.pinv
+        test_stat = b_diff.T @ inv(var_diff) @ b_diff
+
+        test: Union[InvalidTestStatistic, WaldTestStatistic]
+        if test_stat >= 0:
+            test = WaldTestStatistic(
+                test_stat,
+                null="No systematic difference in coefficients between models",
+                df=b0.size,
+                name="Hausman specification test",
+            )
+        else:
+            test = InvalidTestStatistic(
+                "chi2<0. Model does not meet the assumptions of the Hausman test."
+            )
+        return test, estimates
+
 
 PanelModelResults = Union[PanelEffectsResults, PanelResults, RandomEffectsResults]
 

diff --git a/linearmodels/tests/panel/_utility.py b/linearmodels/tests/panel/_utility.py
@@ -236,7 +236,7 @@ def assert_frame_similar(result, expected):
 def access_attributes(result):
     d = dir(result)
     for key in d:
-        if not key.startswith("_") and key not in ("wald_test",):
+        if not key.startswith("_") and key not in ("wald_test", "wu_hausman"):
             val = getattr(result, key)
             if callable(val):
                 val()
diff --git a/linearmodels/tests/panel/test_results.py b/linearmodels/tests/panel/test_results.py
@@ -170,3 +170,40 @@ def test_wald_test(data):
 
     with pytest.raises(ValueError):
         res.wald_test(restriction, np.zeros(2), formula=formula)
+
+
+@pytest.mark.parametrize("constant", (False, True), ids=("", "constant"))
+@pytest.mark.parametrize("sigmamore", (False, True), ids=("", "sigmamore"))
+@pytest.mark.parametrize("sigmaless", (False, True), ids=("", "sigmaless"))
+def test_wu_hausman(recwarn, data, constant, sigmamore, sigmaless):
+    if sigmamore and sigmaless:
+        return
+    data = data.set_index(["nr", "year"])
+    dependent = data["hours"]
+    exog = add_constant(data[["exper", "expersq"]])
+    re_res = RandomEffects(dependent, exog).fit()
+    fe_res = PanelOLS(dependent, exog, entity_effects=True).fit()
+    opts = {
+        "include_constant": constant,
+        "sigmamore": sigmamore,
+        "sigmaless": sigmaless,
+    }
+    wald, estimates = re_res.wu_hausman(other=fe_res, **opts)
+    if constant:
+        warnings = {str(warn.message) for warn in recwarn}
+        assert "(Var(b0) - Var(b1) is not positive definite)" in warnings
+        assert estimates.shape == (3, 4)
+    else:
+        assert estimates.shape == (2, 4)
+    stata_results = {
+        "": (7.190854126884934, 0.0274489582259534),
+        "include_constant": (7.190854126885962, 0.0660570908629669),
+        "sigmaless": (6.953506564342694, 0.0309075965524561),
+        "include_constant-sigmaless": (6.953506564340507, 0.0733945334224529),
+        "sigmamore": (6.945610047252053, 0.0310298689573541),
+        "include_constant-sigmamore": (6.94561004725098, 0.0736517192483979),
+    }
+    test_id = "-".join([key for key, val in opts.items() if val is True])
+    expected_stat, expected_pval = stata_results[test_id]
+    assert wald.stat == pytest.approx(expected_stat, abs=1e-9)
+    assert wald.pval == pytest.approx(expected_pval, abs=1e-9)