diff --git a/README.md b/README.md index 1c26424..253b47d 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,8 @@ print(f"p-value: {p_value:.3f}") # expect uniform random from 0-1 ## How it works +### Two sample test + PTED uses the energy distance of the two samples `x` and `y`, this is computed as: $$d = \frac{2}{n_xn_y}\sum_{i,j}||x_i - y_j|| - \frac{1}{n_x^2}\sum_{i,j}||x_i - x_j|| - \frac{1}{n_y^2}\sum_{i,j}||y_i - y_j||$$ @@ -105,6 +107,21 @@ grey distribution is just a histogram of many re-runs of this procedure. We compute a p-value by taking the fraction of the energy distances that are greater than the current one. +### Coverage test + +In the coverage test we have some number of simulations `nsim` where there is a +true value `g` and some posterior samples `s`. For each simulation separately we +use PTED to compute a p-value, essentially asking the question "was `g` drawn +from the distribution that generated `s`?". Individually, these tests are not +especially informative, however their p-values must have been drawn from +`U(0,1)` under the null-hypothesis. Thus we just need a way to combine their +statistical power. It turns out that for some `p ~ U(0,1)` value, we have that +`- 2 ln(p)` is chi2 distributed with `dof = 2`. This means that we can sum the +chi2 values for the PTED test on each simulation and compare with a chi2 +distribution with `dof = 2 * nsim`. We use a density based two tailed p-value +test on this chi2 distribution meaning that if your posterior is underconfident +or overconfident, you will get a small p-value that can be used to reject the +null. ## GPU Compatibility @@ -113,7 +130,9 @@ PyTorch Tensors on the appropriate device. ## Citation -If you use PTED in your work, please include a citation to the [zenodo record](https://doi.org/10.5281/zenodo.15353928) and also see below for references of the underlying method. +If you use PTED in your work, please include a citation to the [zenodo +record](https://doi.org/10.5281/zenodo.15353928) and also see below for +references of the underlying method. ## Reference @@ -132,7 +151,8 @@ I didn't invent this test, I just think its neat. Here is a paper on the subject } ``` -Permutation tests are a whole class of tests, with much literature. Here are some starting points: +Permutation tests are a whole class of tests, with much literature. Here are +some starting points: ``` @book{good2013permutation, diff --git a/src/pted/pted.py b/src/pted/pted.py index 16d05eb..8a3cd89 100644 --- a/src/pted/pted.py +++ b/src/pted/pted.py @@ -3,7 +3,7 @@ from scipy.stats import chi2 as chi2_dist from torch import Tensor -from .utils import _pted_torch, _pted_numpy, _pted_chunk_torch, _pted_chunk_numpy +from .utils import _pted_torch, _pted_numpy, _pted_chunk_torch, _pted_chunk_numpy, two_tailed_p __all__ = ["pted", "pted_coverage_test"] @@ -234,6 +234,7 @@ def pted_coverage_test( of permutations. For chunking to be worth it you should have c^2 * I << n^2. """ nsamp, nsim, *_ = s.shape + assert nsim > 0, "need some simulations to run test, got 0 simulations" assert ( g.shape == s.shape[1:] ), f"g and s must have the same shape (past first dim of s), not {g.shape} and {s.shape}" @@ -262,7 +263,9 @@ def pted_coverage_test( return test_stats, permute_stats # Compute p-values + if nsim == 1: + return np.mean(permute_stats > test_stats[0]) pvals = np.mean(permute_stats > test_stats[:, None], axis=1) pvals[pvals == 0] = 1.0 / permutations # handle pvals == 0 - chi2 = -2 * np.log(pvals) - return 1 - chi2_dist.cdf(np.sum(chi2), 2 * nsim) + chi2 = np.sum(-2 * np.log(pvals)) + return two_tailed_p(chi2, 2 * nsim) diff --git a/src/pted/tests.py b/src/pted/tests.py index 563c24e..5adeeef 100644 --- a/src/pted/tests.py +++ b/src/pted/tests.py @@ -49,6 +49,6 @@ def test(): assert p < 1e-4, f"p-value {p} is not in the expected range (~0)" # underconfident p = pted_coverage_test(g, s_under, permutations=200) - assert p > 0.9999, f"p-value {p} is not in the expected range (~1)" + assert p < 1e-4, f"p-value {p} is not in the expected range (~0)" print("Tests passed!") diff --git a/src/pted/utils.py b/src/pted/utils.py index 7a31f91..74b848a 100644 --- a/src/pted/utils.py +++ b/src/pted/utils.py @@ -3,6 +3,9 @@ import numpy as np from scipy.spatial.distance import cdist import torch +from scipy.stats import chi2 as chi2_dist +from scipy.optimize import root_scalar + __all__ = ["_pted_numpy", "_pted_chunk_numpy", "_pted_torch", "_pted_chunk_torch"] @@ -172,3 +175,31 @@ def _pted_torch( dmatrix = dmatrix[I][:, I] permute_stats.append(_energy_distance_precompute(dmatrix, nx, ny).item()) return test_stat, permute_stats + + +def two_tailed_p(chi2, df): + assert df > 2, "Degrees of freedom must be greater than 2 for two-tailed p-value calculation." + alpha = chi2_dist.pdf(chi2, df) + mode = df - 2 + + if np.isclose(chi2, mode): + return 1.0 + + def root_eq(x): + return chi2_dist.pdf(x, df) - alpha + + # Find left root + if chi2 < mode: + left = chi2_dist.cdf(chi2, df) + else: + res_left = root_scalar(root_eq, bracket=[0, mode], method="brentq") + left = chi2_dist.cdf(res_left.root, df) + + # Find right root + if chi2 > mode: + right = chi2_dist.sf(chi2, df) + else: + res_right = root_scalar(root_eq, bracket=[mode, 10000 * df], method="brentq") + right = chi2_dist.sf(res_right.root, df) + + return left + right diff --git a/tests/test_pted.py b/tests/test_pted.py index 01ac54d..8ac8bf8 100644 --- a/tests/test_pted.py +++ b/tests/test_pted.py @@ -87,3 +87,11 @@ def test_pted_chunk_numpy(): y = np.random.uniform(size=(1000, D)) p = pted.pted(x, y, chunk_size=100, chunk_iter=10) assert p < 1e-4, f"p-value {p} is not in the expected range (~0)" + + +def test_pted_coverage_edgecase(): + # Test with single simulation + g = np.random.normal(size=(1, 10)) + s = np.random.normal(size=(100, 1, 10)) + p = pted.pted_coverage_test(g, s) + assert p > 1e-4 and p < 0.9999, f"p-value {p} is not in the expected range (~1)" diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..378648a --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,20 @@ +import numpy as np + +from pted.utils import two_tailed_p + +import pytest + + +def test_two_tailed_p(): + + assert np.isclose(two_tailed_p(4, 6), 1.0), "p-value at mode should be 1.0" + + assert two_tailed_p(0.01, 10) < 0.01, "p-value should be less than 0.01 for small chi2" + assert two_tailed_p(100, 10) < 0.01, "p-value should be less than 0.01 for large chi2" + assert two_tailed_p(10, 10) > 0.01, "p-value should be close to 0.5 for chi2 near mode" + + assert two_tailed_p(0, 10) < 0.01 + assert two_tailed_p(1e-25, 1000) < 0.01 + + with pytest.raises(AssertionError): + two_tailed_p(4, 2)