24 changes: 22 additions & 2 deletions README.md
@@ -62,6 +62,8 @@
print(f"p-value: {p_value:.3f}") # expect uniform random from 0-1

## How it works

### Two sample test

PTED uses the energy distance between the two samples `x` and `y`, which is computed as:

$$d = \frac{2}{n_xn_y}\sum_{i,j}||x_i - y_j|| - \frac{1}{n_x^2}\sum_{i,j}||x_i - x_j|| - \frac{1}{n_y^2}\sum_{i,j}||y_i - y_j||$$
@@ -105,6 +107,21 @@
grey distribution is just a histogram of many re-runs of this procedure. We
compute a p-value by taking the fraction of the energy distances that are
greater than the current one.
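
As a rough illustration of the procedure, here is a minimal NumPy sketch (not
the actual `pted` implementation, which also supports chunking and GPU
tensors; the sample sizes and permutation count below are arbitrary):

```python
import numpy as np
from scipy.spatial.distance import cdist

def energy_distance(x, y):
    # 2/(nx*ny) sum ||xi - yj|| - 1/nx^2 sum ||xi - xj|| - 1/ny^2 sum ||yi - yj||
    return 2 * cdist(x, y).mean() - cdist(x, x).mean() - cdist(y, y).mean()

rng = np.random.default_rng(0)
x = rng.normal(size=(100, 2))
y = rng.normal(size=(120, 2))
observed = energy_distance(x, y)

# Permutation null: pool the samples, shuffle, and split into new "x" and "y"
pooled = np.concatenate([x, y])
null_stats = []
for _ in range(500):
    rng.shuffle(pooled)
    null_stats.append(energy_distance(pooled[:len(x)], pooled[len(x):]))

# p-value: fraction of permuted energy distances greater than the observed one
p_value = np.mean(np.array(null_stats) > observed)
print(f"p-value: {p_value:.3f}")
```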

### Coverage test

In the coverage test we have some number of simulations `nsim`, each with a
true value `g` and posterior samples `s`. For each simulation separately we
use PTED to compute a p-value, essentially asking the question "was `g` drawn
from the distribution that generated `s`?". Individually, these tests are not
especially informative; however, under the null hypothesis their p-values are
drawn from `U(0,1)`, so we just need a way to combine their statistical
power. It turns out that if `p ~ U(0,1)`, then `-2 ln(p)` is chi2 distributed
with `dof = 2` (this is Fisher's method). This means that we can sum the chi2
values from the PTED test on each simulation and compare the total with a
chi2 distribution with `dof = 2 * nsim`. We use a density-based two-tailed
p-value on this chi2 distribution, meaning that if your posterior is
underconfident or overconfident, you will get a small p-value that can be
used to reject the null.
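
As a small sketch of the combination step (the per-simulation p-values below
are made up, and a simple doubled-tail form stands in for the package's
density-based two-tailed p-value):

```python
import numpy as np
from scipy.stats import chi2

# Hypothetical per-simulation PTED p-values, one per simulation
pvals = np.array([0.41, 0.07, 0.93, 0.55, 0.22])
nsim = len(pvals)

# Fisher's method: -2 ln(p) ~ chi2(dof=2) under the null, so the sum is
# chi2-distributed with dof = 2 * nsim
stat = np.sum(-2 * np.log(pvals))

# Two-tailed check: a very small sum suggests an underconfident posterior,
# a very large sum an overconfident one
p_combined = 2 * min(chi2.cdf(stat, 2 * nsim), chi2.sf(stat, 2 * nsim))
print(f"combined p-value: {p_combined:.3f}")
```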

## GPU Compatibility

@@ -113,7 +130,9 @@
PyTorch Tensors on the appropriate device.
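
For example, a hypothetical call (assumes a CUDA-capable machine; the
`pted.pted(x, y)` call follows the package's two-sample usage):

```python
import torch
import pted

# With both samples on the GPU, the distance computations stay on that device
x = torch.randn(500, 3, device="cuda")
y = torch.randn(500, 3, device="cuda")
p_value = pted.pted(x, y)
print(p_value)
```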

## Citation

If you use PTED in your work, please include a citation to the [zenodo record](https://doi.org/10.5281/zenodo.15353928) and also see below for references of the underlying method.
If you use PTED in your work, please include a citation to the [zenodo
record](https://doi.org/10.5281/zenodo.15353928) and also see below for
references of the underlying method.

## Reference

@@ -132,7 +151,8 @@
I didn't invent this test, I just think it's neat. Here is a paper on the subject:
}
```

Permutation tests are a whole class of tests, with much literature. Here are some starting points:
Permutation tests are a whole class of tests, with much literature. Here are
some starting points:

```
@book{good2013permutation,
9 changes: 6 additions & 3 deletions src/pted/pted.py
@@ -3,7 +3,7 @@
from scipy.stats import chi2 as chi2_dist
from torch import Tensor

from .utils import _pted_torch, _pted_numpy, _pted_chunk_torch, _pted_chunk_numpy
from .utils import _pted_torch, _pted_numpy, _pted_chunk_torch, _pted_chunk_numpy, two_tailed_p

__all__ = ["pted", "pted_coverage_test"]

@@ -234,6 +234,7 @@ def pted_coverage_test(
    of permutations. For chunking to be worth it you should have c^2 * I << n^2.
    """
    nsamp, nsim, *_ = s.shape
    assert nsim > 0, "need some simulations to run test, got 0 simulations"
    assert (
        g.shape == s.shape[1:]
    ), f"g and s must have the same shape (past first dim of s), not {g.shape} and {s.shape}"
@@ -262,7 +263,9 @@ def pted_coverage_test(
        return test_stats, permute_stats

    # Compute p-values
    if nsim == 1:
        return np.mean(permute_stats > test_stats[0])
    pvals = np.mean(permute_stats > test_stats[:, None], axis=1)
    pvals[pvals == 0] = 1.0 / permutations  # floor zero p-values at the permutation resolution
    chi2 = -2 * np.log(pvals)
    return 1 - chi2_dist.cdf(np.sum(chi2), 2 * nsim)
    chi2 = np.sum(-2 * np.log(pvals))
    return two_tailed_p(chi2, 2 * nsim)
2 changes: 1 addition & 1 deletion src/pted/tests.py
@@ -49,6 +49,6 @@ def test():
    assert p < 1e-4, f"p-value {p} is not in the expected range (~0)"
    # underconfident
    p = pted_coverage_test(g, s_under, permutations=200)
    assert p > 0.9999, f"p-value {p} is not in the expected range (~1)"
    assert p < 1e-4, f"p-value {p} is not in the expected range (~0)"

    print("Tests passed!")
31 changes: 31 additions & 0 deletions src/pted/utils.py
@@ -3,6 +3,9 @@
import numpy as np
from scipy.spatial.distance import cdist
import torch
from scipy.stats import chi2 as chi2_dist
from scipy.optimize import root_scalar


__all__ = ["_pted_numpy", "_pted_chunk_numpy", "_pted_torch", "_pted_chunk_torch"]

@@ -172,3 +175,31 @@ def _pted_torch(
        dmatrix = dmatrix[I][:, I]
        permute_stats.append(_energy_distance_precompute(dmatrix, nx, ny).item())
    return test_stat, permute_stats


def two_tailed_p(chi2, df):
    """Density-based two-tailed p-value: the total chi2(df) probability mass of
    all values whose density is less than or equal to the density at `chi2`."""
    assert df > 2, "Degrees of freedom must be greater than 2 for two-tailed p-value calculation."
    alpha = chi2_dist.pdf(chi2, df)
    mode = df - 2

    if np.isclose(chi2, mode):
        return 1.0

    def root_eq(x):
        return chi2_dist.pdf(x, df) - alpha

    # Find left root
    if chi2 < mode:
        left = chi2_dist.cdf(chi2, df)
    else:
        res_left = root_scalar(root_eq, bracket=[0, mode], method="brentq")
        left = chi2_dist.cdf(res_left.root, df)

    # Find right root
    if chi2 > mode:
        right = chi2_dist.sf(chi2, df)
    else:
        res_right = root_scalar(root_eq, bracket=[mode, 10000 * df], method="brentq")
        right = chi2_dist.sf(res_right.root, df)

    return left + right
8 changes: 8 additions & 0 deletions tests/test_pted.py
@@ -87,3 +87,11 @@ def test_pted_chunk_numpy():
    y = np.random.uniform(size=(1000, D))
    p = pted.pted(x, y, chunk_size=100, chunk_iter=10)
    assert p < 1e-4, f"p-value {p} is not in the expected range (~0)"


def test_pted_coverage_edgecase():
    # Test with single simulation
    g = np.random.normal(size=(1, 10))
    s = np.random.normal(size=(100, 1, 10))
    p = pted.pted_coverage_test(g, s)
    assert p > 1e-4 and p < 0.9999, f"p-value {p} is not in the expected range (non-extreme)"
20 changes: 20 additions & 0 deletions tests/test_utils.py
@@ -0,0 +1,20 @@
import numpy as np

from pted.utils import two_tailed_p

import pytest


def test_two_tailed_p():

    assert np.isclose(two_tailed_p(4, 6), 1.0), "p-value at mode should be 1.0"

    assert two_tailed_p(0.01, 10) < 0.01, "p-value should be less than 0.01 for small chi2"
    assert two_tailed_p(100, 10) < 0.01, "p-value should be less than 0.01 for large chi2"
    assert two_tailed_p(10, 10) > 0.01, "p-value should be large for chi2 near the mode"

    assert two_tailed_p(0, 10) < 0.01
    assert two_tailed_p(1e-25, 1000) < 0.01

    with pytest.raises(AssertionError):
        two_tailed_p(4, 2)