diff --git a/README.md b/README.md
index 1c26424..253b47d 100644
--- a/README.md
+++ b/README.md
@@ -62,6 +62,8 @@ print(f"p-value: {p_value:.3f}") # expect uniform random from 0-1
 
 ## How it works
 
+### Two sample test
+
 PTED uses the energy distance of the two samples `x` and `y`, this is computed as:
 
 $$d = \frac{2}{n_xn_y}\sum_{i,j}||x_i - y_j|| - \frac{1}{n_x^2}\sum_{i,j}||x_i - x_j|| - \frac{1}{n_y^2}\sum_{i,j}||y_i - y_j||$$
@@ -105,6 +107,21 @@ grey distribution is just a histogram of many re-runs of this procedure. We
 compute a p-value by taking the fraction of the energy distances that are
 greater than the current one.
 
+### Coverage test
+
+In the coverage test we have some number of simulations `nsim` where there is a
+true value `g` and some posterior samples `s`. For each simulation separately we
+use PTED to compute a p-value, essentially asking the question "was `g` drawn
+from the distribution that generated `s`?". Individually, these tests are not
+especially informative, however their p-values must have been drawn from
+`U(0,1)` under the null-hypothesis. Thus we just need a way to combine their
+statistical power. It turns out that for some `p ~ U(0,1)` value, we have that
+`- 2 ln(p)` is chi2 distributed with `dof = 2`. This means that we can sum the
+chi2 values for the PTED test on each simulation and compare with a chi2
+distribution with `dof = 2 * nsim`. We use a density based two tailed p-value
+test on this chi2 distribution meaning that if your posterior is underconfident
+or overconfident, you will get a small p-value that can be used to reject the
+null.
 
 ## GPU Compatibility
 
@@ -113,7 +130,9 @@ PyTorch Tensors on the appropriate device.
 
 ## Citation
 
-If you use PTED in your work, please include a citation to the [zenodo record](https://doi.org/10.5281/zenodo.15353928) and also see below for references of the underlying method.
+If you use PTED in your work, please include a citation to the [zenodo
+record](https://doi.org/10.5281/zenodo.15353928) and also see below for
+references of the underlying method.
 
 ## Reference
 
@@ -132,7 +151,8 @@ I didn't invent this test, I just think its neat. Here is a paper on the subject
 }
 ```
 
-Permutation tests are a whole class of tests, with much literature. Here are some starting points:
+Permutation tests are a whole class of tests, with much literature. Here are
+some starting points:
 
 ```
 @book{good2013permutation,
diff --git a/src/pted/pted.py b/src/pted/pted.py
index 16d05eb..8a3cd89 100644
--- a/src/pted/pted.py
+++ b/src/pted/pted.py
@@ -3,7 +3,7 @@
 from scipy.stats import chi2 as chi2_dist
 from torch import Tensor
 
-from .utils import _pted_torch, _pted_numpy, _pted_chunk_torch, _pted_chunk_numpy
+from .utils import _pted_torch, _pted_numpy, _pted_chunk_torch, _pted_chunk_numpy, two_tailed_p
 
 __all__ = ["pted", "pted_coverage_test"]
 
@@ -234,6 +234,7 @@ def pted_coverage_test(
         of permutations. For chunking to be worth it you should have c^2 * I << n^2.
     """
     nsamp, nsim, *_ = s.shape
+    assert nsim > 0, "need some simulations to run test, got 0 simulations"
     assert (
         g.shape == s.shape[1:]
     ), f"g and s must have the same shape (past first dim of s), not {g.shape} and {s.shape}"
@@ -262,7 +263,9 @@ def pted_coverage_test(
         return test_stats, permute_stats
 
     # Compute p-values
+    if nsim == 1:
+        return np.mean(permute_stats > test_stats[0])
     pvals = np.mean(permute_stats > test_stats[:, None], axis=1)
     pvals[pvals == 0] = 1.0 / permutations  # handle pvals == 0
-    chi2 = -2 * np.log(pvals)
-    return 1 - chi2_dist.cdf(np.sum(chi2), 2 * nsim)
+    chi2 = np.sum(-2 * np.log(pvals))
+    return two_tailed_p(chi2, 2 * nsim)
diff --git a/src/pted/tests.py b/src/pted/tests.py
index 563c24e..5adeeef 100644
--- a/src/pted/tests.py
+++ b/src/pted/tests.py
@@ -49,6 +49,6 @@ def test():
     assert p < 1e-4, f"p-value {p} is not in the expected range (~0)"
     # underconfident
     p = pted_coverage_test(g, s_under, permutations=200)
-    assert p > 0.9999, f"p-value {p} is not in the expected range (~1)"
+    assert p < 1e-4, f"p-value {p} is not in the expected range (~0)"
 
     print("Tests passed!")
diff --git a/src/pted/utils.py b/src/pted/utils.py
index 7a31f91..74b848a 100644
--- a/src/pted/utils.py
+++ b/src/pted/utils.py
@@ -3,6 +3,9 @@
 import numpy as np
 from scipy.spatial.distance import cdist
 import torch
+from scipy.stats import chi2 as chi2_dist
+from scipy.optimize import root_scalar
+
 
 __all__ = ["_pted_numpy", "_pted_chunk_numpy", "_pted_torch", "_pted_chunk_torch"]
 
@@ -172,3 +175,31 @@ def _pted_torch(
         dmatrix = dmatrix[I][:, I]
         permute_stats.append(_energy_distance_precompute(dmatrix, nx, ny).item())
     return test_stat, permute_stats
+
+
+def two_tailed_p(chi2, df):
+    assert df > 2, "Degrees of freedom must be greater than 2 for two-tailed p-value calculation."
+    alpha = chi2_dist.pdf(chi2, df)
+    mode = df - 2
+
+    if np.isclose(chi2, mode):
+        return 1.0
+
+    def root_eq(x):
+        return chi2_dist.pdf(x, df) - alpha
+
+    # Find left root
+    if chi2 < mode:
+        left = chi2_dist.cdf(chi2, df)
+    else:
+        res_left = root_scalar(root_eq, bracket=[0, mode], method="brentq")
+        left = chi2_dist.cdf(res_left.root, df)
+
+    # Find right root
+    if chi2 > mode:
+        right = chi2_dist.sf(chi2, df)
+    else:
+        res_right = root_scalar(root_eq, bracket=[mode, 10000 * df], method="brentq")
+        right = chi2_dist.sf(res_right.root, df)
+
+    return left + right
diff --git a/tests/test_pted.py b/tests/test_pted.py
index 01ac54d..8ac8bf8 100644
--- a/tests/test_pted.py
+++ b/tests/test_pted.py
@@ -87,3 +87,11 @@ def test_pted_chunk_numpy():
     y = np.random.uniform(size=(1000, D))
     p = pted.pted(x, y, chunk_size=100, chunk_iter=10)
     assert p < 1e-4, f"p-value {p} is not in the expected range (~0)"
+
+
+def test_pted_coverage_edgecase():
+    # Test with single simulation
+    g = np.random.normal(size=(1, 10))
+    s = np.random.normal(size=(100, 1, 10))
+    p = pted.pted_coverage_test(g, s)
+    assert p > 1e-4 and p < 0.9999, f"p-value {p} is not in the expected range (~1)"
diff --git a/tests/test_utils.py b/tests/test_utils.py
new file mode 100644
index 0000000..378648a
--- /dev/null
+++ b/tests/test_utils.py
@@ -0,0 +1,20 @@
+import numpy as np
+
+from pted.utils import two_tailed_p
+
+import pytest
+
+
+def test_two_tailed_p():
+
+    assert np.isclose(two_tailed_p(4, 6), 1.0), "p-value at mode should be 1.0"
+
+    assert two_tailed_p(0.01, 10) < 0.01, "p-value should be less than 0.01 for small chi2"
+    assert two_tailed_p(100, 10) < 0.01, "p-value should be less than 0.01 for large chi2"
+    assert two_tailed_p(10, 10) > 0.01, "p-value should be close to 0.5 for chi2 near mode"
+
+    assert two_tailed_p(0, 10) < 0.01
+    assert two_tailed_p(1e-25, 1000) < 0.01
+
+    with pytest.raises(AssertionError):
+        two_tailed_p(4, 2)