Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
42bffb6
adding .gitignore
Richard-Sti Apr 4, 2022
9334786
Add names to samplers
Richard-Sti Apr 4, 2022
d32c89b
Add thinning of a sampler
Richard-Sti Apr 4, 2022
643bb9b
Undo adding imports
Richard-Sti Apr 5, 2022
88150d6
Create diagnostics part of epsie
Richard-Sti Apr 5, 2022
e60f15e
Add PT thinning and comments
Richard-Sti Apr 5, 2022
e09ffd6
Add an arbitrary PT acl functoin support
Richard-Sti Apr 5, 2022
b04c9eb
Change loop from enum to range
Richard-Sti Apr 5, 2022
3f4cfdc
pep8
Richard-Sti Apr 5, 2022
a2973dd
add a blank line
Richard-Sti Apr 5, 2022
6866f1d
Stop accidenatlly ignoring tests..
Richard-Sti Apr 5, 2022
ec5e51c
Add MH sampler thinning test
Richard-Sti Apr 5, 2022
869d57d
If ACL < 1 set it to 1
Richard-Sti Apr 5, 2022
452205c
Add PT sampler thinning test
Richard-Sti Apr 5, 2022
3beeda5
pep8 correction
Richard-Sti Apr 5, 2022
102f348
more pep8
Richard-Sti Apr 5, 2022
9a46c70
add GR test import
Richard-Sti Apr 5, 2022
9103849
Add basic GR test for MH sampler
Richard-Sti Apr 5, 2022
722364a
Simplify the GH calculation
Richard-Sti Apr 5, 2022
34901f8
Add documentation
Richard-Sti Apr 5, 2022
31ad13d
pep8 issues
Richard-Sti Apr 5, 2022
335d0aa
pep8 again
Richard-Sti Apr 5, 2022
33486cb
Test the GH diagnostic
Richard-Sti Apr 5, 2022
0d12d72
typo in documentation
Richard-Sti Apr 11, 2022
20e3fa1
ACL cold calculation
Richard-Sti Apr 11, 2022
1395303
Thin MH logl and logp
Richard-Sti Apr 11, 2022
d713077
add thinning of MH including blobs
Richard-Sti Apr 11, 2022
c16426e
PT sampler thinning probs and blobs
Richard-Sti Apr 11, 2022
601d460
fix tuple to list addition
Richard-Sti Apr 11, 2022
ab50e36
Extend GR to PT sampler
Richard-Sti Apr 11, 2022
fa97fbd
Attempt at fixing GR
Richard-Sti Apr 11, 2022
329d008
Remove name attribute
Richard-Sti Apr 12, 2022
54671a0
Sort by sampler instance
Richard-Sti Apr 12, 2022
0cfe09d
fix `c` assignment bug
Richard-Sti Apr 12, 2022
3648003
Change temp_acl_method to either coldest or max
Richard-Sti Apr 12, 2022
eeeab31
Update tests to take `temp_acls_method`
Richard-Sti Apr 12, 2022
524da5e
Rename name attribute
Richard-Sti Apr 12, 2022
502c952
Simplify mh and pt thinning
Richard-Sti Apr 12, 2022
e4ade3d
Remove testing summarised GR
Richard-Sti Apr 12, 2022
700e031
Reformulated GR
Richard-Sti Apr 12, 2022
ad1a894
pep8
Richard-Sti Apr 12, 2022
e18c8cf
Calculate GR for each temperature level independently
Richard-Sti Apr 13, 2022
fe2c225
in test_diagnostic check Rs dimensions
Richard-Sti Apr 13, 2022
eb1eef3
edit docs
Richard-Sti Apr 22, 2022
9758722
Increare iter num and rm "full" argument
Richard-Sti Apr 22, 2022
bf0428e
Add a check for number of temp levels
Richard-Sti Apr 22, 2022
8e3c8e3
rm whitespace
Richard-Sti Apr 22, 2022
51336d4
increase num indep chains
Richard-Sti Apr 22, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
scratch_*
*ipynb_checkpoints
*.DS_Store
*__pycache__
build/*
epsie.egg-info/*
build/
epsie.egg-info/
.DS_Store
reinstall_epsie.sh
*.pyc
scratch_*
17 changes: 17 additions & 0 deletions epsie/diagnostic/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright (C) 2019 Collin Capano
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

from .acl import acl_1d, acl_chain, thinned_samples
from .convergence import gelman_rubin_test
222 changes: 222 additions & 0 deletions epsie/diagnostic/acl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
# Copyright (C) 2022 Richard Stiskalek, Collin Capano
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

"""Autocorrelation time calculation."""
import numpy

from epsie.samplers import MetropolisHastingsSampler, ParallelTemperedSampler


def acl_1d(x, c=5.0):
"""
Calculate the autocorrelation length (ACL) of some series.

Algorithm used is from:
N. Madras and A.D. Sokal, J. Stat. Phys. 50, 109 (1988).

Implementation from:
https://emcee.readthedocs.io/en/stable/tutorials/autocorr/#autocorr

Arguments
---------
x : 1-dimensional array
Samples whose autocorrelation length is to be calculated.
c : float, optional
ACL hyperparameter. As per Sokal by default set to 5.

Returns
-------
ACL: int
Autocorrelation length.
"""
if x.ndim != 1:
raise TypeError("``x`` not a 1D array. Currently {}".format(x.ndim))
# n is the next power of 2 for length of x
n = (2**numpy.ceil(numpy.log2(x.size))).astype(int)
# Compute the FFT and then (from that) the auto-correlation function
f = numpy.fft.fft(x - numpy.mean(x), n=2 * n)
acf = numpy.fft.ifft(f * numpy.conjugate(f))[: len(x)].real / (4 * n)

acf /= acf[0]

taus = 2.0 * numpy.cumsum(acf) - 1.0
window = auto_window(taus, c)
acl = numpy.ceil(taus[window]).astype(int)
if acl < 1:
acl = 1
return acl


def auto_window(taus, c):
"""
ACL automated windowing procedure following Sokal (1989).

Implementation from:
https://emcee.readthedocs.io/en/stable/tutorials/autocorr/#autocorr
"""
m = numpy.arange(len(taus)) < c * taus
if numpy.any(m):
return numpy.argmin(m)
return len(taus) - 1


def acl_chain(chain, burnin_iter=0, c=5.0, full=False):
"""
Calculate the ACL of :py:class:`epsie.chain.Chain` chain.

Arguments
---------
chain : :py:class:`epsie.chain.Chain`
Epsie chain.
burnin_iter : int, optional
Number of burnin iterations to be thrown away.
c : float, optional
ACL calculation hyperparameter. By default 5.
full : bool, optionall
Whether to return the ACL of every parameter. By default False, thus
returning only the maximum ACL.

Returns
-------
ACL : int or array of integers
Chain's ACL(s).
"""
acls = [acl_1d(chain.positions[p][burnin_iter:]) for p in chain.parameters]
if full:
return acls
return max(acls)


def thinned_samples(sampler, burnin_iter=0, c=5.0, temp_acls_method="coldest"):
"""
Parse a sampler and return its samples thinned by the ACL.

Arguments
---------
sampler : {:py:class:`epsie.sampler.MetropolisHastingsSampler`,
:py:class:`epsie.sampler.ParallelTemperedSampler`}
Epsie sampler whose samples to extract.
burnin_iter : int, optional
Number of burnin iterations to be thrown away.
c : float, optional
ACL calculation hyperparameter. By default 5.
temp_acls_method : string, optional
Tempered acls selection, either `coldest` or `max`. The former
corresponds to taking the ACL of the coldest chain, while the latter to
the maximum ACL across all temperatures. This single ACL is then used
to thin all temperatures.

Returns
-------
thinned samples : structured array
Array whose `dtype.names` are the sampler parameters. Each parameter's
data is 1-dimensional for the MH sampler and 2-dimensional for the PT
sampler. In the latter case the first and second axis represent the
temperature and sample index.
"""
if isinstance(sampler, MetropolisHastingsSampler):
return _thinned_mh_samples(sampler, burnin_iter, c)
elif isinstance(sampler, ParallelTemperedSampler):
return _thinned_pt_samples(sampler, burnin_iter, c, temp_acls_method)
else:
raise ValueError("Unknown sampler type ``{}``".format(type(sampler)))


def _get_raw_data(sampler, burnin_iter):
raw = {}
raw.update({p: sampler.positions[..., burnin_iter:][p]
for p in sampler.parameters})
raw.update({p: sampler.stats[..., burnin_iter:][p]
for p in ("logl", "logp")})
if sampler.blobs is not None:
raw.update({p: sampler.blobs[..., burnin_iter:][p]
for p in sampler.blobs.dtype.names})
return raw


def _thinned_mh_samples(sampler, burnin_iter=0, c=5.0):
# Calculate the ACL for each chain
acls = [acl_chain(chain, burnin_iter, c) for chain in sampler.chains]

keys = sampler.parameters + ("logl", "logp")
keys += sampler.blobs.dtype.names if sampler.blobs is not None else ()
raw = _get_raw_data(sampler, burnin_iter)

# Cycle over the chains and thin them
_thinned = {p: [] for p in keys}
for ii in range(sampler.nchains):
for key in keys:
_thinned[key].append(raw[key][ii, :][::-1][::acls[ii]][::-1])

# Put the thinned samples into a structured array
dtype = sampler.positions.dtype.descr + sampler.stats.dtype.descr
if sampler.blobs is not None:
dtype += sampler.blobs.dtype.descr
thinned = numpy.zeros(sum(x.size for x in _thinned[keys[0]]),
dtype=numpy.dtype(dtype))
for p in keys:
thinned[p] = numpy.concatenate(_thinned[p])

return thinned


def _thinned_pt_samples(sampler, burnin_iter=0, c=5.0,
temp_acls_method="coldest"):
allowed_methods = ["coldest", "max"]
if temp_acls_method not in allowed_methods:
raise ValueError("`Invalid `temp_acls_method`. Allowed methods: `{}`"
.format(allowed_methods))
# Calculate the ACL across temperatures for each chain
temp_acls = numpy.zeros((sampler.nchains, sampler.ntemps), dtype=int)
for ii in range(sampler.nchains):
for jj in range(sampler.ntemps):
temp_acls[ii, jj] = acl_chain(
sampler.chains[ii].chains[jj], burnin_iter, c=c)
# By default we only take the coldest chain. No need to calculate
# the hotter chains then.
if temp_acls_method == "coldest":
break
# Grab the ACL for each chain. By default ACL of the coldest chain
if temp_acls_method == "coldest":
acls = temp_acls[:, 0]
else:
acls = numpy.max(temp_acls, axis=1)

keys = sampler.parameters + ("logl", "logp")
keys += sampler.blobs.dtype.names if sampler.blobs is not None else ()
raw = _get_raw_data(sampler, burnin_iter)

thinned = {p: [] for p in keys}
# Cycle over the chains, thinning them
for tk in range(sampler.ntemps):
_thinned = {p: [] for p in keys}

for ii in range(sampler.nchains):
for key in keys:
_thinned[key].append(
raw[key][tk, ii, :][::-1][::acls[ii]][::-1])

for param in keys:
thinned[param].append(numpy.concatenate(_thinned[param]))
# Cast the thinned samples from a list of arrays into a 2D array for each
# parameter and return as a structured array
dtype = sampler.positions.dtype.descr + sampler.stats.dtype.descr
if sampler.blobs is not None:
dtype += sampler.blobs.dtype.descr
thinned_samples = numpy.zeros((sampler.ntemps, thinned[keys[0]][0].size),
dtype=numpy.dtype(dtype))
for param in keys:
thinned_samples[param] = numpy.asarray(thinned[param])
return thinned_samples
82 changes: 82 additions & 0 deletions epsie/diagnostic/convergence.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Copyright (C) 2022 Richard Stiskalek, Collin Capano
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

"""MCMC convergence tests."""

import numpy
from epsie.samplers import MetropolisHastingsSampler, ParallelTemperedSampler


def gelman_rubin_test(sampler, burnin_iter):
"""
Calculate the Gelman-Rubin (GR) convergence test outlined in [1] and
summarised in [2] for parallel, independent MCMC chains.

For a PT sampler the GR statistic is calculated for each temperature level.

Typically values of less than 1.1 or 1.2 are recommended.

Arguments
---------
sampler : {:py:class:`epsie.sampler.MetropolisHastingsSampler`,
:py:class:`epsie.sampler.ParallelTemperedSampler`}
Epsie sampler whose samples to extract.
burnin_iter : int, optional
Number of burnin iterations to be thrown away.

Returns
-------
Rs : array
GR statistic of each parameter. In case of a MH sampler 1-dimensional
array of length `len(sampler.paramaters)` and in case of a PT sampler
2-dimensional array of shape `len(sampler.ntemps, len(sampler.params)`.

References
----------
.. [1] Gelman, A. and Rubin, D.B. (1992). "Inference from Iterative
Simulation using Multiple Sequences". Statistical Science, 7,
p. 457–511.
.. [2] https://mc-stan.org/docs/2_18/reference-manual/
notation-for-samples-chains-and-draws.html
"""
params = sampler.parameters
# Cut off burnin iterations and for PT take coldest chains
samples = sampler.positions[..., burnin_iter:]
if isinstance(sampler, MetropolisHastingsSampler):
return _gelman_rubin_at_temp(samples, params)
elif isinstance(sampler, ParallelTemperedSampler):
Rs = numpy.zeros((sampler.ntemps, len(params)))
for tk in range(sampler.ntemps):
Rs[tk, :] = _gelman_rubin_at_temp(samples[tk, ...], params)
return Rs
else:
raise ValueError("Unknown sampler type ``{}``".format(type(sampler)))


def _gelman_rubin_at_temp(samples, params):
"""Calculate the Gelman Rubin statistic at a given temperature level."""
Nchains, N = samples.shape
if Nchains == 1:
raise TypeError("GL statistic requires > 1 independent chains.")
# Calculate the GH statistic for each parameter independently
Rs = numpy.zeros(len(params))
for i, param in enumerate(params):
# Between chains variance
B = N * numpy.var(numpy.mean(samples[param], axis=1), ddof=1)
# Within chains variance
W = numpy.mean(numpy.var(samples[param], axis=1, ddof=1))
Rs[i] = ((N - 1) / N + 1 / N * B / W)**0.5

return Rs
1 change: 1 addition & 0 deletions epsie/samplers/ptsampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ class ParallelTemperedSampler(BaseSampler):
Specify a process pool to use for parallelization. Default is to use a
single core.
"""

def __init__(self, parameters, model, nchains, betas, swap_interval=1,
proposals=None, adaptive_annealer=None,
reset_after_swap=False, default_proposal=None,
Expand Down
Loading