
#
#  Copyright (C) 2011, 2016, 2019, 2020, 2021
#  Smithsonian Astrophysical Observatory
#
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License along
#  with this program; if not, write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#


"""
pyBLoCXS is a sophisticated Markov chain Monte Carlo (MCMC) based algorithm
designed to carry out Bayesian Low-Count X-ray Spectral (BLoCXS) analysis in the
Sherpa environment. The code is a Python extension to Sherpa that explores
parameter space at a suspected minimum using a predefined Sherpa model fit
to high-energy X-ray spectral data. pyBLoCXS includes a flexible definition of
priors and allows for variations in the calibration information. It can be used
to compute posterior predictive p-values for the likelihood ratio test (see
Protassov et al., 2002, ApJ, 571, 545). Future versions will allow for the
incorporation of calibration uncertainty (Lee et al., 2011, ApJ, 731, 126).

MCMC is a complex computational technique that requires some sophistication on
the part of its users to ensure that it both converges and explores the
posterior distribution properly. The pyBLoCXS code has been tested with a number
of simple single-component spectral models. It should be used with great care in
more complex settings. Readers interested in Bayesian low-count spectral
analysis should consult van Dyk et al. (2001, ApJ, 548, 224). pyBLoCXS is based
on the methods in van Dyk et al. (2001) but employs a different MCMC sampler
than is described in that article. In particular, pyBLoCXS has two sampling
modules. The first uses a Metropolis-Hastings jumping rule that is a
multivariate t-distribution with user-specified degrees of freedom centered on
the best spectral fit and with multivariate scale determined by the Sherpa
function, covar(), applied to the best fit. The second module mixes this
Metropolis-Hastings jumping rule with a Metropolis jumping rule centered at the
current draw, also sampling according to a t-distribution with user-specified
degrees of freedom and multivariate scale determined by a user-specified scalar
multiple of covar() applied to the best fit.

A general description of the MCMC techniques we employ along with their
convergence diagnostics can be found in Appendices A.2 - A.4 of van Dyk et
al. (2001) and in more detail in Chapter 11 of Gelman, Carlin, Stern, and Rubin
(Bayesian Data Analysis, 2nd Edition, 2004, Chapman & Hall/CRC).

http://hea-www.harvard.edu/AstroStat/pyBLoCXS/
"""

# The pyBLoCXS code base is cleanly separable from Sherpa!

import inspect
import logging
import math

import numpy as np

logger = logging.getLogger("sherpa")
info = logger.info
debug = logger.debug
error = logger.error

__all__ = ('LimitError', 'MetropolisMH', 'MH', 'Sampler',
           'Walk', 'dmvt', 'dmvnorm')


class LimitError(Exception):
    pass
class CovarError(Exception):
    pass


def rmvt(mu, sigma, dof):
    """
    Sample the non-central multivariate Student's t distribution using
    deviates from the multivariate normal and chi-squared distributions.

    Source: Kshirsagar method, taken from the function `rmvt` in the
    R package `mvtnorm`.
    http://cran.r-project.org/web/packages/mvtnorm/index.html

    `mu`     the current sample
    `sigma`  covariance matrix
    `dof`    degrees of freedom

    Returns a sample from the multivariate t distribution in the
    shape of `mu`.
    """
    if dof < 1:
        raise ValueError("The degrees of freedom must be at least 1")

    # Kshirsagar construction: if Z ~ N(0, sigma) and Q ~ chi^2(dof)
    # are independent, then mu + Z / sqrt(Q / dof) follows a
    # multivariate t distribution with dof degrees of freedom.
    zero_vec = np.zeros_like(mu)
    q = np.random.chisquare(dof, 1)[0]
    nsample = np.random.multivariate_normal(zero_vec, sigma)
    proposal = mu + nsample / np.sqrt(q / dof)
    return proposal
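
# A quick sanity check for rmvt (illustrative, not part of the module):
# for dof > 2 the covariance of the multivariate t is
# dof / (dof - 2) * sigma, so the empirical covariance of many draws
# should approach that value.
#
#     mu = np.array([1.0, 2.0])
#     sigma = np.array([[1.0, 0.3], [0.3, 2.0]])
#     draws = np.array([rmvt(mu, sigma, 5) for _ in range(20000)])
#     print(np.cov(draws, rowvar=False))    # roughly 5 / 3 * sigma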
def dmvt(x, mu, sigma, dof, log=True, norm=False):
    """
    Probability density of a multivariate Student's t distribution
    """

    # if np.min(np.linalg.eigvalsh(sigma)) <= 0:
    #     raise ValueError("Error: sigma is not positive definite")
    if np.max(np.abs(sigma - sigma.T)) >= 1e-9:
        raise ValueError("Error: sigma is not symmetric")

    p = mu.size

    # log density, unnormalized
    val = (-0.5 * np.log(np.linalg.det(sigma)) -
           (dof + p) / 2.0 *
           np.log(dof + np.dot(x - mu,
                               np.dot(np.linalg.inv(sigma), x - mu))))

    # log density, normalized
    if norm:
        lgam = math.lgamma
        val += (lgam((dof + p) / 2.) - lgam(dof / 2.) -
                (p / 2.) * np.log(np.pi) + (dof / 2.) * np.log(dof))

    # density
    if not log:
        val = np.exp(val)

    return val
def dmvnorm(x, mu, sigma, log=True):
    """
    Probability density of a multivariate normal distribution
    """

    # if np.min(np.linalg.eigvalsh(sigma)) <= 0:
    #     raise ValueError("Error: sigma is not positive definite")
    if np.max(np.abs(sigma - sigma.T)) >= 1e-9:
        raise ValueError("Error: sigma is not symmetric")

    # log density
    logdens = (-mu.size / 2.0 * np.log(2 * np.pi) -
               1 / 2.0 * np.log(np.linalg.det(sigma)) -
               1 / 2.0 * np.dot(x - mu,
                                np.dot(np.linalg.inv(sigma), x - mu)))

    if log:
        return logdens

    # density
    dens = np.exp(logdens)
    return dens
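
# Both densities can be cross-checked against SciPy (an optional
# illustration; assumes SciPy >= 1.6 for multivariate_t, which is not
# a dependency of this module):
#
#     from scipy import stats
#     x = np.array([1.5, 2.5])
#     mu = np.array([1.0, 2.0])
#     sigma = np.array([[1.0, 0.3], [0.3, 2.0]])
#     assert np.isclose(dmvnorm(x, mu, sigma),
#                       stats.multivariate_normal.logpdf(x, mu, sigma))
#     assert np.isclose(dmvt(x, mu, sigma, 4, norm=True),
#                       stats.multivariate_t(mu, sigma, df=4).logpdf(x))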
# A simple percent-complete progress bar, currently unused (it would
# also need the sys and time modules to be imported):
#
# def progress_bar(current, total, tstart, name=None):
#     """simple progress in percent"""
#     if not sys.stdout.isatty():
#         return
#     percent = 0.0
#     if current != 0 and total != 0:
#         percent = current * 100. / float(total)
#     output = '\r%.1f %%' % percent
#     if name is not None:
#         output = '\r%s: %.1f %%' % (name, percent)
#     sys.stdout.write(output)
#     if current == total:
#         sys.stdout.write(' finished in %s secs \n' % (time.time() - tstart))
#     sys.stdout.flush()
class Walk():

    def __init__(self, sampler=None, niter=1000):
        self._sampler = sampler
        self.niter = int(niter)
    def set_sampler(self, sampler):
        self._sampler = sampler
    def __call__(self, **kwargs):

        if self._sampler is None:
            raise AttributeError("sampler object has not been set, " +
                                 "please use set_sampler()")

        pars, stat = self._sampler.init(**kwargs)

        # set up the proposal variables
        npars = len(pars)
        niter = self.niter
        nelem = niter + 1

        proposals = np.zeros((nelem, npars), dtype=float)
        proposals[0] = pars.copy()

        stats = np.zeros(nelem, dtype=float)
        stats[0] = stat

        acceptflag = np.zeros(nelem, dtype=bool)

        # Iterations
        # - no burn-in at present
        # - the 0th element of the params array is the input value
        # - we loop until all parameters are within the allowable
        #   range; should there be some check to ensure we are not
        #   rejecting a huge number of proposals, which would indicate
        #   that the limits need increasing or very low s/n data?
        #
        # tstart = time.time()
        try:
            for ii in range(niter):
                # progress_bar(ii, niter, tstart,
                #              self._sampler.__class__.__name__)

                jump = ii + 1
                current_params = proposals[ii]
                current_stat = stats[ii]

                # Assume the proposal is rejected by default
                proposals[jump] = current_params
                stats[jump] = current_stat
                # acceptflag[jump] = False

                # Draw a proposal
                try:
                    proposed_params = self._sampler.draw(current_params)
                except CovarError:
                    error("Covariance matrix failed! " +
                          str(current_params))
                    # automatically reject if the covar is malformed
                    self._sampler.reject()
                    continue

                proposed_params = np.asarray(proposed_params)
                try:
                    proposed_stat = self._sampler.calc_stat(proposed_params)
                except LimitError:
                    # automatically reject the proposal if outside
                    # the hard limits
                    self._sampler.reject()
                    continue

                # Accept this proposal?
                if self._sampler.accept(current_params, current_stat,
                                        proposed_params, proposed_stat):
                    proposals[jump] = proposed_params
                    stats[jump] = proposed_stat
                    acceptflag[jump] = True
                else:
                    self._sampler.reject()

        finally:
            self._sampler.tear_down()

        # progress_bar(niter, niter, tstart,
        #              self._sampler.__class__.__name__)

        params = proposals.transpose()
        return (stats, acceptflag, params)
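
# The tuple returned by Walk.__call__ has a fixed layout (the shapes
# follow directly from the code above; the names are illustrative):
#
#     stats, accept, params = walk()
#     # stats  : shape (niter + 1,)       statistic value at each draw
#     # accept : shape (niter + 1,)       True where a proposal was accepted
#     # params : shape (npars, niter + 1) one row per parameter; column 0
#     #                                   holds the starting values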
class Sampler():

    def __init__(self):
        # Get the default values of the keyword arguments to init().
        # Use inspect.signature when available (inspect.getargspec is
        # deprecated and has been removed in recent Python versions).
        try:
            sig = inspect.signature(self.init)
            opts = [(p.name, p.default)
                    for p in sig.parameters.values()
                    if p.kind == p.POSITIONAL_OR_KEYWORD and
                    p.default != p.empty]
        except AttributeError:
            argspec = inspect.getargspec(self.init)
            first = len(argspec[0]) - len(argspec[3])
            opts = zip(argspec[0][first:], argspec[3][0:])

        self._opts = dict(opts)
        self.walk = None
    def init(self):
        raise NotImplementedError

    def draw(self, current, **kwargs):
        raise NotImplementedError

    def accept(self, current, current_stat, proposal, proposal_stat,
               **kwargs):
        raise NotImplementedError

    def reject(self):
        raise NotImplementedError

    def calc_stat(self, proposed_params):
        raise NotImplementedError

    def tear_down(self):
        raise NotImplementedError
class MH(Sampler):
    """The Metropolis-Hastings Sampler"""

    def __init__(self, fcn, sigma, mu, dof, *args):
        self.fcn = fcn
        self._dof = dof
        self._mu = np.array(mu)
        self._sigma = np.array(sigma)
        self.accept_func = None
        self.currently_metropolis = False
        self.prior = None

        # MH tunable parameters
        self.log = False
        self.inv = False
        self.defaultprior = True
        self.priorshape = False
        self.originalscale = True
        self.scale = 1
        self.prior_funcs = ()
        self.sigma_m = False

        Sampler.__init__(self)
    def calc_fit_stat(self, proposed_params):
        return self.fcn(proposed_params)
    def init(self, log=False, inv=False, defaultprior=True,
             priorshape=False, priors=(), originalscale=True, scale=1,
             sigma_m=False):

        if self._sigma is None or self._mu is None:
            raise AttributeError('sigma or mu is None, initialization failed')

        self.prior = np.ones(self._mu.size)
        self.defaultprior = defaultprior
        self.priorshape = np.array(priorshape)
        self.originalscale = np.array(originalscale)
        self.scale = scale
        self.prior_funcs = priors
        debug(str(self.prior_funcs))

        # If not using the default prior, the prior is calculated at
        # each iteration.
        if not defaultprior:
            if self.priorshape.size != self._mu.size:
                raise ValueError(
                    "If not using default prior, must specify a "
                    "function for the prior on each parameter")
            if self.originalscale.size != self._mu.size:
                raise ValueError(
                    "If not using default prior, must specify the "
                    "scale on which the prior is defined for each parameter")

        self.jacobian = np.zeros(self._mu.size, dtype=bool)

        # A Jacobian is needed when a parameter is transformed but its
        # prior is defined on the original scale.
        if not defaultprior:
            # if log-transformed but the prior is on the original
            # scale, the Jacobian for those parameters is needed
            if np.sum(log * self.originalscale) > 0:
                self.jacobian[log * self.originalscale] = True
            if np.sum(inv * self.originalscale) > 0:
                self.jacobian[inv * self.originalscale] = True

        self.log = np.array(log)
        if self.log.size == 1:
            self.log = np.tile(self.log, self._mu.size)

        self.inv = np.array(inv)
        if self.inv.size == 1:
            self.inv = np.tile(self.inv, self._mu.size)

        if np.sum(log * inv) > 0:
            raise TypeError(
                "Cannot specify both log and inv transformation for "
                "the same parameter")

        debug("Running Metropolis-Hastings")

        current = self._mu.copy()
        stat = self.calc_fit_stat(current)

        # include the prior
        stat = self.update(stat, self._mu)
        self.initial_stat = stat

        # Use the delta method to create the proposal distribution on
        # the log scale for the selected parameters.
        if np.sum(self.log) > 0:
            logcovar = self._sigma.copy()
            logcovar[:, self.log] = logcovar[:, self.log] / self._mu[self.log]
            logcovar[self.log] = (logcovar[self.log].T /
                                  self._mu[self.log]).T
            self._sigma = np.copy(logcovar)
            self._mu[self.log] = np.log(self._mu[self.log])
            current[self.log] = np.log(current[self.log])

        # Use the delta method to create the proposal distribution on
        # the inverse scale for the selected parameters.
        if np.sum(self.inv) > 0:
            invcovar = self._sigma.copy()
            invcovar[:, self.inv] = invcovar[:, self.inv] / (
                -1.0 * np.power(self._mu[self.inv], 2))
            invcovar[self.inv] = (invcovar[self.inv].T / (
                -1.0 * np.power(self._mu[self.inv], 2))).T
            self._sigma = np.copy(invcovar)
            self._mu[self.inv] = 1.0 / self._mu[self.inv]
            current[self.inv] = 1.0 / current[self.inv]

        self.rejections = 0

        self.sigma_m = sigma_m
        if not np.mean(sigma_m):
            self.sigma_m = self._sigma.copy()

        return (current, stat)
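    # The covariance rescaling in init() is the first-order delta
    # method: for a transformation g, cov(g(X)) is approximately
    # J cov(X) J^T with J = diag(g'(mu)). For g(x) = log(x),
    # g'(mu) = 1 / mu; for g(x) = 1 / x, g'(mu) = -1 / mu^2, and the
    # row and column divisions by -mu^2 above apply J on both sides.
    # A one-parameter illustration (hypothetical numbers):
    #
    #     var_x = 0.04
    #     mu_x = 2.0
    #     var_logx = var_x / mu_x ** 2    # approx. variance of log(x)
    #     var_invx = var_x / mu_x ** 4    # approx. variance of 1 / x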
    def update(self, stat, mu, init=True):
        """include the prior"""

        if not self.defaultprior:
            x = mu.copy()

            # Evaluate the prior on the scale it was defined on.
            if np.sum(self.originalscale) < mu.size:
                for j in range(mu.size):
                    if self.log[j] * (1 - self.originalscale[j]) > 0:
                        x[j] = np.log(x[j])
                    if self.inv[j] * (1 - self.originalscale[j]) > 0:
                        x[j] = 1.0 / x[j]

            for ii, func in enumerate(self.prior_funcs):
                if self.priorshape[ii]:
                    self.prior[ii] = func(x[ii])

        # If there is no prior then
        #    0.0 == np.sum(np.log(np.ones(mu.size)))
        stat += np.sum(np.log(self.prior))

        # Jacobian terms for parameters sampled on a transformed scale
        # but whose prior is on the original scale.
        if np.sum(self.log * self.jacobian) > 0:
            stat += np.sum(np.log(mu[self.log * self.jacobian]))

        if np.sum(self.inv * self.jacobian) > 0:
            stat_temp = np.sum(2.0 * np.log(
                np.abs(mu[self.inv * self.jacobian])))
            if init:
                stat += stat_temp
            else:
                stat -= stat_temp

        return stat
    def draw(self, current):
        """Create a new set of parameter values using the t distribution.

        Given the best-guess (mu) and current (current) set of
        parameters, along with the covariance matrix (sigma), return
        a new set of parameters.
        """
        proposal = self.mh(current)
        self.accept_func = self.accept_mh
        return proposal
    def mh(self, current):
        """MH jumping rule"""

        # The current proposal is ignored here: MH jumps from the
        # best-fit parameter values at each iteration (an independence
        # sampler).
        proposal = rmvt(self._mu, self._sigma, self._dof)
        return proposal
    def dmvt(self, x, log=True, norm=False):
        return dmvt(x, self._mu, self._sigma, self._dof, log, norm)
    def accept_mh(self, current, current_stat, proposal, proposal_stat):
        # The Metropolis-Hastings ratio for an independence proposal
        # density q centered on the best fit, computed in log space:
        #    alpha = [p(proposal) q(current)] / [p(current) q(proposal)]
        alpha = np.exp(proposal_stat + self.dmvt(current) -
                       current_stat - self.dmvt(proposal))
        return alpha
    def accept(self, current, current_stat, proposal, proposal_stat,
               **kwargs):
        """
        Should the proposal be accepted (using the Cash statistic and
        the t distribution)?
        """
        alpha = self.accept_func(current, current_stat, proposal,
                                 proposal_stat)
        u = np.random.uniform(0, 1, 1)
        return u <= alpha
    def reject(self):
        # added for test
        self.rejections += 1
    def calc_stat(self, proposed_params):

        # Momentarily transform the parameters back to the original
        # scale to evaluate the fit statistic.
        if np.sum(self.log) > 0:
            proposed_params[self.log] = np.exp(proposed_params[self.log])

        if np.sum(self.inv) > 0:
            proposed_params[self.inv] = 1.0 / proposed_params[self.inv]

        proposed_stat = self.calc_fit_stat(proposed_params)

        # putting parameters back on log scale
        if np.sum(self.log) > 0:
            proposed_params[self.log] = np.log(proposed_params[self.log])

        # putting parameters back on inverse scale
        if np.sum(self.inv) > 0:
            proposed_params[self.inv] = 1.0 / proposed_params[self.inv]

        # include the prior
        proposed_stat = self.update(proposed_stat, proposed_params, False)

        return proposed_stat
    def tear_down(self):
        pass
class MetropolisMH(MH):
    """The Metropolis Metropolis-Hastings Sampler"""

    def __init__(self, fcn, sigma, mu, dof, *args):
        MH.__init__(self, fcn, sigma, mu, dof, *args)

        # Count how often each jumping rule is used; p_M is the
        # probability of taking a Metropolis jump.
        self.num_mh = 0
        self.num_metropolis = 0
    def init(self, log=False, inv=False, defaultprior=True,
             priorshape=False, priors=(), originalscale=True, scale=1,
             sigma_m=False, p_M=.5):

        debug("Running Metropolis with Metropolis-Hastings")

        self.p_M = p_M
        debug("X ~ uniform(0,1) <= %.2f --> Metropolis" % float(p_M))
        debug("X ~ uniform(0,1) > %.2f --> Metropolis-Hastings" % float(p_M))

        return MH.init(self, log, inv, defaultprior, priorshape, priors,
                       originalscale, scale, sigma_m)
    def draw(self, current):
        """Create a new set of parameter values using the t distribution.

        Given the best-guess (mu) and current (current) set of
        parameters, along with the covariance matrix (sigma), return
        a new set of parameters.
        """
        # Mix the two jumping rules: with probability p_M take a
        # Metropolis jump from the current draw, otherwise take a
        # Metropolis-Hastings jump from the best fit.
        u = np.random.uniform(0, 1, 1)
        proposal = None
        if u <= self.p_M:
            proposal = self.metropolis(current)
            self.accept_func = self.accept_metropolis
            self.num_metropolis += 1
        else:
            proposal = self.mh(current)
            self.accept_func = self.accept_mh
            self.num_mh += 1

        return proposal
    def metropolis(self, current):
        """Metropolis jumping rule"""

        # Metropolis (within MetropolisMH) jumps from the current
        # accepted parameter proposal at each iteration.
        proposal = rmvt(current, self.sigma_m * self.scale, self._dof)
        return proposal
    def accept_metropolis(self, current, current_stat, proposal,
                          proposal_stat):
        # The Metropolis jumping rule is symmetric, so the proposal
        # density cancels out of the acceptance ratio.
        alpha = np.exp(proposal_stat - current_stat)
        return alpha
    def tear_down(self):
        num = float(self.num_metropolis + self.num_mh)
        if num > 0:
            debug("p_M: %g, Metropolis: %g%%" %
                  (self.p_M, 100 * self.num_metropolis / num))
            debug("p_M: %g, Metropolis-Hastings: %g%%" %
                  (self.p_M, 100 * self.num_mh / num))