Source code for sherpa.sim.sample

#
#  Copyright (C) 2011, 2015, 2016  Smithsonian Astrophysical Observatory
#
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License along
#  with this program; if not, write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#

from six.moves import zip as izip
import numpy
import numpy.random

from sherpa.estmethods import Covariance, Confidence
from sherpa.utils.err import EstErr
from sherpa.utils import parallel_map, NoNewAttributesAfterInit


import logging
warning = logging.getLogger("sherpa").warning


__all__ = ['multivariate_t', 'multivariate_cauchy',
           'normal_sample', 'uniform_sample', 't_sample',
           'ParameterScaleVector', 'ParameterScaleMatrix',
           'UniformParameterSampleFromScaleVector',
           'NormalParameterSampleFromScaleVector',
           'NormalParameterSampleFromScaleMatrix',
           'StudentTParameterSampleFromScaleMatrix',
           'NormalSampleFromScaleMatrix', 'NormalSampleFromScaleVector',
           'UniformSampleFromScaleVector', 'StudentTSampleFromScaleMatrix',
           ]


[docs]def multivariate_t(mean, cov, df, size=None):
    """
    multivariate_t(mean, cov, df[, size])

    Draw random deviates from a multivariate Student's T distribution Such a
    distribution is specified by its mean covariance matrix, and degrees of
    freedom.  These parameters are analogous to the mean (average or "center"),
    variance (standard deviation, or "width," squared), and the degrees of
    freedom of the one-dimensional t distribution.

    Parameters
    ----------
    mean : 1-D array_like, length N
        Mean of the N-dimensional distribution
    cov : 2-D array_like, shape (N, N)
        Covariate matrix of the distribution.  Must be symmetric and
        positive semi-definite for "physically meaningful" results.
    df : int
        Degrees of freedom of the distribution
    size : tuple of ints, optional
        Given a shape of, for example, ``(m,n,k)``, ``m*n*k`` samples are
        generated, and packed in an `m`-by-`n`-by-`k` arrangement.  Because
        each sample is `N`-dimensional, the output shape is ``(m,n,k,N)``.
        If no shape is specified, a single (`N`-D) sample is returned.

    Returns
    -------
    out : ndarray
        The drawn samples, of shape *size*, if that was provided.  If not,
        the shape is ``(N,)``.

        In other words, each entry ``out[i,j,...,:]`` is an N-dimensional
        value drawn from the distribution.

    Is this right?  This needs to be checked!  A reference to the literature
    the better
    """
    df = float(df)
    mean = numpy.asarray(mean)
    normal = numpy.random.multivariate_normal(
        numpy.zeros_like(mean), cov, size)
    # x = numpy.sqrt(numpy.random.chisquare(df)/df)
    # numpy.divide(normal, x, normal)
    x = numpy.sqrt(numpy.random.chisquare(df, size) / df)
    numpy.divide(normal, x[numpy.newaxis].T, normal)
    numpy.add(mean, normal, normal)
    x = normal
    return x


[docs]def multivariate_cauchy(mean, cov, size=None):
    """
    This needs to be checked too! A reference to the literature the better
    """
    return multivariate_t(mean, cov, 1, size=None)


[docs]class ParameterScale(NoNewAttributesAfterInit):

    sigma = 1

[docs]    def get_scales(self, fit, myscale=None):
        raise NotImplementedError


[docs]class ParameterScaleVector(ParameterScale):

[docs]    def get_scales(self, fit, myscales=None):

        scales = []
        thawedpars = [par for par in fit.model.pars if not par.frozen]

        if None == myscales:

            oldestmethod = fit.estmethod

            covar = Covariance()
            covar.config['sigma'] = self.sigma
            fit.estmethod = Covariance()

            try:
                r = fit.est_errors()
            finally:
                fit.estmethod = oldestmethod

            for par, val, lo, hi in izip(thawedpars, r.parvals, r.parmins, r.parmaxes):
                scale = None
                if lo is not None and hi is not None:
                    scale = numpy.abs(lo)
                else:
                    warning("Covariance failed for '%s', trying Confidence..." %
                            par.fullname)

                    conf = Confidence()
                    conf.config['sigma'] = self.sigma
                    fit.estmethod = conf
                    try:
                        t = fit.est_errors(parlist=(par,))
                        if t.parmins[0] is not None and t.parmaxes[0] is not None:
                            scale = numpy.abs(t.parmins[0])

                        else:

                            if t.parmins[0] is None and t.parmaxes[0] is not None:
                                scale = numpy.abs(t.parmaxes[0])

                            else:

                                warning('1 sigma bounds for parameter ' +
                                        par.fullname +
                                        ' could not be found, using soft limit minimum')
                                if 0.0 == numpy.abs(par.min):
                                    scale = 1.0e-16
                                else:
                                    scale = numpy.abs(par.min)

                    finally:
                        fit.estmethod = oldestmethod
                scales.append(scale)

        else:
            if not numpy.iterable(myscales):
                raise TypeError(
                    "scales option must be iterable of length %d " % len(thawedpars))
            scales = list(map(abs, myscales))
        scales = numpy.asarray(scales).transpose()
        return scales


[docs]class ParameterScaleMatrix(ParameterScale):

[docs]    def get_scales(self, fit, myscales=None):

        def get_size_of_covar(pars):
            thawedpars = [par for par in pars if not par.frozen]
            npar = len(thawedpars)
            msg = 'scales must be a numpy array of size (%d,%d)' % (npar, npar)
            return npar, msg

        if myscales is None:
            oldestmethod = fit.estmethod
            fit.estmethod = Covariance()

            try:
                r = fit.est_errors()
            finally:
                fit.estmethod = oldestmethod

            cov = r.extra_output

        else:
            if isinstance(myscales, (numpy.ndarray)):
                npar, msg = get_size_of_covar(fit.model.pars)
                if (npar, npar) == myscales.shape:
                    cov = myscales
                else:
                    raise EstErr(msg)
            else:
                npar, msg = get_size_of_covar(fit.model.pars)
                raise EstErr(msg)
            cov = myscales

        if cov is None:
            raise EstErr('nocov')

        cov = numpy.asarray(cov)

        # Investigate spectral decomposition to avoid requirement that the cov be
        # semi-positive definite.  Nevermind, NumPy already uses SVD to generate
        # deviates from a multivariate normal.  An alternative is to use Cholesky
        # decomposition, but it assumes that the matrix is semi-positive
        # definite.
        if numpy.min(numpy.linalg.eigvalsh(cov)) <= 0:
            raise TypeError("The covariance matrix is not positive definite")

        return cov


[docs]class ParameterSampleFromScaleVector(NoNewAttributesAfterInit):

    def __init__(self):
        self.scale = ParameterScaleVector()
        NoNewAttributesAfterInit.__init__(self)

[docs]    def get_sample(self):
        raise NotImplementedError


[docs]class ParameterSampleFromScaleMatrix(NoNewAttributesAfterInit):

    def __init__(self):
        self.scale = ParameterScaleMatrix()
        NoNewAttributesAfterInit.__init__(self)

[docs]    def get_sample(self):
        raise NotImplementedError


[docs]class UniformParameterSampleFromScaleVector(ParameterSampleFromScaleVector):

[docs]    def get_sample(self, fit, factor=4, num=1):
        vals = numpy.array(fit.model.thawedpars)
        scales = self.scale.get_scales(fit)
        samples = [numpy.random.uniform(val - factor * abs(scale),
                                        val + factor * abs(scale),
                                        int(num)) for val, scale in izip(vals, scales)]
        return numpy.asarray(samples).T


[docs]class NormalParameterSampleFromScaleVector(ParameterSampleFromScaleVector):

[docs]    def get_sample(self, fit, myscales=None, num=1):
        vals = numpy.array(fit.model.thawedpars)
        scales = self.scale.get_scales(fit, myscales)
        samples = [numpy.random.normal(
            val, scale, int(num)) for val, scale in izip(vals, scales)]
        return numpy.asarray(samples).T


[docs]class NormalParameterSampleFromScaleMatrix(ParameterSampleFromScaleMatrix):

[docs]    def get_sample(self, fit, mycov=None, num=1):
        vals = numpy.array(fit.model.thawedpars)
        cov = self.scale.get_scales(fit, mycov)
        return numpy.random.multivariate_normal(vals, cov, int(num))


[docs]class StudentTParameterSampleFromScaleMatrix(ParameterSampleFromScaleMatrix):

[docs]    def get_sample(self, fit, dof, num=1):
        vals = numpy.array(fit.model.thawedpars)
        cov = self.scale.get_scales(fit)
        return multivariate_t(vals, cov, dof, int(num))


class Evaluate(object):
    """
    Callable class for _sample_stat multiprocessing call
    This class used to be a nested function, which can't be pickled and results in
    python3 failing to execute the code.
    """
    def __init__(self, fit):
        self.fit = fit

    def __call__(self, sample):
        self.fit.model.thawedpars = sample
        return self.fit.calc_stat()


def _sample_stat(fit, samples, numcores=None):

    oldvals = fit.model.thawedpars

    try:
        fit.model.startup()
        stats = numpy.asarray(parallel_map(Evaluate(fit), samples, numcores))
    finally:
        fit.model.teardown()
        fit.model.thawedpars = oldvals

    return numpy.concatenate([stats[:, numpy.newaxis], samples], axis=1)


[docs]class NormalSampleFromScaleMatrix(NormalParameterSampleFromScaleMatrix):

[docs]    def get_sample(self, fit, num=1, numcores=None):
        samples = NormalParameterSampleFromScaleMatrix.get_sample(
            self, fit, num=num)
        return _sample_stat(fit, samples, numcores)


[docs]class NormalSampleFromScaleVector(NormalParameterSampleFromScaleVector):

[docs]    def get_sample(self, fit, num=1, numcores=None):
        samples = NormalParameterSampleFromScaleVector.get_sample(
            self, fit, num=num)
        return _sample_stat(fit, samples, numcores)


[docs]class UniformSampleFromScaleVector(UniformParameterSampleFromScaleVector):

[docs]    def get_sample(self, fit, num=1, factor=4, numcores=None):
        samples = UniformParameterSampleFromScaleVector.get_sample(self, fit,
                                                                   factor=factor, num=num)
        return _sample_stat(fit, samples, numcores)


[docs]class StudentTSampleFromScaleMatrix(StudentTParameterSampleFromScaleMatrix):

[docs]    def get_sample(self, fit, num=1, dof=2, numcores=None):
        samples = StudentTParameterSampleFromScaleMatrix.get_sample(
            self, fit, dof, num)
        return _sample_stat(fit, samples, numcores)


[docs]def normal_sample(fit, num=1, sigma=1, correlate=True, numcores=None):
    """Sample the fit statistic by taking the parameter values
    from a normal distribution.

    For each iteration (sample), change the thawed parameters by
    drawing values from a uni- or multi-variate normal (Gaussian)
    distribution, and calculate the fit statistic.

    Parameters
    ----------
    fit :
       The fit results.
    num : int, optional
       The number of samples to use (default is `1`).
    sigma : number, optional
       The width of the normal distribution (the default
       is `1`).
    correlate : bool, optional
       Should a multi-variate normal be used, with parameters
       set by the covariance matrix (`True`) or should a
       uni-variate normal be used (`False`)?
    numcores : optional
       The number of CPU cores to use. The default is to use all
       the cores on the machine.

    Returns
    -------
    samples :
       A NumPy array table with the first column representing the
       statistic and later columns the parameters used.

    See Also
    --------
    t_sample : Sample from the Student's t-distribution.
    uniform_sample : Sample from a uniform distribution.

    Notes
    -----
    All thawed model parameters are sampled from the Gaussian
    distribution, where the mean is set as the best-fit parameter
    value and the variance is determined by the diagonal elements
    of the covariance matrix. The multi-variate Gaussian is
    assumed by default for correlated parameters, using the
    off-diagonal elements of the covariance matrix.

    """
    sampler = NormalSampleFromScaleVector()
    sampler.scale.sigma = sigma
    if correlate:
        sampler = NormalSampleFromScaleMatrix()
    return sampler.get_sample(fit, num, numcores)


[docs]def uniform_sample(fit, num=1, factor=4, numcores=None):
    """Sample the fit statistic by taking the parameter values
    from an uniform distribution.

    For each iteration (sample), change the thawed parameters by
    drawing values from a uniform distribution, and calculate the
    fit statistic.

    Parameters
    ----------
    fit :
       The fit results.
    num : int, optional
       The number of samples to use (default is `1`).
    factor : number, optional
       Multiplier to expand the scale parameter (default is `4`).
    numcores : optional
       The number of CPU cores to use. The default is to use all
       the cores on the machine.

    Returns
    -------
    samples :
       A NumPy array table with the first column representing the
       statistic and later columns the parameters used.

    See Also
    --------
    normal_sample : Sample from a normal distribution.
    t_sample : Sample from the Student's t-distribution.

    """
    sampler = UniformSampleFromScaleVector()
    sampler.scale.sigma = 1
    return sampler.get_sample(fit, num, factor, numcores)


[docs]def t_sample(fit, num=1, dof=2, numcores=None):
    """Sample the fit statistic by taking the parameter values from
    a Student's t-distribution.

    For each iteration (sample), change the thawed parameters
    by drawing values from a Student's t-distribution, and
    calculate the fit statistic.

    Parameters
    ----------
    fit :
       The fit results.
    num : int, optional
       The number of samples to use (default is `1`).
    dof : optional
       The number of degrees of freedom to use (the default
       is to use the number from the current fit).
    numcores : optional
       The number of CPU cores to use. The default is to use all
       the cores on the machine.

    Returns
    -------
    samples :
       A NumPy array table with the first column representing the
       statistic and later columns the parameters used.

    See Also
    --------
    normal_sample : Sample from the normal distribution.
    uniform_sample : Sample from a uniform distribution.

    """
    sampler = StudentTSampleFromScaleMatrix()
    return sampler.get_sample(fit, num, dof, numcores)