Source code for sherpa.sim.simulate

#
#  Copyright (C) 2010, 2016, 2019, 2020, 2021
#  Smithsonian Astrophysical Observatory
#
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License along
#  with this program; if not, write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#


"""
Classes for PPP simulations
"""

from copy import deepcopy
import logging

import numpy

from sherpa.stats import Cash, CStat
from sherpa.optmethods import NelderMead
from sherpa.estmethods import Covariance
from sherpa.utils import parallel_map, poisson_noise, NoNewAttributesAfterInit
from sherpa.fit import Fit
from sherpa.sim.sample import NormalParameterSampleFromScaleMatrix

logger = logging.getLogger("sherpa")
debug = logger.debug
info = logger.info

_tol = numpy.finfo(float).eps

__all__ = ('LikelihoodRatioTest', 'LikelihoodRatioResults')


[docs]class LikelihoodRatioResults(NoNewAttributesAfterInit): """The results of a likelihood ratio comparison simulation. Attributes ---------- ratios : numpy array The likelihood ratio for each simulation. stats : numpy array The fit statistic for the null and alternative models for each simulation. The shape is (nsim, 2). samples : numpy array The parameter samples array for each simulation. lr : number The likelihood ratio of the observed data for the null and alternate models. ppp : number The p value of the observed data for the null and alternate models. null : number The fit statistic of the null model on the observed data. alt : number The fit statistic of the alternate model on the observed data. """ _fields = ('ratios', 'stats', 'samples', 'lr', 'ppp', 'null', 'alt') def __init__(self, ratios, stats, samples, lr, ppp, null, alt): self.ratios = numpy.asarray(ratios) self.stats = numpy.asarray(stats) self.samples = numpy.asarray(samples) self.lr = float(lr) self.ppp = float(ppp) self.null = float(null) self.alt = float(alt) NoNewAttributesAfterInit.__init__(self) def __repr__(self): return '<Likelihood ratio results instance>' def __str__(self): samples = self.samples if self.samples is not None: samples = numpy.array2string(self.samples, separator=',', precision=4, suppress_small=False) stats = self.stats if self.stats is not None: stats = numpy.array2string(self.stats, separator=',', precision=4, suppress_small=False) ratios = self.ratios if self.ratios is not None: ratios = numpy.array2string(self.ratios, separator=',', precision=4, suppress_small=False) output = '\n'.join([ 'samples = %s' % samples, 'stats = %s' % stats, 'ratios = %s' % ratios, 'null = %s' % repr(self.null), 'alt = %s' % repr(self.alt), 'lr = %s' % repr(self.lr), 'ppp = %s' % repr(self.ppp) ]) return output
[docs] def format(self): """Convert the object to a string representation for display purposes. Returns ------- txt : str A string representation of the data stored in the object. """ s = 'Likelihood Ratio Test\n' s += 'null statistic = %s\n' % str(self.null) s += 'alt statistic = %s\n' % str(self.alt) s += 'likelihood ratio = %s\n' % str(self.lr) if self.ppp == 0.0: s += 'p-value < %s' % str(1./len(self.samples)) else: s += 'p-value = %s' % str(self.ppp) return s
class LikelihoodRatioTestWorker(): """ Worker class for LikelihoodRatioTest """ def __init__(self, null_fit, alt_fit, null_vals, alt_vals): self.null_fit = null_fit self.alt_fit = alt_fit self.null_vals = null_vals self.alt_vals = alt_vals def __call__(self, proposal): return LikelihoodRatioTest.calculate(self.null_fit, self.alt_fit, proposal, self.null_vals, self.alt_vals)
[docs]class LikelihoodRatioTest(NoNewAttributesAfterInit): """Likelihood Ratio Test. The likelihood ratio [1]_, D, is defined as:: ( likelihood for null model ) D = -2 ln ----------------------------------- ( likelihood for alternative model) = -2 ln(likelihood for null model) + 2 ln(likelihood for alternative model) Since the Cash and C fit statistics are defined as -2 ln(likelihood), the equation reduces to:: D = statistic for null model - statistic for alternative model References ---------- .. [1] http://en.wikipedia.org/wiki/Likelihood-ratio_test """
[docs] @staticmethod def calculate(nullfit, altfit, proposal, null_vals, alt_vals): # FIXME: only null perturbed? nullfit.model.thawedpars = proposal # Fake using poisson_noise with null fake = poisson_noise(nullfit.data.eval_model(nullfit.model)) # Set faked data for both nullfit and altfit nullfit.data.set_dep(fake) # Start the faked fit at initial null best-fit values # nullfit.model.thawedpars = null_vals # Fit with null model nullfr = nullfit.fit() debug(nullfr.format()) null_stat = nullfr.statval debug("statistic null = " + repr(null_stat)) # nullfit and altfit BOTH point to same faked dataset assert id(nullfit.data) == id(altfit.data) assert (nullfit.data.get_dep() == altfit.data.get_dep()).all() # Start the faked fit at the initial alt best-fit values # altfit.model.thawedpars = alt_vals debug("proposal: " + repr(proposal)) debug("alt model") debug(str(altfit.model)) # Set alt model and fit altfr = altfit.fit() debug(altfr.format()) debug(str(altfit.model)) alt_stat = altfr.statval debug("statistic alt = " + repr(alt_stat)) LR = -(alt_stat - null_stat) debug("LR = " + repr(LR)) return [null_stat, alt_stat, LR]
[docs] @staticmethod def run(fit, null_comp, alt_comp, conv_mdl=None, stat=None, method=None, niter=500, numcores=None): if stat is None: stat = CStat() if method is None: method = NelderMead() if not isinstance(stat, (Cash, CStat)): raise TypeError("Sherpa fit statistic must be Cash or CStat" + " for likelihood ratio test") niter = int(niter) alt = alt_comp null = null_comp oldaltvals = numpy.array(alt.thawedpars) oldnullvals = numpy.array(null.thawedpars) data = fit.data if conv_mdl is not None: # Copy the PSF null_conv_mdl = deepcopy(conv_mdl) alt = conv_mdl(alt_comp) if hasattr(conv_mdl, 'fold'): conv_mdl.fold(data) # Convolve the null model null = null_conv_mdl(null_comp) if hasattr(null_conv_mdl, 'fold'): null_conv_mdl.fold(data) nullfit = Fit(data, null, stat, method, Covariance()) # Fit with null model nullfit_results = nullfit.fit() debug(nullfit_results.format()) null_stat = nullfit_results.statval null_vals = nullfit_results.parvals # Calculate niter samples using null best-fit and covariance sampler = NormalParameterSampleFromScaleMatrix() samples = sampler.get_sample(nullfit, None, niter) # Fit with alt model, null component starts at null's best fit params. altfit = Fit(data, alt, stat, method, Covariance()) altfit_results = altfit.fit() debug(altfit_results.format()) alt_stat = altfit_results.statval alt_vals = altfit_results.parvals LR = -(alt_stat - null_stat) olddep = data.get_dep(filter=False) try: statistics = parallel_map( LikelihoodRatioTestWorker(nullfit, altfit, null_vals, alt_vals), samples, numcores ) finally: data.set_dep(olddep) alt.thawedpars = list(oldaltvals) null.thawedpars = list(oldnullvals) debug("statistic null = " + repr(null_stat)) debug("statistic alt = " + repr(alt_stat)) debug("LR = " + repr(LR)) statistics = numpy.asarray(statistics) pppvalue = numpy.sum(statistics[:, 2] > LR) / (1.0 * niter) debug('ppp value = ' + str(pppvalue)) return LikelihoodRatioResults(statistics[:, 2], statistics[:, 0:2], samples, LR, pppvalue, null_stat, alt_stat)