Source code for sherpa.data

#
#  Copyright (C) 2008, 2015 - 2017, 2019 - 2024
#  Smithsonian Astrophysical Observatory
#
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License along
#  with this program; if not, write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#

"""Tools for creating, storing, inspecting, and manipulating data sets.

The main classes for representing data sets are `Data1D`,
`Data1DInt`, and `Data2D`, to handle (x, y), (xlo,
xhi, y), and (x0, x1, y) data, although there are also
more-specialized cases, such as `Data1DAsymmetricErrs`. These
classes build on the `Data` class, which supports dynamic
filtering of data - to select a subset of the data range - as well as
data access and model evaluation to match the data range.

The `Filter` class is used to handle data filtering - that
is, to combine filters such as selecting the range a to b (``notice``)
and hiding the range c to d (``ignore``). This is used with the
`DataSpace1D` and `DataSpace2D` classes to handle
evaluating models on grids different from the data, and then
converting back to the data space, whether by rebinning or
interpolation.

Design
------

The design for the `Data` class assumes

- the basic fields are the independent axis - which is thought of as a
  tuple of arrays - and the dependent axis, where normally the
  independent axis is labelled starting with ``x``, and the dependent axis
  ``y``.

- there are a number of related fields - such as the statistical and
  systematic errors - that follow the same behavior as the
  independent and dependent axes.

- fields are converted to `ndarray` when read in.

- the independent axes can either be points (`~sherpa.models.regrid.PointAxis`)
  or integrated (`~sherpa.models.regrid.IntegratedAxis`). There is
  currently no support for an object with a combination of point and
  integrated axes but it could be added.

- the independent axis is mediated by a "Data Space" (`DataSpaceND`,
  `DataSpace1D`, `IntegratedDataSpace1D`, `DataSpace2D`, and
  `IntegratedDataSpace2D`).

- `Data` objects contain the `~Data.ndim` field to indicate the number of
  independent axes the object supports.

- to support arbitrary dimensions, the data is stored as
  one-dimensional arrays.

- `Data` objects can be created with no data, but are fixed once an
  axis - expected to be the independent axis but it need not be - is
  set (see the `~Data.size` attribute). In general the data is
  assumed to be set when the object is created.

- there are checks to ensure that the data has the correct size, shape,
  and potentially data type, but this is not intended to catch all
  possible problems.

- Numpy masked arrays can be used to initialize the dependent variable
  and the mask is converted to the format of the `~Data.mask`
  attribute of the `Data` object, taking into account that for Sherpa
  a value of True indicates a valid quantity, while the opposite is
  true in numpy (see the example below).

  In general, it is expected that any filtering has been applied
  before the `Data` object is created.

- the independent axes are marked as read-only, so the only way to change
  a value is to replace the whole axis, in which case any existing
  filter will be removed.
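
For example, a minimal sketch of the mask conversion described above
(the values here are illustrative):

>>> import numpy as np
>>> y = np.ma.masked_array([3.0, 4.0, 5.0], mask=[False, True, False])
>>> d = Data1D('masked', [1, 2, 3], y)
>>> d.mask
array([ True, False,  True])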

Notebook support
----------------

The Data objects support the rich display protocol of IPython, with
HTML display of a table of information highlighting the relevant data.
Examples can be found at [NoteBook]_.

References
----------

.. [NoteBook] https://sherpa.readthedocs.io/en/latest/NotebookSupport.html

Examples
--------

Create a data set representing the independent axis (``x``) and
dependent axis (``y``), then filter it to select the range 500 to 700
and remove the range 520 to 530:

>>> import numpy as np
>>> x = np.arange(1000)
>>> y = np.random.normal(size=1000)
>>> d1 = Data1D('example', x, y)
>>> d1.notice(500, 700)
>>> d1.ignore(520, 530)
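
The selected ranges can then be inspected with `~Data1D.get_filter`;
since the notice and ignore limits are inclusive, the points 520 to
530 are excluded:

>>> d1.get_filter(format='%i')
'500:519,531:700'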

"""

from abc import ABCMeta
import logging
from typing import Any, Literal, Optional, Sequence, Union, \
    overload
import warnings

import numpy as np

from sherpa.models.regrid import EvaluationSpace1D, IntegratedAxis, PointAxis
from sherpa.utils import NoNewAttributesAfterInit, formatting, \
    print_fields, create_expr, create_expr_integrated, \
    calc_total_error, bool_cast, filter_bins
from sherpa.utils.err import DataErr
from sherpa.utils.numeric_types import SherpaFloat
from sherpa.utils.parallel import parallel_map_funcs
from sherpa.utils.types import ArrayType, ModelFunc, StatErrFunc

warning = logging.getLogger(__name__).warning


__all__ = ('Data', 'DataSimulFit', 'Data1D', 'Data1DInt',
           'Data1DAsymmetricErrs', 'Data2D', 'Data2DInt')


# The alias does not really save any characters, but it's used
# throughout the code so it seems useful.
#
FieldsType = tuple[str, ...]


@overload
def _check(array: None) -> None:
    ...

@overload
def _check(array: ArrayType) -> np.ndarray:
    ...

def _check(array):
    """Ensure the data is a 1D array, or can be converted to one."""

    # Special case support for None.
    if array is None:
        return None

    # Assume that having a shape attribute means "this is near-enough
    # an ndarray" that we don't need to convert it.
    #
    if hasattr(array, "shape"):
        # Special-case the 0-dim case
        if len(array.shape) == 0:
            raise DataErr("notanarray")
        if len(array.shape) != 1:
            raise DataErr("not1darray")

        return array

    return _check(np.asanyarray(array))


# Can we get away with assuming array is not None?
#
@overload
def _check_nomask(array: None) -> None:
    ...

@overload
def _check_nomask(array: ArrayType) -> np.ndarray:
    ...

def _check_nomask(array):
    if hasattr(array, 'mask'):
        warnings.warn(f'Input array {array} has a mask attribute. '
                      'Because masks are supported for dependent '
                      'variables only, the mask attribute of the '
                      'independent array is ignored and values '
                      '"behind the mask" are used.')

    # Ensure we do NumPy conversions after checking for mask to
    # make sure we don't end up removing .mask in the following.
    #
    return _check(array)


@overload
def _check_dep(array: None) -> tuple[None, Literal[True]]:
    ...

@overload
def _check_dep(array: ArrayType) -> tuple[np.ndarray, bool]:
    ...

def _check_dep(array):
    if not hasattr(array, 'mask'):
        return _check(array), True

    # We know the mask convention is opposite to sherpa
    if isinstance(array, np.ma.MaskedArray):
        return _check(array), ~array.mask

    # We don't know what the mask convention is
    warnings.warn(f'The format of the mask for array {array} is not '
                  'supported, so the mask is ignored and values '
                  '"behind the mask" are used. Set the .mask '
                  'attribute manually or use the "set_filter" '
                  'function.')
    return _check(array), True


class DataSpace1D(EvaluationSpace1D):
    """Class for representing 1-D Data Space.

    Data Spaces are spaces that describe the data domain. As models
    can be evaluated over data spaces, data spaces can be considered
    evaluation spaces themselves. However, this "is-a" relationship is
    in the code mostly for convenience and could be removed in future
    versions.
    """

    def __init__(self, filter, x):
        """
        Parameters
        ----------
        filter : Filter
            a filter object that initialized this data space
        x : array_like
            the x axis of this data space
        """
        self.filter = filter
        super().__init__(_check_nomask(x))

    def get(self, filter=False):
        """Get a filtered representation of this data set. If
        `filter` is `False` this object is returned.

        Parameters
        ----------
        filter : bool
            whether the data set should be filtered before being
            returned

        Returns
        -------
        DataSpace1D
        """
        if not bool_cast(filter):
            return self

        data = self.grid[0]
        data = self.filter.apply(data)
        return DataSpace1D(self.filter, data)

    def for_model(self, model):
        """Models can be defined over arbitrary evaluation spaces.
        However, at evaluation time during a fit, the model is
        evaluated only over the data space requested by the user, and
        is set to 0 everywhere else.

        Parameters
        ----------
        model :
            The model whose evaluation space needs to be joined with
            the dataset's data space.

        Returns
        -------
        DataSpace1D
            A data space that joins this data space with the model's
            evaluation space. If the model does not have an
            evaluation space assigned to itself then `self` is
            returned.
        """
        evaluation_space = None

        if model is not None and hasattr(model, "evaluation_space") \
                and self not in model.evaluation_space:
            evaluation_space = self

        return self if evaluation_space is None else evaluation_space

class IntegratedDataSpace1D(EvaluationSpace1D):
    """Same as DataSpace1D, but for supporting integrated data sets."""

    def __init__(self, filter, xlo, xhi):
        """
        Parameters
        ----------
        filter : Filter
            a filter object that initialized this data space
        xlo : array_like
            the lower bounds array of this data space
        xhi : array_like
            the higher bounds array of this data space
        """
        xlo = _check_nomask(xlo)
        xhi = _check_nomask(xhi)

        # We only have to check if xhi is None, as if it's set then
        # the superclass will check the length. We do need to support
        # both values being None though.
        #
        if xlo is not None and xhi is None:
            raise DataErr("mismatchn", "lo", "hi", len(xlo), "None")

        self.filter = filter
        super().__init__(xlo, xhi)

    def get(self, filter=False):
        """Get a filtered representation of this data set. If
        `filter` is `False` this object is returned.

        Parameters
        ----------
        filter : bool
            whether the data set should be filtered before being
            returned

        Returns
        -------
        IntegratedDataSpace1D
        """
        if not bool_cast(filter):
            return self

        data = self.grid
        data = tuple(self.filter.apply(axis) for axis in data)
        return IntegratedDataSpace1D(self.filter, *data)

    def for_model(self, model):
        """Models can be defined over arbitrary evaluation spaces.
        However, at evaluation time during a fit, the model's
        evaluation space and the data space will be joined together
        and the model will be evaluated over the joined domain. This
        makes sure that when the models are rebinned back to the data
        space the evaluation does not have to be extrapolated from
        the model's evaluation space alone.

        Parameters
        ----------
        model :
            The model whose evaluation space needs to be joined with
            the dataset's data space.

        Returns
        -------
        IntegratedDataSpace1D
            A data space that joins this data space with the model's
            evaluation space. If the model does not have an
            evaluation space assigned to itself then `self` is
            returned.
        """
        evaluation_space = None

        if model is not None and hasattr(model, "evaluation_space") \
                and self not in model.evaluation_space:
            evaluation_space = self

        return self if evaluation_space is None else evaluation_space

# We do not inherit from EvaluationSpace2D because that would require
# that the x0 and x1 arrays form a grid (e.g. nx by ny) and we need
# to be able to support a non-grid set of points.
#
# The code is written to resemble sherpa.models.regrid.EvaluationSpace2D
# though, so that the checks added for that code are run here.
#
class DataSpace2D:
    """Class for representing 2-D Data Spaces.

    Data Spaces are spaces that describe the data domain.

    Parameters
    ----------
    filter : Filter
        A filter object that initialized this data space.
    x0, x1 : array_like
        The first and second axes of this data space. The arrays are
        copied.
    """

    def __init__(self, filter, x0, x1):
        x0 = _check_nomask(x0)
        x1 = _check_nomask(x1)

        self.filter = filter
        self.x_axis = PointAxis(x0)
        self.y_axis = PointAxis(x1)

        if self.x_axis.size != self.y_axis.size:
            raise DataErr("mismatchn", "x0", "x1",
                          self.x_axis.size, self.y_axis.size)

    def get(self, filter=False):
        """Get a filtered representation of this data set. If
        `filter` is `False` this object is returned.

        Parameters
        ----------
        filter : bool
            whether the data set should be filtered before being
            returned

        Returns
        -------
        DataSpace2D
        """
        if not bool_cast(filter):
            return self

        data = self.grid
        data = tuple(self.filter.apply(axis) for axis in data)
        return DataSpace2D(self.filter, *data)

    @property
    def grid(self):
        """The grid representation of this dataset.

        The x0 and x1 arrays in the grid are one-dimensional
        representations of the meshgrid obtained from the x and y
        axis arrays, as in `numpy.meshgrid(x, y)[0].ravel()`.

        Returns
        -------
        (x0, x1) : (ndarray, ndarray)
            A tuple representing the x0 and x1 axes.
        """
        return self.x_axis.x, self.y_axis.x

    # Should these be deprecated now that the code attempts to mimic
    # EvaluationSpace2D?
    #
    @property
    def x0(self):
        """Return the first axis."""
        return self.x_axis.x

    @property
    def x1(self):
        """Return the second axis."""
        return self.y_axis.x

class IntegratedDataSpace2D:
    """Same as DataSpace2D, but for supporting integrated data sets.

    Parameters
    ----------
    filter : Filter
        A filter object that initialized this data space.
    x0lo, x1lo, x0hi, x1hi : array_like
        The lower bounds array of the first and second axes, then the
        upper bounds.
    """

    def __init__(self, filter, x0lo, x1lo, x0hi, x1hi):
        x0lo = _check_nomask(x0lo)
        x1lo = _check_nomask(x1lo)
        x0hi = _check_nomask(x0hi)
        x1hi = _check_nomask(x1hi)

        self.filter = filter
        self.x_axis = IntegratedAxis(x0lo, x0hi)
        self.y_axis = IntegratedAxis(x1lo, x1hi)

        if self.x_axis.size != self.y_axis.size:
            raise DataErr("mismatchn", "x0", "x1",
                          self.x_axis.size, self.y_axis.size)

    def get(self, filter=False):
        """Get a filtered representation of this data set. If
        `filter` is `False` this object is returned.

        Parameters
        ----------
        filter : bool
            whether the data set should be filtered before being
            returned

        Returns
        -------
        IntegratedDataSpace2D
        """
        if not bool_cast(filter):
            return self

        data = self.grid
        data = tuple(self.filter.apply(axis) for axis in data)
        return IntegratedDataSpace2D(self.filter, *data)

    @property
    def grid(self):
        """The grid representation of this dataset.

        The x0 and x1 arrays in the grid are one-dimensional
        representations of the meshgrid obtained from the x and y
        axis arrays, as in `numpy.meshgrid(x, y)[0].ravel()`.

        Returns
        -------
        (x0lo, x1lo, x0hi, x1hi) : (ndarray, ndarray, ndarray, ndarray)
            The axis data.
        """
        return (self.x_axis.lo, self.y_axis.lo,
                self.x_axis.hi, self.y_axis.hi)

    # Should these be deprecated now that the code attempts to mimic
    # EvaluationSpace2D?
    #
    @property
    def x0lo(self):
        """Return the first axis (low edge)."""
        return self.x_axis.lo

    @property
    def x0hi(self):
        """Return the first axis (high edge)."""
        return self.x_axis.hi

    @property
    def x1lo(self):
        """Return the second axis (low edge)."""
        return self.y_axis.lo

    @property
    def x1hi(self):
        """Return the second axis (high edge)."""
        return self.y_axis.hi

class DataSpaceND:
    """Class for representing arbitrary N-Dimensional data domains.

    Parameters
    ----------
    filter : Filter
        A filter object that initialized this data space.
    indep : tuple of array_like
        The tuple of independent axes. The arrays are copied rather
        than storing a reference.
    """

    def __init__(self, filter, indep):
        self.filter = filter
        self.indep = tuple(_check_nomask(d) for d in indep)

    def get(self, filter=False):
        """Get a filtered representation of this data set. If
        `filter` is `False` this object is returned.

        Parameters
        ----------
        filter : bool
            whether the data set should be filtered before being
            returned

        Returns
        -------
        DataSpaceND
        """
        if not bool_cast(filter):
            return self

        data = tuple(self.filter.apply(axis) for axis in self.indep)
        return DataSpaceND(self.filter, data)

    @property
    def grid(self):
        """Return the grid representation of this dataset.

        The independent arrays are returned unchanged, i.e. unlike
        the DataSpace2D class they are not meshed.

        Returns
        -------
        tuple
            A tuple representing the independent axes.
        """
        return self.indep

# NOTE:
#
# Although this is labelled as supporting N-D datasets, the
# implementation assumes that the data has been flattened to 1D
# arrays - in particular the notice method. It is likely that we can
# document this - i.e. that the mask is going to be 1D.
#
class Filter:
    """A class for representing filters of N-Dimensional datasets.

    The filter does not know the size of the dataset or the values of
    the independent axes.
    """

    def __init__(self) -> None:
        self._mask: Union[np.ndarray, bool] = True

    @property
    def mask(self) -> Union[np.ndarray, bool]:
        """Mask array for dependent variable

        Returns
        -------
        mask : bool or numpy.ndarray
        """
        return self._mask

    @mask.setter
    def mask(self, val: Union[ArrayType, bool]) -> None:
        if val is None:
            raise DataErr('ismask')

        # The code below has to deal with bool-like values, such as
        # numpy.ma.nomask (this evaluates to False but is not a bool),
        # and numpy.bool_ values. The following code is intended to
        # catch these "special" values. Note that we explicitly check
        # for boolean values ('val is True' and 'val is False') rather
        # than just whether val behaves like a boolean - for example
        # 'if val or not val: ...'.
        #
        if not np.isscalar(val):
            # Is it possible for the following to throw a conversion
            # error? If so, we could catch it and convert it into a
            # DataErr, but it does not seem worth it (as it's not
            # obvious what the error to catch would be).
            #
            self._mask = np.asarray(val, bool)
        elif val is np.ma.nomask:
            self._mask = True
        elif (val is True) or (val is False):
            self._mask = val
        elif isinstance(val, np.bool_):
            # are there other types we could be looking for here?
            self._mask = bool(val)
        else:
            # Note that setting mask to 2.0 will fail, but an array
            # of 2.0's will get converted to booleans.
            #
            raise DataErr('ismask')

    @overload
    def apply(self, array: None) -> None:
        ...

    @overload
    def apply(self, array: ArrayType) -> np.ndarray:
        ...

    def apply(self, array):
        """Apply this filter to an array

        Parameters
        ----------
        array : array_like or None
            Array to be filtered

        Returns
        -------
        array_like : ndarray or None

        Raises
        ------
        sherpa.utils.err.DataErr
            The filter has removed all elements or there is a
            mismatch between the `mask` and the ``array`` argument.

        See Also
        --------
        notice
        """
        if array is None:
            return None

        # Note that mask may not be a boolean but an array.
        if self.mask is False:
            raise DataErr('notmask')

        # Ensure we always return a ndarray.
        #
        array = _check(array)
        if self.mask is True:
            return array

        if array.size != self.mask.size:
            raise DataErr("mismatchn", "mask", "data array",
                          self.mask.size, array.size)

        return array[self.mask]

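    # A sketch of apply with an explicit mask (the values here are
    # illustrative, not taken from a test suite):
    #
    #     f = Filter()
    #     f.mask = [True, False, True]
    #     f.apply([1, 2, 3])   # -> array([1, 3])
    #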
    def notice(self,
               mins: ArrayType,
               maxes: ArrayType,
               axislist: Sequence[ArrayType],
               ignore: bool = False,
               integrated: bool = False
               ) -> None:
        """Select a range to notice or ignore (remove).

        The ``axislist`` argument is expected to be sent the
        independent axis of a `Data` object - so ``(x, )`` for
        one-dimensional data, ``(xlo, xhi)`` for integrated
        one-dimensional data, ``(x0, x1)`` for two-dimensional data,
        and ``(x0lo, x1lo, x0hi, x1hi)`` for integrated
        two-dimensional data. The ``mins`` and ``maxes`` must then be
        set to match this ordering.

        Parameters
        ----------
        mins : sequence of values
            The minimum value of the valid range (elements may be
            None to indicate no lower bound). When not None, it is
            treated as an inclusive limit, so points >= min are
            included.
        maxes : sequence of values
            The maximum value of the valid range (elements may be
            None to indicate no upper bound). It is treated as an
            inclusive limit (points <= max) when integrated is False,
            and an exclusive limit (points < max) when integrated is
            True.
        axislist : sequence of arrays
            The axis to apply the range to. There must be the same
            number of elements in mins, maxes, and axislist. The
            number of elements of each element of axislist must also
            agree (the cell values do not need to match).
        ignore : bool, optional
            If True the range is to be ignored, otherwise it is
            included. The default is to include the range.
        integrated : bool, optional
            Is the data integrated (we have low and high bin edges)?
            The default is False. When True it is expected that
            axislist contains an even number of rows, where the odd
            values are the low edges and the even values the upper
            edges, and that the mins and maxes only ever contain a
            single value, given in (None, hi) and (lo, None)
            ordering.

        See Also
        --------
        apply

        Examples
        --------
        Select points in xs which are in the range 1.5 <= x <= 4:

        >>> f = Filter()
        >>> f.mask
        True
        >>> xs = [1, 2, 3, 4, 5]
        >>> f.notice([1.5], [4], (xs, ))
        >>> f.mask
        array([False,  True,  True,  True, False])

        Filter the data to select all points with x0 >= 1.5 and
        x1 <= 4:

        >>> f = Filter()
        >>> x0 = [1, 1.4, 1.6, 2, 3]
        >>> x1 = [2, 2, 4, 4, 6]
        >>> f.notice([1.5, None], [None, 4], (x0, x1))
        >>> f.mask
        array([False, False,  True,  True, False])

        For integrated data sets the lower and upper edges should be
        sent separately with the max and min limits, along with
        setting the integrated flag. The following selects the bins
        that cover the range 2 to 4 and 1.5 to 3.5:

        >>> xlo = [1, 2, 3, 4, 5]
        >>> xhi = [2, 3, 4, 5, 6]
        >>> f = Filter()
        >>> f.notice([None, 2], [4, None], (xlo, xhi), integrated=True)
        >>> f.mask
        array([False,  True,  True, False, False])
        >>> f.notice([None, 1.5], [3.5, None], (xlo, xhi), integrated=True)
        >>> f.mask
        array([ True,  True,  True, False, False])

        """
        # If integrated is True then we should have an even number
        # of axislist elements, but we do not require this.
        #
        ignore = bool_cast(ignore)
        for vals, label in zip([mins, maxes, axislist],
                               ['lower bound', 'upper bound', 'grid']):
            if any(isinstance(val, str) for val in vals):
                raise DataErr('typecheck', label)

        mask = filter_bins(mins, maxes, axislist, integrated=integrated)

        if mask is None:
            self.mask = not ignore
        elif not ignore:
            if self.mask is True:
                self.mask = mask
            else:
                self.mask |= mask
        else:
            mask = ~mask
            if self.mask is False:
                self.mask = mask
            else:
                self.mask &= mask

class BaseData(metaclass=ABCMeta):
    """Base class for all data classes.

    Left for compatibility with older versions.
    """
    pass

# DATA-NOTE: ND Data cannot be plotted
class Data(NoNewAttributesAfterInit, BaseData):
    """Generic, N-Dimensional data sets.

    A data class is the collection of a data space and a number of
    data arrays for the dependent variable and associated errors.

    Parameters
    ----------
    name : string
        name of this dataset
    indep : tuple of array_like
        the tuple of independent arrays.
    y : array_like
        The values of the dependent observable. If this is a numpy
        masked array, the mask will be used to initialize a mask.
    staterror : array_like
        the statistical error associated with the data
    syserror : array_like
        the systematic error associated with the data

    Notes
    -----
    This class can be extended by classes defining data sets of
    specific dimensionality. Extending classes should override the
    `_init_data_space` method. This class provides most of the
    infrastructure for extending classes for free.

    Data classes contain a ``mask`` attribute, which can be used to
    ignore certain values in the array when fitting or plotting that
    data. The convention in Sherpa is that ``True`` marks a value as
    *valid* and ``False`` as *invalid* (note that this is opposite to
    the numpy convention). When a `Data` instance is initialized with
    a dependent array that has a ``mask`` attribute (e.g. a numpy
    masked array), it will attempt to convert that mask to the Sherpa
    convention, raising a warning if it cannot. In any case, the user
    can set ``data.mask`` after initialization if that conversion
    does not yield the expected result.
    """

    _fields: FieldsType = ("name", "indep", "dep", "staterror", "syserror")
    """The main data values stored by the object (as a tuple).

    This is used to identify the column data - that is values that
    can be written out in tabular form - other than the "name" field.
    Other fields are listed in _extra_fields.
    """

    _extra_fields: FieldsType = ()
    """Any extra fields that should be displayed by str(object)."""

    _related_fields: FieldsType = ("y", "staterror", "syserror")
    """What fields must match the size of the independent axis.

    These fields are set to None whenever the independent axis size
    is set or changed.
    """

    _y: Optional[np.ndarray] = None
    _size: Optional[int] = None
    _staterror: Optional[np.ndarray] = None
    _syserror: Optional[np.ndarray] = None

    ndim: Optional[int] = None
    "The dimensionality of the dataset, if defined, or None."

    def __init__(self,
                 name: str,
                 indep: Union[Sequence[ArrayType], Sequence[None]],
                 y: Optional[ArrayType],
                 staterror: Optional[ArrayType] = None,
                 syserror: Optional[ArrayType] = None
                 ) -> None:
        self.name = name
        self._data_space = self._init_data_space(Filter(), *indep)
        self.y, self.mask = _check_dep(y)
        self.staterror = staterror
        self.syserror = syserror
        self._ylabel = 'y'
        NoNewAttributesAfterInit.__init__(self)

    def _check_data_space(self, dataspace):
        """Check that the data space has the correct size.

        Note that this also sets the size of the data object if it
        has not been set.

        Parameters
        ----------
        dataspace
            The dataspace object for the data object.
        """
        indep0 = dataspace.get().grid[0]
        nold = self.size

        # If there is no data then we need to update the size, unless
        # the new data is also empty.
        #
        if nold is None:
            if indep0 is None:
                return

            self._size = len(indep0)
            return

        # We could allow the independent axis to be reset to None.
        #
        if indep0 is None:
            raise DataErr("independent axis can not be cleared")

        nnew = len(indep0)
        if nnew != nold:
            raise DataErr(f"independent axis can not change size: {nold} to {nnew}")

    def _init_data_space(self,
                         filter: Filter,
                         *data: ArrayType
                         ) -> DataSpaceND:
        """
        Extending classes should implement this method to provide the
        proper data space construction.

        Parameters
        ----------
        filter : Filter
            a filter object passed by the initializer upon
            initialization of extending classes.
        data : tuple of array_like
            the tuple of independent arrays used to build the data
            space.

        Returns
        -------
        object
            an instance of the dataspace associated with this data
            set.
        """
        ds = DataSpaceND(filter, data)
        self._check_data_space(ds)
        return ds

    def _set_related(self,
                     attr: str,
                     val: Optional[ArrayType],
                     check_mask: bool = True,
                     **kwargs) -> None:
        """Set a field that must match the independent axes size.

        The value can be None or something with the same length as
        the independent axis. This is intended to be used from the
        property setter.

        There is also a check to warn the user if the value contains
        a NumPy masked array which does not match the dependent axis.
        """
        if val is None:
            setattr(self, f"_{attr}", None)
            return

        # This will check whether val is an array or not.
        #
        vals = _check(val)
        nval = len(val)

        # Check the mask value of val, and not vals, since _check
        # could have called np.asarray and so lost the mask setting.
        #
        if check_mask and hasattr(val, "mask"):
            if not hasattr(self.y, "mask") or \
               len(self.y) != nval or \
               not np.all(self.y.mask == val.mask):
                warnings.warn(f"The mask of {attr} differs from the "
                              "dependent array, only the mask of the "
                              "dependent array is used in Sherpa.")

        nelem = self.size
        if nelem is None:
            # We set the object size here
            self._size = nval
            setattr(self, f"_{attr}", vals)
            return

        if nval != nelem:
            raise DataErr('mismatchn', 'independent axis', attr,
                          nelem, nval)

        setattr(self, f"_{attr}", vals)

    @property
    def y(self) -> Optional[np.ndarray]:
        """The dependent axis.

        If set, it must match the size of the independent axes.
        """
        return self._y

    @y.setter
    def y(self, val: ArrayType) -> None:
        self._set_related("y", val, check_mask=False)

    @property
    def size(self) -> Optional[int]:
        """The number of elements in the data set.

        Returns
        -------
        size : int or None
            If the size has not been set then None is returned.
        """
        return self._size

    # Allow users to len() a data object. The idea is that this
    # represents the "number of elements" (e.g. the size of the
    # independent axis). Does this make sense for some of the data
    # classes (in particular RMF)?
    #
    def __len__(self) -> int:
        if self.size is None:
            return 0

        return self.size

    @property
    def dep(self) -> Optional[np.ndarray]:
        """
        Left for compatibility with older versions
        """
        return self.y

    @dep.setter
    def dep(self, val: Optional[ArrayType]) -> None:
        self.y = val

    @property
    def mask(self) -> Union[np.ndarray, bool]:
        """Mask array for dependent variable

        Returns
        -------
        mask : bool or numpy.ndarray
        """
        return self._data_space.filter.mask

    @mask.setter
    def mask(self, val: Union[ArrayType, bool]) -> None:
        # If we have a scalar then
        # - we do not check sizes (as it's possible to set even
        #   though the independent axis is not set)
        # - we do not want to convert it to a ndarray
        #
        # val can not be None but that is caught when setting the
        # filter.mask attribute.
        #
        if val is not None and np.ndim(val) != 0:
            if self.size is None:
                raise DataErr("The independent axis has not been set yet")

            nval = len(val)
            if nval != self.size:
                raise DataErr("mismatchn", "independent axis", "mask",
                              self.size, nval)

        self._data_space.filter.mask = val

    # This is overloaded by Data1D and Data2D so can it be made
    # "virtual"?
    #
    def get_dims(self) -> tuple[int, ...]:
        """Return the dimensions of this data space as a tuple of
        tuples.

        The first element in the tuple is a tuple with the dimensions
        of the data space, while the second element provides the size
        of the dependent array.

        Returns
        -------
        tuple
        """
        # What should this do when the independent or dependent axes
        # are not set?
        indep_size = tuple(indep.size for indep in self.indep)
        return indep_size, self.dep.size

    def _clear_filter(self) -> None:
        """Clear out the existing filter.

        This is designed for use by @indep.setter.
        """
        # This is currently a no-op. It may be overridden by a
        # subclass.
        #
        pass

    @property
    def indep(self
              ) -> Union[tuple[np.ndarray, ...], tuple[None, ...]]:
        """The grid of the data space associated with this data set.

        When set, the field must be set to a tuple, even for a
        one-dimensional data set. The "related" fields such as the
        dependent axis and the error fields are set to None if their
        size does not match.

        .. versionchanged:: 4.14.1
           The filter created by `notice` and `ignore` is now cleared
           when the independent axis is changed.

        Returns
        -------
        tuple of array_like or None
        """
        return self._data_space.get().grid

    @indep.setter
    def indep(self,
              val: Union[tuple[ArrayType, ...], tuple[None, ...]]
              ) -> None:
        # This is a low-level check so raise a normal Python error
        # rather than DataErr. Do we want to allow a sequence here,
        # that is not forced to be a tuple? The concern then is that
        # it gets harder to distinguish between a user accidentally
        # giving x, where it's an array, rather than (x,), say.
        #
        if not isinstance(val, tuple):
            raise TypeError("independent axis must be sent a tuple, "
                            f"not {type(val).__name__}")

        ds = self._init_data_space(self._data_space.filter, *val)
        self._data_space = ds
        self._clear_filter()

    def get_indep(self,
                  filter: bool = False
                  ) -> Union[tuple[np.ndarray, ...], tuple[None, ...]]:
        """Return the independent axes of a data set.

        Parameters
        ----------
        filter : bool, optional
            Should the filter attached to the data set be applied to
            the return value or not. The default is `False`.

        Returns
        -------
        axis : tuple of arrays
            The independent axis values for the data set. This gives
            the coordinates of each point in the data set.

        See Also
        --------
        get_dep : Return the dependent axis of a data set.
        """
        return self._data_space.get(filter).grid

    def set_indep(self,
                  val: Union[tuple[ArrayType, ...], tuple[None, ...]]
                  ) -> None:
        self.indep = val

    def get_dep(self,
                filter: bool = False
                ) -> Optional[np.ndarray]:
        """Return the dependent axis of a data set.

        Parameters
        ----------
        filter : bool, optional
            Should the filter attached to the data set be applied to
            the return value or not. The default is `False`.

        Returns
        -------
        axis : array
            The dependent axis values for the data set. This gives
            the value of each point in the data set.

        See Also
        --------
        get_indep : Return the independent axis of a data set.
        get_error : Return the errors on the dependent axis of a data set.
        get_staterror : Return the statistical errors on the dependent axis of a data set.
        get_syserror : Return the systematic errors on the dependent axis of a data set.
        """
        dep = self.dep
        if bool_cast(filter):
            dep = self.apply_filter(dep)

        return dep

    # Do we want to allow val to be None?
    def set_dep(self, val: Union[ArrayType, float]) -> None:
        """Set the dependent variable values.

        Parameters
        ----------
        val : sequence or number
            If a number then it is used for each element.
        """
        if np.iterable(val):
            dep = np.asarray(val, SherpaFloat)
        else:
            nelem = self.size
            if nelem is None:
                raise DataErr("sizenotset", self.name)

            dep = SherpaFloat(val) * np.ones(nelem, dtype=SherpaFloat)

        self.y = dep

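    # A usage sketch (the values here are illustrative): a scalar is
    # broadcast to the size of the data set, so for a 3-element data
    # set
    #
    #     data.set_dep(2)   # equivalent to data.set_dep([2.0, 2.0, 2.0])
    #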
    # It is not clear when default values need to be given in
    # overload statements. The current choice is based on mypy
    # 1.10.0.
    #
    @overload
    def get_y(self,
              filter: bool,
              yfunc: None,
              use_evaluation_space: bool = False
              ) -> np.ndarray:
        ...

    @overload
    def get_y(self,
              filter: bool,
              yfunc: ModelFunc,
              use_evaluation_space: bool = False
              ) -> tuple[np.ndarray, ArrayType]:
        ...

    def get_y(self, filter=False, yfunc=None, use_evaluation_space=False):
        """Return dependent axis in N-D view of dependent variable.

        Parameters
        ----------
        filter
        yfunc
        use_evaluation_space

        Returns
        -------
        y : array or (array, array) or None
            If yfunc is not None and the dependent axis is set then
            the return value is (y, y2) where y2 is yfunc evaluated
            on the independent axis.
        """
        y = self.get_dep(filter)

        if yfunc is None:
            return y

        if filter:
            y2 = self.eval_model_to_fit(yfunc)
        else:
            y2 = self.eval_model(yfunc)

        return (y, y2)

    @property
    def staterror(self) -> Optional[np.ndarray]:
        """The statistical error on the dependent axis, if set.

        This must match the size of the independent axis.
        """
        return self._staterror

    @staterror.setter
    def staterror(self, val: ArrayType) -> None:
        self._set_related("staterror", val)

    @property
    def syserror(self) -> Optional[np.ndarray]:
        """The systematic error on the dependent axis, if set.

        This must match the size of the independent axis.
        """
        return self._syserror

    @syserror.setter
    def syserror(self, val: ArrayType) -> None:
        self._set_related("syserror", val)

    def get_staterror(self,
                      filter: bool = False,
                      staterrfunc: Optional[StatErrFunc] = None
                      ) -> Optional[ArrayType]:
        """Return the statistical error on the dependent axis of a
        data set.

        Parameters
        ----------
        filter : bool, optional
            Should the filter attached to the data set be applied to
            the return value or not. The default is `False`.
        staterrfunc : function
            If no statistical error has been set, the errors will be
            calculated by applying this function to the dependent
            axis of the data set.

        Returns
        -------
        axis : array or `None`
            The statistical error for each data point. A value of
            `None` is returned if the data set has no statistical
            error array and `staterrfunc` is `None`.

        See Also
        --------
        get_error : Return the errors on the dependent axis of a data set.
        get_indep : Return the independent axis of a data set.
        get_syserror : Return the systematic errors on the dependent axis of a data set.
        """
        staterror = getattr(self, 'staterror', None)
        if bool_cast(filter):
            staterror = self.apply_filter(staterror)

        if (staterror is None) and (staterrfunc is not None):
            dep = self.get_dep(filter)
            staterror = staterrfunc(dep)

        return staterror

    def get_syserror(self,
                     filter: bool = False
                     ) -> Optional[np.ndarray]:
        """Return the systematic error on the dependent axis of a
        data set.

        Parameters
        ----------
        filter : bool, optional
            Should the filter attached to the data set be applied to
            the return value or not. The default is `False`.

        Returns
        -------
        axis : array or None
            The systematic error for each data point. A value of
            `None` is returned if the data set has no systematic
            errors.

        See Also
        --------
        get_error : Return the errors on the dependent axis of a data set.
        get_indep : Return the independent axis of a data set.
        get_staterror : Return the statistical errors on the dependent axis of a data set.
        """
        syserr = getattr(self, 'syserror', None)
        if bool_cast(filter):
            syserr = self.apply_filter(syserr)

        return syserr

    def get_error(self, filter=False, staterrfunc=None):
        """Return the total error on the dependent variable.

        Parameters
        ----------
        filter : bool, optional
            Should the filter attached to the data set be applied to
            the return value or not. The default is `False`.
        staterrfunc : function
            If no statistical error has been set, the errors will be
            calculated by applying this function to the dependent
            axis of the data set.

        Returns
        -------
        axis : array or `None`
            The error for each data point, formed by adding the
            statistical and systematic errors in quadrature.

        See Also
        --------
        get_dep : Return the dependent axis of a data set.
        get_staterror : Return the statistical errors on the dependent axis of a data set.
        get_syserror : Return the systematic errors on the dependent axis of a data set.
        """
        return calc_total_error(self.get_staterror(filter, staterrfunc),
                                self.get_syserror(filter))

    def get_yerr(self, filter=False, staterrfunc=None):
        """Return errors in dependent axis in N-D view of dependent
        variable.

        Parameters
        ----------
        filter
        staterrfunc

        Returns
        -------
        """
        return self.get_error(filter, staterrfunc)

    # It looks like we never set yfunc when calling this method so we
    # could remove it.
    #
    def get_ylabel(self, yfunc=None) -> str:
        """Return label for dependent axis in N-D view of dependent
        variable.

        Parameters
        ----------
        yfunc
            Unused.

        Returns
        -------
        label : str
            The label.

        See Also
        --------
        set_ylabel
        """
        return self._ylabel

    def set_ylabel(self, label: str) -> None:
        """Set the label for the dependent axis.

        .. versionadded:: 4.17.0

        Parameters
        ----------
        label : str
            The new label.

        See Also
        --------
        get_ylabel
        """
        self._ylabel = label

    @overload
    def apply_filter(self, data: None) -> None:
        ...

    @overload
    def apply_filter(self, data: ArrayType) -> np.ndarray:
        ...

    def apply_filter(self, data):
        if data is None:
            return None

        nelem = self.size
        if nelem is None:
            raise DataErr("sizenotset", self.name)

        data = _check(data)
        ndata = len(data)
        if ndata != nelem:
            raise DataErr("mismatchn", "data", "array", nelem, ndata)

        return self._data_space.filter.apply(data)

    def notice(self,
               mins,
               maxes,
               ignore: bool = False,
               integrated: bool = False
               ) -> None:
        self._data_space.filter.notice(mins, maxes, self.get_indep(),
                                       ignore=ignore,
                                       integrated=integrated)

    def ignore(self, *args, **kwargs) -> None:
        kwargs['ignore'] = True
        self.notice(*args, **kwargs)

    def _can_apply_model(self, modelfunc: ModelFunc) -> None:
        """Check if model dimensions match data dimensions."""
        # Should this also check whether the independent axis is set?
        mdim = getattr(modelfunc, "ndim", None)
        ddim = getattr(self, "ndim", None)
        if None not in [mdim, ddim] and mdim != ddim:
            raise DataErr("Data and model dimensionality do not "
                          f"match: {ddim}D and {mdim}D")

    def eval_model(self, modelfunc: ModelFunc) -> ArrayType:
        """Evaluate the model on the independent axis."""
        self._can_apply_model(modelfunc)
        return modelfunc(*self.get_indep())

    def eval_model_to_fit(self, modelfunc: ModelFunc) -> ArrayType:
        """Evaluate the model on the independent axis after
        filtering."""
        self._can_apply_model(modelfunc)
        return modelfunc(*self.get_indep(filter=True))

    def to_guess(self) -> tuple[Optional[np.ndarray], ...]:
        # Should this also check whether the independent and
        # dependent axes are set?
        #
        arrays: list[Optional[np.ndarray]]
        arrays = [self.get_y(filter=True, yfunc=None)]
        arrays.extend(self.get_indep(True))
        return tuple(arrays)

    def to_fit(self,
               staterrfunc: Optional[StatErrFunc] = None
               ) -> tuple[Optional[np.ndarray],
                          Optional[ArrayType],
                          Optional[np.ndarray]]:
        return (self.get_dep(True),
                self.get_staterror(True, staterrfunc),
                self.get_syserror(True))

    def __str__(self) -> str:
        """
        Return a listing of the attributes listed in self._fields
        and, if set, self._extra_fields.
        """
        fields = self._fields + self._extra_fields
        fdict = {f: getattr(self, f) for f in fields}
        return print_fields(fields, fdict)

    def __repr__(self) -> str:
        r = f'<{type(self).__name__} data set instance'
        if hasattr(self, 'name'):
            r += f" '{self.name}'"
        r += '>'
        return r

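# A usage sketch of the generic N-D Data class (the values here are
# illustrative): the independent axis is always given as a tuple,
# even when there is only one axis.
#
#     d = Data("nd", (np.asarray([1, 2, 3]),), np.asarray([4, 5, 6]))
#     d.notice((2,), (3,))     # keep points with 2 <= x <= 3
#     d.get_dep(filter=True)   # -> array([5, 6])
#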
class DataSimulFit(NoNewAttributesAfterInit):
    """Store multiple data sets.

    This class lets multiple data sets be treated as a single dataset
    by concatenation. That is, if two data sets have lengths n1 and
    n2 then they can be considered as a single data set of length
    n1 + n2.

    Parameters
    ----------
    name : str
        The name for the collection of data.
    datasets : sequence of Data objects
        The datasets to be stored; there must be at least one. They
        are assumed to behave as `sherpa.data.Data` objects, but
        there is no check for this condition.

    Attributes
    ----------
    datasets : sequence of Data

    See Also
    --------
    sherpa.models.model.SimulFitModel

    Examples
    --------

    >>> d1 = Data1D('d1', [1, 2, 3], [10, 12, 15])
    >>> d2 = Data1D('d2', [1, 2, 5, 7], [4, 15, 9, 24])
    >>> dall = DataSimulFit('comb', (d1, d2))
    >>> yvals, _, _ = dall.to_fit()
    >>> print(yvals)
    [10 12 15  4 15  9 24]

    """

    def __init__(self,
                 name: str,
                 datasets: Sequence[Data],
                 numcores: int = 1) -> None:
        if len(datasets) == 0:
            raise DataErr('zerodatasimulfit', type(self).__name__)

        self.name = name
        self.datasets = tuple(datasets)
        self.numcores = numcores
        super().__init__()

    def eval_model_to_fit(self,
                          modelfuncs: Sequence[ModelFunc]
                          ) -> np.ndarray:
        if self.numcores == 1:
            total_model = []
            for func, data in zip(modelfuncs, self.datasets):
                tmp_model = data.eval_model_to_fit(func)
                total_model.append(tmp_model)

            return np.concatenate(total_model)

        # best to make this a different derived class
        funcs = []
        datasets = []
        for func, data in zip(modelfuncs, self.datasets):
            funcs.append(func)
            datasets.append(data.get_indep(filter=False))

        total_model = parallel_map_funcs(funcs, datasets, self.numcores)
        all_model = []
        for model, data in zip(total_model, self.datasets):
            all_model.append(data.apply_filter(model))

        return np.concatenate(all_model)

    def to_fit(self,
               staterrfunc: Optional[StatErrFunc] = None
               ) -> tuple[np.ndarray,
                          Optional[np.ndarray],
                          Optional[np.ndarray]]:
        total_dep = []
        total_staterror = []
        total_syserror = []

        no_staterror = True
        no_syserror = True

        for data in self.datasets:
            dep, staterror, syserror = data.to_fit(staterrfunc)

            total_dep.append(dep)

            if staterror is not None:
                no_staterror = False
            total_staterror.append(staterror)

            if syserror is not None:
                no_syserror = False
            else:
                syserror = np.zeros_like(dep)
            total_syserror.append(syserror)

        total_dep = np.concatenate(total_dep)

        if no_staterror:
            total_staterror = None
        elif np.any([np.equal(array, None).any()
                     for array in total_staterror]):
            raise DataErr('staterrsimulfit')
        else:
            total_staterror = np.concatenate(total_staterror)

        if no_syserror:
            total_syserror = None
        else:
            total_syserror = np.concatenate(total_syserror)

        return total_dep, total_staterror, total_syserror

    # DATA-NOTE: this implementation is weird. Is this even used?
    # Typing of this is a bit hard too since Data does not have
    # a to_plot method.
    #
    def to_plot(self,
                yfunc=None,
                staterrfunc: Optional[StatErrFunc] = None
                ):
        return self.datasets[0].to_plot(yfunc.parts[0], staterrfunc)

class Data1D(Data):
    '''Dataset for 1D data.

    Parameters
    ----------
    name : string
        name of this dataset
    x : array-like
        Coordinates of the independent variable
    y : array-like
        The values of the dependent observable. If this is a numpy
        masked array, the mask will be used to initialize a mask.
    staterror : array-like
        the statistical error associated with the data
    syserror : array-like
        the systematic error associated with the data
    '''

    _fields: FieldsType = ("name", "x", "y", "staterror", "syserror")
    ndim = 1

    def __init__(self,
                 name: str,
                 x: Optional[ArrayType],
                 y: Optional[ArrayType],
                 staterror: Optional[ArrayType] = None,
                 syserror: Optional[ArrayType] = None
                 ) -> None:
        self._xlabel = 'x'
        super().__init__(name, (x, ), y, staterror, syserror)

    def _repr_html_(self) -> str:
        """Return a HTML (string) representation of the data"""
        return html_data1d(self)

    def _init_data_space(self,
                         filter: Filter,
                         *data: ArrayType
                         ) -> DataSpace1D:
        ndata = len(data)
        if ndata != 1:
            raise DataErr("wrongaxiscount", self.name, 1, ndata)

        ds = DataSpace1D(filter, *data)
        self._check_data_space(ds)
        return ds

    def get_x(self,
              filter: bool = False,
              model: Optional[ModelFunc] = None,
              use_evaluation_space: bool = False
              ) -> Optional[np.ndarray]:
        if model is not None:
            mdim = getattr(model, "ndim", None)
            if mdim is not None and mdim != 1:
                raise DataErr("Data and model dimensionality do not "
                              f"match: 1D and {mdim}D")

        return self.get_evaluation_indep(filter, model,
                                         use_evaluation_space)[0]

    def get_xerr(self,
                 filter: bool = False,
                 yfunc=None
                 ) -> Optional[np.ndarray]:
        """Return linear view of bin size in independent axis/axes.

        Parameters
        ----------
        filter
        yfunc

        Returns
        -------
        xerr : array or None
        """
        return None

    def get_xlabel(self) -> str:
        """Return label for linear view of independent axis/axes.

        Returns
        -------
        label : str

        See Also
        --------
        set_xlabel
        """
        return self._xlabel

    def set_xlabel(self, label: str) -> None:
        """Set the label for the independent axis.

        .. versionadded:: 4.17.0

        Parameters
        ----------
        label : str
            The new label.

        See Also
        --------
        get_xlabel
        """
        self._xlabel = label

    # The superclass suggests returning both the independent and
    # dependent axis sizes.
    #
    def get_dims(self, filter: bool = False) -> tuple[int, ...]:
        if self.size is None:
            raise DataErr("sizenotset", self.name)

        return len(self.get_x(filter)),

    @overload
    def get_y(self,
              filter: bool,
              yfunc: None = None,
              use_evaluation_space: bool = False
              ) -> np.ndarray:
        ...

    @overload
    def get_y(self,
              filter: bool,
              yfunc: ModelFunc,
              use_evaluation_space: bool = False
              ) -> tuple[np.ndarray, ArrayType]:
        ...

    def get_y(self, filter=False, yfunc=None, use_evaluation_space=False):
        """Return the dependent axis.

        Parameters
        ----------
        filter
        yfunc
        use_evaluation_space

        Returns
        -------
        y : array or (array, array) or None
            If yfunc is not None and the dependent axis is set then
            the return value is (y, y2) where y2 is yfunc evaluated
            on the independent axis.
        """
        y = self.get_dep(filter)

        if yfunc is None:
            return y

        mdim = getattr(yfunc, "ndim", None)
        if mdim is not None and mdim != 1:
            raise DataErr("Data and model dimensionality do not "
                          f"match: 1D and {mdim}D")

        model_evaluation = yfunc(*self.get_evaluation_indep(
            filter, yfunc, use_evaluation_space))
        return (y, model_evaluation)

    @overload
    def get_bounding_mask(self) -> tuple[bool, None]:
        ...

    @overload
    def get_bounding_mask(self) -> tuple[np.ndarray, tuple[int]]:
        ...

    def get_bounding_mask(self):
        mask = self.mask
        size = None
        if np.iterable(self.mask):
            # create bounding box around noticed image regions
            mask = np.array(self.mask)
            size = (mask.size,)

        return mask, size

    def get_img(self, yfunc=None):
        """Return 1D dependent variable as a 1 x N image.

        Parameters
        ----------
        yfunc

        Returns
        -------
        """
        y_img = self.get_y(False, yfunc)
        if yfunc is not None:
            y_img = (y_img[0].reshape(1, y_img[0].size),
                     y_img[1].reshape(1, y_img[1].size))
        else:
            y_img = y_img.reshape(1, y_img.size)

        return y_img

    def get_imgerr(self):
        err = self.get_error()
        if err is None:
            return None

        return err.reshape(1, err.size)

    def get_filter(self,
                   format: str = '%.4f',
                   delim: str = ':'
                   ) -> str:
        """Return the data filter as a string.

        Parameters
        ----------
        format : str, optional
            The formatting of the numeric values.
        delim : str, optional
            The string used to mark the low-to-high range.

        Returns
        -------
        filter : str
            The filter, represented as a collection of single values
            or ranges, separated by commas.

        See Also
        --------
        get_filter_expr, ignore, notice

        Examples
        --------

        >>> import numpy as np
        >>> x = np.asarray([1, 2, 3, 5, 6])
        >>> y = np.ones(5)
        >>> d = Data1D('example', x, y)
        >>> d.get_filter()
        '1.0000:6.0000'
        >>> d.ignore(2.5, 4.5)
        >>> d.get_filter()
        '1.0000:2.0000,5.0000:6.0000'
        >>> d.get_filter(format='%i', delim='-')
        '1-2,5-6'

        """
        x = self.get_x(filter=True)
        if np.iterable(self.mask):
            mask = self.mask
        else:
            mask = np.ones(len(x), dtype=bool)

        return create_expr(x, mask=mask, format=format, delim=delim)

    def get_filter_expr(self) -> str:
        """Return the data filter as a string along with the units.

        This is a specialised version of get_filter which adds the
        axis units.

        Returns
        -------
        filter : str
            The filter, represented as a collection of single values
            or ranges, separated by commas.

        See Also
        --------
        get_filter

        Examples
        --------

        >>> d = Data1D('example', [1., 2., 3., 5., 6., 7.],
        ...            [0, .4, .5, .6, .7, .8])
        >>> d.notice(1., 6.)
        >>> d.ignore(2.5, 4.)
        >>> d.get_filter_expr()
        '1.0000-2.0000,5.0000-6.0000 x'

        Note that the expression lists the valid data points. While
        we ignore only the range 2.5-4.0, there is no data point
        between 4. and 5., so the second part of the valid range is
        5.0 to 6.0.
        """
        return self.get_filter(delim='-') + ' ' + self.get_xlabel()

    # The return value is hard to type, so it is skipped at the
    # moment.
    def to_plot(self,
                yfunc: Optional[ModelFunc] = None,
                staterrfunc: Optional[StatErrFunc] = None):
        # As we introduced models defined on arbitrary grids, the x
        # array can also depend on the model function, at least in
        # principle.
        return (self.get_x(True, yfunc),
                self.get_y(True, yfunc),
                self.get_yerr(True, staterrfunc),
                self.get_xerr(True, yfunc),
                self.get_xlabel(),
                self.get_ylabel())

    # The return value is hard to type, so it is skipped at the
    # moment.
    def to_component_plot(self,
                          yfunc: Optional[ModelFunc] = None,
                          staterrfunc: Optional[StatErrFunc] = None):
        # As we introduced models defined on arbitrary grids, the x
        # array can also depend on the model function, at least in
        # principle.
        return (self.get_x(True, yfunc, use_evaluation_space=True),
                self.get_y(True, yfunc, use_evaluation_space=True),
                self.get_yerr(True, staterrfunc),
                self.get_xerr(True, yfunc),
                self.get_xlabel(),
                self.get_ylabel())

    def get_evaluation_indep(self,
                             filter: bool = False,
                             model: Optional[ModelFunc] = None,
                             use_evaluation_space: bool = False
                             ) -> Optional[np.ndarray]:
        data_space = self._data_space.get(filter)
        if use_evaluation_space:
            return data_space.for_model(model).grid

        return data_space.grid

    def notice(self,
               xlo: Optional[float] = None,
               xhi: Optional[float] = None,
               ignore: bool = False) -> None:
        """Notice or ignore the given range.

        Ranges are inclusive for both the lower and upper limits.

        Parameters
        ----------
        xlo, xhi : number or None, optional
            The range to change. A value of None means the minimum or
            maximum permitted value.
        ignore : bool, optional
            Set to True if the range should be ignored. The default
            is to notice the range.

        See Also
        --------
        get_filter, get_filter_expr

        Notes
        -----
        If no ranges have been ignored then a call to `notice` with
        `ignore=False` will select just the `xlo` to `xhi` range, and
        exclude any bins outside this range. If there has been a
        filter applied then the range `xlo` to `xhi` will be added to
        the range of noticed data (when `ignore=False`).

        Examples
        --------

        >>> import numpy as np
        >>> x = np.arange(0.4, 2.6, 0.2)
        >>> y = np.ones_like(x)
        >>> d = Data1D('example', x, y)
        >>> print(d.x[0], d.x[-1])
        0.4 2.4000000000000004
        >>> d.notice()
        >>> d.get_filter(format='%.1f')
        '0.4:2.4'
        >>> d.notice(0.8, 1.2)
        >>> d.get_filter(format='%.1f')
        '0.8:1.2'
        >>> d.notice(1.5, 2.1)
        >>> d.get_filter(format='%.1f')
        '0.8:1.2,1.6:2.0'

        """
        Data.notice(self, (xlo,), (xhi,), ignore)

    @property
    def x(self) -> Optional[np.ndarray]:
        """
        Used for compatibility, in particular for __str__ and
        __repr__
        """
        return self.get_x()

class Data1DAsymmetricErrs(Data1D):
    """1-D data set with asymmetric errors.

    See Also
    --------
    sherpa.sim.ReSampleData

    Notes
    -----
    The elo and ehi fields are stored as delta values relative to y.

    Sherpa's data and statistic objects assume symmetric errors, and
    so the methods here sometimes return a single array for an error,
    and sometimes a pair or 2D array (giving the low and high
    values). The assumption is that the staterror field is set when
    the object is created.
    """

    _fields: FieldsType = ("name", "x", "y", "staterror", "syserror",
                           "elo", "ehi")

    def __init__(self,
                 name: str,
                 x: Optional[ArrayType],
                 y: Optional[ArrayType],
                 elo: Optional[ArrayType],
                 ehi: Optional[ArrayType],
                 staterror=None,
                 syserror=None
                 ) -> None:
        self.elo = elo
        self.ehi = ehi
        super().__init__(name, x, y, staterror=staterror,
                         syserror=syserror)

    # TODO: should we change get_error and get_staterror?
    #
    def get_yerr(self, filter=False, staterrfunc=None):
        """Return the y error.

        The staterrfunc argument is currently ignored.
        """
        if not filter:
            # A minor optimization
            return self.elo, self.ehi

        return (self.apply_filter(self.elo),
                self.apply_filter(self.ehi))

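# A construction sketch (the values here are illustrative): elo and
# ehi are deltas relative to y, so a point with y=10 and errors -2/+3
# would be given as
#
#     d = Data1DAsymmetricErrs("aerr", [1], [10], [2], [3],
#                              staterror=[2.5])
#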
class Data1DInt(Data1D):
    """1-D integrated data set.

    This class is designed to handle non-consecutive bins.

    Parameters
    ----------
    name : string
        name of this dataset
    xlo : array-like
        lower bounds of the bins of the independent coordinates
    xhi : array-like
        upper bounds of the bins of the independent coordinates
    y : array-like
        The values of the dependent observable. If this is a numpy
        masked array, the mask will be used to initialize a mask.
    staterror : array-like
        the statistical error associated with the data
    syserror : array-like
        the systematic error associated with the data
    """

    _fields: FieldsType = ("name", "xlo", "xhi", "y", "staterror",
                           "syserror")

    def __init__(self,
                 name: str,
                 xlo: Optional[ArrayType],
                 xhi: Optional[ArrayType],
                 y: Optional[ArrayType],
                 staterror: Optional[ArrayType] = None,
                 syserror: Optional[ArrayType] = None
                 ) -> None:
        # Note: we do not call the superclass here.
        self._xlabel = 'x'
        Data.__init__(self, name, (xlo, xhi), y, staterror, syserror)

    def _repr_html_(self) -> str:
        """Return a HTML (string) representation of the data"""
        return html_data1dint(self)

    def _init_data_space(self,
                         filter: Filter,
                         *data: ArrayType
                         ) -> IntegratedDataSpace1D:
        ndata = len(data)
        if ndata != 2:
            raise DataErr("wrongaxiscount", self.name, 2, ndata)

        ds = IntegratedDataSpace1D(filter, *data)
        self._check_data_space(ds)
        return ds

    def get_x(self,
              filter: bool = False,
              model: Optional[ModelFunc] = None,
              use_evaluation_space: bool = False
              ) -> np.ndarray:
        indep = self.get_evaluation_indep(filter, model,
                                          use_evaluation_space)
        if len(indep) == 1:
            # assume all data has been filtered out
            return np.asarray([])

        return (indep[0] + indep[1]) / 2.0

    def get_xerr(self,
                 filter: bool = False,
                 model: Optional[ModelFunc] = None
                 ) -> Optional[np.ndarray]:
        """Returns an X "error".

        The error value for the independent axis is not well defined
        in Sherpa.

        .. versionchanged:: 4.16.1
           The return value is now half the bin width instead of the
           full bin width.

        Parameters
        ----------
        filter : bool, optional
            Should the values be filtered to the current notice
            range?
        model : Model or None, optional

        Returns
        -------
        xerr : ndarray
            The half-width of each bin. Although the types suggest it
            can be None, it will always be an array, even if empty.
        """
        indep = self.get_evaluation_indep(filter, model)
        if len(indep) == 1:
            # assume all data has been filtered out
            return np.asarray([])

        # Return the half-width: see #748 #1817
        xlo, xhi = indep
        return (xhi - xlo) / 2

    def get_filter(self, format='%.4f', delim=':') -> str:
        """Return the data filter as a string.

        For each noticed range the filter is reported as starting at
        the low edge of the first bin and ends at the upper edge of
        the last bin in the range.

        .. versionchanged:: 4.14.0
           Prior to 4.14.0 the filter used the mid-point of the bin,
           not its low or high value.

        Parameters
        ----------
        format : str, optional
            The formatting of the numeric values.
        delim : str, optional
            The string used to mark the low-to-high range.

        Returns
        -------
        filter : str
            The filter, represented as a collection of ranges
            separated by commas.

        See Also
        --------
        get_filter_expr, ignore, notice

        Examples
        --------

        >>> import numpy as np
        >>> xlo = np.asarray([1, 2, 3, 5, 6])
        >>> xhi = xlo + 1
        >>> y = np.ones(5)
        >>> d = Data1DInt('example', xlo, xhi, y)
        >>> d.get_filter()
        '1.0000:7.0000'
        >>> d.ignore(2.5, 4.5)
        >>> d.get_filter()
        '1.0000:2.0000,5.0000:7.0000'
        >>> d.get_filter(format='%i', delim='-')
        '1-2,5-7'

        """
        # We could just use the middle of each bin (which is what
        # would happen if we didn't over-ride Data1D.filter) but
        # let's use the start and end values for each selected bin.
        #
        indep = self.get_evaluation_indep(filter=True)
        if len(indep) == 1:
            # assume all data has been filtered out
            return ''

        if np.iterable(self.mask):
            mask = self.mask
        else:
            # Unlike create_expr we do not need to send a mask in
            # here
            mask = None

        return create_expr_integrated(indep[0], indep[1], mask=mask,
                                      format=format, delim=delim)

    def notice(self,
               xlo: Optional[float] = None,
               xhi: Optional[float] = None,
               ignore: bool = False
               ) -> None:
        """Notice or ignore the given range.

        Ranges are inclusive for the lower limit and exclusive for
        the upper limit.

        .. versionchanged:: 4.14.0
           Filtering Data1DInt datasets has been improved to fix a
           number of corner cases. As part of this the upper limit
           has been changed to be exclusive whereas previously it was
           not obvious what the filter was doing.

        Parameters
        ----------
        xlo, xhi : number or None, optional
            The range to change. A value of None means the minimum or
            maximum permitted value.
        ignore : bool, optional
            Set to True if the range should be ignored. The default
            is to notice the range.

        See Also
        --------
        get_filter, get_filter_expr

        Notes
        -----
        If no ranges have been ignored then a call to `notice` with
        `ignore=False` will select just the `xlo` to `xhi` range, and
        exclude any bins outside this range. If there has been a
        filter applied then the range `xlo` to `xhi` will be added to
        the range of noticed data (when `ignore=False`).

        Examples
        --------

        >>> import numpy as np
        >>> edges = np.arange(0.4, 2.6, 0.2)
        >>> xlo, xhi = edges[:-1], edges[1:]
        >>> y = np.ones_like(xlo)
        >>> d = Data1DInt('example', xlo, xhi, y)
        >>> print(d.xlo[0], d.xhi[-1])
        0.4 2.400000000000001
        >>> d.get_filter(format='%.1f')
        '0.4:2.4'
        >>> d.notice(0.8, 1.9)
        >>> d.get_filter(format='%.1f')
        '0.8:2.0'
        >>> d.notice()
        >>> d.get_filter(format='%.1f')
        '0.4:2.4'

        """
        Data.notice(self, (None, xlo), (xhi, None),
                    ignore=ignore, integrated=True)

@property def xlo(self) -> Optional[np.ndarray]: """ Property kept for compatibility """ return self._data_space.x_axis.lo @property def xhi(self) -> Optional[np.ndarray]: """ Property kept for compatibility """ return self._data_space.x_axis.hi
[docs] class Data2D(Data):
    '''2D data set

    This class represents a 2D data set. It is designed to work with
    flattened arrays for the coordinates and data values, which makes
    it easy to deal with filters and sparse or irregularly-placed
    grids. Of course, the same structure can also be used for
    regularly-gridded data; it just has to be passed in as flattened
    arrays. However, in this case the more-specialized
    `~sherpa.astro.data.DataIMG` class might be more appropriate.

    Parameters
    ----------
    name : string
        name of this dataset
    x0 : array-like
        Independent coordinate values for the first dimension
    x1 : array-like
        Independent coordinate values for the second dimension
    y : array-like
        The values of the dependent observable. If this is a numpy
        masked array, the mask will be used to initialize the
        `~Data.mask` attribute.
    shape : tuple
        Shape of the data grid for regularly gridded data (optional).
        This is used to return the data as an image, e.g. for display,
        but it is not needed for fitting and modelling. For
        irregularly gridded data, shape must be `None`.
    staterror : array-like
        the statistical error associated with the data
    syserror : array-like
        the systematic error associated with the data

    Examples
    --------
    An irregularly-gridded 2D dataset, with points at (-200, -200),
    (-200, 0), (0, 0), (200, -100), and (200, 150) can be created
    with:

    >>> irreg2d = Data2D("irregular2d",
    ...                  [-200, -200, 0, 200, 200],
    ...                  [-200, 0, 0, -100, 150],
    ...                  [12, 15, 23, 45, -2])

    A regularly-gridded 2D dataset can be created, but the arguments
    must be flattened::

    >>> import numpy as np
    >>> x1, x0 = np.mgrid[20:30:2, 5:20:2]
    >>> datashape = x0.shape
    >>> y = np.sqrt((x0 - 10)**2 + (x1 - 31)**2)
    >>> x0 = x0.flatten()
    >>> x1 = x1.flatten()
    >>> y = y.flatten()
    >>> reg2d = Data2D("regular2d", x0, x1, y, shape=datashape)

    .. note:: Sherpa provides the `~sherpa.astro.data.DataIMG` class
       to handle regularly-gridded data more easily.

    '''

    _fields: FieldsType = ("name", "x0", "x1", "y", "shape", "staterror", "syserror")
    ndim = 2

    # Why should we add shape to extra-fields instead? See #1359 to
    # fix #47.
    #
    # It would change the notebook output slightly so we don't change
    # for now.
    #
    # _extra_fields = ("shape", )

    def __init__(self,
                 name: str,
                 x0: Optional[ArrayType],
                 x1: Optional[ArrayType],
                 y: Optional[ArrayType],
                 shape: Optional[tuple[int, int]] = None,
                 staterror: Optional[ArrayType] = None,
                 syserror: Optional[ArrayType] = None
                 ) -> None:
        self.shape = shape
        self._x0label = 'x0'
        self._x1label = 'x1'
        super().__init__(name, (x0, x1), y, staterror, syserror)

    def _repr_html_(self) -> str:
        """Return a HTML (string) representation of the data
        """
        return html_data2d(self)

    def _init_data_space(self,
                         filter: Filter,
                         *data: ArrayType
                         ) -> DataSpace2D:
        ndata = len(data)
        if ndata != 2:
            raise DataErr("wrongaxiscount", self.name, 2, ndata)

        ds = DataSpace2D(filter, *data)
        self._check_data_space(ds)
        return ds
[docs]    def get_x0(self, filter: bool = False) -> Optional[np.ndarray]:
        """Return the first independent axis, optionally filtered."""
        return self._data_space.get(filter).x0
[docs]    def get_x1(self, filter: bool = False) -> Optional[np.ndarray]:
        """Return the second independent axis, optionally filtered."""
        return self._data_space.get(filter).x1
[docs] def get_x0label(self) -> str: """Return label for first dimension in 2-D view of independent axis/axes. Returns ------- label: str See Also -------- get_x1label, set_x0label """ return self._x0label
[docs] def get_x1label(self) -> str: """Return label for second dimension in 2-D view of independent axis/axes. Returns ------- label: str See Also -------- get_x0label, set_x1label """ return self._x1label
[docs]    def set_x0label(self, label: str) -> None:
        """Set the label for the first independent axis.

        .. versionadded:: 4.17.0

        Parameters
        ----------
        label: str

        See Also
        --------
        get_x0label, set_x1label

        """
        self._x0label = label
[docs]    def set_x1label(self, label: str) -> None:
        """Set the label for the second independent axis.

        .. versionadded:: 4.17.0

        Parameters
        ----------
        label: str

        See Also
        --------
        get_x1label, set_x0label

        """
        self._x1label = label
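    # A minimal sketch of the label accessors (the values below are
    # illustrative, not taken from the Sherpa documentation):
    #
    # >>> d = Data2D("ex", [1, 2], [1, 2], [3, 4])
    # >>> d.set_x0label("x offset")
    # >>> d.get_x0label()
    # 'x offset'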
[docs]    def get_axes(self) -> tuple[np.ndarray, np.ndarray]:
        """Return 1-based arrays for each axis of the image grid."""
        self._check_shape()

        # FIXME: how to filter an axis when self.mask is size of self.y?
        return (np.arange(self.shape[1]) + 1,
                np.arange(self.shape[0]) + 1)
[docs] def get_dims(self, filter: bool = False) -> tuple[int, ...]: if self.size is None: raise DataErr("sizenotset", self.name) # self._check_shape() if self.shape is not None: return self.shape[::-1] return len(self.get_x0(filter)), len(self.get_x1(filter))
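    # An illustrative sketch for a 2-row by 3-column image: get_axes
    # returns 1-based values per axis and get_dims reverses the NumPy
    # shape ordering (the data values are assumed for this example):
    #
    # >>> d = Data2D("img", [1, 2, 3, 1, 2, 3], [1, 1, 1, 2, 2, 2],
    # ...            [0, 1, 2, 3, 4, 5], shape=(2, 3))
    # >>> d.get_axes()
    # (array([1, 2, 3]), array([1, 2]))
    # >>> d.get_dims()
    # (3, 2)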
[docs]    def get_filter_expr(self) -> str:
        """Return the filter expression (the empty string for this class)."""
        return ''
[docs]    def get_filter(self) -> str:
        """Return the data filter as a string (the empty string for this class)."""
        return ''
[docs]    def get_max_pos(self,
                    dep: Optional[np.ndarray] = None
                    ) -> Union[tuple[float, float], list[tuple[float, float]]]:
        """Return the coordinates of the maximum value.

        Parameters
        ----------
        dep : ndarray or None, optional
            The data to search; it must match the current data
            filter. If not given then the dependent axis is used.

        Returns
        -------
        coords : pair or list of pairs
            The coordinates of the maximum location. The data values
            match the values returned by `get_x0` and `get_x1`. If
            there is only one maximum pixel then a pair is returned
            otherwise a list of pairs is returned.

        See Also
        --------
        get_dep, get_x0, get_x1

        """
        if dep is None:
            dep = self.get_dep(True)

        x0 = self.get_x0(True)
        x1 = self.get_x1(True)

        # TODO: Should be able to just use numpy.argmax
        pos = np.asarray(np.where(dep == dep.max())).squeeze()
        if pos.ndim == 0:
            pos = int(pos)
            return x0[pos], x1[pos]

        return [(x0[index], x1[index]) for index in pos]
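    # An illustrative sketch (assumed data; the exact repr of the
    # returned NumPy scalars may vary between NumPy versions):
    #
    # >>> d = Data2D("ex", [1, 2, 3], [10, 20, 30], [5, 9, 2])
    # >>> d.get_max_pos()  # the maximum (9) is at index 1
    # (2, 20)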
    # For images we only need the y array. Also, we do not filter,
    # as the imager needs the full M x N (or L x M x N) array.
[docs] def get_img(self, yfunc=None): """Return the dependent axis as a 2D array. The data is not filtered. Parameters ---------- yfunc : sherpa.models.model.Model instance or None, optional If set then it is a model that is evaluated on the data grid and returned along with the dependent axis. Returns ------- img : ndarray or (ndarray, ndarray) The data as a 2D array or a pair of 2D arrays when yfunc is set. """ self._check_shape() y_img = self.get_y(False, yfunc) if yfunc is None: return y_img.reshape(*self.shape) return (y_img[0].reshape(*self.shape), y_img[1].reshape(*self.shape))
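    # An illustrative sketch: the flattened dependent axis is
    # reshaped to the stored shape (values assumed for this example):
    #
    # >>> d = Data2D("img", [1, 2, 3, 1, 2, 3], [1, 1, 1, 2, 2, 2],
    # ...            [0, 1, 2, 3, 4, 5], shape=(2, 3))
    # >>> d.get_img()
    # array([[0, 1, 2],
    #        [3, 4, 5]])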
[docs]    def get_imgerr(self):
        """Return the error as a 2D array, or None if no errors are set."""
        self._check_shape()
        err = self.get_error()
        if err is None:
            return None

        return err.reshape(*self.shape)
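    # An illustrative sketch: with only a statistical error set the
    # combined error is that array, reshaped to the image shape
    # (data values assumed for this example):
    #
    # >>> d = Data2D("img", [1, 2, 3, 1, 2, 3], [1, 1, 1, 2, 2, 2],
    # ...            [0, 1, 2, 3, 4, 5], shape=(2, 3),
    # ...            staterror=[1, 1, 1, 1, 1, 1])
    # >>> d.get_imgerr().shape
    # (2, 3)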
[docs]    def to_contour(self, yfunc=None):
        """Return the data needed to create a contour plot."""
        return (self.get_x0(True),
                self.get_x1(True),
                self.get_y(True, yfunc),
                self.get_x0label(),
                self.get_x1label())
    def _check_shape(self) -> None:
        if self.shape is None:
            raise DataErr('shape', self.name)

    @property
    def x0(self) -> Optional[np.ndarray]:
        """
        Property kept for compatibility
        """
        return self.get_x0()

    @property
    def x1(self) -> Optional[np.ndarray]:
        """
        Property kept for compatibility
        """
        return self.get_x1()
[docs]    def notice(self,
               x0lo: Optional[float] = None,
               x0hi: Optional[float] = None,
               x1lo: Optional[float] = None,
               x1hi: Optional[float] = None,
               ignore: bool = False
               ) -> None:
        """Notice or ignore the given rectangular range.

        Parameters
        ----------
        x0lo, x0hi, x1lo, x1hi : number or None, optional
            The range to change along each axis. A value of None means
            the minimum or maximum permitted value.
        ignore : bool, optional
            Set to True if the range should be ignored. The default is
            to notice the range.

        """
        Data.notice(self, (x0lo, x1lo), (x0hi, x1hi),
                    ignore=ignore)
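    # An illustrative sketch of a rectangular notice (assumed data;
    # the limits sit well away from the points, so edge-inclusion
    # rules do not matter here):
    #
    # >>> d = Data2D("ex", [100, 200, 300], [10, 12, 14], [1, 2, 3])
    # >>> d.notice(x0lo=150, x0hi=250)
    # >>> d.get_x0(True)
    # array([200])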
[docs] class Data2DInt(Data2D):
    '''2-D integrated data set

    This class represents a 2D data set. It is designed to work with
    flattened arrays for the coordinates and data values, which makes
    it easy to deal with filters and sparse or irregularly-placed
    grids. Of course, the same structure can also be used for
    regularly-gridded data; it just has to be passed in as flattened
    arrays. However, in this case the more-specialized
    `~sherpa.astro.data.DataIMGInt` class might be more appropriate.

    Parameters
    ----------
    name : string
        name of this dataset
    x0lo : array-like
        Lower bounds of the bins in the first dimension of the
        independent coordinate
    x1lo : array-like
        Lower bounds of the bins in the second dimension of the
        independent coordinate
    x0hi : array-like
        Upper bounds of the bins in the first dimension of the
        independent coordinate
    x1hi : array-like
        Upper bounds of the bins in the second dimension of the
        independent coordinate
    y : array-like
        The values of the dependent observable. If this is a numpy
        masked array, the mask will be used to initialize the
        `~Data.mask` attribute.
    shape : tuple
        Shape of the data grid for regularly gridded data (optional).
        This is used to return the data as an image, e.g. for display,
        but it is not needed for fitting and modelling. For
        irregularly gridded data, shape must be `None`.
    staterror : array-like
        the statistical error associated with the data
    syserror : array-like
        the systematic error associated with the data

    Examples
    --------
    Create an irregularly-gridded 2D dataset that has measurements in
    three areas of different size. The first is a rectangle covering
    -20 to -10 in the first coordinate and -25 to -10 in the second,
    the second is a small square centered on (0, 0), and the third is
    another rectangle. The measured data values in these areas are
    12, 15, and 23.

    >>> x0lo = [-20, -1, 10]
    >>> x1lo = [-25, -1, -10]
    >>> x0hi = [-10, 1, 15]
    >>> x1hi = [-10, 1, -8]
    >>> y = [12, 15, 23]
    >>> three_rect = Data2DInt("irregular2d_binned", x0lo=x0lo, x1lo=x1lo,
    ...                        x0hi=x0hi, x1hi=x1hi, y=y)

    The regular version of this might represent a binned image, here
    with square pixels. In this example, we create ``x1, x0`` arrays
    that represent the pixel center, but need to pass in the pixel
    edges to make the data object::

    >>> import numpy as np
    >>> x1, x0 = np.mgrid[20:30, 5:20]
    >>> datashape = x0.shape
    >>> y = np.sqrt((x0 - 10)**2 + (x1 - 31)**2)
    >>> x0 = x0.flatten()
    >>> x1 = x1.flatten()
    >>> y = y.flatten()
    >>> image = Data2DInt("binned_image", x0lo=x0 - 0.5, x1lo=x1 - 0.5,
    ...                   x0hi=x0 + 0.5, x1hi=x1 + 0.5, y=y, shape=datashape)

    .. note:: Sherpa provides the `~sherpa.astro.data.DataIMGInt`
       class to make it easier to work with regularly-gridded data.

    '''

    _fields: FieldsType = ("name", "x0lo", "x1lo", "x0hi", "x1hi", "y", "staterror", "syserror")
    _extra_fields: FieldsType = ("shape", )

    def __init__(self,
                 name: str,
                 x0lo: Optional[ArrayType],
                 x1lo: Optional[ArrayType],
                 x0hi: Optional[ArrayType],
                 x1hi: Optional[ArrayType],
                 y: Optional[ArrayType],
                 shape: Optional[Sequence[int]] = None,
                 staterror: Optional[ArrayType] = None,
                 syserror: Optional[ArrayType] = None
                 ) -> None:
        # Note: we do not call the superclass here.
        self.shape = shape
        self._x0label = 'x0'
        self._x1label = 'x1'
        self._ylabel = 'y'
        Data.__init__(self, name, (x0lo, x1lo, x0hi, x1hi), y,
                      staterror, syserror)

    def _init_data_space(self,
                         filter: Filter,
                         *data: ArrayType
                         ) -> IntegratedDataSpace2D:
        ndata = len(data)
        if ndata != 4:
            raise DataErr("wrongaxiscount", self.name, 4, ndata)

        ds = IntegratedDataSpace2D(filter, *data)
        self._check_data_space(ds)
        return ds
[docs]    def get_x0(self, filter: bool = False) -> Optional[np.ndarray]:
        """Return the midpoints of the first axis bins, optionally filtered."""
        if self.size is None:
            return None

        indep = self._data_space.get(filter)
        return (indep.x0lo + indep.x0hi) / 2.0
[docs]    def get_x1(self, filter: bool = False) -> Optional[np.ndarray]:
        """Return the midpoints of the second axis bins, optionally filtered."""
        if self.size is None:
            return None

        indep = self._data_space.get(filter)
        return (indep.x1lo + indep.x1hi) / 2.0
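    # An illustrative sketch: the reported x0 and x1 values are the
    # bin midpoints (assumed data, not from the Sherpa test suite):
    #
    # >>> d = Data2DInt("ex", [0, 10], [0, 0], [2, 12], [1, 1], [5, 7])
    # >>> d.get_x0()
    # array([ 1., 11.])
    # >>> d.get_x1()
    # array([0.5, 0.5])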
[docs] def notice(self, x0lo: Optional[float] = None, x0hi: Optional[float] = None, x1lo: Optional[float] = None, x1hi: Optional[float] = None, ignore: bool = False ) -> None: Data.notice(self, (None, None, x0lo, x1lo), (x0hi, x1hi, None, None), ignore=ignore, integrated=True)
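    # An illustrative sketch: noticing an x0 range keeps only the
    # bins that overlap it (assumed data; the limits are chosen to
    # avoid bin-edge corner cases):
    #
    # >>> d = Data2DInt("ex", [0, 10], [0, 0], [1, 11], [1, 1], [5, 7])
    # >>> d.notice(x0lo=-5, x0hi=5)
    # >>> d.get_x0(True)
    # array([0.5])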
@property def x0lo(self) -> Optional[np.ndarray]: """ Property kept for compatibility """ return self._data_space.x0lo @property def x0hi(self) -> Optional[np.ndarray]: """ Property kept for compatibility """ return self._data_space.x0hi @property def x1lo(self) -> Optional[np.ndarray]: """ Property kept for compatibility """ return self._data_space.x1lo @property def x1hi(self) -> Optional[np.ndarray]: """ Property kept for compatibility """ return self._data_space.x1hi
# Notebook representations
#
def html_data1d(data: Data1D) -> str:
    """HTML representation: Data1D

    If matplotlib is available then plot the data, otherwise
    summarize it.
    """

    from sherpa import plot

    dtype = type(data).__name__

    plotter = plot.DataPlot()
    plotter.prepare(data)

    summary = f'{dtype} Plot'
    try:
        out = plot.backend.as_html_plot(plotter, summary)
    except AttributeError:
        out = None

    if out is not None:
        return formatting.html_from_sections(data, [out])

    # Summary properties
    #
    meta: list[tuple[str, Any]] = []
    if data.name is not None and data.name != '':
        meta.append(('Identifier', data.name))

    meta.append(('Number of bins', len(data.x)))

    # Should this only be displayed if a filter has been applied?
    #
    fexpr = data.get_filter_expr()
    nbins = data.get_dep(filter=True).size
    meta.append(('Using', f'{fexpr} with {nbins} bins'))

    # Rely on the _fields ordering, ending at staterror. This
    # ignores _extra_fields.
    #
    for f in data._fields[1:]:
        if f == 'staterror':
            break

        meta.append((f.upper(), getattr(data, f)))

    if data.staterror is not None:
        meta.append(('Statistical error', data.staterror))

    if data.syserror is not None:
        meta.append(('Systematic error', data.syserror))

    ls = [formatting.html_section(meta, summary=f'{dtype} Summary',
                                  open_block=True)]
    return formatting.html_from_sections(data, ls)


def html_data1dint(data: Data1DInt) -> str:
    """HTML representation: Data1DInt

    If matplotlib is available then plot the data, otherwise
    summarize it.
    """

    from sherpa import plot

    dtype = type(data).__name__

    plotter = plot.DataHistogramPlot()
    plotter.prepare(data)

    summary = f'{dtype} Plot'
    try:
        out = plot.backend.as_html_plot(plotter, summary)
    except AttributeError:
        out = None

    if out is not None:
        return formatting.html_from_sections(data, [out])

    # Summary properties
    #
    meta: list[tuple[str, Any]] = []
    if data.name is not None and data.name != '':
        meta.append(('Identifier', data.name))

    meta.append(('Number of bins', len(data.xlo)))

    # Should this only be displayed if a filter has been applied?
    #
    fexpr = data.get_filter_expr()
    nbins = data.get_dep(filter=True).size
    meta.append(('Using', f'{fexpr} with {nbins} bins'))

    # Rely on the _fields ordering, ending at staterror
    for f in data._fields[1:]:
        if f == 'staterror':
            break

        meta.append((f.upper(), getattr(data, f)))

    if data.staterror is not None:
        meta.append(('Statistical error', data.staterror))

    if data.syserror is not None:
        meta.append(('Systematic error', data.syserror))

    ls = [formatting.html_section(meta, summary=f'{dtype} Summary',
                                  open_block=True)]
    return formatting.html_from_sections(data, ls)


def html_data2d(data: Data2D) -> str:
    """HTML representation: Data2D and derived classes
    """

    dtype = type(data).__name__

    # It would be nice to plot the data, but there are several
    # questions to resolve, such as:
    #
    # - do we plot each point (okay for sparse data) or binned
    # - simple binning, adaptive binning, hexagonal binning?
    # - do we just pick a number, like 100, to bin the data to

    # Summary properties
    #
    meta: list[tuple[str, Any]] = []
    if data.name is not None and data.name != '':
        meta.append(('Identifier', data.name))

    # NOTE: shape is not well defined (is it x by y or the inverse?)
    # so it is not displayed at the moment.
    # if data.shape is not None:
    #     meta.append(('Shape', data.shape))

    meta.append(('Number of bins', len(data.y)))

    # Rely on the _fields ordering, ending at shape
    for f in data._fields[1:]:
        if f == 'shape':
            break

        meta.append((f.upper(), getattr(data, f)))

    if data.staterror is not None:
        meta.append(('Statistical error', data.staterror))

    if data.syserror is not None:
        meta.append(('Systematic error', data.syserror))

    ls = [formatting.html_section(meta, summary=f'{dtype} Summary',
                                  open_block=True)]
    return formatting.html_from_sections(data, ls)