Source code for sherpa.astro.io

#
#  Copyright (C) 2007, 2015, 2016, 2017, 2018  Smithsonian Astrophysical Observatory
#
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License along
#  with this program; if not, write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
"""
Provide Astronomy-specific I/O routines for Sherpa.

This module contains read and write routines for handling FITS [1]_
and ASCII format data. The actual support is provided by
the selected I/O backend package (currently Crates, provided
by CIAO, or the FITS support in the AstroPy package, if installed).

This module should not be imported directly if there is no
I/O backend available.

References
----------

.. [1] Flexible Image Transport System, https://en.wikipedia.org/wiki/FITS

"""

from __future__ import absolute_import

import importlib
import logging
import os
import os.path
import sys

import numpy

from six.moves import zip as izip
from six.moves.configparser import ConfigParser

import sherpa.io
from sherpa.utils.err import IOErr
from sherpa.utils import SherpaFloat
from sherpa.data import Data2D, Data1D, BaseData, Data2DInt
from sherpa.astro.data import DataIMG, DataIMGInt, DataARF, DataRMF, DataPHA, DataRosatRMF
from sherpa.astro.utils import reshape_2d_arrays
from sherpa import get_config

config = ConfigParser()
config.read(get_config())
io_opt = config.get('options', 'io_pkg')
io_opt = str(io_opt).strip().lower()

# Python 3 allows the following (although we should move to the
# dictionary-style access provided by the mapping protocol):
#
# ogip_emin = config.get('ogip', 'minimum_energy', fallback='1.0e-10')
if config.has_option('ogip', 'minimum_energy'):
    ogip_emin = config.get('ogip', 'minimum_energy')
else:
    # The original version of minimum_energy is 1e-10 keV, so use it as
    # the default value.
    ogip_emin = '1.0e-10'

if ogip_emin.upper() == 'NONE':
    ogip_emin = None
else:
    emsg = "Invalid value for [ogip] minimum_energy config value; " + \
           "it must be None or a float > 0"
    try:
        ogip_emin = float(ogip_emin)
    except ValueError:
        raise ValueError(emsg)

    if ogip_emin <= 0.0:
        raise ValueError(emsg)
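
# For illustration only: a fragment of the configuration file read via
# get_config() such as
#
#     [ogip]
#     minimum_energy = 1.0e-10
#
# sets the threshold parsed above, while "minimum_energy = none" disables
# it (ogip_emin is then None).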

if io_opt.startswith('pycrates') or io_opt.startswith('crates'):
    io_opt = 'crates_backend'

elif io_opt.startswith('pyfits'):
    io_opt = 'pyfits_backend'

try:
    importlib.import_module('.' + io_opt, package='sherpa.astro.io')
    backend = sys.modules['sherpa.astro.io.' + io_opt]
except ImportError:
    raise ImportError("""Cannot import selected FITS I/O backend {}.
    If you are using CIAO, this is most likely an error and you should contact the CIAO helpdesk.
    If you are using Standalone Sherpa, please install astropy."""
                      .format(io_opt))
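
# A quick way to check which backend was selected (a usage sketch, not part
# of the module API); the reported name depends on the installation:
#
#     >>> from sherpa.astro import io
#     >>> io.backend.__name__
#     'sherpa.astro.io.pyfits_backend'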

warning = logging.getLogger(__name__).warning
info    = logging.getLogger(__name__).info


__all__ = ('read_table', 'read_image', 'read_arf', 'read_rmf', 'read_arrays',
           'read_pha', 'write_image', 'write_pha', 'write_table',
           'pack_table', 'pack_image', 'pack_pha', 'read_table_blocks')


def _is_subclass(t1, t2):
    return isinstance(t1, type) and issubclass(t1, t2) and (t1 is not t2)


# Note: write_arrays is not included in __all__, so don't add to the
#       See Also section.
#
def read_arrays(*args):
    """Create a dataset from multiple arrays.

    The return value defaults to a `sherpa.data.Data1D` instance,
    but this can be changed by supplying the required class as the
    last argument (anything that is derived from
    `sherpa.data.BaseData`).

    Parameters
    ----------
    *args
        There must be at least one argument. The number of arguments
        depends on the data type being read in. The supported
        argument types depend on the I/O backend in use (as
        supported by the ``get_column_data`` routine provided by
        the backend).

    Returns
    -------
    data : a sherpa.data.BaseData derived object

    Examples
    --------
    The following examples do not contain argument types specific to
    a particular I/O backend.

    Create a `sherpa.data.Data1D` instance from the data in the
    arrays ``x`` and ``y`` (taken to be the independent and
    dependent axes respectively):

    >>> d = read_arrays(x, y)

    As in the previous example, but explicitly declaring the data type:

    >>> d = read_arrays(x, y, sherpa.data.Data1D)

    Create a `sherpa.data.Data2D` instance with the independent axes
    ``x0`` and ``x1``, and dependent axis ``y``:

    >>> d = read_arrays(x0, x1, y, sherpa.data.Data2D)

    """
    args = list(args)
    if len(args) == 0:
        raise IOErr('noarrays')

    dstype = Data1D
    if _is_subclass(args[-1], BaseData):
        dstype = args.pop()

    args = backend.get_column_data(*args)

    # Determine max number of args for dataset constructor
    sherpa.io._check_args(len(args), dstype)
    return dstype('', *args)
 

def read_table(arg, ncols=2, colkeys=None, dstype=Data1D):
    """Create a dataset from a tabular file.

    The supported file types (e.g. ASCII or FITS) depend on the
    selected I/O backend.

    Parameters
    ----------
    arg
        The name of the file or a representation of the file
        (the type depends on the I/O backend).
    ncols : int, optional
        The first ncols columns are read from the file.
    colkeys : None or list of strings, optional
        If given, select these columns from the file.
    dstype : optional
        The data type to create (it is expected to follow the
        `sherpa.data.BaseData` interface).

    Returns
    -------
    data : a sherpa.data.BaseData derived object

    See Also
    --------
    write_table

    Examples
    --------
    The following examples do not contain argument types specific to
    a particular I/O backend.

    Create a `sherpa.data.Data1D` object from the first two columns
    in the file ``src.fits``:

    >>> d = read_table('src.fits')

    Create a `sherpa.data.Data1DInt` object from the first three
    columns in the file ``src.fits``:

    >>> d = read_table('src.fits', ncols=3, dstype=Data1DInt)

    Create a `sherpa.data.Data1D` data set from the specified
    columns in ``tbl.fits``, where ``WLEN`` is used for the
    independent axis, ``FLUX`` the dependent axis, and ``FLUXERR``
    for the statistical error on the dependent axis:

    >>> d = read_table('tbl.fits', colkeys=['WLEN', 'FLUX', 'FLUXERR'])

    """
    colnames, cols, name, hdr = backend.get_table_data(arg, ncols, colkeys)

    # Determine max number of args for dataset constructor
    sherpa.io._check_args(len(cols), dstype)
    return dstype(name, *cols)
 

def read_ascii(filename, ncols=2, colkeys=None, dstype=Data1D, **kwargs):
    """Create a dataset from an ASCII tabular file.

    Parameters
    ----------
    filename : str
        The name of the file or a representation of the file
        (the type depends on the I/O backend).
    ncols : int, optional
        The first ncols columns are read from the file.
    colkeys : None or list of strings, optional
        If given, select these columns from the file.
    dstype : optional
        The data type to create (it is expected to follow the
        `sherpa.data.BaseData` interface).
    **kwargs
        The remaining arguments are passed through to the
        ``get_ascii_data`` routine of the I/O backend. It is
        expected that these include ``sep`` and ``comment``, which
        describe the column separators and indicator of a comment
        line, respectively.

    Returns
    -------
    data : a sherpa.data.BaseData derived object

    Examples
    --------
    The following examples do not contain argument types specific to
    a particular I/O backend.

    Create a `sherpa.data.Data1D` object from the first two columns
    in the file ``src.dat``:

    >>> d = read_ascii('src.dat')

    Create a `sherpa.data.Data1DInt` object from the first three
    columns in the file ``src.dat``:

    >>> d = read_ascii('src.dat', ncols=3, dstype=Data1DInt)

    Create a `sherpa.data.Data1D` data set from the specified
    columns in ``tbl.dat``, where ``WLEN`` is used for the
    independent axis, ``FLUX`` the dependent axis, and ``FLUXERR``
    for the statistical error on the dependent axis:

    >>> d = read_ascii('tbl.dat', colkeys=['WLEN', 'FLUX', 'FLUXERR'])

    """
    colnames, cols, name = backend.get_ascii_data(filename, ncols=ncols,
                                                  colkeys=colkeys,
                                                  dstype=dstype, **kwargs)

    # Determine max number of args for dataset constructor
    sherpa.io._check_args(len(cols), dstype)
    return dstype(name, *cols)
 

def read_image(arg, coord='logical', dstype=DataIMG):
    """Create an image dataset from a file.

    Parameters
    ----------
    arg
        The name of the file or a representation of the file
        (the type depends on the I/O backend).
    coord : {'logical', 'physical', 'world'}, optional
        The type of axis coordinates to use. An error is raised if
        the file does not contain the necessary metadata for the
        selected coordinate system.
    dstype : optional
        The data type to create (it is expected to follow the
        `sherpa.data.BaseData` interface).

    Returns
    -------
    data : a sherpa.data.BaseData derived object

    See Also
    --------
    write_image

    Examples
    --------
    The following examples do not contain argument types specific to
    a particular I/O backend.

    Create a `sherpa.astro.data.DataIMG` object from the FITS file
    ``img.fits``:

    >>> d = read_image('img.fits')

    Select the physical coordinate system from the file:

    >>> d = read_image('img.fits', coord='physical')

    """
    data, filename = backend.get_image_data(arg)
    axlens = data['y'].shape

    x0 = numpy.arange(axlens[1], dtype=SherpaFloat) + 1.
    x1 = numpy.arange(axlens[0], dtype=SherpaFloat) + 1.
    x0, x1 = reshape_2d_arrays(x0, x1)

    data['y'] = data['y'].ravel()
    data['coord'] = coord
    data['shape'] = axlens

    if issubclass(dstype, DataIMGInt):
        dataset = dstype(filename, x0 - 0.5, x1 - 0.5, x0 + 0.5, x1 + 0.5,
                         **data)
    elif issubclass(dstype, Data2DInt):
        for name in ['coord', 'eqpos', 'sky', 'header']:
            data.pop(name, None)
        dataset = dstype(filename, x0 - 0.5, x1 - 0.5, x0 + 0.5, x1 + 0.5,
                         **data)
    else:
        dataset = dstype(filename, x0, x1, **data)

    return dataset
 

def read_arf(arg):
    """Create a DataARF object.

    Parameters
    ----------
    arg
        The name of the file or a representation of the file
        (the type depends on the I/O backend) containing the
        ARF data.

    Returns
    -------
    data : sherpa.astro.data.DataARF

    """
    data, filename = backend.get_arf_data(arg)

    # It is unlikely that the backend will set this, but allow
    # it to override the config setting.
    #
    if 'emin' not in data:
        data['ethresh'] = ogip_emin

    return DataARF(filename, **data)
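
# A usage sketch for read_arf (the file name is hypothetical). Unless the
# backend supplies its own threshold, the [ogip] minimum_energy setting
# parsed above is passed in as the ethresh value:
#
#     >>> arf = read_arf('source.arf')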
 

def read_rmf(arg):
    """Create a DataRMF object.

    Parameters
    ----------
    arg
        The name of the file or a representation of the file
        (the type depends on the I/O backend) containing the
        RMF data.

    Returns
    -------
    data : sherpa.astro.data.DataRMF

    """
    data, filename = backend.get_rmf_data(arg)

    # It is unlikely that the backend will set this, but allow
    # it to override the config setting.
    #
    if 'emin' not in data:
        data['ethresh'] = ogip_emin

    return _rmf_factory(filename, data)
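
# A usage sketch for read_rmf (the file name is hypothetical). The class of
# the returned object is chosen by _rmf_factory below, based on the
# TELESCOP keyword (DataRosatRMF for ROSAT, otherwise DataRMF):
#
#     >>> rmf = read_rmf('source.rmf')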
 

def _rmf_factory(filename, data):
    response_map = {
        'ROSAT': DataRosatRMF,
        'DEFAULT': DataRMF,
    }

    rmf_telescop = data['header'].get('TELESCOP', 'DEFAULT')
    rmf_class = response_map.get(rmf_telescop, DataRMF)

    return rmf_class(filename, **data)


def _read_ancillary(data, key, label, dname,
                    read_func, output_once=True):
    """Read in a file if the keyword is set.

    Parameters
    ----------
    data
        The header information, which behaves like a dictionary.
    key
        The key to look for in data. If not set, or its value
        compares case insensitively to "none" then nothing is
        read in.
    label : str
        This is used to identify the file being read in. It is only
        used if output_once is set.
    dname : str
        The location of the file containing the metadata. This is
        prepended to the value read from the data argument if it is
        not an absolute path.
    read_func
        The function used to read in the file: it expects one
        argument, the file to read in, and returns the data object.
    output_once : bool, optional
        If set then the routine uses the Sherpa logger to display
        informational or warning information.

    Returns
    -------
    data : None or a sherpa.data.BaseData derived object

    """
    if not(data[key]) or data[key].lower() == 'none':
        return None

    out = None
    try:
        if os.path.dirname(data[key]) == '':
            data[key] = os.path.join(dname, data[key])

        out = read_func(data[key])
        if output_once:
            info('read {} file {}'.format(label, data[key]))

    except:
        if output_once:
            warning(str(sys.exc_info()[1]))

    return out
 

def read_pha(arg, use_errors=False, use_background=False):
    """Create a DataPHA object.

    Parameters
    ----------
    arg
        The name of the file or a representation of the file
        (the type depends on the I/O backend) containing the
        PHA data.
    use_errors : bool, optional
        If the PHA file contains statistical error values for the
        count (or count rate) column, should it be read in. This
        defaults to ``False``.
    use_background : bool, optional
        Should the background PHA data (and optional responses) also
        be read in and associated with the data set?

    Returns
    -------
    data : sherpa.astro.data.DataPHA

    """
    datasets, filename = backend.get_pha_data(arg,
                                              use_background=use_background)
    phasets = []
    output_once = True
    for data in datasets:
        if not use_errors:
            if data['staterror'] is not None or data['syserror'] is not None:
                if data['staterror'] is None:
                    msg = 'systematic'
                elif data['syserror'] is None:
                    msg = 'statistical'
                    if output_once:
                        wmsg = "systematic errors were not found in " + \
                               "file '{}'".format(filename)
                        warning(wmsg)
                else:
                    msg = 'statistical and systematic'

                if output_once:
                    imsg = msg + " errors were found in file " + \
                           "'{}' \nbut not used; ".format(filename) + \
                           "to use them, re-read with use_errors=True"
                    info(imsg)

                data['staterror'] = None
                data['syserror'] = None

        dname = os.path.dirname(filename)
        albl = 'ARF'
        rlbl = 'RMF'
        if use_background:
            albl = albl + ' (background)'
            rlbl = rlbl + ' (background)'

        arf = _read_ancillary(data, 'arffile', albl, dname, read_arf,
                              output_once)
        rmf = _read_ancillary(data, 'rmffile', rlbl, dname, read_rmf,
                              output_once)

        backgrounds = []

        if data['backfile'] and data['backfile'].lower() != 'none':
            try:
                if os.path.dirname(data['backfile']) == '':
                    data['backfile'] = os.path.join(
                        os.path.dirname(filename), data['backfile'])

                bkg_datasets = []
                # Do not read backgrounds of backgrounds
                if not use_background:
                    bkg_datasets = read_pha(data['backfile'], use_errors,
                                            True)
                    if output_once:
                        info('read background file {}'.format(
                            data['backfile']))

                if numpy.iterable(bkg_datasets):
                    for bkg_dataset in bkg_datasets:
                        if bkg_dataset.get_response() == (None, None) and \
                           rmf is not None:
                            bkg_dataset.set_response(arf, rmf)
                        backgrounds.append(bkg_dataset)
                else:
                    if bkg_datasets.get_response() == (None, None) and \
                       rmf is not None:
                        bkg_datasets.set_response(arf, rmf)
                    backgrounds.append(bkg_datasets)

            except:
                if output_once:
                    warning(str(sys.exc_info()[1]))

        for bkg_type, bscal_type in izip(('background_up',
                                          'background_down'),
                                         ('backscup', 'backscdn')):
            if data[bkg_type] is not None:
                b = DataPHA(filename,
                            channel=data['channel'],
                            counts=data[bkg_type],
                            bin_lo=data['bin_lo'],
                            bin_hi=data['bin_hi'],
                            grouping=data['grouping'],
                            quality=data['quality'],
                            exposure=data['exposure'],
                            backscal=data[bscal_type],
                            header=data['header'])
                b.set_response(arf, rmf)
                if output_once:
                    info("read {} into a dataset from file {}".format(
                        bkg_type, filename))
                backgrounds.append(b)

        for k in ['backfile', 'arffile', 'rmffile', 'backscup', 'backscdn',
                  'background_up', 'background_down']:
            data.pop(k, None)

        pha = DataPHA(filename, **data)
        pha.set_response(arf, rmf)
        for id, b in enumerate(backgrounds):
            if b.grouping is None:
                b.grouping = pha.grouping
                b.grouped = (b.grouping is not None)
            if b.quality is None:
                b.quality = pha.quality
            pha.set_background(b, id + 1)

        # set units *after* bkgs have been set
        pha._set_initial_quantity()
        phasets.append(pha)
        output_once = False

    if len(phasets) == 1:
        phasets = phasets[0]

    return phasets
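
# A usage sketch for read_pha (the file name is hypothetical). Any ARF, RMF
# and background files named in the PHA header are read in automatically,
# and a file containing several spectra returns a list of DataPHA objects:
#
#     >>> pha = read_pha('source.pha')
#     >>> pha = read_pha('source.pha', use_errors=True)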
 

def _pack_table(dataset):
    names = dataset._fields
    cols = [(name, getattr(dataset, name)) for name in names]
    data = dict(cols)
    return data, names


def _pack_image(dataset):
    if not(isinstance(dataset, Data2D) or
           isinstance(dataset, DataIMG)):
        raise IOErr('notimage', dataset.name)
    data = {}

    header = {}
    if hasattr(dataset, 'header') and type(dataset.header) is dict:
        header = dataset.header.copy()

    data['pixels'] = numpy.asarray(dataset.get_img())
    data['sky'] = getattr(dataset, 'sky', None)
    data['eqpos'] = getattr(dataset, 'eqpos', None)

    return data, header


def _set_keyword(header, label, value):
    """Extract the string form of the value, and store the last
    path element in the header using the label key.
    """

    if value is None:
        return

    name = getattr(value, 'name', 'none')
    if name is not None and name.find('/') != -1:
        name = name.split('/')[-1]

    header[label] = name


def _pack_pha(dataset):
    if not isinstance(dataset, DataPHA):
        raise IOErr('notpha', dataset.name)

    data = {}

    arf, rmf = dataset.get_response()
    bkg = dataset.get_background()

    # Header Keys
    header = {}
    if hasattr(dataset, 'header'):  # and type(dataset.header) is dict:
        header = dataset.header.copy()

    header['EXPOSURE'] = getattr(dataset, 'exposure', 'none')

    _set_keyword(header, 'RESPFILE', rmf)
    _set_keyword(header, 'ANCRFILE', arf)
    _set_keyword(header, 'BACKFILE', bkg)

    # Columns
    col_names = ['channel', 'counts', 'stat_err', 'sys_err', 'bin_lo',
                 'bin_hi', 'grouping', 'quality']

    data['channel'] = getattr(dataset, 'channel', None)
    data['counts'] = getattr(dataset, 'counts', None)
    data['stat_err'] = getattr(dataset, 'staterror', None)
    data['sys_err'] = getattr(dataset, 'syserror', None)
    data['bin_lo'] = getattr(dataset, 'bin_lo', None)
    data['bin_hi'] = getattr(dataset, 'bin_hi', None)
    data['grouping'] = getattr(dataset, 'grouping', None)
    data['quality'] = getattr(dataset, 'quality', None)

    backscal = getattr(dataset, 'backscal', None)
    if backscal is not None:
        if numpy.isscalar(backscal):
            header['BACKSCAL'] = backscal
        else:
            data['backscal'] = backscal
            col_names.append('backscal')

    areascal = getattr(dataset, 'areascal', None)
    if areascal is not None:
        if numpy.isscalar(areascal):
            header['AREASCAL'] = areascal
        else:
            data['areascal'] = areascal
            col_names.append('areascal')

    return data, col_names, header


def write_arrays(filename, args, fields=None, ascii=True, clobber=False):
    """Write out a collection of arrays.

    Parameters
    ----------
    filename : str
        The name of the file.
    args
        The data to write out.
    fields : None or list of str, optional
        The names to use for each column. If ``None`` then the names
        default to `col1` ... `colN`.
    ascii : bool, optional
        If `True` use an ASCII format, otherwise a binary format.
    clobber : bool, optional
        If `True` then the output file will be over-written if it
        already exists, otherwise an error is raised.

    See Also
    --------
    read_arrays

    """
    backend.set_arrays(filename, args, fields, ascii=ascii, clobber=clobber)
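
# A usage sketch for write_arrays (the file name, arrays, and column names
# are hypothetical):
#
#     >>> x = numpy.asarray([1, 2, 3])
#     >>> y = numpy.asarray([4, 10, 2])
#     >>> write_arrays('cols.dat', [x, y], fields=['x', 'y'], clobber=True)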
 

def write_table(filename, dataset, ascii=True, clobber=False):
    """Write out a table.

    Parameters
    ----------
    filename : str
        The name of the file.
    dataset
        The data to write out.
    ascii : bool, optional
        If `True` use an ASCII format, otherwise a binary format.
    clobber : bool, optional
        If `True` then the output file will be over-written if it
        already exists, otherwise an error is raised.

    See Also
    --------
    read_table

    """
    data, names = _pack_table(dataset)
    backend.set_table_data(filename, data, names, ascii=ascii,
                           clobber=clobber)
 

def write_image(filename, dataset, ascii=True, clobber=False):
    """Write out an image.

    Parameters
    ----------
    filename : str
        The name of the file.
    dataset
        The data to write out.
    ascii : bool, optional
        If `True` use an ASCII format, otherwise a binary format.
    clobber : bool, optional
        If `True` then the output file will be over-written if it
        already exists, otherwise an error is raised.

    See Also
    --------
    read_image

    """
    data, hdr = _pack_image(dataset)
    backend.set_image_data(filename, data, hdr, ascii=ascii,
                           clobber=clobber)
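
# A round-trip sketch for write_image (the file names are hypothetical):
#
#     >>> img = read_image('img.fits')
#     >>> write_image('img_copy.fits', img, ascii=False, clobber=True)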
 

def write_pha(filename, dataset, ascii=True, clobber=False):
    """Write out a PHA dataset.

    Parameters
    ----------
    filename : str
        The name of the file.
    dataset
        The data to write out.
    ascii : bool, optional
        If `True` use an ASCII format, otherwise a binary format.
    clobber : bool, optional
        If `True` then the output file will be over-written if it
        already exists, otherwise an error is raised.

    See Also
    --------
    read_pha

    """
    data, col_names, hdr = _pack_pha(dataset)
    backend.set_pha_data(filename, data, col_names, hdr, ascii=ascii,
                         clobber=clobber)
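
# A round-trip sketch for write_pha (the file names are hypothetical):
#
#     >>> pha = read_pha('source.pha')
#     >>> write_pha('source_copy.pha', pha, ascii=False, clobber=True)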
 

def pack_table(dataset):
    """Convert a Sherpa data object into an I/O item (tabular).

    Parameters
    ----------
    dataset : a sherpa.data.Data1D derived object

    Returns
    -------
    obj
        An object representing the data, the format of which depends
        on the I/O backend.

    See Also
    --------
    pack_image, pack_pha

    Examples
    --------

    >>> d = sherpa.data.Data1D('tmp', [1, 2, 3], [4, 10, 2])
    >>> tbl = pack_table(d)

    """
    data, names = _pack_table(dataset)
    return backend.set_table_data('', data, names, packup=True)
 

def pack_image(dataset):
    """Convert a Sherpa data object into an I/O item (image).

    Parameters
    ----------
    dataset : a sherpa.data.Data2D derived object

    Returns
    -------
    obj
        An object representing the data, the format of which depends
        on the I/O backend.

    See Also
    --------
    pack_pha, pack_table

    Examples
    --------

    >>> y, x = np.mgrid[:10, :5]
    >>> z = (x-2)**2 + (y-2)**3
    >>> d = sherpa.data.Data2D('img', x.flatten(), y.flatten(),
    ...                        z.flatten(), shape=z.shape)
    >>> img = pack_image(d)

    """
    data, hdr = _pack_image(dataset)
    return backend.set_image_data('', data, hdr, packup=True)
 

def pack_pha(dataset):
    """Convert a Sherpa PHA data object into an I/O item (tabular).

    Parameters
    ----------
    dataset : a sherpa.astro.data.DataPHA derived object

    Returns
    -------
    obj
        An object representing the data, the format of which depends
        on the I/O backend.

    See Also
    --------
    pack_image, pack_table

    """
    data, col_names, hdr = _pack_pha(dataset)
    return backend.set_pha_data('', data, col_names, hdr, packup=True)
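
# A usage sketch for pack_pha (the file name is hypothetical); the type of
# the returned item depends on the selected I/O backend:
#
#     >>> pha = read_pha('source.pha')
#     >>> item = pack_pha(pha)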
 

def read_table_blocks(arg, make_copy=False):
    """Return the HDU elements (columns and header) from a FITS table.

    Parameters
    ----------
    arg
        The data file, which can be the name of the file or an
        object representing the opened file (the type of this
        depends on the I/O backend in use).
    make_copy : bool, optional
        This argument is currently unused.

    Returns
    -------
    filename, cols, hdr : str, dict, dict
        The name of the file, the column data, and the header data
        from the HDUs in the file. The keys of the dictionaries are
        the block name and the values are dictionaries, with the
        keys being the column or header name.

    """
    return backend.read_table_blocks(arg, make_copy=make_copy)
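
# A usage sketch for read_table_blocks (the file name is hypothetical); the
# returned dictionaries are keyed by block name:
#
#     >>> fname, cols, hdr = read_table_blocks('src.fits')
#     >>> list(cols.keys())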