#
# Copyright (C) 2007, 2015, 2016, 2019, 2020, 2021
# Smithsonian Astrophysical Observatory
#
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
import os
import numpy
from sherpa.utils import SherpaFloat, get_num_args, is_binary_file
from sherpa.utils.err import IOErr
from sherpa.data import Data1D, BaseData
# Names exported by a star import of this module.
__all__ = ('read_data', 'write_data', 'get_ascii_data', 'read_arrays',
           'write_arrays')
def _is_subclass(t1, t2):
return isinstance(t1, type) and issubclass(t1, t2) and (t1 is not t2)
def _check_args(size, dstype):
    """Check that enough columns are available to create ``dstype``.

    Parameters
    ----------
    size : int
        The number of data columns that are available.
    dstype : class
        The data class whose ``__init__`` method is inspected.

    Raises
    ------
    TypeError
        If ``size`` is less than the number of required arguments.

    """
    # The number of required arguments, not counting self and the
    # leading name (filename) argument.
    nrequired = get_num_args(dstype.__init__)[1] - 2
    if size < nrequired:
        message = f"data set '{dstype.__name__}' takes at least {nrequired} args"
        raise TypeError(message)
def read_file_data(filename, sep=' ', comment='#', require_floats=True):
    """Read the columns from an ASCII file.

    Each line has the characters tab, newline, carriage return,
    comma, semi-colon, colon, space, and ``|`` replaced by ``sep``
    before it is split into tokens.

    Parameters
    ----------
    filename : str
        The name of the ASCII file to read in.
    sep : str, optional
        The separator character. The default is ``' '``.
    comment : str, optional
        The comment character. The default is ``'#'``.
    require_floats : bool, optional
        If `True` (the default), a column that can not be converted
        to a floating-point type raises a `ValueError`. If `False`
        such a column is returned unconverted (as strings).

    Returns
    -------
    (names, args)
        The column names and the column data (a list of NumPy
        arrays, one per column). The names come from the last comment
        line found before the first data line, or are ``col1``,
        ``col2``, ... when there is no such line.

    Raises
    ------
    ValueError
        If the file contains no data lines, the number of columns per
        row is not constant, or a column can not be converted to a
        number and ``require_floats`` is `True`.

    """
    bad_chars = '\t\n\r,;: |'

    raw_names = []
    rows = []

    with open(filename, 'r') as fh:
        for line in fh:
            for char in bad_chars:
                if char in line:
                    # replace any bad chars in line with sep for tokenize
                    line = line.replace(char, sep)

            line = line.strip()
            if line == '':
                continue

            if line[0] == comment:
                # Only a comment line seen before any data can name the
                # columns; a comment after the data starts must not
                # replace the header.
                if not rows:
                    # Note that every comment character in the line is
                    # blanked out, not just the leading one.
                    line = line.replace(comment, ' ')
                    raw_names = line.strip().split(sep)

                continue

            row = [elem for elem in line.split(sep) if elem != '']

            # A line made up only of separators carries no values (this
            # can happen for blank lines when sep is not a space).
            if row:
                rows.append(row)

    if not rows:
        raise ValueError(f"The file {filename} contains no data rows")

    # rotate rows into list of columns (this fails with a ValueError
    # if the rows do not all have the same length)
    cols = numpy.column_stack(rows)

    # cast columns to appropriate type
    args = []
    for col in cols:
        try:
            args.append(col.astype(SherpaFloat))
        except ValueError as exc:
            if require_floats:
                raise ValueError(f"The file {filename} could not "
                                 "be loaded, probably because it contained "
                                 "spurious data and/or strings") from exc

            args.append(col)

    names = [name.strip(bad_chars) for name in raw_names if name != '']
    if not names:
        names = [f'col{i}' for i in range(1, len(args) + 1)]

    return names, args
def get_column_data(*args):
    """Convert the arguments into a flat list of columns.

    Parameters
    ----------
    *args : ndarray, list, tuple, or None
        Each array-like argument is converted to a NumPy array and
        contributes one column per row of its transpose (so a 1D
        argument gives a single column). A `None` argument is passed
        through as `None`.

    Returns
    -------
    cols : list
        The columns, in the order they were given.

    Raises
    ------
    sherpa.utils.err.IOErr
        If no arguments are given or an argument is not `None`, a
        NumPy array, a list, or a tuple.

    """
    if not args:
        raise IOErr('noarrays')

    columns = []
    for item in args:
        if item is None:
            columns.append(None)
            continue

        if not isinstance(item, (numpy.ndarray, list, tuple)):
            raise IOErr('badarray', item)

        transposed = numpy.asanyarray(item).T
        columns.extend(numpy.atleast_2d(transposed))

    return columns
def get_ascii_data(filename, ncols=1, colkeys=None, sep=' ', dstype=Data1D,
                   comment='#', require_floats=True):
    r"""Read in columns from an ASCII file.

    Parameters
    ----------
    filename : str
        The name of the ASCII file to read in.
    ncols : int, optional
        The number of columns to read in (the first ``ncols`` columns
        in the file). This is ignored if ``colkeys`` is given.
    colkeys : array of str, optional
        An array of the column name to read in. The default is
        `None`.
    sep : str, optional
        The separator character. The default is ``' '``.
    dstype : data class to use, optional
        Used to check that the data file contains enough columns.
    comment : str, optional
        The comment character. The default is ``'#'``.
    require_floats : bool, optional
        If `True` (the default), non-numeric data values will
        raise a `ValueError`.

    Returns
    -------
    (colnames, coldata, filename)
        The column names read in, the data for the columns
        as an array, with each element being the data for the column
        (the order matches ``colnames``), and the name of the file.

    Raises
    ------
    sherpa.utils.err.IOErr
        Raised if a requested column is missing or the file appears
        to be a binary file.
    TypeError
        If fewer columns are requested than ``dstype`` requires.
    ValueError
        If a column value can not be converted into a numeric value
        and the `require_floats` parameter is `True`.

    See Also
    --------
    read_arrays, read_data, write_arrays, write_data

    Notes
    -----

    The file is processed by reading in each line, stripping out any
    unsupported characters (replacing them by the ``sep`` argument),
    skipping empty lines, and then identifying comment and data lines.

    The list of unsupported characters are: ``\t``, ``\n``,
    ``\r``, comma, semi-colon, colon, space, and ``|``.

    The last comment line before the data is used to define the
    column names, splitting the line by the ``sep`` argument.
    If there are no comment lines then the columns are named
    starting at ``col1``, ``col2``, up to the number of columns.

    Data lines are separated into columns - splitting by the
    ``sep`` argument - and then converted to NumPy arrays.
    If the ``require_floats`` argument is `True` then the
    column will be converted to a floating-point number
    type, with an error raised if this fails.

    An error is raised if the number of columns per row
    is not constant.

    If the ``colkeys`` argument is used then a case-sensitive
    match is used to determine what columns to return.

    Examples
    --------

    Read in the first column from the file:

    >>> (colnames, coldata, fname) = get_ascii_data('src.dat')

    Read in the first three columns from the file:

    >>> colinfo = get_ascii_data('src.dat', ncols=3)

    Read in a histogram data set, using the columns XLO, XHI,
    and Y:

    >>> cols = ['XLO', 'XHI', 'Y']
    >>> res = get_ascii_data('hist.dat', colkeys=cols,
    ...                      dstype=sherpa.data.Data1DInt)

    Read in the first and third column from the file cols.dat,
    where the file has no header information:

    >>> res = get_ascii_data('cols.dat', colkeys=['col1', 'col3'])

    """

    if is_binary_file(filename):
        raise IOErr('notascii', filename)

    names, args = read_file_data(filename, sep, comment, require_floats)

    if colkeys is None:
        # The default of a single column is not checked against the
        # requirements of dstype.
        if ncols != 1:
            _check_args(ncols, dstype)

        return (names, list(args[:ncols]), filename)

    colkeys = list(colkeys)

    if len(names) > len(args):
        raise IOErr('wrongnumcols', len(args), len(names))

    kwargs = []
    for key in colkeys:
        if key not in names:
            # numpy.bytes_ is used rather than numpy.string_ since the
            # latter alias is not available in NumPy 2.0.
            raise IOErr('reqcol', key, numpy.asarray(names, numpy.bytes_))

        kwargs.append(args[names.index(key)])

    _check_args(len(kwargs), dstype)
    return (colkeys, kwargs, filename)
def read_data(filename, ncols=2, colkeys=None, sep=' ', dstype=Data1D,
              comment='#', require_floats=True):
    """Create a data object from an ASCII file.

    Parameters
    ----------
    filename : str
        The name of the ASCII file to read in.
    ncols : int, optional
        The number of columns to read in (the first ``ncols`` columns
        in the file). This is ignored if `colkeys` is given.
    colkeys : array of str, optional
        An array of the column name to read in. The default is
        `None`.
    sep : str, optional
        The separator character. The default is ``' '``.
    dstype : data class to use, optional
        The class of the data object to create.
    comment : str, optional
        The comment character. The default is ``'#'``.
    require_floats : bool, optional
        If `True` (the default), non-numeric data values will
        raise a `ValueError`.

    Returns
    -------
    data
        The data object (created by calling the dstype constructor
        with the filename and then the data columns from the file).

    Raises
    ------
    sherpa.utils.err.IOErr
        Raised if a requested column is missing or the file appears
        to be a binary file.
    ValueError
        If a column value can not be converted into a numeric value
        and the `require_floats` parameter is True.

    See Also
    --------
    get_ascii_data, read_arrays, write_data

    Notes
    -----

    The file format is described in `get_ascii_data`.

    Examples
    --------

    Create a 1D data object from the first two columns in the file:

    >>> dat = read_data('src.dat')

    Use the third column as the error column (statistical):

    >>> dat = read_data('src.dat', ncols=3)

    Read in a histogram data set, using the columns XLO, XHI,
    and Y:

    >>> cols = ['XLO', 'XHI', 'Y']
    >>> dat = read_data('hist.dat', colkeys=cols,
    ...                 dstype=sherpa.data.Data1DInt)

    Use the first and third column from the file cols.dat,
    where the file has no header information:

    >>> dat = read_data('cols.dat', colkeys=['col1', 'col3'])

    """

    # The column names are not needed to build the data object.
    _, columns, label = get_ascii_data(filename, ncols, colkeys, sep,
                                       dstype, comment, require_floats)
    return dstype(label, *columns)
def read_arrays(*args, dstype=None):
    """Create a data object from arrays.

    Parameters
    ----------
    col1, ... coln : array_like
        The data columns.
    dstype : optional
        The data type to create. It must be a subclass of
        `sherpa.data.BaseData` and defaults to `sherpa.data.Data1D`.
        It can be given either as the ``dstype`` keyword argument or
        as the last positional argument (after the data columns).

    Returns
    -------
    data
        The data object (created by calling the dstype constructor
        with an empty name and then the data columns).

    Raises
    ------
    sherpa.utils.err.IOErr
        Raised if no arrays are sent in.
    TypeError
        If fewer columns are given than ``dstype`` requires.

    See Also
    --------
    get_ascii_data, write_arrays

    Examples
    --------

    Create a 1D data object from the x and y arrays:

    >>> dat = read_arrays(x, y)

    Include a statistical error column:

    >>> dat = read_arrays(x, y, dy)

    Create an integrated (i.e. histogram) data set:

    >>> dat = read_arrays(xlo, xhi, y, dstype=sherpa.data.Data1DInt)

    The data type can also be given as the final argument:

    >>> dat = read_arrays(xlo, xhi, y, sherpa.data.Data1DInt)

    """
    args = list(args)
    if len(args) == 0:
        raise IOErr('noarrays')

    if dstype is None:
        # No explicit keyword, so fall back to the positional form
        # where the data class may follow the data columns.
        dstype = Data1D
        if _is_subclass(args[-1], BaseData):
            dstype = args.pop()

    args = get_column_data(*args)

    # Check there are enough columns for the dataset constructor
    _check_args(len(args), dstype)

    return dstype('', *args)
def write_arrays(filename, args, fields=None, sep=' ', comment='#',
                 clobber=False, linebreak='\n', format='%g'):
    """Write a list of arrays to an ASCII file.

    Parameters
    ----------
    filename : str
        The name of the file to write the arrays to.
    args : array_like
        The arrays to write out, one per column. They must all
        have the same length.
    fields : array_like of str, optional
        The column names (should match the size of `args` if given).
    sep : str, optional
        The separator character. The default is ``' '``.
    comment : str, optional
        The comment character. The default is ``'#'``. This is only used
        to write out the column names when `fields` is not `None`.
    clobber : bool, optional
        This flag controls whether an existing file can be
        overwritten (``True``) or if it raises an exception
        (``False``, the default setting).
    linebreak : str, optional
        Indicate a new line. The default is ``'\\n'``.
    format : str, optional
        The format used to write out the numeric values. The
        default is ``'%g'``.

    Raises
    ------
    sherpa.utils.err.IOErr
        If `filename` already exists and `clobber` is `False`,
        if there is no data to write, or if the arrays do not
        all have the same length.

    See Also
    --------
    get_ascii_data

    Examples
    --------

    Write the x and y arrays to the file 'src.dat':

    >>> write_arrays('src.dat', [x, y])

    Use the column names "r" and "surbri" for the columns:

    >>> write_arrays('prof.txt', [x, y], fields=["r", "surbri"],
    ...              clobber=True)

    """
    if os.path.isfile(filename) and not clobber:
        raise IOErr("filefound", filename)

    if not numpy.iterable(args) or len(args) == 0:
        raise IOErr('noarrayswrite')

    first = args[0]
    if not numpy.iterable(first):
        raise IOErr('noarrayswrite')

    nrows = len(first)
    for column in args:
        if not numpy.iterable(column):
            raise IOErr('noarrayswrite')

        if len(column) != nrows:
            raise IOErr('arraysnoteq')

    # Each element of table is one output row (one value per column).
    table = numpy.column_stack(numpy.asarray(args))

    # Format all the rows before the file is opened.
    body = linebreak.join(sep.join(format % value for value in row)
                          for row in table)

    with open(filename, 'w') as fh:
        if fields is not None:
            fh.write(comment + sep.join(fields) + linebreak)

        # the data is always followed by a final newline
        fh.write(body + linebreak)
def write_data(filename, dataset, fields=None, sep=' ', comment='#',
               clobber=False, linebreak='\n', format='%g'):
    """Write out a dataset as an ASCII file.

    Parameters
    ----------
    filename : str
        The name of the file to write the data to.
    dataset :
        The data object to write out.
    fields : array_like of str, optional
        The names of the attributes of ``dataset`` to write out.
        Any unknown columns, or those set to `None`, are skipped,
        as is the ``name`` field. If not given then the field
        names from the data set will be used (for those columns which
        contain data).
    sep : str, optional
        The separator character. The default is ``' '``.
    comment : str, optional
        The comment character. The default is ``'#'``. This is used to
        write out the column names (after converting to upper case)
        before the data.
    clobber : bool, optional
        This flag controls whether an existing file can be
        overwritten (`True`) or if it raises an exception
        (`False`, the default setting).
    linebreak : str, optional
        Indicate a new line. The default is ``'\\n'``.
    format : str, optional
        The format used to write out the numeric values. The
        default is ``'%g'``.

    Raises
    ------
    sherpa.utils.err.IOErr
        If `filename` already exists and `clobber` is `False`
        or if there is no data to write.

    See Also
    --------
    get_ascii_data, read_data

    Examples
    --------

    Write the data object to the file 'src.dat':

    >>> write_data('src.dat', dat)

    """
    if fields is None:
        fields = dataset._fields

    # Pair each requested field with its value, then drop the fields
    # with no data and the name of the data set.
    candidates = [(name, getattr(dataset, name, None)) for name in fields]
    kept = [(name, value) for name, value in candidates
            if value is not None and name != 'name']

    headers = [name.upper() for name, _ in kept]
    columns = [value for _, value in kept]

    write_arrays(filename, columns, headers, sep, comment, clobber,
                 linebreak, format)