# Source code for superplot.data_loader

r"""
This module contains code for:

- Opening and processing a \\*.txt data file.
- Opening and processing an \\*.info information file.
- Using the \\*.info file to label the data.
"""

import warnings
import pandas as pd


def load(info_file, data_file):
    """
    Read data from \\*.info file and \\*.txt file.

    The data file is read first, then the labels; finally any column of the
    data that has no label is given its integer index as a label.

    :param info_file: Name of \\*.info file. Passed unchanged to \
        ``_read_info_file``, which accepts ``None`` when no file is available
    :type info_file: string
    :param data_file: Name of \\*.txt file
    :type data_file: string

    :returns: Dictionary of the chain's labels (keyed by column index) and \
        array of data, in that order
    :rtype: dict (labels), array (data)

    :raises RuntimeWarning: If no data file is specified
    """
    # NOTE: RuntimeWarning is raised as an exception here (not issued via
    # warnings.warn); the type is kept so existing callers that catch it
    # continue to work.
    if not data_file:
        raise RuntimeWarning("Must specify a *.txt data file")

    data = _read_data_file(data_file)
    labels = _read_info_file(info_file)

    # Fills in missing labels in place, so that every column is labelled
    _label_chain(data, labels)

    return labels, data


def _read_data_file(file_name, fill=0.):
    """
    Load a whitespace-separated \\*.txt file as an array of floats.

    Entries that cannot be converted to a float are replaced by ``fill``
    and a warning is issued for each of them.

    :param file_name: Name of \\*.txt file
    :type file_name: string
    :param fill: Fill value for problematic data entries
    :type fill: float

    :returns: Data as an array, with first index as column number
    :rtype: numpy.array
    """

    def to_float_or_fill(text):
        """
        Convert an entry to a float without raising on bad input.

        :param text: String from \\*.txt file
        :type text: str

        :returns: Float of argument, or the fill value if it isn't a number
        :rtype: float
        """
        try:
            value = float(text)
        except ValueError:
            warnings.warn("{} filled with {}".format(text, fill))
            value = fill
        return value

    # The number of columns is taken from the first line of the file
    with open(file_name) as handle:
        first_row = handle.readline()
    column_count = len(first_row.split())

    # One forgiving converter per column
    parsers = {column: to_float_or_fill for column in range(column_count)}

    frame = pd.read_csv(file_name,
                        sep=r"\s+",
                        header=None,
                        engine="c",
                        na_filter=False,
                        converters=parsers)

    # Transpose so that the first index selects a column of the file,
    # then pull out the underlying array as floats
    return frame.transpose().values.astype('float64')


def _read_info_file(file_name):
    """
    Read labels from a SuperBayeS-style ``*.info`` file into a dictionary.

    Only lines of the form ``labX=string`` (ignoring surrounding whitespace)
    are used, where ``X`` is an integer; all other lines are skipped.
    Everything after the first ``=`` is taken as the label, so labels may
    themselves contain ``=``.

    .. warning::
        SuperBayeS index begins at 1 and misses posterior weight and
        chi-squared. We begin at index 0 and include posterior weight and
        chi-squared. Thus, we add 1 to SuperBayeS indexes.

    :param file_name: Name of ``*.info`` file, or None if there isn't one
    :type file_name: string

    :returns: Labels of columns in ``*.txt`` file
    :rtype: dict

    :raises ValueError: If the text between ``lab`` and ``=`` isn't an integer
    :raises IndexError: If a line beginning with ``lab`` contains no ``=``
    """
    prefix = "lab"

    # Add posterior weight and chi-squared to labels.
    labels = {0: r'$p_i$',
              1: r'$\chi^2$'
              }

    if file_name is None:
        warnings.warn("No *.info file for labels")
        return labels

    # Open in text mode: the lines are compared with and split on str
    # values, which fails for the bytes yielded by a binary-mode file.
    with open(file_name) as info_file:

        for line in info_file:

            # Strip leading and trailing whitespace
            line = line.strip()

            # Look for "labX=string"
            if not line.startswith(prefix):
                continue

            # Remove the prefix by slicing; str.lstrip would strip a set of
            # characters rather than the literal prefix. Split about the
            # first "=" sign only, so that the label is kept whole.
            words = line[len(prefix):].split("=", 1)

            # Read corrected index
            index = int(words[0]) + 1

            # Add name of parameter to dictionary of labels
            labels[index] = words[1]

    return labels


def _label_chain(data, labels):
    r"""
    Make sure that every entry along the first axis of the data has a label.

    Any index in ``range(len(data))`` whose label is absent or empty is
    labelled with the index itself, written as a string, and a warning is
    issued for each such index.

    .. warning::
        This alters labels in place.

    :param data: Data chain, to match arguments with
    :type data: numpy.array
    :param labels: Labels for data chain
    :type labels: dict
    """

    # Indexes of the data that have no (non-empty) label
    unlabelled = [number for number in range(len(data))
                  if not labels.get(number)]

    # Label all unlabelled columns with integers
    for number in unlabelled:
        warnings.warn("Labels did not match data. "
                      "Missing labels are integers.")
        labels[number] = str(number)
# Source code for superplot.data_loader
#
# SuperGUI
#
# Navigation
#
# Related Topics