# Source code for superplot.data_loader

r"""
This module contains code for:

- Opening and processing a \\*.txt data file.
- Opening and processing an \\*.info information file.
- Using the \\*.info file to label the data.
"""

import warnings
import pandas as pd


def load(info_file, data_file):
    r"""
    Read data from \*.info file and \*.txt file.

    The data file is read first, then the labels, and finally any column
    of the data that has no label is given its integer index as a label.

    :param info_file: Name of \*.info file (passed on unchanged to
        ``_read_info_file``)
    :type info_file: string
    :param data_file: Name of \*.txt file
    :type data_file: string

    :returns: Dictionary with chain's labels and array of data, in that order
    :rtype: dict (labels), array (data)

    :raises RuntimeWarning: If ``data_file`` is empty or None
    """
    # Without a data file there is nothing to label, so fail immediately
    if not data_file:
        raise RuntimeWarning("Must specify a *.txt data file")

    data = _read_data_file(data_file)
    labels = _read_info_file(info_file)

    # Fills in missing labels; modifies ``labels`` in place
    _label_chain(data, labels)

    return labels, data
def _read_data_file(file_name, fill=0.):
    r"""
    Read \*.txt file into an array.

    :param file_name: Name of \*.txt file
    :type file_name: string
    :param fill: Fill value for problematic data entries
    :type fill: float

    :returns: Data as an array, with first index as column number
    :rtype: numpy.array
    """
    # Make converters that don't raise exceptions on problematic data entries
    def safe_float(entry):
        r"""
        Convert an entry to a float, substituting the fill value (with a
        warning) if the entry cannot be parsed.

        :param entry: String from \*.txt file
        :type entry: str

        :returns: Float of argument
        :rtype: float
        """
        try:
            return float(entry)
        except ValueError:
            warnings.warn("{} filled with {}".format(entry, fill))
            return fill

    # The number of columns is inferred from the first line of the file only
    with open(file_name) as file_:
        n_cols = len(file_.readline().split())

    # Apply the forgiving converter to every column
    converters = dict.fromkeys(range(n_cols), safe_float)

    # Read data into a pandas data-frame
    data_frame = pd.read_csv(file_name,
                             header=None,
                             sep=r"\s+",
                             engine="c",
                             converters=converters,
                             na_filter=False)

    # Transpose data-frame, such that first index is column rather than row
    data_frame = data_frame.transpose()

    # Find array from data-frame
    data_array = data_frame.values.astype('float64')

    return data_array


def _read_info_file(file_name):
    r"""
    Read labels from a SuperBayeS-style \*.info file into a dictionary.

    Only lines of the form ``labX=string``, where ``X`` is an integer, are
    used. Lines that begin with ``lab`` but do not have this form are skipped
    with a warning. All other lines are ignored.

    .. warning::
        SuperBayeS index begins at 1 and misses posterior weight and
        chi-squared. We begin at index 0 and include posterior weight and
        chi-squared. Thus, we add 1 to SuperBayeS indexes.

    :param file_name: Name of \*.info file, or None for default labels only
    :type file_name: string

    :returns: Labels of columns in \*.txt file
    :rtype: dict
    """
    prefix = "lab"

    # Add posterior weight and chi-squared to labels.
    labels = {0: r'$p_i$',
              1: r'$\chi^2$'
              }

    if file_name is None:
        warnings.warn("No *.info file for labels")
        return labels

    # Text mode, so that lines are strings that can be compared with "lab"
    with open(file_name) as info_file:
        for line in info_file:

            # Strip leading and trailing whitespace
            line = line.strip()

            # Look for "labX=string"
            if not line.startswith(prefix):
                continue

            # Remove exactly the prefix, then split about the first "=" sign
            # only, as the label itself may contain "=" signs
            index_text, separator, name = line[len(prefix):].partition("=")

            if not separator:
                warnings.warn("Skipped malformed label: {}".format(line))
                continue

            # Read corrected index
            try:
                index = int(index_text) + 1
            except ValueError:
                warnings.warn("Skipped malformed label: {}".format(line))
                continue

            # Add to dictionary of labels
            labels[index] = name

    return labels


def _label_chain(data, labels):
    r"""
    Check if labels match data. If they don't, add data indices to the
    dictionary of labels.

    A column counts as unlabelled if its index is absent from the labels or
    if its label is empty. A warning is issued for every such column.

    .. warning::
        This alters labels in place.

    :param data: Data chain, to match arguments with
    :type data: numpy.array
    :param labels: Labels for data chain
    :type labels: dict
    """
    # Label all unlabelled columns with integers
    for index in range(len(data)):
        if not labels.get(index):
            warnings.warn("Labels did not match data. "
                          "Missing labels are integers.")
            labels[index] = str(index)