Source code for superplot.statslib.point
"""
===========================
Point Statistical Functions
===========================
This module contains statistical functions that return a single data point.
"""
import numpy as np
from scipy import stats
from patched_joblib import memory
DOCTEST_PRECISION = 10
def _shift(bin_number, nbins):
"""
Modify bin numbers so that bin numbers, initially `[0, nbins + 1]` because
:mod:`numpy` uses extra bins for outliers, match array indices
`[0, nbins - 1]`.
:param bin_number: A bin number
:type bin_number: integer
:param nbins: Total number of bins
:type nbins: integer
:returns: Shifted bin number
:rtype: integer
"""
if bin_number == 0 or bin_number == nbins + 1:
# First deal with outliers in 0 and nbins + 1 bins
bin_number = None
else:
# Subtract one from all bin numbers to shift (1, n_bins) to
# (0, n_bins - 1).
bin_number -= 1
return bin_number
@memory.cache
def posterior_mean(posterior, param):
r"""
Calculate the posterior mean:
.. math::
\hat x \equiv \int p(x) x dx
:param posterior: Data column of posterior weight
:type posterior: numpy.ndarray
:param param: Data column of parameter of interest
:type param: numpy.ndarray
:returns: Posterior mean
:rtype: numpy.float64
:Example:
>>> round(posterior_mean(data[0], data[2]), DOCTEST_PRECISION)
-1965.6810774827
>>> round(posterior_mean(data[0], data[3]), DOCTEST_PRECISION)
72.740677579
"""
# Calculate posterior mean - dot product weights with parameter
# values and normalize.
_posterior_mean = np.dot(posterior, param) / sum(posterior)
return _posterior_mean
@memory.cache
def best_fit(chi_sq, param):
"""
Calculate the best-fit value of a parameter, i.e. the parameter such that
the chi-squared is minimized.
:param chi_sq: Data column of chi-squared
:type chi_sq: numpy.ndarray
:param param: Data column of parameter of interest
:type param: numpy.ndarray
:returns: The best-fit value of a parameter
:rtype: numpy.float64
:Example:
>>> round(best_fit(data[1], data[1]), DOCTEST_PRECISION + 6)
1.6806818041e-06
>>> round(best_fit(data[1], data[2]), DOCTEST_PRECISION)
-1966.9007376503
>>> round(best_fit(data[1], data[3]), DOCTEST_PRECISION)
77.6690218129
"""
# Calculate the best-fit - find the point that corresponds
# to the smallest chi-squared.
_best_fit = param[chi_sq.argmin()]
return _best_fit
[docs]def p_value(chi_sq, dof):
r"""
Calculate the :math:`\textrm{$p$-value}` from a chi-squared distribution:
.. math::
\textrm{$p$-value} \equiv \int_\chi^2^\infty f(x; k) dx
:param chi_sq: Data column of chi-squared
:type chi_sq: numpy.ndarray
:param dof: Number of degrees of freedom
:type dof: integer
:returns: A p-value for the given chi_sq, dof
:rtype: numpy.float64
>>> round(p_value(data[1], 2), DOCTEST_PRECISION)
0.9999991597
"""
# Find the associated p-value. The survival function, sf,
# is 1 - the CDF.
_p_value = stats.chi2.sf(chi_sq.min(), dof)
return _p_value
if __name__ == "__main__":
import doctest
import superplot.data_loader as data_loader
GAUSS = "../example/gaussian_.txt"
GAUSS_DATA = data_loader.load(None, GAUSS)[1]
doctest.testmod(extraglobs={'data': GAUSS_DATA})