Source code for perfectns.estimators

#!/usr/bin/env python
"""
Contains classes representing quantities which can be calculated from nested
sampling run.

Each estimator class should contain a mandatory member function returning the
value of the estimator for a nested sampling run:

    def __call__(self, ns_run, logw=None, simulate=False):
        ...

This allows logw to be provided if many estimators are being calculated from
the same run so logw is only calculated once. Otherwise logw is calculated from
the run if required.

They may also optionally contain a function giving its analytical value for
some given set of calculation settings (for use in checking results):

    def analytical(self, settings):
        ...

as well as helper functions.
Estimators should also contain class variables:

    name: str
        used for results tables.
    latex_name: str
        used for plotting results diagrams.

"""

import functools
import numpy as np
import scipy
import perfectns.maths_functions as mf
import nestcheck.ns_run_utils
import nestcheck.estimators


# Estimators
# ----------

[docs]class EstimatorBase(object): """Base class for estimators.""" def __init__(self, func, **kwargs): """Set up estimator object, including making latex name and saving kwargs. Parameters ---------- func: function kwargs: dict, optional Saved keyword arguments for function. """ if kwargs: self.func = functools.partial(func, **kwargs) else: self.func = func self.latex_name = nestcheck.estimators.get_latex_name(func, **kwargs) def __call__(self, *args, **kwargs): """Returns estimator value for run.""" return self.func(*args, **kwargs)
[docs]class LogZ(EstimatorBase): """Natural log of Bayesian evidence.""" def __init__(self): """See EstimatorBase __init__ docstring.""" EstimatorBase.__init__(self, nestcheck.estimators.logz)
[docs] @staticmethod def analytical(settings): """Returns analytical value of estimator given settings.""" return settings.logz_analytic()
[docs]class Z(EstimatorBase): """Bayesian evidence.""" def __init__(self): """See EstimatorBase __init__ docstring.""" EstimatorBase.__init__(self, nestcheck.estimators.evidence)
[docs] @staticmethod def analytical(settings): """Returns analytical value of estimator given settings.""" return np.exp(settings.logz_analytic())
[docs]class CountSamples(EstimatorBase): """Number of samples in run.""" def __init__(self): """See EstimatorBase __init__ docstring.""" EstimatorBase.__init__(self, nestcheck.estimators.count_samples)
[docs]class ParamMean(EstimatorBase): """ Mean of a single parameter (single component of theta). By symmetry all parameters have the same distribution. """ def __init__(self, param_ind=0): """See EstimatorBase __init__ docstring.""" EstimatorBase.__init__(self, nestcheck.estimators.param_mean, param_ind=param_ind)
[docs] @staticmethod def analytical(settings): """Returns analytical value of estimator given settings.""" return 0.
[docs] @staticmethod def ftilde(logx, settings): """ Helper function for finding the analytic value of the estimator. See "check_by_integrating" docstring for more details. ftilde(X) is mean of f(theta) on the iso-likelihood contour L(theta) = L(X). """ return np.zeros(logx.shape)
[docs]class ParamCred(EstimatorBase): """ One-tailed credible interval on the value of a single parameter (component of theta). By symmetry all parameters have the same distribution. """ def __init__(self, probability, param_ind=0): """See EstimatorBase __init__ docstring.""" self.probability = probability EstimatorBase.__init__(self, nestcheck.estimators.param_cred, probability=probability, param_ind=param_ind)
[docs] def analytical(self, settings): """Returns analytical value of estimator given settings.""" if self.probability == 0.5: # by symmetry the median of any parameter given spherically # symmetric likelihoods and priors co-centred on zero is zero return 0 else: assert type(settings.likelihood).__name__ == 'Gaussian', \ "so far only set up for Gaussian likelihoods" assert type(settings.prior).__name__ in ['Gaussian', 'GaussianCached'], \ "so far only set up for Gaussian priors" # the product of two Gaussians is another Gaussian with sigma: sigma = ((settings.likelihood.likelihood_scale ** -2) + (settings.prior.prior_scale ** -2)) ** -0.5 # find numCer of sigma from the mean by inverting the CDF of the # normal distribution. # CDF(x) = (1/2) + (1/2) * error_function(x / sqrt(2)) zscore = (scipy.special.erfinv((self.probability * 2) - 1) * np.sqrt(2)) return zscore * sigma
[docs]class ParamSquaredMean(EstimatorBase): """ Mean of the square of single parameter (second moment of its posterior distribution). By symmetry all parameters have the same distribution. """ min_value = 0 def __init__(self, param_ind=0): """See EstimatorBase __init__ docstring.""" EstimatorBase.__init__(self, nestcheck.estimators.param_squared_mean, param_ind=param_ind)
[docs] @staticmethod def ftilde(logx, settings): """ Helper function for finding the analytic value of the estimator. See "check_by_integrating" docstring for more details. ftilde(X) is mean of f(theta) on the iso-likelihood contour L(theta) = L(X). """ # by symmetry at each (hyper)spherical iso-likelihood contour: r = settings.r_given_logx(logx) return r ** 2 / settings.n_dim
[docs] def analytical(self, settings): """Returns analytical value of estimator given settings.""" return check_by_integrating(self.ftilde, settings)
[docs]class RMean(EstimatorBase): """Mean of |theta| (the radial distance from the centre).""" min_value = 0 def __init__(self, from_theta=False): """See EstimatorBase __init__ docstring.""" EstimatorBase.__init__(self, nestcheck.estimators.r_mean) self.from_theta = from_theta def __call__(self, ns_run, logw=None, simulate=False): """ Overwrite __call__ from nestcheck r_mean to allow use of perfectns' 'r' key if from_theta=False, and to check all the dimensions have been sampled if from_theta=True. """ if self.from_theta: # If run contains a dims_to_sample setting, check that samples from # every dimension are included assert (ns_run['settings']['dims_to_sample'] == ns_run['settings']['n_dim']), "Cannot work out radius!" return self.func(ns_run, logw=logw, simulate=simulate) else: if logw is None: logw = nestcheck.ns_run_utils.get_logw( ns_run, simulate=simulate) w_relative = np.exp(logw - logw.max()) r = ns_run['r'] return np.sum(w_relative * r) / np.sum(w_relative)
[docs] def analytical(self, settings): """Returns analytical value of estimator given settings.""" return check_by_integrating(self.ftilde, settings)
[docs] @staticmethod def ftilde(logx, settings): """ Helper function for finding the analytic value of the estimator. See "check_by_integrating" docstring for more details. ftilde(X) is mean of f(theta) on the iso-likelihood contour L(theta) = L(X). """ return settings.r_given_logx(logx)
[docs]class RCred(EstimatorBase): """One-tailed credible interval on the value of |theta|.""" min_value = 0 def __init__(self, probability, from_theta=False): """See EstimatorBase __init__ docstring.""" self.probability = probability EstimatorBase.__init__(self, nestcheck.estimators.r_cred, probability=probability) self.from_theta = from_theta def __call__(self, ns_run, logw=None, simulate=False): """Returns estimator value for run.""" if self.from_theta: # If run contains a dims_to_sample setting, check that samples from # every dimension are included assert (ns_run['settings']['dims_to_sample'] == ns_run['settings']['n_dim']), "Cannot work out radius!" return self.func(ns_run, logw=logw, simulate=simulate, probability=self.probability) else: if logw is None: logw = nestcheck.ns_run_utils.get_logw( ns_run, simulate=simulate) # get sorted array of r values with their posterior weight wr = np.zeros((logw.shape[0], 2)) wr[:, 0] = np.exp(logw - logw.max()) wr[:, 1] = ns_run['r'] wr = wr[np.argsort(wr[:, 1], axis=0)] # calculate cumulative distribution function (cdf) # Adjust by subtracting 0.5 * weight of first point to correct skew # - otherwise we need cdf=1 to return the last value but will # return the smallest value if cdf<the fractional weight of the # first point. # This should not much matter as typically points' relative weights # will be very small compared to self.probability or # 1-self.probability. cdf = np.cumsum(wr[:, 0]) - (wr[0, 0] / 2) cdf /= np.sum(wr[:, 0]) # calculate cdf # linearly interpolate value return np.interp(self.probability, cdf, wr[:, 1])
# Functions for checking estimator results # ----------------------------------------
[docs]def get_true_estimator_values(estimators, settings): """ Calculates analytic values for estimators given the likelihood and prior in settings. If there is no method for calculating the values then np.nan is returned. Parameters ---------- estimators: estimator object or list of estimator objects settings: PerfectNSSettings object Returns ------- output: np.array of size (len(estimators),) if estimators is a list, float otherwise. """ if isinstance(estimators, list): output = np.zeros(len(estimators)) for i, est in enumerate(estimators): try: output[i] = est.analytical(settings) except (AttributeError, AssertionError): output[i] = np.nan return output else: try: return estimators.analytical(settings) except (AttributeError, AssertionError): return np.nan
[docs]def check_by_integrating(ftilde, settings): """ Return the true value of the estimator using numerical integration. Chopin and Robert (2010) show that the expectation of some function f(theta) is given by the integral int L(X) X ftilde(X) dX / Z, where ftilde(X) is mean of f(theta) on the iso-likelihood contour L(theta) = L(X). Parameters ---------- ftilde: function settings: PerfectNSSettings object Returns ------- float The estimator's true value. """ logx_terminate = mf.analytic_logx_terminate(settings) assert logx_terminate is not None, \ 'logx_terminate function not set up for current settings' result = scipy.integrate.quad(check_integrand, logx_terminate, 0.0, args=(ftilde, settings)) return result[0] / np.exp(settings.logz_analytic())
[docs]def check_integrand(logx, ftilde, settings): """Helper function to return integrand L(X) X ftilde(X) for checking estimator values by numerical integration. Note that the integral must be normalised by multiplying by a factor of (1/Z). Parameters ---------- logx: 1d numpy array Values on which to evaluate integrand. ftilde: function See check_by_integrating docstring for more details. settings: PerfectNSSettings object Returns ------- 1d numpy array Integrand evaluated at input logx coordinates. """ # returns L(X) X ftilde(X) for integrating dlogx return (np.exp(settings.logl_given_logx(logx) + logx) * ftilde(logx, settings))