Source code for perfectns.maths_functions

#!/usr/bin/env python
"""
Mathematical functions.
"""


import numpy as np
import scipy
import scipy.stats
import scipy.special
import scipy.misc
import mpmath


[docs]def gaussian_r_given_logx(logx, sigma, n_dim): """ Returns r coordinate corresponding to logx values for a Gaussian prior with the specificed standard deviation and dimension. This uses scipy.special.gammaincinv and requires exponentiating logx, so numerical errors occur with very low logx values. Parameters ---------- logx: float or numpy array Logx coordinates at which to work out r. sigma: float Standard deviation of Gaussian. n_dim: int Number of dimensions. Returns ------- float or numpy array Radial coordinates corresponding to input log X values. """ exponent = scipy.special.gammaincinv(n_dim / 2., np.exp(logx)) return np.sqrt(2 * exponent * sigma ** 2)
[docs]def gaussian_logx_given_r(r, sigma, n_dim): """ Returns logx coordinate corresponding to r values for a Gaussian prior with the specificed standard deviation and dimension Uses mpmath package for arbitary precision. Parameters ---------- r: float or numpy array Radial coordinates at which to evaluate logx. sigma: float Standard deviation of Gaussian. n_dim: int Number of dimensions. Returns ------- logx: float or numpy array Logx coordinates corresponding to input radial coordinates. """ exponent = 0.5 * (r / sigma) ** 2 if isinstance(r, np.ndarray): # needed to ensure output is numpy array logx = np.zeros(r.shape) for i, expo in enumerate(exponent): logx[i] = float(mpmath.log(mpmath.gammainc(n_dim / 2., a=0, b=expo, regularized=True))) return logx else: return float(mpmath.log(mpmath.gammainc(n_dim / 2., a=0, b=exponent, regularized=True)))
[docs]def analytic_logx_terminate(settings): """ Find logx_terminate analytically by assuming all likelihood at very low X approximately equals the maximum likelihood. This approximation breaks down in very high dimensions. Parameters ---------- settings: PerfectNSSettings object Return ------ float or None """ # use r=0 rather than logx=-np.inf as the latter causes numerical problems logl_max = settings.logl_given_r(0) if settings.logz_analytic() is not None: return logx_terminate_bound(logl_max, settings.termination_fraction, settings.logz_analytic()) else: return None
[docs]def logx_terminate_bound(logl_max, termination_fraction, logz_analytic): """ Find a lower bound logx_terminate analytically by assuming all likelihood at very low X approximately equals the maximum likelihood. This approximation breaks down in very high dimensions and the true logx terminate required will be larger. We want: Z_term = termination_fraction * Z_analytic = int_0^Xterm L(X) dX approx= Xterm L_max, so logx_term = log(termination_fraction) + log(Z_analytic) - logl_max Parameters ---------- logl_max: float Maximum loglikelihood. termination_frac: float Fraction of posterior mass remaining at termination. logz_analytic: float True value of log evidence. Returns ------- float """ return np.log(termination_fraction) + logz_analytic - logl_max
[docs]def sample_nsphere_shells_beta(r, n_dim, n_sample=None): """ Given some 1d numpy array of radial coordinates r, return a numpy array of sample coordinates on the hyperspherical shells with each radial coordinate. Sample single parameters on n_dim-dimensional sphere independently, as as described in Section 3.2 of 'Sampling Errors in nested sampling parameter estimation' (Higson et al., 2018). NB if each parameter is sampled independently and combined into a vector, that vector will not have a magnitude r. The n_sample argument can be used to set the number of parameters for which samples are returned (by symmetry they all have the same distribution). This is useful for saving memory in high dimensions. This function is much quicker than sample_nsphere_shells_normal when n_dim is high and n_sample is low. Parameters ---------- r: 1d numpy array n_dim: int n_sample: int or None, optional Number of parameters to include in output for each sample. Returns ------- thetas: 2d numpy array Each row is a random sample from the shells defined by the corresponding input r coordinate. """ assert isinstance(r, np.ndarray), 'input r must be a numpy array' if n_sample is None: n_sample = n_dim thetas = np.sqrt(np.random.beta(0.5, (n_dim - 1) * 0.5, size=(r.shape[0], n_sample))) # randomly select + or - thetas *= (-1) ** (np.random.randint(0, 2, size=thetas.shape)) # multiply by r thetas *= r[:, None] return thetas
[docs]def sample_nsphere_shells_normal(r, n_dim, n_sample=None): """ Given some 1d numpy array of radial coordinates r, return a numpy array of sample coordinates on the hyperspherical shells with each radial coordinate. This works by using the symmetry of the normal distribution to sample isotropically, then normalising each set of samples to lie on a spherical shell of the correct radius. The n_sample argument can be used to set the number of parameters for which samples are returned (by symmetry they all have the same distribution). This is useful for saving memory in high dimensions. Parameters ---------- r: 1d numpy array n_dim: int n_sample: int or None, optional Number of parameters to include in output for each sample. Returns ------- thetas: 2d numpy array Each row is a random sample from the shells defined by the corresponding input r coordinate. """ if n_sample is None: n_sample = n_dim assert n_sample <= n_dim, 'so far only set up for nsample <= ndim' thetas = np.random.normal(size=(r.shape[0], n_dim)) # calculate normalisation so sum_i(theta_i^2) = r^2 for each row norm = r / np.sqrt(np.sum(thetas ** 2, axis=1)) # only return n_sample columns thetas = thetas[:, :n_sample] # normalise each column thetas *= norm[:, None] return thetas
[docs]def sample_nsphere_shells(r, n_dim, n_sample): """ Wrapper calling either sample_nsphere_shells_normal or sample_nsphere_shells_beta depending on the dimension and number of samples required. See the docstrings of sample_nsphere_shells_normal and sample_nsphere_shells_beta for more information. """ if n_dim >= 100 and n_sample <= 2: return sample_nsphere_shells_beta(r, n_dim, n_sample) else: return sample_nsphere_shells_normal(r, n_dim, n_sample)
[docs]def nsphere_r_given_logx(logx, r_max, n_dim): """ Finds r coordinates given input logx values for a uniform prior within an n-dimensional sphere co-centred with a spherically symmetric likelihood. This will return an answer of the same type (float or numpy array) as the input {logx}. Parameters ---------- logx: float or numpy array r_max: float Boundry of the spherically symmetric prior. n_dim: int Number of dimensions Returns r: float or numpy array Radial coordinates corresponding to logx. """ r = np.exp(logx / n_dim) * r_max return r
[docs]def nsphere_logx_given_r(r, r_max, n_dim): """ Finds logx assuming the prior is an n-dimensional sphere co-centred with a spherically symmetric likelihood. This will return an answer of the same type (float or numpy array) as the input {r}. Parameters ---------- r: float or numpy array r_max: float n_dim: int Returns ------- logx: float or numpy array. Logx coordinates corresponding to input radial coordinates. """ logx = (np.log(r) - np.log(r_max)) * n_dim return logx
[docs]def nsphere_logvol(dim, radius=1.0): """ Returns the natural log of the hypervolume of a unit nsphere of specified dimension. Useful for very high dimensions. Formulae is from https://en.wikipedia.org/wiki/N-sphere#Volume_and_surface_area Parameters ---------- dim: int radius: float, optional Returns ------- float """ return ((np.log(radius) * dim) + (np.log(np.pi) * (dim / 2.0)) - (scipy.special.gammaln((dim / 2.0) + 1.0)))
[docs]def log_gaussian_given_r(r, sigma, n_dim): """ Returns the natural log of a normalised, uncorrelated Gaussian likelihood with equal variance in all n_dim dimensions. Parameters ---------- r: float or numpy array sigma: float n_dim: int Returns ------- logl: float or numpy array Loglikelihood values corresponding to input radial coordinates. """ logl = -0.5 * ((r ** 2) / (sigma ** 2)) # normalise logl -= n_dim * np.log(sigma) logl -= np.log(2 * np.pi) * (n_dim / 2.0) return logl
[docs]def log_exp_power_given_r(r, sigma, n_dim, b=0.5): """ Returns the natural log of an exponential power distribution. This equals a Gaussian distribution when b=1. Parameters ---------- r: float or numpy array sigma: float n_dim: int b: float, optional Returns ------- logl: float or numpy array Loglikelihood values corresponding to input radial coordinates. """ logl = -0.5 * (((r ** 2) / (sigma ** 2)) ** b) # normalise logl += np.log(n_dim) logl += scipy.special.gammaln((n_dim) / 2.0) logl -= np.log(np.pi) * (n_dim / 2.0) logl -= n_dim * np.log(sigma) logl -= np.log(2) * (1 + 0.5 * b) logl -= scipy.special.gammaln(1. + ((n_dim) / (2 * b))) return logl
[docs]def r_given_log_exp_power(logl, sigma, n_dim, b=0.5): """ Returns the natural log of an exponential power distribution. This equals a Gaussian distribution when b=1. Parameters ---------- logl: float or numpy array sigma: float n_dim: int b: float, optional Returns ------- r: float or numpy array Radial coordinates corresponding to input logl values. """ # remove normalisation constant exponent = logl - np.log(n_dim) exponent -= scipy.special.gammaln((n_dim) / 2.0) exponent += np.log(np.pi) * (n_dim / 2.0) exponent += n_dim * np.log(sigma) exponent += np.log(2) * (1 + 0.5 * b) exponent += scipy.special.gammaln(1. + ((n_dim) / (2 * b))) # rearrange exponent = (-2. * exponent) ** (1. / b) logl = np.sqrt(exponent * (sigma ** 2)) return logl
[docs]def r_given_log_gaussian(logl, sigma, n_dim): """ Returns the radius of a given logl for a normalised, uncorrelated Gaussian with equal variance in all n_dim dimensions. Parameters ---------- logl: float or numpy array sigma: float n_dim: int Returns ------- r: float or numpy array Radial coordinates corresponding to input logl values. """ # remove normalisation constant exponent = logl + n_dim * np.log(sigma) exponent += np.log(2 * np.pi) * (n_dim / 2.0) # rearrange r = np.sqrt(-2 * exponent) * sigma return r
[docs]def log_cauchy_given_r(r, sigma, n_dim): """ Returns the natural log of a normalised, uncorrelated Cauchy distribution with 1 degree of freedom. Parameters ---------- r: float or numpy array sigma: float n_dim: int Returns ------- logl: float or numpy array Loglikelihood values corresponding to input radial coordinates. """ logl = (-(1 + n_dim) / 2) * np.log(1 + ((r ** 2) / (sigma ** 2))) logl += scipy.special.gammaln((1.0 + n_dim) / 2.0) logl -= np.log(np.pi) * (n_dim + 1.0) / 2.0 # NB gamma(0.5) = sqrt(pi) logl += (-n_dim) * np.log(sigma) return logl
[docs]def r_given_log_cauchy(logl, sigma, n_dim): """ Returns the radius for a given logl of a normalised, uncorrelated Cauchy distribution with 1 degree of freedom. Parameters ---------- logl: float or numpy array sigma: float n_dim: int Returns ------- r: float or numpy array Radial coordinates corresponding to input logl values. """ exponent = logl # remove normalisation constant exponent -= scipy.special.gammaln((1.0 + n_dim) / 2.0) exponent += np.log(np.pi) * (n_dim + 1.0) / 2.0 # NB gamma(0.5) = sqrt(pi) exponent -= (-n_dim) * np.log(sigma) # remove power exponent /= -(n_dim + 1.0) / 2.0 # rearrange exponent = np.exp(exponent) - 1.0 r_squared = exponent * (sigma ** 2) return np.sqrt(r_squared)