Source code for perfectns.maths_functions

#!/usr/bin/env python
"""
Mathematical functions.
"""


import numpy as np
import scipy
import scipy.stats
import scipy.special
import scipy.misc
import mpmath


[docs]def gaussian_r_given_logx(logx, sigma, n_dim):
    """
    Returns r coordinate corresponding to logx values for a Gaussian prior with
    the specificed standard deviation and dimension.

    This uses scipy.special.gammaincinv and requires exponentiating logx, so
    numerical errors occur with very low logx values.

    Parameters
    ----------
    logx: float or numpy array
        Logx coordinates at which to work out r.
    sigma: float
        Standard deviation of Gaussian.
    n_dim: int
        Number of dimensions.

    Returns
    -------
    float or numpy array
        Radial coordinates corresponding to input log X values.
    """
    exponent = scipy.special.gammaincinv(n_dim / 2., np.exp(logx))
    return np.sqrt(2 * exponent * sigma ** 2)


[docs]def gaussian_logx_given_r(r, sigma, n_dim):
    """
    Returns logx coordinate corresponding to r values for a Gaussian prior with
    the specificed standard deviation and dimension

    Uses mpmath package for arbitary precision.

    Parameters
    ----------
    r: float or numpy array
        Radial coordinates at which to evaluate logx.
    sigma: float
        Standard deviation of Gaussian.
    n_dim: int
        Number of dimensions.

    Returns
    -------
    logx: float or numpy array
        Logx coordinates corresponding to input radial coordinates.
    """
    exponent = 0.5 * (r / sigma) ** 2
    if isinstance(r, np.ndarray):  # needed to ensure output is numpy array
        logx = np.zeros(r.shape)
        for i, expo in enumerate(exponent):
            logx[i] = float(mpmath.log(mpmath.gammainc(n_dim / 2., a=0, b=expo,
                                                       regularized=True)))
        return logx
    else:
        return float(mpmath.log(mpmath.gammainc(n_dim / 2., a=0, b=exponent,
                                                regularized=True)))


[docs]def analytic_logx_terminate(settings):
    """
    Find logx_terminate analytically by assuming all likelihood at very low
    X approximately equals the maximum likelihood.
    This approximation breaks down in very high dimensions.

    Parameters
    ----------
    settings: PerfectNSSettings object

    Return
    ------
    float or None
    """
    # use r=0 rather than logx=-np.inf as the latter causes numerical problems
    logl_max = settings.logl_given_r(0)
    if settings.logz_analytic() is not None:
        return logx_terminate_bound(logl_max, settings.termination_fraction,
                                    settings.logz_analytic())
    else:
        return None


[docs]def logx_terminate_bound(logl_max, termination_fraction, logz_analytic):
    """
    Find a lower bound logx_terminate analytically by assuming all likelihood
    at very low X approximately equals the maximum likelihood. This
    approximation breaks down in very high dimensions and the true logx
    terminate required will be larger.

    We want:

    Z_term = termination_fraction * Z_analytic
           = int_0^Xterm L(X) dX
           approx= Xterm L_max,
    so
    logx_term = log(termination_fraction) + log(Z_analytic) - logl_max

    Parameters
    ----------
    logl_max: float
        Maximum loglikelihood.
    termination_frac: float
        Fraction of posterior mass remaining at termination.
    logz_analytic: float
        True value of log evidence.

    Returns
    -------
    float
    """
    return np.log(termination_fraction) + logz_analytic - logl_max


[docs]def sample_nsphere_shells_beta(r, n_dim, n_sample=None):
    """
    Given some 1d numpy array of radial coordinates r, return a numpy array
    of sample coordinates on the hyperspherical shells with each radial
    coordinate.

    Sample single parameters on n_dim-dimensional sphere independently, as
    as described in Section 3.2 of 'Sampling Errors in nested sampling
    parameter estimation' (Higson et al., 2018).

    NB if each parameter is sampled independently and combined into a vector,
    that vector will not have a magnitude r.

    The n_sample argument can be used to set the number of parameters for which
    samples are returned (by symmetry they all have the same distribution).
    This is useful for saving memory in high dimensions.

    This function is much quicker than sample_nsphere_shells_normal when n_dim
    is high and n_sample is low.

    Parameters
    ----------
    r: 1d numpy array
    n_dim: int
    n_sample: int or None, optional
        Number of parameters to include in output for each sample.

    Returns
    -------
    thetas: 2d numpy array
        Each row is a random sample from the shells defined by the
        corresponding input r coordinate.
    """
    assert isinstance(r, np.ndarray), 'input r must be a numpy array'
    if n_sample is None:
        n_sample = n_dim
    thetas = np.sqrt(np.random.beta(0.5, (n_dim - 1) * 0.5,
                                    size=(r.shape[0], n_sample)))
    # randomly select + or -
    thetas *= (-1) ** (np.random.randint(0, 2, size=thetas.shape))
    # multiply by r
    thetas *= r[:, None]
    return thetas


[docs]def sample_nsphere_shells_normal(r, n_dim, n_sample=None):
    """
    Given some 1d numpy array of radial coordinates r, return a numpy array
    of sample coordinates on the hyperspherical shells with each radial
    coordinate.

    This works by using the symmetry of the normal distribution to sample
    isotropically, then normalising each set of samples to lie on a spherical
    shell of the correct radius.

    The n_sample argument can be used to set the number of parameters for which
    samples are returned (by symmetry they all have the same distribution).
    This is useful for saving memory in high dimensions.

    Parameters
    ----------
    r: 1d numpy array
    n_dim: int
    n_sample: int or None, optional
        Number of parameters to include in output for each sample.

    Returns
    -------
    thetas: 2d numpy array
        Each row is a random sample from the shells defined by the
        corresponding input r coordinate.
    """
    if n_sample is None:
        n_sample = n_dim
    assert n_sample <= n_dim, 'so far only set up for nsample <= ndim'
    thetas = np.random.normal(size=(r.shape[0], n_dim))
    # calculate normalisation so sum_i(theta_i^2) = r^2 for each row
    norm = r / np.sqrt(np.sum(thetas ** 2, axis=1))
    # only return n_sample columns
    thetas = thetas[:, :n_sample]
    # normalise each column
    thetas *= norm[:, None]
    return thetas


[docs]def sample_nsphere_shells(r, n_dim, n_sample):
    """
    Wrapper calling either sample_nsphere_shells_normal or
    sample_nsphere_shells_beta depending on the dimension and
    number of samples required.

    See the docstrings of sample_nsphere_shells_normal and
    sample_nsphere_shells_beta for more information.
    """
    if n_dim >= 100 and n_sample <= 2:
        return sample_nsphere_shells_beta(r, n_dim, n_sample)
    else:
        return sample_nsphere_shells_normal(r, n_dim, n_sample)


[docs]def nsphere_r_given_logx(logx, r_max, n_dim):
    """
    Finds r coordinates given input logx values for a uniform prior within an
    n-dimensional sphere co-centred with a spherically symmetric likelihood.
    This will return an answer of the same type (float or numpy array) as the
    input {logx}.

    Parameters
    ----------
    logx: float or numpy array
    r_max: float
        Boundry of the spherically symmetric prior.
    n_dim: int
        Number of dimensions

    Returns
    r: float or numpy array
        Radial coordinates corresponding to logx.
    """
    r = np.exp(logx / n_dim) * r_max
    return r


[docs]def nsphere_logx_given_r(r, r_max, n_dim):
    """
    Finds logx assuming the prior is an n-dimensional sphere co-centred with a
    spherically symmetric likelihood.
    This will return an answer of the same type (float or numpy array) as the
    input {r}.

    Parameters
    ----------
    r: float or numpy array
    r_max: float
    n_dim: int

    Returns
    -------
    logx: float or numpy array.
        Logx coordinates corresponding to input radial coordinates.
    """
    logx = (np.log(r) - np.log(r_max)) * n_dim
    return logx


[docs]def nsphere_logvol(dim, radius=1.0):
    """
    Returns the natural log of the hypervolume of a unit nsphere of specified
    dimension. Useful for very high dimensions. Formulae is from
    https://en.wikipedia.org/wiki/N-sphere#Volume_and_surface_area

    Parameters
    ----------
    dim: int
    radius: float, optional

    Returns
    -------
    float
    """
    return ((np.log(radius) * dim) + (np.log(np.pi) * (dim / 2.0)) -
            (scipy.special.gammaln((dim / 2.0) + 1.0)))


[docs]def log_gaussian_given_r(r, sigma, n_dim):
    """
    Returns the natural log of a normalised, uncorrelated Gaussian likelihood
    with equal variance in all n_dim dimensions.

    Parameters
    ----------
    r: float or numpy array
    sigma: float
    n_dim: int

    Returns
    -------
    logl: float or numpy array
        Loglikelihood values corresponding to input radial coordinates.
    """
    logl = -0.5 * ((r ** 2) / (sigma ** 2))
    # normalise
    logl -= n_dim * np.log(sigma)
    logl -= np.log(2 * np.pi) * (n_dim / 2.0)
    return logl


[docs]def log_exp_power_given_r(r, sigma, n_dim, b=0.5):
    """
    Returns the natural log of an exponential power distribution.
    This equals a Gaussian distribution when b=1.

    Parameters
    ----------
    r: float or numpy array
    sigma: float
    n_dim: int
    b: float, optional

    Returns
    -------
    logl: float or numpy array
        Loglikelihood values corresponding to input radial coordinates.
    """
    logl = -0.5 * (((r ** 2) / (sigma ** 2)) ** b)
    # normalise
    logl += np.log(n_dim)
    logl += scipy.special.gammaln((n_dim) / 2.0)
    logl -= np.log(np.pi) * (n_dim / 2.0)
    logl -= n_dim * np.log(sigma)
    logl -= np.log(2) * (1 + 0.5 * b)
    logl -= scipy.special.gammaln(1. + ((n_dim) / (2 * b)))
    return logl


[docs]def r_given_log_exp_power(logl, sigma, n_dim, b=0.5):
    """
    Returns the natural log of an exponential power distribution.
    This equals a Gaussian distribution when b=1.

    Parameters
    ----------
    logl: float or numpy array
    sigma: float
    n_dim: int
    b: float, optional

    Returns
    -------
    r: float or numpy array
        Radial coordinates corresponding to input logl values.
    """
    # remove normalisation constant
    exponent = logl - np.log(n_dim)
    exponent -= scipy.special.gammaln((n_dim) / 2.0)
    exponent += np.log(np.pi) * (n_dim / 2.0)
    exponent += n_dim * np.log(sigma)
    exponent += np.log(2) * (1 + 0.5 * b)
    exponent += scipy.special.gammaln(1. + ((n_dim) / (2 * b)))
    # rearrange
    exponent = (-2. * exponent) ** (1. / b)
    logl = np.sqrt(exponent * (sigma ** 2))
    return logl


[docs]def r_given_log_gaussian(logl, sigma, n_dim):
    """
    Returns the radius of a given logl for a normalised,  uncorrelated Gaussian
    with equal variance in all n_dim dimensions.

    Parameters
    ----------
    logl: float or numpy array
    sigma: float
    n_dim: int

    Returns
    -------
    r: float or numpy array
        Radial coordinates corresponding to input logl values.
    """
    # remove normalisation constant
    exponent = logl + n_dim * np.log(sigma)
    exponent += np.log(2 * np.pi) * (n_dim / 2.0)
    # rearrange
    r = np.sqrt(-2 * exponent) * sigma
    return r


[docs]def log_cauchy_given_r(r, sigma, n_dim):
    """
    Returns the natural log of a normalised,  uncorrelated Cauchy distribution
    with 1 degree of freedom.

    Parameters
    ----------
    r: float or numpy array
    sigma: float
    n_dim: int

    Returns
    -------
    logl: float or numpy array
        Loglikelihood values corresponding to input radial coordinates.
    """
    logl = (-(1 + n_dim) / 2) * np.log(1 + ((r ** 2) / (sigma ** 2)))
    logl += scipy.special.gammaln((1.0 + n_dim) / 2.0)
    logl -= np.log(np.pi) * (n_dim + 1.0) / 2.0  # NB gamma(0.5) = sqrt(pi)
    logl += (-n_dim) * np.log(sigma)
    return logl


[docs]def r_given_log_cauchy(logl, sigma, n_dim):
    """
    Returns the radius for a given logl of a normalised,  uncorrelated
    Cauchy distribution with 1 degree of freedom.

    Parameters
    ----------
    logl: float or numpy array
    sigma: float
    n_dim: int

    Returns
    -------
    r: float or numpy array
        Radial coordinates corresponding to input logl values.
    """
    exponent = logl
    # remove normalisation constant
    exponent -= scipy.special.gammaln((1.0 + n_dim) / 2.0)
    exponent += np.log(np.pi) * (n_dim + 1.0) / 2.0  # NB gamma(0.5) = sqrt(pi)
    exponent -= (-n_dim) * np.log(sigma)
    # remove power
    exponent /= -(n_dim + 1.0) / 2.0
    # rearrange
    exponent = np.exp(exponent) - 1.0
    r_squared = exponent * (sigma ** 2)
    return np.sqrt(r_squared)