Source code for bayesflow.computational_utilities

# Copyright (c) 2022 The BayesFlow Developers

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import tensorflow as tf
import numpy as np
from scipy import stats
from sklearn.calibration import calibration_curve

from bayesflow.default_settings import MMD_BANDWIDTH_LIST


def gaussian_kernel_matrix(x, y, sigmas=None):
    r"""Computes a Gaussian radial basis function (RBF) kernel matrix between the samples of x and y.

    We create a sum of multiple Gaussian kernels, each having a width :math:`\sigma_i`.

    Parameters
    ----------
    x      : tf.Tensor of shape (num_draws_x, num_features)
        Comprises `num_draws_x` random draws from the "source" distribution `P`.
    y      : tf.Tensor of shape (num_draws_y, num_features)
        Comprises `num_draws_y` random draws from the "target" distribution `Q`.
    sigmas : list(float), optional, default: None
        List which denotes the widths of each of the Gaussians in the kernel.
        If `sigmas is None`, a default range will be used, contained in
        `bayesflow.default_settings.MMD_BANDWIDTH_LIST`.

    Returns
    -------
    kernel : tf.Tensor of shape (num_draws_x, num_draws_y)
        The kernel matrix between pairs from `x` and `y`.
    """

    if sigmas is None:
        sigmas = MMD_BANDWIDTH_LIST

    # Matrix of squared Euclidean distances between all pairs of draws
    norm = lambda v: tf.reduce_sum(tf.square(v), 1)
    dist = tf.transpose(norm(tf.expand_dims(x, 2) - tf.transpose(y)))

    # Evaluate one RBF per bandwidth at every pairwise distance and sum them up
    beta = 1.0 / (2.0 * tf.expand_dims(sigmas, 1))
    s = tf.matmul(beta, tf.reshape(dist, (1, -1)))
    kernel = tf.reshape(tf.reduce_sum(tf.exp(-s), 0), tf.shape(dist))
    return kernel

def inverse_multiquadratic_kernel_matrix(x, y, sigmas=None):
    r"""Computes an inverse multiquadratic RBF kernel matrix between the samples of x and y.

    We create a sum of multiple IM-RBF kernels, each having a width :math:`\sigma_i`.

    Parameters
    ----------
    x      : tf.Tensor of shape (num_draws_x, num_features)
        Comprises `num_draws_x` random draws from the "source" distribution `P`.
    y      : tf.Tensor of shape (num_draws_y, num_features)
        Comprises `num_draws_y` random draws from the "target" distribution `Q`.
    sigmas : list(float), optional, default: None
        List which denotes the widths of each of the kernel components.
        If `sigmas is None`, a default range will be used, contained in
        `bayesflow.default_settings.MMD_BANDWIDTH_LIST`.

    Returns
    -------
    kernel : tf.Tensor of shape (num_draws_x, num_draws_y)
        The kernel matrix between pairs from `x` and `y`.
    """

    if sigmas is None:
        sigmas = MMD_BANDWIDTH_LIST

    # Squared pairwise distances with a trailing axis for broadcasting over bandwidths
    dist = tf.expand_dims(tf.reduce_sum((x[:, None, :] - y[None, :, :]) ** 2, axis=-1), axis=-1)

    # Evaluate one IM-RBF per bandwidth and sum them up
    sigmas = tf.expand_dims(sigmas, 0)
    return tf.reduce_sum(sigmas / (dist + sigmas), axis=-1)

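# Minimal usage sketch for the two kernel matrices above (illustrative only, not
# part of the module): each maps two sets of draws to a matrix of pairwise kernel
# evaluations, summed over the default bandwidths in MMD_BANDWIDTH_LIST.
#
# >>> import tensorflow as tf
# >>> x = tf.random.normal((5, 2))  # 5 draws from P with 2 features each
# >>> y = tf.random.normal((5, 2))  # 5 draws from Q with 2 features each
# >>> gaussian_kernel_matrix(x, y).shape
# TensorShape([5, 5])
# >>> inverse_multiquadratic_kernel_matrix(x, y).shape
# TensorShape([5, 5])
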
def mmd_kernel(x, y, kernel):
    """Computes the estimator of the Maximum Mean Discrepancy (MMD) between two samples x and y.

    The Maximum Mean Discrepancy (MMD) is a distance measure between random draws from
    the distributions `x ~ P` and `y ~ Q`.

    Parameters
    ----------
    x      : tf.Tensor of shape (N, num_features)
        An array of `N` random draws from the "source" distribution `x ~ P`.
    y      : tf.Tensor of shape (M, num_features)
        An array of `M` random draws from the "target" distribution `y ~ Q`.
    kernel : callable
        A function which computes the kernel matrix between pairs of samples.

    Returns
    -------
    loss   : tf.Tensor of shape (,)
        The statistically biased squared maximum mean discrepancy (MMD) value.
    """

    loss = tf.reduce_mean(kernel(x, x))
    loss += tf.reduce_mean(kernel(y, y))
    loss -= 2 * tf.reduce_mean(kernel(x, y))
    return loss

def mmd_kernel_unbiased(x, y, kernel):
    """Computes the unbiased estimator of the Maximum Mean Discrepancy (MMD) between two samples x and y.

    The Maximum Mean Discrepancy (MMD) is a distance measure between random draws from
    the distributions `x ~ P` and `y ~ Q`.

    Parameters
    ----------
    x      : tf.Tensor of shape (N, num_features)
        An array of `N` random draws from the "source" distribution `x ~ P`.
    y      : tf.Tensor of shape (M, num_features)
        An array of `M` random draws from the "target" distribution `y ~ Q`.
    kernel : callable
        A function which computes the kernel matrix between pairs of random draws from `x` and `y`.

    Returns
    -------
    loss   : tf.Tensor of shape (,)
        The statistically unbiased squared maximum mean discrepancy (MMD) value.
    """

    m, n = x.shape[0], y.shape[0]

    # Exclude the diagonal (self-similarity) terms so the within-sample averages
    # run over distinct pairs only, which makes the estimator unbiased
    k_xx = kernel(x, x)
    k_yy = kernel(y, y)
    loss = (tf.reduce_sum(k_xx) - tf.linalg.trace(k_xx)) / (m * (m - 1))
    loss += (tf.reduce_sum(k_yy) - tf.linalg.trace(k_yy)) / (n * (n - 1))
    loss -= (2.0 / (m * n)) * tf.reduce_sum(kernel(x, y))
    return loss

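# A short illustrative comparison of the two estimators (not part of the module):
# both approach zero when P = Q and grow as the distributions move apart; unlike
# the biased version, the unbiased estimate can be slightly negative when P = Q.
#
# >>> import tensorflow as tf
# >>> x = tf.random.normal((50, 2))        # draws from P
# >>> y = tf.random.normal((50, 2)) + 1.0  # draws from a shifted Q
# >>> mmd_biased = mmd_kernel(x, y, kernel=gaussian_kernel_matrix)
# >>> mmd_unbiased = mmd_kernel_unbiased(x, y, kernel=gaussian_kernel_matrix)
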
def expected_calibration_error(m_true, m_pred, n_bins=15):
    """Estimates the expected calibration error of a model comparison network.

    Important
    ---------
    Make sure that ``m_true`` are **one-hot encoded** classes!

    Parameters
    ----------
    m_true : np.ndarray or list
        True model indices, one-hot encoded
    m_pred : np.ndarray or list
        Predicted model probabilities
    n_bins : int, optional, default: 15
        Number of bins for the calibration curves

    Returns
    -------
    cal_errs : list of float
        The expected calibration error of each model
    probs    : list of tuple
        The tuples (prob_true, prob_pred) per model, as returned by
        ``sklearn.calibration.calibration_curve``
    """

    # Convert tf.Tensors to numpy, if passed
    if type(m_true) is not np.ndarray:
        m_true = m_true.numpy()
    if type(m_pred) is not np.ndarray:
        m_pred = m_pred.numpy()

    # Extract number of models and prepare containers
    n_models = m_true.shape[1]
    cal_errs = []
    probs = []

    # Loop over models and compute the calibration error per bin
    for k in range(n_models):
        y_true = (m_true.argmax(axis=1) == k).astype(np.float32)
        y_prob = m_pred[:, k]
        prob_true, prob_pred = calibration_curve(y_true, y_prob, n_bins=n_bins)

        # The calibration error of model k is the mean absolute deviation
        # between empirical and predicted bin probabilities
        cal_err = np.mean(np.abs(prob_true - prob_pred))
        cal_errs.append(cal_err)
        probs.append((prob_true, prob_pred))
    return cal_errs, probs

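# Hypothetical usage sketch (illustrative only; random inputs stand in for real
# network outputs):
#
# >>> import numpy as np
# >>> m_true = np.eye(3)[np.random.randint(0, 3, size=100)]  # one-hot, 3 models
# >>> m_pred = np.random.dirichlet(np.ones(3), size=100)     # predicted probabilities
# >>> cal_errs, probs = expected_calibration_error(m_true, m_pred, n_bins=10)
# >>> len(cal_errs)  # one calibration error per model
# 3
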
def maximum_mean_discrepancy(source_samples, target_samples, kernel="gaussian", mmd_weight=1.0, minimum=0.0):
    """Computes the MMD given a particular choice of kernel.

    For details, consult Gretton et al. (2012):
    https://www.jmlr.org/papers/volume13/gretton12a/gretton12a.pdf

    Parameters
    ----------
    source_samples : tf.Tensor of shape (N, num_features)
        An array of `N` random draws from the "source" distribution.
    target_samples : tf.Tensor of shape (M, num_features)
        An array of `M` random draws from the "target" distribution.
    kernel         : str in ('gaussian', 'inverse_multiquadratic'), optional, default: 'gaussian'
        The kernel to use for computing the distance between pairs of random draws.
    mmd_weight     : float, optional, default: 1.0
        The weight of the MMD value.
    minimum        : float, optional, default: 0.0
        The lower bound of the MMD value.

    Returns
    -------
    loss_value : tf.Tensor of shape (,)
        A scalar Maximum Mean Discrepancy value.
    """

    # Determine kernel, fall back to Gaussian if an unknown string is passed
    if kernel == "gaussian":
        kernel_fun = gaussian_kernel_matrix
    elif kernel == "inverse_multiquadratic":
        kernel_fun = inverse_multiquadratic_kernel_matrix
    else:
        kernel_fun = gaussian_kernel_matrix

    # Compute and return the weighted, clipped MMD value
    loss_value = mmd_kernel(source_samples, target_samples, kernel=kernel_fun)
    loss_value = mmd_weight * tf.maximum(minimum, loss_value)
    return loss_value

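# Illustrative call of the main entry point (not part of the module): identical
# distributions yield an MMD near zero, while shifted ones yield a clearly
# positive value.
#
# >>> import tensorflow as tf
# >>> source = tf.random.normal((100, 4))
# >>> target = tf.random.normal((100, 4))
# >>> maximum_mean_discrepancy(source, target)  # close to 0
# >>> maximum_mean_discrepancy(source, target + 2.0, kernel='inverse_multiquadratic')  # clearly > 0
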
def get_coverage_probs(z, u):
    """Vectorized function to compute the minimal coverage probability for uniform
    ECDFs given evaluation points z and a matrix of simulated draws u.

    Parameters
    ----------
    z  : np.ndarray of shape (num_points,)
        The vector of evaluation points.
    u  : np.ndarray of shape (num_simulations, num_samples)
        The matrix of simulated draws (samples) from U(0, 1).

    Returns
    -------
    gamma : np.ndarray of shape (num_simulations,)
        The minimal coverage probability for each simulated ECDF.
    """

    N = u.shape[1]

    # Empirical CDF of each simulated sample, evaluated at all points z
    F_m = np.sum(z[:, np.newaxis] >= u[:, np.newaxis, :], axis=-1) / N

    # Binomial CDFs evaluated at the scaled ECDF values
    bin1 = stats.binom(N, z).cdf(N * F_m)
    bin2 = stats.binom(N, z).cdf(N * F_m - 1)

    # Minimal coverage probability over all evaluation points
    gamma = 2 * np.min(np.min(np.stack([bin1, 1 - bin2], axis=-1), axis=-1), axis=-1)
    return gamma

def simultaneous_ecdf_bands(num_samples, num_points=None, num_simulations=1000,
                            confidence=0.95, eps=1e-5, max_num_points=1000):
    """Computes the simultaneous ECDF bands through simulation according to
    the algorithm described in Section 2.2:

    https://link.springer.com/content/pdf/10.1007/s11222-022-10090-6.pdf

    Depends on the vectorized utility function `get_coverage_probs(z, u)`.

    Parameters
    ----------
    num_samples     : int
        The sample size used for computing the ECDF. Will be equal to the number of
        posterior samples when used for calibration. Corresponds to `N` in the paper above.
    num_points      : int, optional, default: None
        The number of evaluation points on the interval (0, 1). Defaults to
        `num_points = num_samples` if not explicitly specified. Corresponds to `K`
        in the paper above.
    num_simulations : int, optional, default: 1000
        The number of samples of size `num_samples` to simulate for determining the
        simultaneous CIs.
    confidence      : float in (0, 1), optional, default: 0.95
        The confidence level; `confidence = 1 - alpha` specifies the width of the
        confidence interval.
    eps             : float, optional, default: 1e-5
        Small number to add to the lower and subtract from the upper bound of the
        interval [0, 1] to avoid edge artefacts. No need to touch this.
    max_num_points  : int, optional, default: 1000
        Upper bound on `num_points`. Saves computation time when `num_samples` is large.

    Returns
    -------
    (alpha, z, L, U) - tuple of a scalar and three arrays of size (num_points,) containing
    the confidence level as well as the evaluation points, the lower, and the upper
    confidence bands, respectively.
    """

    # Use shorter variable names throughout
    N = num_samples
    if num_points is None:
        K = min(N, max_num_points)
    else:
        K = min(num_points, max_num_points)
    M = num_simulations

    # Specify evaluation points
    z = np.linspace(0 + eps, 1 - eps, K)

    # Simulate M samples of size N
    u = np.random.uniform(size=(M, N))

    # Get alpha
    alpha = 1 - confidence

    # Compute minimal coverage probabilities
    gammas = get_coverage_probs(z, u)

    # Use insights from the paper to compute the lower and upper confidence bands
    gamma = np.percentile(gammas, 100 * alpha)
    L = stats.binom(N, z).ppf(gamma / 2) / N
    U = stats.binom(N, z).ppf(1 - gamma / 2) / N
    return alpha, z, L, U

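# A sketch of how the two ECDF utilities interact (illustrative only): simulate
# PIT values from a perfectly calibrated model and check that their ECDF stays
# inside the simultaneous bands, which should happen with probability ~ confidence.
#
# >>> import numpy as np
# >>> alpha, z, L, U = simultaneous_ecdf_bands(num_samples=250)
# >>> pit = np.random.uniform(size=250)  # PIT values under perfect calibration
# >>> ecdf = np.mean(pit[np.newaxis, :] <= z[:, np.newaxis], axis=-1)
# >>> np.all((ecdf >= L) & (ecdf <= U))  # True with probability ~ 0.95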