# Copyright (c) 2022 The BayesFlow Developers
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import numpy as np
import tensorflow as tf
from bayesflow import default_settings
from bayesflow.coupling_networks import AffineCouplingLayer
from bayesflow.helper_functions import build_meta_dict
class InvertibleNetwork(tf.keras.Model):
"""Implements a chain of conditional invertible coupling layers for conditional density estimation."""
def __init__(self, num_params, num_coupling_layers=4, coupling_net_settings=None,
coupling_design='dense', soft_clamping=1.9, use_permutation=True, use_act_norm=True,
act_norm_init=None, use_soft_flow=False, soft_flow_bounds=(1e-3, 5e-2), **kwargs):
"""Creates a chain of coupling layers with optional `ActNorm` layers in-between. Implements ideas from:
[1] Radev, S. T., Mertens, U. K., Voss, A., Ardizzone, L., & Köthe, U. (2020).
BayesFlow: Learning complex stochastic models with invertible neural networks.
IEEE transactions on neural networks and learning systems.
[2] Kim, H., Lee, H., Kang, W. H., Lee, J. Y., & Kim, N. S. (2020).
Softflow: Probabilistic framework for normalizing flow on manifolds.
Advances in Neural Information Processing Systems, 33, 16388-16397.
[3] Ardizzone, L., Kruse, J., Lüth, C., Bracher, N., Rother, C., & Köthe, U. (2020).
Conditional invertible neural networks for diverse image-to-image translation.
In DAGM German Conference on Pattern Recognition (pp. 373-387). Springer, Cham.
[4] Kingma, D. P., & Dhariwal, P. (2018).
Glow: Generative flow with invertible 1x1 convolutions.
Advances in neural information processing systems, 31.
Parameters
----------
num_params : int
The number of parameters to perform inference on. Equivalently, the dimensionality of the
latent space.
num_coupling_layers : int, optional, default: 4
The number of coupling layers to use as defined in [1] and [2]. In general, more coupling layers
will give you more expressive power, but will be slower and may need more simulations to train.
Typically, between 4 and 10 coupling layers should suffice for most applications.
coupling_net_settings : dict or None, optional, default: None
The coupling network settings to pass to the internal coupling layers. See `default_settings`
for the required entries.
coupling_design : str or callable, optional, default: 'dense'
The type of internal coupling network to use. Currently, only 'dense' is understood as a
string argument, but you can also pass a callable which constructs a custom network. In that case,
the `coupling_net_settings` will be passed as a first argument to the callable.
soft_clamping : float, optional, default: 1.9
The soft clamping parameter `alpha` in [3]. Typically you would not touch this.
use_permutation : bool, optional, default: True
Whether to use fixed permutations between coupling layers. Highly recommended.
use_act_norm : bool, optional, default: True
Whether to use activation normalization after each coupling layer, as used in [4].
Recommended to keep default.
act_norm_init : np.ndarray of shape (num_simulations, num_params) or None, optional, default: None
Optional data-dependent initialization for the internal `ActNorm` layers, as done in [4]. Could be helpful
for deep invertible networks.
use_soft_flow : bool, optional, default: False
Whether to perturb the target distribution (i.e., the parameters) with a small amount of independent
noise, as done in [2]. Could be helpful for degenerate distributions.
soft_flow_bounds : tuple(float, float), optional, default: (1e-3, 5e-2)
The bounds of the continuous uniform distribution from which the noise scale is sampled
at each iteration. Only relevant when `use_soft_flow=True`.
**kwargs : dict
Optional keyword arguments (e.g., name) passed to the tf.keras.Model __init__ method.
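Examples
--------
A minimal usage sketch (the dimensions below are purely illustrative):

>>> import tensorflow as tf
>>> inn = InvertibleNetwork(num_params=5, num_coupling_layers=6)
>>> theta = tf.random.normal((32, 5))       # batch of target parameters
>>> x = tf.random.normal((32, 64))          # batch of condition vectors
>>> z, log_det_J = inn(theta, x)            # forward pass into latent space
>>> theta_approx = inn(z, x, inverse=True)  # inverse pass back to parameter space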
"""
super().__init__(**kwargs)
# Create settings dict for coupling layer
settings = dict(
latent_dim=num_params,
coupling_net_settings=coupling_net_settings,
coupling_design=coupling_design,
use_permutation=use_permutation,
use_act_norm=use_act_norm,
act_norm_init=act_norm_init,
alpha=soft_clamping
)
# Create sequence of coupling layers and store reference to dimensionality
self.coupling_layers = [AffineCouplingLayer(settings) for _ in range(num_coupling_layers)]
# Store attributes
self.soft_flow = use_soft_flow
self.soft_low = soft_flow_bounds[0]
self.soft_high = soft_flow_bounds[1]
self.use_permutation = use_permutation
self.use_act_norm = use_act_norm
self.latent_dim = num_params
def call(self, targets, condition, inverse=False, **kwargs):
"""Performs one pass through an invertible chain (either inverse or forward).
Parameters
----------
targets : tf.Tensor
The estimation quantities of interest, shape (batch_size, ...)
condition : tf.Tensor
The conditional data x, shape (batch_size, summary_dim)
inverse : bool, default: False
Flag indicating whether to run the chain forward or backwards
Returns
-------
(z, log_det_J) : tuple(tf.Tensor, tf.Tensor)
If inverse=False: The transformed input and the log of the Jacobian determinant of the transformation,
z shape: (batch_size, ...), log_det_J shape: (batch_size, ...)
target : tf.Tensor
If inverse=True: The transformed output, shape (batch_size, ...)
Important
---------
If ``inverse=False``, the return is ``(z, log_det_J)``.\n
If ``inverse=True``, the return is ``target``.
"""
if inverse:
return self.inverse(targets, condition, **kwargs)
return self.forward(targets, condition, **kwargs)
@tf.function
def forward(self, targets, condition, **kwargs):
"""Performs a forward pass though the chain."""
# Add noise to target if using SoftFlow, use explicitly
# not in call(), since methods are public
if self.soft_flow and condition is not None:
# Needs to be concatenable with the condition
shape_scale = (condition.shape[0], 1) if len(condition.shape) == 2 else (condition.shape[0], condition.shape[1], 1)
# Case training mode
if kwargs.get('training'):
noise_scale = tf.random.uniform(shape=shape_scale, minval=self.soft_low, maxval=self.soft_high)
# Case inference mode
else:
noise_scale = tf.zeros(shape=shape_scale) + self.soft_low
# Perturb data with noise (will broadcast to all dimensions)
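# A 2D noise scale needs a singleton middle axis to broadcast over 3D targets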
if len(shape_scale) == 2 and len(targets.shape) == 3:
targets += tf.expand_dims(noise_scale, axis=1) * tf.random.normal(shape=targets.shape)
else:
targets += noise_scale * tf.random.normal(shape=targets.shape)
# Augment condition with noise scale variate
condition = tf.concat((condition, noise_scale), axis=-1)
z = targets
log_det_Js = []
for layer in self.coupling_layers:
z, log_det_J = layer(z, condition, **kwargs)
log_det_Js.append(log_det_J)
# Sum Jacobian determinants for all layers (coupling blocks) to obtain total Jacobian.
log_det_J = tf.add_n(log_det_Js)
return z, log_det_J
@tf.function
def inverse(self, z, condition, **kwargs):
"""Performs a reverse pass through the chain. Assumes that it is only used
in inference mode, so ``**kwargs`` contains ``training=False``."""
# Add noise to target if using SoftFlow, use explicitly
# not in call(), since methods are public
if self.soft_flow and condition is not None:
# Needs to be concatenable with the condition
shape_scale = (condition.shape[0], 1) if len(condition.shape) == 2 else (condition.shape[0], condition.shape[1], 1)
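# At inference, fix the noise scale to a small constant; no noise is added to z itself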
noise_scale = tf.zeros(shape=shape_scale) + 2.*self.soft_low
# Augment condition with noise scale variate
condition = tf.concat((condition, noise_scale), axis=-1)
target = z
for layer in reversed(self.coupling_layers):
target = layer(target, condition, inverse=True, **kwargs)
return target
@classmethod
def create_config(cls, **kwargs):
""""Used to create the settings dictionary for the internal networks of the invertible
network. Will fill in missing """
settings = build_meta_dict(user_dict=kwargs,
default_setting=default_settings.DEFAULT_SETTING_INVERTIBLE_NET)
return settings
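# A minimal sketch of obtaining a complete settings dictionary (calling the
# method without overrides simply fills in every default):
#
# settings = InvertibleNetwork.create_config()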
class EvidentialNetwork(tf.keras.Model):
"""Implements a network whose outputs are the concentration parameters of a Dirichlet density.
Follows ideas from:
[1] Radev, S. T., D'Alessandro, M., Mertens, U. K., Voss, A., Köthe, U., & Bürkner, P. C. (2021).
Amortized Bayesian model comparison with evidential deep learning.
IEEE Transactions on Neural Networks and Learning Systems.
[2] Sensoy, M., Kaplan, L., & Kandemir, M. (2018).
Evidential deep learning to quantify classification uncertainty.
Advances in neural information processing systems, 31.
"""
def __init__(self, num_models, dense_args=None, num_dense=3, output_activation='softplus', **kwargs):
"""Creates an instance of an evidential network for amortized model comparison.
Parameters
----------
num_models : int
The number of candidate (competing) models in the comparison scenario.
dense_args : dict or None, optional, default: None
The arguments for a tf.keras.layers.Dense layer. If None, defaults will be used.
num_dense : int, optional, default: 3
The number of dense layers for the main network part.
output_activation : str or callable, optional, default: 'softplus'
The activation function to use for the network outputs.
Important: needs to have positive outputs.
**kwargs : dict, optional, default: {}
Optional keyword arguments (e.g., name) passed to the tf.keras.Model __init__ method.
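Examples
--------
A minimal usage sketch (shapes are purely illustrative):

>>> import tensorflow as tf
>>> net = EvidentialNetwork(num_models=3)
>>> x = tf.random.normal((8, 32))  # 8 data sets, 32 summary features each
>>> evidence = net(x)              # shape (8, 3), all entries > 1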
"""
super().__init__(**kwargs)
if dense_args is None:
dense_args = default_settings.DEFAULT_SETTING_DENSE_EVIDENTIAL
# A network to increase representation power
self.dense = tf.keras.Sequential([
tf.keras.layers.Dense(**dense_args)
for _ in range(num_dense)
])
# The layer to output model evidences
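# Reuses dense_args, except for units and activation, which are set explicitly here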
self.alpha_layer = tf.keras.layers.Dense(
num_models, activation=output_activation,
**{k: v for k, v in dense_args.items() if k != 'units' and k != 'activation'})
self.num_models = num_models
def call(self, condition, **kwargs):
"""Computes evidences for model comparison given a batch of data and optional concatenated context,
typically passed through a summayr network.
Parameters
----------
condition : tf.Tensor of shape (batch_size, ...)
The input variables used for determining `p(model | condition)`
Returns
-------
evidence : tf.Tensor of shape (batch_size, num_models)
The learned model evidences.
"""
return self.evidence(condition, **kwargs)
@tf.function
def evidence(self, condition, **kwargs):
"""Computes the evidences (i.e., the concentration parameters of the Dirichlet density) for a batch of conditions."""
rep = self.dense(condition, **kwargs)
alpha = self.alpha_layer(rep, **kwargs)
evidence = alpha + 1.
return evidence
def sample(self, condition, n_samples, **kwargs):
"""Samples posterior model probabilities from the higher-order Dirichlet density.
Parameters
----------
condition : tf.Tensor
The summary of the observed (or simulated) data, shape (n_data_sets, ...)
n_samples : int
Number of samples to obtain from the approximate posterior
Returns
-------
pm_samples : np.ndarray
The posterior draws from the Dirichlet distribution, shape (n_samples, n_data_sets, num_models)
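Examples
--------
A minimal sketch (assumes `net = EvidentialNetwork(num_models=3)` and a batch
`x` of 8 summary vectors):

>>> pm_samples = net.sample(x, n_samples=1000)
>>> pm_samples.shape
(1000, 8, 3)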
"""
alpha = self.evidence(condition, **kwargs)
n_datasets = alpha.shape[0]
rng = np.random.default_rng()
pm_samples = np.stack(
[rng.dirichlet(alpha[n, :], size=n_samples) for n in range(n_datasets)], axis=1)
return pm_samples
@classmethod
def create_config(cls, **kwargs):
""""Used to create the settings dictionary for the internal networks of the invertible
network. Will fill in missing """
settings = build_meta_dict(user_dict=kwargs,
default_setting=default_settings.DEFAULT_SETTING_EVIDENTIAL_NET)
return settings