# Copyright (c) 2022 The BayesFlow Developers
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import numpy as np
import tensorflow as tf
from bayesflow.exceptions import ConfigurationError, SummaryStatsError
from bayesflow.losses import log_loss, mmd_summary_space, kl_dirichlet
from bayesflow.default_settings import DEFAULT_KEYS
import tensorflow_probability as tfp
from warnings import warn
from abc import ABC, abstractmethod
class AmortizedTarget(ABC):
"""An abstract interface for an amortized learned distribution. Children should
implement the following public methods (a minimal sketch is given below):
1. ``compute_loss(self, input_dict, **kwargs)``
2. ``sample(input_dict, **kwargs)``
3. ``log_prob(input_dict, **kwargs)``
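A minimal sketch of a custom child class (hypothetical, for illustration only;
the method bodies are left unimplemented):
>>> class MyAmortizedTarget(AmortizedTarget):
...     def __init__(self, network):
...         self.network = network
...     def compute_loss(self, input_dict, **kwargs):
...         ...
...     def sample(self, input_dict, **kwargs):
...         ...
...     def log_prob(self, input_dict, **kwargs):
...         ...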
"""
@abstractmethod
def __init__(self, *args, **kwargs):
pass
@abstractmethod
def compute_loss(self, input_dict, **kwargs):
pass
@abstractmethod
def sample(self, input_dict, **kwargs):
pass
@abstractmethod
def log_prob(self, input_dict, **kwargs):
pass
class AmortizedPosterior(tf.keras.Model, AmortizedTarget):
"""A wrapper to connect an inference network for parameter estimation with an optional summary network
as in the original BayesFlow set-up described in the paper:
[1] Radev, S. T., Mertens, U. K., Voss, A., Ardizzone, L., & Köthe, U. (2020).
BayesFlow: Learning complex stochastic models with invertible neural networks.
IEEE Transactions on Neural Networks and Learning Systems.
But also allowing for augmented functionality, such as model misspecification detection in summary space:
[2] Schmitt, M., Bürkner, P. C., Köthe, U., & Radev, S. T. (2022).
Detecting Model Misspecification in Amortized Bayesian Inference with Neural Networks
arXiv preprint arXiv:2112.08866.
And learning of fat-tailed posteriors with a Student-t latent pushforward density:
[3] Jaini, P., Kobyzev, I., Yu, Y., & Brubaker, M. (2020, November).
Tails of Lipschitz triangular flows.
In International Conference on Machine Learning (pp. 4673-4681). PMLR.
Serves as an interface for learning ``p(parameters | data, context)``.
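Examples
--------
A minimal usage sketch (the network classes and constructor arguments shown here
are assumptions for illustration and may differ across BayesFlow versions):
>>> from bayesflow.networks import InvertibleNetwork, InvariantNetwork
>>> amortizer = AmortizedPosterior(
...     inference_net=InvertibleNetwork(num_params=5),
...     summary_net=InvariantNetwork()
... )
>>> # After training (e.g., via a Trainer), draw posterior samples for new data:
>>> # post_samples = amortizer.sample({'summary_conditions': obs_data}, n_samples=500)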
"""
def __init__(self, inference_net, summary_net=None, latent_dist=None, latent_is_dynamic=False,
summary_loss_fun=None, **kwargs):
"""Initializes a composite neural network to represent an amortized approximate posterior.
Parameters
----------
inference_net : tf.keras.Model
An (invertible) inference network which processes the outputs of a generative model
summary_net : tf.keras.Model or None, optional, default: None
An optional summary network to compress non-vector data structures.
latent_dist : callable or None, optional, default: None
The latent distribution towards which to optimize the networks. Defaults to
a multivariate unit Gaussian.
latent_is_dynamic : bool, optional, default: False
If set to `True`, assumes that `latent_dist` is a function of the condition and takes
a different shape depending on the condition. Useful for more expressive transforms
of complex distributions, such as fat-tailed or highly multimodal distributions
(see the example below).
Important: In the case of dynamic latents, the user is responsible for ensuring that the
latent distribution is appropriately parameterized! If not using `tensorflow_probability`,
the `latent_dist` object needs to implement the following methods:
- `latent_dist(x).log_prob(z)` and
- `latent_dist(x).sample(n_samples)`
summary_loss_fun : callable, str, or None, optional, default: None
The loss function applied to the outputs of the summary network. If None, no summary loss is used.
The string 'mmd' selects the default maximum mean discrepancy (MMD) loss in summary space.
**kwargs : dict, optional
Additional keyword arguments passed to the __init__ method of a tf.keras.Model instance.
Important
---------
- If no `summary_net` is provided, then the output dictionary of your generative model should not contain
any `summary_conditions`, i.e., `summary_conditions` should be set to None, otherwise these will be ignored.
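Examples
--------
A minimal sketch of a dynamic latent distribution (a hypothetical parameterization
for illustration): a callable mapping the summary conditions to a
`tensorflow_probability` distribution exposing `log_prob` and `sample`.
>>> import tensorflow as tf
>>> import tensorflow_probability as tfp
>>> latent_dim = 5
>>> scale_net = tf.keras.layers.Dense(latent_dim, activation='softplus')
>>> def dynamic_latent_dist(conditions):
...     # The scale of the latent Gaussian depends on the learned summary conditions
...     return tfp.distributions.MultivariateNormalDiag(
...         loc=tf.zeros(latent_dim), scale_diag=scale_net(conditions) + 1e-4)
>>> # amortizer = AmortizedPosterior(
>>> #     inference_net, latent_dist=dynamic_latent_dist, latent_is_dynamic=True)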
"""
tf.keras.Model.__init__(self, **kwargs)
self.inference_net = inference_net
self.summary_net = summary_net
self.latent_dim = self.inference_net.latent_dim
self.latent_is_dynamic = latent_is_dynamic
self.summary_loss = self._determine_summary_loss(summary_loss_fun)
self.latent_dist = self._determine_latent_dist(latent_dist)
def call(self, input_dict, return_summary=False, **kwargs):
"""Performs a forward pass through the summary and inference network.
Parameters
----------
input_dict : dict
Input dictionary containing the following mandatory keys, if DEFAULT_KEYS unchanged:
`parameters` - the latent model parameters over which a conditional density is learned
`summary_conditions` - the conditioning variables (including data) that are first passed through a summary network
`direct_conditions` - the conditioning variables that are directly passed to the inference network
return_summary : bool, optional, default: False
A flag which determines whether the learnable data summaries (representations) are returned or not.
Returns
-------
net_out or (net_out, summary_out) : tuple of tf.Tensor
the outputs of ``inference_net(theta, summary_net(x, c_s), c_d)``, usually a latent variable and
log(det(Jacobian)), that is, a tuple ``(z, log_det_J)``, or the tuple ``((z, log_det_J), summary_out)``
if `return_summary` is set to True and a summary network is defined.
"""
# Concatenate conditions, if given
summary_out, full_cond = self._compute_summary_condition(
input_dict.get(DEFAULT_KEYS['summary_conditions']),
input_dict.get(DEFAULT_KEYS['direct_conditions']),
**kwargs
)
# Compute output of inference net
net_out = self.inference_net(input_dict[DEFAULT_KEYS['parameters']], full_cond, **kwargs)
# Return summary outputs or not, depending on parameter
if return_summary:
return net_out, summary_out
return net_out
def compute_loss(self, input_dict, **kwargs):
"""Computes the loss of the posterior amortizer given an input dictionary.
Parameters
----------
input_dict : dict
Input dictionary containing the following mandatory keys, if DEFAULT_KEYS unchanged:
`parameters` - the latent model parameters over which a conditional density is learned
`summary_conditions` - the conditioning variables that are first passed through a summary network
`direct_conditions` - the conditioning variables that are directly passed to the inference network
Returns
-------
total_loss : tf.Tensor of shape (1,) - the total computed loss given input variables
"""
# Get amortizer outputs
net_out, sum_out = self(input_dict, return_summary=True, **kwargs)
z, log_det_J = net_out
# Case summary loss should be computed
if self.summary_loss is not None:
sum_loss = self.summary_loss(sum_out)
# Case no summary loss, simply add 0 for convenience
else:
sum_loss = 0.
# Case dynamic latent space - function of summary conditions
if self.latent_is_dynamic:
logpdf = self.latent_dist(sum_out).log_prob(z)
# Case static latent space
else:
logpdf = self.latent_dist.log_prob(z)
# Compute and return total loss
total_loss = tf.reduce_mean(-logpdf - log_det_J) + sum_loss
return total_loss
def call_loop(self, input_list, return_summary=False, **kwargs):
"""Performs a forward pass through the summary and inference network given a list of dicts
with the appropriate entries (i.e., as used for the standard call method).
This method is useful when GPU memory is limited or data sets have a different (non-Tensor) structure.
Parameters
----------
input_list : list of dicts, where each dict contains the following mandatory keys, if DEFAULT_KEYS unchanged:
`parameters` - the latent model parameters over which a conditional density is learned
`summary_conditions` - the conditioning variables (including data) that are first passed through a summary network
`direct_conditions` - the conditioning variables that are directly passed to the inference network
return_summary : bool, optional, default: False
A flag which determines whether the learnable data summaries (representations) are returned or not.
Returns
-------
net_out or (net_out, summary_out) : tuple of tf.Tensor
the outputs of ``inference_net(theta, summary_net(x, c_s), c_d)``, usually a latent variable and
log(det(Jacobian)), that is, a tuple ``(z, log_det_J)``, or the tuple ``((z, log_det_J), summary_out)``
if `return_summary` is set to True and a summary network is defined.
"""
outputs = []
for forward_dict in input_list:
outputs.append(self(forward_dict, return_summary, **kwargs))
net_out = [tf.concat([o[i] for o in outputs], axis=0) for i in range(len(outputs[0]))]
return tuple(net_out)
def sample(self, input_dict, n_samples, to_numpy=True, **kwargs):
"""Generates random draws from the approximate posterior given a dictionary with conditional variables.
Parameters
----------
input_dict : dict
Input dictionary containing the following mandatory keys, if DEFAULT_KEYS unchanged:
`summary_conditions` : the conditioning variables (including data) that are first passed through a summary network
`direct_conditions` : the conditioning variables that are directly passed to the inference network
n_samples : int
The number of posterior draws (samples) to obtain from the approximate posterior
to_numpy : bool, optional, default: True
Flag indicating whether to return the samples as a `np.array` or a `tf.Tensor`.
**kwargs : dict, optional
Additional keyword arguments passed to the networks
Returns
-------
post_samples : tf.Tensor or np.ndarray of shape (n_data_sets, n_samples, n_params)
the sampled parameters per data set
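Examples
--------
An illustrative sketch of the expected shapes (hypothetical variable names; assumes
a trained `amortizer` with a summary network):
>>> import numpy as np
>>> obs_data = np.random.normal(size=(10, 100, 2)).astype(np.float32)  # 10 data sets
>>> # post_samples = amortizer.sample({'summary_conditions': obs_data}, n_samples=500)
>>> # post_samples.shape  # -> (10, 500, n_params)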
"""
# Compute learnable summaries, if appropriate
_, conditions = self._compute_summary_condition(
input_dict.get(DEFAULT_KEYS['summary_conditions']),
input_dict.get(DEFAULT_KEYS['direct_conditions']),
training=False,
**kwargs
)
# Obtain number of data sets
n_data_sets = conditions.shape[0]
# Obtain random draws from the latent distribution given conditioning variables
# Case dynamic latent: assume a tensorflow_probability instance, so reshape output from
# (n_samples, n_data_sets, latent_dim) to (n_data_sets, n_samples, latent_dim)
if self.latent_is_dynamic:
z_samples = self.latent_dist(conditions).sample(n_samples)
z_samples = tf.transpose(z_samples, (1, 0, 2))
# Case static latent - marginal samples from the specified dist
else:
z_samples = self.latent_dist.sample((n_data_sets, n_samples))
# Obtain random draws from the approximate posterior given conditioning variables
post_samples = self.inference_net.inverse(z_samples, conditions, training=False, **kwargs)
# Only return 2D array, if first dimension is 1
if post_samples.shape[0] == 1:
post_samples = post_samples[0]
# Return numpy version of tensor or tensor itself
if to_numpy:
return post_samples.numpy()
return post_samples
def sample_loop(self, input_list, n_samples, to_numpy=True, **kwargs):
"""Generates random draws from the approximate posterior given a list of dicts with conditional variables.
Useful when GPU memory is limited or data sets have a different (non-Tensor) structure.
Parameters
----------
input_list : list of dictionaries, each dictionary having the following mandatory keys, if DEFAULT_KEYS unchanged:
`summary_conditions` : the conditioning variables (including data) that are first passed through a summary network
`direct_conditions` : the conditioning variables that are directly passed to the inference network
n_samples : int
The number of posterior draws (samples) to obtain from the approximate posterior
to_numpy : bool, optional, default: True
Flag indicating whether to return the samples as a `np.ndarray` or a `tf.Tensor`
**kwargs : dict, optional
Additional keyword arguments passed to the networks
Returns
-------
post_samples : tf.Tensor or np.ndarray of shape (n_data_sets, n_samples, n_params)
the sampled parameters per data set
"""
post_samples = []
for input_dict in input_list:
post_samples.append(self.sample(input_dict, n_samples, to_numpy, **kwargs))
if to_numpy:
return np.concatenate(post_samples, axis=0)
return tf.concat(post_samples, axis=0)
def log_posterior(self, input_dict, to_numpy=True, **kwargs):
"""Calculates the approximate log-posterior of targets given conditional variables via
the change-of-variable formula for a conditional normalizing flow.
Parameters
----------
input_dict : dict
Input dictionary containing the following mandatory keys, if DEFAULT_KEYS unchanged:
`parameters` : the latent model parameters over which a conditional density (i.e., a posterior) is learned
`summary_conditions` : the conditioning variables (including data) that are first passed through a summary network
`direct_conditions` : the conditioning variables that are directly passed to the inference network
to_numpy : bool, optional, default: True
Flag indicating whether to return the log-posterior values as a `np.ndarray` or a `tf.Tensor`
Returns
-------
log_post : tf.Tensor of shape (batch_size,)
the approximate log-posterior density of each parameter vector
"""
# Compute learnable summaries, if appropriate
_, conditions = self._compute_summary_condition(
input_dict.get(DEFAULT_KEYS['summary_conditions']),
input_dict.get(DEFAULT_KEYS['direct_conditions']),
training=False,
**kwargs
)
# Forward pass through the network
z, log_det_J = self.inference_net.forward(
input_dict[DEFAULT_KEYS['parameters']], conditions, training=False, **kwargs)
# Compute approximate log posterior
# Case dynamic latent - function of conditions
if self.latent_is_dynamic:
log_post = self.latent_dist(conditions).log_prob(z) + log_det_J
# Case static latent - marginal samples from z
else:
log_post = self.latent_dist.log_prob(z) + log_det_J
if to_numpy:
return log_post.numpy()
return log_post
def log_prob(self, input_dict, to_numpy=True, **kwargs):
"""Identical to `log_posterior(input_dict, to_numpy, **kwargs)`."""
return self.log_posterior(input_dict, to_numpy=to_numpy, **kwargs)
def _compute_summary_condition(self, summary_conditions, direct_conditions, **kwargs):
"""Determines how to concatenate the provided conditions."""
# Compute learnable summaries, if given
if self.summary_net is not None:
sum_condition = self.summary_net(summary_conditions, **kwargs)
else:
sum_condition = None
# Concatenate learnable summaries with fixed summaries
if sum_condition is not None and direct_conditions is not None:
full_cond = tf.concat([sum_condition, direct_conditions], axis=-1)
elif sum_condition is not None:
full_cond = sum_condition
elif direct_conditions is not None:
full_cond = direct_conditions
else:
raise SummaryStatsError("Could not concatenate or determine conditioning inputs...")
return sum_condition, full_cond
def _determine_latent_dist(self, latent_dist):
"""Determines which latent distribution to use and defaults to unit normal if none provided.
"""
if latent_dist is None:
return tfp.distributions.MultivariateNormalDiag(loc=[0.]*self.latent_dim)
else:
return latent_dist
def _determine_summary_loss(self, loss_fun):
Determines which summary loss to use: returns a provided callable or `None` as is, and resolves the
string 'mmd' to the default MMD loss in summary space.
"""
# If callable, return provided loss
if loss_fun is None or callable(loss_fun):
return loss_fun
# If string, check for MMD or mmd
elif type(loss_fun) is str:
if loss_fun.lower() == 'mmd':
return mmd_summary_space
else:
raise NotImplementedError("For now, only 'mmd' is supported as a string argument for summary_loss_fun!")
# Throw if loss type unexpected
else:
raise NotImplementedError("Could not infer summary_loss_fun, argument should be of type (None, callable, or str)!")
class AmortizedLikelihood(tf.keras.Model, AmortizedTarget):
"""An interface for a surrogate model of a simulator, or an implicit likelihood
``p(data | parameters, context).``
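Examples
--------
A minimal usage sketch (the surrogate network class and its arguments are
assumptions for illustration and may differ across BayesFlow versions):
>>> from bayesflow.networks import InvertibleNetwork
>>> likelihood_amortizer = AmortizedLikelihood(
...     surrogate_net=InvertibleNetwork(num_params=2)
... )
>>> # After training, simulate observables given parameter draws:
>>> # sim_data = likelihood_amortizer.sample({'conditions': params}, n_samples=100)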
"""
def __init__(self, surrogate_net, latent_dist=None, **kwargs):
"""Initializes a composite neural architecture representing an amortized emulator
for the simulator (i.e., the implicit likelihood model).
Parameters
----------
surrogate_net : tf.keras.Model
An (invertible) inference network which processes the outputs of the generative model.
latent_dist : callable or None, optional, default: None
The latent distribution towards which to optimize the surrogate network outputs. Defaults to
a multivariate unit Gaussian.
"""
tf.keras.Model.__init__(self, **kwargs)
self.surrogate_net = surrogate_net
self.latent_dim = self.surrogate_net.latent_dim
self.latent_dist = self._determine_latent_dist(latent_dist)
def call(self, input_dict, **kwargs):
"""Performs a forward pass through the surrogate network.
Parameters
----------
input_dict : dict
Input dictionary containing the following mandatory keys, if DEFAULT_KEYS unchanged:
`observables` - the observables over which a conditional density is learned (i.e., the data)
`conditions` - the conditioning variables that are directly passed to the surrogate network
Returns
-------
net_out
the outputs of ``surrogate_net(observables, conditions)``, usually a latent variable and
log(det(Jacobian)), that is, a tuple ``(z, log_det_J)``.
"""
net_out = self.surrogate_net(
input_dict[DEFAULT_KEYS['observables']],
input_dict[DEFAULT_KEYS['conditions']],
**kwargs)
return net_out
def call_loop(self, input_list, **kwargs):
"""Performs a forward pass through the surrogate network given a list of dicts
with the appropriate entries (i.e., as used for the standard call method).
This method is useful when GPU memory is limited or data sets have a different (non-Tensor) structure.
Parameters
----------
input_list : list of dicts, where each dict contains the following mandatory keys, if DEFAULT_KEYS unchanged:
`observables` - the observables over which a conditional density is learned (i.e., the data)
`conditions` - the conditioning variables that are directly passed to the surrogate network
Returns
-------
net_out : tuple of tf.Tensor
the outputs of ``surrogate_net(observables, conditions)``, usually a latent variable and
log(det(Jacobian)), that is, a tuple ``(z, log_det_J)``.
"""
outputs = []
for forward_dict in input_list:
outputs.append(self(forward_dict, **kwargs))
net_out = [tf.concat([o[i] for o in outputs], axis=0) for i in range(len(outputs[0]))]
return tuple(net_out)
def sample(self, input_dict, n_samples, to_numpy=True, **kwargs):
"""Generates `n_samples` random draws from the surrogate likelihood given input conditions.
Parameters
----------
input_dict : dict
Input dictionary containing the following mandatory keys, if DEFAULT_KEYS unchanged:
`conditions` - the conditioning variables that are directly passed to the surrogate network
n_samples : int
The number of observable draws (samples) to obtain from the surrogate likelihood
to_numpy : bool, optional, default: True
Flag indicating whether to return the samples as a `np.array` or a `tf.Tensor`
Returns
-------
lik_samples : tf.Tensor or np.ndarray of shape (n_data_sets, n_samples, data_dim)
Simulated batch of observables from the surrogate likelihood.
"""
# Extract condition
conditions = input_dict[DEFAULT_KEYS['conditions']]
# Obtain number of data sets
n_data_sets = conditions.shape[0]
# Obtain random draws from the latent distribution given conditioning variables
z_samples = self.latent_dist.sample((n_data_sets, n_samples))
# Obtain random draws from the surrogate likelihood given conditioning variables
lik_samples = self.surrogate_net.inverse(z_samples, conditions, training=False, **kwargs)
# Only return 2D array, if first dimension is 1
if lik_samples.shape[0] == 1:
lik_samples = lik_samples[0]
if to_numpy:
return lik_samples.numpy()
return lik_samples
def sample_loop(self, input_list, n_samples, to_numpy=True, **kwargs):
"""Generates random draws from the surrogate network given a list of dicts with conditional variables.
Useful when GPU memory is limited or data sets have a different (non-Tensor) structure.
Parameters
----------
input_list : list of dictionaries, each dictionary having the following mandatory keys, if DEFAULT_KEYS unchanged:
`conditions` - the conditioning variables that are directly passed to the surrogate network
n_samples : int
The number of draws (samples) to obtain from the surrogate likelihood per data set
to_numpy : bool, optional, default: True
Flag indicating whether to return the samples as a `np.array` or a `tf.Tensor`
**kwargs : dict, optional
Additional keyword arguments passed to the networks
Returns
-------
lik_samples : tf.Tensor or np.ndarray of shape (n_data_sets, n_samples, data_dim)
the simulated observables per data set
"""
post_samples = []
for input_dict in input_list:
post_samples.append(self.sample(input_dict, n_samples, to_numpy, **kwargs))
if to_numpy:
return np.concatenate(post_samples, axis=0)
return tf.concat(post_samples, axis=0)
def log_likelihood(self, input_dict, to_numpy=True, **kwargs):
"""Calculates the approximate log-likelihood of targets given conditional variables via
the change-of-variable formula for a conditional normalizing flow.
Parameters
----------
input_dict : dict
Input dictionary containing the following mandatory keys, if DEFAULT_KEYS unchanged:
`observables` - the variables over which a conditional density is learned (i.e., the observables)
`conditions` - the conditioning variables that are directly passed to the surrogate network
to_numpy : bool, optional, default: True
Boolean flag indicating whether to return the log-lik values as a `np.array` or a `tf.Tensor`
Returns
-------
log_lik : tf.Tensor of shape (batch_size, n_obs)
the approximate log-likelihood of each data point in each data set
"""
# Forward pass through the network
z, log_det_J = self.surrogate_net.forward(
input_dict[DEFAULT_KEYS['observables']],
input_dict[DEFAULT_KEYS['conditions']], training=False, **kwargs
)
# Compute approximate log likelihood
log_lik = self.latent_dist.log_prob(z) + log_det_J
# Convert tensor to numpy array, if specified
if to_numpy:
return log_lik.numpy()
return log_lik
def log_prob(self, input_dict, to_numpy=True, **kwargs):
"""Identical to `log_likelihood(input_dict, to_numpy, **kwargs)`."""
return self.log_likelihood(input_dict, to_numpy=to_numpy, **kwargs)
def compute_loss(self, input_dict, **kwargs):
"""Computes the loss of the amortized likelihood given input data provided in input_dict.
Parameters
----------
input_dict : dict
Input dictionary containing the following mandatory keys, if DEFAULT_KEYS unchanged:
`observables` - the observables over which a conditional density is learned (i.e., the data)
`conditions` - the conditioning variables that are directly passed to the surrogate network
Returns
-------
loss : tf.Tensor of shape (1,) - the total computed loss given input variables
"""
z, log_det_J = self(input_dict, **kwargs)
loss = tf.reduce_mean(-self.latent_dist.log_prob(z) - log_det_J)
return loss
def _determine_latent_dist(self, latent_dist):
"""Determines which latent distribution to use and defaults to unit normal if `None` provided.
"""
if latent_dist is None:
return tfp.distributions.MultivariateNormalDiag(loc=[0.]*self.latent_dim)
else:
return latent_dist
class AmortizedPosteriorLikelihood(tf.keras.Model, AmortizedTarget):
"""An interface for jointly learning a surrogate model of the simulator and an approximate
posterior given a generative model.
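Examples
--------
An illustrative sketch of the nested input structure expected by the joint amortizer
(hypothetical amortizer instances; the inner dictionaries follow the formats of the
respective amortizers):
>>> # joint_amortizer = AmortizedPosteriorLikelihood(posterior_amortizer, likelihood_amortizer)
>>> # input_dict = {
>>> #     'posterior_inputs' : {'parameters': ..., 'summary_conditions': ..., 'direct_conditions': ...},
>>> #     'likelihood_inputs': {'observables': ..., 'conditions': ...}
>>> # }
>>> # losses = joint_amortizer.compute_loss(input_dict)  # -> {'Post.Loss': ..., 'Lik.Loss': ...}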
"""
def __init__(self, amortized_posterior, amortized_likelihood, **kwargs):
"""Initializes a joint learner comprising an amortized posterior and an amortized emulator.
Parameters
----------
amortized_posterior : an instance of AmortizedPosterior or a custom tf.keras.Model
The generative neural posterior approximator.
amortized_likelihood : an instance of AmortizedLikelihood or a custom tf.keras.Model
The generative neural likelihood approximator.
"""
tf.keras.Model.__init__(self, **kwargs)
self.amortized_posterior = amortized_posterior
self.amortized_likelihood = amortized_likelihood
def call(self, input_dict, **kwargs):
""" Performs a forward pass through both amortizers.
Parameters
----------
input_dict : dict
Input dictionary containing the following mandatory keys:
`posterior_inputs` - The input dictionary for the amortized posterior
`likelihood_inputs` - The input dictionary for the amortized likelihood
Returns
-------
(post_out, lik_out) : tuple
The outputs of the posterior and likelihood networks given input variables.
"""
post_out = self.amortized_posterior(input_dict['posterior_inputs'], **kwargs)
lik_out = self.amortized_likelihood(input_dict['likelihood_inputs'], **kwargs)
return post_out, lik_out
def compute_loss(self, input_dict, **kwargs):
""" Computes the loss of the joint amortizer by computing the individual amortized posterior
and likelihood losses.
Parameters
----------
input_dict : dict
Nested input dictionary containing the following mandatory keys, if DEFAULT_KEYS unchanged:
`posterior_inputs` - The input dictionary for the amortized posterior
`likelihood_inputs` - The input dictionary for the amortized likelihood
Returns
-------
total_losses : dict
A dictionary with keys `Post.Loss` and `Lik.Loss` containing the individual losses for the
two amortizers.
"""
loss_post = self.amortized_posterior.compute_loss(input_dict[DEFAULT_KEYS['posterior_inputs']], **kwargs)
loss_lik = self.amortized_likelihood.compute_loss(input_dict[DEFAULT_KEYS['likelihood_inputs']], **kwargs)
return {'Post.Loss': loss_post, 'Lik.Loss': loss_lik}
def log_likelihood(self, input_dict, to_numpy=True, **kwargs):
""" Calculates the approximate log-likelihood of data given conditional variables via
the change-of-variable formula for conditional normalizing flows.
Parameters
----------
input_dict : dict
Input dictionary containing the following mandatory keys, if DEFAULT_KEYS unchanged:
`observables` - the variables over which a conditional density is learned (i.e., the observables)
`conditions` - the conditioning variables that are directly passed to the inference network
OR a nested dictionary with key `likelihood_inputs` containing the above input dictionary
to_numpy : bool, optional, default: True
Flag indicating whether to return the samples as a `np.array` or a `tf.Tensor`
Returns
-------
log_lik : tf.Tensor of shape (batch_size, n_obs)
the approximate log-likelihood of each data point in each data set
"""
if input_dict.get(DEFAULT_KEYS['likelihood_inputs']) is not None:
return self.amortized_likelihood.log_likelihood(
input_dict[DEFAULT_KEYS['likelihood_inputs']], to_numpy=to_numpy, **kwargs
)
return self.amortized_likelihood.log_likelihood(input_dict, to_numpy=to_numpy, **kwargs)
def log_posterior(self, input_dict, to_numpy=True, **kwargs):
"""Calculates the approximate log-posterior of targets given conditional variables via
the change-of-variable formula for conditional normalizing flows.
Parameters
----------
input_dict : dict
Input dictionary containing the following mandatory keys, if DEFAULT_KEYS unchanged:
`parameters` - the latent generative model parameters over which a conditional density is learned
`summary_conditions` - the conditioning variables that are first passed through a summary network
`direct_conditions` - the conditioning variables that are directly passed to the inference network
OR a nested dictionary with key `posterior_inputs` containing the above input dictionary
to_numpy : bool, optional, default: True
Flag indicating whether to return the values as a `np.ndarray` or a `tf.Tensor`
Returns
-------
log_post : tf.Tensor of shape (batch_size,)
the approximate log-posterior density of each parameter vector
"""
if input_dict.get(DEFAULT_KEYS['posterior_inputs']) is not None:
return self.amortized_posterior.log_posterior(
input_dict[DEFAULT_KEYS['posterior_inputs']], to_numpy=to_numpy, **kwargs
)
return self.amortized_posterior.log_posterior(input_dict, to_numpy=to_numpy, **kwargs)
def log_prob(self, input_dict, to_numpy=True, **kwargs):
"""Identical to calling separate `log_likelihood()` and `log_posterior()`.
Returns
-------
out_dict : dict with keys `log_posterior` and `log_likelihood` corresponding
to the computed log_pdfs of the approximate posterior and likelihood.
"""
log_post = self.log_posterior(input_dict, to_numpy=to_numpy, **kwargs)
log_lik = self.log_likelihood(input_dict, to_numpy=to_numpy, **kwargs)
out_dict = {'log_posterior': log_post, 'log_likelihood': log_lik}
return out_dict
def sample_data(self, input_dict, n_samples, to_numpy=True, **kwargs):
"""Generates `n_samples` random draws from the surrogate likelihood given input conditions.
Parameters
----------
input_dict : dict
Input dictionary containing the following mandatory keys, if DEFAULT_KEYS unchanged:
`conditions` - the conditioning variables that are directly passed to the surrogate network
OR a nested dictionary with key `likelihood_inputs` containing the above input dictionary
n_samples : int
The number of observable draws (samples) to obtain from the surrogate likelihood
to_numpy : bool, optional, default: True
Flag indicating whether to return the samples as a `np.array` or a `tf.Tensor`
Returns
-------
lik_samples : tf.Tensor or np.ndarray of shape (n_data_sets, n_samples, data_dim)
Simulated observables from the surrogate likelihood.
"""
if input_dict.get(DEFAULT_KEYS['likelihood_inputs']) is not None:
return self.amortized_likelihood.sample(
input_dict[DEFAULT_KEYS['likelihood_inputs']], n_samples, to_numpy=to_numpy, **kwargs
)
return self.amortized_likelihood.sample(input_dict, n_samples, to_numpy=to_numpy, **kwargs)
def sample_parameters(self, input_dict, n_samples, to_numpy=True, **kwargs):
"""Generates random draws from the approximate posterior given conditional variables.
Parameters
----------
input_dict : dict
Input dictionary containing the following mandatory keys, if DEFAULT_KEYS unchanged:
`summary_conditions` : the conditioning variables (including data) that are first passed through a summary network
`direct_conditions` : the conditioning variables that are directly passed to the inference network
OR a nested dictionary with key `posterior_inputs` containing the above input dictionary
n_samples : int
The number of posterior samples to obtain from the approximate posterior
to_numpy : bool, optional, default: True
Boolean flag indicating whether to return the samples as a `np.array` or a `tf.Tensor`
Returns
-------
post_samples : tf.Tensor or np.ndarray of shape (n_data_sets, n_samples, n_params)
the sampled parameters per data set
"""
if input_dict.get(DEFAULT_KEYS['posterior_inputs']) is not None:
return self.amortized_posterior.sample(
input_dict[DEFAULT_KEYS['posterior_inputs']], n_samples, to_numpy=to_numpy, **kwargs
)
return self.amortized_posterior.sample(input_dict, n_samples, to_numpy=to_numpy, **kwargs)
def sample(self, input_dict, n_post_samples, n_lik_samples, to_numpy=True, **kwargs):
"""Identical to calling `sample_parameters()` and `sample_data()` separately.
Returns
-------
out_dict : dict with keys `posterior_samples` and `likelihood_samples` corresponding
to the `n_post_samples` draws from the approximate posterior and the `n_lik_samples` draws
from the surrogate likelihood, respectively
"""
post_samples = self.sample_parameters(input_dict, n_post_samples, to_numpy=to_numpy, **kwargs)
lik_samples = self.sample_data(input_dict, n_lik_samples, to_numpy=to_numpy, **kwargs)
out_dict = {'posterior_samples': post_samples, 'likelihood_samples': lik_samples}
return out_dict
class AmortizedModelComparison(tf.keras.Model):
"""An interface to connect an evidential network for Bayesian model comparison with an optional summary network,
as described in the original paper on evidential neural networks for model comparison:
[1] Radev, S. T., D'Alessandro, M., Mertens, U. K., Voss, A., Köthe, U., & Bürkner, P. C. (2021).
Amortized Bayesian model comparison with evidential deep learning.
IEEE Transactions on Neural Networks and Learning Systems.
Note: the original paper does not distinguish between the summary and the evidential networks, but
treats them as a whole, with the appropriate architecture dictated by the model application. For the
sake of consistency, the BayesFlow library distinguishes the two modules.
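Examples
--------
A minimal usage sketch (the evidential and summary network classes and their
arguments are assumptions for illustration and may differ across BayesFlow versions):
>>> from bayesflow.networks import EvidentialNetwork, InvariantNetwork
>>> model_comparator = AmortizedModelComparison(
...     evidence_net=EvidentialNetwork(num_models=3),
...     summary_net=InvariantNetwork()
... )
>>> # After training, obtain Dirichlet concentrations (evidences) for new data sets:
>>> # alphas = model_comparator.evidence({'summary_conditions': obs_data})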
"""
def __init__(self, evidence_net, summary_net=None, loss_fun=None, kl_weight=None):
"""Initializes a composite neural architecture for amortized bayesian model comparison.
Parameters
----------
evidence_net : tf.keras.Model
A neural network which outputs model evidences.
summary_net : tf.keras.Model or None, optional, default: None
An optional summary network
loss_fun : callable or None, optional, default: None
The loss function which accepts the outputs of the amortizer. If None, the log-loss will be used.
kl_weight : float or None, optional, default: None
The weight of the KL regularization. If None, no regularization will be used.
Important
----------
- If no `summary_net` is provided, then the output dictionary of your generative model should not contain
any `summary_conditions`, i.e., `summary_conditions` should be set to None, otherwise these will be ignored.
- If no custom `loss_fun` is provided, the loss function will be the log loss for the means of a Dirichlet
distribution, as described in:
Radev, S. T., D'Alessandro, M., Mertens, U. K., Voss, A., Köthe, U., & Bürkner, P. C. (2021).
Amortized Bayesian model comparison with evidential deep learning.
IEEE Transactions on Neural Networks and Learning Systems.
- If no `kl_weight` is provided, no regularization (ground-truth preserving prior) will be used
for detecting implausible observables during inference.
"""
super().__init__()
self.evidence_net = evidence_net
self.summary_net = summary_net
self.loss = self._determine_loss(loss_fun)
self.kl_weight = kl_weight
self.num_models = self.evidence_net.num_models
def __call__(self, input_dict, return_summary=False, **kwargs):
""" Performs a forward pass through both networks.
Parameters
----------
input_dict : dict
Input dictionary containing the following mandatory keys, if DEFAULT_KEYS unchanged:
`summary_conditions` - the conditioning variables that are first passed through a summary network
`direct_conditions` - the conditioning variables that are directly passed to the evidential network
`model_indices` - the ground-truth, one-hot encoded model indices sampled from the model prior
return_summary : bool, optional, default: False
Indicates whether the summary network outputs are returned along the estimated evidences.
Returns
-------
net_out : tf.Tensor of shape (batch_size, num_models) or tuple of (net_out (batch_size, num_models),
summary_out (batch_size, summary_dim)), the latter entry being the summary network outputs, returned
only if `return_summary` is set to True.
"""
summary_out, full_cond = self._compute_summary_condition(
input_dict.get(DEFAULT_KEYS['summary_conditions']),
input_dict.get(DEFAULT_KEYS['direct_conditions']),
**kwargs
)
net_out = self.evidence_net(full_cond, **kwargs)
if not return_summary:
return net_out
return net_out, summary_out
def compute_loss(self, input_dict, **kwargs):
"""Computes the loss of the amortized model comparison instance.
Parameters
----------
input_dict : dict
Input dictionary containing the following mandatory keys, if DEFAULT_KEYS unchanged:
`summary_conditions` - the conditioning variables that are first passed through a summary network
`direct_conditions` - the conditioning variables that are directly passed to the evidential network
`model_indices` - the ground-truth, one-hot encoded model indices sampled from the model prior
Returns
-------
total_loss : tf.Tensor of shape (1,) - the total computed loss given input variables
"""
alphas = self(input_dict, **kwargs)
loss = self.loss(input_dict[DEFAULT_KEYS['model_indices']], alphas)
if self.kl_weight is None:
return loss
else:
kl = self.kl_weight * kl_dirichlet(input_dict[DEFAULT_KEYS['model_indices']], alphas)
return loss + kl
def sample(self, input_dict, to_numpy=True, **kwargs):
"""Samples posterior model probabilities from the higher order Dirichlet density.
Parameters
----------
input_dict : dict
Input dictionary containing the following mandatory keys, if DEFAULT_KEYS unchanged:
`summary_conditions` - the conditioning variables that are first passed through a summary network
`direct_conditions` - the conditioning variables that are directly passed to the evidential network
`model_indices` - the ground-truth, one-hot encoded model indices sampled from the model prior
n_samples : int
Number of samples to obtain from the approximate posterior
to_numpy : bool, default: True
Flag indicating whether to return the samples as a np.array or a tf.Tensor
Returns
-------
pm_samples : tf.Tensor or np.array
The posterior draws from the Dirichlet distribution, shape (num_samples, num_batch, num_models)
"""
_, full_cond = self._compute_summary_condition(
input_dict.get(DEFAULT_KEYS['summary_conditions']),
input_dict.get(DEFAULT_KEYS['direct_conditions']),
**kwargs
)
return self.evidence_net.sample(full_cond, to_numpy, **kwargs)
def evidence(self, input_dict, to_numpy=True, **kwargs):
"""Computes the evidence for the competing models given the data sets
contained in `input_dict`."""
_, full_cond = self._compute_summary_condition(
input_dict.get(DEFAULT_KEYS['summary_conditions']),
input_dict.get(DEFAULT_KEYS['direct_conditions']),
**kwargs
)
alphas = self.evidence_net(full_cond, **kwargs)
if to_numpy:
return alphas.numpy()
return alphas
def uncertainty_score(self, input_dict, to_numpy=True, **kwargs):
"""Computes the uncertainty score according to sum(alphas) / num_models.
_, full_cond = self._compute_summary_condition(
input_dict.get(DEFAULT_KEYS['summary_conditions']),
input_dict.get(DEFAULT_KEYS['direct_conditions']),
**kwargs
)
alphas = self.evidence_net(full_cond, **kwargs)
u = tf.reduce_sum(alphas, axis=-1) / self.evidence_net.num_models
if to_numpy:
return u.numpy()
return u
def _compute_summary_condition(self, summary_conditions, direct_conditions, **kwargs):
"""Determines how to concatenate the provided conditions."""
# Compute learnable summaries, if given
if self.summary_net is not None:
sum_condition = self.summary_net(summary_conditions, **kwargs)
else:
sum_condition = None
# Concatenate learnable summaries with fixed summaries
if sum_condition is not None and direct_conditions is not None:
full_cond = tf.concat([sum_condition, direct_conditions], axis=-1)
elif sum_condition is not None:
full_cond = sum_condition
elif direct_conditions is not None:
full_cond = direct_conditions
else:
raise SummaryStatsError("Could not concatenate or determine conditioning inputs...")
return sum_condition, full_cond
def _determine_loss(self, loss_fun):
"""Helper method to determine loss function to use."""
if loss_fun is None:
return log_loss
elif callable(loss_fun):
return loss_fun
else:
raise ConfigurationError("Loss function is neither default (`None`) nor callable. Please provide a valid loss function!")
class SingleModelAmortizer(AmortizedPosterior):
"""Deprecated class for amortized posterior estimation."""
def __init_subclass__(cls, **kwargs):
warn(f'{cls.__name__} will be deprecated. Use `AmortizedPosterior` instead.', DeprecationWarning, stacklevel=2)
super().__init_subclass__(**kwargs)
def __init__(self, *args, **kwargs):
warn(f'{self.__class__.__name__} will be deprecated. Use `AmortizedPosterior` instead.', DeprecationWarning, stacklevel=2)
super().__init__(*args, **kwargs)