Merge pull request #808 from SimonKohl/add_instance_norm
Implementation of instance normalization and layer normalization
f0k committed Jun 10, 2018
2 parents 7992faa + 18f7ee0 commit a61b76f
Showing 4 changed files with 500 additions and 0 deletions.
3 changes: 3 additions & 0 deletions docs/modules/layers.rst
@@ -166,6 +166,9 @@
    LocalResponseNormalization2DLayer
    BatchNormLayer
    batch_norm
    StandardizationLayer
    instance_norm
    layer_norm


.. rubric:: :doc:`layers/embedding`
7 changes: 7 additions & 0 deletions docs/modules/layers/normalization.rst
@@ -13,3 +13,10 @@ Normalization layers

.. autofunction:: batch_norm

.. autoclass:: StandardizationLayer
    :members:

.. autofunction:: instance_norm

.. autofunction:: layer_norm

243 changes: 243 additions & 0 deletions lasagne/layers/normalization.py
@@ -50,6 +50,9 @@
"LocalResponseNormalization2DLayer",
"BatchNormLayer",
"batch_norm",
"StandardizationLayer",
"instance_norm",
"layer_norm",
]


@@ -376,3 +379,243 @@ def batch_norm(layer, **kwargs):
        nonlin_name = bn_name and bn_name + '_nonlin'
        layer = NonlinearityLayer(layer, nonlinearity, name=nonlin_name)
    return layer


class StandardizationLayer(Layer):
    """
    Standardize inputs to zero mean and unit variance:

    .. math::
        y_i = \\frac{x_i - \\mu_i}{\\sqrt{\\sigma_i^2 + \\epsilon}}

    The mean :math:`\\mu_i` and variance :math:`\\sigma_i^2` are computed and
    shared across a given set of axes. In contrast to batch normalization,
    these axes usually do not include the batch dimension, so each example is
    normalized independently from other examples in the minibatch, both during
    training and testing.

    The :class:`StandardizationLayer` can be employed to realize instance
    normalization [1]_ and layer normalization [2]_, for both of which
    convenience functions (:func:`instance_norm` and :func:`layer_norm`) are
    available.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape
    axes : 'auto', 'spatial', 'features', int or tuple of int
        The axis or axes to normalize over. If ``'auto'`` (the default),
        two-dimensional inputs are normalized over the last dimension (i.e.,
        this will normalize over units for dense layers), while input tensors
        with more than two dimensions are normalized over all but the first
        two dimensions (i.e., this will normalize over all spatial dimensions
        for convolutional layers). If ``'spatial'``, will normalize over all
        but the first two dimensions. If ``'features'``, will normalize over
        all but the first dimension.
    epsilon : scalar
        Small constant :math:`\\epsilon` added to the variance before taking
        the square root and dividing by it, to avoid numerical problems
    **kwargs
        Any additional keyword arguments are passed to the :class:`Layer`
        superclass.

    See also
    --------
    instance_norm : Convenience function to apply instance normalization
    layer_norm : Convenience function to apply layer normalization to a layer

    References
    ----------
    .. [1] Ulyanov, D., Vedaldi, A., & Lempitsky, V. (2016):
           Instance Normalization: The Missing Ingredient for Fast Stylization.
           https://arxiv.org/abs/1607.08022.
    .. [2] Ba, J., Kiros, J., & Hinton, G. (2016):
           Layer normalization.
           https://arxiv.org/abs/1607.06450.
    """
    def __init__(self, incoming, axes='auto', epsilon=1e-4, **kwargs):
        super(StandardizationLayer, self).__init__(incoming, **kwargs)

        if axes == 'auto':
            # default: normalize across 2nd dimension for 2D inputs
            # and across all but the first two axes for 3D+ inputs
            if len(self.input_shape) == 2:
                axes = (1,)
            else:
                axes = tuple(range(2, len(self.input_shape)))
        elif axes == 'spatial':
            # normalize over spatial dimensions only,
            # separate for each instance in the batch
            axes = tuple(range(2, len(self.input_shape)))
        elif axes == 'features':
            # normalize over features and spatial dimensions,
            # separate for each instance in the batch
            axes = tuple(range(1, len(self.input_shape)))
        elif isinstance(axes, int):
            axes = (axes,)
        self.axes = axes

        self.epsilon = epsilon

    def get_output_for(self, input, **kwargs):
        mean = input.mean(self.axes, keepdims=True)
        std = T.sqrt(input.var(self.axes, keepdims=True) + self.epsilon)
        return (input - mean) / std
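The axes options map directly onto reduction axes: for a 4D convolutional input, ``'spatial'`` reduces over axes (2, 3) and ``'features'`` over axes (1, 2, 3). A minimal standalone sketch, assuming Theano and a Lasagne build that includes this layer (the input shape and sizes below are illustrative assumptions, not taken from the commit):

# Hypothetical check of StandardizationLayer on its own: every example/channel
# slice should come out with roughly zero mean and unit variance.
import numpy as np
import theano
import theano.tensor as T
from lasagne.layers import InputLayer, StandardizationLayer, get_output

x = T.tensor4('x')
l_in = InputLayer((None, 3, 8, 8), input_var=x)
l_std = StandardizationLayer(l_in, axes='spatial')   # reduce over axes (2, 3)
fn = theano.function([x], get_output(l_std))

data = np.random.randn(2, 3, 8, 8).astype(theano.config.floatX)
out = fn(data)
print(out.mean(axis=(2, 3)))   # close to 0 for each example and channel
print(out.std(axis=(2, 3)))    # close to 1 (epsilon keeps it slightly below)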


def instance_norm(layer, learn_scale=True, learn_bias=True, **kwargs):
    """
    Apply instance normalization to an existing layer. This is a convenience
    function modifying an existing layer to include instance normalization: It
    will steal the layer's nonlinearity if there is one (effectively
    introducing the normalization right before the nonlinearity), remove
    the layer's bias if there is one (because it would have no effect), and
    add a :class:`StandardizationLayer` and :class:`NonlinearityLayer` on top.
    Depending on the given arguments, an additional :class:`ScaleLayer` and
    :class:`BiasLayer` will be inserted in between.

    In effect, it will separately standardize each feature map of each input
    example, followed by an optional scale and shift learned per channel,
    followed by the original nonlinearity, as proposed in [1]_.

    Parameters
    ----------
    layer : A :class:`Layer` instance
        The layer to apply the normalization to; note that it will be
        irreversibly modified as specified above
    learn_scale : bool (default: True)
        Whether to add a ScaleLayer after the StandardizationLayer
    learn_bias : bool (default: True)
        Whether to add a BiasLayer after the StandardizationLayer (or the
        optional ScaleLayer)
    **kwargs
        Any additional keyword arguments are passed on to the
        :class:`StandardizationLayer` constructor.

    Returns
    -------
    StandardizationLayer, ScaleLayer, BiasLayer, or NonlinearityLayer instance
        The last layer stacked on top of the given modified `layer` to
        implement instance normalization with optional scaling and shifting.

    Examples
    --------
    Just wrap any layer into a :func:`instance_norm` call on creating it:

    >>> from lasagne.layers import InputLayer, Conv2DLayer, instance_norm
    >>> from lasagne.nonlinearities import rectify
    >>> l1 = InputLayer((10, 3, 28, 28))
    >>> l2 = instance_norm(Conv2DLayer(l1, num_filters=64, filter_size=3,
    ...                                nonlinearity=rectify))

    This introduces instance normalization right before its nonlinearity:

    >>> from lasagne.layers import get_all_layers
    >>> [l.__class__.__name__ for l in get_all_layers(l2)]
    ['InputLayer', 'Conv2DLayer', 'StandardizationLayer', \
     'ScaleLayer', 'BiasLayer', 'NonlinearityLayer']

    References
    ----------
    .. [1] Ulyanov, D., Vedaldi, A., & Lempitsky, V. (2016):
           Instance Normalization: The Missing Ingredient for Fast Stylization.
           https://arxiv.org/abs/1607.08022.
    """
    nonlinearity = getattr(layer, 'nonlinearity', None)
    if nonlinearity is not None:
        layer.nonlinearity = nonlinearities.identity
    if hasattr(layer, 'b') and layer.b is not None:
        del layer.params[layer.b]
        layer.b = None
    in_name = (kwargs.pop('name', None) or
               (getattr(layer, 'name', None) and layer.name + '_in'))
    layer = StandardizationLayer(layer, axes='spatial', name=in_name, **kwargs)
    if learn_scale:
        from .special import ScaleLayer
        scale_name = in_name and in_name + '_scale'
        layer = ScaleLayer(layer, shared_axes='auto', name=scale_name)
    if learn_bias:
        from .special import BiasLayer
        bias_name = in_name and in_name + '_bias'
        layer = BiasLayer(layer, shared_axes='auto', name=bias_name)
    if nonlinearity is not None:
        from .special import NonlinearityLayer
        nonlin_name = in_name and in_name + '_nonlin'
        layer = NonlinearityLayer(layer, nonlinearity, name=nonlin_name)
    return layer
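As a rough usage sketch, the doctest network above can also be compiled and evaluated end to end, assuming Theano and Lasagne are available (the batch size, filter count, and input shape are illustrative assumptions):

# Illustrative only: build the wrapped convolution and run a forward pass.
import numpy as np
import theano
import theano.tensor as T
from lasagne.layers import InputLayer, Conv2DLayer, instance_norm, get_output
from lasagne.nonlinearities import rectify

x = T.tensor4('x')
l1 = InputLayer((None, 3, 28, 28), input_var=x)
l2 = instance_norm(Conv2DLayer(l1, num_filters=64, filter_size=3,
                               nonlinearity=rectify))
fn = theano.function([x], get_output(l2))
out = fn(np.random.randn(10, 3, 28, 28).astype(theano.config.floatX))
print(out.shape)   # (10, 64, 26, 26): each of the 64 maps was standardized
                   # per example before the learned scale, shift and rectifier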


def layer_norm(layer, **kwargs):
    """
    Apply layer normalization to an existing layer. This is a convenience
    function modifying an existing layer to include layer normalization: It
    will steal the layer's nonlinearity if there is one (effectively
    introducing the normalization right before the nonlinearity), remove
    the layer's bias if there is one, and add a :class:`StandardizationLayer`,
    :class:`ScaleLayer`, :class:`BiasLayer`, and :class:`NonlinearityLayer` on
    top.

    In effect, it will standardize each input example across the feature and
    spatial dimensions (if any), followed by a scale and shift learned per
    feature, followed by the original nonlinearity, as proposed in [1]_.

    Parameters
    ----------
    layer : A :class:`Layer` instance
        The layer to apply the normalization to; note that it will be
        irreversibly modified as specified above
    **kwargs
        Any additional keyword arguments are passed on to the
        :class:`StandardizationLayer` constructor.

    Returns
    -------
    BiasLayer or NonlinearityLayer instance
        The last layer stacked on top of the given modified `layer` to
        implement layer normalization with feature-wise scaling and shifting.

    Examples
    --------
    Just wrap any layer into a :func:`layer_norm` call on creating it:

    >>> from lasagne.layers import InputLayer, DenseLayer, layer_norm
    >>> from lasagne.nonlinearities import rectify
    >>> l1 = InputLayer((10, 28))
    >>> l2 = layer_norm(DenseLayer(l1, num_units=64, nonlinearity=rectify))

    This introduces layer normalization right before its nonlinearity:

    >>> from lasagne.layers import get_all_layers
    >>> [l.__class__.__name__ for l in get_all_layers(l2)]
    ['InputLayer', 'DenseLayer', 'StandardizationLayer', \
     'ScaleLayer', 'BiasLayer', 'NonlinearityLayer']

    References
    ----------
    .. [1] Ba, J., Kiros, J., & Hinton, G. (2016):
           Layer normalization.
           https://arxiv.org/abs/1607.06450.
    """
    nonlinearity = getattr(layer, 'nonlinearity', None)
    if nonlinearity is not None:
        layer.nonlinearity = nonlinearities.identity
    ln_name = (kwargs.pop('name', None) or
               (getattr(layer, 'name', None) and layer.name + '_ln'))
    if hasattr(layer, 'b') and layer.b is not None:
        del layer.params[layer.b]
        layer.b = None
    layer = StandardizationLayer(layer, axes='features', name=ln_name,
                                 **kwargs)
    scale_name = ln_name and ln_name + '_scale'
    from .special import ScaleLayer
    layer = ScaleLayer(layer, shared_axes='auto', name=scale_name)
    from .special import BiasLayer
    bias_name = ln_name and ln_name + '_bias'
    layer = BiasLayer(layer, shared_axes='auto', name=bias_name)

    if nonlinearity is not None:
        from .special import NonlinearityLayer
        nonlin_name = ln_name and ln_name + '_nonlin'
        layer = NonlinearityLayer(layer, nonlinearity, name=nonlin_name)
    return layer
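For comparison, a hand-built sketch of the stack that layer_norm assembles around a dense layer (the layer sizes are illustrative assumptions): the bias is dropped from the dense layer, standardization runs over the feature axis, and the learned scale, shift, and original nonlinearity follow on top.

# Sketch of the stack produced by layer_norm(DenseLayer(...)), built manually.
from lasagne.layers import (InputLayer, DenseLayer, StandardizationLayer,
                            ScaleLayer, BiasLayer, NonlinearityLayer)
from lasagne.nonlinearities import rectify, identity

l = InputLayer((10, 28))
l = DenseLayer(l, num_units=64, b=None, nonlinearity=identity)  # bias removed
l = StandardizationLayer(l, axes='features')  # standardize each example
l = ScaleLayer(l, shared_axes='auto')         # learned per-feature scale
l = BiasLayer(l, shared_axes='auto')          # learned per-feature shift
l = NonlinearityLayer(l, rectify)             # original nonlinearity on top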
