added an implementation of instance normalization and layer normalization
SimonKohl authored and f0k committed Jun 8, 2018
1 parent 7992faa commit 075e1a5
Showing 4 changed files with 489 additions and 0 deletions.
2 changes: 2 additions & 0 deletions docs/modules/layers.rst
@@ -166,6 +166,8 @@
LocalResponseNormalization2DLayer
BatchNormLayer
batch_norm
StandardizationLayer
instance_norm


.. rubric:: :doc:`layers/embedding`
5 changes: 5 additions & 0 deletions docs/modules/layers/normalization.rst
@@ -13,3 +13,8 @@ Normalization layers

.. autofunction:: batch_norm

.. autoclass:: StandardizationLayer
    :members:

.. autofunction:: instance_norm

235 changes: 235 additions & 0 deletions lasagne/layers/normalization.py
@@ -50,6 +50,9 @@
"LocalResponseNormalization2DLayer",
"BatchNormLayer",
"batch_norm",
"StandardizationLayer",
"instance_norm",
"layer_norm",
]


@@ -376,3 +379,235 @@ def batch_norm(layer, **kwargs):
    nonlin_name = bn_name and bn_name + '_nonlin'
    layer = NonlinearityLayer(layer, nonlinearity, name=nonlin_name)
    return layer


class StandardizationLayer(Layer):
    """
    This layer implements the normalization of the input layer's
    outputs across the specified axes:

    .. math::
        y_i = \\frac{x_i - \\mu_i}{\\sqrt{\\sigma_i^2 + \\epsilon}}

    That is, each input feature (or input pixel) :math:`x_i` is normalized
    to zero mean and unit variance. The mean :math:`\\mu_i` and variance
    :math:`\\sigma_i^2` are computed across the specified axes. In contrast
    to batch normalization, the mean and variance are not restricted to be
    defined across examples, so the same operation can be applied during
    training and testing. The advantage of using this implementation over,
    e.g., :class:`BatchNormLayer` with adapted `axes` arguments is its
    independence of the input size: no parameters are learned or stored.
    :class:`StandardizationLayer` can be employed to realize different
    normalization schemes such as instance normalization [1]_ and layer
    normalization [2]_, for both of which convenience functions
    (:func:`instance_norm` and :func:`layer_norm`) are available.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape
    axes : 'auto', 'spatial', 'features', int or tuple of int
        The axis or axes to normalize over. If ``'auto'`` (the default),
        2D inputs are normalized over the second dimension and inputs with
        more than two dimensions are normalized over all but the first two
        dimensions (i.e., over all spatial dimensions for convolutional
        layers). If ``'spatial'``, normalization is done over all but the
        first two dimensions; if ``'features'``, over all but the first
        dimension.
    epsilon : scalar
        Small constant :math:`\\epsilon` added to the variance before taking
        the square root and dividing by it, to avoid numerical problems
    **kwargs
        Any additional keyword arguments are passed to the :class:`Layer`
        superclass.

    Notes
    -----
    The convenience functions :func:`instance_norm` and :func:`layer_norm`
    modify an existing layer to insert instance normalization or
    layer normalization in front of its nonlinearity.

    See also
    --------
    instance_norm : Convenience function to apply instance normalization to a
        layer
    layer_norm : Convenience function to apply layer normalization to a layer

    References
    ----------
    .. [1] Ulyanov, D., Vedaldi, A., & Lempitsky, V. (2016):
           Instance Normalization: The Missing Ingredient for Fast
           Stylization. https://arxiv.org/abs/1607.08022.
    .. [2] Ba, J., Kiros, J., & Hinton, G. (2016):
           Layer Normalization. https://arxiv.org/abs/1607.06450.
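
    Examples
    --------
    A minimal usage sketch (with made-up shapes), standardizing each feature
    map of each sample over its spatial dimensions, as done for instance
    normalization:

    >>> from lasagne.layers import InputLayer, StandardizationLayer
    >>> l1 = InputLayer((10, 3, 28, 28))
    >>> l2 = StandardizationLayer(l1, axes='spatial')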
"""
    def __init__(self, incoming, axes='auto', epsilon=1e-4, **kwargs):
        super(StandardizationLayer, self).__init__(incoming, **kwargs)

        if axes == 'auto':
            # default: normalize across 2nd dimension for 2D inputs
            # and across all but the first two axes for 3D+ inputs
            if len(self.input_shape) == 2:
                axes = (1,)
            else:
                axes = tuple(range(2, len(self.input_shape)))
        elif axes == 'spatial':
            # normalize over spatial dimensions only,
            # separate for each instance in the batch
            axes = tuple(range(2, len(self.input_shape)))
        elif axes == 'features':
            # normalize over features and spatial dimensions,
            # separate for each instance in the batch
            axes = tuple(range(1, len(self.input_shape)))
        elif isinstance(axes, int):
            axes = (axes,)
        self.axes = axes

        self.epsilon = epsilon

    def get_output_for(self, input, **kwargs):
        mean = input.mean(self.axes, keepdims=True)
        std = T.sqrt(input.var(self.axes, keepdims=True) + self.epsilon)
        return (input - mean) / std


def instance_norm(layer, learn_scale=True, learn_bias=True, **kwargs):
    """
    Apply instance normalization to an existing layer. This is a convenience
    function modifying an existing layer to include instance normalization:
    It will steal the layer's nonlinearity if there is one (effectively
    introducing the normalization right before the nonlinearity), remove the
    layer's bias if there is one (because it would be redundant), and add a
    :class:`StandardizationLayer` and :class:`NonlinearityLayer` on top.
    Depending on the given arguments, an additional :class:`ScaleLayer` and
    :class:`BiasLayer` will be inserted in between.

    Parameters
    ----------
    layer : A :class:`Layer` instance
        The layer to apply the normalization to; note that it will be
        irreversibly modified as specified above
    learn_scale : bool (default: True)
        Whether to add a :class:`ScaleLayer` after the
        :class:`StandardizationLayer`
    learn_bias : bool (default: True)
        Whether to add a :class:`BiasLayer` after the
        :class:`StandardizationLayer` or the optionally added
        :class:`ScaleLayer`
    **kwargs
        Any additional keyword arguments are passed on to the
        :class:`StandardizationLayer` constructor.

    Returns
    -------
    StandardizationLayer, ScaleLayer, BiasLayer or NonlinearityLayer instance
        The last layer of the stack added on top of the given modified
        `layer`: an instance normalization layer, optionally followed by a
        scale layer, a bias layer, and a nonlinearity layer, depending on
        whether `layer` was nonlinear and on the arguments to
        :func:`instance_norm`.

    Examples
    --------
    Just wrap any layer into an :func:`instance_norm` call on creating it:

    >>> from lasagne.layers import InputLayer, Conv2DLayer, instance_norm
    >>> from lasagne.nonlinearities import rectify
    >>> l1 = InputLayer((10, 3, 28, 28))
    >>> l2 = instance_norm(Conv2DLayer(l1, num_filters=64, filter_size=3,\
                           nonlinearity=rectify))

    This introduces instance normalization right before its nonlinearity:

    >>> from lasagne.layers import get_all_layers
    >>> [l.__class__.__name__ for l in get_all_layers(l2)]
    ['InputLayer', 'Conv2DLayer', 'StandardizationLayer', \
'ScaleLayer', 'BiasLayer', 'NonlinearityLayer']
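
    A sketch of the optional arguments: disabling the learned scale and bias
    leaves only the standardization and the nonlinearity on top:

    >>> l3 = instance_norm(Conv2DLayer(l1, num_filters=64, filter_size=3,\
                           nonlinearity=rectify),\
                           learn_scale=False, learn_bias=False)
    >>> [l.__class__.__name__ for l in get_all_layers(l3)]
    ['InputLayer', 'Conv2DLayer', 'StandardizationLayer', \
'NonlinearityLayer']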
"""
    nonlinearity = getattr(layer, 'nonlinearity', None)
    if nonlinearity is not None:
        layer.nonlinearity = nonlinearities.identity
    if hasattr(layer, 'b') and layer.b is not None:
        del layer.params[layer.b]
        layer.b = None
    in_name = (kwargs.pop('name', None) or
               (getattr(layer, 'name', None) and layer.name + '_in'))
    layer = StandardizationLayer(layer, axes='spatial', name=in_name,
                                 **kwargs)
    if learn_scale:
        from .special import ScaleLayer
        scale_name = in_name and in_name + '_scale'
        layer = ScaleLayer(layer, shared_axes='auto', name=scale_name)
    if learn_bias:
        from .special import BiasLayer
        bias_name = in_name and in_name + '_bias'
        layer = BiasLayer(layer, shared_axes='auto', name=bias_name)
    if nonlinearity is not None:
        from .special import NonlinearityLayer
        nonlin_name = in_name and in_name + '_nonlin'
        layer = NonlinearityLayer(layer, nonlinearity, name=nonlin_name)
    return layer


def layer_norm(layer, **kwargs):
    """
    Apply layer normalization to an existing layer. This is a convenience
    function modifying an existing layer to include layer normalization:
    It will steal the layer's nonlinearity if there is one (effectively
    introducing the normalization right before the nonlinearity), remove the
    layer's bias if there is one, and add a :class:`StandardizationLayer`,
    :class:`ScaleLayer`, :class:`BiasLayer`, and :class:`NonlinearityLayer`
    on top.

    Parameters
    ----------
    layer : A :class:`Layer` instance
        The layer to apply the normalization to; note that it will be
        irreversibly modified as specified above
    **kwargs
        Any additional keyword arguments are passed on to the
        :class:`StandardizationLayer` constructor.

    Returns
    -------
    BiasLayer or NonlinearityLayer instance
        A bias layer stacked on top of the standardization and scale layers
        added to the given modified `layer`, or a nonlinearity layer stacked
        on top of all of them if `layer` was nonlinear.

    Examples
    --------
    Just wrap any layer into a :func:`layer_norm` call on creating it:

    >>> from lasagne.layers import InputLayer, DenseLayer, layer_norm
    >>> from lasagne.nonlinearities import rectify
    >>> l1 = InputLayer((10, 28))
    >>> l2 = layer_norm(DenseLayer(l1, num_units=64, nonlinearity=rectify))

    This introduces layer normalization right before its nonlinearity:

    >>> from lasagne.layers import get_all_layers
    >>> [l.__class__.__name__ for l in get_all_layers(l2)]
    ['InputLayer', 'DenseLayer', 'StandardizationLayer', \
'ScaleLayer', 'BiasLayer', 'NonlinearityLayer']
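
    Any additional keyword arguments are forwarded to the underlying
    :class:`StandardizationLayer`; as a sketch, a custom `epsilon`:

    >>> l3 = layer_norm(DenseLayer(l1, num_units=64, nonlinearity=rectify),\
                        epsilon=1e-6)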
"""
    nonlinearity = getattr(layer, 'nonlinearity', None)
    if nonlinearity is not None:
        layer.nonlinearity = nonlinearities.identity
    ln_name = (kwargs.pop('name', None) or
               (getattr(layer, 'name', None) and layer.name + '_ln'))
    if hasattr(layer, 'b') and layer.b is not None:
        del layer.params[layer.b]
        layer.b = None
    layer = StandardizationLayer(layer, axes='features', name=ln_name,
                                 **kwargs)
    from .special import ScaleLayer
    scale_name = ln_name and ln_name + '_scale'
    layer = ScaleLayer(layer, shared_axes='auto', name=scale_name)
    from .special import BiasLayer
    bias_name = ln_name and ln_name + '_bias'
    layer = BiasLayer(layer, shared_axes='auto', name=bias_name)

    if nonlinearity is not None:
        from .special import NonlinearityLayer
        nonlin_name = ln_name and ln_name + '_nonlin'
        layer = NonlinearityLayer(layer, nonlinearity, name=nonlin_name)
    return layer