diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..9ac5f66
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,19 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+## [0.1.4] - 2021-03-31
+
+### Changed
+
+- RotoGrad no longer normalizes the losses by default (now it is only performed
+  internally to compute GradNorm's weights). We keep the option to still normalize them
+  if desired.
+
+[unreleased]: https://github.com/adrianjav/rotograd/compare/v0.1.4...HEAD
+[0.1.4]: https://github.com/adrianjav/rotograd/compare/v0.1.3...v0.1.4
diff --git a/rotograd/__init__.py b/rotograd/__init__.py
index 9539987..6ccd49f 100644
--- a/rotograd/__init__.py
+++ b/rotograd/__init__.py
@@ -1,5 +1,5 @@
 from .rotograd import VanillaMTL, RotoGrad, RotoGradNorm, cached
 
-__version__ = '0.1.3'
+__version__ = '0.1.4'
 
 __all__ = ['VanillaMTL', 'RotoGrad', 'RotoGradNorm', 'cached']
diff --git a/rotograd/rotograd.py b/rotograd/rotograd.py
index 84d6d2f..71679f6 100644
--- a/rotograd/rotograd.py
+++ b/rotograd/rotograd.py
@@ -124,7 +124,7 @@ class RotoGrad(nn.Module):
         value*, :math:`{L_k(t)}/{L_k(t_0 = 0)}`. This parameter sets a number of iterations after which the denominator
         will be replaced by the value of the loss at that iteration, that is, :math:`t_0 = burn\_in\_period`. This is
         done to overcome problems with losses quickly changing in the first iterations.
-    normalize_losses : optional, default=True
+    normalize_losses : optional, default=False
         Whether to use this normalized losses to back-propagate through the task-specific parameters as well.
 
 
@@ -155,7 +155,7 @@ class RotoGrad(nn.Module):
     rep: torch.Tensor
 
     def __init__(self, backbone: nn.Module, heads: Sequence[nn.Module], latent_size: int, *args, alpha: float,
-                 burn_in_period: int = 20, normalize_losses: bool = True):
+                 burn_in_period: int = 20, normalize_losses: bool = False):
         super(RotoGrad, self).__init__()
         num_tasks = len(heads)
 
@@ -335,7 +335,7 @@ class RotoGradNorm(RotoGrad):
         value*, :math:`{L_k(t)}/{L_k(t_0 = 0)}`. This parameter sets a number of iterations after which the denominator
         will be replaced by the value of the loss at that iteration, that is, :math:`t_0 = burn\_in\_period`. This is
         done to overcome problems with losses quickly changing in the first iterations.
-    normalize_losses : optional, default=True
+    normalize_losses : optional, default=False
         Whether to use this normalized losses to back-propagate through the task-specific parameters as well.
 
 
@@ -362,7 +362,7 @@ class RotoGradNorm(RotoGrad):
 
     """
     def __init__(self, backbone: nn.Module, heads: Sequence[nn.Module], latent_size: int, *args, alpha: float,
-                 burn_in_period: int = 20, normalize_losses: bool = True):
+                 burn_in_period: int = 20, normalize_losses: bool = False):
         super().__init__(backbone, heads, latent_size, *args, alpha=alpha, burn_in_period=burn_in_period,
                          normalize_losses=normalize_losses)
         self.weight_ = nn.ParameterList([nn.Parameter(torch.ones([]), requires_grad=True) for _ in range(len(heads))])
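For context, here is a minimal construction sketch based only on the signatures shown in this diff: as of 0.1.4 the losses are no longer normalized before back-propagating through the task-specific parameters unless `normalize_losses=True` is passed explicitly (normalization is still applied internally to compute GradNorm's weights). The `nn.Sequential`/`nn.Linear` backbone and heads, the `latent_size` of 16, and `alpha=1.0` are illustrative placeholders, not values taken from the library.

```python
import torch.nn as nn
from rotograd import RotoGradNorm

# Placeholder shared backbone and task-specific heads (illustrative only);
# latent_size must match the backbone's output dimension.
backbone = nn.Sequential(nn.Linear(10, 32), nn.ReLU(), nn.Linear(32, 16))
heads = [nn.Linear(16, 1), nn.Linear(16, 1)]

# 0.1.4 default: losses are NOT normalized when back-propagating through
# the task-specific parameters (normalization still happens internally to
# compute GradNorm's weights).
model = RotoGradNorm(backbone, heads, latent_size=16, alpha=1.0)

# To keep the pre-0.1.4 behaviour, opt back in explicitly:
model_normalized = RotoGradNorm(
    nn.Sequential(nn.Linear(10, 32), nn.ReLU(), nn.Linear(32, 16)),
    [nn.Linear(16, 1), nn.Linear(16, 1)],
    latent_size=16, alpha=1.0, normalize_losses=True,
)
```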