From 78fe08c0fe0beae1903a5e3e68d508424c8a5649 Mon Sep 17 00:00:00 2001
From: melanibe
Date: Tue, 6 Jul 2021 10:53:24 +0000
Subject: [PATCH 01/26] update pl

---
 environment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index 834011306..83c0131fc 100644
--- a/environment.yml
+++ b/environment.yml
@@ -47,7 +47,7 @@ dependencies:
   - pytest-cov==2.10.1
   - pytest-forked==1.3.0
   - pytest-xdist==1.34.0
-  - pytorch-lightning==1.2.8
+  - pytorch-lightning==1.3.8
   - rich==5.1.1
   - rpdb==0.1.6
   - runstats==1.8.0

From e712f8791096d0693d3540a1fc0d9b23305d1bfd Mon Sep 17 00:00:00 2001
From: melanibe
Date: Tue, 6 Jul 2021 12:45:57 +0000
Subject: [PATCH 02/26] fix one test

---
 Tests/ML/test_metrics.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/Tests/ML/test_metrics.py b/Tests/ML/test_metrics.py
index b30a754fb..22230a8cf 100644
--- a/Tests/ML/test_metrics.py
+++ b/Tests/ML/test_metrics.py
@@ -248,10 +248,6 @@ def test_average_without_nan() -> None:
     average.reset()
     assert average.count == 0
     assert average.sum == 0.0
-    # This is a weird side effect of Lightning's way of caching metric results. As long as we don't call
-    # .update, the last computed value will be kept and returned, even though we have called .reset() already.
-    assert average.compute() == expected
-    average.reset()
     # Update with a tensor that does not contain any values: Can't compute the average then.
     average.update(torch.zeros((0,)))
     with pytest.raises(ValueError) as ex:

From 453d5892a587316ba54013a7db3a1f93449964f4 Mon Sep 17 00:00:00 2001
From: melanibe
Date: Tue, 6 Jul 2021 12:46:08 +0000
Subject: [PATCH 03/26] our fix not needed anymore

---
 InnerEye/ML/model_training.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/InnerEye/ML/model_training.py b/InnerEye/ML/model_training.py
index 6c4810094..a2f7a5cc9 100644
--- a/InnerEye/ML/model_training.py
+++ b/InnerEye/ML/model_training.py
@@ -137,13 +137,6 @@ def create_lightning_trainer(container: LightningContainer,
     # Accelerator should be "ddp" when running large models in AzureML (when using DDP_spawn, we get out of GPU memory).
     # For unit tests, only "ddp_spawn" works
     accelerator = "ddp" if effective_num_gpus > 1 else None
-    if effective_num_gpus > 1:
-        # Initialize the DDP plugin with find_unused_parameters=False by default. If True (default), it prints out
-        # lengthy warnings about the performance impact of find_unused_parameters
-        plugins = [InnerEyeDDPPlugin(num_nodes=num_nodes, sync_batchnorm=True,
-                                     find_unused_parameters=container.pl_find_unused_parameters)]
-    else:
-        plugins = []
     logging.info(f"Using {num_gpus} GPUs per node with accelerator '{accelerator}'")
     tensorboard_logger = TensorBoardLogger(save_dir=str(container.logs_folder), name="Lightning", version="")
     loggers = [tensorboard_logger, AzureMLLogger()]
@@ -194,7 +187,6 @@ def create_lightning_trainer(container: LightningContainer,
                       sync_batchnorm=True,
                       terminate_on_nan=container.detect_anomaly,
                       resume_from_checkpoint=str(resume_from_checkpoint) if resume_from_checkpoint else None,
-                      plugins=plugins,
                       **kwargs)
     return trainer, storing_logger

From 5061ae64e1d8adb92edbcfb1f0c299f7c60da5a5 Mon Sep 17 00:00:00 2001
From: melanibe
Date: Tue, 6 Jul 2021 14:00:53 +0000
Subject: [PATCH 04/26] fix yet another test

---
 InnerEye/ML/lightning_metrics.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/InnerEye/ML/lightning_metrics.py b/InnerEye/ML/lightning_metrics.py
index 9f98df320..1f58f742a 100644
--- a/InnerEye/ML/lightning_metrics.py
+++ b/InnerEye/ML/lightning_metrics.py
@@ -8,7 +8,7 @@
 import numpy as np
 import torch
 import torch.nn.functional as F
-from pytorch_lightning import metrics
+import torchmetrics as metrics
 from pytorch_lightning.metrics import Metric
 from pytorch_lightning.metrics.functional import accuracy, auc, auroc, precision_recall_curve, roc
 from torch.nn import ModuleList
@@ -68,7 +68,7 @@ def has_predictions(self) -> bool:
         Returns True if the present object stores at least 1 prediction (self.update has been called at least once),
         or False if no predictions are stored.
         """
-        return len(self.y_pred) > 0  # type: ignore
+        return self.n_obs > 0  # type: ignore
 
 
 class Accuracy05(metrics.Accuracy):

From 6086fbf1b21f85b8e7ab4e9360e837bd8094320c Mon Sep 17 00:00:00 2001
From: melanibe
Date: Tue, 6 Jul 2021 14:02:05 +0000
Subject: [PATCH 05/26] add new torchmetrics

---
 environment.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/environment.yml b/environment.yml
index 83c0131fc..3553be39c 100644
--- a/environment.yml
+++ b/environment.yml
@@ -62,4 +62,5 @@ dependencies:
   - tensorboard==2.3.0
   - tensorboardX==2.1
   - torchprof==1.3.3
+  - torchmetrics==0.4.1
   - yacs==0.1.8

From c5ba75d74a57d94002a1817d90d09163ec13f931 Mon Sep 17 00:00:00 2001
From: melanibe
Date: Tue, 6 Jul 2021 14:30:58 +0000
Subject: [PATCH 06/26] fix checkpoints

---
 InnerEye/ML/model_training.py | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/InnerEye/ML/model_training.py b/InnerEye/ML/model_training.py
index a2f7a5cc9..1b530168b 100644
--- a/InnerEye/ML/model_training.py
+++ b/InnerEye/ML/model_training.py
@@ -21,7 +21,7 @@
 from InnerEye.Azure.azure_util import RUN_CONTEXT, is_offline_run_context
 from InnerEye.Common.common_util import SUBJECT_METRICS_FILE_NAME, change_working_directory
 from InnerEye.Common.resource_monitor import ResourceMonitor
-from InnerEye.ML.common import ModelExecutionMode, RECOVERY_CHECKPOINT_FILE_NAME, create_best_checkpoint
+from InnerEye.ML.common import ModelExecutionMode, RECOVERY_CHECKPOINT_FILE_NAME, create_best_checkpoint, LAST_CHECKPOINT_FILE_NAME
 from InnerEye.ML.deep_learning_config import ARGS_TXT, VISUALIZATION_FOLDER
 from InnerEye.ML.lightning_base import InnerEyeContainer, InnerEyeLightning
 from InnerEye.ML.lightning_container import LightningContainer
@@ -96,7 +96,8 @@ def __init__(self, container: LightningContainer):
                          filename=RECOVERY_CHECKPOINT_FILE_NAME + "_{epoch}",
                          period=container.recovery_checkpoint_save_interval,
                          save_top_k=container.recovery_checkpoints_save_last_k,
-                         mode="max")
+                         mode="max",
+                         save_last=True)
 
     def on_train_epoch_end(self, trainer: Trainer, pl_module: LightningModule, outputs: Any) -> None:
         pl_module.log(name="epoch", value=trainer.current_epoch)
@@ -117,19 +118,13 @@ def create_lightning_trainer(container: LightningContainer,
     :param kwargs: Any additional keyowrd arguments will be passed to the constructor of Trainer.
     :return: A tuple [Trainer object, diagnostic logger]
     """
-    # For now, stick with the legacy behaviour of always saving only the last epoch checkpoint. For large segmentation
-    # models, this still appears to be the best way of choosing them because validation loss on the relatively small
-    # training patches is not stable enough. Going by the validation loss somehow works for the Prostate model, but
-    # not for the HeadAndNeck model.
-    best_checkpoint_callback = ModelCheckpoint(dirpath=str(container.checkpoint_folder),
-                                               # filename=BEST_CHECKPOINT_FILE_NAME,
-                                               # monitor=f"{VALIDATION_PREFIX}{MetricType.LOSS.value}",
-                                               # save_top_k=1,
-                                               save_last=True)
-
     # Recovery checkpoints: {epoch} will turn into a string like "epoch=1"
     # Store 1 recovery checkpoint every recovery_checkpoint_save_interval epochs, keep the last
-    # recovery_checkpoints_save_last_k.
+    # recovery_checkpoints_save_last_k. For now, stick with the legacy behaviour of always using the last epoch
+    # checkpoint as "best_checkpoint". For large segmentation models, this still appears to be the best way of
+    # choosing them because validation loss on the relatively small
+    # training patches is not stable enough. Going by the validation loss somehow works for the Prostate model, but
+    # not for the HeadAndNeck model.
     recovery_checkpoint_callback = InnerEyeRecoveryCheckpointCallback(container)
 
     num_gpus = container.num_gpus_per_node
@@ -158,9 +153,9 @@ def create_lightning_trainer(container: LightningContainer,
     else:
         deterministic = False
         benchmark = True
-    # If the users provides additional callbacks via get_trainer_arguments (for custom
-    # containers
-    callbacks = [best_checkpoint_callback, recovery_checkpoint_callback]
+
+    callbacks = [recovery_checkpoint_callback]
+    # If the users provides additional callbacks via get_trainer_arguments (for custom containers)
     if "callbacks" in kwargs:
         callbacks.append(kwargs.pop("callbacks"))  # type: ignore
     is_azureml_run = not is_offline_run_context(RUN_CONTEXT)

From 9b516110a05024836e862ac645ea14a3b5785848 Mon Sep 17 00:00:00 2001
From: melanibe
Date: Tue, 6 Jul 2021 15:02:50 +0000
Subject: [PATCH 07/26] fix some test

---
 InnerEye/ML/model_training.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/InnerEye/ML/model_training.py b/InnerEye/ML/model_training.py
index 1b530168b..b1c034102 100644
--- a/InnerEye/ML/model_training.py
+++ b/InnerEye/ML/model_training.py
@@ -97,7 +97,7 @@ def __init__(self, container: LightningContainer):
                          period=container.recovery_checkpoint_save_interval,
                          save_top_k=container.recovery_checkpoints_save_last_k,
                          mode="max",
-                         save_last=True)
+                         save_last=False)
 
     def on_train_epoch_end(self, trainer: Trainer, pl_module: LightningModule, outputs: Any) -> None:
         pl_module.log(name="epoch", value=trainer.current_epoch)
@@ -118,13 +118,15 @@ def create_lightning_trainer(container: LightningContainer,
     :param kwargs: Any additional keyowrd arguments will be passed to the constructor of Trainer.
     :return: A tuple [Trainer object, diagnostic logger]
     """
-    # Recovery checkpoints: {epoch} will turn into a string like "epoch=1"
-    # Store 1 recovery checkpoint every recovery_checkpoint_save_interval epochs, keep the last
-    # recovery_checkpoints_save_last_k. For now, stick with the legacy behaviour of always using the last epoch
-    # checkpoint as "best_checkpoint". For large segmentation models, this still appears to be the best way of
-    # choosing them because validation loss on the relatively small
-    # training patches is not stable enough. Going by the validation loss somehow works for the Prostate model, but
-    # not for the HeadAndNeck model.
+    # For now, stick with the legacy behaviour of always saving only the last epoch checkpoint. For large segmentation
+    # models, this still appears to be the best way of choosing them because validation loss on the relatively small
+    # training patches is not stable enough. Going by the validation loss somehow works for the Prostate model, but
+    # not for the HeadAndNeck model.
+    last_checkpoint_callback = ModelCheckpoint(dirpath=str(container.checkpoint_folder), save_last=True, save_top_k=0)
+
+    # Recovery checkpoints: {epoch} will turn into a string like "epoch=1"
+    # Store 1 recovery checkpoint every recovery_checkpoint_save_interval epochs, keep the last
+    # recovery_checkpoints_save_last_k.
     recovery_checkpoint_callback = InnerEyeRecoveryCheckpointCallback(container)
 
     num_gpus = container.num_gpus_per_node
@@ -153,9 +155,9 @@ def create_lightning_trainer(container: LightningContainer,
     else:
         deterministic = False
         benchmark = True
-
-    callbacks = [recovery_checkpoint_callback]
-    # If the users provides additional callbacks via get_trainer_arguments (for custom containers)
+    # If the users provides additional callbacks via get_trainer_arguments (for custom
+    # containers
+    callbacks = [last_checkpoint_callback, recovery_checkpoint_callback]  # last_checkpoint_callback,
     if "callbacks" in kwargs:
         callbacks.append(kwargs.pop("callbacks"))  # type: ignore
     is_azureml_run = not is_offline_run_context(RUN_CONTEXT)

From 42717c4ac8cad72aca883b342a852b6158a916f5 Mon Sep 17 00:00:00 2001
From: melanibe
Date: Tue, 6 Jul 2021 15:07:01 +0000
Subject: [PATCH 08/26] fix one test more

---
 InnerEye/ML/lightning_metrics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/InnerEye/ML/lightning_metrics.py b/InnerEye/ML/lightning_metrics.py
index 1f58f742a..f66ed03ee 100644
--- a/InnerEye/ML/lightning_metrics.py
+++ b/InnerEye/ML/lightning_metrics.py
@@ -82,7 +82,7 @@ def has_predictions(self) -> bool:
         Returns True if the present object stores at least 1 prediction (self.update has been called at least once),
         or False if no predictions are stored.
         """
-        return self.total > 0  # type: ignore
+        return (self.total) or (self.tp + self.fp + self.tn + self.fn) > 0  # type: ignore
 
 
 class AverageWithoutNan(Metric):

From 4f4e30215e162c770cbcfa789b688247b1b9e800 Mon Sep 17 00:00:00 2001
From: melanibe
Date: Tue, 6 Jul 2021 15:16:30 +0000
Subject: [PATCH 09/26] attempt to fix test

---
 InnerEye/ML/SSL/lightning_modules/ssl_online_evaluator.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/InnerEye/ML/SSL/lightning_modules/ssl_online_evaluator.py b/InnerEye/ML/SSL/lightning_modules/ssl_online_evaluator.py
index 3baa3044d..429affda4 100644
--- a/InnerEye/ML/SSL/lightning_modules/ssl_online_evaluator.py
+++ b/InnerEye/ML/SSL/lightning_modules/ssl_online_evaluator.py
@@ -56,7 +56,7 @@ def on_pretrain_routine_start(self, trainer: pl.Trainer, pl_module: pl.Lightning
                                                       p=self.drop_p,
                                                       n_hidden=self.hidden_dim).to(pl_module.device)
         assert isinstance(pl_module.non_linear_evaluator, torch.nn.Module)
-        self.optimizer = torch.optim.Adam(pl_module.non_linear_evaluator.parameters(),
+        self.online_optimizer = torch.optim.Adam(pl_module.non_linear_evaluator.parameters(),
                                           lr=self.learning_rate,
                                           weight_decay=self.weight_decay)
 
@@ -126,9 +126,9 @@ def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, data
         if ids_linear_head not in self.visited_ids:
             self.visited_ids.add(ids_linear_head)
             loss = self.shared_step(batch, pl_module, is_training=True)
+            self.online_optimizer.zero_grad()
             loss.backward()
-            self.optimizer.step()
-            self.optimizer.zero_grad()
+            self.online_optimizer.step()
 
             # log metrics
             pl_module.log('ssl_online_evaluator/train/loss', loss)

From b14c9df45b05a1cb71cd9fbb673944238f16a7b5 Mon Sep 17 00:00:00 2001
From: melanibe
Date: Tue, 6 Jul 2021 15:51:05 +0000
Subject: [PATCH 10/26] Update byol code to match new pl bolts

---
 .../SSL/lightning_modules/byol/byol_module.py | 27 +++----------------
 1 file changed, 4 insertions(+), 23 deletions(-)

diff --git a/InnerEye/ML/SSL/lightning_modules/byol/byol_module.py b/InnerEye/ML/SSL/lightning_modules/byol/byol_module.py
index 8acc4bc43..a97dfdbad 100644
--- a/InnerEye/ML/SSL/lightning_modules/byol/byol_module.py
+++ b/InnerEye/ML/SSL/lightning_modules/byol/byol_module.py
@@ -9,7 +9,6 @@
 import pytorch_lightning as pl
 import torch
 import torch.nn.functional as F
-from pl_bolts.optimizers.lars_scheduling import LARSWrapper
 from pl_bolts.optimizers.lr_scheduler import LinearWarmupCosineAnnealingLR
 from torch import Tensor as T
 from torch.optim import Adam
@@ -110,32 +109,14 @@ def setup(self, *args: Any, **kwargs: Any) -> None:
         global_batch_size = self.trainer.world_size * self.hparams.batch_size  # type: ignore
         self.train_iters_per_epoch = self.hparams.num_samples // global_batch_size  # type: ignore
 
-    def configure_optimizers(self) -> Any:
-        """
-        Configures the optimizer to use for training: Adam optimizer with Lars scheduling, excluding certain parameters
-        (batch norm and bias of convolution) from weight decay. Apply Linear Cosine Annealing schedule of learning
-        rate with warm-up.
-        """
-        # TRICK 1 (Use lars + filter weights)
+    def configure_optimizers(self):
         # exclude certain parameters
         parameters = self.exclude_from_wt_decay(self.online_network.named_parameters(),
                                                 weight_decay=self.hparams.weight_decay)  # type: ignore
-        optimizer = LARSWrapper(Adam(parameters, lr=self.hparams.learning_rate))  # type: ignore
-
-        # Trick 2 (after each step)
-        self.hparams.warmup_epochs = self.hparams.warmup_epochs * self.train_iters_per_epoch  # type: ignore
-        max_epochs = self.trainer.max_epochs * self.train_iters_per_epoch
-
-        linear_warmup_cosine_decay = LinearWarmupCosineAnnealingLR(
-            optimizer,
-            warmup_epochs=self.hparams.warmup_epochs,  # type: ignore
-            max_epochs=max_epochs,
-            warmup_start_lr=0,
-            eta_min=self.min_learning_rate,
+        optimizer = Adam(parameters, lr=self.hparams.learning_rate, weight_decay=self.hparams.weight_decay)
+        scheduler = LinearWarmupCosineAnnealingLR(
+            optimizer, warmup_epochs=self.hparams.warmup_epochs, max_epochs=self.hparams.max_epochs
         )
-
-        scheduler = {'scheduler': linear_warmup_cosine_decay, 'interval': 'step', 'frequency': 1}
         return [optimizer], [scheduler]
 
     def exclude_from_wt_decay(self,

From eef975bea010caa18882f48e2e83963c7349f062 Mon Sep 17 00:00:00 2001
From: melanibe
Date: Tue, 6 Jul 2021 15:51:43 +0000
Subject: [PATCH 11/26] needed to update

---
 environment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index 3553be39c..9a2b6181a 100644
--- a/environment.yml
+++ b/environment.yml
@@ -26,7 +26,7 @@ dependencies:
   - joblib==0.16.0
   - jupyter==1.0.0
   - jupyter-client==6.1.5
-  - lightning-bolts==0.3.1
+  - lightning-bolts==0.3.4
   - matplotlib==3.3.0
   - mlflow==1.17.0
   - mypy==0.812

From 909853444fd3e49b0f9875250391f04f89ce5811 Mon Sep 17 00:00:00 2001
From: melanibe
Date: Tue, 6 Jul 2021 15:52:15 +0000
Subject: [PATCH 12/26] back to how it was

---
 InnerEye/ML/SSL/lightning_modules/ssl_online_evaluator.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/InnerEye/ML/SSL/lightning_modules/ssl_online_evaluator.py b/InnerEye/ML/SSL/lightning_modules/ssl_online_evaluator.py
index 429affda4..de1ccf2bb 100644
--- a/InnerEye/ML/SSL/lightning_modules/ssl_online_evaluator.py
+++ b/InnerEye/ML/SSL/lightning_modules/ssl_online_evaluator.py
@@ -56,7 +56,7 @@ def on_pretrain_routine_start(self, trainer: pl.Trainer, pl_module: pl.Lightning
                                                       p=self.drop_p,
                                                       n_hidden=self.hidden_dim).to(pl_module.device)
         assert isinstance(pl_module.non_linear_evaluator, torch.nn.Module)
-        self.online_optimizer = torch.optim.Adam(pl_module.non_linear_evaluator.parameters(),
+        self.optimizer = torch.optim.Adam(pl_module.non_linear_evaluator.parameters(),
                                           lr=self.learning_rate,
                                           weight_decay=self.weight_decay)
 
@@ -126,9 +126,9 @@ def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, data
         if ids_linear_head not in self.visited_ids:
             self.visited_ids.add(ids_linear_head)
             loss = self.shared_step(batch, pl_module, is_training=True)
-            self.online_optimizer.zero_grad()
+            self.optimizer.zero_grad()
             loss.backward()
-            self.online_optimizer.step()
+            self.optimizer.step()
 
             # log metrics
             pl_module.log('ssl_online_evaluator/train/loss', loss)

From 6e4ebbcb8b6755a0872c3bee9a37d910d21133e7 Mon Sep 17 00:00:00 2001
From: melanibe
Date: Tue, 6 Jul 2021 16:21:34 +0000
Subject: [PATCH 13/26] update

---
 InnerEye/ML/SSL/lightning_containers/ssl_container.py | 3 ++-
 InnerEye/ML/SSL/lightning_modules/byol/byol_module.py | 1 +
 Tests/SSL/byol/test_byol_module.py                    | 6 +++---
 Tests/SSL/byol/test_byol_moving_average.py            | 3 ++-
 4 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/InnerEye/ML/SSL/lightning_containers/ssl_container.py b/InnerEye/ML/SSL/lightning_containers/ssl_container.py
index 22be43f0e..6ad2bda8c 100644
--- a/InnerEye/ML/SSL/lightning_containers/ssl_container.py
+++ b/InnerEye/ML/SSL/lightning_containers/ssl_container.py
@@ -157,7 +157,8 @@ def create_model(self) -> LightningModule:
                                   batch_size=self.data_module.batch_size,
                                   learning_rate=self.l_rate,
                                   use_7x7_first_conv_in_resnet=use_7x7_first_conv_in_resnet,
-                                  warmup_epochs=10)
+                                  warmup_epochs=10,
+                                  max_epochs=self.num_epochs)
         else:
             raise ValueError(
                 f"Unknown value for ssl_training_type, should be {SSLTrainingType.SimCLR.value} or "
diff --git a/InnerEye/ML/SSL/lightning_modules/byol/byol_module.py b/InnerEye/ML/SSL/lightning_modules/byol/byol_module.py
index a97dfdbad..07250a5d3 100644
--- a/InnerEye/ML/SSL/lightning_modules/byol/byol_module.py
+++ b/InnerEye/ML/SSL/lightning_modules/byol/byol_module.py
@@ -32,6 +32,7 @@ def __init__(self,
                  batch_size: int,
                  encoder_name: str,
                  warmup_epochs: int,
+                 max_epochs: int,
                  use_7x7_first_conv_in_resnet: bool = True,
                  weight_decay: float = 1e-6,
                  **kwargs: Any) -> None:
diff --git a/Tests/SSL/byol/test_byol_module.py b/Tests/SSL/byol/test_byol_module.py
index 8674d95d3..ca0e4419e 100644
--- a/Tests/SSL/byol/test_byol_module.py
+++ b/Tests/SSL/byol/test_byol_module.py
@@ -26,7 +26,7 @@ def test_cosine_loss() -> None:
 
 # Test if initial set of parameters are equal between student and teacher.
 def test_module_param_eq() -> None:
-    byol = BYOLInnerEye(num_samples=16, learning_rate=1e-3, batch_size=4, encoder_name="resnet50", warmup_epochs=10)
+    byol = BYOLInnerEye(num_samples=16, learning_rate=1e-3, batch_size=4, encoder_name="resnet50", warmup_epochs=10, max_epochs=100)
     pars1 = byol.online_network.parameters()
     pars2 = byol.target_network.parameters()
     for par1, par2 in zip(pars1, pars2):
@@ -39,7 +39,7 @@ def test_encoder_init(encoder_name: str) -> None:
 
 
 # Test shared step - loss should be bounded between some value and cannot be outside that value.
 def test_shared_forward_step() -> None:
-    byol = BYOLInnerEye(num_samples=16, learning_rate=1e-3, batch_size=4, warmup_epochs=10, encoder_name="resnet50")
+    byol = BYOLInnerEye(num_samples=16, learning_rate=1e-3, batch_size=4, warmup_epochs=10, encoder_name="resnet50", max_epochs=100)
     imgs = torch.rand((4, 3, 32, 32))
     lbls = torch.rand((4,))
     batch = ([imgs, imgs], lbls)
@@ -50,7 +50,7 @@
 
 # Check if output pooling works
 def test_output_spatial_pooling() -> None:
-    byol = BYOLInnerEye(num_samples=16, learning_rate=1e-3, batch_size=4, warmup_epochs=10, encoder_name="resnet50")
+    byol = BYOLInnerEye(num_samples=16, learning_rate=1e-3, batch_size=4, warmup_epochs=10, encoder_name="resnet50", max_epochs=100)
     imgs = torch.rand((4, 3, 32, 32))
     embeddings = byol(imgs)
diff --git a/Tests/SSL/byol/test_byol_moving_average.py b/Tests/SSL/byol/test_byol_moving_average.py
index 965384722..50f24c32e 100644
--- a/Tests/SSL/byol/test_byol_moving_average.py
+++ b/Tests/SSL/byol/test_byol_moving_average.py
@@ -43,7 +43,8 @@ def __getitem__(self, item: Any) -> Any:
                                learning_rate=1e-3,
                                batch_size=4,
                                encoder_name="resnet50",
-                               warmup_epochs=10)
+                               warmup_epochs=10,
+                               max_epochs=100)
     with mock.patch("InnerEye.ML.SSL.lightning_modules.byol.byol_module.BYOLInnerEye.global_step", 15):
         new_tau = byol_weight_update.update_tau(pl_module=byol_module, trainer=trainer)
         assert new_tau == 1 - 0.01 * (math.cos(math.pi * 15 / total_steps) + 1) / 2

From 076b881b865819703872cf31bfd4bad22aae3c96 Mon Sep 17 00:00:00 2001
From: melanibe
Date: Tue, 6 Jul 2021 17:28:34 +0000
Subject: [PATCH 14/26] update

---
 Tests/SSL/byol/test_byol_module.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Tests/SSL/byol/test_byol_module.py b/Tests/SSL/byol/test_byol_module.py
index ca0e4419e..18cb43ab3 100644
--- a/Tests/SSL/byol/test_byol_module.py
+++ b/Tests/SSL/byol/test_byol_module.py
@@ -35,7 +35,7 @@ def test_module_param_eq() -> None:
 # Test initialisation with different encoder types.
 @pytest.mark.parametrize("encoder_name", ["resnet18", "resnet50", "resnet101", "densenet121"])
 def test_encoder_init(encoder_name: str) -> None:
-    BYOLInnerEye(num_samples=16, learning_rate=1e-3, batch_size=4, warmup_epochs=10, encoder_name=encoder_name)
+    BYOLInnerEye(num_samples=16, learning_rate=1e-3, batch_size=4, warmup_epochs=10, encoder_name=encoder_name, max_epochs=100)
 
 
 # Test shared step - loss should be bounded between some value and cannot be outside that value.
 def test_shared_forward_step() -> None:

From 99f87b5e5f4234639e6f7bacb6734231a44dc2d2 Mon Sep 17 00:00:00 2001
From: melanibe
Date: Wed, 7 Jul 2021 10:07:59 +0000
Subject: [PATCH 15/26] changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 13f042dd1..fca98d906 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,6 +20,7 @@ module on test data with partial ground truth files. (Also [522](https://github.
 jobs that run in AzureML.
 
 ### Changed
+- ([#531](https://github.com/microsoft/InnerEye-DeepLearning/pull/531)) Updated PL to 1.3.8, torchmetrics and pl-bolts and changed relevant metrics and SSL code API.
 - ([#502](https://github.com/microsoft/InnerEye-DeepLearning/pull/502)) Renamed command line option 'perform_training_set_inference' to 'inference_on_train_set'. Replaced command line option 'perform_validation_and_test_set_inference' with the pair of options 'inference_on_val_set' and 'inference_on_test_set'.
 - ([#496](https://github.com/microsoft/InnerEye-DeepLearning/pull/496)) All plots are now saved as PNG, rather than JPG.
 - ([#497](https://github.com/microsoft/InnerEye-DeepLearning/pull/497)) Reducing the size of the code snapshot that

From e67d2446f1868bd8486ed12dca076d354ae7fed0 Mon Sep 17 00:00:00 2001
From: melanibe
Date: Wed, 7 Jul 2021 13:17:30 +0000
Subject: [PATCH 16/26] update regression metrics

---
 .../PR_BasicModel2Epochs/OUTPUT/Train/epoch_metrics.csv     | 4 ++--
 .../PR_BasicModel2Epochs/OUTPUT/Val/epoch_metrics.csv       | 4 ++--
 RegressionTestResults/PR_HelloContainer/OUTPUT/test_mse.txt | 2 +-
 .../PR_Train2Nodes/OUTPUT/Train/epoch_metrics.csv           | 4 ++--
 .../PR_Train2Nodes/OUTPUT/Val/epoch_metrics.csv             | 4 ++--
 5 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/RegressionTestResults/PR_BasicModel2Epochs/OUTPUT/Train/epoch_metrics.csv b/RegressionTestResults/PR_BasicModel2Epochs/OUTPUT/Train/epoch_metrics.csv
index 9b8f039c6..bcab7e484 100644
--- a/RegressionTestResults/PR_BasicModel2Epochs/OUTPUT/Train/epoch_metrics.csv
+++ b/RegressionTestResults/PR_BasicModel2Epochs/OUTPUT/Train/epoch_metrics.csv
@@ -1,3 +1,3 @@
 subject_count,loss,learning_rate,Dice/AverageAcrossStructures,Dice/spinalcord,Dice/lung_r,Dice/lung_l,VoxelCount/spinalcord,VoxelCount/lung_r,VoxelCount/lung_l,epoch,cross_validation_split_index
-2.000000,0.718717,0.000100,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,126273.000000,0,-1
-2.000000,0.775691,0.000090,0.000000,0.000000,0.000000,0.000000,0.000000,84030.000000,0.000000,1,-1
+1.000000,0.718559,0.000100,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,98256.000000,0,-1
+1.000000,0.792988,0.000090,0.000000,0.000000,0.000000,0.000000,0.000000,43307.000000,13992.500000,1,-1
diff --git a/RegressionTestResults/PR_BasicModel2Epochs/OUTPUT/Val/epoch_metrics.csv b/RegressionTestResults/PR_BasicModel2Epochs/OUTPUT/Val/epoch_metrics.csv
index 72b267213..34ffeee9d 100644
--- a/RegressionTestResults/PR_BasicModel2Epochs/OUTPUT/Val/epoch_metrics.csv
+++ b/RegressionTestResults/PR_BasicModel2Epochs/OUTPUT/Val/epoch_metrics.csv
@@ -1,3 +1,3 @@
 subject_count,loss,Dice/AverageAcrossStructures,Dice/spinalcord,Dice/lung_r,Dice/lung_l,VoxelCount/spinalcord,VoxelCount/lung_r,VoxelCount/lung_l,epoch,cross_validation_split_index
-2.000000,0.716739,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,84282.000000,0,-1
-2.000000,0.716731,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,84282.000000,1,-1
+1.000000,0.715468,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,89502.000000,0,-1
+1.000000,0.715476,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,89502.000000,1,-1
diff --git a/RegressionTestResults/PR_HelloContainer/OUTPUT/test_mse.txt b/RegressionTestResults/PR_HelloContainer/OUTPUT/test_mse.txt
index 5ccc7afeb..7f15df5f3 100644
--- a/RegressionTestResults/PR_HelloContainer/OUTPUT/test_mse.txt
+++ b/RegressionTestResults/PR_HelloContainer/OUTPUT/test_mse.txt
@@ -1 +1 @@
-0.011612942442297935
\ No newline at end of file
+0.012701375409960747
\ No newline at end of file
diff --git a/RegressionTestResults/PR_Train2Nodes/OUTPUT/Train/epoch_metrics.csv b/RegressionTestResults/PR_Train2Nodes/OUTPUT/Train/epoch_metrics.csv
index ef43fba9c..7a801bdb9 100644
--- a/RegressionTestResults/PR_Train2Nodes/OUTPUT/Train/epoch_metrics.csv
+++ b/RegressionTestResults/PR_Train2Nodes/OUTPUT/Train/epoch_metrics.csv
@@ -1,3 +1,3 @@
 subject_count,loss,learning_rate,Dice/AverageAcrossStructures,Dice/spinalcord,Dice/lung_r,Dice/lung_l,VoxelCount/spinalcord,VoxelCount/lung_r,VoxelCount/lung_l,epoch,cross_validation_split_index
-4.000000,0.716913,0.000100,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,99342.000000,0,-1
-4.000000,0.773825,0.000090,0.000000,0.000000,0.000000,0.000000,181.250000,83803.250000,122.250000,1,-1
+1.000000,0.753852,0.000100,0.000000,0.000000,0.000000,0.000000,0.000000,18609.000000,61179.000000,0,-1
+1.000000,0.773389,0.000090,0.000000,0.000000,0.000000,0.000000,0.000000,33453.000000,24258.500000,1,-1
diff --git a/RegressionTestResults/PR_Train2Nodes/OUTPUT/Val/epoch_metrics.csv b/RegressionTestResults/PR_Train2Nodes/OUTPUT/Val/epoch_metrics.csv
index 51a3b2dba..d53bfc7d8 100644
--- a/RegressionTestResults/PR_Train2Nodes/OUTPUT/Val/epoch_metrics.csv
+++ b/RegressionTestResults/PR_Train2Nodes/OUTPUT/Val/epoch_metrics.csv
@@ -1,3 +1,3 @@
 subject_count,loss,Dice/AverageAcrossStructures,Dice/spinalcord,Dice/lung_r,Dice/lung_l,VoxelCount/spinalcord,VoxelCount/lung_r,VoxelCount/lung_l,epoch,cross_validation_split_index
-4.000000,0.729041,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,65335.000000,0,-1
-4.000000,0.729027,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,65335.000000,1,-1
+1.000000,0.758059,0.000000,0.000000,0.000000,0.000000,0.000000,32021.000000,48380.750000,0,-1
+1.000000,0.758054,0.000000,0.000000,0.000000,0.000000,0.000000,32021.000000,48380.750000,1,-1

From 3fb78c37e084694e67fb13a3f86537a2fa15ec4f Mon Sep 17 00:00:00 2001
From: melanibe
Date: Wed, 7 Jul 2021 13:36:09 +0000
Subject: [PATCH 17/26] skip test on wind

---
 Tests/ML/test_plotting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Tests/ML/test_plotting.py b/Tests/ML/test_plotting.py
index 5070f51e0..8b98c5782 100644
--- a/Tests/ML/test_plotting.py
+++ b/Tests/ML/test_plotting.py
@@ -167,7 +167,7 @@ def test_plot_normalization_result(test_output_dirs: OutputFolderForTests) -> No
     expected = ["042_slice_001.png", "042_slice_001_contour.png"]
     compare_files(files, expected)
 
-
+@pytest.mark.skipif(common_util.is_windows(), reason="Random plotting errors on Windows")
 def test_plot_contours_for_all_classes(test_output_dirs: OutputFolderForTests) -> None:
     size = (3, 3, 3)
     image = np.zeros((1,) + size)

From 15990b344dc4560adab41fcaacd8cfd0eb5d7c20 Mon Sep 17 00:00:00 2001
From: melanibe
Date: Wed, 7 Jul 2021 13:36:40 +0000
Subject: [PATCH 18/26] flake8

---
 InnerEye/ML/model_training.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/InnerEye/ML/model_training.py b/InnerEye/ML/model_training.py
index b1c034102..475f4e47d 100644
--- a/InnerEye/ML/model_training.py
+++ b/InnerEye/ML/model_training.py
@@ -21,7 +21,7 @@
 from InnerEye.Azure.azure_util import RUN_CONTEXT, is_offline_run_context
 from InnerEye.Common.common_util import SUBJECT_METRICS_FILE_NAME, change_working_directory
 from InnerEye.Common.resource_monitor import ResourceMonitor
-from InnerEye.ML.common import ModelExecutionMode, RECOVERY_CHECKPOINT_FILE_NAME, create_best_checkpoint, LAST_CHECKPOINT_FILE_NAME
+from InnerEye.ML.common import ModelExecutionMode, RECOVERY_CHECKPOINT_FILE_NAME, create_best_checkpoint
 from InnerEye.ML.deep_learning_config import ARGS_TXT, VISUALIZATION_FOLDER
 from InnerEye.ML.lightning_base import InnerEyeContainer, InnerEyeLightning
 from InnerEye.ML.lightning_container import LightningContainer

From dc3c5fcd0c8e3cbf2fa72a7e610811f77dc6fbb6 Mon Sep 17 00:00:00 2001
From: melanibe
Date: Wed, 7 Jul 2021 14:59:57 +0000
Subject: [PATCH 19/26] forgot to update this

---
 .../PR_TrainEnsemble/OUTPUT/Train/epoch_metrics.csv | 4 ++--
 .../PR_TrainEnsemble/OUTPUT/Val/epoch_metrics.csv   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/RegressionTestResults/PR_TrainEnsemble/OUTPUT/Train/epoch_metrics.csv b/RegressionTestResults/PR_TrainEnsemble/OUTPUT/Train/epoch_metrics.csv
index e179b844d..e677cc029 100644
--- a/RegressionTestResults/PR_TrainEnsemble/OUTPUT/Train/epoch_metrics.csv
+++ b/RegressionTestResults/PR_TrainEnsemble/OUTPUT/Train/epoch_metrics.csv
@@ -1,3 +1,3 @@
 subject_count,loss,learning_rate,Dice/AverageAcrossStructures,Dice/spinalcord,Dice/lung_r,Dice/lung_l,VoxelCount/spinalcord,VoxelCount/lung_r,VoxelCount/lung_l,epoch,cross_validation_split_index
-2.000000,0.723947,0.000100,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,148093.000000,0,0
-2.000000,0.768603,0.000090,0.000000,0.000000,0.000000,0.000000,0.000000,86614.000000,0.000000,1,0
+1.000000,0.720039,0.000100,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,110279.000000,0,0
+1.000000,0.793326,0.000090,0.000000,0.000000,0.000000,0.000000,0.000000,43307.000000,3900.500000,1,0
diff --git a/RegressionTestResults/PR_TrainEnsemble/OUTPUT/Val/epoch_metrics.csv b/RegressionTestResults/PR_TrainEnsemble/OUTPUT/Val/epoch_metrics.csv
index 5137305ea..751cccbbc 100644
--- a/RegressionTestResults/PR_TrainEnsemble/OUTPUT/Val/epoch_metrics.csv
+++ b/RegressionTestResults/PR_TrainEnsemble/OUTPUT/Val/epoch_metrics.csv
@@ -1,3 +1,3 @@
 subject_count,loss,Dice/AverageAcrossStructures,Dice/spinalcord,Dice/lung_r,Dice/lung_l,VoxelCount/spinalcord,VoxelCount/lung_r,VoxelCount/lung_l,epoch,cross_validation_split_index
-2.000000,0.715168,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,94367.500000,0,0
-2.000000,0.715166,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,94367.500000,1,0
+1.000000,0.721449,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,54751.500000,0,0
+1.000000,0.721449,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,54751.500000,1,0

From 0bb4a9ab8e2e0420c4b5379c47bab8fd785d3b5d Mon Sep 17 00:00:00 2001
From: melanibe
Date: Thu, 8 Jul 2021 14:51:20 +0000
Subject: [PATCH 20/26] mypy

---
 InnerEye/ML/SSL/datamodules_and_datasets/datamodules.py | 2 +-
 InnerEye/ML/SSL/encoders.py                             | 2 +-
 InnerEye/ML/SSL/lightning_modules/byol/byol_module.py   | 9 +++++----
 .../ML/SSL/lightning_modules/ssl_classifier_module.py   | 2 +-
 .../ML/SSL/lightning_modules/ssl_online_evaluator.py    | 2 +-
 InnerEye/ML/lightning_base.py                           | 7 ++++---
 InnerEye/ML/lightning_metrics.py                        | 2 +-
 InnerEye/ML/lightning_models.py                         | 6 ++++--
 InnerEye/ML/model_training.py                           | 8 ++++----
 Tests/ML/utils/test_model_util.py                       | 4 ++--
 10 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/InnerEye/ML/SSL/datamodules_and_datasets/datamodules.py b/InnerEye/ML/SSL/datamodules_and_datasets/datamodules.py
index 288ccbfe9..19b4864ba 100644
--- a/InnerEye/ML/SSL/datamodules_and_datasets/datamodules.py
+++ b/InnerEye/ML/SSL/datamodules_and_datasets/datamodules.py
@@ -130,7 +130,7 @@ def prepare_data(self, *args: Any, **kwargs: Any) -> None:
         logging.info(f"Len encoder train dataloader {len(self.encoder_module.train_dataloader())}")
         logging.info(f"Len total train dataloader {len(self.train_dataloader())}")
 
-    def train_dataloader(self, *args: Any, **kwargs: Any) -> Dict[SSLDataModuleType, DataLoader]:
+    def train_dataloader(self, *args: Any, **kwargs: Any) -> Dict[SSLDataModuleType, DataLoader]:  # type: ignore
         """
         The train dataloaders
         """
diff --git a/InnerEye/ML/SSL/encoders.py b/InnerEye/ML/SSL/encoders.py
index e0113654a..5ebcfdbf6 100644
--- a/InnerEye/ML/SSL/encoders.py
+++ b/InnerEye/ML/SSL/encoders.py
@@ -71,7 +71,7 @@ def get_encoder_output_dim(pl_module: Union[pl.LightningModule, torch.nn.Module]
     if dm is not None:
         from InnerEye.ML.SSL.lightning_modules.ssl_online_evaluator import SSLOnlineEvaluatorInnerEye
         dataloader = dm.train_dataloader()
-        dataloader = dataloader[SSLDataModuleType.LINEAR_HEAD] if isinstance(dataloader, dict) else dataloader
+        dataloader = dataloader[SSLDataModuleType.LINEAR_HEAD] if isinstance(dataloader, dict) else dataloader  # type: ignore
         batch = iter(dataloader).next()  # type: ignore
         x, _ = SSLOnlineEvaluatorInnerEye.to_device(batch, device)
     else:
diff --git a/InnerEye/ML/SSL/lightning_modules/byol/byol_module.py b/InnerEye/ML/SSL/lightning_modules/byol/byol_module.py
index 07250a5d3..603f45063 100644
--- a/InnerEye/ML/SSL/lightning_modules/byol/byol_module.py
+++ b/InnerEye/ML/SSL/lightning_modules/byol/byol_module.py
@@ -16,6 +16,7 @@
 from InnerEye.ML.SSL.lightning_modules.byol.byol_models import SiameseArm
 from InnerEye.ML.SSL.lightning_modules.byol.byol_moving_average import ByolMovingAverageWeightUpdate
 from InnerEye.ML.SSL.utils import SSLDataModuleType
+from pytorch_lightning import Trainer
 
 SingleBatchType = Tuple[List, T]
 BatchType = Union[Dict[SSLDataModuleType, SingleBatchType], SingleBatchType]
@@ -59,6 +60,7 @@ def __init__(self,
 
     def on_train_batch_end(self, *args: Any, **kwargs: Any) -> None:
         # Add callback for user automatically since it's key to BYOL weight update
+        assert isinstance(self.trainer, Trainer)
         self.weight_callback.on_before_zero_grad(self.trainer, self)
 
     def forward(self, x: T) -> T:  # type: ignore
@@ -110,14 +112,13 @@ def setup(self, *args: Any, **kwargs: Any) -> None:
         global_batch_size = self.trainer.world_size * self.hparams.batch_size  # type: ignore
         self.train_iters_per_epoch = self.hparams.num_samples // global_batch_size  # type: ignore
 
-    def configure_optimizers(self):
+    def configure_optimizers(self) -> Any:
         # exclude certain parameters
         parameters = self.exclude_from_wt_decay(self.online_network.named_parameters(),
                                                 weight_decay=self.hparams.weight_decay)  # type: ignore
-        optimizer = Adam(parameters, lr=self.hparams.learning_rate, weight_decay=self.hparams.weight_decay)
+        optimizer = Adam(parameters, lr=self.hparams.learning_rate, weight_decay=self.hparams.weight_decay)  # type: ignore
         scheduler = LinearWarmupCosineAnnealingLR(
-            optimizer, warmup_epochs=self.hparams.warmup_epochs, max_epochs=self.hparams.max_epochs
-        )
+            optimizer, warmup_epochs=self.hparams.warmup_epochs, max_epochs=self.hparams.max_epochs)  # type: ignore
         return [optimizer], [scheduler]
 
     def exclude_from_wt_decay(self,
diff --git a/InnerEye/ML/SSL/lightning_modules/ssl_classifier_module.py b/InnerEye/ML/SSL/lightning_modules/ssl_classifier_module.py
index dde76d362..e5908bc47 100644
--- a/InnerEye/ML/SSL/lightning_modules/ssl_classifier_module.py
+++ b/InnerEye/ML/SSL/lightning_modules/ssl_classifier_module.py
@@ -5,7 +5,7 @@
 from typing import Any, List, Optional
 
 import torch
-from pytorch_lightning.metrics import Metric
+from torchmetrics import Metric
 from pl_bolts.models.self_supervised import SSLEvaluator
 from torch.nn import functional as F
 
diff --git a/InnerEye/ML/SSL/lightning_modules/ssl_online_evaluator.py b/InnerEye/ML/SSL/lightning_modules/ssl_online_evaluator.py
index de1ccf2bb..3eddaff53 100644
--- a/InnerEye/ML/SSL/lightning_modules/ssl_online_evaluator.py
+++ b/InnerEye/ML/SSL/lightning_modules/ssl_online_evaluator.py
@@ -9,7 +9,7 @@
 import torch
 from pl_bolts.callbacks.ssl_online import SSLOnlineEvaluator
 from pl_bolts.models.self_supervised.evaluator import SSLEvaluator
-from pytorch_lightning.metrics import Metric
+from torchmetrics import Metric
 from torch import Tensor as T
 from torch.nn import functional as F
 
diff --git a/InnerEye/ML/lightning_base.py b/InnerEye/ML/lightning_base.py
index 417e02d1b..878379c09 100644
--- a/InnerEye/ML/lightning_base.py
+++ b/InnerEye/ML/lightning_base.py
@@ -9,7 +9,7 @@
 import param
 import torch
-from pytorch_lightning import LightningDataModule, LightningModule
+from pytorch_lightning import LightningDataModule, LightningModule, Trainer
 from pytorch_lightning.utilities import rank_zero_only
 from torch.optim import Optimizer
 from torch.optim.lr_scheduler import _LRScheduler
@@ -257,8 +257,8 @@ def use_sync_dist(self) -> bool:
         Returns True if metric logging should use sync_dist=True. This is read off from the use_ddp flag of the
         trainer.
         """
-        # For PL from version 1.2.0 on: self.trainer.accelerator_connector.use_ddp
-        return self.trainer.use_ddp
+        assert isinstance(self.trainer, Trainer)
+        return self.trainer.accelerator_connector.use_ddp
 
     def on_train_epoch_start(self) -> None:
         self.train_timers.reset()
@@ -497,6 +497,7 @@ def write_loss(self, is_training: bool, loss: torch.Tensor) -> None:
         :param is_training: If True, the logged metric will be called "train/Loss". If False, the metric will be
             called "val/Loss"
         """
+        assert isinstance(self.trainer, Trainer)
         self.log_on_epoch(MetricType.LOSS, loss, is_training)
         if is_training:
             learning_rate = self.trainer.lr_schedulers[0]['scheduler'].get_last_lr()[0]
diff --git a/InnerEye/ML/lightning_metrics.py b/InnerEye/ML/lightning_metrics.py
index f66ed03ee..fedd463e9 100644
--- a/InnerEye/ML/lightning_metrics.py
+++ b/InnerEye/ML/lightning_metrics.py
@@ -9,7 +9,7 @@
 import torch
 import torch.nn.functional as F
 import torchmetrics as metrics
-from pytorch_lightning.metrics import Metric
+from torchmetrics import Metric
 from pytorch_lightning.metrics.functional import accuracy, auc, auroc, precision_recall_curve, roc
 from torch.nn import ModuleList
 
diff --git a/InnerEye/ML/lightning_models.py b/InnerEye/ML/lightning_models.py
index 490e693ee..f27a1231f 100644
--- a/InnerEye/ML/lightning_models.py
+++ b/InnerEye/ML/lightning_models.py
@@ -27,6 +27,7 @@
 from InnerEye.ML.utils import image_util, metrics_util, model_util
 from InnerEye.ML.utils.model_util import get_scalar_model_inputs_and_labels
 from InnerEye.ML.utils.sequence_utils import apply_sequence_model_loss, get_masked_model_outputs_and_labels
+from pytorch_lightning import Trainer
 
 SUBJECT_OUTPUT_PER_RANK_PREFIX = f"{SUBJECT_METRICS_FILE_NAME}.rank"
 
@@ -249,6 +250,7 @@ def on_train_start(self) -> None:
         """
         # These loggers store the per-subject model outputs. They cannot be initialized in the constructor because
         # the trainer object will not yet be set, and we need to get the rank from there.
+        assert isinstance(self.trainer, Trainer)
         fixed_logger_columns = {LoggingColumns.CrossValidationSplitIndex.value: self.cross_validation_split_index}
         subject_output_file = get_subject_output_file_per_rank(self.trainer.global_rank)
         self.train_subject_outputs_logger = DataframeLogger(self.train_metrics_folder / subject_output_file,
@@ -323,7 +325,7 @@ def compute_and_log_metrics(self,
                 zip(_subject_ids, [prediction_target] * len(_subject_ids), _posteriors.tolist(), _labels.tolist()))
         # Write a full breakdown of per-subject predictions and labels to a file. These files are local to the current
         # rank in distributed training, and will be aggregated after training.
-        logger = self.train_subject_outputs_logger if is_training else self.val_subject_outputs_logger
+        logger = self.train_subject_outputs_logger if is_training else self.val_subject_outputs_logger  # type: ignore
         data_split = ModelExecutionMode.TRAIN if is_training else ModelExecutionMode.VAL
         for subject, prediction_target, model_output, label in per_subject_outputs:
             logger.add_record({
@@ -350,7 +352,7 @@ def training_or_validation_epoch_end(self, is_training: bool) -> None:
                 # Hence, only log if anything has been accumulated.
                 self.log(name=prefix + metric.name + target_suffix, value=metric.compute())
                 metric.reset()
-        logger = self.train_subject_outputs_logger if is_training else self.val_subject_outputs_logger
+        logger = self.train_subject_outputs_logger if is_training else self.val_subject_outputs_logger  # type: ignore
         logger.flush()
         super().training_or_validation_epoch_end(is_training)
 
diff --git a/InnerEye/ML/model_training.py b/InnerEye/ML/model_training.py
index 475f4e47d..5eb56b6af 100644
--- a/InnerEye/ML/model_training.py
+++ b/InnerEye/ML/model_training.py
@@ -99,8 +99,8 @@ def __init__(self, container: LightningContainer):
                          mode="max",
                          save_last=False)
 
-    def on_train_epoch_end(self, trainer: Trainer, pl_module: LightningModule, outputs: Any) -> None:
-        pl_module.log(name="epoch", value=trainer.current_epoch)
+    def on_train_epoch_end(self, trainer: Trainer, pl_module: LightningModule, unused: bool=None) -> None:
+        pl_module.log(name="epoch", value=trainer.current_epoch)  # type: ignore
 
 
 def create_lightning_trainer(container: LightningContainer,
@@ -272,8 +272,8 @@ def model_train(checkpoint_handler: CheckpointHandler,
     # Per-subject model outputs for regression models are written per rank, and need to be aggregated here.
     # Each thread per rank will come here, and upload its files to the run outputs. Rank 0 will later download them.
     if is_azureml_run and world_size > 1 and isinstance(lightning_model, ScalarLightning):
-        upload_output_file_as_temp(lightning_model.train_subject_outputs_logger.csv_path, container.outputs_folder)
-        upload_output_file_as_temp(lightning_model.val_subject_outputs_logger.csv_path, container.outputs_folder)
+        upload_output_file_as_temp(lightning_model.train_subject_outputs_logger.csv_path, container.outputs_folder)  # type: ignore
+        upload_output_file_as_temp(lightning_model.val_subject_outputs_logger.csv_path, container.outputs_folder)  # type: ignore
     # DDP will start multiple instances of the runner, one for each GPU. Those should terminate here after training.
     # We can now use the global_rank of the Lightining model, rather than environment variables, because DDP has set
     # all necessary properties.
diff --git a/Tests/ML/utils/test_model_util.py b/Tests/ML/utils/test_model_util.py
index 4892e9f7a..13a64db69 100644
--- a/Tests/ML/utils/test_model_util.py
+++ b/Tests/ML/utils/test_model_util.py
@@ -45,8 +45,8 @@ def create_model_and_store_checkpoint(config: ModelConfigBase, checkpoint_path:
     trainer.model = model
     # Before saving, the values for epoch and step are incremented. Save them here in such a way that we can assert
     # easily later.
-    trainer.current_epoch = FIXED_EPOCH - 1
-    trainer.global_step = FIXED_GLOBAL_STEP - 1
+    trainer.current_epoch = FIXED_EPOCH - 1  # type: ignore
+    trainer.global_step = FIXED_GLOBAL_STEP - 1  # type: ignore
     # In PL, it is the Trainer's responsibility to save the model. Checkpoint handling refers back to the trainer
     # to get a save_func. Mimicking that here.
     trainer.save_checkpoint(checkpoint_path, weights_only=weights_only)

From bc55f1024a1b5b55a8e38d53fef054014d50f60b Mon Sep 17 00:00:00 2001
From: melanibe
Date: Thu, 8 Jul 2021 14:55:26 +0000
Subject: [PATCH 21/26] remove comment

---
 InnerEye/ML/model_training.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/InnerEye/ML/model_training.py b/InnerEye/ML/model_training.py
index 5eb56b6af..49c20591f 100644
--- a/InnerEye/ML/model_training.py
+++ b/InnerEye/ML/model_training.py
@@ -157,7 +157,7 @@ def create_lightning_trainer(container: LightningContainer,
         benchmark = True
     # If the users provides additional callbacks via get_trainer_arguments (for custom
     # containers
-    callbacks = [last_checkpoint_callback, recovery_checkpoint_callback]  # last_checkpoint_callback,
+    callbacks = [last_checkpoint_callback, recovery_checkpoint_callback]
     if "callbacks" in kwargs:
         callbacks.append(kwargs.pop("callbacks"))  # type: ignore
     is_azureml_run = not is_offline_run_context(RUN_CONTEXT)

From 64eda3468f13d1d1908d212894f86911e97ac61c Mon Sep 17 00:00:00 2001
From: melanibe
Date: Thu, 8 Jul 2021 15:51:15 +0000
Subject: [PATCH 22/26] try to see if problem comes from sync dist flag

---
 InnerEye/ML/lightning_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/InnerEye/ML/lightning_base.py b/InnerEye/ML/lightning_base.py
index 878379c09..82c4d56f0 100644
--- a/InnerEye/ML/lightning_base.py
+++ b/InnerEye/ML/lightning_base.py
@@ -383,7 +383,7 @@ def log_on_epoch(self,
         if isinstance(value, numbers.Number):
             value = torch.tensor(value, dtype=torch.float, device=self.device)
         prefix = TRAIN_PREFIX if is_training else VALIDATION_PREFIX
-        sync_dist = self.use_sync_dist if sync_dist_override is None else sync_dist_override
+        sync_dist = True if sync_dist_override is None else sync_dist_override
         self.log(prefix + metric_name, value,
                  sync_dist=sync_dist,
                  on_step=False, on_epoch=True,

From 26dea77e759e5f0ee21e550c54a89045495f698b Mon Sep 17 00:00:00 2001
From: melanibe
Date: Fri, 9 Jul 2021 09:08:06 +0000
Subject: [PATCH 23/26] few fixes

---
 InnerEye/ML/SSL/lightning_modules/ssl_classifier_module.py | 2 +-
 InnerEye/ML/lightning_base.py                              | 2 +-
 InnerEye/ML/lightning_models.py                            | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/InnerEye/ML/SSL/lightning_modules/ssl_classifier_module.py b/InnerEye/ML/SSL/lightning_modules/ssl_classifier_module.py
index e5908bc47..805366f22 100644
--- a/InnerEye/ML/SSL/lightning_modules/ssl_classifier_module.py
+++ b/InnerEye/ML/SSL/lightning_modules/ssl_classifier_module.py
@@ -86,7 +86,7 @@ def training_step(self, batch: Any, batch_id: int, *args: Any, **kwargs: Any) ->
 
     def validation_step(self, batch: Any, batch_id: int, *args: Any, **kwargs: Any) -> None:  # type: ignore
         loss = self.shared_step(batch, is_training=False)
-        self.log('val/loss', loss, on_step=False, on_epoch=True, sync_dist=False)
+        self.log('val/loss', loss, on_step=False, on_epoch=True, sync_dist=True)
         for metric in self.val_metrics:
             self.log(f"val/{metric.name}", metric, on_epoch=True, on_step=False)
 
diff --git a/InnerEye/ML/lightning_base.py b/InnerEye/ML/lightning_base.py
index 82c4d56f0..878379c09 100644
--- a/InnerEye/ML/lightning_base.py
+++ b/InnerEye/ML/lightning_base.py
@@ -383,7 +383,7 @@ def log_on_epoch(self,
         if isinstance(value, numbers.Number):
             value = torch.tensor(value, dtype=torch.float, device=self.device)
         prefix = TRAIN_PREFIX if is_training else VALIDATION_PREFIX
-        sync_dist = True if sync_dist_override is None else sync_dist_override
+        sync_dist = self.use_sync_dist if sync_dist_override is None else sync_dist_override
         self.log(prefix + metric_name, value,
                  sync_dist=sync_dist,
                  on_step=False, on_epoch=True,
diff --git a/InnerEye/ML/lightning_models.py b/InnerEye/ML/lightning_models.py
index f27a1231f..cd512c60b 100644
--- a/InnerEye/ML/lightning_models.py
+++ b/InnerEye/ML/lightning_models.py
@@ -143,8 +143,8 @@ def compute_metrics(self, cropped_sample: CroppedSample, segmentation: torch.Ten
         self.log_on_epoch(name=MetricType.SUBJECT_COUNT,
                           value=num_subjects,
                           is_training=is_training,
-                          reduce_fx=sum,
-                          sync_dist_op=None)
+                          reduce_fx=torch.sum,
+                          sync_dist_op="sum")
 
     def training_or_validation_epoch_end(self, is_training: bool) -> None:
         """

From ddfc302bd4da352526a54ab0a8a93b4cc894e6e0 Mon Sep 17 00:00:00 2001
From: melanibe
Date: Fri, 9 Jul 2021 09:17:05 +0000
Subject: [PATCH 24/26] Update expected number of subjects

---
 .../PR_Train2Nodes/OUTPUT/Train/epoch_metrics.csv   | 4 ++--
 .../PR_Train2Nodes/OUTPUT/Val/epoch_metrics.csv     | 4 ++--
 .../PR_TrainEnsemble/OUTPUT/Train/epoch_metrics.csv | 4 ++--
 .../PR_TrainEnsemble/OUTPUT/Val/epoch_metrics.csv   | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/RegressionTestResults/PR_Train2Nodes/OUTPUT/Train/epoch_metrics.csv b/RegressionTestResults/PR_Train2Nodes/OUTPUT/Train/epoch_metrics.csv
index 7a801bdb9..907a2371c 100644
--- a/RegressionTestResults/PR_Train2Nodes/OUTPUT/Train/epoch_metrics.csv
+++ b/RegressionTestResults/PR_Train2Nodes/OUTPUT/Train/epoch_metrics.csv
@@ -1,3 +1,3 @@
 subject_count,loss,learning_rate,Dice/AverageAcrossStructures,Dice/spinalcord,Dice/lung_r,Dice/lung_l,VoxelCount/spinalcord,VoxelCount/lung_r,VoxelCount/lung_l,epoch,cross_validation_split_index
-1.000000,0.753852,0.000100,0.000000,0.000000,0.000000,0.000000,0.000000,18609.000000,61179.000000,0,-1
-1.000000,0.773389,0.000090,0.000000,0.000000,0.000000,0.000000,0.000000,33453.000000,24258.500000,1,-1
+4.000000,0.753852,0.000100,0.000000,0.000000,0.000000,0.000000,0.000000,18609.000000,61179.000000,0,-1
+4.000000,0.773389,0.000090,0.000000,0.000000,0.000000,0.000000,0.000000,33453.000000,24258.500000,1,-1
diff --git a/RegressionTestResults/PR_Train2Nodes/OUTPUT/Val/epoch_metrics.csv b/RegressionTestResults/PR_Train2Nodes/OUTPUT/Val/epoch_metrics.csv
index d53bfc7d8..1b716b1da 100644
--- a/RegressionTestResults/PR_Train2Nodes/OUTPUT/Val/epoch_metrics.csv
+++ b/RegressionTestResults/PR_Train2Nodes/OUTPUT/Val/epoch_metrics.csv
@@ -1,3 +1,3 @@
 subject_count,loss,Dice/AverageAcrossStructures,Dice/spinalcord,Dice/lung_r,Dice/lung_l,VoxelCount/spinalcord,VoxelCount/lung_r,VoxelCount/lung_l,epoch,cross_validation_split_index
-1.000000,0.758059,0.000000,0.000000,0.000000,0.000000,0.000000,32021.000000,48380.750000,0,-1
-1.000000,0.758054,0.000000,0.000000,0.000000,0.000000,0.000000,32021.000000,48380.750000,1,-1
+4.000000,0.758059,0.000000,0.000000,0.000000,0.000000,0.000000,32021.000000,48380.750000,0,-1
+4.000000,0.758054,0.000000,0.000000,0.000000,0.000000,0.000000,32021.000000,48380.750000,1,-1
diff --git a/RegressionTestResults/PR_TrainEnsemble/OUTPUT/Train/epoch_metrics.csv b/RegressionTestResults/PR_TrainEnsemble/OUTPUT/Train/epoch_metrics.csv
index e677cc029..cb29ab21c 100644
--- a/RegressionTestResults/PR_TrainEnsemble/OUTPUT/Train/epoch_metrics.csv
+++ b/RegressionTestResults/PR_TrainEnsemble/OUTPUT/Train/epoch_metrics.csv
@@ -1,3 +1,3 @@
 subject_count,loss,learning_rate,Dice/AverageAcrossStructures,Dice/spinalcord,Dice/lung_r,Dice/lung_l,VoxelCount/spinalcord,VoxelCount/lung_r,VoxelCount/lung_l,epoch,cross_validation_split_index
-1.000000,0.720039,0.000100,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,110279.000000,0,0
-1.000000,0.793326,0.000090,0.000000,0.000000,0.000000,0.000000,0.000000,43307.000000,3900.500000,1,0
+2.000000,0.720039,0.000100,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,110279.000000,0,0
+2.000000,0.793326,0.000090,0.000000,0.000000,0.000000,0.000000,0.000000,43307.000000,3900.500000,1,0
diff --git a/RegressionTestResults/PR_TrainEnsemble/OUTPUT/Val/epoch_metrics.csv b/RegressionTestResults/PR_TrainEnsemble/OUTPUT/Val/epoch_metrics.csv
index 751cccbbc..e92c441a0 100644
--- a/RegressionTestResults/PR_TrainEnsemble/OUTPUT/Val/epoch_metrics.csv
+++ b/RegressionTestResults/PR_TrainEnsemble/OUTPUT/Val/epoch_metrics.csv
@@ -1,3 +1,3 @@
 subject_count,loss,Dice/AverageAcrossStructures,Dice/spinalcord,Dice/lung_r,Dice/lung_l,VoxelCount/spinalcord,VoxelCount/lung_r,VoxelCount/lung_l,epoch,cross_validation_split_index
-1.000000,0.721449,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,54751.500000,0,0
-1.000000,0.721449,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,54751.500000,1,0
+2.000000,0.721449,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,54751.500000,0,0
+2.000000,0.721449,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,54751.500000,1,0

From b3400a2ceaa3002e70ee01b24530d4054245bc01 Mon Sep 17 00:00:00 2001
From: melanibe
Date: Fri, 9 Jul 2021 10:25:05 +0000
Subject: [PATCH 25/26] correct more

---
 .../PR_BasicModel2Epochs/OUTPUT/Train/epoch_metrics.csv | 4 ++--
 .../PR_BasicModel2Epochs/OUTPUT/Val/epoch_metrics.csv   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/RegressionTestResults/PR_BasicModel2Epochs/OUTPUT/Train/epoch_metrics.csv b/RegressionTestResults/PR_BasicModel2Epochs/OUTPUT/Train/epoch_metrics.csv
index bcab7e484..c6b01290a 100644
--- a/RegressionTestResults/PR_BasicModel2Epochs/OUTPUT/Train/epoch_metrics.csv
+++ b/RegressionTestResults/PR_BasicModel2Epochs/OUTPUT/Train/epoch_metrics.csv
@@ -1,3 +1,3 @@
 subject_count,loss,learning_rate,Dice/AverageAcrossStructures,Dice/spinalcord,Dice/lung_r,Dice/lung_l,VoxelCount/spinalcord,VoxelCount/lung_r,VoxelCount/lung_l,epoch,cross_validation_split_index
-1.000000,0.718559,0.000100,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,98256.000000,0,-1
-1.000000,0.792988,0.000090,0.000000,0.000000,0.000000,0.000000,0.000000,43307.000000,13992.500000,1,-1
+2.000000,0.718559,0.000100,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,98256.000000,0,-1
+2.000000,0.792988,0.000090,0.000000,0.000000,0.000000,0.000000,0.000000,43307.000000,13992.500000,1,-1
diff --git a/RegressionTestResults/PR_BasicModel2Epochs/OUTPUT/Val/epoch_metrics.csv b/RegressionTestResults/PR_BasicModel2Epochs/OUTPUT/Val/epoch_metrics.csv
index 34ffeee9d..58155a8e4 100644
--- a/RegressionTestResults/PR_BasicModel2Epochs/OUTPUT/Val/epoch_metrics.csv
+++ b/RegressionTestResults/PR_BasicModel2Epochs/OUTPUT/Val/epoch_metrics.csv
@@ -1,3 +1,3 @@
 subject_count,loss,Dice/AverageAcrossStructures,Dice/spinalcord,Dice/lung_r,Dice/lung_l,VoxelCount/spinalcord,VoxelCount/lung_r,VoxelCount/lung_l,epoch,cross_validation_split_index
-1.000000,0.715468,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,89502.000000,0,-1
-1.000000,0.715476,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,89502.000000,1,-1
+2.000000,0.715468,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,89502.000000,0,-1
+2.000000,0.715476,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,89502.000000,1,-1

From 6cf302d8b4a2d935d43682172e26c10d363cfb37 Mon Sep 17 00:00:00 2001
From: melanibe
Date: Fri, 9 Jul 2021 10:25:58 +0000
Subject: [PATCH 26/26] flake8

---
 InnerEye/ML/model_training.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/InnerEye/ML/model_training.py b/InnerEye/ML/model_training.py
index 49c20591f..8d386dd0b 100644
--- a/InnerEye/ML/model_training.py
+++ b/InnerEye/ML/model_training.py
@@ -99,7 +99,7 @@ def __init__(self, container: LightningContainer):
                          mode="max",
                          save_last=False)
 
-    def on_train_epoch_end(self, trainer: Trainer, pl_module: LightningModule, unused: bool=None) -> None:
+    def on_train_epoch_end(self, trainer: Trainer, pl_module: LightningModule, unused: bool = None) -> None:
         pl_module.log(name="epoch", value=trainer.current_epoch)  # type: ignore
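
---

Editor's notes on the migration (appended for context; not part of the patches above).

Note 1 - torchmetrics state handling. The assertion removed in patch 02 is the behavioural crux of the metrics migration: in the torchmetrics package that PL 1.3 delegates to, Metric.reset() also clears the cached result of compute(), so compute() after reset() no longer returns the stale value, and custom metrics move their state into add_state() (patches 04, 05, 08). A minimal sketch of the update/compute/reset contract these patches rely on; the class name and NaN handling below are illustrative stand-ins, not the repo's exact AverageWithoutNan:

    import torch
    from torchmetrics import Metric


    class MeanWithoutNan(Metric):
        """Averages all finite values passed to update(); NaNs are dropped."""

        def __init__(self) -> None:
            super().__init__()
            # States registered via add_state are summed across DDP ranks on
            # compute() and restored to these defaults by reset().
            self.add_state("sum", default=torch.tensor(0.0), dist_reduce_fx="sum")
            self.add_state("count", default=torch.tensor(0), dist_reduce_fx="sum")

        def update(self, values: torch.Tensor) -> None:
            finite = values[~torch.isnan(values)]
            self.sum += finite.sum()
            self.count += finite.numel()

        def compute(self) -> torch.Tensor:
            if self.count == 0:
                raise ValueError("Cannot compute the average of an empty set of values.")
            return self.sum / self.count


    metric = MeanWithoutNan()
    metric.update(torch.tensor([1.0, float("nan"), 3.0]))
    assert metric.compute() == 2.0
    metric.reset()  # clears the states and the cached compute() result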
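Note 2 - the checkpoint arrangement that patches 06, 07, 21 and 26 converge on: a plain ModelCheckpoint that only maintains last.ckpt (save_top_k=0 disables any "best k" bookkeeping), plus the recovery callback that ranks checkpoints by the "epoch" value it logs itself in on_train_epoch_end, so that mode="max" keeps the most recent k. A sketch against the PL 1.3 ModelCheckpoint API; the dirpath, the save_top_k value and the explicit monitor="epoch" are placeholders and assumptions here, since the InnerEye subclass wires these from its container config:

    from pytorch_lightning.callbacks import ModelCheckpoint

    # Writes/overwrites only "last.ckpt" at the end of every epoch.
    last_checkpoint = ModelCheckpoint(dirpath="outputs/checkpoints",
                                      save_last=True,
                                      save_top_k=0)

    # Keeps the k most recent recovery checkpoints: ranking on a monotonically
    # increasing "epoch" metric with mode="max" makes "top k" mean "latest k".
    # monitor="epoch" is an assumption; the repo's subclass logs that value
    # from on_train_epoch_end for exactly this purpose.
    recovery_checkpoint = ModelCheckpoint(dirpath="outputs/checkpoints",
                                          filename="recovery_{epoch}",
                                          monitor="epoch",
                                          mode="max",
                                          save_top_k=2,
                                          period=1)

    # Both callbacks are then handed to the Trainer via callbacks=[...].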
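Note 3 - why the subject_count columns in the regression CSVs drop to per-process values in patch 16 and return to whole-job totals in patches 24 and 25: under PL 1.3, epoch-level aggregation and cross-rank reduction are controlled per logging call, and patch 23 switches the count metric to reduce_fx=torch.sum (sum the per-step values within an epoch) and sync_dist_op="sum" (sum across DDP ranks instead of the default mean). A toy module showing the call shape; the batch layout and metric name are simplified stand-ins for the InnerEye code:

    import torch
    import pytorch_lightning as pl


    class CountLoggingModule(pl.LightningModule):
        """Minimal module demonstrating the reduction flags from patch 23."""

        def __init__(self) -> None:
            super().__init__()
            self.layer = torch.nn.Linear(4, 1)

        def training_step(self, batch: torch.Tensor, batch_idx: int) -> torch.Tensor:
            num_subjects = torch.tensor(float(batch.shape[0]), device=self.device)
            # reduce_fx=torch.sum totals the per-step values over the epoch;
            # sync_dist=True with sync_dist_op="sum" then totals across ranks,
            # so the epoch-level metric is again the whole-job subject count.
            self.log("train/SubjectCount", num_subjects,
                     on_step=False, on_epoch=True,
                     reduce_fx=torch.sum, sync_dist=True, sync_dist_op="sum")
            return self.layer(batch).mean()

        def configure_optimizers(self) -> torch.optim.Optimizer:
            return torch.optim.Adam(self.parameters())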