tests for callback

microsoft · ant0nsc · Nov 18, 2021 · Sep 22, 2021 · Oct 13, 2021 · Oct 13, 2021
commit 144698a6f3701c8a00843678dfcce7e330a6bcc0
diff --git a/InnerEye/ML/lightning_base.py b/InnerEye/ML/lightning_base.py
@@ -226,10 +226,10 @@ def on_fit_start(self, trainer: Trainer, pl_module: LightningModule) -> None:
  self.module = pl_module
 
  def on_train_epoch_start(self, trainer: Trainer, pl_module: LightningModule) -> None:
- self.train_timers.reset()
+ self.train_timers.epoch_start()
 
  def on_validation_epoch_start(self, trainer: Trainer, pl_module: LightningModule) -> None:
- self.val_timers.reset()
+ self.val_timers.epoch_start()
  # In Lightning, the validation epoch is running "inside" the training. If we get here, it means that training
  # is done for this epoch, even though the on_training_epoch hook has not yet been called.
  self.train_timers.epoch_end()
@@ -327,7 +327,7 @@ def write_and_log_epoch_time(self, is_training: bool) -> None:
  f"for data took {timers.total_load_time:0.2f} sec total.")
  if timers.num_load_time_exceeded > 0 and timers.should_warn_in_this_epoch:
  logging.warning("The dataloaders were not fast enough to always supply the next batch in less than "
- f"{timers.max_item_load_time_seconds}sec.")
+ f"{timers.max_item_load_time_seconds:0.2f}sec.")
  logging.warning(
  f"In this epoch, {timers.num_load_time_exceeded} out of {timers.num_batches} batches exceeded the load "
  f"time threshold. Total loading time for the slow batches was {timers.total_extra_load_time:0.2f}sec.")
@@ -364,13 +364,6 @@ def get_timers(self, is_training: bool) -> EpochTimers:
  """
  return self.train_timers if is_training else self.val_timers
 
- def reset_timers(self) -> None:
- """
- Resets all timers and counters, for both the validation and the training epoch.
- """
- self.train_timers.reset()
- self.val_timers.reset()
-
 
 class InnerEyeLightning(LightningModule):
  """

diff --git a/InnerEye/ML/metrics.py b/InnerEye/ML/metrics.py
@@ -81,14 +81,6 @@ class EpochTimers:
  """
  Contains all information necessary to compute the IO metrics: Epoch times, batch times, loading times.
  """
- epoch_start_time: float = 0.0
- epoch_end_time: float = 0.0
- batch_start_time: float = 0.0
- num_load_time_warnings: int = 0
- num_load_time_exceeded: int = 0
- total_extra_load_time: float = 0.0
- total_load_time: float = 0.0
- num_batches: int = 0
 
  def __init__(self,
  max_item_load_time_seconds: float = 0.5,
@@ -106,9 +98,16 @@ def __init__(self,
  self.max_load_time_warnings = max_load_time_warnings
  self.max_load_time_epochs = max_load_time_epochs
  self.load_time_warning_epochs: Set[int] = set()
- self.reset()
-
- def reset(self) -> None:
+ self.epoch_start_time: float = 0.0
+ self.epoch_end_time: float = 0.0
+ self.batch_start_time: float = 0.0
+ self.num_load_time_warnings: int = 0
+ self.num_load_time_exceeded: int = 0
+ self.total_extra_load_time: float = 0.0
+ self.total_load_time: float = 0.0
+ self.num_batches: int = 0
+
+ def epoch_start(self) -> None:
  """
  Resets all timers to the current time, and all counters to 0. The set of epochs for which warnings about
  load time were produced will not be reset.

diff --git a/Tests/ML/test_loggers.py b/Tests/ML/test_loggers.py
@@ -4,11 +4,14 @@
 # ------------------------------------------------------------------------------------------
 import logging
 import math
-from typing import List
+from typing import Callable, Dict, List, Optional
 from unittest import mock
 
+import torch
 from _pytest.logging import LogCaptureFixture
 
+from InnerEye.Common.metrics_constants import MetricType, TRAIN_PREFIX, VALIDATION_PREFIX
+from InnerEye.ML.lightning_base import BatchTimeCallback
 from InnerEye.ML.lightning_loggers import (AzureMLProgressBar, PROGRESS_STAGE_PREDICT, PROGRESS_STAGE_TEST,
  PROGRESS_STAGE_TRAIN, PROGRESS_STAGE_VAL)
 from InnerEye.ML.metrics import EpochTimers
@@ -96,6 +99,9 @@ def write_message(message: str) -> None:
 
 
 def test_epoch_timers(caplog: LogCaptureFixture) -> None:
+ """
+ Test the class that measures batch and epoch times.
+ """
  caplog.set_level(logging.INFO)
  batch_index = 123
  epoch = 24
@@ -140,6 +146,10 @@ def test_epoch_timers(caplog: LogCaptureFixture) -> None:
  assert f"prefix: Loading minibatch {batch_index} took" in message
  assert f"This message will be printed at most {timer.max_load_time_warnings} times"
  assert timer.num_load_time_warnings > 0
+ # Test if the warnings disappear after the max number of warnings
+ assert timer.should_warn_in_this_epoch
+ timer.num_load_time_warnings = timer.max_load_time_warnings + 1
+ assert not timer.should_warn_in_this_epoch
 
  # Epoch end time should be stored
  assert timer.total_epoch_time == 0.0
@@ -148,6 +158,81 @@ def test_epoch_timers(caplog: LogCaptureFixture) -> None:
  assert timer.epoch_end_time > old_epoch_end_time
  assert timer.total_epoch_time > 0.0
 
- timer.reset()
+ # Test the resetting logic
+ timer.epoch_start()
  assert timer.total_load_time == 0.0
  assert timer.num_load_time_warnings == 0
+ # The object should keep track of all epochs in which warnings were printed
+ assert len(timer.load_time_warning_epochs) > 0
+
+
+def test_batch_time_callback(caplog: LogCaptureFixture) -> None:
+    """
+    Test the callback that measures data loading times. The callback hooks are invoked by hand (no Trainer),
+    with a mocked LightningModule whose `log` method records every metric that the callback writes.
+    The test checks that timers are reset at epoch start, that batch-level metrics are logged with the
+    expected reduction function, and that epoch-level messages and metrics are written at validation epoch end.
+
+    :param caplog: The pytest fixture that captures log messages.
+    """
+    caplog.set_level(logging.INFO)
+    callback = BatchTimeCallback()
+    epoch = 1234
+    # This dictionary stores all metrics that are written via module.log, as name -> (value, reduce_fx)
+    logged_metrics: Dict[str, tuple] = {}
+
+    def mock_log(name: str, value: float, reduce_fx: Callable, **kwargs: object) -> None:
+        # Any further keyword arguments passed to module.log are accepted, but not recorded.
+        logged_metrics[name] = (value, reduce_fx)
+
+    mock_module = mock.MagicMock(current_epoch=epoch, log=mock_log)
+    callback.on_fit_start(trainer=None, pl_module=mock_module)  # type: ignore
+    assert callback.module == mock_module
+
+    # Upon epoch start, the timers should be reset. We can check that by looking at epoch_start_time
+    assert callback.train_timers.epoch_start_time == 0.0
+    callback.on_train_epoch_start(None, None)  # type: ignore
+    assert callback.train_timers.epoch_start_time > 0.0
+    assert callback.val_timers.epoch_start_time == 0.0
+    old_train_epoch_end_time = callback.train_timers.epoch_end_time
+    callback.on_validation_epoch_start(None, None)  # type: ignore
+    assert callback.val_timers.epoch_start_time > 0.0
+    # When calling epoch_start for validation, training epoch should be ended
+    assert callback.train_timers.epoch_end_time > old_train_epoch_end_time
+
+    # Run 1 training batch
+    callback.on_train_batch_start(None, None, None, batch_idx=0, dataloader_idx=0)  # type: ignore
+    callback.on_train_batch_end(None, None, None, None, batch_idx=0, dataloader_idx=0)  # type: ignore
+    assert len(logged_metrics) == 2
+
+    # Upon batch end, we should see metrics being logged. Batch level timings should be logged both as averages and max
+    def check_batch_metrics(train_or_val: str) -> None:
+        for suffix in [" avg", " max"]:
+            name = f"timing/{train_or_val}/SecondsPerBatch" + suffix
+            assert name in logged_metrics
+            # The parentheses matter: without them, the conditional expression would absorb the comparison,
+            # and the " avg" case would only assert that torch.mean is truthy.
+            assert logged_metrics[name][1] == (max if suffix == " max" else torch.mean)
+
+    check_batch_metrics("train")
+    assert caplog.messages[-1].startswith(f"Epoch {epoch} training: Loaded the first")
+    # Run 2 validation batches
+    for batch_idx in range(2):
+        callback.on_validation_batch_start(None, None, None, batch_idx=batch_idx, dataloader_idx=0)  # type: ignore
+        callback.on_validation_batch_end(None, None, None, None, batch_idx=batch_idx, dataloader_idx=0)  # type: ignore
+    assert caplog.messages[-1].startswith(f"Epoch {epoch} validation: Loaded the first")
+    assert callback.train_timers.num_batches == 1
+    assert callback.val_timers.num_batches == 2
+    check_batch_metrics("val")
+
+    # Check that the metrics are written at the end of the validation epoch.
+    # Hack the timers to trigger the warning message for validation only
+    callback.val_timers.num_load_time_exceeded = 1
+    callback.val_timers.total_extra_load_time = 100.00
+    callback.val_timers.max_item_load_time_seconds = 2.0
+    assert callback.val_timers.should_warn_in_this_epoch
+    # Read the previous end time from the validation timers, because that is the timer compared against below.
+    old_val_epoch_end_time = callback.val_timers.epoch_end_time
+    callback.on_validation_epoch_end(None, None)  # type: ignore
+    assert callback.val_timers.epoch_end_time > old_val_epoch_end_time
+    assert len(logged_metrics) > 0
+
+    # The last 4 log messages should be the epoch summaries for training and validation, then the 2 warnings
+    assert f"Epoch {epoch} training took " in caplog.messages[-4]
+    assert f"Epoch {epoch} validation took " in caplog.messages[-3]
+    assert "The dataloaders were not fast enough" in caplog.messages[-2]
+    assert "in less than 2.00sec" in caplog.messages[-2]
+    assert "1 out of 2 batches exceeded the load time threshold" in caplog.messages[-1]
+    assert "Total loading time for the slow batches was 100.00sec" in caplog.messages[-1]
+
+    # Epoch-level metrics should have been logged for both training and validation
+    for prefix in [TRAIN_PREFIX, VALIDATION_PREFIX]:
+        for metric in [MetricType.SECONDS_PER_EPOCH.value, MetricType.EXCESS_BATCH_LOADING_TIME.value]:
+            assert f"timing/{prefix}{metric}" in logged_metrics