microsoft · melanibe · May 7, 2021 · Apr 1, 2021 · Apr 1, 2021 · Apr 1, 2021
diff --git a/.amlignore b/.amlignore
@@ -14,3 +14,5 @@ TestsOutsidePackage/azureml-models
 tensorboard_runs
 InnerEyeTestVariables.txt
 InnerEyePrivateSettings.yml
+cifar-10-batches-py
+cifar-100-python
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -63,6 +63,12 @@ with only minimum code changes required. See [the MD documentation](docs/bring_y
 - ([#450](https://github.com/microsoft/InnerEye-DeepLearning/pull/450)) Adds the metric "Accuracy at threshold 0.5" to the classification report (`classification_crossval_report.ipynb`). 
 - ([#451](https://github.com/microsoft/InnerEye-DeepLearning/pull/451)) Write a file `model_outputs.csv` with columns 
  `subject`, `prediction_target`, `label`, `model_output` and `cross_validation_split_index`. This file is not written out for sequence models.
+- ([#440](https://github.com/microsoft/InnerEye-DeepLearning/pull/440)) Added support for training of self-supervised
+ models (BYOL and SimCLR) based on the bring-your-own-model framework. Provides example configurations for training
+ of SSL models on CIFAR10/100 datasets as well as for chest-x-ray datasets such as NIH Chest-Xray or RSNA Pneumonia
+ Detection Challenge datasets. See
+ [SSL doc](https://github.com/microsoft/InnerEye-DeepLearning/blob/main/docs/self_supervised_models.md) for more
+ details.
 
 ### Changed
 
@@ -76,7 +82,7 @@ with only minimum code changes required. See [the MD documentation](docs/bring_y
 - ([#432](https://github.com/microsoft/InnerEye-DeepLearning/pull/432)) Upgraded to PyTorch-Lightning 1.2.7. Add
  end-to-end test for classification cross-validation. WARNING: upgrade PL version causes hanging of multi-node
  training.
-- ([#437])(https://github.com/microsoft/InnerEye-DeepLearning/pull/437)) Upgrade to PyTorch-Lightning 1.2.8.
+- ([#437](https://github.com/microsoft/InnerEye-DeepLearning/pull/437)) Upgrade to PyTorch-Lightning 1.2.8.
 - ([#439](https://github.com/microsoft/InnerEye-DeepLearning/pull/439)) Recovery checkpoints are now
  named `recovery_epoch=x.ckpt` instead of `recovery.ckpt` or `recovery-v0.ckpt`.
 - ([#451](https://github.com/microsoft/InnerEye-DeepLearning/pull/451)) Change the signature for function `generate_custom_report` 

diff --git a/InnerEye/Azure/azure_config.py b/InnerEye/Azure/azure_config.py
@@ -75,6 +75,12 @@ class AzureConfig(GenericConfig):
  "('--pytest_mark gpu' will run all tests marked with 'pytest.mark.gpu')")
  run_recovery_id: str = param.String(doc="A run recovery id string in the form 'experiment name:run id'"
  " to use for inference or recovering a model training run.")
+ pretraining_run_recovery_id: str = param.String(default=None,
+ allow_None=True,
+ doc="Extra run recovery id to download checkpoints from,"
+ "for custom modules (e.g. for loading pretrained weights)."
+ "Warning: this argument will be ignored for InnerEyeContainer"
+ "models.")
  experiment_name: str = param.String(doc="If provided, use this string as the name of the AzureML experiment. "
  "If not provided, create the experiment off the git branch name.")
  build_number: int = param.Integer(0, doc="The numeric ID of the Azure pipeline that triggered this training run.")

diff --git a/InnerEye/Azure/azure_runner.py b/InnerEye/Azure/azure_runner.py
@@ -24,8 +24,7 @@
 from InnerEye.Azure import azure_util
 from InnerEye.Azure.azure_config import AzureConfig, ParserResult, SourceConfig
 from InnerEye.Azure.azure_util import CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY, RUN_RECOVERY_FROM_ID_KEY_NAME, \
- RUN_RECOVERY_ID_KEY_NAME, \
- is_offline_run_context, merge_conda_dependencies
+ RUN_RECOVERY_ID_KEY_NAME, is_offline_run_context, merge_conda_dependencies
 from InnerEye.Azure.secrets_handling import read_all_settings
 from InnerEye.Azure.tensorboard_monitor import AMLTensorBoardMonitorConfig, monitor
 from InnerEye.Common.generic_parsing import GenericConfig
@@ -43,13 +42,17 @@
 
 def submit_to_azureml(azure_config: AzureConfig,
  source_config: SourceConfig,
- azure_dataset_id: str) -> Run:
+ azure_dataset_id: str,
+ extra_azure_dataset_ids: List[str]) -> Run:
  """
  The main entry point. It creates an AzureML workspace if needed, submits an experiment using the code
  as specified in source_config, and waits for completion if needed.
  :param azure_config: azure related configurations to setup valid workspace
  :param source_config: The information about which code should be submitted, and which arguments should be used.
  :param azure_dataset_id: The name of the dataset on blob storage to be used for this run.
+ :param extra_azure_dataset_ids: A list of additional dataset names on blob storage to be used for this run. This
+ will be ignored for InnerEyeContainer models; it is only used by custom LightningContainer (see bring your own model
+ and self-supervised training documentation).
  """
  azure_run: Optional[Run] = None
 
@@ -66,7 +69,7 @@ def interrupt_handler(signal: int, _: Any) -> None:
  for s in [signal.SIGINT, signal.SIGTERM]:
  signal.signal(s, interrupt_handler)
  # create train/test experiment
- azure_run = create_and_submit_experiment(azure_config, source_config, azure_dataset_id)
+ azure_run = create_and_submit_experiment(azure_config, source_config, azure_dataset_id, extra_azure_dataset_ids)
 
  if azure_config.wait_for_completion:
  # We want the job output to be visible on the console, but the program should not exit if the
@@ -121,18 +124,22 @@ def create_experiment_name(azure_config: AzureConfig) -> str:
 def create_and_submit_experiment(
  azure_config: AzureConfig,
  source_config: SourceConfig,
- azure_dataset_id: str) -> Run:
+ azure_dataset_id: str,
+ extra_azure_dataset_ids: List[str]) -> Run:
  """
  Creates an AzureML experiment in the workspace and submits it for execution.
  :param azure_config: azure related configurations to setup valid workspace
  :param source_config: The information about which code should be submitted, and which arguments should be used.
  :param azure_dataset_id: The name of the dataset in blob storage to be used for this run.
+ :param extra_azure_dataset_ids: A list of additional dataset names on blob storage to be used for this run. This
+ will be ignored for InnerEyeContainer models; it is only used by custom LightningContainer (see bring your own model
+ and self-supervised training documentation).
  :returns: Run object for the submitted AzureML run
  """
  workspace = azure_config.get_workspace()
  experiment_name = create_experiment_name(azure_config)
  exp = Experiment(workspace=workspace, name=azure_util.to_azure_friendly_string(experiment_name))
- script_run_config = create_run_config(azure_config, source_config, azure_dataset_id)
+ script_run_config = create_run_config(azure_config, source_config, azure_dataset_id, extra_azure_dataset_ids)
 
  # submit a training/testing run associated with the experiment
  run: Run = exp.submit(script_run_config)
@@ -273,40 +280,47 @@ def get_or_create_python_environment(azure_config: AzureConfig,
  return env
 
 
-def get_dataset_consumption(azure_config: AzureConfig, azure_dataset_id: str) -> DatasetConsumptionConfig:
+def get_dataset_consumption(azure_config: AzureConfig,
+ azure_dataset_id: str,
+ dataset_index: int = 0) -> DatasetConsumptionConfig:
  """
  Creates a configuration for using an AzureML dataset inside of an AzureML run. This will make the AzureML
  dataset with given name available as a named input, using INPUT_DATA_KEY as the key.
  :param azure_config: azure related configurations to use for model scale-out behaviour
  :param azure_dataset_id: The name of the dataset in blob storage to be used for this run. This can be an empty
  string to not use any datasets.
+ :param dataset_index: suffix for the dataset name; the named input is called `{INPUT_DATA_KEY}_{dataset_index}`
  """
  azureml_dataset = get_or_create_dataset(azure_config, azure_dataset_id=azure_dataset_id)
  if not azureml_dataset:
  raise ValueError(f"AzureML dataset {azure_dataset_id} could not be found or created.")
- named_input = azureml_dataset.as_named_input(INPUT_DATA_KEY)
+ named_input = azureml_dataset.as_named_input(f"{INPUT_DATA_KEY}_{dataset_index}")
  return named_input.as_mount() if azure_config.use_dataset_mount else named_input.as_download()
 
 
 def create_run_config(azure_config: AzureConfig,
  source_config: SourceConfig,
  azure_dataset_id: str = "",
+ extra_azure_dataset_ids: List[str] = [],
  environment_name: str = "") -> ScriptRunConfig:
  """
  Creates a configuration to run the InnerEye training script in AzureML.
  :param azure_config: azure related configurations to use for model scale-out behaviour
  :param source_config: configurations for model execution, such as name and execution mode
  :param azure_dataset_id: The name of the dataset in blob storage to be used for this run. This can be an empty
  string to not use any datasets.
+ :param extra_azure_dataset_ids: List of extra datasets in blob storage to be used for this run. This can be empty.
  :param environment_name: If specified, try to retrieve the existing Python environment with this name. If that
  is not found, create one from the Conda files provided in `source_config`. This parameter is meant to be used
  when running inference for an existing model.
  :return: The configured script run.
  """
- if azure_dataset_id:
- dataset_consumption = get_dataset_consumption(azure_config, azure_dataset_id)
- else:
- dataset_consumption = None
+ dataset_consumptions = {}
+ all_dataset_ids = [azure_dataset_id] + extra_azure_dataset_ids if azure_dataset_id else extra_azure_dataset_ids
+ for i, dataset_id in enumerate(all_dataset_ids):
+ dataset_consumption = get_dataset_consumption(azure_config, dataset_id, i)
+ dataset_consumptions.update({dataset_consumption.name: dataset_consumption})
+
  # AzureML seems to sometimes expect the entry script path in Linux format, hence convert to posix path
  entry_script_relative_path = source_config.entry_script.relative_to(source_config.root_folder).as_posix()
  logging.info(f"Entry script {entry_script_relative_path} ({source_config.entry_script} relative to "
@@ -329,8 +343,8 @@ def create_run_config(azure_config: AzureConfig,
  run_config.framework = "Python"
  run_config.communicator = "IntelMpi"
  run_config.node_count = distributed_job_config.node_count
- if dataset_consumption:
- run_config.data = {dataset_consumption.name: dataset_consumption}
+ if len(dataset_consumptions) > 0:
+ run_config.data = dataset_consumptions
  # Use blob storage for storing the source, rather than the FileShares section of the storage account.
  run_config.source_directory_data_store = workspace.datastores.get(WORKSPACE_DEFAULT_BLOB_STORE_NAME).name
  script_run_config = ScriptRunConfig(

diff --git a/InnerEye/ML/SSL/datamodules_and_datasets/cifar_datasets.py b/InnerEye/ML/SSL/datamodules_and_datasets/cifar_datasets.py
@@ -0,0 +1,30 @@
+# ------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
+# ------------------------------------------------------------------------------------------
+
+from torchvision.datasets import CIFAR10, CIFAR100
+
+from InnerEye.ML.SSL.datamodules_and_datasets.dataset_cls_utils import InnerEyeDataClassBaseWithReturnIndex
+
+
+class InnerEyeCIFAR10(InnerEyeDataClassBaseWithReturnIndex, CIFAR10):
+ """
+ Wrapper class around torchvision CIFAR10 class to optionally return the
+ index on top of the image and the label in __getitem__ as well as defining num_classes property.
+ """
+
+ @property
+ def num_classes(self) -> int:
+ return 10
+
+
+class InnerEyeCIFAR100(InnerEyeDataClassBaseWithReturnIndex, CIFAR100):
+ """
+ Wrapper class around torchvision CIFAR100 class to optionally return the
+ index on top of the image and the label in __getitem__ as well as defining num_classes property.
+ """
+
+ @property
+ def num_classes(self) -> int:
+ return 100