📝 Finish ML/configs API

microsoft · peterhessey · Aug 18, 2022 · Aug 5, 2022 · Aug 5, 2022 · Aug 8, 2022
commit 0cbd7c71cc2e4b70f99b374bbed48dfff2ea8289
diff --git a/InnerEye/ML/configs/classification/CovidModel.py b/InnerEye/ML/configs/classification/CovidModel.py
@@ -47,12 +47,12 @@ class CovidModel(ScalarModelBase):
  """
  Model to train a CovidDataset model from scratch or finetune from SSL-pretrained model.
 
- For AML you need to provide the run_id of your SSL training job as a command line argument
- --pretraining_run_recovery_id=id_of_your_ssl_model, this will download the checkpoints of the run to your
+ For AML you need to provide the run_id of your SSL training job as a command line argument:
+ ``--pretraining_run_recovery_id=<id_of_your_ssl_model>``. This will download the checkpoints of the run to your
  machine and load the corresponding pretrained model.
 
- To recover from a particular checkpoint from your SSL run e.g. "recovery_epoch=499.ckpt" please use the
- --name_of_checkpoint argument.
+ To recover from a particular checkpoint from your SSL run e.g. ``"recovery_epoch=499.ckpt"`` please use the
+ ``--name_of_checkpoint`` argument.
  """
  use_pretrained_model = param.Boolean(default=False, doc="If True, start training from a model pretrained with SSL."
  "If False, start training a DenseNet model from scratch"
@@ -242,6 +242,7 @@ def generate_custom_report(self, report_dir: Path, model_proc: ModelProcessing)
  Generate a custom report for the Covid model. This report will read the file model_output.csv generated for
  the training, validation or test sets and compute both the multiclass accuracy and the accuracy for each of the
  hierarchical tasks.
+
  :param report_dir: Directory report is to be written to
  :param model_proc: Whether this is a single or ensemble model (model_output.csv will be located in different
  paths for single vs ensemble runs.)
@@ -364,6 +365,11 @@ def compute_binary_accuracy(model_outputs: pd.Series, labels: pd.Series) -> floa
 
 class DicomPreparation:
  def __call__(self, item: torch.Tensor) -> PIL.Image:
+ """Call class as a function. This will act as a transformation function for the dataset.
+
+ :param item: tensor to transform.
+ :return: transformed data.
+ """
  # Item will be of dimension [C, Z, X, Y]
  images = item.numpy()
  assert images.shape[0] == 1 and images.shape[1] == 1

diff --git a/InnerEye/ML/configs/other/fastmri_varnet.py b/InnerEye/ML/configs/other/fastmri_varnet.py
@@ -47,11 +47,12 @@ def get_fastmri_data_module(azure_dataset_id: str,
  Creates a LightningDataModule that consumes data from the FastMRI challenge. The type of challenge
  (single/multicoil) is determined from the name of the dataset in Azure blob storage. The mask type is set to
  equispaced, with 4x acceleration.
+
  :param azure_dataset_id: The name of the dataset (folder name in blob storage).
  :param local_dataset: The local folder at which the dataset has been mounted or downloaded.
  :param sample_rate: Fraction of slices of the training data split to use. Set to a value <1.0 for rapid prototyping.
  :param test_path: The name of the folder inside the dataset that contains the test data.
- :return: A LightningDataModule object.
+ :return: The FastMRI LightningDataModule object.
  """
  if not azure_dataset_id:
  raise ValueError("The azure_dataset_id argument must be provided.")

diff --git a/InnerEye/ML/configs/segmentation/HeadAndNeckBase.py b/InnerEye/ML/configs/segmentation/HeadAndNeckBase.py
@@ -47,6 +47,7 @@ def __init__(self,
  **kwargs: Any) -> None:
  """
  Creates a new instance of the class.
+
  :param ground_truth_ids: List of ground truth ids.
  :param ground_truth_ids_display_names: Optional list of ground truth id display names. If
  present then must be of the same length as ground_truth_ids.

diff --git a/InnerEye/ML/configs/segmentation/HelloWorld.py b/InnerEye/ML/configs/segmentation/HelloWorld.py
@@ -94,6 +94,7 @@ def get_parameter_search_hyperdrive_config(self, run_config: ScriptRunConfig) ->
  https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-tune-hyperparameters
  A reference is provided at https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train
  .hyperdrive?view=azure-ml-py
+
  :param run_config: The configuration for running an individual experiment.
  :return: An Azure HyperDrive run configuration (configured PyTorch environment).
  """

diff --git a/InnerEye/ML/configs/segmentation/ProstatePaper.py b/InnerEye/ML/configs/segmentation/ProstatePaper.py
@@ -20,6 +20,7 @@ class ProstatePaper(ProstateBase):
  def __init__(self, **kwargs: Any) -> None:
  """
  Creates a new instance of the class.
+
  :param kwargs: Additional arguments that will be passed through to the SegmentationModelBase constructor.
  """
  ground_truth_ids = fg_classes

diff --git a/InnerEye/ML/lightning_container.py b/InnerEye/ML/lightning_container.py
@@ -183,6 +183,7 @@ def get_data_module(self) -> LightningDataModule:
  Because the method deals with data loaders, not loaded data, we cannot check automatically that cross validation
  is handled correctly within the base class, i.e. if the cross validation split is not handled in the method then
  nothing will fail, but each child run will be identical since they will each be given the full dataset.
+
  :return: A LightningDataModule
  """
  return None # type: ignore

diff --git a/InnerEye/ML/model_config_base.py b/InnerEye/ML/model_config_base.py
@@ -55,6 +55,7 @@ def get_parameter_search_hyperdrive_config(self, run_config: ScriptRunConfig) ->
  Returns a configuration for AzureML Hyperdrive that should be used when running hyperparameter
  tuning.
  This is an abstract method that each specific model should override.
+
  :param run_config: The AzureML estimator object that runs model training.
  :return: A hyperdrive configuration object.
  """
@@ -66,6 +67,7 @@ def get_model_train_test_dataset_splits(self, dataset_df: pd.DataFrame) -> Datas
  """
  Computes the training, validation and test splits for the model, from a dataframe that contains
  the full dataset.
+
  :param dataset_df: A dataframe that contains the full dataset that the model is using.
  :return: An instance of DatasetSplits with dataframes for training, validation and testing.
  """
@@ -83,6 +85,7 @@ def create_and_set_torch_datasets(self, for_training: bool = True, for_inference
  are False, the derived method *may* still create the corresponding datasets, but should not assume that
  the relevant splits (train/test/val) are non-empty. If either or both is True, they *must* create the
  corresponding datasets, and should be able to make the assumption.
+
  :param for_training: whether to create the datasets required for training.
  :param for_inference: whether to create the datasets required for inference.
  """
@@ -103,6 +106,8 @@ def get_torch_dataset_for_inference(self, mode: ModelExecutionMode) -> Any:
  """
  Returns a torch Dataset for running the model in inference mode, on the given split of the full dataset.
  The torch dataset must return data in the format required for running the model in inference mode.
+
+ :param mode: The mode of the model, either test, train or val.
  :return: A torch Dataset object.
  """
  if self._datasets_for_inference is None:

diff --git a/InnerEye/ML/scalar_config.py b/InnerEye/ML/scalar_config.py
@@ -542,16 +542,15 @@ def compute_and_log_metrics(self,
  data_split: ModelExecutionMode) -> None:
  """
  Computes all the metrics for a given (logits, labels) pair, and writes them to the loggers.
+
  :param logits: The model output before normalization.
  :param targets: The expected model outputs.
  :param subject_ids: The subject IDs for the present minibatch.
  :param is_training: If True, write the metrics as training metrics, otherwise as validation metrics.
- :param metrics: A dictionary mapping from names of prediction targets to a list of metric computers,
- as returned by create_metric_computers.
+ :param metrics: A dictionary mapping from names of prediction targets to a list of metric computers, as returned by create_metric_computers.
  :param logger: An object of type DataframeLogger which can be used for logging within this function.
  :param current_epoch: Current epoch number.
  :param data_split: ModelExecutionMode object indicating if this is the train or validation split.
- :return:
  """
  per_subject_outputs: List[Tuple[str, str, torch.Tensor, torch.Tensor]] = []
  for i, (prediction_target, metric_list) in enumerate(metrics.items()):

diff --git a/docs/source/rst/api/ML/configs/classification/index.rst b/docs/source/rst/api/ML/configs/classification/index.rst
@@ -0,0 +1,10 @@
+Classification Configs
+=============================
+
+COVID Model
+-----------
+
+.. autoclass:: InnerEye.ML.configs.classification.CovidModel.CovidModel
+
+.. autoclass:: InnerEye.ML.configs.classification.CovidModel.DicomPreparation
+ :special-members: __call__
diff --git a/docs/source/rst/api/ML/configs/index.rst b/docs/source/rst/api/ML/configs/index.rst
@@ -0,0 +1,10 @@
+Configurations
+==============
+
+.. toctree::
+
+ classification/index
+ regression/index
+ segmentation/index
+ ssl/index
+ other/index
diff --git a/docs/source/rst/api/ML/configs/other/index.rst b/docs/source/rst/api/ML/configs/other/index.rst
@@ -0,0 +1,7 @@
+Other Configs
+==========================
+
+FastMRI Varnet
+----------------
+
+.. automodule:: InnerEye.ML.configs.other.fastmri_varnet
diff --git a/docs/source/rst/api/ML/configs/regression/index.rst b/docs/source/rst/api/ML/configs/regression/index.rst
@@ -0,0 +1,7 @@
+Regression Configs
+===================
+
+Dummy Regression
+-----------------
+
+.. automodule:: InnerEye.ML.configs.regression.DummyRegression
diff --git a/docs/source/rst/api/ML/configs/segmentation/index.rst b/docs/source/rst/api/ML/configs/segmentation/index.rst
@@ -0,0 +1,45 @@
+Segmentation Configs
+=====================
+
+Basic Models
+------------
+
+.. automodule:: InnerEye.ML.configs.segmentation.BasicModel2Epochs
+
+.. automodule:: InnerEye.ML.configs.segmentation.BasicModel2Epochs1Channel
+
+.. automodule:: InnerEye.ML.configs.segmentation.BasicModel2EpochsMoreData
+
+HelloWorld
+------------
+
+.. automodule:: InnerEye.ML.configs.segmentation.HelloWorld
+
+Head and Neck
+-------------
+
+.. automodule:: InnerEye.ML.configs.segmentation.HeadAndNeckBase
+
+.. automodule:: InnerEye.ML.configs.segmentation.HeadAndNeckPaper
+
+Prostate
+-------------
+
+.. automodule:: InnerEye.ML.configs.segmentation.ProstateBase
+
+.. automodule:: InnerEye.ML.configs.segmentation.ProstatePaper
+
+Hippocampus
+------------
+
+.. automodule:: InnerEye.ML.configs.segmentation.Hippocampus
+
+Lung
+------------
+
+.. automodule:: InnerEye.ML.configs.segmentation.Lung
+
+Glioblastoma
+------------
+
+.. automodule:: InnerEye.ML.configs.segmentation.GbmBase
diff --git a/docs/source/rst/api/ML/configs/ssl/index.rst b/docs/source/rst/api/ML/configs/ssl/index.rst
@@ -0,0 +1,19 @@
+Self-Supervised Learning Configs
+================================
+
+CIFAR
+-----
+
+.. automodule:: InnerEye.ML.configs.ssl.CIFAR_classifier_configs
+
+.. automodule:: InnerEye.ML.configs.ssl.CIFAR_SSL_configs
+
+COVID
+-----
+
+.. automodule:: InnerEye.ML.configs.ssl.CovidContainers
+
+Chest X-Rays
+------------
+
+.. automodule:: InnerEye.ML.configs.ssl.CXR_SSL_configs
diff --git a/docs/source/rst/api/ML/configs/unit_testing/index.rst b/docs/source/rst/api/ML/configs/unit_testing/index.rst
diff --git a/docs/source/rst/api/ML/index.rst b/docs/source/rst/api/ML/index.rst
@@ -3,8 +3,8 @@ Machine learning
 
 .. toctree::
 
- configs
+ configs/index
+ augmentations/index
  runner
- augmentations/index.rst
  photometric_normalization
  pipelines