Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Commit

Permalink
Enable Bring-your-own-Lightning-model (#417)
Browse files Browse the repository at this point in the history
- Enable bringing arbitrary PyTorch-Lightning models to the InnerEye toolbox
- Upgrade mypy and simplify the way we invoke it
  • Loading branch information
ant0nsc committed Apr 19, 2021
1 parent 780e420 commit 0d479ba
Show file tree
Hide file tree
Showing 72 changed files with 3,178 additions and 1,303 deletions.
1 change: 1 addition & 0 deletions .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
ignore = E226,E302,E41,W391, E701, W291, E722, W503, E128, E126, E127, E731, E401
max-line-length = 160
max-complexity = 25
exclude = fastMRI/
7 changes: 7 additions & 0 deletions .github/workflows/linting_and_hello_world.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,13 @@ jobs:
PYTHONPATH: ${{ github.workspace }}
if: always()

- name: Run HelloContainer model
run: |
$CONDA/envs/InnerEye/bin/python ./InnerEye/ML/runner.py --model=HelloContainer
env:
PYTHONPATH: ${{ github.workspace }}
if: always()

windows:
runs-on: windows-latest
steps:
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "fastMRI"]
path = fastMRI
url = https://github.com/facebookresearch/fastMRI
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ created.

### Added

- ([#417](https://github.com/microsoft/InnerEye-DeepLearning/pull/417)) Added a generic way of adding PyTorch Lightning
models to the toolbox. It is now possible to train almost any Lightning model with the InnerEye toolbox in AzureML,
with only minimum code changes required. See [the MD documentation](docs/bring_your_own_model.md) for details.
- ([#430](https://github.com/microsoft/InnerEye-DeepLearning/pull/430)) Update conversion to 1.0.1 InnerEye-DICOM-RT to
add: manufacturer, SoftwareVersions, Interpreter and ROIInterpretedTypes.
- ([#385](https://github.com/microsoft/InnerEye-DeepLearning/pull/385)) Add the ability to train a model on multiple
Expand Down Expand Up @@ -70,6 +73,7 @@ created.
- ([#437](https://github.com/microsoft/InnerEye-DeepLearning/pull/437)) Fixed multi-node DDP bug in PL v1.2.8. Re-add
end-to-end test for multi-node.
### Removed
- ([#417](https://github.com/microsoft/InnerEye-DeepLearning/pull/417)) Removed an output file that only contains metadata for a legacy consumer

### Deprecated

Expand Down
13 changes: 0 additions & 13 deletions InnerEye/Azure/azure_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,19 +275,6 @@ def set_script_params_except_submit_flag(self) -> None:
self.script_params = retained_args


@dataclass
class ExperimentResultLocation:
    """
    Information that is needed to recover where the results of an experiment reside.
    """
    # Name of the Azure blob storage container that holds the experiment results.
    results_container_name: Optional[str] = None
    # URI at which the experiment results can be accessed.
    results_uri: Optional[str] = None
    # Folder that contains the dataset that the experiment consumed.
    dataset_folder: Optional[str] = None
    # URI at which that dataset can be accessed.
    dataset_uri: Optional[str] = None
    # Name of the AzureML job that produced the results.
    azure_job_name: Optional[str] = None
    # Commandline overrides that were applied in the run (diagnostic information only).
    commandline_overrides: Optional[str] = None


@dataclass
class ParserResult:
"""
Expand Down
59 changes: 32 additions & 27 deletions InnerEye/Azure/azure_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,18 @@
from azureml.core.runconfig import MpiConfiguration, RunConfiguration
from azureml.core.workspace import WORKSPACE_DEFAULT_BLOB_STORE_NAME
from azureml.data import FileDataset
from azureml.data.dataset_consumption_config import DatasetConsumptionConfig

from InnerEye.Azure import azure_util
from InnerEye.Azure.azure_config import AzureConfig, ParserResult, SourceConfig
from InnerEye.Azure.azure_util import CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY, RUN_RECOVERY_FROM_ID_KEY_NAME, \
RUN_RECOVERY_ID_KEY_NAME, \
merge_conda_dependencies
is_offline_run_context, merge_conda_dependencies
from InnerEye.Azure.secrets_handling import read_all_settings
from InnerEye.Azure.tensorboard_monitor import AMLTensorBoardMonitorConfig, monitor
from InnerEye.Common.generic_parsing import GenericConfig
from InnerEye.ML.common import ModelExecutionMode
from InnerEye.ML.utils.config_util import ModelConfigLoader
from InnerEye.ML.utils.config_loader import ModelConfigLoader

SLEEP_TIME_SECONDS = 30
INPUT_DATA_KEY = "input_data"
Expand All @@ -42,15 +43,12 @@

def submit_to_azureml(azure_config: AzureConfig,
source_config: SourceConfig,
model_config_overrides: str,
azure_dataset_id: str) -> Run:
"""
The main entry point. It creates an AzureML workspace if needed, submits an experiment using the code
as specified in source_config, and waits for completion if needed.
:param azure_config: azure related configurations to setup valid workspace
:param source_config: The information about which code should be submitted, and which arguments should be used.
:param model_config_overrides: A string that describes which model parameters were overwritten by commandline
arguments in the present run. This is only used for diagnostic purposes (it is set as a Tag on the run).
:param azure_dataset_id: The name of the dataset on blob storage to be used for this run.
"""
azure_run: Optional[Run] = None
Expand All @@ -68,8 +66,7 @@ def interrupt_handler(signal: int, _: Any) -> None:
for s in [signal.SIGINT, signal.SIGTERM]:
signal.signal(s, interrupt_handler)
# create train/test experiment
azure_run = create_and_submit_experiment(azure_config, source_config, model_config_overrides,
azure_dataset_id)
azure_run = create_and_submit_experiment(azure_config, source_config, azure_dataset_id)

if azure_config.wait_for_completion:
# We want the job output to be visible on the console, but the program should not exit if the
Expand All @@ -79,13 +76,12 @@ def interrupt_handler(signal: int, _: Any) -> None:
return azure_run


def set_run_tags(run: Run, azure_config: AzureConfig, model_config_overrides: str) -> None:
def set_run_tags(run: Run, azure_config: AzureConfig, commandline_args: str) -> None:
"""
Set metadata for the run
:param run: Run to set metadata for.
:param azure_config: The configurations for the present AzureML job
:param model_config_overrides: A string that describes which model parameters were overwritten by commandline
arguments in the present run.
:param commandline_args: A string that holds all commandline arguments that were used for the present run.
"""
git_information = azure_config.get_git_information()
run.set_tags({
Expand All @@ -103,7 +99,7 @@ def set_run_tags(run: Run, azure_config: AzureConfig, model_config_overrides: st
"source_message": git_information.commit_message,
"source_author": git_information.commit_author,
"source_dirty": str(git_information.is_dirty),
"overrides": model_config_overrides,
"commandline_args": commandline_args,
CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY: -1,
})

Expand All @@ -125,14 +121,11 @@ def create_experiment_name(azure_config: AzureConfig) -> str:
def create_and_submit_experiment(
azure_config: AzureConfig,
source_config: SourceConfig,
model_config_overrides: str,
azure_dataset_id: str) -> Run:
"""
Creates an AzureML experiment in the workspace and submits it for execution.
:param azure_config: azure related configurations to setup valid workspace
:param source_config: The information about which code should be submitted, and which arguments should be used.
:param model_config_overrides: A string that describes which model parameters were overwritten by commandline
arguments in the present run. This is only used for diagnostic purposes (it is set as a Tag on the run).
:param azure_dataset_id: The name of the dataset in blob storage to be used for this run.
:returns: Run object for the submitted AzureML run
"""
Expand All @@ -144,8 +137,12 @@ def create_and_submit_experiment(
# submit a training/testing run associated with the experiment
run: Run = exp.submit(script_run_config)

# set metadata for the run
set_run_tags(run, azure_config, model_config_overrides)
if is_offline_run_context(run):
# This codepath will only be executed in unit tests, when exp.submit is mocked.
return run

# Set metadata for the run.
set_run_tags(run, azure_config, commandline_args=(" ".join(source_config.script_params)))

print("\n==============================================================================")
print(f"Successfully queued new run {run.id} in experiment: {exp.name}")
Expand Down Expand Up @@ -276,6 +273,21 @@ def get_or_create_python_environment(azure_config: AzureConfig,
return env


def get_dataset_consumption(azure_config: AzureConfig, azure_dataset_id: str) -> DatasetConsumptionConfig:
    """
    Builds the configuration object via which an AzureML dataset is consumed inside an AzureML run.
    The dataset with the given name is exposed to the run as a named input, keyed by INPUT_DATA_KEY.
    :param azure_config: azure related configurations to use for model scale-out behaviour
    :param azure_dataset_id: The name of the dataset in blob storage to be used for this run. This can be an empty
    string to not use any datasets.
    :return: A consumption config that either mounts or downloads the dataset, depending on
        azure_config.use_dataset_mount.
    :raises ValueError: If the dataset could neither be found nor created.
    """
    dataset = get_or_create_dataset(azure_config, azure_dataset_id=azure_dataset_id)
    if not dataset:
        raise ValueError(f"AzureML dataset {azure_dataset_id} could not be found or created.")
    named_dataset_input = dataset.as_named_input(INPUT_DATA_KEY)
    if azure_config.use_dataset_mount:
        return named_dataset_input.as_mount()
    return named_dataset_input.as_download()


def create_run_config(azure_config: AzureConfig,
source_config: SourceConfig,
azure_dataset_id: str = "",
Expand All @@ -292,11 +304,7 @@ def create_run_config(azure_config: AzureConfig,
:return: The configured script run.
"""
if azure_dataset_id:
azureml_dataset = get_or_create_dataset(azure_config, azure_dataset_id=azure_dataset_id)
if not azureml_dataset:
raise ValueError(f"AzureML dataset {azure_dataset_id} could not be found or created.")
named_input = azureml_dataset.as_named_input(INPUT_DATA_KEY)
dataset_consumption = named_input.as_mount() if azure_config.use_dataset_mount else named_input.as_download()
dataset_consumption = get_dataset_consumption(azure_config, azure_dataset_id)
else:
dataset_consumption = None
# AzureML seems to sometimes expect the entry script path in Linux format, hence convert to posix path
Expand Down Expand Up @@ -354,8 +362,7 @@ def create_runner_parser(model_config_class: type = None) -> argparse.ArgumentPa
def parse_args_and_add_yaml_variables(parser: ArgumentParser,
yaml_config_file: Optional[Path] = None,
project_root: Optional[Path] = None,
fail_on_unknown_args: bool = False,
args: List[str] = None) -> ParserResult:
fail_on_unknown_args: bool = False) -> ParserResult:
"""
Reads arguments from sys.argv, modifies them with secrets from local YAML files,
and parses them using the given argument parser.
Expand All @@ -364,14 +371,12 @@ def parse_args_and_add_yaml_variables(parser: ArgumentParser,
:param yaml_config_file: The path to the YAML file that contains values to supply into sys.argv.
:param fail_on_unknown_args: If True, raise an exception if the parser encounters an argument that it does not
recognize. If False, unrecognized arguments will be ignored, and added to the "unknown" field of the parser result.
:param args: arguments to parse
:return: The parsed arguments, and overrides
"""
settings_from_yaml = read_all_settings(yaml_config_file, project_root=project_root)
return parse_arguments(parser,
settings_from_yaml=settings_from_yaml,
fail_on_unknown_args=fail_on_unknown_args,
args=args)
fail_on_unknown_args=fail_on_unknown_args)


def _create_default_namespace(parser: ArgumentParser) -> Namespace:
Expand Down Expand Up @@ -471,7 +476,7 @@ def run_duration_string_to_seconds(s: str) -> Optional[int]:
elif suffix == "d":
multiplier = 24 * 60 * 60
else:
raise ArgumentError("s", f"Invalid suffix: Must be one of 's', 'm', 'h', 'd', but got: {s}")
raise ArgumentError("s", f"Invalid suffix: Must be one of 's', 'm', 'h', 'd', but got: {s}") # type: ignore
return int(float(s[:-1]) * multiplier)


Expand Down
24 changes: 15 additions & 9 deletions InnerEye/Azure/azure_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,6 @@
INNEREYE_SDK_VERSION = "1.0"


def get_results_blob_path(run_id: str) -> str:
    """
    Builds the path of the top-level folder that holds the results for a given AzureML run.
    :param run_id: The AzureML run ID for which the folder path should be built.
    :return: A full Azure blob storage path, starting with the container name.
    """
    return f"{AZUREML_RUN_FOLDER}{run_id}"


def create_run_recovery_id(run: Run) -> str:
"""
Creates an recovery id for a run so it's checkpoints could be recovered for training/testing
Expand Down Expand Up @@ -293,6 +284,21 @@ def merge_conda_files(files: List[Path], result_file: Path) -> None:
ruamel.yaml.dump(unified_definition, f, indent=2, default_flow_style=False)


def get_all_environment_files(project_root: Path) -> List[Path]:
    """
    Collects the Conda environment files that should be used for a run: always the InnerEye
    environment file, plus a project-level environment.yml if that resolves to a different file.
    :param project_root: The root folder of the code that starts the present training run.
    :return: A list with 1 or 2 entries that are conda environment files.
    """
    base_yaml = fixed_paths.get_environment_yaml_file()
    project_level_yaml = project_root / fixed_paths.ENVIRONMENT_YAML_FILE_NAME
    if base_yaml == project_level_yaml:
        return [base_yaml]
    return [base_yaml, project_level_yaml]


def merge_conda_dependencies(files: List[Path]) -> Tuple[CondaDependencies, str]:
"""
Creates a CondaDependencies object from the Conda environments specified in one or more files.
Expand Down
52 changes: 0 additions & 52 deletions InnerEye/Common/build_config.py

This file was deleted.

26 changes: 26 additions & 0 deletions InnerEye/Common/common_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,3 +389,29 @@ def remove_file_or_directory(pth: Path) -> None:
pth.rmdir()
elif pth.exists():
pth.unlink()


def add_folder_to_sys_path_if_needed(folder_under_repo_root: str) -> None:
    """
    Ensures that the given folder, taken relative to the repository root, is present in sys.path.
    If an equivalent path is already there, nothing happens; otherwise the folder is appended.
    :param folder_under_repo_root: A folder name, relative to the repository root.
    """
    target = repository_root_directory() / folder_under_repo_root
    # Compare as Path objects so that textual variations of the same path are recognized.
    if any(Path(entry) == target for entry in sys.path):
        return
    print(f"Adding {target} to sys.path")
    sys.path.append(str(target))


@contextmanager
def change_working_directory(path_or_str: PathOrString) -> Generator:
    """
    Context manager for changing the current working directory to the given folder.
    The original working directory is restored when the context exits, even if the
    body of the `with` statement raises an exception.
    :param path_or_str: The folder to change into. A leading "~" is expanded to the user's home directory.
    """
    new_path = Path(path_or_str).expanduser()
    old_path = Path.cwd()
    os.chdir(new_path)
    try:
        yield
    finally:
        # Without the finally, an exception in the with-body would leave the process
        # permanently in the new folder, affecting all subsequent relative-path operations.
        os.chdir(old_path)
2 changes: 2 additions & 0 deletions InnerEye/Common/fixed_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ def repository_root_directory(path: Optional[PathOrString] = None) -> Path:
DEFAULT_AML_LOGS_DIR = "azureml-logs"

DEFAULT_LOGS_DIR_NAME = "logs"
LOG_FILE_NAME = "stdout.txt"

DEFAULT_MODEL_SUMMARIES_DIR_PATH = Path(DEFAULT_LOGS_DIR_NAME) / "model_summaries"
# The folder at the project root directory that holds datasets for local execution.
DATASETS_DIR_NAME = "datasets"
Expand Down
11 changes: 0 additions & 11 deletions InnerEye/Common/fixed_paths_for_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,6 @@ def full_ml_test_data_path(path: str = "") -> Path:
return _full_test_data_path("ML", path)


def full_azure_test_data_path(path: str = "") -> Path:
    """
    Resolves a path that is given relative to the Tests/Azure/test_data folder into its
    full absolute path.
    :param path: A path relative to the Tests/Azure/test_data folder.
    :return: The full absolute path of the argument.
    """
    return _full_test_data_path(prefix="Azure", suffix=path)


def _full_test_data_path(prefix: str, suffix: str) -> Path:
    """
    Builds the absolute path <tests_root>/<prefix>/test_data/<suffix>.
    :param prefix: The folder underneath the tests root (for example "ML").
    :param suffix: The path relative to that folder's test_data directory.
    :return: The full absolute path.
    """
    return tests_root_directory() / prefix / "test_data" / suffix
Expand Down
Loading

0 comments on commit 0d479ba

Please sign in to comment.