ENH: Upgrading package versions for security patches (#757)
* 🚧 💥 Update to secure versions of packages

* ⬆️ Upgrade hi-ml version

* 🏷️ Fix mypy and update windows env

* ✅ Update current epoch value in lightning tests

* 🐛 Fix windows line endings

* ✅ Remove checkpoint load epoch check

* 📌 Lock env anew

* 🎨 🐛 Add merge_conda_files() to common_util

* ✅ Add logging to tests

* 🚧 Update VarNetWithImageLogging logger syntax

* ✅ Fix Train2Nodes tests

* ✅ Remove cwd change, update CIFAR SSL metrics

* ⚰️ Remove unnecessary PL backwards compatibility

* 📌 Lock env

* 📌 Upgrade to hi-ml v0.2.5

* 📌 Testing lightning 1.6.5

* ♻️ Resolve PR comments
peterhessey committed Sep 14, 2022
1 parent 7894498 commit 5b21840
Showing 15 changed files with 210 additions and 142 deletions.
81 changes: 80 additions & 1 deletion InnerEye/Common/common_util.py
@@ -12,7 +12,14 @@
 from enum import Enum
 from functools import wraps
 from pathlib import Path
-from typing import Any, Callable, Generator, Iterable, List, Optional, Union
+from typing import Any, Callable, Dict, Generator, Iterable, List, Optional, Union
 
+import conda_merge
+import ruamel.yaml
+from health_azure.utils import (
+    CONDA_CHANNELS, CONDA_DEPENDENCIES, CONDA_NAME, CONDA_PIP, CondaDependencies, PinnedOperator,
+    _log_conda_dependencies_stats, _retrieve_unique_deps, is_conda_file_with_pip_include, is_pip_include_dependency
+)
+
 from InnerEye.Common.fixed_paths import repository_root_directory
 from InnerEye.Common.type_annotations import PathOrString
@@ -427,3 +434,75 @@ def change_working_directory(path_or_str: PathOrString) -> Generator:
     os.chdir(new_path)
     yield
     os.chdir(old_path)
+
+
+def merge_conda_files(
+    conda_files: List[Path],
+    result_file: Path,
+    pip_files: Optional[List[Path]] = None,
+) -> None:
+    """
+    Merges the given Conda environment files using the conda_merge package, optionally adds any
+    dependencies from pip requirements files, and writes the merged file to disk.
+    :param conda_files: The Conda environment files to read.
+    :param result_file: The location where the merge results should be written.
+    :param pip_files: An optional list of one or more pip requirements files including extra dependencies.
+    """
+    env_definitions: List[Any] = []
+    for file in conda_files:
+        _, pip_without_include = is_conda_file_with_pip_include(file)
+        env_definitions.append(pip_without_include)
+    unified_definition = {}
+
+    extra_pip_deps = []
+    for pip_file in pip_files or []:
+        additional_pip_deps = [d for d in pip_file.read_text().split("\n") if d and not is_pip_include_dependency(d)]
+        extra_pip_deps.extend(additional_pip_deps)
+
+    name = conda_merge.merge_names(env.get(CONDA_NAME) for env in env_definitions)
+    if name:
+        unified_definition[CONDA_NAME] = name
+
+    try:
+        channels = conda_merge.merge_channels(env.get(CONDA_CHANNELS) for env in env_definitions)
+    except conda_merge.MergeError:
+        logging.error("Failed to merge channel priorities.")
+        raise
+    if channels:
+        unified_definition[CONDA_CHANNELS] = channels
+
+    try:
+        deps_to_merge = [env.get(CONDA_DEPENDENCIES) for env in env_definitions]
+        if len(extra_pip_deps) > 0:
+            deps_to_merge.append([{CONDA_PIP: extra_pip_deps}])
+        deps = conda_merge.merge_dependencies(deps_to_merge)
+
+        # Get conda dependencies and pip dependencies from specification
+        pip_deps_entries = [d for d in deps if isinstance(d, dict) and CONDA_PIP in d]  # type: ignore
+        if len(pip_deps_entries) == 0:
+            raise ValueError("Didn't find a dictionary with the key 'pip' in the list of dependencies")
+        pip_deps_entry: Dict[str, List[str]] = pip_deps_entries[0]
+        pip_deps = pip_deps_entry[CONDA_PIP]
+        # temporarily remove pip dependencies from deps, to be added back after deduplication
+        deps.remove(pip_deps_entry)
+
+        # remove all non-pip duplicates from the list of dependencies
+        unique_deps = _retrieve_unique_deps(deps, PinnedOperator.CONDA)
+
+        unique_pip_deps = sorted(_retrieve_unique_deps(pip_deps, PinnedOperator.PIP))
+
+        # finally add back the deduplicated list of dependencies
+        unique_deps.append({CONDA_PIP: unique_pip_deps})  # type: ignore
+
+    except conda_merge.MergeError:
+        logging.error("Failed to merge dependencies.")
+        raise
+    if unique_deps:
+        unified_definition[CONDA_DEPENDENCIES] = unique_deps
+    else:
+        raise ValueError("No dependencies found in any of the conda files.")
+
+    with result_file.open("w") as f:
+        ruamel.yaml.dump(unified_definition, f, indent=2, default_flow_style=False)
+    _log_conda_dependencies_stats(CondaDependencies(result_file), "Merged Conda environment")
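A minimal usage sketch of the new helper; the file names below are hypothetical, but the signature is the one added above:

```python
from pathlib import Path

from InnerEye.Common.common_util import merge_conda_files

# Hypothetical inputs: two Conda environment files plus an extra pip requirements file.
conda_files = [Path("environment.yml"), Path("environment-extra.yml")]
pip_files = [Path("requirements-extra.txt")]

# Merges the environments, de-duplicates conda and pip dependencies, and writes
# the result to merged_environment.yml.
merge_conda_files(conda_files, result_file=Path("merged_environment.yml"), pip_files=pip_files)
```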
12 changes: 3 additions & 9 deletions InnerEye/ML/SSL/lightning_modules/ssl_online_evaluator.py
@@ -97,16 +97,10 @@ def on_pretrain_routine_start(self, trainer: pl.Trainer, pl_module: pl.Lightning
                                       p=self.drop_p,
                                       n_hidden=self.hidden_dim)
         self.evaluator.to(pl_module.device)
-        if hasattr(trainer, "accelerator_connector"):
-            # This works with Lightning 1.3.8
-            accelerator = trainer.accelerator_connector
-        elif hasattr(trainer, "_accelerator_connector"):
-            # This works with Lightning 1.5.5
-            accelerator = trainer._accelerator_connector
-        else:
-            raise ValueError("Unable to retrieve the accelerator information")
+        accelerator = trainer._accelerator_connector
+
         if accelerator.is_distributed:
-            if accelerator.use_ddp:
+            if accelerator.strategy.strategy_name == "ddp":
                 self.evaluator = SyncBatchNorm.convert_sync_batchnorm(self.evaluator)
                 self.evaluator = DistributedDataParallel(self.evaluator, device_ids=[pl_module.device])  # type: ignore
         else:
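With the Lightning 1.3/1.5 fallbacks removed, the evaluator setup reduces to the 1.6-style connector and strategy check. A condensed sketch, assuming a trainer built with PyTorch Lightning 1.6.5 (the helper name is illustrative):

```python
import pytorch_lightning as pl
import torch
from torch.nn import SyncBatchNorm
from torch.nn.parallel import DistributedDataParallel

def wrap_evaluator(trainer: pl.Trainer, evaluator: torch.nn.Module, device: torch.device) -> torch.nn.Module:
    # PL 1.6 exposes the connector only under its private name.
    accelerator = trainer._accelerator_connector
    if accelerator.is_distributed:
        # strategy_name replaces the use_ddp flag that older PL versions offered.
        if accelerator.strategy.strategy_name == "ddp":
            evaluator = SyncBatchNorm.convert_sync_batchnorm(evaluator)
            evaluator = DistributedDataParallel(evaluator, device_ids=[device])
    return evaluator
```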
4 changes: 2 additions & 2 deletions InnerEye/ML/configs/other/fastmri_varnet.py
@@ -32,8 +32,8 @@ class VarNetWithImageLogging(VarNetModule):
"""

def log_image(self, name: str, image: torch.Tensor) -> None:
experiments = self.logger.experiment if isinstance(self.logger.experiment, list) \
else [self.logger.experiment]
experiments = self.loggers[0].experiment if isinstance(self.loggers[0].experiment, list) \
else [self.loggers[0].experiment]
for experiment in experiments:
if isinstance(experiment, SummaryWriter):
experiment.add_image(name, image, global_step=self.global_step)
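This change follows the PyTorch Lightning 1.6 logger API, where `LightningModule.loggers` is the list-valued accessor. A self-contained restatement of the pattern, assuming a TensorBoard `SummaryWriter` experiment (the class name is hypothetical):

```python
import torch
from pytorch_lightning import LightningModule
from torch.utils.tensorboard import SummaryWriter

class ImageLoggingModule(LightningModule):
    """Hypothetical module showing the PL 1.6 multi-logger access pattern."""

    def log_image(self, name: str, image: torch.Tensor) -> None:
        # self.loggers replaces self.logger for multi-logger setups in PL 1.6;
        # a logger's `experiment` may itself be a list, so normalise it first.
        experiment = self.loggers[0].experiment
        experiments = experiment if isinstance(experiment, list) else [experiment]
        for experiment in experiments:
            if isinstance(experiment, SummaryWriter):
                experiment.add_image(name, image, global_step=self.global_step)
```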
2 changes: 1 addition & 1 deletion InnerEye/ML/lightning_base.py
@@ -289,7 +289,7 @@ def on_train_end(self) -> None:
         This hook is called at the very end of training. Use that to write the very last set of training and
         validation metrics from the StoringLogger to disk.
         """
-        self.read_epoch_results_from_logger_and_store(epoch=self.current_epoch)
+        self.read_epoch_results_from_logger_and_store(epoch=self.current_epoch-1)
 
     @rank_zero_only
     def read_epoch_results_from_logger_and_store(self, epoch: int) -> None:
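The `- 1` compensates for a Lightning counter change: by the time `on_train_end` fires in PL 1.6, `current_epoch` has already advanced past the last trained epoch. An illustrative sketch (the callback is hypothetical):

```python
import pytorch_lightning as pl

class LastEpochReporter(pl.Callback):
    def on_train_end(self, trainer: pl.Trainer, pl_module: pl.LightningModule) -> None:
        # In PL 1.6, after fit() completes max_epochs=N, current_epoch reads N,
        # while the last epoch that actually ran has index N - 1.
        print(f"Last trained epoch index: {trainer.current_epoch - 1}")
```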
11 changes: 5 additions & 6 deletions InnerEye/ML/model_training.py
@@ -120,7 +120,7 @@ def create_lightning_trainer(container: LightningContainer,
                                                save_top_k=0)
     recovery_checkpoint_callback = ModelCheckpoint(dirpath=str(container.checkpoint_folder),
                                                    filename=AUTOSAVE_CHECKPOINT_FILE_NAME,
-                                                   every_n_val_epochs=container.autosave_every_n_val_epochs,
+                                                   every_n_epochs=container.autosave_every_n_val_epochs,
                                                    save_last=False)
     callbacks: List[Callback] = [
         last_checkpoint_callback,
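The only change in this hunk is the argument rename: PyTorch Lightning replaced `every_n_val_epochs` with `every_n_epochs`, and the old name is gone by the 1.6 releases. A sketch with illustrative paths and values:

```python
from pytorch_lightning.callbacks import ModelCheckpoint

recovery_checkpoint = ModelCheckpoint(
    dirpath="outputs/checkpoints",  # illustrative, not the repo's real folder
    filename="autosave",
    every_n_epochs=1,               # formerly every_n_val_epochs
    save_last=False,
)
```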
@@ -264,11 +264,10 @@ def model_train(checkpoint_path: Optional[Path],
     lightning_model.storing_logger = storing_logger
 
     logging.info("Starting training")
-    # When training models that are not built-in InnerEye models, we have no guarantee that they write
-    # files to the right folder. Best guess is to change the current working directory to where files should go.
-    with change_working_directory(container.outputs_folder):
-        trainer.fit(lightning_model, datamodule=data_module)
-        trainer.logger.close()  # type: ignore
+
+    trainer.fit(lightning_model, datamodule=data_module)
+    trainer.logger.close()  # type: ignore
+
     world_size = getattr(trainer, "world_size", 0)
     is_azureml_run = not is_offline_run_context(RUN_CONTEXT)
     # Per-subject model outputs for regression models are written per rank, and need to be aggregated here.
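For reference, the dropped wrapper is the `change_working_directory` context manager whose tail is visible in the common_util.py diff above: it switches into the given directory and restores the old one on exit. A sketch of the usage that this commit removes (the path is hypothetical):

```python
from InnerEye.Common.common_util import change_working_directory

# Before this commit, training ran inside this context manager so that files
# written relative to the cwd landed in the outputs folder:
with change_working_directory("outputs"):
    ...  # trainer.fit(...) used to run here
```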
52 changes: 29 additions & 23 deletions InnerEye/ML/run_ml.py
@@ -15,33 +15,37 @@
 import torch.multiprocessing
 from azureml._restclient.constants import RunStatus
 from azureml.core import Model, Run, model
+from health_azure import AzureRunInfo
+from health_azure.utils import ENVIRONMENT_VERSION, create_run_recovery_id, is_global_rank_zero
 from pytorch_lightning import LightningModule, seed_everything
 from pytorch_lightning.core.datamodule import LightningDataModule
 from torch.utils.data import DataLoader
 
 from InnerEye.Azure import azure_util
 from InnerEye.Azure.azure_config import AzureConfig
 from InnerEye.Azure.azure_runner import ENV_OMPI_COMM_WORLD_RANK, get_git_tags
-from InnerEye.Azure.azure_util import CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY, DEFAULT_CROSS_VALIDATION_SPLIT_INDEX, \
-    EFFECTIVE_RANDOM_SEED_KEY_NAME, IS_ENSEMBLE_KEY_NAME, MODEL_ID_KEY_NAME, PARENT_RUN_CONTEXT, \
-    PARENT_RUN_ID_KEY_NAME, RUN_CONTEXT, RUN_RECOVERY_FROM_ID_KEY_NAME, RUN_RECOVERY_ID_KEY_NAME, \
-    get_all_environment_files, is_offline_run_context
+from InnerEye.Azure.azure_util import (
+    CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY, DEFAULT_CROSS_VALIDATION_SPLIT_INDEX, EFFECTIVE_RANDOM_SEED_KEY_NAME,
+    IS_ENSEMBLE_KEY_NAME, MODEL_ID_KEY_NAME, PARENT_RUN_CONTEXT, PARENT_RUN_ID_KEY_NAME, RUN_CONTEXT,
+    RUN_RECOVERY_FROM_ID_KEY_NAME, RUN_RECOVERY_ID_KEY_NAME, get_all_environment_files, is_offline_run_context
+)
 from InnerEye.Common import fixed_paths
-from InnerEye.Common.common_util import (BASELINE_COMPARISONS_FOLDER, BASELINE_WILCOXON_RESULTS_FILE,
-                                         CROSSVAL_RESULTS_FOLDER, ENSEMBLE_SPLIT_NAME, FULL_METRICS_DATAFRAME_FILE,
-                                         METRICS_AGGREGATES_FILE, ModelProcessing,
-                                         OTHER_RUNS_SUBDIR_NAME, SCATTERPLOTS_SUBDIR_NAME, SUBJECT_METRICS_FILE_NAME,
-                                         change_working_directory, get_best_epoch_results_path, is_windows,
-                                         logging_section, print_exception, remove_file_or_directory)
+from InnerEye.Common.common_util import (
+    BASELINE_COMPARISONS_FOLDER, BASELINE_WILCOXON_RESULTS_FILE, CROSSVAL_RESULTS_FOLDER, ENSEMBLE_SPLIT_NAME,
+    FULL_METRICS_DATAFRAME_FILE, METRICS_AGGREGATES_FILE, OTHER_RUNS_SUBDIR_NAME, SCATTERPLOTS_SUBDIR_NAME,
+    SUBJECT_METRICS_FILE_NAME, ModelProcessing, change_working_directory, get_best_epoch_results_path,
+    is_windows, logging_section, merge_conda_files, print_exception, remove_file_or_directory
+)
 from InnerEye.Common.fixed_paths import INNEREYE_PACKAGE_NAME, PYTHON_ENVIRONMENT_NAME
 from InnerEye.Common.type_annotations import PathOrString
 from InnerEye.ML.baselines_util import compare_folders_and_run_outputs
-from InnerEye.ML.common import CHECKPOINT_FOLDER, EXTRA_RUN_SUBFOLDER, FINAL_ENSEMBLE_MODEL_FOLDER, \
-    FINAL_MODEL_FOLDER, \
-    ModelExecutionMode
+from InnerEye.ML.common import (
+    CHECKPOINT_FOLDER, EXTRA_RUN_SUBFOLDER, FINAL_ENSEMBLE_MODEL_FOLDER, FINAL_MODEL_FOLDER, ModelExecutionMode
+)
 from InnerEye.ML.config import SegmentationModelBase
-from InnerEye.ML.deep_learning_config import DeepLearningConfig, ModelCategory, MultiprocessingStartMethod, \
-    load_checkpoint
+from InnerEye.ML.deep_learning_config import (
+    DeepLearningConfig, ModelCategory, MultiprocessingStartMethod, load_checkpoint
+)
 from InnerEye.ML.lightning_base import InnerEyeContainer
 from InnerEye.ML.lightning_container import InnerEyeInference, LightningContainer
 from InnerEye.ML.lightning_loggers import StoringLogger
@@ -50,16 +54,16 @@
 from InnerEye.ML.model_inference_config import ModelInferenceConfig
 from InnerEye.ML.model_testing import model_test
 from InnerEye.ML.model_training import create_lightning_trainer, model_train
-from InnerEye.ML.reports.notebook_report import generate_classification_crossval_notebook, \
-    generate_classification_multilabel_notebook, generate_classification_notebook, generate_segmentation_notebook, \
-    get_ipynb_report_name, reports_folder
+from InnerEye.ML.reports.notebook_report import (
+    generate_classification_crossval_notebook, generate_classification_multilabel_notebook,
+    generate_classification_notebook, generate_segmentation_notebook, get_ipynb_report_name, reports_folder
+)
 from InnerEye.ML.scalar_config import ScalarModelBase
 from InnerEye.ML.utils.checkpoint_handling import CheckpointHandler, download_all_checkpoints_from_run
 from InnerEye.ML.visualizers import activation_maps
-from InnerEye.ML.visualizers.plot_cross_validation import \
+from InnerEye.ML.visualizers.plot_cross_validation import (
     get_config_and_results_for_offline_runs, plot_cross_validation_from_files
-from health_azure import AzureRunInfo
-from health_azure.utils import ENVIRONMENT_VERSION, create_run_recovery_id, is_global_rank_zero, merge_conda_files
+)
 
 ModelDeploymentHookSignature = Callable[[LightningContainer, AzureConfig, Model, ModelProcessing], Any]
 PostCrossValidationHookSignature = Callable[[ModelConfigBase, Path], None]
Expand Down Expand Up @@ -797,8 +801,10 @@ def create_ensemble_model_and_run_inference(self) -> None:
             remove_file_or_directory(other_runs_dir)
 
     def plot_cross_validation_and_upload_results(self) -> Path:
-        from InnerEye.ML.visualizers.plot_cross_validation import crossval_config_from_model_config, \
-            plot_cross_validation, unroll_aggregate_metrics
+        from InnerEye.ML.visualizers.plot_cross_validation import (
+            crossval_config_from_model_config, plot_cross_validation, unroll_aggregate_metrics
+        )
+
         # perform aggregation as cross val splits are now ready
         plot_crossval_config = crossval_config_from_model_config(self.innereye_config)
         plot_crossval_config.run_recovery_id = PARENT_RUN_CONTEXT.tags[RUN_RECOVERY_ID_KEY_NAME]
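The substantive effect of this import reshuffle: `merge_conda_files` is now consumed from the copy added to `common_util.py` above, rather than from hi-ml's `health_azure.utils`:

```python
# Before this commit:
# from health_azure.utils import merge_conda_files
# After:
from InnerEye.Common.common_util import merge_conda_files
```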
30 changes: 15 additions & 15 deletions InnerEye/ML/runner.py
@@ -24,36 +24,36 @@
 # in a submodule
 fixed_paths.add_submodules_to_path()
 
+import matplotlib
 from azureml._base_sdk_common import user_agent
 from azureml._restclient.constants import RunStatus
 from azureml.core import Run, ScriptRunConfig
 from health_azure import AzureRunInfo, submit_to_azure_if_needed
-from health_azure.utils import create_run_recovery_id, is_global_rank_zero, is_local_rank_zero, merge_conda_files, \
-    to_azure_friendly_string
-import matplotlib
+from health_azure.utils import create_run_recovery_id, is_global_rank_zero, is_local_rank_zero, to_azure_friendly_string
 
-from InnerEye.Azure.tensorboard_monitor import AMLTensorBoardMonitorConfig, monitor
 from InnerEye.Azure import azure_util
 from InnerEye.Azure.azure_config import AzureConfig, ParserResult, SourceConfig
-from InnerEye.Azure.azure_runner import (DEFAULT_DOCKER_BASE_IMAGE, create_dataset_configs, create_experiment_name,
-                                         create_runner_parser,
-                                         get_git_tags,
-                                         parse_args_and_add_yaml_variables,
-                                         parse_arguments, additional_run_tags,
-                                         set_environment_variables_for_multi_node)
-from InnerEye.Azure.azure_util import (RUN_CONTEXT, RUN_RECOVERY_ID_KEY_NAME, get_all_environment_files,
-                                       is_offline_run_context)
+from InnerEye.Azure.azure_runner import (
+    DEFAULT_DOCKER_BASE_IMAGE, additional_run_tags, create_dataset_configs,
+    create_experiment_name, create_runner_parser, get_git_tags,
+    parse_args_and_add_yaml_variables, parse_arguments, set_environment_variables_for_multi_node
+)
+from InnerEye.Azure.azure_util import (
+    RUN_CONTEXT, RUN_RECOVERY_ID_KEY_NAME, get_all_environment_files, is_offline_run_context
+)
 from InnerEye.Azure.run_pytest import download_pytest_result, run_pytest
-from InnerEye.Common.common_util import (FULL_METRICS_DATAFRAME_FILE, METRICS_AGGREGATES_FILE,
-                                         is_linux, logging_to_stdout)
+from InnerEye.Azure.tensorboard_monitor import AMLTensorBoardMonitorConfig, monitor
+from InnerEye.Common.common_util import (
+    FULL_METRICS_DATAFRAME_FILE, METRICS_AGGREGATES_FILE, is_linux, logging_to_stdout, merge_conda_files
+)
 from InnerEye.Common.generic_parsing import GenericConfig
 from InnerEye.ML.common import DATASET_CSV_FILE_NAME
 from InnerEye.ML.deep_learning_config import DeepLearningConfig
 from InnerEye.ML.lightning_base import InnerEyeContainer
+from InnerEye.ML.lightning_container import LightningContainer
 from InnerEye.ML.model_config_base import ModelConfigBase
 from InnerEye.ML.run_ml import MLRunner, ModelDeploymentHookSignature, PostCrossValidationHookSignature
 from InnerEye.ML.utils.config_loader import ModelConfigLoader
-from InnerEye.ML.lightning_container import LightningContainer
 
 # We change the current working directory before starting the actual training. However, this throws off starting
 # the child training threads because sys.argv[0] is a relative path when running in AzureML. Turn that into an absolute