Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Commit

Permalink
Speed up dicom loading by factor 30x (#427)
Browse files Browse the repository at this point in the history
* Speed up dicom loading by factor 30x

* Fix doc

* CHANGELOG.md

* Flake8

* Use the enum not plain text
  • Loading branch information
melanibe committed Apr 7, 2021
1 parent 821cb3b commit 59d6995
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 56 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ refactoring, to use PyTorch Lightning as the foundation for all training. As a c
This is because patch sampling is expensive to compute, taking 1min per large CT scan.
- ([#336](https://github.com/microsoft/InnerEye-DeepLearning/pull/336)) Renamed `HeadAndNeckBase` to `HeadAndNeckPaper`,
and `ProstateBase` to `ProstatePaper`.
- ([#427](https://github.com/microsoft/InnerEye-DeepLearning/pull/427)) Move dicom loading function from SimpleITK to pydicom. Loading time improved by 30x.

### Fixed
- When registering a model, it now has a consistent folder structure, described [here](docs/deploy_on_aml.md). This
Expand Down
18 changes: 6 additions & 12 deletions InnerEye/ML/utils/io_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import SimpleITK as sitk
import numpy as np
import pandas as pd
import pydicom as dicom
import torch
from tabulate import tabulate

Expand Down Expand Up @@ -258,24 +259,17 @@ def load_dicom_image(path: PathOrString) -> np.ndarray:
Loads an array from a single dicom file.
:param path: The path to the dicom file.
"""
reader = sitk.ImageFileReader()
reader.SetFileName(str(path))
image = reader.Execute()
pixels = sitk.GetArrayFromImage(image)

reader.ReadImageInformation()
if reader.GetMetaData(DicomTags.PhotometricInterpretation.value).strip() \
== PhotometricInterpretation.MONOCHROME1.value:
# invert image so bit interpretation is like MONOCHROME2, where a 0 bit is black
bits_stored = int(reader.GetMetaData(DicomTags.BitsStored.value))
pixel_repr = int(reader.GetMetaData(DicomTags.PixelRepresentation.value))
ds = dicom.dcmread(path)
pixels = ds.pixel_array
bits_stored = ds.BitsStored
if ds.PhotometricInterpretation == PhotometricInterpretation.MONOCHROME1.value:
pixel_repr = ds.PixelRepresentation
if pixel_repr == 0: # unsigned
pixels = 2 ** bits_stored - 1 - pixels
elif pixel_repr == 1: # signed
pixels = -1 * (pixels + 1)
else:
raise ValueError("Unknown value for DICOM tag 0028,0103 PixelRepresentation")

# Return a float array, we may resize this in load_3d_images_and_stack, and interpolation will not work on int
return pixels.astype(np.float)

Expand Down
6 changes: 3 additions & 3 deletions Tests/ML/models/test_scalar_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def test_train_classification_model(class_name: str, test_output_dirs: OutputFol
"""
check_log_file(inference_metrics_path, inference_metrics_expected, ignore_columns=[])


@pytest.mark.skipif(common_util.is_windows(), reason="Has OOM issues on windows build")
@pytest.mark.cpu_and_gpu
def test_train_classification_multilabel_model(test_output_dirs: OutputFolderForTests) -> None:
"""
Expand Down Expand Up @@ -307,7 +307,7 @@ def test_run_ml_with_segmentation_model(test_output_dirs: OutputFolderForTests)
azure_config.train = True
MLRunner(config, azure_config).run()


@pytest.mark.skipif(common_util.is_windows(), reason="Has OOM issues on windows build")
def test_runner1(test_output_dirs: OutputFolderForTests) -> None:
"""
Test starting a classification model via the commandline runner. Test if we can provide overrides
Expand Down Expand Up @@ -338,7 +338,7 @@ def test_runner1(test_output_dirs: OutputFolderForTests) -> None:
assert str(config.outputs_folder).startswith(output_root)
assert (config.logs_folder / runner.LOG_FILE_NAME).exists()


@pytest.mark.skipif(common_util.is_windows(), reason="Has OOM issues on windows build")
def test_runner2(test_output_dirs: OutputFolderForTests) -> None:
"""
Test starting a classification model via the commandline runner, and provide the same arguments
Expand Down
69 changes: 28 additions & 41 deletions Tests/ML/utils/test_io_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@
import os
from pathlib import Path
import shutil
from typing import Any, Callable, Optional, Tuple
from typing import Any, Optional, Tuple
from unittest import mock
import zipfile

import SimpleITK as sitk
import numpy as np
import pydicom
import pytest
import torch
from skimage.transform import resize
Expand All @@ -20,7 +21,7 @@
from InnerEye.ML.dataset.sample import PatientDatasetSource, PatientMetadata
from InnerEye.ML.utils import io_util
from InnerEye.ML.utils.dataset_util import DatasetExample, store_and_upload_example
from InnerEye.ML.utils.io_util import DicomTags, ImageAndSegmentations, ImageHeader, PhotometricInterpretation, \
from InnerEye.ML.utils.io_util import ImageAndSegmentations, ImageHeader, PhotometricInterpretation, \
is_dicom_file_path, is_nifti_file_path, is_numpy_file_path, load_dicom_image, load_image_in_known_formats, \
load_images_and_stack, load_numpy_image, reverse_tuple_float3, load_dicom_series_and_save
from Tests.ML.util import assert_file_contains_string
Expand Down Expand Up @@ -240,37 +241,27 @@ def test_is_dicom_file(input: Tuple[str, bool]) -> None:
assert is_dicom_file_path(Path(file)) == expected


def write_test_dicom(array: np.ndarray, path: Path) -> None:
def write_test_dicom(array: np.ndarray, path: Path, is_monochrome2: bool = True,
bits_stored: Optional[int] = None) -> None:
"""
This saves the input array as a Dicom file.
This function DOES NOT create a usable Dicom file and is meant only for testing: tags are set to
random/default values so that pydicom does not complain when reading the file.
"""

# Writing a file directly with pydicom is cumbersome (all tags need to be set by hand), hence we use SimpleITK to
# create the file. However, SimpleITK does not let us set the tags directly, so we use pydicom to set them afterwards.
image = sitk.GetImageFromArray(array)
writer = sitk.ImageFileWriter()
writer.SetFileName(str(path))
writer.Execute(image)


def get_mock_function(is_monochrome2: bool, bits_stored: Optional[int] = None) -> Callable:
"""
SimpleITK does not allow us to set the Photometric Interpretation and Stored Bits tags when writing the Dicom image.
In these tests, if the image should be MONOCHROME1 we write an inverted image with tag MONOCHROME2
and use this wrapper around the SimpleITK metadata reader to make it look to the test like the tag was MONOCHROME1.
Similarly, we write images with StoredBits set to 16, but use this wrapper to change StoredBits while reading.
"""
get_metadata_function = sitk.ImageFileReader.GetMetaData

def mock_function(image_reader: sitk.ImageFileReader, key: str) -> str:
if bits_stored and key == DicomTags.BitsStored.value:
return str(bits_stored)
elif not is_monochrome2 and key == DicomTags.PhotometricInterpretation.value:
return PhotometricInterpretation.MONOCHROME1.value
else:
return get_metadata_function(image_reader, key)

return mock_function

ds = pydicom.dcmread(path)
ds.PhotometricInterpretation = PhotometricInterpretation.MONOCHROME2.value if is_monochrome2 else \
PhotometricInterpretation.MONOCHROME1.value
if bits_stored is not None:
ds.BitsStored = bits_stored
ds.save_as(path)

@pytest.mark.parametrize("is_signed", [True, False])
@pytest.mark.parametrize("is_monochrome2", [True, False])
Expand Down Expand Up @@ -301,17 +292,15 @@ def test_load_dicom_image_ones(test_output_dirs: OutputFolderForTests,

dcm_file = test_output_dirs.root_dir / "file.dcm"
assert is_dicom_file_path(dcm_file)
write_test_dicom(array=to_write, path=dcm_file)
write_test_dicom(array=to_write, path=dcm_file, is_monochrome2=is_monochrome2, bits_stored=1)

with mock.patch.object(sitk.ImageFileReader, 'GetMetaData',
new=get_mock_function(is_monochrome2=is_monochrome2, bits_stored=1)):
image = load_dicom_image(dcm_file)
assert image.ndim == 3 and image.shape == (1,) + array_size
assert np.array_equal(image, array[None, ...])
image = load_dicom_image(dcm_file)
assert image.ndim == 2 and image.shape == array_size
assert np.array_equal(image, array)

image_and_segmentation = load_image_in_known_formats(dcm_file, load_segmentation=False)
assert image_and_segmentation.images.ndim == 3 and image_and_segmentation.images.shape == (1,) + array_size
assert np.array_equal(image_and_segmentation.images, array[None, ...])
image_and_segmentation = load_image_in_known_formats(dcm_file, load_segmentation=False)
assert image_and_segmentation.images.ndim == 2 and image_and_segmentation.images.shape == array_size
assert np.array_equal(image_and_segmentation.images, array)


@pytest.mark.parametrize("is_signed", [True, False])
Expand Down Expand Up @@ -339,17 +328,15 @@ def test_load_dicom_image_random(test_output_dirs: OutputFolderForTests,

dcm_file = test_output_dirs.root_dir / "file.dcm"
assert is_dicom_file_path(dcm_file)
write_test_dicom(array=to_write, path=dcm_file)
write_test_dicom(array=to_write, path=dcm_file, is_monochrome2=is_monochrome2, bits_stored=bits_stored)

with mock.patch.object(sitk.ImageFileReader, 'GetMetaData',
new=get_mock_function(is_monochrome2=is_monochrome2, bits_stored=bits_stored)):
image = load_dicom_image(dcm_file)
assert image.ndim == 3 and image.shape == (1,) + array_size
assert np.array_equal(image, array[None, ...])
image = load_dicom_image(dcm_file)
assert image.ndim == 2 and image.shape == array_size
assert np.array_equal(image, array)

image_and_segmentation = load_image_in_known_formats(dcm_file, load_segmentation=False)
assert image_and_segmentation.images.ndim == 3 and image_and_segmentation.images.shape == (1,) + array_size
assert np.array_equal(image_and_segmentation.images, array[None, ...])
image_and_segmentation = load_image_in_known_formats(dcm_file, load_segmentation=False)
assert image_and_segmentation.images.ndim == 2 and image_and_segmentation.images.shape == array_size
assert np.array_equal(image_and_segmentation.images, array)


@pytest.mark.parametrize(["file_path", "expected_shape"],
Expand Down
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ dependencies:
- param==1.9.3
- pillow==8.1.2
- psutil==5.7.2
- pydicom==2.0.0
- pyflakes==2.2.0
- PyJWT==1.7.1
- pytest==6.0.1
Expand Down

0 comments on commit 59d6995

Please sign in to comment.