Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Enable tiling non-PANDA WSI datasets #621

Merged
merged 16 commits into from
Dec 16, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Create and test mock slides dataset
  • Loading branch information
dccastro committed Dec 16, 2021
commit a551a9b617cb529fa8d7d0bff208741e2a19c1a3
40 changes: 40 additions & 0 deletions Tests/ML/histopathology/datasets/test_slides_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import os

import pandas as pd

from InnerEye.Common.fixed_paths_for_tests import tests_root_directory
from InnerEye.ML.Histopathology.datasets.base_dataset import SlidesDataset
from InnerEye.ML.Histopathology.utils.naming import SlideKey

# Directory used as the dataset root by the mock slides dataset below; the path
# "ML/histopathology/test_data" is resolved via `tests_root_directory` and
# converted to a plain string.
HISTO_TEST_DATA_DIR = str(tests_root_directory("ML/histopathology/test_data"))


class MockSlidesDataset(SlidesDataset):
    """Minimal `SlidesDataset` subclass backed by the checked-in test data.

    It overrides the default CSV file name and the metadata column names, and
    always uses `HISTO_TEST_DATA_DIR` as its root, so it can be constructed
    without any arguments in tests.
    """

    # File name of the dataset CSV for this mock dataset.
    DEFAULT_CSV_FILENAME = "test_slides_dataset.csv"
    # Names of the metadata columns declared for this mock dataset.
    METADATA_COLUMNS = ('meta1', 'meta2')

    def __init__(self) -> None:
        """Initialise the base dataset with the test data directory as root."""
        super().__init__(root=HISTO_TEST_DATA_DIR)


def test_slides_dataset() -> None:
    """Check the dataframe, length and first-sample structure of the mock dataset."""
    slides = MockSlidesDataset()

    # Dataset-level checks: dataframe type, index name and length.
    frame = slides.dataset_df
    assert isinstance(frame, pd.DataFrame)
    assert frame.index.name == slides.SLIDE_ID_COLUMN
    assert len(slides) == len(frame)

    # Sample-level checks: a dict keyed exclusively by SlideKey members.
    first = slides[0]
    assert isinstance(first, dict)
    for key in first:
        assert isinstance(key, SlideKey)

    required_keys = (
        SlideKey.SLIDE_ID,
        SlideKey.IMAGE,
        SlideKey.IMAGE_PATH,
        SlideKey.LABEL,
        SlideKey.METADATA,
    )
    for required in required_keys:
        assert required in first

    # The image path must be a string pointing at an existing file.
    path = first[SlideKey.IMAGE_PATH]
    assert isinstance(path, str)
    assert os.path.isfile(path)

    # Every metadata column declared on the class must appear in the sample.
    meta = first[SlideKey.METADATA]
    assert isinstance(meta, dict)
    for column in type(slides).METADATA_COLUMNS:
        assert column in meta
16 changes: 4 additions & 12 deletions Tests/ML/histopathology/preprocessing/test_slide_loading.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,19 @@
import os
from typing import Any, Dict, Optional
from typing import Optional

import numpy as np
import pytest
from cucim import CuImage
from monai.data.image_reader import WSIReader

from InnerEye.Common.fixed_paths_for_tests import tests_root_directory
from InnerEye.ML.Histopathology.datasets.default_paths import PANDA_DATASET_DIR
from InnerEye.ML.Histopathology.datasets.panda_dataset import PandaDataset
from InnerEye.ML.Histopathology.preprocessing.tiling import tile_array_2d
from InnerEye.ML.Histopathology.preprocessing.loading import LoadROId, get_luminance, load_slide_at_level, segment_foreground
from InnerEye.ML.Histopathology.utils.naming import SlideKey
from Tests.ML.histopathology.datasets.test_slides_dataset import MockSlidesDataset

# String path to the example slide file "panda_wsi_example.tiff" in the
# histopathology test data folder, resolved via `tests_root_directory`.
TEST_IMAGE_PATH = str(tests_root_directory("ML/histopathology/test_data/panda_wsi_example.tiff"))


def _get_sample() -> Dict[SlideKey, Any]:
    """Return the sample at index 0 of a `PandaDataset` built from `PANDA_DATASET_DIR`."""
    panda = PandaDataset(PANDA_DATASET_DIR)
    first_sample = panda[0]
    return first_sample


def test_load_slide() -> None:
level = 2
reader = WSIReader('cuCIM')
Expand Down Expand Up @@ -134,13 +127,12 @@ def test_get_bounding_box(level: int, foreground_threshold: Optional[float]) ->
assert level0_bbox_margin.h == level0_bbox.h + 2 * level0_margin


@pytest.mark.skipif(not os.path.isdir(PANDA_DATASET_DIR),
reason="PANDA dataset is unavailable")
@pytest.mark.parametrize('level', [1, 2])
@pytest.mark.parametrize('margin', [0, 42])
@pytest.mark.parametrize('foreground_threshold', [None, 215])
def test_load_roi(level: int, margin: int, foreground_threshold: Optional[float]) -> None:
sample = _get_sample()
dataset = MockSlidesDataset()
sample = dataset[0]
reader = WSIReader('cuCIM')
loader = LoadROId(reader, image_key=SlideKey.IMAGE, level=level, margin=margin,
foreground_threshold=foreground_threshold)
Expand Down
2 changes: 2 additions & 0 deletions Tests/ML/histopathology/test_data/test_slides_dataset.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
slide_id,image,label,meta1,meta2
foo,panda_wsi_example.tiff,0,bar,baz