Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow SlideData to use existing h5path files #337

Open
wants to merge 22 commits into
base: load-data-in-workers
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add test to exercise pipeline on h5path
  • Loading branch information
tddough98 committed Oct 20, 2022
commit ba9cfe02175d04e471c4f5d5c2e6d08333895eb7
6 changes: 4 additions & 2 deletions pathml/core/slide_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ def __init__(

self.masks = pathml.core.Masks(h5manager=self.h5manager, masks=masks)
self.tiles = pathml.core.Tiles(h5manager=self.h5manager, tiles=tiles)
self._add_tiles = tiles is None
self._add_tiles = tiles is None and not _load_from_h5path

self.tile_size = tile_size
self._tile_stride = tile_stride
Expand Down Expand Up @@ -298,7 +298,9 @@ def run(
assert isinstance(
pipeline, pathml.preprocessing.pipeline.Pipeline
), f"pipeline is of type {type(pipeline)} but must be of type pathml.preprocessing.pipeline.Pipeline"
assert self.slide is not None, "cannot run pipeline because self.slide is None"
assert (
self.slide is not None or not self._add_tiles
), "cannot run pipeline because self.slide is None and no tiles already exist"

shutdown_after = False

Expand Down
2 changes: 2 additions & 0 deletions pathml/preprocessing/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -999,8 +999,10 @@ def apply(self, tile):

class DropTileException(Exception):
    """
    Stops the SlideData from adding a tile to the h5path.

    The class adds nothing to :class:`Exception`; it exists only as a
    distinct exception type that can be raised and caught by name.
    """


class TissueDetectionHE(Transform):
"""
Detect tissue regions from H&E stained slide.
Expand Down
22 changes: 22 additions & 0 deletions tests/integration_tests/test_pipeline_running.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,3 +188,25 @@ def test_pipeline_overlapping_tiles(tmp_path, stride, pad, tile_size):
)
expected = AddMean().F(im).astype(np.float16)
np.testing.assert_equal(readslidedata.tiles[(1000, 1000)].image, expected)


@pytest.mark.parametrize("dist", [False, True])
def test_pipeline_on_h5path(tmp_path, dist, cluster):
    """
    Run a pipeline on a slide, write it to h5path, then reload that h5path
    and run the same pipeline on it again.

    Parametrized over ``dist`` so both the local and the distributed
    (``Client(cluster)``) code paths are exercised.
    """
    # Keep the output inside tmp_path; pytest gives each test invocation its
    # own directory, so a fixed file name cannot collide between runs.
    save_path = str(tmp_path / "HE_slide.h5")
    pipeline = Pipeline([BoxBlur(kernel_size=15)])
    cli = Client(cluster) if dist else None
    try:
        # Make h5path
        slide = HESlide("tests/testdata/small_HE.svs")
        slide.run(pipeline, distributed=dist, client=cli)
        slide.write(path=save_path)
        # Load saved h5path and run pipeline
        h5path_slide = HESlide(save_path, dtype=np.uint8)
        h5path_slide.run(pipeline, distributed=dist, client=cli)
        h5path_slide.write(path=save_path)
    finally:
        # Shut the client down even when one of the steps above raises,
        # so a failing run does not leave it open.
        if cli is not None:
            cli.shutdown()

    # test out the dataset
    dataset = TileDataset(save_path)
    assert len(dataset) == len(slide.tiles)