Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parameterize dtype for h5path with SlideData constructor #335

Open
wants to merge 8 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Use dtype from SlideData constructor
  • Loading branch information
tddough98 committed Oct 4, 2022
commit 6e6f8027c76608c227e0a14dfaa87bf330b6b796
9 changes: 6 additions & 3 deletions pathml/core/h5managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@
import pathml.core
import pathml.core.masks
import pathml.core.tile
from pathml.core.utils import readcounts
from pathml.core.utils import readcounts, get_tiles_dtype


class h5pathManager:
"""
Interface between slidedata object and data management on disk by h5py.
Interface between SlideData object and data management on disk by h5py.
"""

def __init__(self, h5path=None, slidedata=None):
Expand Down Expand Up @@ -50,9 +50,12 @@ def __init__(self, h5path=None, slidedata=None):
self.counts.filename = (
str(self.countspath.name) + "/tmpfile.h5ad"
)
# Default to float16 if there are no tiles
self.dtype = get_tiles_dtype(h5path) or np.dtype('float16')

else:
assert slidedata, f"must pass slidedata object to create h5path"
self.dtype = slidedata.dtype
# initialize h5path file hierarchy
# fields
fieldsgroup = self.h5.create_group("fields")
Expand Down Expand Up @@ -137,7 +140,7 @@ def add_tile(self, tile):
compression="gzip",
compression_opts=5,
shuffle=True,
dtype="float16",
dtype=self.dtype,
)

# save tile_shape as an attribute to enforce consistency
Expand Down
7 changes: 7 additions & 0 deletions pathml/core/slide_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import matplotlib.pyplot as plt
import numpy as np
import pathml.core
from pathml.core.utils import get_tiles_dtype
import pathml.preprocessing.pipeline
from pathml.core.slide_types import SlideType

Expand Down Expand Up @@ -73,6 +74,7 @@ class SlideData:
time_series (bool, optional): Flag indicating whether the image is a time series.
Defaults to ``None``. Ignored if ``slide_type`` is specified.
counts (anndata.AnnData): object containing counts matrix associated with image quantification
dtype (np.dtype): datatype for image storage
"""

def __init__(
Expand Down Expand Up @@ -176,11 +178,15 @@ def __init__(
self.name = name
self.labels = labels
self.slide_type = slide_type
self.dtype = np.dtype('float16') if dtype is None else np.dtype(dtype)

if _load_from_h5path:
# populate the SlideData object from existing h5path file
with h5py.File(filepath, "r") as f:
self.h5manager = pathml.core.h5managers.h5pathManager(h5path=f)
self.dtype = get_tiles_dtype(f)
if dtype != self.dtype and dtype is not None:
logger.info(f"using dtype {self.dtype} from h5path instead of {dtype}")
self.name = self.h5manager.h5["fields"].attrs["name"]
self.labels = {
key: val
Expand Down Expand Up @@ -211,6 +217,7 @@ def __repr__(self):
if self.backend:
out.append(f"backend={repr(self.backend)}")
out.append(f"image shape: {self.shape}")
out.append(f"image dtype: {self.dtype}")
try:
nlevels = self.slide.level_count
except:
Expand Down
21 changes: 19 additions & 2 deletions pathml/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
import anndata
import h5py
import numpy as np
import pathml.core.slide_backends
import pathml.core.slide_data


# TODO: Fletcher32 checksum?
Expand Down Expand Up @@ -115,3 +113,22 @@ def readcounts(h5):
for ds in h5.keys():
h5.copy(ds, f)
return anndata.read_h5ad(path.name)

def get_tiles_dtype(h5):
    """
    Returns the dtype of tile images in h5path file.

    Args:
        h5: opened h5path file (or any mapping with the same layout) that has
            a ``"tiles"`` group whose entries each hold an ``"array"`` dataset.

    Returns:
        np.dtype or None if no tiles in file

    Raises:
        ValueError: if the tiles do not all share the dtype of the first tile.
    """
    tiles = h5["tiles"]
    dtype = None
    for tile in tiles:
        tile_dtype = tiles[tile]["array"].dtype
        if dtype is None:
            # The first tile encountered sets the reference dtype.
            dtype = tile_dtype
        elif dtype != tile_dtype:
            # Raise explicitly: an ``assert (cond, msg)`` tests a non-empty
            # tuple, which is always truthy, so it could never fail; plain
            # asserts are also stripped under ``python -O``.
            raise ValueError(
                f"all tiles must have the same dtype. Tile {tile} has dtype {tile_dtype} instead of {dtype}"
            )
    return dtype
13 changes: 11 additions & 2 deletions tests/core_tests/test_h5managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,25 @@ def test_h5manager2(tileHE):


def test_tile_dtype_HE(tileHE):
    """Test that tiles have default float16 dtype and masks are bool"""
    # No dtype is passed to the constructor, so the default applies.
    slidedata = HESlide("tests/testdata/small_HE.svs")
    slidedata.tiles.add(tileHE)
    # Read the tile back from the slide's tile store and check what was saved.
    tile_retrieved = slidedata.tiles[tileHE.coords]
    assert tile_retrieved.image.dtype == np.float16
    assert tile_retrieved.masks["testmask"].dtype == bool

def test_tile_dtype_HE_uint8(tileHE):
    """Test that tiles have modified uint8 dtype and masks are bool"""
    # Request uint8 storage explicitly through the constructor's dtype argument.
    slidedata = HESlide("tests/testdata/small_HE.svs", dtype=np.dtype('uint8'))
    slidedata.tiles.add(tileHE)
    # Read the tile back from the slide's tile store and check what was saved.
    tile_retrieved = slidedata.tiles[tileHE.coords]
    assert tile_retrieved.image.dtype == np.uint8
    assert tile_retrieved.masks["testmask"].dtype == bool


def test_tile_dtype_IF(tileVectra, vectra_slide):
"""make sure that retrieved tiles and corresponding masks are float16"""
"""Test that tiles have float16 dtype and masks have bool dtype."""
vectra_slide.tiles.add(tileVectra)
tile_retrieved = vectra_slide.tiles[tileVectra.coords]
assert tile_retrieved.image.dtype == np.float16
Expand Down