Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sprint assorted #87

Merged
merged 8 commits into from
Feb 4, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
update tiles
  • Loading branch information
ryanccarelli committed Feb 1, 2021
commit f8ffe1a2436fe7e91d41c3746d164cd99f732615
30 changes: 24 additions & 6 deletions pathml/core/h5managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,23 @@

import numpy as np

class h5_manager:
    """
    Abstract class for h5 data management.

    On construction a temporary file is created and opened as an h5py file
    in write mode. Subclasses are expected to override ``add``.

    Attributes:
        h5: h5py.File object opened in 'w' mode on the temporary file
        h5path: the temporary file object backing ``h5``
        shape: initialised to None
    """
    def __init__(self):
        # the temporary file object is kept on the instance so that it stays
        # referenced for as long as the h5py file that wraps it
        path = tempfile.TemporaryFile()
        f = h5py.File(path, 'w')
        self.h5 = f
        self.h5path = path
        self.shape = None

    def add(self, key, val):
        """
        Add ``val`` under ``key``. Must be implemented by subclasses.

        Raises:
            NotImplementedError: always, in this base class
        """
        raise NotImplementedError


class _tiles_h5_manager:
"""
Interface between tiles object and data management on disk by h5py.
Expand Down Expand Up @@ -52,6 +69,9 @@ def add(self, key, tile):
)

def slice(self, slices):
'''
shouldn't slice be rewritten as reshape?
'''
"""
Generator to slice all tiles in self.h5 extending numpy array slicing

Expand All @@ -63,14 +83,12 @@ def slice(self, slices):
key(str): tile coordinates
val(`~pathml.core.tile.Tile`): tile
"""
if not isinstance(slices,list[slice]):
raise KeyError(f"slices must of of type list[slice] but is {type(slices)} with elements {type(slices[0])}")
for key, val in self.h5.items():
val = val[slices:...]
yield key, val
for key in self.h5.keys():
name, tile, maskdict, labels = self.get(key)
yield name, tile, maskdict, labels

def get(self, item):
if isinstance(item, tuple):
if isinstance(item, (str, tuple)):
if str(item) not in self.h5.keys():
raise KeyError(f'key {item} does not exist')
tile = self.h5[str(item)]['tile'][:]
Expand Down
9 changes: 5 additions & 4 deletions pathml/core/tile.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ class Tile:
slidetype (str): type of image (e.g. "HE"). Defaults to None.
labels: labels for the tile
"""
def __init__(self, image, coords, slidetype=None, masks=None, labels=None):
# check inputs
def __init__(self, image, name=None, coords=None, slidetype=None, masks=None, labels=None):
assert isinstance(image, np.ndarray), f"image of type {type(image)} must be a np.ndarray"
assert isinstance(masks, (type(None), Masks, dict)), \
f"masks is of type {type(masks)} but must be of type pathml.core.masks.Masks or dict"
assert isinstance(coords, tuple) and len(coords) == 2, "coords must be a tuple of (i, j)"
assert (isinstance(coords, tuple) and len(coords) == 2) or isinstance(coords, None), "coords must be a tuple of (i, j)"
assert isinstance(labels, (type(None), dict))
assert isinstance(name, (str, type(None))), f"name is of type {type(name)} but must be of type str or None"

if isinstance(masks, Masks):
self.masks = masks
Expand All @@ -35,6 +35,7 @@ def __init__(self, image, coords, slidetype=None, masks=None, labels=None):
self.masks = Masks(masks)
elif masks is None:
self.masks = masks
self.name = name
self.image = image
self.coords = coords
self.masks = masks
Expand All @@ -43,4 +44,4 @@ def __init__(self, image, coords, slidetype=None, masks=None, labels=None):

def __repr__(self):
return f"Tile(image shape {self.image.shape}, slidetype={self.slidetype}, " \
f"mask={repr(self.masks)}, coords={self.coords}, labels={list(self.labels.keys())})"
f"mask={repr(self.masks)}, coords={self.coords}, labels={None if self.labels is None else list(self.labels.keys())})"
41 changes: 22 additions & 19 deletions pathml/core/tiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,38 +14,32 @@


class Tiles:
# TODO:
# 1. do we want to be able to tiles[tile].masks, tiles[tile].masks.add(), tiles[tile].masks.remove()
# at the moment we getitem the whole tile object, modify it, add it back
# 2. connected to ^ we keep a copy of masks in the reference to the tile (redundant)
# both of these problems are connected to the question whether h5 should be one file (we could hold
# reference to tile.masks giving us (1) but this gives us 2+ objects)
# 3. label type
"""
Object holding tiles.
Object wrapping a dict of tiles.

Args:
tiles (Union[dict[tuple[int], `~pathml.core.tiles.Tile`], list]): tile objects
"""
def __init__(self, tiles=None):
if tiles:
if not isinstance(tiles, dict) or (isinstance(tiles, list) and all([isinstance(t, Tile) for t in tiles])):
if not (isinstance(tiles, dict) or (isinstance(tiles, list) and all([isinstance(t, Tile) for t in tiles]))):
raise ValueError(f"tiles must be passed as dicts of the form coordinate1:Tile1,... "
f"or lists of Tile objects containing i,j")
if isinstance(tiles, dict):
for val in tiles.values():
if not isinstance(val, Tile):
raise ValueError(f"dict vals must be Tile")
for key in tiles.values():
if not isinstance(key, tuple) and all([isinstance(c, int) for c in key]):
raise ValueError(f"dict keys must be tuple[int]")
for key in tiles.keys():
if not (isinstance(key, tuple) and list(map(type, key)) == [int, int]) or isinstance(key, str):
ryanccarelli marked this conversation as resolved.
Show resolved Hide resolved
raise ValueError(f"dict keys must be of type str or tuple[int]")
self._tiles = OrderedDict(tiles)
else:
tiledictionary = {}
for tile in tiles:
if not isinstance(tile, Tile):
raise ValueError(f"Tiles expects a list of type Tile but was given {type(tile)}")
tiledictionary[(tile.i, tile.j)] = tiles[tile]
name = tile.name if tile.name is not None else str(tile.coords)
tiledictionary[name] = tiles[tile]
self._tiles = OrderedDict(tiledictionary)
else:
self._tiles = OrderedDict()
Expand All @@ -64,8 +58,9 @@ def __len__(self):
def __getitem__(self, item):
name, tile, maskdict, labels = self.h5manager.get(item)
if isinstance(item, tuple):
return Tile(tile, masks=Masks(maskdict), labels=labels, coords = (item[0], item[1]))
return Tile(tile, masks=Masks(maskdict), labels=labels)
return Tile(tile, masks=Masks(maskdict), labels=labels, coords = (item[0], item[1]), name=name)
# TODO: better handle coords
return Tile(tile, masks=Masks(maskdict), labels=labels, name=name)

def add(self, coordinates, tile):
"""
Expand All @@ -78,17 +73,25 @@ def add(self, coordinates, tile):
if not isinstance(tile, Tile):
raise ValueError(f"can not add {type(tile)}, tile must be of type pathml.core.tiles.Tile")
self.h5manager.add(coordinates, tile)
del tile

def slice(self, slices):
    """
    Slice all tiles in self.h5manager extending numpy array slicing

    Args:
        slices: list where each element is an object of type slice indicating
            how the dimension should be sliced

    Returns:
        Tiles: new Tiles object holding one rebuilt tile per item yielded by
            self.h5manager.slice, with the slices applied to each tile image

    Raises:
        KeyError: if slices is not a list whose elements are all slice objects
    """
    # isinstance() raises TypeError when given a parameterized generic such as
    # list[slice], so the container and its elements are checked separately.
    if not (isinstance(slices, list) and all(isinstance(s, slice) for s in slices)):
        # build the element report without indexing, so empty or
        # non-indexable input still produces the intended KeyError
        elementtypes = [type(s) for s in slices] if isinstance(slices, (list, tuple)) else None
        raise KeyError(f"slices must be of type list[slice] but is {type(slices)} with elements {elementtypes}")
    sliced = Tiles()
    for name, image, maskdict, labels in self.h5manager.slice(slices):
        # rebuild as tile; keyword arguments keep name from being bound to
        # the positional image parameter
        tile = Tile(image=image, name=name, masks=Masks(maskdict), labels=labels)
        # numpy multi-dimensional slicing takes a tuple of slice objects
        tile.image = tile.image[tuple(slices)]
        # NOTE(review): masks are left unsliced, as before (the previous bare
        # `tile.masks` statement had no effect) — confirm whether masks should
        # be sliced with the same selection
        sliced.add(name, tile)
    return sliced

def remove(self, key):
Expand Down
33 changes: 26 additions & 7 deletions tests/core_tests/test_tiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def tile_nomasks(shape=(224, 224, 3), i=1, j=3):
testtile = Tile(np.random.randn(*shape), coords = (i, j))
return testtile


@pytest.fixture
def tile_withmasks(shape=(224, 224, 3), coords=(1, 3), stack=50, labeltype=str):
if labeltype == str:
letters = string.ascii_letters + string.digits
Expand All @@ -36,7 +36,7 @@ def test_init_incorrect_input(incorrect_input):
tiles = Tiles(incorrect_input)


def test_init():
def test_init(tile_withmasks):
tilelist = [tile_withmasks(coords = (k, k)) for k in range(20)]
tiledict = {(k, k): tile_withmasks(coords = (k, k)) for k in range(20)}
tiles = Tiles(tilelist)
Expand All @@ -58,15 +58,17 @@ def test_add_get_nomasks(emptytiles, tile_nomasks):
tiles = emptytiles
tile = tile_nomasks
tiles.add((1, 3), tile)
assert tiles[(1, 3)] == tile
assert tiles[0] == tile
assert (tiles[(1, 3)].image == tile.image).all()
assert (tiles[0].image == tile.image).all()


def test_add_get_withmasks(emptytiles, tile_withmasks):
tiles = emptytiles
testmask = tile_withmasks.masks[0]
emptytiles.add((1, 3), tile_withmasks)
assert emptytiles[(1, 3)].masks[0] == testmask
assert emptytiles[0].masks[0] == testmask
tile = tile_withmasks
tiles.add((1, 3), tile)
assert (tiles[(1, 3)].masks[0] == testmask).all()
assert (tiles[0].masks[0] == testmask).all()


@pytest.mark.parametrize("incorrect_input", ["string", None, True, 5, [5, 4, 3], {"dict": "testing"}])
Expand All @@ -85,3 +87,20 @@ def test_remove_nomasks(emptytiles, tile_nomasks):
tiles.remove((1, 3))
with pytest.raises(Exception):
triggerexception = tiles['(1, 3)']

def test_slice_nomasks(emptytiles, tile_nomasks):
    """
    Smoke test: slice a collection holding one tile without masks.

    Adds the tile at key (1, 3), slices with a single slice(2, 5) and prints
    every item produced by iterating the result. Makes no assertions.
    """
    emptytiles.add((1, 3), tile_nomasks)
    selection = [slice(2, 5)]
    for sliced_item in emptytiles.slice(selection):
        print(sliced_item)

def test_slice_withmasks(emptytiles, tile_withmasks):
    """
    Smoke test: slice a collection holding one tile that carries masks.

    Adds the tile at key (1, 3), slices with a single slice(2, 5) and prints
    every item produced by iterating the result. Makes no assertions.
    """
    emptytiles.add((1, 3), tile_withmasks)
    selection = [slice(2, 5)]
    for sliced_item in emptytiles.slice(selection):
        print(sliced_item)