From 81b1e8fdcf53521fb4039eaaef32aa798af3c913 Mon Sep 17 00:00:00 2001 From: Balaji Veeramani Date: Thu, 3 Aug 2023 19:22:35 -0500 Subject: [PATCH] [Data] Raise error if PIL can't load image (#38030) If you call read_images and PIL can't load a file, you get an unhelpful error message: > PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x19997c350> This PR updates the error message to include the path to the file. --- python/ray/data/datasource/image_datasource.py | 8 ++++++-- python/ray/data/tests/test_image.py | 6 ++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/python/ray/data/datasource/image_datasource.py b/python/ray/data/datasource/image_datasource.py index 3470944ed1068..eab349fb2ae16 100644 --- a/python/ray/data/datasource/image_datasource.py +++ b/python/ray/data/datasource/image_datasource.py @@ -76,13 +76,17 @@ def _read_file( include_paths: bool, **reader_args, ) -> "pyarrow.Table": - from PIL import Image + from PIL import Image, UnidentifiedImageError records = super()._read_file(f, path, include_paths=True, **reader_args) assert len(records) == 1 path, data = records[0] - image = Image.open(io.BytesIO(data)) + try: + image = Image.open(io.BytesIO(data)) + except UnidentifiedImageError as e: + raise ValueError(f"PIL couldn't load image file at path '{path}'.") from e + if size is not None: height, width = size image = image.resize((width, height)) diff --git a/python/ray/data/tests/test_image.py b/python/ray/data/tests/test_image.py index e067304f79b78..36ded32008071 100644 --- a/python/ray/data/tests/test_image.py +++ b/python/ray/data/tests/test_image.py @@ -1,4 +1,5 @@ import os +import tempfile from typing import Dict from unittest.mock import ANY, patch @@ -256,6 +257,11 @@ def test_args_passthrough(ray_start_regular_shared): mock.assert_called_once_with(ANY, **kwargs) assert isinstance(mock.call_args[0][0], ImageDatasource) + def test_unidentified_image_error(ray_start_regular_shared): + with 
tempfile.NamedTemporaryFile(suffix=".png") as file: + with pytest.raises(ValueError): + ray.data.read_images(paths=file.name).materialize() + if __name__ == "__main__": import sys