diff --git a/python/ray/data/datasource/binary_datasource.py b/python/ray/data/datasource/binary_datasource.py index 760b01d108d3d..882d28d307aba 100644 --- a/python/ray/data/datasource/binary_datasource.py +++ b/python/ray/data/datasource/binary_datasource.py @@ -43,20 +43,13 @@ def _read_file(self, f: "pyarrow.NativeFile", path: str, **reader_args): else: data = f.readall() - output_arrow_format = reader_args.pop("output_arrow_format", False) - if output_arrow_format: - builder = ArrowBlockBuilder() - if include_paths: - item = {self._COLUMN_NAME: data, "path": path} - else: - item = {self._COLUMN_NAME: data} - builder.add(item) - return builder.build() + builder = ArrowBlockBuilder() + if include_paths: + item = {self._COLUMN_NAME: data, "path": path} else: - if include_paths: - return [(path, data)] - else: - return [data] + item = {self._COLUMN_NAME: data} + builder.add(item) + return builder.build() def _rows_per_file(self): return 1 diff --git a/python/ray/data/datasource/image_datasource.py b/python/ray/data/datasource/image_datasource.py index f9d4a838bc65a..5b96b4a849804 100644 --- a/python/ray/data/datasource/image_datasource.py +++ b/python/ray/data/datasource/image_datasource.py @@ -8,7 +8,6 @@ from ray.data._internal.delegating_block_builder import DelegatingBlockBuilder from ray.data._internal.util import _check_import from ray.data.block import BlockMetadata -from ray.data.datasource.binary_datasource import BinaryDatasource from ray.data.datasource.datasource import Reader from ray.data.datasource.file_based_datasource import ( FileBasedDatasource, @@ -39,7 +38,7 @@ @DeveloperAPI -class ImageDatasource(BinaryDatasource): +class ImageDatasource(FileBasedDatasource): """A datasource that lets you read images.""" _WRITE_FILE_PER_ROW = True @@ -79,9 +78,7 @@ def _read_file( ) -> "pyarrow.Table": from PIL import Image, UnidentifiedImageError - records = super()._read_file(f, path, include_paths=True, **reader_args) - assert len(records) == 1 - path, data = records[0] + data = f.readall() try: image = Image.open(io.BytesIO(data)) @@ -105,6 +102,9 @@ def _read_file( return block + def _rows_per_file(self): + return 1 + def _write_row( self, f: "pyarrow.NativeFile", diff --git a/python/ray/data/datasource/text_datasource.py b/python/ray/data/datasource/text_datasource.py index 068913a0fd894..1f963463fd492 100644 --- a/python/ray/data/datasource/text_datasource.py +++ b/python/ray/data/datasource/text_datasource.py @@ -1,7 +1,7 @@ from typing import TYPE_CHECKING, List from ray.data._internal.delegating_block_builder import DelegatingBlockBuilder -from ray.data.datasource.binary_datasource import BinaryDatasource +from ray.data.datasource.file_based_datasource import FileBasedDatasource from ray.util.annotations import PublicAPI if TYPE_CHECKING: @@ -9,7 +9,7 @@ @PublicAPI -class TextDatasource(BinaryDatasource): +class TextDatasource(FileBasedDatasource): """Text datasource, for reading and writing text files.""" _COLUMN_NAME = "text" @@ -17,9 +17,7 @@ class TextDatasource(BinaryDatasource): def _read_file( self, f: "pyarrow.NativeFile", path: str, **reader_args ) -> List[str]: - block = super()._read_file(f, path, **reader_args) - assert len(block) == 1 - data = block[0] + data = f.readall() builder = DelegatingBlockBuilder() @@ -33,6 +31,3 @@ def _read_file( block = builder.build() return block - - def _rows_per_file(self): - return None