Skip to content

Commit

Permalink
[Data] Remove dead code from BinaryDatasource (ray-project#38234)
Browse files: browse the repository at this point in the history
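BinaryDatasource has a separate code path for the case where `output_arrow_format` is False, but this argument is always True, so that code path is dead. This PR removes it. Because ImageDatasource and TextDatasource relied on the removed path through `super()._read_file(...)`, they now subclass FileBasedDatasource directly and read the file bytes themselves with `f.readall()`.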

Signed-off-by: Balaji Veeramani <[email protected]>
  • Loading branch information
bveeramani committed Aug 9, 2023
1 parent 9c193f4 commit 8a21135
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 26 deletions.
19 changes: 6 additions & 13 deletions python/ray/data/datasource/binary_datasource.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,20 +43,13 @@ def _read_file(self, f: "pyarrow.NativeFile", path: str, **reader_args):
else:
data = f.readall()

output_arrow_format = reader_args.pop("output_arrow_format", False)
if output_arrow_format:
builder = ArrowBlockBuilder()
if include_paths:
item = {self._COLUMN_NAME: data, "path": path}
else:
item = {self._COLUMN_NAME: data}
builder.add(item)
return builder.build()
builder = ArrowBlockBuilder()
if include_paths:
item = {self._COLUMN_NAME: data, "path": path}
else:
if include_paths:
return [(path, data)]
else:
return [data]
item = {self._COLUMN_NAME: data}
builder.add(item)
return builder.build()

def _rows_per_file(self):
return 1
10 changes: 5 additions & 5 deletions python/ray/data/datasource/image_datasource.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from ray.data._internal.delegating_block_builder import DelegatingBlockBuilder
from ray.data._internal.util import _check_import
from ray.data.block import BlockMetadata
from ray.data.datasource.binary_datasource import BinaryDatasource
from ray.data.datasource.datasource import Reader
from ray.data.datasource.file_based_datasource import (
FileBasedDatasource,
Expand Down Expand Up @@ -39,7 +38,7 @@


@DeveloperAPI
class ImageDatasource(BinaryDatasource):
class ImageDatasource(FileBasedDatasource):
"""A datasource that lets you read images."""

_WRITE_FILE_PER_ROW = True
Expand Down Expand Up @@ -79,9 +78,7 @@ def _read_file(
) -> "pyarrow.Table":
from PIL import Image, UnidentifiedImageError

records = super()._read_file(f, path, include_paths=True, **reader_args)
assert len(records) == 1
path, data = records[0]
data = f.readall()

try:
image = Image.open(io.BytesIO(data))
Expand All @@ -105,6 +102,9 @@ def _read_file(

return block

def _rows_per_file(self):
return 1

def _write_row(
self,
f: "pyarrow.NativeFile",
Expand Down
11 changes: 3 additions & 8 deletions python/ray/data/datasource/text_datasource.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,23 @@
from typing import TYPE_CHECKING, List

from ray.data._internal.delegating_block_builder import DelegatingBlockBuilder
from ray.data.datasource.binary_datasource import BinaryDatasource
from ray.data.datasource.file_based_datasource import FileBasedDatasource
from ray.util.annotations import PublicAPI

if TYPE_CHECKING:
import pyarrow


@PublicAPI
class TextDatasource(BinaryDatasource):
class TextDatasource(FileBasedDatasource):
"""Text datasource, for reading and writing text files."""

_COLUMN_NAME = "text"

def _read_file(
self, f: "pyarrow.NativeFile", path: str, **reader_args
) -> List[str]:
block = super()._read_file(f, path, **reader_args)
assert len(block) == 1
data = block[0]
data = f.readall()

builder = DelegatingBlockBuilder()

Expand All @@ -33,6 +31,3 @@ def _read_file(

block = builder.build()
return block

def _rows_per_file(self):
return None

0 comments on commit 8a21135

Please sign in to comment.