From 4ab0ba0823a9d113a6a0ceb10d7ddd56596e9c1a Mon Sep 17 00:00:00 2001 From: Cheng Su Date: Mon, 16 Oct 2023 08:21:53 -0700 Subject: [PATCH] [Data] Remove FileMetadataShuffler (#40341) As a followup of https://github.com/ray-project/ray/pull/40154#issuecomment-1760168047, remove the `FileMetadataShuffler` and the config setting in `DataContext` now. They are not used any more. Signed-off-by: Cheng Su --- python/ray/data/_default_config.py | 4 --- python/ray/data/context.py | 4 --- .../data/datasource/file_metadata_shuffler.py | 33 ------------------- 3 files changed, 41 deletions(-) delete mode 100644 python/ray/data/_default_config.py delete mode 100644 python/ray/data/datasource/file_metadata_shuffler.py diff --git a/python/ray/data/_default_config.py b/python/ray/data/_default_config.py deleted file mode 100644 index d8add6c7ebf24..0000000000000 --- a/python/ray/data/_default_config.py +++ /dev/null @@ -1,4 +0,0 @@ -# Default file metadata shuffler class to use. -DEFAULT_FILE_METADATA_SHUFFLER = ( - "ray.data.datasource.file_metadata_shuffler.SequentialFileMetadataShuffler" -) diff --git a/python/ray/data/context.py b/python/ray/data/context.py index a591397374183..06741a93ba0cb 100644 --- a/python/ray/data/context.py +++ b/python/ray/data/context.py @@ -4,7 +4,6 @@ import ray from ray._private.ray_constants import env_integer -from ray.data._default_config import DEFAULT_FILE_METADATA_SHUFFLER from ray.util.annotations import DeveloperAPI from ray.util.scheduling_strategies import SchedulingStrategyT @@ -171,7 +170,6 @@ def __init__( execution_options: "ExecutionOptions", use_ray_tqdm: bool, enable_progress_bars: bool, - file_metadata_shuffler: str, enable_get_object_locations_for_metrics: bool, ): """Private constructor (use get_current() instead).""" @@ -205,7 +203,6 @@ def __init__( self.execution_options = execution_options self.use_ray_tqdm = use_ray_tqdm self.enable_progress_bars = enable_progress_bars - self.file_metadata_shuffler = file_metadata_shuffler self.enable_get_object_locations_for_metrics = ( enable_get_object_locations_for_metrics ) @@ -257,7 +254,6 @@ def get_current() -> "DataContext": execution_options=ray.data.ExecutionOptions(), use_ray_tqdm=DEFAULT_USE_RAY_TQDM, enable_progress_bars=DEFAULT_ENABLE_PROGRESS_BARS, - file_metadata_shuffler=DEFAULT_FILE_METADATA_SHUFFLER, enable_get_object_locations_for_metrics=DEFAULT_ENABLE_GET_OBJECT_LOCATIONS_FOR_METRICS, # noqa E501 ) diff --git a/python/ray/data/datasource/file_metadata_shuffler.py b/python/ray/data/datasource/file_metadata_shuffler.py deleted file mode 100644 index 35a687842b852..0000000000000 --- a/python/ray/data/datasource/file_metadata_shuffler.py +++ /dev/null @@ -1,33 +0,0 @@ -import sys -from typing import Any, List, Union - -import numpy as np - -if sys.version_info >= (3, 8): - from typing import Literal -else: - from typing_extensions import Literal - - -class FileMetadataShuffler: - """Random shuffle file metadata when the `shuffle` parameter enables it. - Otherwise returns file metadata in its original order. - """ - - def __init__(self, shuffle: Union[Literal["files"], None]): - self._is_shuffle_enabled = False - if shuffle == "files": - self._is_shuffle_enabled = True - self._generator = np.random.default_rng() - - def shuffle_files( - self, - files_metadata: List[Any], - ) -> List[Any]: - if self._is_shuffle_enabled: - return [ - files_metadata[i] - for i in self._generator.permutation(len(files_metadata)) - ] - else: - return files_metadata