Skip to content

Commit

Permalink
[Data] Deprecate num_blocks parameter from Dataset.random_shuffle (
Browse files Browse the repository at this point in the history
…#41111)

This PR deprecates the `num_blocks` parameter of `random_shuffle()`. The goal is to avoid surfacing the block concept in our API.

Signed-off-by: Cheng Su <[email protected]>
  • Loading branch information
c21 committed Nov 14, 2023
1 parent b441fdd commit ce55056
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,11 @@ def __init__(
input_op: LogicalOperator,
name: str = "RandomShuffle",
seed: Optional[int] = None,
num_outputs: Optional[int] = None,
ray_remote_args: Optional[Dict[str, Any]] = None,
):
super().__init__(
name,
input_op,
num_outputs=num_outputs,
sub_progress_bar_names=[
ExchangeTaskSpec.MAP_SUB_PROGRESS_BAR_NAME,
ExchangeTaskSpec.REDUCE_SUB_PROGRESS_BAR_NAME,
Expand Down
11 changes: 8 additions & 3 deletions python/ray/data/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1068,13 +1068,19 @@ def random_shuffle(
Args:
seed: Fix the random seed to use, otherwise one is chosen
based on system randomness.
num_blocks: The number of output blocks after the shuffle, or ``None``
to retain the number of blocks.
Returns:
The shuffled :class:`Dataset`.
""" # noqa: E501

if num_blocks is not None:
warnings.warn(
"`num_blocks` parameter is deprecated in Ray 2.9. random_shuffle() "
"does not support to change the number of output blocks. Use "
"repartition() instead.", # noqa: E501
DeprecationWarning,
)

plan = self._plan.with_stage(
RandomShuffleStage(seed, num_blocks, ray_remote_args)
)
Expand All @@ -1084,7 +1090,6 @@ def random_shuffle(
op = RandomShuffle(
logical_plan.dag,
seed=seed,
num_outputs=num_blocks,
ray_remote_args=ray_remote_args,
)
logical_plan = LogicalPlan(op)
Expand Down

0 comments on commit ce55056

Please sign in to comment.