From 83d9d72371322b99397449ec67025f84e62fd908 Mon Sep 17 00:00:00 2001 From: Balaji Veeramani Date: Mon, 21 Aug 2023 23:42:46 -0700 Subject: [PATCH 1/2] Update files Manually set parallelism Signed-off-by: Balaji Veeramani --- doc/source/ray-air/doc_code/computer_vision.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/source/ray-air/doc_code/computer_vision.py b/doc/source/ray-air/doc_code/computer_vision.py index 405aa28d1bfff..4b6e8cdcddbd0 100644 --- a/doc/source/ray-air/doc_code/computer_vision.py +++ b/doc/source/ray-air/doc_code/computer_vision.py @@ -97,7 +97,10 @@ def read_images(): dataset = ray.data.read_images(root, partitioning=partitioning) # __read_images1_stop__ - dataset = dataset.limit(32) + # The autodetected parallelism is low. As a result, blocks are large and we + # unnecessarily read more than a thousand images (even though we limit the dataset + # to 32 rows!) To avoid this issue, we manually set the parallelism. + dataset = ray.data.read_images(root, partitioning=partitioning, parallelism=1875) # __read_images2_start__ from typing import Dict From 59fa22b004af7dc673464f7a24029cc0b4afa0bb Mon Sep 17 00:00:00 2001 From: Balaji Veeramani Date: Mon, 21 Aug 2023 23:46:31 -0700 Subject: [PATCH 2/2] Update files Update BUILD Signed-off-by: Balaji Veeramani --- doc/BUILD | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/doc/BUILD b/doc/BUILD index e5085c39b427d..673833fc942e9 100644 --- a/doc/BUILD +++ b/doc/BUILD @@ -200,22 +200,11 @@ py_test_run_all_subdirectory( py_test_run_all_subdirectory( size = "large", include = ["source/ray-air/doc_code/*.py"], - exclude = [ - # Too large. Use a custom test below. - "source/ray-air/doc_code/computer_vision.py", - ], + exclude = [], extra_srcs = [], tags = ["exclusive", "team:ml"], ) -py_test( - size = "enormous", - name = "computer_vision_test", - main = "source/ray-air/doc_code/computer_vision.py", - srcs = ["source/ray-air/doc_code/computer_vision.py"], - tags = ["exclusive", "team:ml"], -) - # -------------------------------------------------------------------- # Test all doc/source/train/doc_code code included in rst/md files. # --------------------------------------------------------------------