From 8b9d6acd9d93a0414843da874874833800299dd0 Mon Sep 17 00:00:00 2001 From: SangBin Cho Date: Sun, 14 Aug 2022 01:19:02 -0700 Subject: [PATCH 1/2] Fix a test bug --- python/ray/train/tests/test_base_trainer.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/python/ray/train/tests/test_base_trainer.py b/python/ray/train/tests/test_base_trainer.py index 93276e26d9f94..dd744f5246286 100644 --- a/python/ray/train/tests/test_base_trainer.py +++ b/python/ray/train/tests/test_base_trainer.py @@ -183,9 +183,11 @@ def train_loop(self): ) tune.run(trainer.as_trainable(), num_samples=4) - # TODO(ekl/sang) this currently fails. - # Check we don't deadlock with too low of a fraction either. - scale_config = ScalingConfig(num_workers=1, _max_cpu_fraction_per_node=0.01) + # Needs to request 0 CPU for the trainer otherwise the pg + # will require {CPU: 1} * 2 resources, which means + # _max_cpu_fraction_per_node == 0.01 cannot schedule it + # (because this only allows to have 1 CPU for pg per node). + scale_config = ScalingConfig(num_workers=1, _max_cpu_fraction_per_node=0.01, trainer_resources={"CPU": 0}) trainer = DummyTrainer( train_loop, scaling_config=scale_config, From 134dc489fccd9db3c11a5d51b379ca93a32a65fb Mon Sep 17 00:00:00 2001 From: SangBin Cho Date: Sun, 14 Aug 2022 01:19:35 -0700 Subject: [PATCH 2/2] lint --- python/ray/train/tests/test_base_trainer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/ray/train/tests/test_base_trainer.py b/python/ray/train/tests/test_base_trainer.py index dd744f5246286..8beb6ab289a5c 100644 --- a/python/ray/train/tests/test_base_trainer.py +++ b/python/ray/train/tests/test_base_trainer.py @@ -184,10 +184,12 @@ def train_loop(self): tune.run(trainer.as_trainable(), num_samples=4) # Needs to request 0 CPU for the trainer otherwise the pg - # will require {CPU: 1} * 2 resources, which means + # will require {CPU: 1} * 2 resources, which means # _max_cpu_fraction_per_node == 0.01 cannot schedule it # (because this only allows to have 1 CPU for pg per node). - scale_config = ScalingConfig(num_workers=1, _max_cpu_fraction_per_node=0.01, trainer_resources={"CPU": 0}) + scale_config = ScalingConfig( + num_workers=1, _max_cpu_fraction_per_node=0.01, trainer_resources={"CPU": 0} + ) trainer = DummyTrainer( train_loop, scaling_config=scale_config,