diff --git a/python/ray/train/tests/test_base_trainer.py b/python/ray/train/tests/test_base_trainer.py index 93276e26d9f94..8beb6ab289a5c 100644 --- a/python/ray/train/tests/test_base_trainer.py +++ b/python/ray/train/tests/test_base_trainer.py @@ -183,9 +183,13 @@ def train_loop(self): ) tune.run(trainer.as_trainable(), num_samples=4) - # TODO(ekl/sang) this currently fails. - # Check we don't deadlock with too low of a fraction either. - scale_config = ScalingConfig(num_workers=1, _max_cpu_fraction_per_node=0.01) + # The trainer needs to request 0 CPUs; otherwise the pg + # will require {CPU: 1} * 2 resources, which + # _max_cpu_fraction_per_node == 0.01 cannot schedule + # (because it only allows 1 CPU for the pg per node). + scale_config = ScalingConfig( + num_workers=1, _max_cpu_fraction_per_node=0.01, trainer_resources={"CPU": 0} + ) trainer = DummyTrainer( train_loop, scaling_config=scale_config,