Skip to content

Commit

Permalink
[autoscaler v2] add unit tests for NodeProviderConfig (#35590)
Browse files Browse the repository at this point in the history
Why are these changes needed?
Add unit tests for NodeProviderConfig and fix bugs in it.
  • Loading branch information
scv119 committed May 22, 2023
1 parent 8427de2 commit ea5bcb9
Show file tree
Hide file tree
Showing 5 changed files with 185 additions and 30 deletions.
4 changes: 3 additions & 1 deletion python/ray/autoscaler/aws/BUILD
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
filegroup(
name = "example",
data = glob(["example-*.yaml"]),
visibility = ["//python/ray/tests:__pkg__"],
visibility = [
"//python/ray/tests:__pkg__",
],
)

filegroup(
Expand Down
12 changes: 11 additions & 1 deletion python/ray/autoscaler/v2/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,14 @@ py_test(
srcs = ["tests/test_storage.py"],
tags = ["team:core"],
deps = ["//:ray_lib",],
)
)

py_test(
name = "test_config",
size = "small",
srcs = ["tests/test_config.py"],
tags = ["team:core"],
deps = [
"//:ray_lib",
]
)
56 changes: 28 additions & 28 deletions python/ray/autoscaler/v2/instance_manager/config.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import copy
from typing import Any, Dict, List

from ray.autoscaler._private.util import hash_runtime_conf
from ray.core.generated.instance_manager_pb2 import Instance
from ray.autoscaler._private.util import hash_runtime_conf, prepare_config


class NodeProviderConfig(object):
Expand All @@ -16,19 +15,19 @@ def __init__(self, node_configs: Dict[str, Any]) -> None:
self.update_configs(node_configs)

def update_configs(self, node_configs: Dict[str, Any]) -> None:
self._node_configs = node_configs
self._node_configs = prepare_config(node_configs)
self._calculate_hashes()
self._sync_continuously = self._node_configs.get(
"generate_file_mounts_contents_hash", True
)

def _calculate_hashes(self) -> None:
self._runtime_hash, self._file_mounts_contents_hash = hash_runtime_conf(
self._node_configs["file_mounts"],
self._node_configs["cluster_synced_files"],
self._node_configs.get("file_mounts", {}),
self._node_configs.get("cluster_synced_files", []),
[
self._node_configs["worker_setup_commands"],
self._node_configs["worker_start_ray_commands"],
self._node_configs.get("worker_setup_commands", []),
self._node_configs.get("worker_start_ray_commands", []),
],
generate_file_mounts_contents_hash=self._node_configs.get(
"generate_file_mounts_contents_hash", True
Expand All @@ -43,41 +42,42 @@ def get_node_config(self, instance_type_name: str) -> Dict[str, Any]:
)

def get_docker_config(self, instance_type_name: str) -> Dict[str, Any]:
if "docker" not in self._node_configs:
return {}
docker_config = copy.deepcopy(self._node_configs.get("docker", {}))
node_specific_docker_config = self._node_configs["available_node_types"][
instance_type_name
].get("docker", {})
docker_config.update(node_specific_docker_config)
return docker_config

def get_worker_start_ray_commands(self, instance: Instance) -> List[str]:
if (
instance.num_successful_updates > 0
and not self._node_config_provider.restart_only
):
def get_worker_start_ray_commands(
self, num_successful_updates: int = 0
) -> List[str]:
if num_successful_updates > 0 and not self._node_config_provider.restart_only:
return []
return self._node_configs["worker_start_ray_commands"]
return self._node_configs.get("worker_start_ray_commands", [])

def get_worker_setup_commands(self, instance: Instance) -> List[str]:
if (
instance.num_successful_updates > 0
and self._node_config_provider.restart_only
):
return []
def get_head_setup_commands(self) -> List[str]:
return self._node_configs.get("head_setup_commands", [])

def get_head_start_ray_commands(self) -> List[str]:
return self._node_configs.get("head_start_ray_commands", [])

return self._node_configs["available_node_types"][instance.name][
"worker_setup_commands"
]
def get_worker_setup_commands(
self, instance_type_name: str, num_successful_updates: int = 0
) -> List[str]:
if num_successful_updates > 0 and self._node_config_provider.restart_only:
return []
return self.get_node_type_specific_config(
instance_type_name, "worker_setup_commands"
)

def get_node_type_specific_config(
self, instance_type_name: str, config_name: str
) -> Any:
config = self._node_config_provider.get_config(config_name)
node_specific_config = self._node_configs["available_node_types"][
instance_type_name
]
config = self.get_config(config_name)
node_specific_config = self._node_configs["available_node_types"].get(
instance_type_name, {}
)
if config_name in node_specific_config:
config = node_specific_config[config_name]
return config
Expand Down
81 changes: 81 additions & 0 deletions python/ray/autoscaler/v2/tests/test_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# coding: utf-8
import os
import sys

import pytest # noqa

from ray._private.test_utils import load_test_config
from ray.autoscaler.v2.instance_manager.config import NodeProviderConfig


def test_simple():
    """Smoke-test NodeProviderConfig against the ``test_multi_node.yaml`` fixture.

    Checks the head node's ``node_config``, its resolved docker config, and
    that the worker start commands can actually be retrieved.
    """
    raw_config = load_test_config("test_multi_node.yaml")
    config = NodeProviderConfig(raw_config)
    assert config.get_node_config("head_node") == {"InstanceType": "m5.large"}
    assert config.get_docker_config("head_node") == {
        "image": "anyscale/ray-ml:latest",
        "container_name": "ray_container",
        "pull_before_run": True,
    }
    # The previous assertion referenced the bound method without calling it,
    # which is always truthy and therefore verified nothing. Call it so the
    # code path is exercised.
    # NOTE(review): the exact commands in test_multi_node.yaml are not visible
    # from here, so only the return type is pinned -- tighten to an equality
    # check against the fixture's contents if desired.
    assert isinstance(config.get_worker_start_ray_commands(), list)


def test_complex():
    """Check NodeProviderConfig against the ``test_ray_complex.yaml`` fixture.

    Covers head and worker setup/start commands, the docker config resolved
    for each node type, and node-type-specific ``initialization_commands``.
    """
    provider_config = NodeProviderConfig(
        load_test_config("test_ray_complex.yaml")
    )

    # Commands listed under the fixture's top-level ``setup_commands``; the
    # expected head/worker setup lists below start with these.
    shared_setup = ["echo a", "echo b", "echo ${echo hi}"]

    expected_head_setup = shared_setup + ["echo head"]
    assert provider_config.get_head_setup_commands() == expected_head_setup

    expected_head_start = [
        "ray stop",
        "ray start --head --autoscaling-config=~/ray_bootstrap_config.yaml",
    ]
    assert provider_config.get_head_start_ray_commands() == expected_head_start

    expected_worker_setup = shared_setup + ["echo worker"]
    assert (
        provider_config.get_worker_setup_commands("worker_nodes")
        == expected_worker_setup
    )

    expected_worker_start = ["ray stop", "ray start --address=$RAY_HEAD_IP"]
    assert provider_config.get_worker_start_ray_commands() == expected_worker_start

    # ``worker_nodes1`` declares its own ``worker_setup_commands`` in the fixture.
    assert provider_config.get_worker_setup_commands("worker_nodes1") == [
        "echo worker1"
    ]

    # Only the image differs between node types in the expected docker config.
    expected_images = (
        ("head_node", "anyscale/ray-ml:latest"),
        ("worker_nodes", "anyscale/ray-ml:latest"),
        ("worker_nodes1", "anyscale/ray-ml:nightly"),
    )
    for node_type, image in expected_images:
        assert provider_config.get_docker_config(node_type) == {
            "image": image,
            "container_name": "ray_container",
            "pull_before_run": True,
        }

    worker_init = provider_config.get_node_type_specific_config(
        "worker_nodes", "initialization_commands"
    )
    assert worker_init == ["echo what"]

    worker1_init = provider_config.get_node_type_specific_config(
        "worker_nodes1", "initialization_commands"
    )
    assert worker1_init == ["echo init"]


if __name__ == "__main__":
    # Pick the pytest arguments first: a parallel, boxed run when the
    # PARALLEL_CI environment variable is set, a plain verbose run otherwise.
    pytest_args = (
        ["-n", "auto", "--boxed", "-vs", __file__]
        if os.environ.get("PARALLEL_CI")
        else ["-sv", __file__]
    )
    sys.exit(pytest.main(pytest_args))
62 changes: 62 additions & 0 deletions python/ray/tests/test_cli_patterns/test_ray_complex.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
auth:
ssh_user: ubuntu
cluster_name: test-cli
docker:
image: anyscale/ray-ml:latest
container_name: ray_container
pull_before_run: true
file_mounts:
~/tests: .
available_node_types:
head_node:
resources: {}
max_workers: 0
node_config:
ImageId: latest_dlami
InstanceType: t1.micro
worker_nodes:
resources: {}
max_workers: 2
min_workers: 1
node_config:
ImageId: latest_dlami
InstanceType: t1.micro
initialization_commands:
- echo what
worker_nodes1:
resources: {}
max_workers: 2
min_workers: 1
node_config:
ImageId: latest_dlami
InstanceType: t1.micro
worker_setup_commands:
- echo worker1
docker:
image: anyscale/ray-ml:nightly
container_name: ray_container
pull_before_run: true
head_node_type: head_node
head_setup_commands:
- echo head
head_start_ray_commands:
- ray stop
- ray start --head --autoscaling-config=~/ray_bootstrap_config.yaml
idle_timeout_minutes: 5
initialization_commands:
- echo init
provider:
availability_zone: us-west-2a
key_pair:
key_name: __test-cli
region: us-west-2
type: aws
setup_commands:
- echo a
- echo b
- echo ${echo hi}
worker_setup_commands:
- echo worker
worker_start_ray_commands:
- ray stop
- ray start --address=$RAY_HEAD_IP

0 comments on commit ea5bcb9

Please sign in to comment.