diff --git a/dashboard/agent.py b/dashboard/agent.py index 16844620d658a..3856a5ea24490 100644 --- a/dashboard/agent.py +++ b/dashboard/agent.py @@ -63,7 +63,7 @@ def __init__( log_dir=None, metrics_export_port=None, node_manager_port=None, - listen_port=0, + listen_port=ray_constants.DEFAULT_DASHBOARD_AGENT_LISTEN_PORT, object_store_name=None, raylet_name=None, logging_params=None, @@ -329,7 +329,7 @@ async def _check_parent(): "--listen-port", required=False, type=int, - default=0, + default=ray_constants.DEFAULT_DASHBOARD_AGENT_LISTEN_PORT, help="Port for HTTP server to listen on", ) parser.add_argument( diff --git a/dashboard/http_server_agent.py b/dashboard/http_server_agent.py index 0cbf2b76a1ac2..704d3c6452b62 100644 --- a/dashboard/http_server_agent.py +++ b/dashboard/http_server_agent.py @@ -53,12 +53,20 @@ async def start(self, modules): self.runner = aiohttp.web.AppRunner(app) await self.runner.setup() - site = aiohttp.web.TCPSite( - self.runner, - "127.0.0.1" if self.ip == "127.0.0.1" else "0.0.0.0", - self.listen_port, - ) - await site.start() + try: + site = aiohttp.web.TCPSite( + self.runner, + "127.0.0.1" if self.ip == "127.0.0.1" else "0.0.0.0", + self.listen_port, + ) + await site.start() + except OSError as e: + logger.error( + f"Agent port #{self.listen_port} already in use. " + "Failed to start agent. " + f"Ensure port #{self.listen_port} is available, and then try again." 
+ ) + raise e self.http_host, self.http_port, *_ = site._server.sockets[0].getsockname() logger.info( "Dashboard agent http address: %s:%s", self.http_host, self.http_port diff --git a/dashboard/modules/dashboard_sdk.py b/dashboard/modules/dashboard_sdk.py index 32c503396ee71..1323560abeef4 100644 --- a/dashboard/modules/dashboard_sdk.py +++ b/dashboard/modules/dashboard_sdk.py @@ -205,6 +205,14 @@ def __init__( def _check_connection_and_version( self, min_version: str = "1.9", version_error_message: str = None + ): + self._check_connection_and_version_with_url(min_version, version_error_message) + + def _check_connection_and_version_with_url( + self, + min_version: str = "1.9", + version_error_message: str = None, + url: str = "/api/version", ): if version_error_message is None: version_error_message = ( @@ -212,7 +220,7 @@ def _check_connection_and_version( ) try: - r = self._do_request("GET", "/api/version") + r = self._do_request("GET", url) if r.status_code == 404: raise RuntimeError(version_error_message) r.raise_for_status() diff --git a/dashboard/modules/job/common.py b/dashboard/modules/job/common.py index cf6a6e52cac46..10f3bae1991d8 100644 --- a/dashboard/modules/job/common.py +++ b/dashboard/modules/job/common.py @@ -19,9 +19,6 @@ JOB_ID_METADATA_KEY = "job_submission_id" JOB_NAME_METADATA_KEY = "job_name" -# Version 0 -> 1: Added log streaming and changed behavior of job logs cli. 
-CURRENT_VERSION = "1" - class JobStatus(str, Enum): """An enumeration for describing the status of a job.""" @@ -179,13 +176,6 @@ def validate_request_type(json_data: Dict[str, Any], request_type: dataclass) -> return request_type(**json_data) -@dataclass -class VersionResponse: - version: str - ray_version: str - ray_commit: str - - @dataclass class JobSubmitRequest: # Command to start execution, ex: "python script.py" diff --git a/dashboard/modules/job/job_head.py b/dashboard/modules/job/job_head.py index 09c22a2586471..18b6ee1cdcca1 100644 --- a/dashboard/modules/job/job_head.py +++ b/dashboard/modules/job/job_head.py @@ -16,16 +16,18 @@ pin_runtime_env_uri, ) from ray.dashboard.modules.job.common import ( - CURRENT_VERSION, http_uri_components_to_uri, JobInfo, JobSubmitRequest, JobSubmitResponse, JobStopResponse, JobLogsResponse, - VersionResponse, validate_request_type, ) +from ray.dashboard.modules.version import ( + CURRENT_VERSION, + VersionResponse, +) from ray.dashboard.modules.job.job_manager import JobManager logger = logging.getLogger(__name__) diff --git a/dashboard/modules/job/tests/test_http_job_server.py b/dashboard/modules/job/tests/test_http_job_server.py index 2846ffd7a6176..8fc95a5b923a9 100644 --- a/dashboard/modules/job/tests/test_http_job_server.py +++ b/dashboard/modules/job/tests/test_http_job_server.py @@ -19,7 +19,8 @@ wait_until_server_available, ) from ray.dashboard.modules.dashboard_sdk import ClusterInfo, parse_cluster_info -from ray.dashboard.modules.job.common import CURRENT_VERSION, JobInfo +from ray.dashboard.modules.job.common import JobInfo +from ray.dashboard.modules.version import CURRENT_VERSION from ray.dashboard.tests.conftest import * # noqa from ray.job_submission import JobStatus, JobSubmissionClient from ray.tests.conftest import _ray_start diff --git a/dashboard/modules/serve/sdk.py b/dashboard/modules/serve/sdk.py index 3cf191b19d527..c98e4cc24e1a4 100644 --- a/dashboard/modules/serve/sdk.py +++ 
b/dashboard/modules/serve/sdk.py @@ -19,7 +19,7 @@ class ServeSubmissionClient(SubmissionClient): def __init__( self, - dashboard_address: str, + dashboard_agent_address: str, create_cluster_if_needed=False, cookies: Optional[Dict[str, Any]] = None, metadata: Optional[Dict[str, Any]] = None, @@ -31,17 +31,18 @@ def __init__( "installation: `pip install 'ray[default']``" ) super().__init__( - address=dashboard_address, + address=dashboard_agent_address, create_cluster_if_needed=create_cluster_if_needed, cookies=cookies, metadata=metadata, headers=headers, ) - self._check_connection_and_version( + self._check_connection_and_version_with_url( min_version="1.12", version_error_message="Serve CLI is not supported on the Ray " "cluster. Please ensure the cluster is " "running Ray 1.12 or higher.", + url="/api/ray/version", ) def deploy_application(self, config: Dict) -> None: diff --git a/dashboard/modules/serve/serve_head.py b/dashboard/modules/serve/serve_agent.py similarity index 69% rename from dashboard/modules/serve/serve_head.py rename to dashboard/modules/serve/serve_agent.py index 2a079e6267abd..152587a89901f 100644 --- a/dashboard/modules/serve/serve_head.py +++ b/dashboard/modules/serve/serve_agent.py @@ -3,8 +3,15 @@ from aiohttp.web import Request, Response +import dataclasses +import ray +import aiohttp.web import ray.dashboard.optional_utils as optional_utils import ray.dashboard.utils as dashboard_utils +from ray.dashboard.modules.version import ( + CURRENT_VERSION, + VersionResponse, +) logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -13,11 +20,28 @@ # NOTE (shrekris-anyscale): This class uses delayed imports for all -# Ray Serve-related modules. That way, users can use the Ray dashboard for +# Ray Serve-related modules. That way, users can use the Ray dashboard agent for # non-Serve purposes without downloading Serve dependencies. 
-class ServeHead(dashboard_utils.DashboardHeadModule): - def __init__(self, dashboard_head): - super().__init__(dashboard_head) +class ServeAgent(dashboard_utils.DashboardAgentModule): + def __init__(self, dashboard_agent): + super().__init__(dashboard_agent) + + # TODO: It's better to use `/api/version`. + # It requires a refactor of ClassMethodRouteTable to differentiate the server. + @routes.get("/api/ray/version") + async def get_version(self, req: Request) -> Response: + # NOTE(edoakes): CURRENT_VERSION should be bumped and checked on the + # client when we have backwards-incompatible changes. + resp = VersionResponse( + version=CURRENT_VERSION, + ray_version=ray.__version__, + ray_commit=ray.__commit__, + ) + return Response( + text=json.dumps(dataclasses.asdict(resp)), + content_type="application/json", + status=aiohttp.web.HTTPOk.status_code, + ) @routes.get("/api/serve/deployments/") @optional_utils.init_ray_and_catch_exceptions(connect_to_serve=True) diff --git a/dashboard/modules/serve/tests/test_serve_head.py b/dashboard/modules/serve/tests/test_serve_agent.py similarity index 93% rename from dashboard/modules/serve/tests/test_serve_head.py rename to dashboard/modules/serve/tests/test_serve_agent.py index c81589a96100f..296a4270710eb 100644 --- a/dashboard/modules/serve/tests/test_serve_head.py +++ b/dashboard/modules/serve/tests/test_serve_agent.py @@ -9,15 +9,21 @@ import ray from ray import serve from ray._private.test_utils import wait_for_condition +import ray._private.ray_constants as ray_constants -GET_OR_PUT_URL = "http://localhost:8265/api/serve/deployments/" -STATUS_URL = "http://localhost:8265/api/serve/deployments/status" +GET_OR_PUT_URL = "http://localhost:52365/api/serve/deployments/" +STATUS_URL = "http://localhost:52365/api/serve/deployments/status" @pytest.fixture def ray_start_stop(): subprocess.check_output(["ray", "stop", "--force"]) subprocess.check_output(["ray", "start", "--head"]) + wait_for_condition( + lambda: 
requests.get("http://localhost:52365/api/ray/version").status_code + == 200, + timeout=15, + ) yield subprocess.check_output(["ray", "stop", "--force"]) @@ -248,5 +254,13 @@ def test_serve_namespace(ray_start_stop): serve.shutdown() +def test_default_dashboard_agent_listen_port(): + """ + Defaults in the code and the documentation assume + the dashboard agent listens to HTTP on port 52365. + """ + assert ray_constants.DEFAULT_DASHBOARD_AGENT_LISTEN_PORT == 52365 + + if __name__ == "__main__": sys.exit(pytest.main(["-v", __file__])) diff --git a/dashboard/modules/version.py b/dashboard/modules/version.py new file mode 100644 index 0000000000000..ff6eb580a4c2e --- /dev/null +++ b/dashboard/modules/version.py @@ -0,0 +1,11 @@ +from dataclasses import dataclass + +# Version 0 -> 1: Added log streaming and changed behavior of job logs cli. +CURRENT_VERSION = "1" + + +@dataclass +class VersionResponse: + version: str + ray_version: str + ray_commit: str diff --git a/dashboard/optional_utils.py b/dashboard/optional_utils.py index afed9f170c63e..06524eb906408 100644 --- a/dashboard/optional_utils.py +++ b/dashboard/optional_utils.py @@ -259,10 +259,13 @@ async def decorator(self, *args, **kwargs): try: if not ray.is_initialized(): try: - address = self._dashboard_head.gcs_address + address = self.get_gcs_address() logger.info(f"Connecting to ray with address={address}") + # Init ray without logging to driver + # to avoid infinite logging issue. 
ray.init( address=address, + log_to_driver=False, namespace=RAY_INTERNAL_DASHBOARD_NAMESPACE, ) except Exception as e: diff --git a/dashboard/tests/test_dashboard.py b/dashboard/tests/test_dashboard.py index afabde47a3213..7a5a2e1e756db 100644 --- a/dashboard/tests/test_dashboard.py +++ b/dashboard/tests/test_dashboard.py @@ -12,6 +12,7 @@ import numpy as np import pytest import requests +import socket import ray import ray.dashboard.consts as dashboard_consts @@ -817,6 +818,7 @@ def test_gcs_check_alive(fast_gcs_failure_detection, ray_start_with_dashboard): ) def test_dashboard_does_not_depend_on_serve(): """Check that the dashboard can start without Serve.""" + ray.shutdown() with pytest.raises(ImportError): from ray import serve # noqa: F401 @@ -826,13 +828,117 @@ def test_dashboard_does_not_depend_on_serve(): # Ensure standard dashboard features, like snapshot, still work response = requests.get(f"http://{ctx.dashboard_url}/api/snapshot") assert response.status_code == 200 + assert response.json()["result"] is True assert "snapshot" in response.json()["data"] + agent_url = ( + ctx.address_info["node_ip_address"] + + ":" + + str(ctx.address_info["dashboard_agent_listen_port"]) + ) + + # Check that Serve-dependent features fail + response = requests.get(f"http://{agent_url}/api/serve/deployments/") + assert response.status_code == 500 + + +@pytest.mark.skipif( + os.environ.get("RAY_DEFAULT") != "1", + reason="This test only works for default installation.", +) +def test_agent_does_not_depend_on_serve(shutdown_only): + """Check that the dashboard agent can start without Serve.""" + ray.shutdown() + + with pytest.raises(ImportError): + from ray import serve # noqa: F401 + + ray.init(include_dashboard=True) + + node = ray._private.worker._global_node + all_processes = node.all_processes + raylet_proc_info = all_processes[ray_constants.PROCESS_TYPE_RAYLET][0] + raylet_proc = psutil.Process(raylet_proc_info.process.pid) + + wait_for_condition(lambda: 
search_agent(raylet_proc.children())) + agent_proc = search_agent(raylet_proc.children()) + agent_pid = agent_proc.pid + + check_agent_register(raylet_proc, agent_pid) + + logger.info("Agent works.") + + agent_url = node.node_ip_address + ":" + str(node.dashboard_agent_listen_port) + # Check that Serve-dependent features fail - response = requests.get(f"http://{ctx.dashboard_url}/api/serve/deployments/") + response = requests.get(f"http://{agent_url}/api/serve/deployments/") assert response.status_code == 500 - assert "ModuleNotFoundError" in response.text + + # The agent should be dead if raylet exits. + raylet_proc.kill() + raylet_proc.wait() + agent_proc.wait(5) + + +@pytest.mark.skipif( + os.environ.get("RAY_MINIMAL") == "1" or os.environ.get("RAY_DEFAULT") == "1", + reason="This test is not supposed to work for minimal or default installation.", +) +def test_agent_port_conflict(): + ray.shutdown() + + # start ray and test agent works. + ray.init(include_dashboard=True) + + node = ray._private.worker._global_node + agent_url = node.node_ip_address + ":" + str(node.dashboard_agent_listen_port) + wait_for_condition( + lambda: requests.get(f"http://{agent_url}/api/serve/deployments/").status_code + == 200 + ) + ray.shutdown() + + # ocuppy the port with a socket. + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + + wait_for_condition( + lambda: s.connect_ex( + ("localhost", ray_constants.DEFAULT_DASHBOARD_AGENT_LISTEN_PORT) + ) + != 0 + ) + + # start ray and the agent http server should fail + # to start due to port conflict, but the agent still starts. 
+ ray.init(include_dashboard=True) + node = ray._private.worker._global_node + all_processes = node.all_processes + raylet_proc_info = all_processes[ray_constants.PROCESS_TYPE_RAYLET][0] + raylet_proc = psutil.Process(raylet_proc_info.process.pid) + + wait_for_condition(lambda: search_agent(raylet_proc.children())) + agent_proc = search_agent(raylet_proc.children()) + agent_pid = agent_proc.pid + + check_agent_register(raylet_proc, agent_pid) + + # Release the port from socket. + s.close() + + agent_url = node.node_ip_address + ":" + str(node.dashboard_agent_listen_port) + + # Check that Serve-dependent features fail. + try: + wait_for_condition( + lambda: requests.get( + f"http://{agent_url}/api/serve/deployments/" + ).status_code + == 200 + ) + assert False + except Exception as e: + assert e is not None @pytest.mark.skipif( diff --git a/dashboard/utils.py b/dashboard/utils.py index 29e77d96e93c3..f0e287936798f 100644 --- a/dashboard/utils.py +++ b/dashboard/utils.py @@ -55,6 +55,9 @@ def is_minimal_module(): dependencies. """ + def get_gcs_address(self): + return self._dashboard_agent.gcs_address + class DashboardHeadModule(abc.ABC): def __init__(self, dashboard_head): @@ -81,6 +84,9 @@ def is_minimal_module(): dependencies. """ + def get_gcs_address(self): + return self._dashboard_head.gcs_address + def dashboard_module(enable): """A decorator for dashboard module.""" diff --git a/doc/source/serve/production.md b/doc/source/serve/production.md index ba314c3158b37..e58923883aee8 100644 --- a/doc/source/serve/production.md +++ b/doc/source/serve/production.md @@ -306,44 +306,49 @@ As a side note, you could also package your deployment graph into a standalone P ### Using a Remote Cluster -By default, `serve deploy` deploys to a cluster running locally. However, you should also use `serve deploy` whenever you want to deploy your Serve application to a remote cluster. 
`serve deploy` takes in an optional `--address/-a` argument where you can specify the dashboard address of your remote Ray cluster. This address should be of the form: +By default, `serve deploy` deploys to a cluster running locally. However, you should also use `serve deploy` whenever you want to deploy your Serve application to a remote cluster. `serve deploy` takes in an optional `--address/-a` argument where you can specify your remote Ray cluster's dashboard agent address. This address should be of the form: ``` -[YOUR_RAY_CLUSTER_URI]:[DASHBOARD PORT] +[YOUR_RAY_CLUSTER_URI]:[DASHBOARD AGENT PORT] ``` -As an example, the address for the local cluster started by `ray start --head` is `http://127.0.0.1:8265`. We can explicitly deploy to this address using the command +As an example, the address for the local cluster started by `ray start --head` is `http://127.0.0.1:52365`. We can explicitly deploy to this address using the command ```console -$ serve deploy config_file.yaml -a http://127.0.0.1:8265 +$ serve deploy config_file.yaml -a http://127.0.0.1:52365 ``` -The Ray dashboard's default port is 8265. This port may be different if: -* You explicitly set it using the `--dashboard-port` argument when running `ray start`. -* Port 8265 was unavailable when Ray started. In that case, the dashboard port is incremented until an available port is found. E.g. if 8265 is unavailable, the port becomes 8266. If that's unavailable, it becomes 8267, and so on. +The Ray dashboard agent's default port is 52365. You can set it to a different value using the `--dashboard-agent-listen-port` argument when running `ray start`. + +:::{note} +If the port 52365 (or whichever port you specify with `--dashboard-agent-listen-port`) is unavailable when Ray starts, the dashboard agent’s HTTP server will fail. However, the dashboard agent and Ray will continue to run. 
+You can check if an agent’s HTTP server is running by sending a curl request: `curl http://{node_ip}:{dashboard_agent_port}/api/serve/deployments/`. If the request succeeds, the server is running on that node. If the request fails, the server is not running on that node. To launch the server on that node, terminate the process occupying the dashboard agent’s port, and restart Ray on that node. +::: :::{tip} -By default, all the Serve CLI commands assume that you're working with a local cluster, so if you don't specify an `--address/-a` value, they use the Ray address associated with a local cluster started by `ray start --head`. However, if the `RAY_ADDRESS` environment variable is set, all Serve CLI commands will default to that value instead (unless you also specify an `--address/-a` value). +By default, all the Serve CLI commands assume that you're working with a local cluster. All Serve CLI commands, except `serve start` and `serve run`, use the Ray agent address associated with a local cluster started by `ray start --head`. However, if the `RAY_AGENT_ADDRESS` environment variable is set, these Serve CLI commands will default to that value instead. + +Similarly, `serve start` and `serve run` use the Ray head node address associated with a local cluster by default. If the `RAY_ADDRESS` environment variable is set, they will use that value instead. -You can check this variable's value by running: +You can check `RAY_AGENT_ADDRESS`'s value by running: ```console -$ echo $RAY_ADDRESS +$ echo $RAY_AGENT_ADDRESS ``` You can set this variable by running the CLI command: ```console -$ export RAY_ADDRESS=[YOUR VALUE] +$ export RAY_AGENT_ADDRESS=[YOUR VALUE] ``` You can unset this variable by running the CLI command: ```console -$ unset RAY_ADDRESS +$ unset RAY_AGENT_ADDRESS ``` -Check for this variable in your environment to make sure you're using your desired Ray address. 
+Check for this variable in your environment to make sure you're using your desired Ray agent address. ::: (serve-in-production-inspecting)= diff --git a/python/ray/_private/node.py b/python/ray/_private/node.py index 19f614132bfa9..9a5f566105b06 100644 --- a/python/ray/_private/node.py +++ b/python/ray/_private/node.py @@ -152,6 +152,8 @@ def __init__( self._ray_params = ray_params self._config = ray_params._system_config or {} + self._dashboard_agent_listen_port = ray_params.dashboard_agent_listen_port + # Configure log rotation parameters. self.max_bytes = int( os.getenv("RAY_ROTATION_MAX_BYTES", ray_constants.LOGGING_ROTATE_BYTES) @@ -551,6 +553,11 @@ def metrics_export_port(self): """Get the port that exposes metrics""" return self._metrics_export_port + @property + def dashboard_agent_listen_port(self): + """Get the dashboard agent's listen port""" + return self._dashboard_agent_listen_port + @property def logging_config(self): """Get the logging config of the current node.""" @@ -573,6 +580,7 @@ def address_info(self): "metrics_export_port": self._metrics_export_port, "gcs_address": self.gcs_address, "address": self.address, + "dashboard_agent_listen_port": self.dashboard_agent_listen_port, } def is_head(self): diff --git a/python/ray/_private/parameter.py b/python/ray/_private/parameter.py index ee6dc163df55d..1dd48129c100b 100644 --- a/python/ray/_private/parameter.py +++ b/python/ray/_private/parameter.py @@ -87,6 +87,7 @@ class RayParams: Defaults to 8265. dashboard_agent_listen_port: The port for dashboard agents to listen on for HTTP requests. + Defaults to 52365. plasma_store_socket_name: If provided, it will specify the socket name used by the plasma store. 
raylet_socket_name: If provided, it will specify the socket path @@ -156,7 +157,9 @@ def __init__( include_dashboard: Optional[bool] = None, dashboard_host: Optional[str] = ray_constants.DEFAULT_DASHBOARD_IP, dashboard_port: Optional[bool] = ray_constants.DEFAULT_DASHBOARD_PORT, - dashboard_agent_listen_port: Optional[int] = 0, + dashboard_agent_listen_port: Optional[ + int + ] = ray_constants.DEFAULT_DASHBOARD_AGENT_LISTEN_PORT, plasma_store_socket_name: Optional[str] = None, raylet_socket_name: Optional[str] = None, temp_dir: Optional[str] = None, diff --git a/python/ray/_private/ray_constants.py b/python/ray/_private/ray_constants.py index a6384e4bfbf99..427f599abcbe3 100644 --- a/python/ray/_private/ray_constants.py +++ b/python/ray/_private/ray_constants.py @@ -89,6 +89,7 @@ def env_bool(key, default): DEFAULT_DASHBOARD_PORT = 8265 DASHBOARD_ADDRESS = "dashboard" PROMETHEUS_SERVICE_DISCOVERY_FILE = "prom_metrics_service_discovery.json" +DEFAULT_DASHBOARD_AGENT_LISTEN_PORT = 52365 # Default resource requirements for actors when no resource requirements are # specified. DEFAULT_ACTOR_METHOD_CPU_SIMPLE = 1 diff --git a/python/ray/scripts/scripts.py b/python/ray/scripts/scripts.py index c81439618d05d..41ad9d948747e 100644 --- a/python/ray/scripts/scripts.py +++ b/python/ray/scripts/scripts.py @@ -423,7 +423,7 @@ def debug(address): @click.option( "--dashboard-agent-listen-port", type=int, - default=0, + default=ray_constants.DEFAULT_DASHBOARD_AGENT_LISTEN_PORT, help="the port for dashboard agents to listen for http on.", ) @click.option( diff --git a/python/ray/serve/scripts.py b/python/ray/serve/scripts.py index d3548e2d389f6..94f1614ff1c8b 100644 --- a/python/ray/serve/scripts.py +++ b/python/ray/serve/scripts.py @@ -37,9 +37,9 @@ "using the RAY_ADDRESS environment variable." ) RAY_DASHBOARD_ADDRESS_HELP_STR = ( - "Address to use to query the Ray dashboard (defaults to " - "http://localhost:8265). 
Can also be specified using the " - "RAY_ADDRESS environment variable." + "Address to use to query the Ray dashboard agent (defaults to " + "http://localhost:52365). Can also be specified using the " + "RAY_AGENT_ADDRESS environment variable." ) @@ -122,7 +122,7 @@ def start( @click.option( "--address", "-a", - default=os.environ.get("RAY_ADDRESS", "http://localhost:8265"), + default=os.environ.get("RAY_AGENT_ADDRESS", "http://localhost:52365"), required=False, type=str, help=RAY_DASHBOARD_ADDRESS_HELP_STR, @@ -280,7 +280,7 @@ def run( @click.option( "--address", "-a", - default=os.environ.get("RAY_ADDRESS", "http://localhost:8265"), + default=os.environ.get("RAY_AGENT_ADDRESS", "http://localhost:52365"), required=False, type=str, help=RAY_DASHBOARD_ADDRESS_HELP_STR, @@ -307,7 +307,7 @@ def config(address: str): @click.option( "--address", "-a", - default=os.environ.get("RAY_ADDRESS", "http://localhost:8265"), + default=os.environ.get("RAY_AGENT_ADDRESS", "http://localhost:52365"), required=False, type=str, help=RAY_DASHBOARD_ADDRESS_HELP_STR, @@ -324,7 +324,7 @@ def status(address: str): @click.option( "--address", "-a", - default=os.environ.get("RAY_ADDRESS", "http://localhost:8265"), + default=os.environ.get("RAY_AGENT_ADDRESS", "http://localhost:52365"), required=False, type=str, help=RAY_DASHBOARD_ADDRESS_HELP_STR, diff --git a/python/ray/serve/tests/test_cli.py b/python/ray/serve/tests/test_cli.py index 19eca43b25ec5..aedb9b0e87eac 100644 --- a/python/ray/serve/tests/test_cli.py +++ b/python/ray/serve/tests/test_cli.py @@ -46,7 +46,13 @@ def assert_deployments_live(names: List[str]): @pytest.fixture def ray_start_stop(): + subprocess.check_output(["ray", "stop", "--force"]) subprocess.check_output(["ray", "start", "--head"]) + wait_for_condition( + lambda: requests.get("http://localhost:52365/api/ray/version").status_code + == 200, + timeout=15, + ) yield subprocess.check_output(["ray", "stop", "--force"]) @@ -59,7 +65,7 @@ def 
test_start_shutdown(ray_start_stop): @pytest.mark.skipif(sys.platform == "win32", reason="File path incorrect on Windows.") def test_deploy(ray_start_stop): """Deploys some valid config files and checks that the deployments work.""" - + ray.shutdown() # Initialize serve in test to enable calling serve.list_deployments() ray.init(address="auto", namespace=SERVE_NAMESPACE) @@ -107,7 +113,7 @@ def test_deploy(ray_start_stop): print("Deploying arithmetic config.") deploy_response = subprocess.check_output( - ["serve", "deploy", arithmetic_file_name, "-a", "http://localhost:8265/"] + ["serve", "deploy", arithmetic_file_name, "-a", "http://localhost:52365/"] ) assert success_message_fragment in deploy_response print("Deploy request sent successfully.") @@ -170,7 +176,7 @@ def num_live_deployments(): wait_for_condition(lambda: num_live_deployments() == 5, timeout=15) status_response = subprocess.check_output( - ["serve", "status", "-a", "http://localhost:8265/"] + ["serve", "status", "-a", "http://localhost:52365/"] ) serve_status = yaml.safe_load(status_response) @@ -420,6 +426,7 @@ def test_build(ray_start_stop, node): @pytest.mark.parametrize("use_command", [True, False]) def test_idempotence_after_controller_death(ray_start_stop, use_command: bool): """Check that CLI is idempotent even if controller dies.""" + ray.shutdown() config_file_name = os.path.join( os.path.dirname(__file__), "test_config_files", "basic_graph.yaml"