Skip to content

Commit

Permalink
Revert "Revert "[Dashboard][Serve] Move Serve related endpoints to dashboard agent"" (ray-project#26336)
Browse files Browse the repository at this point in the history
  • Loading branch information
brucez-anyscale committed Jul 7, 2022
1 parent b803792 commit f76d7b2
Show file tree
Hide file tree
Showing 20 changed files with 257 additions and 59 deletions.
4 changes: 2 additions & 2 deletions dashboard/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def __init__(
log_dir=None,
metrics_export_port=None,
node_manager_port=None,
listen_port=0,
listen_port=ray_constants.DEFAULT_DASHBOARD_AGENT_LISTEN_PORT,
object_store_name=None,
raylet_name=None,
logging_params=None,
Expand Down Expand Up @@ -329,7 +329,7 @@ async def _check_parent():
"--listen-port",
required=False,
type=int,
default=0,
default=ray_constants.DEFAULT_DASHBOARD_AGENT_LISTEN_PORT,
help="Port for HTTP server to listen on",
)
parser.add_argument(
Expand Down
20 changes: 14 additions & 6 deletions dashboard/http_server_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,20 @@ async def start(self, modules):

self.runner = aiohttp.web.AppRunner(app)
await self.runner.setup()
site = aiohttp.web.TCPSite(
self.runner,
"127.0.0.1" if self.ip == "127.0.0.1" else "0.0.0.0",
self.listen_port,
)
await site.start()
try:
site = aiohttp.web.TCPSite(
self.runner,
"127.0.0.1" if self.ip == "127.0.0.1" else "0.0.0.0",
self.listen_port,
)
await site.start()
except OSError as e:
logger.error(
f"Agent port #{self.listen_port} already in use. "
"Failed to start agent. "
f"Ensure port #{self.listen_port} is available, and then try again."
)
raise e
self.http_host, self.http_port, *_ = site._server.sockets[0].getsockname()
logger.info(
"Dashboard agent http address: %s:%s", self.http_host, self.http_port
Expand Down
10 changes: 9 additions & 1 deletion dashboard/modules/dashboard_sdk.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,14 +205,22 @@ def __init__(

def _check_connection_and_version(
    self,
    min_version: str = "1.9",
    version_error_message: str = None,
):
    """Check connectivity and server version via the default version route.

    Thin wrapper that forwards both arguments, positionally and unchanged,
    to ``_check_connection_and_version_with_url`` without supplying ``url``,
    so that method's own default endpoint is used.

    Args:
        min_version: Minimum Ray version, passed through to the delegate.
        version_error_message: Optional custom error text; ``None`` is
            passed through as-is for the delegate to handle.
    """
    # The return value of the delegate is intentionally not propagated.
    self._check_connection_and_version_with_url(
        min_version,
        version_error_message,
    )

def _check_connection_and_version_with_url(
self,
min_version: str = "1.9",
version_error_message: str = None,
url: str = "/api/version",
):
if version_error_message is None:
version_error_message = (
f"Please ensure the cluster is running Ray {min_version} or higher."
)

try:
r = self._do_request("GET", "/api/version")
r = self._do_request("GET", url)
if r.status_code == 404:
raise RuntimeError(version_error_message)
r.raise_for_status()
Expand Down
10 changes: 0 additions & 10 deletions dashboard/modules/job/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,6 @@
JOB_ID_METADATA_KEY = "job_submission_id"
JOB_NAME_METADATA_KEY = "job_name"

# Version 0 -> 1: Added log streaming and changed behavior of job logs cli.
CURRENT_VERSION = "1"


class JobStatus(str, Enum):
"""An enumeration for describing the status of a job."""
Expand Down Expand Up @@ -179,13 +176,6 @@ def validate_request_type(json_data: Dict[str, Any], request_type: dataclass) ->
return request_type(**json_data)


@dataclass
class VersionResponse:
    """Payload describing the API version and the Ray build behind it."""

    # API version string of the endpoint.
    version: str
    # Version string of the Ray installation.
    ray_version: str
    # Commit identifier of the Ray build.
    ray_commit: str


@dataclass
class JobSubmitRequest:
# Command to start execution, ex: "python script.py"
Expand Down
6 changes: 4 additions & 2 deletions dashboard/modules/job/job_head.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,18 @@
pin_runtime_env_uri,
)
from ray.dashboard.modules.job.common import (
CURRENT_VERSION,
http_uri_components_to_uri,
JobInfo,
JobSubmitRequest,
JobSubmitResponse,
JobStopResponse,
JobLogsResponse,
VersionResponse,
validate_request_type,
)
from ray.dashboard.modules.version import (
CURRENT_VERSION,
VersionResponse,
)
from ray.dashboard.modules.job.job_manager import JobManager

logger = logging.getLogger(__name__)
Expand Down
3 changes: 2 additions & 1 deletion dashboard/modules/job/tests/test_http_job_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
wait_until_server_available,
)
from ray.dashboard.modules.dashboard_sdk import ClusterInfo, parse_cluster_info
from ray.dashboard.modules.job.common import CURRENT_VERSION, JobInfo
from ray.dashboard.modules.job.common import JobInfo
from ray.dashboard.modules.version import CURRENT_VERSION
from ray.dashboard.tests.conftest import * # noqa
from ray.job_submission import JobStatus, JobSubmissionClient
from ray.tests.conftest import _ray_start
Expand Down
7 changes: 4 additions & 3 deletions dashboard/modules/serve/sdk.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
class ServeSubmissionClient(SubmissionClient):
def __init__(
self,
dashboard_address: str,
dashboard_agent_address: str,
create_cluster_if_needed=False,
cookies: Optional[Dict[str, Any]] = None,
metadata: Optional[Dict[str, Any]] = None,
Expand All @@ -31,17 +31,18 @@ def __init__(
"installation: `pip install 'ray[default']``"
)
super().__init__(
address=dashboard_address,
address=dashboard_agent_address,
create_cluster_if_needed=create_cluster_if_needed,
cookies=cookies,
metadata=metadata,
headers=headers,
)
self._check_connection_and_version(
self._check_connection_and_version_with_url(
min_version="1.12",
version_error_message="Serve CLI is not supported on the Ray "
"cluster. Please ensure the cluster is "
"running Ray 1.12 or higher.",
url="/api/ray/version",
)

def deploy_application(self, config: Dict) -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,15 @@

from aiohttp.web import Request, Response

import dataclasses
import ray
import aiohttp.web
import ray.dashboard.optional_utils as optional_utils
import ray.dashboard.utils as dashboard_utils
from ray.dashboard.modules.version import (
CURRENT_VERSION,
VersionResponse,
)

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
Expand All @@ -13,11 +20,28 @@


# NOTE (shrekris-anyscale): This class uses delayed imports for all
# Ray Serve-related modules. That way, users can use the Ray dashboard for
# Ray Serve-related modules. That way, users can use the Ray dashboard agent for
# non-Serve purposes without downloading Serve dependencies.
class ServeHead(dashboard_utils.DashboardHeadModule):
def __init__(self, dashboard_head):
super().__init__(dashboard_head)
class ServeAgent(dashboard_utils.DashboardAgentModule):
def __init__(self, dashboard_agent):
super().__init__(dashboard_agent)

# TODO: It's better to use `/api/version`.
# It requires a refactor of ClassMethodRouteTable to differentiate the server.
@routes.get("/api/ray/version")
async def get_version(self, req: Request) -> Response:
    """Serve the API version plus the Ray version and commit as JSON.

    Args:
        req: Incoming HTTP request; not inspected by this handler.

    Returns:
        A response with status 200 whose ``application/json`` body is the
        serialized ``VersionResponse`` fields.
    """
    # NOTE(edoakes): CURRENT_VERSION should be bumped and checked on the
    # client when we have backwards-incompatible changes.
    version_info = VersionResponse(
        version=CURRENT_VERSION,
        ray_version=ray.__version__,
        ray_commit=ray.__commit__,
    )
    body = json.dumps(dataclasses.asdict(version_info))
    ok_status = aiohttp.web.HTTPOk.status_code
    return Response(text=body, content_type="application/json", status=ok_status)

@routes.get("/api/serve/deployments/")
@optional_utils.init_ray_and_catch_exceptions(connect_to_serve=True)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,21 @@
import ray
from ray import serve
from ray._private.test_utils import wait_for_condition
import ray._private.ray_constants as ray_constants

# Earlier endpoint values on port 8265; superseded by the assignments below.
GET_OR_PUT_URL = "http://localhost:8265/api/serve/deployments/"
STATUS_URL = "http://localhost:8265/api/serve/deployments/status"
# Effective values: the Serve REST endpoints are reached on port 52365, the
# default dashboard agent HTTP port asserted by
# test_default_dashboard_agent_listen_port in this module.
# The URL scheme must be a plain "http://" for these to be valid request URLs.
GET_OR_PUT_URL = "http://localhost:52365/api/serve/deployments/"
STATUS_URL = "http://localhost:52365/api/serve/deployments/status"


@pytest.fixture
def ray_start_stop():
    """Run a test against a freshly started local Ray head node.

    Setup force-stops any running Ray processes, starts a new head node,
    and waits (up to 15 seconds) until the version endpoint on port 52365
    answers with HTTP 200. Teardown force-stops Ray again.
    """
    subprocess.check_output(["ray", "stop", "--force"])
    subprocess.check_output(["ray", "start", "--head"])
    # Readiness probe: `ray start` returning does not guarantee that the HTTP
    # server on port 52365 is already accepting requests.
    wait_for_condition(
        lambda: requests.get("http://localhost:52365/api/ray/version").status_code
        == 200,
        timeout=15,
    )
    yield
    subprocess.check_output(["ray", "stop", "--force"])

Expand Down Expand Up @@ -248,5 +254,13 @@ def test_serve_namespace(ray_start_stop):
serve.shutdown()


def test_default_dashboard_agent_listen_port():
    """
    Guard the default dashboard agent HTTP port: the code defaults and
    the documentation both assume it is 52365.
    """
    expected_port = 52365
    actual_port = ray_constants.DEFAULT_DASHBOARD_AGENT_LISTEN_PORT
    assert actual_port == expected_port


if __name__ == "__main__":
sys.exit(pytest.main(["-v", __file__]))
11 changes: 11 additions & 0 deletions dashboard/modules/version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from dataclasses import dataclass

# Version 0 -> 1: Added log streaming and changed behavior of job logs cli.
CURRENT_VERSION = "1"


@dataclass
class VersionResponse:
    """Payload describing the API version and the Ray build behind it."""

    # API version string of the endpoint.
    version: str
    # Version string of the Ray installation.
    ray_version: str
    # Commit identifier of the Ray build.
    ray_commit: str
5 changes: 4 additions & 1 deletion dashboard/optional_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,10 +259,13 @@ async def decorator(self, *args, **kwargs):
try:
if not ray.is_initialized():
try:
address = self._dashboard_head.gcs_address
address = self.get_gcs_address()
logger.info(f"Connecting to ray with address={address}")
# Init ray without logging to driver
# to avoid infinite logging issue.
ray.init(
address=address,
log_to_driver=False,
namespace=RAY_INTERNAL_DASHBOARD_NAMESPACE,
)
except Exception as e:
Expand Down
Loading

0 comments on commit f76d7b2

Please sign in to comment.