Skip to content

Commit

Permalink
Revert "Revert "[Doc] [Jobs] Document JobDetails, JobType and `Dr…
Browse files Browse the repository at this point in the history
…iverInfo`"" (#30500)

Reverts #30499. This PR was originally reverted because it caused the Jobs runtime env tests to fail: the runtime_env CI tests were installing "ray" into the runtime_env conda environment instead of "ray[default]", which is required for Jobs. This PR fixes the problem by installing "ray[default]" in the runtime_env CI tests.

Original PR description:

The classes JobDetails, JobType and DriverInfo are exposed to the user because they are the return type of JobSubmissionClient.get_job_info, so we need to document them.

This PR adds them to the API reference.
  • Loading branch information
architkulkarni committed Nov 29, 2022
1 parent c54dcc3 commit b682c2b
Show file tree
Hide file tree
Showing 11 changed files with 73 additions and 30 deletions.
2 changes: 1 addition & 1 deletion dashboard/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,7 @@ async def _check_parent():
help=(
"Minimal agent only contains a subset of features that don't "
"require additional dependencies installed when ray is installed "
"by `pip install ray[default]`."
"by `pip install 'ray[default]'`."
),
)
parser.add_argument(
Expand Down
4 changes: 4 additions & 0 deletions dashboard/modules/job/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
_internal_kv_initialized,
)

from ray.util.annotations import PublicAPI

# NOTE(edoakes): these constants should be considered a public API because
# they're exposed in the snapshot API.
JOB_ID_METADATA_KEY = "job_submission_id"
Expand All @@ -25,6 +27,7 @@
SUPERVISOR_ACTOR_RAY_NAMESPACE = "SUPERVISOR_ACTOR_RAY_NAMESPACE"


@PublicAPI(stability="stable")
class JobStatus(str, Enum):
"""An enumeration for describing the status of a job."""

Expand Down Expand Up @@ -55,6 +58,7 @@ def is_terminal(self) -> bool:


# TODO(aguo): Convert to pydantic model
@PublicAPI(stability="stable")
@dataclass
class JobInfo:
"""A class for recording information associated with a job and its execution."""
Expand Down
35 changes: 23 additions & 12 deletions dashboard/modules/job/pydantic_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,30 +3,34 @@

from pydantic import BaseModel, Field
from ray.dashboard.modules.job.common import JobStatus
from ray.util.annotations import PublicAPI


@PublicAPI(stability="beta")
class DriverInfo(BaseModel):
"""A class for recording information about the driver related to the job."""

id: str = Field(..., description="The id of the driver")
node_ip_address: str = Field(
..., description="The ip address of the node the driver is running on"
..., description="The IP address of the node the driver is running on."
)
pid: str = Field(
..., description="The pid of the worker process the driver is using."
..., description="The PID of the worker process the driver is using."
)
# TODO(aguo): Add node_id as a field.


@PublicAPI(stability="beta")
class JobType(str, Enum):
"""An enumeration for describing the different job types."""

#: A job that was initiated by the job submission apis
#: A job that was initiated by the Ray Jobs API.
SUBMISSION = "SUBMISSION"
#: A job that was initiated by a driver script.
DRIVER = "DRIVER"


@PublicAPI(stability="beta")
class JobDetails(BaseModel):
"""
Job data with extra details about its driver and its submission.
Expand All @@ -38,18 +42,20 @@ class JobDetails(BaseModel):
)
job_id: Optional[str] = Field(
None,
description="The job id. An id that is created for every job that is "
"launched in ray. This can be used to fetch data about jobs using ray "
"core apis.",
description="The job ID. An ID that is created for every job that is "
"launched in Ray. This can be used to fetch data about jobs using Ray "
"Core APIs.",
)
submission_id: Optional[str] = Field(
None,
description="A submission id is an id created for every submission job. It can "
"be used to fetch data about jobs using the job submission apis.",
description="A submission ID is an ID created for every job submitted via "
"the Ray Jobs API. It can "
"be used to fetch data about jobs using the Ray Jobs API.",
)
driver_info: Optional[DriverInfo] = Field(
None,
description="The driver related to this job. For submission jobs, "
description="The driver related to this job. For jobs submitted via "
"the Ray Jobs API, "
"it is the last driver launched by that job submission, "
"or None if there is no driver.",
)
Expand All @@ -62,7 +68,7 @@ class JobDetails(BaseModel):
None, description="A message describing the status in more detail."
)
error_type: Optional[str] = Field(
None, description="Internal error, user script error"
None, description="Internal error or user script error."
)
start_time: Optional[int] = Field(
None,
Expand All @@ -83,6 +89,11 @@ class JobDetails(BaseModel):
# - driver_agent_http_address: this node's agent http address
# - driver_node_id: this node's id.
driver_agent_http_address: Optional[str] = Field(
None, description="Driver agent http address."
None,
description="The HTTP address of the JobAgent on the node the job "
"entrypoint command is running on.",
)
driver_node_id: Optional[str] = Field(
None,
description="The node ID of the node the job entrypoint command is running on.",
)
driver_node_id: Optional[str] = Field(None, description="Driver node id.")
7 changes: 1 addition & 6 deletions dashboard/modules/job/tests/test_http_job_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,12 +333,7 @@ def test_submit_job(job_sdk_client, runtime_env_option, monkeypatch):
runtime_env=runtime_env_option["runtime_env"],
)

# Conda env takes longer to install, causing flakiness.
timeout = 240 if runtime_env_option["runtime_env"].get("conda") is not None else 120

wait_for_condition(
_check_job_succeeded, client=client, job_id=job_id, timeout=timeout
)
wait_for_condition(_check_job_succeeded, client=client, job_id=job_id, timeout=60)

logs = client.get_job_logs(job_id)
assert runtime_env_option["expected_logs"] in logs
Expand Down
5 changes: 1 addition & 4 deletions dashboard/modules/job/tests/test_job_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,14 +227,11 @@ async def test_submit_job(job_sdk_client, runtime_env_option, monkeypatch):
submit_result = await agent_client.submit_job_internal(request)
job_id = submit_result.submission_id

# Conda env takes longer to install, causing flakiness.
timeout = 240 if runtime_env_option["runtime_env"].get("conda") is not None else 120

wait_for_condition(
partial(
_check_job, client=head_client, job_id=job_id, status=JobStatus.SUCCEEDED
),
timeout=timeout,
timeout=60,
)

# There is only one node, so there is no need to replace the client of the JobAgent
Expand Down
6 changes: 3 additions & 3 deletions dashboard/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,13 +122,13 @@ def get_all_modules(module_type):
except ModuleNotFoundError as e:
logger.info(
f"Module {name} cannot be loaded because "
"we cannot import all dependencies. Download "
"`pip install ray[default]` for the full "
"we cannot import all dependencies. Install this module using "
"`pip install 'ray[default]'` for the full "
f"dashboard functionality. Error: {e}"
)
if not should_only_load_minimal_modules:
logger.info(
"Although `pip install ray[default] is downloaded, "
"Although `pip install 'ray[default]'` is downloaded, "
"module couldn't be imported"
)
raise e
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,27 @@ JobInfo

.. autoclass:: ray.job_submission.JobInfo
:members:

.. _job-details-ref:

JobDetails
~~~~~~~~~~

.. autopydantic_model:: ray.job_submission.JobDetails

.. _job-type-ref:

JobType
~~~~~~~

.. autoclass:: ray.job_submission.JobType
:members:
:noindex:

.. _driver-info-ref:

DriverInfo
~~~~~~~~~~

.. autopydantic_model:: ray.job_submission.DriverInfo

Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Ray Jobs is available in versions 1.9+ and requires a full installation of Ray.

.. code-block:: shell
pip install ray[default]
pip install "ray[default]"
See the :ref:`installation guide <installation>` for more details on installing Ray.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Ray Jobs is available in versions 1.9+ and requires a full installation of Ray.

.. code-block:: shell
pip install ray[default]
pip install "ray[default]"
See the :ref:`installation guide <installation>` for more details on installing Ray.

Expand Down
6 changes: 5 additions & 1 deletion python/ray/_private/runtime_env/conda.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,11 @@ def _get_ray_setup_spec():

def _resolve_install_from_source_ray_dependencies():
"""Find the Ray dependencies when Ray is installed from source."""
return _get_ray_setup_spec().install_requires
deps = (
_get_ray_setup_spec().install_requires + _get_ray_setup_spec().extras["default"]
)
# Remove duplicates
return list(set(deps))


def _inject_ray_to_conda_site(
Expand Down
10 changes: 9 additions & 1 deletion python/ray/job_submission/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
from ray.dashboard.modules.job.common import JobInfo, JobStatus
from ray.dashboard.modules.job.pydantic_models import DriverInfo, JobDetails, JobType
from ray.dashboard.modules.job.sdk import JobSubmissionClient

__all__ = ["JobSubmissionClient", "JobStatus", "JobInfo"]
__all__ = [
"JobSubmissionClient",
"JobStatus",
"JobInfo",
"JobDetails",
"DriverInfo",
"JobType",
]

0 comments on commit b682c2b

Please sign in to comment.