Skip to content

Commit

Permalink
[State Observability] pre-alpha documentation (ray-project#26560)
Browse files Browse the repository at this point in the history
Adds

Documentation for state APIs
API reference
  • Loading branch information
rkooo567 committed Jul 26, 2022
1 parent 78d6fc6 commit 39b9c44
Show file tree
Hide file tree
Showing 14 changed files with 1,111 additions and 162 deletions.
24 changes: 12 additions & 12 deletions dashboard/memory_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class GroupByType(Enum):
STACK_TRACE = "stack_trace"


class ReferenceType:
class ReferenceType(Enum):
# Enum members are not JSON serializable, so use `.value` where a string is needed.
ACTOR_HANDLE = "ACTOR_HANDLE"
PINNED_IN_MEMORY = "PINNED_IN_MEMORY"
Expand Down Expand Up @@ -146,17 +146,17 @@ def group_key(self, group_by_type: GroupByType) -> str:

def _get_reference_type(self) -> str:
if self._is_object_ref_actor_handle():
return ReferenceType.ACTOR_HANDLE
return ReferenceType.ACTOR_HANDLE.value
if self.pinned_in_memory:
return ReferenceType.PINNED_IN_MEMORY
return ReferenceType.PINNED_IN_MEMORY.value
elif self.submitted_task_ref_count > 0:
return ReferenceType.USED_BY_PENDING_TASK
return ReferenceType.USED_BY_PENDING_TASK.value
elif self.local_ref_count > 0:
return ReferenceType.LOCAL_REFERENCE
return ReferenceType.LOCAL_REFERENCE.value
elif len(self.contained_in_owned) > 0:
return ReferenceType.CAPTURED_IN_OBJECT
return ReferenceType.CAPTURED_IN_OBJECT.value
else:
return ReferenceType.UNKNOWN_STATUS
return ReferenceType.UNKNOWN_STATUS.value

def _is_object_ref_actor_handle(self) -> bool:
object_ref_hex = self.object_ref.hex()
Expand Down Expand Up @@ -247,15 +247,15 @@ def summarize(self):
for entry in self.table:
if entry.object_size > 0:
total_object_size += entry.object_size
if entry.reference_type == ReferenceType.LOCAL_REFERENCE:
if entry.reference_type == ReferenceType.LOCAL_REFERENCE.value:
total_local_ref_count += 1
elif entry.reference_type == ReferenceType.PINNED_IN_MEMORY:
elif entry.reference_type == ReferenceType.PINNED_IN_MEMORY.value:
total_pinned_in_memory += 1
elif entry.reference_type == ReferenceType.USED_BY_PENDING_TASK:
elif entry.reference_type == ReferenceType.USED_BY_PENDING_TASK.value:
total_used_by_pending_task += 1
elif entry.reference_type == ReferenceType.CAPTURED_IN_OBJECT:
elif entry.reference_type == ReferenceType.CAPTURED_IN_OBJECT.value:
total_captured_in_objects += 1
elif entry.reference_type == ReferenceType.ACTOR_HANDLE:
elif entry.reference_type == ReferenceType.ACTOR_HANDLE.value:
total_actor_handles += 1

self.summary = {
Expand Down
69 changes: 38 additions & 31 deletions dashboard/state_aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,45 +74,52 @@ def _convert_filters_type(
schema: The state schema. It is used to infer the type of the column for filter.
Returns:
A new list of filters with correctly types that match the schema.
A new list of filters with correct types that match the schema.
"""
new_filter = []
schema = {field.name: field.type for field in fields(schema)}

for col, predicate, val in filter:
if col in schema:
column_type = schema[col]
if isinstance(val, column_type):
# Do nothing.
try:
isinstance(val, column_type)
except TypeError:
# Calling `isinstance` with a `Literal` type raises a TypeError.
# Ignore this case.
pass
elif column_type is int:
try:
val = convert_string_to_type(val, int)
except ValueError:
raise ValueError(
f"Invalid filter `--filter {col} {val}` for a int type "
"column. Please provide an integer filter "
f"`--filter {col} [int]`"
)
elif column_type is float:
try:
val = convert_string_to_type(val, float)
except ValueError:
raise ValueError(
f"Invalid filter `--filter {col} {val}` for a float "
"type column. Please provide an integer filter "
f"`--filter {col} [float]`"
)
elif column_type is bool:
try:
val = convert_string_to_type(val, bool)
except ValueError:
raise ValueError(
f"Invalid filter `--filter {col} {val}` for a boolean "
"type column. Please provide "
f"`--filter {col} [True|true|1]` for True or "
f"`--filter {col} [False|false|0]` for False."
)
else:
if isinstance(val, column_type):
# Do nothing.
pass
elif column_type is int:
try:
val = convert_string_to_type(val, int)
except ValueError:
raise ValueError(
f"Invalid filter `--filter {col} {val}` for a int type "
"column. Please provide an integer filter "
f"`--filter {col} [int]`"
)
elif column_type is float:
try:
val = convert_string_to_type(val, float)
except ValueError:
raise ValueError(
f"Invalid filter `--filter {col} {val}` for a float "
"type column. Please provide an integer filter "
f"`--filter {col} [float]`"
)
elif column_type is bool:
try:
val = convert_string_to_type(val, bool)
except ValueError:
raise ValueError(
f"Invalid filter `--filter {col} {val}` for a boolean "
"type column. Please provide "
f"`--filter {col} [True|true|1]` for True or "
f"`--filter {col} [False|false|0]` for False."
)
new_filter.append((col, predicate, val))
return new_filter

Expand Down
12 changes: 6 additions & 6 deletions dashboard/tests/test_memory_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def test_invalid_memory_entry():

def test_valid_reference_memory_entry():
memory_entry = build_local_reference_entry()
assert memory_entry.reference_type == ReferenceType.LOCAL_REFERENCE
assert memory_entry.reference_type == ReferenceType.LOCAL_REFERENCE.value
assert memory_entry.object_ref == ray.ObjectRef(
decode_object_ref_if_needed(OBJECT_ID)
)
Expand All @@ -176,19 +176,19 @@ def test_valid_reference_memory_entry():
def test_reference_type():
# pinned in memory
memory_entry = build_pinned_in_memory_entry()
assert memory_entry.reference_type == ReferenceType.PINNED_IN_MEMORY
assert memory_entry.reference_type == ReferenceType.PINNED_IN_MEMORY.value

# used by pending task
memory_entry = build_used_by_pending_task_entry()
assert memory_entry.reference_type == ReferenceType.USED_BY_PENDING_TASK
assert memory_entry.reference_type == ReferenceType.USED_BY_PENDING_TASK.value

# captured in object
memory_entry = build_captured_in_object_entry()
assert memory_entry.reference_type == ReferenceType.CAPTURED_IN_OBJECT
assert memory_entry.reference_type == ReferenceType.CAPTURED_IN_OBJECT.value

# actor handle
memory_entry = build_actor_handle_entry()
assert memory_entry.reference_type == ReferenceType.ACTOR_HANDLE
assert memory_entry.reference_type == ReferenceType.ACTOR_HANDLE.value


def test_memory_table_summary():
Expand Down Expand Up @@ -228,7 +228,7 @@ def test_memory_table_sort_by_reference_type():
]
entries = [build_entry(reference_type=reference_type) for reference_type in unsort]
memory_table = MemoryTable(entries, sort_by_type=SortingType.REFERENCE_TYPE)
sort = sorted(unsort)
sort = sorted([entry.value for entry in unsort])
for reference_type, entry in zip(sort, memory_table.table):
assert reference_type == entry.reference_type

Expand Down
2 changes: 2 additions & 0 deletions doc/source/ray-core/objects/memory-management.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ ObjectRef Reference Counting

Ray implements distributed reference counting so that any ``ObjectRef`` in scope in the cluster is pinned in the object store. This includes local Python references, arguments to pending tasks, and IDs serialized inside other objects.

.. _debug-with-ray-memory:

Debugging using 'ray memory'
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Expand Down
5 changes: 3 additions & 2 deletions doc/source/ray-observability/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@ Observability
===============

.. toctree::
:maxdepth: 1
:maxdepth: 2
:caption: Observability, Debugging, and Profiling

../ray-core/ray-dashboard.rst
state/state-api.rst
ray-debugging.rst
ray-logging.rst
ray-metrics.rst
ray-tracing.rst
../ray-contribute/debugging.rst
../ray-contribute/profiling.rst
../ray-contribute/profiling.rst
167 changes: 167 additions & 0 deletions doc/source/ray-observability/state/ray-state-api-reference.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
Ray State API
=============

.. _state-api-ref:

.. tip:: APIs are pre-alpha and under active development. APIs are subject to change and not stable across versions.

State CLI
---------

The State CLI allows users to access the state of various resources (e.g., actors, tasks, objects).

.. click:: ray.experimental.state.state_cli:task_summary
:prog: ray summary tasks

.. click:: ray.experimental.state.state_cli:actor_summary
:prog: ray summary actors

.. click:: ray.experimental.state.state_cli:object_summary
:prog: ray summary objects

.. click:: ray.experimental.state.state_cli:list
:prog: ray list

.. click:: ray.experimental.state.state_cli:get
:prog: ray get

.. _ray-logs-api-doc:

Log CLI
-------

The Log CLI allows users to access logs from the cluster.
Note that only logs from alive nodes are available through this API.

.. click:: ray.scripts.scripts:ray_logs
:prog: ray logs

.. _state-api-schema:

State APIs Schema
-----------------

.. _state-api-schema-actor:

ActorState
~~~~~~~~~~

.. autoclass:: ray.experimental.state.common.ActorState
:members:

.. _state-api-schema-task:

TaskState
~~~~~~~~~

.. autoclass:: ray.experimental.state.common.TaskState
:members:

.. _state-api-schema-node:

NodeState
~~~~~~~~~

.. autoclass:: ray.experimental.state.common.NodeState
:members:

.. _state-api-schema-pg:

PlacementGroupState
~~~~~~~~~~~~~~~~~~~

.. autoclass:: ray.experimental.state.common.PlacementGroupState
:members:

.. _state-api-schema-worker:

WorkerState
~~~~~~~~~~~

.. autoclass:: ray.experimental.state.common.WorkerState
:members:

.. _state-api-schema-obj:

ObjectState
~~~~~~~~~~~

.. autoclass:: ray.experimental.state.common.ObjectState
:members:

.. _state-api-schema-runtime-env:

RuntimeEnvState
~~~~~~~~~~~~~~~

.. autoclass:: ray.experimental.state.common.RuntimeEnvState
:members:

.. _state-api-schema-job:

JobState
~~~~~~~~

.. autoclass:: ray.experimental.state.common.JobState
:members:

.. _state-api-schema-summary:

StateSummary
~~~~~~~~~~~~

.. autoclass:: ray.experimental.state.common.StateSummary
:members:

.. _state-api-schema-task-summary:

TaskSummary
~~~~~~~~~~~

.. _state-api-schema-task-summaries:

.. autoclass:: ray.experimental.state.common.TaskSummaries
:members:

.. _state-api-schema-task-summary-per-key:

.. autoclass:: ray.experimental.state.common.TaskSummaryPerFuncOrClassName
:members:

.. _state-api-schema-actor-summary:

ActorSummary
~~~~~~~~~~~~

.. _state-api-schema-actor-summaries:

.. autoclass:: ray.experimental.state.common.ActorSummaries
:members:

.. _state-api-schema-actor-summary-per-key:

.. autoclass:: ray.experimental.state.common.ActorSummaryPerClass
:members:

.. _state-api-schema-object-summary:

ObjectSummary
~~~~~~~~~~~~~

.. _state-api-schema-object-summaries:

.. autoclass:: ray.experimental.state.common.ObjectSummaries
:members:

.. _state-api-schema-object-summary-per-key:

.. autoclass:: ray.experimental.state.common.ObjectSummaryPerKey
:members:

State APIs Exceptions
---------------------

.. _state-api-exceptions:

.. autoclass:: ray.experimental.state.exception.RayStateApiException
:members:
Loading

0 comments on commit 39b9c44

Please sign in to comment.