Merge remote-tracking branch 'upstream/master'
Signed-off-by: Balaji Veeramani <[email protected]>
bveeramani committed Mar 10, 2023
2 parents 46f43d9 + 94eb7e5 commit a644307
Showing 252 changed files with 13,841 additions and 6,658 deletions.
2 changes: 1 addition & 1 deletion .buildkite/pipeline.build.yml
@@ -577,7 +577,7 @@
# Horovod needs to be installed separately (needed for API ref imports)
- ./ci/env/install-horovod.sh
# See https://stackoverflow.com/questions/63383400/error-cannot-uninstall-ruamel-yaml-while-creating-docker-image-for-azure-ml-a
- pip install mosaicml==0.10.1 --ignore-installed
- pip install mosaicml==0.12.1 --ignore-installed
- ./ci/ci.sh build

- label: ":octopus: Tune multinode tests"
2 changes: 2 additions & 0 deletions .buildkite/pipeline.ml.yml
@@ -287,6 +287,7 @@
conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_PYTHON_AFFECTED", "RAY_CI_DATA_AFFECTED"]
commands:
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
- ./ci/env/install-java.sh
- DATA_PROCESSING_TESTING=1 ARROW_VERSION=9.* ARROW_MONGO_VERSION=0.5.* ./ci/env/install-dependencies.sh
- ./ci/env/env_info.sh
- sudo apt-get purge -y mongodb*
@@ -302,6 +303,7 @@
instance_size: medium
commands:
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
- ./ci/env/install-java.sh
- DATA_PROCESSING_TESTING=1 ARROW_VERSION=7.* ./ci/env/install-dependencies.sh
- ./ci/env/env_info.sh
- bazel test --config=ci $(./ci/run/bazel_export_options) --action_env=RAY_DATASET_USE_STREAMING_EXECUTOR=1 --build_tests_only --test_tag_filters=-dataset_integration python/ray/data/...
1 change: 0 additions & 1 deletion ci/env/install-dependencies.sh
@@ -367,7 +367,6 @@ install_pip_packages() {

# Additional Train test dependencies.
if [ "${TRAIN_TESTING-}" = 1 ] || [ "${DOC_TESTING-}" = 1 ]; then
rm -rf "${SITE_PACKAGES}"/ruamel* # https://stackoverflow.com/questions/63383400/error-cannot-uninstall-ruamel-yaml-while-creating-docker-image-for-azure-ml-a
pip install -U -c "${WORKSPACE_DIR}"/python/requirements.txt -r "${WORKSPACE_DIR}"/python/requirements/ml/requirements_train.txt
fi

17 changes: 8 additions & 9 deletions cpp/BUILD.bazel
@@ -21,13 +21,13 @@ cc_binary(
linkstatic = 1,
visibility = ["//visibility:public"],
deps = [
":ray_api",
":ray_api_lib",
":symbols/ray_api_exported_symbols_linux.lds",
],
)

cc_library(
name = "ray_api",
name = "ray_api_lib",
srcs = glob([
"src/ray/api.cc",
"src/ray/api/*.cc",
@@ -95,7 +95,7 @@ cc_binary(
"@bazel_tools//src/conditions:windows": [
# TODO(SongGuyang): Change to use dynamic library
# "ray_cpp_lib" when we make it work on Windows.
"ray_api",
"ray_api_lib",
],
"//conditions:default": [
"ray_cpp_lib",
@@ -111,7 +111,6 @@ genrule(
name = "ray_cpp_pkg",
srcs = [
"default_worker",
"ray_api",
"libray_api.so",
],
outs = ["ray_cpp_pkg.out"],
@@ -175,7 +174,7 @@ cc_test(
linkstatic = True,
tags = ["team:core"],
deps = [
"ray_api",
"ray_api_lib",
"@com_google_googletest//:gtest_main",
],
)
@@ -205,7 +204,7 @@ cc_test(
linkstatic = True,
tags = ["team:core"],
deps = [
"ray_api",
"ray_api_lib",
"@com_google_googletest//:gtest_main",
],
)
@@ -228,7 +227,7 @@ cc_test(
linkstatic = True,
tags = ["team:core"],
deps = [
"ray_api",
"ray_api_lib",
"@com_google_googletest//:gtest_main",
],
)
@@ -293,7 +292,7 @@ cc_test(
linkstatic = True,
tags = ["team:core"],
deps = [
"ray_api",
"ray_api_lib",
],
)

@@ -327,7 +326,7 @@ cc_binary(
linkstatic = True,
tags = ["team:core"],
deps = [
":ray_api",
":ray_api_lib",
],
)

2 changes: 1 addition & 1 deletion dashboard/agent.py
@@ -243,7 +243,7 @@ async def _check_parent():
error = True
except Exception as e:
msg += f"Failed to read Raylet logs at {log_path}: {e}!"
logger.exception()
logger.exception(msg)
error = True
if error:
logger.error(msg)
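Note: `logging.Logger.exception` requires a message argument, so the bare `logger.exception()` call removed above would raise a `TypeError` instead of logging. A minimal sketch of the corrected pattern, with a hypothetical log path standing in for the agent's real one:

import logging

logger = logging.getLogger(__name__)

msg = ""
log_path = "/tmp/raylet.out"  # placeholder, not the agent's actual path
try:
    with open(log_path) as f:
        f.read()
except Exception as e:
    msg += f"Failed to read Raylet logs at {log_path}: {e}!"
    # Passing the message logs it together with the current traceback;
    # calling logger.exception() with no arguments is a TypeError.
    logger.exception(msg)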
2 changes: 1 addition & 1 deletion dashboard/state_aggregator.py
@@ -421,7 +421,7 @@ def _to_task_state(task_attempt: dict) -> dict:
],
),
(task_attempt, ["task_id", "attempt_number", "job_id"]),
(state_updates, ["node_id", "worker_id"]),
(state_updates, ["node_id", "worker_id", "error_type"]),
]
for src, keys in mappings:
for key in keys:
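Note: the mappings loop above copies selected keys from each source dict into the task state, and this change adds error_type from the state updates to that list. A minimal sketch of the pattern with hypothetical inputs (not the aggregator's real data):

task_attempt = {"task_id": "t1", "attempt_number": 0, "job_id": "j1"}
state_updates = {"node_id": "n1", "worker_id": "w1", "error_type": "TASK_EXECUTION_EXCEPTION"}

task_state = {}
mappings = [
    (task_attempt, ["task_id", "attempt_number", "job_id"]),
    (state_updates, ["node_id", "worker_id", "error_type"]),
]
for src, keys in mappings:
    for key in keys:
        task_state[key] = src.get(key)

print(task_state["error_type"])  # -> TASK_EXECUTION_EXCEPTION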
7 changes: 7 additions & 0 deletions doc/BUILD
@@ -27,6 +27,13 @@ py_test(
tags = ["exclusive", "team:ml"]
)

py_test(
name = "tensor",
size = "small",
srcs = ["source/data/doc_code/tensor.py"],
tags = ["exclusive", "team:ml"]
)

py_test(
name = "big_data_ingestion",
size = "small",
4 changes: 4 additions & 0 deletions doc/source/_toc.yml
@@ -54,6 +54,7 @@ parts:
- file: ray-air/examples/index
sections:
- file: ray-air/examples/torch_image_example
- file: ray-air/examples/torch_detection
- file: ray-air/examples/convert_existing_pytorch_code_to_ray_air
- file: ray-air/examples/convert_existing_tf_code_to_ray_air
- file: ray-air/examples/tfx_tabular_train_to_serve
@@ -74,7 +75,10 @@
- file: ray-air/examples/batch_forecasting
- file: ray-air/examples/pytorch_resnet_batch_prediction
- file: ray-air/examples/stablediffusion_batch_prediction
- file: ray-air/examples/gptj_deepspeed_fine_tuning
- file: ray-air/examples/gptj_batch_prediction
- file: ray-air/examples/gptj_serving
- file: ray-air/examples/dreambooth_finetuning
- file: ray-air/api/api
- file: ray-air/benchmarks

@@ -73,6 +73,7 @@ metadata:
labels:
app: ray-cluster-head
spec:
clusterIP: None
ports:
- name: client
protocol: TCP
@@ -198,7 +199,7 @@ spec:
imagePullPolicy: Always
command: ["/bin/bash", "-c", "--"]
args:
- "ray start --num-cpus=$MY_CPU_REQUEST --address=$SERVICE_RAY_CLUSTER_SERVICE_HOST:$SERVICE_RAY_CLUSTER_SERVICE_PORT_GCS_SERVER --object-manager-port=8076 --node-manager-port=8077 --dashboard-agent-grpc-port=8078 --dashboard-agent-listen-port=52365 --block"
- "ray start --num-cpus=$MY_CPU_REQUEST --address=service-ray-cluster:6380 --object-manager-port=8076 --node-manager-port=8077 --dashboard-agent-grpc-port=8078 --dashboard-agent-listen-port=52365 --block"

# This volume allocates shared memory for Ray to use for its plasma
# object store. If you do not provide this, Ray will fall back to
5 changes: 2 additions & 3 deletions doc/source/data/creating-datasets.rst
@@ -77,9 +77,8 @@ Supported File Formats
Read Parquet files into a tabular ``Dataset``. The Parquet data will be read into
`Arrow Table <https://arrow.apache.org/docs/python/generated/pyarrow.Table.html>`__
blocks. Although this simple example demonstrates reading a single file, note that
Datasets can also read directories of Parquet files, with one tabular block created
per file. For Parquet in particular, we also support reading partitioned Parquet
datasets with partition column values pulled from the file paths.
Datasets can also read directories of Parquet files. We also support reading partitioned
Parquet datasets with partition column values pulled from the file paths.

.. literalinclude:: ./doc_code/creating_datasets.py
:language: python
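Note: a minimal sketch of the directory and partitioned Parquet reads described in the paragraph above; the S3 paths are placeholders, not files from the Ray docs:

import ray

# Read every Parquet file under a directory into one Dataset.
ds = ray.data.read_parquet("s3://my-bucket/parquet-dir/")

# Read a Hive-partitioned layout such as .../year=2023/month=01/...;
# partition column values are populated from the file paths.
partitioned_ds = ray.data.read_parquet("s3://my-bucket/partitioned-parquet/")
print(partitioned_ds.schema())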
4 changes: 3 additions & 1 deletion doc/source/data/dask-on-ray.rst
@@ -31,8 +31,10 @@ workload. Using the Dask-on-Ray scheduler, the entire Dask ecosystem can be exec

* - Ray Version
- Dask Version
* - ``2.3.0``
- ``2022.10.1``
* - ``2.2.0``
- ``2022.2.0``
- ``2022.10.1``
* - ``2.1.0``
- ``2022.2.0``
* - ``2.0.0``
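Note: a minimal sketch of enabling the Dask-on-Ray scheduler referenced above, assuming a Ray/Dask pairing from the compatibility table:

import dask
import dask.dataframe as dd
import pandas as pd
import ray
from ray.util.dask import ray_dask_get

ray.init()
# Route Dask's task graph to Ray instead of Dask's built-in scheduler.
dask.config.set(scheduler=ray_dask_get)

df = dd.from_pandas(pd.DataFrame({"a": list(range(10))}), npartitions=2)
print(df.a.sum().compute())  # executed by Ray workers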
33 changes: 20 additions & 13 deletions doc/source/data/doc_code/tensor.py
@@ -489,31 +489,38 @@ def add_one(batch: Dict[str, Any]) -> Dict[str, Any]:
# fmt: off
# __create_variable_shaped_tensors_begin___
# Create a Dataset of variable-shaped tensors.
arr = np.array([np.ones((2, 2)), np.ones((3, 3))], dtype=object)
ds = ray.data.from_numpy([arr, arr])
ragged_array = np.array([np.ones((2, 2)), np.ones((3, 3))], dtype=object)
df = pd.DataFrame({"feature": ragged_array, "label": [1, 1]})
ds = ray.data.from_pandas([df, df])
# -> Dataset(num_blocks=2, num_rows=4,
# schema={__value__: ArrowVariableShapedTensorType(dtype=double)})
# schema={feature: TensorDtype(shape=(None, None), dtype=float64),
# label: int64})

ds.take(2)
# -> [array([[1., 1.],
# [1., 1.]]),
# array([[1., 1., 1.],
# [1., 1., 1.],
# [1., 1., 1.]])]
# -> [{'feature': array([[1., 1.],
# [1., 1.]]),
# 'label': 1},
# {'feature': array([[1., 1., 1.],
# [1., 1., 1.],
# [1., 1., 1.]]),
# 'label': 1}]
# __create_variable_shaped_tensors_end__

# fmt: off
# __tf_variable_shaped_tensors_begin___
# Convert Ray Dataset to a TensorFlow Dataset.
tf_ds = ds.to_tf(
batch_size=2,
output_signature=tf.RaggedTensorSpec(shape=(None, None, None), dtype=tf.float64),
feature_columns="feature",
label_columns="label"
)
# Iterate through the tf.RaggedTensors.
for ragged_tensor in tf_ds:
print(ragged_tensor)
# -> <tf.RaggedTensor [[[1.0, 1.0], [1.0, 1.0]],
# [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]]>
# <tf.RaggedTensor [[[1.0, 1.0], [1.0, 1.0]],
# [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]]>
# -> (<tf.RaggedTensor [[[1.0, 1.0], [1.0, 1.0]],
# [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]]>,
# <tf.Tensor: shape=(2,), dtype=int64, numpy=array([1, 1])>)
# (<tf.RaggedTensor [[[1.0, 1.0], [1.0, 1.0]],
# [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]]>,
# <tf.Tensor: shape=(2,), dtype=int64, numpy=array([1, 1])>)
# __tf_variable_shaped_tensors_end__
12 changes: 11 additions & 1 deletion doc/source/data/getting-started.rst
@@ -62,7 +62,17 @@ transform datasets. Ray executes transformations in parallel for performance at
.. testoutput::

MapBatches(transform_batch)
+- Dataset(num_blocks=1, num_rows=150, schema={sepal length (cm): double, sepal width (cm): double, petal length (cm): double, petal width (cm): double, target: int64})
+- Dataset(
num_blocks=...,
num_rows=150,
schema={
sepal length (cm): double,
sepal width (cm): double,
petal length (cm): double,
petal width (cm): double,
target: int64
}
)

To learn more about transforming datasets, read
:ref:`Transforming datasets <transforming_datasets>`.
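Note: a minimal sketch of a batch transform like the `MapBatches(transform_batch)` shown in the output above; the column and the transform itself are hypothetical:

import pandas as pd
import ray

ds = ray.data.from_pandas(pd.DataFrame({"sepal length (cm)": [5.1, 4.9, 4.7]}))

def transform_batch(batch: pd.DataFrame) -> pd.DataFrame:
    # Scale one column; real pipelines can do arbitrary per-batch work.
    batch["sepal length (cm)"] = batch["sepal length (cm)"] * 10
    return batch

transformed = ds.map_batches(transform_batch, batch_format="pandas")
print(transformed.take(3))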
18 changes: 16 additions & 2 deletions doc/source/data/glossary.rst
@@ -116,7 +116,11 @@ Ray Datasets Glossary
>>> import numpy as np
>>> import ray
>>> ray.data.from_numpy(np.zeros((100, 32, 32, 3)))
Dataset(num_blocks=1, num_rows=100, schema={__value__: ArrowTensorType(shape=(32, 32, 3), dtype=double)})
Dataset(
num_blocks=1,
num_rows=100,
schema={__value__: ArrowTensorType(shape=(32, 32, 3), dtype=double)}
)

Tabular Dataset
A Dataset that represents columnar data.
@@ -125,7 +129,17 @@

>>> import ray
>>> ray.data.read_csv("s3:https://anonymous@air-example-data/iris.csv")
Dataset(num_blocks=1, num_rows=150, schema={sepal length (cm): double, sepal width (cm): double, petal length (cm): double, petal width (cm): double, target: int64})
Dataset(
num_blocks=1,
num_rows=150,
schema={
sepal length (cm): double,
sepal width (cm): double,
petal length (cm): double,
petal width (cm): double,
target: int64
}
)

User-defined function (UDF)
A callable that transforms batches or :term:`records <Record>` of data. UDFs let you arbitrarily transform datasets.
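Note: a minimal sketch of a record-level UDF applied with Dataset.map, complementing the glossary entry above; the input values are hypothetical:

import ray

# Each element of the Dataset is passed to the UDF one record at a time.
ds = ray.data.from_items([1, 2, 3, 4])
doubled = ds.map(lambda record: record * 2)
print(doubled.take())  # -> [2, 4, 6, 8]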
Binary file added doc/source/images/detection.jpeg
5 changes: 5 additions & 0 deletions doc/source/ray-air/examples/BUILD
@@ -46,8 +46,11 @@ py_test_run_all_notebooks(
"feast_example.ipynb", # REGRESSION
"upload_to_comet_ml.ipynb", # Needs credentials
"upload_to_wandb.ipynb", # Needs credentials
"torch_detection.ipynb", # Requires GPUs
"gptj_batch_prediction.ipynb", # Requires GPUs
"gptj_serving.ipynb", # Requires GPUs
"stablediffusion_batch_prediction.ipynb", # Requires GPUs
"gptj_deepspeed_fine_tuning.ipynb", # Requires release test
],
data = ["//doc/source/ray-air/examples:air_examples"],
tags = ["exclusive", "team:ml", "ray_air"],
@@ -79,7 +82,9 @@ py_test_run_all_notebooks(
include = [
"huggingface_text_classification.ipynb",
"pytorch_resnet_batch_prediction.ipynb",
"torch_detection.ipynb",
"gptj_batch_prediction.ipynb",
"gptj_serving.ipynb",
"stablediffusion_batch_prediction.ipynb",
],
exclude = [],