Merge remote-tracking branch 'upstream/master'
Signed-off-by: Balaji Veeramani <[email protected]>
bveeramani committed Mar 10, 2023
2 parents 46f43d9 + 94eb7e5 commit a644307
Showing 252 changed files with 13,841 additions and 6,658 deletions.
2 changes: 1 addition & 1 deletion .buildkite/pipeline.build.yml
@@ -577,7 +577,7 @@
# Horovod needs to be installed separately (needed for API ref imports)
- ./ci/env/install-horovod.sh
# See https://stackoverflow.com/questions/63383400/error-cannot-uninstall-ruamel-yaml-while-creating-docker-image-for-azure-ml-a
- pip install mosaicml==0.10.1 --ignore-installed
- pip install mosaicml==0.12.1 --ignore-installed
- ./ci/ci.sh build

- label: ":octopus: Tune multinode tests"
2 changes: 2 additions & 0 deletions .buildkite/pipeline.ml.yml
@@ -287,6 +287,7 @@
conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_PYTHON_AFFECTED", "RAY_CI_DATA_AFFECTED"]
commands:
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
- ./ci/env/install-java.sh
- DATA_PROCESSING_TESTING=1 ARROW_VERSION=9.* ARROW_MONGO_VERSION=0.5.* ./ci/env/install-dependencies.sh
- ./ci/env/env_info.sh
- sudo apt-get purge -y mongodb*
@@ -302,6 +303,7 @@
instance_size: medium
commands:
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
- ./ci/env/install-java.sh
- DATA_PROCESSING_TESTING=1 ARROW_VERSION=7.* ./ci/env/install-dependencies.sh
- ./ci/env/env_info.sh
- bazel test --config=ci $(./ci/run/bazel_export_options) --action_env=RAY_DATASET_USE_STREAMING_EXECUTOR=1 --build_tests_only --test_tag_filters=-dataset_integration python/ray/data/...
1 change: 0 additions & 1 deletion ci/env/install-dependencies.sh
@@ -367,7 +367,6 @@ install_pip_packages() {

# Additional Train test dependencies.
if [ "${TRAIN_TESTING-}" = 1 ] || [ "${DOC_TESTING-}" = 1 ]; then
rm -rf "${SITE_PACKAGES}"/ruamel* # https://stackoverflow.com/questions/63383400/error-cannot-uninstall-ruamel-yaml-while-creating-docker-image-for-azure-ml-a
pip install -U -c "${WORKSPACE_DIR}"/python/requirements.txt -r "${WORKSPACE_DIR}"/python/requirements/ml/requirements_train.txt
fi

17 changes: 8 additions & 9 deletions cpp/BUILD.bazel
@@ -21,13 +21,13 @@ cc_binary(
linkstatic = 1,
visibility = ["//visibility:public"],
deps = [
":ray_api",
":ray_api_lib",
":symbols/ray_api_exported_symbols_linux.lds",
],
)

cc_library(
name = "ray_api",
name = "ray_api_lib",
srcs = glob([
"src/ray/api.cc",
"src/ray/api/*.cc",
@@ -95,7 +95,7 @@ cc_binary(
"@bazel_tools//src/conditions:windows": [
# TODO(SongGuyang): Change to use dynamic library
# "ray_cpp_lib" when we make it work on Windows.
"ray_api",
"ray_api_lib",
],
"//conditions:default": [
"ray_cpp_lib",
@@ -111,7 +111,6 @@ genrule(
name = "ray_cpp_pkg",
srcs = [
"default_worker",
"ray_api",
"libray_api.so",
],
outs = ["ray_cpp_pkg.out"],
@@ -175,7 +174,7 @@ cc_test(
linkstatic = True,
tags = ["team:core"],
deps = [
"ray_api",
"ray_api_lib",
"@com_google_googletest//:gtest_main",
],
)
@@ -205,7 +204,7 @@ cc_test(
linkstatic = True,
tags = ["team:core"],
deps = [
"ray_api",
"ray_api_lib",
"@com_google_googletest//:gtest_main",
],
)
@@ -228,7 +227,7 @@ cc_test(
linkstatic = True,
tags = ["team:core"],
deps = [
"ray_api",
"ray_api_lib",
"@com_google_googletest//:gtest_main",
],
)
@@ -293,7 +292,7 @@ cc_test(
linkstatic = True,
tags = ["team:core"],
deps = [
"ray_api",
"ray_api_lib",
],
)

@@ -327,7 +326,7 @@ cc_binary(
linkstatic = True,
tags = ["team:core"],
deps = [
":ray_api",
":ray_api_lib",
],
)

2 changes: 1 addition & 1 deletion dashboard/agent.py
@@ -243,7 +243,7 @@ async def _check_parent():
error = True
except Exception as e:
msg += f"Failed to read Raylet logs at {log_path}: {e}!"
logger.exception()
logger.exception(msg)
error = True
if error:
logger.error(msg)
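Note: `logging.Logger.exception` requires a message argument, so the bare `logger.exception()` call removed above would raise a `TypeError` instead of logging. A minimal sketch of the corrected pattern, with a hypothetical log path standing in for the agent's real one:

import logging

logger = logging.getLogger(__name__)

msg = ""
log_path = "/tmp/raylet.out"  # placeholder, not the agent's actual path
try:
    with open(log_path) as f:
        f.read()
except Exception as e:
    msg += f"Failed to read Raylet logs at {log_path}: {e}!"
    # Passing the message logs it together with the current traceback;
    # calling logger.exception() with no arguments is a TypeError.
    logger.exception(msg)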
2 changes: 1 addition & 1 deletion dashboard/state_aggregator.py
@@ -421,7 +421,7 @@ def _to_task_state(task_attempt: dict) -> dict:
],
),
(task_attempt, ["task_id", "attempt_number", "job_id"]),
(state_updates, ["node_id", "worker_id"]),
(state_updates, ["node_id", "worker_id", "error_type"]),
]
for src, keys in mappings:
for key in keys:
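Note: the mappings loop above copies selected keys from each source dict into the task state, and this change adds error_type from the state updates to that list. A minimal sketch of the pattern with hypothetical inputs (not the aggregator's real data):

task_attempt = {"task_id": "t1", "attempt_number": 0, "job_id": "j1"}
state_updates = {"node_id": "n1", "worker_id": "w1", "error_type": "TASK_EXECUTION_EXCEPTION"}

task_state = {}
mappings = [
    (task_attempt, ["task_id", "attempt_number", "job_id"]),
    (state_updates, ["node_id", "worker_id", "error_type"]),
]
for src, keys in mappings:
    for key in keys:
        task_state[key] = src.get(key)

print(task_state["error_type"])  # -> TASK_EXECUTION_EXCEPTION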
7 changes: 7 additions & 0 deletions doc/BUILD
@@ -27,6 +27,13 @@ py_test(
tags = ["exclusive", "team:ml"]
)

py_test(
name = "tensor",
size = "small",
srcs = ["source/data/doc_code/tensor.py"],
tags = ["exclusive", "team:ml"]
)

py_test(
name = "big_data_ingestion",
size = "small",
4 changes: 4 additions & 0 deletions doc/source/_toc.yml
@@ -54,6 +54,7 @@ parts:
- file: ray-air/examples/index
sections:
- file: ray-air/examples/torch_image_example
- file: ray-air/examples/torch_detection
- file: ray-air/examples/convert_existing_pytorch_code_to_ray_air
- file: ray-air/examples/convert_existing_tf_code_to_ray_air
- file: ray-air/examples/tfx_tabular_train_to_serve
@@ -74,7 +75,10 @@
- file: ray-air/examples/batch_forecasting
- file: ray-air/examples/pytorch_resnet_batch_prediction
- file: ray-air/examples/stablediffusion_batch_prediction
- file: ray-air/examples/gptj_deepspeed_fine_tuning
- file: ray-air/examples/gptj_batch_prediction
- file: ray-air/examples/gptj_serving
- file: ray-air/examples/dreambooth_finetuning
- file: ray-air/api/api
- file: ray-air/benchmarks

@@ -73,6 +73,7 @@ metadata:
labels:
app: ray-cluster-head
spec:
clusterIP: None
ports:
- name: client
protocol: TCP
@@ -198,7 +199,7 @@ spec:
imagePullPolicy: Always
command: ["/bin/bash", "-c", "--"]
args:
- "ray start --num-cpus=$MY_CPU_REQUEST --address=$SERVICE_RAY_CLUSTER_SERVICE_HOST:$SERVICE_RAY_CLUSTER_SERVICE_PORT_GCS_SERVER --object-manager-port=8076 --node-manager-port=8077 --dashboard-agent-grpc-port=8078 --dashboard-agent-listen-port=52365 --block"
- "ray start --num-cpus=$MY_CPU_REQUEST --address=service-ray-cluster:6380 --object-manager-port=8076 --node-manager-port=8077 --dashboard-agent-grpc-port=8078 --dashboard-agent-listen-port=52365 --block"

# This volume allocates shared memory for Ray to use for its plasma
# object store. If you do not provide this, Ray will fall back to
5 changes: 2 additions & 3 deletions doc/source/data/creating-datasets.rst
@@ -77,9 +77,8 @@ Supported File Formats
Read Parquet files into a tabular ``Dataset``. The Parquet data will be read into
`Arrow Table <https://arrow.apache.org/docs/python/generated/pyarrow.Table.html>`__
blocks. Although this simple example demonstrates reading a single file, note that
Datasets can also read directories of Parquet files, with one tabular block created
per file. For Parquet in particular, we also support reading partitioned Parquet
datasets with partition column values pulled from the file paths.
Datasets can also read directories of Parquet files. We also support reading partitioned
Parquet datasets with partition column values pulled from the file paths.

.. literalinclude:: ./doc_code/creating_datasets.py
:language: python
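Note: a minimal sketch of the directory and partitioned Parquet reads described in the paragraph above; the S3 paths are placeholders, not files from the Ray docs:

import ray

# Read every Parquet file under a directory into one Dataset.
ds = ray.data.read_parquet("s3://my-bucket/parquet-dir/")

# Read a Hive-partitioned layout such as .../year=2023/month=01/...;
# partition column values are populated from the file paths.
partitioned_ds = ray.data.read_parquet("s3://my-bucket/partitioned-parquet/")
print(partitioned_ds.schema())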
4 changes: 3 additions & 1 deletion doc/source/data/dask-on-ray.rst
@@ -31,8 +31,10 @@ workload. Using the Dask-on-Ray scheduler, the entire Dask ecosystem can be exec

* - Ray Version
- Dask Version
* - ``2.3.0``
- ``2022.10.1``
* - ``2.2.0``
- ``2022.2.0``
- ``2022.10.1``
* - ``2.1.0``
- ``2022.2.0``
* - ``2.0.0``
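Note: a minimal sketch of enabling the Dask-on-Ray scheduler referenced above, assuming a Ray/Dask pairing from the compatibility table:

import dask
import dask.dataframe as dd
import pandas as pd
import ray
from ray.util.dask import ray_dask_get

ray.init()
# Route Dask's task graph to Ray instead of Dask's built-in scheduler.
dask.config.set(scheduler=ray_dask_get)

df = dd.from_pandas(pd.DataFrame({"a": list(range(10))}), npartitions=2)
print(df.a.sum().compute())  # executed by Ray workers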
33 changes: 20 additions & 13 deletions doc/source/data/doc_code/tensor.py
@@ -489,31 +489,38 @@ def add_one(batch: Dict[str, Any]) -> Dict[str, Any]:
# fmt: off
# __create_variable_shaped_tensors_begin___
# Create a Dataset of variable-shaped tensors.
arr = np.array([np.ones((2, 2)), np.ones((3, 3))], dtype=object)
ds = ray.data.from_numpy([arr, arr])
ragged_array = np.array([np.ones((2, 2)), np.ones((3, 3))], dtype=object)
df = pd.DataFrame({"feature": ragged_array, "label": [1, 1]})
ds = ray.data.from_pandas([df, df])
# -> Dataset(num_blocks=2, num_rows=4,
# schema={__value__: ArrowVariableShapedTensorType(dtype=double)})
# schema={feature: TensorDtype(shape=(None, None), dtype=float64),
# label: int64})

ds.take(2)
# -> [array([[1., 1.],
# [1., 1.]]),
# array([[1., 1., 1.],
# [1., 1., 1.],
# [1., 1., 1.]])]
# -> [{'feature': array([[1., 1.],
# [1., 1.]]),
# 'label': 1},
# {'feature': array([[1., 1., 1.],
# [1., 1., 1.],
# [1., 1., 1.]]),
# 'label': 1}]
# __create_variable_shaped_tensors_end__

# fmt: off
# __tf_variable_shaped_tensors_begin___
# Convert Ray Dataset to a TensorFlow Dataset.
tf_ds = ds.to_tf(
batch_size=2,
output_signature=tf.RaggedTensorSpec(shape=(None, None, None), dtype=tf.float64),
feature_columns="feature",
label_columns="label"
)
# Iterate through the tf.RaggedTensors.
for ragged_tensor in tf_ds:
print(ragged_tensor)
# -> <tf.RaggedTensor [[[1.0, 1.0], [1.0, 1.0]],
# [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]]>
# <tf.RaggedTensor [[[1.0, 1.0], [1.0, 1.0]],
# [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]]>
# -> (<tf.RaggedTensor [[[1.0, 1.0], [1.0, 1.0]],
# [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]]>,
# <tf.Tensor: shape=(2,), dtype=int64, numpy=array([1, 1])>)
# (<tf.RaggedTensor [[[1.0, 1.0], [1.0, 1.0]],
# [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]]>,
# <tf.Tensor: shape=(2,), dtype=int64, numpy=array([1, 1])>)
# __tf_variable_shaped_tensors_end__
12 changes: 11 additions & 1 deletion doc/source/data/getting-started.rst
@@ -62,7 +62,17 @@ transform datasets. Ray executes transformations in parallel for performance at
.. testoutput::

MapBatches(transform_batch)
+- Dataset(num_blocks=1, num_rows=150, schema={sepal length (cm): double, sepal width (cm): double, petal length (cm): double, petal width (cm): double, target: int64})
+- Dataset(
num_blocks=...,
num_rows=150,
schema={
sepal length (cm): double,
sepal width (cm): double,
petal length (cm): double,
petal width (cm): double,
target: int64
}
)

To learn more about transforming datasets, read
:ref:`Transforming datasets <transforming_datasets>`.
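Note: a minimal sketch of a batch transform like the `MapBatches(transform_batch)` shown in the output above; the column and the transform itself are hypothetical:

import pandas as pd
import ray

ds = ray.data.from_pandas(pd.DataFrame({"sepal length (cm)": [5.1, 4.9, 4.7]}))

def transform_batch(batch: pd.DataFrame) -> pd.DataFrame:
    # Scale one column; real pipelines can do arbitrary per-batch work.
    batch["sepal length (cm)"] = batch["sepal length (cm)"] * 10
    return batch

transformed = ds.map_batches(transform_batch, batch_format="pandas")
print(transformed.take(3))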
18 changes: 16 additions & 2 deletions doc/source/data/glossary.rst
@@ -116,7 +116,11 @@ Ray Datasets Glossary
>>> import numpy as np
>>> import ray
>>> ray.data.from_numpy(np.zeros((100, 32, 32, 3)))
Dataset(num_blocks=1, num_rows=100, schema={__value__: ArrowTensorType(shape=(32, 32, 3), dtype=double)})
Dataset(
num_blocks=1,
num_rows=100,
schema={__value__: ArrowTensorType(shape=(32, 32, 3), dtype=double)}
)

Tabular Dataset
A Dataset that represents columnar data.
@@ -125,7 +129,17 @@

>>> import ray
>>> ray.data.read_csv("s3:https://anonymous@air-example-data/iris.csv")
Dataset(num_blocks=1, num_rows=150, schema={sepal length (cm): double, sepal width (cm): double, petal length (cm): double, petal width (cm): double, target: int64})
Dataset(
num_blocks=1,
num_rows=150,
schema={
sepal length (cm): double,
sepal width (cm): double,
petal length (cm): double,
petal width (cm): double,
target: int64
}
)

User-defined function (UDF)
A callable that transforms batches or :term:`records <Record>` of data. UDFs let you arbitrarily transform datasets.
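Note: a minimal sketch of a record-level UDF applied with Dataset.map, complementing the glossary entry above; the input values are hypothetical:

import ray

# Each element of the Dataset is passed to the UDF one record at a time.
ds = ray.data.from_items([1, 2, 3, 4])
doubled = ds.map(lambda record: record * 2)
print(doubled.take())  # -> [2, 4, 6, 8]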
Binary file added doc/source/images/detection.jpeg
5 changes: 5 additions & 0 deletions doc/source/ray-air/examples/BUILD
@@ -46,8 +46,11 @@ py_test_run_all_notebooks(
"feast_example.ipynb", # REGRESSION
"upload_to_comet_ml.ipynb", # Needs credentials
"upload_to_wandb.ipynb", # Needs credentials
"torch_detection.ipynb", # Requires GPUs
"gptj_batch_prediction.ipynb", # Requires GPUs
"gptj_serving.ipynb", # Requires GPUs
"stablediffusion_batch_prediction.ipynb", # Requires GPUs
"gptj_deepspeed_fine_tuning.ipynb", # Requires release test
],
data = ["//doc/source/ray-air/examples:air_examples"],
tags = ["exclusive", "team:ml", "ray_air"],
@@ -79,7 +82,9 @@ py_test_run_all_notebooks(
include = [
"huggingface_text_classification.ipynb",
"pytorch_resnet_batch_prediction.ipynb",
"torch_detection.ipynb",
"gptj_batch_prediction.ipynb",
"gptj_serving.ipynb",
"stablediffusion_batch_prediction.ipynb",
],
exclude = [],