[RLlib] RLlib contrib (ray-project#35141)

Signed-off-by: Avnish <[email protected]>
pcmoritz · May 12, 2023 · 3185543 · 3185543
1 parent f936826
commit 3185543
Show file tree

Hide file tree

Showing 26 changed files with 2,528 additions and 0 deletions.
diff --git a/.buildkite/pipeline.ml.yml b/.buildkite/pipeline.ml.yml
@@ -528,3 +528,30 @@
  - ./ci/env/env_info.sh
  - python ./ci/env/setup_credentials.py wandb comet_ml
  - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=needs_credentials,-timeseries_libs,-gpu,-py37,-post_wheel_build doc/...
+
+
+- label: ":exploding_death_star: RLlib Contrib: A3C Tests"
+ conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_CONTRIB_AFFECTED"]
+ commands:
+ # On shell exit, upload build info, but only when this is not a pull-request build.
+ - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+ # Install the pinned requirements first, then the contrib package itself in editable mode.
+ - (cd rllib_contrib/a3c && pip install -r requirements.txt && pip install -e .)
+ - ./ci/env/env_info.sh
+ - pytest rllib_contrib/a3c/tests/test_a3c.py
+
+- label: ":exploding_death_star: RLlib Contrib: MAML Tests"
+  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_CONTRIB_AFFECTED"]
+  commands:
+    # On shell exit, upload build info, but only when this is not a pull-request build.
+    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
+
+    # Install mujoco necessary for the testing environments
+    - sudo apt install libosmesa6-dev libgl1-mesa-glx libglfw3 patchelf -y
+    - wget https://mujoco.org/download/mujoco210-linux-x86_64.tar.gz
+    # -p: do not fail the step when the directory already exists.
+    - mkdir -p /root/.mujoco
+    - mv mujoco210-linux-x86_64.tar.gz /root/.mujoco/.
+    - (cd /root/.mujoco && tar -xf /root/.mujoco/mujoco210-linux-x86_64.tar.gz)
+    # NOTE(review): many default .bashrc files return early in non-interactive
+    # shells, in which case the appended export never runs -- confirm that
+    # LD_LIBRARY_PATH is actually set for the pytest command below.
+    - echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/root/.mujoco/mujoco210/bin' >> /root/.bashrc
+    - source /root/.bashrc
+
+    # Install the pinned requirements first, then the contrib package itself in editable mode.
+    - (cd rllib_contrib/maml && pip install -r requirements.txt && pip install -e .)
+    - ./ci/env/env_info.sh
+    - pytest rllib_contrib/maml/tests/test_maml.py
diff --git a/ci/pipeline/determine_tests_to_run.py b/ci/pipeline/determine_tests_to_run.py
@@ -88,6 +88,8 @@ def get_commit_range():
  # Whether all RLlib tests should be run.
  # Set to 1 only when a source file in `ray/rllib` has been changed.
  RAY_CI_RLLIB_DIRECTLY_AFFECTED = 0
+ # Whether to run all RLlib contrib tests
+ RAY_CI_RLLIB_CONTRIB_AFFECTED = 0
  RAY_CI_SERVE_AFFECTED = 0
  RAY_CI_CORE_CPP_AFFECTED = 0
  RAY_CI_CPP_AFFECTED = 0
@@ -179,6 +181,9 @@ def get_commit_range():
  RAY_CI_RLLIB_DIRECTLY_AFFECTED = 1
  RAY_CI_LINUX_WHEELS_AFFECTED = 1
  RAY_CI_MACOS_WHEELS_AFFECTED = 1
+ elif re.match("rllib_contrib/", changed_file):
+ if not changed_file.endswith(".md"):
+ RAY_CI_RLLIB_CONTRIB_AFFECTED = 1
  elif changed_file.startswith("python/ray/serve"):
  RAY_CI_DOC_AFFECTED = 1
  RAY_CI_SERVE_AFFECTED = 1
@@ -307,6 +312,8 @@ def get_commit_range():
  RAY_CI_TRAIN_AFFECTED = 1
  RAY_CI_RLLIB_AFFECTED = 1
  RAY_CI_RLLIB_DIRECTLY_AFFECTED = 1
+ # the rllib contrib ci should only be run on pull requests
+ RAY_CI_RLLIB_CONTRIB_AFFECTED = 0
  RAY_CI_SERVE_AFFECTED = 1
  RAY_CI_CPP_AFFECTED = 1
  RAY_CI_CORE_CPP_AFFECTED = 1
@@ -331,6 +338,7 @@ def get_commit_range():
  "RAY_CI_TRAIN_AFFECTED={}".format(RAY_CI_TRAIN_AFFECTED),
  "RAY_CI_RLLIB_AFFECTED={}".format(RAY_CI_RLLIB_AFFECTED),
  "RAY_CI_RLLIB_DIRECTLY_AFFECTED={}".format(RAY_CI_RLLIB_DIRECTLY_AFFECTED),
+ "RAY_CI_RLLIB_CONTRIB_AFFECTED={}".format(RAY_CI_RLLIB_CONTRIB_AFFECTED),
  "RAY_CI_SERVE_AFFECTED={}".format(RAY_CI_SERVE_AFFECTED),
  "RAY_CI_DASHBOARD_AFFECTED={}".format(RAY_CI_DASHBOARD_AFFECTED),
  "RAY_CI_DOC_AFFECTED={}".format(RAY_CI_DOC_AFFECTED),

diff --git a/rllib_contrib/README.md b/rllib_contrib/README.md
@@ -0,0 +1,30 @@
+# RLlib-Contrib
+
+RLlib-Contrib is a directory for more experimental community contributions to RLlib including contributed algorithms. **This directory has a more relaxed bar for contributions than Ray or RLlib.** If you are interested in contributing to RLlib-Contrib, please see the [contributing guide](CONTRIBUTING.md).
+
+## Getting Started and Installation
+Navigate to the algorithm sub-directory you are interested in and see the README.md for installation instructions and example scripts to help you get started!
+
+## Maintenance
+
+**Any issues that are filed in `rllib_contrib` will be solved best-effort by the community and there is no expectation of maintenance by the RLlib team.**
+
+**The API surface between algorithms in `rllib_contrib` and current versions of Ray / RLlib is not guaranteed. This means that any APIs used in `rllib_contrib` could be modified or removed in newer versions of Ray/RLlib.**
+
+We will generally accept contributions to this directory that meet any of the following criteria:
+
+1. Updating dependencies.
+2. Submitting community contributed algorithms that have been tested and are ready for use.
+3. Enabling algorithms to be run in different environments (e.g., adding support for a new type of gymnasium environment).
+4. Updating algorithms for use with the newer RLlib APIs.
+5. General bug fixes.
+
+We will not accept contributions that generally add a significant maintenance burden. In this case users should instead make their own repo with their contribution, using the same guidelines as this directory, and the RLlib team can help to market/promote it in the Ray docs.
+
+## Getting Involved
+
+| Platform | Purpose | Support Level |
+| --- | --- | --- |
+| [Discuss Forum](https://discuss.ray.io) | For discussions about development and questions about usage. | Community |
+| [GitHub Issues](https://github.com/ray-project/ray/issues) | For reporting bugs and filing feature requests. | Community |
+| [Slack](https://forms.gle/9TSdDYUgxYs8SA9e8) | For collaborating with other Ray users. | Community |
diff --git a/rllib_contrib/a3c/README.rst b/rllib_contrib/a3c/README.rst
@@ -0,0 +1,21 @@
+A3C (Asynchronous Advantage Actor-Critic)
+-----------------------------------------
+
+`A3C <https://arxiv.org/abs/1602.01783>`_ is the asynchronous version of A2C, where gradients are computed on the workers directly after trajectory rollouts, and only then shipped to a central learner to accumulate these gradients on the central model. After the central model update, parameters are broadcast back to all workers. Similar to A2C, A3C scales to 16-32+ worker processes depending on the environment.
+
+
+Installation
+------------
+
+.. code-block:: bash
+
+ conda create -n rllib-a3c python=3.10
+ conda activate rllib-a3c
+ pip install -r requirements.txt
+ pip install -e '.[development]'
+
+
+Usage
+-----
+
+.. literalinclude:: examples/a3c_cartpole_v1.py
diff --git a/rllib_contrib/a3c/examples/a3c_cartpole_v1.py b/rllib_contrib/a3c/examples/a3c_cartpole_v1.py
@@ -0,0 +1,29 @@
+from rllib_a3c.a3c import A3C, A3CConfig
+
+import ray
+from ray import air, tune
+
+if __name__ == "__main__":
+ ray.init()
+
+ config = (
+ A3CConfig()
+ .rollouts(num_rollout_workers=1)
+ .framework("torch")
+ .environment("CartPole-v1")
+ .training(
+ gamma=0.95,
+ )
+ )
+
+ num_iterations = 100
+
+ tuner = tune.Tuner(
+ A3C,
+ param_space=config.to_dict(),
+ run_config=air.RunConfig(
+ stop={"episode_reward_mean": 150, "timesteps_total": 200000},
+ failure_config=air.FailureConfig(fail_fast="raise"),
+ ),
+ )
+ results = tuner.fit()
diff --git a/rllib_contrib/a3c/pyproject.toml b/rllib_contrib/a3c/pyproject.toml
@@ -0,0 +1,18 @@
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+
+# Package discovery is rooted at src/.
+[tool.setuptools.packages.find]
+where = ["src"]
+
+[project]
+name = "rllib-a3c"
+authors = [{name = "Anyscale Inc."}]
+version = "0.1.0"
+description = ""
+# Must name a file that exists next to this pyproject.toml; this package ships README.rst.
+readme = "README.rst"
+requires-python = ">=3.7, <3.11"
+dependencies = ["gym[accept-rom-license]", "gymnasium[mujoco]==0.26.3", "higher", "ray[rllib]==2.3.1"]
+
+# Installed via `pip install -e '.[development]'`.
+[project.optional-dependencies]
+development = ["pytest>=7.2.2", "pre-commit==2.21.0", "tensorflow==2.11.0", "torch==1.12.0"]
diff --git a/rllib_contrib/a3c/requirements.txt b/rllib_contrib/a3c/requirements.txt
@@ -0,0 +1,2 @@
+tensorflow==2.11.0
+torch==1.12.0
diff --git a/rllib_contrib/a3c/src/rllib_a3c/a3c/__init__.py b/rllib_contrib/a3c/src/rllib_a3c/a3c/__init__.py
@@ -0,0 +1,7 @@
+from rllib_a3c.a3c.a3c import A3C, A3CConfig
+
+from ray.tune.registry import register_trainable
+
+# Public names re-exported by this package.
+__all__ = ["A3CConfig", "A3C"]
+
+# Runs at import time: registers the A3C class with Ray Tune's registry under
+# the string name "rllib-contrib-a3c".
+register_trainable("rllib-contrib-a3c", A3C)