[civ2][7] move rllib data tests to civ2 (#40162)

As the title says, move the RLlib data tests to civ2. The RLlib data tests run only on pull requests, not on master. Signed-off-by: can <[email protected]>
ray-project · Oct 12, 2023 · 8e7753d · 8e7753d
1 parent 4f69d7e
commit 8e7753d
Show file tree

Hide file tree

Showing 4 changed files with 96 additions and 74 deletions.
diff --git a/.buildkite/pipeline.ml.yml b/.buildkite/pipeline.ml.yml
@@ -77,51 +77,6 @@
  --test_arg=--framework=torch
  rllib/...
 
-- label: ":brain: RLlib: Learning tests Pytorch (With Ray Data)"
- conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_DATA_AFFECTED"]
- instance_size: large
- commands:
- # skip on master because we are running these tests under all RLlib suites anyway
- - if [ "$BUILDKITE_PULL_REQUEST" = "false" ]; then exit 0; fi
- - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
- - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
- - ./ci/env/env_info.sh
- - bazel test --config=ci $(./ci/run/bazel_export_options)
- --build_tests_only
- --test_tag_filters=learning_tests_with_ray_data,-multi_gpu,-gpu,-tf_only,-tf2_only
- --test_arg=--framework=torch
- rllib/...
-
-
-- label: ":brain: RLlib: Learning tests TF2 (With Ray Data)"
- conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_DATA_AFFECTED"]
- instance_size: large
- commands:
- # skip on master because we are running these tests under all RLlib suites anyway
- - if [ "$BUILDKITE_PULL_REQUEST" = "false" ]; then exit 0; fi
- - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
- - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
- - ./ci/env/env_info.sh
- - bazel test --config=ci $(./ci/run/bazel_export_options)
- --build_tests_only
- --test_tag_filters=learning_tests_with_ray_data,-multi_gpu,-gpu,-torch_only
- --test_arg=--framework=tf2
- rllib/...
-
-- label: ":brain: RLlib: Unit-tests (With Ray Data)"
- conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_DATA_AFFECTED"]
- instance_size: large
- commands:
- # skip on master because we are running these tests under all RLlib suites anyway
- - if [ "$BUILDKITE_PULL_REQUEST" = "false" ]; then exit 0; fi
- - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
- - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
- - ./ci/env/env_info.sh
- - bazel test --config=ci $(./ci/run/bazel_export_options)
- --build_tests_only
- --test_tag_filters=ray_data,-learning_tests_with_ray_data,-multi_gpu,-gpu
- rllib/...
-
 - label: ":brain: RLlib: Algorithm, Model and other tests"
  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
  parallelism: 4
@@ -136,32 +91,6 @@
  --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 
  rllib/...
 
-- label: ":brain: RLlib: RLModule tests"
- conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
- parallelism: 4
- instance_size: large
- commands:
- - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
- - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
- - ./ci/env/env_info.sh
- - ./ci/run/run_bazel_test_with_sharding.sh --config=ci $(./ci/run/bazel_export_options)
- --build_tests_only
- --test_tag_filters=rlm
- --test_env=RLLIB_ENABLE_RL_MODULE=1
- --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 rllib/...
-
-- label: ":brain: RLlib: Examples"
- conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
- parallelism: 5
- instance_size: large
- commands:
- - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
- - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh
- - ./ci/env/env_info.sh
- - ./ci/run/run_bazel_test_with_sharding.sh --config=ci $(./ci/run/bazel_export_options) --build_tests_only
- --test_tag_filters=examples,-multi_gpu,-gpu --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1
- rllib/...
-
 - label: ":brain: RLlib: tests/ dir"
  conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_DIRECTLY_AFFECTED"]
  parallelism: 2

diff --git a/.buildkite/rllib.rayci.yml b/.buildkite/rllib.rayci.yml
@@ -1,6 +1,6 @@
 group: rllib tests
 steps:
- - label: ":brain: rllib: learning tests TF2-static-graph"
+ - label: ":brain: rllib: learning tests tf2-static-graph"
  tags: rllib
  parallelism: 3
  instance_type: large
@@ -13,7 +13,7 @@ steps:
  depends_on: rllibbuild
  job_env: forge
 
- - label: ":brain: rllib: learning tests Pytorch"
+ - label: ":brain: rllib: learning tests pytorch"
  tags: rllib
  parallelism: 3
  instance_type: large
@@ -26,6 +26,19 @@ steps:
  depends_on: rllibbuild
  job_env: forge
 
+ - label: ":brain: rllib: examples"
+ tags: rllib
+ parallelism: 3
+ instance_type: large
+ commands:
+ - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib 
+ --workers "$${BUILDKITE_PARALLEL_JOB_COUNT}" --worker-id "$${BUILDKITE_PARALLEL_JOB}" --parallelism-per-worker 3
+ --only-tags examples
+ --except-tags multi_gpu,gpu 
+ --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
+ depends_on: rllibbuild
+ job_env: forge
+
  - label: ":brain: rllib: learning tests tf2-eager-tracing"
  tags: rllib
  parallelism: 2
@@ -39,6 +52,47 @@ steps:
  depends_on: rllibbuild
  job_env: forge
 
+ - label: ":brain: rllib: rlmodule tests"
+ tags: rllib_directly
+ instance_type: large
+ commands:
+ - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib 
+ --parallelism-per-worker 3
+ --only-tags rlm
+ --test-env RLLIB_ENABLE_RL_MODULE=1
+ --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
+ depends_on: rllibbuild
+ job_env: forge
+
+ - label: ":brain: rllib: data tests"
+ if: build.branch != "master"
+ tags: data
+ instance_type: large
+ commands:
+ # learning tests pytorch
+ - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib 
+ --parallelism-per-worker 3
+ --only-tags learning_tests_with_ray_data
+ --except-tags multi_gpu,gpu,tf_only,tf2_only
+ --test-arg --framework=torch
+
+ # learning tests tf2
+ - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib 
+ --parallelism-per-worker 3
+ --only-tags learning_tests_with_ray_data
+ --except-tags multi_gpu,gpu,torch_only
+ --test-arg --framework=tf2
+ --skip-ray-installation # reuse the same docker image as the previous run
+
+ # rllib unittests
+ - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib 
+ --parallelism-per-worker 3
+ --only-tags ray_data
+ --except-tags learning_tests_with_ray_data,multi_gpu,gpu
+ --skip-ray-installation # reuse the same docker image as the previous run
+ depends_on: rllibbuild
+ job_env: forge
+
  - label: ":brain: rllib: benchmarks"
  tags: rllib
  instance_type: medium
@@ -53,7 +107,7 @@ steps:
  commands:
  # torch
  - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests --parallelism-per-worker 3
- --only-tags fake_gpus,learning_tests_discrete,crashing_cartpole,stateless_cartpole,learning_tests_continuous
+ --only-tags fake_gpus,learning_tests_discrete,learning_tests_with_ray_data,crashing_cartpole,stateless_cartpole,learning_tests_continuous
  --except-tags tf_only,tf2_only,multi_gpu
  --test-arg --framework=torch
 
@@ -70,6 +124,20 @@ steps:
  --except-tags fake_gpus,torch_only,multi_gpu,no_tf_eager_tracing
  --test-arg --framework=tf2
  --skip-ray-installation # reuse the same docker image as the previous run
+
+ # examples
+ - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests --parallelism-per-worker 3
+ --only-tags examples
+ --except-tags multi_gpu,gpu 
+ --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
+ --skip-ray-installation # reuse the same docker image as the previous run
+
+ # rlmodule tests
+ - bazel run //ci/ray_ci:test_in_docker -- //rllib/... rllib --run-flaky-tests --parallelism-per-worker 3
+ --only-tags rlm
+ --test-env RLLIB_ENABLE_RL_MODULE=1
+ --test-env RAY_USE_MULTIPROCESSING_CPU_COUNT=1
+ --skip-ray-installation # reuse the same docker image as the previous run
  depends_on: rllibbuild
  soft_fail: true
  job_env: forge
diff --git a/ci/pipeline/determine_tests_to_run.py b/ci/pipeline/determine_tests_to_run.py
@@ -294,6 +294,8 @@ def get_commit_range():
  RAY_CI_TUNE_AFFECTED = 1
  RAY_CI_TRAIN_AFFECTED = 1
  RAY_CI_RLLIB_AFFECTED = 1
+ RAY_CI_RLLIB_DIRECTLY_AFFECTED = 1
+ RAY_CI_DATA_AFFECTED = 1
  RAY_CI_SERVE_AFFECTED = 1
  RAY_CI_CORE_CPP_AFFECTED = 1
  RAY_CI_CPP_AFFECTED = 1

diff --git a/ci/ray_ci/rllib.tests.yml b/ci/ray_ci/rllib.tests.yml
@@ -7,3 +7,26 @@ flaky_tests:
  - //rllib:learning_tests_cartpole_ddppo
  - //rllib:learning_tests_two_step_game_qmix
  - //rllib:learning_tests_pendulum_cql
+ # algorithm, model and other tests
+ - //rllib:test_bc 
+ - //rllib:policy/tests/test_policy
+ - //rllib:env/tests/test_local_inference_cartpole
+ - //rllib:evaluation/tests/test_envs_that_crash
+ - //rllib:test_apex_ddpg
+ - //rllib:test_algorithm_export_checkpoint
+ - //rllib:test_dataset_reader
+ # examples
+ - //rllib:examples/nested_action_spaces_ppo_torch
+ - //rllib:examples/rl_module/mobilenet_rlm
+ - //rllib:examples/action_masking_torch
+ - //rllib:examples/action_masking_tf2
+ - //rllib:examples/learner/train_w_bc_finetune_w_ppo
+ # memory leak tf2-eager-tracing
+ - //rllib:test_memory_leak_ppo
+ - //rllib:test_memory_leak_sac
+ - //rllib:test_memory_leak_impala
+ - //rllib:test_memory_leak_a3c
+ - //rllib:test_memory_leak_dqn
+ - //rllib:test_memory_leak_ddpg
+ # tests/ dir
+ - //rllib:tests/test_io