From ca90c6348304720f5f22ff890fdee434f4ab53c2 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Tue, 16 Nov 2021 08:12:08 -0800 Subject: [PATCH] [Serve] Add serve failure test to CI (#20392) --- .buildkite/pipeline.yml | 3 +++ release/BUILD | 21 +++++++++++++++++++ .../workloads/serve_failure.py | 13 +++++++++--- 3 files changed, 34 insertions(+), 3 deletions(-) create mode 100644 release/BUILD diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 03c2d655dce12..83a43459ee5c3 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -222,6 +222,9 @@ - bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=-post_wheel_build python/ray/serve/... + - bazel test --config=ci $(./scripts/bazel_export_options) + --test_tag_filters=team:serve + release/... - label: ":python: Minimal install" conditions: ["RAY_CI_PYTHON_AFFECTED"] diff --git a/release/BUILD b/release/BUILD new file mode 100644 index 0000000000000..3552c7714c019 --- /dev/null +++ b/release/BUILD @@ -0,0 +1,21 @@ +load("@rules_python//python:defs.bzl", "py_test") + +test_srcs = glob(["**/*.py"]) + +py_test( + name = "serve_failure_smoke_test", + size = "medium", + srcs = test_srcs, + env = { + "IS_SMOKE_TEST": "1", + }, + main = "serve_failure.py", + tags = [ + "exclusive", + "team:serve", + ], + deps = [ + "//:ray_lib", + "//python/ray/serve:serve_lib", + ], +) diff --git a/release/long_running_tests/workloads/serve_failure.py b/release/long_running_tests/workloads/serve_failure.py index eadcb3586fdab..8b99145249146 100644 --- a/release/long_running_tests/workloads/serve_failure.py +++ b/release/long_running_tests/workloads/serve_failure.py @@ -24,6 +24,8 @@ # RandomTest setup constants CPUS_PER_NODE = 10 +IS_SMOKE_TEST = "IS_SMOKE_TEST" in os.environ + def update_progress(result): """ @@ -54,7 +56,8 @@ def update_progress(result): namespace="serve_failure_test", address=cluster.address, dashboard_host="0.0.0.0", - log_to_driver=True) + log_to_driver=True, +) serve.start(detached=True) @@ -124,7 +127,7 @@ def run(self): start_time = time.time() previous_time = start_time while True: - for _ in range(100): + for _ in range(20): actions, weights = zip(*self.weighted_actions) action_chosen = random.choices(actions, weights=weights)[0] print(f"Executing {action_chosen}") @@ -146,7 +149,11 @@ def run(self): previous_time = new_time iteration += 1 + if IS_SMOKE_TEST: + break + +tester = RandomTest(max_deployments=NUM_NODES * CPUS_PER_NODE) random_killer = RandomKiller.remote() random_killer.run.remote() -RandomTest(max_deployments=NUM_NODES * CPUS_PER_NODE).run() +tester.run()