[Data] Add stable diffusion benchmark (ray-project#39524)
This PR adds a nightly test that benchmarks Stable Diffusion batch inference.

---------

Signed-off-by: Balaji Veeramani <[email protected]>
bveeramani authored Sep 27, 2023
1 parent baa861d commit 5dba924
Showing 6 changed files with 169 additions and 3 deletions.
94 changes: 94 additions & 0 deletions release/nightly_tests/dataset/stable_diffusion_benchmark.py
@@ -0,0 +1,94 @@
import argparse
import json
import os
from timeit import default_timer as timer
from typing import Dict

import numpy as np
import torch
from diffusers import StableDiffusionImg2ImgPipeline

import ray

DATA_URI = "s3://air-example-data-2/10G-image-data-synthetic-raw-parquet/"
# This isn't the largest batch size that fits in memory, but it achieves virtually 100%
# GPU utilization, and throughput declines at higher batch sizes.
BATCH_SIZE = 32
PROMPT = "ghibli style"


def parse_args():
    parser = argparse.ArgumentParser(description="Stable diffusion benchmark")
    parser.add_argument("--smoke-test", action="store_true")
    return parser.parse_args()


def main(args):
    ray.init()
    ray.data.DataContext.get_current().execution_options.verbose_progress = True

    start_time = timer()

    dataset = ray.data.read_parquet(DATA_URI)

    if args.smoke_test:
        dataset = dataset.limit(1)

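    # Size the actor pool to one GenerateImage actor per GPU in the cluster.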
    actor_pool_size = int(ray.cluster_resources().get("GPU"))
    dataset = dataset.map_batches(
        GenerateImage,
        compute=ray.data.ActorPoolStrategy(size=actor_pool_size),
        batch_size=BATCH_SIZE,
        num_gpus=1,
    )

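    # Iterating the batches drives execution of the lazy pipeline; count rows
    # to compute throughput below.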
    num_images = 0
    for batch in dataset.iter_batches(batch_format="pyarrow", batch_size=None):
        num_images += len(batch)

    end_time = timer()

    total_time = end_time - start_time
    throughput = num_images / total_time

    # For structured output integration with internal tooling
    results = {
        "data_uri": DATA_URI,
        "perf_metrics": {
            "total_time_s": total_time,
            "throughput_images_s": throughput,
            "num_images": num_images,
        },
    }

    test_output_json = os.environ.get("TEST_OUTPUT_JSON", "release_test_out.json")
    with open(test_output_json, "wt") as f:
        json.dump(results, f)

    print(results)


class GenerateImage:
    def __init__(self):
        device = "cuda" if torch.cuda.is_available() else "cpu"
        self.pipeline = StableDiffusionImg2ImgPipeline.from_pretrained(
            "nitrosocke/Ghibli-Diffusion",
            torch_dtype=torch.float16,
            use_safetensors=True,
            requires_safety_checker=False,
            safety_checker=None,
        ).to(device)
        self.pipeline.set_progress_bar_config(disable=True)

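    # Apply the fixed prompt to every image in the batch; output_type="np"
    # returns the generated images as NumPy arrays for Ray Data.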
    def __call__(self, batch: Dict[str, np.ndarray]):
        output = self.pipeline(
            prompt=[PROMPT] * len(batch["image"]),
            image=batch["image"],
            output_type="np",
        )
        return {"image": output.images}


if __name__ == "__main__":
    args = parse_args()
    main(args)
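
To sanity-check the Ray Data pattern used above without GPUs, diffusers, or S3 access, the same pipeline shape can be exercised with a pass-through callable. This is a minimal sketch assuming a synthetic in-memory dataset; the Identity class and the toy data are illustrative, not part of the commit:

from typing import Dict

import numpy as np

import ray


class Identity:
    # Stand-in for GenerateImage: returns each image batch unchanged.
    def __call__(self, batch: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]:
        return {"image": batch["image"]}


ds = ray.data.from_items(
    [{"image": np.zeros((8, 8, 3), dtype=np.uint8)} for _ in range(64)]
)
# Same structure as the benchmark: a fixed-size actor pool mapping over batches.
ds = ds.map_batches(
    Identity,
    compute=ray.data.ActorPoolStrategy(size=2),
    batch_size=16,
)
print(ds.count())  # 64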
13 changes: 13 additions & 0 deletions release/nightly_tests/dataset/stable_diffusion_benchmark_compute.yaml
@@ -0,0 +1,13 @@
cloud_id: {{env["ANYSCALE_CLOUD_ID"]}}
region: us-west-2

head_node_type:
  name: head_node
  instance_type: m5.4xlarge

worker_node_types:
  - name: worker_node
    instance_type: g4dn.4xlarge
    max_workers: 16
    min_workers: 16
    use_spot: false
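
This cluster provides 16 g4dn.4xlarge workers with one NVIDIA T4 GPU each, so int(ray.cluster_resources().get("GPU")) in the benchmark script resolves to an actor pool of 16. A quick way to confirm the resolved pool size on a running cluster — a hedged sketch, not part of the commit; the default of 0 is an added safeguard:

import ray

ray.init()
# On the cluster above this prints 16: one GenerateImage actor per T4 GPU.
print(int(ray.cluster_resources().get("GPU", 0)))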
15 changes: 15 additions & 0 deletions release/nightly_tests/dataset/stable_diffusion_benchmark_compute_gce.yaml
@@ -0,0 +1,15 @@
cloud_id: {{env["ANYSCALE_CLOUD_ID"]}}
region: us-west1
allowed_azs:
  - us-west1-b

head_node_type:
  name: head_node
  instance_type: n2-standard-16 # m5.4xlarge

worker_node_types:
  - name: worker_node
    instance_type: n1-standard-16-nvidia-tesla-t4-1 # g4dn.4xlarge
    min_workers: 16
    max_workers: 16
    use_spot: false
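
The GCE configuration mirrors the AWS one machine-for-machine, as the inline comments note: n1-standard-16-nvidia-tesla-t4-1 carries a single T4 GPU like g4dn.4xlarge, so the benchmark's actor pool again resolves to 16.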
1 change: 1 addition & 0 deletions release/ray_release/byod/requirements_ml_byod_3.8.in
@@ -6,6 +6,7 @@ crc32c
cupy-cuda113
datasets
deepspeed
diffusers
evaluate
fastapi
filelock
25 changes: 22 additions & 3 deletions release/ray_release/byod/requirements_ml_byod_3.8.txt
@@ -626,6 +626,10 @@ decorator==5.1.1 \
deepspeed==0.10.0 \
    --hash=sha256:afb06a97fde2a33d0cbd60a8357a70087c037b9f647ca48377728330c35eff3e
    # via -r release/ray_release/byod/requirements_ml_byod_3.8.in
diffusers==0.21.3 \
    --hash=sha256:aaa9220b3e44bc3d252c75115eafa260f0e3db770572f3db7dda3dfbbe6a4edd \
    --hash=sha256:eb5a0d9d98f68b785bf74714b1c8f82a00ab92edbea7b80d1ed829b4051f05f8
    # via -r release/ray_release/byod/requirements_ml_byod_3.8.in
dill==0.3.7 \
    --hash=sha256:76b122c08ef4ce2eedcd4d1abd8e641114bfc6c2867f49f3c41facf65bf19f5e \
    --hash=sha256:cc1c8b182eb3013e24bd475ff2e9295af86c1a38eb1aff128dac8962a9ce3c03
@@ -732,6 +736,7 @@ filelock==3.12.2 \
    --hash=sha256:cbb791cdea2a72f23da6ac5b5269ab0a0d161e9ef0100e653b69049a7706d1ec
    # via
    #   -r release/ray_release/byod/requirements_ml_byod_3.8.in
    #   diffusers
    #   huggingface-hub
    #   torch
    #   transformers
@@ -1019,6 +1024,7 @@ huggingface-hub==0.16.4 \
    --hash=sha256:608c7d4f3d368b326d1747f91523dbd1f692871e8e2e7a4750314a2dd8b63e14
    # via
    #   datasets
    #   diffusers
    #   evaluate
    #   transformers
idna==3.4 \
@@ -1028,6 +1034,10 @@ idna==3.4 \
    #   anyio
    #   requests
    #   yarl
importlib-metadata==6.8.0 \
    --hash=sha256:3ebb78df84a805d7698245025b975d9d67053cd94c79245ba4b3eb694abe68bb \
    --hash=sha256:dbace7892d8c0c4ac1ad096662232f831d4e64f4c4545bd53016a3e9d4654743
    # via diffusers
importlib-resources==5.12.0 \
    --hash=sha256:4be82589bf5c1d7999aedf2a45159d10cb3ca4f19b2271f8792bc8e6da7b22f6 \
    --hash=sha256:7b1deeebbf351c7578e09bf2f63fa2ce8b5ffec296e0d349139d43cca061a81a
@@ -1467,6 +1477,7 @@ numpy==1.24.3 \
    #   cupy-cuda113
    #   datasets
    #   deepspeed
    #   diffusers
    #   evaluate
    #   matplotlib
    #   modin
@@ -1677,6 +1688,7 @@ pillow==9.5.0 \
    --hash=sha256:fe7e1c262d3392afcf5071df9afa574544f28eac825284596ac6db56e6d11062 \
    --hash=sha256:fed1e1cf6a42577953abbe8e6cf2fe2f566daebde7c34724ec8803c4c0cda579
    # via
    #   diffusers
    #   matplotlib
    #   torchvision
pkgutil-resolve-name==1.3.10 \
@@ -2165,13 +2177,16 @@ regex==2023.6.3 \
    --hash=sha256:f415f802fbcafed5dcc694c13b1292f07fe0befdb94aa8a52905bd115ff41e88 \
    --hash=sha256:fb5ec16523dc573a4b277663a2b5a364e2099902d3944c9419a40ebd56a118f9 \
    --hash=sha256:fea75c3710d4f31389eed3c02f62d0b66a9da282521075061ce875eb5300cf23
    # via transformers
    # via
    #   diffusers
    #   transformers
requests==2.31.0 \
    --hash=sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f \
    --hash=sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1
    # via
    #   azure-core
    #   datasets
    #   diffusers
    #   evaluate
    #   fsspec
    #   gcsfs
@@ -2356,7 +2371,9 @@ safetensors==0.3.1 \
    --hash=sha256:dcf527ecc5f58907fd9031510378105487f318cc91ecdc5aee3c7cc8f46030a8 \
    --hash=sha256:ddd0ddd502cf219666e7d30f23f196cb87e829439b52b39f3e7da7918c3416df \
    --hash=sha256:e2f083112cf97aa9611e2a05cc170a2795eccec5f6ff837f4565f950670a9d83
    # via transformers
    # via
    #   diffusers
    #   transformers
scikit-learn==1.3.0 \
    --hash=sha256:0e8102d5036e28d08ab47166b48c8d5e5810704daecf3a476a4282d562be9a28 \
    --hash=sha256:151ac2bf65ccf363664a689b8beafc9e6aae36263db114b4ca06fbbbf827444a \
@@ -2974,7 +2991,9 @@ yarl==1.9.2 \
zipp==3.15.0 \
    --hash=sha256:112929ad649da941c23de50f356a2b5570c954b65150642bccdd66bf194d224b \
    --hash=sha256:48904fc76a60e542af151aded95726c1a5c34ed43ab4134b597665c86d7ad556
    # via importlib-resources
    # via
    #   importlib-metadata
    #   importlib-resources
zstd==1.5.5.1 \
    --hash=sha256:022f935a8666e08f0fff6204938a84d9fe4fcd8235a205787275933a07a164fb \
    --hash=sha256:03444e357b7632c64480a81ce7095242dab9d7f8aed317326563ef6c663263eb \
24 changes: 24 additions & 0 deletions release/release_tests.yaml
@@ -5756,6 +5756,30 @@
  cluster:
    cluster_compute: data_ingest_benchmark_compute_gce.yaml

- name: stable_diffusion_benchmark
  group: data-tests
  working_dir: nightly_tests/dataset

  frequency: nightly
  team: data

  cluster:
    byod:
      type: gpu
    cluster_compute: stable_diffusion_benchmark_compute.yaml

  run:
    timeout: 1800
    script: python stable_diffusion_benchmark.py

  variations:
    - __suffix__: aws
    - __suffix__: gce
      env: gce
      frequency: manual
      cluster:
        cluster_compute: stable_diffusion_benchmark_compute_gce.yaml

- name: streaming_data_ingest_benchmark_1tb
  group: data-tests
  working_dir: nightly_tests/dataset
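
The variations block registers the test twice: the aws variant inherits the nightly frequency and the default compute config, while the gce variant swaps in stable_diffusion_benchmark_compute_gce.yaml and is marked frequency: manual, so it runs only when triggered explicitly.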