Add more config information to the stable diffusion template and update to 2.9 (ray-project#41987)

Signed-off-by: akshay-anyscale <[email protected]>
akshay-anyscale authored Jan 2, 2024
1 parent 3a15b56 commit 923ea82
Showing 5 changed files with 20 additions and 15 deletions.
4 changes: 2 additions & 2 deletions doc/source/templates/03_serving_stable_diffusion/README.md
@@ -4,8 +4,8 @@
| ---------------------- | ----------- |
| Summary | This app provides users a one click production option for serving a pre-trained Stable Diffusion model from HuggingFace. It leverages [Ray Serve](https://docs.ray.io/en/latest/serve/index.html) to deploy locally and built in IDE integration on an Anyscale Workspace to iterate and add additional logic to the application. You can then use a simple CLI to deploy to production with [Anyscale Services](https://docs.anyscale.com/productionize/services/get-started). |
| Time to Run | Around 2 minutes to setup the models and generate your first image(s). Less than 10 seconds for every subsequent round of image generation (depending on the image size). |
-| Minimum Compute Requirements | At least 1 GPU node. The default is 4 nodes, each with 1 NVIDIA T4 GPU. |
-| Cluster Environment | This template uses a docker image built on top of the latest Anyscale-provided Ray image using Python 3.9: [`anyscale/ray:latest-py39-cu118`](https://docs.anyscale.com/reference/base-images/overview). See the appendix below for more details. |
+| Minimum Compute Requirements | At least 1 GPU node with 1 NVIDIA A10 GPU. |
+| Cluster Environment | This template uses a docker image built on top of the latest Anyscale-provided Ray 2.9 image using Python 3.9: [`anyscale/ray:latest-py39-cu118`](https://docs.anyscale.com/reference/base-images/overview). See the appendix below for more details. |

## Get Started

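For context on the README entry above: once the template's Serve application is running, images are generated through the `/imagine` route defined in `app.py` below. A minimal client sketch, assuming Ray Serve's default HTTP address (`127.0.0.1:8000`) and an illustrative prompt, neither of which comes from this commit:

```python
import requests

# Query the /imagine route exposed by the template's APIIngress deployment.
# The address assumes Ray Serve's default HTTP host/port; adjust as needed.
resp = requests.get(
    "http://127.0.0.1:8000/imagine",
    params={"prompt": "a watercolor painting of a lighthouse", "img_size": 512},
    timeout=300,
)
resp.raise_for_status()

# The route returns raw PNG bytes, so they can be written straight to disk.
with open("generated_image.png", "wb") as f:
    f.write(resp.content)
```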
17 changes: 11 additions & 6 deletions doc/source/templates/03_serving_stable_diffusion/app.py
@@ -5,15 +5,17 @@
from fastapi.responses import Response
import torch
from diffusers import EulerDiscreteScheduler, StableDiffusionPipeline
+import logging

app = FastAPI()
+logger = logging.getLogger("ray.serve")


@serve.deployment(num_replicas=1)
@serve.ingress(app)
class APIIngress:
    def __init__(self, diffusion_model_handle) -> None:
-        self.handle = diffusion_model_handle.options(use_new_handle_api=True)
+        self.handle = diffusion_model_handle

    @app.get(
        "/imagine",
@@ -24,15 +26,19 @@ async def generate(self, prompt: str, img_size: int = 512):
        assert len(prompt), "prompt parameter cannot be empty"

        image = await self.handle.generate.remote(prompt, img_size=img_size)
-
        file_stream = BytesIO()
        image.save(file_stream, "PNG")
        return Response(content=file_stream.getvalue(), media_type="image/png")


@serve.deployment(
-    ray_actor_options={"num_gpus": 1},
-    autoscaling_config={"min_replicas": 1, "max_replicas": 3},
+    ray_actor_options={"num_gpus": 1, "num_cpus": 1},
+    max_concurrent_queries=2,
+    autoscaling_config={
+        "min_replicas": 1,
+        "max_replicas": 3,
+        "target_num_ongoing_requests_per_replica": 1,
+    },
)
class StableDiffusionV2:
    def __init__(self):
@@ -48,9 +54,8 @@ def __init__(self):

    def generate(self, prompt: str, img_size: int = 512):
        assert len(prompt), "prompt parameter cannot be empty"
-
+        logger.info("Prompt: [%s]", prompt)
        image = self.pipe(prompt, height=img_size, width=img_size).images[0]
-
        return image


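The deployment options above control scaling behavior: with `target_num_ongoing_requests_per_replica` set to 1 and `max_concurrent_queries` set to 2, Serve roughly aims to keep one in-flight request per replica and adds replicas under load, up to `max_replicas: 3`. For reference, a minimal sketch of how the two deployments shown above are typically composed and run; the module name `app` and the variable `entrypoint` are assumptions, not taken from this diff:

```python
from ray import serve

# Assumes the deployments from app.py are importable as a module named `app`.
from app import APIIngress, StableDiffusionV2

# Bind the GPU-backed model deployment and pass its handle to the HTTP
# ingress, mirroring the APIIngress.__init__ signature shown above.
entrypoint = APIIngress.bind(StableDiffusionV2.bind())

# Deploy locally; serve.run waits for replicas to become healthy and
# returns a handle to the running application.
serve.run(entrypoint)
```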
@@ -1,6 +1,6 @@
# See https://hub.docker.com/r/anyscale/ray for full list of
# available Ray, Python, and CUDA versions.
-base_image: anyscale/ray:latest-py39-cu118
+base_image: anyscale/ray:2.9.0-py39-cu118

env_vars: {}

@@ -19,4 +19,4 @@ python:

conda_packages: []

-post_build_cmds: []
\ No newline at end of file
+post_build_cmds: []
@@ -179,7 +179,7 @@
"@serve.ingress(app)\n",
"class APIIngress:\n",
" def __init__(self, diffusion_model_handle) -> None:\n",
" self.handle = diffusion_model_handle.options(use_new_handle_api=True)\n",
" self.handle = diffusion_model_handle\n",
"\n",
" @app.get(\n",
" \"/imagine\",\n",
8 changes: 4 additions & 4 deletions doc/source/templates/configs/compute/gpu/aws.yaml
@@ -1,11 +1,11 @@
-# 4 g4dn.2xlarge nodes --> 32 CPUs, 4 GPUs
+# 3 g5.4xlarge nodes --> 48 CPUs, 3 GPUs
head_node_type:
  name: head_node_type
-  instance_type: g4dn.2xlarge
+  instance_type: g5.4xlarge

worker_node_types:
- name: gpu_worker
-  instance_type: g4dn.2xlarge
-  min_workers: 3
+  instance_type: g5.4xlarge
+  min_workers: 0
  max_workers: 3
  use_spot: false
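Before deploying against the updated compute config, it can be worth confirming that the cluster actually reports GPU resources. A small sketch, assuming it runs inside a workspace already attached to the cluster:

```python
import ray

# Inside a workspace this attaches to the existing Ray cluster rather than
# starting a new local instance.
ray.init(ignore_reinit_error=True)

# With the g5.4xlarge head node from the config above, "GPU" should be >= 1;
# worker GPUs appear only after the autoscaler brings workers up from 0.
resources = ray.cluster_resources()
print(resources.get("GPU", 0), "GPUs available")
```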
