diff --git a/doc/source/serve/doc_code/http_guide.py b/doc/source/serve/doc_code/http_guide.py
new file mode 100644
index 0000000000000..27883ef1f8892
--- /dev/null
+++ b/doc/source/serve/doc_code/http_guide.py
@@ -0,0 +1,112 @@
+# flake8: noqa
+
+# __begin_starlette__
+import starlette.requests
+import requests
+from ray import serve
+
+
+@serve.deployment
+class Counter:
+    def __call__(self, request: starlette.requests.Request):
+        return request.query_params
+
+
+serve.run(Counter.bind())
+resp = requests.get("http://localhost:8000?a=b&c=d")
+assert resp.json() == {"a": "b", "c": "d"}
+# __end_starlette__
+
+# __begin_dagdriver__
+import numpy as np
+import requests
+from ray import serve
+from ray.serve.drivers import DAGDriver
+from ray.serve.http_adapters import json_to_ndarray
+
+
+@serve.deployment
+class Model:
+    def __call__(self, arr: np.ndarray):
+        return arr.sum()
+
+
+serve.run(DAGDriver.bind(Model.bind(), http_adapter=json_to_ndarray))
+resp = requests.post("http://localhost:8000", json={"array": [[1, 2], [2, 3]]})
+assert resp.json() == 8
+
+# __end_dagdriver__
+
+# __begin_fastapi__
+import ray
+import requests
+from fastapi import FastAPI
+from ray import serve
+
+app = FastAPI()
+
+
+@serve.deployment(route_prefix="/hello")
+@serve.ingress(app)
+class MyFastAPIDeployment:
+    @app.get("/")
+    def root(self):
+        return "Hello, world!"
+
+
+serve.run(MyFastAPIDeployment.bind())
+resp = requests.get("http://localhost:8000/hello")
+assert resp.json() == "Hello, world!"
+# __end_fastapi__
+
+
+# __begin_fastapi_multi_routes__
+import ray
+import requests
+from fastapi import FastAPI
+from ray import serve
+
+app = FastAPI()
+
+
+@serve.deployment(route_prefix="/hello")
+@serve.ingress(app)
+class MyFastAPIDeployment:
+    @app.get("/")
+    def root(self):
+        return "Hello, world!"
+
+    @app.post("/{subpath}")
+    def say_hi(self, subpath: str):
+        return f"Hello from {subpath}!"
+
+
+serve.run(MyFastAPIDeployment.bind())
+resp = requests.post("http://localhost:8000/hello/Serve")
+assert resp.json() == "Hello from Serve!"
+# __end_fastapi_multi_routes__
+
+# __begin_byo_fastapi__
+import ray
+import requests
+from fastapi import FastAPI
+from ray import serve
+
+app = FastAPI()
+
+
+@app.get("/")
+def f():
+    return "Hello from the root!"
+
+
+@serve.deployment(route_prefix="/")
+@serve.ingress(app)
+class FastAPIWrapper:
+    pass
+
+
+serve.run(FastAPIWrapper.bind())
+resp = requests.get("http://localhost:8000/")
+assert resp.json() == "Hello from the root!"
+# __end_byo_fastapi__
diff --git a/doc/source/serve/http-guide.md b/doc/source/serve/http-guide.md
index 641dad37d43ae..d0b6888e4ab4a 100644
--- a/doc/source/serve/http-guide.md
+++ b/doc/source/serve/http-guide.md
@@ -1,124 +1,82 @@
-# HTTP with Serve
-
-
+# HTTP API
 
 This section should help you understand how to:
-
 - send HTTP requests to Serve deployments
 - use Ray Serve to integrate with FastAPI
 - use customized HTTP Adapters
+- choose which feature to use for your use case
 
+## Choosing the right HTTP feature
 
-:::{note}
-HTTP Proxy HA is enabled by using [REST API](serve-in-production-deploying) or [Kubernetes operator](deploying-serve-on-kubernetes) to start the Ray Serve
-:::
+Serve offers a layered approach to expose your model with the right HTTP API.
 
-(serve-http)=
+Depending on your use case, you can choose the right level of abstraction:
+- If you are comfortable working with the raw request object, use the [`starlette.requests.Request` API](serve-http).
+- If you want a fully fledged API server with validation and doc generation, use the [FastAPI integration](serve-fastapi-http).
+- If you just want a pre-defined HTTP schema, use the [`DAGDriver` with `http_adapter`](serve-http-adapters).
 
-## Calling Deployments via HTTP
-When you deploy a Serve application, the ingress deployment (the one passed to `serve.run`) will be exposed over HTTP. If you want to route to another deployment, you can do so using the [ServeHandle API](serve-model-composition).
+(serve-http)=
+## Calling Deployments via HTTP
+When you deploy a Serve application, the [ingress deployment](serve-key-concepts-ingress-deployment) (the one passed to `serve.run`) will be exposed over HTTP.
 
-```python
-@serve.deployment
-class Counter:
-    def __call__(self, request):
-        pass
+```{literalinclude} ../serve/doc_code/http_guide.py
+:start-after: __begin_starlette__
+:end-before: __end_starlette__
+:language: python
 ```
 
-Any request to the Serve HTTP server at `/` is routed to the deployment's `__call__` method with a [Starlette Request object](https://www.starlette.io/requests/) as the sole argument. The `__call__` method can return any JSON-serializable object or a [Starlette Response object](https://www.starlette.io/responses/) (e.g., to return a custom status code).
+Requests to the Serve HTTP server at `/` are routed to the deployment's `__call__` method with a [Starlette Request object](https://www.starlette.io/requests/) as the sole argument. The `__call__` method can return any JSON-serializable object or a [Starlette Response object](https://www.starlette.io/responses/) (e.g., to return a custom status code or custom headers).
 
-Below, we discuss some advanced features for customizing Ray Serve's HTTP functionality.
+Often for ML models, you just need the API to accept a `numpy` array. You can use Serve's `DAGDriver` to simplify the request parsing.
 
-(serve-fastapi-http)=
+```{literalinclude} ../serve/doc_code/http_guide.py
+:start-after: __begin_dagdriver__
+:end-before: __end_dagdriver__
+:language: python
+```
+```{note}
+Serve provides a library of HTTP adapters to help you avoid boilerplate code. The [later section](serve-http-adapters) dives deeper into how these work.
 ```
+
+(serve-fastapi-http)=
 
 ## FastAPI HTTP Deployments
 
 If you want to define more complex HTTP handling logic, Serve integrates with [FastAPI](https://fastapi.tiangolo.com/). This allows you to define a Serve deployment using the {mod}`@serve.ingress <ray.serve.ingress>` decorator that wraps a FastAPI app with its full range of features. The most basic example of this is shown below, but for more details on all that FastAPI has to offer such as variable routes, automatic type validation, dependency injection (e.g., for database connections), and more, please check out [their documentation](https://fastapi.tiangolo.com/).
 
-```python
-import ray
-
-from fastapi import FastAPI
-from ray import serve
-
-app = FastAPI()
-ray.init(address="auto", namespace="summarizer")
-
-@serve.deployment(route_prefix="/hello")
-@serve.ingress(app)
-class MyFastAPIDeployment:
-    @app.get("/")
-    def root(self):
-        return "Hello, world!"
-
-serve.run(MyFastAPIDeployment.bind())
+```{literalinclude} ../serve/doc_code/http_guide.py
+:start-after: __begin_fastapi__
+:end-before: __end_fastapi__
+:language: python
 ```
 
 Now if you send a request to `/hello`, this will be routed to the `root` method of our deployment.
 
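+As a side note, FastAPI's automatic type validation (mentioned above) also applies to methods inside a Serve deployment. The sketch below is illustrative only; it is not part of the tested `doc_code` snippets in this change, and the names `typed_app`, `TypedDeployment`, and the `/validate` prefix are hypothetical. It assumes a query parameter annotated as `int`, which FastAPI parses and validates before the method runs (invalid values get a `422` response):
+
+```python
+from fastapi import FastAPI
+from ray import serve
+
+typed_app = FastAPI()
+
+
+@serve.deployment(route_prefix="/validate")
+@serve.ingress(typed_app)
+class TypedDeployment:
+    @typed_app.get("/")
+    def multiply(self, count: int) -> int:
+        # FastAPI has already converted and validated `count` as an int here;
+        # a request like /validate?count=abc is rejected with HTTP 422.
+        return count * 2
+
+
+serve.run(TypedDeployment.bind())
+# requests.get("http://localhost:8000/validate?count=3").json() == 6
+```
+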
 We can also easily leverage FastAPI to define multiple routes with different HTTP methods:
 
-```python
-import ray
-
-from fastapi import FastAPI
-from ray import serve
-
-app = FastAPI()
-ray.init(address="auto", namespace="summarizer")
-
-@serve.deployment(route_prefix="/hello")
-@serve.ingress(app)
-class MyFastAPIDeployment:
-    @app.get("/")
-    def root(self):
-        return "Hello, world!"
-
-    @app.post("/{subpath}")
-    def root(self, subpath: str):
-        return f"Hello from {subpath}!"
-
-serve.run(MyFastAPIDeployment.bind())
+```{literalinclude} ../serve/doc_code/http_guide.py
+:start-after: __begin_fastapi_multi_routes__
+:end-before: __end_fastapi_multi_routes__
+:language: python
 ```
 
 You can also pass in an existing FastAPI app to a deployment to serve it as-is:
 
-```python
-import ray
-
-from fastapi import FastAPI
-from ray import serve
-
-app = FastAPI()
-ray.init(address="auto", namespace="summarizer")
-
-@app.get("/")
-def f():
-    return "Hello from the root!"
-
-# ... add more routes, routers, etc. to `app` ...
-
-@serve.deployment(route_prefix="/")
-@serve.ingress(app)
-class FastAPIWrapper:
-    pass
-
-serve.run(FastAPIWrapper.bind())
+```{literalinclude} ../serve/doc_code/http_guide.py
+:start-after: __begin_byo_fastapi__
+:end-before: __end_byo_fastapi__
+:language: python
 ```
 
 This is useful for scaling out an existing FastAPI app with no modifications necessary.
-Existing middlewares, automatic OpenAPI documentation generation, and other advanced FastAPI features should work as-is.
-
-To try it out, save a code snippet in a local python file (e.g. `main.py`) and in the same directory, run the following commands to start a local Ray cluster on your machine.
+Existing middlewares, **automatic OpenAPI documentation generation**, and other advanced FastAPI features should work as-is.
 
-```bash
-ray start --head
-python main.py
+```{note}
+Serve currently does not support WebSockets. If you have a use case that requires it, please [let us know](https://github.com/ray-project/ray/issues/new/choose)!
 ```
 
 (serve-http-adapters)=
 
 ## HTTP Adapters
-
 HTTP adapters are functions that convert raw HTTP requests to basic Python types that you know and recognize.
 
 For example, here is an adapter that extracts the JSON content from a request:
@@ -141,7 +99,6 @@ def parse_query_args(field_a: int, field_b: str):
 You can specify different type signatures to facilitate the extraction of HTTP fields, including
 - [query parameters](https://fastapi.tiangolo.com/tutorial/query-params/),
 - [body parameters](https://fastapi.tiangolo.com/tutorial/body/),
-and
 - [many other data types](https://fastapi.tiangolo.com/tutorial/extra-data-types/).
 
 For more details, you can take a look at the [FastAPI documentation](https://fastapi.tiangolo.com/).
@@ -212,14 +169,6 @@ async def endpoint(np_array = Depends(json_to_ndarray)):
     ...
 ```
 
-It has the following schema for input:
-
-(serve-ndarray-schema)=
-
-```{eval-rst}
-.. autopydantic_model:: ray.serve.http_adapters.NdArray
-
-```
 
 ### Pydantic models as adapters
@@ -245,6 +194,8 @@ DAGDriver.bind(other_node, http_adapter=User)
 
 Here is a list of adapters; please feel free to [contribute more](https://github.com/ray-project/ray/issues/new/choose)!
 
+(serve-ndarray-schema)=
+
 ```{eval-rst}
 .. automodule:: ray.serve.http_adapters
     :members: json_to_ndarray, image_to_ndarray, starlette_request, json_request, pandas_read_json, json_to_multi_ndarray
diff --git a/doc/source/serve/key-concepts.md b/doc/source/serve/key-concepts.md
index 2906660366b00..d9f72e6d00cfe 100644
--- a/doc/source/serve/key-concepts.md
+++ b/doc/source/serve/key-concepts.md
@@ -50,7 +50,7 @@ class Driver:
         refa = await self._model_a_handle.remote(request)
         refb = await self._model_b_handle.remote(request)
         return (await refa) + (await refb)
-    
+
 
 model_a = ModelA.bind()
 model_b = ModelB.bind()
@@ -62,6 +62,8 @@ driver = Driver.bind(model_a, model_b)
 serve.run(driver)
 ```
 
+(serve-key-concepts-ingress-deployment)=
+
 ## Ingress Deployment (HTTP handling)
 
 A Serve application can consist of multiple deployments that can be combined to perform model composition or complex business logic.
@@ -79,7 +81,7 @@ Here's an example:
 class MostBasicIngress:
     async def __call__(self, request: starlette.requests.Request) -> str:
         name = await request.json()["name"]
-        return f"Hello {name}"    
+        return f"Hello {name}"
 ```
 
 After binding the deployment and running `serve.run()`, it is now exposed by the HTTP server and handles requests using the specified class.
@@ -103,7 +105,7 @@ app = FastAPI()
 class MostBasicIngress:
     @app.get("/{name}")
     async def say_hi(self, name: str) -> str:
-        return f"Hello {name}"    
+        return f"Hello {name}"
 ```
 
 (serve-key-concepts-deployment-graph)=
diff --git a/python/ray/serve/http_adapters.py b/python/ray/serve/http_adapters.py
index 01b43125a7f76..063e934d68168 100644
--- a/python/ray/serve/http_adapters.py
+++ b/python/ray/serve/http_adapters.py
@@ -43,7 +43,10 @@ class NdArray(BaseModel):
 
 @PublicAPI(stability="beta")
 def json_to_ndarray(payload: NdArray) -> np.ndarray:
-    """Accepts an NdArray JSON from an HTTP body and converts it to a numpy array."""
+    """Accepts an NdArray JSON from an HTTP body and converts it to a numpy array.
+
+    .. autopydantic_model:: ray.serve.http_adapters.NdArray
+    """
     arr = np.array(payload.array)
     if payload.shape:
         arr = arr.reshape(*payload.shape)
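
For reference, a minimal sketch of how the adapter documented above is exercised over HTTP, assuming the `DAGDriver`/`Model` app from `doc_code/http_guide.py` in this patch is running locally; the optional `"shape"` field is what triggers the reshape branch shown in the hunk:

```python
import requests

# "array" carries the data; "shape" is optional and, when present,
# is forwarded to arr.reshape(...) by json_to_ndarray.
resp = requests.post(
    "http://localhost:8000",
    json={"array": [1, 2, 2, 3], "shape": [2, 2]},
)
assert resp.json() == 8  # Model.__call__ returns arr.sum()
```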