Skip to content

Commit

Permalink
Autogenerate OpenAPI specs file (#2047)
Browse files Browse the repository at this point in the history
* Add docstrings to the REST API endpoint to have them included in the OpenAPI specs

* Attempt to make GitHub CI generate the OpenAPI specs

* Missing __init__.py was breaking rest_api import

* Add comment on dummy pipeline

* Create separate workflow file for the OpenAPI specs generation

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Markus Paff <[email protected]>
  • Loading branch information
3 people committed Jan 27, 2022
1 parent 3c02aa5 commit 7137710
Show file tree
Hide file tree
Showing 12 changed files with 158 additions and 30 deletions.
47 changes: 47 additions & 0 deletions .github/workflows/generate_openapi_specs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Regenerates docs/_src/api/openapi/openapi.json and pushes it back to the branch.
# Runs on manual dispatch and on every push that touches files under rest_api/.
name: Generate OpenAPI Specs

on:
  workflow_dispatch:
  push:
    paths:
      - "rest_api/**"

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v2
        with:
          persist-credentials: false  # otherwise, the token used is the GITHUB_TOKEN, instead of your personal token
          fetch-depth: 0  # otherwise, you will fail to push refs to dest repo

      - name: Set up Python 3.8.10
        uses: actions/setup-python@v2
        with:
          python-version: 3.8.10

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install .[rest]

      # Regenerates the OpenAPI specs so that the committed file matches the current REST API.
      # The script is launched from its own directory; `git status` logs what changed.
      - name: Generate OpenAPI Specs
        run: |
          cd docs/_src/api/openapi/
          python generate_openapi_specs.py
          cd ../../../../
          git status

      # The commit is allowed to be a no-op when the specs did not change.
      - name: Commit files
        run: |
          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          git add .
          git commit -m "Update OpenAPI Specs" -a || echo "No changes to commit"

      - name: Push changes
        uses: ad-m/github-push-action@master
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          branch: ${{ github.ref }}
2 changes: 1 addition & 1 deletion .github/workflows/update_docsstrings_tutorials.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
cd docs/_src/api/api/
./generate_docstrings.sh
cd ../../tutorials/tutorials/
python3 convert_ipynb.py
python3 convert_ipynb.py
cd ../../../../
git status
Expand Down
21 changes: 21 additions & 0 deletions docs/_src/api/openapi/generate_openapi_specs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""
Generate the OpenAPI specs of the REST API and dump them into `openapi.json`.

All paths are derived from this script's own location, so the script can be
launched from any working directory and always writes `openapi.json` next to
itself.
"""
import json
import os
import sys
from pathlib import Path

# This file lives in <repo>/docs/_src/api/openapi/, so the repository root is
# four levels above the directory that contains it.
script_dir = Path(__file__).resolve().parent
repo_root = script_dir.parents[3]

# Make the `rest_api` package importable without installing it.
sys.path.append(str(repo_root))

rest_path = repo_root / "rest_api"
pipeline_path = str(rest_path / "pipeline" / "pipeline_empty.yaml")
app_path = str(rest_path / "application.py")
print(f"Loading OpenAPI specs from {app_path} with pipeline at {pipeline_path}")

# Must be set BEFORE importing `rest_api` (hence the late import below).
# NOTE(review): presumably the REST API reads PIPELINE_YAML_PATH at import time - confirm.
os.environ["PIPELINE_YAML_PATH"] = pipeline_path

from rest_api.application import get_openapi_specs  # noqa: E402  (import must follow the env setup)

# Generate the openapi specs
specs = get_openapi_specs()

# Dump the specs into a JSON file placed next to this script
with open(script_dir / "openapi.json", "w") as f:
    json.dump(specs, f)
1 change: 1 addition & 0 deletions docs/_src/api/openapi/openapi.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"openapi": "3.0.2", "info": {"title": "Haystack-API", "version": "1.0.0"}, "paths": {"/initialized": {"get": {"tags": ["search"], "summary": "Check Status", "description": "This endpoint can be used during startup to understand if the \nserver is ready to take any requests, or is still loading.\n\nThe recommended approach is to call this endpoint with a short timeout,\nlike 500ms, and in case of no reply, consider the server busy.", "operationId": "check_status_initialized_get", "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {}}}}}}}, "/hs_version": {"get": {"tags": ["search"], "summary": "Haystack Version", "description": "Get the running Haystack version.", "operationId": "haystack_version_hs_version_get", "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {}}}}}}}, "/query": {"post": {"tags": ["search"], "summary": "Query", "description": "This endpoint receives the question as a string and allows the requester to set \nadditional parameters that will be passed on to the Haystack pipeline.", "operationId": "query_query_post", "requestBody": {"content": {"application/json": {"schema": {"$ref": "#/components/schemas/QueryRequest"}}}, "required": true}, "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/QueryResponse"}}}}, "422": {"description": "Validation Error", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/HTTPValidationError"}}}}}}}, "/feedback": {"get": {"tags": ["feedback"], "summary": "Get Feedback", "description": "This endpoint allows the API user to retrieve all the\nfeedback that has been sumbitted through the \n`POST /feedback` endpoint", "operationId": "get_feedback_feedback_get", "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {}}}}}}, "post": {"tags": ["feedback"], "summary": 
"Post Feedback", "description": "This endpoint allows the API user to submit feedback on \nan answer for a particular query. For example, the user \ncan send feedback on whether the answer was correct and \nwhether the right snippet was identified as the answer. \nInformation submitted through this endpoint is used to \ntrain the underlying QA model.", "operationId": "post_feedback_feedback_post", "requestBody": {"content": {"application/json": {"schema": {"$ref": "#/components/schemas/LabelSerialized"}}}, "required": true}, "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {}}}}, "422": {"description": "Validation Error", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/HTTPValidationError"}}}}}}}, "/eval-feedback": {"post": {"tags": ["feedback"], "summary": "Get Feedback Metrics", "description": "This endpoint returns basic accuracy metrics based on user feedback, \ne.g., the ratio of correct answers or correctly identified documents. 
\nYou can filter the output by document or label.\n\nExample:\n`curl --location --request POST 'http:https://127.0.0.1:8000/eval-doc-qa-feedback' --header 'Content-Type: application/json' --data-raw '{ \"filters\": {\"document_id\": [\"XRR3xnEBCYVTkbTystOB\"]} }'`", "operationId": "get_feedback_metrics_eval_feedback_post", "requestBody": {"content": {"application/json": {"schema": {"$ref": "#/components/schemas/FilterRequest"}}}}, "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {}}}}, "422": {"description": "Validation Error", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/HTTPValidationError"}}}}}}}, "/export-feedback": {"get": {"tags": ["feedback"], "summary": "Export Feedback", "description": "This endpoint returns JSON output in the SQuAD format for question/answer pairs \nthat were marked as \"relevant\" by user feedback through the `POST /feedback` endpoint.\n\nThe context_size param can be used to limit response size for large documents.", "operationId": "export_feedback_export_feedback_get", "parameters": [{"required": false, "schema": {"title": "Context Size", "type": "integer", "default": 100000}, "name": "context_size", "in": "query"}, {"required": false, "schema": {"title": "Full Document Context", "type": "boolean", "default": true}, "name": "full_document_context", "in": "query"}, {"required": false, "schema": {"title": "Only Positive Labels", "type": "boolean", "default": false}, "name": "only_positive_labels", "in": "query"}], "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {}}}}, "422": {"description": "Validation Error", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/HTTPValidationError"}}}}}}}, "/file-upload": {"post": {"tags": ["file-upload"], "summary": "Upload File", "description": "You can use this endpoint to upload a file for indexing \n(see 
[http:https://localhost:3000/guides/rest-api#indexing-documents-in-the-haystack-rest-api-document-store]).", "operationId": "upload_file_file_upload_post", "requestBody": {"content": {"multipart/form-data": {"schema": {"$ref": "#/components/schemas/Body_upload_file_file_upload_post"}}}, "required": true}, "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {}}}}, "422": {"description": "Validation Error", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/HTTPValidationError"}}}}}}}, "/documents/get_by_filters": {"post": {"tags": ["document"], "summary": "Get Documents", "description": "This endpoint allows you to retrieve documents contained in your document store. \nYou can filter the documents to delete by metadata (like the document's name),\nor provide an empty JSON object to clear the document store.\n\nExample of filters: \n`'{\"filters\": {{\"name\": [\"some\", \"more\"], \"category\": [\"only_one\"]}}'`\n\nTo get all documents you should provide an empty dict, like:\n`'{\"filters\": {}}'`", "operationId": "get_documents_documents_get_by_filters_post", "requestBody": {"content": {"application/json": {"schema": {"$ref": "#/components/schemas/FilterRequest"}}}, "required": true}, "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {"title": "Response Get Documents Documents Get By Filters Post", "type": "array", "items": {"$ref": "#/components/schemas/DocumentSerialized"}}}}}, "422": {"description": "Validation Error", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/HTTPValidationError"}}}}}}}, "/documents/delete_by_filters": {"post": {"tags": ["document"], "summary": "Delete Documents", "description": "This endpoint allows you to delete documents contained in your document store. 
\nYou can filter the documents to delete by metadata (like the document's name), \nor provide an empty JSON object to clear the document store.\n\nExample of filters: \n`'{\"filters\": {{\"name\": [\"some\", \"more\"], \"category\": [\"only_one\"]}}'`\n\nTo get all documents you should provide an empty dict, like:\n`'{\"filters\": {}}'`", "operationId": "delete_documents_documents_delete_by_filters_post", "requestBody": {"content": {"application/json": {"schema": {"$ref": "#/components/schemas/FilterRequest"}}}, "required": true}, "responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {"title": "Response Delete Documents Documents Delete By Filters Post", "type": "boolean"}}}}, "422": {"description": "Validation Error", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/HTTPValidationError"}}}}}}}}, "components": {"schemas": {"AnswerSerialized": {"title": "AnswerSerialized", "required": ["answer"], "type": "object", "properties": {"answer": {"title": "Answer", "type": "string"}, "type": {"title": "Type", "enum": ["generative", "extractive", "other"], "type": "string", "default": "extractive"}, "score": {"title": "Score", "type": "number"}, "context": {"title": "Context", "type": "string"}, "offsets_in_document": {"title": "Offsets In Document", "type": "array", "items": {"$ref": "#/components/schemas/Span"}}, "offsets_in_context": {"title": "Offsets In Context", "type": "array", "items": {"$ref": "#/components/schemas/Span"}}, "document_id": {"title": "Document Id", "type": "string"}, "meta": {"title": "Meta", "type": "object"}}}, "Body_upload_file_file_upload_post": {"title": "Body_upload_file_file_upload_post", "required": ["files"], "type": "object", "properties": {"files": {"title": "Files", "type": "array", "items": {"type": "string", "format": "binary"}}, "meta": {"title": "Meta", "type": "string", "default": "null"}, "remove_numeric_tables": {"title": "Remove Numeric Tables"}, 
"valid_languages": {"title": "Valid Languages"}, "clean_whitespace": {"title": "Clean Whitespace"}, "clean_empty_lines": {"title": "Clean Empty Lines"}, "clean_header_footer": {"title": "Clean Header Footer"}, "split_by": {"title": "Split By"}, "split_length": {"title": "Split Length"}, "split_overlap": {"title": "Split Overlap"}, "split_respect_sentence_boundary": {"title": "Split Respect Sentence Boundary"}}}, "DocumentSerialized": {"title": "DocumentSerialized", "required": ["content", "content_type", "id", "meta"], "type": "object", "properties": {"content": {"title": "Content", "type": "string"}, "content_type": {"title": "Content Type", "enum": ["text", "table", "image"], "type": "string"}, "id": {"title": "Id", "type": "string"}, "meta": {"title": "Meta", "type": "object"}, "score": {"title": "Score", "type": "number"}, "embedding": {"title": "Embedding", "type": "array", "items": {"type": "number"}}, "id_hash_keys": {"title": "Id Hash Keys", "type": "array", "items": {"type": "string"}}}}, "FilterRequest": {"title": "FilterRequest", "type": "object", "properties": {"filters": {"title": "Filters", "type": "object", "additionalProperties": {"anyOf": [{"type": "string"}, {"type": "array", "items": {"type": "string"}}]}}}}, "HTTPValidationError": {"title": "HTTPValidationError", "type": "object", "properties": {"detail": {"title": "Detail", "type": "array", "items": {"$ref": "#/components/schemas/ValidationError"}}}}, "LabelSerialized": {"title": "LabelSerialized", "required": ["id", "query", "document", "is_correct_answer", "is_correct_document", "origin"], "type": "object", "properties": {"id": {"title": "Id", "type": "string"}, "query": {"title": "Query", "type": "string"}, "document": {"$ref": "#/components/schemas/DocumentSerialized"}, "is_correct_answer": {"title": "Is Correct Answer", "type": "boolean"}, "is_correct_document": {"title": "Is Correct Document", "type": "boolean"}, "origin": {"title": "Origin", "enum": ["user-feedback", "gold-label"], 
"type": "string"}, "answer": {"$ref": "#/components/schemas/AnswerSerialized"}, "no_answer": {"title": "No Answer", "type": "boolean"}, "pipeline_id": {"title": "Pipeline Id", "type": "string"}, "created_at": {"title": "Created At", "type": "string"}, "updated_at": {"title": "Updated At", "type": "string"}, "meta": {"title": "Meta", "type": "object"}}}, "QueryRequest": {"title": "QueryRequest", "required": ["query"], "type": "object", "properties": {"query": {"title": "Query", "type": "string"}, "params": {"title": "Params", "type": "object"}, "debug": {"title": "Debug", "type": "boolean", "default": false}}, "additionalProperties": false}, "QueryResponse": {"title": "QueryResponse", "required": ["query", "answers"], "type": "object", "properties": {"query": {"title": "Query", "type": "string"}, "answers": {"title": "Answers", "type": "array", "items": {"$ref": "#/components/schemas/AnswerSerialized"}}, "documents": {"title": "Documents", "type": "array", "items": {"$ref": "#/components/schemas/DocumentSerialized"}}, "_debug": {"title": " Debug", "type": "object"}}}, "Span": {"title": "Span", "required": ["start", "end"], "type": "object", "properties": {"start": {"title": "Start", "type": "integer"}, "end": {"title": "End", "type": "integer"}}}, "ValidationError": {"title": "ValidationError", "required": ["loc", "msg", "type"], "type": "object", "properties": {"loc": {"title": "Location", "type": "array", "items": {"type": "string"}}, "msg": {"title": "Message", "type": "string"}, "type": {"title": "Error Type", "type": "string"}}}}}}
22 changes: 19 additions & 3 deletions rest_api/application.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import logging
from pathlib import Path

logging.basicConfig(format="%(asctime)s %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p")
logger = logging.getLogger(__name__)
Expand All @@ -10,6 +9,7 @@
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.routing import APIRoute
from fastapi.openapi.utils import get_openapi
from starlette.middleware.cors import CORSMiddleware

from rest_api.controller.errors.http_error import http_error_handler
Expand All @@ -18,7 +18,7 @@

except (ImportError, ModuleNotFoundError) as ie:
from haystack.utils.import_utils import _optional_component_not_installed
_optional_component_not_installed(__name__, "rest", ie)
_optional_component_not_installed("rest_api", "rest", ie)



Expand All @@ -30,12 +30,28 @@ def get_application() -> FastAPI:
application.add_middleware(
CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"],
)

application.add_exception_handler(HTTPException, http_error_handler)
application.include_router(api_router)

return application


def get_openapi_specs() -> dict:
    """
    Used to autogenerate OpenAPI specs file to use in the documentation.

    Builds the application with `get_application()` and forwards its title,
    version, OpenAPI version, description and routes to FastAPI's `get_openapi`.

    See `docs/_src/api/openapi/generate_openapi_specs.py`

    :return: the value returned by `get_openapi` (the OpenAPI specs as a dict).
    """
    app = get_application()
    # Forward the attributes untouched: mapping falsy values to None (as in
    # `x if x else None`) would hand `get_openapi` a None in place of a valid
    # empty string or an empty route list.
    return get_openapi(
        title=app.title,
        version=app.version,
        openapi_version=app.openapi_version,
        description=app.description,
        routes=app.routes,
    )


def use_route_names_as_operation_ids(app: FastAPI) -> None:
"""
Simplify operation IDs so that generated API clients have simpler function
Expand Down
30 changes: 18 additions & 12 deletions rest_api/controller/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,15 @@
@router.post("/documents/get_by_filters", response_model=List[DocumentSerialized], response_model_exclude_none=True)
def get_documents(filters: FilterRequest):
"""
Can be used to get documents from a document store.
:param filters: Filters to narrow down the documents to delete.
Example: '{"filters": {{"name": ["some", "more"], "category": ["only_one"]}}'
To get all documents you should provide an empty dict, like:
'{"filters": {}}'
This endpoint allows you to retrieve documents contained in your document store.
You can filter the documents to retrieve by metadata (like the document's name),
or provide an empty JSON object to retrieve all the documents.
Example of filters:
`'{"filters": {"name": ["some", "more"], "category": ["only_one"]}}'`
To get all documents you should provide an empty dict, like:
`'{"filters": {}}'`
"""
docs = [doc.to_dict() for doc in DOCUMENT_STORE.get_all_documents(filters=filters.filters)]
for doc in docs:
Expand All @@ -35,12 +38,15 @@ def get_documents(filters: FilterRequest):
@router.post("/documents/delete_by_filters", response_model=bool)
def delete_documents(filters: FilterRequest):
    """
    This endpoint allows you to delete documents contained in your document store.
    You can filter the documents to delete by metadata (like the document's name),
    or provide an empty JSON object to clear the document store.

    Example of filters:
    `'{"filters": {"name": ["some", "more"], "category": ["only_one"]}}'`

    To delete all documents you should provide an empty dict, like:
    `'{"filters": {}}'`
    """
    # The filters are handed to the document store as-is; the endpoint always
    # answers True once the call returns without raising.
    DOCUMENT_STORE.delete_documents(filters=filters.filters)
    return True
Loading

0 comments on commit 7137710

Please sign in to comment.