Skip to content

Commit

Permalink
Adjust Docker and REST API to allow TransformersReader Class (deepset-a…
Browse files Browse the repository at this point in the history
  • Loading branch information
guillim committed Jul 7, 2020
1 parent fe33a48 commit 8a616da
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 22 deletions.
5 changes: 4 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,12 @@ COPY rest_api /home/user/rest_api
# copy saved FARM models
COPY models /home/user/models

# copy sqlite db if needed for testing
# Optional: copy sqlite db if needed for testing
#COPY qa.db /home/user/

# optional: copy data directory containing docs for indexing
#COPY data /home/user/data

EXPOSE 8000

# cmd for running the API
Expand Down
3 changes: 3 additions & 0 deletions Dockerfile-GPU
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ COPY models /home/user/models
# Optional: copy sqlite db if needed for testing
#COPY qa.db /home/user/

# Optional: copy data directory containing docs for indexing
#COPY data /home/user/data

EXPOSE 8000

ENV LC_ALL=C.UTF-8
Expand Down
33 changes: 25 additions & 8 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,19 +1,36 @@
version: '3'
services:
  haystack-api:
    build:
      context: .
      dockerfile: Dockerfile
    image: "deepset/haystack-cpu:latest"
    ports:
      - 8000:8000
    volumes:
      # Optional: mount your own models from disk into the container
      - "./models:/home/user/models"
    environment:
      # See rest_api/config.py for more variables that you can configure here.
      - DB_HOST=elasticsearch
      - USE_GPU=False
      # Load a model from transformers' model hub or a local path into the FARMReader.
      - READER_MODEL_PATH=deepset/roberta-base-squad2
      # - READER_MODEL_PATH=/home/user/models/roberta-base-squad2
      # Alternative: If you want to use the TransformersReader (e.g. for loading a local model in transformers format):
      # - READER_USE_TRANSFORMERS=True
      # - READER_MODEL_PATH=/home/user/models/roberta-base-squad2
      # - READER_TOKENIZER=/home/user/models/roberta-base-squad2
    restart: always
    depends_on:
      - elasticsearch
    # Sleep gives Elasticsearch time to come up before the API connects to it.
    command: "/bin/bash -c 'sleep 15 && gunicorn rest_api.application:app -b 0.0.0.0 -k uvicorn.workers.UvicornWorker --workers 1 --timeout 180'"
  elasticsearch:
    # This will start an empty elasticsearch instance (so you have to add your documents yourself)
    image: "elasticsearch:7.6.1"
    # If you want a demo image instead that is "ready-to-query" with some indexed Game of Thrones articles:
    # image: "deepset/elasticsearch-game-of-thrones"
    ports:
      - 9200:9200
    environment:
      - discovery.type=single-node
2 changes: 1 addition & 1 deletion haystack/reader/transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def __init__(
:param tokenizer: name of the tokenizer (usually the same as model)
:param context_window_size: num of chars (before and after the answer) to return as "context" for each answer.
The context usually helps users to understand if the answer really makes sense.
:param use_gpu: < 1 -> use cpu
:param use_gpu: < 0 -> use cpu
>= 0 -> ordinal of the gpu to use
"""
self.model = pipeline("question-answering", model=model, tokenizer=tokenizer, device=use_gpu)
Expand Down
3 changes: 3 additions & 0 deletions rest_api/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

# Resources / Computation
# Whether the reader should run on GPU; env var is compared case-insensitively to "true".
USE_GPU = os.getenv("USE_GPU", "True").lower() == "true"
# GPU ordinal handed to the reader when USE_GPU is true (see rest_api/controller/search.py).
# NOTE(review): default 1 selects the *second* GPU — confirm 0 wasn't intended.
GPU_NUMBER = int(os.getenv("GPU_NUMBER", 1))
# Number of processes FARMReader may spawn for multiprocessing inference.
MAX_PROCESSES = int(os.getenv("MAX_PROCESSES", 4))
# Batch size used by the FARMReader.
BATCHSIZE = int(os.getenv("BATCHSIZE", 50))
CONCURRENT_REQUEST_PER_WORKER = int(os.getenv("CONCURRENT_REQUEST_PER_WORKER", 4))
Expand All @@ -26,6 +27,8 @@

# Reader
# Model-hub name or local path of the reader model; None disables the reader
# entirely (pure FAQ matching — see rest_api/controller/search.py).
READER_MODEL_PATH = os.getenv("READER_MODEL_PATH", None)
# If true, instantiate a TransformersReader instead of the default FARMReader.
READER_USE_TRANSFORMERS = os.getenv("READER_USE_TRANSFORMERS", "False").lower() == "true"
# Separate tokenizer path/name for the TransformersReader (usually same as the model).
READER_TOKENIZER = os.getenv("READER_TOKENIZER", None)
# Number of characters around the answer returned as "context".
CONTEXT_WINDOW_SIZE = int(os.getenv("CONTEXT_WINDOW_SIZE", 500))
DEFAULT_TOP_K_READER = int(os.getenv("DEFAULT_TOP_K_READER", 5))
TOP_K_PER_CANDIDATE = int(os.getenv("TOP_K_PER_CANDIDATE", 3))
Expand Down
34 changes: 22 additions & 12 deletions rest_api/controller/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@
EMBEDDING_DIM, EMBEDDING_FIELD_NAME, EXCLUDE_META_DATA_FIELDS, EMBEDDING_MODEL_PATH, USE_GPU, READER_MODEL_PATH, \
BATCHSIZE, CONTEXT_WINDOW_SIZE, TOP_K_PER_CANDIDATE, NO_ANS_BOOST, MAX_PROCESSES, MAX_SEQ_LEN, DOC_STRIDE, \
DEFAULT_TOP_K_READER, DEFAULT_TOP_K_RETRIEVER, CONCURRENT_REQUEST_PER_WORKER, FAQ_QUESTION_FIELD_NAME, \
EMBEDDING_MODEL_FORMAT
EMBEDDING_MODEL_FORMAT, READER_USE_TRANSFORMERS, READER_TOKENIZER, GPU_NUMBER
from rest_api.controller.utils import RequestLimiter
from haystack.database.elasticsearch import ElasticsearchDocumentStore
from haystack.reader.farm import FARMReader
from haystack.reader.transformers import TransformersReader
from haystack.retriever.base import BaseRetriever
from haystack.retriever.sparse import ElasticsearchRetriever
from haystack.retriever.dense import EmbeddingRetriever
Expand Down Expand Up @@ -54,17 +55,26 @@
retriever = ElasticsearchRetriever(document_store=document_store)

if READER_MODEL_PATH:  # for extractive doc-qa
    if READER_USE_TRANSFORMERS:
        # TransformersReader expects a device ordinal: -1 means CPU,
        # >= 0 is the GPU index (see haystack/reader/transformers.py).
        use_gpu = GPU_NUMBER if USE_GPU else -1
        reader = TransformersReader(
            model=str(READER_MODEL_PATH),
            use_gpu=use_gpu,
            context_window_size=CONTEXT_WINDOW_SIZE,
            # Bug fix: str(None) would produce the literal string "None" and make
            # transformers try to load a tokenizer named "None" when READER_TOKENIZER
            # is unset; fall back to None so the model's own tokenizer is used.
            tokenizer=str(READER_TOKENIZER) if READER_TOKENIZER else None,
        )  # type: Optional[TransformersReader]
    else:
        reader = FARMReader(
            model_name_or_path=str(READER_MODEL_PATH),
            batch_size=BATCHSIZE,
            use_gpu=USE_GPU,
            context_window_size=CONTEXT_WINDOW_SIZE,
            top_k_per_candidate=TOP_K_PER_CANDIDATE,
            no_ans_boost=NO_ANS_BOOST,
            num_processes=MAX_PROCESSES,
            max_seq_len=MAX_SEQ_LEN,
            doc_stride=DOC_STRIDE,
        )  # type: Optional[FARMReader]
else:
    reader = None  # don't need one for pure FAQ matching

Expand Down

0 comments on commit 8a616da

Please sign in to comment.