lukas-blecher committed Sep 7, 2023
1 parent 7fd8197 commit f7d42f8
Showing 5 changed files with 27 additions and 10 deletions.
19 changes: 15 additions & 4 deletions README.md
@@ -14,8 +14,6 @@ This is the official repository for Nougat, the academic document PDF parser that

Project page: https://facebookresearch.github.io/nougat/

Huggingface Community Demo: https://huggingface.co/spaces/ysharma/nougat

## Install

From pip:
@@ -40,7 +38,9 @@ Install via

To get predictions for a PDF run

```$ nougat path/to/file.pdf```
```
$ nougat path/to/file.pdf -o output_directory
```

```
usage: nougat [-h] [--batchsize BATCHSIZE] [--checkpoint CHECKPOINT] [--out OUT] [--recompute] [--markdown] pdf [pdf ...]
@@ -65,12 +65,23 @@ In the output directory every PDF will be saved as a `.mmd` file, the lightweight

With the extra dependencies installed, you can use `app.py` to start an API. Call

```
```sh
$ nougat_api
```

This starts a server; to get a prediction for a PDF file, make a POST request to https://127.0.0.1:8503/predict/. The endpoint also accepts the parameters `start` and `stop` to limit the computation to the selected page numbers (boundaries are included).

The response is a string with the markdown text of the document.

```sh
curl -X 'POST' \
  'https://127.0.0.1:8503/predict/' \
  -H 'accept: application/json' \
  -H 'Content-Type: multipart/form-data' \
  -F 'file=@<PDFFILE.pdf>;type=application/pdf'
```

To limit the conversion to pages 1 to 5, use the `start`/`stop` parameters in the request URL: https://127.0.0.1:8503/predict/?start=1&stop=5
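
The request URL with page bounds can also be built programmatically. A minimal Python sketch, using only the standard library; the endpoint URL and the `start`/`stop` parameter names come from the API description above, while the helper name `predict_url` is hypothetical:

```python
from urllib.parse import urlencode

BASE = "https://127.0.0.1:8503/predict/"

def predict_url(start: int, stop: int) -> str:
    # start and stop are inclusive page bounds, per the API description above
    return f"{BASE}?{urlencode({'start': start, 'stop': stop})}"

print(predict_url(1, 5))  # https://127.0.0.1:8503/predict/?start=1&stop=5
```

The resulting URL can then be used as the target of the multipart POST shown above.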

## Dataset
### Generate dataset

2 changes: 1 addition & 1 deletion app.py
@@ -31,7 +31,7 @@
)
sys.exit(1)

app = FastAPI(title="LaTeX-OCR API")
app = FastAPI(title="Nougat API")
origins = ["https://localhost", "https://127.0.0.1"]

app.add_middleware(
2 changes: 1 addition & 1 deletion nougat/_version.py
@@ -4,4 +4,4 @@
This source code is licensed under the MIT license found in the
LICENSE file in the root directory of this source tree.
"""
__version__ = "0.1.3"
__version__ = "0.1.4"
12 changes: 9 additions & 3 deletions predict.py
@@ -26,9 +26,11 @@
    BATCH_SIZE = int(
        torch.cuda.get_device_properties(0).total_memory / 1024 / 1024 / 1000 * 0.3
    )
    if BATCH_SIZE == 0:
        logging.warning("GPU VRAM is too small. Computing on CPU.")
else:
    # don't know what a good value is here; would not recommend running on CPU
    BATCH_SIZE = 5
    BATCH_SIZE = 1
    logging.warning("No GPU found. Conversion on CPU is very slow.")
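
The VRAM-based default above can be read as a small helper: roughly 0.3 batch items per GB of GPU memory, falling back to a batch size of 1 when the result rounds down to zero. A sketch of that heuristic; the function name `suggested_batch_size` is hypothetical and not part of the repository:

```python
def suggested_batch_size(total_vram_bytes: int) -> int:
    """Mirror the predict.py heuristic: ~0.3 batch items per GB of VRAM."""
    batch = int(total_vram_bytes / 1024 / 1024 / 1000 * 0.3)
    # A result of 0 means the GPU is too small; fall back to batch size 1 (CPU path).
    return batch if batch > 0 else 1

print(suggested_batch_size(16 * 1024 * 1024 * 1000))  # 4
```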


@@ -86,8 +88,12 @@ def get_args():
def main():
    args = get_args()
    model = NougatModel.from_pretrained(args.checkpoint).to(torch.bfloat16)
    if torch.cuda.is_available():
        model.to("cuda")
    if args.batchsize > 0:
        if torch.cuda.is_available():
            model.to("cuda")
    else:
        # set batch size to 1; need to check whether CPU conversion benefits from >1
        args.batchsize = 1
model.eval()
datasets = []
for pdf in args.pdf:
Expand Down
2 changes: 1 addition & 1 deletion train.py
@@ -182,7 +182,7 @@ def train(config):
        devices=torch.cuda.device_count(),
        strategy="ddp_find_unused_parameters_true",
        accelerator="gpu",
        plugins=[SLURMEnvironment(auto_requeue=False)],
        # plugins=[SLURMEnvironment(auto_requeue=False)],
        max_epochs=config.max_epochs,
        max_steps=config.max_steps,
        val_check_interval=config.val_check_interval,
