From e68946965df578860ec3f5e58938f199440bb07a Mon Sep 17 00:00:00 2001 From: Branden Chan <33759007+brandenchan@users.noreply.github.com> Date: Tue, 5 Oct 2021 14:38:15 +0200 Subject: [PATCH] Update Reader language options --- docs/latest/guides/languages.mdx | 47 ++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/docs/latest/guides/languages.mdx b/docs/latest/guides/languages.mdx index bb415c821..bc904ffbe 100644 --- a/docs/latest/guides/languages.mdx +++ b/docs/latest/guides/languages.mdx @@ -89,6 +89,17 @@ there are a couple QA models that are directly usable in Haystack. ), }, + { + title: "French", + content: ( +
++ ), + }, { title: "Italian", content: ( @@ -137,13 +148,24 @@ there are a couple QA models that are directly usable in Haystack. content: (from haystack.reader import FARMReader
++ reader = FARMReader("etalab-ia/camembert-base-squadFR-fquad-piaf") +
+
from haystack.reader import TransformersReader
+
+ reader = TransformersReader("deepset/gelectra-large-germanquad")
+
+
+ ),
+ },
{
title: "French",
content: (
), @@ -154,8 +176,22 @@ there are a couple QA models that are directly usable in Haystack.from haystack.reader import TransformersReader
- reader = TransformersReader("illuin/camembert-base-fquad") + reader = TransformersReader("etalab-ia/camembert-base-squadFR-fquad-piaf")
+ ), + }, + { + title: "Chinese", + content: ( +from haystack.reader import TransformersReader
- reader = - TransformersReader("mrm8488/bert-italian-finedtuned-squadv1-it-alfa") + reader = TransformersReader("mrm8488/bert-italian-finedtuned-squadv1-it-alfa") +
+
+), @@ -166,8 +202,7 @@ there are a couple QA models that are directly usable in Haystack.from haystack.reader import TransformersReader
++ reader = TransformersReader("uer/roberta-base-chinese-extractive-qa") +
+# or
++ reader = TransformersReader("wptoux/albert-chinese-large-qa")
), @@ -185,7 +220,7 @@ We are the creators of the **German** model and you can find out more about it [ The **French**, **Italian**, **Spanish**, **Portuguese** and **Chinese** models are monolingual language models trained on versions of the SQuAD dataset in their respective languages and their authors report decent results in their model cards -(e.g. [here](https://huggingface.co/illuin/camembert-base-fquad) and [here](https://huggingface.co/mrm8488/bert-italian-finedtuned-squadv1-it-alfa)). +(e.g. [here](https://huggingface.co/etalab-ia/camembert-base-squadFR-fquad-piaf) and [here](https://huggingface.co/mrm8488/bert-italian-finedtuned-squadv1-it-alfa)). There also exist Korean QA models on the model hub but their performance is not reported. The **zero-shot model** that is shown above is a **multilingual XLM-RoBERTa Large** that is trained on English SQuAD.from haystack.reader import TransformersReader
- reader = - TransformersReader("deepset/xlm-roberta-large-squad2") + reader = TransformersReader("deepset/xlm-roberta-large-squad2")