Skip to content

Commit

Permalink
Improve tutorials' output (#1694)
Browse files Browse the repository at this point in the history
* Modify __str__ and __repr__ for Document and Answer

* Rename QueryClassifier in Tutorial11

* Improve the output of tutorial1

* Make the output of Tutorial8 a bit less dense

* Add a print_questions util to print the output of question generating pipelines

* Replace custom printing with the new utility in Tutorial13

* Ensure all output is printed with minimal details in Tutorial14 and add some titles

* Minor change to print_answers

* Make tutorial3's output the same as tutorial1

* Add __repr__ to Answer and fix to_dict()

* Fix a bug in the Document and Answer's __str__ method

* Improve print_answers, print_documents and print_questions

* Using print_answers in Tutorial7 and fixing typo in the utils

* Remove duplicate line in Tutorial12

* Use print_answers in Tutorial4

* Add explanation of what the documents in the output of the basic QA pipeline are

* Move the fields constant into print_answers

* Normalize all 'minimal' to 'minimum' (they were mixed up)

* Improve the sample output to include all fields from Document and Answer

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
ZanSara and github-actions[bot] authored Nov 9, 2021
1 parent 861522b commit 91cafb4
Show file tree
Hide file tree
Showing 27 changed files with 487 additions and 187 deletions.
30 changes: 29 additions & 1 deletion docs/_src/tutorials/tutorials/1.md
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,35 @@ prediction = pipe.run(


```python
print_answers(prediction, details="minimal")
# Now you can either print the object directly...
from pprint import pprint

pprint(prediction)

# Sample output:
# {
# 'answers': [ <Answer: answer='Eddard', type='extractive', score=0.9919578731060028, offsets_in_document=[{'start': 608, 'end': 615}], offsets_in_context=[{'start': 72, 'end': 79}], document_id='cc75f739897ecbf8c14657b13dda890e', meta={'name': '454_Music_of_Game_of_Thrones.txt'}, context='...' >,
# <Answer: answer='Ned', type='extractive', score=0.9767240881919861, offsets_in_document=[{'start': 3687, 'end': 3801}], offsets_in_context=[{'start': 18, 'end': 132}], document_id='9acf17ec9083c4022f69eb4a37187080', meta={'name': '454_Music_of_Game_of_Thrones.txt'}, context='...' >,
# ...
# ],
# 'documents': [ <Document: content_type='text', score=0.8034909798951382, meta={'name': '332_Sansa_Stark.txt'}, embedding=None, id='d1f36ec7170e4c46cde65787fe125dfe', content='\n===\'\'A Game of Thrones\'\'===\nSansa Stark begins the novel by being betrothed to Crown ...'>,
# <Document: content_type='text', score=0.8002150354529785, meta={'name': '191_Gendry.txt'}, embedding=None, id='dd4e070a22896afa81748d6510006d2', content='\n===Season 2===\nGendry travels North with Yoren and other Night\'s Watch recruits, including Arya ...'>,
# ...
# ],
# 'no_ans_gap': 11.688868522644043,
# 'node_id': 'Reader',
# 'params': {'Reader': {'top_k': 5}, 'Retriever': {'top_k': 5}},
# 'query': 'Who is the father of Arya Stark?',
# 'root_node': 'Query'
# }

```


```python
# ...or use a util to simplify the output
# Change `minimum` to `medium` or `all` to raise the level of detail
print_answers(prediction, details="minimum")
```

## About us
Expand Down
4 changes: 2 additions & 2 deletions docs/_src/tutorials/tutorials/11.md
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ Below, we define a very naive `QueryClassifier` and show how to use it:


```python
class QueryClassifier(BaseComponent):
class CustomQueryClassifier(BaseComponent):
outgoing_edges = 2

def run(self, query: str):
Expand All @@ -307,7 +307,7 @@ class QueryClassifier(BaseComponent):

# Here we build the pipeline
p_classifier = Pipeline()
p_classifier.add_node(component=QueryClassifier(), name="QueryClassifier", inputs=["Query"])
p_classifier.add_node(component=CustomQueryClassifier(), name="QueryClassifier", inputs=["Query"])
p_classifier.add_node(component=es_retriever, name="ESRetriever", inputs=["QueryClassifier.output_1"])
p_classifier.add_node(component=dpr_retriever, name="DPRRetriever", inputs=["QueryClassifier.output_2"])
p_classifier.add_node(component=reader, name="QAReader", inputs=["ESRetriever", "DPRRetriever"])
Expand Down
19 changes: 13 additions & 6 deletions docs/_src/tutorials/tutorials/13.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ from tqdm import tqdm
from haystack.nodes import QuestionGenerator, ElasticsearchRetriever, FARMReader
from haystack.document_stores import ElasticsearchDocumentStore
from haystack.pipelines import QuestionGenerationPipeline, RetrieverQuestionGenerationPipeline, QuestionAnswerGenerationPipeline
from haystack.utils import launch_es
from haystack.utils import launch_es, print_questions

```

Let's start an Elasticsearch instance with one of the options below:
Expand Down Expand Up @@ -98,9 +99,11 @@ which the document can answer.

```python
question_generation_pipeline = QuestionGenerationPipeline(question_generator)
for document in document_store:
for idx, document in enumerate(document_store):

print(f"\n * Generating questions for document {idx}: {document.content[:100]}...\n")
result = question_generation_pipeline.run(documents=[document])
pprint(result)
print_questions(result)
```

## Retriever Question Generation Pipeline
Expand All @@ -111,8 +114,10 @@ This pipeline takes a query as input. It retrieves relevant documents and then g
```python
retriever = ElasticsearchRetriever(document_store=document_store)
rqg_pipeline = RetrieverQuestionGenerationPipeline(retriever, question_generator)

print(f"\n * Generating questions for documents matching the query 'Arya Stark'\n")
result = rqg_pipeline.run(query="Arya Stark")
pprint(result)
print_questions(result)
```

## Question Answer Generation Pipeline
Expand All @@ -124,9 +129,11 @@ a Reader model
```python
reader = FARMReader("deepset/roberta-base-squad2")
qag_pipeline = QuestionAnswerGenerationPipeline(question_generator, reader)
for document in tqdm(document_store):
for idx, document in enumerate(tqdm(document_store)):

print(f"\n * Generating questions and answers for document {idx}: {document.content[:100]}...\n")
result = qag_pipeline.run(documents=[document])
pprint(result)
print_questions(result)
```

## About us
Expand Down
28 changes: 14 additions & 14 deletions docs/_src/tutorials/tutorials/14.md
Original file line number Diff line number Diff line change
Expand Up @@ -161,14 +161,14 @@ res_1 = sklearn_keyword_classifier.run(
query="Who is the father of Arya Stark?"
)
print("DPR Results" + "\n" + "="*15)
print_answers(res_1)
print_answers(res_1, details="minimum")

# Run only the sparse retriever on a keyword based query
res_2 = sklearn_keyword_classifier.run(
query="arya stark father"
)
print("ES Results" + "\n" + "="*15)
print_answers(res_2)
print_answers(res_2, details="minimum")

```

Expand All @@ -180,14 +180,14 @@ res_3 = sklearn_keyword_classifier.run(
query="which country was jon snow filmed ?"
)
print("DPR Results" + "\n" + "="*15)
print_answers(res_3)
print_answers(res_3, details="minimum")

# Run only the sparse retriever on a keyword based query
res_4 = sklearn_keyword_classifier.run(
query="jon snow country"
)
print("ES Results" + "\n" + "="*15)
print_answers(res_4)
print_answers(res_4, details="minimum")
```


Expand All @@ -197,14 +197,14 @@ res_5 = sklearn_keyword_classifier.run(
query="who are the younger brothers of arya stark ?"
)
print("DPR Results" + "\n" + "="*15)
print_answers(res_5)
print_answers(res_5, details="minimum")

# Run only the sparse retriever on a keyword based query
res_6 = sklearn_keyword_classifier.run(
query="arya stark younger brothers"
)
print("ES Results" + "\n" + "="*15)
print_answers(res_6)
print_answers(res_6, details="minimum")
```

## Transformer Keyword vs Question/Statement Classifier
Expand Down Expand Up @@ -234,14 +234,14 @@ res_1 = transformer_keyword_classifier.run(
query="Who is the father of Arya Stark?"
)
print("DPR Results" + "\n" + "="*15)
print_answers(res_1)
print_answers(res_1, details="minimum")

# Run only the sparse retriever on a keyword based query
res_2 = transformer_keyword_classifier.run(
query="arya stark father"
)
print("ES Results" + "\n" + "="*15)
print_answers(res_2)
print_answers(res_2, details="minimum")

```

Expand All @@ -253,14 +253,14 @@ res_3 = transformer_keyword_classifier.run(
query="which country was jon snow filmed ?"
)
print("DPR Results" + "\n" + "="*15)
print_answers(res_3)
print_answers(res_3, details="minimum")

# Run only the sparse retriever on a keyword based query
res_4 = transformer_keyword_classifier.run(
query="jon snow country"
)
print("ES Results" + "\n" + "="*15)
print_answers(res_4)
print_answers(res_4, details="minimum")
```


Expand All @@ -270,14 +270,14 @@ res_5 = transformer_keyword_classifier.run(
query="who are the younger brothers of arya stark ?"
)
print("DPR Results" + "\n" + "="*15)
print_answers(res_5)
print_answers(res_5, details="minimum")

# Run only the sparse retriever on a keyword based query
res_6 = transformer_keyword_classifier.run(
query="arya stark younger brothers"
)
print("ES Results" + "\n" + "="*15)
print_answers(res_6)
print_answers(res_6, details="minimum")
```

## Question vs Statement Classifier
Expand Down Expand Up @@ -305,14 +305,14 @@ res_1 = transformer_question_classifier.run(
query="Who is the father of Arya Stark?"
)
print("DPR Results" + "\n" + "="*15)
print_answers(res_1)
print_answers(res_1, details="minimum")

# Show only DPR results
res_2 = transformer_question_classifier.run(
query="Arya Stark was the daughter of a Lord."
)
print("ES Results" + "\n" + "="*15)
res_2
print_answers(res_2, details="minimum")
```

## Standalone Query Classifier
Expand Down
29 changes: 28 additions & 1 deletion docs/_src/tutorials/tutorials/3.md
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,34 @@ prediction = pipe.run(


```python
print_answers(prediction, details="minimal")
# Now you can either print the object directly...
from pprint import pprint

pprint(prediction)

# Sample output:
# {
# 'answers': [ <Answer: answer='Eddard', type='extractive', score=0.9919578731060028, offsets_in_document=[{'start': 608, 'end': 615}], offsets_in_context=[{'start': 72, 'end': 79}], document_id='cc75f739897ecbf8c14657b13dda890e', meta={'name': '454_Music_of_Game_of_Thrones.txt'}, context='...' >,
# <Answer: answer='Ned', type='extractive', score=0.9767240881919861, offsets_in_document=[{'start': 3687, 'end': 3801}], offsets_in_context=[{'start': 18, 'end': 132}], document_id='9acf17ec9083c4022f69eb4a37187080', meta={'name': '454_Music_of_Game_of_Thrones.txt'}, context='...' >,
# ...
# ],
# 'documents': [ <Document: content_type='text', score=0.8034909798951382, meta={'name': '332_Sansa_Stark.txt'}, embedding=None, id='d1f36ec7170e4c46cde65787fe125dfe', content='\n===\'\'A Game of Thrones\'\'===\nSansa Stark begins the novel by being betrothed to Crown ...'>,
# <Document: content_type='text', score=0.8002150354529785, meta={'name': '191_Gendry.txt'}, embedding=None, id='dd4e070a22896afa81748d6510006d2', content='\n===Season 2===\nGendry travels North with Yoren and other Night\'s Watch recruits, including Arya ...'>,
# ...
# ],
# 'no_ans_gap': 11.688868522644043,
# 'node_id': 'Reader',
# 'params': {'Reader': {'top_k': 5}, 'Retriever': {'top_k': 5}},
# 'query': 'Who is the father of Arya Stark?',
# 'root_node': 'Query'
# }
```


```python
# ...or use a util to simplify the output
# Change `minimum` to `medium` or `all` to raise the level of detail
print_answers(prediction, details="minimum")
```

## About us
Expand Down
8 changes: 3 additions & 5 deletions docs/_src/tutorials/tutorials/4.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,12 +155,10 @@ pipe = FAQPipeline(retriever=retriever)


```python
from haystack.utils import print_answers

prediction = pipe.run(query="How is the virus spreading?", params={"Retriever": {"top_k": 10}})
for a in prediction["answers"]:
print(f"Answer: {a.answer}")
print(f"Question: {a.meta['query']}")
print(f"Score: {a.score}")
print("---------------------")
print_answers(prediction, details="medium")
```

## About us
Expand Down
3 changes: 2 additions & 1 deletion docs/_src/tutorials/tutorials/7.md
Original file line number Diff line number Diff line change
Expand Up @@ -193,11 +193,12 @@ for question in QUESTIONS:
```python
# Or alternatively use the Pipeline class
from haystack.pipelines import GenerativeQAPipeline
from haystack.utils import print_answers

pipe = GenerativeQAPipeline(generator=generator, retriever=retriever)
for question in QUESTIONS:
res = pipe.run(query=question, params={"Generator": {"top_k": 1}, "Retriever": {"top_k": 5}})
print(res)
print_answers(res, details="minimum")
```

## About us
Expand Down
15 changes: 12 additions & 3 deletions haystack/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,10 +186,13 @@ def __eq__(self, other):
getattr(other, 'id_hash_keys', None) == self.id_hash_keys)

def __repr__(self):
return str(self.to_dict())
return f"<Document: {str(self.to_dict())}>"

def __str__(self):
return f"content: {self.content[:100]} {'[...]' if len(self.content) > 100 else ''}"
# In some cases, self.content is None (therefore not subscriptable)
if not self.content:
return f"<Document: id={self.id}, content=None>"
return f"<Document: id={self.id}, content='{self.content[:100]} {'...' if len(self.content) > 100 else ''}'>"

def __lt__(self, other):
""" Enable sorting of Documents by score """
Expand Down Expand Up @@ -262,7 +265,13 @@ def __lt__(self, other):
return self.score < other.score

def __str__(self):
return f"answer: {self.answer} \nscore: {self.score} \ncontext: {self.context}"
# self.context might be None (therefore not subscriptable)
if not self.context:
return f"<Answer: answer='{self.answer}', score={self.score}, context=None>"
return f"<Answer: answer='{self.answer}', score={self.score}, context='{self.context[:50]}{'...' if len(self.context) > 50 else ''}'>"

def __repr__(self):
return f"<Answer {asdict(self)}>"

def to_dict(self):
return asdict(self)
Expand Down
1 change: 1 addition & 0 deletions haystack/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from haystack.utils.export_utils import (
print_answers,
print_documents,
print_questions,
export_answers_to_csv,
convert_labels_to_squad,
)
Expand Down
Loading

0 comments on commit 91cafb4

Please sign in to comment.