Add no_answer option to results #24

Merged
merged 6 commits into from
Feb 19, 2020
Changes from 1 commit
add no_answer option to aggregation of paragraphs level preds
tholor committed Feb 17, 2020
commit 58e56b362db0c682cad57ef895e3b959f9ae3a17
27 changes: 23 additions & 4 deletions haystack/reader/farm.py
@@ -53,6 +53,7 @@ def __init__(
self.inferencer = Inferencer.load(model_name_or_path, batch_size=batch_size, gpu=use_gpu, task_type="question_answering")
self.inferencer.model.prediction_heads[0].context_window_size = context_window_size
self.inferencer.model.prediction_heads[0].no_ans_threshold = no_ans_threshold
self.no_ans_threshold = no_ans_threshold
self.inferencer.model.prediction_heads[0].n_best = n_candidates_per_passage

def train(self, data_dir, train_filename, dev_filename=None, test_file_name=None,
@@ -183,16 +184,20 @@ def predict(self, question, paragrahps, meta_data_paragraphs=None, top_k=None, m
}
input_dicts.append(cur)

# get answers from QA model (Top 5 per input paragraph)
# get answers from QA model (Default: top 5 per input paragraph)
predictions = self.inferencer.inference_from_dicts(
dicts=input_dicts, rest_api_schema=True, max_processes=max_processes
)

# assemble answers from all the different paragraphs & format them
# for the "no answer" option, we choose the no_answer score from the paragraph with the best "real answer"
# the score of this "no answer" is then "boosted" with the no_ans_gap
answers = []
best_score_answer = 0
for pred in predictions:
for a in pred["predictions"][0]["answers"]:
if a["answer"]: #skip "no answer"
# skip "no answers" here
if a["answer"]:
cur = {"answer": a["answer"],
"score": a["score"],
"probability": float(expit(np.asarray([a["score"]]) / 8)), #just a pseudo prob for now
@@ -201,14 +206,28 @@ def predict(self, question, paragrahps, meta_data_paragraphs=None, top_k=None, m
"offset_end": a["offset_answer_end"] - a["offset_context_start"],
"document_id": a["document_id"]}
answers.append(cur)
# if cur answer is the best, we store the gap to "no answer" in this paragraph
if a["score"] > best_score_answer:
best_score_answer = a["score"]
no_ans_gap = pred["predictions"][0]["no_ans_gap"]
no_ans_score = (best_score_answer+no_ans_gap)-self.no_ans_threshold

# add no answer option from the paragraph with the best answer
cur = {"answer": "",
"score": no_ans_score,
"probability": float(expit(np.asarray(no_ans_score) / 8)), # just a pseudo prob for now
"context": "",
"offset_start": -1,
"offset_end": -1,
"document_id": None}
answers.append(cur)

# sort answers by their `probability` and select top-k
answers = sorted(
answers, key=lambda k: k["probability"], reverse=True
)
answers = answers[:top_k]

result = {"question": question,
"answers": answers}

        return result
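
---

To make the aggregation introduced in this commit easier to follow outside the diff, here is a minimal, self-contained sketch of the "no answer" logic. The function name `aggregate_with_no_answer`, the simplified prediction dicts, the example scores, and the threshold value of 0 are all illustrative assumptions; the real FARM predictions are nested one level deeper (`pred["predictions"][0]["answers"]`) and carry offsets, context, and document ids as shown above.

```python
# Illustrative sketch only: simplified input shape, invented example data.
from scipy.special import expit


def aggregate_with_no_answer(predictions, no_ans_threshold=0, top_k=3):
    answers = []
    best_score_answer = 0
    no_ans_gap = 0  # defensively initialized; the diff reads it from the best paragraph
    for pred in predictions:
        for a in pred["answers"]:
            if not a["answer"]:  # skip per-paragraph "no answer" entries
                continue
            answers.append({
                "answer": a["answer"],
                "score": a["score"],
                # same pseudo-probability squashing as in the reader
                "probability": float(expit(a["score"] / 8)),
            })
            # remember the no_ans_gap of the paragraph holding the best real answer
            if a["score"] > best_score_answer:
                best_score_answer = a["score"]
                no_ans_gap = pred["no_ans_gap"]

    # "no answer" competes with the real answers via the boosted score
    no_ans_score = best_score_answer + no_ans_gap - no_ans_threshold
    answers.append({
        "answer": "",
        "score": no_ans_score,
        "probability": float(expit(no_ans_score / 8)),
    })

    # sort by probability and keep the top-k candidates
    return sorted(answers, key=lambda k: k["probability"], reverse=True)[:top_k]


# Two paragraphs: one confident answer, one weak one (made-up numbers).
preds = [
    {"answers": [{"answer": "Berlin", "score": 12.0}, {"answer": "", "score": 0.0}],
     "no_ans_gap": -8.0},
    {"answers": [{"answer": "Paris", "score": 3.0}], "no_ans_gap": -1.0},
]
print(aggregate_with_no_answer(preds))
```

With these numbers the no-answer candidate scores 12.0 + (-8.0) - 0 = 4.0, so it ranks between "Berlin" and "Paris"; a larger `no_ans_threshold` would push it further down the list.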