Commit

random sampling, adding contexts and use None when predicted answer is missing
Maria Rosario Mestre authored and committed on Jun 11, 2024
1 parent cbfcde5 commit 9f1eb03
Showing 1 changed file with 19 additions and 7 deletions.
26 changes: 19 additions & 7 deletions evaluations/evaluation_aragog.py
@@ -3,8 +3,8 @@
import os
import random
from pathlib import Path
import datetime
from typing import Tuple, List

from openai import BadRequestError

from haystack import Pipeline
@@ -67,12 +67,16 @@ def run_basic_rag(doc_store, sample_questions, embedding_model, top_k):
        try:
            response = rag.run(
                data={"query_embedder": {"text": q}, "prompt_builder": {"question": q}, "answer_builder": {"query": q}})
            predicted_answers.append(response["answer_builder"]["answers"][0].data)
            predicted_answer = response["answer_builder"]["answers"][0].data
            if predicted_answer.lower() in ["none", "none."]:
                predicted_answers.append(None)
            else:
                predicted_answers.append(predicted_answer)
            retrieved_contexts.append([d.content for d in response['answer_builder']['answers'][0].documents])
        except BadRequestError as e:
            print(f"Error with question: {q}")
            print(e)
            predicted_answers.append("error")
            predicted_answers.append(None)
            retrieved_contexts.append(retrieved_contexts)

    return retrieved_contexts, predicted_answers
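
For context on the run_basic_rag change above: answers that come back from the generator as the literal string "None"/"None." are now stored as a Python None, so downstream evaluation can treat them as missing rather than as textual answers. A minimal sketch of that normalization, assuming a standalone helper (normalize_prediction is an illustrative name, not part of the file, which inlines the check):

def normalize_prediction(raw_answer: str):
    # Illustrative helper only: treat literal "None"-style generations as a missing answer.
    if raw_answer.lower() in ["none", "none."]:
        return None
    return raw_answer

# normalize_prediction("None.") -> None; normalize_prediction("Paris") -> "Paris"
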
@@ -101,7 +105,10 @@ def run_evaluation(sample_questions, sample_answers, retrieved_contexts, predict
"sas": eval_pipeline_results['sas']
}

inputs = {'questions': sample_questions, 'true_answers': sample_answers, 'predicted_answers': predicted_answers}
inputs = {'questions': sample_questions,
'contexts': retrieved_contexts,
'true_answers': sample_answers,
'predicted_answers': predicted_answers}

return results, inputs
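
The 'contexts' entry added to inputs above means each question now carries the documents retrieved for it into the evaluation results. A rough illustration of the parallel-list shape these inputs take (all values below are invented placeholders, not data from the repository):

inputs = {
    'questions': ["example question 1", "example question 2"],
    'contexts': [["passage retrieved for question 1"], ["passage retrieved for question 2"]],
    'true_answers': ["gold answer 1", "gold answer 2"],
    'predicted_answers': ["model answer 1", None],  # None marks a missing/failed prediction
}
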

@@ -136,7 +143,7 @@ def parameter_tuning(questions, answers, out_path: str):
print(f"Running evaluation")
results, inputs = run_evaluation(questions, answers, retrieved_contexts, predicted_answers, embedding_model)
eval_results = EvaluationRunResult(run_name=name_params, inputs=inputs, results=results)
eval_results.score_report().to_csv(f"{out_path}/score_report_{name_params}.csv", index=False)
eval_results.score_report().to_csv(f"{out_path}/score_report_{name_params}.csv")
eval_results.to_pandas().to_csv(f"{out_path}/detailed_{name_params}.csv", index=False)


@@ -153,15 +160,20 @@ def create_args():


def main():
    t1 = datetime.datetime.now()
    args = create_args()
    questions, answers = read_question_answers()

    if args.sample:
        random.seed(42)
        questions = random.sample(questions, args.sample)
        answers = random.sample(answers, args.sample)
        sampled_ids = random.sample(range(len(questions)), args.sample)
        questions = [questions[id] for id in sampled_ids]
        answers = [answers[id] for id in sampled_ids]

    parameter_tuning(questions, answers, args.output_dir)
    t2 = datetime.datetime.now()

    print(f"Experiment took {t2 - t1}")


if __name__ == '__main__':
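
On the sampling change in main(): drawing one list of indices and applying it to both lists keeps every question paired with its own answer, which the previous pair of independent random.sample calls did not guarantee. A small self-contained sketch of the idea (the toy lists are illustrative only):

import random

questions = ["q1", "q2", "q3", "q4"]
answers = ["a1", "a2", "a3", "a4"]

random.seed(42)
sampled_ids = random.sample(range(len(questions)), 2)
sampled_questions = [questions[i] for i in sampled_ids]
sampled_answers = [answers[i] for i in sampled_ids]
# Each sampled question keeps the answer at the same index, so pairs stay aligned;
# sampling the two lists separately would select positions independently.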
