From 4f5d0412678b8669aa6c380ccc9091f4776b4952 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Tue, 11 Jun 2024 17:51:31 +0200 Subject: [PATCH 01/17] wip --- .../sentence-window-retrieval.py | 33 +++++++ evaluations/evaluation_aragog_harness.py | 99 +++++++++++++++++++ 2 files changed, 132 insertions(+) create mode 100644 evaluations/architectures/sentence-window-retrieval.py create mode 100644 evaluations/evaluation_aragog_harness.py diff --git a/evaluations/architectures/sentence-window-retrieval.py b/evaluations/architectures/sentence-window-retrieval.py new file mode 100644 index 0000000..1b848bf --- /dev/null +++ b/evaluations/architectures/sentence-window-retrieval.py @@ -0,0 +1,33 @@ +from haystack import Pipeline +from haystack.components.builders import PromptBuilder, AnswerBuilder +from haystack.components.embedders import SentenceTransformersTextEmbedder +from haystack.components.generators import OpenAIGenerator +from haystack.components.retrievers import InMemoryEmbeddingRetriever + +""" +The sentence-window approach breaks down documents into smaller chunks (sentences) and indexes them separately. + +During retrieval, we retrieve the sentences that are most relevant to the query via similarity search and replace the +sentence with the full surrounding context, using a static sentence-window around the context. +""" + + +def indexing(embedding_model: str, chunk_size: int): + full_path = Path(base_path) + files_path = full_path / "papers_for_questions" + document_store = InMemoryDocumentStore() + pipeline = Pipeline() + pipeline.add_component("converter", PyPDFToDocument()) + pipeline.add_component("cleaner", DocumentCleaner()) + pipeline.add_component("splitter", DocumentSplitter(split_length=chunk_size)) # splitting by word + pipeline.add_component("writer", DocumentWriter(document_store=document_store, policy=DuplicatePolicy.SKIP)) + pipeline.add_component("embedder", SentenceTransformersDocumentEmbedder(embedding_model)) + pipeline.connect("converter", "cleaner") + pipeline.connect("cleaner", "splitter") + pipeline.connect("splitter", "embedder") + pipeline.connect("embedder", "writer") + pdf_files = [full_path / "papers_for_questions" / f_name for f_name in os.listdir(files_path)] + pipeline.run({"converter": {"sources": pdf_files}}) + + return document_store + diff --git a/evaluations/evaluation_aragog_harness.py b/evaluations/evaluation_aragog_harness.py new file mode 100644 index 0000000..16b0a05 --- /dev/null +++ b/evaluations/evaluation_aragog_harness.py @@ -0,0 +1,99 @@ +# pip install haystack-experimental git+https://github.com/deepset-ai/haystack-experimental.git + +import json +import os +from pathlib import Path +from typing import Tuple, List + +from architectures.basic_rag import basic_rag +from haystack import Pipeline +from haystack.components.converters import PyPDFToDocument +from haystack.components.embedders import SentenceTransformersDocumentEmbedder +from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter +from haystack.components.writers import DocumentWriter +from haystack.document_stores.in_memory import InMemoryDocumentStore +from haystack.document_stores.types import DuplicatePolicy +from haystack_experimental.evaluation.harness.rag import ( + RAGEvaluationHarness, + RAGEvaluationMetric, + RAGEvaluationInput, + RAGExpectedComponent, RAGExpectedComponentMetadata, +) +from utils.utils import timeit + +base_path = "../datasets/ARAGOG/" + + +@timeit +def indexing(embedding_model: str, chunk_size: int): + full_path = Path(base_path) + files_path = full_path / "papers_for_questions" + document_store = InMemoryDocumentStore() + pipeline = Pipeline() + pipeline.add_component("converter", PyPDFToDocument()) + pipeline.add_component("cleaner", DocumentCleaner()) + pipeline.add_component("splitter", DocumentSplitter(split_length=chunk_size)) # splitting by word + pipeline.add_component("writer", DocumentWriter(document_store=document_store, policy=DuplicatePolicy.SKIP)) + pipeline.add_component("embedder", SentenceTransformersDocumentEmbedder(embedding_model)) + pipeline.connect("converter", "cleaner") + pipeline.connect("cleaner", "splitter") + pipeline.connect("splitter", "embedder") + pipeline.connect("embedder", "writer") + pdf_files = [full_path / "papers_for_questions" / f_name for f_name in os.listdir(files_path)] + pipeline.run({"converter": {"sources": pdf_files}}) + + return document_store + + +def read_question_answers() -> Tuple[List[str], List[str]]: + with open(base_path + "eval_questions.json", "r") as f: + data = json.load(f) + questions = data["questions"] + answers = data["ground_truths"] + return questions, answers + + +def main(): + + questions, answers = read_question_answers() + embedding_model = "sentence-transformers/all-MiniLM-L6-v2" + chunk_size = 32 + top_k = 1 + doc_store = indexing(embedding_model, chunk_size) + + rag = basic_rag(document_store=doc_store, embedding_model=embedding_model, top_k=top_k) + + rag_components = { + RAGExpectedComponent.QUERY_PROCESSOR: RAGExpectedComponentMetadata( + name="query_embedder", input_mapping={"query": "text"}), + RAGExpectedComponent.DOCUMENT_RETRIEVER: RAGExpectedComponentMetadata( + name="retriever", output_mapping={"retrieved_documents": "documents"}), + RAGExpectedComponent.RESPONSE_GENERATOR: RAGExpectedComponentMetadata( + name="llm", output_mapping={"replies": "replies"}) + } + + emb_eval_harness = RAGEvaluationHarness( + rag, + metrics={RAGEvaluationMetric.SEMANTIC_ANSWER_SIMILARITY}, + rag_components=rag_components + ) + + input_questions = questions[:3] + gold_answers = answers[:3] + + eval_harness_input = RAGEvaluationInput( + queries=input_questions, + ground_truth_answers=gold_answers, + additional_rag_inputs={ + "prompt_builder": {"question": [q for q in input_questions]}, + "answer_builder": {"query": [q for q in input_questions]}, + }, + ) + + emb_eval_run = emb_eval_harness.run(inputs=eval_harness_input, run_name="emb_eval_run") + + print(emb_eval_run) + + +if __name__ == '__main__': + main() \ No newline at end of file From 27f7347ff110fabdb7551b2c1043827f9249f999 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Fri, 14 Jun 2024 11:22:35 +0200 Subject: [PATCH 02/17] wip --- evaluations/evaluation_aragog_harness.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/evaluations/evaluation_aragog_harness.py b/evaluations/evaluation_aragog_harness.py index 16b0a05..966c17a 100644 --- a/evaluations/evaluation_aragog_harness.py +++ b/evaluations/evaluation_aragog_harness.py @@ -94,6 +94,8 @@ def main(): print(emb_eval_run) + # ToDo: run the evaluation harness with hyde rag + if __name__ == '__main__': main() \ No newline at end of file From 951c9848a998af46bb30e8e9039cd679f57b756e Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Tue, 18 Jun 2024 10:30:48 +0200 Subject: [PATCH 03/17] wip: aragog with harness --- .gitignore | 1 + datasets/ARAGOG/.DS_Store | Bin 6148 -> 0 bytes evaluations/evaluation_aragog_harness.py | 74 +++++++++++------- .../evaluation_sentence_window_retrieval.py | 0 4 files changed, 46 insertions(+), 29 deletions(-) delete mode 100644 datasets/ARAGOG/.DS_Store create mode 100644 evaluations/evaluation_sentence_window_retrieval.py diff --git a/.gitignore b/.gitignore index 5243b13..d1da556 100644 --- a/.gitignore +++ b/.gitignore @@ -162,3 +162,4 @@ cython_debug/ # MacOS .DS_Store */.DS_Store +**/.DS_Store diff --git a/datasets/ARAGOG/.DS_Store b/datasets/ARAGOG/.DS_Store deleted file mode 100644 index 129b546deda7f3aa3b333a7538b71f5dd63623d1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHK%}N6?5T3NvZYfF+3OxqA7OX!l6)#JzFW`zERO&7*y0~s?ciSFHVbA(PK8erc zOp=PF>O~MKGcfrklbM8kSuz^{5baTK51;}74k}@;gv}R1TQ z;{`$zwCOEDC@s1cGlMvSB1|fxNfq{qAxt{@rHykfW(G|<2)#1SV^d4m>EP53jYXb8n|Hw{*-}t14>ba diff --git a/evaluations/evaluation_aragog_harness.py b/evaluations/evaluation_aragog_harness.py index 966c17a..045ce6e 100644 --- a/evaluations/evaluation_aragog_harness.py +++ b/evaluations/evaluation_aragog_harness.py @@ -1,11 +1,8 @@ -# pip install haystack-experimental git+https://github.com/deepset-ai/haystack-experimental.git - import json import os from pathlib import Path from typing import Tuple, List -from architectures.basic_rag import basic_rag from haystack import Pipeline from haystack.components.converters import PyPDFToDocument from haystack.components.embedders import SentenceTransformersDocumentEmbedder @@ -19,6 +16,9 @@ RAGEvaluationInput, RAGExpectedComponent, RAGExpectedComponentMetadata, ) + +from architectures.basic_rag import basic_rag +from architectures.hyde_rag import rag_with_hyde from utils.utils import timeit base_path = "../datasets/ARAGOG/" @@ -53,16 +53,48 @@ def read_question_answers() -> Tuple[List[str], List[str]]: return questions, answers -def main(): +@timeit +def eval_pipeline(questions, answers, pipeline, components, run_name): + + pipeline_eval_harness = RAGEvaluationHarness( + pipeline, + metrics={ + RAGEvaluationMetric.SEMANTIC_ANSWER_SIMILARITY, + RAGEvaluationMetric.ANSWER_FAITHFULNESS + # ToDo: RAGEvaluationMetric.CONTEXT_RELEVANCE + }, + rag_components=components + ) + hyde_eval_harness_input = RAGEvaluationInput( + queries=questions, + ground_truth_answers=answers, + additional_rag_inputs={ + "prompt_builder": {"question": [q for q in questions]}, + "answer_builder": {"query": [q for q in questions]}, + }, + ) + return pipeline_eval_harness.run(inputs=hyde_eval_harness_input, run_name=run_name) + + +def main(): questions, answers = read_question_answers() - embedding_model = "sentence-transformers/all-MiniLM-L6-v2" - chunk_size = 32 - top_k = 1 + embedding_model = "sentence-transformers/msmarco-distilroberta-base-v2" + chunk_size = 128 + top_k = 3 doc_store = indexing(embedding_model, chunk_size) - rag = basic_rag(document_store=doc_store, embedding_model=embedding_model, top_k=top_k) + hyde_rag = rag_with_hyde(document_store=doc_store, embedding_model=embedding_model, top_k=top_k) + hyde_components = { + RAGExpectedComponent.QUERY_PROCESSOR: RAGExpectedComponentMetadata( + name="hyde", input_mapping={"query": "query"}), + RAGExpectedComponent.DOCUMENT_RETRIEVER: RAGExpectedComponentMetadata( + name="retriever", output_mapping={"retrieved_documents": "documents"}), + RAGExpectedComponent.RESPONSE_GENERATOR: RAGExpectedComponentMetadata( + name="llm", output_mapping={"replies": "replies"}) + } + rag = basic_rag(document_store=doc_store, embedding_model=embedding_model, top_k=top_k) rag_components = { RAGExpectedComponent.QUERY_PROCESSOR: RAGExpectedComponentMetadata( name="query_embedder", input_mapping={"query": "text"}), @@ -72,29 +104,13 @@ def main(): name="llm", output_mapping={"replies": "replies"}) } - emb_eval_harness = RAGEvaluationHarness( - rag, - metrics={RAGEvaluationMetric.SEMANTIC_ANSWER_SIMILARITY}, - rag_components=rag_components - ) - - input_questions = questions[:3] - gold_answers = answers[:3] + baseline_rag_eval_output = eval_pipeline(questions[:25], answers[:25], rag, rag_components, "baseline_rag") + hyde_rag_eval_output = eval_pipeline(questions[:25], answers[:25], hyde_rag, hyde_components, "hyde_rag") - eval_harness_input = RAGEvaluationInput( - queries=input_questions, - ground_truth_answers=gold_answers, - additional_rag_inputs={ - "prompt_builder": {"question": [q for q in input_questions]}, - "answer_builder": {"query": [q for q in input_questions]}, - }, + comparative_df = baseline_rag_eval_output.results.comparative_individual_scores_report( + hyde_rag_eval_output.results, keep_columns=["response"] ) - - emb_eval_run = emb_eval_harness.run(inputs=eval_harness_input, run_name="emb_eval_run") - - print(emb_eval_run) - - # ToDo: run the evaluation harness with hyde rag + comparative_df.to_csv("comparative_scores.csv") if __name__ == '__main__': diff --git a/evaluations/evaluation_sentence_window_retrieval.py b/evaluations/evaluation_sentence_window_retrieval.py new file mode 100644 index 0000000..e69de29 From 21711cbba83825eb5a211d90583edde4a45f26d2 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Tue, 18 Jun 2024 10:32:47 +0200 Subject: [PATCH 04/17] cleaning up --- .../sentence-window-retrieval.py | 33 ------------------- .../evaluation_sentence_window_retrieval.py | 0 2 files changed, 33 deletions(-) delete mode 100644 evaluations/architectures/sentence-window-retrieval.py delete mode 100644 evaluations/evaluation_sentence_window_retrieval.py diff --git a/evaluations/architectures/sentence-window-retrieval.py b/evaluations/architectures/sentence-window-retrieval.py deleted file mode 100644 index 1b848bf..0000000 --- a/evaluations/architectures/sentence-window-retrieval.py +++ /dev/null @@ -1,33 +0,0 @@ -from haystack import Pipeline -from haystack.components.builders import PromptBuilder, AnswerBuilder -from haystack.components.embedders import SentenceTransformersTextEmbedder -from haystack.components.generators import OpenAIGenerator -from haystack.components.retrievers import InMemoryEmbeddingRetriever - -""" -The sentence-window approach breaks down documents into smaller chunks (sentences) and indexes them separately. - -During retrieval, we retrieve the sentences that are most relevant to the query via similarity search and replace the -sentence with the full surrounding context, using a static sentence-window around the context. -""" - - -def indexing(embedding_model: str, chunk_size: int): - full_path = Path(base_path) - files_path = full_path / "papers_for_questions" - document_store = InMemoryDocumentStore() - pipeline = Pipeline() - pipeline.add_component("converter", PyPDFToDocument()) - pipeline.add_component("cleaner", DocumentCleaner()) - pipeline.add_component("splitter", DocumentSplitter(split_length=chunk_size)) # splitting by word - pipeline.add_component("writer", DocumentWriter(document_store=document_store, policy=DuplicatePolicy.SKIP)) - pipeline.add_component("embedder", SentenceTransformersDocumentEmbedder(embedding_model)) - pipeline.connect("converter", "cleaner") - pipeline.connect("cleaner", "splitter") - pipeline.connect("splitter", "embedder") - pipeline.connect("embedder", "writer") - pdf_files = [full_path / "papers_for_questions" / f_name for f_name in os.listdir(files_path)] - pipeline.run({"converter": {"sources": pdf_files}}) - - return document_store - diff --git a/evaluations/evaluation_sentence_window_retrieval.py b/evaluations/evaluation_sentence_window_retrieval.py deleted file mode 100644 index e69de29..0000000 From 84a9b597021f32b675b5680b086bca63d43a819f Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Tue, 18 Jun 2024 11:59:14 +0200 Subject: [PATCH 05/17] wip --- evaluations/evaluation_aragog_harness.py | 26 +++++++++++++----------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/evaluations/evaluation_aragog_harness.py b/evaluations/evaluation_aragog_harness.py index 045ce6e..7408ebc 100644 --- a/evaluations/evaluation_aragog_harness.py +++ b/evaluations/evaluation_aragog_harness.py @@ -40,7 +40,7 @@ def indexing(embedding_model: str, chunk_size: int): pipeline.connect("splitter", "embedder") pipeline.connect("embedder", "writer") pdf_files = [full_path / "papers_for_questions" / f_name for f_name in os.listdir(files_path)] - pipeline.run({"converter": {"sources": pdf_files}}) + pipeline.run({"converter": {"sources": pdf_files[0:3]}}) return document_store @@ -59,9 +59,9 @@ def eval_pipeline(questions, answers, pipeline, components, run_name): pipeline_eval_harness = RAGEvaluationHarness( pipeline, metrics={ - RAGEvaluationMetric.SEMANTIC_ANSWER_SIMILARITY, - RAGEvaluationMetric.ANSWER_FAITHFULNESS - # ToDo: RAGEvaluationMetric.CONTEXT_RELEVANCE + RAGEvaluationMetric.SEMANTIC_ANSWER_SIMILARITY, # how to specify the embedding model to use? + RAGEvaluationMetric.ANSWER_FAITHFULNESS, + # RAGEvaluationMetric.CONTEXT_RELEVANCE }, rag_components=components ) @@ -84,33 +84,35 @@ def main(): top_k = 3 doc_store = indexing(embedding_model, chunk_size) - hyde_rag = rag_with_hyde(document_store=doc_store, embedding_model=embedding_model, top_k=top_k) - hyde_components = { + rag = basic_rag(document_store=doc_store, embedding_model=embedding_model, top_k=top_k) + rag_components = { RAGExpectedComponent.QUERY_PROCESSOR: RAGExpectedComponentMetadata( - name="hyde", input_mapping={"query": "query"}), + name="query_embedder", input_mapping={"query": "text"}), RAGExpectedComponent.DOCUMENT_RETRIEVER: RAGExpectedComponentMetadata( name="retriever", output_mapping={"retrieved_documents": "documents"}), RAGExpectedComponent.RESPONSE_GENERATOR: RAGExpectedComponentMetadata( name="llm", output_mapping={"replies": "replies"}) } + baseline_rag_eval_output = eval_pipeline(questions[:25], answers[:25], rag, rag_components, "baseline_rag") - rag = basic_rag(document_store=doc_store, embedding_model=embedding_model, top_k=top_k) - rag_components = { + """ + hyde_rag = rag_with_hyde(document_store=doc_store, embedding_model=embedding_model, top_k=top_k) + hyde_components = { RAGExpectedComponent.QUERY_PROCESSOR: RAGExpectedComponentMetadata( - name="query_embedder", input_mapping={"query": "text"}), + name="hyde", input_mapping={"query": "query"}), RAGExpectedComponent.DOCUMENT_RETRIEVER: RAGExpectedComponentMetadata( name="retriever", output_mapping={"retrieved_documents": "documents"}), RAGExpectedComponent.RESPONSE_GENERATOR: RAGExpectedComponentMetadata( name="llm", output_mapping={"replies": "replies"}) } - - baseline_rag_eval_output = eval_pipeline(questions[:25], answers[:25], rag, rag_components, "baseline_rag") + hyde_rag_eval_output = eval_pipeline(questions[:25], answers[:25], hyde_rag, hyde_components, "hyde_rag") comparative_df = baseline_rag_eval_output.results.comparative_individual_scores_report( hyde_rag_eval_output.results, keep_columns=["response"] ) comparative_df.to_csv("comparative_scores.csv") + """ if __name__ == '__main__': From b3c8245e2df3636f12d12352b89b10b067b26859 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Tue, 18 Jun 2024 16:53:21 +0200 Subject: [PATCH 06/17] adding context relevance --- evaluations/evaluation_aragog_harness.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluations/evaluation_aragog_harness.py b/evaluations/evaluation_aragog_harness.py index 7408ebc..3f05692 100644 --- a/evaluations/evaluation_aragog_harness.py +++ b/evaluations/evaluation_aragog_harness.py @@ -61,7 +61,7 @@ def eval_pipeline(questions, answers, pipeline, components, run_name): metrics={ RAGEvaluationMetric.SEMANTIC_ANSWER_SIMILARITY, # how to specify the embedding model to use? RAGEvaluationMetric.ANSWER_FAITHFULNESS, - # RAGEvaluationMetric.CONTEXT_RELEVANCE + RAGEvaluationMetric.CONTEXT_RELEVANCE }, rag_components=components ) From 6ad42fd75e96b52182676d42f263c16b4b39ac54 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Wed, 19 Jun 2024 09:54:23 +0200 Subject: [PATCH 07/17] wip: component bug --- evaluations/evaluation_aragog_harness.py | 26 +++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/evaluations/evaluation_aragog_harness.py b/evaluations/evaluation_aragog_harness.py index 3f05692..d40b4af 100644 --- a/evaluations/evaluation_aragog_harness.py +++ b/evaluations/evaluation_aragog_harness.py @@ -14,7 +14,9 @@ RAGEvaluationHarness, RAGEvaluationMetric, RAGEvaluationInput, - RAGExpectedComponent, RAGExpectedComponentMetadata, + RAGExpectedComponent, + RAGExpectedComponentMetadata, + RAGEvaluationOverrides ) from architectures.basic_rag import basic_rag @@ -54,12 +56,12 @@ def read_question_answers() -> Tuple[List[str], List[str]]: @timeit -def eval_pipeline(questions, answers, pipeline, components, run_name): +def eval_pipeline(questions, answers, pipeline, components, run_name, sas_embedding_model): pipeline_eval_harness = RAGEvaluationHarness( pipeline, metrics={ - RAGEvaluationMetric.SEMANTIC_ANSWER_SIMILARITY, # how to specify the embedding model to use? + RAGEvaluationMetric.SEMANTIC_ANSWER_SIMILARITY, RAGEvaluationMetric.ANSWER_FAITHFULNESS, RAGEvaluationMetric.CONTEXT_RELEVANCE }, @@ -74,7 +76,16 @@ def eval_pipeline(questions, answers, pipeline, components, run_name): "answer_builder": {"query": [q for q in questions]}, }, ) - return pipeline_eval_harness.run(inputs=hyde_eval_harness_input, run_name=run_name) + + overrides = RAGEvaluationOverrides( + eval_pipeline={RAGEvaluationMetric.SEMANTIC_ANSWER_SIMILARITY: {"model": sas_embedding_model}} + ) + + return pipeline_eval_harness.run( + inputs=hyde_eval_harness_input, + run_name=run_name, + overrides=overrides + ) def main(): @@ -93,7 +104,9 @@ def main(): RAGExpectedComponent.RESPONSE_GENERATOR: RAGExpectedComponentMetadata( name="llm", output_mapping={"replies": "replies"}) } - baseline_rag_eval_output = eval_pipeline(questions[:25], answers[:25], rag, rag_components, "baseline_rag") + baseline_rag_eval_output = eval_pipeline( + questions[:25], answers[:25], rag, rag_components, "baseline_rag", embedding_model + ) """ hyde_rag = rag_with_hyde(document_store=doc_store, embedding_model=embedding_model, top_k=top_k) @@ -107,11 +120,10 @@ def main(): } hyde_rag_eval_output = eval_pipeline(questions[:25], answers[:25], hyde_rag, hyde_components, "hyde_rag") - comparative_df = baseline_rag_eval_output.results.comparative_individual_scores_report( hyde_rag_eval_output.results, keep_columns=["response"] ) - comparative_df.to_csv("comparative_scores.csv") + comparative_df.to_csv("comparative_scores.csv", index=False) """ From 23bf1caf2cf4c87ed83ead4ddd6b49be9820add4 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Wed, 19 Jun 2024 13:52:56 +0200 Subject: [PATCH 08/17] comparative harness --- evaluations/evaluation_aragog_harness.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/evaluations/evaluation_aragog_harness.py b/evaluations/evaluation_aragog_harness.py index d40b4af..e484e3e 100644 --- a/evaluations/evaluation_aragog_harness.py +++ b/evaluations/evaluation_aragog_harness.py @@ -108,7 +108,6 @@ def main(): questions[:25], answers[:25], rag, rag_components, "baseline_rag", embedding_model ) - """ hyde_rag = rag_with_hyde(document_store=doc_store, embedding_model=embedding_model, top_k=top_k) hyde_components = { RAGExpectedComponent.QUERY_PROCESSOR: RAGExpectedComponentMetadata( @@ -119,13 +118,12 @@ def main(): name="llm", output_mapping={"replies": "replies"}) } - hyde_rag_eval_output = eval_pipeline(questions[:25], answers[:25], hyde_rag, hyde_components, "hyde_rag") + hyde_rag_eval_output = eval_pipeline(questions[:25], answers[:25], hyde_rag, hyde_components, "hyde_rag", embedding_model) comparative_df = baseline_rag_eval_output.results.comparative_individual_scores_report( hyde_rag_eval_output.results, keep_columns=["response"] ) comparative_df.to_csv("comparative_scores.csv", index=False) - """ if __name__ == '__main__': - main() \ No newline at end of file + main() From 73e6c2c52f557b3f0e4c72f8d6da0c0f8cfd3f78 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Wed, 19 Jun 2024 14:58:23 +0200 Subject: [PATCH 09/17] raise_on_failure=False for LLM-based evaluators --- evaluations/architectures/hyde_rag.py | 6 ++--- evaluations/evaluation_aragog_harness.py | 30 +++++++++++++++--------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/evaluations/architectures/hyde_rag.py b/evaluations/architectures/hyde_rag.py index 130f3a0..578cca2 100644 --- a/evaluations/architectures/hyde_rag.py +++ b/evaluations/architectures/hyde_rag.py @@ -80,10 +80,10 @@ def run(self, query: str): stacked_embeddings = array([doc.embedding for doc in result["embedder"]["documents"]]) avg_embeddings = mean(stacked_embeddings, axis=0) hyde_vector = avg_embeddings.reshape((1, len(avg_embeddings))) - return {"hypothetical_embedding": hyde_vector[0].tolist()} + return {"hypothetical_embedding": hyde_vector[0].tolist(), "documents": result["embedder"]["documents"]} -def rag_with_hyde(document_store, embedding_model, top_k=2): +def rag_with_hyde(document_store, embedding_model, nr_completions=3, top_k=2): template = """ You have to answer the following question based on the given context information only. If the context is empty or just a '\n' answer "I don't know". @@ -97,7 +97,7 @@ def rag_with_hyde(document_store, embedding_model, top_k=2): Answer: """ - hyde = HypotheticalDocumentEmbedder(embedder_model=embedding_model) + hyde = HypotheticalDocumentEmbedder(embedder_model=embedding_model, nr_completions=nr_completions) hyde_rag = Pipeline() hyde_rag.add_component("hyde", hyde) diff --git a/evaluations/evaluation_aragog_harness.py b/evaluations/evaluation_aragog_harness.py index e484e3e..4c582e5 100644 --- a/evaluations/evaluation_aragog_harness.py +++ b/evaluations/evaluation_aragog_harness.py @@ -78,7 +78,11 @@ def eval_pipeline(questions, answers, pipeline, components, run_name, sas_embedd ) overrides = RAGEvaluationOverrides( - eval_pipeline={RAGEvaluationMetric.SEMANTIC_ANSWER_SIMILARITY: {"model": sas_embedding_model}} + eval_pipeline={ + RAGEvaluationMetric.SEMANTIC_ANSWER_SIMILARITY: {"model": sas_embedding_model}, + RAGEvaluationMetric.CONTEXT_RELEVANCE: {"raise_on_failure": False}, + RAGEvaluationMetric.ANSWER_FAITHFULNESS: {"raise_on_failure": False}, + } ) return pipeline_eval_harness.run( @@ -90,12 +94,13 @@ def eval_pipeline(questions, answers, pipeline, components, run_name, sas_embedd def main(): questions, answers = read_question_answers() - embedding_model = "sentence-transformers/msmarco-distilroberta-base-v2" + embeddings = "sentence-transformers/msmarco-distilroberta-base-v2" chunk_size = 128 top_k = 3 - doc_store = indexing(embedding_model, chunk_size) + doc_store = indexing(embeddings, chunk_size) - rag = basic_rag(document_store=doc_store, embedding_model=embedding_model, top_k=top_k) + # baseline RAG + rag = basic_rag(document_store=doc_store, embedding_model=embeddings, top_k=top_k) rag_components = { RAGExpectedComponent.QUERY_PROCESSOR: RAGExpectedComponentMetadata( name="query_embedder", input_mapping={"query": "text"}), @@ -104,11 +109,10 @@ def main(): RAGExpectedComponent.RESPONSE_GENERATOR: RAGExpectedComponentMetadata( name="llm", output_mapping={"replies": "replies"}) } - baseline_rag_eval_output = eval_pipeline( - questions[:25], answers[:25], rag, rag_components, "baseline_rag", embedding_model - ) + baseline_rag_eval_output = eval_pipeline(questions, answers, rag, rag_components, "baseline_rag", embeddings) - hyde_rag = rag_with_hyde(document_store=doc_store, embedding_model=embedding_model, top_k=top_k) + # HyDE RAG + hyde_rag = rag_with_hyde(document_store=doc_store, embedding_model=embeddings, top_k=top_k) hyde_components = { RAGExpectedComponent.QUERY_PROCESSOR: RAGExpectedComponentMetadata( name="hyde", input_mapping={"query": "query"}), @@ -117,11 +121,15 @@ def main(): RAGExpectedComponent.RESPONSE_GENERATOR: RAGExpectedComponentMetadata( name="llm", output_mapping={"replies": "replies"}) } - - hyde_rag_eval_output = eval_pipeline(questions[:25], answers[:25], hyde_rag, hyde_components, "hyde_rag", embedding_model) + hyde_rag_eval_output = eval_pipeline(questions, answers, hyde_rag, hyde_components, "hyde_rag", embeddings) + + print(baseline_rag_eval_output.results.score_report()) + print(hyde_rag_eval_output.results.score_report()) + comparative_df = baseline_rag_eval_output.results.comparative_individual_scores_report( - hyde_rag_eval_output.results, keep_columns=["response"] + hyde_rag_eval_output.results, keep_columns=["responses"] ) + comparative_df.to_csv("comparative_scores.csv", index=False) From ea53812739f8906f32f2ea97026d6f316955565c Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Wed, 19 Jun 2024 18:15:04 +0200 Subject: [PATCH 10/17] wip --- README.md | 2 +- evaluations/architectures/hyde_rag.py | 4 ++-- evaluations/evaluation_aragog_harness.py | 11 +++++++---- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 46bd84a..ddbf372 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # haystack-evaluation -This repository contains examples on how to use Haystack to build different RAG architectures and evaluate their performance over different datasets. +This repository contains examples on how to use Haystack to build RAG architectures and evaluate their performance using human annotated datasets. - [RAG Techniques/Architectures](evaluations/architectures/README.md) - [Datasets](datasets/README.md) diff --git a/evaluations/architectures/hyde_rag.py b/evaluations/architectures/hyde_rag.py index 578cca2..faa41f7 100644 --- a/evaluations/architectures/hyde_rag.py +++ b/evaluations/architectures/hyde_rag.py @@ -83,10 +83,10 @@ def run(self, query: str): return {"hypothetical_embedding": hyde_vector[0].tolist(), "documents": result["embedder"]["documents"]} -def rag_with_hyde(document_store, embedding_model, nr_completions=3, top_k=2): +def rag_with_hyde(document_store, embedding_model, nr_completions=5, top_k=2): template = """ You have to answer the following question based on the given context information only. - If the context is empty or just a '\n' answer "I don't know". + If the context is empty or just a '\n' answer with None, example: "None". Context: {% for document in documents %} diff --git a/evaluations/evaluation_aragog_harness.py b/evaluations/evaluation_aragog_harness.py index 4c582e5..1e88c30 100644 --- a/evaluations/evaluation_aragog_harness.py +++ b/evaluations/evaluation_aragog_harness.py @@ -80,8 +80,8 @@ def eval_pipeline(questions, answers, pipeline, components, run_name, sas_embedd overrides = RAGEvaluationOverrides( eval_pipeline={ RAGEvaluationMetric.SEMANTIC_ANSWER_SIMILARITY: {"model": sas_embedding_model}, - RAGEvaluationMetric.CONTEXT_RELEVANCE: {"raise_on_failure": False}, - RAGEvaluationMetric.ANSWER_FAITHFULNESS: {"raise_on_failure": False}, + # RAGEvaluationMetric.CONTEXT_RELEVANCE: {"raise_on_failure": False}, + # RAGEvaluationMetric.ANSWER_FAITHFULNESS: {"raise_on_failure": False}, } ) @@ -112,7 +112,7 @@ def main(): baseline_rag_eval_output = eval_pipeline(questions, answers, rag, rag_components, "baseline_rag", embeddings) # HyDE RAG - hyde_rag = rag_with_hyde(document_store=doc_store, embedding_model=embeddings, top_k=top_k) + hyde_rag = rag_with_hyde(document_store=doc_store, embedding_model=embeddings, top_k=top_k, nr_completions=5) hyde_components = { RAGExpectedComponent.QUERY_PROCESSOR: RAGExpectedComponentMetadata( name="hyde", input_mapping={"query": "query"}), @@ -124,14 +124,17 @@ def main(): hyde_rag_eval_output = eval_pipeline(questions, answers, hyde_rag, hyde_components, "hyde_rag", embeddings) print(baseline_rag_eval_output.results.score_report()) + print() print(hyde_rag_eval_output.results.score_report()) comparative_df = baseline_rag_eval_output.results.comparative_individual_scores_report( - hyde_rag_eval_output.results, keep_columns=["responses"] + hyde_rag_eval_output.results, keep_columns=["responses", "ground_truth_answers"] ) comparative_df.to_csv("comparative_scores.csv", index=False) + # ToDo: try with SQuAD dataset and see retrieval performance + if __name__ == '__main__': main() From 20c72721503d80091d772b144c01aac33cc46064 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Thu, 20 Jun 2024 12:27:59 +0200 Subject: [PATCH 11/17] wip --- evaluations/evaluation_aragog_harness.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/evaluations/evaluation_aragog_harness.py b/evaluations/evaluation_aragog_harness.py index 1e88c30..4f347a2 100644 --- a/evaluations/evaluation_aragog_harness.py +++ b/evaluations/evaluation_aragog_harness.py @@ -80,8 +80,6 @@ def eval_pipeline(questions, answers, pipeline, components, run_name, sas_embedd overrides = RAGEvaluationOverrides( eval_pipeline={ RAGEvaluationMetric.SEMANTIC_ANSWER_SIMILARITY: {"model": sas_embedding_model}, - # RAGEvaluationMetric.CONTEXT_RELEVANCE: {"raise_on_failure": False}, - # RAGEvaluationMetric.ANSWER_FAITHFULNESS: {"raise_on_failure": False}, } ) @@ -99,6 +97,9 @@ def main(): top_k = 3 doc_store = indexing(embeddings, chunk_size) + questions = questions[0:10] + answers = answers[0:10] + # baseline RAG rag = basic_rag(document_store=doc_store, embedding_model=embeddings, top_k=top_k) rag_components = { From 0dad1f219615ca45b3975e17dd751eede026ab88 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Thu, 20 Jun 2024 13:00:43 +0200 Subject: [PATCH 12/17] fixing indexing --- evaluations/evaluation_aragog_harness.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/evaluations/evaluation_aragog_harness.py b/evaluations/evaluation_aragog_harness.py index 4f347a2..62d39ce 100644 --- a/evaluations/evaluation_aragog_harness.py +++ b/evaluations/evaluation_aragog_harness.py @@ -42,7 +42,7 @@ def indexing(embedding_model: str, chunk_size: int): pipeline.connect("splitter", "embedder") pipeline.connect("embedder", "writer") pdf_files = [full_path / "papers_for_questions" / f_name for f_name in os.listdir(files_path)] - pipeline.run({"converter": {"sources": pdf_files[0:3]}}) + pipeline.run({"converter": {"sources": pdf_files}}) return document_store @@ -97,9 +97,6 @@ def main(): top_k = 3 doc_store = indexing(embeddings, chunk_size) - questions = questions[0:10] - answers = answers[0:10] - # baseline RAG rag = basic_rag(document_store=doc_store, embedding_model=embeddings, top_k=top_k) rag_components = { @@ -134,8 +131,6 @@ def main(): comparative_df.to_csv("comparative_scores.csv", index=False) - # ToDo: try with SQuAD dataset and see retrieval performance - if __name__ == '__main__': main() From fe930f584333e1e7937dd61d76e37b26778a63a4 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Thu, 20 Jun 2024 13:49:16 +0200 Subject: [PATCH 13/17] cleaning and adding harness example --- ... => analyze_aragog_parameter_search.ipynb} | 0 .../comparative_scores.csv | 134 ++++++++++++++++++ ...-MiniLM-L6-v2__top_k:1__chunk_size:128.csv | 0 ...-MiniLM-L6-v2__top_k:1__chunk_size:256.csv | 0 ...l-MiniLM-L6-v2__top_k:1__chunk_size:64.csv | 0 ...-MiniLM-L6-v2__top_k:2__chunk_size:128.csv | 0 ...-MiniLM-L6-v2__top_k:2__chunk_size:256.csv | 0 ...l-MiniLM-L6-v2__top_k:2__chunk_size:64.csv | 0 ...-MiniLM-L6-v2__top_k:3__chunk_size:128.csv | 0 ...-MiniLM-L6-v2__top_k:3__chunk_size:256.csv | 0 ...l-MiniLM-L6-v2__top_k:3__chunk_size:64.csv | 0 ...mpnet-base-v2__top_k:1__chunk_size:128.csv | 0 ...mpnet-base-v2__top_k:1__chunk_size:256.csv | 0 ...-mpnet-base-v2__top_k:1__chunk_size:64.csv | 0 ...mpnet-base-v2__top_k:2__chunk_size:128.csv | 0 ...mpnet-base-v2__top_k:2__chunk_size:256.csv | 0 ...-mpnet-base-v2__top_k:2__chunk_size:64.csv | 0 ...mpnet-base-v2__top_k:3__chunk_size:128.csv | 0 ...mpnet-base-v2__top_k:3__chunk_size:256.csv | 0 ...-mpnet-base-v2__top_k:3__chunk_size:64.csv | 0 ...berta-base-v2__top_k:1__chunk_size:128.csv | 0 ...berta-base-v2__top_k:1__chunk_size:256.csv | 0 ...oberta-base-v2__top_k:1__chunk_size:64.csv | 0 ...berta-base-v2__top_k:2__chunk_size:128.csv | 0 ...berta-base-v2__top_k:2__chunk_size:256.csv | 0 ...oberta-base-v2__top_k:2__chunk_size:64.csv | 0 ...berta-base-v2__top_k:3__chunk_size:128.csv | 0 ...berta-base-v2__top_k:3__chunk_size:256.csv | 0 ...oberta-base-v2__top_k:3__chunk_size:64.csv | 0 ...-MiniLM-L6-v2__top_k:1__chunk_size:128.csv | 0 ...-MiniLM-L6-v2__top_k:1__chunk_size:256.csv | 0 ...l-MiniLM-L6-v2__top_k:1__chunk_size:64.csv | 0 ...-MiniLM-L6-v2__top_k:2__chunk_size:128.csv | 0 ...-MiniLM-L6-v2__top_k:2__chunk_size:256.csv | 0 ...l-MiniLM-L6-v2__top_k:2__chunk_size:64.csv | 0 ...-MiniLM-L6-v2__top_k:3__chunk_size:128.csv | 0 ...-MiniLM-L6-v2__top_k:3__chunk_size:256.csv | 0 ...l-MiniLM-L6-v2__top_k:3__chunk_size:64.csv | 0 ...mpnet-base-v2__top_k:1__chunk_size:128.csv | 0 ...mpnet-base-v2__top_k:1__chunk_size:256.csv | 0 ...-mpnet-base-v2__top_k:1__chunk_size:64.csv | 0 ...mpnet-base-v2__top_k:2__chunk_size:128.csv | 0 ...mpnet-base-v2__top_k:2__chunk_size:256.csv | 0 ...-mpnet-base-v2__top_k:2__chunk_size:64.csv | 0 ...mpnet-base-v2__top_k:3__chunk_size:128.csv | 0 ...mpnet-base-v2__top_k:3__chunk_size:256.csv | 0 ...-mpnet-base-v2__top_k:3__chunk_size:64.csv | 0 ...berta-base-v2__top_k:1__chunk_size:128.csv | 0 ...berta-base-v2__top_k:1__chunk_size:256.csv | 0 ...oberta-base-v2__top_k:1__chunk_size:64.csv | 0 ...berta-base-v2__top_k:2__chunk_size:128.csv | 0 ...berta-base-v2__top_k:2__chunk_size:256.csv | 0 ...oberta-base-v2__top_k:2__chunk_size:64.csv | 0 ...berta-base-v2__top_k:3__chunk_size:128.csv | 0 ...berta-base-v2__top_k:3__chunk_size:256.csv | 0 ...oberta-base-v2__top_k:3__chunk_size:64.csv | 0 56 files changed, 134 insertions(+) rename evaluations/{analyze_aragog.ipynb => analyze_aragog_parameter_search.ipynb} (100%) create mode 100644 evaluations/results/results_aragog_base_vs_hyde_2024_06_20/comparative_scores.csv rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/detailed_all-mpnet-base-v2__top_k:1__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/detailed_all-mpnet-base-v2__top_k:1__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/detailed_all-mpnet-base-v2__top_k:1__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/detailed_all-mpnet-base-v2__top_k:2__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/detailed_all-mpnet-base-v2__top_k:2__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/detailed_all-mpnet-base-v2__top_k:2__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/detailed_all-mpnet-base-v2__top_k:3__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/detailed_all-mpnet-base-v2__top_k:3__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/detailed_all-mpnet-base-v2__top_k:3__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/score_report_all-mpnet-base-v2__top_k:1__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/score_report_all-mpnet-base-v2__top_k:1__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/score_report_all-mpnet-base-v2__top_k:1__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/score_report_all-mpnet-base-v2__top_k:2__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/score_report_all-mpnet-base-v2__top_k:2__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/score_report_all-mpnet-base-v2__top_k:2__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/score_report_all-mpnet-base-v2__top_k:3__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/score_report_all-mpnet-base-v2__top_k:3__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/score_report_all-mpnet-base-v2__top_k:3__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_2024_06_12 => results_aragog_parameter_search_2024_06_12}/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:64.csv (100%) diff --git a/evaluations/analyze_aragog.ipynb b/evaluations/analyze_aragog_parameter_search.ipynb similarity index 100% rename from evaluations/analyze_aragog.ipynb rename to evaluations/analyze_aragog_parameter_search.ipynb diff --git a/evaluations/results/results_aragog_base_vs_hyde_2024_06_20/comparative_scores.csv b/evaluations/results/results_aragog_base_vs_hyde_2024_06_20/comparative_scores.csv new file mode 100644 index 0000000..aedfe4a --- /dev/null +++ b/evaluations/results/results_aragog_base_vs_hyde_2024_06_20/comparative_scores.csv @@ -0,0 +1,134 @@ +questions,baseline_rag_ground_truth_answers,contexts,baseline_rag_responses,baseline_rag_metric_answer_faithfulness,baseline_rag_metric_context_relevance,baseline_rag_metric_sas,hyde_rag_ground_truth_answers,hyde_rag_responses,hyde_rag_metric_answer_faithfulness,hyde_rag_metric_context_relevance,hyde_rag_metric_sas +What are the two main tasks BERT is pre-trained on?,Masked LM (MLM) and Next Sentence Prediction (NSP).,"['steps in our\nframework: pre-training and fine-tuning . Dur-\ning pre-training, the model is trained on unlabeled\ndata over different pre-training tasks. For fine-\ntuning, the BERT model is first initialized with\nthe pre-trained parameters, and all of the param-\neters are fine-tuned using labeled data from the\ndownstream tasks. Each downstream task has sep-\narate fine-tuned models, even though they are ini-\ntialized with the same pre-trained parameters. The\nquestion-answering example in Figure 1 will serve\nas a running example for this section.\nA distinctive feature of BERT is its unified ar-\nchitecture across different tasks. There is mini-mal difference between the pre-trained architec-\nture and the final downstream architecture.\nModel Architecture BERT’s model architec-\nture is a multi-layer bidirectional Transformer en-\ncoder based on the original implementation de-\nscribed in Vaswani et al. (2017) and released in\nthetensor2tensor library.1Because the use\nof Transformers has become common ', 'BERT: Pre-training of Deep Bidirectional Transformers for\nLanguage Understanding\nJacob Devlin Ming-Wei Chang Kenton Lee Kristina Toutanova\nGoogle AI Language\n{jacobdevlin,mingweichang,kentonl,kristout }@google.com\nAbstract\nWe introduce a new language representa-\ntion model called BERT , which stands for\nBidirectional Encoder Representations from\nTransformers. Unlike recent language repre-\nsentation models (Peters et al., 2018a; Rad-\nford et al., 2018), BERT is designed to pre-\ntrain deep bidirectional representations from\nunlabeled text by jointly conditioning on both\nleft and right context in all layers. As a re-\nsult, the pre-trained BERT model can be fine-\ntuned with just one additional output layer\nto create state-of-the-art models for a wide\nrange of tasks, such as question answering and\nlanguage inference, without substantial task-\nspecific architecture modifications.\nBERT is conceptually simple and empirically\npowerful. It obtains new state-of-the-art re-\nsults on eleven natural language processing\ntasks, including pushing the GLUE score to\n80.5% (7.7% point absolute improvement),\nMultiNLI accuracy ', 'RoBERTa is trained with dynamic mask-\ning (Section 4.1),FULL -SENTENCES without NSP\nloss (Section 4.2), large mini-batches (Section 4.3)\nand a larger byte-level BPE (Section 4.4).\nAdditionally, we investigate two other impor-\ntant factors that have been under-emphasized in\nprevious work: (1) the data used for pretraining,\nand (2) the number of training passes through the\ndata. For example, the recently proposed XLNet\narchitecture ( Yang et al. ,2019 ) is pretrained us-\ning nearly 10 times more data than the original\nBERT ( Devlin et al. ,2019 ). It is also trained with\na batch size eight times larger for half as many op-\ntimization steps, thus seeing four times as many\nsequences in pretraining compared to BERT.\nTo help disentangle the importance of these fac-\ntors from other modeling choices (e.g., the pre-\ntraining objective), we begin by training RoBERTa\nfollowing the BERT LARGE ']",BERT is pre-trained on unlabeled data over different pre-training tasks during the pre-training phase.,1.0,1.0,0.17130853235721588,Masked LM (MLM) and Next Sentence Prediction (NSP).,Masked Language Modeling (MLM) and Next Sentence Prediction,1.0,0.6666666666666666,0.8635661602020264 +"What model sizes are reported for BERT, and what are their specifications?","BERTBASE (L=12, H=768, A=12, Total Parameters=110M) and BERTLARGE (L=24, H=1024, A=16, Total Parameters=340M).","['on the 336M model, the modifications\nin (b) enable stable training with lower training loss.\nUsing the architecture change in Figure 7(b), we consider\nthree different cases as detailed in Table 4. The 336M model\nhas the same size as BERT-large. The 1.3B is the same as\nthe BERT-xlarge configuration that was previously shown\nto get worse results than the 336M BERT-large model (Lan\net al., 2019). We further scale the BERT model using both\nlarger hidden size as well as more layers to arrive at the 3.9B\nparameter case. In all cases, the hidden size per attention\nhead is kept constant at 64. 336M and 1.3B models are\ntrained for 2 million iterations while the 3.9B model is\ntrained for 1.5 million iterations and is still training.\nOn a 3% held-out set, 336M, 1.3B, and 3.9B models achieve\nvalidation set perplexity ', '- 85.6 - 85.8\nR.M. Reader (Ensemble) 81.2 87.9 82.3 88.5\nOurs\nBERT BASE (Single) 80.8 88.5 - -\nBERT LARGE (Single) 84.1 90.9 - -\nBERT LARGE (Ensemble) 85.8 91.8 - -\nBERT LARGE (Sgl.+TriviaQA) 84.2 91.1 85.1 91.8\nBERT LARGE (Ens.+TriviaQA) 86.2 92.2 87.4 93.2\nTable 2: SQuAD 1.1 results. The BERT ensemble\nis 7x systems which use different pre-training check-\npoints and fine-tuning seeds.\nSystem Dev Test\nEM F1 EM F1\nTop Leaderboard Systems (Dec 10th, 2018)\nHuman 86.3 89.0 86.9 89.5\n#1 Single - MIR-MRC (F-Net) - - 74.8 78.0\n#2 Single - nlnet - - 74.2 77.1\nPublished\nunet (Ensemble) - - 71.4 74.9\nSLQA+ (Single) - 71.4 74.4\nOurs\nBERT LARGE (Single) 78.7 81.9 80.0 83.1\nTable 3: SQuAD 2.0 results. We exclude entries that\nuse BERT as one of their components.\ntuning data, we only lose 0.1-0.4 F1, still outper-\nforming all existing systems by a ', 'and our im-\nplementation is almost identical to the original,\nwe will omit an exhaustive background descrip-\ntion of the model architecture and refer readers to\nVaswani et al. (2017) as well as excellent guides\nsuch as “The Annotated Transformer.”2\nIn this work, we denote the number of layers\n(i.e., Transformer blocks) as L, the hidden size as\nH, and the number of self-attention heads as A.3\nWe primarily report results on two model sizes:\nBERT BASE (L=12, H=768, A=12, Total Param-\neters=110M) and BERT LARGE (L=24, H=1024,\nA=16, Total Parameters=340M).\nBERT BASE was chosen to have the same model\nsize as OpenAI GPT for comparison purposes.\nCritically, however, the BERT Transformer uses\nbidirectional self-attention, while the GPT Trans-\nformer uses constrained self-attention where every\ntoken can only attend to context to its left.4\n1https://github.com/tensorflow/tensor2tensor\n2http://nlp.seas.harvard.edu/2018/04/03/attention.html\n3In all cases we set the feed-forward/filter size to be 4H,\ni.e., 3072 for ']","BERT BASE (L=12, H=768, A=12, Total Parameters=110M) and BERT LARGE (L=24, H=1024, A=16, Total Parameters=340M)",1.0,0.5,0.738328218460083,"BERTBASE (L=12, H=768, A=12, Total Parameters=110M) and BERTLARGE (L=24, H=1024, A=16, Total Parameters=340M).","BERT comes in several sizes, including BERT BASE and BERT LARGE. The specifications for these models are as follows: +- BERT BASE: EM 80.8, F1 88.5 +- BERT LARGE: EM 84.1, F1 90.9",1.0,1.0,0.5711044669151306 +How does BERT's architecture facilitate the use of a unified model across diverse NLP tasks?,"BERT uses a multi-layer bidirectional Transformer encoder architecture, allowing for minimal task-specific architecture modifications in fine-tuning.","['DistilBERT, a distilled version of BERT: smaller,\nfaster, cheaper and lighter\nVictor SANH, Lysandre DEBUT, Julien CHAUMOND, Thomas WOLF\nHugging Face\n{victor,lysandre,julien,thomas}@huggingface.co\nAbstract\nAs Transfer Learning from large-scale pre-trained models becomes more prevalent\nin Natural Language Processing (NLP), operating these large models in on-the-\nedge and/or under constrained computational training or inference budgets remains\nchallenging. In this work, we propose a method to pre-train a smaller general-\npurpose language representation model, called DistilBERT, which can then be fine-\ntuned with good performances on a wide range of tasks like its larger counterparts.\nWhile most prior work investigated the use of distillation for building task-specific\nmodels, we leverage knowledge distillation during the pre-training phase and show\nthat it is possible to reduce the size of a BERT model by 40%, while retaining 97%\nof its language understanding capabilities and being 60% faster. To leverage ', 'BERT: Pre-training of Deep Bidirectional Transformers for\nLanguage Understanding\nJacob Devlin Ming-Wei Chang Kenton Lee Kristina Toutanova\nGoogle AI Language\n{jacobdevlin,mingweichang,kentonl,kristout }@google.com\nAbstract\nWe introduce a new language representa-\ntion model called BERT , which stands for\nBidirectional Encoder Representations from\nTransformers. Unlike recent language repre-\nsentation models (Peters et al., 2018a; Rad-\nford et al., 2018), BERT is designed to pre-\ntrain deep bidirectional representations from\nunlabeled text by jointly conditioning on both\nleft and right context in all layers. As a re-\nsult, the pre-trained BERT model can be fine-\ntuned with just one additional output layer\nto create state-of-the-art models for a wide\nrange of tasks, such as question answering and\nlanguage inference, without substantial task-\nspecific architecture modifications.\nBERT is conceptually simple and empirically\npowerful. It obtains new state-of-the-art re-\nsults on eleven natural language processing\ntasks, including pushing the GLUE score to\n80.5% (7.7% point absolute improvement),\nMultiNLI accuracy ', 'a deep bidirectional Transformer. In addi-\ntion to the masked language model, we also use\na “next sentence prediction” task that jointly pre-\ntrains text-pair representations. The contributions\nof our paper are as follows:\n• We demonstrate the importance of bidirectional\npre-training for language representations. Un-\nlike Radford et al. (2018), which uses unidirec-\ntional language models for pre-training, BERT\nuses masked language models to enable pre-\ntrained deep bidirectional representations. This\nis also in contrast to Peters et al. (2018a), which\nuses a shallow concatenation of independently\ntrained left-to-right and right-to-left LMs.\n• We show that pre-trained representations reduce\nthe need for many heavily-engineered task-\nspecific architectures. BERT is the first fine-\ntuning based representation model that achieves\nstate-of-the-art performance on a large suite\nof sentence-level andtoken-level tasks, outper-\nforming many task-specific architectures.\n• BERT advances the state of the art for eleven\nNLP tasks. The code and pre-trained ']",BERT's architecture allows for pre-training deep bidirectional representations from unlabeled text by conditioning on both left and right context in all layers. This enables BERT to be fine-tuned with just one additional output layer to create state-of-the-art models for a wide range of tasks without substantial task-specific architecture modifications. This approach reduces the need for many heavily-engineered task-specific architectures and allows for a unified model that can perform well across various NLP tasks.,1.0,1.0,0.7838907241821289,"BERT uses a multi-layer bidirectional Transformer encoder architecture, allowing for minimal task-specific architecture modifications in fine-tuning.","BERT's architecture facilitates the use of a unified model across diverse NLP tasks by pre-training deep bidirectional representations from unlabeled text, conditioning on both left and right context in all layers. This allows BERT to be fine-tuned with just one additional output layer for various tasks without requiring substantial task-specific architecture modifications. Additionally, BERT's masked language model and ""next sentence prediction"" tasks during pre-training contribute to its ability to handle a wide range of tasks effectively.",1.0,1.0,0.7342596054077148 +Can you describe the modifications LLaMA makes to the transformer architecture for improved performance?," LLaMA incorporates pre-normalization (using RMSNorm), the SwiGLU activation function, and rotary embeddings (RoPE) into its transformer architecture for enhanced training stability and performance.","['increasing computational\nneeds. Our work aims to provide the tools necessary to take\nanother step forward in this trend.\n2.2. Transformer Language Models and Multi-Head\nAttention\nCurrent work in NLP trends towards using transformer mod-\nels (Vaswani et al., 2017) due to their superior accuracy\x0cMegatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism\nFigure 2. Transformer Architecture. Purple blocks correspond to\nfully connected layers. Each blue block represents a single trans-\nformer layer that is replicated N times.\nand compute efficiency. The original transformer formula-\ntion was designed as a machine translation architecture that\ntransforms an input sequence into another output sequence\nusing two parts, an Encoder andDecoder . However, recent\nwork leveraging transformers for language modeling such as\nBERT (Devlin et al., 2018) and GPT-2 (Radford et al., 2019)\nuse only the Encoder orDecoder depending on their needs.\nThis work explores both a decoder ', 'of 43.3% and model FLOPs utilization (MFU) of 32.5% due to\nre-materialization.\nGLM-130B Configurations. We aim to enable our 100B-scale LLM to run a single DGX-A100\n(40G) node in FP16 precision. Based on the hidden state dimension of 12,288 we adopt from\nGPT-3, the resultant model size has to be no more than 130B parameters, thus GLM-130B. To\nmaximize GPU utilization, we configure the model based on the platform and its corresponding\nparallel strategy. To avoid insufficient memory utilization in the middle stages due to the additional\nword embedding at both ends, we balance the pipeline partition by removing one layer from them,\nmaking 9 ×8-2=70 transformer layers in GLM-130B.\nDuring the 60-day access to the cluster, we manage to train GLM-130B for 400 billion tokens\n(roughly 200 billion each for Chinese and English) with a fixed sequence ', 'to\nthe existing PyTorch transformer implementations. We ef-\nficiently trained transformer based models up to 8.3 bil-\nlion parameter on 512 NVIDIA V100 GPUs with 8-way\nmodel parallelism and achieved up to 15.1 PetaFLOPs sus-\ntained over the entire application. We also showed that for\nBERT models, careful attention to the placement of layer\nnormalization in BERT-like models is critical to achieving\nincreased accuracies as the model size increases. We study\nthe effect of model size on down-stream task accuracy and\nachieve far superior results on downstream tasks and estab-\nlish new SOTA for WikiText103, LAMBADA, and RACE\ndatasets. Finally, we open sourced our code to enable future\nwork leveraging model parallel transformers.\nThere are several directions for future work. Continuing\nto increase the scale of pretraining is a promising line of\x0cMegatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism\ninvestigation that will further ']",None,0.0,1.0,-0.04651763290166855," LLaMA incorporates pre-normalization (using RMSNorm), the SwiGLU activation function, and rotary embeddings (RoPE) into its transformer architecture for enhanced training stability and performance.","LLaMA makes modifications to the transformer architecture by fusing groups of two GEMMs, parallelizing the MLP and self-attention layers, eliminating synchronization points, and reducing the number of communication points between GPUs. This results in better scaling and allows for all GEMMs in a simple transformer layer to be performed using only two all-reduces in the forward path and two in the backward path.",1.0,1.0,0.7121716737747192 +"How does LLaMA's approach to embedding layer optimization differ from traditional transformer models, and what are the specific benefits of these modifications?","LLaMA introduces optimizations in its embedding layer through techniques like factorized embeddings and the use of advanced activation functions, such as SwiGLU, for efficient computation and memory usage. These modifications benefit the model by enhancing learning efficiency and enabling the scaling of model parameters without a proportional increase in computational demand.","['embedding GEMM. How-\never, in transformer language models, the output embed-\nding layer shares weights with the input embedding, requir-\ning modifications to both. We parallelize the input embed-\nding weight matrix EH×valong the vocabulary dimension\nE= [E1,E2](column-wise). Since each partition now only\x0cMegatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism\nFigure 4. Communication operations in a transformer layer. There\nare 4 total communication operations in the forward and backward\npass of a single model parallel transformer layer.\ncontains a portion of the embedding table, an all-reduce ( g\noperator) is required after the input embedding. For the\noutput embedding, one approach is to perform the parallel\nGEMM [Y1,Y2] = [XE 1,XE 2]to obtain the logits, add an\nall-gatherY=all-gather ([Y1,Y2]), and send the results to\nthe cross-entropy loss function. However, for this case, the\nall-gather will communicate b×s×velements (bis the\nbatch-size and sis the ', 'the patch projection layer. Unfortunately,\nwe cannot freeze the training of the embedding layer in language models.\nFinally, we find the gradient shrink on embedding layers could overcome loss spikes and thus sta-\nbilize GLM-130B’s training. It is first used in the multi-modal transformer CogView (Ding et al.,\n2021). Let αbe the shrinking factor, the strategy can be easily implemented via word _embedding =\nword _embedding ∗α+word _embedding .detach ()∗(1−α). Figure 4 (b) suggests that empirically,\nsetting α= 0.1wipes out most spikes we would have met, with negligible latency.\nIn fact, the final GLM-130B training run only experiences three late-stage loss divergence cases,\nthough it fails numerous times due to hardware failures. For the three unexpected spikes, it turns out\nfurther shrinking the embedding gradient can still help stabilize the GLM-130B training. See the\ntraining notes and Tensorboard ', 'increasing computational\nneeds. Our work aims to provide the tools necessary to take\nanother step forward in this trend.\n2.2. Transformer Language Models and Multi-Head\nAttention\nCurrent work in NLP trends towards using transformer mod-\nels (Vaswani et al., 2017) due to their superior accuracy\x0cMegatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism\nFigure 2. Transformer Architecture. Purple blocks correspond to\nfully connected layers. Each blue block represents a single trans-\nformer layer that is replicated N times.\nand compute efficiency. The original transformer formula-\ntion was designed as a machine translation architecture that\ntransforms an input sequence into another output sequence\nusing two parts, an Encoder andDecoder . However, recent\nwork leveraging transformers for language modeling such as\nBERT (Devlin et al., 2018) and GPT-2 (Radford et al., 2019)\nuse only the Encoder orDecoder depending on their needs.\nThis work explores both a decoder ']","LLaMA's approach to embedding layer optimization differs from traditional transformer models by parallelizing the input embedding weight matrix along the vocabulary dimension, utilizing all-reduce operations after the input embedding, and using gradient shrinkage on embedding layers to overcome loss spikes. These modifications provide benefits such as improved training stability, reduced loss divergence cases, and the ability to handle unexpected spikes in training without sacrificing performance or introducing significant latency. Additionally, LLaMA's approach allows for increased model parallelism and scalability to address the increasing computational needs of language models.",1.0,0.6666666666666666,0.8244255781173706,"LLaMA introduces optimizations in its embedding layer through techniques like factorized embeddings and the use of advanced activation functions, such as SwiGLU, for efficient computation and memory usage. These modifications benefit the model by enhancing learning efficiency and enabling the scaling of model parameters without a proportional increase in computational demand.","LLaMA's approach to embedding layer optimization differs from traditional transformer models by incorporating a gradient shrink technique on the embedding layers. This technique involves multiplying the word embedding by a shrinking factor, α, and then adding the word embedding without gradients (word_embedding.detach()*(1-alpha)). This modification helps overcome loss spikes and stabilize the training of the GLM-130B model. The specific benefits of this modification include reducing loss spikes, improving training stability, and minimizing unexpected training divergence cases. Additionally, further shrinking the embedding gradient can help stabilize the training even more, making it a valuable technique in optimizing the performance of large language models like GLM-130B.",1.0,0.6666666666666666,0.7966227531433105 +"How were the questions for the multitask test sourced, and what was the criteria for their inclusion?","Questions were manually collected by graduate and undergraduate students from freely available online sources, including practice questions for standardized tests and undergraduate courses, ensuring a wide representation of difficulty levels and subjects.","['Published as a conference paper at ICLR 2021\nMEASURING MASSIVE MULTITASK\nLANGUAGE UNDERSTANDING\nDan Hendrycks\nUC BerkeleyCollin Burns\nColumbia UniversitySteven Basart\nUChicagoAndy Zou\nUC Berkeley\nMantas Mazeika\nUIUCDawn Song\nUC BerkeleyJacob Steinhardt\nUC Berkeley\nABSTRACT\nWe propose a new test to measure a text model’s multitask accuracy. The test\ncovers 57 tasks including elementary mathematics, US history, computer science,\nlaw, and more. To attain high accuracy on this test, models must possess extensive\nworld knowledge and problem solving ability. We find that while most recent\nmodels have near random-chance accuracy, the very largest GPT-3 model improves\nover random chance by almost 20 percentage points on average. However, on every\none of the 57 tasks, the best models still need substantial improvements before\nthey can reach expert-level accuracy. Models also have lopsided performance\nand frequently do not know when they are wrong. Worse, they still have near-\nrandom accuracy on some ', 'subjects for which models can already achieve strong\nperformance (Clark et al., 2018; Khot et al., 2019; Mihaylov et al., 2018; Clark et al., 2019), or\nare focused on linguistic understanding in the form of reading comprehension (Lai et al., 2017;\nRichardson et al., 2013). In contrast, we include a wide range of difficult subjects that go far beyond\nlinguistic understanding.\n3 A M ULTITASK TEST\nWe create a massive multitask test consisting of multiple-choice questions from various branches of\nknowledge. The test spans subjects in the humanities, social sciences, hard sciences, and other areas\nthat are important for some people to learn. There are 57tasks in total, which is also the number\nof Atari games (Bellemare et al., 2013), all of which are listed in Appendix B. The questions in\nthe dataset were manually collected by graduate ', 'is a QA task where each\nexample consists of a context paragraph, a question about that paragraph, and a list of possible\nanswers. The system must predict which answers are true and which are false. While many QA\ntasks exist, we use MultiRC because of a number of desirable properties: (i) each question can have\nmultiple possible correct answers, so each question-answer pair must be evaluated independent of\nother pairs, (ii) the questions are designed such that answering each question requires drawing facts\nfrom multiple context sentences, and (iii) the question-answer pair format more closely matches\nthe API of other tasks in SuperGLUE than the more popular span-extractive QA format does. The\nparagraphs are drawn from seven domains including news, fiction, and historical text. The evaluation\nmetrics are F1 over all answer-options (F1 a) and exact match ']","The questions for the multitask test were manually collected by graduate students from various branches of knowledge. The criteria for inclusion were that the questions span subjects in the humanities, social sciences, hard sciences, and other areas that are important for some people to learn. There are a total of 57 tasks in the test, each with multiple-choice questions.",1.0,1.0,0.5354890823364258,"Questions were manually collected by graduate and undergraduate students from freely available online sources, including practice questions for standardized tests and undergraduate courses, ensuring a wide representation of difficulty levels and subjects.","The questions for the multitask test were manually collected by graduate and undergraduate students from freely available sources online. The criteria for their inclusion were that they span subjects in the humanities, social sciences, hard sciences, and other areas that are important for some people to learn. Some tasks cover a subject at a specific level of difficulty, such as ""Elementary,"" ""High School,"" ""College,"" or ""Professional.""",1.0,1.0,0.5041295289993286 +How does BERT's performance on the GLUE benchmark compare to previous state-of-the-art models?,"BERT achieved new state-of-the-art on the GLUE benchmark (80.5%), surpassing the previous best models.","['recent work in this area.\nSince its release, GLUE has been used as a testbed and showcase by the developers of several\ninfluential models, including GPT (Radford et al., 2018) and BERT (Devlin et al., 2019). As shown\nin Figure 1, progress on GLUE since its release has been striking. On GLUE, GPT and BERT\nachieved scores of 72.8 and 80.2 respectively, relative to 66.5 for an ELMo-based model (Peters\net al., 2018) and 63.7 for the strongest baseline with no multitask learning or pretraining above the\nword level. Recent models (Liu et al., 2019d; Yang et al., 2019) have clearly surpassed estimates of\nnon-expert human performance on GLUE (Nangia and Bowman, 2019). The success of these models\non GLUE has been driven by ever-increasing model capacity, compute power, and data quantity, as\nwell as innovations in ', '56.0 75.1\nBERT BASE 84.6/83.4 71.2 90.5 93.5 52.1 85.8 88.9 66.4 79.6\nBERT LARGE 86.7/85.9 72.1 92.7 94.9 60.5 86.5 89.3 70.1 82.1\nTable 1: GLUE Test results, scored by the evaluation server ( https://gluebenchmark.com/leaderboard ).\nThe number below each task denotes the number of training examples. The “Average” column is slightly different\nthan the official GLUE score, since we exclude the problematic WNLI set.8BERT and OpenAI GPT are single-\nmodel, single task. F1 scores are reported for QQP and MRPC, Spearman correlations are reported for STS-B, and\naccuracy scores are reported for the other tasks. We exclude entries that use BERT as one of their components.\nWe use a batch size of 32 and fine-tune for 3\nepochs over the data for all GLUE tasks. For each\ntask, we selected the best fine-tuning learning rate\n(among 5e-5, ', '4e-5, 3e-5, and 2e-5) on the Dev set.\nAdditionally, for BERT LARGE we found that fine-\ntuning was sometimes unstable on small datasets,\nso we ran several random restarts and selected the\nbest model on the Dev set. With random restarts,\nwe use the same pre-trained checkpoint but per-\nform different fine-tuning data shuffling and clas-\nsifier layer initialization.9\nResults are presented in Table 1. Both\nBERT BASE and BERT LARGE outperform all sys-\ntems on all tasks by a substantial margin, obtaining\n4.5% and 7.0% respective average accuracy im-\nprovement over the prior state of the art. Note that\nBERT BASE and OpenAI GPT are nearly identical\nin terms of model architecture apart from the at-\ntention masking. For the largest and most widely\nreported GLUE task, MNLI, BERT obtains a 4.6%\nabsolute accuracy improvement. On the official\nGLUE leaderboard10, BERT LARGE obtains a score\nof ']",BERT's performance on the GLUE benchmark significantly outperforms previous state-of-the-art models. BERT BASE and BERT LARGE obtained an average accuracy improvement of 4.5% and 7.0% respectively over the prior state of the art models.,1.0,1.0,0.8479288816452026,"BERT achieved new state-of-the-art on the GLUE benchmark (80.5%), surpassing the previous best models.","BERT's performance on the GLUE benchmark surpasses previous state-of-the-art models, achieving a score of 80.5% with a 7.7% absolute improvement.",1.0,1.0,0.9234704971313477 +"What significant improvements does BERT bring to the SQuAD v1.1,v2.0 and v13.5 tasks compared to prior models?","BERT set new records on SQuAD v1.1 and v2.0, significantly outperforming the top leaderboard systems at the time. Version 13.5 doesn't exist. ","['- 85.6 - 85.8\nR.M. Reader (Ensemble) 81.2 87.9 82.3 88.5\nOurs\nBERT BASE (Single) 80.8 88.5 - -\nBERT LARGE (Single) 84.1 90.9 - -\nBERT LARGE (Ensemble) 85.8 91.8 - -\nBERT LARGE (Sgl.+TriviaQA) 84.2 91.1 85.1 91.8\nBERT LARGE (Ens.+TriviaQA) 86.2 92.2 87.4 93.2\nTable 2: SQuAD 1.1 results. The BERT ensemble\nis 7x systems which use different pre-training check-\npoints and fine-tuning seeds.\nSystem Dev Test\nEM F1 EM F1\nTop Leaderboard Systems (Dec 10th, 2018)\nHuman 86.3 89.0 86.9 89.5\n#1 Single - MIR-MRC (F-Net) - - 74.8 78.0\n#2 Single - nlnet - - 74.2 77.1\nPublished\nunet (Ensemble) - - 71.4 74.9\nSLQA+ (Single) - 71.4 74.4\nOurs\nBERT LARGE (Single) 78.7 81.9 80.0 83.1\nTable 3: SQuAD 2.0 results. We exclude entries that\nuse BERT as one of their components.\ntuning data, we only lose 0.1-0.4 F1, still outper-\nforming all existing systems by a ', '4e-5, 3e-5, and 2e-5) on the Dev set.\nAdditionally, for BERT LARGE we found that fine-\ntuning was sometimes unstable on small datasets,\nso we ran several random restarts and selected the\nbest model on the Dev set. With random restarts,\nwe use the same pre-trained checkpoint but per-\nform different fine-tuning data shuffling and clas-\nsifier layer initialization.9\nResults are presented in Table 1. Both\nBERT BASE and BERT LARGE outperform all sys-\ntems on all tasks by a substantial margin, obtaining\n4.5% and 7.0% respective average accuracy im-\nprovement over the prior state of the art. Note that\nBERT BASE and OpenAI GPT are nearly identical\nin terms of model architecture apart from the at-\ntention masking. For the largest and most widely\nreported GLUE task, MNLI, BERT obtains a 4.6%\nabsolute accuracy improvement. On the official\nGLUE leaderboard10, BERT LARGE obtains a score\nof ', 'ELMo,\nOpenAI GPT and BERT. The comparisons be-\ntween the model architectures are shown visually\nin Figure 3. Note that in addition to the architec-\nture differences, BERT and OpenAI GPT are fine-\ntuning approaches, while ELMo is a feature-based\napproach.\nThe most comparable existing pre-training\nmethod to BERT is OpenAI GPT, which trains a\nleft-to-right Transformer LM on a large text cor-\npus. In fact, many of the design decisions in BERT\nwere intentionally made to make it as close to\nGPT as possible so that the two methods could be\nminimally compared. The core argument of this\nwork is that the bi-directionality and the two pre-\ntraining tasks presented in Section 3.1 account for\nthe majority of the empirical improvements, but\nwe do note that there are several other differences\nbetween how BERT and GPT were trained:\n• GPT is trained on the BooksCorpus (800M\nwords); ']","BERT brings substantial improvements to the SQuAD v1.1, v2.0, and v13.5 tasks compared to prior models by outperforming all existing systems by a significant margin. Specifically, BERT BASE and BERT LARGE obtained a respective average accuracy improvement of 4.5% and 7.0% over the prior state of the art on all tasks.",1.0,1.0,0.6432052850723267,"BERT set new records on SQuAD v1.1 and v2.0, significantly outperforming the top leaderboard systems at the time. Version 13.5 doesn't exist. ","BERT brings significant improvements to the SQuAD v1.1, v2.0, and v1.3.5 tasks compared to prior models by achieving new state-of-the-art results on eleven natural language processing tasks.",0.5,0.5,0.6002674102783203 +"What unique aspect of the LLaMA training dataset distinguishes it from datasets used by models like GPT-3, Chinchilla, and PaLM?"," LLaMA's training dataset is distinctive for being composed exclusively of publicly available data, unlike the datasets used for GPT-3, Chinchilla, and PaLM, which include proprietary or undocumented data.","['tasks have\nbeen dramatically improved using LLMs, as these models\nhave been shown to leverage the vast amounts of knowledge\nthey learn from their pre-training corpora (Roberts et al.,\n2020; Petroni et al., 2019; De Cao et al., 2021). However, it\nremains unclear as to what kind of knowledge LMs actually\ncapture—for example, do they simply learn “easy” facts\nthat frequently appear in their pre-training data?\nWe study this question using closed-book QA evalua-\ntions (Roberts et al., 2020) of LLMs in the few-shot set-\nting (Brown et al., 2020). Models are prompted with in-\ncontext training examples (QA pairs) and a test question\nwithout any relevant background text. The goal of our work\nis to investigate the relationship between an LM’s ability\nto answer a question and the number of times information\nrelevant to that question appears in the pre-training data.\nOur Approach ', 'MATH and and k= 40 for GSM8k). LLaMA-65B\noutperforms Minerva 62B on GSM8k, although it has\nnot been fine-tuned on mathematical data.\ndocstring. The model needs to generate a Python\nprogram that fits the description and satisfies the\ntest cases. In Table 8, we compare the pass@1\nscores of our models with existing language mod-\nels that have not been finetuned on code, namely\nPaLM and LaMDA (Thoppilan et al., 2022). PaLM\nand LLaMA were trained on datasets that contain\na similar number of code tokens.\nAs show in Table 8, for a similar number\nof parameters, LLaMA outperforms other gen-\neral models such as LaMDA and PaLM, which\nare not trained or finetuned specifically for code.\nLLaMA with 13B parameters and more outper-\nforms LaMDA 137B on both HumanEval and\nMBPP. LLaMA 65B also outperforms PaLM 62B,\neven when it is trained longer. The pass@1 ', 'LLaMA: Open and Efficient Foundation Language Models\nHugo Touvron∗, Thibaut Lavril∗, Gautier Izacard∗, Xavier Martinet\nMarie-Anne Lachaux, Timothee Lacroix, Baptiste Rozière, Naman Goyal\nEric Hambro, Faisal Azhar, Aurelien Rodriguez, Armand Joulin\nEdouard Grave∗, Guillaume Lample∗\nMeta AI\nAbstract\nWe introduce LLaMA, a collection of founda-\ntion language models ranging from 7B to 65B\nparameters. We train our models on trillions\nof tokens, and show that it is possible to train\nstate-of-the-art models using publicly avail-\nable datasets exclusively, without resorting\nto proprietary and inaccessible datasets. In\nparticular, LLaMA-13B outperforms GPT-3\n(175B) on most benchmarks, and LLaMA-\n65B is competitive with the best models,\nChinchilla-70B and PaLM-540B. We release\nall our models to the research community1.\n1 Introduction\nLarge Languages Models (LLMs) trained on mas-\nsive corpora of texts have shown their ability to per-\nform new tasks from textual instructions or from a\nfew examples (Brown et al., 2020). These few-shot\nproperties ']","LLaMA is trained on publicly available datasets exclusively, without resorting to proprietary and inaccessible datasets, which distinguishes it from datasets used by models like GPT-3, Chinchilla, and PaLM.",1.0,1.0,0.8318937420845032," LLaMA's training dataset is distinctive for being composed exclusively of publicly available data, unlike the datasets used for GPT-3, Chinchilla, and PaLM, which include proprietary or undocumented data.","LLaMA was trained using publicly available datasets exclusively, without resorting to proprietary and inaccessible datasets, which distinguishes it from datasets used by models like GPT-3, Chinchilla, and PaLM.",1.0,1.0,0.8319891691207886 +"What detailed methodology does LLaMA utilize to ensure the diversity of its pre-training data, particularly in the context of filtering and language identification?","LLaMA's methodology for ensuring data diversity includes comprehensive filtering criteria to remove low-quality content, language identification to support multilingual capabilities, and a balanced inclusion of various data sources such as Wikipedia, web text, and books, emphasizing the representation of a wide range of topics and languages.","['learning model\nusage should be developed for guiding users to learn ‘Dos’\nand Dont’ in AI. Detailed policies could also be proposed\nto list all user’s responsibilities before the model access.\nC. Language Models Beyond ChatGPT\nThe examination of ethical implications associated with\nlanguage models necessitates a comprehensive examina-\ntion of the broader challenges that arise within the domain\x0cof language models, in light of recent advancements in\nthe field of artificial intelligence. The last decade has seen\na rapid evolution of AI techniques, characterized by an\nexponential increase in the size and complexity of AI\nmodels, and a concomitant scale-up of model parameters.\nThe scaling laws that govern the development of language\nmodels,asdocumentedinrecentliterature[84,85],suggest\nthatwecanexpecttoencounterevenmoreexpansivemod-\nels that incorporate multiple modalities in the near future.\nEfforts to integrate multiple modalities into a single model\nare driven by the ultimate goal of realizing the concept of\nfoundation models [86]. ', 'LLaMA: Open and Efficient Foundation Language Models\nHugo Touvron∗, Thibaut Lavril∗, Gautier Izacard∗, Xavier Martinet\nMarie-Anne Lachaux, Timothee Lacroix, Baptiste Rozière, Naman Goyal\nEric Hambro, Faisal Azhar, Aurelien Rodriguez, Armand Joulin\nEdouard Grave∗, Guillaume Lample∗\nMeta AI\nAbstract\nWe introduce LLaMA, a collection of founda-\ntion language models ranging from 7B to 65B\nparameters. We train our models on trillions\nof tokens, and show that it is possible to train\nstate-of-the-art models using publicly avail-\nable datasets exclusively, without resorting\nto proprietary and inaccessible datasets. In\nparticular, LLaMA-13B outperforms GPT-3\n(175B) on most benchmarks, and LLaMA-\n65B is competitive with the best models,\nChinchilla-70B and PaLM-540B. We release\nall our models to the research community1.\n1 Introduction\nLarge Languages Models (LLMs) trained on mas-\nsive corpora of texts have shown their ability to per-\nform new tasks from textual instructions or from a\nfew examples (Brown et al., 2020). These few-shot\nproperties ', 'involves\nusing language models to develop fresh and creative\ncontent, such as writing a story, composing poetry, or\nscripting film dialogue. This is achieved by training the\nmodel on a massive corpus of existing books, articles, and\nscripts. The model learns the patterns, structures, and\nstyles of text, allowing it to generate similar content. This\nhas several downstream applications, such as producing\ncontent for entertainment [32], marketing [33], advertis-\ning [34], and content summarization [35].\nb) Decision-making: The use of language models in\ndecision-making is a significant application scenario in\nthe field of machine learning. This refers to using these\nmodels to make informed decisions based on natural\nlanguage input, as demonstrated in studies on sentiment\nanalysis [36], text classification [37], and question answer-\ning [38]. By analyzing and comprehending the meaning\nand context of the input, these models are able to provide\njudgments or suggestions ']",None,0.0,1.0,0.0216057226061821,"LLaMA's methodology for ensuring data diversity includes comprehensive filtering criteria to remove low-quality content, language identification to support multilingual capabilities, and a balanced inclusion of various data sources such as Wikipedia, web text, and books, emphasizing the representation of a wide range of topics and languages.",None,0.0,1.0,0.021605579182505608 +"What are the specific domains covered by the multitask test, and why were they selected?","The test covers 57 subjects across STEM, humanities, social sciences, and more, selected to evaluate a model's world knowledge and problem-solving ability across a wide range of difficulty levels, from elementary to advanced professional levels.","['subjects for which models can already achieve strong\nperformance (Clark et al., 2018; Khot et al., 2019; Mihaylov et al., 2018; Clark et al., 2019), or\nare focused on linguistic understanding in the form of reading comprehension (Lai et al., 2017;\nRichardson et al., 2013). In contrast, we include a wide range of difficult subjects that go far beyond\nlinguistic understanding.\n3 A M ULTITASK TEST\nWe create a massive multitask test consisting of multiple-choice questions from various branches of\nknowledge. The test spans subjects in the humanities, social sciences, hard sciences, and other areas\nthat are important for some people to learn. There are 57tasks in total, which is also the number\nof Atari games (Bellemare et al., 2013), all of which are listed in Appendix B. The questions in\nthe dataset were manually collected by graduate ', 'of other orders. Therefore, we decided to further subdivide the Passeriformes order\ntask into family tasks ( i.e., grouping species into families) to provide a more balanced partition. This resulted in 15 usable\nfamily tasks ( i.e., has more than one species) out of 22 family tasks. Unlike iNaturalist, tasks from CUB have only a few\nhundreds of samples and hence benefit more from carefully selecting an expert.\nIn the iNAT+CUB meta-task the classification tasks are the same tasks used to train the experts. To avoid trivial solu-\ntions (always selecting the expert trained on the task we are trying to solve) we test in a leave-one-out fashion: given a\nclassficication task, we aim to select the best expert that was not trained on the same data.\nFor the Mixed meta-task, we chose 40 random ', 'improvement over the Symmetric version, especially on iNat + CUB, where\nexperts trained on very similar tasks may be too simple to yield good transfer, and should be avoided.\nstatistics are useful (analogous to our choice of probe net-\nwork) has also been considered, for example [9] train an\nautoencoder that learns to extract fixed dimensional sum-\nmary statistics that can reproduce many different datasets\naccurately. However, for general vision tasks which apply\nto all natural images, the domain is the same across tasks.\nTaskonomy [39] explores the structure of the space of\ntasks, focusing on the question of effective knowledge\ntransfer in a curated collection of 26 visual tasks, ranging\nfrom classification to 3D reconstruction, defined on a com-\nmon domain. They compute pairwise transfer distances be-\ntween pairs of tasks and use the results to compute a di-\nrected hierarchy. ']","The specific domains covered by the multitask test are subjects in the humanities, social sciences, hard sciences, and other important areas for individuals to learn. These domains were selected to go beyond linguistic understanding and to provide a wide range of difficult subjects for the test.",1.0,1.0,0.421245813369751,"The test covers 57 subjects across STEM, humanities, social sciences, and more, selected to evaluate a model's world knowledge and problem-solving ability across a wide range of difficulty levels, from elementary to advanced professional levels.","The specific domains covered by the multitask test include elementary mathematics, US history, computer science, law, and more. These domains were selected because they require extensive world knowledge and problem-solving ability, challenging models to possess a wide range of knowledge beyond just linguistic understanding.",1.0,1.0,0.4791794717311859 +What specific enhancements are recommended for language models to bridge the knowledge application gap identified through comprehensive multitask testing?,"Enhancements should focus on developing models with improved procedural knowledge, superior calculation abilities, and a more accurate calibration between confidence and actual performance, directly addressing the weaknesses uncovered in current evaluations.","['a year since the release of SuperGLUE, performance is again essentially human-level\n(Raffel et al., 2019). While these benchmarks evaluate linguistic skills more than overall language\nunderstanding, an array of commonsense benchmarks have been proposed to measure basic reasoning\nand everyday knowledge (Zellers et al., 2019; Huang et al., 2019; Bisk et al., 2019). However, these\nrecent benchmarks have similarly seen rapid progress (Khashabi et al., 2020). Overall, the near\nhuman-level performance on these benchmarks suggests that they are not capturing important facets\nof language understanding.\nTransformer models have driven this recent progress by pretraining on massive text corpora, including\nall of Wikipedia, thousands of books, and numerous websites. These models consequently see\nextensive information about specialized topics, most of which is not assessed by existing NLP\nbenchmarks. It consequently remains an open question just how capable current ', 'statistically significant degradation\nin performance, averaging approximately 5% across the\nvarious perturbation methods. Notably, all three language\nmodels exhibit a heightened level of susceptibility to\nsemantics-altering perturbations in the IMDB sentiment\nanalysis scenario, as indicated by the EM ctstmetric.\nIn addition, the model performance is evaluated on\nthe BoolQA dataset using identical experimental settings.\nThe results of this evaluation are consistent with the\naforementioned findings, with ChatGPT outperforming\nthe other two baselines on both the original and per-\nturbed subsets. Furthermore, it is observed that the three\nlanguage models exhibit a degradation in performance\nwhen subjected to contrasting semantic perturbations,\nsuggesting that equivariance of robustness may pose a\nsignificant challenge for the current advanced language\nmodels and their applications.\x0cFinding 2.1: Compared to two LLM baselines,\nChatGPT demonstrates superior accuracy and ad-\nversarial robustness, while still exhibiting a height-\nened level of susceptibility to semantics-altering\nperturbations.\n3) Beyond Benchmarking:\nIn addition ', 'learning model\nusage should be developed for guiding users to learn ‘Dos’\nand Dont’ in AI. Detailed policies could also be proposed\nto list all user’s responsibilities before the model access.\nC. Language Models Beyond ChatGPT\nThe examination of ethical implications associated with\nlanguage models necessitates a comprehensive examina-\ntion of the broader challenges that arise within the domain\x0cof language models, in light of recent advancements in\nthe field of artificial intelligence. The last decade has seen\na rapid evolution of AI techniques, characterized by an\nexponential increase in the size and complexity of AI\nmodels, and a concomitant scale-up of model parameters.\nThe scaling laws that govern the development of language\nmodels,asdocumentedinrecentliterature[84,85],suggest\nthatwecanexpecttoencounterevenmoreexpansivemod-\nels that incorporate multiple modalities in the near future.\nEfforts to integrate multiple modalities into a single model\nare driven by the ultimate goal of realizing the concept of\nfoundation models [86]. ']",None,0.0,1.0,0.032425783574581146,"Enhancements should focus on developing models with improved procedural knowledge, superior calculation abilities, and a more accurate calibration between confidence and actual performance, directly addressing the weaknesses uncovered in current evaluations.",None,0.0,0.0,0.03242576867341995 +What methodology does DetectGPT use to generate minor perturbations in the candidate passage for evaluation?,"DetectGPT generates minor perturbations using a generic pre-trained model, such as T5, and then compares the log probability of the original sample with each perturbed sample.","['source model pθ,\nDetectGPT compares the log probability of the candidate\npassage under pθwith the average log probability of several\nperturbations of the passage under pθ(generated with, e.g.,\nT5; Raffel et al. (2020)). If the perturbed passages tend\nto have lower average log probability than the original by\nsome margin, the candidate passage is likely to have come\nfrompθ. See Figure 1 for an overview of the problem and\nDetectGPT. See Figure 2 for an illustration of the under-\nlying hypothesis and Figure 3 for empirical evaluation of\nthe hypothesis. Our experiments find that DetectGPT is\nmore accurate than existing zero-shot methods for detect-\ning machine-generated text, improving over the strongest\nzero-shot baseline by over 0.1 AUROC for multiple source\nmodels when detecting machine-generated news articles.\nContributions. Our main contributions are: (a) the identi-\nfication and empirical validation of the hypothesis that the\ncurvature of ', 'pas-\nsage length. We bin the paired human- and model-generated\nsequences by their average length into three bins of equal\nsize (bottom/middle/top third), and plot the AUROC within\neach bin. The relationship between detection performance\nand passage length generally depends on the dataset and\nmodel (or tokenizer). For very long sequences, DetectGPT\nmay see reduced performance because our implementation\nof DetectGPT applies all T5 mask-filling perturbations at\nonce, and T5 may fail to track many mask tokens at once.\nBy applying perturbations in multiple sequential rounds of\nsmaller numbers of masks, this effect may be mitigated.\n6. Discussion\nAs large language models continue to improve, they will\nbecome increasingly attractive tools for replacing human\nwriters in a variety of contexts, such as education, jour-\nnalism, and art. While legitimate uses of language model\ntechnologies exist in all of these settings, teachers, readers,\nand consumers are likely ', 'and WritingPrompts datasets\nand compare the two pre-trained RoBERTa-based detector\nmodels with DetectGPT and the probability thresholding\nbaseline. We show in Table 2 that DetectGPT can provide\ndetection competitive with or better than the stronger of the\ntwo supervised models, and it again greatly outperforms\nprobability thresholding on average.\n5.2. Variants of Machine-Generated Text Detection\nDetecting paraphrased machine-generated text. In prac-\ntice, humans may manually edit or refine machine-generated\ntext rather than blindly use a model’s generations for their\ntask of interest. We therefore conduct an experiment to\nsimulate the detection problem for model samples that have\nbeen increasingly heavily revised. We simulate human re-\nvision by replacing 5 word spans of the text with samples\nfrom T5-3B until r% of the text has been replaced, and\nreport performance as rvaries. Figure 5 shows that De-\ntectGPT maintains detection AUROC above 0.8 even when\nnearly a ']",DetectGPT uses mask-filling perturbations generated with T5 to perturb the passages for evaluation.,1.0,0.3333333333333333,0.832136869430542,"DetectGPT generates minor perturbations using a generic pre-trained model, such as T5, and then compares the log probability of the original sample with each perturbed sample.",DetectGPT uses a generic pre-trained model such as T5 to generate minor perturbations in the candidate passage for evaluation.,1.0,1.0,0.9134736061096191 +Discuss the significance of DetectGPT's detection approach in the context of evolving LLM capabilities and the potential for misuse.,"DtectGPT's approach is significant as it provides a robust, zero-shot detection method that does not require dataset collection or model retraining, addressing the challenge of rapidly evolving LLM capabilities and their potential misuse in generating misleading content.","['performance; the results are shown\nin Figure 7. In particular, the increased discrimination power\nof DetectGPT for larger mask-filling models supports the\ninterpretation that DetectGPT is estimating the curvature\nof the log probability in a latent semantic space, rather\nthan in raw token embedding space. Larger T5 models\nbetter represent this latent space, where random directions\ncorrespond to meaningful changes in the text.\nNumber of perturbations for DetectGPT. We evaluate the\nperformance of DetectGPT as a function of the number of\nperturbations used to estimate the expectation in Equation 1\non three datasets. The results are presented in Figure 8.\nDetection accuracy continues to improve until 100 pertur-\nbations, where it converges. Evaluations use 100 examples\nfrom each dataset.\nData distributional properties. We study more closely\nthe impact of the data distribution on DetectGPT, particu-\n1 10 100 10000.60.70.80.91.0Detection AUROC\nGPT-2\nXSum\nSQuAD\nWritingPrompts\n1 10 100 1000\nGPT-J\nNumber of perturbationsFigure ', 'algorithms can be\nused in conjunction with detection algorithms like Detect-\nGPT to further improve detection robustness as language\nmodels continually improve their reproductions of human\ntext. Separately, the results in Section 5.2 suggest that ex-\ntending DetectGPT to use ensembles of models for scoring,\nrather than a single model, may improve detection in the\nblack box setting. Another topic that remains unexplored\nis the relationship between prompting and detection; that\nis, can a clever prompt successfully prevent a model’s gen-\nerations from being detected by existing methods? Finally,\nfuture work may explore whether the local log probabil-\nity curvature property we identify is present for generative\nmodels in other domains, such as audio, video, or images.\nWe hope that the present work serves as inspiration to fu-\nture work developing effective, general-purpose methods\nfor mitigating potential harms of machine-generated media.\nAcknowledgements\nEM gratefully acknowledges funding ', 'DetectGPT: Zero-Shot Machine-Generated Text Detection\nusing Probability Curvature\nEric Mitchell1Yoonho Lee1Alexander Khazatsky1Christopher D. Manning1Chelsea Finn1\nAbstract\nThe increasing fluency and widespread usage of\nlarge language models (LLMs) highlight the de-\nsirability of corresponding tools aiding detection\nof LLM-generated text. In this paper, we identify\na property of the structure of an LLM’s proba-\nbility function that is useful for such detection.\nSpecifically, we demonstrate that text sampled\nfrom an LLM tends to occupy negative curva-\nture regions of the model’s log probability func-\ntion. Leveraging this observation, we then define\na new curvature-based criterion for judging if a\npassage is generated from a given LLM. This\napproach, which we call DetectGPT, does not re-\nquire training a separate classifier, collecting a\ndataset of real or generated passages, or explic-\nitly watermarking generated text. It uses only\nlog probabilities computed by the model of in-\nterest and random perturbations of ']","DetectGPT's detection approach is significant in the context of evolving LLM capabilities and the potential for misuse because it offers a novel and effective way to detect machine-generated text without the need for training a separate classifier or collecting a dataset of real or generated passages. This is important as LLMs continue to improve in their ability to replicate human text, making it increasingly challenging to distinguish between machine-generated and human-generated content. + +DetectGPT leverages the observation that text sampled from an LLM tends to occupy negative curvature regions of the model's log probability function. By defining a new curvature-based criterion for judging if a passage is generated from a given LLM, DetectGPT provides a way to detect machine-generated content based on the structure of the model's probability function. This approach can help mitigate the potential misuse of LLMs for generating harmful or malicious content, as it provides a tool for identifying and filtering out machine-generated text. + +Furthermore, DetectGPT's approach is particularly valuable in the black box setting, where the inner workings of LLMs are not fully transparent. By utilizing only log probabilities computed by the model of interest and random perturbations, DetectGPT offers a general-purpose method for detecting machine-generated text that is robust and adaptable to different LLM models. + +Overall, DetectGPT's detection approach addresses the need for tools to aid in the detection of machine-generated text in light of evolving LLM capabilities and the potential for misuse. It provides a promising solution for mitigating the risks associated with the increasing fluency and widespread usage of LLMs in generating text.",1.0,1.0,0.7822686433792114,"DtectGPT's approach is significant as it provides a robust, zero-shot detection method that does not require dataset collection or model retraining, addressing the challenge of rapidly evolving LLM capabilities and their potential misuse in generating misleading content.","The DetectGPT detection approach is significant in the context of evolving LLM capabilities and the potential for misuse because it provides a novel method for identifying text generated by large language models (LLMs) without the need for a separate classifier or training data. By leveraging the observation that text sampled from an LLM tends to occupy negative curvature regions of the model's log probability function, DetectGPT defines a curvature-based criterion for judging if a passage is generated from a given LLM. This approach is particularly valuable as LLMs continue to improve and their capabilities expand, making it crucial to have tools in place to detect machine-generated text. + +Moreover, as the text generated by LLMs becomes more fluent and harder to distinguish from human-generated text, the potential for misuse of machine-generated media increases. DetectGPT's detection approach can play a crucial role in identifying and mitigating the potential harms of machine-generated media by providing a reliable way to discern between human and machine-generated text. Additionally, the limitations of probability-based methods for zero-shot machine-generated text detection, such as the cost of evaluating probabilities for models behind APIs like GPT-3, highlight the importance of efficient and effective detection techniques like DetectGPT in safeguarding against misuse of LLMs. In conclusion, DetectGPT's detection approach holds significant importance in addressing the challenges posed by evolving LLM capabilities and the potential for misuse in machine-generated media.",1.0,1.0,0.810316801071167 +"How is the student model, DistilBERT, initialized from the teacher model for effective training?",DistilBERT is initialized from the teacher model by adopting every other layer to maintain some structural and knowledge integrity.,"['DistilBERT, a distilled version of BERT: smaller,\nfaster, cheaper and lighter\nVictor SANH, Lysandre DEBUT, Julien CHAUMOND, Thomas WOLF\nHugging Face\n{victor,lysandre,julien,thomas}@huggingface.co\nAbstract\nAs Transfer Learning from large-scale pre-trained models becomes more prevalent\nin Natural Language Processing (NLP), operating these large models in on-the-\nedge and/or under constrained computational training or inference budgets remains\nchallenging. In this work, we propose a method to pre-train a smaller general-\npurpose language representation model, called DistilBERT, which can then be fine-\ntuned with good performances on a wide range of tasks like its larger counterparts.\nWhile most prior work investigated the use of distillation for building task-specific\nmodels, we leverage knowledge distillation during the pre-training phase and show\nthat it is possible to reduce the size of a BERT model by 40%, while retaining 97%\nof its language understanding capabilities and being 60% faster. To leverage ', 'steps in our\nframework: pre-training and fine-tuning . Dur-\ning pre-training, the model is trained on unlabeled\ndata over different pre-training tasks. For fine-\ntuning, the BERT model is first initialized with\nthe pre-trained parameters, and all of the param-\neters are fine-tuned using labeled data from the\ndownstream tasks. Each downstream task has sep-\narate fine-tuned models, even though they are ini-\ntialized with the same pre-trained parameters. The\nquestion-answering example in Figure 1 will serve\nas a running example for this section.\nA distinctive feature of BERT is its unified ar-\nchitecture across different tasks. There is mini-mal difference between the pre-trained architec-\nture and the final downstream architecture.\nModel Architecture BERT’s model architec-\nture is a multi-layer bidirectional Transformer en-\ncoder based on the original implementation de-\nscribed in Vaswani et al. (2017) and released in\nthetensor2tensor library.1Because the use\nof Transformers has become common ', 'which includes a\ncareful evaluation of the effects of hyperparmeter\ntuning and training set size. We find that BERT\nwas significantly undertrained and propose an im-\nproved recipe for training BERT models, which\nwe call RoBERTa, that can match or exceed the\nperformance of all of the post-BERT methods.\nOur modifications are simple, they include: (1)\ntraining the model longer, with bigger batches,\nover more data; (2) removing the next sentence\nprediction objective; (3) training on longer se-\nquences; and (4) dynamically changing the mask-\ning pattern applied to the training data. We also\ncollect a large new dataset (CC-N EWS) of compa-\nrable size to other privately used datasets, to better\ncontrol for training set size effects.\nWhen controlling for training data, our im-\nproved training procedure improves upon the pub-\nlished BERT results on both GLUE and SQuAD.\nWhen trained for longer over additional data, ']","The student model, DistilBERT, is initialized with the pre-trained parameters from the teacher model during the fine-tuning phase.",1.0,0.6666666666666666,0.8249573707580566,DistilBERT is initialized from the teacher model by adopting every other layer to maintain some structural and knowledge integrity.,"The student model, DistilBERT, is initialized by leveraging knowledge distillation during the pre-training phase, which allows for the reduction of the size of a BERT model by 40% while retaining 97% of its language understanding capabilities and being 60% faster.",1.0,1.0,0.8226005434989929 +Explain how BERT uses the 'masked LM' (MLM) for its pre-training.,"In MLM, a percentage of input tokens are masked randomly, and the model predicts these masked tokens based on their context.","['model. How-\never, in terms of absolute accuracy the MLM\nmodel begins to outperform the LTR model\nalmost immediately.\nC.2 Ablation for Different Masking\nProcedures\nIn Section 3.1, we mention that BERT uses a\nmixed strategy for masking the target tokens when\npre-training with the masked language model\n(MLM) objective. The following is an ablation\nstudy to evaluate the effect of different masking\nstrategies.\n200 400 600 800 1,0007678808284\nPre-training Steps (Thousands)MNLI Dev Accuracy\nBERT BASE (Masked LM)\nBERT BASE (Left-to-Right)\nFigure 5: Ablation over number of training steps. This\nshows the MNLI accuracy after fine-tuning, starting\nfrom model parameters that have been pre-trained for\nksteps. The x-axis is the value of k.Note that the purpose of the masking strategies\nis to reduce the mismatch between pre-training\nand fine-tuning, as the [MASK] symbol never ap-\npears during the fine-tuning stage. We report the\nDev results for both MNLI and NER. For ', 'is first pretrained on a large unla-\nbeled text corpus and subsequently finetuned us-\ning end-task labeled data.\n2.2 Architecture\nBERT uses the now ubiquitous transformer archi-\ntecture ( Vaswani et al. ,2017 ), which we will not\nreview in detail. We use a transformer architecture\nwithLlayers. Each block uses Aself-attention\nheads and hidden dimension H.\n2.3 Training Objectives\nDuring pretraining, BERT uses two objectives:\nmasked language modeling and next sentence pre-\ndiction.\nMasked Language Model (MLM) A random\nsample of the tokens in the input sequence is\nselected and replaced with the special token\n[MASK]. The MLM objective is a cross-entropy\nloss on predicting the masked tokens. BERT uni-\nformly selects 15% of the input tokens for possi-\nble replacement. Of the selected tokens, 80% are\nreplaced with [MASK], 10% are left unchanged,and 10% are replaced by a randomly selected vo-\ncabulary token.\nIn the original implementation, random mask-\ning ', 'the specific strategies used\nduring MLM pre-training (BERT uses 80%, 10%,\n10%). The right part of the paper represents the\nDev set results. For the feature-based approach,\nwe concatenate the last 4 layers of BERT as the\nfeatures, which was shown to be the best approach\nin Section 5.3.\nFrom the table it can be seen that fine-tuning is\nsurprisingly robust to different masking strategies.\nHowever, as expected, using only the M ASK strat-\negy was problematic when applying the feature-\nbased approach to NER. Interestingly, using only\nthe R NDstrategy performs much worse than our\nstrategy as well.']","During pretraining, BERT uses the masked language model (MLM) objective, where a random sample of tokens in the input sequence is selected and replaced with the special token [MASK]. BERT uniformly selects 15% of the input tokens for possible replacement, with 80% of the selected tokens replaced with [MASK], 10% left unchanged, and 10% replaced by a randomly selected vocabulary token. The MLM objective is a cross-entropy loss on predicting the masked tokens, helping the model learn contextual representations of words by predicting missing tokens within a sentence.",1.0,1.0,0.6388821005821228,"In MLM, a percentage of input tokens are masked randomly, and the model predicts these masked tokens based on their context.","During pre-training, BERT uses a masked language modeling (MLM) objective where a random sample of tokens in the input sequence is selected and replaced with a special token [MASK]. The MLM objective involves predicting the masked tokens, and BERT uniformly selects 15% of the input tokens for possible replacement. Of the selected tokens, 80% are replaced with [MASK], 10% are left unchanged, and 10% are replaced by a randomly selected vocabulary token. This approach allows BERT to learn bidirectional representations by predicting missing words within a sentence.",1.0,1.0,0.5244350433349609 +Discuss the impact of model size on BERT's performance across different tasks.,"Larger BERT models consistently show better performance across tasks, indicating the importance of model size in achieving high accuracy.","['the RTL\nmodel would not be able to condition the answer\non the question; (c) this it is strictly less powerful\nthan a deep bidirectional model, since it can use\nboth left and right context at every layer.\n5.2 Effect of Model Size\nIn this section, we explore the effect of model size\non fine-tuning task accuracy. We trained a number\nof BERT models with a differing number of layers,\nhidden units, and attention heads, while otherwise\nusing the same hyperparameters and training pro-\ncedure as described previously.\nResults on selected GLUE tasks are shown in\nTable 6. In this table, we report the average Dev\nSet accuracy from 5 random restarts of fine-tuning.\nWe can see that larger models lead to a strict ac-\ncuracy improvement across all four datasets, even\nfor MRPC which only has 3,600 labeled train-\ning examples, and is substantially different ', 'in\nour training data.\n5.3. Bi-directional Transformer Results Using BERT\nIn this section, we apply our methodology to BERT-style\ntransformer models and study the effect of model scaling\non several downstream tasks. Prior work (Lan et al., 2019)\nfound that increasing model size beyond BERT-large with\n336M parameters results in unexpected model degradation.\nTo address this degradation, the authors of that work (Lan\net al., 2019) introduced parameter sharing and showed that\nthat their models scale much better compared to the original\nBERT model.\nWe further investigated this behaviour and empirically\ndemonstrated that rearranging the order of the layer nor-\nmalization and the residual connections as shown in Figure\n7 is critical to enable the scaling of the BERT-style mod-\nels beyond BERT-Large. The architecture (b) in Figure 7\neliminates instabilities observed using the original BERT\narchitecture in (a) and also has a lower training loss. ', 'on the 336M model, the modifications\nin (b) enable stable training with lower training loss.\nUsing the architecture change in Figure 7(b), we consider\nthree different cases as detailed in Table 4. The 336M model\nhas the same size as BERT-large. The 1.3B is the same as\nthe BERT-xlarge configuration that was previously shown\nto get worse results than the 336M BERT-large model (Lan\net al., 2019). We further scale the BERT model using both\nlarger hidden size as well as more layers to arrive at the 3.9B\nparameter case. In all cases, the hidden size per attention\nhead is kept constant at 64. 336M and 1.3B models are\ntrained for 2 million iterations while the 3.9B model is\ntrained for 1.5 million iterations and is still training.\nOn a 3% held-out set, 336M, 1.3B, and 3.9B models achieve\nvalidation set perplexity ']","The impact of model size on BERT's performance across different tasks is significant. The study found that larger models lead to a strict accuracy improvement across all four datasets, even for tasks with limited training examples. Additionally, prior work has shown that increasing model size beyond BERT-large with 336M parameters can result in unexpected model degradation. However, by introducing parameter sharing and rearranging the order of layer normalization and residual connections, the models can scale better and achieve stable training with lower training loss. By scaling the BERT model with larger hidden size and more layers, better results can be achieved across various tasks.",1.0,0.5,0.8617293238639832,"Larger BERT models consistently show better performance across tasks, indicating the importance of model size in achieving high accuracy.","The impact of model size on BERT's performance across different tasks is discussed in the context provided. It is observed that larger models lead to a strict accuracy improvement across all tasks, even for datasets with fewer labeled training examples. However, it is also noted that increasing model size beyond a certain point may result in unexpected model degradation. To address this, parameter sharing and rearranging the order of layer normalization and residual connections have been suggested to enable the scaling of BERT-style models. Different model sizes, such as 336M, 1.3B, and 3.9B, have been considered, with varying results in terms of training loss and performance. Overall, the impact of model size on BERT's performance varies depending on the specific configuration and task being considered.",1.0,0.5,0.8236802816390991 +What are the hyperparameters of the AdamW optimizer used in training the LLaMA models?,"The AdamW optimizer for LLaMA models uses hyperparameters β1 = 0.9, β2 = 0.95, with a cosine learning rate schedule, a weight decay of 0.1, and gradient clipping set to 1.0.","['GPT-3, PaLM)ContextMask(s)\nFigure 2: GLM-130B and LLMs of similar\nscale on zero-shot LAMBADA language\nmodeling. Details on GLM’s bidirectional\nattention are provided in Du et al. (2022).Conceptually, the blank infilling objective with bidi-\nrectional attention enables a more effective compre-\nhension of contexts than GPT-style models: when us-\ning [MASK], GLM-130B behaves as BERT (Devlin\net al., 2019) and T5 (Raffel et al., 2020); when us-\ning [gMASK], GLM-130B behaves similarly to Pre-\nfixLM (Liu et al., 2018; Dong et al., 2019).\nEmpirically, GLM-130B offers a record-high accuracy\nof 80.2% on zero-shot LAMBADA by outperforming\nboth GPT-3 and PaLM 540B in Figure 2. By setting\nthe attention mask, GLM-130B’s unidirectional vari-\nant is comparable to GPT-3 and OPT-175B. Our ob-\nservations are in line with existing findings (Liu et al.,\n2018; Dong et al., 2019).\nLayer Normalization (LN, Ba et al. (2016)). Training instability is one ', 'use the RMSNorm normalizing func-\ntion, introduced by Zhang and Sennrich (2019).\nSwiGLU activation function [PaLM]. We re-\nplace the ReLU non-linearity by the SwiGLU ac-\ntivation function, introduced by Shazeer (2020) to\nimprove the performance. We use a dimension of\n2\n34dinstead of 4das in PaLM.\nRotary Embeddings [GPTNeo]. We remove the\nabsolute positional embeddings, and instead, add\nrotary positional embeddings (RoPE), introduced\nby Su et al. (2021), at each layer of the network.\nThe details of the hyper-parameters for our dif-\nferent models are given in Table 2.\n2.3 Optimizer\nOur models are trained using the AdamW opti-\nmizer (Loshchilov and Hutter, 2017), with the fol-\nlowing hyper-parameters: β1= 0.9,β2= 0.95.\nWe use a cosine learning rate schedule, such that\nthe final learning rate is equal to 10% of the maxi-\nmal learning rate. We use a weight decay of 0.1and\ngradient clipping of 1.0. We use ', 'at\nruntime.\nB.8.1 Q UANTIZATION RESULTS AT SCALES\nGLM models at 110M to 10B scale are from GLM’s original paper(Du et al., 2022). Although the\narchitecture of smaller scale GLMs are not the same as GLM-130B, we believe that the training\nobjective is the key factor for quantization. Table 10 shows the performance of GLM and BLOOM\nfamily models at different scales on the LAMBADA dataset with different quantization methods.\nAlmost all models maintain performance at INT8 precision. In general, GLM maintains better\nperformance than BLOOM at INT4 precision as it scales.\n28\x0cPublished as a conference paper at ICLR 2023\nTable 10: Accuracy on LAMBADA dataset for GLM and BLOOM family at 100M to 176B scales\nacross different quantization precision.\nBLOOM-560M BLOOM-1B1 BLOOM-3B BLOOM-7B BLOOM-176B\nOriginal 31.40% 40.68% 48.30% 54.91% 64.37%\nAbsmax INT8, col-wise 26.12% 40.69% 48.83% 55.33% 65.03%\nAbsmax INT4, col-wise 9.30% ']","β1=0.9, β2=0.95",0.0,0.0,0.40075868368148804,"The AdamW optimizer for LLaMA models uses hyperparameters β1 = 0.9, β2 = 0.95, with a cosine learning rate schedule, a weight decay of 0.1, and gradient clipping set to 1.0.",The hyperparameters of the AdamW optimizer used in training the LLaMA models are β1=0.9 and β2=0.95.,1.0,1.0,0.8558956384658813 +"In what ways does LLaMA's evaluation strategy extend beyond standard NLP tasks, and what new dimensions of model performance does this reveal?","LLaMA's evaluation strategy includes novel tasks that assess the model's ability in reasoning, domain-specific knowledge (e.g., legal or medical), and its proficiency in languages other than English. This approach reveals dimensions of model performance such as cross-domain adaptability, fine-grained linguistic understanding, and the capacity to generalize knowledge across different contexts and languages.","['can improve\nlong-tail knowledge. Our work differs in that we conduct\ncausal re-training experiments and find that model scaling\nis highly beneficial to long-tail QA performance.\n6. Conclusion and Future Work\nLarge language models demonstrate impressive few-shot\nlearning capabilities that arise from simply training on large-\nscale internet text. With the open-source release of LLMs—\nand their associated pre-training datasets—the research com-\nmunity can now begin to understand the origins of these\ncapabilities. Our work is one of the first to relate an ob-\nserved phenomenon in LLMs back to the pre-training data\nitself. In our case, our results are negative: while LLMs\nachieve moderate performance on open-domain QA bench-\nmarks, they are mainly successful on questions that probe\nknowledge that appears widely in their pre-training datasets.\nOur work raises numerous directions for further inquiry,\nnamely, how to improve retention of long-tail knowledge\ngiven that simply ', 'a conference paper at ICLR 2023\nEmergent Ability Tasks\nFigure 20: Emergent ability tasks of GLM-130B. These tasks’ performance does not grow much\nuntil the model size reaches a certain threshold (e.g., 100B or 10B). After reaching the threshold, the\nmodel performance soars up quickly. The BIG-bench (Srivastava et al., 2022) benchmark collects\nmany of these challenges.\nMMLU, hindu_knowledge, crass_ai, implicatures, understanding_fables, modified_arithmetic,\nimplicit_relations, and gre_reading_comprehension from BIG-bench (Srivastava et al., 2022).\nIn line with the observation in (Wei et al., 2022b), we show that GLM-130B also presents the two\nsimilar scaling behaviors to other LLMs such as GPT-3, LaMDA, and PaLM. Though why and how\nLLMs present these intriguing properties remain unclear, GLM-130B provides open opportunities\nfor all researchers to test and understand the reason behind them.\n47\x0cPublished as a conference paper at ICLR 2023\nTable 11: Full configurations for ', 'quality by\nlow quantity can improve the model performance. Besides,\nwe consider the design of training data as a crucial factor\nto the efficient data usage. For example, experiments show\nthat curriculum learning [93], active learning [94] and\nprompting [95] could improve the data efficiency. However,\nmostofthesestrategiesarestillattheearlystageandneed\nthe further investigation.\nc) Computational Resource: As LLMs are growing\nbigger and bigger, the deployment and training of these\nmodels are getting more and more costly. Daily prac-\ntitioners in NLP and deep learning will find it hard\nto install the LLMs on their own devices. Previous\nstudy [96] also show that the computational resource\nrequirements for strong model scaling clearly outpaces\nthat of system hardware. We argue that model scaling\nmay be inevitable, which is determined by the scaling law.\nHowever, recent attempts among model design, tuning\nstrategy and compression could possibly mitigate the\nextreme consumption of the computational ']",None,0.0,1.0,-0.020524272695183754,"LLaMA's evaluation strategy includes novel tasks that assess the model's ability in reasoning, domain-specific knowledge (e.g., legal or medical), and its proficiency in languages other than English. This approach reveals dimensions of model performance such as cross-domain adaptability, fine-grained linguistic understanding, and the capacity to generalize knowledge across different contexts and languages.","LLaMA's evaluation strategy extends beyond standard NLP tasks by covering socially important subjects such as morality and law, which are topics that humans are incentivized to learn. This reveals new dimensions of model performance related to text understanding across a wide range of real-world domains.",1.0,1.0,0.8998314142227173 +Describe the methodology used for evaluating models on the multitask test. How does it differ from traditional model evaluations?,"Models were evaluated in zero-shot and few-shot settings to measure their ability to apply pretraining knowledge without further fine-tuning, a method designed to be more akin to human learning and evaluation processes.","['task,\nand (2) jointly embed the models and tasks, and select a\nmodel using the learned metric (see Section 4). Notice that\n(1) does not use knowledge of the model performance on\nvarious tasks, which makes it more widely applicable but\nrequires we know what task a model was trained for and\nmay ignore the fact that models trained on slightly differ-\nent tasks may still provide an overall better feature extrac-\ntor (for example by over-fitting less to the task they were\ntrained on).\nIn Table 2 we compare the overall results of the various\nproposed metrics on the model selection meta-tasks. On\nboth the iNat+CUB and Mixed meta-tasks, the Asymmetric\nTASK 2VEC model selection is close to the ground-truth op-\ntimal, and significantly improves over both chance, and over\nusing an generic ImageNet expert. Notice that our method\nhasO(1)complexity, while searching over ', 'a model’s text understanding by covering numerous topics that\nhumans are incentivized to learn. Since our test consists in 57tasks, it can be used to analyze\naggregate properties of models across tasks and to track important shortcomings. The test and code is\navailable at github.com/hendrycks/test.\n2 R ELATED WORK\nPretraining. The dominant paradigm in NLP is to pretrain large models on massive text corpora\nincluding educational books and websites. In the process, these models are exposed to information\nabout a wide range of topics. Petroni et al. (2019) found that recent models learn enough information\nfrom pretraining that they can serve as knowledge bases. However, no prior work has comprehensively\nmeasured the knowledge models have across many real-world domains.\nUntil recently, researchers primarily used fine-tuned models on downstream tasks (Devlin et al., 2019).\nHowever, larger pretrained models like GPT-3 ', 'Published as a conference paper at ICLR 2021\nMEASURING MASSIVE MULTITASK\nLANGUAGE UNDERSTANDING\nDan Hendrycks\nUC BerkeleyCollin Burns\nColumbia UniversitySteven Basart\nUChicagoAndy Zou\nUC Berkeley\nMantas Mazeika\nUIUCDawn Song\nUC BerkeleyJacob Steinhardt\nUC Berkeley\nABSTRACT\nWe propose a new test to measure a text model’s multitask accuracy. The test\ncovers 57 tasks including elementary mathematics, US history, computer science,\nlaw, and more. To attain high accuracy on this test, models must possess extensive\nworld knowledge and problem solving ability. We find that while most recent\nmodels have near random-chance accuracy, the very largest GPT-3 model improves\nover random chance by almost 20 percentage points on average. However, on every\none of the 57 tasks, the best models still need substantial improvements before\nthey can reach expert-level accuracy. Models also have lopsided performance\nand frequently do not know when they are wrong. Worse, they still have near-\nrandom accuracy on some ']","The methodology used for evaluating models on the multitask test involves jointly embedding the models and tasks, and selecting a model using a learned metric. This approach does not use knowledge of the model's performance on various tasks, making it more widely applicable. However, it requires knowing the task a model was trained for and may ignore the fact that models trained on slightly different tasks may still provide better feature extraction. This methodology differs from traditional model evaluations as it focuses on measuring a text model's multitask accuracy across a wide range of tasks to analyze aggregate properties of models and track important shortcomings. It covers 57 tasks including elementary mathematics, US history, computer science, law, and more, requiring models to possess extensive world knowledge and problem-solving ability to attain high accuracy.",1.0,1.0,0.32263585925102234,"Models were evaluated in zero-shot and few-shot settings to measure their ability to apply pretraining knowledge without further fine-tuning, a method designed to be more akin to human learning and evaluation processes.","The methodology used for evaluating models on the multitask test involves two main components: (1) evaluating models on a diverse set of tasks without knowledge of the model's performance on those tasks, and (2) jointly embedding the models and tasks to select a model using the learned metric. This method differs from traditional model evaluations in that it does not rely on prior knowledge of the model's performance on specific tasks, making it more widely applicable. Additionally, it takes into account that models trained on slightly different tasks may still provide overall better feature extraction.",1.0,1.0,0.33569014072418213 +What characteristic of large language model (LLM) generated text's probability function does DetectGPT exploit for detection?,DetectGPT exploits the characteristic that LLM-generated text tends to lie in regions of negative curvature of the model's log probability function.,"['DetectGPT: Zero-Shot Machine-Generated Text Detection\nusing Probability Curvature\nEric Mitchell1Yoonho Lee1Alexander Khazatsky1Christopher D. Manning1Chelsea Finn1\nAbstract\nThe increasing fluency and widespread usage of\nlarge language models (LLMs) highlight the de-\nsirability of corresponding tools aiding detection\nof LLM-generated text. In this paper, we identify\na property of the structure of an LLM’s proba-\nbility function that is useful for such detection.\nSpecifically, we demonstrate that text sampled\nfrom an LLM tends to occupy negative curva-\nture regions of the model’s log probability func-\ntion. Leveraging this observation, we then define\na new curvature-based criterion for judging if a\npassage is generated from a given LLM. This\napproach, which we call DetectGPT, does not re-\nquire training a separate classifier, collecting a\ndataset of real or generated passages, or explic-\nitly watermarking generated text. It uses only\nlog probabilities computed by the model of in-\nterest and random perturbations of ', 'al.,\n2023) may further improve the effectiveness of methods\nsuch as DetectGPT, even as LLMs continue to improve.\nLimitations. One limitation of probability-based methods\nfor zero-shot machine-generated text detection (like Detect-\nGPT) is the white-box assumption that we can evaluate log\nprobabilities of the model(s) in question. For models be-\nhind APIs that do provide probabilities (such as GPT-3),\nevaluating probabilities nonetheless costs money. Another\nassumption of DetectGPT is access to a reasonable pertur-\nbation function. While in this work, we use off-the-shelf\nmask-filling models such as T5 and mT5 (for non-English\nlanguages), some domains may see reduced performance\nif existing mask-filling models do not well represent the\nspace of meaningful rephrases, reducing the quality of the\ncurvature estimate. While DetectGPT provides the best\navailable detection performance for PubMedQA, its drop\nin performance compared to other datasets may be a result\nAverage length0.9850.9900.995AUROC\ngpt-2\nAverage length0.960.970.980.99AUROC\nopt-2.7\nXSum\nSQuAD\nWritingPrompts\n130 140 150 ', 'pas-\nsage length. We bin the paired human- and model-generated\nsequences by their average length into three bins of equal\nsize (bottom/middle/top third), and plot the AUROC within\neach bin. The relationship between detection performance\nand passage length generally depends on the dataset and\nmodel (or tokenizer). For very long sequences, DetectGPT\nmay see reduced performance because our implementation\nof DetectGPT applies all T5 mask-filling perturbations at\nonce, and T5 may fail to track many mask tokens at once.\nBy applying perturbations in multiple sequential rounds of\nsmaller numbers of masks, this effect may be mitigated.\n6. Discussion\nAs large language models continue to improve, they will\nbecome increasingly attractive tools for replacing human\nwriters in a variety of contexts, such as education, jour-\nnalism, and art. While legitimate uses of language model\ntechnologies exist in all of these settings, teachers, readers,\nand consumers are likely ']",Negative curvature regions of the model's log probability function,1.0,0.6666666666666666,0.5933892130851746,DetectGPT exploits the characteristic that LLM-generated text tends to lie in regions of negative curvature of the model's log probability function.,DetectGPT exploits the property that text sampled from an LLM tends to occupy negative curvature regions of the model's log probability function for detection.,1.0,1.0,0.9116738438606262 +What empirical validation does DetectGPT provide for its hypothesis regarding log probability curvature?,"DetectGPT's hypothesis is empirically validated across a diverse body of LLMs, showing consistent results even when perturbations are sourced from different language models.","['performance; the results are shown\nin Figure 7. In particular, the increased discrimination power\nof DetectGPT for larger mask-filling models supports the\ninterpretation that DetectGPT is estimating the curvature\nof the log probability in a latent semantic space, rather\nthan in raw token embedding space. Larger T5 models\nbetter represent this latent space, where random directions\ncorrespond to meaningful changes in the text.\nNumber of perturbations for DetectGPT. We evaluate the\nperformance of DetectGPT as a function of the number of\nperturbations used to estimate the expectation in Equation 1\non three datasets. The results are presented in Figure 8.\nDetection accuracy continues to improve until 100 pertur-\nbations, where it converges. Evaluations use 100 examples\nfrom each dataset.\nData distributional properties. We study more closely\nthe impact of the data distribution on DetectGPT, particu-\n1 10 100 10000.60.70.80.91.0Detection AUROC\nGPT-2\nXSum\nSQuAD\nWritingPrompts\n1 10 100 1000\nGPT-J\nNumber of perturbationsFigure ', 'source model pθ,\nDetectGPT compares the log probability of the candidate\npassage under pθwith the average log probability of several\nperturbations of the passage under pθ(generated with, e.g.,\nT5; Raffel et al. (2020)). If the perturbed passages tend\nto have lower average log probability than the original by\nsome margin, the candidate passage is likely to have come\nfrompθ. See Figure 1 for an overview of the problem and\nDetectGPT. See Figure 2 for an illustration of the under-\nlying hypothesis and Figure 3 for empirical evaluation of\nthe hypothesis. Our experiments find that DetectGPT is\nmore accurate than existing zero-shot methods for detect-\ning machine-generated text, improving over the strongest\nzero-shot baseline by over 0.1 AUROC for multiple source\nmodels when detecting machine-generated news articles.\nContributions. Our main contributions are: (a) the identi-\nfication and empirical validation of the hypothesis that the\ncurvature of ', 'al.,\n2023) may further improve the effectiveness of methods\nsuch as DetectGPT, even as LLMs continue to improve.\nLimitations. One limitation of probability-based methods\nfor zero-shot machine-generated text detection (like Detect-\nGPT) is the white-box assumption that we can evaluate log\nprobabilities of the model(s) in question. For models be-\nhind APIs that do provide probabilities (such as GPT-3),\nevaluating probabilities nonetheless costs money. Another\nassumption of DetectGPT is access to a reasonable pertur-\nbation function. While in this work, we use off-the-shelf\nmask-filling models such as T5 and mT5 (for non-English\nlanguages), some domains may see reduced performance\nif existing mask-filling models do not well represent the\nspace of meaningful rephrases, reducing the quality of the\ncurvature estimate. While DetectGPT provides the best\navailable detection performance for PubMedQA, its drop\nin performance compared to other datasets may be a result\nAverage length0.9850.9900.995AUROC\ngpt-2\nAverage length0.960.970.980.99AUROC\nopt-2.7\nXSum\nSQuAD\nWritingPrompts\n130 140 150 ']","DetectGPT provides empirical validation for its hypothesis regarding log probability curvature by comparing the log probability of the candidate passage under the source model with the average log probability of several perturbations of the passage under the same source model. If the perturbed passages have a lower average log probability than the original passage by a certain margin, the candidate passage is likely to have come from the source model. Additionally, DetectGPT's experiments show that it is more accurate than existing zero-shot methods for detecting machine-generated text, improving over the strongest zero-shot baseline by over 0.1 AUROC for multiple source models.",1.0,0.3333333333333333,0.8207941055297852,"DetectGPT's hypothesis is empirically validated across a diverse body of LLMs, showing consistent results even when perturbations are sourced from different language models.","DetectGPT provides empirical validation for its hypothesis regarding log probability curvature through experiments on a machine-generated news articles dataset and the WritingPrompts dataset. It compares the performance of DetectGPT with two pre-trained RoBERTa-based detector models and a probability thresholding baseline, showing that DetectGPT can provide competitive or better detection compared to the supervised models and greatly outperforms probability thresholding on average.",0.5,1.0,0.8523637056350708 +What datasets were used for BERT's pre-training and why?,"BooksCorpus (800M words) and English Wikipedia (2500M words), chosen for their document-level organization and volume of data.","['which includes a\ncareful evaluation of the effects of hyperparmeter\ntuning and training set size. We find that BERT\nwas significantly undertrained and propose an im-\nproved recipe for training BERT models, which\nwe call RoBERTa, that can match or exceed the\nperformance of all of the post-BERT methods.\nOur modifications are simple, they include: (1)\ntraining the model longer, with bigger batches,\nover more data; (2) removing the next sentence\nprediction objective; (3) training on longer se-\nquences; and (4) dynamically changing the mask-\ning pattern applied to the training data. We also\ncollect a large new dataset (CC-N EWS) of compa-\nrable size to other privately used datasets, to better\ncontrol for training set size effects.\nWhen controlling for training data, our im-\nproved training procedure improves upon the pub-\nlished BERT results on both GLUE and SQuAD.\nWhen trained for longer over additional data, ', 'steps in our\nframework: pre-training and fine-tuning . Dur-\ning pre-training, the model is trained on unlabeled\ndata over different pre-training tasks. For fine-\ntuning, the BERT model is first initialized with\nthe pre-trained parameters, and all of the param-\neters are fine-tuned using labeled data from the\ndownstream tasks. Each downstream task has sep-\narate fine-tuned models, even though they are ini-\ntialized with the same pre-trained parameters. The\nquestion-answering example in Figure 1 will serve\nas a running example for this section.\nA distinctive feature of BERT is its unified ar-\nchitecture across different tasks. There is mini-mal difference between the pre-trained architec-\nture and the final downstream architecture.\nModel Architecture BERT’s model architec-\nture is a multi-layer bidirectional Transformer en-\ncoder based on the original implementation de-\nscribed in Vaswani et al. (2017) and released in\nthetensor2tensor library.1Because the use\nof Transformers has become common ', 'BERT: Pre-training of Deep Bidirectional Transformers for\nLanguage Understanding\nJacob Devlin Ming-Wei Chang Kenton Lee Kristina Toutanova\nGoogle AI Language\n{jacobdevlin,mingweichang,kentonl,kristout }@google.com\nAbstract\nWe introduce a new language representa-\ntion model called BERT , which stands for\nBidirectional Encoder Representations from\nTransformers. Unlike recent language repre-\nsentation models (Peters et al., 2018a; Rad-\nford et al., 2018), BERT is designed to pre-\ntrain deep bidirectional representations from\nunlabeled text by jointly conditioning on both\nleft and right context in all layers. As a re-\nsult, the pre-trained BERT model can be fine-\ntuned with just one additional output layer\nto create state-of-the-art models for a wide\nrange of tasks, such as question answering and\nlanguage inference, without substantial task-\nspecific architecture modifications.\nBERT is conceptually simple and empirically\npowerful. It obtains new state-of-the-art re-\nsults on eleven natural language processing\ntasks, including pushing the GLUE score to\n80.5% (7.7% point absolute improvement),\nMultiNLI accuracy ']",None.,0.0,0.0,0.1003052368760109,"BooksCorpus (800M words) and English Wikipedia (2500M words), chosen for their document-level organization and volume of data.",None,0.0,1.0,0.11263389140367508 +How do the LLaMA models' parameter counts compare across the different versions?,"The LLaMA modelsvary in size from 7 billion parameters to 65 billion parameters, with intermediate sizes of 13 billion and 33 billion parameters.","['at\nruntime.\nB.8.1 Q UANTIZATION RESULTS AT SCALES\nGLM models at 110M to 10B scale are from GLM’s original paper(Du et al., 2022). Although the\narchitecture of smaller scale GLMs are not the same as GLM-130B, we believe that the training\nobjective is the key factor for quantization. Table 10 shows the performance of GLM and BLOOM\nfamily models at different scales on the LAMBADA dataset with different quantization methods.\nAlmost all models maintain performance at INT8 precision. In general, GLM maintains better\nperformance than BLOOM at INT4 precision as it scales.\n28\x0cPublished as a conference paper at ICLR 2023\nTable 10: Accuracy on LAMBADA dataset for GLM and BLOOM family at 100M to 176B scales\nacross different quantization precision.\nBLOOM-560M BLOOM-1B1 BLOOM-3B BLOOM-7B BLOOM-176B\nOriginal 31.40% 40.68% 48.30% 54.91% 64.37%\nAbsmax INT8, col-wise 26.12% 40.69% 48.83% 55.33% 65.03%\nAbsmax INT4, col-wise 9.30% ', 'the RTL\nmodel would not be able to condition the answer\non the question; (c) this it is strictly less powerful\nthan a deep bidirectional model, since it can use\nboth left and right context at every layer.\n5.2 Effect of Model Size\nIn this section, we explore the effect of model size\non fine-tuning task accuracy. We trained a number\nof BERT models with a differing number of layers,\nhidden units, and attention heads, while otherwise\nusing the same hyperparameters and training pro-\ncedure as described previously.\nResults on selected GLUE tasks are shown in\nTable 6. In this table, we report the average Dev\nSet accuracy from 5 random restarts of fine-tuning.\nWe can see that larger models lead to a strict ac-\ncuracy improvement across all four datasets, even\nfor MRPC which only has 3,600 labeled train-\ning examples, and is substantially different ', '57tasks. On the right are UnifiedQA\ntransfer accuracies for all of the 57tasks. For both models, capabilities are lopsided.\nA.1 A NALYSIS WITH MORE FINE-TUNED MODELS\nWe primarily analyzed models with more than 10billion parameters in the main body of the paper.\nFor this section, we analyze smaller models including RoBERTa-base (125 million parameters) (Liu\n11\x0cPublished as a conference paper at ICLR 2021\net al., 2019), ALBERT-xxlarge (223 million parameters) (Lan et al., 2020), and GPT-2 (1,558 million\nparameters) (Radford et al., 2019). Models are fine-tuned to predict one of four classes using the\nUnifiedQA MCQ questions and using our dev+val set. We test on our multitask test set.\nWe observe that these smaller models can attain better-than-random accuracy. RoBERTa-base attains\nan overall accuracy of 27.9%, with 27.9%accuracy for the humanities, 28.8%for social sciences,\n27.0%for STEM, and 27.7%for other. ']",None,0.0,1.0,-0.02451009675860405,"The LLaMA modelsvary in size from 7 billion parameters to 65 billion parameters, with intermediate sizes of 13 billion and 33 billion parameters.",None,0.0,0.5,-0.024509962648153305 +"What are the significant benchmarks LLaMA models were evaluated on, and how does their performance relate to other foundation models?"," LLaMA models were evaluated on benchmarks such as Common Sense Reasoning, Closed-book Question Answering, Reading Comprehension, Mathematical Reasoning, and Code Generation, showing superior or competitive performance compared to existing foundation models.","['MATH and and k= 40 for GSM8k). LLaMA-65B\noutperforms Minerva 62B on GSM8k, although it has\nnot been fine-tuned on mathematical data.\ndocstring. The model needs to generate a Python\nprogram that fits the description and satisfies the\ntest cases. In Table 8, we compare the pass@1\nscores of our models with existing language mod-\nels that have not been finetuned on code, namely\nPaLM and LaMDA (Thoppilan et al., 2022). PaLM\nand LLaMA were trained on datasets that contain\na similar number of code tokens.\nAs show in Table 8, for a similar number\nof parameters, LLaMA outperforms other gen-\neral models such as LaMDA and PaLM, which\nare not trained or finetuned specifically for code.\nLLaMA with 13B parameters and more outper-\nforms LaMDA 137B on both HumanEval and\nMBPP. LLaMA 65B also outperforms PaLM 62B,\neven when it is trained longer. The pass@1 ', 'a conference paper at ICLR 2023\nEmergent Ability Tasks\nFigure 20: Emergent ability tasks of GLM-130B. These tasks’ performance does not grow much\nuntil the model size reaches a certain threshold (e.g., 100B or 10B). After reaching the threshold, the\nmodel performance soars up quickly. The BIG-bench (Srivastava et al., 2022) benchmark collects\nmany of these challenges.\nMMLU, hindu_knowledge, crass_ai, implicatures, understanding_fables, modified_arithmetic,\nimplicit_relations, and gre_reading_comprehension from BIG-bench (Srivastava et al., 2022).\nIn line with the observation in (Wei et al., 2022b), we show that GLM-130B also presents the two\nsimilar scaling behaviors to other LLMs such as GPT-3, LaMDA, and PaLM. Though why and how\nLLMs present these intriguing properties remain unclear, GLM-130B provides open opportunities\nfor all researchers to test and understand the reason behind them.\n47\x0cPublished as a conference paper at ICLR 2023\nTable 11: Full configurations for ', 'LLaMA: Open and Efficient Foundation Language Models\nHugo Touvron∗, Thibaut Lavril∗, Gautier Izacard∗, Xavier Martinet\nMarie-Anne Lachaux, Timothee Lacroix, Baptiste Rozière, Naman Goyal\nEric Hambro, Faisal Azhar, Aurelien Rodriguez, Armand Joulin\nEdouard Grave∗, Guillaume Lample∗\nMeta AI\nAbstract\nWe introduce LLaMA, a collection of founda-\ntion language models ranging from 7B to 65B\nparameters. We train our models on trillions\nof tokens, and show that it is possible to train\nstate-of-the-art models using publicly avail-\nable datasets exclusively, without resorting\nto proprietary and inaccessible datasets. In\nparticular, LLaMA-13B outperforms GPT-3\n(175B) on most benchmarks, and LLaMA-\n65B is competitive with the best models,\nChinchilla-70B and PaLM-540B. We release\nall our models to the research community1.\n1 Introduction\nLarge Languages Models (LLMs) trained on mas-\nsive corpora of texts have shown their ability to per-\nform new tasks from textual instructions or from a\nfew examples (Brown et al., 2020). These few-shot\nproperties ']","The significant benchmarks LLaMA models were evaluated on are HumanEval and MBPP. Their performance outperforms LaMDA and PaLM models on both benchmarks, even when trained longer.",1.0,1.0,0.7506391406059265," LLaMA models were evaluated on benchmarks such as Common Sense Reasoning, Closed-book Question Answering, Reading Comprehension, Mathematical Reasoning, and Code Generation, showing superior or competitive performance compared to existing foundation models.","The significant benchmarks LLaMA models were evaluated on are GPT-3, Chinchilla, and PaLM. The LLaMA-13B model outperformed GPT-3 on most benchmarks, and the LLaMA-65B model is competitive with the best models, Chinchilla-70B and PaLM-540B.",1.0,0.6666666666666666,0.56718909740448 +What is the primary goal of introducing the massive multitask test in language understanding models?,"The primary goal is to bridge the gap between the vast knowledge models encounter during pretraining on the internet and the existing measures of success, assessing models across a diverse set of subjects to highlight knowledge and problem-solving ability in more challenging, real-world contexts.","['Published as a conference paper at ICLR 2021\nMEASURING MASSIVE MULTITASK\nLANGUAGE UNDERSTANDING\nDan Hendrycks\nUC BerkeleyCollin Burns\nColumbia UniversitySteven Basart\nUChicagoAndy Zou\nUC Berkeley\nMantas Mazeika\nUIUCDawn Song\nUC BerkeleyJacob Steinhardt\nUC Berkeley\nABSTRACT\nWe propose a new test to measure a text model’s multitask accuracy. The test\ncovers 57 tasks including elementary mathematics, US history, computer science,\nlaw, and more. To attain high accuracy on this test, models must possess extensive\nworld knowledge and problem solving ability. We find that while most recent\nmodels have near random-chance accuracy, the very largest GPT-3 model improves\nover random chance by almost 20 percentage points on average. However, on every\none of the 57 tasks, the best models still need substantial improvements before\nthey can reach expert-level accuracy. Models also have lopsided performance\nand frequently do not know when they are wrong. Worse, they still have near-\nrandom accuracy on some ', 'a model’s text understanding by covering numerous topics that\nhumans are incentivized to learn. Since our test consists in 57tasks, it can be used to analyze\naggregate properties of models across tasks and to track important shortcomings. The test and code is\navailable at github.com/hendrycks/test.\n2 R ELATED WORK\nPretraining. The dominant paradigm in NLP is to pretrain large models on massive text corpora\nincluding educational books and websites. In the process, these models are exposed to information\nabout a wide range of topics. Petroni et al. (2019) found that recent models learn enough information\nfrom pretraining that they can serve as knowledge bases. However, no prior work has comprehensively\nmeasured the knowledge models have across many real-world domains.\nUntil recently, researchers primarily used fine-tuned models on downstream tasks (Devlin et al., 2019).\nHowever, larger pretrained models like GPT-3 ', 'there has been notable progress across many natural language processing (NLP) tasks, led\nby methods such as ELMo (Peters et al., 2018), OpenAI GPT (Radford et al., 2018), and BERT\n(Devlin et al., 2019). The unifying theme of these methods is that they couple self-supervised learning\nfrom massive unlabelled text corpora with effective adapting of the resulting model to target tasks.\nThe tasks that have proven amenable to this general approach include question answering, textual\nentailment, and parsing, among many others (Devlin et al., 2019; Kitaev et al., 2019, i.a.).\nIn this context, the GLUE benchmark (Wang et al., 2019a) has become a prominent evaluation\nframework for research towards general-purpose language understanding technologies. GLUE is\na collection of nine language understanding tasks built on existing public datasets, together with\nprivate test data, an evaluation server, a single-number ']",The primary goal of introducing the massive multitask test in language understanding models is to measure a text model's multitask accuracy and assess its ability to possess extensive world knowledge and problem-solving ability across 57 tasks covering various domains.,0.75,1.0,0.4691257178783417,"The primary goal is to bridge the gap between the vast knowledge models encounter during pretraining on the internet and the existing measures of success, assessing models across a diverse set of subjects to highlight knowledge and problem-solving ability in more challenging, real-world contexts.",The primary goal of introducing the massive multitask test in language understanding models is to measure a text model's multitask accuracy and analyze models across numerous tasks to identify important shortcomings.,1.0,1.0,0.4712291955947876 +"What were the key findings regarding the models' performance on the multitask test, particularly concerning their knowledge application and subject-specific accuracy?","The findings highlighted that while recent models like GPT-3 have made progress, they exhibit lopsided performance across different subjects, lack expert-level accuracy, and struggle with procedural knowledge and tasks requiring calculations.","['Published as a conference paper at ICLR 2021\nMEASURING MASSIVE MULTITASK\nLANGUAGE UNDERSTANDING\nDan Hendrycks\nUC BerkeleyCollin Burns\nColumbia UniversitySteven Basart\nUChicagoAndy Zou\nUC Berkeley\nMantas Mazeika\nUIUCDawn Song\nUC BerkeleyJacob Steinhardt\nUC Berkeley\nABSTRACT\nWe propose a new test to measure a text model’s multitask accuracy. The test\ncovers 57 tasks including elementary mathematics, US history, computer science,\nlaw, and more. To attain high accuracy on this test, models must possess extensive\nworld knowledge and problem solving ability. We find that while most recent\nmodels have near random-chance accuracy, the very largest GPT-3 model improves\nover random chance by almost 20 percentage points on average. However, on every\none of the 57 tasks, the best models still need substantial improvements before\nthey can reach expert-level accuracy. Models also have lopsided performance\nand frequently do not know when they are wrong. Worse, they still have near-\nrandom accuracy on some ', 'task,\nand (2) jointly embed the models and tasks, and select a\nmodel using the learned metric (see Section 4). Notice that\n(1) does not use knowledge of the model performance on\nvarious tasks, which makes it more widely applicable but\nrequires we know what task a model was trained for and\nmay ignore the fact that models trained on slightly differ-\nent tasks may still provide an overall better feature extrac-\ntor (for example by over-fitting less to the task they were\ntrained on).\nIn Table 2 we compare the overall results of the various\nproposed metrics on the model selection meta-tasks. On\nboth the iNat+CUB and Mixed meta-tasks, the Asymmetric\nTASK 2VEC model selection is close to the ground-truth op-\ntimal, and significantly improves over both chance, and over\nusing an generic ImageNet expert. Notice that our method\nhasO(1)complexity, while searching over ', 'a model’s text understanding by covering numerous topics that\nhumans are incentivized to learn. Since our test consists in 57tasks, it can be used to analyze\naggregate properties of models across tasks and to track important shortcomings. The test and code is\navailable at github.com/hendrycks/test.\n2 R ELATED WORK\nPretraining. The dominant paradigm in NLP is to pretrain large models on massive text corpora\nincluding educational books and websites. In the process, these models are exposed to information\nabout a wide range of topics. Petroni et al. (2019) found that recent models learn enough information\nfrom pretraining that they can serve as knowledge bases. However, no prior work has comprehensively\nmeasured the knowledge models have across many real-world domains.\nUntil recently, researchers primarily used fine-tuned models on downstream tasks (Devlin et al., 2019).\nHowever, larger pretrained models like GPT-3 ']","The key findings regarding the models' performance on the multitask test revealed that most recent models have near random-chance accuracy, with the largest GPT-3 model improving over random chance by almost 20 percentage points on average. However, despite these improvements, the best models still need substantial enhancements before reaching expert-level accuracy on all 57 tasks. The models also exhibited lopsided performance, frequently did not recognize when they were wrong, and had near-random accuracy on some tasks. Additionally, the test highlighted the need for models to possess extensive world knowledge and problem-solving ability to achieve high accuracy.",1.0,0.6666666666666666,0.5125778913497925,"The findings highlighted that while recent models like GPT-3 have made progress, they exhibit lopsided performance across different subjects, lack expert-level accuracy, and struggle with procedural knowledge and tasks requiring calculations.","The key findings regarding the models' performance on the multitask test include: +- Most recent models have near random-chance accuracy. +- The largest GPT-3 model improves over random chance by almost 20 percentage points on average. +- The best models still need substantial improvements before reaching expert-level accuracy on all 57 tasks. +- Models have lopsided performance and frequently do not know when they are wrong. +- Models still have near-random accuracy on some tasks, especially those that require calculations and socially relevant subjects like morality and law.",1.0,0.6666666666666666,0.627117395401001 +How does DetectGPT perform in comparison to the strongest zero-shot baseline when detecting fake news articles generated by GPT-NeoX?,DetectGPT improves detection from an AUROC of 0.81 for the strongest zero-shot baseline to 0.95 for DetectGPT.,"['DetectGPT: Zero-Shot Machine-Generated Text Detection\nusing Probability Curvature\nEric Mitchell1Yoonho Lee1Alexander Khazatsky1Christopher D. Manning1Chelsea Finn1\nAbstract\nThe increasing fluency and widespread usage of\nlarge language models (LLMs) highlight the de-\nsirability of corresponding tools aiding detection\nof LLM-generated text. In this paper, we identify\na property of the structure of an LLM’s proba-\nbility function that is useful for such detection.\nSpecifically, we demonstrate that text sampled\nfrom an LLM tends to occupy negative curva-\nture regions of the model’s log probability func-\ntion. Leveraging this observation, we then define\na new curvature-based criterion for judging if a\npassage is generated from a given LLM. This\napproach, which we call DetectGPT, does not re-\nquire training a separate classifier, collecting a\ndataset of real or generated passages, or explic-\nitly watermarking generated text. It uses only\nlog probabilities computed by the model of in-\nterest and random perturbations of ', 'al.,\n2023) may further improve the effectiveness of methods\nsuch as DetectGPT, even as LLMs continue to improve.\nLimitations. One limitation of probability-based methods\nfor zero-shot machine-generated text detection (like Detect-\nGPT) is the white-box assumption that we can evaluate log\nprobabilities of the model(s) in question. For models be-\nhind APIs that do provide probabilities (such as GPT-3),\nevaluating probabilities nonetheless costs money. Another\nassumption of DetectGPT is access to a reasonable pertur-\nbation function. While in this work, we use off-the-shelf\nmask-filling models such as T5 and mT5 (for non-English\nlanguages), some domains may see reduced performance\nif existing mask-filling models do not well represent the\nspace of meaningful rephrases, reducing the quality of the\ncurvature estimate. While DetectGPT provides the best\navailable detection performance for PubMedQA, its drop\nin performance compared to other datasets may be a result\nAverage length0.9850.9900.995AUROC\ngpt-2\nAverage length0.960.970.980.99AUROC\nopt-2.7\nXSum\nSQuAD\nWritingPrompts\n130 140 150 ', 'quarter of the text in model samples has been re-\nplaced. Unsurprisingly, almost all methods show a gradual\ndegradation in performance as the sample is more heavily\nrevised. The entropy baseline shows surprisingly robust\nperformance in this setting (althought it is least accurate\non average), even slightly improving detection performance\nup to 24% replacement. DetectGPT shows the strongest\ndetection performance for all revision levels.\nImpact of alternative decoding strategies on detection.\nWhile Table 1 suggests that DetectGPT is effective for\n5We reduce the number of evaluation samples from 500 in our\nmain experiments to reduce the API costs of these experiments.XSum SQuAD WritingPrompts\nMethod top- ptop-ktop-ptop-ktop-p top-k\nlogp(x) 0.92 0.87 0.89 0.85 0.98 0.96\nRank 0.76 0.76 0.81 0.80 0.84 0.83\nLogRank 0.93* 0.90* 0.92* 0.90* 0.98 0.97\nEntropy 0.53 0.55 0.54 0.56 0.32 0.35\nDetectGPT 0.98 0.98 0.94 0.93 0.98 0.97\nTable 3. AUROC for ']",None,0.0,1.0,-0.025025706738233566,DetectGPT improves detection from an AUROC of 0.81 for the strongest zero-shot baseline to 0.95 for DetectGPT.,None,0.0,0.6666666666666666,-0.02502545155584812 +How does DetectGPT's performance vary across different datasets and models in zero-shot detection scenarios?,"DetectGPT shows discriminative improvement over existing methods, notably on the XSum dataset for fake news detection, with significant AUROC improvements.","['performance; the results are shown\nin Figure 7. In particular, the increased discrimination power\nof DetectGPT for larger mask-filling models supports the\ninterpretation that DetectGPT is estimating the curvature\nof the log probability in a latent semantic space, rather\nthan in raw token embedding space. Larger T5 models\nbetter represent this latent space, where random directions\ncorrespond to meaningful changes in the text.\nNumber of perturbations for DetectGPT. We evaluate the\nperformance of DetectGPT as a function of the number of\nperturbations used to estimate the expectation in Equation 1\non three datasets. The results are presented in Figure 8.\nDetection accuracy continues to improve until 100 pertur-\nbations, where it converges. Evaluations use 100 examples\nfrom each dataset.\nData distributional properties. We study more closely\nthe impact of the data distribution on DetectGPT, particu-\n1 10 100 10000.60.70.80.91.0Detection AUROC\nGPT-2\nXSum\nSQuAD\nWritingPrompts\n1 10 100 1000\nGPT-J\nNumber of perturbationsFigure ', 'al.,\n2023) may further improve the effectiveness of methods\nsuch as DetectGPT, even as LLMs continue to improve.\nLimitations. One limitation of probability-based methods\nfor zero-shot machine-generated text detection (like Detect-\nGPT) is the white-box assumption that we can evaluate log\nprobabilities of the model(s) in question. For models be-\nhind APIs that do provide probabilities (such as GPT-3),\nevaluating probabilities nonetheless costs money. Another\nassumption of DetectGPT is access to a reasonable pertur-\nbation function. While in this work, we use off-the-shelf\nmask-filling models such as T5 and mT5 (for non-English\nlanguages), some domains may see reduced performance\nif existing mask-filling models do not well represent the\nspace of meaningful rephrases, reducing the quality of the\ncurvature estimate. While DetectGPT provides the best\navailable detection performance for PubMedQA, its drop\nin performance compared to other datasets may be a result\nAverage length0.9850.9900.995AUROC\ngpt-2\nAverage length0.960.970.980.99AUROC\nopt-2.7\nXSum\nSQuAD\nWritingPrompts\n130 140 150 ', 'quarter of the text in model samples has been re-\nplaced. Unsurprisingly, almost all methods show a gradual\ndegradation in performance as the sample is more heavily\nrevised. The entropy baseline shows surprisingly robust\nperformance in this setting (althought it is least accurate\non average), even slightly improving detection performance\nup to 24% replacement. DetectGPT shows the strongest\ndetection performance for all revision levels.\nImpact of alternative decoding strategies on detection.\nWhile Table 1 suggests that DetectGPT is effective for\n5We reduce the number of evaluation samples from 500 in our\nmain experiments to reduce the API costs of these experiments.XSum SQuAD WritingPrompts\nMethod top- ptop-ktop-ptop-ktop-p top-k\nlogp(x) 0.92 0.87 0.89 0.85 0.98 0.96\nRank 0.76 0.76 0.81 0.80 0.84 0.83\nLogRank 0.93* 0.90* 0.92* 0.90* 0.98 0.97\nEntropy 0.53 0.55 0.54 0.56 0.32 0.35\nDetectGPT 0.98 0.98 0.94 0.93 0.98 0.97\nTable 3. AUROC for ']","DetectGPT's performance varies across different datasets and models in zero-shot detection scenarios. Specifically, DetectGPT provides the best available detection performance for PubMedQA. However, its drop in performance compared to other datasets may be a result of variations in data distribution and the effectiveness of the mask-filling models used to estimate the curvature of the log probability in a latent semantic space.",1.0,1.0,0.8107336163520813,"DetectGPT shows discriminative improvement over existing methods, notably on the XSum dataset for fake news detection, with significant AUROC improvements.","DetectGPT's performance varies across different datasets and models in zero-shot detection scenarios. The results show that DetectGPT provides the best available detection performance for PubMedQA, but its performance drops compared to other datasets. In particular, the increased discrimination power of DetectGPT for larger mask-filling models supports the interpretation that DetectGPT estimates the curvature of the log probability in a latent semantic space, rather than in raw token embedding space. Larger T5 models better represent this latent space, where random directions correspond to meaningful changes in the text.",0.5,1.0,0.8283261060714722 +How does DistilBERT's performance on the GLUE benchmark compare to BERT and ELMo?,"DistilBERT achieves performance close to BERT, outperforming ELMo across GLUE benchmark tasks, demonstrating significant efficiency and effectiveness.","['recent work in this area.\nSince its release, GLUE has been used as a testbed and showcase by the developers of several\ninfluential models, including GPT (Radford et al., 2018) and BERT (Devlin et al., 2019). As shown\nin Figure 1, progress on GLUE since its release has been striking. On GLUE, GPT and BERT\nachieved scores of 72.8 and 80.2 respectively, relative to 66.5 for an ELMo-based model (Peters\net al., 2018) and 63.7 for the strongest baseline with no multitask learning or pretraining above the\nword level. Recent models (Liu et al., 2019d; Yang et al., 2019) have clearly surpassed estimates of\nnon-expert human performance on GLUE (Nangia and Bowman, 2019). The success of these models\non GLUE has been driven by ever-increasing model capacity, compute power, and data quantity, as\nwell as innovations in ', 'ELMo,\nOpenAI GPT and BERT. The comparisons be-\ntween the model architectures are shown visually\nin Figure 3. Note that in addition to the architec-\nture differences, BERT and OpenAI GPT are fine-\ntuning approaches, while ELMo is a feature-based\napproach.\nThe most comparable existing pre-training\nmethod to BERT is OpenAI GPT, which trains a\nleft-to-right Transformer LM on a large text cor-\npus. In fact, many of the design decisions in BERT\nwere intentionally made to make it as close to\nGPT as possible so that the two methods could be\nminimally compared. The core argument of this\nwork is that the bi-directionality and the two pre-\ntraining tasks presented in Section 3.1 account for\nthe majority of the empirical improvements, but\nwe do note that there are several other differences\nbetween how BERT and GPT were trained:\n• GPT is trained on the BooksCorpus (800M\nwords); ', '91.1 80.8 86.3\nMT-DNN 87.9/87.4 96.0 89.9 86.3 96.5 92.7 68.4 91.1 89.0 87.6\nXLNet 90.2/89.8 98.6 90.3 86.3 96.8 93.0 67.8 91.6 90.4 88.4\nRoBERTa 90.8/90.2 98.9 90.2 88.2 96.7 92.3 67.8 92.2 89.0 88.5\nTable 5: Results on GLUE. All results are based on a 24-layer a rchitecture. BERT LARGE and XLNet LARGE results\nare from Devlin et al. (2019 ) and Yang et al. (2019 ), respectively. RoBERTa results on the development set are a\nmedian over five runs. RoBERTa results on the test set are ense mbles of single-task models. For RTE, STS and\nMRPC we finetune starting from the MNLI model instead of the ba seline pretrained model. Averages are obtained\nfrom the GLUE leaderboard.\nTask-specific modifications Two of the GLUE\ntasks require task-specific finetuning approaches\nto achieve competitive leaderboard results.\nQNLI : Recent submissions ']",DistilBERT's performance on the GLUE benchmark is not provided in the given context information.,1.0,1.0,0.7570562362670898,"DistilBERT achieves performance close to BERT, outperforming ELMo across GLUE benchmark tasks, demonstrating significant efficiency and effectiveness.","DistilBERT's performance on the GLUE benchmark is comparable to BERT, retaining 97% of BERT's performance with 40% fewer parameters. Additionally, DistilBERT is always on par or improving over the ELMo baseline on the tasks, showing up to a 19-point increase in accuracy on the STS-B task.",1.0,1.0,0.8800992965698242 +How does DistilBERT's performance on downstream tasks like IMDb sentiment classification and SQuAD v1.1 compare to BERT?,"DistilBERT closely approaches BERT's performance on IMDb sentiment classification and SQuAD v1.1, with minimal performance loss despite its smaller size.","['yields to comparable\nperformance on downstream tasks. Com-\nparison on downstream tasks: IMDb (test ac-\ncuracy) and SQuAD 1.1 (EM/F1 on dev set).\nD: with a second step of distillation during\nfine-tuning.\nModel IMDb SQuAD\n(acc.) (EM/F1)\nBERT-base 93.46 81.2/88.5\nDistilBERT 92.82 77.7/85.8\nDistilBERT (D) - 79.1/86.9Table 3: DistilBERT is significantly smaller\nwhile being constantly faster. Inference\ntime of a full pass of GLUE task STS-B (sen-\ntiment analysis) on CPU with a batch size of\n1.\nModel # param. Inf. time\n(Millions) (seconds)\nELMo 180 895\nBERT-base 110 668\nDistilBERT 66 410\nDistillation We applied best practices for training BERT model recently proposed in Liu et al. [2019].\nAs such, DistilBERT is distilled on very large batches leveraging gradient accumulation (up to 4K\nexamples per batch) using dynamic masking and without the next sentence prediction objective.\nData and compute power We train DistilBERT on the same corpus as the original ', 'by two BiLSTMs.4\nThe results on each of the 9 tasks are showed on Table 1 along with the macro-score (average of\nindividual scores). Among the 9 tasks, DistilBERT is always on par or improving over the ELMo\nbaseline (up to 19 points of accuracy on STS-B). DistilBERT also compares surprisingly well to\nBERT, retaining 97% of the performance with 40% fewer parameters.\n4.1 Downstream task benchmark\nDownstream tasks We further study the performances of DistilBERT on several downstream tasks\nunder efficient inference constraints: a classification task (IMDb sentiment classification - Maas et al.\n[2011]) and a question answering task (SQuAD v1.1 - Rajpurkar et al. [2016]).\nAs shown in Table 2, DistilBERT is only 0.6% point behind BERT in test accuracy on the IMDb\nbenchmark while being 40% smaller. On SQuAD, DistilBERT is within 3.9 points of ', 'DistilBERT, a distilled version of BERT: smaller,\nfaster, cheaper and lighter\nVictor SANH, Lysandre DEBUT, Julien CHAUMOND, Thomas WOLF\nHugging Face\n{victor,lysandre,julien,thomas}@huggingface.co\nAbstract\nAs Transfer Learning from large-scale pre-trained models becomes more prevalent\nin Natural Language Processing (NLP), operating these large models in on-the-\nedge and/or under constrained computational training or inference budgets remains\nchallenging. In this work, we propose a method to pre-train a smaller general-\npurpose language representation model, called DistilBERT, which can then be fine-\ntuned with good performances on a wide range of tasks like its larger counterparts.\nWhile most prior work investigated the use of distillation for building task-specific\nmodels, we leverage knowledge distillation during the pre-training phase and show\nthat it is possible to reduce the size of a BERT model by 40%, while retaining 97%\nof its language understanding capabilities and being 60% faster. To leverage ']","DistilBERT's performance on downstream tasks like IMDb sentiment classification and SQuAD v1.1 is slightly behind BERT in test accuracy on IMDb and within a small margin on SQuAD, while being significantly smaller, faster, and lighter.",1.0,1.0,0.9247759580612183,"DistilBERT closely approaches BERT's performance on IMDb sentiment classification and SQuAD v1.1, with minimal performance loss despite its smaller size.","DistilBERT's performance on downstream tasks like IMDb sentiment classification and SQuAD v1.1 is very close to BERT. It is only 0.6% point behind BERT in test accuracy on the IMDb benchmark and within 3.9 points on the SQuAD task, while being 40% smaller.",1.0,1.0,0.9092819690704346 +"What modifications to the BERT pretraining process are introduced in RoBERTa, and how do they collectively enhance model performance?","RoBERTa introduces several key modifications, including training with larger mini-batches, eliminating the next sentence prediction (NSP) loss, training on longer sequences, dynamically changing the masking pattern, and pretraining over more data. These improvements collectively enable RoBERTa to match or exceed the performance of all post-BERT models, achieving state-of-the-art results on benchmarks like GLUE, RACE, and SQuAD.","['RoBERTa is trained with dynamic mask-\ning (Section 4.1),FULL -SENTENCES without NSP\nloss (Section 4.2), large mini-batches (Section 4.3)\nand a larger byte-level BPE (Section 4.4).\nAdditionally, we investigate two other impor-\ntant factors that have been under-emphasized in\nprevious work: (1) the data used for pretraining,\nand (2) the number of training passes through the\ndata. For example, the recently proposed XLNet\narchitecture ( Yang et al. ,2019 ) is pretrained us-\ning nearly 10 times more data than the original\nBERT ( Devlin et al. ,2019 ). It is also trained with\na batch size eight times larger for half as many op-\ntimization steps, thus seeing four times as many\nsequences in pretraining compared to BERT.\nTo help disentangle the importance of these fac-\ntors from other modeling choices (e.g., the pre-\ntraining objective), we begin by training RoBERTa\nfollowing the BERT LARGE ', 'which includes a\ncareful evaluation of the effects of hyperparmeter\ntuning and training set size. We find that BERT\nwas significantly undertrained and propose an im-\nproved recipe for training BERT models, which\nwe call RoBERTa, that can match or exceed the\nperformance of all of the post-BERT methods.\nOur modifications are simple, they include: (1)\ntraining the model longer, with bigger batches,\nover more data; (2) removing the next sentence\nprediction objective; (3) training on longer se-\nquences; and (4) dynamically changing the mask-\ning pattern applied to the training data. We also\ncollect a large new dataset (CC-N EWS) of compa-\nrable size to other privately used datasets, to better\ncontrol for training set size effects.\nWhen controlling for training data, our im-\nproved training procedure improves upon the pub-\nlished BERT results on both GLUE and SQuAD.\nWhen trained for longer over additional data, ', 'BERT: Pre-training of Deep Bidirectional Transformers for\nLanguage Understanding\nJacob Devlin Ming-Wei Chang Kenton Lee Kristina Toutanova\nGoogle AI Language\n{jacobdevlin,mingweichang,kentonl,kristout }@google.com\nAbstract\nWe introduce a new language representa-\ntion model called BERT , which stands for\nBidirectional Encoder Representations from\nTransformers. Unlike recent language repre-\nsentation models (Peters et al., 2018a; Rad-\nford et al., 2018), BERT is designed to pre-\ntrain deep bidirectional representations from\nunlabeled text by jointly conditioning on both\nleft and right context in all layers. As a re-\nsult, the pre-trained BERT model can be fine-\ntuned with just one additional output layer\nto create state-of-the-art models for a wide\nrange of tasks, such as question answering and\nlanguage inference, without substantial task-\nspecific architecture modifications.\nBERT is conceptually simple and empirically\npowerful. It obtains new state-of-the-art re-\nsults on eleven natural language processing\ntasks, including pushing the GLUE score to\n80.5% (7.7% point absolute improvement),\nMultiNLI accuracy ']","The modifications introduced in the RoBERTa pretraining process include: +1) Training the model longer, with bigger batches, over more data +2) Removing the next sentence prediction objective +3) Training on longer sequences +4) Dynamically changing the masking pattern applied to the training data + +These modifications collectively enhance model performance by significantly improving upon the published BERT results on both GLUE and SQuAD when controlling for training data. Additionally, when trained for longer over additional data, RoBERTa can match or exceed the performance of all post-BERT methods.",1.0,1.0,0.8387560844421387,"RoBERTa introduces several key modifications, including training with larger mini-batches, eliminating the next sentence prediction (NSP) loss, training on longer sequences, dynamically changing the masking pattern, and pretraining over more data. These improvements collectively enable RoBERTa to match or exceed the performance of all post-BERT models, achieving state-of-the-art results on benchmarks like GLUE, RACE, and SQuAD.","RoBERTa introduces dynamic masking, full-sentences without NSP loss, large mini-batches, and a larger byte-level BPE compared to BERT. These modifications collectively enhance model performance by emphasizing factors such as the data used for pretraining and the number of training passes through the data, leading to improved training efficiency and effectiveness. Additionally, RoBERTa follows the BERT LARGE pretraining process to help disentangle the importance of these factors from other modeling choices, ultimately resulting in better model performance.",1.0,1.0,0.887593150138855 +"What role does the novel dataset CC-NEWS play in RoBERTa's pretraining, and how does it compare to other datasets used?","CC-NEWS, a large dataset collected from English news articles, provides a significant source of diverse and recent textual content for RoBERTa's pretraining, complementing other datasets and ensuring a broad coverage of language use and topics. This dataset helps control for training set size effects and contributes to the robustness and generalization capability of the model.","['transformer model introduced in [ 33]) by pre-training on 10,000 top\nstarred GitHub open source projects written in C, where we use Clang C tokenizer and Sentencepiece\nto tokenize the pre-training data. The C-BERT model is then fine tuned on each classification\nbenchmark. Additionally, we experiment with the POJ-104 dataset, which contains code examples in\nC and C++.\nC-BERT achieves appealing results on binary classification and vulnerability detection with C source\ncode [ 10,37]. However, it has not been used on multiclass classification tasks or with other languages\nsuch as C++, Java, and Python. Because we use sub-word tokenization and different programming\nlanguages share common tokens, we could apply the C-BERT model directly on the benchmarks.\nAfter pretraining, we fine tune the model for five epochs on each benchmark, with a batch size 32 and\nlearning rate 2e-5. ', 'related datasets.\nLarge scale. A useful dataset should contain a large number and variety of data samples to expose\nthe realistic and complex landscape of data distributions one meets in practice. CodeNet is the\nlargest dataset in its class - it has approximately 10 times more code samples than GCJ and its C++\nbenchmark is approximately 10 times larger than POJ-104.\nRich annotation. For the dataset class in question, it is important to include information beyond\nwhich problem a code sample solves to enable a wide range of applications and use cases. It is useful\nto know whether a code sample solves the problem correctly, and if not, the error category (e.g.,\ncompilation error, runtime error, and out-of-memory error). Since the source code is supposed to\nsolve a programming problem, it is advantageous to know the problem ', 'improves performance on down-\nstream tasks; (3) Our training improvements show\nthat masked language model pretraining, under\nthe right design choices, is competitive with all\nother recently published methods. We release our\nmodel, pretraining and fine-tuning code imple-\nmented in PyTorch ( Paszke et al. ,2017 ).\n2 Background\nIn this section, we give a brief overview of the\nBERT ( Devlin et al. ,2019 ) pretraining approach\nand some of the training choices that we will ex-\namine experimentally in the following section.\n2.1 Setup\nBERT takes as input a concatenation of two\nsegments (sequences of tokens), x1,...,x N\nandy1,...,yM. Segments usually consist of\nmore than one natural sentence. The two seg-\nments are presented as a single input sequence\nto BERT with special tokens delimiting them:\n[CLS],x1,...,x N,[SEP],y1,...,yM,[EOS].\nMandNare constrained such that M+N < T ,\nwhereTis a parameter that controls the maximum\nsequence length during training.\nThe model ']",None,0.0,0.0,-0.0660683810710907,"CC-NEWS, a large dataset collected from English news articles, provides a significant source of diverse and recent textual content for RoBERTa's pretraining, complementing other datasets and ensuring a broad coverage of language use and topics. This dataset helps control for training set size effects and contributes to the robustness and generalization capability of the model.",None,0.0,1.0,-0.06606854498386383 +Describe the process and purpose of the 'Next Sentence Prediction' task in BERT's pre-training.,"NSP involves predicting whether a sentence B is the actual next sentence that follows sentence A, facilitating understanding of sentence relationships.","['and replacement is performed once in the be-\nginning and saved for the duration of training, al-\nthough in practice, data is duplicated so the mask\nis not always the same for every training sentence\n(see Section 4.1).\nNext Sentence Prediction (NSP) NSP is a bi-\nnary classification loss for predicting whether two\nsegments follow each other in the original text.\nPositive examples are created by taking consecu-\ntive sentences from the text corpus. Negative ex-\namples are created by pairing segments from dif-\nferent documents. Positive and negative examples\nare sampled with equal probability.\nThe NSP objective was designed to improve\nperformance on downstream tasks, such as Natural\nLanguage Inference ( Bowman et al. ,2015 ), which\nrequire reasoning about the relationships between\npairs of sentences.\n2.4 Optimization\nBERT is optimized with Adam ( Kingma and Ba ,\n2015 ) using the following parameters: β1= 0.9,\nβ2= 0.999,ǫ=1e-6 ', 'a deep bidirectional Transformer. In addi-\ntion to the masked language model, we also use\na “next sentence prediction” task that jointly pre-\ntrains text-pair representations. The contributions\nof our paper are as follows:\n• We demonstrate the importance of bidirectional\npre-training for language representations. Un-\nlike Radford et al. (2018), which uses unidirec-\ntional language models for pre-training, BERT\nuses masked language models to enable pre-\ntrained deep bidirectional representations. This\nis also in contrast to Peters et al. (2018a), which\nuses a shallow concatenation of independently\ntrained left-to-right and right-to-left LMs.\n• We show that pre-trained representations reduce\nthe need for many heavily-engineered task-\nspecific architectures. BERT is the first fine-\ntuning based representation model that achieves\nstate-of-the-art performance on a large suite\nof sentence-level andtoken-level tasks, outper-\nforming many task-specific architectures.\n• BERT advances the state of the art for eleven\nNLP tasks. The code and pre-trained ', 'token is chosen, we replace\nthei-th token with (1) the [MASK] token 80% of\nthe time (2) a random token 10% of the time (3)\nthe unchanged i-th token 10% of the time. Then,\nTiwill be used to predict the original token with\ncross entropy loss. We compare variations of this\nprocedure in Appendix C.2.\nTask #2: Next Sentence Prediction (NSP)\nMany important downstream tasks such as Ques-\ntion Answering (QA) and Natural Language Infer-\nence (NLI) are based on understanding the rela-\ntionship between two sentences, which is not di-\nrectly captured by language modeling. In order\nto train a model that understands sentence rela-\ntionships, we pre-train for a binarized next sen-\ntence prediction task that can be trivially gener-\nated from any monolingual corpus. Specifically,\nwhen choosing the sentences AandBfor each pre-\ntraining example, 50% of the time Bis the actual\nnext sentence that ']","The Next Sentence Prediction (NSP) task in BERT's pre-training is a binary classification loss for predicting whether two segments follow each other in the original text. Positive examples are created by taking consecutive sentences from the text corpus, while negative examples are created by pairing segments from different documents. This task aims to improve performance on downstream tasks, such as Natural Language Inference, by training the model to understand the relationships between pairs of sentences.",1.0,1.0,0.8058959245681763,"NSP involves predicting whether a sentence B is the actual next sentence that follows sentence A, facilitating understanding of sentence relationships.","The 'Next Sentence Prediction' task in BERT's pre-training is used in addition to the masked language model task. It involves jointly pre-training text-pair representations by predicting whether one sentence follows another in a given text pair. This task helps BERT learn relationships and dependencies between sentences in a text, enabling it to better understand and generate coherent responses in natural language processing tasks.",1.0,0.6666666666666666,0.5910657644271851 +"What performance improvements does LLaMA-13B show over GPT-3, and how does LLaMA-65B stand in comparison to Chinchilla-70B and PaLM-540B?","LLaMA-13B outperforms GPT-3 (175B) on most benchmarks, and LLaMA-65B is competitive with Chinchilla-70B and PaLM-540B, showing that it can achieve comparable or better performance.","['democratize the access and\nstudy of LLMs, since it can be run on a single GPU.\nAt the higher-end of the scale, our 65B-parameter\nmodel is also competitive with the best large lan-\nguage models such as Chinchilla or PaLM-540B.\nUnlike Chinchilla, PaLM, or GPT-3, we only\nuse publicly available data, making our work com-\npatible with open-sourcing, while most existing\nmodels rely on data which is either not publicly\navailable or undocumented (e.g. “Books – 2TB” or\n“Social media conversations”). There exist some\nexceptions, notably OPT (Zhang et al., 2022),\nGPT-NeoX (Black et al., 2022), BLOOM (Scao\net al., 2022) and GLM (Zeng et al., 2022), but none\nthat are competitive with PaLM-62B or Chinchilla.\nIn the rest of this paper, we present an overview\nof the modifications we made to the transformer\narchitecture (Vaswani et al., 2017), as well as our\ntraining method. We ', 'et al., 2022) to leverage its bidirectional at-\ntention advantage and autoregressive blank infilling objective. Table 1 summarizes the comparison\nbetween GLM-130B, GPT-3 and another two open-source efforts—OPT-175B and BLOOM-176B,\nas well as PaLM 540B (Chowdhery et al., 2022)—a 4 ×larger model—as a reference.\nAltogether, the conceptual uniqueness and engineering efforts enable GLM-130B to exhibit perfor-\nmance that surpasses the level of GPT-3 on a wide range of benchmarks (in total 112 tasks) and also\noutperforms PaLM 540B in many cases, while outperformance over GPT-3 has not been observed in\nOPT-175B and BLOOM-176B (Cf. Figure 1 left). For zero-shot performance, GLM-130B is better\nthan GPT-3 175B (+5.0%), OPT-175B (+6.5%), and BLOOM-176B (+13.0%) on LAMBADA (Pa-\nperno et al., 2016), and achieves 3 ×better performance than GPT-3 on Big-bench-lite (Srivastava\net al., 2022). For the 5-shot MMLU (Hendrycks et ', 'and WinoGrande.\nLLaMA-13B model also outperforms GPT-3 on\nmost benchmarks despite being 10 ×smaller.\n3.2 Closed-book Question Answering\nWe compare LLaMA to existing large language\nmodels on two closed-book question answering\nbenchmarks: Natural Questions (Kwiatkowski\net al., 2019) and TriviaQA (Joshi et al., 2017). For\nboth benchmarks, we report exact match perfor-\nmance in a closed book setting, i.e., where the mod-\nels do not have access to documents that contain\nevidence to answer the question. In Table 4, we\nreport performance on NaturalQuestions, and in Ta-\nble 5, we report on TriviaQA. On both benchmarks,\nLLaMA-65B achieve state-of-the-arts performance\nin the zero-shot and few-shot settings. More im-\nportantly, the LLaMA-13B is also competitive on\nthese benchmarks with GPT-3 and Chinchilla, de-\nspite being 5-10 ×smaller. This model runs on a\nsingle V100 GPU during inference.\n0-shot 1-shot 5-shot 64-shot\nGopher 280B 43.5 - 57.0 57.2\nChinchilla 70B 55.4 - ']",LLaMA-13B shows performance improvements over GPT-3 in both zero-shot and few-shot settings. LLaMA-65B is also competitive with Chinchilla-70B and outperforms PaLM-540B on many benchmarks.,1.0,1.0,0.8695489168167114,"LLaMA-13B outperforms GPT-3 (175B) on most benchmarks, and LLaMA-65B is competitive with Chinchilla-70B and PaLM-540B, showing that it can achieve comparable or better performance.","LLaMA-13B shows performance improvements over GPT-3 in terms of pass@1 scores on both HumanEval and MBPP. On the other hand, LLaMA-65B outperforms Chinchilla-70B and PaLM-540B.",0.5,0.3333333333333333,0.9270268678665161 +How does LLaMA's training data preprocessing and mixture differ from other large language models?,"LLaMA's training data includes a mixture of sources like CommonCrawl, C4, GitHub, Wikipedia, and others, preprocessed with techniques like deduplication, language identification, and quality filtering, emphasizing the use of publicly available data.","['LLaMA: Open and Efficient Foundation Language Models\nHugo Touvron∗, Thibaut Lavril∗, Gautier Izacard∗, Xavier Martinet\nMarie-Anne Lachaux, Timothee Lacroix, Baptiste Rozière, Naman Goyal\nEric Hambro, Faisal Azhar, Aurelien Rodriguez, Armand Joulin\nEdouard Grave∗, Guillaume Lample∗\nMeta AI\nAbstract\nWe introduce LLaMA, a collection of founda-\ntion language models ranging from 7B to 65B\nparameters. We train our models on trillions\nof tokens, and show that it is possible to train\nstate-of-the-art models using publicly avail-\nable datasets exclusively, without resorting\nto proprietary and inaccessible datasets. In\nparticular, LLaMA-13B outperforms GPT-3\n(175B) on most benchmarks, and LLaMA-\n65B is competitive with the best models,\nChinchilla-70B and PaLM-540B. We release\nall our models to the research community1.\n1 Introduction\nLarge Languages Models (LLMs) trained on mas-\nsive corpora of texts have shown their ability to per-\nform new tasks from textual instructions or from a\nfew examples (Brown et al., 2020). These few-shot\nproperties ', 'can improve\nlong-tail knowledge. Our work differs in that we conduct\ncausal re-training experiments and find that model scaling\nis highly beneficial to long-tail QA performance.\n6. Conclusion and Future Work\nLarge language models demonstrate impressive few-shot\nlearning capabilities that arise from simply training on large-\nscale internet text. With the open-source release of LLMs—\nand their associated pre-training datasets—the research com-\nmunity can now begin to understand the origins of these\ncapabilities. Our work is one of the first to relate an ob-\nserved phenomenon in LLMs back to the pre-training data\nitself. In our case, our results are negative: while LLMs\nachieve moderate performance on open-domain QA bench-\nmarks, they are mainly successful on questions that probe\nknowledge that appears widely in their pre-training datasets.\nOur work raises numerous directions for further inquiry,\nnamely, how to improve retention of long-tail knowledge\ngiven that simply ', '5x160 with ReLU\nConvolution 1x128Convolution 15x256 with ReLUFigure 8: Siamese architecture for similarity analysis.\nMAP@R score [ 46] is computationally expensive for GMN models because an embedding has to be\ncomputed for all SPT pairs in the test set, and hence Table 14 reports results on smaller sampled test\nsets.\nDetails of MLM Experiment\nHere we show how a masked language model (MLM) can be trained with CodeNet. We closely\nfollow the approach by Ankur Singh, documented in the blog [ 48]. The goal of the model is to infer\nthe correct token for an arbitrary masked-out location in the source text. We assume that in every text,\nprecisely one token is randomly masked. The original token at such position is then the golden label.\nFrom each of the 1000 C++1000 problems, we randomly select 100 samples for ']",None,0.0,0.5,-0.054138604551553726,"LLaMA's training data includes a mixture of sources like CommonCrawl, C4, GitHub, Wikipedia, and others, preprocessed with techniques like deduplication, language identification, and quality filtering, emphasizing the use of publicly available data.","LLaMA's training data preprocessing and mixture differs from other large language models in that they train their models using publicly available datasets exclusively, without resorting to proprietary and inaccessible datasets. Additionally, LLaMA conducts causal re-training experiments and finds that model scaling is highly beneficial to long-tail QA performance.",1.0,1.0,0.8156492710113525 +How does the accuracy of the largest GPT-3 model compare to human professionals across the 57 tasks?,"he largest GPT-3 model achieves 43.9% accuracy on average, significantly below the expert-level performance, indicating that even the most advanced models require substantial improvements to reach human professional accuracy levels across these tasks.","['ALBERT-xxlarge attains an accuracy of 27.1%, with 27.2%\naccuracy for the humanities, 25.7%for the social sciences, 27.7%for STEM, and 27.9%for other.\nGPT-2 attains an accuracy of 32.4%, with 32.8%accuracy for the humanities, 33.3%for the social\nsciences, 30.2%for STEM, and 33.1%for other.\nCompare this to UnifiedQA’s smallest variant, which has just 60million parameters and approximately\n29.3%accuracy. It obtains higher accuracy than RoBERTa and ALBERT, even though it has fewer\nparameters. This suggests that its larger pretraining dataset enables higher accuracy. Likewise,\nUnifiedQA with 3billion parameters attains 43.7%, while the similarly sized GPT-2 model with 1.5\nbillion parameters attains 32.4%accuracy. This again suggests that T5’s larger pretraining dataset\nsize (and therefore UnifiedQA’s pretraining dataset size) can increase accuracy.\nA.2 E RROR ANALYSIS\nWe qualitatively analyze when GPT-3 makes high confidence mistakes. We find that while many of\nthese mistakes were clearly wrong, ', 'a commonsense benchmark (Hel-\nlaSwag), a linguistic understanding benchmark (Super-\nGLUE), and the massive multitask test. On previous\nbenchmarks, smaller models start well above random\nchance levels and exhibit more continuous improve-\nments with model size increases, but on our test, GPT-3\nmoves beyond random chance with the largest model.\nspecialized areas like law and ethics (Hendrycks et al., 2020). The granularity and breadth of the\nsubjects makes the benchmark ideal for identifying a model’s blind spots.\nWe find that meaningful progress on our benchmark has only become possible in recent months. In\nparticular, few-shot models up to 13billion parameters (Brown et al., 2020) achieve random chance\nperformance of 25% accuracy, but the 175billion parameter GPT-3 model reaches a much higher\n43.9%accuracy (see Figure 1b). On the other hand, unlike human professionals GPT-3 does not\nexcel at any single subject. Instead, ', 'we find that performance is lopsided, with GPT-3 having almost\n70% accuracy for its best subject but near-random performance for several other subjects.\nOur results indicate that while recent advances have been impressive, state-of-the-art models still\nstruggle at learning and applying knowledge from pretraining. The tasks with near-random accuracy\ninclude calculation-heavy subjects such as physics and mathematics and subjects related to human\nvalues such as law and morality. This second weakness is particularly concerning because it will\nbe important for future models to have a strong understanding of what is legal and what is ethical.\nWorryingly, we also find that GPT-3 does not have an accurate sense of what it does or does not know\nsince its average confidence can be up to 24% off from its actual accuracy. We comprehensively\nevaluate the breadth and depth of ']","The accuracy of the largest GPT-3 model is lopsided, with almost 70% accuracy for its best subject but near-random performance for several other subjects. In comparison, human professionals are likely to exhibit a more balanced and consistent level of accuracy across all 57 tasks.",1.0,1.0,0.9487650394439697,"he largest GPT-3 model achieves 43.9% accuracy on average, significantly below the expert-level performance, indicating that even the most advanced models require substantial improvements to reach human professional accuracy levels across these tasks.","The largest GPT-3 model reaches a much higher accuracy of 43.9% across the 57 tasks, whereas human professionals excel at any single subject but GPT-3 has near-random performance for several other subjects.",1.0,1.0,0.9300378561019897 +What can you say about the calibration of models like GPT-3 in relation to their confidence and accuracy on the multitask test?,"Models like GPT-3 are poorly calibrated, with their confidence often not accurately reflecting their actual performance across different subjects, indicating a need for improvement in model calibration techniques.","['way, our test shows that GPT-3 has\nmany knowledge blindspots and has capabilities that are lopsided.\nCalibration. We should not trust a model’s prediction unless the model is calibrated, meaning\nthat its confidence is a good estimate of the actual probability the prediction is correct. However,\nlarge neural networks are often miscalibrated (Guo et al., 2017), especially under distribution shift\n(Ovadia et al., 2019). We evaluate the calibration of GPT-3 by testing how well its average confidence\nestimates its actual accuracy for each subject. We show the results in Figure 8, which demonstrates\nthat GPT-3 is uncalibrated. In fact, its confidence is only weakly related to its actual accuracy in\nthe zero-shot setting, with the difference between its accuracy and confidence reaching up to 24%\nfor some subjects. Another calibration measure is the Root Mean Squared (RMS) ', 'a commonsense benchmark (Hel-\nlaSwag), a linguistic understanding benchmark (Super-\nGLUE), and the massive multitask test. On previous\nbenchmarks, smaller models start well above random\nchance levels and exhibit more continuous improve-\nments with model size increases, but on our test, GPT-3\nmoves beyond random chance with the largest model.\nspecialized areas like law and ethics (Hendrycks et al., 2020). The granularity and breadth of the\nsubjects makes the benchmark ideal for identifying a model’s blind spots.\nWe find that meaningful progress on our benchmark has only become possible in recent months. In\nparticular, few-shot models up to 13billion parameters (Brown et al., 2020) achieve random chance\nperformance of 25% accuracy, but the 175billion parameter GPT-3 model reaches a much higher\n43.9%accuracy (see Figure 1b). On the other hand, unlike human professionals GPT-3 does not\nexcel at any single subject. Instead, ', 'Published as a conference paper at ICLR 2021\nMEASURING MASSIVE MULTITASK\nLANGUAGE UNDERSTANDING\nDan Hendrycks\nUC BerkeleyCollin Burns\nColumbia UniversitySteven Basart\nUChicagoAndy Zou\nUC Berkeley\nMantas Mazeika\nUIUCDawn Song\nUC BerkeleyJacob Steinhardt\nUC Berkeley\nABSTRACT\nWe propose a new test to measure a text model’s multitask accuracy. The test\ncovers 57 tasks including elementary mathematics, US history, computer science,\nlaw, and more. To attain high accuracy on this test, models must possess extensive\nworld knowledge and problem solving ability. We find that while most recent\nmodels have near random-chance accuracy, the very largest GPT-3 model improves\nover random chance by almost 20 percentage points on average. However, on every\none of the 57 tasks, the best models still need substantial improvements before\nthey can reach expert-level accuracy. Models also have lopsided performance\nand frequently do not know when they are wrong. Worse, they still have near-\nrandom accuracy on some ']","The calibration of models like GPT-3 is shown to be uncalibrated, as their confidence is not a good estimate of their actual accuracy on the multitask test. The confidence of GPT-3 is only weakly related to its actual accuracy in the zero-shot setting, with the difference between accuracy and confidence reaching up to 24% for some subjects.",1.0,1.0,0.7593065500259399,"Models like GPT-3 are poorly calibrated, with their confidence often not accurately reflecting their actual performance across different subjects, indicating a need for improvement in model calibration techniques.","The calibration of models like GPT-3 is uncalibrated, as its confidence is only weakly related to its actual accuracy on the multitask test, with the difference between accuracy and confidence reaching up to 24% for some subjects.",1.0,0.3333333333333333,0.7717238068580627 +"Without needing training on a separate classifier, how does DetectGPT determine if a passage was generated by an LLM?",DetectGPT uses the log probabilities computed by the model of interest and random perturbations of the passage from another generic pre-trained language model.,"['a move to the White House that included bringing along his pet German Shepherd…”\nDetectGPT\nx\n...GPT-3(1)Perturb(2) Score(3) Compare\n🤖 from GPT-3\nYes(reword with T5)\n“made a move” “moved”→“pet” “dog”→Delete “bringing along”\n...\n🤔 from other source\nNo\nFigure 1. We aim to determine whether a piece of text was gener-\nated by a particular LLM p, such as GPT-3. To classify a candidate\npassage x, DetectGPT first generates minor perturbations of the\npassage ˜xiusing a generic pre-trained model such as T5. Then\nDetectGPT compares the log probability under pof the original\nsample xwith each perturbed sample ˜xi. If the average log ratio\nis high, the sample is likely from the source model.\nWhile recent work has found that cogent-sounding LLM-\ngenerated responses are often simply wrong (Lin et al.,\n2022), the articulate nature of such generated text may still\nmake LLMs attractive for replacing human labor in ', 'DetectGPT: Zero-Shot Machine-Generated Text Detection\nusing Probability Curvature\nEric Mitchell1Yoonho Lee1Alexander Khazatsky1Christopher D. Manning1Chelsea Finn1\nAbstract\nThe increasing fluency and widespread usage of\nlarge language models (LLMs) highlight the de-\nsirability of corresponding tools aiding detection\nof LLM-generated text. In this paper, we identify\na property of the structure of an LLM’s proba-\nbility function that is useful for such detection.\nSpecifically, we demonstrate that text sampled\nfrom an LLM tends to occupy negative curva-\nture regions of the model’s log probability func-\ntion. Leveraging this observation, we then define\na new curvature-based criterion for judging if a\npassage is generated from a given LLM. This\napproach, which we call DetectGPT, does not re-\nquire training a separate classifier, collecting a\ndataset of real or generated passages, or explic-\nitly watermarking generated text. It uses only\nlog probabilities computed by the model of in-\nterest and random perturbations of ', 'performance; the results are shown\nin Figure 7. In particular, the increased discrimination power\nof DetectGPT for larger mask-filling models supports the\ninterpretation that DetectGPT is estimating the curvature\nof the log probability in a latent semantic space, rather\nthan in raw token embedding space. Larger T5 models\nbetter represent this latent space, where random directions\ncorrespond to meaningful changes in the text.\nNumber of perturbations for DetectGPT. We evaluate the\nperformance of DetectGPT as a function of the number of\nperturbations used to estimate the expectation in Equation 1\non three datasets. The results are presented in Figure 8.\nDetection accuracy continues to improve until 100 pertur-\nbations, where it converges. Evaluations use 100 examples\nfrom each dataset.\nData distributional properties. We study more closely\nthe impact of the data distribution on DetectGPT, particu-\n1 10 100 10000.60.70.80.91.0Detection AUROC\nGPT-2\nXSum\nSQuAD\nWritingPrompts\n1 10 100 1000\nGPT-J\nNumber of perturbationsFigure ']","DetectGPT determines if a passage was generated by an LLM by comparing the log probability under the model of the original sample with each perturbed sample. If the average log ratio is high, the sample is likely from the source model.",1.0,1.0,0.843525767326355,DetectGPT uses the log probabilities computed by the model of interest and random perturbations of the passage from another generic pre-trained language model.,"DetectGPT determines if a passage was generated by an LLM by comparing the log probability under the LLM of the original sample with each perturbed sample generated using a generic pre-trained model. If the average log ratio is high, the sample is likely from the source model.",1.0,0.3333333333333333,0.8362866044044495 +"What role do random perturbations play in DetectGPT's methodology, and how are they applied?","Random perturbations are used to slightly modify the text, keeping its meaning similar. DetectGPT evaluates the change in log probability caused by these perturbations to detect machine-generated text.","['8. Impact of varying the number of perturbations (samples\nof mask and mask-fill) used by DetectGPT on AUROC for GPT-2\n(left) and GPT-J ( right ) to estimate the perturbation discrepancy\non detection. Averaging up to 100 perturbations greatly increases\nDetectGPT’s reliability. Perturbations sampled from T5-large.\nlarly how the domain impacts the threshold separating the\nperturbation discrepancy distributions of model-generated\nand human texts as well as the impact of passage length on\ndetection. Figure 9 shows the perturbation discrepancy dis-\ntributions for model-generated and human texts across four\ndata distributions, using GPT-Neo-2.7B to generate sam-\nples. A threshold of slightly below 0.1 separates human and\nmodel texts across data distributions, which is important for\npractical scenarios in which a passage may be analyzed with-\nout knowing its domain a priori. Finally, Figure 10 shows an\nanalysis of DetectGPT’s performance as a function of ', 'performance; the results are shown\nin Figure 7. In particular, the increased discrimination power\nof DetectGPT for larger mask-filling models supports the\ninterpretation that DetectGPT is estimating the curvature\nof the log probability in a latent semantic space, rather\nthan in raw token embedding space. Larger T5 models\nbetter represent this latent space, where random directions\ncorrespond to meaningful changes in the text.\nNumber of perturbations for DetectGPT. We evaluate the\nperformance of DetectGPT as a function of the number of\nperturbations used to estimate the expectation in Equation 1\non three datasets. The results are presented in Figure 8.\nDetection accuracy continues to improve until 100 pertur-\nbations, where it converges. Evaluations use 100 examples\nfrom each dataset.\nData distributional properties. We study more closely\nthe impact of the data distribution on DetectGPT, particu-\n1 10 100 10000.60.70.80.91.0Detection AUROC\nGPT-2\nXSum\nSQuAD\nWritingPrompts\n1 10 100 1000\nGPT-J\nNumber of perturbationsFigure ', 'al.,\n2023) may further improve the effectiveness of methods\nsuch as DetectGPT, even as LLMs continue to improve.\nLimitations. One limitation of probability-based methods\nfor zero-shot machine-generated text detection (like Detect-\nGPT) is the white-box assumption that we can evaluate log\nprobabilities of the model(s) in question. For models be-\nhind APIs that do provide probabilities (such as GPT-3),\nevaluating probabilities nonetheless costs money. Another\nassumption of DetectGPT is access to a reasonable pertur-\nbation function. While in this work, we use off-the-shelf\nmask-filling models such as T5 and mT5 (for non-English\nlanguages), some domains may see reduced performance\nif existing mask-filling models do not well represent the\nspace of meaningful rephrases, reducing the quality of the\ncurvature estimate. While DetectGPT provides the best\navailable detection performance for PubMedQA, its drop\nin performance compared to other datasets may be a result\nAverage length0.9850.9900.995AUROC\ngpt-2\nAverage length0.960.970.980.99AUROC\nopt-2.7\nXSum\nSQuAD\nWritingPrompts\n130 140 150 ']",The random perturbations play a role in estimating the perturbation discrepancy on detection in DetectGPT's methodology. They are applied by varying the number of samples of mask and mask-fill used by DetectGPT. Averaging up to 100 perturbations greatly increases DetectGPT's reliability. These perturbations are sampled from T5-large and are used to estimate the expectation in the detection process.,0.75,1.0,0.7225701212882996,"Random perturbations are used to slightly modify the text, keeping its meaning similar. DetectGPT evaluates the change in log probability caused by these perturbations to detect machine-generated text.","Random perturbations play a crucial role in DetectGPT's methodology by generating passages that are 'nearby' the candidate passage. These random perturbations are applied by using a perturbation function q(· |x) that gives a distribution over slightly modified versions of the original passage x, with similar meaning. This function q(· |x) might involve asking a human to rewrite a passage or applying small perturbations to the original passage to create a new version.",1.0,1.0,0.807762861251831 +What specific architectural changes were made to develop DistilBERT from BERT?,"DistilBERT omits token-type embeddings and the pooler layer, and reduces the number of layers by half compared to BERT.","['DistilBERT, a distilled version of BERT: smaller,\nfaster, cheaper and lighter\nVictor SANH, Lysandre DEBUT, Julien CHAUMOND, Thomas WOLF\nHugging Face\n{victor,lysandre,julien,thomas}@huggingface.co\nAbstract\nAs Transfer Learning from large-scale pre-trained models becomes more prevalent\nin Natural Language Processing (NLP), operating these large models in on-the-\nedge and/or under constrained computational training or inference budgets remains\nchallenging. In this work, we propose a method to pre-train a smaller general-\npurpose language representation model, called DistilBERT, which can then be fine-\ntuned with good performances on a wide range of tasks like its larger counterparts.\nWhile most prior work investigated the use of distillation for building task-specific\nmodels, we leverage knowledge distillation during the pre-training phase and show\nthat it is possible to reduce the size of a BERT model by 40%, while retaining 97%\nof its language understanding capabilities and being 60% faster. To leverage ', 'BERT: Pre-training of Deep Bidirectional Transformers for\nLanguage Understanding\nJacob Devlin Ming-Wei Chang Kenton Lee Kristina Toutanova\nGoogle AI Language\n{jacobdevlin,mingweichang,kentonl,kristout }@google.com\nAbstract\nWe introduce a new language representa-\ntion model called BERT , which stands for\nBidirectional Encoder Representations from\nTransformers. Unlike recent language repre-\nsentation models (Peters et al., 2018a; Rad-\nford et al., 2018), BERT is designed to pre-\ntrain deep bidirectional representations from\nunlabeled text by jointly conditioning on both\nleft and right context in all layers. As a re-\nsult, the pre-trained BERT model can be fine-\ntuned with just one additional output layer\nto create state-of-the-art models for a wide\nrange of tasks, such as question answering and\nlanguage inference, without substantial task-\nspecific architecture modifications.\nBERT is conceptually simple and empirically\npowerful. It obtains new state-of-the-art re-\nsults on eleven natural language processing\ntasks, including pushing the GLUE score to\n80.5% (7.7% point absolute improvement),\nMultiNLI accuracy ', 'inference speed\nTo further investigate the speed-up/size trade-off of DistilBERT, we compare (in Table 3) the number\nof parameters of each model along with the inference time needed to do a full pass on the STS-\nB development set on CPU (Intel Xeon E5-2690 v3 Haswell @2.9GHz) using a batch size of 1.\nDistilBERT has 40% fewer parameters than BERT and is 60% faster than BERT.\nOn device computation We studied whether DistilBERT could be used for on-the-edge applications\nby building a mobile application for question answering. We compare the average inference time on\na recent smartphone (iPhone 7 Plus) against our previously trained question answering model based\non BERT-base. Excluding the tokenization step, DistilBERT is 71% faster than BERT, and the whole\nmodel weighs 207 MB (which could be further reduced with quantization). Our code is ']","Knowledge distillation during the pre-training phase was leveraged to reduce the size of the BERT model by 40%, while retaining 97% of its language understanding capabilities and being 60% faster.",1.0,0.5,0.38079750537872314,"DistilBERT omits token-type embeddings and the pooler layer, and reduces the number of layers by half compared to BERT.","DistilBERT was developed by leveraging knowledge distillation during the pre-training phase, which allowed for reducing the size of the BERT model by 40% while retaining 97% of its language understanding capabilities. Additionally, DistilBERT was trained on very large batches using dynamic masking and without the next sentence prediction objective.",0.5714285714285714,1.0,0.6470428705215454 +What core challenge does HellaSwag aim to address in the context of state-of-the-art models' capabilities in commonsense natural language inference (NLI)?,"HellaSwag challenges the notion that state-of-the-art models have solved commonsense NLI by introducing adversarially crafted dataset examples that are easy for humans but difficult for machines, highlighting the gap in commonsense reasoning.","['How well do deep pretrained models, like\n1A New York Times article at https: //nyti.ms /2DycutY.\n1arXiv:1905.07830v1 [cs.CL] 19 May 2019\x0cBERT, perform at commonsense natural language\ninference (NLI)? Our surprising conclusion is\nthat the underlying task remains unsolved. In-\ndeed, we find that deep models such as BERT do\nnot demonstrate robust commonsense reasonining\nability by themselves. Instead, they operate more\nlikerapid surface learners for a particular dataset.\nTheir strong performance on SWAG is dependent\non the finetuning process, wherein they largely\nlearn to pick up on dataset-specific distributional\nbiases. When the distribution of language shifts\nslightly, performance drops drastically – even if\nthe domain remains identical.\nWe study this question by introducing Hella-\nSwag ,2a new benchmark for commonsense\nNLI. We use Adversarial Filtering (AF), a data-\ncollection paradigm in which a series of discrim-\ninators is used to select a challenging set of gen-\nerated wrong ', 'there has been notable progress across many natural language processing (NLP) tasks, led\nby methods such as ELMo (Peters et al., 2018), OpenAI GPT (Radford et al., 2018), and BERT\n(Devlin et al., 2019). The unifying theme of these methods is that they couple self-supervised learning\nfrom massive unlabelled text corpora with effective adapting of the resulting model to target tasks.\nThe tasks that have proven amenable to this general approach include question answering, textual\nentailment, and parsing, among many others (Devlin et al., 2019; Kitaev et al., 2019, i.a.).\nIn this context, the GLUE benchmark (Wang et al., 2019a) has become a prominent evaluation\nframework for research towards general-purpose language understanding technologies. GLUE is\na collection of nine language understanding tasks built on existing public datasets, together with\nprivate test data, an evaluation server, a single-number ', 'both sentences, the task is to determine whether the word is used with the same sense in\nboth sentences. Sentences are drawn from WordNet (Miller, 1995), VerbNet (Schuler, 2005), and\nWiktionary. We follow the original work and evaluate using accuracy.\nWSC (Winograd Schema Challenge, Levesque et al., 2012) is a coreference resolution task in\nwhich examples consist of a sentence with a pronoun and a list of noun phrases from the sentence.\nThe system must determine the correct referrent of the pronoun from among the provided choices.\nWinograd schemas are designed to require everyday knowledge and commonsense reasoning to solve.\nGLUE includes a version of WSC recast as NLI, known as WNLI. Until very recently, no substantial\nprogress had been made on WNLI, with many submissions opting to submit majority class predic-\ntions.2In the past few months, ']","HellaSwag aims to address the core challenge of robust commonsense reasoning ability in deep pretrained models by testing their performance on a challenging set of generated wrong answers, using Adversarial Filtering (AF) as a data-collection paradigm.",1.0,0.5,0.6106338500976562,"HellaSwag challenges the notion that state-of-the-art models have solved commonsense NLI by introducing adversarially crafted dataset examples that are easy for humans but difficult for machines, highlighting the gap in commonsense reasoning.","HellaSwag aims to address the challenge of robust commonsense reasoning ability in deep pretrained models such as BERT, which have been found to operate more like rapid surface learners dependent on dataset-specific biases rather than demonstrating true commonsense reasoning ability.",1.0,0.5,0.6637831330299377 +"How does the dynamic masking strategy in RoBERTa differ from BERT's static masking, and what advantage does it offer?","Unlike BERT's static masking, which applies the same mask to each training instance throughout epochs, RoBERTa employs dynamic masking, where the mask is generated anew for each instance every time it is fed into the model. This strategy ensures a richer variety of training examples and prevents the model from overfitting to a specific masked pattern.","['RoBERTa is trained with dynamic mask-\ning (Section 4.1),FULL -SENTENCES without NSP\nloss (Section 4.2), large mini-batches (Section 4.3)\nand a larger byte-level BPE (Section 4.4).\nAdditionally, we investigate two other impor-\ntant factors that have been under-emphasized in\nprevious work: (1) the data used for pretraining,\nand (2) the number of training passes through the\ndata. For example, the recently proposed XLNet\narchitecture ( Yang et al. ,2019 ) is pretrained us-\ning nearly 10 times more data than the original\nBERT ( Devlin et al. ,2019 ). It is also trained with\na batch size eight times larger for half as many op-\ntimization steps, thus seeing four times as many\nsequences in pretraining compared to BERT.\nTo help disentangle the importance of these fac-\ntors from other modeling choices (e.g., the pre-\ntraining objective), we begin by training RoBERTa\nfollowing the BERT LARGE ', 'a deep bidirectional Transformer. In addi-\ntion to the masked language model, we also use\na “next sentence prediction” task that jointly pre-\ntrains text-pair representations. The contributions\nof our paper are as follows:\n• We demonstrate the importance of bidirectional\npre-training for language representations. Un-\nlike Radford et al. (2018), which uses unidirec-\ntional language models for pre-training, BERT\nuses masked language models to enable pre-\ntrained deep bidirectional representations. This\nis also in contrast to Peters et al. (2018a), which\nuses a shallow concatenation of independently\ntrained left-to-right and right-to-left LMs.\n• We show that pre-trained representations reduce\nthe need for many heavily-engineered task-\nspecific architectures. BERT is the first fine-\ntuning based representation model that achieves\nstate-of-the-art performance on a large suite\nof sentence-level andtoken-level tasks, outper-\nforming many task-specific architectures.\n• BERT advances the state of the art for eleven\nNLP tasks. The code and pre-trained ', 'DistilBERT, a distilled version of BERT: smaller,\nfaster, cheaper and lighter\nVictor SANH, Lysandre DEBUT, Julien CHAUMOND, Thomas WOLF\nHugging Face\n{victor,lysandre,julien,thomas}@huggingface.co\nAbstract\nAs Transfer Learning from large-scale pre-trained models becomes more prevalent\nin Natural Language Processing (NLP), operating these large models in on-the-\nedge and/or under constrained computational training or inference budgets remains\nchallenging. In this work, we propose a method to pre-train a smaller general-\npurpose language representation model, called DistilBERT, which can then be fine-\ntuned with good performances on a wide range of tasks like its larger counterparts.\nWhile most prior work investigated the use of distillation for building task-specific\nmodels, we leverage knowledge distillation during the pre-training phase and show\nthat it is possible to reduce the size of a BERT model by 40%, while retaining 97%\nof its language understanding capabilities and being 60% faster. To leverage ']","RoBERTa is trained with dynamic masking, where the masking of tokens changes dynamically during training, unlike BERT's static masking. This allows RoBERTa to see all tokens during training, leading to improved performance as the model is able to learn more from each token.",0.5,1.0,0.8392252922058105,"Unlike BERT's static masking, which applies the same mask to each training instance throughout epochs, RoBERTa employs dynamic masking, where the mask is generated anew for each instance every time it is fed into the model. This strategy ensures a richer variety of training examples and prevents the model from overfitting to a specific masked pattern.","RoBERTa is trained with dynamic masking, where a random sample of the tokens in the input sequence is selected and replaced with the special token [MASK]. This differs from BERT's static masking, where the same tokens are masked throughout the training process. The advantage of dynamic masking is that it allows for more varied and unpredictable masking patterns, which helps the model generalize better to different contexts and improves its robustness.",1.0,1.0,0.878290593624115 +"How does the performance of RoBERTa compare to BERT and other models on the GLUE benchmark, particularly in terms of state-of-the-art achievements?","RoBERTa surpasses BERT and establishes new state-of-the-art performance on the GLUE benchmark, particularly excelling in tasks like MNLI, QNLI, RTE, and STS-B. This achievement underscores the effectiveness of RoBERTa's optimizations and its capacity to better understand and process natural language.","['accumulates improvements from the row s above. RoBERTa\nmatches the architecture and training objective of BERT LARGE . Results for BERT LARGE and XLNet LARGE are from\nDevlin et al. (2019 ) and Yang et al. (2019 ), respectively. Complete results on all GLUE tasks can be fo und in the\nAppendix.\nDevlin et al. (2019 ). We pretrain our model using\n1024 V100 GPUs for approximately one day.\nResults We present our results in Table 4. When\ncontrolling for training data, we observe that\nRoBERTa provides a large improvement over the\noriginally reported BERT LARGE results, reaffirming\nthe importance of the design choices we explored\nin Section 4.\nNext, we combine this data with the three ad-\nditional datasets described in Section 3.2. We\ntrain RoBERTa over the combined data with the\nsame number of training steps as before (100K).\nIn total, we ', '91.1 80.8 86.3\nMT-DNN 87.9/87.4 96.0 89.9 86.3 96.5 92.7 68.4 91.1 89.0 87.6\nXLNet 90.2/89.8 98.6 90.3 86.3 96.8 93.0 67.8 91.6 90.4 88.4\nRoBERTa 90.8/90.2 98.9 90.2 88.2 96.7 92.3 67.8 92.2 89.0 88.5\nTable 5: Results on GLUE. All results are based on a 24-layer a rchitecture. BERT LARGE and XLNet LARGE results\nare from Devlin et al. (2019 ) and Yang et al. (2019 ), respectively. RoBERTa results on the development set are a\nmedian over five runs. RoBERTa results on the test set are ense mbles of single-task models. For RTE, STS and\nMRPC we finetune starting from the MNLI model instead of the ba seline pretrained model. Averages are obtained\nfrom the GLUE leaderboard.\nTask-specific modifications Two of the GLUE\ntasks require task-specific finetuning approaches\nto achieve competitive leaderboard results.\nQNLI : Recent submissions ', 'data.\nOur single RoBERTa model outperforms all but\none of the single model submissions, and is the\ntop scoring system among those that do not rely\non data augmentation.\n5.3 RACE Results\nIn RACE, systems are provided with a passage of\ntext, an associated question, and four candidate an-\nswers. Systems are required to classify which of\nthe four candidate answers is correct.\nWe modify RoBERTa for this task by concate-Model Accuracy Middle High\nSingle models on test (as of July 25, 2019)\nBERT LARGE 72.0 76.6 70.1\nXLNet LARGE 81.7 85.4 80.2\nRoBERTa 83.2 86.5 81.3\nTable 7: Results on the RACE test set. BERT LARGE and\nXLNet LARGE results are from Yang et al. (2019 ).\nnating each candidate answer with the correspond-\ning question and passage. We then encode each of\nthese four sequences and pass the resulting [CLS]\nrepresentations through a fully-connected layer,\nwhich is ']","RoBERTa performs better than BERT and other models on the GLUE benchmark, showcasing state-of-the-art achievements in terms of performance improvements.",1.0,1.0,0.8539428114891052,"RoBERTa surpasses BERT and establishes new state-of-the-art performance on the GLUE benchmark, particularly excelling in tasks like MNLI, QNLI, RTE, and STS-B. This achievement underscores the effectiveness of RoBERTa's optimizations and its capacity to better understand and process natural language.","RoBERTa outperforms BERT and other models on the GLUE benchmark, achieving new state-of-the-art results with a score of 80.5%, which is a 7.7% absolute improvement.",1.0,1.0,0.8624818325042725 +How should future language model benchmarks be structured to ensure a holistic assessment of models' capabilities and knowledge breadth?,"Future benchmarks should integrate a broader spectrum of subjects and cognitive skills, emphasizing the inclusion of tasks that test models' ethical reasoning, understanding of human values, and ability to perform complex problem-solving, beyond the mere scale of data and parameters.","['learning model\nusage should be developed for guiding users to learn ‘Dos’\nand Dont’ in AI. Detailed policies could also be proposed\nto list all user’s responsibilities before the model access.\nC. Language Models Beyond ChatGPT\nThe examination of ethical implications associated with\nlanguage models necessitates a comprehensive examina-\ntion of the broader challenges that arise within the domain\x0cof language models, in light of recent advancements in\nthe field of artificial intelligence. The last decade has seen\na rapid evolution of AI techniques, characterized by an\nexponential increase in the size and complexity of AI\nmodels, and a concomitant scale-up of model parameters.\nThe scaling laws that govern the development of language\nmodels,asdocumentedinrecentliterature[84,85],suggest\nthatwecanexpecttoencounterevenmoreexpansivemod-\nels that incorporate multiple modalities in the near future.\nEfforts to integrate multiple modalities into a single model\nare driven by the ultimate goal of realizing the concept of\nfoundation models [86]. ', 'language models are\nat learning and applying knowledge from many domains.\nTo bridge the gap between the wide-ranging knowledge that models see during pretraining and the\nexisting measures of success, we introduce a new benchmark for assessing models across a diverse\nset of subjects that humans learn. We design the benchmark to measure knowledge acquired during\npretraining by evaluating models exclusively in zero-shot and few-shot settings. This makes the\nbenchmark more challenging and more similar to how we evaluate humans. The benchmark covers\n57subjects across STEM, the humanities, the social sciences, and more. It ranges in difficulty from\nan elementary level to an advanced professional level, and it tests both world knowledge and problem\nsolving ability. Subjects range from traditional areas, such as mathematics and history, to more\n1arXiv:2009.03300v3 [cs.CY] 12 Jan 2021\x0cPublished as a conference paper at ', 'a\nlack of access to the benefits of these models for people\nwho speak different languages and can lead to biased or\nunfairpredictionsaboutthosegroups[14,15].Toovercome\nthis, it is crucial to ensure that the training data contains\na substantial proportion of diverse, high-quality corpora\nfrom various languages and cultures.\nb) Robustness: Another major ethical consideration\nin the design and implementation of language models is\ntheir robustness. Robustness refers to a model’s ability\nto maintain its performance when given input that is\nsemantically or syntactically different from the input it\nwas trained on.\nSemantic Perturbation: Semantic perturbation is a type\nof input that can cause a language model to fail [40, 41].\nThis input has different syntax but is semantically similar\nto the input used for training the model. To address this,\nit is crucial to ensure that the training data is diverse and\nrepresentative of the population it will ']","Future language model benchmarks should be structured to measure knowledge acquired during pretraining, evaluating models exclusively in zero-shot and few-shot settings across a diverse set of subjects. This will make the benchmark more challenging and similar to how we evaluate humans, covering a wide range of subjects from STEM, humanities, social sciences, and more, ranging from elementary to advanced professional levels, testing both world knowledge and problem-solving ability. Additionally, ensuring that the training data contains a substantial proportion of diverse, high-quality corpora from various languages and cultures can help address ethical considerations such as biased or unfair predictions about different language groups and ensure robustness in model performance when faced with semantically or syntactically different input.",1.0,0.0,0.6025552153587341,"Future benchmarks should integrate a broader spectrum of subjects and cognitive skills, emphasizing the inclusion of tasks that test models' ethical reasoning, understanding of human values, and ability to perform complex problem-solving, beyond the mere scale of data and parameters.","Future language model benchmarks should be structured to measure knowledge acquired during pretraining by evaluating models exclusively in zero-shot and few-shot settings across a diverse set of subjects that humans learn. The benchmarks should cover a wide range of topics across STEM, the humanities, the social sciences, and more, ranging in difficulty from elementary to advanced professional levels. Additionally, the benchmarks should test both world knowledge and problem-solving ability, similar to how humans are evaluated. This approach would make the benchmarks more challenging and provide a more holistic assessment of the models' capabilities and knowledge breadth.",1.0,1.0,0.6688027381896973 +How does DetectGPT's approach to machine-generated text detection differ from previous zero-shot methods?,"Previous zero-shot methods mainly relied on evaluating the average per-token log probability of generated text, whereas DetectGPT assesses the local structure around a passage through log probability curvature.","['DetectGPT: Zero-Shot Machine-Generated Text Detection\nusing Probability Curvature\nEric Mitchell1Yoonho Lee1Alexander Khazatsky1Christopher D. Manning1Chelsea Finn1\nAbstract\nThe increasing fluency and widespread usage of\nlarge language models (LLMs) highlight the de-\nsirability of corresponding tools aiding detection\nof LLM-generated text. In this paper, we identify\na property of the structure of an LLM’s proba-\nbility function that is useful for such detection.\nSpecifically, we demonstrate that text sampled\nfrom an LLM tends to occupy negative curva-\nture regions of the model’s log probability func-\ntion. Leveraging this observation, we then define\na new curvature-based criterion for judging if a\npassage is generated from a given LLM. This\napproach, which we call DetectGPT, does not re-\nquire training a separate classifier, collecting a\ndataset of real or generated passages, or explic-\nitly watermarking generated text. It uses only\nlog probabilities computed by the model of in-\nterest and random perturbations of ', 'al.,\n2023) may further improve the effectiveness of methods\nsuch as DetectGPT, even as LLMs continue to improve.\nLimitations. One limitation of probability-based methods\nfor zero-shot machine-generated text detection (like Detect-\nGPT) is the white-box assumption that we can evaluate log\nprobabilities of the model(s) in question. For models be-\nhind APIs that do provide probabilities (such as GPT-3),\nevaluating probabilities nonetheless costs money. Another\nassumption of DetectGPT is access to a reasonable pertur-\nbation function. While in this work, we use off-the-shelf\nmask-filling models such as T5 and mT5 (for non-English\nlanguages), some domains may see reduced performance\nif existing mask-filling models do not well represent the\nspace of meaningful rephrases, reducing the quality of the\ncurvature estimate. While DetectGPT provides the best\navailable detection performance for PubMedQA, its drop\nin performance compared to other datasets may be a result\nAverage length0.9850.9900.995AUROC\ngpt-2\nAverage length0.960.970.980.99AUROC\nopt-2.7\nXSum\nSQuAD\nWritingPrompts\n130 140 150 ', 'and WritingPrompts datasets\nand compare the two pre-trained RoBERTa-based detector\nmodels with DetectGPT and the probability thresholding\nbaseline. We show in Table 2 that DetectGPT can provide\ndetection competitive with or better than the stronger of the\ntwo supervised models, and it again greatly outperforms\nprobability thresholding on average.\n5.2. Variants of Machine-Generated Text Detection\nDetecting paraphrased machine-generated text. In prac-\ntice, humans may manually edit or refine machine-generated\ntext rather than blindly use a model’s generations for their\ntask of interest. We therefore conduct an experiment to\nsimulate the detection problem for model samples that have\nbeen increasingly heavily revised. We simulate human re-\nvision by replacing 5 word spans of the text with samples\nfrom T5-3B until r% of the text has been replaced, and\nreport performance as rvaries. Figure 5 shows that De-\ntectGPT maintains detection AUROC above 0.8 even when\nnearly a ']","DetectGPT's approach to machine-generated text detection differs from previous zero-shot methods by leveraging a new curvature-based criterion for judging if a passage is generated from a given LLM, without requiring training a separate classifier, collecting a dataset of real or generated passages, or explicitly watermarking generated text.",1.0,1.0,0.7746951580047607,"Previous zero-shot methods mainly relied on evaluating the average per-token log probability of generated text, whereas DetectGPT assesses the local structure around a passage through log probability curvature.","DetectGPT's approach to machine-generated text detection differs from previous zero-shot methods by leveraging the observation that text sampled from a large language model tends to occupy negative curvature regions of the model's log probability function. This allows DetectGPT to define a new curvature-based criterion for judging if a passage is generated from a given LLM without requiring training a separate classifier, collecting a dataset of real or generated passages, or explicitly watermarking generated text.",1.0,0.3333333333333333,0.8138202428817749 +"What percentage of BERT's language understanding capabilities does DistilBERT retain, and what is the size reduction achieved?",DistilBERT retains 97% of BERT's language understanding capabilities with a 40% reduction in size.,"['DistilBERT, a distilled version of BERT: smaller,\nfaster, cheaper and lighter\nVictor SANH, Lysandre DEBUT, Julien CHAUMOND, Thomas WOLF\nHugging Face\n{victor,lysandre,julien,thomas}@huggingface.co\nAbstract\nAs Transfer Learning from large-scale pre-trained models becomes more prevalent\nin Natural Language Processing (NLP), operating these large models in on-the-\nedge and/or under constrained computational training or inference budgets remains\nchallenging. In this work, we propose a method to pre-train a smaller general-\npurpose language representation model, called DistilBERT, which can then be fine-\ntuned with good performances on a wide range of tasks like its larger counterparts.\nWhile most prior work investigated the use of distillation for building task-specific\nmodels, we leverage knowledge distillation during the pre-training phase and show\nthat it is possible to reduce the size of a BERT model by 40%, while retaining 97%\nof its language understanding capabilities and being 60% faster. To leverage ', 'inference speed\nTo further investigate the speed-up/size trade-off of DistilBERT, we compare (in Table 3) the number\nof parameters of each model along with the inference time needed to do a full pass on the STS-\nB development set on CPU (Intel Xeon E5-2690 v3 Haswell @2.9GHz) using a batch size of 1.\nDistilBERT has 40% fewer parameters than BERT and is 60% faster than BERT.\nOn device computation We studied whether DistilBERT could be used for on-the-edge applications\nby building a mobile application for question answering. We compare the average inference time on\na recent smartphone (iPhone 7 Plus) against our previously trained question answering model based\non BERT-base. Excluding the tokenization step, DistilBERT is 71% faster than BERT, and the whole\nmodel weighs 207 MB (which could be further reduced with quantization). Our code is ', 'BERT: Pre-training of Deep Bidirectional Transformers for\nLanguage Understanding\nJacob Devlin Ming-Wei Chang Kenton Lee Kristina Toutanova\nGoogle AI Language\n{jacobdevlin,mingweichang,kentonl,kristout }@google.com\nAbstract\nWe introduce a new language representa-\ntion model called BERT , which stands for\nBidirectional Encoder Representations from\nTransformers. Unlike recent language repre-\nsentation models (Peters et al., 2018a; Rad-\nford et al., 2018), BERT is designed to pre-\ntrain deep bidirectional representations from\nunlabeled text by jointly conditioning on both\nleft and right context in all layers. As a re-\nsult, the pre-trained BERT model can be fine-\ntuned with just one additional output layer\nto create state-of-the-art models for a wide\nrange of tasks, such as question answering and\nlanguage inference, without substantial task-\nspecific architecture modifications.\nBERT is conceptually simple and empirically\npowerful. It obtains new state-of-the-art re-\nsults on eleven natural language processing\ntasks, including pushing the GLUE score to\n80.5% (7.7% point absolute improvement),\nMultiNLI accuracy ']",DistilBERT retains 97% of BERT's language understanding capabilities and achieves a size reduction of 40%.,1.0,0.6666666666666666,0.9848236441612244,DistilBERT retains 97% of BERT's language understanding capabilities with a 40% reduction in size.,DistilBERT retains 97% of BERT's language understanding capabilities and achieves a size reduction of 40%.,1.0,0.6666666666666666,0.9848236441612244 +"What datasets and computational resources were used to train DistilBERT, and how do they compare to the original BERT training setup?","DistilBERT was trained on the same English Wikipedia and Toronto Book Corpus as BERT, using 8 NVIDIA V100 GPUs for approximately 90 hours, showcasing efficient resource use compared to larger models.","['DistilBERT, a distilled version of BERT: smaller,\nfaster, cheaper and lighter\nVictor SANH, Lysandre DEBUT, Julien CHAUMOND, Thomas WOLF\nHugging Face\n{victor,lysandre,julien,thomas}@huggingface.co\nAbstract\nAs Transfer Learning from large-scale pre-trained models becomes more prevalent\nin Natural Language Processing (NLP), operating these large models in on-the-\nedge and/or under constrained computational training or inference budgets remains\nchallenging. In this work, we propose a method to pre-train a smaller general-\npurpose language representation model, called DistilBERT, which can then be fine-\ntuned with good performances on a wide range of tasks like its larger counterparts.\nWhile most prior work investigated the use of distillation for building task-specific\nmodels, we leverage knowledge distillation during the pre-training phase and show\nthat it is possible to reduce the size of a BERT model by 40%, while retaining 97%\nof its language understanding capabilities and being 60% faster. To leverage ', 'inference speed\nTo further investigate the speed-up/size trade-off of DistilBERT, we compare (in Table 3) the number\nof parameters of each model along with the inference time needed to do a full pass on the STS-\nB development set on CPU (Intel Xeon E5-2690 v3 Haswell @2.9GHz) using a batch size of 1.\nDistilBERT has 40% fewer parameters than BERT and is 60% faster than BERT.\nOn device computation We studied whether DistilBERT could be used for on-the-edge applications\nby building a mobile application for question answering. We compare the average inference time on\na recent smartphone (iPhone 7 Plus) against our previously trained question answering model based\non BERT-base. Excluding the tokenization step, DistilBERT is 71% faster than BERT, and the whole\nmodel weighs 207 MB (which could be further reduced with quantization). Our code is ', 'BERT model:\na concatenation of English Wikipedia and Toronto Book Corpus [Zhu et al., 2015]. DistilBERT\nwas trained on 8 16GB V100 GPUs for approximately 90 hours. For the sake of comparison, the\nRoBERTa model [Liu et al., 2019] required 1 day of training on 1024 32GB V100.\n4 Experiments\nGeneral Language Understanding We assess the language understanding and generalization ca-\npabilities of DistilBERT on the General Language Understanding Evaluation (GLUE) benchmark\n[Wang et al., 2018], a collection of 9 datasets for evaluating natural language understanding systems.\nWe report scores on the development sets for each task by fine-tuning DistilBERT without the use\nof ensembling or multi-tasking scheme for fine-tuning (which are mostly orthogonal to the present\nwork). We compare the results to the baseline provided by the authors of GLUE: an ELMo (Peters\net al. [2018]) encoder followed ']","DistilBERT was trained on 8 16GB V100 GPUs for approximately 90 hours. In comparison, the original BERT model required 1 day of training on 1024 32GB V100 GPUs.",1.0,1.0,0.8871505260467529,"DistilBERT was trained on the same English Wikipedia and Toronto Book Corpus as BERT, using 8 NVIDIA V100 GPUs for approximately 90 hours, showcasing efficient resource use compared to larger models.","The DistilBERT model was pre-trained using knowledge distillation during the pre-training phase. It was trained with fewer parameters compared to the original BERT model, resulting in a size reduction of 40%. Additionally, DistilBERT was pre-trained using a smaller general-purpose language representation model to achieve good performance on a wide range of tasks, similar to its larger counterparts.",1.0,1.0,0.6966451406478882 +"What findings were revealed about model performance on HellaSwag when evaluated in zero-shot scenarios, and what implications does this have for future model development?","Models showed a decline in performance in zero-shot scenarios, highlighting the need for future models to improve generalization capabilities and adapt to unseen contexts or domains.","['one\ndataset and evaluated on the other.11\nThe best models are trained on the same\ndataset that they are evaluated on: training on\nSWAG and evaluating on HellaSwag lowers per-\nformance by 12%; vice versa lowers performance\nby 15%. The missing domain for HellaSwag mod-\nels is movie descriptions (LSMDC), still, Hella-\nSwag models obtain 69% accuracy. On the other\nhand, SWAG models do not generalize at all to\ntheir missing domain, WikiHow (28%), suggest-\ning that learning general commonsense reasoning\n11Note that the ActivityNet splits are di fferent for each\ndataset. To avoid skewing the results, we report only on\nthe validation video captions that are not in the training sets\nof either dataset. The overall accuracy is then a weighted\naverage, where ActivityNet examples are weighted propor-\ntionately more. This gives a slight advantage to training on\nSWAG, as it sees all the ActivityNet ', 'GPT-3 175B and even PaLM 540B (4 ×larger) in\nzero-shot setting. This is probably owing to GLM-130B’s bidirectional context attention and MIP,\nwhich has been proved to improve zero-shot results in unseen tasks (Wei et al., 2022a; Sanh et al.,\n2022). As the number of shots increases, GLM-130B’s performance keeps going up, maintaining its\noutperformance over GPT-3 (Cf. Appendix C.5 and Table 14 for details on each model and task).\nLimitations and Discussions. In the experiments above, we observe that GLM-130B’s performance\ngrowth (13.31 to 15.12) with the increase of few-shot samples is not as significant as GPT-3’s (4.35\nto 13.18). Here is our intuitive attempt to understand the phenomenon.\nFirst, the bidirectional nature of GLM-130B could lead to strong zero-shot performance (as is indi-\ncated in zero-shot language modeling), thus getting closer to the few-shot “upper-bound” ', 'and after AF is used. We also evaluate each al-\nternative dataset using BERT-Large. The results sug-\ngest that using a a stronger model at test time (over the\nmodel used for AF) improves performance, but is not\nenough to solve the task.\n6 Discussion\nOur results suggest that HellaSwag is a challenging\ntestbed for state-of-the-art NLI models, even those\nbuilt on extensive pretraining. The question still\nremains, though, of where will the field go next?\n6.1 How easy might HellaSwag be for future\ndiscriminators?\nIn this paper, we showed the existence of a\nGoldilocks zone of text complexity – in which\ngenerations are nonsensical, but existing state-\nof-the-art NLP models cannot tell the di fference.\nHow hard will the dataset be for future, even more\npowerful, models?\nAnswering this question is challenging because\nthese models don’t exist (or are unavailable) at\nthe time of writing . However, ']","The findings revealed that model performance on HellaSwag in zero-shot scenarios showed that GLM-130B performed better than GPT-3. This suggests that the bidirectional nature of GLM-130B and its ability to improve zero-shot results in unseen tasks have implications for future model development. It indicates that incorporating bidirectional context attention and other techniques like MIP can enhance model performance in zero-shot settings, potentially leading to the development of even more powerful models in the future.",1.0,1.0,0.5499686002731323,"Models showed a decline in performance in zero-shot scenarios, highlighting the need for future models to improve generalization capabilities and adapt to unseen contexts or domains.","The findings revealed that model performance on HellaSwag in zero-shot scenarios was low, with models achieving an accuracy of only 25%. This suggests that current models struggle to perform well on tasks requiring common sense reasoning without specific training data. This implies that future model development should focus on improving zero-shot capabilities to enhance performance on tasks that require broader knowledge and understanding.",1.0,1.0,0.7045235633850098 +Describe RoBERTa's approach to training with large mini-batches and its effect on model optimization and performance.,"RoBERTa's training with large mini-batches, significantly larger than those used for BERT, improves both optimization speed and end-task performance. This approach, facilitated by appropriate scaling of the learning rate, enables more efficient training and better generalization across tasks.","['RoBERTa is trained with dynamic mask-\ning (Section 4.1),FULL -SENTENCES without NSP\nloss (Section 4.2), large mini-batches (Section 4.3)\nand a larger byte-level BPE (Section 4.4).\nAdditionally, we investigate two other impor-\ntant factors that have been under-emphasized in\nprevious work: (1) the data used for pretraining,\nand (2) the number of training passes through the\ndata. For example, the recently proposed XLNet\narchitecture ( Yang et al. ,2019 ) is pretrained us-\ning nearly 10 times more data than the original\nBERT ( Devlin et al. ,2019 ). It is also trained with\na batch size eight times larger for half as many op-\ntimization steps, thus seeing four times as many\nsequences in pretraining compared to BERT.\nTo help disentangle the importance of these fac-\ntors from other modeling choices (e.g., the pre-\ntraining objective), we begin by training RoBERTa\nfollowing the BERT LARGE ', 'which includes a\ncareful evaluation of the effects of hyperparmeter\ntuning and training set size. We find that BERT\nwas significantly undertrained and propose an im-\nproved recipe for training BERT models, which\nwe call RoBERTa, that can match or exceed the\nperformance of all of the post-BERT methods.\nOur modifications are simple, they include: (1)\ntraining the model longer, with bigger batches,\nover more data; (2) removing the next sentence\nprediction objective; (3) training on longer se-\nquences; and (4) dynamically changing the mask-\ning pattern applied to the training data. We also\ncollect a large new dataset (CC-N EWS) of compa-\nrable size to other privately used datasets, to better\ncontrol for training set size effects.\nWhen controlling for training data, our im-\nproved training procedure improves upon the pub-\nlished BERT results on both GLUE and SQuAD.\nWhen trained for longer over additional data, ', 'accumulates improvements from the row s above. RoBERTa\nmatches the architecture and training objective of BERT LARGE . Results for BERT LARGE and XLNet LARGE are from\nDevlin et al. (2019 ) and Yang et al. (2019 ), respectively. Complete results on all GLUE tasks can be fo und in the\nAppendix.\nDevlin et al. (2019 ). We pretrain our model using\n1024 V100 GPUs for approximately one day.\nResults We present our results in Table 4. When\ncontrolling for training data, we observe that\nRoBERTa provides a large improvement over the\noriginally reported BERT LARGE results, reaffirming\nthe importance of the design choices we explored\nin Section 4.\nNext, we combine this data with the three ad-\nditional datasets described in Section 3.2. We\ntrain RoBERTa over the combined data with the\nsame number of training steps as before (100K).\nIn total, we ']","RoBERTa's approach to training with large mini-batches involves training the model longer, with bigger batches, over more data. This allows the model to see more sequences during pretraining compared to BERT. The larger mini-batches help in optimizing the model faster and more efficiently. As a result, RoBERTa's performance matches or exceeds the performance of post-BERT methods, showing significant improvements over the originally reported BERT LARGE results.",1.0,0.3333333333333333,0.9178693294525146,"RoBERTa's training with large mini-batches, significantly larger than those used for BERT, improves both optimization speed and end-task performance. This approach, facilitated by appropriate scaling of the learning rate, enables more efficient training and better generalization across tasks.","RoBERTa utilizes large mini-batches during training, which allows it to see four times as many sequences in pretraining compared to BERT. This approach to training with large mini-batches improves model optimization by enabling the model to train on more data simultaneously, leading to better generalization and performance on tasks like GLUE and SQuAD compared to the original BERT model.",0.8333333333333334,1.0,0.9152030348777771 +What findings does RoBERTa reveal about the efficacy of masked language model (MLM) pretraining under its optimized design choices?,"RoBERTa's findings reaffirm the efficacy of MLM pretraining, suggesting that with optimized design choices such as dynamic masking and training on extended sequences, MLM pretraining remains highly competitive with, or superior to, other recently proposed pretraining objectives.","['5x160 with ReLU\nConvolution 1x128Convolution 15x256 with ReLUFigure 8: Siamese architecture for similarity analysis.\nMAP@R score [ 46] is computationally expensive for GMN models because an embedding has to be\ncomputed for all SPT pairs in the test set, and hence Table 14 reports results on smaller sampled test\nsets.\nDetails of MLM Experiment\nHere we show how a masked language model (MLM) can be trained with CodeNet. We closely\nfollow the approach by Ankur Singh, documented in the blog [ 48]. The goal of the model is to infer\nthe correct token for an arbitrary masked-out location in the source text. We assume that in every text,\nprecisely one token is randomly masked. The original token at such position is then the golden label.\nFrom each of the 1000 C++1000 problems, we randomly select 100 samples for ', 'the specific strategies used\nduring MLM pre-training (BERT uses 80%, 10%,\n10%). The right part of the paper represents the\nDev set results. For the feature-based approach,\nwe concatenate the last 4 layers of BERT as the\nfeatures, which was shown to be the best approach\nin Section 5.3.\nFrom the table it can be seen that fine-tuning is\nsurprisingly robust to different masking strategies.\nHowever, as expected, using only the M ASK strat-\negy was problematic when applying the feature-\nbased approach to NER. Interestingly, using only\nthe R NDstrategy performs much worse than our\nstrategy as well.', 'performs worse than the MLM model on all\ntasks, with large drops on MRPC and SQuAD.\nFor SQuAD it is intuitively clear that a LTR\nmodel will perform poorly at token predictions,\nsince the token-level hidden states have no right-\nside context. In order to make a good faith at-\ntempt at strengthening the LTR system, we added\na randomly initialized BiLSTM on top. This does\nsignificantly improve results on SQuAD, but theresults are still far worse than those of the pre-\ntrained bidirectional models. The BiLSTM hurts\nperformance on the GLUE tasks.\nWe recognize that it would also be possible to\ntrain separate LTR and RTL models and represent\neach token as the concatenation of the two mod-\nels, as ELMo does. However: (a) this is twice as\nexpensive as a single bidirectional model; (b) this\nis non-intuitive for tasks like QA, since ']","The findings from RoBERTa reveal that fine-tuning is surprisingly robust to different masking strategies. However, using only the MASK strategy was problematic when applying the feature-based approach to Named Entity Recognition (NER). Additionally, using only the RND strategy performs much worse than their strategy as well. Ultimately, the RoBERTa model performs worse than the MLM model on all tasks, with large drops on the MRPC and SQuAD datasets.",0.8,1.0,0.5311053991317749,"RoBERTa's findings reaffirm the efficacy of MLM pretraining, suggesting that with optimized design choices such as dynamic masking and training on extended sequences, MLM pretraining remains highly competitive with, or superior to, other recently proposed pretraining objectives.","RoBERTa reveals that fine-tuning is surprisingly robust to different masking strategies. However, using only the MASK strategy was problematic when applying the feature-based approach to NER. Interestingly, using only the RND strategy performs much worse than the optimized strategy as well.",1.0,0.5,0.4783112406730652 +Describe the triple loss used in DistilBERT's training and its components.,"The triple loss in DistilBERT's training combines distillation loss (Lce), masked language modeling loss (Lmlm), and cosine embedding loss (Lcos), aiming to retain performance while reducing model size.","['DistilBERT, a distilled version of BERT: smaller,\nfaster, cheaper and lighter\nVictor SANH, Lysandre DEBUT, Julien CHAUMOND, Thomas WOLF\nHugging Face\n{victor,lysandre,julien,thomas}@huggingface.co\nAbstract\nAs Transfer Learning from large-scale pre-trained models becomes more prevalent\nin Natural Language Processing (NLP), operating these large models in on-the-\nedge and/or under constrained computational training or inference budgets remains\nchallenging. In this work, we propose a method to pre-train a smaller general-\npurpose language representation model, called DistilBERT, which can then be fine-\ntuned with good performances on a wide range of tasks like its larger counterparts.\nWhile most prior work investigated the use of distillation for building task-specific\nmodels, we leverage knowledge distillation during the pre-training phase and show\nthat it is possible to reduce the size of a BERT model by 40%, while retaining 97%\nof its language understanding capabilities and being 60% faster. To leverage ', 'which includes a\ncareful evaluation of the effects of hyperparmeter\ntuning and training set size. We find that BERT\nwas significantly undertrained and propose an im-\nproved recipe for training BERT models, which\nwe call RoBERTa, that can match or exceed the\nperformance of all of the post-BERT methods.\nOur modifications are simple, they include: (1)\ntraining the model longer, with bigger batches,\nover more data; (2) removing the next sentence\nprediction objective; (3) training on longer se-\nquences; and (4) dynamically changing the mask-\ning pattern applied to the training data. We also\ncollect a large new dataset (CC-N EWS) of compa-\nrable size to other privately used datasets, to better\ncontrol for training set size effects.\nWhen controlling for training data, our im-\nproved training procedure improves upon the pub-\nlished BERT results on both GLUE and SQuAD.\nWhen trained for longer over additional data, ', 'by two BiLSTMs.4\nThe results on each of the 9 tasks are showed on Table 1 along with the macro-score (average of\nindividual scores). Among the 9 tasks, DistilBERT is always on par or improving over the ELMo\nbaseline (up to 19 points of accuracy on STS-B). DistilBERT also compares surprisingly well to\nBERT, retaining 97% of the performance with 40% fewer parameters.\n4.1 Downstream task benchmark\nDownstream tasks We further study the performances of DistilBERT on several downstream tasks\nunder efficient inference constraints: a classification task (IMDb sentiment classification - Maas et al.\n[2011]) and a question answering task (SQuAD v1.1 - Rajpurkar et al. [2016]).\nAs shown in Table 2, DistilBERT is only 0.6% point behind BERT in test accuracy on the IMDb\nbenchmark while being 40% smaller. On SQuAD, DistilBERT is within 3.9 points of ']",None,0.0,1.0,0.004610069096088409,"The triple loss in DistilBERT's training combines distillation loss (Lce), masked language modeling loss (Lmlm), and cosine embedding loss (Lcos), aiming to retain performance while reducing model size.",None,0.0,0.5,0.004610247910022736 +What advantages does DistilBERT present for on-device computations and mobile applications?,"DistilBERT's reduced size and faster inference capabilities make it highly suitable for on-device computations, demonstrated through a mobile application for question answering.","['inference speed\nTo further investigate the speed-up/size trade-off of DistilBERT, we compare (in Table 3) the number\nof parameters of each model along with the inference time needed to do a full pass on the STS-\nB development set on CPU (Intel Xeon E5-2690 v3 Haswell @2.9GHz) using a batch size of 1.\nDistilBERT has 40% fewer parameters than BERT and is 60% faster than BERT.\nOn device computation We studied whether DistilBERT could be used for on-the-edge applications\nby building a mobile application for question answering. We compare the average inference time on\na recent smartphone (iPhone 7 Plus) against our previously trained question answering model based\non BERT-base. Excluding the tokenization step, DistilBERT is 71% faster than BERT, and the whole\nmodel weighs 207 MB (which could be further reduced with quantization). Our code is ', 'DistilBERT, a distilled version of BERT: smaller,\nfaster, cheaper and lighter\nVictor SANH, Lysandre DEBUT, Julien CHAUMOND, Thomas WOLF\nHugging Face\n{victor,lysandre,julien,thomas}@huggingface.co\nAbstract\nAs Transfer Learning from large-scale pre-trained models becomes more prevalent\nin Natural Language Processing (NLP), operating these large models in on-the-\nedge and/or under constrained computational training or inference budgets remains\nchallenging. In this work, we propose a method to pre-train a smaller general-\npurpose language representation model, called DistilBERT, which can then be fine-\ntuned with good performances on a wide range of tasks like its larger counterparts.\nWhile most prior work investigated the use of distillation for building task-specific\nmodels, we leverage knowledge distillation during the pre-training phase and show\nthat it is possible to reduce the size of a BERT model by 40%, while retaining 97%\nof its language understanding capabilities and being 60% faster. To leverage ', 'by two BiLSTMs.4\nThe results on each of the 9 tasks are showed on Table 1 along with the macro-score (average of\nindividual scores). Among the 9 tasks, DistilBERT is always on par or improving over the ELMo\nbaseline (up to 19 points of accuracy on STS-B). DistilBERT also compares surprisingly well to\nBERT, retaining 97% of the performance with 40% fewer parameters.\n4.1 Downstream task benchmark\nDownstream tasks We further study the performances of DistilBERT on several downstream tasks\nunder efficient inference constraints: a classification task (IMDb sentiment classification - Maas et al.\n[2011]) and a question answering task (SQuAD v1.1 - Rajpurkar et al. [2016]).\nAs shown in Table 2, DistilBERT is only 0.6% point behind BERT in test accuracy on the IMDb\nbenchmark while being 40% smaller. On SQuAD, DistilBERT is within 3.9 points of ']","DistilBERT is 60% faster than BERT for on-device computations and mobile applications. It is also smaller in size, weighing 207 MB, which could be further reduced with quantization. It is 71% faster than BERT excluding the tokenization step.",1.0,1.0,0.773715615272522,"DistilBERT's reduced size and faster inference capabilities make it highly suitable for on-device computations, demonstrated through a mobile application for question answering.","DistilBERT presents advantages for on-device computations and mobile applications by being 60% faster than BERT and 40% smaller, making it suitable for use in resource-constrained environments. It is also shown to be 71% faster than BERT on a smartphone for question answering tasks and weighs 207 MB, which can be further reduced with quantization.",1.0,1.0,0.8332751989364624 +"In what ways does HellaSwag expand upon its predecessor, SWAG, to offer a more rigorous test of AI commonsense reasoning?","HellaSwag expands on SWAG by incorporating longer and more diverse contexts from WikiHow articles, introducing more complex and varied scenarios for testing commonsense reasoning.","['one\ndataset and evaluated on the other.11\nThe best models are trained on the same\ndataset that they are evaluated on: training on\nSWAG and evaluating on HellaSwag lowers per-\nformance by 12%; vice versa lowers performance\nby 15%. The missing domain for HellaSwag mod-\nels is movie descriptions (LSMDC), still, Hella-\nSwag models obtain 69% accuracy. On the other\nhand, SWAG models do not generalize at all to\ntheir missing domain, WikiHow (28%), suggest-\ning that learning general commonsense reasoning\n11Note that the ActivityNet splits are di fferent for each\ndataset. To avoid skewing the results, we report only on\nthe validation video captions that are not in the training sets\nof either dataset. The overall accuracy is then a weighted\naverage, where ActivityNet examples are weighted propor-\ntionately more. This gives a slight advantage to training on\nSWAG, as it sees all the ActivityNet ', 'and after AF is used. We also evaluate each al-\nternative dataset using BERT-Large. The results sug-\ngest that using a a stronger model at test time (over the\nmodel used for AF) improves performance, but is not\nenough to solve the task.\n6 Discussion\nOur results suggest that HellaSwag is a challenging\ntestbed for state-of-the-art NLI models, even those\nbuilt on extensive pretraining. The question still\nremains, though, of where will the field go next?\n6.1 How easy might HellaSwag be for future\ndiscriminators?\nIn this paper, we showed the existence of a\nGoldilocks zone of text complexity – in which\ngenerations are nonsensical, but existing state-\nof-the-art NLP models cannot tell the di fference.\nHow hard will the dataset be for future, even more\npowerful, models?\nAnswering this question is challenging because\nthese models don’t exist (or are unavailable) at\nthe time of writing . However, ', '(context, answer) pair.\nFinally, when the context is removed and the\nwords in each ending are shu ffled, performance\ndrops to 60.4%. While low, this is still higher\nthan ELMo’s performance ( ă60% from Zellers\net al., 2018). As neither context nor structure\nis needed to discriminate between human and\nmachine-written endings in a majority of cases, it\nis likely that systems primarily learn to detect dis-\ntributional stylistic patterns during finetuning.\n3.3 Where do the stylistic biases come from?\nSWAG was constructed via Adversarial Filter-\ning (AF). Endings were generated via a language\nmodel, and then selected to fool a discrimina-\ntor. To understand why it was solved requires\nunderstanding the interplay of AF with respect to\nSWAG’s generators and discriminators.\nZellers et al. (2018) used a two-layer LSTM for\ngeneration, with shallow stylistic adversarial fil-\nters.6This setup was robust against ELMo mod-\nels, but has the ']","HellaSwag expands upon its predecessor, SWAG, by introducing a more challenging testbed for state-of-the-art NLI models, even those built on extensive pretraining. It presents a Goldilocks zone of text complexity where generations are nonsensical but existing models cannot differentiate. HellaSwag requires models to not only understand common knowledge but also grasp nuanced contextual information and reasoning, making it a more rigorous test of AI commonsense reasoning.",1.0,0.6666666666666666,0.8360292315483093,"HellaSwag expands on SWAG by incorporating longer and more diverse contexts from WikiHow articles, introducing more complex and varied scenarios for testing commonsense reasoning.","The missing domain for HellaSwag models is movie descriptions (LSMDC), which is not the case for SWAG models. Additionally, HellaSwag models obtain 69% accuracy compared to SWAG models which do not generalize well to their missing domain, WikiHow (28%). These factors contribute to HellaSwag offering a more rigorous test of AI commonsense reasoning compared to SWAG.",1.0,1.0,0.567561686038971 +How does RoBERTa's use of a byte-level BPE (Byte Pair Encoding) vocabulary contribute to its model architecture and performance?,"RoBERTa adopts a byte-level BPE vocabulary, allowing for more efficient handling of large and diverse corpora without the need for additional preprocessing or tokenization. This encoding scheme supports a modest-sized vocabulary that can encode any input text, potentially enhancing the model's linguistic flexibility and performance on various NLP tasks.","['BERT: Pre-training of Deep Bidirectional Transformers for\nLanguage Understanding\nJacob Devlin Ming-Wei Chang Kenton Lee Kristina Toutanova\nGoogle AI Language\n{jacobdevlin,mingweichang,kentonl,kristout }@google.com\nAbstract\nWe introduce a new language representa-\ntion model called BERT , which stands for\nBidirectional Encoder Representations from\nTransformers. Unlike recent language repre-\nsentation models (Peters et al., 2018a; Rad-\nford et al., 2018), BERT is designed to pre-\ntrain deep bidirectional representations from\nunlabeled text by jointly conditioning on both\nleft and right context in all layers. As a re-\nsult, the pre-trained BERT model can be fine-\ntuned with just one additional output layer\nto create state-of-the-art models for a wide\nrange of tasks, such as question answering and\nlanguage inference, without substantial task-\nspecific architecture modifications.\nBERT is conceptually simple and empirically\npowerful. It obtains new state-of-the-art re-\nsults on eleven natural language processing\ntasks, including pushing the GLUE score to\n80.5% (7.7% point absolute improvement),\nMultiNLI accuracy ', '),\nwe instead consider training BERT with a larger\nbyte-level BPE vocabulary containing 50K sub-\nword units, without any additional preprocessing\nor tokenization of the input. This adds approxi-\nmately 15M and 20M additional parameters for\nBERT BASEand BERT LARGE , respectively.\nEarly experiments revealed only slight dif-\nferences between these encodings, with the\nRadford et al. (2019 ) BPE achieving slightly\nworse end-task performance on some tasks. Nev-\nertheless, we believe the advantages of a univer-\nsal encoding scheme outweighs the minor degre-\ndation in performance and use this encoding in\nthe remainder of our experiments. A more de-\ntailed comparison of these encodings is left to fu-\nture work.\n5 RoBERTa\nIn the previous section we propose modifications\nto the BERT pretraining procedure that improve\nend-task performance. We now aggregate these\nimprovements and evaluate their combined im-\npact. We call this configuration RoBERTa for\nRobustly optimized BERT approach. Specifi-\ncally, ', 'diverse cor-\npora, such as the ones considered in this work.\nRadford et al. (2019 ) introduce a clever imple-\nmentation of BPE that uses bytes instead of uni-\ncode characters as the base subword units. Using\nbytes makes it possible to learn a subword vocab-\nulary of a modest size (50K units) that can still en-\ncode any input text without introducing any “un-\nknown” tokens.\n8Large batch training can improve training efficiency even\nwithout large scale parallel hardware through gradient ac-\ncumulation , whereby gradients from multiple mini-batches\nare accumulated locally before each optimization step. Thi s\nfunctionality is supported natively in FAIRSEQ (Ott et al. ,\n2019 ).The original BERT implementa-\ntion ( Devlin et al. ,2019 ) uses a character-level\nBPE vocabulary of size 30K, which is learned\nafter preprocessing the input with heuristic tok-\nenization rules. Following Radford et al. (2019 ']","RoBERTa's use of a byte-level BPE vocabulary contributes to its model architecture and performance by allowing for the learning of a subword vocabulary of a modest size (50K units) that can encode any input text without introducing any ""unknown"" tokens. This approach improves the model's ability to handle diverse corpora and leads to robust optimization, ultimately enhancing end-task performance.",1.0,0.5,0.9311502575874329,"RoBERTa adopts a byte-level BPE vocabulary, allowing for more efficient handling of large and diverse corpora without the need for additional preprocessing or tokenization. This encoding scheme supports a modest-sized vocabulary that can encode any input text, potentially enhancing the model's linguistic flexibility and performance on various NLP tasks.","RoBERTa's use of a byte-level BPE vocabulary contributes to its model architecture and performance by allowing for a larger subword vocabulary containing 50K units without the need for additional preprocessing or tokenization of the input. This results in approximately 15M and 20M additional parameters for BERT BASE and BERT LARGE, respectively. While early experiments showed slight differences in performance compared to other encodings, the universal encoding scheme provided by byte-level BPE is believed to outweigh minor performance degradation and is used in the remainder of the experiments.",1.0,1.0,0.8115901350975037 +Discuss RoBERTa's contributions to the understanding of effective pretraining strategies in NLP.,"RoBERTa's comprehensive evaluation of pretraining strategies, including the effects of dynamic masking, data size, batch size, and the removal of the NSP objective, contributes valuable insights into the design and optimization of language models. Its success offers guidance for future model development, emphasizing the importance of these previously under-explored factors in achieving high performance in NLP tasks.","['RoBERTa is trained with dynamic mask-\ning (Section 4.1),FULL -SENTENCES without NSP\nloss (Section 4.2), large mini-batches (Section 4.3)\nand a larger byte-level BPE (Section 4.4).\nAdditionally, we investigate two other impor-\ntant factors that have been under-emphasized in\nprevious work: (1) the data used for pretraining,\nand (2) the number of training passes through the\ndata. For example, the recently proposed XLNet\narchitecture ( Yang et al. ,2019 ) is pretrained us-\ning nearly 10 times more data than the original\nBERT ( Devlin et al. ,2019 ). It is also trained with\na batch size eight times larger for half as many op-\ntimization steps, thus seeing four times as many\nsequences in pretraining compared to BERT.\nTo help disentangle the importance of these fac-\ntors from other modeling choices (e.g., the pre-\ntraining objective), we begin by training RoBERTa\nfollowing the BERT LARGE ', 'pretrain over 160GB of text. We ob-\nserve further improvements in performance across\nall downstream tasks, validating the importance of\ndata size and diversity in pretraining.9\nFinally, we pretrain RoBERTa for significantly\nlonger, increasing the number of pretraining steps\nfrom 100K to 300K, and then further to 500K. We\nagain observe significant gains in downstream task\nperformance, and the 300K and 500K step mod-\nels outperform XLNet LARGE across most tasks. We\nnote that even our longest-trained model does not\nappear to overfit our data and would likely benefit\nfrom additional training.\nIn the rest of the paper, we evaluate our best\nRoBERTa model on the three different bench-\nmarks: GLUE, SQuaD and RACE. Specifically\n9Our experiments conflate increases in data size and di-\nversity. We leave a more careful analysis of these two dimen-\nsions to future work.we consider RoBERTa trained for 500K steps over\nall ', 'which includes a\ncareful evaluation of the effects of hyperparmeter\ntuning and training set size. We find that BERT\nwas significantly undertrained and propose an im-\nproved recipe for training BERT models, which\nwe call RoBERTa, that can match or exceed the\nperformance of all of the post-BERT methods.\nOur modifications are simple, they include: (1)\ntraining the model longer, with bigger batches,\nover more data; (2) removing the next sentence\nprediction objective; (3) training on longer se-\nquences; and (4) dynamically changing the mask-\ning pattern applied to the training data. We also\ncollect a large new dataset (CC-N EWS) of compa-\nrable size to other privately used datasets, to better\ncontrol for training set size effects.\nWhen controlling for training data, our im-\nproved training procedure improves upon the pub-\nlished BERT results on both GLUE and SQuAD.\nWhen trained for longer over additional data, ']","RoBERTa has made significant contributions to the understanding of effective pretraining strategies in NLP by emphasizing the importance of data size, diversity, and training duration. By training with dynamic masking, using full sentences without NSP loss, large mini-batches, and a larger byte-level BPE, RoBERTa was able to achieve improved performance across all downstream tasks. Additionally, RoBERTa showed that increasing the amount of pretraining data and the number of training passes through the data can lead to significant gains in downstream task performance. By training RoBERTa for longer periods (300K to 500K steps), RoBERTa outperformed XLNet on most tasks, demonstrating the importance of extended training. Furthermore, RoBERTa's modifications, such as removing the next sentence prediction objective, training on longer sequences, and dynamically changing masking patterns, along with collecting a large new dataset, have helped to improve upon the performance of BERT models and match or exceed the performance of post-BERT methods. Overall, RoBERTa's research highlights the significance of data size, diversity, and prolonged training for effective pretraining strategies in NLP.",1.0,0.6666666666666666,0.80128014087677,"RoBERTa's comprehensive evaluation of pretraining strategies, including the effects of dynamic masking, data size, batch size, and the removal of the NSP objective, contributes valuable insights into the design and optimization of language models. Its success offers guidance for future model development, emphasizing the importance of these previously under-explored factors in achieving high performance in NLP tasks.","RoBERTa's contributions to the understanding of effective pretraining strategies in NLP include the use of dynamic masking, training with full-sentences without NSP loss, utilizing large mini-batches, and implementing a larger byte-level BPE. Additionally, RoBERTa investigates the importance of the data used for pretraining and the number of training passes through the data, which have been under-emphasized in previous work. By training RoBERTa following the BERT LARGE architecture, it helps disentangle the importance of these factors from other modeling choices. RoBERTa's approach enhances the understanding of how different pretraining strategies can impact the performance of NLP models, ultimately advancing the state-of-the-art in natural language processing tasks.",1.0,1.0,0.9082660675048828 +"How does Adversarial Filtering (AF) contribute to the creation of HellaSwag, and what unique characteristic does it bring to the dataset?","AF iteratively selects challenging machine-generated wrong answers by using a series of discriminators, creating a dataset that is robust against model biases and exhibits a 'Goldilocks' zone of complexity.","['this via Adversarial Filtering (AF), a\ndata collection paradigm wherein a series of\ndiscriminators iteratively select an adversarial\nset of machine-generated wrong answers. AF\nproves to be surprisingly robust. The key in-\nsight is to scale up the length and complex-\nity of the dataset examples towards a critical\n‘Goldilocks’ zone wherein generated text is\nridiculous to humans, yet often misclassified\nby state-of-the-art models.\nOur construction of HellaSwag , and its result-\ning di fficulty, sheds light on the inner work-\nings of deep pretrained models. More broadly,\nit suggests a new path forward for NLP re-\nsearch, in which benchmarks co-evolve with\nthe evolving state-of-the-art in an adversarial\nway, so as to present ever-harder challenges.\n1 Introduction\nImagine a woman chasing a dog around outside,\ntrying to give it a bath. What might happen next?\nHumans can read a narrative like this, shown in\nFigure 1, and connect ', 'Last, humans validate the data to remove\nadversarial endings that seem realistic.\nImportantly, AF creates a final dataset that\nis challenging to models regardless of the final\ndataset split. In Section 4, we will use AF as the\nunderlying workhorse to construct an NLI dataset\nthat is easy for humans, yet challenging for ma-\nchines. This di fficulty persists even when mod-\nels are provided significant training data, and even\nwhen this data comes from the same distribution\nas the test set. This contrasts with past work on\nadversarial examples (e.g. Jia and Liang, 2017;\nGlockner et al., 2018; Belinkov and Bisk, 2018)\nwhich consider cases where an out-of-distribution\ntest set is constructed to be adversarial.\n3 Investigating SWAG\nIn this section, we investigate why SWAG was\nsolved. We focus on BERT, since it is the best\nDefault Ending Only Shuffled Shuffled+\nEnding Only30405060708090100 BERT-Large Accuracy (%)86.7%\n74.8%77.0%\n60.4%\n46.7%\n41.4%\n36.2%\n31.6%SWAG\nHellaSwagFigure ', '40\nWikihow Adversarial Filtering iteration0255075100BERT accuracy (4-way)\n1 sentence\n2 sentences\n3 sentencesFigure 5: Adversarial Filtering (AF) results with BERT-Large as the discriminator. Left: AF applied to ActivityNet\ngenerations produced by Zellers et al. (2018)’s language model versus OpenAI GPT. While GPT converges at\nrandom, the LM used for SWAG converges at 75%. Right : AF applied to WikiHow generations from GPT, while\nvarying the ending length from one to three sentences. They converge to random, „40%, and„50%, respectively.\nstructural patterns, we consider a new scenario,\nShuffled . Here the shared context is provided,\nbut the words in each ending choice are randomly\npermuted. Surprisingly, this reduces BERT perfor-\nmance by less than 10%. Even though BERT was\nnever exposed to randomly shu ffled text during\npretraining, it easily adapts to this setting, which\nsuggests that BERT is largely performing lexical\nreasoning over each ']","Adversarial Filtering (AF) contributes to the creation of HellaSwag by iteratively selecting an adversarial set of machine-generated wrong answers. This process scales up the length and complexity of the dataset examples towards a critical ‘Goldilocks’ zone where generated text is ridiculous to humans, yet often misclassified by state-of-the-art models. The unique characteristic it brings to the dataset is that it is challenging to models regardless of the final dataset split, making it difficult for machines even when provided significant training data from the same distribution as the test set.",1.0,0.5,0.5785472393035889,"AF iteratively selects challenging machine-generated wrong answers by using a series of discriminators, creating a dataset that is robust against model biases and exhibits a 'Goldilocks' zone of complexity.","Adversarial Filtering (AF) contributes to the creation of HellaSwag by utilizing a series of discriminators to iteratively select an adversarial set of machine-generated wrong answers. This process scales up the length and complexity of the dataset examples towards a critical 'Goldilocks' zone where the generated text is ridiculous to humans but often misclassified by state-of-the-art models. The unique characteristic it brings to the dataset is that it is challenging to models regardless of the final dataset split, making it difficult for machines to accurately classify the data even with significant training data from the same distribution as the test set.",1.0,1.0,0.6114956140518188 +What is the impact of removing the NSP loss on RoBERTa's performance across various benchmarks compared to BERT?,"Removing the NSP loss from RoBERTa's pretraining objective leads to significant improvements in downstream task performance. This finding challenges the initial hypothesis that NSP is crucial for learning sentence relationships, suggesting that masked language modeling alone, under optimal conditions, is sufficient for achieving superior performance.","['in order to better\nunderstand their relative importance. Additional\x0cDev Set\nTasks MNLI-m QNLI MRPC SST-2 SQuAD\n(Acc) (Acc) (Acc) (Acc) (F1)\nBERT BASE 84.4 88.4 86.7 92.7 88.5\nNo NSP 83.9 84.9 86.5 92.6 87.9\nLTR & No NSP 82.1 84.3 77.5 92.1 77.8\n+ BiLSTM 82.1 84.1 75.7 91.6 84.9\nTable 5: Ablation over the pre-training tasks using the\nBERT BASE architecture. “No NSP” is trained without\nthe next sentence prediction task. “LTR & No NSP” is\ntrained as a left-to-right LM without the next sentence\nprediction, like OpenAI GPT. “+ BiLSTM” adds a ran-\ndomly initialized BiLSTM on top of the “LTR + No\nNSP” model during fine-tuning.\nablation studies can be found in Appendix C.\n5.1 Effect of Pre-training Tasks\nWe demonstrate the importance of the deep bidi-\nrectionality of BERT by evaluating two pre-\ntraining objectives using exactly the same pre-\ntraining data, fine-tuning ', ""However, BERT still\nneeds upwards of 16k examples to approach hu-\nman performance, around which it plateaus.\n3.2 What is learned during finetuning?\nFigure 4 compares BERT’s performance when\ntrained and evaluated on variants of SWAG.\nContext: BERT’s performance only slips 11.9\npoints (86.7%Ñ74.8%) when context is omitted\n(Ending Only ), suggesting a bias exists in the\nendings themselves.5If a followup event seems\nunreasonable absent of context , then there must be\nsomething markedly di fferent between the space\nof human-written and machine-generated endings.\nStructure: To distinguish word usage from\n4See the appendix for a discussion of the BERT architec-\nture and hyperparameter settings we used in our experiments.\n5These biases are similar to those in NLI datasets, as\nfound by Gururangan et al. (2018); Poliak et al. (2018).\n3\x0c0 10 20 30 40 50\nActivitynet Adversarial Filtering iteration0255075100BERT accuracy (4-way) Zellers' LM GPT\n0 10 20 30 "", 'RoBERTa is trained with dynamic mask-\ning (Section 4.1),FULL -SENTENCES without NSP\nloss (Section 4.2), large mini-batches (Section 4.3)\nand a larger byte-level BPE (Section 4.4).\nAdditionally, we investigate two other impor-\ntant factors that have been under-emphasized in\nprevious work: (1) the data used for pretraining,\nand (2) the number of training passes through the\ndata. For example, the recently proposed XLNet\narchitecture ( Yang et al. ,2019 ) is pretrained us-\ning nearly 10 times more data than the original\nBERT ( Devlin et al. ,2019 ). It is also trained with\na batch size eight times larger for half as many op-\ntimization steps, thus seeing four times as many\nsequences in pretraining compared to BERT.\nTo help disentangle the importance of these fac-\ntors from other modeling choices (e.g., the pre-\ntraining objective), we begin by training RoBERTa\nfollowing the BERT LARGE ']","The impact of removing the NSP loss on RoBERTa's performance across various benchmarks compared to BERT is not specified in the given context. Therefore, the answer is None.",1.0,1.0,0.574307918548584,"Removing the NSP loss from RoBERTa's pretraining objective leads to significant improvements in downstream task performance. This finding challenges the initial hypothesis that NSP is crucial for learning sentence relationships, suggesting that masked language modeling alone, under optimal conditions, is sufficient for achieving superior performance.",The impact of removing the NSP loss on RoBERTa's performance across various benchmarks compared to BERT is not specified in the context provided.,1.0,1.0,0.5646319389343262 +In what ways does RoBERTa's training process leverage data size and training duration for improved model performance?,"RoBERTa extensively explores the impact of both increased data size and prolonged training duration, demonstrating that both factors significantly contribute to enhanced model performance. Training over larger datasets and for more steps than BERT allows RoBERTa to better generalize and excel on downstream tasks.","['pretrain over 160GB of text. We ob-\nserve further improvements in performance across\nall downstream tasks, validating the importance of\ndata size and diversity in pretraining.9\nFinally, we pretrain RoBERTa for significantly\nlonger, increasing the number of pretraining steps\nfrom 100K to 300K, and then further to 500K. We\nagain observe significant gains in downstream task\nperformance, and the 300K and 500K step mod-\nels outperform XLNet LARGE across most tasks. We\nnote that even our longest-trained model does not\nappear to overfit our data and would likely benefit\nfrom additional training.\nIn the rest of the paper, we evaluate our best\nRoBERTa model on the three different bench-\nmarks: GLUE, SQuaD and RACE. Specifically\n9Our experiments conflate increases in data size and di-\nversity. We leave a more careful analysis of these two dimen-\nsions to future work.we consider RoBERTa trained for 500K steps over\nall ', 'which includes a\ncareful evaluation of the effects of hyperparmeter\ntuning and training set size. We find that BERT\nwas significantly undertrained and propose an im-\nproved recipe for training BERT models, which\nwe call RoBERTa, that can match or exceed the\nperformance of all of the post-BERT methods.\nOur modifications are simple, they include: (1)\ntraining the model longer, with bigger batches,\nover more data; (2) removing the next sentence\nprediction objective; (3) training on longer se-\nquences; and (4) dynamically changing the mask-\ning pattern applied to the training data. We also\ncollect a large new dataset (CC-N EWS) of compa-\nrable size to other privately used datasets, to better\ncontrol for training set size effects.\nWhen controlling for training data, our im-\nproved training procedure improves upon the pub-\nlished BERT results on both GLUE and SQuAD.\nWhen trained for longer over additional data, ', 'RoBERTa is trained with dynamic mask-\ning (Section 4.1),FULL -SENTENCES without NSP\nloss (Section 4.2), large mini-batches (Section 4.3)\nand a larger byte-level BPE (Section 4.4).\nAdditionally, we investigate two other impor-\ntant factors that have been under-emphasized in\nprevious work: (1) the data used for pretraining,\nand (2) the number of training passes through the\ndata. For example, the recently proposed XLNet\narchitecture ( Yang et al. ,2019 ) is pretrained us-\ning nearly 10 times more data than the original\nBERT ( Devlin et al. ,2019 ). It is also trained with\na batch size eight times larger for half as many op-\ntimization steps, thus seeing four times as many\nsequences in pretraining compared to BERT.\nTo help disentangle the importance of these fac-\ntors from other modeling choices (e.g., the pre-\ntraining objective), we begin by training RoBERTa\nfollowing the BERT LARGE ']","RoBERTa's training process leverages data size and training duration by increasing the number of pretraining steps from 100K to 300K, and then further to 500K. This longer pretraining duration leads to significant gains in downstream task performance. Additionally, RoBERTa is trained with dynamic masking, using full sentences without next sentence prediction loss, large mini-batches, and a larger byte-level BPE, all of which contribute to improved model performance.",1.0,1.0,0.7558075785636902,"RoBERTa extensively explores the impact of both increased data size and prolonged training duration, demonstrating that both factors significantly contribute to enhanced model performance. Training over larger datasets and for more steps than BERT allows RoBERTa to better generalize and excel on downstream tasks.","RoBERTa's training process leverages data size and training duration by pretraining over 160GB of text, increasing the number of pretraining steps from 100K to 300K, and further to 500K. This extended training duration with larger amounts of data leads to significant gains in downstream task performance, surpassing XLNet LARGE across most tasks. Additionally, RoBERTa's training process includes training the model longer, with bigger batches, over more data, and training on longer sequences, all of which contribute to improved model performance.",1.0,1.0,0.8059983253479004 +What defines the Task2Vec embedding's ability to capture the complexity of a visual classification task and its semantic relationships?,"Task2Vec embedding utilizes the Fisher Information Matrix (FIM) computed from a probe network's parameters, capturing the task's complexity and semantic relationships by representing tasks as elements in a vector space.","['on each task.\nA similar approach has been taken in computer vision for\nselecting a detector for a new category out of a large library\nof detectors [26, 40, 38].\n7. Discussion\nTASK 2VEC is an efficient way to represent a task, or the\ncorresponding dataset, as a fixed dimensional vector. It has\nseveral appealing properties, in particular its norm corre-\nlates with the test error obtained on the task, and the co-\nsine distance between embeddings correlates with natural\ndistances between tasks, when available, such as the taxo-\nnomic distance for species classification, and the fine-tuning\ndistance for transfer learning. Having a representation of\ntasks paves the way for a wide variety of meta-learning\ntasks. In this work, we focused on selection of an expert\nfeature extractor in order to solve a new task, especially\nwhen little training data is present, and showed ', 'requires understanding\nindividual tokens in context, complete sentences, inter-sentence relations, and entire paragraphs.\nLicense: Task data must be available under licences that allow use and redistribution for research\npurposes.\nTo identify possible tasks for SuperGLUE, we disseminated a public call for task proposals to the\nNLP community, and received approximately 30 proposals. We filtered these proposals according\nto our criteria. Many proposals were not suitable due to licensing issues, complex formats, and\ninsufficient headroom; we provide examples of such tasks in Appendix D. For each of the remaining\ntasks, we ran a BERT-based baseline and a human baseline, and filtered out tasks which were either\ntoo challenging for humans without extensive training or too easy for our machine baselines.\n3.2 Selected Tasks\nFollowing this process, we arrived at eight tasks to use in SuperGLUE. See Tables 1 and 2 ', 'TASK2VEC: Task Embedding for Meta-Learning\nAlessandro Achille\nUCLA and AWS\nachille@cs.ucla.eduMichael Lam\nAWS\nmichlam@amazon.comRahul Tewari\nAWS\ntewarir@amazon.comAvinash Ravichandran\nAWS\nravinash@amazon.com\nSubhransu Maji\nUMass and AWS\nsmmaji@amazon.comCharless Fowlkes\nUCI and AWS\nfowlkec@amazon.comStefano Soatto\nUCLA and AWS\nsoattos@amazon.comPietro Perona\nCaltech and AWS\nperonapp@amazon.com\nAbstract\nWe introduce a method to provide vectorial represen-\ntations of visual classification tasks which can be used\nto reason about the nature of those tasks and their re-\nlations. Given a dataset with ground-truth labels and a\nloss function defined over those labels, we process images\nthrough a “probe network” and compute an embedding\nbased on estimates of the Fisher information matrix asso-\nciated with the probe network parameters. This provides a\nfixed-dimensional embedding of the task that is independent\nof details such as the number of classes and does not require\nany understanding of the class label semantics. We demon-\nstrate that this embedding is capable of predicting task sim-\nilarities that match our intuition about semantic ']","The Task2Vec embedding's ability to capture the complexity of a visual classification task and its semantic relationships is defined by its norm correlating with the test error obtained on the task, and the cosine distance between embeddings correlating with natural distances between tasks, such as taxonomic distance for species classification and fine-tuning distance for transfer learning.",1.0,0.6666666666666666,0.5875685214996338,"Task2Vec embedding utilizes the Fisher Information Matrix (FIM) computed from a probe network's parameters, capturing the task's complexity and semantic relationships by representing tasks as elements in a vector space.","The ability of the Task2Vec embedding to capture the complexity of a visual classification task and its semantic relationships is defined by its norm correlating with the test error obtained on the task, and the cosine distance between embeddings correlating with natural distances between tasks, such as taxonomic distance for species classification and fine-tuning distance for transfer learning.",1.0,1.0,0.561631441116333 +How does Task2Vec's embedding relate to the difficulty and domain characteristics of a task?,"The embedding's norm correlates with task difficulty, while its orientation and distance from other embeddings capture domain characteristics and task similarities.","['embedding (distance from origin) and test error obtained on the task.\nnear the decision boundary since p(1−p)is maximized at\np= 1/2. Compare this to the covariance matrix of the data,\nC0, to which all data points contribute equally. Instead, in\nTASK 2VEC information on the domain is based on data near\nthe decision boundary (task-weighted domain embedding).\nEncoding useful features for the task: The FIM de-\npends on the curvature of the loss function with the diagonal\nentries capturing the sensitivity of the loss to model param-\neters. Specifically, in the two-layer model one can see that,\nif a given feature is uncorrelated with y, the correspond-\ning blocks of Fare zero. In contrast, a domain embedding\nbased on feature activations of the probe network (e.g., C1)\nonly reflects which features vary over the dataset without\nindication of whether they are relevant ', 'on each task.\nA similar approach has been taken in computer vision for\nselecting a detector for a new category out of a large library\nof detectors [26, 40, 38].\n7. Discussion\nTASK 2VEC is an efficient way to represent a task, or the\ncorresponding dataset, as a fixed dimensional vector. It has\nseveral appealing properties, in particular its norm corre-\nlates with the test error obtained on the task, and the co-\nsine distance between embeddings correlates with natural\ndistances between tasks, when available, such as the taxo-\nnomic distance for species classification, and the fine-tuning\ndistance for transfer learning. Having a representation of\ntasks paves the way for a wide variety of meta-learning\ntasks. In this work, we focused on selection of an expert\nfeature extractor in order to solve a new task, especially\nwhen little training data is present, and showed ', 'have multiplemodels trained on the same task with different performance\ncharacteristics. To model the joint interaction between task\nand model ( i.e., architecture and training algorithm), we aim\nto learn a joint embedding of the two.\nWe consider for concreteness the problem of learning\na joint embedding for model selection. In order to em-\nbed models in the task space so that those near a task\nare likely to perform well on that task, we formulate the\nfollowing meta-learning problem: Given kmodels, their\nMODEL 2VEC embedding are the vectors mi=Fi+bi,\nwhereFiis the task embedding of the task used to train\nmodelmi(if available, else we set it to zero), and biis a\nlearned “model bias” that perturbs the task embedding to\naccount for particularities of the model. We learn biby opti-\nmizing ak-way cross entropy loss to predict the best model\ngiven the task ']","Task2Vec's embedding relates to the difficulty and domain characteristics of a task by representing a task as a fixed dimensional vector. The norm of the embedding correlates with the test error obtained on the task, indicating the difficulty of the task. Additionally, the cosine distance between embeddings correlates with natural distances between tasks, such as taxonomic distance for species classification or fine-tuning distance for transfer learning, reflecting the domain characteristics of the task.",1.0,1.0,0.7857621908187866,"The embedding's norm correlates with task difficulty, while its orientation and distance from other embeddings capture domain characteristics and task similarities.","Task2Vec's embedding encodes the ""difficulty"" of the task, characteristics of the input domain, and which features of the probe network are useful to solve it. It provides a fixed-dimensional representation of the task that captures the structure of the task, including information on the domain based on data near the decision boundary (task-weighted domain embedding).",1.0,0.6666666666666666,0.535447359085083 +How does Task2Vec differentiate itself from traditional domain embeddings and other task representation methods?,"Unlike domain embeddings that focus on input data distribution, Task2Vec uniquely represents tasks based on the joint input-output distribution's structure, offering a novel perspective on task similarity and complexity.","['on each task.\nA similar approach has been taken in computer vision for\nselecting a detector for a new category out of a large library\nof detectors [26, 40, 38].\n7. Discussion\nTASK 2VEC is an efficient way to represent a task, or the\ncorresponding dataset, as a fixed dimensional vector. It has\nseveral appealing properties, in particular its norm corre-\nlates with the test error obtained on the task, and the co-\nsine distance between embeddings correlates with natural\ndistances between tasks, when available, such as the taxo-\nnomic distance for species classification, and the fine-tuning\ndistance for transfer learning. Having a representation of\ntasks paves the way for a wide variety of meta-learning\ntasks. In this work, we focused on selection of an expert\nfeature extractor in order to solve a new task, especially\nwhen little training data is present, and showed ', 'requires understanding\nindividual tokens in context, complete sentences, inter-sentence relations, and entire paragraphs.\nLicense: Task data must be available under licences that allow use and redistribution for research\npurposes.\nTo identify possible tasks for SuperGLUE, we disseminated a public call for task proposals to the\nNLP community, and received approximately 30 proposals. We filtered these proposals according\nto our criteria. Many proposals were not suitable due to licensing issues, complex formats, and\ninsufficient headroom; we provide examples of such tasks in Appendix D. For each of the remaining\ntasks, we ran a BERT-based baseline and a human baseline, and filtered out tasks which were either\ntoo challenging for humans without extensive training or too easy for our machine baselines.\n3.2 Selected Tasks\nFollowing this process, we arrived at eight tasks to use in SuperGLUE. See Tables 1 and 2 ', 'the same categorical attribute, may\nend up far away from each other and close to other tasks that\nare semantically more similar ( e.g., the jeans category task\nis close to the ripped attribute and the denim material). This\nis reflected in the mixture of colors of semantically related\nnearby tasks, showing non-trivial grouping.\nWe also compare the TASK 2VEC embedding with a do-\nmain embedding baseline, which only exploits the input\ndistribution p(x)rather than the task distribution p(x,y).\nWhile some tasks are highly correlated with their domain\n(e.g., tasks from iNaturalist), other tasks differ only on the\nlabels ( e.g., all the attribute tasks of iMaterialist, which\nshare the same clothes domain). Accordingly, the domain\x0c102103104\nNumber of samples-10%0%10%Error relative to brute force\n(lower is better)\nBrute force fixed\nImageNet fixed\nTask2Vec fixedImageNet finetune\nTask2Vec finetuneFigure 4: TASK 2VEC improves results at different\ndataset sizes and training ']","Task2Vec differentiates itself from traditional domain embeddings and other task representation methods by representing a task, or the corresponding dataset, as a fixed dimensional vector with appealing properties. The norm of the Task2Vec vector correlates with the test error obtained on the task, and the cosine distance between embeddings correlates with natural distances between tasks. Additionally, Task2Vec allows for a wide variety of meta-learning tasks and enables the selection of an expert feature extractor to solve a new task, especially when there is limited training data available.",1.0,0.6666666666666666,0.8274490237236023,"Unlike domain embeddings that focus on input data distribution, Task2Vec uniquely represents tasks based on the joint input-output distribution's structure, offering a novel perspective on task similarity and complexity.","Task2Vec differentiates itself from traditional domain embeddings and other task representation methods by representing a task or dataset as a fixed-dimensional vector, where the norm correlates with the test error obtained on the task, and the cosine distance between embeddings correlates with natural distances between tasks. This representation paves the way for a wide variety of meta-learning tasks, particularly useful when there is insufficient data to train or fine-tune a generic model.",1.0,0.6666666666666666,0.7941734790802002 +How does Task2Vec ensure its task embeddings are invariant to the number of classes and label semantics within a dataset?,"By basing the embeddings on the structure of the task via the FIM, Task2Vec creates representations that are independent of class count or label semantics, focusing on the task's intrinsic characteristics.","['entity_types.__len__() > 0 %}\nIn the sentence\n{{text}}\nthe X = ""{{entities[entity_idx]}}"" is an entity of the type ""{{\nentity_types[entity_idx]}}"". Extract all possible triples contains ""{{\nentities[entity_idx]}}"" in the form of ( X ; Y ; Z ), given the\nfollowing candidate properties Y:\n{% for r in allowed_relations %}- {{r}}\n{% endfor %}\nAnswer: ||| {% for r in relations %}{% if r[\'head\'][0] == entities[\nentity_idx] %}{{format_triple([r], allowed_relations) | join("" "")}}{%\nendif %}{% endfor %}\n{% endif %}\n(Relation Classification, Prompt 0)\nQUIZ\n1. Given the candidate relations:\n- {{shuffle(allowed_relations) | join(""\\n- "")}}\nwhat is the relation between ""{{relations[triple_idx][\'head\'][0]}}"" and\n""{{relations[triple_idx][\'tail\'][0]}}"" in the following sentence?\n{{text}}\nAnswer: ||| {{relations[triple_idx][\'relation\']}}\nNevertheless, existing joint entity and relation extraction datasets have very limited relation schema.\nFor example, CoNLL04 only contains five different relations; the most diverse NYT dataset con-\ntains 24 Freebase predicates. To allow the model to capture a diverse range of ', 'such as the (non-centered)\ncovarianceC0=E[\nxxT]\nof the input data or the covari-\nanceC1=E[\nzzT]\nof the feature activations. One could\ntake such statistics as a representative domain embedding\nsince they only depend on the marginal distribution p(x)in\ncontrast to the FIM task embedding , which depends on the\njoint distribution p(x,y). These simple expressions high-\nlight some important (and more general) properties of the\nFisher embedding we now describe.\nInvariance to the label space: The task embedding does\nnot directly depend on the task labels, but only on the pre-\ndicted distribution pw(y|x)of the trained model. Infor-\nmation about the ground-truth labels yis encoded in the\nweightswwhich are a sufficient statistic of the task [5]. In\nparticular, the task embedding is invariant to permutations\nof the labels y, and has fixed dimension (number of filters\nof the feature extractor) regardless of the output space (e.g.,\nk-way classification with ', 'task. This can be particu-\nlarly valuable when there is insufficient data to train or fine-\ntune a generic model, and transfer of knowledge is essen-\ntial. TASK 2VEC depends solely on the task, and ignores\ninteractions with the model which may however play an\nimportant role. To address this, we learn a joint task and\nmodel embedding, called MODEL 2VEC, in such a way that\nmodels whose embeddings are close to a task exhibit good\nperfmormance on the task. We use this to select an expert\nfrom a given collection, improving performance relative to\n1arXiv:1902.03545v1 [cs.LG] 10 Feb 2019\x0cTask Embeddings\nDomain EmbeddingsActinopterygii (n)\nAmphibia (n)\nArachnida (n)\nAves (n)\nFungi (n)Insecta (n)\nMammalia (n)\nMollusca (n)\nPlantae (n)\nProtozoa (n)Reptilia (n)\nCategory (m)\nColor (m)\nGender (m)\nMaterial (m)Neckline (m)\nPants (m)\nPattern (m)\nShoes (m)Figure 1: Task embedding across a large library of tasks (best seen magnified). (Left) T-SNE visualization of the ']",None.,0.0,1.0,-0.07551559805870056,"By basing the embeddings on the structure of the task via the FIM, Task2Vec creates representations that are independent of class count or label semantics, focusing on the task's intrinsic characteristics.",Task2Vec ensures its task embeddings are invariant to the number of classes and label semantics within a dataset by representing a task or dataset as a fixed-dimensional vector. This fixed dimensional vector has properties such as its norm correlating with the test error obtained on the task and the cosine distance between embeddings correlating with natural distances between tasks. This approach allows for a consistent representation of tasks regardless of the number of classes or label semantics present in the dataset.,1.0,0.6666666666666666,0.6477453708648682 +How does Task2Vec handle the variance in data size and complexity across different tasks in its embeddings?,"Through robust Fisher computation, Task2Vec accommodates variance in data size and complexity, ensuring embeddings remain informative by adjusting for irregularities in the loss landscape.","['are interested in has\nrelatively few samples. In Fig. 4 we show how the perfor-\nmance of TASK 2VEC varies on a model selection task as the\nnumber of samples varies. At all sample sizes TASK 2VEC is\nclose to the optimum, and improves over selecting a generic\nexpert (ImageNet), both when fine-tuning and when train-\ning only a classifier. We observe that the best choice of ex-\nperts is not affected by the dataset size, and that even with\nfew examples TASK 2VEC is able to find the optimal experts.\nChoice of probe network In Table 1 we show that\nDenseNet [15] and ResNet architectures [11] perform sig-\nnificantly better when used as probe networks to compute\ntheTASK 2VEC embedding than a VGG [32] architecture.\n6. Related Work\nTask and Domain embedding. Tasks distinguished by\ntheir domain can be understood simply in ', 'on each task.\nA similar approach has been taken in computer vision for\nselecting a detector for a new category out of a large library\nof detectors [26, 40, 38].\n7. Discussion\nTASK 2VEC is an efficient way to represent a task, or the\ncorresponding dataset, as a fixed dimensional vector. It has\nseveral appealing properties, in particular its norm corre-\nlates with the test error obtained on the task, and the co-\nsine distance between embeddings correlates with natural\ndistances between tasks, when available, such as the taxo-\nnomic distance for species classification, and the fine-tuning\ndistance for transfer learning. Having a representation of\ntasks paves the way for a wide variety of meta-learning\ntasks. In this work, we focused on selection of an expert\nfeature extractor in order to solve a new task, especially\nwhen little training data is present, and showed ', 'requires understanding\nindividual tokens in context, complete sentences, inter-sentence relations, and entire paragraphs.\nLicense: Task data must be available under licences that allow use and redistribution for research\npurposes.\nTo identify possible tasks for SuperGLUE, we disseminated a public call for task proposals to the\nNLP community, and received approximately 30 proposals. We filtered these proposals according\nto our criteria. Many proposals were not suitable due to licensing issues, complex formats, and\ninsufficient headroom; we provide examples of such tasks in Appendix D. For each of the remaining\ntasks, we ran a BERT-based baseline and a human baseline, and filtered out tasks which were either\ntoo challenging for humans without extensive training or too easy for our machine baselines.\n3.2 Selected Tasks\nFollowing this process, we arrived at eight tasks to use in SuperGLUE. See Tables 1 and 2 ']","Task2Vec is able to handle the variance in data size and complexity across different tasks in its embeddings by showing consistent performance at all sample sizes, regardless of the dataset size. It is able to find the optimal experts even with few examples, indicating that the choice of experts is not affected by the dataset size. Additionally, the norm of Task2Vec embeddings correlates with the test error obtained on the task, and the cosine distance between embeddings correlates with natural distances between tasks, such as taxonomic distance for species classification and fine-tuning distance for transfer learning. This allows for a wide variety of meta-learning tasks to be tackled using Task2Vec representations.",1.0,0.6666666666666666,0.7255837917327881,"Through robust Fisher computation, Task2Vec accommodates variance in data size and complexity, ensuring embeddings remain informative by adjusting for irregularities in the loss landscape.","Task2Vec handles the variance in data size and complexity across different tasks in its embeddings by representing each task as a fixed-dimensional vector. This vector representation correlates with the test error obtained on the task, as well as the natural distances between tasks. Additionally, Task2Vec ignores interactions with the model and focuses solely on the task itself, which allows it to be particularly valuable when there is insufficient data to train or fine-tune a generic model.",1.0,1.0,0.7255862355232239 +"How does GLM-130B's architecture differ from traditional GPT-style models, and what are its key features?","Unlike GPT-style architectures, GLM-130B is based on the General Language Model (GLM) framework with bidirectional attention and autoregressive blank infilling, enabling more effective context comprehension.","['et al., 2022) to leverage its bidirectional at-\ntention advantage and autoregressive blank infilling objective. Table 1 summarizes the comparison\nbetween GLM-130B, GPT-3 and another two open-source efforts—OPT-175B and BLOOM-176B,\nas well as PaLM 540B (Chowdhery et al., 2022)—a 4 ×larger model—as a reference.\nAltogether, the conceptual uniqueness and engineering efforts enable GLM-130B to exhibit perfor-\nmance that surpasses the level of GPT-3 on a wide range of benchmarks (in total 112 tasks) and also\noutperforms PaLM 540B in many cases, while outperformance over GPT-3 has not been observed in\nOPT-175B and BLOOM-176B (Cf. Figure 1 left). For zero-shot performance, GLM-130B is better\nthan GPT-3 175B (+5.0%), OPT-175B (+6.5%), and BLOOM-176B (+13.0%) on LAMBADA (Pa-\nperno et al., 2016), and achieves 3 ×better performance than GPT-3 on Big-bench-lite (Srivastava\net al., 2022). For the 5-shot MMLU (Hendrycks et ', 'The resultant GLM-130B model offers significant\noutperformance over GPT-3 175B (davinci) on a wide range of popular English\nbenchmarks while the performance advantage is not observed in OPT-175B and\nBLOOM-176B. It also consistently and significantly outperforms ERNIE TITAN\n3.0 260B—the largest Chinese language model—across related benchmarks. Fi-\nnally, we leverage a unique scaling property of GLM-130B to reach INT4 quanti-\nzation without post training, with almost no performance loss, making it the first\namong 100B-scale models and more importantly, allowing its effective inference\non 4×RTX 3090 (24G) or 8 ×RTX 2080 Ti (11G) GPUs, the most affordable\nGPUs required for using 100B-scale models. The GLM-130B model weights are\npublicly accessible and its code, training logs, related toolkit, and lessons learned\nare open-sourced at https://github.com/THUDM/GLM-130B/ .\n1 I NTRODUCTION\nLarge language models (LLMs), particularly those with over 100 billion (100B) parameters (Brown\net ', 'al., 2021) tasks, it is better than GPT-3 175B\n(+0.9%) and BLOOM-176B (+12.7%). As a bilingual LLM also in Chinese, it offers significantly\nbetter results than ERNIE TITAN 3.0 260B (Wang et al., 2021)—the largest Chinese LLM—on 7\nzero-shot CLUE (Xu et al., 2020) datasets (+24.26%) and 5 zero-shot FewCLUE (Xu et al., 2021)\nones (+12.75%). Importantly, as summarized in Figure 1 right, GLM-130B as an open model is\nassociated with significantly less bias and generation toxicity than its 100B-scale counterparts .\nFinally, we design GLM-130B to empower as many people as possible to conduct 100B-scale LLM\nstudies. First, instead of using 175B+ parameters as OPT and BLOOM, the 130B size is decided be-\ncause such a size supports inference on a single A100 (8 ×40G) server. Second, to further lower the\nGPU requirements, we quantize GLM-130B ']","GLM-130B's architecture differs from traditional GPT-style models by leveraging bidirectional attention and autoregressive blank infilling objective. Its key features include surpassing the performance of GPT-3 on a wide range of benchmarks, outperforming PaLM 540B in many cases, and exhibiting better zero-shot performance than GPT-3 175B, OPT-175B, and BLOOM-176B on specific tasks. Additionally, GLM-130B offers outperformance over ERNIE TITAN 3.0 260B, the largest Chinese language model, across related benchmarks and has the unique ability to reach INT4 quantization without post training, enabling effective inference on affordable GPUs.",1.0,1.0,0.8094369769096375,"Unlike GPT-style architectures, GLM-130B is based on the General Language Model (GLM) framework with bidirectional attention and autoregressive blank infilling, enabling more effective context comprehension.","GLM-130B's architecture differs from traditional GPT-style models by exploring the potential of a bidirectional General Language Model (GLM) as its backbone. The key features of GLM-130B include leveraging autoregressive blank infilling as its training objective, scalability for INT4 weight quantization allowing for faster inference on affordable GPUs, and negligible performance degradation with INT4 quantization compared to uncompressed models.",1.0,1.0,0.8875114917755127 +How does GLM-130B's performance compare to other 100B-scale models and PaLM 540B across English benchmarks?,"GLM-130B surpasses GPT-3 and other 100B-scale models in a range of English benchmarks and performs competitively against PaLM 540B, demonstrating its effectiveness in language understanding.","['et al., 2022) to leverage its bidirectional at-\ntention advantage and autoregressive blank infilling objective. Table 1 summarizes the comparison\nbetween GLM-130B, GPT-3 and another two open-source efforts—OPT-175B and BLOOM-176B,\nas well as PaLM 540B (Chowdhery et al., 2022)—a 4 ×larger model—as a reference.\nAltogether, the conceptual uniqueness and engineering efforts enable GLM-130B to exhibit perfor-\nmance that surpasses the level of GPT-3 on a wide range of benchmarks (in total 112 tasks) and also\noutperforms PaLM 540B in many cases, while outperformance over GPT-3 has not been observed in\nOPT-175B and BLOOM-176B (Cf. Figure 1 left). For zero-shot performance, GLM-130B is better\nthan GPT-3 175B (+5.0%), OPT-175B (+6.5%), and BLOOM-176B (+13.0%) on LAMBADA (Pa-\nperno et al., 2016), and achieves 3 ×better performance than GPT-3 on Big-bench-lite (Srivastava\net al., 2022). For the 5-shot MMLU (Hendrycks et ', 'it have not been approved for months.\n7\x0cPublished as a conference paper at ICLR 2023\n50 100 150 200 250 300 350 400\nTrained Tokens (Billion)3034384246\nGPT-3 175B (5-shot)\nBLOOM 176B (5-shot)GLM-130B (5-shot)\nFigure 6: GLM-130B on MMLU\n(57 tasks) along training steps.\n10810910101011\nEffective Parameter Count0246810121416\nGLM-130B 0-shot\nGLM-130B 1-shot\nGLM-130B 3-shot\nGPT-3 0-shot\nGPT-3 1-shot\nGPT-3 3-shot\nPaLM 0-shotFigure 7: BIG-bench-lite evalua-\ntion (24 tasks) across scales.0-shot 1-shot 3-shot\nGPT-3 2.6B 0.60 0.71 1.83\nGPT-3 6.7B -0.06 2.93 5.40\nGPT-3 13B 1.77 5.43 7.95\nGPT-3 175B 4.35 11.34 13.18\nPaLM 540B 8.05 37.77 -\nGLM-130B 13.31 14.91 15.12\nTable 4: Details on BIG-\nbench-lite (24 tasks).\n5.2 M ASSIVE MULTITASK LANGUAGE UNDERSTANDING (MMLU)\nMMLU (Hendrycks et al., 2021) is a diverse benchmark including 57 multi-choice question an-\nswering tasks concerning human knowledge ranging from high-school-level to expert-level. It is\nreleased after the crawling of Pile and serves as an ideal test-bed for LLMs’ few-shot learning. ', 'at\nruntime.\nB.8.1 Q UANTIZATION RESULTS AT SCALES\nGLM models at 110M to 10B scale are from GLM’s original paper(Du et al., 2022). Although the\narchitecture of smaller scale GLMs are not the same as GLM-130B, we believe that the training\nobjective is the key factor for quantization. Table 10 shows the performance of GLM and BLOOM\nfamily models at different scales on the LAMBADA dataset with different quantization methods.\nAlmost all models maintain performance at INT8 precision. In general, GLM maintains better\nperformance than BLOOM at INT4 precision as it scales.\n28\x0cPublished as a conference paper at ICLR 2023\nTable 10: Accuracy on LAMBADA dataset for GLM and BLOOM family at 100M to 176B scales\nacross different quantization precision.\nBLOOM-560M BLOOM-1B1 BLOOM-3B BLOOM-7B BLOOM-176B\nOriginal 31.40% 40.68% 48.30% 54.91% 64.37%\nAbsmax INT8, col-wise 26.12% 40.69% 48.83% 55.33% 65.03%\nAbsmax INT4, col-wise 9.30% ']","GLM-130B's performance surpasses the level of GPT-3 on a wide range of benchmarks and outperforms PaLM 540B in many cases. It also exhibits better zero-shot performance compared to GPT-3 175B, OPT-175B, and BLOOM-176B on LAMBADA and achieves 3 times better performance than GPT-3 on Big-bench-lite.",1.0,0.5,0.8322749733924866,"GLM-130B surpasses GPT-3 and other 100B-scale models in a range of English benchmarks and performs competitively against PaLM 540B, demonstrating its effectiveness in language understanding.",GLM-130B's performance outperforms other 100B-scale models and is slightly worse than PaLM-540B across English benchmarks.,1.0,0.6666666666666666,0.8755315542221069 +What record-setting performance did Megatron-LM achieve in terms of parameter count and sustained PetaFLOPs on NVIDIA V100 GPUs?,"Megatron-LM trained models up to 8.3 billion parameters, sustaining 15.1 PetaFLOPs across 512 GPUs with 76% scaling efficiency.","['/ 87.4 86.5 (89.0 / 85.5)\nXLNet (Yang et al., 2019) 2 90.8 / 90.8 92.3 95.1 / 89.7 90.6 / 87.9 85.4 (88.6 / 84.0)\nMegatron-336M 1 89.7 / 90.0 92.3 94.2 / 88.0 88.1 / 84.8 83.0 (86.9 / 81.5)\nMegatron-1.3B 1 90.9 / 91.0 92.6 94.9 / 89.1 90.2 / 87.1 87.3 (90.4 / 86.1)\nMegatron-3.9B 1 91.4 / 91.4 92.7 95.5 / 90.0 91.2 / 88.5 89.5 (91.8 / 88.6)\nALBERT ensemble (Lan et al., 2019) 95.5 / 90.1 91.4 / 88.9 89.4 (91.2 / 88.6)\nMegatron-3.9B ensemble 95.8 / 90.5 91.7 / 89.0 90.9 (93.1 / 90.0)\nFigure 7. Training loss for BERT model using the original architec-\nture (a) and the rearranged architecture (b). Left figure shows the\ntraining loss for 336M and 752M BERT model. While the original\narchitecture performs well ', 'Megatron-LM: Training Multi-Billion Parameter Language Models Using\nModel Parallelism\nMohammad Shoeybi1 2Mostofa Patwary1 2Raul Puri1 2Patrick LeGresley2Jared Casper2\nBryan Catanzaro2\nAbstract\nRecent work in language modeling demonstrates\nthat training large transformer models advances\nthe state of the art in Natural Language Processing\napplications. However, very large models can be\nquite difficult to train due to memory constraints.\nIn this work, we present our techniques for train-\ning very large transformer models and implement\na simple, efficient intra-layer model parallel ap-\nproach that enables training transformer models\nwith billions of parameters. Our approach does\nnot require a new compiler or library changes, is\northogonal and complimentary to pipeline model\nparallelism, and can be fully implemented with\nthe insertion of a few communication operations\nin native PyTorch. We illustrate this approach\nby converging transformer based models up to\n8.3 billion parameters using 512 GPUs. We sus-\ntain 15.1 PetaFLOPs across the entire ', 'into INT4 precision without post training while OPT\nand BLOOM can only reach INT8. Due to a unique property of the GLM architecture, GLM-130B’s\nINT4 quantization introduces negligible performance degradation, e.g., -0.74% on LAMBADA and\neven +0.05% on MMLU, making it still better than the uncompressed GPT-3. This enables GLM-\n130B’s fast inference with performance guarantee on a server of 4 ×RTX 3090 (24G) or 8 ×RTX\n2080 Ti (11G), the most affordable GPU required for using 100B-scale LLMs to date.\n2\x0cPublished as a conference paper at ICLR 2023\nGradient Norm(a) More than 30 failed preliminary trials at 100B-scale (b) Final decisive trials: Sandwich-LN v.s. DeepNorm012345678910111213\n0 500 1k 1.5k 2k2.5k 3k\nFigure 3: Trials on different LayerNorms for GLM-130B training. It turns out that DeepNorm is the\nmost stable one, as it has small gradient norm and ']",None,0.0,0.5,-0.029711192473769188,"Megatron-LM trained models up to 8.3 billion parameters, sustaining 15.1 PetaFLOPs across 512 GPUs with 76% scaling efficiency.",None,0.0,1.0,-0.029710978269577026 +What computational approach does PAL use to integrate programmatic reasoning within natural language tasks?,"PAL leverages large language models (LLMs) to generate programs as intermediate reasoning steps for natural language problems, offloading solution steps to a Python interpreter, ensuring accuracy by focusing LLMs on decomposing problems into executable code.","['intermediate\nreasoning steps, but offloads the solution step to a\nruntime such as a Python interpreter. With PAL,\ndecomposing the natural language problem into\nrunnable steps remains the only learning task for\nthe LLM, while solving is delegated to the inter-\npreter. We demonstrate this synergy between a\nneural LLM and a symbolic interpreter across 13\nmathematical, symbolic, and algorithmic reason-\ning tasks from BIG-Bench Hard and other bench-\nmarks. In all these natural language reasoning\ntasks, generating code using an LLM and rea-\nsoning using a Python interpreter leads to more\naccurate results than much larger models. For ex-\nample, PALusing CODEX achieves state-of-the-\nart few-shot accuracy on the GSM 8Kbenchmark\nof math word problems, surpassing PaLM-540 B\nwhich uses chain-of-thought by absolute 15% top-\n1. Our code and data are publicly available at\nhttp://reasonwithpal.com .\n*The first three authors contributed equally.1Language Tech-\nnologies Institute, Carnegie Mellon University, ', 'PAL: Program-aided Language Models\nLuyu Gao* 1Aman Madaan* 1Shuyan Zhou* 1Uri Alon1Pengfei Liu1 2Yiming Yang1Jamie Callan1\nGraham Neubig1 2\n{luyug,amadaan,shuyanzh,ualon,pliu3,yiming,callan,gneubig }@cs.cmu.edu\nAbstract\nLarge language models (LLMs) have recently\ndemonstrated an impressive ability to perform\narithmetic and symbolic reasoning tasks, when\nprovided with a few examples at test time (“few-\nshot prompting”). Much of this success can be\nattributed to prompting methods such as “chain-\nof-thought”, which employ LLMs for both under-\nstanding the problem description by decomposing\nit into steps, as well as solving each step of the\nproblem. While LLMs seem to be adept at this\nsort of step-by-step decomposition, LLMs often\nmake logical and arithmetic mistakes in the solu-\ntion part, even when the problem is decomposed\ncorrectly. In this paper, we present Program-\nAided Language models ( PAL): a novel approach\nthat uses the LLM to read natural language prob-\nlems and generate programs as the ', 'Given a mathematical reasoning question, Chain-of-thought (left) generates interme-\ndiate reasoning steps of free-form text. In contrast, Program-aided Language models ( PAL, right) generate intermediate\nsteps andPython code. This shifts the role of running the reasoning steps from the language model to the Python interpreter.\nThe final answer is obtained by running the generated reasoning chain. Chain-of-thought reasoning is highlighted in blue;\nPAL steps are highlighted in gray and pink ; the Python interpreter run is highlighted in black and green.\nrequire LLMs, solving and reasoning can be done with the\nexternal solver. This bridges an important gap in chain-of-\nthought-like methods, where reasoning chains can be correct\nbut produce an incorrect answer.\nWe demonstrate the effectiveness of PALacross 13arith-\nmetic and symbolic reasoning tasks. In all these tasks,\nPALusing Codex (Chen et al., 2021a) outperforms much\nlarger models such ']","Program-Aided Language models (PAL) use a computational approach that involves reading natural language problems, generating programs as a solution, and offloading the task of solving and reasoning to an external interpreter, such as a Python interpreter.",1.0,1.0,0.8534362316131592,"PAL leverages large language models (LLMs) to generate programs as intermediate reasoning steps for natural language problems, offloading solution steps to a Python interpreter, ensuring accuracy by focusing LLMs on decomposing problems into executable code.",PAL uses Program-aided Language Models (PAL) to integrate programmatic reasoning within natural language tasks.,1.0,0.6666666666666666,0.863900363445282 +How does PAL handle tasks involving large numbers differently than models relying on chain-of-thought methodologies?,"For tasks with large numbers, PAL demonstrates robust performance by leveraging the Python interpreter for accurate calculations, contrasting with chain-of-thought methodologies where performance drops due to LLMs' computational limitations.","['PAL: Program-aided Language Models\nLuyu Gao* 1Aman Madaan* 1Shuyan Zhou* 1Uri Alon1Pengfei Liu1 2Yiming Yang1Jamie Callan1\nGraham Neubig1 2\n{luyug,amadaan,shuyanzh,ualon,pliu3,yiming,callan,gneubig }@cs.cmu.edu\nAbstract\nLarge language models (LLMs) have recently\ndemonstrated an impressive ability to perform\narithmetic and symbolic reasoning tasks, when\nprovided with a few examples at test time (“few-\nshot prompting”). Much of this success can be\nattributed to prompting methods such as “chain-\nof-thought”, which employ LLMs for both under-\nstanding the problem description by decomposing\nit into steps, as well as solving each step of the\nproblem. While LLMs seem to be adept at this\nsort of step-by-step decomposition, LLMs often\nmake logical and arithmetic mistakes in the solu-\ntion part, even when the problem is decomposed\ncorrectly. In this paper, we present Program-\nAided Language models ( PAL): a novel approach\nthat uses the LLM to read natural language prob-\nlems and generate programs as the ', 'Given a mathematical reasoning question, Chain-of-thought (left) generates interme-\ndiate reasoning steps of free-form text. In contrast, Program-aided Language models ( PAL, right) generate intermediate\nsteps andPython code. This shifts the role of running the reasoning steps from the language model to the Python interpreter.\nThe final answer is obtained by running the generated reasoning chain. Chain-of-thought reasoning is highlighted in blue;\nPAL steps are highlighted in gray and pink ; the Python interpreter run is highlighted in black and green.\nrequire LLMs, solving and reasoning can be done with the\nexternal solver. This bridges an important gap in chain-of-\nthought-like methods, where reasoning chains can be correct\nbut produce an incorrect answer.\nWe demonstrate the effectiveness of PALacross 13arith-\nmetic and symbolic reasoning tasks. In all these tasks,\nPALusing Codex (Chen et al., 2021a) outperforms much\nlarger models such ', 'the question still yields the wrong answer. In contrast, PAL-style reasoning only takes a few lines of code,\nand the execution does produce the correct answer, in this case. These examples indicate that PALcan benefit even an\nostensibly powerful model like ChatGPT.\n1chat.openai.com\x0cPAL: Program-aided Language Models 18\n(a) In C OT style reasoning, the correct intermediate reasoning chain leads to wrong answers.\n(b) In P AL, the execution of the code will produce the correct answer.\nFigure 13: ChatGPT with P AL and C OT to answer a user-posted question\x0cPAL: Program-aided Language Models 19\n(a) Step-by-step reasoning struggle on counting the number of letters in the word “intrigu-\ning” which has ten letters.\n(b) Explicitly instructing ChatGPT to reason step-by-step before generating answer still\nleads to the wrong answer.\n(c) P AL takes a few lines of code and ']","PAL handles tasks involving large numbers differently than models relying on chain-of-thought methodologies by generating intermediate steps and Python code instead of just free-form text. This shift in approach allows the role of running the reasoning steps to be done by the Python interpreter, which can help prevent logical and arithmetic mistakes in the final solution even when the problem is correctly decomposed.",1.0,0.6666666666666666,0.7986947298049927,"For tasks with large numbers, PAL demonstrates robust performance by leveraging the Python interpreter for accurate calculations, contrasting with chain-of-thought methodologies where performance drops due to LLMs' computational limitations.","PAL handles tasks involving large numbers differently than models relying on chain-of-thought methodologies by generating programs as intermediate reasoning steps using the LLM to read natural language problems, but delegating the solution step to a Python interpreter. This approach allows the LLM to focus on decomposing the natural language problem into runnable steps, while the interpreter handles the solving aspect. This synergy between the neural LLM and symbolic interpreter results in more accurate results for tasks involving large numbers compared to models relying on chain-of-thought methodologies.",1.0,1.0,0.8056040406227112 +Describe the process and tools provided by CodeNet for transforming code samples into machine-learning-friendly formats.,"CodeNet offers pre-processing tools for tokenization, simplified parse tree generation, and code graph formation, easing the transformation of code into representations for machine learning models.","['present a large-scale dataset CodeNet ,\nconsisting of over 14 million code samples and about 500 million lines of code\nin 55 different programming languages, which is aimed at teaching AI to code.\nIn addition to its large scale, CodeNet has a rich set of high-quality annotations\nto benchmark and help accelerate research in AI techniques for a variety of crit-\nical coding tasks, including code similarity and classification, code translation\nbetween a large variety of programming languages, and code performance (runtime\nand memory) improvement techniques. Additionally, CodeNet provides sample\ninput and output test sets for 98.5% of the code samples, which can be used as\nan oracle for determining code correctness and potentially guide reinforcement\nlearning for code quality improvements. As a usability feature, we provide several\npre-processing tools in CodeNet to transform source code into representations that\ncan ', 'be readily used as inputs into machine learning models. Results of code classi-\nfication and code similarity experiments using the CodeNet dataset are provided as\na reference. We hope that the scale, diversity and rich, high-quality annotations of\nCodeNet will offer unprecedented research opportunities at the intersection of AI\nand Software Engineering.\n1 Introduction\nThere is a growing trend towards leveraging AI for building tools that support software engineering\nand development [ 1,2]. AI can manipulate and generate computer code, but can it do so with\nhigh quality? Many researchers are fascinated by this possibility, encouraged by AI successes in\nother domains and tantalized by the vision of computers programming computers. Some recent\ndeep-learning models [ 3,4] for code have received a lot of publicity: trained on vast amounts of\ndata and using novel architectures with billions of parameters, ', 'requirements change, make it more secure, and/or comply with regu-\nlations. These tasks are challenging, and it is crucial to provide tool support for developers to be\nmore productive at performing them. It is well known that the latest advancements in deep learning\nalgorithms rely on best-of-breed datasets, such as ImageNet, to create increasingly complex and\npowerful models. In this paper, we present ""CodeNet"", a first-of-its-kind dataset in scale, diversity,\nand quality, to accelerate the algorithmic advances in AI for Code.\nTo promote widespread adoption of CodeNet, we will be launching contests involving use cases based\non the dataset. The first contest [ 6] will focus on diversity, inclusion and spurring interest among\naspiring data scientists. We are partnering with the Global Women in Data Science organization (with\npresence in over 50 countries) founded by Stanford University ']","CodeNet provides several pre-processing tools to transform source code into representations that can be readily used as inputs into machine learning models. This process involves converting the code samples into formats that are compatible with machine learning algorithms, making it easier to analyze and extract patterns from the code. These tools aim to facilitate the use of the dataset for research in AI techniques related to coding tasks such as code classification, code similarity, code translation, and code performance improvement.",1.0,0.6666666666666666,0.8706406354904175,"CodeNet offers pre-processing tools for tokenization, simplified parse tree generation, and code graph formation, easing the transformation of code into representations for machine learning models.",CodeNet provides several pre-processing tools to transform source code into representations that are suitable for machine learning. These tools help in converting the code samples into machine-learning-friendly formats which can be used for various AI coding tasks.,1.0,0.6666666666666666,0.8632083535194397 +What specific challenges do the tasks in SuperGLUE address in natural language processing?,"The tasks challenge models on various fronts, including deep textual understanding, commonsense reasoning, fine-grained word sense disambiguation, and the ability to handle ambiguity and complexity in language, reflecting a broad spectrum of NLP challenges.","['in GLUE. The remaining tasks\nwere identified from those submitted to an open call for task proposals and were selected based on\ndifficulty for current NLP approaches.\nMore diverse task formats: The task formats in GLUE are limited to sentence- and sentence-pair\nclassification. We expand the set of task formats in SuperGLUE to include coreference resolution\nand question answering (QA).\nComprehensive human baselines: We include human performance estimates for all benchmark\ntasks, which verify that substantial headroom exists between a strong BERT-based baseline and\nhuman performance.\nImproved code support: SuperGLUE is distributed with a new, modular toolkit for work on\npretraining, multi-task learning, and transfer learning in NLP, built around standard tools including\nPyTorch (Paszke et al., 2017) and AllenNLP (Gardner et al., 2017).\nRefined usage rules: The conditions for inclusion on the SuperGLUE leaderboard have been\nrevamped to ensure fair ', 'Conclusion\nWe present SuperGLUE, a new benchmark for evaluating general-purpose language understanding\nsystems. SuperGLUE updates the GLUE benchmark by identifying a new set of challenging NLU\ntasks, as measured by the difference between human and machine baselines. The set of eight tasks in\nour benchmark emphasizes diverse task formats and low-data training data tasks, with nearly half the\ntasks having fewer than 1k examples and all but one of the tasks having fewer than 10k examples.\nWe evaluate BERT-based baselines and find that they still lag behind humans by nearly 20 points.\nGiven the difficulty of SuperGLUE for BERT, we expect that further progress in multi-task, transfer,\nand unsupervised/self-supervised learning techniques will be necessary to approach human-level per-\nformance on the benchmark. Overall, we argue that SuperGLUE offers a rich and challenging testbed\nfor work developing new general-purpose ', 'high-level goals, the original version of\nthe benchmark is no longer a suitable metric for quantifying such progress.\nIn response, we introduce SuperGLUE, a new benchmark designed to pose a more rigorous test of\nlanguage understanding. SuperGLUE has the same high-level motivation as GLUE: to provide a\nsimple, hard-to-game measure of progress toward general-purpose language understanding technolo-\ngies for English. We anticipate that significant progress on SuperGLUE should require substantive\ninnovations in a number of core areas of machine learning, including sample-efficient, transfer,\nmultitask, and unsupervised or self-supervised learning.\nSuperGLUE follows the basic design of GLUE: It consists of a public leaderboard built around\neight language understanding tasks, drawing on existing data, accompanied by a single-number\nperformance metric, and an analysis toolkit. However, it improves upon GLUE in several ways:\nMore challenging tasks: SuperGLUE retains the two hardest tasks ']","The tasks in SuperGLUE address challenges in natural language processing related to diverse task formats, low-data training data tasks, and the need for advancements in multi-task, transfer, and unsupervised/self-supervised learning techniques to approach human-level performance in language understanding.",1.0,1.0,0.47197631001472473,"The tasks challenge models on various fronts, including deep textual understanding, commonsense reasoning, fine-grained word sense disambiguation, and the ability to handle ambiguity and complexity in language, reflecting a broad spectrum of NLP challenges.","The tasks in SuperGLUE address challenges in natural language processing related to diverse task formats, low-data training data tasks, and the need for further progress in multi-task, transfer, and unsupervised/self-supervised learning techniques to approach human-level performance.",1.0,0.3333333333333333,0.4540385603904724 +"How does SuperGLUE's scoring system work, and what does it aim to achieve?"," SuperGLUE uses a single-number metric averaging task scores to simplify comparison across models, aiming to provide a clear, comprehensive measure of progress on a wide range of language understanding tasks.","['high-level goals, the original version of\nthe benchmark is no longer a suitable metric for quantifying such progress.\nIn response, we introduce SuperGLUE, a new benchmark designed to pose a more rigorous test of\nlanguage understanding. SuperGLUE has the same high-level motivation as GLUE: to provide a\nsimple, hard-to-game measure of progress toward general-purpose language understanding technolo-\ngies for English. We anticipate that significant progress on SuperGLUE should require substantive\ninnovations in a number of core areas of machine learning, including sample-efficient, transfer,\nmultitask, and unsupervised or self-supervised learning.\nSuperGLUE follows the basic design of GLUE: It consists of a public leaderboard built around\neight language understanding tasks, drawing on existing data, accompanied by a single-number\nperformance metric, and an analysis toolkit. However, it improves upon GLUE in several ways:\nMore challenging tasks: SuperGLUE retains the two hardest tasks ', 'Conclusion\nWe present SuperGLUE, a new benchmark for evaluating general-purpose language understanding\nsystems. SuperGLUE updates the GLUE benchmark by identifying a new set of challenging NLU\ntasks, as measured by the difference between human and machine baselines. The set of eight tasks in\nour benchmark emphasizes diverse task formats and low-data training data tasks, with nearly half the\ntasks having fewer than 1k examples and all but one of the tasks having fewer than 10k examples.\nWe evaluate BERT-based baselines and find that they still lag behind humans by nearly 20 points.\nGiven the difficulty of SuperGLUE for BERT, we expect that further progress in multi-task, transfer,\nand unsupervised/self-supervised learning techniques will be necessary to approach human-level per-\nformance on the benchmark. Overall, we argue that SuperGLUE offers a rich and challenging testbed\nfor work developing new general-purpose ', 'in GLUE. The remaining tasks\nwere identified from those submitted to an open call for task proposals and were selected based on\ndifficulty for current NLP approaches.\nMore diverse task formats: The task formats in GLUE are limited to sentence- and sentence-pair\nclassification. We expand the set of task formats in SuperGLUE to include coreference resolution\nand question answering (QA).\nComprehensive human baselines: We include human performance estimates for all benchmark\ntasks, which verify that substantial headroom exists between a strong BERT-based baseline and\nhuman performance.\nImproved code support: SuperGLUE is distributed with a new, modular toolkit for work on\npretraining, multi-task learning, and transfer learning in NLP, built around standard tools including\nPyTorch (Paszke et al., 2017) and AllenNLP (Gardner et al., 2017).\nRefined usage rules: The conditions for inclusion on the SuperGLUE leaderboard have been\nrevamped to ensure fair ']","SuperGLUE's scoring system consists of a single-number performance metric that measures progress towards general-purpose language understanding technologies for English. The aim of SuperGLUE is to provide a more rigorous test of language understanding compared to the original benchmark, GLUE, by including more challenging tasks, diverse task formats, comprehensive human baselines, improved code support, and refined usage rules. The scoring system aims to evaluate general-purpose language understanding systems by identifying challenging NLU tasks and measuring the performance difference between human and machine baselines.",1.0,1.0,0.9156036376953125," SuperGLUE uses a single-number metric averaging task scores to simplify comparison across models, aiming to provide a clear, comprehensive measure of progress on a wide range of language understanding tasks.","SuperGLUE's scoring system consists of a single-number performance metric that measures progress toward general-purpose language understanding technologies for English. It aims to pose a more rigorous test of language understanding compared to the original GLUE benchmark, requiring substantive innovations in core areas of machine learning such as sample-efficient, transfer, multitask, and unsupervised or self-supervised learning. The goal of SuperGLUE is to provide a challenging testbed for the development of new general-purpose language understanding systems.",1.0,0.6666666666666666,0.9165962934494019 +"What practical meta-task is Task2Vec particularly designed to optimize, and how does it achieve this?",Task2Vec is designed for selecting the most suitable pre-trained feature extractor for a new task by learning a metric on task embeddings to predict feature extractor performance.,"['TASK2VEC: Task Embedding for Meta-Learning\nAlessandro Achille\nUCLA and AWS\nachille@cs.ucla.eduMichael Lam\nAWS\nmichlam@amazon.comRahul Tewari\nAWS\ntewarir@amazon.comAvinash Ravichandran\nAWS\nravinash@amazon.com\nSubhransu Maji\nUMass and AWS\nsmmaji@amazon.comCharless Fowlkes\nUCI and AWS\nfowlkec@amazon.comStefano Soatto\nUCLA and AWS\nsoattos@amazon.comPietro Perona\nCaltech and AWS\nperonapp@amazon.com\nAbstract\nWe introduce a method to provide vectorial represen-\ntations of visual classification tasks which can be used\nto reason about the nature of those tasks and their re-\nlations. Given a dataset with ground-truth labels and a\nloss function defined over those labels, we process images\nthrough a “probe network” and compute an embedding\nbased on estimates of the Fisher information matrix asso-\nciated with the probe network parameters. This provides a\nfixed-dimensional embedding of the task that is independent\nof details such as the number of classes and does not require\nany understanding of the class label semantics. We demon-\nstrate that this embedding is capable of predicting task sim-\nilarities that match our intuition about semantic ', 'to the task.\n3. Similarity Measures on the Space of Tasks\nWhat metric should be used on the space of tasks? This\ndepends critically on the meta-task we are considering. As a\nmotivation, we concentrate on the meta-task of selecting the\npre-trained feature extractor from a set in order to obtain the\nbest performance on a new training task. There are several\nnatural metrics that may be considered for this meta-task.\nIn this work, we mainly consider:\nTaxonomic distance For some tasks, there is a natural no-\ntion of semantic similarity, for instance defined by sets of\ncategories organized in a taxonomic hierarchy where each\ntask is classification inside a subtree of the hierarchy ( e.g.,\nwe may say that classifying breeds of dogs is closer to clas-sification of cats than it is to classification of species of\nplants). In this setting, ', 'that using\nTASK 2VEC to select an expert from a collection can sen-\nsibly improve test performance while adding only a small\noverhead to the training process.\x0cMeta-learning on the space of tasks is an important step\ntoward general artificial intelligence. In this work, we in-\ntroduce a way of dealing with thousands of tasks, enough to\nenable reconstruct a topology on the task space, and to test\nmeta-learning solutions. The current experiments highlight\nthe usefulness of our methods. Even so, our collection does\nnot capture the full complexity and variety of tasks that one\nmay encounter in real-world situations. Future work should\nfurther test effectiveness, robustness, and limitations of the\nembedding on larger and more diverse collections.\nReferences\n[1] iMaterialist Challenge (Fashion) at FGVC5 workshop,\nCVPR 2018. https://www.kaggle.com/c/\nimaterialist-challenge-fashion-2018 .\n5\n[2] S. M. Abdulrahman, P. Brazdil, J. N. van Rijn, and J. Van-\nschoren. Speeding up ']","Task2Vec is particularly designed to optimize the meta-task of selecting the pre-trained feature extractor from a set in order to obtain the best performance on a new training task. It achieves this by providing vectorial representations of visual classification tasks, independent of details such as the number of classes and without requiring any understanding of the class label semantics. This allows for the selection of an expert from a collection using Task2Vec to sensibly improve test performance while adding only a small overhead to the training process.",1.0,0.3333333333333333,0.7791461944580078,Task2Vec is designed for selecting the most suitable pre-trained feature extractor for a new task by learning a metric on task embeddings to predict feature extractor performance.,"Task2Vec is particularly designed to optimize the practical meta-task of selecting an expert feature extractor in order to solve a new task, especially when little training data is present. It achieves this by representing a task as a fixed-dimensional vector, where the norm correlates with the test error obtained on the task, and the cosine distance between embeddings correlates with natural distances between tasks. This allows for the selection of an expert feature extractor based on the task representation, improving performance when training data is limited.",1.0,0.6666666666666666,0.8712323904037476 +In what way does Task2Vec's asymmetric distance measure contribute to task similarity assessment and model selection?,"The asymmetric distance accounts for task complexity and transferability, aiding in selecting feature extractors by evaluating their proximity to a task in the embedding space.","['the dis-\ntance from the trivial embedding. This suggests the follow-\ning asymmetric score, again improperly called a “distance”\ndespite being asymmetric and possibly negative:\ndasym(ta→tb) =dsym(ta,tb)−αdsym(ta,t0),\nwheret0is the trivial embedding, and αis an hyperparam-\neter. This has the effect of bring more complex models\ncloser. The hyper-parameter αcan be selected based on\nthe meta-task. In our experiments, we found that the best\nvalue ofα(α= 0.15when using a ResNet-34 pretrained\non ImageNet as the probe network) is robust to the choice\nof meta-tasks.\n4.MODEL 2VEC: task/model co-embedding\nBy construction, the TASK 2VEC distance ignores details\nof the model and only relies on the task. If we know what\ntask a model was trained on, we can represent the model by\nthe embedding of that task. However, in general we may\nnot have such information ( e.g., black-box models or hand-\nconstructed feature extractors). We may also ', 'the division\nis element-wise. This is a symmetric distance which we ex-\npect to capture semantic similarity between two tasks. For\nexample, we show in Fig. 2 that it correlates well with the\ntaxonomical distance between species on iNaturalist.\nOn the other hand, precisely for this reason, this distance\nis ill-suited for tasks such as model selection, where the (in-\ntrinsically asymmetric) transfer distance is more relevant.\nAsymmetric TASK 2VEC distance In a first approxima-\ntion, that does not consider either the model or the training\nprocedure used, positive transfer between two tasks depends\nboth on the similarity between two tasks and on the com-\nplexity of the first. Indeed, pre-training on a general but\ncomplex task such as ImageNet often yields a better result\nthan fine-tuning from a close dataset of comparable com-\nplexity. In our case, complexity can be measured as ', '(with the selected procedure) on task tb.\n3.1. Symmetric and asymmetric TASK 2VECmetrics\nBy construction, the Fisher embedding on which\nTASK 2VEC is based captures fundamental information\x0cabout the structure of the task. We may therefore expect\nthat the distance between two embeddings correlate posi-\ntively with natural metrics on the space of tasks. However,\nthere are two problems in using the Euclidean distance be-\ntween embeddings: the parameters of the network have dif-\nferent scales, and the norm of the embedding is affected by\ncomplexity of the task and the number of samples used to\ncompute the embedding.\nSymmetric TASK 2VEC distance To make the distance\ncomputation robust, we propose to use the cosine distance\nbetween normalized embeddings:\ndsym(Fa,Fb) =dcos(Fa\nFa+Fb,Fb\nFa+Fb)\n,\nwheredcosis the cosine distance, FaandFbare the two\ntask embeddings ( i.e., the diagonal of the Fisher Informa-\ntion computed on the same probe network), and ']","Task2Vec's asymmetric distance measure contributes to task similarity assessment and model selection by considering the complexity of the tasks involved. It takes into account the complexity of the first task and the similarity between two tasks, which can impact positive transfer between tasks. This helps in determining how well a model pre-trained on a general but complex task, such as ImageNet, can perform when fine-tuned on a different task. This approach may yield better results compared to fine-tuning from a close dataset of comparable complexity. Ultimately, Task2Vec's asymmetric distance measure helps in evaluating which tasks are more suitable for model selection and transfer learning based on their complexity and similarity.",1.0,0.6666666666666666,0.7710387110710144,"The asymmetric distance accounts for task complexity and transferability, aiding in selecting feature extractors by evaluating their proximity to a task in the embedding space.","Task2Vec's asymmetric distance measure contributes to task similarity assessment and model selection by considering both the similarity between two tasks and the complexity of the first task. This measure takes into account the effect of pre-training on a general but complex task, such as ImageNet, which often yields better results than fine-tuning from a dataset of comparable complexity. By incorporating both task similarity and task complexity, Task2Vec's asymmetric distance measure helps in identifying positive transfer between tasks and assists in model selection by highlighting the relevance of transfer distance over semantic similarity in certain scenarios.",1.0,0.6666666666666666,0.7947289347648621 +Describe the computational approach to obtaining Task2Vec embeddings using a probe network.,"Task2Vec embeddings are obtained by processing images through a pre-trained probe network, retraining the classifier layer for the specific task, and computing the FIM of the network filter parameters.","['of the task itself. Specifically, given\na task defined by a dataset D={(xi,yi)}N\ni=1of labeled\nsamples, we feed the data through a pre-trained reference\nconvolutional neural network which we call “ probe net-\nwork ”, and compute the diagonal Fisher Information Ma-\ntrix (FIM) of the network filter parameters to capture the\nstructure of the task (Sect. 2). Since the architecture and\nweights of the probe network are fixed, the FIM provides a\nfixed-dimensional representation of the task. We show this\nembedding encodes the “difficulty” of the task, character-\nistics of the input domain, and which features of the probe\nnetwork are useful to solve it (Sect. 2.1).\nOur task embedding can be used to reason about the\nspace of tasks and solve meta-tasks. As a motivating exam-\nple, we study the problem of selecting the best pre-trained\nfeature extractor to solve a new ', 'on each task.\nA similar approach has been taken in computer vision for\nselecting a detector for a new category out of a large library\nof detectors [26, 40, 38].\n7. Discussion\nTASK 2VEC is an efficient way to represent a task, or the\ncorresponding dataset, as a fixed dimensional vector. It has\nseveral appealing properties, in particular its norm corre-\nlates with the test error obtained on the task, and the co-\nsine distance between embeddings correlates with natural\ndistances between tasks, when available, such as the taxo-\nnomic distance for species classification, and the fine-tuning\ndistance for transfer learning. Having a representation of\ntasks paves the way for a wide variety of meta-learning\ntasks. In this work, we focused on selection of an expert\nfeature extractor in order to solve a new task, especially\nwhen little training data is present, and showed ', 'conditions: Performance of\nmodel selection on a subset of 4 tasks as a function of\nthe number of samples available to train relative to opti-\nmal model selection (dashed orange). Training a classifier\non the feature extractor selected by TASK 2VEC (solid red) is\nalways better than using a generic ImageNet feature extrac-\ntor (dashed red). The same holds when allowed to fine-tune\nthe feature extractor (blue curves). Also notice that in the\nlow-data regime fine-tuning the ImageNet feature extractor\nis more expensive and has a worse performance than accu-\nrately selecting a good fixed feature extractor.\nProbe network Top-10 All\nChance +13.95% +59.52%\nVGG-13 +4.82% +38.03%\nDenseNet-121 +0.30% +10.63%\nResNet-13 +0.00% +9.97%\nTable 1: Choice of probe network. Mean relative error\nincrease over the ground-truth optimum on the iNat+CUB\nmeta-task for different choices of the probe-network. We\nalso report the performance on the top 10 tasks ']","The computational approach to obtaining Task2Vec embeddings using a probe network involves feeding data through a pre-trained reference convolutional neural network (probe network) and computing the diagonal Fisher Information Matrix (FIM) of the network filter parameters. The FIM provides a fixed-dimensional representation of the task, encoding the difficulty of the task, characteristics of the input domain, and useful features of the probe network to solve it. This embedding can be used to reason about the space of tasks and solve meta-tasks, such as selecting the best pre-trained feature extractor for a new task. The norm of the Task2Vec embedding correlates with the test error on the task, and the cosine distance between embeddings correlates with natural distances between tasks. Overall, Task2Vec provides an efficient way to represent a task as a fixed-dimensional vector, paving the way for various meta-learning tasks.",1.0,1.0,0.7432625889778137,"Task2Vec embeddings are obtained by processing images through a pre-trained probe network, retraining the classifier layer for the specific task, and computing the FIM of the network filter parameters.","The computational approach to obtaining Task2Vec embeddings using a probe network involves feeding the data through a pre-trained reference convolutional neural network, which is referred to as the ""probe network."" The diagonal Fisher Information Matrix (FIM) of the network filter parameters is computed to capture the structure of the task. This FIM provides a fixed-dimensional representation of the task, encoding the ""difficulty"" of the task, characteristics of the input domain, and which features of the probe network are useful to solve it. The task embedding can then be used to reason about the space of tasks and solve meta-tasks.",1.0,1.0,0.690927267074585 +What limitations does Task2Vec face regarding its ability to capture the full complexity of tasks in real-world applications?,"While effective, Task2Vec's embeddings may not fully capture the entire spectrum of task complexity and diversity found in broader real-world applications, indicating room for future enhancements.","['are interested in has\nrelatively few samples. In Fig. 4 we show how the perfor-\nmance of TASK 2VEC varies on a model selection task as the\nnumber of samples varies. At all sample sizes TASK 2VEC is\nclose to the optimum, and improves over selecting a generic\nexpert (ImageNet), both when fine-tuning and when train-\ning only a classifier. We observe that the best choice of ex-\nperts is not affected by the dataset size, and that even with\nfew examples TASK 2VEC is able to find the optimal experts.\nChoice of probe network In Table 1 we show that\nDenseNet [15] and ResNet architectures [11] perform sig-\nnificantly better when used as probe networks to compute\ntheTASK 2VEC embedding than a VGG [32] architecture.\n6. Related Work\nTask and Domain embedding. Tasks distinguished by\ntheir domain can be understood simply in ', 'on each task.\nA similar approach has been taken in computer vision for\nselecting a detector for a new category out of a large library\nof detectors [26, 40, 38].\n7. Discussion\nTASK 2VEC is an efficient way to represent a task, or the\ncorresponding dataset, as a fixed dimensional vector. It has\nseveral appealing properties, in particular its norm corre-\nlates with the test error obtained on the task, and the co-\nsine distance between embeddings correlates with natural\ndistances between tasks, when available, such as the taxo-\nnomic distance for species classification, and the fine-tuning\ndistance for transfer learning. Having a representation of\ntasks paves the way for a wide variety of meta-learning\ntasks. In this work, we focused on selection of an expert\nfeature extractor in order to solve a new task, especially\nwhen little training data is present, and showed ', 'requires understanding\nindividual tokens in context, complete sentences, inter-sentence relations, and entire paragraphs.\nLicense: Task data must be available under licences that allow use and redistribution for research\npurposes.\nTo identify possible tasks for SuperGLUE, we disseminated a public call for task proposals to the\nNLP community, and received approximately 30 proposals. We filtered these proposals according\nto our criteria. Many proposals were not suitable due to licensing issues, complex formats, and\ninsufficient headroom; we provide examples of such tasks in Appendix D. For each of the remaining\ntasks, we ran a BERT-based baseline and a human baseline, and filtered out tasks which were either\ntoo challenging for humans without extensive training or too easy for our machine baselines.\n3.2 Selected Tasks\nFollowing this process, we arrived at eight tasks to use in SuperGLUE. See Tables 1 and 2 ']","Task2Vec may face limitations in capturing the full complexity of tasks in real-world applications due to the fact that it requires understanding individual tokens in context, complete sentences, inter-sentence relations, and entire paragraphs. Additionally, the availability of task data under licenses that allow use and redistribution for research purposes may also be a limitation.",1.0,1.0,0.791001558303833,"While effective, Task2Vec's embeddings may not fully capture the entire spectrum of task complexity and diversity found in broader real-world applications, indicating room for future enhancements.","Task2Vec faces limitations in capturing the full complexity of tasks in real-world applications because it depends solely on the task and ignores interactions with the model, which may play an important role. This can be problematic as interactions with the model can be crucial in addressing the complexity of tasks.",1.0,1.0,0.7995020747184753 +"How does GLM-130B manage to achieve INT4 weight quantization without post-training, and what are the benefits?","By leveraging the GLM architecture's unique scaling law, GLM-130B achieves INT4 weight quantization, reducing memory requirements for inference with minimal performance degradation.","['into INT4 precision without post training while OPT\nand BLOOM can only reach INT8. Due to a unique property of the GLM architecture, GLM-130B’s\nINT4 quantization introduces negligible performance degradation, e.g., -0.74% on LAMBADA and\neven +0.05% on MMLU, making it still better than the uncompressed GPT-3. This enables GLM-\n130B’s fast inference with performance guarantee on a server of 4 ×RTX 3090 (24G) or 8 ×RTX\n2080 Ti (11G), the most affordable GPU required for using 100B-scale LLMs to date.\n2\x0cPublished as a conference paper at ICLR 2023\nGradient Norm(a) More than 30 failed preliminary trials at 100B-scale (b) Final decisive trials: Sandwich-LN v.s. DeepNorm012345678910111213\n0 500 1k 1.5k 2k2.5k 3k\nFigure 3: Trials on different LayerNorms for GLM-130B training. It turns out that DeepNorm is the\nmost stable one, as it has small gradient norm and ', '0.1 can solve most numerical instability problems.\nLesson 7 (GLM’s INT4 Quantization Scaling Law). GLM has a unique INT4 weight quan-\ntization scaling law unobserved in GPT-style BLOOM.\nLesson 8 (Future Direction). To create powerful LLMs, the main focus can be on 1) more and\nbetter data, 2) better architectures and pre-training objectives, and 3) more sufficient training.\n30\x0cPublished as a conference paper at ICLR 2023\nFigure 15: Weight value distribution of linear layers in GLM-130B (in orange, attn-dense ,\nattn-qkv ,glu-w1 ,glu-w2 ) and BLOOM-176B (in blue, attn-dense ,attn-qkv ,\nffn-w1 ,ffn-w2 )’s first 28 transformer layers. Generally for GLM-130B it is attn-dense\nandw2that may present narrow value distributions. attn-qkv andw1may also be a reason for\nenabling INT4 quantization in middle layers of GLM-130B.\n31\x0cPublished as a conference paper at ICLR 2023\nC D ATASET AND EVALUATION DETAILS\nC.1 M ', 'INT4 version\nhelps additionally save half of the required GPU memory to 70GB, thus allowing GLM-130B infer-\nence on 4 ×RTX 3090 Ti (24G) or 8 ×RTX 2080 Ti (11G). Performance-wise, Table 2 left indicates\nthat without post-training at all, the INT4-version GLM-130B experiences almost no performance\ndegradation, thus maintaining the performance advantages over GPT-3 on common benchmarks.\nGLM’s INT4 Weight Quantization Scaling Law. We examine the underlying mechanism of this\nunique INT4 weight quantization scaling law exhibited in Figure 5 right. We plot the weight value\ndistributions in Figure 5 left, which turns out to directly impact the quantization quality. Specifically,\na wider-distributed linear layer needs to be quantized with larger bins, leading to more precision loss.\nThus the wide-distributed attn-dense andw2matrices explain the INT4 quantization failure for\nGPT-style BLOOM. Conversely, GLMs tend to have much narrower ']","GLM-130B manages to achieve INT4 weight quantization without post-training due to a unique property of the GLM architecture. The benefits of this include negligible performance degradation, with only -0.74% on LAMBADA and even +0.05% on MMLU, making it still better than the uncompressed GPT-3. Additionally, the INT4 version helps save half of the required GPU memory, allowing for faster inference on more affordable GPUs.",1.0,0.6666666666666666,0.8308624625205994,"By leveraging the GLM architecture's unique scaling law, GLM-130B achieves INT4 weight quantization, reducing memory requirements for inference with minimal performance degradation.","GLM-130B manages to achieve INT4 weight quantization without post-training due to its unique architecture, which allows for negligible performance degradation. The benefits include saving half of the required GPU memory, enabling fast inference with performance guarantee on affordable GPUs, and maintaining performance advantages over GPT-3 on common benchmarks.",1.0,0.6666666666666666,0.8842052221298218 +What contributions does GLM-130B offer to the open-source community and AI research field?," GLM-130B is fully open-sourced, including model weights, code, and training logs, promoting transparency, reproducibility, and inclusivity in large-scale language model research and applications.","['studies of LLMs’ theory, capacity, and flaws. Researchers can also modify the\nmodel architecture and weights, to validate the proposed algorithms to improve LLMs Zhu et al.\n(2020); Cao et al. (2021); Hase et al. (2021); Mitchell et al. (2022).\nWith INT4 quantization, GLM-130B can perform inference on popularized GPUs such as 4 ×RTX\n3090 or 8 ×RTX 2080 Ti, which can be easily accessed from cloud service. As a result, researchers\nwho cannot afford powerful data-center GPU servers like DGX-A100 can also utilize GLM-130B.\nG.2 I MPACT ON INDIVIDUAL DEVELOPERS AND SMALL COMPANIES\nCurrently, individual developers and small companies who want to integrate LLMs into their busi-\nness can only choose paid inference APIs. The increased cost can hinder their attempts. Instead,\nGLM-130B can be deployed on popularized hardware that they own or can access via ', 'into INT4 precision without post training while OPT\nand BLOOM can only reach INT8. Due to a unique property of the GLM architecture, GLM-130B’s\nINT4 quantization introduces negligible performance degradation, e.g., -0.74% on LAMBADA and\neven +0.05% on MMLU, making it still better than the uncompressed GPT-3. This enables GLM-\n130B’s fast inference with performance guarantee on a server of 4 ×RTX 3090 (24G) or 8 ×RTX\n2080 Ti (11G), the most affordable GPU required for using 100B-scale LLMs to date.\n2\x0cPublished as a conference paper at ICLR 2023\nGradient Norm(a) More than 30 failed preliminary trials at 100B-scale (b) Final decisive trials: Sandwich-LN v.s. DeepNorm012345678910111213\n0 500 1k 1.5k 2k2.5k 3k\nFigure 3: Trials on different LayerNorms for GLM-130B training. It turns out that DeepNorm is the\nmost stable one, as it has small gradient norm and ', 'training 100B-scale LLMs, we present not only the\nsuccessful part for training GLM-130B but also many of the failed options and lessons learned.\nParticularly, the training stability is the decisive factor in the success of training models of such a\nscale. Different from practices such as manually adjusting learning rates in OPT-175B and using\nembedding norm in the sacrifice of performance in BLOOM-176B, we experiment with various op-\ntions and find the strategy of embedding gradient shrink can significantly stabilize the training of\nGLM-130B.\nSpecifically, GLM-130B is a bilingual (English and Chinese) bidirectional dense model with 130 bil-\nlion parameters, pre-trained over 400 billion tokens on a cluster of 96 NVIDIA DGX-A100 (8 ×40G)\nGPU nodes between May 6 and July 3, 2022. Instead of using the GPT-style architecture, we adopt\nthe General Language Model (GLM) algorithm (Du ']","GLM-130B offers the contribution of being able to perform inference on popularized GPUs, making it accessible to researchers who cannot afford powerful data-center GPU servers. Additionally, GLM-130B's INT4 quantization introduces negligible performance degradation, making it still better than the uncompressed GPT-3, enabling fast inference with performance guarantee on affordable GPUs. Moreover, GLM-130B's training stability, experimented with various options such as embedding gradient shrink, is highlighted as crucial for the success of training models of such a large scale. Overall, GLM-130B's architecture, training methods, and accessibility contribute to advancements in the open-source community and AI research field.",1.0,0.6666666666666666,0.7745413780212402," GLM-130B is fully open-sourced, including model weights, code, and training logs, promoting transparency, reproducibility, and inclusivity in large-scale language model research and applications.","GLM-130B offers contributions to the open-source community and AI research field by providing insights into LLM architectures, pre-training objectives, training stability and efficiency, and affordable inference. It contributes to high quality language performance on tasks and ethical results on bias and toxicity benchmarks, making it valuable for researchers testing and understanding the properties of LLMs.",1.0,1.0,0.7849516868591309 +What advancements does Megatron-LM contribute to the handling of layer normalization in BERT-like models to increase performance?," It rearranges the order of layer normalization and residual connections, allowing for stable training and enhanced accuracies as model size increases.","['/ 87.4 86.5 (89.0 / 85.5)\nXLNet (Yang et al., 2019) 2 90.8 / 90.8 92.3 95.1 / 89.7 90.6 / 87.9 85.4 (88.6 / 84.0)\nMegatron-336M 1 89.7 / 90.0 92.3 94.2 / 88.0 88.1 / 84.8 83.0 (86.9 / 81.5)\nMegatron-1.3B 1 90.9 / 91.0 92.6 94.9 / 89.1 90.2 / 87.1 87.3 (90.4 / 86.1)\nMegatron-3.9B 1 91.4 / 91.4 92.7 95.5 / 90.0 91.2 / 88.5 89.5 (91.8 / 88.6)\nALBERT ensemble (Lan et al., 2019) 95.5 / 90.1 91.4 / 88.9 89.4 (91.2 / 88.6)\nMegatron-3.9B ensemble 95.8 / 90.5 91.7 / 89.0 90.9 (93.1 / 90.0)\nFigure 7. Training loss for BERT model using the original architec-\nture (a) and the rearranged architecture (b). Left figure shows the\ntraining loss for 336M and 752M BERT model. While the original\narchitecture performs well ', 'the same general architec-\nture as BERT. The token-type embeddings and the pooler are removed while the number of layers\nis reduced by a factor of 2. Most of the operations used in the Transformer architecture ( linear\nlayer andlayer normalisation ) are highly optimized in modern linear algebra frameworks and our\ninvestigations showed that variations on the last dimension of the tensor (hidden size dimension) have\na smaller impact on computation efficiency (for a fixed parameters budget) than variations on other\nfactors like the number of layers. Thus we focus on reducing the number of layers.\nStudent initialization In addition to the previously described optimization and architectural choices,\nan important element in our training procedure is to find the right initialization for the sub-network to\nconverge. Taking advantage of the common dimensionality between teacher and student ', 'Megatron-LM: Training Multi-Billion Parameter Language Models Using\nModel Parallelism\nMohammad Shoeybi1 2Mostofa Patwary1 2Raul Puri1 2Patrick LeGresley2Jared Casper2\nBryan Catanzaro2\nAbstract\nRecent work in language modeling demonstrates\nthat training large transformer models advances\nthe state of the art in Natural Language Processing\napplications. However, very large models can be\nquite difficult to train due to memory constraints.\nIn this work, we present our techniques for train-\ning very large transformer models and implement\na simple, efficient intra-layer model parallel ap-\nproach that enables training transformer models\nwith billions of parameters. Our approach does\nnot require a new compiler or library changes, is\northogonal and complimentary to pipeline model\nparallelism, and can be fully implemented with\nthe insertion of a few communication operations\nin native PyTorch. We illustrate this approach\nby converging transformer based models up to\n8.3 billion parameters using 512 GPUs. We sus-\ntain 15.1 PetaFLOPs across the entire ']","Megatron-LM contributes to handling layer normalization in BERT-like models by removing token-type embeddings and the pooler while reducing the number of layers by a factor of 2. Additionally, Megatron-LM focuses on reducing the number of layers, as variations on the last dimension of the tensor (hidden size dimension) have a smaller impact on computation efficiency than variations on other factors like the number of layers.",1.0,1.0,0.3772377073764801," It rearranges the order of layer normalization and residual connections, allowing for stable training and enhanced accuracies as model size increases.","Megatron-LM contributes advancements in handling layer normalization in BERT-like models by focusing on reducing the number of layers, which has been shown to have a smaller impact on computation efficiency compared to other factors like the number of layers. This approach allows for training very large transformer models with billions of parameters and can lead to improvements in performance and efficiency.",0.5,1.0,0.4710771143436432 +What specific properties of Task2Vec embeddings allow for effective reasoning about task space?,"Task2Vec embeddings are invariant to label space, encode task difficulty, represent task-weighted domain characteristics, and highlight features important for the task.","['on each task.\nA similar approach has been taken in computer vision for\nselecting a detector for a new category out of a large library\nof detectors [26, 40, 38].\n7. Discussion\nTASK 2VEC is an efficient way to represent a task, or the\ncorresponding dataset, as a fixed dimensional vector. It has\nseveral appealing properties, in particular its norm corre-\nlates with the test error obtained on the task, and the co-\nsine distance between embeddings correlates with natural\ndistances between tasks, when available, such as the taxo-\nnomic distance for species classification, and the fine-tuning\ndistance for transfer learning. Having a representation of\ntasks paves the way for a wide variety of meta-learning\ntasks. In this work, we focused on selection of an expert\nfeature extractor in order to solve a new task, especially\nwhen little training data is present, and showed ', 'embedding (distance from origin) and test error obtained on the task.\nnear the decision boundary since p(1−p)is maximized at\np= 1/2. Compare this to the covariance matrix of the data,\nC0, to which all data points contribute equally. Instead, in\nTASK 2VEC information on the domain is based on data near\nthe decision boundary (task-weighted domain embedding).\nEncoding useful features for the task: The FIM de-\npends on the curvature of the loss function with the diagonal\nentries capturing the sensitivity of the loss to model param-\neters. Specifically, in the two-layer model one can see that,\nif a given feature is uncorrelated with y, the correspond-\ning blocks of Fare zero. In contrast, a domain embedding\nbased on feature activations of the probe network (e.g., C1)\nonly reflects which features vary over the dataset without\nindication of whether they are relevant ', 'varying k).\nEncoding task difficulty: As we can see from the ex-\npressions above, if the fit model is very confident in its pre-\ndictions, E[(y−p)2]goes to zero. Hence, the norm of the\ntask embedding∥F∥⋆scales with the difficulty of the task\nfor a given feature extractor φ. Figure 2 (Right) shows that\neven for more complex models trained on real data, the FIM\nnorm correlates with test performance.\nEncoding task domain: Data points xthat are classi-\nfied with high confidence, i.e., pis close to 0 or 1, will\nhave a lower contribution to the task embedding than points\x0c0 25 50 75 100 125\nSize k of neighborhood1.01.52.02.53.0Avg. top-k tax. distanceTask2Vec distance\nTax. distance\n0.4 0.6 0.8\nL1 norm of task embedding1e80%10%20%30%40%50%60%Test error on task (%)\nFigure 2: Distance between species classification tasks. (Left) Task similarity matrix ordered by hierarchical clustering.\nNote that the dendrogram ']","The specific properties of Task2Vec embeddings that allow for effective reasoning about task space include: +- The norm of the Task2Vec embedding correlates with the test error obtained on the task. +- The cosine distance between embeddings correlates with natural distances between tasks, such as taxonomic distance for species classification and fine-tuning distance for transfer learning. +- Task2Vec embeddings encode useful features for the task by capturing the sensitivity of the loss function to model parameters. +- The norm of the Task2Vec embedding scales with the difficulty of the task for a given feature extractor. +- Task2Vec embeddings encode task domain by giving lower contribution to points classified with high confidence.",1.0,0.3333333333333333,0.7818197011947632,"Task2Vec embeddings are invariant to label space, encode task difficulty, represent task-weighted domain characteristics, and highlight features important for the task.","The specific properties of Task2Vec embeddings that allow for effective reasoning about task space include the fact that the norm of the embedding correlates with the test error obtained on the task, the cosine distance between embeddings correlates with natural distances between tasks, and the embedding encodes the difficulty of the task, characteristics of the input domain, and which features of the probe network are useful to solve it.",1.0,1.0,0.7584042549133301 +What distinctive strategy does GLM-130B employ to ensure training stability for a 130-billion-parameter model?,"GLM-130B utilizes embedding gradient shrink (EGS) and DeepNorm layer normalization to ensure training stability, significantly reducing the likelihood of loss spikes and divergence.","['training 100B-scale LLMs, we present not only the\nsuccessful part for training GLM-130B but also many of the failed options and lessons learned.\nParticularly, the training stability is the decisive factor in the success of training models of such a\nscale. Different from practices such as manually adjusting learning rates in OPT-175B and using\nembedding norm in the sacrifice of performance in BLOOM-176B, we experiment with various op-\ntions and find the strategy of embedding gradient shrink can significantly stabilize the training of\nGLM-130B.\nSpecifically, GLM-130B is a bilingual (English and Chinese) bidirectional dense model with 130 bil-\nlion parameters, pre-trained over 400 billion tokens on a cluster of 96 NVIDIA DGX-A100 (8 ×40G)\nGPU nodes between May 6 and July 3, 2022. Instead of using the GPT-style architecture, we adopt\nthe General Language Model (GLM) algorithm (Du ', 'into INT4 precision without post training while OPT\nand BLOOM can only reach INT8. Due to a unique property of the GLM architecture, GLM-130B’s\nINT4 quantization introduces negligible performance degradation, e.g., -0.74% on LAMBADA and\neven +0.05% on MMLU, making it still better than the uncompressed GPT-3. This enables GLM-\n130B’s fast inference with performance guarantee on a server of 4 ×RTX 3090 (24G) or 8 ×RTX\n2080 Ti (11G), the most affordable GPU required for using 100B-scale LLMs to date.\n2\x0cPublished as a conference paper at ICLR 2023\nGradient Norm(a) More than 30 failed preliminary trials at 100B-scale (b) Final decisive trials: Sandwich-LN v.s. DeepNorm012345678910111213\n0 500 1k 1.5k 2k2.5k 3k\nFigure 3: Trials on different LayerNorms for GLM-130B training. It turns out that DeepNorm is the\nmost stable one, as it has small gradient norm and ', 'of 43.3% and model FLOPs utilization (MFU) of 32.5% due to\nre-materialization.\nGLM-130B Configurations. We aim to enable our 100B-scale LLM to run a single DGX-A100\n(40G) node in FP16 precision. Based on the hidden state dimension of 12,288 we adopt from\nGPT-3, the resultant model size has to be no more than 130B parameters, thus GLM-130B. To\nmaximize GPU utilization, we configure the model based on the platform and its corresponding\nparallel strategy. To avoid insufficient memory utilization in the middle stages due to the additional\nword embedding at both ends, we balance the pipeline partition by removing one layer from them,\nmaking 9 ×8-2=70 transformer layers in GLM-130B.\nDuring the 60-day access to the cluster, we manage to train GLM-130B for 400 billion tokens\n(roughly 200 billion each for Chinese and English) with a fixed sequence ']",GLM-130B employs the strategy of embedding gradient shrink to significantly stabilize the training.,1.0,1.0,0.8743759393692017,"GLM-130B utilizes embedding gradient shrink (EGS) and DeepNorm layer normalization to ensure training stability, significantly reducing the likelihood of loss spikes and divergence.",GLM-130B employs gradient shrinking on embedding layers as a distinctive strategy to ensure training stability for a 130-billion-parameter model.,1.0,0.6666666666666666,0.8679056763648987 +What parallel strategies and configurations are utilized to train GLM-130B efficiently on a GPU cluster?,"GLM-130B employs a 3D parallel strategy combining data, tensor, and pipeline model parallelism, optimized for 96 DGX-A100 GPU nodes, maximizing token throughput and hardware utilization.","['of 43.3% and model FLOPs utilization (MFU) of 32.5% due to\nre-materialization.\nGLM-130B Configurations. We aim to enable our 100B-scale LLM to run a single DGX-A100\n(40G) node in FP16 precision. Based on the hidden state dimension of 12,288 we adopt from\nGPT-3, the resultant model size has to be no more than 130B parameters, thus GLM-130B. To\nmaximize GPU utilization, we configure the model based on the platform and its corresponding\nparallel strategy. To avoid insufficient memory utilization in the middle stages due to the additional\nword embedding at both ends, we balance the pipeline partition by removing one layer from them,\nmaking 9 ×8-2=70 transformer layers in GLM-130B.\nDuring the 60-day access to the cluster, we manage to train GLM-130B for 400 billion tokens\n(roughly 200 billion each for Chinese and English) with a fixed sequence ', 'into INT4 precision without post training while OPT\nand BLOOM can only reach INT8. Due to a unique property of the GLM architecture, GLM-130B’s\nINT4 quantization introduces negligible performance degradation, e.g., -0.74% on LAMBADA and\neven +0.05% on MMLU, making it still better than the uncompressed GPT-3. This enables GLM-\n130B’s fast inference with performance guarantee on a server of 4 ×RTX 3090 (24G) or 8 ×RTX\n2080 Ti (11G), the most affordable GPU required for using 100B-scale LLMs to date.\n2\x0cPublished as a conference paper at ICLR 2023\nGradient Norm(a) More than 30 failed preliminary trials at 100B-scale (b) Final decisive trials: Sandwich-LN v.s. DeepNorm012345678910111213\n0 500 1k 1.5k 2k2.5k 3k\nFigure 3: Trials on different LayerNorms for GLM-130B training. It turns out that DeepNorm is the\nmost stable one, as it has small gradient norm and ', 'prompted datasets from (Sanh et al., 2022; Wang et al., 2022a), listed\nin Appendix C and Table 12. GLM-130B users are suggested to avoid evaluating its zero-shot and\nfew-shot capabilities on these datasets according to the criterion illustrated in Section 5.\n2.3 P LATFORM -AWARE PARALLEL STRATEGIES AND MODEL CONFIGURATIONS\nGLM-130B is trained on a cluster of 96 DGX-A100 GPU (8 ×40G) servers with a 60-day access.\nThe goal is to pass through as many tokens as possible, as a recent study (Hoffmann et al., 2022)\nsuggests that most existing LLMs are largely under-trained.\nThe 3D Parallel Strategy. The data parallelism (Valiant, 1990) and tensor model paral-\nlelism (Shoeybi et al., 2019) are the de facto practices for training billion-scale models (Wang &\nKomatsuzaki, 2021; Du et al., 2022). To further handle the huge GPU memory requirement ']","The parallel strategies utilized to train GLM-130B efficiently on a GPU cluster are data parallelism and tensor model parallelism. The model is trained on a cluster of 96 DGX-A100 GPU servers with a 60-day access, aiming to pass through as many tokens as possible.",1.0,1.0,0.8036704659461975,"GLM-130B employs a 3D parallel strategy combining data, tensor, and pipeline model parallelism, optimized for 96 DGX-A100 GPU nodes, maximizing token throughput and hardware utilization.","The 3D Parallel Strategy including data parallelism and tensor model parallelism is utilized to train GLM-130B efficiently on a cluster of 96 DGX-A100 GPU servers. Additionally, the strategy of embedding gradient shrink is also employed to stabilize the training of GLM-130B.",1.0,0.6666666666666666,0.6391139626502991 +How does Megatron-LM's model parallel approach optimize memory and computation distribution across GPUs?, By partitioning key operations such as GEMMs within transformer layers across GPUs and minimizing communication overhead with strategic all-reduce operations.,"['a slight\ndecrease in scaling efficiency. Future research should be\nwary of this hyperparameter to design large transformer\nmodels that balance model speed and model accuracy.\nD.2. Strong Scaling\nOur model parallelism is primarily designed to enable train-\ning models larger than what can fit in the memory of a\x0cMegatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism\nTable 7. Effect of number of attention heads on scaling on 8.3\nbillion of parameters with 8-way model parallelism.\nAttention heads Hidden size per head Scaling Efficiency\n16 192 82%\n24 128 80%\n32 96 77%\nTable 8. Speedup obtained for the 1.2 billion parameters model\nusing model parallelism while keeping the batch size constant.\n# of GPUs 1 2 4 8\nSpeedup 1.0 1.64 2.34 2.98\nsingle GPU, but it can also accelerate the training of smaller\nmodels without increasing the batch size. To measure this\nacceleration we ', 'Megatron-LM: Training Multi-Billion Parameter Language Models Using\nModel Parallelism\nMohammad Shoeybi1 2Mostofa Patwary1 2Raul Puri1 2Patrick LeGresley2Jared Casper2\nBryan Catanzaro2\nAbstract\nRecent work in language modeling demonstrates\nthat training large transformer models advances\nthe state of the art in Natural Language Processing\napplications. However, very large models can be\nquite difficult to train due to memory constraints.\nIn this work, we present our techniques for train-\ning very large transformer models and implement\na simple, efficient intra-layer model parallel ap-\nproach that enables training transformer models\nwith billions of parameters. Our approach does\nnot require a new compiler or library changes, is\northogonal and complimentary to pipeline model\nparallelism, and can be fully implemented with\nthe insertion of a few communication operations\nin native PyTorch. We illustrate this approach\nby converging transformer based models up to\n8.3 billion parameters using 512 GPUs. We sus-\ntain 15.1 PetaFLOPs across the entire ', 'of 43.3% and model FLOPs utilization (MFU) of 32.5% due to\nre-materialization.\nGLM-130B Configurations. We aim to enable our 100B-scale LLM to run a single DGX-A100\n(40G) node in FP16 precision. Based on the hidden state dimension of 12,288 we adopt from\nGPT-3, the resultant model size has to be no more than 130B parameters, thus GLM-130B. To\nmaximize GPU utilization, we configure the model based on the platform and its corresponding\nparallel strategy. To avoid insufficient memory utilization in the middle stages due to the additional\nword embedding at both ends, we balance the pipeline partition by removing one layer from them,\nmaking 9 ×8-2=70 transformer layers in GLM-130B.\nDuring the 60-day access to the cluster, we manage to train GLM-130B for 400 billion tokens\n(roughly 200 billion each for Chinese and English) with a fixed sequence ']","Megatron-LM's model parallel approach optimizes memory and computation distribution across GPUs by implementing a simple and efficient intra-layer model parallel approach that enables training transformer models with billions of parameters. This approach does not require new compiler or library changes, is orthogonal and complementary to pipeline model parallelism, and can be fully implemented with the insertion of a few communication operations in native PyTorch. This allows for the training of very large transformer models while avoiding memory constraints and ensuring efficient distribution of memory and computation tasks across GPUs.",1.0,1.0,0.533291220664978, By partitioning key operations such as GEMMs within transformer layers across GPUs and minimizing communication overhead with strategic all-reduce operations.,"Megatron-LM's model parallel approach optimizes memory and computation distribution across GPUs by implementing a simple, efficient intra-layer model parallel approach that enables training transformer models with billions of parameters. This approach does not require a new compiler or library changes, is orthogonal and complimentary to pipeline model parallelism, and can be fully implemented with the insertion of a few communication operations in native PyTorch.",1.0,0.3333333333333333,0.5312631130218506 +How does Megatron-LM address the challenges of large batch training and optimization in transformer models?,"Through mixed precision training with dynamic loss scaling, weight initialization strategies, and careful optimizer configurations, including Adam with weight decay.","['Megatron-LM: Training Multi-Billion Parameter Language Models Using\nModel Parallelism\nMohammad Shoeybi1 2Mostofa Patwary1 2Raul Puri1 2Patrick LeGresley2Jared Casper2\nBryan Catanzaro2\nAbstract\nRecent work in language modeling demonstrates\nthat training large transformer models advances\nthe state of the art in Natural Language Processing\napplications. However, very large models can be\nquite difficult to train due to memory constraints.\nIn this work, we present our techniques for train-\ning very large transformer models and implement\na simple, efficient intra-layer model parallel ap-\nproach that enables training transformer models\nwith billions of parameters. Our approach does\nnot require a new compiler or library changes, is\northogonal and complimentary to pipeline model\nparallelism, and can be fully implemented with\nthe insertion of a few communication operations\nin native PyTorch. We illustrate this approach\nby converging transformer based models up to\n8.3 billion parameters using 512 GPUs. We sus-\ntain 15.1 PetaFLOPs across the entire ', '/ 87.4 86.5 (89.0 / 85.5)\nXLNet (Yang et al., 2019) 2 90.8 / 90.8 92.3 95.1 / 89.7 90.6 / 87.9 85.4 (88.6 / 84.0)\nMegatron-336M 1 89.7 / 90.0 92.3 94.2 / 88.0 88.1 / 84.8 83.0 (86.9 / 81.5)\nMegatron-1.3B 1 90.9 / 91.0 92.6 94.9 / 89.1 90.2 / 87.1 87.3 (90.4 / 86.1)\nMegatron-3.9B 1 91.4 / 91.4 92.7 95.5 / 90.0 91.2 / 88.5 89.5 (91.8 / 88.6)\nALBERT ensemble (Lan et al., 2019) 95.5 / 90.1 91.4 / 88.9 89.4 (91.2 / 88.6)\nMegatron-3.9B ensemble 95.8 / 90.5 91.7 / 89.0 90.9 (93.1 / 90.0)\nFigure 7. Training loss for BERT model using the original architec-\nture (a) and the rearranged architecture (b). Left figure shows the\ntraining loss for 336M and 752M BERT model. While the original\narchitecture performs well ', 'to\nthe existing PyTorch transformer implementations. We ef-\nficiently trained transformer based models up to 8.3 bil-\nlion parameter on 512 NVIDIA V100 GPUs with 8-way\nmodel parallelism and achieved up to 15.1 PetaFLOPs sus-\ntained over the entire application. We also showed that for\nBERT models, careful attention to the placement of layer\nnormalization in BERT-like models is critical to achieving\nincreased accuracies as the model size increases. We study\nthe effect of model size on down-stream task accuracy and\nachieve far superior results on downstream tasks and estab-\nlish new SOTA for WikiText103, LAMBADA, and RACE\ndatasets. Finally, we open sourced our code to enable future\nwork leveraging model parallel transformers.\nThere are several directions for future work. Continuing\nto increase the scale of pretraining is a promising line of\x0cMegatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism\ninvestigation that will further ']","Megatron-LM addresses the challenges of large batch training and optimization in transformer models by implementing a simple, efficient intra-layer model parallel approach that enables training transformer models with billions of parameters. This approach does not require a new compiler or library changes, is orthogonal and complimentary to pipeline model parallelism, and can be fully implemented with the insertion of a few communication operations in native PyTorch. By using this approach, Megatron-LM is able to converge transformer based models with up to 8.3 billion parameters using 512 GPUs, achieving high sustained performance of 15.1 PetaFLOPs. Additionally, careful attention to the placement of layer normalization in BERT-like models is critical for achieving increased accuracies as the model size increases, leading to superior results on downstream tasks and establishing new state-of-the-art performance on various datasets.",1.0,1.0,0.3060898184776306,"Through mixed precision training with dynamic loss scaling, weight initialization strategies, and careful optimizer configurations, including Adam with weight decay.","Megatron-LM addresses the challenges of large batch training and optimization in transformer models by implementing a simple and efficient intra-layer model parallel approach which enables training transformer models with billions of parameters. This approach does not require new compiler or library changes, is orthogonal and complimentary to pipeline model parallelism, and can be fully implemented with the insertion of a few communication operations in native PyTorch. By utilizing model parallelism, Megatron-LM is able to train very large transformer models and sustain high computational performance across multiple GPUs.",1.0,1.0,0.2789177894592285 +"What specific tasks and benchmarks were used to evaluate PAL's performance, and what were the results?"," PAL was evaluated across 13 tasks, including mathematical, symbolic, and algorithmic reasoning from BIG-Bench Hard and other benchmarks, consistently outperforming larger models and setting new accuracy standards across all evaluated tasks.","['only\n2.3% (absolute) on GSM 8Kand improved PaLM-540 Bby\n1.7%, while PALimproves Codex by 6.4% on the same\nbenchmark (Section 5.1). Similarly to our work, Chowd-\nhery et al. (2022) have also experimented with generating\nPython code for solving the GSM 8Kbenchmark, but their\nexperiments resulted in lower accuracy than the standard\nPaLM-540 Bthat uses chain-of-thought. Pi et al. (2022)\npretrain the model on execution results of random expres-\nsions on a calculator, instead of using the solver at test time\nas well. While their model can hypothetically perform arith-\nmetic better than other pretrained LMs, their results on the\nSV AMP benchmark are much lower: 57.4% using a T5-11Bmodel, while PAL achieves 79.4% on the same benchmark\nwithout any specialized pretraining.\nShortly after a preprint of our work was submitted to arXiv,\nanother related work on “program of thought prompting”\n(Chen et al., ', '23.7%. Similarly, PAL\nvastly outperforms COTby absolute 21.8% on REPEAT\nCOPY. Surprisingly, DIRECT prompting performs better\nthan COTonREPEAT COPY. Yet, PALimproves over\nDIRECT by 9.3% in R EPEAT COPY.\n[0,2] [3,5] [6,8] [9,11] [12,14] [15,17] [18,20] [21,23] [24,26]0.60.81\nNumber of ObjectsAccuracy\nPaL\nCoT\nFigure 6: The solve rate on COLORED OBJECTS with re-\nspect to the number of objects included in the test question.\nIs PAL sensitive to the complexity of the question? We\nexamined how the performance of PALandCOTchange as\nthe complexity of the input question grows, measured as the\nnumber of objects in the question of COLORED OBJECTS .\nAs shown in Figure 6, PALis superior COTacross all input\nlengths. As the number of objects in the question increases,\nCOT’s accuracy is unstable and drops, while PALremains\nconsistently close to 100%. More analysis on the token-level\npredictions can be found in Appendix G.\x0cPAL: Program-aided Language ', 'Samples on P AL 14\nE Standard Deviations Across Multiple Order of Prompts 17\nF P AL Beyond Benchmarks 17\nG Closer Look into Token-level Behaviors of Different Mechanisms 20\nH Datasets 20\nH.1 Creating GSM -HARD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 23\nH.2 GSM -HARD Analysis . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 23\nI Generalization of PAL ']","The specific tasks and benchmarks used to evaluate PAL's performance were GSM 8K, Codex, REPEAT COPY, and COLORED OBJECTS. The results showed that PAL improved performance on GSM 8K by 2.3%, on Codex by 6.4%, outperformed COT by 21.8% on REPEAT COPY, and maintained close to 100% accuracy on COLORED OBJECTS across all input lengths.",0.6,0.6666666666666666,0.6944215297698975," PAL was evaluated across 13 tasks, including mathematical, symbolic, and algorithmic reasoning from BIG-Bench Hard and other benchmarks, consistently outperforming larger models and setting new accuracy standards across all evaluated tasks.","The specific tasks and benchmarks used to evaluate PAL's performance were symbolic reasoning and algorithmic tasks. The results showed that PAL outperformed COT by 8.8% in COLORED OBJECTS, by 14.1% in PENGUINS, and by 11.4% in DATE. Additionally, PAL reached 96.7% in OBJECT COUNTING, improving over COT by 2.3% on GSM 8K.",1.0,1.0,0.7210339307785034 +How does the inclusion of specific metadata in CodeNet facilitate a wide range of code analysis tasks?,"Rich metadata including problem descriptions, submission outcomes, and programming languages enable tasks like bug fixing, performance improvement, and understanding coding patterns.","['related datasets.\nLarge scale. A useful dataset should contain a large number and variety of data samples to expose\nthe realistic and complex landscape of data distributions one meets in practice. CodeNet is the\nlargest dataset in its class - it has approximately 10 times more code samples than GCJ and its C++\nbenchmark is approximately 10 times larger than POJ-104.\nRich annotation. For the dataset class in question, it is important to include information beyond\nwhich problem a code sample solves to enable a wide range of applications and use cases. It is useful\nto know whether a code sample solves the problem correctly, and if not, the error category (e.g.,\ncompilation error, runtime error, and out-of-memory error). Since the source code is supposed to\nsolve a programming problem, it is advantageous to know the problem ', 'requirements change, make it more secure, and/or comply with regu-\nlations. These tasks are challenging, and it is crucial to provide tool support for developers to be\nmore productive at performing them. It is well known that the latest advancements in deep learning\nalgorithms rely on best-of-breed datasets, such as ImageNet, to create increasingly complex and\npowerful models. In this paper, we present ""CodeNet"", a first-of-its-kind dataset in scale, diversity,\nand quality, to accelerate the algorithmic advances in AI for Code.\nTo promote widespread adoption of CodeNet, we will be launching contests involving use cases based\non the dataset. The first contest [ 6] will focus on diversity, inclusion and spurring interest among\naspiring data scientists. We are partnering with the Global Women in Data Science organization (with\npresence in over 50 countries) founded by Stanford University ', 'statement and have a sample\ninput for execution and a sample output for validation. All such extra information is part of CodeNet\nbut absent in GCJ and POJ-104.\nClean samples. For effective machine learning, the data samples are expected to be independent\nand identically distributed (iid); otherwise, the resulting performance metric could be significantly\ninflated [ 24]. The existence of duplicate and/or near duplicate code samples makes the iid assumption\ndubious. Hence, it is crucial to identify the near duplicates. The presence of identical problems in the\ndataset poses an even bigger issue. In CodeNet, we analyzed the code samples for (near) duplication\nand used clustering to find identical problems. This information is made available as part of the\ndataset release but it is absent in GCJ and POJ-104.\n5 Construction of CodeNet\n5.1 Collection of Code Samples\nThe CodeNet ']","The inclusion of specific metadata in CodeNet, such as information on error categories, problem requirements, and sample input/output, enables a wide range of code analysis tasks. This metadata allows for determining if a code sample solves a problem correctly, identifying errors, making code more secure, complying with regulations, and addressing diverse use cases. Additionally, the metadata helps in performing tasks like identifying duplicate or near-duplicate code samples, ensuring data samples are independent and identically distributed, and analyzing code for clustering and problem identification. Overall, the specific metadata in CodeNet enhances the ability to perform various code analysis tasks efficiently and effectively.",1.0,0.3333333333333333,0.6480797529220581,"Rich metadata including problem descriptions, submission outcomes, and programming languages enable tasks like bug fixing, performance improvement, and understanding coding patterns.","The inclusion of specific metadata in CodeNet, such as information on whether a code sample solves the problem correctly, error categories, problem statements, sample inputs, sample outputs, and identification of near duplicates, enables a wide range of code analysis tasks. This metadata allows for tasks such as code search, clone detection, and identifying duplicate or near duplicate code samples. Additionally, it helps in ensuring that the data samples are independent and identically distributed, which is crucial for effective machine learning. This rich annotation and metadata in CodeNet provide valuable information for various applications and use cases in code analysis.",1.0,1.0,0.6067237854003906 +"What types of tasks are included in SuperGLUE, and how do they enhance the benchmark's complexity?","SuperGLUE includes eight diverse tasks such as Boolean Questions (BoolQ), Choice of Plausible Alternatives (COPA), and Winograd Schema Challenge (WSC), enhancing complexity through a mix of question answering, natural language inference, coreference resolution, and word sense disambiguation tasks.","['Conclusion\nWe present SuperGLUE, a new benchmark for evaluating general-purpose language understanding\nsystems. SuperGLUE updates the GLUE benchmark by identifying a new set of challenging NLU\ntasks, as measured by the difference between human and machine baselines. The set of eight tasks in\nour benchmark emphasizes diverse task formats and low-data training data tasks, with nearly half the\ntasks having fewer than 1k examples and all but one of the tasks having fewer than 10k examples.\nWe evaluate BERT-based baselines and find that they still lag behind humans by nearly 20 points.\nGiven the difficulty of SuperGLUE for BERT, we expect that further progress in multi-task, transfer,\nand unsupervised/self-supervised learning techniques will be necessary to approach human-level per-\nformance on the benchmark. Overall, we argue that SuperGLUE offers a rich and challenging testbed\nfor work developing new general-purpose ', 'high-level goals, the original version of\nthe benchmark is no longer a suitable metric for quantifying such progress.\nIn response, we introduce SuperGLUE, a new benchmark designed to pose a more rigorous test of\nlanguage understanding. SuperGLUE has the same high-level motivation as GLUE: to provide a\nsimple, hard-to-game measure of progress toward general-purpose language understanding technolo-\ngies for English. We anticipate that significant progress on SuperGLUE should require substantive\ninnovations in a number of core areas of machine learning, including sample-efficient, transfer,\nmultitask, and unsupervised or self-supervised learning.\nSuperGLUE follows the basic design of GLUE: It consists of a public leaderboard built around\neight language understanding tasks, drawing on existing data, accompanied by a single-number\nperformance metric, and an analysis toolkit. However, it improves upon GLUE in several ways:\nMore challenging tasks: SuperGLUE retains the two hardest tasks ', 'in GLUE. The remaining tasks\nwere identified from those submitted to an open call for task proposals and were selected based on\ndifficulty for current NLP approaches.\nMore diverse task formats: The task formats in GLUE are limited to sentence- and sentence-pair\nclassification. We expand the set of task formats in SuperGLUE to include coreference resolution\nand question answering (QA).\nComprehensive human baselines: We include human performance estimates for all benchmark\ntasks, which verify that substantial headroom exists between a strong BERT-based baseline and\nhuman performance.\nImproved code support: SuperGLUE is distributed with a new, modular toolkit for work on\npretraining, multi-task learning, and transfer learning in NLP, built around standard tools including\nPyTorch (Paszke et al., 2017) and AllenNLP (Gardner et al., 2017).\nRefined usage rules: The conditions for inclusion on the SuperGLUE leaderboard have been\nrevamped to ensure fair ']","The tasks included in SuperGLUE are more diverse compared to GLUE, with task formats including coreference resolution and question answering (QA) in addition to sentence- and sentence-pair classification tasks. This diversity enhances the benchmark's complexity by challenging language understanding systems to perform well across different types of tasks, requiring substantive innovations in core areas of machine learning such as sample-efficient, transfer, multi-task, and unsupervised or self-supervised learning.",1.0,1.0,0.8283367156982422,"SuperGLUE includes eight diverse tasks such as Boolean Questions (BoolQ), Choice of Plausible Alternatives (COPA), and Winograd Schema Challenge (WSC), enhancing complexity through a mix of question answering, natural language inference, coreference resolution, and word sense disambiguation tasks.","The types of tasks included in SuperGLUE are diverse and include not only sentence- and sentence-pair classification but also coreference resolution and question answering (QA). These diverse task formats enhance the benchmark's complexity by challenging language understanding systems in different ways, requiring them to perform well across a variety of NLU tasks.",1.0,1.0,0.8364593982696533 +"What criteria were used to select tasks for inclusion in SuperGLUE, and how do these criteria benefit the benchmark?","Tasks were selected based on difficulty, evaluability, substance, public data availability, simple formats, and licensing conditions, ensuring a challenging, diverse, and accessible benchmark for advancing general-purpose language understanding technologies.","['Conclusion\nWe present SuperGLUE, a new benchmark for evaluating general-purpose language understanding\nsystems. SuperGLUE updates the GLUE benchmark by identifying a new set of challenging NLU\ntasks, as measured by the difference between human and machine baselines. The set of eight tasks in\nour benchmark emphasizes diverse task formats and low-data training data tasks, with nearly half the\ntasks having fewer than 1k examples and all but one of the tasks having fewer than 10k examples.\nWe evaluate BERT-based baselines and find that they still lag behind humans by nearly 20 points.\nGiven the difficulty of SuperGLUE for BERT, we expect that further progress in multi-task, transfer,\nand unsupervised/self-supervised learning techniques will be necessary to approach human-level per-\nformance on the benchmark. Overall, we argue that SuperGLUE offers a rich and challenging testbed\nfor work developing new general-purpose ', 'high-level goals, the original version of\nthe benchmark is no longer a suitable metric for quantifying such progress.\nIn response, we introduce SuperGLUE, a new benchmark designed to pose a more rigorous test of\nlanguage understanding. SuperGLUE has the same high-level motivation as GLUE: to provide a\nsimple, hard-to-game measure of progress toward general-purpose language understanding technolo-\ngies for English. We anticipate that significant progress on SuperGLUE should require substantive\ninnovations in a number of core areas of machine learning, including sample-efficient, transfer,\nmultitask, and unsupervised or self-supervised learning.\nSuperGLUE follows the basic design of GLUE: It consists of a public leaderboard built around\neight language understanding tasks, drawing on existing data, accompanied by a single-number\nperformance metric, and an analysis toolkit. However, it improves upon GLUE in several ways:\nMore challenging tasks: SuperGLUE retains the two hardest tasks ', 'in GLUE. The remaining tasks\nwere identified from those submitted to an open call for task proposals and were selected based on\ndifficulty for current NLP approaches.\nMore diverse task formats: The task formats in GLUE are limited to sentence- and sentence-pair\nclassification. We expand the set of task formats in SuperGLUE to include coreference resolution\nand question answering (QA).\nComprehensive human baselines: We include human performance estimates for all benchmark\ntasks, which verify that substantial headroom exists between a strong BERT-based baseline and\nhuman performance.\nImproved code support: SuperGLUE is distributed with a new, modular toolkit for work on\npretraining, multi-task learning, and transfer learning in NLP, built around standard tools including\nPyTorch (Paszke et al., 2017) and AllenNLP (Gardner et al., 2017).\nRefined usage rules: The conditions for inclusion on the SuperGLUE leaderboard have been\nrevamped to ensure fair ']","The criteria used to select tasks for inclusion in SuperGLUE were based on the difficulty for current NLP approaches. These criteria benefit the benchmark by ensuring that the tasks are challenging and require substantive innovations in core areas of machine learning, such as sample-efficient, transfer, multitask, and unsupervised or self-supervised learning. This leads to a more rigorous test of language understanding and provides a measure of progress towards general-purpose language understanding technologies for English.",1.0,1.0,0.4921630322933197,"Tasks were selected based on difficulty, evaluability, substance, public data availability, simple formats, and licensing conditions, ensuring a challenging, diverse, and accessible benchmark for advancing general-purpose language understanding technologies.","The criteria used to select tasks for inclusion in SuperGLUE were based on their difficulty for current NLP approaches and the diversity of task formats. These criteria benefit the benchmark by offering more challenging tasks, including coreference resolution and question answering, which go beyond the limited sentence- and sentence-pair classification tasks in the original GLUE benchmark. Additionally, including human performance estimates for all tasks provides a clear benchmark for measuring progress beyond BERT-based baselines, highlighting the substantial headroom for improvement in general-purpose language understanding systems.",1.0,0.6666666666666666,0.44551360607147217 +"What are the main components of GLM-130B's pre-training objective, and how do they contribute to its performance?","GLM-130B's pre-training includes self-supervised blank infilling for 95% of tokens and multi-task instruction pre-training (MIP) for 5% of tokens, enhancing downstream zero-shot performance.","['training 100B-scale LLMs, we present not only the\nsuccessful part for training GLM-130B but also many of the failed options and lessons learned.\nParticularly, the training stability is the decisive factor in the success of training models of such a\nscale. Different from practices such as manually adjusting learning rates in OPT-175B and using\nembedding norm in the sacrifice of performance in BLOOM-176B, we experiment with various op-\ntions and find the strategy of embedding gradient shrink can significantly stabilize the training of\nGLM-130B.\nSpecifically, GLM-130B is a bilingual (English and Chinese) bidirectional dense model with 130 bil-\nlion parameters, pre-trained over 400 billion tokens on a cluster of 96 NVIDIA DGX-A100 (8 ×40G)\nGPU nodes between May 6 and July 3, 2022. Instead of using the GPT-style architecture, we adopt\nthe General Language Model (GLM) algorithm (Du ', 'the patch projection layer. Unfortunately,\nwe cannot freeze the training of the embedding layer in language models.\nFinally, we find the gradient shrink on embedding layers could overcome loss spikes and thus sta-\nbilize GLM-130B’s training. It is first used in the multi-modal transformer CogView (Ding et al.,\n2021). Let αbe the shrinking factor, the strategy can be easily implemented via word _embedding =\nword _embedding ∗α+word _embedding .detach ()∗(1−α). Figure 4 (b) suggests that empirically,\nsetting α= 0.1wipes out most spikes we would have met, with negligible latency.\nIn fact, the final GLM-130B training run only experiences three late-stage loss divergence cases,\nthough it fails numerous times due to hardware failures. For the three unexpected spikes, it turns out\nfurther shrinking the embedding gradient can still help stabilize the GLM-130B training. See the\ntraining notes and Tensorboard ', 'GPT-3, PaLM)ContextMask(s)\nFigure 2: GLM-130B and LLMs of similar\nscale on zero-shot LAMBADA language\nmodeling. Details on GLM’s bidirectional\nattention are provided in Du et al. (2022).Conceptually, the blank infilling objective with bidi-\nrectional attention enables a more effective compre-\nhension of contexts than GPT-style models: when us-\ning [MASK], GLM-130B behaves as BERT (Devlin\net al., 2019) and T5 (Raffel et al., 2020); when us-\ning [gMASK], GLM-130B behaves similarly to Pre-\nfixLM (Liu et al., 2018; Dong et al., 2019).\nEmpirically, GLM-130B offers a record-high accuracy\nof 80.2% on zero-shot LAMBADA by outperforming\nboth GPT-3 and PaLM 540B in Figure 2. By setting\nthe attention mask, GLM-130B’s unidirectional vari-\nant is comparable to GPT-3 and OPT-175B. Our ob-\nservations are in line with existing findings (Liu et al.,\n2018; Dong et al., 2019).\nLayer Normalization (LN, Ba et al. (2016)). Training instability is one ']","GLM-130B's pre-training objective includes the blank infilling objective with bidirectional attention. This objective enables a more effective comprehension of contexts compared to GPT-style models. When using [MASK], GLM-130B behaves as BERT and T5, and when using [gMASK], it behaves similarly to PrefixLM. This objective contributes to GLM-130B's performance by allowing it to achieve a record-high accuracy of 80.2% on zero-shot LAMBADA, outperforming GPT-3 and PaLM. Additionally, setting the attention mask helps make GLM-130B's unidirectional variant comparable to GPT-3 and OPT-175B.",0.8,0.6666666666666666,0.7116473317146301,"GLM-130B's pre-training includes self-supervised blank infilling for 95% of tokens and multi-task instruction pre-training (MIP) for 5% of tokens, enhancing downstream zero-shot performance.",None,0.0,0.3333333333333333,-0.014217216521501541 +How does GLM-130B address ethical concerns and biases compared to its counterparts?,"Through comprehensive ethical evaluations, GLM-130B exhibits significantly less bias and toxicity generation than similar-scale models, highlighting its consideration for ethical AI development.","['et al., 2019; Fan\net al., 2019). Very recent work (Dettmers et al., 2022) shows that LLMs such as OPT-175B and\nBLOOM-176B can be quantized to 8 bit due to special distribution of outlier dimensions. In this\nwork, we demonstrate GLM’s scaling law for INT4 weight quantization, which allows GLM-130B\nto inference on as few as 4 ×RTX 3090 (24G) GPUs or 8 ×RTX 2080 Ti (11G) GPUs.\n7 C ONCLUSION AND LESSONS\nWe introduce GLM-130B, a bilingual pre-trained language model that aims to facilitate open and\ninclusive LLM research. GLM-130B’s technical and engineering undertakings generate insight into\nLLMs’ architectures, pre-training objectives, training stability and efficiency, and affordable infer-\nence. Altogether, it contributes to the high quality of GLM-130B in terms of both language perfor-\nmance on 112 tasks and ethical results on bias and toxicity benchmarks. Our ', 'GPT-3, PaLM)ContextMask(s)\nFigure 2: GLM-130B and LLMs of similar\nscale on zero-shot LAMBADA language\nmodeling. Details on GLM’s bidirectional\nattention are provided in Du et al. (2022).Conceptually, the blank infilling objective with bidi-\nrectional attention enables a more effective compre-\nhension of contexts than GPT-style models: when us-\ning [MASK], GLM-130B behaves as BERT (Devlin\net al., 2019) and T5 (Raffel et al., 2020); when us-\ning [gMASK], GLM-130B behaves similarly to Pre-\nfixLM (Liu et al., 2018; Dong et al., 2019).\nEmpirically, GLM-130B offers a record-high accuracy\nof 80.2% on zero-shot LAMBADA by outperforming\nboth GPT-3 and PaLM 540B in Figure 2. By setting\nthe attention mask, GLM-130B’s unidirectional vari-\nant is comparable to GPT-3 and OPT-175B. Our ob-\nservations are in line with existing findings (Liu et al.,\n2018; Dong et al., 2019).\nLayer Normalization (LN, Ba et al. (2016)). Training instability is one ', 'into INT4 precision without post training while OPT\nand BLOOM can only reach INT8. Due to a unique property of the GLM architecture, GLM-130B’s\nINT4 quantization introduces negligible performance degradation, e.g., -0.74% on LAMBADA and\neven +0.05% on MMLU, making it still better than the uncompressed GPT-3. This enables GLM-\n130B’s fast inference with performance guarantee on a server of 4 ×RTX 3090 (24G) or 8 ×RTX\n2080 Ti (11G), the most affordable GPU required for using 100B-scale LLMs to date.\n2\x0cPublished as a conference paper at ICLR 2023\nGradient Norm(a) More than 30 failed preliminary trials at 100B-scale (b) Final decisive trials: Sandwich-LN v.s. DeepNorm012345678910111213\n0 500 1k 1.5k 2k2.5k 3k\nFigure 3: Trials on different LayerNorms for GLM-130B training. It turns out that DeepNorm is the\nmost stable one, as it has small gradient norm and ']","GLM-130B addresses ethical concerns and biases compared to its counterparts by offering high quality in terms of both language performance on 112 tasks and ethical results on bias and toxicity benchmarks. Additionally, due to its unique architecture, GLM-130B's INT4 quantization introduces negligible performance degradation, making it still better than the uncompressed GPT-3. This allows for fast inference with performance guarantee on more affordable GPUs compared to other 100B-scale LLMs.",1.0,0.6666666666666666,0.8056617379188538,"Through comprehensive ethical evaluations, GLM-130B exhibits significantly less bias and toxicity generation than similar-scale models, highlighting its consideration for ethical AI development.",GLM-130B addresses ethical concerns and biases compared to its counterparts by demonstrating high quality in terms of language performance on 112 tasks and ethical results on bias and toxicity benchmarks.,1.0,0.6666666666666666,0.8363907337188721 +How does Megatron-LM's implementation ensure training stability for extremely large transformer models?,"Through techniques like embedding gradient shrink (EGS) and DeepNorm layer normalization, Megatron-LM stabilizes training of models with billions of parameters.","['Megatron-LM: Training Multi-Billion Parameter Language Models Using\nModel Parallelism\nMohammad Shoeybi1 2Mostofa Patwary1 2Raul Puri1 2Patrick LeGresley2Jared Casper2\nBryan Catanzaro2\nAbstract\nRecent work in language modeling demonstrates\nthat training large transformer models advances\nthe state of the art in Natural Language Processing\napplications. However, very large models can be\nquite difficult to train due to memory constraints.\nIn this work, we present our techniques for train-\ning very large transformer models and implement\na simple, efficient intra-layer model parallel ap-\nproach that enables training transformer models\nwith billions of parameters. Our approach does\nnot require a new compiler or library changes, is\northogonal and complimentary to pipeline model\nparallelism, and can be fully implemented with\nthe insertion of a few communication operations\nin native PyTorch. We illustrate this approach\nby converging transformer based models up to\n8.3 billion parameters using 512 GPUs. We sus-\ntain 15.1 PetaFLOPs across the entire ', '/ 87.4 86.5 (89.0 / 85.5)\nXLNet (Yang et al., 2019) 2 90.8 / 90.8 92.3 95.1 / 89.7 90.6 / 87.9 85.4 (88.6 / 84.0)\nMegatron-336M 1 89.7 / 90.0 92.3 94.2 / 88.0 88.1 / 84.8 83.0 (86.9 / 81.5)\nMegatron-1.3B 1 90.9 / 91.0 92.6 94.9 / 89.1 90.2 / 87.1 87.3 (90.4 / 86.1)\nMegatron-3.9B 1 91.4 / 91.4 92.7 95.5 / 90.0 91.2 / 88.5 89.5 (91.8 / 88.6)\nALBERT ensemble (Lan et al., 2019) 95.5 / 90.1 91.4 / 88.9 89.4 (91.2 / 88.6)\nMegatron-3.9B ensemble 95.8 / 90.5 91.7 / 89.0 90.9 (93.1 / 90.0)\nFigure 7. Training loss for BERT model using the original architec-\nture (a) and the rearranged architecture (b). Left figure shows the\ntraining loss for 336M and 752M BERT model. While the original\narchitecture performs well ', 'to\nthe existing PyTorch transformer implementations. We ef-\nficiently trained transformer based models up to 8.3 bil-\nlion parameter on 512 NVIDIA V100 GPUs with 8-way\nmodel parallelism and achieved up to 15.1 PetaFLOPs sus-\ntained over the entire application. We also showed that for\nBERT models, careful attention to the placement of layer\nnormalization in BERT-like models is critical to achieving\nincreased accuracies as the model size increases. We study\nthe effect of model size on down-stream task accuracy and\nachieve far superior results on downstream tasks and estab-\nlish new SOTA for WikiText103, LAMBADA, and RACE\ndatasets. Finally, we open sourced our code to enable future\nwork leveraging model parallel transformers.\nThere are several directions for future work. Continuing\nto increase the scale of pretraining is a promising line of\x0cMegatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism\ninvestigation that will further ']","Megatron-LM's implementation ensures training stability for extremely large transformer models by utilizing a simple and efficient intra-layer model parallel approach, which enables training transformer models with billions of parameters. This approach does not require new compiler or library changes, is orthogonal and complementary to pipeline model parallelism, and can be fully implemented with the insertion of a few communication operations in native PyTorch. Additionally, careful attention to the placement of layer normalization in BERT-like models is critical to achieving increased accuracies, particularly as the model size increases.",1.0,1.0,0.7938891649246216,"Through techniques like embedding gradient shrink (EGS) and DeepNorm layer normalization, Megatron-LM stabilizes training of models with billions of parameters.","Megatron-LM's implementation ensures training stability for extremely large transformer models by maintaining a separate random number generator for dropout within model parallel regions, which is uniquely seeded for each model parallel worker. This helps achieve randomness across the entire operation and prevent issues related to training large models.",1.0,1.0,0.779701292514801 +How does PAL's performance on the GSM8K benchmark compare to other advanced models?,"PAL achieves state-of-the-art few-shot accuracy on the GSM8K benchmark, surpassing PaLM-540B by a significant 15% top-1 accuracy improvement, demonstrating its effectiveness in handling complex arithmetic and symbolic reasoning tasks.","['only\n2.3% (absolute) on GSM 8Kand improved PaLM-540 Bby\n1.7%, while PALimproves Codex by 6.4% on the same\nbenchmark (Section 5.1). Similarly to our work, Chowd-\nhery et al. (2022) have also experimented with generating\nPython code for solving the GSM 8Kbenchmark, but their\nexperiments resulted in lower accuracy than the standard\nPaLM-540 Bthat uses chain-of-thought. Pi et al. (2022)\npretrain the model on execution results of random expres-\nsions on a calculator, instead of using the solver at test time\nas well. While their model can hypothetically perform arith-\nmetic better than other pretrained LMs, their results on the\nSV AMP benchmark are much lower: 57.4% using a T5-11Bmodel, while PAL achieves 79.4% on the same benchmark\nwithout any specialized pretraining.\nShortly after a preprint of our work was submitted to arXiv,\nanother related work on “program of thought prompting”\n(Chen et al., ', 'et al., 2022) to leverage its bidirectional at-\ntention advantage and autoregressive blank infilling objective. Table 1 summarizes the comparison\nbetween GLM-130B, GPT-3 and another two open-source efforts—OPT-175B and BLOOM-176B,\nas well as PaLM 540B (Chowdhery et al., 2022)—a 4 ×larger model—as a reference.\nAltogether, the conceptual uniqueness and engineering efforts enable GLM-130B to exhibit perfor-\nmance that surpasses the level of GPT-3 on a wide range of benchmarks (in total 112 tasks) and also\noutperforms PaLM 540B in many cases, while outperformance over GPT-3 has not been observed in\nOPT-175B and BLOOM-176B (Cf. Figure 1 left). For zero-shot performance, GLM-130B is better\nthan GPT-3 175B (+5.0%), OPT-175B (+6.5%), and BLOOM-176B (+13.0%) on LAMBADA (Pa-\nperno et al., 2016), and achieves 3 ×better performance than GPT-3 on Big-bench-lite (Srivastava\net al., 2022). For the 5-shot MMLU (Hendrycks et ', 'democratize the access and\nstudy of LLMs, since it can be run on a single GPU.\nAt the higher-end of the scale, our 65B-parameter\nmodel is also competitive with the best large lan-\nguage models such as Chinchilla or PaLM-540B.\nUnlike Chinchilla, PaLM, or GPT-3, we only\nuse publicly available data, making our work com-\npatible with open-sourcing, while most existing\nmodels rely on data which is either not publicly\navailable or undocumented (e.g. “Books – 2TB” or\n“Social media conversations”). There exist some\nexceptions, notably OPT (Zhang et al., 2022),\nGPT-NeoX (Black et al., 2022), BLOOM (Scao\net al., 2022) and GLM (Zeng et al., 2022), but none\nthat are competitive with PaLM-62B or Chinchilla.\nIn the rest of this paper, we present an overview\nof the modifications we made to the transformer\narchitecture (Vaswani et al., 2017), as well as our\ntraining method. We ']",PAL's performance on the GSM8K benchmark is 6.4% better compared to other advanced models like Pi et al. (2022) and Chowdhery et al. (2022) who experimented with generating Python code for solving the GSM8K benchmark.,1.0,1.0,0.8306421041488647,"PAL achieves state-of-the-art few-shot accuracy on the GSM8K benchmark, surpassing PaLM-540B by a significant 15% top-1 accuracy improvement, demonstrating its effectiveness in handling complex arithmetic and symbolic reasoning tasks.","PAL's performance on the GSM8K benchmark is significantly improved compared to other advanced models. The PAL model achieves an accuracy of 80.4%, which is 1.9% higher than Minerva-540B, while using the same number of samples.",1.0,1.0,0.8481011390686035 +Can PAL's approach be generalized to models trained primarily on natural language rather than code?,"PAL is effective with models trained on code, but experiments indicate it can also enhance models primarily trained on natural language, provided they possess sufficient coding ability, demonstrating PAL's versatility.","['PAL: Program-aided Language Models\nLuyu Gao* 1Aman Madaan* 1Shuyan Zhou* 1Uri Alon1Pengfei Liu1 2Yiming Yang1Jamie Callan1\nGraham Neubig1 2\n{luyug,amadaan,shuyanzh,ualon,pliu3,yiming,callan,gneubig }@cs.cmu.edu\nAbstract\nLarge language models (LLMs) have recently\ndemonstrated an impressive ability to perform\narithmetic and symbolic reasoning tasks, when\nprovided with a few examples at test time (“few-\nshot prompting”). Much of this success can be\nattributed to prompting methods such as “chain-\nof-thought”, which employ LLMs for both under-\nstanding the problem description by decomposing\nit into steps, as well as solving each step of the\nproblem. While LLMs seem to be adept at this\nsort of step-by-step decomposition, LLMs often\nmake logical and arithmetic mistakes in the solu-\ntion part, even when the problem is decomposed\ncorrectly. In this paper, we present Program-\nAided Language models ( PAL): a novel approach\nthat uses the LLM to read natural language prob-\nlems and generate programs as the ', 'work with weaker models, while\nits benefit scales elegantly to stronger models as well.\nDoes P AL work with LMs of natural language? We\nalso experimented with PALusing thetext-davinci\nseries. Figure 8 shows the following interesting re-\nsults: when the base LM’s “code modeling ability” is\nweak (using text-davinci-001 ),COTperforms better\nthan PAL. However, once the LM’s code modeling abil-\nity is sufficiently high (using text-davinci-002 and\ntext-davinci-003 ),PALoutperforms COT, and PAL\ntext-davinci-003 performs almost as PALcode-davinci-002 .\nThis shows that PALis not limited to LMs of code, but it\ncan work with LMs that were mainly trained for natural\nlanguage, if they have a sufficiently high coding ability.\nIs P AL better because of the Python prompt or because\nof the interpreter? We experimented with generating\nPython code, while requiring the neural LM to “execute” it\nas well, without using an interpreter, following Nye ', 'model with the explicit intermediate\nsteps that are required to reach the final answer. Then, the\nmodel is expected to apply a similar decomposition to the ac-\ntual test example, and consecutively reach an accurate final\nanswer (Ling et al., 2017; Amini et al., 2019). Nevertheless,\nwhile LLMs can decompose natural language problems into\nsteps and perform simple arithmetic operations, their perfor-\nmance falls dramatically when dealing with complex arith-\nmetic (Hendrycks et al., 2021; Madaan & Yazdanbakhsh,\n2022) or large numbers (Nogueira et al., 2021; Qian et al.,\n2022). In fact, even when fine-tuning a PaLM-based model\non 164B tokens of explicit mathematical content, its two\nmost common failures are reportedly “incorrect reasoning”\nand “incorrect calculation” (Lewkowycz et al., 2022).\nIn this paper, we propose Program- Aided Language\nmodel ( PAL): a novel method that uses an LLM to read\nnatural language problems ']","Yes, PAL's approach can be generalized to models trained primarily on natural language rather than code as long as they have a sufficiently high coding ability.",1.0,1.0,0.829967737197876,"PAL is effective with models trained on code, but experiments indicate it can also enhance models primarily trained on natural language, provided they possess sufficient coding ability, demonstrating PAL's versatility.","Yes, PAL's approach can be generalized to models trained primarily on natural language rather than code, as long as the models have a sufficiently high coding ability.",1.0,1.0,0.8284573554992676 +What contributions does CodeNet make towards the creation of AI models capable of understanding and generating code?," By providing a comprehensive dataset with diverse programming tasks and solutions, CodeNet accelerates the development of AI models for code generation and understanding.","['be readily used as inputs into machine learning models. Results of code classi-\nfication and code similarity experiments using the CodeNet dataset are provided as\na reference. We hope that the scale, diversity and rich, high-quality annotations of\nCodeNet will offer unprecedented research opportunities at the intersection of AI\nand Software Engineering.\n1 Introduction\nThere is a growing trend towards leveraging AI for building tools that support software engineering\nand development [ 1,2]. AI can manipulate and generate computer code, but can it do so with\nhigh quality? Many researchers are fascinated by this possibility, encouraged by AI successes in\nother domains and tantalized by the vision of computers programming computers. Some recent\ndeep-learning models [ 3,4] for code have received a lot of publicity: trained on vast amounts of\ndata and using novel architectures with billions of parameters, ', 'CodeNet: A Large-Scale AI for Code Dataset for\nLearning a Diversity of Coding Tasks\nRuchir Puri1, David S. Kung1, Geert Janssen1, Wei Zhang1,\nGiacomo Domeniconi1,Vladimir Zolotov1,Julian Dolby1,Jie Chen2,1,\nMihir Choudhury1,Lindsey Decker1,Veronika Thost2,1,Luca Buratti1,\nSaurabh Pujar1,Shyam Ramji1,Ulrich Finkler1,Susan Malaika3,Frederick Reiss1\n1IBM Research\n2MIT-IBM Watson AI Lab\n3IBM Worldwide Ecosystems\nAbstract\nOver the last several decades, software has been woven into the fabric of every\naspect of our society. As software development surges and code infrastructure of\nenterprise applications ages, it is now more critical than ever to increase software\ndevelopment productivity and modernize legacy applications. Advances in deep\nlearning and machine learning algorithms have enabled breakthroughs in computer\nvision, speech recognition, natural language processing and beyond, motivating\nresearchers to leverage AI techniques to improve software development efficiency.\nThus, the fast-emerging research area of “AI for Code” has garnered new interest\nand gathered momentum. In this paper, we ', 'requirements change, make it more secure, and/or comply with regu-\nlations. These tasks are challenging, and it is crucial to provide tool support for developers to be\nmore productive at performing them. It is well known that the latest advancements in deep learning\nalgorithms rely on best-of-breed datasets, such as ImageNet, to create increasingly complex and\npowerful models. In this paper, we present ""CodeNet"", a first-of-its-kind dataset in scale, diversity,\nand quality, to accelerate the algorithmic advances in AI for Code.\nTo promote widespread adoption of CodeNet, we will be launching contests involving use cases based\non the dataset. The first contest [ 6] will focus on diversity, inclusion and spurring interest among\naspiring data scientists. We are partnering with the Global Women in Data Science organization (with\npresence in over 50 countries) founded by Stanford University ']","CodeNet provides a large-scale dataset with rich, high-quality annotations that offer unprecedented research opportunities at the intersection of AI and Software Engineering. This dataset can be readily used as inputs into machine learning models for code classification, code similarity experiments, and other coding tasks. The diversity, scale, and quality of CodeNet accelerate algorithmic advances in AI for Code by providing developers with the necessary tools and support to improve software development efficiency. Additionally, the launch of contests involving the use cases based on the dataset promotes widespread adoption of CodeNet and spurs interest among aspiring data scientists.",0.75,0.6666666666666666,0.8815904855728149," By providing a comprehensive dataset with diverse programming tasks and solutions, CodeNet accelerates the development of AI models for code generation and understanding.","CodeNet contributes towards the creation of AI models capable of understanding and generating code by providing a large-scale dataset with diverse coding tasks, high-quality annotations, and rich annotations. This allows researchers to train deep learning algorithms on vast amounts of data and novel architectures, leading to potential breakthroughs in the field of AI for Code. Additionally, CodeNet promotes algorithmic advances by accelerating the development of complex and powerful models through the use of its dataset.",0.6666666666666666,1.0,0.9162453413009644 +How does SuperGLUE facilitate the development and evaluation of models across its diverse set of tasks?,"SuperGLUE provides a public leaderboard, a single-number performance metric for easy comparison, a software toolkit for model analysis, and detailed task instructions, supporting the development, evaluation, and comparison of models on its comprehensive suite of tasks.","['high-level goals, the original version of\nthe benchmark is no longer a suitable metric for quantifying such progress.\nIn response, we introduce SuperGLUE, a new benchmark designed to pose a more rigorous test of\nlanguage understanding. SuperGLUE has the same high-level motivation as GLUE: to provide a\nsimple, hard-to-game measure of progress toward general-purpose language understanding technolo-\ngies for English. We anticipate that significant progress on SuperGLUE should require substantive\ninnovations in a number of core areas of machine learning, including sample-efficient, transfer,\nmultitask, and unsupervised or self-supervised learning.\nSuperGLUE follows the basic design of GLUE: It consists of a public leaderboard built around\neight language understanding tasks, drawing on existing data, accompanied by a single-number\nperformance metric, and an analysis toolkit. However, it improves upon GLUE in several ways:\nMore challenging tasks: SuperGLUE retains the two hardest tasks ', 'Conclusion\nWe present SuperGLUE, a new benchmark for evaluating general-purpose language understanding\nsystems. SuperGLUE updates the GLUE benchmark by identifying a new set of challenging NLU\ntasks, as measured by the difference between human and machine baselines. The set of eight tasks in\nour benchmark emphasizes diverse task formats and low-data training data tasks, with nearly half the\ntasks having fewer than 1k examples and all but one of the tasks having fewer than 10k examples.\nWe evaluate BERT-based baselines and find that they still lag behind humans by nearly 20 points.\nGiven the difficulty of SuperGLUE for BERT, we expect that further progress in multi-task, transfer,\nand unsupervised/self-supervised learning techniques will be necessary to approach human-level per-\nformance on the benchmark. Overall, we argue that SuperGLUE offers a rich and challenging testbed\nfor work developing new general-purpose ', 'in GLUE. The remaining tasks\nwere identified from those submitted to an open call for task proposals and were selected based on\ndifficulty for current NLP approaches.\nMore diverse task formats: The task formats in GLUE are limited to sentence- and sentence-pair\nclassification. We expand the set of task formats in SuperGLUE to include coreference resolution\nand question answering (QA).\nComprehensive human baselines: We include human performance estimates for all benchmark\ntasks, which verify that substantial headroom exists between a strong BERT-based baseline and\nhuman performance.\nImproved code support: SuperGLUE is distributed with a new, modular toolkit for work on\npretraining, multi-task learning, and transfer learning in NLP, built around standard tools including\nPyTorch (Paszke et al., 2017) and AllenNLP (Gardner et al., 2017).\nRefined usage rules: The conditions for inclusion on the SuperGLUE leaderboard have been\nrevamped to ensure fair ']","SuperGLUE facilitates the development and evaluation of models across its diverse set of tasks by including more challenging tasks, diverse task formats such as coreference resolution and question answering, comprehensive human baselines for comparison, improved code support with a modular toolkit for NLP work, and refined usage rules for fair inclusion on the SuperGLUE leaderboard.",1.0,1.0,0.8908988237380981,"SuperGLUE provides a public leaderboard, a single-number performance metric for easy comparison, a software toolkit for model analysis, and detailed task instructions, supporting the development, evaluation, and comparison of models on its comprehensive suite of tasks.","SuperGLUE facilitates the development and evaluation of models across its diverse set of tasks by providing a benchmark that emphasizes diverse task formats and low-data training data tasks. It includes challenging tasks that were selected based on difficulty for current NLP approaches, incorporates more diverse task formats such as coreference resolution and question answering, and includes human performance estimates for all benchmark tasks. Additionally, SuperGLUE is distributed with a new, modular toolkit for work on pretraining, multi-task learning, and transfer learning in NLP, improving code support for developers.",1.0,1.0,0.8822134733200073 + What tools and support does SuperGLUE offer to researchers working on language understanding models?,"SuperGLUE offers jiant, a software toolkit for pretraining, multi-task learning, and transfer learning in NLP, built around standard tools and supporting popular pretrained models, facilitating research and experimentation on the benchmark's tasks.","['high-level goals, the original version of\nthe benchmark is no longer a suitable metric for quantifying such progress.\nIn response, we introduce SuperGLUE, a new benchmark designed to pose a more rigorous test of\nlanguage understanding. SuperGLUE has the same high-level motivation as GLUE: to provide a\nsimple, hard-to-game measure of progress toward general-purpose language understanding technolo-\ngies for English. We anticipate that significant progress on SuperGLUE should require substantive\ninnovations in a number of core areas of machine learning, including sample-efficient, transfer,\nmultitask, and unsupervised or self-supervised learning.\nSuperGLUE follows the basic design of GLUE: It consists of a public leaderboard built around\neight language understanding tasks, drawing on existing data, accompanied by a single-number\nperformance metric, and an analysis toolkit. However, it improves upon GLUE in several ways:\nMore challenging tasks: SuperGLUE retains the two hardest tasks ', 'Conclusion\nWe present SuperGLUE, a new benchmark for evaluating general-purpose language understanding\nsystems. SuperGLUE updates the GLUE benchmark by identifying a new set of challenging NLU\ntasks, as measured by the difference between human and machine baselines. The set of eight tasks in\nour benchmark emphasizes diverse task formats and low-data training data tasks, with nearly half the\ntasks having fewer than 1k examples and all but one of the tasks having fewer than 10k examples.\nWe evaluate BERT-based baselines and find that they still lag behind humans by nearly 20 points.\nGiven the difficulty of SuperGLUE for BERT, we expect that further progress in multi-task, transfer,\nand unsupervised/self-supervised learning techniques will be necessary to approach human-level per-\nformance on the benchmark. Overall, we argue that SuperGLUE offers a rich and challenging testbed\nfor work developing new general-purpose ', 'in GLUE. The remaining tasks\nwere identified from those submitted to an open call for task proposals and were selected based on\ndifficulty for current NLP approaches.\nMore diverse task formats: The task formats in GLUE are limited to sentence- and sentence-pair\nclassification. We expand the set of task formats in SuperGLUE to include coreference resolution\nand question answering (QA).\nComprehensive human baselines: We include human performance estimates for all benchmark\ntasks, which verify that substantial headroom exists between a strong BERT-based baseline and\nhuman performance.\nImproved code support: SuperGLUE is distributed with a new, modular toolkit for work on\npretraining, multi-task learning, and transfer learning in NLP, built around standard tools including\nPyTorch (Paszke et al., 2017) and AllenNLP (Gardner et al., 2017).\nRefined usage rules: The conditions for inclusion on the SuperGLUE leaderboard have been\nrevamped to ensure fair ']","SuperGLUE offers improved code support with a new, modular toolkit for work on pretraining, multi-task learning, and transfer learning in NLP, built around standard tools including PyTorch and AllenNLP.",1.0,0.5,0.802820086479187,"SuperGLUE offers jiant, a software toolkit for pretraining, multi-task learning, and transfer learning in NLP, built around standard tools and supporting popular pretrained models, facilitating research and experimentation on the benchmark's tasks.","SuperGLUE offers improved code support with a new, modular toolkit for work on pretraining, multi-task learning, and transfer learning in NLP, built around standard tools including PyTorch and AllenNLP.",1.0,1.0,0.8028199672698975 +In what ways does GLM-130B's bilingual capability extend its application compared to monolingual models?,"As a bilingual model in English and Chinese, GLM-130B significantly outperforms monolingual models in related benchmarks, showcasing its superior performance in cross-lingual understanding and applications.","['in Table 16. It shows that GLM-130B has better performances than\nLaMDA and GPT-3 (Davinci) on all tasks. In the Data-to-text task, GLM-130B performs slightly\nworse than PaLM-540B, while in the summary task, GLM-130B has even higher ROUGE results.\nWe also ablate GLM-130B to unidirectional to demonstrate the advantage of bidirectional attention.\nUnidirectional GLM-130B underperforms GPT-3 175B in all three datasets, but when it shifts to\nbidirectional attention, there is an instant boost, making GLM-130B even comparable to PaLM-\n540B in a few cases. It indicates that bidirectional attention over the provided context (i.e., prefix)\ncan also be beneficial for text generation missions.\nTable 16: 1-shot GEM English natural language generation tasks (WebNLG, E2E, and WikiLingua).\nWe compare two versions of GLM-130B (uni: unidirectional attention, bi: bidirectional attention),\nshowing that bidirectional attention can also improve conditional generation’s performance.\nTask ', 'et al., 2019; Fan\net al., 2019). Very recent work (Dettmers et al., 2022) shows that LLMs such as OPT-175B and\nBLOOM-176B can be quantized to 8 bit due to special distribution of outlier dimensions. In this\nwork, we demonstrate GLM’s scaling law for INT4 weight quantization, which allows GLM-130B\nto inference on as few as 4 ×RTX 3090 (24G) GPUs or 8 ×RTX 2080 Ti (11G) GPUs.\n7 C ONCLUSION AND LESSONS\nWe introduce GLM-130B, a bilingual pre-trained language model that aims to facilitate open and\ninclusive LLM research. GLM-130B’s technical and engineering undertakings generate insight into\nLLMs’ architectures, pre-training objectives, training stability and efficiency, and affordable infer-\nence. Altogether, it contributes to the high quality of GLM-130B in terms of both language perfor-\nmance on 112 tasks and ethical results on bias and toxicity benchmarks. Our ', 'al., 2021) tasks, it is better than GPT-3 175B\n(+0.9%) and BLOOM-176B (+12.7%). As a bilingual LLM also in Chinese, it offers significantly\nbetter results than ERNIE TITAN 3.0 260B (Wang et al., 2021)—the largest Chinese LLM—on 7\nzero-shot CLUE (Xu et al., 2020) datasets (+24.26%) and 5 zero-shot FewCLUE (Xu et al., 2021)\nones (+12.75%). Importantly, as summarized in Figure 1 right, GLM-130B as an open model is\nassociated with significantly less bias and generation toxicity than its 100B-scale counterparts .\nFinally, we design GLM-130B to empower as many people as possible to conduct 100B-scale LLM\nstudies. First, instead of using 175B+ parameters as OPT and BLOOM, the 130B size is decided be-\ncause such a size supports inference on a single A100 (8 ×40G) server. Second, to further lower the\nGPU requirements, we quantize GLM-130B ']","GLM-130B's bilingual capability extends its application compared to monolingual models by offering significantly better results in both English and Chinese languages, outperforming other large-scale models in zero-shot CLUE and FewCLUE datasets. Additionally, GLM-130B demonstrates less bias and generation toxicity compared to its 100B-scale counterparts, making it suitable for more inclusive and ethical language model research.",1.0,1.0,0.8914151787757874,"As a bilingual model in English and Chinese, GLM-130B significantly outperforms monolingual models in related benchmarks, showcasing its superior performance in cross-lingual understanding and applications.","GLM-130B's bilingual capability extends its application by facilitating open and inclusive LLM research, generating insights into LLMs' architectures, pre-training objectives, training stability and efficiency, and affordable inference. Additionally, it contributes to high-quality language performance on various tasks and ethical results on bias and toxicity benchmarks.",1.0,0.6666666666666666,0.7592584490776062 +What intrinsic model characteristic allows Megatron-LM to achieve efficient training with multi-billion parameter transformer models?,"Megatron-LM utilizes intra-layer model parallelism exploiting the inherent structure of transformer-based language models, enabling efficient training without custom C++ or compiler requirements.","['Megatron-LM: Training Multi-Billion Parameter Language Models Using\nModel Parallelism\nMohammad Shoeybi1 2Mostofa Patwary1 2Raul Puri1 2Patrick LeGresley2Jared Casper2\nBryan Catanzaro2\nAbstract\nRecent work in language modeling demonstrates\nthat training large transformer models advances\nthe state of the art in Natural Language Processing\napplications. However, very large models can be\nquite difficult to train due to memory constraints.\nIn this work, we present our techniques for train-\ning very large transformer models and implement\na simple, efficient intra-layer model parallel ap-\nproach that enables training transformer models\nwith billions of parameters. Our approach does\nnot require a new compiler or library changes, is\northogonal and complimentary to pipeline model\nparallelism, and can be fully implemented with\nthe insertion of a few communication operations\nin native PyTorch. We illustrate this approach\nby converging transformer based models up to\n8.3 billion parameters using 512 GPUs. We sus-\ntain 15.1 PetaFLOPs across the entire ', '/ 87.4 86.5 (89.0 / 85.5)\nXLNet (Yang et al., 2019) 2 90.8 / 90.8 92.3 95.1 / 89.7 90.6 / 87.9 85.4 (88.6 / 84.0)\nMegatron-336M 1 89.7 / 90.0 92.3 94.2 / 88.0 88.1 / 84.8 83.0 (86.9 / 81.5)\nMegatron-1.3B 1 90.9 / 91.0 92.6 94.9 / 89.1 90.2 / 87.1 87.3 (90.4 / 86.1)\nMegatron-3.9B 1 91.4 / 91.4 92.7 95.5 / 90.0 91.2 / 88.5 89.5 (91.8 / 88.6)\nALBERT ensemble (Lan et al., 2019) 95.5 / 90.1 91.4 / 88.9 89.4 (91.2 / 88.6)\nMegatron-3.9B ensemble 95.8 / 90.5 91.7 / 89.0 90.9 (93.1 / 90.0)\nFigure 7. Training loss for BERT model using the original architec-\nture (a) and the rearranged architecture (b). Left figure shows the\ntraining loss for 336M and 752M BERT model. While the original\narchitecture performs well ', 'a slight\ndecrease in scaling efficiency. Future research should be\nwary of this hyperparameter to design large transformer\nmodels that balance model speed and model accuracy.\nD.2. Strong Scaling\nOur model parallelism is primarily designed to enable train-\ning models larger than what can fit in the memory of a\x0cMegatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism\nTable 7. Effect of number of attention heads on scaling on 8.3\nbillion of parameters with 8-way model parallelism.\nAttention heads Hidden size per head Scaling Efficiency\n16 192 82%\n24 128 80%\n32 96 77%\nTable 8. Speedup obtained for the 1.2 billion parameters model\nusing model parallelism while keeping the batch size constant.\n# of GPUs 1 2 4 8\nSpeedup 1.0 1.64 2.34 2.98\nsingle GPU, but it can also accelerate the training of smaller\nmodels without increasing the batch size. To measure this\nacceleration we ']","The intrinsic model characteristic that allows Megatron-LM to achieve efficient training with multi-billion parameter transformer models is the implementation of a simple, efficient intra-layer model parallel approach.",1.0,1.0,0.8105524778366089,"Megatron-LM utilizes intra-layer model parallelism exploiting the inherent structure of transformer-based language models, enabling efficient training without custom C++ or compiler requirements.",Intra-layer model parallel approach allows Megatron-LM to achieve efficient training with multi-billion parameter transformer models.,1.0,1.0,0.7729780673980713 +Describe Megatron-LM's approach to handling the output embedding weight matrix for model parallelism.,"The output embedding weight matrix is parallelized along the vocabulary dimension, requiring minimal communication and maximizing compute efficiency during training.","['embedding GEMM. How-\never, in transformer language models, the output embed-\nding layer shares weights with the input embedding, requir-\ning modifications to both. We parallelize the input embed-\nding weight matrix EH×valong the vocabulary dimension\nE= [E1,E2](column-wise). Since each partition now only\x0cMegatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism\nFigure 4. Communication operations in a transformer layer. There\nare 4 total communication operations in the forward and backward\npass of a single model parallel transformer layer.\ncontains a portion of the embedding table, an all-reduce ( g\noperator) is required after the input embedding. For the\noutput embedding, one approach is to perform the parallel\nGEMM [Y1,Y2] = [XE 1,XE 2]to obtain the logits, add an\nall-gatherY=all-gather ([Y1,Y2]), and send the results to\nthe cross-entropy loss function. However, for this case, the\nall-gather will communicate b×s×velements (bis the\nbatch-size and sis the ', 'Megatron-LM: Training Multi-Billion Parameter Language Models Using\nModel Parallelism\nMohammad Shoeybi1 2Mostofa Patwary1 2Raul Puri1 2Patrick LeGresley2Jared Casper2\nBryan Catanzaro2\nAbstract\nRecent work in language modeling demonstrates\nthat training large transformer models advances\nthe state of the art in Natural Language Processing\napplications. However, very large models can be\nquite difficult to train due to memory constraints.\nIn this work, we present our techniques for train-\ning very large transformer models and implement\na simple, efficient intra-layer model parallel ap-\nproach that enables training transformer models\nwith billions of parameters. Our approach does\nnot require a new compiler or library changes, is\northogonal and complimentary to pipeline model\nparallelism, and can be fully implemented with\nthe insertion of a few communication operations\nin native PyTorch. We illustrate this approach\nby converging transformer based models up to\n8.3 billion parameters using 512 GPUs. We sus-\ntain 15.1 PetaFLOPs across the entire ', '0.1 can solve most numerical instability problems.\nLesson 7 (GLM’s INT4 Quantization Scaling Law). GLM has a unique INT4 weight quan-\ntization scaling law unobserved in GPT-style BLOOM.\nLesson 8 (Future Direction). To create powerful LLMs, the main focus can be on 1) more and\nbetter data, 2) better architectures and pre-training objectives, and 3) more sufficient training.\n30\x0cPublished as a conference paper at ICLR 2023\nFigure 15: Weight value distribution of linear layers in GLM-130B (in orange, attn-dense ,\nattn-qkv ,glu-w1 ,glu-w2 ) and BLOOM-176B (in blue, attn-dense ,attn-qkv ,\nffn-w1 ,ffn-w2 )’s first 28 transformer layers. Generally for GLM-130B it is attn-dense\nandw2that may present narrow value distributions. attn-qkv andw1may also be a reason for\nenabling INT4 quantization in middle layers of GLM-130B.\n31\x0cPublished as a conference paper at ICLR 2023\nC D ATASET AND EVALUATION DETAILS\nC.1 M ']","For the output embedding weight matrix in Megatron-LM's approach to model parallelism, one approach is to perform parallel GEMM to obtain the logits, followed by an all-gather operation to gather the results and send them to the cross-entropy loss function.",1.0,1.0,0.48899489641189575,"The output embedding weight matrix is parallelized along the vocabulary dimension, requiring minimal communication and maximizing compute efficiency during training.","Megatron-LM's approach to handling the output embedding weight matrix for model parallelism involves performing a parallel GEMM operation to obtain the logits, adding an all-gather operation to gather the results, and then sending the results to the cross-entropy loss function. By using this approach, they can effectively parallelize the output embedding weight matrix to enable training transformer models with billions of parameters.",1.0,1.0,0.5158683657646179 +How does the addition of a Python interpreter in PAL's framework influence the accuracy of solutions?,"Incorporating a Python interpreter allows PAL to execute generated programs accurately, bridging the gap in LLMs' arithmetic and logical capabilities, thus significantly improving solution accuracy compared to relying solely on LLMs.","['intermediate\nreasoning steps, but offloads the solution step to a\nruntime such as a Python interpreter. With PAL,\ndecomposing the natural language problem into\nrunnable steps remains the only learning task for\nthe LLM, while solving is delegated to the inter-\npreter. We demonstrate this synergy between a\nneural LLM and a symbolic interpreter across 13\nmathematical, symbolic, and algorithmic reason-\ning tasks from BIG-Bench Hard and other bench-\nmarks. In all these natural language reasoning\ntasks, generating code using an LLM and rea-\nsoning using a Python interpreter leads to more\naccurate results than much larger models. For ex-\nample, PALusing CODEX achieves state-of-the-\nart few-shot accuracy on the GSM 8Kbenchmark\nof math word problems, surpassing PaLM-540 B\nwhich uses chain-of-thought by absolute 15% top-\n1. Our code and data are publicly available at\nhttp://reasonwithpal.com .\n*The first three authors contributed equally.1Language Tech-\nnologies Institute, Carnegie Mellon University, ', '23.7%. Similarly, PAL\nvastly outperforms COTby absolute 21.8% on REPEAT\nCOPY. Surprisingly, DIRECT prompting performs better\nthan COTonREPEAT COPY. Yet, PALimproves over\nDIRECT by 9.3% in R EPEAT COPY.\n[0,2] [3,5] [6,8] [9,11] [12,14] [15,17] [18,20] [21,23] [24,26]0.60.81\nNumber of ObjectsAccuracy\nPaL\nCoT\nFigure 6: The solve rate on COLORED OBJECTS with re-\nspect to the number of objects included in the test question.\nIs PAL sensitive to the complexity of the question? We\nexamined how the performance of PALandCOTchange as\nthe complexity of the input question grows, measured as the\nnumber of objects in the question of COLORED OBJECTS .\nAs shown in Figure 6, PALis superior COTacross all input\nlengths. As the number of objects in the question increases,\nCOT’s accuracy is unstable and drops, while PALremains\nconsistently close to 100%. More analysis on the token-level\npredictions can be found in Appendix G.\x0cPAL: Program-aided Language ', 'voting. We thus repeated the greedy-\ndecoding experiments using nucleus sampling (Holtzman\net al., 2019) with p= 0.95andk= 40 as in Lewkowycz\net al. (2022) and temperature of 0.7. As shown in Table 3,\nthis further increases the accuracy of PALfrom 72.0% to\n80.4% on GSM 8K, obtaining 1.9% higher accuracy than\nMinerva-540B using the same number of samples.5.2. Symbolic Reasoning & Algorithmic Tasks Results\nResults for symbolic reasoning and algorithmic tasks are\nshown in Table 2. In COLORED OBJECTS ,PALimproves\nover the strong COTby 8.8%, and by 19.4% over the stan-\ndard direct prompting. In P ENGUINS , PAL provides a gain\nof absolute 14.1% over COT. InDATE,PALfurther provides\n11.4% gain over both C OTCodex,PaLM-540 B, and LaMDA-137B .\nThe two rightmost columns of Table 2 show that PALis\nclose to solving OBJECT COUNTING , reaching 96.7% and\nimproving over COTby absolute ']",Solving using a Python interpreter in PAL's framework leads to more accurate results compared to much larger models. It achieves state-of-the-art few-shot accuracy on math word problems and outperforms other models by a significant margin in various reasoning tasks.,1.0,1.0,0.8387901186943054,"Incorporating a Python interpreter allows PAL to execute generated programs accurately, bridging the gap in LLMs' arithmetic and logical capabilities, thus significantly improving solution accuracy compared to relying solely on LLMs.","The addition of a Python interpreter in PAL's framework influences the accuracy of solutions by offloading the solution step to the interpreter, resulting in more accurate results compared to using larger models that do not utilize a Python interpreter.",1.0,1.0,0.8056235313415527 +How does CodeNet's dataset size and diversity support advanced AI for code research compared to previous datasets?,"CodeNet surpasses previous datasets with over 14 million code samples and around 500 million lines of code in 55 languages, providing unparalleled scale and diversity for AI for code research.","['requirements change, make it more secure, and/or comply with regu-\nlations. These tasks are challenging, and it is crucial to provide tool support for developers to be\nmore productive at performing them. It is well known that the latest advancements in deep learning\nalgorithms rely on best-of-breed datasets, such as ImageNet, to create increasingly complex and\npowerful models. In this paper, we present ""CodeNet"", a first-of-its-kind dataset in scale, diversity,\nand quality, to accelerate the algorithmic advances in AI for Code.\nTo promote widespread adoption of CodeNet, we will be launching contests involving use cases based\non the dataset. The first contest [ 6] will focus on diversity, inclusion and spurring interest among\naspiring data scientists. We are partnering with the Global Women in Data Science organization (with\npresence in over 50 countries) founded by Stanford University ', 'related datasets.\nLarge scale. A useful dataset should contain a large number and variety of data samples to expose\nthe realistic and complex landscape of data distributions one meets in practice. CodeNet is the\nlargest dataset in its class - it has approximately 10 times more code samples than GCJ and its C++\nbenchmark is approximately 10 times larger than POJ-104.\nRich annotation. For the dataset class in question, it is important to include information beyond\nwhich problem a code sample solves to enable a wide range of applications and use cases. It is useful\nto know whether a code sample solves the problem correctly, and if not, the error category (e.g.,\ncompilation error, runtime error, and out-of-memory error). Since the source code is supposed to\nsolve a programming problem, it is advantageous to know the problem ', 'are expensive\nto curate [ 52], especially for low-resource languages (e.g., legacy code). Recently, monolingual\napproaches [ 53,4] were developed to mitigate the reliance on parallel data, paving ways to build\nmodels for languages with little translation. Compared with current popular data sets (e.g., [ 4,54]),\nCodeNet covers a much richer set of languages with ample training instances.\n10 Conclusion\nArtificial intelligence has made great strides in understanding human language. Computer scientists\nhave been fascinated by the possibility and tantalized by the vision of computers (AI) programming\ncomputers. In this paper, we presented ""CodeNet"", a first-of-its-kind very large-scale, diverse and\nhigh-quality dataset to accelerate the algorithmic advances in AI for Code. This dataset is not\nonly unique in its scale, but also in the diversity of coding tasks it can help benchmark: from code\nsimilarity and classification for advances ']","CodeNet's dataset size is approximately 10 times larger than previous datasets like GCJ and POJ-104, and it covers a much richer set of languages with ample training instances. This large scale and diversity enable CodeNet to provide a more realistic and complex landscape of data distributions, allowing for more advanced AI research in code compared to previous datasets.",1.0,0.25,0.8062356114387512,"CodeNet surpasses previous datasets with over 14 million code samples and around 500 million lines of code in 55 languages, providing unparalleled scale and diversity for AI for code research.","CodeNet's dataset size of over 14 million code samples and about 500 million lines of code in 55 different programming languages, along with its rich set of high-quality annotations, supports advanced AI for code research by providing a larger and more diverse set of data for benchmarking and accelerating research in AI techniques for critical coding tasks. This allows for a more comprehensive understanding of different coding tasks such as code similarity and classification, code translation, and code performance improvement techniques, compared to previous datasets that may have been more limited in scope and size.",1.0,1.0,0.9035031795501709 +What methodologies were employed in SuperGLUE to establish robust human baseline performances across its tasks?,"SuperGLUE's human baseline performances were established through careful collection of non-expert annotations for a sample of each test set, following structured annotation procedures that included a short training phase for annotators, ensuring reliable estimates of human performance.","['Conclusion\nWe present SuperGLUE, a new benchmark for evaluating general-purpose language understanding\nsystems. SuperGLUE updates the GLUE benchmark by identifying a new set of challenging NLU\ntasks, as measured by the difference between human and machine baselines. The set of eight tasks in\nour benchmark emphasizes diverse task formats and low-data training data tasks, with nearly half the\ntasks having fewer than 1k examples and all but one of the tasks having fewer than 10k examples.\nWe evaluate BERT-based baselines and find that they still lag behind humans by nearly 20 points.\nGiven the difficulty of SuperGLUE for BERT, we expect that further progress in multi-task, transfer,\nand unsupervised/self-supervised learning techniques will be necessary to approach human-level per-\nformance on the benchmark. Overall, we argue that SuperGLUE offers a rich and challenging testbed\nfor work developing new general-purpose ', 'high-level goals, the original version of\nthe benchmark is no longer a suitable metric for quantifying such progress.\nIn response, we introduce SuperGLUE, a new benchmark designed to pose a more rigorous test of\nlanguage understanding. SuperGLUE has the same high-level motivation as GLUE: to provide a\nsimple, hard-to-game measure of progress toward general-purpose language understanding technolo-\ngies for English. We anticipate that significant progress on SuperGLUE should require substantive\ninnovations in a number of core areas of machine learning, including sample-efficient, transfer,\nmultitask, and unsupervised or self-supervised learning.\nSuperGLUE follows the basic design of GLUE: It consists of a public leaderboard built around\neight language understanding tasks, drawing on existing data, accompanied by a single-number\nperformance metric, and an analysis toolkit. However, it improves upon GLUE in several ways:\nMore challenging tasks: SuperGLUE retains the two hardest tasks ', 'Room and board .Context 2: He nailed boards across the windows.\nSense match: FalseWSCText: Mark told Pete many lies about himself, which Pete included in his book. Heshould have been\nmore truthful. Coreference: False\n3 SuperGLUE Overview\n3.1 Design Process\nThe goal of SuperGLUE is to provide a simple, robust evaluation metric of any method capable of\nbeing applied to a broad range of language understanding tasks. To that end, in designing SuperGLUE,\nwe identify the following desiderata of tasks in the benchmark:\nTask substance: Tasks should test a system’s ability to understand and reason about texts in English.\nTask difficulty: Tasks should be beyond the scope of current state-of-the-art systems, but solvable by\nmost college-educated English speakers. We exclude tasks that require domain-specific knowledge,\ne.g. medical notes or scientific papers.\nEvaluability: Tasks must have an automatic performance metric that ']","The methodologies employed in SuperGLUE to establish robust human baseline performances across its tasks include updating the GLUE benchmark with new challenging NLU tasks, emphasizing diverse task formats and low-data training data tasks, evaluating BERT-based baselines, and setting high standards for language understanding by requiring substantive innovations in sample-efficient, transfer, multitask, and unsupervised or self-supervised learning techniques.",1.0,1.0,0.8549681901931763,"SuperGLUE's human baseline performances were established through careful collection of non-expert annotations for a sample of each test set, following structured annotation procedures that included a short training phase for annotators, ensuring reliable estimates of human performance.",SuperGLUE included human performance estimates for all benchmark tasks to establish robust human baseline performances.,0.5,1.0,0.838058590888977 +How do the selected tasks in SuperGLUE reflect the benchmark's goals for advancing language understanding technologies?,"The selected tasks, chosen for their difficulty and diversity, reflect SuperGLUE's goals by requiring substantive innovations in machine learning to improve sample-efficient transfer, multitask, and unsupervised learning, driving forward the capabilities of language understanding systems.","['high-level goals, the original version of\nthe benchmark is no longer a suitable metric for quantifying such progress.\nIn response, we introduce SuperGLUE, a new benchmark designed to pose a more rigorous test of\nlanguage understanding. SuperGLUE has the same high-level motivation as GLUE: to provide a\nsimple, hard-to-game measure of progress toward general-purpose language understanding technolo-\ngies for English. We anticipate that significant progress on SuperGLUE should require substantive\ninnovations in a number of core areas of machine learning, including sample-efficient, transfer,\nmultitask, and unsupervised or self-supervised learning.\nSuperGLUE follows the basic design of GLUE: It consists of a public leaderboard built around\neight language understanding tasks, drawing on existing data, accompanied by a single-number\nperformance metric, and an analysis toolkit. However, it improves upon GLUE in several ways:\nMore challenging tasks: SuperGLUE retains the two hardest tasks ', 'Conclusion\nWe present SuperGLUE, a new benchmark for evaluating general-purpose language understanding\nsystems. SuperGLUE updates the GLUE benchmark by identifying a new set of challenging NLU\ntasks, as measured by the difference between human and machine baselines. The set of eight tasks in\nour benchmark emphasizes diverse task formats and low-data training data tasks, with nearly half the\ntasks having fewer than 1k examples and all but one of the tasks having fewer than 10k examples.\nWe evaluate BERT-based baselines and find that they still lag behind humans by nearly 20 points.\nGiven the difficulty of SuperGLUE for BERT, we expect that further progress in multi-task, transfer,\nand unsupervised/self-supervised learning techniques will be necessary to approach human-level per-\nformance on the benchmark. Overall, we argue that SuperGLUE offers a rich and challenging testbed\nfor work developing new general-purpose ', 'in GLUE. The remaining tasks\nwere identified from those submitted to an open call for task proposals and were selected based on\ndifficulty for current NLP approaches.\nMore diverse task formats: The task formats in GLUE are limited to sentence- and sentence-pair\nclassification. We expand the set of task formats in SuperGLUE to include coreference resolution\nand question answering (QA).\nComprehensive human baselines: We include human performance estimates for all benchmark\ntasks, which verify that substantial headroom exists between a strong BERT-based baseline and\nhuman performance.\nImproved code support: SuperGLUE is distributed with a new, modular toolkit for work on\npretraining, multi-task learning, and transfer learning in NLP, built around standard tools including\nPyTorch (Paszke et al., 2017) and AllenNLP (Gardner et al., 2017).\nRefined usage rules: The conditions for inclusion on the SuperGLUE leaderboard have been\nrevamped to ensure fair ']","The selected tasks in SuperGLUE reflect the benchmark's goals for advancing language understanding technologies by emphasizing diverse task formats, including coreference resolution and question answering, and by focusing on low-data training tasks. Additionally, the tasks were selected based on their difficulty for current NLP approaches, encouraging substantive innovations in machine learning areas such as sample-efficient, transfer, multi-task, and unsupervised or self-supervised learning.",1.0,1.0,0.888789713382721,"The selected tasks, chosen for their difficulty and diversity, reflect SuperGLUE's goals by requiring substantive innovations in machine learning to improve sample-efficient transfer, multitask, and unsupervised learning, driving forward the capabilities of language understanding systems.","The selected tasks in SuperGLUE were chosen based on their difficulty for current NLP approaches, with an emphasis on diverse task formats and low-data training data tasks. This reflects the benchmark's goal of posing a rigorous test of language understanding and requiring substantive innovations in sample-efficient, transfer, multi-task, and unsupervised/self-supervised learning in order to make progress towards general-purpose language understanding technologies.",1.0,1.0,0.8712459802627563 +"In the context of PAL, what role do meaningful variable names play in the generated program's effectiveness?","Meaningful variable names in PAL's generated code are crucial for linking programmatic steps to natural language problem entities, enhancing readability and maintainability of the code, and improving the model's reasoning and execution accuracy.","['et al.\n(2021); Madaan et al. (2022). We created prompts that are\nsimilar to PAL’s, except that they do include the final answer.\nThis resulted in a 23.2 solve rate on GSM 8K, much lower\nthan PAL(72.0), and only 4.5 points higher than DIRECT .\nThese results reinforce our hypothesis that the main benefit\nofPALcomes from the synergy with the interpreter, andnot only from having a better prompt. Additional details\nare provided in Appendix B. For additional discussion on\nthe advantages of code-prompts over textual-prompts, see\nAppendix G.\nDo variable names matter? In all our experiments, we\nused meaningful variable names in the PALprompts, to ease\nthe model’s grounding of variables to the entities they rep-\nresent. For the Python interpreter, however, variable names\nare meaningless. To measure the importance of meaningful\nvariable names, we experimented with two prompts variants:\n1. P AL−comment – ', 'the PALprompt without intermediate\nNL comments.\n2. P AL−var\n−comment – the PALprompt without intermediate\nNL comments and with variable names substituted\nwith random characters.\nThe results are shown in Figure 9. In COLORED OBJECTED\nandDATE, removing intermediate NL comments but keep-\ning meaningful variable names ( PAL−comment ) – slightly re-\nduces the results compared to the full PALprompt, but it still\nachieves higher accuracy than the baselines COT. Remov-\ning variable names as well ( PAL−var\n−comment ) further decreases\naccuracy, and performs worse than COT. Since variable\nnames have an important part in code quality (Gellenbeck\n& Cook, 1991; Takang et al., 1996), meaningful variable\nnames are only expected to ease reasoning for Codex, which\nwas trained on mostly meaningful names, as was also found\nby Madaan et al. (2022).\n7. Related Work\nPrompting Few-shot prompting (Brown et al., 2020) has\nbeen shown to be an ', 'PAL: Program-aided Language Models\nLuyu Gao* 1Aman Madaan* 1Shuyan Zhou* 1Uri Alon1Pengfei Liu1 2Yiming Yang1Jamie Callan1\nGraham Neubig1 2\n{luyug,amadaan,shuyanzh,ualon,pliu3,yiming,callan,gneubig }@cs.cmu.edu\nAbstract\nLarge language models (LLMs) have recently\ndemonstrated an impressive ability to perform\narithmetic and symbolic reasoning tasks, when\nprovided with a few examples at test time (“few-\nshot prompting”). Much of this success can be\nattributed to prompting methods such as “chain-\nof-thought”, which employ LLMs for both under-\nstanding the problem description by decomposing\nit into steps, as well as solving each step of the\nproblem. While LLMs seem to be adept at this\nsort of step-by-step decomposition, LLMs often\nmake logical and arithmetic mistakes in the solu-\ntion part, even when the problem is decomposed\ncorrectly. In this paper, we present Program-\nAided Language models ( PAL): a novel approach\nthat uses the LLM to read natural language prob-\nlems and generate programs as the ']","Meaningful variable names play a critical role in the generated program's effectiveness as they ease the model's grounding of variables to the entities they represent. This helps in improving the reasoning process for the model, especially when it comes to code quality and understanding the problem description accurately.",1.0,0.6666666666666666,0.6391936540603638,"Meaningful variable names in PAL's generated code are crucial for linking programmatic steps to natural language problem entities, enhancing readability and maintainability of the code, and improving the model's reasoning and execution accuracy.","Meaningful variable names play a crucial role in the generated program's effectiveness in PAL. They ease the model's grounding of variables to the entities they represent, aiding in reasoning and understanding the code.",1.0,1.0,0.8643062114715576 +How does PAL address the execution of complex computations in natural language processing tasks?," PAL addresses complex computations by generating programmatic reasoning steps for LLMs and executing them via an interpreter, ensuring accurate and efficient problem-solving acros various domains.","['intermediate\nreasoning steps, but offloads the solution step to a\nruntime such as a Python interpreter. With PAL,\ndecomposing the natural language problem into\nrunnable steps remains the only learning task for\nthe LLM, while solving is delegated to the inter-\npreter. We demonstrate this synergy between a\nneural LLM and a symbolic interpreter across 13\nmathematical, symbolic, and algorithmic reason-\ning tasks from BIG-Bench Hard and other bench-\nmarks. In all these natural language reasoning\ntasks, generating code using an LLM and rea-\nsoning using a Python interpreter leads to more\naccurate results than much larger models. For ex-\nample, PALusing CODEX achieves state-of-the-\nart few-shot accuracy on the GSM 8Kbenchmark\nof math word problems, surpassing PaLM-540 B\nwhich uses chain-of-thought by absolute 15% top-\n1. Our code and data are publicly available at\nhttp://reasonwithpal.com .\n*The first three authors contributed equally.1Language Tech-\nnologies Institute, Carnegie Mellon University, ', 'PAL: Program-aided Language Models\nLuyu Gao* 1Aman Madaan* 1Shuyan Zhou* 1Uri Alon1Pengfei Liu1 2Yiming Yang1Jamie Callan1\nGraham Neubig1 2\n{luyug,amadaan,shuyanzh,ualon,pliu3,yiming,callan,gneubig }@cs.cmu.edu\nAbstract\nLarge language models (LLMs) have recently\ndemonstrated an impressive ability to perform\narithmetic and symbolic reasoning tasks, when\nprovided with a few examples at test time (“few-\nshot prompting”). Much of this success can be\nattributed to prompting methods such as “chain-\nof-thought”, which employ LLMs for both under-\nstanding the problem description by decomposing\nit into steps, as well as solving each step of the\nproblem. While LLMs seem to be adept at this\nsort of step-by-step decomposition, LLMs often\nmake logical and arithmetic mistakes in the solu-\ntion part, even when the problem is decomposed\ncorrectly. In this paper, we present Program-\nAided Language models ( PAL): a novel approach\nthat uses the LLM to read natural language prob-\nlems and generate programs as the ', 'domain-specific\nrepresentations other than Python code. Further, LMs that\nwere pretrained on Python are abundant compared to other\ndomain-specific languages, making Python code a much\nmore preferable representation. Andor et al. (2019) generate\ntask-specific arithmetic operations for reading comprehen-\nsion tasks; Gupta et al. (2019) design neural modules such\nascount to deal with arithmetic operations. PALgener-\nalizes these works by generating general Python programs,\nwithout the need for defining specialized modules. The clos-\nest work to ours technically may be Binder (Cheng et al.,\n2022), but it addressed mostly answering questions about\ntables using SQL and SQL-like Python.\x0cPAL: Program-aided Language Models 9\n8. Conclusion\nWe introduce PAL, a new method for natural language rea-\nsoning, using programs as intermediate reasoning steps.\nDifferently from existing LM-based reasoning approaches,\nthe main idea is to offload solving and calculating to an\nexternal Python interpreter, instead of using the LLM ']","PAL addresses the execution of complex computations in natural language processing tasks by using the LLM to decompose the natural language problem into runnable steps, while delegating the solving and calculation tasks to an external Python interpreter. This offloads the solution step to the runtime interpreter, allowing the LLM to focus solely on generating code and intermediate reasoning steps.",1.0,1.0,0.8730251789093018," PAL addresses complex computations by generating programmatic reasoning steps for LLMs and executing them via an interpreter, ensuring accurate and efficient problem-solving acros various domains.","PAL addresses the execution of complex computations in natural language processing tasks by using a novel approach that offloads solving and calculating to an external Python interpreter, instead of relying on the LLM for the solution step. This allows the LLM to focus on decomposing the natural language problem into runnable steps, while the Python interpreter handles the actual solving of the problem.",1.0,1.0,0.8662283420562744 +How does SuperGLUE's design ensure it presents a more challenging benchmark than its predecessor GLUE for evaluating language understanding models?,"SuperGLUE introduces more challenging tasks retained from GLUE, diversifies task formats beyond sentence- and sentence-pair classification to include coreference resolution and question answering, and features comprehensive human baseline performances to ensure significant headroom for model improvement.","['high-level goals, the original version of\nthe benchmark is no longer a suitable metric for quantifying such progress.\nIn response, we introduce SuperGLUE, a new benchmark designed to pose a more rigorous test of\nlanguage understanding. SuperGLUE has the same high-level motivation as GLUE: to provide a\nsimple, hard-to-game measure of progress toward general-purpose language understanding technolo-\ngies for English. We anticipate that significant progress on SuperGLUE should require substantive\ninnovations in a number of core areas of machine learning, including sample-efficient, transfer,\nmultitask, and unsupervised or self-supervised learning.\nSuperGLUE follows the basic design of GLUE: It consists of a public leaderboard built around\neight language understanding tasks, drawing on existing data, accompanied by a single-number\nperformance metric, and an analysis toolkit. However, it improves upon GLUE in several ways:\nMore challenging tasks: SuperGLUE retains the two hardest tasks ', 'Conclusion\nWe present SuperGLUE, a new benchmark for evaluating general-purpose language understanding\nsystems. SuperGLUE updates the GLUE benchmark by identifying a new set of challenging NLU\ntasks, as measured by the difference between human and machine baselines. The set of eight tasks in\nour benchmark emphasizes diverse task formats and low-data training data tasks, with nearly half the\ntasks having fewer than 1k examples and all but one of the tasks having fewer than 10k examples.\nWe evaluate BERT-based baselines and find that they still lag behind humans by nearly 20 points.\nGiven the difficulty of SuperGLUE for BERT, we expect that further progress in multi-task, transfer,\nand unsupervised/self-supervised learning techniques will be necessary to approach human-level per-\nformance on the benchmark. Overall, we argue that SuperGLUE offers a rich and challenging testbed\nfor work developing new general-purpose ', 'in GLUE. The remaining tasks\nwere identified from those submitted to an open call for task proposals and were selected based on\ndifficulty for current NLP approaches.\nMore diverse task formats: The task formats in GLUE are limited to sentence- and sentence-pair\nclassification. We expand the set of task formats in SuperGLUE to include coreference resolution\nand question answering (QA).\nComprehensive human baselines: We include human performance estimates for all benchmark\ntasks, which verify that substantial headroom exists between a strong BERT-based baseline and\nhuman performance.\nImproved code support: SuperGLUE is distributed with a new, modular toolkit for work on\npretraining, multi-task learning, and transfer learning in NLP, built around standard tools including\nPyTorch (Paszke et al., 2017) and AllenNLP (Gardner et al., 2017).\nRefined usage rules: The conditions for inclusion on the SuperGLUE leaderboard have been\nrevamped to ensure fair ']","SuperGLUE ensures it presents a more challenging benchmark than its predecessor GLUE by including more challenging tasks, incorporating diverse task formats such as coreference resolution and question answering, providing comprehensive human baselines for comparison, offering improved code support for pretraining, multi-task learning, and transfer learning, and refining usage rules to ensure fair competition.",1.0,1.0,0.9337500929832458,"SuperGLUE introduces more challenging tasks retained from GLUE, diversifies task formats beyond sentence- and sentence-pair classification to include coreference resolution and question answering, and features comprehensive human baseline performances to ensure significant headroom for model improvement.","SuperGLUE's design ensures it presents a more challenging benchmark than its predecessor GLUE for evaluating language understanding models by including more challenging tasks, such as coreference resolution and question answering, by retaining the two hardest tasks from GLUE, by incorporating comprehensive human baselines for all benchmark tasks, and by providing improved code support with a new modular toolkit for pretraining, multi-task learning, and transfer learning in NLP.",1.0,1.0,0.9029492139816284 +" In what ways does SuperGLUE expand beyond GLUE's task formats, and why is this expansion significant?","By including coreference resolution and question answering formats, SuperGLUE expands the scope of evaluated linguistic phenomena, testing models' abilities in more varied contexts and reflecting a broader range of natural language understanding challenges.","['in GLUE. The remaining tasks\nwere identified from those submitted to an open call for task proposals and were selected based on\ndifficulty for current NLP approaches.\nMore diverse task formats: The task formats in GLUE are limited to sentence- and sentence-pair\nclassification. We expand the set of task formats in SuperGLUE to include coreference resolution\nand question answering (QA).\nComprehensive human baselines: We include human performance estimates for all benchmark\ntasks, which verify that substantial headroom exists between a strong BERT-based baseline and\nhuman performance.\nImproved code support: SuperGLUE is distributed with a new, modular toolkit for work on\npretraining, multi-task learning, and transfer learning in NLP, built around standard tools including\nPyTorch (Paszke et al., 2017) and AllenNLP (Gardner et al., 2017).\nRefined usage rules: The conditions for inclusion on the SuperGLUE leaderboard have been\nrevamped to ensure fair ', 'high-level goals, the original version of\nthe benchmark is no longer a suitable metric for quantifying such progress.\nIn response, we introduce SuperGLUE, a new benchmark designed to pose a more rigorous test of\nlanguage understanding. SuperGLUE has the same high-level motivation as GLUE: to provide a\nsimple, hard-to-game measure of progress toward general-purpose language understanding technolo-\ngies for English. We anticipate that significant progress on SuperGLUE should require substantive\ninnovations in a number of core areas of machine learning, including sample-efficient, transfer,\nmultitask, and unsupervised or self-supervised learning.\nSuperGLUE follows the basic design of GLUE: It consists of a public leaderboard built around\neight language understanding tasks, drawing on existing data, accompanied by a single-number\nperformance metric, and an analysis toolkit. However, it improves upon GLUE in several ways:\nMore challenging tasks: SuperGLUE retains the two hardest tasks ', 'Conclusion\nWe present SuperGLUE, a new benchmark for evaluating general-purpose language understanding\nsystems. SuperGLUE updates the GLUE benchmark by identifying a new set of challenging NLU\ntasks, as measured by the difference between human and machine baselines. The set of eight tasks in\nour benchmark emphasizes diverse task formats and low-data training data tasks, with nearly half the\ntasks having fewer than 1k examples and all but one of the tasks having fewer than 10k examples.\nWe evaluate BERT-based baselines and find that they still lag behind humans by nearly 20 points.\nGiven the difficulty of SuperGLUE for BERT, we expect that further progress in multi-task, transfer,\nand unsupervised/self-supervised learning techniques will be necessary to approach human-level per-\nformance on the benchmark. Overall, we argue that SuperGLUE offers a rich and challenging testbed\nfor work developing new general-purpose ']","SuperGLUE expands beyond GLUE's task formats by including coreference resolution and question answering (QA) in addition to sentence- and sentence-pair classification tasks. This expansion is significant because it introduces more diverse task formats, challenging NLP approaches beyond what is currently possible with GLUE, and providing a more comprehensive evaluation of language understanding systems.",1.0,1.0,0.8890186548233032,"By including coreference resolution and question answering formats, SuperGLUE expands the scope of evaluated linguistic phenomena, testing models' abilities in more varied contexts and reflecting a broader range of natural language understanding challenges.","SuperGLUE expands beyond GLUE's task formats by including coreference resolution and question answering (QA) tasks. This expansion is significant because it allows for a more diverse set of tasks to test language understanding systems, challenging them in different ways and providing a more comprehensive evaluation of their capabilities.",1.0,1.0,0.8707597851753235 diff --git a/evaluations/results/results_aragog_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:128.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:128.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:256.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:256.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:64.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:64.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:128.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:128.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:256.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:256.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:64.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:64.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:128.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:128.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:256.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:256.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:64.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:64.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_2024_06_12/detailed_all-mpnet-base-v2__top_k:1__chunk_size:128.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:1__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/detailed_all-mpnet-base-v2__top_k:1__chunk_size:128.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:1__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_2024_06_12/detailed_all-mpnet-base-v2__top_k:1__chunk_size:256.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:1__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/detailed_all-mpnet-base-v2__top_k:1__chunk_size:256.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:1__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_2024_06_12/detailed_all-mpnet-base-v2__top_k:1__chunk_size:64.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:1__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/detailed_all-mpnet-base-v2__top_k:1__chunk_size:64.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:1__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_2024_06_12/detailed_all-mpnet-base-v2__top_k:2__chunk_size:128.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:2__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/detailed_all-mpnet-base-v2__top_k:2__chunk_size:128.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:2__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_2024_06_12/detailed_all-mpnet-base-v2__top_k:2__chunk_size:256.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:2__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/detailed_all-mpnet-base-v2__top_k:2__chunk_size:256.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:2__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_2024_06_12/detailed_all-mpnet-base-v2__top_k:2__chunk_size:64.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:2__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/detailed_all-mpnet-base-v2__top_k:2__chunk_size:64.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:2__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_2024_06_12/detailed_all-mpnet-base-v2__top_k:3__chunk_size:128.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:3__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/detailed_all-mpnet-base-v2__top_k:3__chunk_size:128.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:3__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_2024_06_12/detailed_all-mpnet-base-v2__top_k:3__chunk_size:256.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:3__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/detailed_all-mpnet-base-v2__top_k:3__chunk_size:256.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:3__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_2024_06_12/detailed_all-mpnet-base-v2__top_k:3__chunk_size:64.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:3__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/detailed_all-mpnet-base-v2__top_k:3__chunk_size:64.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:3__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:128.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:128.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:256.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:256.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:64.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:64.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:128.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:128.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:256.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:256.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:64.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:64.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:128.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:128.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:256.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:256.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:64.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:64.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:128.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:128.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:256.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:256.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:64.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:64.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:128.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:128.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:256.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:256.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:64.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:64.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:128.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:128.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:256.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:256.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:64.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:64.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_2024_06_12/score_report_all-mpnet-base-v2__top_k:1__chunk_size:128.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:1__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/score_report_all-mpnet-base-v2__top_k:1__chunk_size:128.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:1__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_2024_06_12/score_report_all-mpnet-base-v2__top_k:1__chunk_size:256.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:1__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/score_report_all-mpnet-base-v2__top_k:1__chunk_size:256.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:1__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_2024_06_12/score_report_all-mpnet-base-v2__top_k:1__chunk_size:64.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:1__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/score_report_all-mpnet-base-v2__top_k:1__chunk_size:64.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:1__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_2024_06_12/score_report_all-mpnet-base-v2__top_k:2__chunk_size:128.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:2__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/score_report_all-mpnet-base-v2__top_k:2__chunk_size:128.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:2__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_2024_06_12/score_report_all-mpnet-base-v2__top_k:2__chunk_size:256.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:2__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/score_report_all-mpnet-base-v2__top_k:2__chunk_size:256.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:2__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_2024_06_12/score_report_all-mpnet-base-v2__top_k:2__chunk_size:64.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:2__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/score_report_all-mpnet-base-v2__top_k:2__chunk_size:64.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:2__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_2024_06_12/score_report_all-mpnet-base-v2__top_k:3__chunk_size:128.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:3__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/score_report_all-mpnet-base-v2__top_k:3__chunk_size:128.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:3__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_2024_06_12/score_report_all-mpnet-base-v2__top_k:3__chunk_size:256.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:3__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/score_report_all-mpnet-base-v2__top_k:3__chunk_size:256.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:3__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_2024_06_12/score_report_all-mpnet-base-v2__top_k:3__chunk_size:64.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:3__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/score_report_all-mpnet-base-v2__top_k:3__chunk_size:64.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:3__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:128.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:128.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:256.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:256.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:64.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:64.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:128.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:128.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:256.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:256.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:64.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:64.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:128.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:128.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:256.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:256.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:64.csv b/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:64.csv rename to evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:64.csv From d99a3555e53fbf33ab347c735ea5c09981cd61f4 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Thu, 20 Jun 2024 14:27:25 +0200 Subject: [PATCH 14/17] updating readme --- README.md | 9 +- evaluations/README.md | 1 - evaluations/evaluation_squad_rag.py | 189 ---------------------------- 3 files changed, 8 insertions(+), 191 deletions(-) delete mode 100644 evaluations/evaluation_squad_rag.py diff --git a/README.md b/README.md index ddbf372..0224367 100644 --- a/README.md +++ b/README.md @@ -2,5 +2,12 @@ This repository contains examples on how to use Haystack to build RAG architectures and evaluate their performance using human annotated datasets. -- [RAG Techniques/Architectures](evaluations/architectures/README.md) + +Check out the following sections for more details: + + +- [Evaluations](evaluations/README.md) + +- [Techniques/Architectures](evaluations/architectures/README.md) + - [Datasets](datasets/README.md) diff --git a/evaluations/README.md b/evaluations/README.md index e3353a0..7dfa901 100644 --- a/evaluations/README.md +++ b/evaluations/README.md @@ -3,5 +3,4 @@ Name | Dataset | Evaluation Metrics | Colab | --------------------------------------------------------------------------|---------------|--------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| [RAG Evaluation](evaluation_aragog.py) | ARAGOG | [ContextRelevance](https://docs.haystack.deepset.ai/docs/contextrelevanceevaluator) , [Faithfulness](https://docs.haystack.deepset.ai/docs/faithfulnessevaluator), [Semantic Answer Similarity](https://docs.haystack.deepset.ai/docs/sasevaluator) | Open In Colab | -[RAG Evaluation](evaluation_squad_rag.py) | SQuAD | [ContextRelevance](https://docs.haystack.deepset.ai/docs/contextrelevanceevaluator) , [Faithfulness](https://docs.haystack.deepset.ai/docs/faithfulnessevaluator), [Semantic Answer Similarity](https://docs.haystack.deepset.ai/docs/sasevaluator) | ToDo | [Extractive QA Evaluation](evaluation_squad_extractive_qa.py) | SQuAD | [Answer Exact Match](https://docs.haystack.deepset.ai/docs/answerexactmatchevaluator), [DocumentMRR](https://docs.haystack.deepset.ai/docs/documentmrrevaluator), [DocumentMAP](https://docs.haystack.deepset.ai/docs/documentmapevaluator), [DocumentRecall](https://docs.haystack.deepset.ai/docs/documentrecallevaluator), [Semantic Answer Similarity](https://docs.haystack.deepset.ai/docs/sasevaluator) | ToDo | \ No newline at end of file diff --git a/evaluations/evaluation_squad_rag.py b/evaluations/evaluation_squad_rag.py deleted file mode 100644 index e59fd68..0000000 --- a/evaluations/evaluation_squad_rag.py +++ /dev/null @@ -1,189 +0,0 @@ -import json -import os -import random -from pathlib import Path - -from haystack import Pipeline, Document -from haystack.components.embedders import SentenceTransformersDocumentEmbedder -from haystack.components.evaluators import ( - AnswerExactMatchEvaluator, - DocumentMRREvaluator, - DocumentMAPEvaluator, - DocumentRecallEvaluator, - SASEvaluator, -) -from haystack.components.evaluators.document_recall import RecallMode -from haystack.components.preprocessors import DocumentSplitter -from haystack.components.writers import DocumentWriter -from haystack.document_stores.in_memory import InMemoryDocumentStore -from haystack.document_stores.types import DuplicatePolicy -from haystack.evaluation import EvaluationRunResult -from tqdm import tqdm - -from architectures.basic_rag import basic_rag - -base_path = "datasets/SQuAD-2.0/transformed_squad/" - - -def load_transformed_squad(): - with open(base_path+"questions.jsonl", "r") as f: - questions = [json.loads(x) for x in f.readlines()] - for idx, question in enumerate(questions): - question["query_id"] = f"query_{idx}" - - def create_document(text: str, name: str): - return Document(content=text, meta={"name": name}) - - documents = [] - for root, dirs, files in os.walk(base_path+"articles"): - for article in files: - with open(f"{root}/{article}", "r") as f: - article_text = f.read() - documents.append(create_document(article_text, article.replace(".txt", ""))) - - return questions, documents - - -def indexing(documents, embedding_model, chunk_size): - document_store = InMemoryDocumentStore() - doc_splitter = DocumentSplitter(split_by="sentence", split_length=chunk_size) - doc_writer = DocumentWriter(document_store=document_store, policy=DuplicatePolicy.SKIP) - doc_embedder = SentenceTransformersDocumentEmbedder(model=embedding_model) - ingestion_pipe = Pipeline() - ingestion_pipe.add_component(instance=doc_splitter, name="doc_splitter") - ingestion_pipe.add_component(instance=doc_embedder, name="doc_embedder") - ingestion_pipe.add_component(instance=doc_writer, name="doc_writer") - ingestion_pipe.connect("doc_splitter.documents", "doc_embedder.documents") - ingestion_pipe.connect("doc_embedder.documents", "doc_writer.documents") - ingestion_pipe.run({"doc_splitter": {"documents": documents}}) - - return document_store - - -def run_basic_rag(doc_store, questions, embedding_model, top_k): - - rag = basic_rag(document_store=doc_store, embedding_model=embedding_model, top_k=top_k) - - # predicted data - retrieved_docs = [] - retrieved_contexts = [] - predicted_answers = [] - - for q in tqdm(questions): - response = rag.run( - data={"query_embedder": {"text": q}, - "prompt_builder": {"question": q}, - "answer_builder": {"query": q}} - ) - - # gather response data - retrieved_docs.append(response["answer_builder"]["answers"][0].documents) - retrieved_contexts.append([doc.content for doc in response["answer_builder"]["answers"][0].documents]) - predicted_answers.append(response["answer_builder"]["answers"][0].data) - - return retrieved_docs, predicted_answers, retrieved_contexts - - -def run_evaluation(embedding_model, ground_truth_docs, retrieved_docs, questions, predicted_answers, ground_truth_answers): - eval_pipeline = Pipeline() - eval_pipeline.add_component("doc_mrr", DocumentMRREvaluator()) - eval_pipeline.add_component("doc_map", DocumentMAPEvaluator()) - eval_pipeline.add_component("doc_recall_single_hit", DocumentRecallEvaluator(mode=RecallMode.SINGLE_HIT)) - eval_pipeline.add_component("doc_recall_multi_hit", DocumentRecallEvaluator(mode=RecallMode.MULTI_HIT)) - eval_pipeline.add_component("answer_exact", AnswerExactMatchEvaluator()) - eval_pipeline.add_component("sas", SASEvaluator(model=embedding_model)) - - # get the original documents from the retrieved documents which were split - original_retrieved_docs = [] - for doc in retrieved_docs: - original_docs = [] - for split_doc in doc: - for original_doc in ground_truth_docs: - if split_doc.meta["name"] == original_doc[0].meta["name"]: - original_docs.append(original_doc[0]) - original_retrieved_docs.append(original_docs) - - eval_pipeline_results = eval_pipeline.run( - { - "doc_mrr": {"ground_truth_documents": ground_truth_docs, "retrieved_documents": original_retrieved_docs}, - "sas": {"predicted_answers": predicted_answers, "ground_truth_answers": ground_truth_answers}, - "answer_exact": {"predicted_answers": predicted_answers, "ground_truth_answers": ground_truth_answers}, - "doc_map": {"ground_truth_documents": ground_truth_docs, "retrieved_documents": original_retrieved_docs}, - "doc_recall_single_hit": {"ground_truth_documents": ground_truth_docs, "retrieved_documents": original_retrieved_docs}, - "doc_recall_multi_hit": {"ground_truth_documents": ground_truth_docs, "retrieved_documents": original_retrieved_docs} - } - ) - - results = { - "doc_mrr": eval_pipeline_results['doc_mrr'], - "sas": eval_pipeline_results['sas'], - "doc_map": eval_pipeline_results['doc_map'], - "doc_recall_single_hit": eval_pipeline_results['doc_recall_single_hit'], - "doc_recall_multi_hit": eval_pipeline_results['doc_recall_multi_hit'] - } - - inputs = {'questions': questions, - 'true_answers': ground_truth_answers, - 'predicted_answers': predicted_answers, - 'contexts': retrieved_docs - } - - return results, inputs - - -def parameter_tuning(queries, documents): - """ - Run the basic RAG model with different parameters, and evaluate the results. - - The parameters to be tuned are: embedding model, top_k, and chunk_size. - """ - embedding_models = { - "sentence-transformers/all-MiniLM-L6-v2", - "sentence-transformers/msmarco-distilroberta-base-v2", - "sentence-transformers/all-mpnet-base-v2" - } - top_k_values = [1, 2, 3] - chunk_sizes = [5, 10, 15] - - # create results directory if it does not exist using Pathlib - out_path = Path("squad_results") - out_path.mkdir(exist_ok=True) - - questions = [] - ground_truth_answers = [] - ground_truth_docs = [] - for sample in queries: - questions.append(sample["question"]) - ground_truth_answers.append(sample["answers"]["text"][0]) - ground_truth_docs.append([doc for doc in documents if doc.meta["name"] == sample["document"]]) - - for embedding_model in embedding_models: - for top_k in top_k_values: - for chunk_size in chunk_sizes: - name_params = f"{embedding_model.split('/')[-1]}__top_k:{top_k}__chunk_size:{chunk_size}" - print(name_params) - print("Indexing documents") - doc_store = indexing(documents, embedding_model, chunk_size) - print("Running RAG pipeline") - retrieved_docs, predicted_answers, retrieved_contexts = run_basic_rag( - doc_store, questions, embedding_model, top_k - ) - print(f"Running evaluation") - results, inputs = run_evaluation( - embedding_model, ground_truth_docs, retrieved_docs, questions, predicted_answers, - ground_truth_answers - ) - eval_results = EvaluationRunResult(run_name=name_params, inputs=inputs, results=results) - eval_results.score_report().to_csv(f"{out_path}/score_report_{name_params}.csv") - eval_results.to_pandas().to_csv(f"{out_path}/detailed_{name_params}.csv") - - -def main(): - random.seed(42) - all_queries, documents = load_transformed_squad() - queries = random.sample(all_queries, 100) # take a sample of 100 questions - parameter_tuning(queries, documents) - - -if __name__ == "__main__": - main() From b2a379d3d2331b05b2419e3ee2d1e064d974c7d0 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Thu, 20 Jun 2024 16:23:12 +0200 Subject: [PATCH 15/17] updating readmes and results files --- README.md | 7 ++-- datasets/README.md | 25 ++++++++---- evaluations/README.md | 11 ++++-- evaluations/architectures/README.md | 39 +++++++++++++------ .../comparative_scores.csv | 0 ...-MiniLM-L6-v2__top_k:1__chunk_size:128.csv | 0 ...-MiniLM-L6-v2__top_k:1__chunk_size:256.csv | 0 ...l-MiniLM-L6-v2__top_k:1__chunk_size:64.csv | 0 ...-MiniLM-L6-v2__top_k:2__chunk_size:128.csv | 0 ...-MiniLM-L6-v2__top_k:2__chunk_size:256.csv | 0 ...l-MiniLM-L6-v2__top_k:2__chunk_size:64.csv | 0 ...-MiniLM-L6-v2__top_k:3__chunk_size:128.csv | 0 ...-MiniLM-L6-v2__top_k:3__chunk_size:256.csv | 0 ...l-MiniLM-L6-v2__top_k:3__chunk_size:64.csv | 0 ...mpnet-base-v2__top_k:1__chunk_size:128.csv | 0 ...mpnet-base-v2__top_k:1__chunk_size:256.csv | 0 ...-mpnet-base-v2__top_k:1__chunk_size:64.csv | 0 ...mpnet-base-v2__top_k:2__chunk_size:128.csv | 0 ...mpnet-base-v2__top_k:2__chunk_size:256.csv | 0 ...-mpnet-base-v2__top_k:2__chunk_size:64.csv | 0 ...mpnet-base-v2__top_k:3__chunk_size:128.csv | 0 ...mpnet-base-v2__top_k:3__chunk_size:256.csv | 0 ...-mpnet-base-v2__top_k:3__chunk_size:64.csv | 0 ...berta-base-v2__top_k:1__chunk_size:128.csv | 0 ...berta-base-v2__top_k:1__chunk_size:256.csv | 0 ...oberta-base-v2__top_k:1__chunk_size:64.csv | 0 ...berta-base-v2__top_k:2__chunk_size:128.csv | 0 ...berta-base-v2__top_k:2__chunk_size:256.csv | 0 ...oberta-base-v2__top_k:2__chunk_size:64.csv | 0 ...berta-base-v2__top_k:3__chunk_size:128.csv | 0 ...berta-base-v2__top_k:3__chunk_size:256.csv | 0 ...oberta-base-v2__top_k:3__chunk_size:64.csv | 0 ...-MiniLM-L6-v2__top_k:1__chunk_size:128.csv | 0 ...-MiniLM-L6-v2__top_k:1__chunk_size:256.csv | 0 ...l-MiniLM-L6-v2__top_k:1__chunk_size:64.csv | 0 ...-MiniLM-L6-v2__top_k:2__chunk_size:128.csv | 0 ...-MiniLM-L6-v2__top_k:2__chunk_size:256.csv | 0 ...l-MiniLM-L6-v2__top_k:2__chunk_size:64.csv | 0 ...-MiniLM-L6-v2__top_k:3__chunk_size:128.csv | 0 ...-MiniLM-L6-v2__top_k:3__chunk_size:256.csv | 0 ...l-MiniLM-L6-v2__top_k:3__chunk_size:64.csv | 0 ...mpnet-base-v2__top_k:1__chunk_size:128.csv | 0 ...mpnet-base-v2__top_k:1__chunk_size:256.csv | 0 ...-mpnet-base-v2__top_k:1__chunk_size:64.csv | 0 ...mpnet-base-v2__top_k:2__chunk_size:128.csv | 0 ...mpnet-base-v2__top_k:2__chunk_size:256.csv | 0 ...-mpnet-base-v2__top_k:2__chunk_size:64.csv | 0 ...mpnet-base-v2__top_k:3__chunk_size:128.csv | 0 ...mpnet-base-v2__top_k:3__chunk_size:256.csv | 0 ...-mpnet-base-v2__top_k:3__chunk_size:64.csv | 0 ...berta-base-v2__top_k:1__chunk_size:128.csv | 0 ...berta-base-v2__top_k:1__chunk_size:256.csv | 0 ...oberta-base-v2__top_k:1__chunk_size:64.csv | 0 ...berta-base-v2__top_k:2__chunk_size:128.csv | 0 ...berta-base-v2__top_k:2__chunk_size:256.csv | 0 ...oberta-base-v2__top_k:2__chunk_size:64.csv | 0 ...berta-base-v2__top_k:3__chunk_size:128.csv | 0 ...berta-base-v2__top_k:3__chunk_size:256.csv | 0 ...oberta-base-v2__top_k:3__chunk_size:64.csv | 0 59 files changed, 55 insertions(+), 27 deletions(-) rename evaluations/results/{results_aragog_base_vs_hyde_2024_06_20 => aragog_baseline_rag_vs_hyde_2024_06_20}/comparative_scores.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/detailed_all-mpnet-base-v2__top_k:1__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/detailed_all-mpnet-base-v2__top_k:1__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/detailed_all-mpnet-base-v2__top_k:1__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/detailed_all-mpnet-base-v2__top_k:2__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/detailed_all-mpnet-base-v2__top_k:2__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/detailed_all-mpnet-base-v2__top_k:2__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/detailed_all-mpnet-base-v2__top_k:3__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/detailed_all-mpnet-base-v2__top_k:3__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/detailed_all-mpnet-base-v2__top_k:3__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/score_report_all-mpnet-base-v2__top_k:1__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/score_report_all-mpnet-base-v2__top_k:1__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/score_report_all-mpnet-base-v2__top_k:1__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/score_report_all-mpnet-base-v2__top_k:2__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/score_report_all-mpnet-base-v2__top_k:2__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/score_report_all-mpnet-base-v2__top_k:2__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/score_report_all-mpnet-base-v2__top_k:3__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/score_report_all-mpnet-base-v2__top_k:3__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/score_report_all-mpnet-base-v2__top_k:3__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:64.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:128.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:256.csv (100%) rename evaluations/results/{results_aragog_parameter_search_2024_06_12 => aragog_parameter_search_2024_06_12}/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:64.csv (100%) diff --git a/README.md b/README.md index 0224367..a801842 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,9 @@ # haystack-evaluation -This repository contains examples on how to use Haystack to build RAG architectures and evaluate their performance using human annotated datasets. - - -Check out the following sections for more details: +This repository contains examples on how to use Haystack to evaluate systems build with Haystack for different tasks +and datasets. +This repository is structured as: - [Evaluations](evaluations/README.md) diff --git a/datasets/README.md b/datasets/README.md index 250c8bb..65160b0 100644 --- a/datasets/README.md +++ b/datasets/README.md @@ -1,8 +1,19 @@ # Datasets -## 1. ARAGOG -This dataset is based on the paper [Advanced Retrieval Augmented Generation Output Grading (ARAGOG)](https://arxiv.org/pdf/2404.01037). It's a collection of papers from ArXiv covering topics around Transformers and Large Language Models, all in PDF format. +## Overview + + +Name | Suitable Metrics | Description +------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------| +ARAGOG | [ContextRelevance](https://docs.haystack.deepset.ai/docs/contextrelevanceevaluator), [Faithfulness](https://docs.haystack.deepset.ai/docs/faithfulnessevaluator), [Semantic Answer Similarity](https://docs.haystack.deepset.ai/docs/sasevaluator) |A collection of papers from ArXiv covering topics around Transformers and Large Language Models, all in PDF format. +SQuAD 1.1 | [Answer Exact Match](https://docs.haystack.deepset.ai/docs/answerexactmatchevaluator), [DocumentMRR](https://docs.haystack.deepset.ai/docs/documentmrrevaluator), [DocumentMAP](https://docs.haystack.deepset.ai/docs/documentmapevaluator), [DocumentRecall](https://docs.haystack.deepset.ai/docs/documentrecallevaluator) [Semantic Answer Similarity](https://docs.haystack.deepset.ai/docs/sasevaluator) | A collection of questions and answers from Wikipedia articles, typically used for training and evaluating models for extractive question-answering tasks. + + +## ARAGOG + +This dataset is based on the paper [Advanced Retrieval Augmented Generation Output Grading (ARAGOG)](https://arxiv.org/pdf/2404.01037). It's a +collection of papers from ArXiv covering topics around Transformers and Large Language Models, all in PDF format. The dataset contains: - 13 PDF papers. @@ -15,13 +26,13 @@ The following metrics can be used: -## 2. SQuAD dataset - -The SQuAD 1.1 dataset is a collection of questions and answers from Wikipedia articles, and it's typically used for training and evaluating models for extractive question-answering tasks. -You can find more about this dataset on the paper [SQuAD: 100,000+ Questions for Machine Comprehension of Text](https://aclanthology.org/D16-1264/) and on the official website: -[https://rajpurkar.github.io/SQuAD-explorer/](https://rajpurkar.github.io/SQuAD-explorer/) +## SQuAD dataset +The SQuAD 1.1 dataset is a collection of questions and answers from Wikipedia articles, and it's typically used for +training and evaluating models for extractive question-answering tasks. You can find more about this dataset on the +paper [SQuAD: 100,000+ Questions for Machine Comprehension of Text](https://aclanthology.org/D16-1264/) and on the +official website [https://rajpurkar.github.io/SQuAD-explorer/](https://rajpurkar.github.io/SQuAD-explorer/) The dataset contains: - 490 Wikipedia articles in text format. diff --git a/evaluations/README.md b/evaluations/README.md index 7dfa901..52832c0 100644 --- a/evaluations/README.md +++ b/evaluations/README.md @@ -1,6 +1,9 @@ # Evaluations -Name | Dataset | Evaluation Metrics | Colab | ---------------------------------------------------------------------------|---------------|--------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -[RAG Evaluation](evaluation_aragog.py) | ARAGOG | [ContextRelevance](https://docs.haystack.deepset.ai/docs/contextrelevanceevaluator) , [Faithfulness](https://docs.haystack.deepset.ai/docs/faithfulnessevaluator), [Semantic Answer Similarity](https://docs.haystack.deepset.ai/docs/sasevaluator) | Open In Colab | -[Extractive QA Evaluation](evaluation_squad_extractive_qa.py) | SQuAD | [Answer Exact Match](https://docs.haystack.deepset.ai/docs/answerexactmatchevaluator), [DocumentMRR](https://docs.haystack.deepset.ai/docs/documentmrrevaluator), [DocumentMAP](https://docs.haystack.deepset.ai/docs/documentmapevaluator), [DocumentRecall](https://docs.haystack.deepset.ai/docs/documentrecallevaluator), [Semantic Answer Similarity](https://docs.haystack.deepset.ai/docs/sasevaluator) | ToDo | \ No newline at end of file +Here we provide full examples on how to use Haystack to evaluate systems build also with Haystack for different tasks and datasets. + +Name | Dataset | Evaluation Metrics | Colab | +----------------------------------------------------------------------------------|---------------|--------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +[RAG with parameter search](evaluation_aragog.py) | ARAGOG | [ContextRelevance](https://docs.haystack.deepset.ai/docs/contextrelevanceevaluator) , [Faithfulness](https://docs.haystack.deepset.ai/docs/faithfulnessevaluator), [Semantic Answer Similarity](https://docs.haystack.deepset.ai/docs/sasevaluator) | Open In Colab | +[Baseline RAG vs HyDE using Harness](evaluation_aragog_harness.py) | ARAGOG | [ContextRelevance](https://docs.haystack.deepset.ai/docs/contextrelevanceevaluator) , [Faithfulness](https://docs.haystack.deepset.ai/docs/faithfulnessevaluator), [Semantic Answer Similarity](https://docs.haystack.deepset.ai/docs/sasevaluator) | - | +[Extractive QA with parameter search](evaluation_squad_extractive_qa.py) | SQuAD | [Answer Exact Match](https://docs.haystack.deepset.ai/docs/answerexactmatchevaluator), [DocumentMRR](https://docs.haystack.deepset.ai/docs/documentmrrevaluator), [DocumentMAP](https://docs.haystack.deepset.ai/docs/documentmapevaluator), [DocumentRecall](https://docs.haystack.deepset.ai/docs/documentrecallevaluator), [Semantic Answer Similarity](https://docs.haystack.deepset.ai/docs/sasevaluator) | - | \ No newline at end of file diff --git a/evaluations/architectures/README.md b/evaluations/architectures/README.md index 006637d..e470fb5 100644 --- a/evaluations/architectures/README.md +++ b/evaluations/architectures/README.md @@ -1,19 +1,39 @@ # RAG Techniques/Architectures +## Overview -## Basic RAG +Here we provide full examples on how to use Haystack to evaluate systems build also with Haystack for different tasks and datasets. + +Name | Code | Description +----------------------------------------|--------------------------------------|--------------------------------------------------------------------------------------------------------------------| +Basic RAG | [basic_rag.py](basic_rag.py) | Retrieves the top-k document chunks and then passes them to an LLM generate the answer. +Extractive QA | [extractive_qa.py](extractive_qa.py) | Retrieves the top-_k_ documents and uses an extractive QA model to extract the answer from the documents. +Hypothetical Document Embeddings (HyDE) | [hyde_rag.py](hyde_rag.py) | HyDE generates a hypothetical document from the query and uses it to retrieve similar documents from the document embedding space. +Sentence-Window Retrieval | ToDo | Breaks down documents into smaller chunks (sentences) and indexes them separately. Retrieves the most relevant sentences and replaces them with the full surrounding context. +Document Summary Index | ToDo | ToDo +Multi-Query | ToDo | ToDo +Maximal Marginal Relevance (MMR) | ToDo | ToDo +Cohere Re-ranker | ToDo | ToDo +LLM-based Re-ranker | ToDo | ToDo + + + + +### Basic RAG This is the baseline RAG technique, that retrieves the top-k document chunks and then uses them to generate the answer. It uses the same text chunk for indexing/embedding as well as for generating answers. +--- -## Extractive QA +### Extractive QA -This technique retrieves the top-_k_ documents, but instead of using the generator to generate the answer, it uses an +This technique retrieves the top-_k_ documents, but instead of using the generator to provide the answer, it uses an extractive QA model to extract the answer from the retrieved documents. +--- -## Hypothetical Document Embeddings (HyDE) +### Hypothetical Document Embeddings (HyDE) HyDE first zero-shot prompts an instruction-following language model to generate a “fake” hypothetical document that captures relevant textual patterns from the initial query - in practice, this is done five times. @@ -25,16 +45,11 @@ retrieved based on vector similarity. - Paper: [Precise Zero-Shot Dense Retrieval without Relevance Labels](https://aclanthology.org/2023.acl-long.99.pdf) - Blog: [HyDE: Hypothetical Document Embeddings for Zero-Shot Dense Retrieval](https://huggingface.co/blog/hyde-zero-shot-dense-retrieval) +--- -## Sentence-Window Retrieval +### Sentence-Window Retrieval The sentence-window approach breaks down documents into smaller chunks (sentences) and indexes them separately. During retrieval, we retrieve the sentences that are most relevant to the query via similarity search and replace the -sentence with the full surrounding context, using a static sentence-window around the context. - -## Document Summary Index -## Multi-Query -## Maximal Marginal Relevance (MMR) -## Cohere Re-ranker -## LLM-based Re-ranker \ No newline at end of file +sentence with the full surrounding context, using a static sentence-window around the context. \ No newline at end of file diff --git a/evaluations/results/results_aragog_base_vs_hyde_2024_06_20/comparative_scores.csv b/evaluations/results/aragog_baseline_rag_vs_hyde_2024_06_20/comparative_scores.csv similarity index 100% rename from evaluations/results/results_aragog_base_vs_hyde_2024_06_20/comparative_scores.csv rename to evaluations/results/aragog_baseline_rag_vs_hyde_2024_06_20/comparative_scores.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:128.csv b/evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:128.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:256.csv b/evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:256.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:64.csv b/evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:64.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:128.csv b/evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:128.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:256.csv b/evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:256.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:64.csv b/evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:64.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:128.csv b/evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:128.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:256.csv b/evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:256.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:64.csv b/evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:64.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:1__chunk_size:128.csv b/evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:1__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:1__chunk_size:128.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:1__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:1__chunk_size:256.csv b/evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:1__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:1__chunk_size:256.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:1__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:1__chunk_size:64.csv b/evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:1__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:1__chunk_size:64.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:1__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:2__chunk_size:128.csv b/evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:2__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:2__chunk_size:128.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:2__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:2__chunk_size:256.csv b/evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:2__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:2__chunk_size:256.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:2__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:2__chunk_size:64.csv b/evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:2__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:2__chunk_size:64.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:2__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:3__chunk_size:128.csv b/evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:3__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:3__chunk_size:128.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:3__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:3__chunk_size:256.csv b/evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:3__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:3__chunk_size:256.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:3__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:3__chunk_size:64.csv b/evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:3__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:3__chunk_size:64.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/detailed_all-mpnet-base-v2__top_k:3__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:128.csv b/evaluations/results/aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:128.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:256.csv b/evaluations/results/aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:256.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:64.csv b/evaluations/results/aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:64.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:128.csv b/evaluations/results/aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:128.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:256.csv b/evaluations/results/aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:256.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:64.csv b/evaluations/results/aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:64.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:128.csv b/evaluations/results/aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:128.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:256.csv b/evaluations/results/aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:256.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:64.csv b/evaluations/results/aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:64.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:128.csv b/evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:128.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:256.csv b/evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:256.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:64.csv b/evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:64.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:128.csv b/evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:128.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:256.csv b/evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:256.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:64.csv b/evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:64.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:128.csv b/evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:128.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:256.csv b/evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:256.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:64.csv b/evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:64.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:1__chunk_size:128.csv b/evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:1__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:1__chunk_size:128.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:1__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:1__chunk_size:256.csv b/evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:1__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:1__chunk_size:256.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:1__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:1__chunk_size:64.csv b/evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:1__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:1__chunk_size:64.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:1__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:2__chunk_size:128.csv b/evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:2__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:2__chunk_size:128.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:2__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:2__chunk_size:256.csv b/evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:2__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:2__chunk_size:256.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:2__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:2__chunk_size:64.csv b/evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:2__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:2__chunk_size:64.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:2__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:3__chunk_size:128.csv b/evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:3__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:3__chunk_size:128.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:3__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:3__chunk_size:256.csv b/evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:3__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:3__chunk_size:256.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:3__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:3__chunk_size:64.csv b/evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:3__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:3__chunk_size:64.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/score_report_all-mpnet-base-v2__top_k:3__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:128.csv b/evaluations/results/aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:128.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:256.csv b/evaluations/results/aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:256.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:64.csv b/evaluations/results/aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:64.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:128.csv b/evaluations/results/aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:128.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:256.csv b/evaluations/results/aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:256.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:64.csv b/evaluations/results/aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:64.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:64.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:128.csv b/evaluations/results/aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:128.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:128.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:128.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:256.csv b/evaluations/results/aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:256.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:256.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:256.csv diff --git a/evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:64.csv b/evaluations/results/aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:64.csv similarity index 100% rename from evaluations/results/results_aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:64.csv rename to evaluations/results/aragog_parameter_search_2024_06_12/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:64.csv From 961d3eb9651f3b55d30c2ea6ca1258cc4b3817a8 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Fri, 21 Jun 2024 10:36:08 +0200 Subject: [PATCH 16/17] fixing notebook --- .../analyze_aragog_parameter_search.ipynb | 70 ++++++++----------- 1 file changed, 31 insertions(+), 39 deletions(-) diff --git a/evaluations/analyze_aragog_parameter_search.ipynb b/evaluations/analyze_aragog_parameter_search.ipynb index fc84cab..17f4787 100644 --- a/evaluations/analyze_aragog_parameter_search.ipynb +++ b/evaluations/analyze_aragog_parameter_search.ipynb @@ -73,7 +73,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "id": "a03966eb-658d-4e16-bce0-e198886eca35", "metadata": { "id": "a03966eb-658d-4e16-bce0-e198886eca35" @@ -81,12 +81,12 @@ "outputs": [], "source": [ "import os\n", - "df = read_scores('results/results_aragog_2024_06_12/')" + "df = read_scores('results/aragog_parameter_search_2024_06_12/')" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "id": "a018bfb3-755b-4a4f-9f2d-cf69201f9f6d", "metadata": { "colab": { @@ -434,7 +434,7 @@ "26 3 256 " ] }, - "execution_count": 4, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -455,7 +455,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 7, "id": "44d1fce4-430d-4365-b27e-d6e862eabc75", "metadata": {}, "outputs": [ @@ -502,7 +502,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 8, "id": "8616c992-934a-414c-89e8-ea8ccad2408e", "metadata": {}, "outputs": [], @@ -512,7 +512,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 9, "id": "b4328bc2-dccd-4a18-96d1-818df2d7e8d5", "metadata": {}, "outputs": [ @@ -528,7 +528,7 @@ "Name: 1, dtype: object" ] }, - "execution_count": 44, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -539,7 +539,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 10, "id": "56d6327f-bec1-4fbe-a7fe-9ab4e51b4160", "metadata": {}, "outputs": [ @@ -555,7 +555,7 @@ "Name: 0, dtype: object" ] }, - "execution_count": 45, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -574,7 +574,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 11, "id": "dbc6831a-3eca-461c-8243-a2e9659fb220", "metadata": {}, "outputs": [], @@ -584,7 +584,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 12, "id": "2c9b5ef4-d141-4219-9af8-6b86d3fbbb62", "metadata": {}, "outputs": [ @@ -600,7 +600,7 @@ "Name: 17, dtype: object" ] }, - "execution_count": 49, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -611,7 +611,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 13, "id": "f001f85f-2e95-43c9-b665-9a4aafa2b70f", "metadata": {}, "outputs": [ @@ -627,7 +627,7 @@ "Name: 9, dtype: object" ] }, - "execution_count": 50, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -646,7 +646,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 14, "id": "d6105b7f-e1b3-4654-a39e-915797fa7c58", "metadata": {}, "outputs": [], @@ -656,7 +656,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 15, "id": "00d06d75-9f43-40fb-85ec-c4fea16cb1e4", "metadata": {}, "outputs": [ @@ -672,7 +672,7 @@ "Name: 26, dtype: object" ] }, - "execution_count": 52, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -683,7 +683,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 16, "id": "6c6a51b5-6945-4832-b4dd-b273f8ee0fe9", "metadata": {}, "outputs": [ @@ -699,7 +699,7 @@ "Name: 21, dtype: object" ] }, - "execution_count": 53, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -709,28 +709,28 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "id": "6a5bb992-867c-41a6-bf25-b69645ad19b2", + "cell_type": "markdown", + "id": "32f2c613-361c-401e-9e41-17899b29eb6d", "metadata": {}, - "outputs": [], - "source": [] + "source": [ + "## Let's inspect individual queries for this parameter configuration" + ] }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 19, "id": "68e4ed5e-5afe-4db2-a93e-f4232e733092", "metadata": { "id": "68e4ed5e-5afe-4db2-a93e-f4232e733092" }, "outputs": [], "source": [ - "detailed_best_sas_df = pd.read_csv(\"results/results_aragog_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:128.csv\")" + "detailed_best_sas_df = pd.read_csv(\"results/aragog_parameter_search_2024_06_12/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:128.csv\")" ] }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 20, "id": "c7f425f4-ed35-4625-8824-f06e33622eac", "metadata": { "id": "c7f425f4-ed35-4625-8824-f06e33622eac", @@ -952,7 +952,7 @@ "[107 rows x 7 columns]" ] }, - "execution_count": 56, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -963,7 +963,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 21, "id": "0e2da651-7843-4dab-bc55-7f51d9965901", "metadata": { "id": "0e2da651-7843-4dab-bc55-7f51d9965901" @@ -992,7 +992,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 22, "id": "3d7e17ed-cbce-4dd6-a69d-ad217630fa23", "metadata": { "id": "3d7e17ed-cbce-4dd6-a69d-ad217630fa23", @@ -1177,14 +1177,6 @@ "source": [ "inspect(44)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "69641552-244b-46f3-832a-56aab8db3933", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From 8d87f9e2392c4385bbe9ee22ebbaec2fdcdbf5d2 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Fri, 21 Jun 2024 10:59:03 +0200 Subject: [PATCH 17/17] adding squad results --- ...l-MiniLM-L6-v2__top_k:1__chunk_size:10.csv | 125 ++++++++++++++++ ...l-MiniLM-L6-v2__top_k:1__chunk_size:15.csv | 122 ++++++++++++++++ ...ll-MiniLM-L6-v2__top_k:1__chunk_size:5.csv | 124 ++++++++++++++++ ...l-MiniLM-L6-v2__top_k:2__chunk_size:10.csv | 120 ++++++++++++++++ ...l-MiniLM-L6-v2__top_k:2__chunk_size:15.csv | 122 ++++++++++++++++ ...ll-MiniLM-L6-v2__top_k:2__chunk_size:5.csv | 123 ++++++++++++++++ ...l-MiniLM-L6-v2__top_k:3__chunk_size:10.csv | 120 ++++++++++++++++ ...l-MiniLM-L6-v2__top_k:3__chunk_size:15.csv | 125 ++++++++++++++++ ...ll-MiniLM-L6-v2__top_k:3__chunk_size:5.csv | 117 +++++++++++++++ ...-mpnet-base-v2__top_k:1__chunk_size:10.csv | 122 ++++++++++++++++ ...-mpnet-base-v2__top_k:1__chunk_size:15.csv | 121 ++++++++++++++++ ...l-mpnet-base-v2__top_k:1__chunk_size:5.csv | 125 ++++++++++++++++ ...-mpnet-base-v2__top_k:2__chunk_size:10.csv | 121 ++++++++++++++++ ...-mpnet-base-v2__top_k:2__chunk_size:15.csv | 121 ++++++++++++++++ ...l-mpnet-base-v2__top_k:2__chunk_size:5.csv | 122 ++++++++++++++++ ...-mpnet-base-v2__top_k:3__chunk_size:10.csv | 117 +++++++++++++++ ...-mpnet-base-v2__top_k:3__chunk_size:15.csv | 121 ++++++++++++++++ ...l-mpnet-base-v2__top_k:3__chunk_size:5.csv | 119 ++++++++++++++++ ...oberta-base-v2__top_k:1__chunk_size:10.csv | 133 ++++++++++++++++++ ...oberta-base-v2__top_k:1__chunk_size:15.csv | 129 +++++++++++++++++ ...roberta-base-v2__top_k:1__chunk_size:5.csv | 127 +++++++++++++++++ ...oberta-base-v2__top_k:2__chunk_size:10.csv | 128 +++++++++++++++++ ...oberta-base-v2__top_k:2__chunk_size:15.csv | 123 ++++++++++++++++ ...roberta-base-v2__top_k:2__chunk_size:5.csv | 127 +++++++++++++++++ ...oberta-base-v2__top_k:3__chunk_size:10.csv | 125 ++++++++++++++++ ...oberta-base-v2__top_k:3__chunk_size:15.csv | 122 ++++++++++++++++ ...roberta-base-v2__top_k:3__chunk_size:5.csv | 124 ++++++++++++++++ ...l-MiniLM-L6-v2__top_k:1__chunk_size:10.csv | 6 + ...l-MiniLM-L6-v2__top_k:1__chunk_size:15.csv | 6 + ...ll-MiniLM-L6-v2__top_k:1__chunk_size:5.csv | 6 + ...l-MiniLM-L6-v2__top_k:2__chunk_size:10.csv | 6 + ...l-MiniLM-L6-v2__top_k:2__chunk_size:15.csv | 6 + ...ll-MiniLM-L6-v2__top_k:2__chunk_size:5.csv | 6 + ...l-MiniLM-L6-v2__top_k:3__chunk_size:10.csv | 6 + ...l-MiniLM-L6-v2__top_k:3__chunk_size:15.csv | 6 + ...ll-MiniLM-L6-v2__top_k:3__chunk_size:5.csv | 6 + ...-mpnet-base-v2__top_k:1__chunk_size:10.csv | 6 + ...-mpnet-base-v2__top_k:1__chunk_size:15.csv | 6 + ...l-mpnet-base-v2__top_k:1__chunk_size:5.csv | 6 + ...-mpnet-base-v2__top_k:2__chunk_size:10.csv | 6 + ...-mpnet-base-v2__top_k:2__chunk_size:15.csv | 6 + ...l-mpnet-base-v2__top_k:2__chunk_size:5.csv | 6 + ...-mpnet-base-v2__top_k:3__chunk_size:10.csv | 6 + ...-mpnet-base-v2__top_k:3__chunk_size:15.csv | 6 + ...l-mpnet-base-v2__top_k:3__chunk_size:5.csv | 6 + ...oberta-base-v2__top_k:1__chunk_size:10.csv | 6 + ...oberta-base-v2__top_k:1__chunk_size:15.csv | 6 + ...roberta-base-v2__top_k:1__chunk_size:5.csv | 6 + ...oberta-base-v2__top_k:2__chunk_size:10.csv | 6 + ...oberta-base-v2__top_k:2__chunk_size:15.csv | 6 + ...roberta-base-v2__top_k:2__chunk_size:5.csv | 6 + ...oberta-base-v2__top_k:3__chunk_size:10.csv | 6 + ...oberta-base-v2__top_k:3__chunk_size:15.csv | 6 + ...roberta-base-v2__top_k:3__chunk_size:5.csv | 6 + 54 files changed, 3487 insertions(+) create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:10.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:15.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:5.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:10.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:15.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:5.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:10.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:15.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:5.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:1__chunk_size:10.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:1__chunk_size:15.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:1__chunk_size:5.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:2__chunk_size:10.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:2__chunk_size:15.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:2__chunk_size:5.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:3__chunk_size:10.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:3__chunk_size:15.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:3__chunk_size:5.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:10.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:15.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:5.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:10.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:15.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:5.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:10.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:15.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:5.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:10.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:15.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:5.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:10.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:15.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:5.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:10.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:15.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:5.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:1__chunk_size:10.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:1__chunk_size:15.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:1__chunk_size:5.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:2__chunk_size:10.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:2__chunk_size:15.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:2__chunk_size:5.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:3__chunk_size:10.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:3__chunk_size:15.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:3__chunk_size:5.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:10.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:15.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:5.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:10.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:15.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:5.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:10.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:15.csv create mode 100644 evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:5.csv diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:10.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:10.csv new file mode 100644 index 0000000..fb3cc58 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:10.csv @@ -0,0 +1,125 @@ +questions,true_answers,predicted_answers,retrieved_docs,doc_mrr,sas,doc_map,doc_recall_single_hit,doc_recall_multi_hit +What were the earliest priesthoods? ,flamines,flamines,"[Document(id=ac8e9c286327aea24d00bceb002b08ad3fbc3007673c77da9b48b6b6b91aa76f, content: ' Some rites of the domus (household) were held in public places but were legally defined as privata ...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5865511627398434)]",1.0,1.0,1.0,1.0,1.0 +When was a third visit to Mnafeld scheduled? ,early 1546,2010s,"[Document(id=6cc80ac4bfb8c070a3cad221dd87e4b7fa2117e16d83462bf23ff87410cae387, content: ' Most were eventually rebuilt, using castings of the original façades to retain the architectural st...', meta: {'name': 'San_Diego', 'source_id': '04aa1c0a61a5a047684cd62ea4175007928dadda83d273a78287bcdbb722ed63', 'page_number': 1}, score: 0.3697025378541572)]",0.0,0.09833155572414398,0.0,0.0,0.0 +What was the name of the airfield where heavy raids took place?,Farnborough,Coventry,"[Document(id=d8fd97e153fa909813de22390c7c4607821a5041d6350b606b4f95c6304faa9d, content: ' ""Pathfinders"" from 12 Kampfgruppe 100 (Bomb Group 100 or KGr 100) led 437 bombers from KG 1, KG 3, ...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 0.5517882669038934)]",1.0,0.4347962737083435,1.0,1.0,1.0 +The Modern English ending -'s is derived from what ending in Old English?,-es,n,"[Document(id=e8db61efa2c902ccb7106c7f85b976fcd3c5e267fd62cf6003d23ef9f3a8b528, content: ' Macrons over vowels were originally used not to mark long vowels (as in modern editions), but to in...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.5244903194235846)]",1.0,0.3367372155189514,1.0,1.0,1.0 +In what area is it common for spiritual mentorship to be extremely high?,the West,Hinduism,"[Document(id=c1b15c9f6474425049e145f56eea6b22790074204f55cd26eb005fca608f63a8, content: ' This guidance is generally offered during a ceremony called the patriarchal blessing. +In Hinduism t...', meta: {'name': 'Teacher', 'source_id': '8d3b7bd3176a10380300a49fa713b8fffa5e77a181df51ea823d2bdeb46d4c7b', 'page_number': 1}, score: 0.47522311411982915)]",1.0,0.20152544975280762,1.0,1.0,1.0 +Who is the only Greek volleyball club to win European titles?,Olympiacos,"Olympiacos, Panathinaikos, AEK Athens and PAOK. +The Greek national basketball team has a decades-long tradition of excellence in the sport, being considered among the world's top basketball powers. As of 2012, it ranked 4th in the world and 2nd in Europe. They have won the European Championship twice in 1987 and 2005, and have reached the final four in two of the last four FIBA World Championships, taking the second place in the world in 2006 FIBA World Championship, after a spectacular 101–95 win against Team USA in the tournament's semifinal. The domestic top basketball league, A1 Ethniki, is composed of fourteen teams. The most successful Greek teams are Olympiacos, Panathinaikos, Aris Thessaloniki","[Document(id=4b9cc1d29ed0f9a4cb783c7febb93802073dee536efd60ee53a2c3e5bec9cccf, content: ' Their best ever performance was in the 1896 Summer Olympics, when Greece finished second in the med...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.6432507807845881)]",1.0,0.4490772783756256,1.0,1.0,1.0 +How many judges were on the panel for this season?,four,three,"[Document(id=17489faf6fb39343de67cc2bfe448e959559775109bb646aa0e4d02b716ddf77, content: ' Jennifer Lopez and Steven Tyler joined the judging panel in season ten, but both left after two sea...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 0.6295865021297492)]",1.0,0.8232484459877014,1.0,1.0,1.0 +"Along with a chemical process, what type of process is used by night vision devices to convert infrared into visible light?",electrical,electrical,"[Document(id=1c8a379dc092fa8d374927640e6d16f46286416581451c66a1da80fef1412acc, content: ' Infrared can also be used to detect protostars before they begin to emit visible light. Stars emit ...', meta: {'name': 'Infrared', 'source_id': 'ba9d9843065facd8aa75e472f0c7d760ae3d78712335a49f7abaf499acb21d1c', 'page_number': 1}, score: 0.7197352146596256)]",1.0,1.0000001192092896,1.0,1.0,1.0 +The second most prominent religion in New York is what?,Judaism,Judaism,"[Document(id=1eed45d4c946ec058384afdd313db9b847d2b384629f1872466e03c7bc13a2e5, content: ' +Christianity (59%), particularly Catholicism (33%), was the most prevalently practiced religion in ...', meta: {'name': 'New_York_City', 'source_id': '8a1509d057a51ed7a68b42ff87df5d459ac7f15f3d0f4f1685859ba197f9686d', 'page_number': 1}, score: 0.6574727180609481)]",1.0,1.0,1.0,1.0,1.0 +The Free Hanseatic City of Bremen includes which cities?,Bremen and Bremerhaven,two urban districts,"[Document(id=b99c33e3f1152b05cb402034099d36d6b8fc3942c6deb97d0de9d7f47d56c738, content: ' The equivalent of the Minister-President is the Senatspräsident (President of the Senate) in Bremen...', meta: {'name': 'States_of_Germany', 'source_id': '60764e89141121f734e956aeeb48fcbe12c7e4fdec683c4f9fa29406a3f0579e', 'page_number': 1}, score: 0.5932038256321518)]",1.0,0.23566487431526184,1.0,1.0,1.0 +What seeks to limit the use of polysyllabic characters?,CPC,"Unicode are rarely used Kanji or Chinese characters, many of which are part of personal and place names, making them rarely used, but much more essential than envisioned in the original architecture of Unicode","[Document(id=52969654009785b4d154f4c0d9dbf3b19c1679e2e9b96a07fe9573dae98048ce, content: 'g., Egyptian Hieroglyphs) and thousands of rarely used or obsolete characters that had not been anti...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.5821946522602964)]",0.0,0.01901988312602043,0.0,0.0,0.0 +In what courts can federal habeas corpus suits be brought?,federal,lower federal courts,"[Document(id=af588e16cba35397799ed402e254a69a9445c63e2da06985a64c8440d5e111ea, content: ' Most often these are claims, such as ineffective assistance of counsel, which requires the court to...', meta: {'name': 'Capital_punishment_in_the_United_States', 'source_id': '0a4e4dda32f7be6a4f78da5ed9ee21d2cc7659112c0d087b2923a482bdc77b5b', 'page_number': 1}, score: 0.5401086806576698)]",1.0,0.5716047286987305,1.0,1.0,1.0 +What did Whitehead consider himself as a philosopher? ,rank amateur,rank amateur,"[Document(id=a022efa8ed739794e05008dbe44d9b83a87fc4f4b7a609ec096b9d437bc5089e, content: ' For Whitehead, education should be the exact opposite of the multidisciplinary, value-free school m...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 0.6345955416988933)]",1.0,1.0,1.0,1.0,1.0 +What condition does a person have to meet to be allowed by law on a coin?,deceased,borrow money on the credit of the United States,"[Document(id=9f240c943520891dcb4853fa0b77bb9da0c6144acd3f67acca4ff043b2823fb2, content: ' +Early releases of the Washington coin included error coins shipped primarily from the Philadelphia ...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.40843243316554156)]",1.0,0.019053475931286812,1.0,1.0,1.0 +Which region of the US experiences rain year round?,Pacific Northwest,"Alice Springs, Australia","[Document(id=11305a23f8bf763ba364e29e12c6d52cbb34345ba63d4b002db69932753a1dff, content: '7 mm) of precipitation per year, more than most other locations with desert climates, but it still q...', meta: {'name': 'Tucson,_Arizona', 'source_id': '1cd8368afbbb7156e4d37c16c9795a1cf7209fbd0036331b6759337ae62d7224', 'page_number': 1}, score: 0.6205771265988049)]",0.0,0.36634862422943115,0.0,0.0,0.0 +What is the ability to recognize and adapt to new specific pathogens called?,Adaptive (or acquired) immunity,adaptive immune system,"[Document(id=329774fcd326068e3e3307eab728f9f7211002cdf9ddedab811c99d1a4968642, content: ' If pathogens successfully evade the innate response, vertebrates possess a second layer of protecti...', meta: {'name': 'Immune_system', 'source_id': 'fcb8c5092b2596a66d4f96587d653cbabda606874d86632c35ec40ef8f023051', 'page_number': 1}, score: 0.6230771628644063)]",1.0,0.8636709451675415,1.0,1.0,1.0 +Who allegedly hit a home run to the Center?,Babe Ruth,Kyle Schwarber,"[Document(id=e7143fa77dfb51a3ba85804cccd3bf347ba3ba5227422dbc11b406b7a7ce7610, content: ' +On May 11, 2000, Glenallen Hill, facing Brewers starter Steve Woodard, became the first, and thus f...', meta: {'name': 'Chicago_Cubs', 'source_id': '10bd5330115a2fe2b9ef60d21abe3942769709161783409af0c4444ccac129c3', 'page_number': 1}, score: 0.6449944844616512)]",1.0,0.3030855655670166,1.0,1.0,1.0 +Which type of graduate from Imperial earned the second highest average starting salary after graduation?,Computing graduates,Computing,"[Document(id=9be1bf507c2859b61088d1f83c172682620003fe33104d080a9b6b3a2256570a, content: ' Students are encouraged to take these classes either for credit or in their own time, and in some d...', meta: {'name': 'Imperial_College_London', 'source_id': '8ff02d910ce681fdd72074b13ed322271625386966ee827dabf1c93ca556fd6e', 'page_number': 1}, score: 0.5930303365803692)]",1.0,0.6075230836868286,1.0,1.0,1.0 +In what year did Eisenhower propose his second civil rights act?,1960,1960,"[Document(id=78d1760ca2fa0904e5480a91d388ba9d6c37d737a65426e5cbb2488333bc1e1a, content: ' Eisenhower made clear his stance in his first State of the Union address in February 1953, saying ""...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 0.6669113603959826)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What is the current height limitation in Paris in central areas?,50 m,50 m (160 ft),"[Document(id=891cab83df456a1bb11d8e7c301e45b02e564d224d3be1f7bdb82d02edc8f221, content: ' Modern Paris owes much to its late 19th century Second Empire remodelling by the Baron Haussmann: m...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.7160253770681686)]",1.0,0.8085941672325134,1.0,1.0,1.0 +Income level was found to have a signification correlation to which outcome in the study?,nutrition literacy,lower levels of growth,"[Document(id=98a903d7a3fe66ed56536faa0a16552cf481b51c14c6a2a0600485dc972ba20c, content: '. The main reason for this shift is the increasing importance of human capital in development. When ...', meta: {'name': 'Economic_inequality', 'source_id': '648ff32b8c146c974b23e115e3a61420b1cdbea9de1f5bf38442ce4c1d9caa42', 'page_number': 1}, score: 0.46549644086481434)]",0.0,0.13175535202026367,0.0,0.0,0.0 +How much did Viacom want in damages in their lawsuit against youtube?,$1 billion,$1 billion,"[Document(id=56ff4bdb4f543c61c1f516ba1e544e83e117ab5512bae58b2264fa6b6d86f26b, content: ' Three successful complaints for copyright infringement against a user account will result in the ac...', meta: {'name': 'YouTube', 'source_id': '4d18798daeceb997a6dcb04b575dbbdad768671ca9202116370611d944d510be', 'page_number': 1}, score: 0.7654878512589371)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who should be able to challenge administrative orders in court?,individuals,individuals,"[Document(id=5c5f6a0591170904a56f4c0417e667089761d8d8f013f55283aff164fdadd75b, content: ' Dicey's: “no man is punishable or can be lawfully made to suffer in body or goods except for a dist...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 0.5930262907390398)]",1.0,1.0,1.0,1.0,1.0 +The Slavs were under what name in the early 6th century?,the Antes and the Sclaveni,Veneti,"[Document(id=4236cbbf71506fe35f0da033ea9b465edf3cfbfdd8e6bac2b259b18835cc0408, content: ' +The relationship between the Slavs and a tribe called the Veneti east of the River Vistula in the R...', meta: {'name': 'Slavs', 'source_id': 'a55731ff3cd03d8fe34d4e646dbfcdc2088f01190e5110c9ff0baf4e4b486847', 'page_number': 1}, score: 0.6265603953087009)]",1.0,0.22235718369483948,1.0,1.0,1.0 +How are the results simplified when using the SASO?,"into four summaries that look specifically at responses that correspond to either homosexuality, heterosexuality, bisexuality or asexuality.","measures sexual orientation on a continuum, considers various dimensions of sexual orientation, and considers homosexuality and heterosexuality separately","[Document(id=04d7cfe710801483b2e5c7c67b89cf9966fc92203783ea424fccf90e1b51520c, content: ' The classification scheme works such that individuals in the same category show the same balance be...', meta: {'name': 'Sexual_orientation', 'source_id': 'f94279650936a403aa3d2b43ab1ad42b1fd14d05455aa86fbcda6cda978ffe5c', 'page_number': 1}, score: 0.4367552146155492)]",1.0,0.626720666885376,1.0,1.0,1.0 +What socioeconomic class used the Florentine Tuscan language in Dante's time?,upper class,literate and upper class,"[Document(id=149c6390292392d4438c9191dca43bbe827c20a86bb7bfc502679813d7fcc108, content: ' Due to Eastern Lombard's status as a Gallo-Italic language, an Eastern Lombard speaker may, in fact...', meta: {'name': 'Dialect', 'source_id': '2e05b686e48c9512ae5466f1d15202b44fdc903ea32d0a42c5dbc9ef173d051d', 'page_number': 1}, score: 0.6197546116557725)]",1.0,0.7113619446754456,1.0,1.0,1.0 +"In addition to ammo proposals, what other need was articulated?",The need for some form of tracer or smoke trail,suppressive fire,"[Document(id=398ba78535afecc2004eff0502d304cec27deadedaaad329103a223bbb2a5e9d, content: ' +Many units are supplemented with a variety of specialized weapons, including the M249 SAW (Squad Au...', meta: {'name': 'United_States_Army', 'source_id': '7d1426796777e1da6822eafc3ff4dc7c856a2aafe381e0c95a3533a40874b202', 'page_number': 1}, score: 0.4030619320178129)]",0.0,0.23421159386634827,0.0,0.0,0.0 +What is that power that enables us to love?,Prevenient grace,Stout and Jeannine M.,"[Document(id=80d5f8430d3dbc590053d3b62577dce5ee8fbf237db0701d19dd7d56d8107c3a, content: '"" This connection is further analyzed by Stout and Jeannine M. Love in Integrative Process: Folletti...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 0.3732671667638634)]",0.0,0.2212621420621872,0.0,0.0,0.0 +"Who was assassinated on September 22, 189 AD?",He Jin,He Jin,"[Document(id=1023d346fa09f68f024520eed11ceb8d146eebd929e4d93e19ec61ae32461e57, content: ' 189 AD), half-brother to Empress He (d. 189 AD), plotted with Yuan Shao (d. 202 AD) to overthrow th...', meta: {'name': 'Han_dynasty', 'source_id': '060d95b91f9c20e5c8cd8b8a032ab44aac38ab88ce62d3c302bea0865f533568', 'page_number': 1}, score: 0.442711163558856)]",1.0,1.0000001192092896,1.0,1.0,1.0 +How did Gaddafi change the legal system?,he introduced sharia as the basis for the legal system,introduced sharia,"[Document(id=e2a95241acaa96c9b3665ffd83614f229b121a721d01ae7e3a778b2ce1d5050d, content: ' Intent on pushing Libya towards ""Islamic socialism"", he introduced sharia as the basis for the lega...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.5626624054289608)]",1.0,0.7288561463356018,1.0,1.0,1.0 +What name was it assumed Elizabeth would take upon her marriage to Philip?,"Mountbatten,",Peter Townsend,"[Document(id=4b69cd784f30edcd0bcf343e88529300a3f5f46cab6549e05e32a9f6208375df, content: ' +Elizabeth and Philip were married on 20 November 1947 at Westminster Abbey. They received 2500 wedd...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.6765609310084981)]",1.0,0.1941501349210739,1.0,1.0,1.0 +What was the attack on the British weakness?,Battle of Fort Bull,They harassed enemy shipping and attacked enemy colonies,"[Document(id=01afaccbfc29afb40aa8a4fc8ee8849ee22d4ebd33b61272ccd253ac9d3be2d3, content: ' This approach did not serve France well in the war, as the colonies were indeed lost, but although ...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 0.5305715873253745)]",0.0,0.24629266560077667,0.0,0.0,0.0 +How much of South Tucson is Native American?,10%,16.9%,"[Document(id=f34e8297a274ac70a2a126825fc3935412353cb3a42fcc32db84b6e7e69baf97, content: '9% Asian, 0.2% Pacific Islander, 16.9% from other races, and 3.8% from two or more races. Hispanic o...', meta: {'name': 'Tucson,_Arizona', 'source_id': '1cd8368afbbb7156e4d37c16c9795a1cf7209fbd0036331b6759337ae62d7224', 'page_number': 1}, score: 0.5307301750521758)]",1.0,0.6942497491836548,1.0,1.0,1.0 +Whose theory claimed that species were unchanging and designed by God?,John Ray,John Ray,"[Document(id=67c87173e747d72d79bc92866e553902d56583aae14965ecaa3b2da507256976, content: ' After the turmoil of the English Civil War, the Royal Society wanted to show that science did not t...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 0.6184567229523368)]",1.0,1.000000238418579,1.0,1.0,1.0 +Who can restore a database that has been corrupted?,database administrator,software error,"[Document(id=6c8c749813cf91978d96464cce374f7a04d7464bb50d66f4cda7f08549bc9fae, content: ' It may be desired that also some aspects of the architecture internal level are maintained. A compl...', meta: {'name': 'Database', 'source_id': '98b09743ece9be42ad021f0dc6ffab08c322e7eeace6aae0b98b4bbf67224bd3', 'page_number': 1}, score: 0.40717080191996524)]",1.0,0.25782641768455505,1.0,1.0,1.0 +What unpopular philosopher's text did Avicenna learn from?,Abu Abdullah Nateli,"Thomas Aquinas. +Early Islamic philosophy and Islamic metaphysics, imbued as it is with Islamic theology, distinguishes more clearly than Aristotelianism between essence and existence. Whereas existence is the domain of the contingent and the accidental, essence endures within a being beyond the accidental. The philosophy of Ibn Sīnā, particularly that part relating to metaphysics, owes much to al-Farabi. The search for a definitive Islamic philosophy separate from Occasionalism can be seen in what is left of his work. +Following al-Farabi","[Document(id=2da70cf12f17b13f7cd40e16a00b9b3a4b79dc3d3d86d9da2d5ba634b0255bb4, content: ' +His Book of Healing became available in Europe in partial Latin translation some fifty years after ...', meta: {'name': 'Avicenna', 'source_id': '25cbb1011a29473e1dfa8d96ebe95bc8ead7393548de57e871f959624f7238f1', 'page_number': 1}, score: 0.6182599373756009)]",1.0,0.2192244827747345,1.0,1.0,1.0 +What was the event called that caused Ayutthaya to reclaim lost territories ?,the Chinese threat,Ayutthaya recovered its territories by 1770,"[Document(id=f08f17188f751088b2b76f2c1a69ffc86f340979f05ea84b4b201def163ec6f3, content: ' Its trade and secular administrative reforms built a prosperous economy for more than 80 years. Fro...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.562789144621481)]",1.0,0.07690561562776566,1.0,1.0,1.0 +"Before the introduction of the Latin alphabet, how was Old English written?",runic system,a runic system,"[Document(id=e8db61efa2c902ccb7106c7f85b976fcd3c5e267fd62cf6003d23ef9f3a8b528, content: ' Macrons over vowels were originally used not to mark long vowels (as in modern editions), but to in...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.5674121980026754)]",1.0,0.9278781414031982,1.0,1.0,1.0 +"When groups that are antagonistic to each other form a coalition for hidden gain, it is called what?",unholy alliance,unholy alliance,"[Document(id=c54ac19342651f97c105c4825d0b82cbf05684cf1d7c4921e0d078a7212b4a21, content: ' A common type of embezzlement is that of personal use of entrusted government resources; for exampl...', meta: {'name': 'Political_corruption', 'source_id': 'e7c84fb03cb06b72c4ab51eb24218e9f2a10b95493e43d575342db0ffa3e9e1d', 'page_number': 1}, score: 0.5087219209237853)]",1.0,1.0,1.0,1.0,1.0 +What made a strong economic case for investing in low carbon technologies?,Stern Review,International Energy Agency,"[Document(id=e95b189deaf952c54a6d9d9b63c4e39a8ca3f10302437e822af426093ae93e9e, content: ' Some solar balloons are large enough for human flight, but usage is generally limited to the toy ma...', meta: {'name': 'Solar_energy', 'source_id': '4b26f9c886129869a3884289e417d88e1bc8a531535e048a63701a7b1784a8c1', 'page_number': 1}, score: 0.4775217647255643)]",0.0,0.12792061269283295,0.0,0.0,0.0 +"Who introduced the Latin version of the word ""insect""?",Pliny the Elder,Pliny the Elder,"[Document(id=de63707a025645c7fcc7a26e6dd37ddd29a27cd60ff187ce487e0d07217708ee, content: ' +The word ""insect"" comes from the Latin word insectum, meaning ""with a notched or divided body"", or ...', meta: {'name': 'Insect', 'source_id': 'e14215fdfb7e1394430dedb24549d773464f110b7f19b86558bc101f84126630', 'page_number': 1}, score: 0.7250091339820998)]",1.0,0.9999998807907104,1.0,1.0,1.0 +What was started by the Roman Catholic Church to dispense of the Cathars in Languedoc?,The Cathar Crusade,The Cathar Crusade,"[Document(id=3f50079b5ebfa4f0c771fad96712717b3bd3050f02fa3abfb19f87bc049689e1, content: ' By the 11th century, more organised groups such as the Patarini, the Dulcinians, the Waldensians an...', meta: {'name': 'Heresy', 'source_id': '8ba473262f8508a9c99699bb09cd022993896ece1fed1463a9cf4bd7eb0c3e68', 'page_number': 1}, score: 0.5133480868513103)]",1.0,1.0,1.0,1.0,1.0 +What music service is a huge competitor for Tidal?,Spotify,Spotify,"[Document(id=f0c6a5cd5408a0fc01c220e5910c03a2241fbe85099410f518592c1fb7098441, content: ' ""Creating a partnership with Beyoncé, one of the most hard-working and talented people in the world...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 0.6717989493314664)]",0.0,1.0,0.0,0.0,0.0 +How much was each copper token worth?,a halfpenny,legal tender at face value,"[Document(id=c5c8d55b2c3952d9e29c10530dc199fccdd3bfb5dc897ac40dd8ff1d01e6a97f, content: ' This has to do with Alexander Hamilton's suggestion to Congress of a fixed 15:1 ratio of silver to ...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.5587388197776981)]",0.0,0.16358545422554016,0.0,0.0,0.0 +What ships were attacked in 1852?,Glencoe and Sea Nymph,a Russian fleet attacked an Ottoman force,"[Document(id=34bd168edceec4d845ba383e25526ec637cdd2a67e74fbe1494f27559701c396, content: ' The clash came on 30 November 1853 when a Russian fleet attacked an Ottoman force in the harbour at...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 0.5038881684651161)]",0.0,0.0756317600607872,0.0,0.0,0.0 +What was the first fossil to display both clearly traditional reptilian characteristics?,Archaeopteryx,Eomaia,"[Document(id=deb49612ed6c7dff5f1ffa19a9e6cc731756e662202cde46400d79672a69618a, content: ' In the early Triassic, all the medium to large land carnivore niches were taken over by archosaurs ...', meta: {'name': 'Mammal', 'source_id': 'ce12abe98969c8fca9c93088950b36e820ec47c4c63f339bc203185f79f0242d', 'page_number': 1}, score: 0.598355717977564)]",0.0,0.34817540645599365,0.0,0.0,0.0 +What are the theories that integrate molecular genetics with Darwinian evolution called?,the modern evolutionary synthesis,modern evolutionary synthesis,"[Document(id=e1b3872c9a56b024a609ebeaf211b3ea1b4799599e6935c3dd62e613f036575b, content: ' This dogma has since been shown to have exceptions, such as reverse transcription in retroviruses. ...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 0.5384574426271362)]",1.0,0.9789240956306458,1.0,1.0,1.0 +At what time did Tesla get dinner?,8:10 p.m,8:10 p.m,"[Document(id=2ed21a855190ee9369cd4351878edc528d570a2136e3a2612158d6108a59491e, content: ' until 6:00 p.m. or later, with dinner from exactly 8:10 p.m., at Delmonico's restaurant and later t...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.6844753001764978)]",1.0,1.0000001192092896,1.0,1.0,1.0 +In what year did the French reach Alsace?,1444,1789,"[Document(id=93996e15e538ef99cbc1f14e3a6eca7f547c782d02004cfe4d90863b09e3c9ba, content: ' When hostilities were concluded in 1648 with the Treaty of Westphalia, most of Alsace was recognize...', meta: {'name': 'Alsace', 'source_id': '1105499d303196fb0b7ae4d26209720b0040e63376f114d458fd77f05836cbd4', 'page_number': 1}, score: 0.6466685240282758)]",1.0,0.2011210322380066,1.0,1.0,1.0 +Who was responsible for the handling of the Roman census?,two censors,the senate,"[Document(id=4b02bca62f562d6c5bfda9faf0b29f77e95b817f971009b508742099d4c57423, content: ' None of these defixiones seem produced by, or on behalf of the elite, who had more immediate recour...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.49976447567576965)]",0.0,0.17791815102100372,0.0,0.0,0.0 +When was a resolution agreed to about Chinese human rights issues in San Francisco?,"April 1, 2008","April 1, 2008","[Document(id=d16248de2a5e0d9e6bbb7e85b411bf0a4143206e20471ecab73dc7158ce992b1, content: ' The cost to the city for hosting the event was reported to be USD $726,400, nearly half of which ha...', meta: {'name': '2008_Summer_Olympics_torch_relay', 'source_id': 'a45e6ea09a1ff12d8a239a682c8ef3735ae894d5ab6427b50909053bb046e8b5', 'page_number': 1}, score: 0.5374786638713411)]",1.0,1.0,1.0,1.0,1.0 +Who was the last pope to be crowned?,Paul VI,"Angelo Giuseppe Roncalli,[a] Italian pronunciation: [ˈandʒelo dʒuˈzɛppe roŋˈkalli]; 25 November 1881 – 3 June 1963","[Document(id=2637c74f0c9d8013db6dd5e001b320fad50f7374ada702e24a314846612ef144, content: 'Pope Saint John XXIII (Latin: Ioannes XXIII; Italian: Giovanni XXIII) born Angelo Giuseppe Roncalli,...', meta: {'name': 'Pope_John_XXIII', 'source_id': '21b200b333306d54410b53cfd12e123951dabfc1f534196e5309132d00e8184d', 'page_number': 1}, score: 0.5965553455670057)]",0.0,0.26511257886886597,0.0,0.0,0.0 +Who handles serious medical emergencies on Norfolk Island?,the Royal Australian Air Force,Royal Australian Air Force,"[Document(id=c3327e32d8658307f8dca684bbdeb22895ca7a8d7ee8ad6ff0eba6200c5e6da2, content: ' All visitors to Norfolk Island, including Australians, are recommended to purchase travel insurance...', meta: {'name': 'Norfolk_Island', 'source_id': '8be872c9c3302a16163599d52ec60841ddff13042f8e8e2e0aa14de2c192e2ee', 'page_number': 1}, score: 0.6517133656496545)]",1.0,0.9866578578948975,1.0,1.0,1.0 +Were Roman soldiers expected to purchase their own equipment?,"wealthiest citizens, who were able to purchase the best equipment","They were armed with a sword and a small shield, as well as several light javelins","[Document(id=1983d91ab2ea433561865fb3ff1c494c8513349262f6ecf4a75e3747f0f18a6a, content: ' Their role in accompanying the army was primarily to supply any vacancies that might occur in the m...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.5060990867529696)]",1.0,0.15637509524822235,1.0,1.0,1.0 +Who did Nasser's group try to assassinate?,Hussein Sirri Amer,Muslim Brotherhood,"[Document(id=1687188f7518c2be7432e4406714b812e0a6d1fce365dd58302ad4eab6661f88, content: ' He started writing his book Philosophy of the Revolution during the siege. +After the war, Nasser re...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 0.5538089292772775)]",1.0,0.2528778314590454,1.0,1.0,1.0 +What was AJ Ayer an important member of?,Logical empiricism,his clan,"[Document(id=0fa0d88bd1e937bdfda2ada2f0a26524ce558edae20b1537db3005c2c91407d3, content: ' Uthman placed some of the trusted members of his clan at prominent and strong positions throughout ...', meta: {'name': 'Umayyad_Caliphate', 'source_id': '704c4038f63f9c5a686218060cb31639c0333dda34e87e804be75e5fec05ec53', 'page_number': 1}, score: 0.36949795109946626)]",0.0,0.07835237681865692,0.0,0.0,0.0 +In which year was a settlement reached regarding GE's pollution of the Housatonic River?,1999,1977,"[Document(id=754901c14e07fd836481f4df6a9e1c790f259897d11a92d8f4449320bf432be1, content: ' Around the same time, General Electric's Canadian counterpart, Canadian General Electric, was forme...', meta: {'name': 'General_Electric', 'source_id': '26d4044eb2d79191635af69e57c262af8041a70871e60e989128c458529f04cc', 'page_number': 1}, score: 0.5163642742400598)]",1.0,0.5505510568618774,1.0,1.0,1.0 +What area in Charleston is considered the 'industrial area'?,the city's Neck area,Charleston–North Charleston–Summerville Metropolitan Statistical Area,"[Document(id=620f512c3df1eb1788fcecbd5ec4c0438953e005020fab4303e121d55369ade5, content: 'Charleston is the oldest and second-largest city in the U.S. state of South Carolina, the county sea...', meta: {'name': 'Charleston,_South_Carolina', 'source_id': '6135d76cf2064b0c4f70e685c5c7e15b8093869ebd10f9a74e8acb07e2173763', 'page_number': 1}, score: 0.5246972383754526)]",1.0,0.3331555128097534,1.0,1.0,1.0 +In what year did Frédéric obtain citizenship in France?,1835,1835,"[Document(id=39112deed098f10b50cfae893e4a07e0541a59e3ede2de2a696f87f4dd3a60cf, content: '"" When in September 1831 he learned, while travelling from Vienna to Paris, that the uprising had be...', meta: {'name': 'Frédéric_Chopin', 'source_id': '5e95b80d7ceb3febb2d1a7dfe09730771a231fc8d23c15cd74d3fa2327cfcb14', 'page_number': 1}, score: 0.6957076687039898)]",1.0,1.0,1.0,1.0,1.0 +What was Tesla afraid someone was trying to do with his invention?,steal the invention,steal the invention,"[Document(id=1011faa25e59a33b095480ce04c180eb0c71513aa2d54d760fe11160d7fa844b, content: '"" His records indicate that the device is based on a narrow stream of small tungsten pellets that ar...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.6696097359530666)]",1.0,1.0,1.0,1.0,1.0 +What is the group called that does not agree with government at all?,anarchists,Radical Reformers,"[Document(id=7dce512135e1a6449f8f807a63a932137765967a80191486d9c009690cb118d4, content: ' Since the term ""magister"" also means ""teacher"", the Magisterial Reformation is also characterized b...', meta: {'name': 'Protestantism', 'source_id': '4e459ea6668bbb8e35a3b3731021de782c29c314b694a30df606a1413f2e1325', 'page_number': 1}, score: 0.4272090543082947)]",0.0,0.501602053642273,0.0,0.0,0.0 +What was the name of the bookstore the authors met in?,Old Corner Bookstore,Lippincott,"[Document(id=e302bfc687b3918c099a0b5ca3053c72bfd1fccfaee0997a2e91ff94d201d3db, content: ' Hoping to be published, Lee presented her writing in 1957 to a literary agent recommended by Capote...', meta: {'name': 'To_Kill_a_Mockingbird', 'source_id': 'f1e375014d37d0eb57fcd3e14861d458b3adfe0e71162344652acf38ae9d1bbb', 'page_number': 1}, score: 0.46715050025419724)]",0.0,0.1679261326789856,0.0,0.0,0.0 +Time and space are both examples of what type of resource?,complexity resources,Temporal measurement,"[Document(id=627b4646aa6d5bd39a435ba0dc32566058c94f814caca5d432160a59db2ebe75, content: ' An operational definition of time, wherein one says that observing a certain number of repetitions ...', meta: {'name': 'Time', 'source_id': 'f3ee42c6d38306d65c6207a15a1b9c65ff08a69728d5d0197f9593548177b229', 'page_number': 1}, score: 0.5358515216189067)]",0.0,0.1256408840417862,0.0,0.0,0.0 +What had happened to Frederick's extra troops from Russia?,"Catherine, who ended Russia's alliance with Prussia and withdrew from the war",He also placed a corps of his own troops under Frederick's command,"[Document(id=c90b28034a392eb2832dcb9a2ae1e8b326e72b851e54b5c70c28b7477ac220bd, content: ' Then on 5 January 1762 the Russian Empress Elizabeth died. Her Prussophile successor, Peter III, at...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 0.7323095236946228)]",1.0,0.28628912568092346,1.0,1.0,1.0 +What percentage of its workforce did Dell terminate in from 2007 to 2008?,10%,10%,"[Document(id=0a1d789e1e3ae04146d38c10ae30c76bb73173ec2df854d0b0695309c7e8629a, content: '” +On April 23, 2008, Dell announced the closure of one of its biggest Canadian call-centers in Kanat...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 0.659275856771866)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What organization was Rome's official caretaker?,the senate,"Imperial cult apparatus was fully developed, first in the Eastern Provinces, then in the West. Provincial Cult centres","[Document(id=daeee6942174a48e69242d0cab076688d09f9df7af5dabac194a1fdbe6160f3d, content: ' +Military settlement within the empire and at its borders broadened the context of Romanitas. Rome's...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5785378976346987)]",1.0,-0.08820496499538422,1.0,1.0,1.0 +Who chose BYU as the location of the national Middle East Language Resource Center?,United States Department of Education,United States Department of Education,"[Document(id=0b3a599a54fac34c82f29d858c51bbf5ce4f37032b3539a76c2b8a6b918abc6e, content: ' Several of its language programs are the largest of their kind in the nation, the Russian program b...', meta: {'name': 'Brigham_Young_University', 'source_id': 'cb3b81b6f2c540426803106face67bc5fd4db7b1a9ef422ec3601456732b4785', 'page_number': 1}, score: 0.7016983231419771)]",1.0,1.0,1.0,1.0,1.0 +Who discovered Hydrogen gas?,Robert Boyle,Robert Boyle,"[Document(id=45ad6989b7801812895d43ea694bad169af9b5dd9024dd96b099fe071a196217, content: ' +In 1671, Robert Boyle discovered and described the reaction between iron filings and dilute acids, ...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.6552354556870785)]",1.0,1.0,1.0,1.0,1.0 +Where was Polybius forced to go as hostage?,Rome,Lucca,"[Document(id=fbb6c94ac86ef324054f6ce0df37a7fbf9b69547011346eef119b000264cff50, content: ' The political alliance of the triumvirate was crumbling. Domitius Ahenobarbus ran for the consulshi...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.5345331713521415)]",0.0,0.3228505849838257,0.0,0.0,0.0 +In what decade was disco popular?,1970s,1980s,"[Document(id=24440712083226dfd525d04e8584c667060d5e2b151f666d4bdfb4e2e523cdbd, content: ' Also significant was Black Sabbath's Paranoid (1970), which combined guitar riffs with dissonance a...', meta: {'name': 'Hard_rock', 'source_id': '8c928e0d3f021cd5a2c2ffff1882ada0df22da36e38c0e9a3cdfb92bc88423cc', 'page_number': 1}, score: 0.4995811254104975)]",0.0,0.7625939249992371,0.0,0.0,0.0 +What do the rivers form?,lakes,Nieuwe Maas,"[Document(id=d4f00e0321e7bd0e40e3988f483dbb9e724f85a7fec1890682781c70d19fdf9a, content: ' The middle flow begins as Nederrijn, then changes into Lek, then joins the Noord, thereby forming N...', meta: {'name': 'Rhine', 'source_id': '23686e48a61504679a91dac10d83b287aef3dff7dabc0ab881c281059187ce6a', 'page_number': 1}, score: 0.5310363018327526)]",0.0,0.16586220264434814,0.0,0.0,0.0 +What were the first 256 code points of Unicode made identical to? ,ISO-8859-1,ISO-8859-1,"[Document(id=1ca3328892ba24004a75d8cfaab0f58ce32bed8ee034be6fa54e33e8b0a62eb5, content: ' UTF-16 extends UCS-2, using one 16-bit unit for the characters that were representable in UCS-2 and...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.6784456034023938)]",1.0,1.0,1.0,1.0,1.0 +"How many countries did her song ""Irreplaceable"" get number one status in?",five,three,"[Document(id=1458da8434cabde78aa443222423f12deeb21dfbf444ec9694b97cfdf73b8417, content: ' Beyoncé was the third woman to have writing credits on three number one songs (""Irreplaceable"", ""Gr...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 0.48296640826682014)]",1.0,0.6790156364440918,1.0,1.0,1.0 +How many Examination Boards exist in India?,30,30,"[Document(id=16d8a70ea7a6d58f48e68b5c9072e65770d4a4053e9906c77dee6ff0c12c90a1, content: ' However, these vocational schools are not part of the German dual education system. Ergänzungsschul...', meta: {'name': 'Private_school', 'source_id': 'c62330ba95e9919513bc9fc65c0d21a748b4a518b9b1f905a113de7f37274e44', 'page_number': 1}, score: 0.4960366894753296)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What play showed an early depiction of civil disobedience?,Antigone,Gandhi,"[Document(id=81b78857f8f0193b8e9f8bd09692e1e7b64cc94852c31a8effb89f2b7e5dc78e, content: 'e. ""cultural revolution"", it simply implies sweeping and widespread change to a section of the socia...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 0.5399410900017829)]",1.0,0.25790858268737793,1.0,1.0,1.0 +"Once in conversation with the Queen, how is Elizabeth addressed?",Ma'am,United Nations,"[Document(id=36cca080196ef1b35130c0b7bcd6bb4216fa5de9889c7c13bfa049ba8b3661c7, content: ' Criticism was focused on the institution of the monarchy itself and the Queen's wider family rather...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.5702394761597431)]",1.0,0.11135419458150864,1.0,1.0,1.0 +In what century did Paris become largely into fashion?,19th,19th,"[Document(id=c37151361f1ebbd218e8e53e243abfe86d24fabddd8be6f3401cf56e9e98633b, content: ' Its name is said to have come in 1814 from the Russian soldiers who occupied the city; ""bistro"" mea...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.5637046717833719)]",1.0,1.0,1.0,1.0,1.0 +When did Alfred the Great begin his reign?,871,871,"[Document(id=5396376bb5221d60d6016a3ef2e8c6fb591b1eb2c9c6dab90677a7e0ac39a5a5, content: ' The speech of eastern and northern parts of England was subject to strong Old Norse influence due t...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.506357630655818)]",0.0,1.0,0.0,0.0,0.0 +What two methods can be used when using Load Testing?,data or a large number of users,"input validation and error-management routines.[citation needed] Software fault injection, in the form of fuzzing, is an example of failure testing. Various commercial non-functional testing tools are linked from the software fault injection page; there are also numerous open-source and free software tools available that perform destructive testing. +Load testing is primarily concerned with testing that the system can continue to operate under a specific load, whether that be large quantities of data or a large number of users","[Document(id=76ad3fb3d257a3812a169d749a5fb13f2e05383764e8f9dfc53895f08747b375, content: ' Beta versions can be made available to the open public to increase the feedback field to a maximal ...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.5555899197717479)]",1.0,0.10437621176242828,1.0,1.0,1.0 +what may possibly be a solution to net neutrality concerns? ,municipal broadband,municipal broadband,"[Document(id=bb0e180e009c490f3ae30fb55a9a91cbe512409c2e12ebae0203298a6f3efe78, content: 'S. Federal Communications Commission (FCC) was reported to be considering a new rule that will permi...', meta: {'name': 'Internet_service_provider', 'source_id': 'b6cdeebacab1c0fc99cc348ba10a7ad5f8a09ca4691c74f89374730939ffbbb4', 'page_number': 1}, score: 0.6289352889491475)]",1.0,1.0,1.0,1.0,1.0 +What are the ground-attack aircraft in the USAF usually deployed in support for? ,U.S. ground forces,U.S. ground forces,"[Document(id=cdfa1e6bea8fd42b00ce80468ba471689216a6195425c2435fb1141b0b8c7e6f, content: ' +The ground-attack aircraft of the USAF are designed to attack targets on the ground and are often d...', meta: {'name': 'United_States_Air_Force', 'source_id': '1bb222de66e5a2c81f1d5b75bf3eced066939aec82934a5cb176a607b4e8cc8f', 'page_number': 1}, score: 0.597249455201943)]",1.0,1.0,1.0,1.0,1.0 + In what year did the invasion of Russia take place?,1812,1854,"[Document(id=7c2b46d47135979280b3bd098e70f7cc34a4526459e2816700066af0071057b2, content: ' The Russians did nothing and he evacuated to Batum in February of the following year. +The Baltic wa...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 0.536293090337765)]",0.0,0.42288899421691895,0.0,0.0,0.0 +What was the practice of religion to the Romans?,personal choice,a part of daily life,"[Document(id=1a8c1e09e777c4e183dceed861053866bfd85f790fb0c2cd9b9aad59ad7dc5b2, content: ' Even the most skeptical among Rome's intellectual elite such as Cicero, who was an augur, saw relig...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.685217369665892)]",1.0,0.26575636863708496,1.0,1.0,1.0 +What is one example of failure testing?,Software fault injection,defects in dead code,"[Document(id=366c0fe9bdc55f8585c6c011f54a3964b248ff55389d9ea1362bace26f4a6c59, content: ' There are various roles for testing team members. Information derived from software testing may be ...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.6180757102134704)]",1.0,0.4074403643608093,1.0,1.0,1.0 +"Out of the maximum amount of players allowed, one must be a what?",goalkeeper,three,"[Document(id=54ac8a4bef87eec4f4c4bb18b0298a72660993d40a0085ae08f0804872097969, content: ' +The governing bodies in each country operate league systems in a domestic season, normally comprisi...', meta: {'name': 'Association_football', 'source_id': 'f2b7305b09ad8a43b8389a5eb2d84b8ea4b4deaa7968d21fe9da9da9ecda2c6b', 'page_number': 1}, score: 0.5340059135303545)]",1.0,0.23862609267234802,1.0,1.0,1.0 +In which year was the new constitution ratified?,1855,1884 that produced a constitution ratified 3:1 by Montana citizens in November 1884,"[Document(id=012172d30b9790cfd5a5f1acf26703580b6738cfd860ad8388391dcbc296f44a, content: ' Surveys in 1874, 1875 and 1876 helped spark the Great Sioux War of 1876. The transcontinental NPR w...', meta: {'name': 'Montana', 'source_id': '556dc408ae9a8cf251c8082c455043f3c354c7457c6fe7793c307babf666b651', 'page_number': 1}, score: 0.5220032930737526)]",0.0,0.2675617039203644,0.0,0.0,0.0 +What vessel was attacked in 1845?,Naiad,Tens of thousands fled to the U.,"[Document(id=1fddecf60c272b200a5cbd6b29960f6294785c52fb7401c69ae1b0c1ee3e41ef, content: ' Lee tenaciously defended their capital until the very end. The war spilled across the continent, an...', meta: {'name': 'Military_history_of_the_United_States', 'source_id': 'e80a3db424753f4e406b9cfac948a9c9640a8278fb51e7ba7c06781d3f1813f4', 'page_number': 1}, score: 0.5538840050657533)]",0.0,0.05208169296383858,0.0,0.0,0.0 +When did Travel + Leisure list Yale as one of the most beautiful campuses?,2011,2011,"[Document(id=92daacd08f6215b2b6c9a23e96940ae9ffa3af8f6a0300108c6bfcf840702db9, content: ' In western New Haven, the university holds 500 acres (2.0 km2) of athletic facilities, including th...', meta: {'name': 'Yale_University', 'source_id': 'f33638c4d6b8d7572ba500f3a8affca1729aae3046c83a94e2a7424dd50f6549', 'page_number': 1}, score: 0.6046670847188567)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What sort of variants did the Human Genome Project indicate that many of the measured transcripts were?,alternative variants,alleles,"[Document(id=d951120bf8fad70ef82aa9a83d235840870adf33714da769d0f6ef883f101b5c, content: ' The Human Genome Project states ""People who have lived in the same geographic region for many gener...', meta: {'name': 'Race_(human_categorization)', 'source_id': '1232f7fe18d6e2adb87627ce2042b468cfa3fb9ba0b6afb2165c345f440b5339', 'page_number': 1}, score: 0.5611322350404006)]",0.0,0.19195985794067383,0.0,0.0,0.0 +During what year did the Italians invade Libya?,1911,1911,"[Document(id=1b5c167a9d5d609ea5fb8a97ac9f8e9db1b78d5c91d2f95320c7a861768e926e, content: ' He repeatedly expressed a preference for the desert over the city and retreated to the desert to me...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.584297128771873)]",1.0,1.0,1.0,1.0,1.0 +What did Davidson believe the God of Aristotle is synonymous with?,rational thought,rational thought,"[Document(id=707efe35d39bbe6f12111e251260c07bcfa48cea8000d7e2c9680c78ec3b9dcc, content: '"" The theory was indebted to Aristotle's pluralism and his concepts of Soul, the rational, living as...', meta: {'name': 'Idealism', 'source_id': '55a0bd42a4a737442a2d93fe0926424e236910de01b2406ed43c174f2225ff9d', 'page_number': 1}, score: 0.5783754080833273)]",1.0,1.0,1.0,1.0,1.0 +What percentage of students at Notre Dame are the children of former Notre Dame students?,21–24%,21–24%,"[Document(id=4a2399f100a536a4baf71c5dedf166c45f09b1b237a505e9a84faf1da7fd8a1b, content: ' +In 2014 the Notre Dame student body consisted of 12,179 students, with 8,448 undergraduates, 2,138 ...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 0.6002271102755674)]",1.0,1.0,1.0,1.0,1.0 +On what day did U.S> planes located Ozawa's fleet?,second,second,"[Document(id=718965c4d19f8aedea8f06ea42eb1b8aa90c1796050fa2b7fd1db899ab71ddbe, content: 'S. fleet encountered massive AA fire with proximity fuzes. Only one American warship was slightly da...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 0.6488385837345709)]",1.0,1.000000238418579,1.0,1.0,1.0 +Greek was crowned the European champions in what event?,Euro 2004,2005 European Championship,"[Document(id=76be05dc0de2dfbcf6d3d16e80abbfa4ee57f55d30696681b941ecd6ff700822, content: 'O.K. Greek basketball teams are the most successful in European basketball the last 25 years, having...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.6068292090899892)]",1.0,0.7032607793807983,1.0,1.0,1.0 +Which tense indicates that an action hasn't finished?,imperfective,past and present,"[Document(id=27c48c714f7a7d5c0daa9f28a8e02d3375d014da26ecdbb43d24c3a5f42b97ed, content: ' Typical of a Slavic language, Czech cardinal numbers one through four allow the nouns and adjective...', meta: {'name': 'Czech_language', 'source_id': 'b7b15f8c583c24627f86dae71c96a8b788adafb4e0606251ec9a06307cc37f6c', 'page_number': 1}, score: 0.3183698883943239)]",0.0,0.2961136996746063,0.0,0.0,0.0 +What acoustic element could be distorted by different audio settings in iPods prior to 2007?,Bass,bass,"[Document(id=2a2aed1469e1865e2816d1c4cee4f53887b00f04578d64882e7a5207977b4a70, content: ' in 2005. +In mid-2015, several new color schemes for all of the current iPod models were spotted in ...', meta: {'name': 'IPod', 'source_id': 'f0324763387a7e11c235569bada6935891b2c5ba40dd065ab5acc1bb5de8dbed', 'page_number': 1}, score: 0.6366722986570924)]",1.0,1.0,1.0,1.0,1.0 +In what way can Neutral triatomic hydrogen exist?,excited form,excited form,"[Document(id=814f2c2136f18b6a49318bf9a19fad148d9b272f987b2539771dc5dbd3beca10, content: ' +Under ordinary conditions on Earth, elemental hydrogen exists as the diatomic gas, H2. However, hyd...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.6317899747001257)]",1.0,1.0,1.0,1.0,1.0 +How are children used in the conflicts in Burma?,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements.,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements,"[Document(id=ff11a1c6102e2b293943cbcfb8c2935799487df6f465dea412c00844c1c7c655, content: '"" In July 2013, the Assistance Association for Political Prisoners indicated that there were approxi...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.5881745056471273)]",1.0,0.9815698862075806,1.0,1.0,1.0 +In what decades were Arsenal players stereotyped in a negative way?,1970s and 1980s,1970s and 1980s,"[Document(id=ac63b8357f80e413cededa7c5a37849ede32b9c8d7d3181d6e287572836f5f86, content: ' They formed the backdrop to one of the earliest football-related films, The Arsenal Stadium Mystery...', meta: {'name': 'Arsenal_F.C.', 'source_id': 'b197b14120aabbc3bf2073d4b3e99c1c51d56d5ac5248f8a97dadcfa056a6f7e', 'page_number': 1}, score: 0.5508357351080974)]",1.0,1.0,1.0,1.0,1.0 +When did new construction start in Melbourne?,late 1890s,1837,"[Document(id=02347b24d27e18d8b2928c6693c7aa2a7b9045acffc2554a2465ddcfd854f43d, content: ' +Batman's Treaty with the Aborigines was annulled by the New South Wales governor (who at the time g...', meta: {'name': 'Melbourne', 'source_id': 'e9cc00699ea29b72949874ec0e147efa277ecdaa2a727246f376aab1a8d342bd', 'page_number': 1}, score: 0.5670046274040195)]",1.0,0.4998088777065277,1.0,1.0,1.0 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:15.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:15.csv new file mode 100644 index 0000000..62a4d17 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:15.csv @@ -0,0 +1,122 @@ +questions,true_answers,predicted_answers,retrieved_docs,doc_mrr,sas,doc_map,doc_recall_single_hit,doc_recall_multi_hit +What were the earliest priesthoods? ,flamines,flamines,"[Document(id=3c9b5484de418625b08e3d8f4e3c3398f40f2f5af0bbf48528ee46ea38c64e7c, content: ' In household cult, the paterfamilias functioned as priest, and members of his familia as acolytes a...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5133148969949781)]",1.0,1.0,1.0,1.0,1.0 +When was a third visit to Mnafeld scheduled? ,early 1546,11 May 1944,"[Document(id=c58958638fa47dad60cf7d7556802e28d387d9bdc8b406eb23f39fcb8c5a5b19, content: ' The descriptions of these were published in Memoir III of the Australian Museum Sydney between 1896...', meta: {'name': 'Tuvalu', 'source_id': '4a4e309bc4b63f2a542d80bca7c9d33d9df6ccfa328612b01483ad5d2101621d', 'page_number': 1}, score: 0.3557680082127386)]",0.0,0.31558167934417725,0.0,0.0,0.0 +What was the name of the airfield where heavy raids took place?,Farnborough,Coventry,"[Document(id=93c78551f27ece2b380d8a30b1c8852063e2afe373bf66ac0ff8fc705d37f96c, content: ' On 19 November 1940 the famous RAF night fighter ace John Cunningham shot down a Ju 88 bomber using...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 0.5463814161451065)]",1.0,0.4347962439060211,1.0,1.0,1.0 +The Modern English ending -'s is derived from what ending in Old English?,-es,-es,"[Document(id=e9007dae49d8af7b7fa7932aef22bbdc01132df7a83250c1516203d03c7a8b4b, content: ' It is most “important to recognize that in many words the English and Scandinavian language differe...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.5029416489492136)]",1.0,0.9999998807907104,1.0,1.0,1.0 +In what area is it common for spiritual mentorship to be extremely high?,the West,Hinduism,"[Document(id=b43e607046d3a9f2878f76721d6d75cb6c46d2bdb38f6b3fdc5f1b9a2f23a163, content: ' +In The Church of Jesus Christ of Latter-day Saints (LDS Church), the teacher is an office in the Aa...', meta: {'name': 'Teacher', 'source_id': '8d3b7bd3176a10380300a49fa713b8fffa5e77a181df51ea823d2bdeb46d4c7b', 'page_number': 1}, score: 0.48577817227393083)]",1.0,0.20152541995048523,1.0,1.0,1.0 +Who is the only Greek volleyball club to win European titles?,Olympiacos,no other nation has won more than 4 Euroleague championships in this period,"[Document(id=9c5e4e5ebe04f6d60e8e13bc3cfe5cd0c8fa7c41c1148c916c472a26ca7c677b, content: ' Their best ever performance was in the 1896 Summer Olympics, when Greece finished second in the med...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.6432507807845881)]",1.0,0.2148960381746292,1.0,1.0,1.0 +How many judges were on the panel for this season?,four,thirteenth,"[Document(id=498709ffe86307d8d0f5b856b64133d5f9caef82e404024b6d06f89e0f2b0818, content: '. Lopez, Urban and Connick, Jr. all returned as judges for the show's fourteenth and fifteenth seaso...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 0.6107974944476209)]",1.0,0.4162346124649048,1.0,1.0,1.0 +"Along with a chemical process, what type of process is used by night vision devices to convert infrared into visible light?",electrical,electrical,"[Document(id=2296ed852027297f8181dc1f64af4fcbb66e36bb282c6197423888ce9d011df5, content: '9–14 μm) and produce images of that radiation. Since infrared radiation is emitted by all objects ba...', meta: {'name': 'Infrared', 'source_id': 'ba9d9843065facd8aa75e472f0c7d760ae3d78712335a49f7abaf499acb21d1c', 'page_number': 1}, score: 0.6525550298431104)]",1.0,1.0,1.0,1.0,1.0 +The second most prominent religion in New York is what?,Judaism,Judaism,"[Document(id=5dac70957842f214d4c60f76564b27a7e657cd237c677ed236d1ed1ca76e360f, content: ' +Christianity (59%), particularly Catholicism (33%), was the most prevalently practiced religion in ...', meta: {'name': 'New_York_City', 'source_id': '8a1509d057a51ed7a68b42ff87df5d459ac7f15f3d0f4f1685859ba197f9686d', 'page_number': 1}, score: 0.6477764778542375)]",1.0,1.0000001192092896,1.0,1.0,1.0 +The Free Hanseatic City of Bremen includes which cities?,Bremen and Bremerhaven,Bremen and Bremerhaven,"[Document(id=48379d700246108e0c3da307a3a8e5de24d8bee4044a5988144f1d9cd57b09c2, content: 'Germany is a federal republic consisting of sixteen federal states (German: Bundesland, or Land).[a]...', meta: {'name': 'States_of_Germany', 'source_id': '60764e89141121f734e956aeeb48fcbe12c7e4fdec683c4f9fa29406a3f0579e', 'page_number': 1}, score: 0.5043829724168043)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What seeks to limit the use of polysyllabic characters?,CPC,fullwidth forms,"[Document(id=8c55128e915db91be79f675390a7251853ff83d2c6945bcbba98c2fef9e723ac, content: ' For example, the ""fullwidth forms"" section of code points encompasses a full Latin alphabet that is...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.6069345349817818)]",0.0,0.15829457342624664,0.0,0.0,0.0 +In what courts can federal habeas corpus suits be brought?,federal,Delaware Court of Chancery,"[Document(id=469c080219c9489ab9349edb6b97e1bd27a0c1447462a4449f67449be9ec591e, content: '9 million juvenile cases. In 2010, state appellate courts received 272,795 new cases. By way of comp...', meta: {'name': 'Law_of_the_United_States', 'source_id': 'bf8a791338bf99b4b7bdcb9792bf47d76e6ef5bfb9301a2ff1ebf19bd80a77d5', 'page_number': 1}, score: 0.6048466371742737)]",0.0,0.18627992272377014,0.0,0.0,0.0 +What did Whitehead consider himself as a philosopher? ,rank amateur,one of the 20th century's foremost metaphysicians,"[Document(id=645c5e4f8126b7f8b45cf02dc6a818511eead3620792f162523eacd0e5908ee5, content: '"" Ironically, in later life Whitehead would become one of the 20th century's foremost metaphysicians...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 0.5103405273843459)]",1.0,0.14327546954154968,1.0,1.0,1.0 +What condition does a person have to meet to be allowed by law on a coin?,deceased,legal tender,"[Document(id=20272f837f8ae191cb2ca1c0c96912e627a2509f85e2d6f9240c270f8fbec035, content: ' +Early releases of the Washington coin included error coins shipped primarily from the Philadelphia ...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.40843243316554156)]",1.0,0.198892742395401,1.0,1.0,1.0 +Which region of the US experiences rain year round?,Pacific Northwest,lower 48,"[Document(id=e9b45d40d16f3341089b2285741ef7c3e78ad43c5d994970bf132f2e7690b72c, content: ' city of more than 250,000 people; it also ranks highly in winter precipitation. Conversely, the cit...', meta: {'name': 'Seattle', 'source_id': '1c1a54ff037a1e964929dc466a4a876fa6bb7609d7737ba7226bcd2ab49385cf', 'page_number': 1}, score: 0.5856120305100194)]",0.0,0.24463695287704468,0.0,0.0,0.0 +What is the ability to recognize and adapt to new specific pathogens called?,Adaptive (or acquired) immunity,adaptive immune system,"[Document(id=21f2466ad0edc49315abfe01f4ddac2a7deb20ca58dccc9ebe7ccdf8dfd7b71d, content: ' If pathogens successfully evade the innate response, vertebrates possess a second layer of protecti...', meta: {'name': 'Immune_system', 'source_id': 'fcb8c5092b2596a66d4f96587d653cbabda606874d86632c35ec40ef8f023051', 'page_number': 1}, score: 0.6230771628644063)]",1.0,0.863671064376831,1.0,1.0,1.0 +Who allegedly hit a home run to the Center?,Babe Ruth,Sammy Sosa,"[Document(id=2b19113a3f895a730fab4b460fee948bcaf23a55f4cd47402da918fb0a4107e3, content: ' When Cubs outfielder Rick Monday noticed the flag on the ground and the man and boy fumbling with m...', meta: {'name': 'Chicago_Cubs', 'source_id': '10bd5330115a2fe2b9ef60d21abe3942769709161783409af0c4444ccac129c3', 'page_number': 1}, score: 0.5587959106841319)]",1.0,0.46679896116256714,1.0,1.0,1.0 +Which type of graduate from Imperial earned the second highest average starting salary after graduation?,Computing graduates,Computing,"[Document(id=724eb1fe8da5e0d4da8a448227c2eb83283ae94ad66b58e07296d26274b6a2b4, content: ' In terms of specific course salaries, the Sunday Times ranked Computing graduates from Imperial as ...', meta: {'name': 'Imperial_College_London', 'source_id': '8ff02d910ce681fdd72074b13ed322271625386966ee827dabf1c93ca556fd6e', 'page_number': 1}, score: 0.4505703094141891)]",1.0,0.6075230836868286,1.0,1.0,1.0 +In what year did Eisenhower propose his second civil rights act?,1960,1960,"[Document(id=0f23a3d664b2d9d293629ea9d692c835b9a8e93ffb30bc45de463cd9b9d93424, content: ' Therefore, Khrushchev would not take part in the summit. Up until this event, Eisenhower felt he ha...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 0.6092619061844369)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What is the current height limitation in Paris in central areas?,50 m,50 m (160 ft),"[Document(id=44b4daa4d3c4378951ca880516b3342520595caa477d61e2c5fc88de3627c8f0, content: ' Modern Paris owes much to its late 19th century Second Empire remodelling by the Baron Haussmann: m...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.7160253770681686)]",1.0,0.8085941076278687,1.0,1.0,1.0 +Income level was found to have a signification correlation to which outcome in the study?,nutrition literacy,rates of growth and investment,"[Document(id=2ceee3158fa42ccc05a00626664d0dc2609f0a02800a7d45c6887dfc6805db72, content: '"" +In 1993, Galor and Zeira showed that inequality in the presence of credit market imperfections has...', meta: {'name': 'Economic_inequality', 'source_id': '648ff32b8c146c974b23e115e3a61420b1cdbea9de1f5bf38442ce4c1d9caa42', 'page_number': 1}, score: 0.4526356974935226)]",0.0,0.010820414870977402,0.0,0.0,0.0 +How much did Viacom want in damages in their lawsuit against youtube?,$1 billion,$1 billion,"[Document(id=497c11f9fbbfb1f37be3dcd0a29d747c5f8f8967252a25e340b937417a080280, content: ' Three successful complaints for copyright infringement against a user account will result in the ac...', meta: {'name': 'YouTube', 'source_id': '4d18798daeceb997a6dcb04b575dbbdad768671ca9202116370611d944d510be', 'page_number': 1}, score: 0.7380069801719974)]",1.0,1.0,1.0,1.0,1.0 +Who should be able to challenge administrative orders in court?,individuals,individuals,"[Document(id=8a0f220e6df4e50504002c6ad6312d289ff774c9c753db65e80fb1022f521be2, content: ' Dicey's: “no man is punishable or can be lawfully made to suffer in body or goods except for a dist...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 0.5930262907390398)]",1.0,1.0,1.0,1.0,1.0 +The Slavs were under what name in the early 6th century?,the Antes and the Sclaveni,Balaton Principality,"[Document(id=63d1469ba8c52e953e84a5272412efd96f10761e80271a6f8824c2b695835762, content: ' +Around the 6th century, Slavs appeared on Byzantine borders in great numbers.[page needed] The Byza...', meta: {'name': 'Slavs', 'source_id': 'a55731ff3cd03d8fe34d4e646dbfcdc2088f01190e5110c9ff0baf4e4b486847', 'page_number': 1}, score: 0.6527312000201964)]",1.0,0.14623066782951355,1.0,1.0,1.0 +How are the results simplified when using the SASO?,"into four summaries that look specifically at responses that correspond to either homosexuality, heterosexuality, bisexuality or asexuality.","measures sexual orientation on a continuum, considers various dimensions of sexual orientation, and considers homosexuality and heterosexuality separately","[Document(id=3638fb16add213ce4e749139ce1852d845838bce357ac5b46f8f2163ed664217, content: ' The classification scheme works such that individuals in the same category show the same balance be...', meta: {'name': 'Sexual_orientation', 'source_id': 'f94279650936a403aa3d2b43ab1ad42b1fd14d05455aa86fbcda6cda978ffe5c', 'page_number': 1}, score: 0.4367552146155492)]",1.0,0.626720666885376,1.0,1.0,1.0 +What socioeconomic class used the Florentine Tuscan language in Dante's time?,upper class,the family or a village priest,"[Document(id=1ac361381724706acf7a12f934a8f6a399e50f7b7857405e6b825fba7c4f2236, content: ' Other countries codified their laws; legal codes were promulgated in Castile, Poland, and Lithuania...', meta: {'name': 'Middle_Ages', 'source_id': 'f13b345e20c54b6cc55363080d7492474df62715717172aa9f1029f286010ad9', 'page_number': 1}, score: 0.527310784861083)]",0.0,0.20006507635116577,0.0,0.0,0.0 +"In addition to ammo proposals, what other need was articulated?",The need for some form of tracer or smoke trail,"a lighter weapon with a higher rate of fire is required, to increase a hit probability on a fast airborne target","[Document(id=2d15a146babb6b8e8f582201b3b065fcf100beeb9cf9b243445c45f0c57a8030, content: ' It is expected to be ready in 2020 to 2025.[verification needed] These systems while currently desi...', meta: {'name': 'Anti-aircraft_warfare', 'source_id': 'c54639424f62ce369356ed8c4e1e9fab8769359dd28029aa89832f5aa994f45d', 'page_number': 1}, score: 0.4081959878179246)]",1.0,0.20289859175682068,1.0,1.0,1.0 +What is that power that enables us to love?,Prevenient grace,Stout and Jeannine M.,"[Document(id=80d5f8430d3dbc590053d3b62577dce5ee8fbf237db0701d19dd7d56d8107c3a, content: '"" This connection is further analyzed by Stout and Jeannine M. Love in Integrative Process: Folletti...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 0.3732671667638634)]",0.0,0.2212620973587036,0.0,0.0,0.0 +"Who was assassinated on September 22, 189 AD?",He Jin,Caesar,"[Document(id=dd09780a89c3259aae7f10ab2070b88e3687ba1267d0d7b037e76509ccd66648, content: ' Thus, the assemblies became powerless and were unable to oppose him. +Caesar was assassinated on Mar...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.4209174078831871)]",0.0,0.25281232595443726,0.0,0.0,0.0 +How did Gaddafi change the legal system?,he introduced sharia as the basis for the legal system,sharia law,"[Document(id=21cb89e76694844f819e54d7269a6349d3404a6310be592588c5473ad2d8efe2, content: ' Gaddafi proclaimed that the People's Congresses provided for Libya's every political need, renderin...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.5469461218052117)]",1.0,0.6752218008041382,1.0,1.0,1.0 +What name was it assumed Elizabeth would take upon her marriage to Philip?,"Mountbatten,",Elizabeth,"[Document(id=ef4b826406ef2434ad51dec309a8e5c432eb5bed5cf5ea6e6fade38d1becac0c, content: ' He was a prince without a home or kingdom. Some of the papers played long and loud tunes on the str...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.5857813160604811)]",1.0,0.17397035658359528,1.0,1.0,1.0 +What was the attack on the British weakness?,Battle of Fort Bull,"To prevent German formations from hitting targets in Britain, RAF's Bomber Command would destroy Luftwaffe aircraft on their own bases, aircraft in their factories and fuel reserves by attacking oil plants","[Document(id=5e58c24d0a8600a2deca63ac3fc24e00c1ae3a1f9bdef97477f4afda60854c3b, content: ' Most of the resources went into planning for daylight fighter defences. The difficulty RAF bombers ...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 0.5065869351546286)]",0.0,0.1575212925672531,0.0,0.0,0.0 +How much of South Tucson is Native American?,10%,2.7%,"[Document(id=95e1463ec5cbd41e6746a2396eee9d7fb5ba52847c057866e7e7fd5a7f376d91, content: ' The racial makeup of the city was 69.7% White (down from 94.8% in 1970), 5.0% Black or African-Amer...', meta: {'name': 'Tucson,_Arizona', 'source_id': '1cd8368afbbb7156e4d37c16c9795a1cf7209fbd0036331b6759337ae62d7224', 'page_number': 1}, score: 0.5245243631758744)]",1.0,0.6450710892677307,1.0,1.0,1.0 +Whose theory claimed that species were unchanging and designed by God?,John Ray,John Ray,"[Document(id=9a450124afc12f193f72ae1f8310b72ada7cbbf8f34bbd71f66eb32b0f47efbc, content: ' With the development of the modern evolutionary synthesis in the 1930s and 1940s, Darwin's concept ...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 0.6164412846430873)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who can restore a database that has been corrupted?,database administrator,Database transactions,"[Document(id=64793ba16b3dcf1ad2ea402349708087ba8524851dc937aa0c8517e629e95067, content: ' Data security prevents unauthorized users from viewing or updating the database. Using passwords, u...', meta: {'name': 'Database', 'source_id': '98b09743ece9be42ad021f0dc6ffab08c322e7eeace6aae0b98b4bbf67224bd3', 'page_number': 1}, score: 0.34940664600314386)]",1.0,0.4331532120704651,1.0,1.0,1.0 +What unpopular philosopher's text did Avicenna learn from?,Abu Abdullah Nateli,al-Razi,"[Document(id=14ff2f3d4bed0ab93357f04935bd7b6a4050a44a58f2f208ad56793a35bba089, content: ' Yet, to require differentia entails that they exist 'due-to-themselves' as well as 'due to what is ...', meta: {'name': 'Avicenna', 'source_id': '25cbb1011a29473e1dfa8d96ebe95bc8ead7393548de57e871f959624f7238f1', 'page_number': 1}, score: 0.5769698934607952)]",1.0,0.4632871747016907,1.0,1.0,1.0 +What was the event called that caused Ayutthaya to reclaim lost territories ?,the Chinese threat,Ayutthaya recovered its territories by 1770,"[Document(id=391f8278a6d664b0fda2a91e05a75fcb178e560aca2778cf3383f74470691074, content: ' Its trade and secular administrative reforms built a prosperous economy for more than 80 years. Fro...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.562789144621481)]",1.0,0.07690561562776566,1.0,1.0,1.0 +"Before the introduction of the Latin alphabet, how was Old English written?",runic system,Roman cursive,"[Document(id=9b07ee8006e937b1f3fc72c78c329158c8ab48962e9e1ef4c8d375c50cf7a30b, content: ' Scripts using two separate cases are also called bicameral scripts. Many other writing systems make...', meta: {'name': 'Letter_case', 'source_id': 'f24785f407e55c52408fa13b0c7fd7fc6529cb973898992f6e98d5d28ea82d5a', 'page_number': 1}, score: 0.5399797841084327)]",0.0,0.06087452173233032,0.0,0.0,0.0 +"When groups that are antagonistic to each other form a coalition for hidden gain, it is called what?",unholy alliance,unholy alliance,"[Document(id=b281d8f2fa5c72bf9176af2f66fbd72725ba99c1c3e8e2e12044e694c6ee3bbe, content: ' This sum itself may be all or a portion of the difference between the actual (inflated) payment to ...', meta: {'name': 'Political_corruption', 'source_id': 'e7c84fb03cb06b72c4ab51eb24218e9f2a10b95493e43d575342db0ffa3e9e1d', 'page_number': 1}, score: 0.5025447478163751)]",1.0,1.0,1.0,1.0,1.0 +What made a strong economic case for investing in low carbon technologies?,Stern Review,International Solar Energy Society,"[Document(id=c793eed6f7e60651d2a76fcaf9711f0a39fe3f3343f81bd4df136c41ff320e80, content: ' +Lester Brown has argued that ""a world facing the prospect of economically disruptive climate change...', meta: {'name': 'Renewable_energy_commercialization', 'source_id': 'fa5f7904b8df5272f4f870ed401f610435c916171c87b47deed9a45e6bd594ea', 'page_number': 1}, score: 0.4503806030703587)]",1.0,0.06405393034219742,1.0,1.0,1.0 +"Who introduced the Latin version of the word ""insect""?",Pliny the Elder,Pliny the Elder,"[Document(id=d4ddb3955cf6ff9d6bed687f54220f695fef0951972c64309938d9cf7670f6ba, content: ' +The word ""insect"" comes from the Latin word insectum, meaning ""with a notched or divided body"", or ...', meta: {'name': 'Insect', 'source_id': 'e14215fdfb7e1394430dedb24549d773464f110b7f19b86558bc101f84126630', 'page_number': 1}, score: 0.7250091339820998)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What was started by the Roman Catholic Church to dispense of the Cathars in Languedoc?,The Cathar Crusade,pitch representation,"[Document(id=793e9139db9b4d6ebbb0e461ce9fd70835627ebd0340e4cc155c27b13fce781c, content: ' It is still the most common form of pitch representation in English- and German-speaking countries ...', meta: {'name': 'Normans', 'source_id': 'a93c74c61d9371ab8c7a63b9c58bd1ffebc96d2c920df9d53e702f614c82ef48', 'page_number': 1}, score: 0.4432011390260362)]",0.0,-0.0011289305984973907,0.0,0.0,0.0 +What music service is a huge competitor for Tidal?,Spotify,Spotify,"[Document(id=dccc646845ca49e7358315767a80e7f3cbcc38b5a971e1af1ab775ad80542dee, content: ' Contemporary critics have noted the consistent minimalistic aesthetic exhibited throughout DONDA cr...', meta: {'name': 'Kanye_West', 'source_id': '4693bb2ae0f5b7c5ed3788607bba35813f42eada13da446f4c397c0715d2dc91', 'page_number': 1}, score: 0.5964939320176397)]",1.0,1.0,1.0,1.0,1.0 +How much was each copper token worth?,a halfpenny,$4.00,"[Document(id=39c522ed946709c71a8c0453d1162d983ffe41cccbfae7c51dc54aec6aabbb3d, content: ' This has to do with Alexander Hamilton's suggestion to Congress of a fixed 15:1 ratio of silver to ...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.5411290847512054)]",0.0,0.19542115926742554,0.0,0.0,0.0 +What ships were attacked in 1852?,Glencoe and Sea Nymph,Glencoe and Sea Nymph,"[Document(id=c450c10c9cc7ec2362a5a8c23142549f03d77146379f9bb96ab788f8c91887e3, content: ' One year later, the American schooner Dolphin arrived and picked up two boys, the last survivors of...', meta: {'name': 'Marshall_Islands', 'source_id': '762a4f6095de127ded5441c7a17d1c9c38f731ac9cb1ffe8de30af6573be7edf', 'page_number': 1}, score: 0.5428866259942204)]",1.0,1.0,1.0,1.0,1.0 +What was the first fossil to display both clearly traditional reptilian characteristics?,Archaeopteryx,Tikitherium,"[Document(id=e46342d518bff02018f8d43556b540641d63dba71e0bc814f0993e18d70b0dba, content: ' +The early synapsid mammalian ancestors were sphenacodont pelycosaurs, a group that produced the non...', meta: {'name': 'Mammal', 'source_id': 'ce12abe98969c8fca9c93088950b36e820ec47c4c63f339bc203185f79f0242d', 'page_number': 1}, score: 0.5665906290089553)]",0.0,0.2962087392807007,0.0,0.0,0.0 +What are the theories that integrate molecular genetics with Darwinian evolution called?,the modern evolutionary synthesis,modern evolutionary synthesis,"[Document(id=7ff269861e7df34fcebf71c4907ef4694447d099e228c45550f4e2387812c2fd, content: ' This dogma has since been shown to have exceptions, such as reverse transcription in retroviruses. ...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 0.5467572858377108)]",1.0,0.9789240956306458,1.0,1.0,1.0 +At what time did Tesla get dinner?,8:10 p.m,"8:10 p.m., at Delmonico's restaurant and later the Waldorf-Astoria Hotel. Tesla would telephone his dinner order to the headwaiter, who also could be the only one to serve him. ""The meal was required to be ready at eight o'clock","[Document(id=bf93e5ca5e1091757426fa6d168a7b90f8c1d9dad833880ffcb9b06d8ad2b0f7, content: ' There are a minimum of 278 patents issued to Tesla in 26 countries that have been accounted for. Ma...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.6614342019294968)]",1.0,0.36503666639328003,1.0,1.0,1.0 +In what year did the French reach Alsace?,1444,1789,"[Document(id=5ba09395255feae4ebf751bb80a7a4fa5d79691518f143a0bddc36e8712f8fc6, content: ' As a result, Alsace was transformed into a mosaic of Catholic and Protestant territories. On the ot...', meta: {'name': 'Alsace', 'source_id': '1105499d303196fb0b7ae4d26209720b0040e63376f114d458fd77f05836cbd4', 'page_number': 1}, score: 0.66589685742468)]",1.0,0.2011210322380066,1.0,1.0,1.0 +Who was responsible for the handling of the Roman census?,two censors,two censors,"[Document(id=ce88f4b2acc2cd7986f0b63373b8505a79a6c1ae6aa86d23bd2b76d546f82d61, content: ' Consuls had supreme power in both civil and military matters. While in the city of Rome, the consul...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.5303996384286787)]",1.0,1.0,1.0,1.0,1.0 +When was a resolution agreed to about Chinese human rights issues in San Francisco?,"April 1, 2008",May 14,"[Document(id=5f07f5208c13bf62fa1ce8a8e975e3b5699a76935df35b9e6887b5cb6c98bfce, content: ' Liu’s family was later told that he was being investigated on suspicion of the crime of inciting su...', meta: {'name': '2008_Sichuan_earthquake', 'source_id': '028805d0ca3d5419311b65a7d324234efcfd11668efb51f32c952344e8ec3145', 'page_number': 1}, score: 0.47982362440532583)]",0.0,0.6049047112464905,0.0,0.0,0.0 +Who was the last pope to be crowned?,Paul VI,Pope Pius IX,"[Document(id=0ee41e346a8cb75c8539129cfa0012e097fefb93199e94b953b130d6a823d20d, content: 'Pope Saint John XXIII (Latin: Ioannes XXIII; Italian: Giovanni XXIII) born Angelo Giuseppe Roncalli,...', meta: {'name': 'Pope_John_XXIII', 'source_id': '21b200b333306d54410b53cfd12e123951dabfc1f534196e5309132d00e8184d', 'page_number': 1}, score: 0.5965553455670057)]",0.0,0.42167389392852783,0.0,0.0,0.0 +Who handles serious medical emergencies on Norfolk Island?,the Royal Australian Air Force,Royal Australian Air Force,"[Document(id=61583a75fdba243f6a812f3bf73c41e686e4f8374db81e4dd5f1a4d5a4430336, content: ' +Non-Australian citizens who are Australian permanent residents should be aware that during their st...', meta: {'name': 'Norfolk_Island', 'source_id': '8be872c9c3302a16163599d52ec60841ddff13042f8e8e2e0aa14de2c192e2ee', 'page_number': 1}, score: 0.6661485563876728)]",1.0,0.9866578578948975,1.0,1.0,1.0 +Were Roman soldiers expected to purchase their own equipment?,"wealthiest citizens, who were able to purchase the best equipment",Rome was forced to effectively ignore its long-standing principle that its soldiers must be both citizens and property owners,"[Document(id=e9b65cb0ed0d083f9f18e9d8a272362d48c238c27f4ed0b8150f2dec64d25a52, content: ' In the 4th century BC, the Romans abandoned the phalanx in favour of the more flexible manipular fo...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.47970363136972916)]",1.0,0.11697272956371307,1.0,1.0,1.0 +Who did Nasser's group try to assassinate?,Hussein Sirri Amer,his political opponents,"[Document(id=11a20c0aa9217ecd3b6ae5e1759f5a8a4cad36ba0608988c41a03fac43c11719, content: '"" Between April and June, hundreds of Naguib's supporters in the military were either arrested or di...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 0.6007058204987675)]",1.0,0.39355385303497314,1.0,1.0,1.0 +What was AJ Ayer an important member of?,Logical empiricism,arbitrators,"[Document(id=7a4b52f5a5d3fc5c0f6a38d1dee9e0f9e8662385743dce053b5e1c150c9cbd8d, content: ' The two families are therefore considered to be different clans (those of Hashim and of Umayya, res...', meta: {'name': 'Umayyad_Caliphate', 'source_id': '704c4038f63f9c5a686218060cb31639c0333dda34e87e804be75e5fec05ec53', 'page_number': 1}, score: 0.38202671645241126)]",0.0,0.22575870156288147,0.0,0.0,0.0 +In which year was a settlement reached regarding GE's pollution of the Housatonic River?,1999,1977,"[Document(id=0869972e54922afd3497ba60a5f3e981d9c0ab240629036531512fe359bc4da8, content: ' Around the same time, General Electric's Canadian counterpart, Canadian General Electric, was forme...', meta: {'name': 'General_Electric', 'source_id': '26d4044eb2d79191635af69e57c262af8041a70871e60e989128c458529f04cc', 'page_number': 1}, score: 0.5524063253173699)]",1.0,0.5505510568618774,1.0,1.0,1.0 +What area in Charleston is considered the 'industrial area'?,the city's Neck area,Charleston–North Charleston–Summerville Metropolitan Statistical Area,"[Document(id=a00baea2441c87e38109afe86358b94ae591d905c9ff5c35c440986bd364ee66, content: 'Charleston is the oldest and second-largest city in the U.S. state of South Carolina, the county sea...', meta: {'name': 'Charleston,_South_Carolina', 'source_id': '6135d76cf2064b0c4f70e685c5c7e15b8093869ebd10f9a74e8acb07e2173763', 'page_number': 1}, score: 0.5246972383754526)]",1.0,0.3331555128097534,1.0,1.0,1.0 +In what year did Frédéric obtain citizenship in France?,1835,1835,"[Document(id=e4be5f97eecad8f13742697394da51c6177446b3b3d0eac7d06873a3b5294293, content: '"" When in September 1831 he learned, while travelling from Vienna to Paris, that the uprising had be...', meta: {'name': 'Frédéric_Chopin', 'source_id': '5e95b80d7ceb3febb2d1a7dfe09730771a231fc8d23c15cd74d3fa2327cfcb14', 'page_number': 1}, score: 0.6957076687039898)]",1.0,1.0,1.0,1.0,1.0 +What was Tesla afraid someone was trying to do with his invention?,steal the invention,put into patent protection,"[Document(id=bf93e5ca5e1091757426fa6d168a7b90f8c1d9dad833880ffcb9b06d8ad2b0f7, content: ' There are a minimum of 278 patents issued to Tesla in 26 countries that have been accounted for. Ma...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.6380459636569318)]",1.0,0.5425417423248291,1.0,1.0,1.0 +What is the group called that does not agree with government at all?,anarchists,cordon sanitarian,"[Document(id=da081545e8e8020b03087fbbcde823b131a8ffeb227970cc6962ac2b8197ec87, content: ' When a party achieves enough seats in an election to form a majority, the party's frontbench become...', meta: {'name': 'Political_party', 'source_id': '476a9215de81b75a4f17737eed1679570973e55b5ed6175aa0ec1014b09d4632', 'page_number': 1}, score: 0.44697301723543903)]",0.0,0.23455810546875,0.0,0.0,0.0 +What was the name of the bookstore the authors met in?,Old Corner Bookstore,Stationers' Hall,"[Document(id=8efeacba5808472fcf5765653cdacd88f38b8ca6c851464247d3804997f7fc32, content: ' His estimate that writing his ""big book"" would take five years was optimistic. +On the Origin of Spe...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 0.4229277754222375)]",0.0,0.28779086470603943,0.0,0.0,0.0 +Time and space are both examples of what type of resource?,complexity resources,fundamental intellectual structure,"[Document(id=dd4f4d9a87883e0aa9f9e24e4db2a323a2a0555ed35dcc2ef63f816e15bbcd52, content: 'Time has long been a major subject of study in religion, philosophy, and science, but defining it in...', meta: {'name': 'Time', 'source_id': 'f3ee42c6d38306d65c6207a15a1b9c65ff08a69728d5d0197f9593548177b229', 'page_number': 1}, score: 0.4628609019980579)]",0.0,0.3731681704521179,0.0,0.0,0.0 +What had happened to Frederick's extra troops from Russia?,"Catherine, who ended Russia's alliance with Prussia and withdrew from the war",placed a corps of his own troops under Frederick's command,"[Document(id=d7342a0e3021ae2d3bba679cf10ecb72ddc69518be33b380ee6b9e8f5e4d03c9, content: ' +On the eastern front, progress was very slow. The Russian army was heavily dependent upon its main ...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 0.7072835413098615)]",1.0,0.28195881843566895,1.0,1.0,1.0 +What percentage of its workforce did Dell terminate in from 2007 to 2008?,10%,10%,"[Document(id=cea0f1307bbe685005f9642d975856221cf9a5dd6f18a7b87c780156408770d1, content: '” +On April 23, 2008, Dell announced the closure of one of its biggest Canadian call-centers in Kanat...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 0.659275856771866)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What organization was Rome's official caretaker?,the senate,Imperial cult,"[Document(id=5c227c76e8d8e1973e37da3019c94002ad552b20f22cf982d543b6a8c3e74922, content: ' +Military settlement within the empire and at its borders broadened the context of Romanitas. Rome's...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5785378976346987)]",1.0,0.20699740946292877,1.0,1.0,1.0 +Who chose BYU as the location of the national Middle East Language Resource Center?,United States Department of Education,United States Department of Education,"[Document(id=73c809a12535348ffed165eff42c189f3aa95c6ed455c42b5e7932cd44167a8d, content: ' Several of its language programs are the largest of their kind in the nation, the Russian program b...', meta: {'name': 'Brigham_Young_University', 'source_id': 'cb3b81b6f2c540426803106face67bc5fd4db7b1a9ef422ec3601456732b4785', 'page_number': 1}, score: 0.6349932241694249)]",1.0,1.0,1.0,1.0,1.0 +Who discovered Hydrogen gas?,Robert Boyle,Robert Boyle,"[Document(id=b562ec53d471dfbb3ba9b12247b16c622039308f7ec5b5b5344127cfeb67e87d, content: ' +In 1671, Robert Boyle discovered and described the reaction between iron filings and dilute acids, ...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.6552354556870785)]",1.0,1.0,1.0,1.0,1.0 +Where was Polybius forced to go as hostage?,Rome,Milan,"[Document(id=423e38d8a6d6af214d9084d6c9c45752923b4fc73735c2d71dfaf17e2be72ac8, content: ' He sent him, indeed, five hundred pieces of gold ""to bear his charges"" but Liberius refused them, s...', meta: {'name': 'Athanasius_of_Alexandria', 'source_id': '2a8e7ec0a44075f1a567744982dcecf4bcea19f0065d4f73c742398a05fa1642', 'page_number': 1}, score: 0.5515330125237783)]",0.0,0.5594199895858765,0.0,0.0,0.0 +In what decade was disco popular?,1970s,1970s,"[Document(id=2a3289df636bba9b6194e462e8116e6249c5ae5ad18c639ded7e73b593b09e4a, content: ' Other performing arts venues in Miami include the Gusman Center for the Performing Arts, Coconut Gr...', meta: {'name': 'Miami', 'source_id': '39ab1ce422fd3baeedbb69a9a90b4c0e43c513f6161698478a96f09e5d9f7285', 'page_number': 1}, score: 0.5362407528966322)]",1.0,1.0,1.0,1.0,1.0 +What do the rivers form?,lakes,Nieuwe Maas,"[Document(id=c76f28ed2994796f7f3a5aeea9229958f03f574059e71701fc815116b1cba0e0, content: ' Since the Rhine contributes most of the water, the shorter term Rhine Delta is commonly used. Howev...', meta: {'name': 'Rhine', 'source_id': '23686e48a61504679a91dac10d83b287aef3dff7dabc0ab881c281059187ce6a', 'page_number': 1}, score: 0.47601402875176224)]",0.0,0.16586217284202576,0.0,0.0,0.0 +What were the first 256 code points of Unicode made identical to? ,ISO-8859-1,ISO-8859-1,"[Document(id=e5a1e75e17e8253dc7c01b0468f9e5c5d8a150581ae96fc8d4a0f6136ae9d79a, content: 'Unicode is a computing industry standard for the consistent encoding, representation, and handling o...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.653812371157898)]",1.0,1.0,1.0,1.0,1.0 +"How many countries did her song ""Irreplaceable"" get number one status in?",five,"118 million records worldwide (a further 60 million additionally with Destiny's Child), making her one of the best-selling music artists of all time. The Recording Industry Association of America (RIAA) listed Beyoncé as the top certified artist of the 2000s, with a total of 64 certifications. Her songs ""Crazy in Love"", ""Single Ladies (Put a Ring on It)"", ""Halo"", and ""Irreplaceable"" are some of the best-selling singles of all time worldwide","[Document(id=052d7cad4bc7b3bd257fcbeb66826373b8ba4096023106374a2e07541664cccf, content: ' +Beyoncé has received numerous awards. As a solo artist she has sold over 15 million albums in the U...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 0.5123708652895399)]",1.0,0.09557723253965378,1.0,1.0,1.0 +How many Examination Boards exist in India?,30,NIPER) and National Institute of Nutrition (NIN). Hyderabad has five,"[Document(id=2a60820f19599e70a11ffdf68cff487cf35e2e6c1fa7acfbb591eec5701ddc2b, content: ' Depending on the institution, students are required to sit the Secondary School Certificate or the ...', meta: {'name': 'Hyderabad', 'source_id': 'e032b0dc6babeab40ef746299b8fdda8c0f0f12976c0b900e8ed7ca60ce1802d', 'page_number': 1}, score: 0.4533602356519226)]",0.0,-0.009233260527253151,0.0,0.0,0.0 +What play showed an early depiction of civil disobedience?,Antigone,Alice in Wonderland,"[Document(id=b4e00b0a3e6a4f73089e8f957908f72dbaf1af9ec0bbe25f4c7241c91d74c097, content: ' In reviewing the voluminous literature on the subject, the student of civil disobedience rapidly fi...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 0.5260499178464622)]",1.0,0.25487321615219116,1.0,1.0,1.0 +"Once in conversation with the Queen, how is Elizabeth addressed?",Ma'am,Lilibet,"[Document(id=564415855a4c3e99a6bd07fa1bf9f505a2d4db183f3d7afe4eba215305953cf5, content: ' She and her husband undertook an extensive tour of the United Kingdom, while her children and grand...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.5272145651819172)]",1.0,0.3128524124622345,1.0,1.0,1.0 +In what century did Paris become largely into fashion?,19th,19th,"[Document(id=faa03729332a5c07c1c9d6d87747d15af8b3043f87a9cff7389d9a1c23d97c7c, content: ' +Paris has been an international capital of high fashion since the 19th century, particularly in the...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.6398172942259656)]",1.0,1.0,1.0,1.0,1.0 +When did Alfred the Great begin his reign?,871,middle of the 10th century,"[Document(id=020b06606f9be0ef225d06a65fe75136a36d3c27c244908b82a80ac5c2785da3, content: ' 871–899) came to an agreement with the Viking invaders in the late 9th century, resulting in Danish...', meta: {'name': 'Middle_Ages', 'source_id': 'f13b345e20c54b6cc55363080d7492474df62715717172aa9f1029f286010ad9', 'page_number': 1}, score: 0.49354982401776937)]",1.0,-0.022137032821774483,1.0,1.0,1.0 +What two methods can be used when using Load Testing?,data or a large number of users,large quantities of data or a large number of users,"[Document(id=dd704f4bc758ab475db02844a8a24263dd4fdb32afbc7f7dc86b810f8fa62e4f, content: ' +Load testing is primarily concerned with testing that the system can continue to operate under a sp...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.6185313518586624)]",1.0,0.9246095418930054,1.0,1.0,1.0 +what may possibly be a solution to net neutrality concerns? ,municipal broadband,paid peering agreements,"[Document(id=020a4a6bf0a6435c7f8cac6b6e8e1cb5ee8f050c453f14b679151d213b0fd453, content: ' Under the terms of the agreement Comcast was to acquire 100% of Time Warner Cable. However, on Apri...', meta: {'name': 'Comcast', 'source_id': 'd36d794351d6cb7a4e24e8049738c242828e19254a9104589071daf07cc13503', 'page_number': 1}, score: 0.5240737955936977)]",0.0,0.29451510310173035,0.0,0.0,0.0 +What are the ground-attack aircraft in the USAF usually deployed in support for? ,U.S. ground forces,"interception of bombers and other fighters, reconnaissance, and patrol","[Document(id=d2709fd4938aadfc321fd75534f338a1182dec7bcf787b2638ddb43d1a59b926, content: ' Special Operations Command (USSOCOM). It conducts long-range, special operations missions, and is e...', meta: {'name': 'United_States_Air_Force', 'source_id': '1bb222de66e5a2c81f1d5b75bf3eced066939aec82934a5cb176a607b4e8cc8f', 'page_number': 1}, score: 0.4702763385999099)]",1.0,0.19300077855587006,1.0,1.0,1.0 + In what year did the invasion of Russia take place?,1812,1939–40,"[Document(id=8c3b20fc00a1588118787fa4929e17ca6008ab28ed89e8c899ff91d209aec2b1, content: 'The stated clauses of the Nazi-Soviet non-aggression pact were a guarantee of non-belligerence by ea...', meta: {'name': 'Molotov%E2%80%93Ribbentrop_Pact', 'source_id': 'f84d8572ce2c244709e2c253bb9092c0696ebcc07d3cd3874dc2b5a25c448e7f', 'page_number': 1}, score: 0.5333525757691033)]",0.0,0.37150004506111145,0.0,0.0,0.0 +What was the practice of religion to the Romans?,personal choice,to absorb the deities and cults of other peoples,"[Document(id=876b4580a64fd77fb8816f9aef13a054a2ac16b7e85433b15298374090048432, content: ' Women, slaves, and children all participated in a range of religious activities. Some public ritual...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.6360686226922287)]",1.0,0.12295319885015488,1.0,1.0,1.0 +What is one example of failure testing?,Software fault injection,"defects in dead code will never result in failures. A defect can turn into a failure when the environment is changed. Examples of these changes in environment include the software being run on a new computer hardware platform, alterations in source data, or interacting with different software","[Document(id=f10f419092a1f6acf685021bef11a7a6ebef797bdd44e3997bd6b4a635aa2a89, content: ' Not all defects will necessarily result in failures. For example, defects in dead code will never r...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.5932129675513269)]",1.0,0.5290453433990479,1.0,1.0,1.0 +"Out of the maximum amount of players allowed, one must be a what?",goalkeeper,three,"[Document(id=85f535796cef4ec10db8a4fd4e565960608be55322b93bae284e720192b9a26d, content: ' +A number of players may be replaced by substitutes during the course of the game. The maximum numbe...', meta: {'name': 'Association_football', 'source_id': 'f2b7305b09ad8a43b8389a5eb2d84b8ea4b4deaa7968d21fe9da9da9ecda2c6b', 'page_number': 1}, score: 0.5656282796723537)]",1.0,0.2386261373758316,1.0,1.0,1.0 +In which year was the new constitution ratified?,1855,1884 that produced a constitution ratified 3:1 by Montana citizens in November 1884,"[Document(id=10085273dcf2e8add6fd060b7e5ee171cced6c2e779468105fa3f236158226ca, content: ' Surveys in 1874, 1875 and 1876 helped spark the Great Sioux War of 1876. The transcontinental NPR w...', meta: {'name': 'Montana', 'source_id': '556dc408ae9a8cf251c8082c455043f3c354c7457c6fe7793c307babf666b651', 'page_number': 1}, score: 0.485443964150987)]",0.0,0.2675617039203644,0.0,0.0,0.0 +What vessel was attacked in 1845?,Naiad,Naiad,"[Document(id=c450c10c9cc7ec2362a5a8c23142549f03d77146379f9bb96ab788f8c91887e3, content: ' One year later, the American schooner Dolphin arrived and picked up two boys, the last survivors of...', meta: {'name': 'Marshall_Islands', 'source_id': '762a4f6095de127ded5441c7a17d1c9c38f731ac9cb1ffe8de30af6573be7edf', 'page_number': 1}, score: 0.5314080714125682)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did Travel + Leisure list Yale as one of the most beautiful campuses?,2011,1701,"[Document(id=d46a97edd9b0790dc5e0cbfa41c3f7905f20cfefe131cf9ee37d29a73ae1c3c8, content: 'Yale University is an American private Ivy League research university in New Haven, Connecticut. Fou...', meta: {'name': 'Yale_University', 'source_id': 'f33638c4d6b8d7572ba500f3a8affca1729aae3046c83a94e2a7424dd50f6549', 'page_number': 1}, score: 0.5084767247878694)]",1.0,0.2268960326910019,1.0,1.0,1.0 +What sort of variants did the Human Genome Project indicate that many of the measured transcripts were?,alternative variants,moderate levels,"[Document(id=073df342a989619b1eec131e4197dffb009cfe81b1f8022d1a8cd8b13985d1db, content: ' The Human Genome Project states ""People who have lived in the same geographic region for many gener...', meta: {'name': 'Race_(human_categorization)', 'source_id': '1232f7fe18d6e2adb87627ce2042b468cfa3fb9ba0b6afb2165c345f440b5339', 'page_number': 1}, score: 0.5611322350404006)]",0.0,0.14990273118019104,0.0,0.0,0.0 +During what year did the Italians invade Libya?,1911,1911,"[Document(id=d5a42bacf8a08d601a973e5bad25b36c41015f76de0d512f9c3130d5e87ae4de, content: ' He repeatedly expressed a preference for the desert over the city and retreated to the desert to me...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.584297128771873)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What did Davidson believe the God of Aristotle is synonymous with?,rational thought,Ilion,"[Document(id=bfdb2859c4aba7b42fa442a55f6ab1b3ece70f89cdc51e7d48df4e96e7091e95, content: ' +As god of colonization, Apollo gave oracular guidance on colonies, especially during the height of ...', meta: {'name': 'Apollo', 'source_id': '2eaf6f8c7cfd741cc2ece4f65e91b75ad204489bacd14efebc4697a9395dbb00', 'page_number': 1}, score: 0.5127319887009952)]",0.0,0.06766567379236221,0.0,0.0,0.0 +What percentage of students at Notre Dame are the children of former Notre Dame students?,21–24%,"1987–2005), there was a rapid growth in the school's reputation, faculty, and resources. He increased the faculty by more than 500 professors; the academic quality of the student body has improved dramatically, with the average SAT score rising from 1240 to 1360; the number of minority students more than doubled; the endowment grew from $350 million to more than $3 billion; the annual operating budget rose from $177 million to more than $650 million; and annual research funding improved from $15 million to more than $70 million. Notre Dame's most recent[when?] capital campaign raised $1.","[Document(id=f676fd95f068792ff77624bf3b35dae04ac9867081bfd07e539ab85e7d1bd983, content: ', Notre Dame's Vice President of Student Affairs, added that coeducation ""opened up a whole other po...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 0.5139353889406519)]",1.0,0.07369742542505264,1.0,1.0,1.0 +On what day did U.S> planes located Ozawa's fleet?,second,24 October,"[Document(id=67d402d488ad87fcda2d30f304cebc3d3ab52520c994807e5b06e892c3a0e1e1, content: ' The carriers had only 108 planes. The force was not spotted by the Allies until 16:40 on 24 October...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 0.694417436054523)]",1.0,0.241010844707489,1.0,1.0,1.0 +Greek was crowned the European champions in what event?,Euro 2004,Euro 2004,"[Document(id=9c5e4e5ebe04f6d60e8e13bc3cfe5cd0c8fa7c41c1148c916c472a26ca7c677b, content: ' Their best ever performance was in the 1896 Summer Olympics, when Greece finished second in the med...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.5942357834131707)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Which tense indicates that an action hasn't finished?,imperfective,imperfective,"[Document(id=b086dbc67b80e6f26b1e823c6ec440f8f645661a3c528184598912bb2cd8d10b, content: ' On occasion of the publication’s 45th anniversary, the Croatian weekly journal Forum published the ...', meta: {'name': 'Serbo-Croatian', 'source_id': '2cc55814bc801258e8300ed82e003428dbf11715c74ad99297d61c5dd77023f4', 'page_number': 1}, score: 0.43658414340153945)]",1.0,1.0,1.0,1.0,1.0 +What acoustic element could be distorted by different audio settings in iPods prior to 2007?,Bass,bass instruments,"[Document(id=25a9235e2187510deb110e2fe84f6e0f7a2dbacbbd0bd491e44e8a83b3be8cbf, content: ' This would happen for EQ settings like R&B, Rock, Acoustic, and Bass Booster, because the equalizer...', meta: {'name': 'IPod', 'source_id': 'f0324763387a7e11c235569bada6935891b2c5ba40dd065ab5acc1bb5de8dbed', 'page_number': 1}, score: 0.6185783377431922)]",1.0,0.8350290060043335,1.0,1.0,1.0 +In what way can Neutral triatomic hydrogen exist?,excited form,excited form,"[Document(id=f649524cdd28fbe2ba5a3cadae832b66d8fef791e1b4613b5d6705e7ca2b49a9, content: ' As a plasma, hydrogen's electron and proton are not bound together, resulting in very high electric...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.6261915857181519)]",1.0,1.0,1.0,1.0,1.0 +How are children used in the conflicts in Burma?,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements.,All male children in Buddhist families are encouraged to be a novice (beginner for Buddhism) before the age of twenty and to be a monk after the age of twenty,"[Document(id=24b43e7abfd404fae93d249c1ea9b3f3291572fa9a977ea3adc12146d7d194a0, content: ' The arts, particularly literature, have historically been influenced by the local form of Theravada...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.5226686003616492)]",1.0,0.32406651973724365,1.0,1.0,1.0 +In what decades were Arsenal players stereotyped in a negative way?,1970s and 1980s,late 1980s,"[Document(id=349c47b6563932e744137ad0a14f4e1d1070e837af1798aca5b22b7d3f21f0d6, content: ' The club's location, adjoining wealthy areas such as Canonbury and Barnsbury, mixed areas such as I...', meta: {'name': 'Arsenal_F.C.', 'source_id': 'b197b14120aabbc3bf2073d4b3e99c1c51d56d5ac5248f8a97dadcfa056a6f7e', 'page_number': 1}, score: 0.5504963556882005)]",1.0,0.885123610496521,1.0,1.0,1.0 +When did new construction start in Melbourne?,late 1890s,1837,"[Document(id=5ed927b9faadf3b3a6f63549d7a184e5e8bf2c6feda205d37278fbf8ee21158e, content: ' +Batman's Treaty with the Aborigines was annulled by the New South Wales governor (who at the time g...', meta: {'name': 'Melbourne', 'source_id': 'e9cc00699ea29b72949874ec0e147efa277ecdaa2a727246f376aab1a8d342bd', 'page_number': 1}, score: 0.5670046274040195)]",1.0,0.4998088479042053,1.0,1.0,1.0 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:5.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:5.csv new file mode 100644 index 0000000..47a0faa --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:1__chunk_size:5.csv @@ -0,0 +1,124 @@ +questions,true_answers,predicted_answers,retrieved_docs,doc_mrr,sas,doc_map,doc_recall_single_hit,doc_recall_multi_hit +What were the earliest priesthoods? ,flamines,flamines,"[Document(id=99729d5c09ec68a18e6cb9e326f59027b5c69bc74e05e8c8c0d825360370bf5e, content: ' In household cult, the paterfamilias functioned as priest, and members of his familia as acolytes a...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5522157733860871)]",1.0,1.0,1.0,1.0,1.0 +When was a third visit to Mnafeld scheduled? ,early 1546,1930,"[Document(id=40f8be4bd0cd72dec74312c305ef1f0dfafbf98df7af989c47c915459e027963, content: ' Over the next several decades the Army set up a series of coastal artillery batteries and named the...', meta: {'name': 'San_Diego', 'source_id': '04aa1c0a61a5a047684cd62ea4175007928dadda83d273a78287bcdbb722ed63', 'page_number': 1}, score: 0.38883239661667707)]",0.0,0.44883856177330017,0.0,0.0,0.0 +What was the name of the airfield where heavy raids took place?,Farnborough,Another raid was carried out on 11/12 May 1941,"[Document(id=eb6901a8f38708e4e25b2c3eace4bcc2eb6a667f1dde10b663f86c96897b3ec0, content: ' Another raid was carried out on 11/12 May 1941. Westminster Abbey and the Law Courts were damaged, ...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 0.5480045073379827)]",1.0,0.1221819818019867,1.0,1.0,1.0 +The Modern English ending -'s is derived from what ending in Old English?,-es,inflectional,"[Document(id=8825405ebdc1bb4daf623185ae693fe4f2cab9da4dcca330ccb2ab3f6b05d6e5, content: ' The modern cognates of original words have been used whenever practical to give a close approximati...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.5456537412802707)]",1.0,0.3097653388977051,1.0,1.0,1.0 +In what area is it common for spiritual mentorship to be extremely high?,the West,"low-church"" traditions of Protestantism","[Document(id=bbad1043bc2ff983eeca01fd03f84a8577a718a20a14adc37febb6fddb52936a, content: ' +In The Church of Jesus Christ of Latter-day Saints (LDS Church), the teacher is an office in the Aa...', meta: {'name': 'Teacher', 'source_id': '8d3b7bd3176a10380300a49fa713b8fffa5e77a181df51ea823d2bdeb46d4c7b', 'page_number': 1}, score: 0.5194444646640657)]",1.0,0.03650655969977379,1.0,1.0,1.0 +Who is the only Greek volleyball club to win European titles?,Olympiacos,Olympiacos,"[Document(id=927ee26533dc7f17be50a4cb7fc74bed43a4bc44825d916333e6dd9c5d18c958, content: ' The Greek league, the A1 Ethniki, is considered one of the top volleyball leagues in Europe and the...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.6800230578684324)]",1.0,1.0,1.0,1.0,1.0 +How many judges were on the panel for this season?,four,four,"[Document(id=52a0fae35f124062c7009aeb57cfa6fb15f201644550104b1c1a83c6f0449b3a, content: ' This season's judging panel consisted of Randy Jackson, along with Mariah Carey, Keith Urban and Ni...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 0.6542733508503982)]",1.0,0.9999999403953552,1.0,1.0,1.0 +"Along with a chemical process, what type of process is used by night vision devices to convert infrared into visible light?",electrical,electrical,"[Document(id=05e7c56a6da1ed703309ec6f627bc69c369ec51519a9e5eb8c442673c62365e4, content: ' Infrared can also be used to detect protostars before they begin to emit visible light. Stars emit ...', meta: {'name': 'Infrared', 'source_id': 'ba9d9843065facd8aa75e472f0c7d760ae3d78712335a49f7abaf499acb21d1c', 'page_number': 1}, score: 0.7190365428634795)]",1.0,1.0,1.0,1.0,1.0 +The second most prominent religion in New York is what?,Judaism,Judaism,"[Document(id=4437c778daec446c4362623ed3261e5ec4170314f4aa14f8381b8c7ee6eef3fe, content: ' +Christianity (59%), particularly Catholicism (33%), was the most prevalently practiced religion in ...', meta: {'name': 'New_York_City', 'source_id': '8a1509d057a51ed7a68b42ff87df5d459ac7f15f3d0f4f1685859ba197f9686d', 'page_number': 1}, score: 0.7362191709321027)]",1.0,1.0000001192092896,1.0,1.0,1.0 +The Free Hanseatic City of Bremen includes which cities?,Bremen and Bremerhaven,two urban districts,"[Document(id=a102b81b2963aaefdf1a83d1a1d96870be10e96bb78da447e6f4a0220bbf260f, content: ' The equivalent of the Minister-President is the Senatspräsident (President of the Senate) in Bremen...', meta: {'name': 'States_of_Germany', 'source_id': '60764e89141121f734e956aeeb48fcbe12c7e4fdec683c4f9fa29406a3f0579e', 'page_number': 1}, score: 0.5987085330383715)]",1.0,0.23566487431526184,1.0,1.0,1.0 +What seeks to limit the use of polysyllabic characters?,CPC,Unicode variation sequences,"[Document(id=2f36232b0dd91a653f6f2576055db49c5d19cd0f18bb852a417bdc59b99cc443, content: ' +Modern font technology provides a means to address the practical issue of needing to depict a unifi...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.5992671777257355)]",0.0,0.111117884516716,0.0,0.0,0.0 +In what courts can federal habeas corpus suits be brought?,federal,"appellate court finds that significant legal errors did occur, then it will reverse the judgment, or nullify the sentence and order a new capital sentencing hearing. Lastly, if the appellate court finds that no reasonable juror could find the defendant eligible for the death penalty, a rarity, then it will order the defendant acquitted, or not guilty, of the crime for which he/she was given the death penalty, and order him sentenced to the next most severe punishment for which the offense is eligible. About 60 percent survive the process of direct review intact. +Under the Antiterrorism and Effective Death Penalty Act of 1996, a state prisoner is ordinarily only allowed one suit for habeas corpus in federal court","[Document(id=7e0d5332e60a37e652cd2a51cc22044fd5e6ae3218102012d3b17416fb99fc7e, content: ' If the appellate court finds that significant legal errors did occur, then it will reverse the judg...', meta: {'name': 'Capital_punishment_in_the_United_States', 'source_id': '0a4e4dda32f7be6a4f78da5ed9ee21d2cc7659112c0d087b2923a482bdc77b5b', 'page_number': 1}, score: 0.6103408531822065)]",1.0,0.1905200332403183,1.0,1.0,1.0 +What did Whitehead consider himself as a philosopher? ,rank amateur,rank amateur,"[Document(id=b0360cb0e79aa53fce9b6b987065d38127ef8c7392ded59d294e940b3fc8c15b, content: ' For Whitehead, education should be the exact opposite of the multidisciplinary, value-free school m...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 0.6416938602715427)]",1.0,1.0,1.0,1.0,1.0 +What condition does a person have to meet to be allowed by law on a coin?,deceased,legal tender,"[Document(id=709af185292b1734d0e4c8a237bf0723a76b5dde9eb9eea06b0165e2876d5768, content: 'S.C. § 5112. Section 5112 prescribes the forms, in which the United States dollars should be issued....', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.412008492363103)]",1.0,0.1988927125930786,1.0,1.0,1.0 +Which region of the US experiences rain year round?,Pacific Northwest,"Alice Springs, Australia","[Document(id=0b4d90ac104ee8adddd3d378559e19756efb7723596ad72e5f1c354c08d952a3, content: '7 mm) of precipitation per year, more than most other locations with desert climates, but it still q...', meta: {'name': 'Tucson,_Arizona', 'source_id': '1cd8368afbbb7156e4d37c16c9795a1cf7209fbd0036331b6759337ae62d7224', 'page_number': 1}, score: 0.628535110128569)]",0.0,0.36634862422943115,0.0,0.0,0.0 +What is the ability to recognize and adapt to new specific pathogens called?,Adaptive (or acquired) immunity,Innate immune defenses,"[Document(id=a18e0058b343b5da0485ed348b299343056dca832078a909b95578e3a8fd7eb7, content: ' Conversely, non-self molecules are those recognized as foreign molecules. One class of non-self mol...', meta: {'name': 'Immune_system', 'source_id': 'fcb8c5092b2596a66d4f96587d653cbabda606874d86632c35ec40ef8f023051', 'page_number': 1}, score: 0.6306169395074717)]",1.0,0.6429639458656311,1.0,1.0,1.0 +Who allegedly hit a home run to the Center?,Babe Ruth,Sam Snead,"[Document(id=6929c42ad466bed7c14eac422c4d021a4f3e192fc8b2852b17a71ac3d6c8106e, content: ' +On May 11, 2000, Glenallen Hill, facing Brewers starter Steve Woodard, became the first, and thus f...', meta: {'name': 'Chicago_Cubs', 'source_id': '10bd5330115a2fe2b9ef60d21abe3942769709161783409af0c4444ccac129c3', 'page_number': 1}, score: 0.6338891136946663)]",1.0,0.30576199293136597,1.0,1.0,1.0 +Which type of graduate from Imperial earned the second highest average starting salary after graduation?,Computing graduates,2011/12 – £765.2 million) and total expenditure of £754.,"[Document(id=38d36ddbfcff6c32f28791ecef866ff54adf64d946d9968ffab17a763c85d252, content: ' +In the financial year ended 31 July 2013, Imperial had a total net income of £822.0 million (2011/1...', meta: {'name': 'Imperial_College_London', 'source_id': '8ff02d910ce681fdd72074b13ed322271625386966ee827dabf1c93ca556fd6e', 'page_number': 1}, score: 0.5416434087972107)]",1.0,0.11472521722316742,1.0,1.0,1.0 +In what year did Eisenhower propose his second civil rights act?,1960,1957,"[Document(id=e20106e1401ce2522cd3d14d13fae9ea782170d84e4c3db914247513b098ce09, content: '.. a discrimination in the expenditure of those funds"". +Eisenhower told District of Columbia officia...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 0.6884836139820915)]",1.0,0.703947901725769,1.0,1.0,1.0 +What is the current height limitation in Paris in central areas?,50 m,50 m (160 ft),"[Document(id=d89cdeedbc8441f4e2c52a3575e8c530fcce225561404b37c7cdd108d1cc549f, content: ' Modern Paris owes much to its late 19th century Second Empire remodelling by the Baron Haussmann: m...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.7343823718020364)]",1.0,0.8085941672325134,1.0,1.0,1.0 +Income level was found to have a signification correlation to which outcome in the study?,nutrition literacy,lower levels of growth,"[Document(id=e62fe349ee4e73a3572e9f63b19d92af4d05f2e4ba5ba37814049ed91dfeb15b, content: '"" +In 1993, Galor and Zeira showed that inequality in the presence of credit market imperfections has...', meta: {'name': 'Economic_inequality', 'source_id': '648ff32b8c146c974b23e115e3a61420b1cdbea9de1f5bf38442ce4c1d9caa42', 'page_number': 1}, score: 0.47991741337458843)]",0.0,0.13175535202026367,0.0,0.0,0.0 +How much did Viacom want in damages in their lawsuit against youtube?,$1 billion,$1 billion,"[Document(id=720eb3795a623e31335108bcc29a4b774e33272bb625d5968983846dd3b35f68, content: ' Three successful complaints for copyright infringement against a user account will result in the ac...', meta: {'name': 'YouTube', 'source_id': '4d18798daeceb997a6dcb04b575dbbdad768671ca9202116370611d944d510be', 'page_number': 1}, score: 0.7553394085608023)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who should be able to challenge administrative orders in court?,individuals,individuals,"[Document(id=cf59c0f0619e715b979051e0a84530bf73463713fa11f2961866d233ac2537f5, content: ' Dicey's: “no man is punishable or can be lawfully made to suffer in body or goods except for a dist...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 0.6532529240278193)]",1.0,1.0,1.0,1.0,1.0 +The Slavs were under what name in the early 6th century?,the Antes and the Sclaveni,East Slavs,"[Document(id=701344efe2101165a535fe45e922b01123011116f4f123fcd5b1668edbb29898, content: 'Slavs are the largest Indo-European ethno-linguistic group in Europe. They inhabit Central Europe, E...', meta: {'name': 'Slavs', 'source_id': 'a55731ff3cd03d8fe34d4e646dbfcdc2088f01190e5110c9ff0baf4e4b486847', 'page_number': 1}, score: 0.6309663423432798)]",1.0,0.3395293653011322,1.0,1.0,1.0 +How are the results simplified when using the SASO?,"into four summaries that look specifically at responses that correspond to either homosexuality, heterosexuality, bisexuality or asexuality.",four summaries,"[Document(id=f86f4f0c6b96ee3b686893cc41683ca450066b225884d3390504ef3124153018, content: ' +The SASO consists of 12 questions. Six of these questions assess sexual attraction, four assess sex...', meta: {'name': 'Sexual_orientation', 'source_id': 'f94279650936a403aa3d2b43ab1ad42b1fd14d05455aa86fbcda6cda978ffe5c', 'page_number': 1}, score: 0.5156582100443352)]",1.0,0.5629022717475891,1.0,1.0,1.0 +What socioeconomic class used the Florentine Tuscan language in Dante's time?,upper class,ten per cent of males and one per cent of females,"[Document(id=f29c459c3fb02992508251c17df638d0a61defd503ac62e20104f9a0c0cdd29f, content: ' Lay literacy rates rose, but were still low; one estimate gave a literacy rate of ten per cent of m...', meta: {'name': 'Middle_Ages', 'source_id': 'f13b345e20c54b6cc55363080d7492474df62715717172aa9f1029f286010ad9', 'page_number': 1}, score: 0.6150936116081785)]",0.0,0.12186141312122345,0.0,0.0,0.0 +"In addition to ammo proposals, what other need was articulated?",The need for some form of tracer or smoke trail,smaller-calibre ordnance,"[Document(id=0578917295f43992a211ded11b78bf62d55a950a27b45fde24ab1eabb2df79e0, content: ' Their needs could cogently be met with smaller-calibre ordnance beyond using the usual singly-mount...', meta: {'name': 'Anti-aircraft_warfare', 'source_id': 'c54639424f62ce369356ed8c4e1e9fab8769359dd28029aa89832f5aa994f45d', 'page_number': 1}, score: 0.4206405801057249)]",1.0,0.10714080184698105,1.0,1.0,1.0 +What is that power that enables us to love?,Prevenient grace,universal love,"[Document(id=a78f83359689c5324d38fe66319b2e022231f3eea49709cfa6adc4ff161463f3, content: ' Love led to spiritual growth that, in turn, reflected an increase in love for God and humanity. Thi...', meta: {'name': 'Dominican_Order', 'source_id': 'f55ba4e5b37c737e23256c74229a0bf3a5c697b70e171c6ce20c437d900b023e', 'page_number': 1}, score: 0.4322410896201257)]",0.0,0.33382272720336914,0.0,0.0,0.0 +"Who was assassinated on September 22, 189 AD?",He Jin,Caesar,"[Document(id=9251dafda92232db54143d3e1f9a589eeb4b79fd5873fc0b101379c7cfe3f5ba, content: ' Thus, the assemblies became powerless and were unable to oppose him. +Caesar was assassinated on Mar...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.4975724389201519)]",0.0,0.25281232595443726,0.0,0.0,0.0 +How did Gaddafi change the legal system?,he introduced sharia as the basis for the legal system,Ruling by decree,"[Document(id=97f0d038336472957d283c2ff0f1232ffef408a6928653afaced89770ed6145b, content: ' Initially ideologically committed to Arab nationalism and Arab socialism, he came to rule according...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.5929539864336693)]",1.0,0.2979413568973541,1.0,1.0,1.0 +What name was it assumed Elizabeth would take upon her marriage to Philip?,"Mountbatten,",heir presumptive,"[Document(id=d84cede505487aab2b7f8ca81ca6dc2ec7e647562d9fcef338a4a7bc8b2c2e2b, content: ' She was educated privately at home. Her father acceded to the throne on the abdication of his broth...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.6316186438786011)]",1.0,0.16207502782344818,1.0,1.0,1.0 +What was the attack on the British weakness?,Battle of Fort Bull,UN intervention and US pressure forced Britain into a humiliating withdrawal of its forces,"[Document(id=d09ab756c06e4e2126fc0655780e94923b99c11b88bd24e4b791726d7b43a296, content: ' Another of Eisenhower's concerns was the possibility of a wider war with the Soviet Union after it ...', meta: {'name': 'British_Empire', 'source_id': '8f673e96ef153333a8fc1d3855be60f5be1829316ed1fcb58dd8590fdb083946', 'page_number': 1}, score: 0.5456397414690248)]",0.0,0.13994494080543518,0.0,0.0,0.0 +How much of South Tucson is Native American?,10%,6%,"[Document(id=312f7d9ebeee95b77b19de18d1f6ec1dcd72b41992924c8312881afed5cd44da, content: '6% of the population. Non-Hispanic Whites were 47.2% of the population in 2010, down from 72.8% in 1...', meta: {'name': 'Tucson,_Arizona', 'source_id': '1cd8368afbbb7156e4d37c16c9795a1cf7209fbd0036331b6759337ae62d7224', 'page_number': 1}, score: 0.5368159160243922)]",1.0,0.7885175943374634,1.0,1.0,1.0 +Whose theory claimed that species were unchanging and designed by God?,John Ray,William Paley,"[Document(id=1de1b94fbb49d9d6bbc2cdcf71ef4f81cd3ec13b95fe79865c57979200ac9095, content: ' Lamarck thought there was an inherent progressive tendency driving organisms continuously towards g...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 0.640773032968124)]",1.0,0.2994629740715027,1.0,1.0,1.0 +Who can restore a database that has been corrupted?,database administrator,dedicated backup files,"[Document(id=d62f58c8737fe86268de0ebf0cb0ef4eae1f9ba5f33735f7cadb2e83beb9e377, content: 'g., cases when the database is found corrupted due to a software error, or if it has been updated wi...', meta: {'name': 'Database', 'source_id': '98b09743ece9be42ad021f0dc6ffab08c322e7eeace6aae0b98b4bbf67224bd3', 'page_number': 1}, score: 0.5250403765091296)]",1.0,0.12655818462371826,1.0,1.0,1.0 +What unpopular philosopher's text did Avicenna learn from?,Abu Abdullah Nateli,Quran,"[Document(id=4929afcbf8a25329cc28344a59a6da0673a0b29592a46e295b09e09e3666b249, content: ' Avicenna's views on Islamic theology (and philosophy) were enormously influential, forming part of ...', meta: {'name': 'Avicenna', 'source_id': '25cbb1011a29473e1dfa8d96ebe95bc8ead7393548de57e871f959624f7238f1', 'page_number': 1}, score: 0.6661604057856159)]",1.0,0.34885287284851074,1.0,1.0,1.0 +What was the event called that caused Ayutthaya to reclaim lost territories ?,the Chinese threat,Ayutthaya seized Tenasserim and Lan Na,"[Document(id=ca6db29fb81cf6a69632107c67ace9bdd1fdd8f61142978a0684c6cd6f540122, content: ' Ayutthaya seized Tenasserim and Lan Na, and Portuguese mercenaries established Portuguese rule at T...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.584896008554856)]",1.0,0.15335537493228912,1.0,1.0,1.0 +"Before the introduction of the Latin alphabet, how was Old English written?",runic system,runic system,"[Document(id=8825405ebdc1bb4daf623185ae693fe4f2cab9da4dcca330ccb2ab3f6b05d6e5, content: ' The modern cognates of original words have been used whenever practical to give a close approximati...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.7110396889326562)]",1.0,0.9999999403953552,1.0,1.0,1.0 +"When groups that are antagonistic to each other form a coalition for hidden gain, it is called what?",unholy alliance,unholy alliance,"[Document(id=f1fa43fc9c1c5bd46b9f5455701bf99b0e98d4579816d755389bcc2dbba1bcda, content: ' This sum itself may be all or a portion of the difference between the actual (inflated) payment to ...', meta: {'name': 'Political_corruption', 'source_id': 'e7c84fb03cb06b72c4ab51eb24218e9f2a10b95493e43d575342db0ffa3e9e1d', 'page_number': 1}, score: 0.5088500802288218)]",1.0,1.0,1.0,1.0,1.0 +What made a strong economic case for investing in low carbon technologies?,Stern Review,The energy from the sun,"[Document(id=6873b580ad3f15a0882d302e702bc9af713e66089c5a7f2aa10feabe24a0ebed, content: ' The energy from the sun could play a key role in de-carbonizing the global economy alongside improv...', meta: {'name': 'Solar_energy', 'source_id': '4b26f9c886129869a3884289e417d88e1bc8a531535e048a63701a7b1784a8c1', 'page_number': 1}, score: 0.5276195041168332)]",0.0,0.05508004128932953,0.0,0.0,0.0 +"Who introduced the Latin version of the word ""insect""?",Pliny the Elder,Pliny the Elder,"[Document(id=e8da082f5022b804801ddb2778f16c32226f0ecb59f3ed7f8fcdae17653f162c, content: ' +The word ""insect"" comes from the Latin word insectum, meaning ""with a notched or divided body"", or ...', meta: {'name': 'Insect', 'source_id': 'e14215fdfb7e1394430dedb24549d773464f110b7f19b86558bc101f84126630', 'page_number': 1}, score: 0.7250091339820998)]",1.0,0.9999998807907104,1.0,1.0,1.0 +What was started by the Roman Catholic Church to dispense of the Cathars in Languedoc?,The Cathar Crusade,The Cathar Crusade,"[Document(id=08215551b881c7cefad5bb5baaaddc4d6f9a5802a709d61e32985651e2115c85, content: ' By the 11th century, more organised groups such as the Patarini, the Dulcinians, the Waldensians an...', meta: {'name': 'Heresy', 'source_id': '8ba473262f8508a9c99699bb09cd022993896ece1fed1463a9cf4bd7eb0c3e68', 'page_number': 1}, score: 0.5927295914277724)]",1.0,1.0,1.0,1.0,1.0 +What music service is a huge competitor for Tidal?,Spotify,Aspiro,"[Document(id=ef9c78ac21a913a9aad74111f50f9247dd1e578b598b35e509b5dfb85cdd75a6, content: ' Contemporary critics have noted the consistent minimalistic aesthetic exhibited throughout DONDA cr...', meta: {'name': 'Kanye_West', 'source_id': '4693bb2ae0f5b7c5ed3788607bba35813f42eada13da446f4c397c0715d2dc91', 'page_number': 1}, score: 0.6148988469371888)]",1.0,0.20396557450294495,1.0,1.0,1.0 +How much was each copper token worth?,a halfpenny,"1.505 g) in 1837 (16:1 ratio).[citation needed] +Technically, all these coins are still legal tender at face value","[Document(id=8cba8d045af2ea2d04789f21782201aca9ae1bf67c20440df2f6dabfdb2b03d1, content: '22 grains (1.505 g) in 1837 (16:1 ratio).[citation needed] +Technically, all these coins are still le...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.5695327514337405)]",0.0,0.1435888707637787,0.0,0.0,0.0 +What ships were attacked in 1852?,Glencoe and Sea Nymph,Glencoe and Sea Nymph,"[Document(id=73cc59555437274633bfd424f9225eaa5b55cf814b3bbb82c89b0d9357a659f0, content: ' In 1852 the San Francisco-based ships Glencoe and Sea Nymph were attacked and everyone aboard excep...', meta: {'name': 'Marshall_Islands', 'source_id': '762a4f6095de127ded5441c7a17d1c9c38f731ac9cb1ffe8de30af6573be7edf', 'page_number': 1}, score: 0.5423896434650102)]",1.0,1.0,1.0,1.0,1.0 +What was the first fossil to display both clearly traditional reptilian characteristics?,Archaeopteryx,Eomaia,"[Document(id=8d3062cc119ec09e8a2dc8ded8bcf02da6323a3a52f0e8768141cb6ca77e58c9, content: ' In the early Triassic, all the medium to large land carnivore niches were taken over by archosaurs ...', meta: {'name': 'Mammal', 'source_id': 'ce12abe98969c8fca9c93088950b36e820ec47c4c63f339bc203185f79f0242d', 'page_number': 1}, score: 0.5925301137060124)]",0.0,0.34817540645599365,0.0,0.0,0.0 +What are the theories that integrate molecular genetics with Darwinian evolution called?,the modern evolutionary synthesis,the modern evolutionary synthesis,"[Document(id=54fb8e44f718e454d5f4aef7097036553aa91b829fc2ea08644a1f9fc17e50f5, content: ' +The theories developed in the 1930s and 1940s to integrate molecular genetics with Darwinian evolut...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 0.6825508197600287)]",1.0,1.0000001192092896,1.0,1.0,1.0 +At what time did Tesla get dinner?,8:10 p.m,eight o'clock,"[Document(id=5dfd5078daafbc2ef3695636e6d70b42db2abc79ac389c20eac0417969714c11, content: ' Tesla would telephone his dinner order to the headwaiter, who also could be the only one to serve h...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.7299236291347593)]",1.0,0.5878220796585083,1.0,1.0,1.0 +In what year did the French reach Alsace?,1444,1639,"[Document(id=60bf7d285d0a82adaf86beaea9d4016fab6e78f84863f7bc7c59437478198477, content: ' As a result, Alsace was transformed into a mosaic of Catholic and Protestant territories. On the ot...', meta: {'name': 'Alsace', 'source_id': '1105499d303196fb0b7ae4d26209720b0040e63376f114d458fd77f05836cbd4', 'page_number': 1}, score: 0.6624026315749729)]",1.0,0.5775867700576782,1.0,1.0,1.0 +Who was responsible for the handling of the Roman census?,two censors,censors,"[Document(id=3749ef6e14ea9af3893cfd84917e3a39acc2212480fc846e5a9231414a5599e6, content: ' Praetors administered civil law and commanded provincial armies. Every five years, two censors were...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.5462632729451639)]",1.0,0.8454219698905945,1.0,1.0,1.0 +When was a resolution agreed to about Chinese human rights issues in San Francisco?,"April 1, 2008","April 1, 2008","[Document(id=80f5e2e80143434aa6d2d78a16f411f7f6ab4f8ffffbd29914ad0c75e8bf31da, content: ' The cost to the city for hosting the event was reported to be USD $726,400, nearly half of which ha...', meta: {'name': '2008_Summer_Olympics_torch_relay', 'source_id': 'a45e6ea09a1ff12d8a239a682c8ef3735ae894d5ab6427b50909053bb046e8b5', 'page_number': 1}, score: 0.5523669903863153)]",1.0,1.0,1.0,1.0,1.0 +Who was the last pope to be crowned?,Paul VI,Angelo Giuseppe Roncalli,"[Document(id=d025eebf35da454623a00c5d71d009d27de02dc1bdc601d3e2b18ee0f2c3a436, content: 'Pope Saint John XXIII (Latin: Ioannes XXIII; Italian: Giovanni XXIII) born Angelo Giuseppe Roncalli,...', meta: {'name': 'Pope_John_XXIII', 'source_id': '21b200b333306d54410b53cfd12e123951dabfc1f534196e5309132d00e8184d', 'page_number': 1}, score: 0.5976698401557258)]",0.0,0.4155743420124054,0.0,0.0,0.0 +Who handles serious medical emergencies on Norfolk Island?,the Royal Australian Air Force,Royal Australian Air Force,"[Document(id=c983e8f9fccac478cd7d895c53ca7636f4291c3a87693dd68246b03e75a4be42, content: ' All visitors to Norfolk Island, including Australians, are recommended to purchase travel insurance...', meta: {'name': 'Norfolk_Island', 'source_id': '8be872c9c3302a16163599d52ec60841ddff13042f8e8e2e0aa14de2c192e2ee', 'page_number': 1}, score: 0.6392372140463647)]",1.0,0.9866578578948975,1.0,1.0,1.0 +Were Roman soldiers expected to purchase their own equipment?,"wealthiest citizens, who were able to purchase the best equipment","They were armed with a sword and a small shield, as well as several light javelins","[Document(id=33fcf4581f5ab78359aa184280cfd3d832482718892d7c7caea5758b15dcd90b, content: ' Their role in accompanying the army was primarily to supply any vacancies that might occur in the m...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.5218022473891348)]",1.0,0.15637509524822235,1.0,1.0,1.0 +Who did Nasser's group try to assassinate?,Hussein Sirri Amer,King Saud,"[Document(id=5d0ec0248e1b31849b4fcf2b6de857e2ffd286b834f600afc95bf4d1111c7ae6, content: '"" Nasser ordered a crackdown against Syrian communists, dismissing many of them from their governmen...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 0.6098491907190069)]",1.0,0.3763999044895172,1.0,1.0,1.0 +What was AJ Ayer an important member of?,Logical empiricism,Hashimites,"[Document(id=09958ca4e60acef434f0c74dd9cf33effb3e5660b7f27f5a045864b3147d4d6f, content: ' If he felt that a governor or a commander was becoming attracted to wealth, he had him removed from...', meta: {'name': 'Umayyad_Caliphate', 'source_id': '704c4038f63f9c5a686218060cb31639c0333dda34e87e804be75e5fec05ec53', 'page_number': 1}, score: 0.41455657660131334)]",0.0,0.18943406641483307,0.0,0.0,0.0 +In which year was a settlement reached regarding GE's pollution of the Housatonic River?,1999,1977,"[Document(id=f6a12178d43defb2a103deca0f4fb166a5e99e5a8e58be48c76be81bd5d2bd38, content: ' Around the same time, General Electric's Canadian counterpart, Canadian General Electric, was forme...', meta: {'name': 'General_Electric', 'source_id': '26d4044eb2d79191635af69e57c262af8041a70871e60e989128c458529f04cc', 'page_number': 1}, score: 0.5458091961847271)]",1.0,0.5505510568618774,1.0,1.0,1.0 +What area in Charleston is considered the 'industrial area'?,the city's Neck area,Albemarle Point,"[Document(id=33520542b8f7e5d136d9a92a7dce72f05f29e134ba94ef5fbda564b9a00e3664, content: ' It moved to its present location on Oyster Point in 1680 from a location on the west bank of the As...', meta: {'name': 'Charleston,_South_Carolina', 'source_id': '6135d76cf2064b0c4f70e685c5c7e15b8093869ebd10f9a74e8acb07e2173763', 'page_number': 1}, score: 0.5352715558082974)]",1.0,0.16469033062458038,1.0,1.0,1.0 +In what year did Frédéric obtain citizenship in France?,1835,1835,"[Document(id=2dc3b5cfb05bfeb4e2c0b072a5a5e6417b1109588ffcbd88ffd03346c8bc1975, content: ' In France he used the French versions of his given names, and after receiving French citizenship in...', meta: {'name': 'Frédéric_Chopin', 'source_id': '5e95b80d7ceb3febb2d1a7dfe09730771a231fc8d23c15cd74d3fa2327cfcb14', 'page_number': 1}, score: 0.7137269057215699)]",1.0,1.0,1.0,1.0,1.0 +What was Tesla afraid someone was trying to do with his invention?,steal the invention,experiment,"[Document(id=3c13b6a4d7fc41ebb143777fda97e499cb3c96df8338be0d43bcb307df2fcbae, content: ' +In 1937, at a luncheon in his honor concerning the death ray, Tesla stated, ""But it is not an exper...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.7212605684756541)]",1.0,0.23364409804344177,1.0,1.0,1.0 +What is the group called that does not agree with government at all?,anarchists,theorists,"[Document(id=01eb5e9d28031d5d0b360b7c8f29e7ddbf87fd002dd1a6d7598b122c26821796, content: ' It was necessarily this belief that led them to adopt a strong central power as the only means of p...', meta: {'name': 'Political_philosophy', 'source_id': 'b6ae8bbff967363b12ca505926b5bff22989ed948eaa878bbf82df20c179d98b', 'page_number': 1}, score: 0.4561133241269701)]",0.0,0.44234633445739746,0.0,0.0,0.0 +What was the name of the bookstore the authors met in?,Old Corner Bookstore,"The Reverend Alexander Dyce was another benefactor of the library, leaving over 14,000 books to the museum in 1869. Amongst the books he collected are early editions in Greek and Latin of the poets and playwrights Aeschylus, Aristotle, Homer, Livy, Ovid, Pindar, Sophocles and Virgil. More recent authors include Giovanni Boccaccio, Dante, Racine, Rabelais and Molière. +Writers whose papers are in the library are as diverse as Charles Dickens and Beatrix Potter. Illuminated manuscripts in the library dating from the 12th to 16th centuries include: the Eadwine Psalter[citation needed], Canterbury; Pocket Book of Hours, Reims; Missal from the Royal Abbey of Saint Denis","[Document(id=c4e299761e10f4cdbe2878ec63543541144638c306cbf9461b4fa00ff423a2e6, content: ' The Reverend Alexander Dyce was another benefactor of the library, leaving over 14,000 books to the...', meta: {'name': 'Victoria_and_Albert_Museum', 'source_id': '202e0997b99bb8f0aaa6dd9c09c05e008cf8809c92549b6c9f073b5d26d1a3dd', 'page_number': 1}, score: 0.48250097806269554)]",0.0,0.3507031798362732,0.0,0.0,0.0 +Time and space are both examples of what type of resource?,complexity resources,"operational definition of time, wherein one says that observing a certain number of repetitions of one or another standard cyclical event (such as the passage of a free-swinging pendulum) constitutes one standard unit such as the second, is highly useful in the conduct of both advanced experiments and everyday affairs of life. The operational definition leaves aside the question whether there is something called time, apart from the counting activity just mentioned, that flows and that can be measured","[Document(id=849ac9a1750eb7540fb92137ca4256c703db879b81e07bec1f34d0071ef5b30e, content: ' An operational definition of time, wherein one says that observing a certain number of repetitions ...', meta: {'name': 'Time', 'source_id': 'f3ee42c6d38306d65c6207a15a1b9c65ff08a69728d5d0197f9593548177b229', 'page_number': 1}, score: 0.5233312924187772)]",0.0,0.1799328625202179,0.0,0.0,0.0 +What had happened to Frederick's extra troops from Russia?,"Catherine, who ended Russia's alliance with Prussia and withdrew from the war",He also placed a corps of his own troops under Frederick's command,"[Document(id=ba197620bd7dce47cd5783da779f1718bab82d5ef9f7b4a77e908fd7397ad72a, content: ' Then on 5 January 1762 the Russian Empress Elizabeth died. Her Prussophile successor, Peter III, at...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 0.726811910218078)]",1.0,0.28628912568092346,1.0,1.0,1.0 +What percentage of its workforce did Dell terminate in from 2007 to 2008?,10%,45,"[Document(id=01c8154b0e9d235879d3d4340dee5d60313f266039fc22ae569f7bc4ddabfbfe, content: ''s Macintosh that took 45 percent, seven percent at Hewlett Packard, six percent at Lenovo and Asus,...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 0.6647941450538054)]",1.0,0.3141947090625763,1.0,1.0,1.0 +What organization was Rome's official caretaker?,the senate,Imperial cult apparatus,"[Document(id=af97cb64fa6938b2e1aa470aef89519e08e5ee5a4c7b0dfb5f6779059a1c3f13, content: ' Greek allies had their own traditional cults to rulers as divine benefactors, and offered similar c...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5799821762333404)]",1.0,0.14709675312042236,1.0,1.0,1.0 +Who chose BYU as the location of the national Middle East Language Resource Center?,United States Department of Education,United States Department of Education,"[Document(id=10406721e1bacc4b94568efb8105444ce33c23b507417efe2e032c93615c0677, content: ' Several of its language programs are the largest of their kind in the nation, the Russian program b...', meta: {'name': 'Brigham_Young_University', 'source_id': 'cb3b81b6f2c540426803106face67bc5fd4db7b1a9ef422ec3601456732b4785', 'page_number': 1}, score: 0.7174902149566916)]",1.0,1.0,1.0,1.0,1.0 +Who discovered Hydrogen gas?,Robert Boyle,Robert Boyle,"[Document(id=694b7ad0ec30f16563c095bfd8aa73a286298746fa8b5b6b568961e806fe7462, content: ' +In 1671, Robert Boyle discovered and described the reaction between iron filings and dilute acids, ...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.7181772030878829)]",1.0,1.0,1.0,1.0,1.0 +Where was Polybius forced to go as hostage?,Rome,Milan,"[Document(id=e419da5121b148265bffa3a5af9e72064a03b1766267a2dfd8cd48214972ee16, content: ' He sent him, indeed, five hundred pieces of gold ""to bear his charges"" but Liberius refused them, s...', meta: {'name': 'Athanasius_of_Alexandria', 'source_id': '2a8e7ec0a44075f1a567744982dcecf4bcea19f0065d4f73c742398a05fa1642', 'page_number': 1}, score: 0.5579230492143117)]",0.0,0.5594199895858765,0.0,0.0,0.0 +In what decade was disco popular?,1970s,1970s,"[Document(id=901a9f1636faf9035626a9559750c7d346d38dbd3d60a92fc29a9aaf68f78dc7, content: ' Other performing arts venues in Miami include the Gusman Center for the Performing Arts, Coconut Gr...', meta: {'name': 'Miami', 'source_id': '39ab1ce422fd3baeedbb69a9a90b4c0e43c513f6161698478a96f09e5d9f7285', 'page_number': 1}, score: 0.5381765557556315)]",1.0,1.0,1.0,1.0,1.0 +What do the rivers form?,lakes,Nieuwe Maas,"[Document(id=2c02265ed1b53d17cbf8d8090f6a3dad995df9f6a8bbe12e06c780368660607e, content: ' The middle flow begins as Nederrijn, then changes into Lek, then joins the Noord, thereby forming N...', meta: {'name': 'Rhine', 'source_id': '23686e48a61504679a91dac10d83b287aef3dff7dabc0ab881c281059187ce6a', 'page_number': 1}, score: 0.5730175051454007)]",0.0,0.16586220264434814,0.0,0.0,0.0 +What were the first 256 code points of Unicode made identical to? ,ISO-8859-1,historic scripts,"[Document(id=df3636934a87dcaa1aa37c6146a219c020357b983841dcebcc4edf19d3146c4a, content: ' For example, the ""fullwidth forms"" section of code points encompasses a full Latin alphabet that is...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.7118722829389936)]",1.0,0.08416931331157684,1.0,1.0,1.0 +"How many countries did her song ""Irreplaceable"" get number one status in?",five,20,"[Document(id=7bd8798cd87379643365f7305e27ccc9e3b89266b2ba91439f47a12218f428ba, content: ' It has so many depth and layers that it's easily as self-aware and earnest as Ray of Light."" The al...', meta: {'name': 'Madonna_(entertainer)', 'source_id': '0868d847bb768c30a7507239169d5e441bb3181b66c8b9353a863daa3d58e425', 'page_number': 1}, score: 0.5071568670430149)]",0.0,0.5910412073135376,0.0,0.0,0.0 +How many Examination Boards exist in India?,30,30,"[Document(id=87a93f72cbb9c00ea6477b2a31d21c284a2ecf07733f93b0fb4ba7b277f5b8f5, content: ' It is within the power of both the union government and the state governments to govern schools sin...', meta: {'name': 'Private_school', 'source_id': 'c62330ba95e9919513bc9fc65c0d21a748b4a518b9b1f905a113de7f37274e44', 'page_number': 1}, score: 0.5956209852111035)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What play showed an early depiction of civil disobedience?,Antigone,Book of Exodus,"[Document(id=7494eaecd937f085419fe0fd2756a519eebdab4fc40df8775479885b329c3be9, content: ' But Stephen Eilmann argues that if it is necessary to disobey rules that conflict with morality, we...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 0.5443074411362279)]",1.0,0.20138104259967804,1.0,1.0,1.0 +"Once in conversation with the Queen, how is Elizabeth addressed?",Ma'am,Your Majesty,"[Document(id=6661c5b42cee4d9122296712db8297ce21db50225e9f1d2cad1e056d68ca32c8, content: ' Additional styles include Defender of the Faith and Duke of Lancaster. When in conversation with th...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.5861381499007268)]",1.0,0.3193480372428894,1.0,1.0,1.0 +In what century did Paris become largely into fashion?,19th,19th,"[Document(id=ff2ec1a3232f7d74c84b198defa77dd9879cdb65474a7dab2bf47da19675285e, content: ' +Paris has been an international capital of high fashion since the 19th century, particularly in the...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.6258372807721067)]",1.0,1.0,1.0,1.0,1.0 +When did Alfred the Great begin his reign?,871,886 AD,"[Document(id=97b9e2b1856762e1d11691539c2c193c07d71470734d02a5b7282ff6d4bc33ec, content: ' It was an area of political and geographical control imposed by the Viking incursions which was for...', meta: {'name': 'London', 'source_id': '384b6fa4b2a5b435a0f85244f7298e637d4502e20b701668b4f9f2add9c563e3', 'page_number': 1}, score: 0.5435868133989574)]",0.0,0.6714180707931519,0.0,0.0,0.0 +What two methods can be used when using Load Testing?,data or a large number of users,large quantities of data or a large number of users,"[Document(id=4753cc282d43999b382172589cceb076cee5e8185383911435f640a568593cbe, content: ' +Load testing is primarily concerned with testing that the system can continue to operate under a sp...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.6273370435754246)]",1.0,0.9246095418930054,1.0,1.0,1.0 +what may possibly be a solution to net neutrality concerns? ,municipal broadband,municipal broadband,"[Document(id=5f8dc7264853d3b0b88a03420e7878e16fa4c08ef70ab7cc5d722e889b21dda6, content: 'S. Federal Communications Commission (FCC) was reported to be considering a new rule that will permi...', meta: {'name': 'Internet_service_provider', 'source_id': 'b6cdeebacab1c0fc99cc348ba10a7ad5f8a09ca4691c74f89374730939ffbbb4', 'page_number': 1}, score: 0.6102544598734859)]",1.0,1.0,1.0,1.0,1.0 +What are the ground-attack aircraft in the USAF usually deployed in support for? ,U.S. ground forces,U.S. ground forces,"[Document(id=87ffdf42e04cd4e5aabc33e35caf9614ba61758e355a1d97a38a524ba0bedf46, content: ' +The ground-attack aircraft of the USAF are designed to attack targets on the ground and are often d...', meta: {'name': 'United_States_Air_Force', 'source_id': '1bb222de66e5a2c81f1d5b75bf3eced066939aec82934a5cb176a607b4e8cc8f', 'page_number': 1}, score: 0.7377392492190507)]",1.0,1.0,1.0,1.0,1.0 + In what year did the invasion of Russia take place?,1812,1853,"[Document(id=5e62f2afbd35341eab44e01cce599dfb08f5845fb82d0fb96bdbebed584d740b, content: ' As far as Britain was concerned, defeats inflicted by Russia on Persia and Turkey demonstrated its ...', meta: {'name': 'British_Empire', 'source_id': '8f673e96ef153333a8fc1d3855be60f5be1829316ed1fcb58dd8590fdb083946', 'page_number': 1}, score: 0.6213761252873606)]",0.0,0.462875097990036,0.0,0.0,0.0 +What was the practice of religion to the Romans?,personal choice,Private and personal worship,"[Document(id=8cb202622013f5234d79ec3a4275501aaa91b4fdefe32dbbccfd335829f7ffa1, content: '[citation needed] +Roman religious beliefs date back to the founding of Rome, around 800 BC. However,...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.7263754631387216)]",0.0,0.2722780108451843,0.0,0.0,0.0 +What is one example of failure testing?,Software fault injection,defects in dead code,"[Document(id=7475e2af3065cbe79f0e067059ae3ca86f6548819c59b2c8ff21a438fc2f6346, content: ' Not all defects will necessarily result in failures. For example, defects in dead code will never r...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.5822750020340757)]",1.0,0.4074403643608093,1.0,1.0,1.0 +"Out of the maximum amount of players allowed, one must be a what?",goalkeeper,goalkeeper,"[Document(id=e6ee12359c832f972207465515e8a9b305bdf92628324bb99b5470cc0b0b74b4, content: ' The same laws are designed to apply to all levels of football, although certain modifications for g...', meta: {'name': 'Association_football', 'source_id': 'f2b7305b09ad8a43b8389a5eb2d84b8ea4b4deaa7968d21fe9da9da9ecda2c6b', 'page_number': 1}, score: 0.56937587493132)]",1.0,1.0,1.0,1.0,1.0 +In which year was the new constitution ratified?,1855,1982,"[Document(id=7d6e1c6a8e83b4b29ce4b75833fc74ff72c585bad0dc7e4ea9a862be027f8038, content: 'C. Statehood Green Party. The nearest this movement ever came to success was in 1978, when Congress ...', meta: {'name': '51st_state', 'source_id': 'aa3341a0a65518fe53b022b57fd32c865f74c249a1b4d32919c38bfd1cead5b7', 'page_number': 1}, score: 0.5897939832060628)]",0.0,0.5184707045555115,0.0,0.0,0.0 +What vessel was attacked in 1845?,Naiad,Great White Fleet,"[Document(id=cd897c548533a5aba0b93b04622013a15c0c16c36e38f21aad766477a89148f3, content: ' In 1907, most of the Navy's battleships, with several support vessels, dubbed the Great White Fleet...', meta: {'name': 'Military_history_of_the_United_States', 'source_id': 'e80a3db424753f4e406b9cfac948a9c9640a8278fb51e7ba7c06781d3f1813f4', 'page_number': 1}, score: 0.5486769216069662)]",0.0,0.25486424565315247,0.0,0.0,0.0 +When did Travel + Leisure list Yale as one of the most beautiful campuses?,2011,2011,"[Document(id=1eb83a50e062eec7af6abab3de1bf0b5576a4c75ac168cc64f1b77a2e268c579, content: '7 km2) Yale-Myers Forest in Connecticut's Quiet Corner—and nature preserves including Horse Island. +...', meta: {'name': 'Yale_University', 'source_id': 'f33638c4d6b8d7572ba500f3a8affca1729aae3046c83a94e2a7424dd50f6549', 'page_number': 1}, score: 0.6826308918386579)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What sort of variants did the Human Genome Project indicate that many of the measured transcripts were?,alternative variants,expressed sequence tags,"[Document(id=c181af6d9e4133e2db3dd3214c28363e87a8f250668a4fc4d9c06fd0060f35eb, content: ' Conversely, plants can have extremely large genomes, with rice containing >46,000 protein-coding ge...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 0.516086017715844)]",1.0,0.21117305755615234,1.0,1.0,1.0 +During what year did the Italians invade Libya?,1911,1911,"[Document(id=3cdcefdaa320ecb0d100eef0c92063190f1030f98ee702385d5ad087a454afc6, content: ' He repeatedly expressed a preference for the desert over the city and retreated to the desert to me...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.6345208523493653)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What did Davidson believe the God of Aristotle is synonymous with?,rational thought,rational thought,"[Document(id=14435d1137e579f740d93ef8fabdf50640828e96d9dd732c7b8c6d4932ae6a58, content: '"" The theory was indebted to Aristotle's pluralism and his concepts of Soul, the rational, living as...', meta: {'name': 'Idealism', 'source_id': '55a0bd42a4a737442a2d93fe0926424e236910de01b2406ed43c174f2225ff9d', 'page_number': 1}, score: 0.6069175571756187)]",1.0,1.0,1.0,1.0,1.0 +What percentage of students at Notre Dame are the children of former Notre Dame students?,21–24%,"8,448 undergraduates, 2,138 graduate and professional and 1,593","[Document(id=1fa5bbb9afa8ebfbb9575bd6b27204acdf8f5bcb61e4e7771f62bb0b32428d62, content: ' +In 2014 the Notre Dame student body consisted of 12,179 students, with 8,448 undergraduates, 2,138 ...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 0.6858562650923519)]",1.0,0.25114506483078003,1.0,1.0,1.0 +On what day did U.S> planes located Ozawa's fleet?,second,midnight,"[Document(id=8f9a474db1bbc2a41725828917a296999dd03c7fd6c62c0e1adf58a5a2f207f9, content: 'S. Third Fleet was formidable – nine large carriers, eight light carriers, six battleships, 17 cruis...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 0.6451543648987118)]",1.0,0.23394635319709778,1.0,1.0,1.0 +Greek was crowned the European champions in what event?,Euro 2004,Euro 2004,"[Document(id=4cbbc741471265b8757147f853168935ce795df102abf69b11f058149ff15eff, content: ' Their best ever performance was in the 1896 Summer Olympics, when Greece finished second in the med...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.6349059298170436)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Which tense indicates that an action hasn't finished?,imperfective,imperfective,"[Document(id=06048f854345fb7e2b6f12877040f57cf87279336165b7cb7893e29f87cc9222, content: ' On occasion of the publication’s 45th anniversary, the Croatian weekly journal Forum published the ...', meta: {'name': 'Serbo-Croatian', 'source_id': '2cc55814bc801258e8300ed82e003428dbf11715c74ad99297d61c5dd77023f4', 'page_number': 1}, score: 0.4510094094135552)]",1.0,1.0,1.0,1.0,1.0 +What acoustic element could be distorted by different audio settings in iPods prior to 2007?,Bass,bass instruments,"[Document(id=158a6b8443733e42d24dec31180770da4724f3bd14ed49e47a937624f7aeed47, content: ' This would happen for EQ settings like R&B, Rock, Acoustic, and Bass Booster, because the equalizer...', meta: {'name': 'IPod', 'source_id': 'f0324763387a7e11c235569bada6935891b2c5ba40dd065ab5acc1bb5de8dbed', 'page_number': 1}, score: 0.626275648011194)]",1.0,0.8350290656089783,1.0,1.0,1.0 +In what way can Neutral triatomic hydrogen exist?,excited form,molecular forms,"[Document(id=20580d781b7cb1b0f929dd861809d47639d869e422d7e9f42898f3544f7840a9, content: ' The most common isotope of hydrogen, termed protium (name rarely used, symbol 1H), has one proton a...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.6461370372692088)]",1.0,0.4372621178627014,1.0,1.0,1.0 +How are children used in the conflicts in Burma?,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements.,Children are being sold as conscripts,"[Document(id=0088f05f01a71535c2cb1e5b41db4930183fdb107aa5d9b1b2402b982ed6e8a4, content: '"" In July 2013, the Assistance Association for Political Prisoners indicated that there were approxi...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.597721040777937)]",1.0,0.31609007716178894,1.0,1.0,1.0 +In what decades were Arsenal players stereotyped in a negative way?,1970s and 1980s,1970s and 1980s,"[Document(id=1bdd87144e8c05c0c8820e3bd6186deb63a4be13d5810e39af795993e4721bb7, content: ' The book was twice adapted for the cinema – the 1997 British film focuses on Arsenal's 1988–89 titl...', meta: {'name': 'Arsenal_F.C.', 'source_id': 'b197b14120aabbc3bf2073d4b3e99c1c51d56d5ac5248f8a97dadcfa056a6f7e', 'page_number': 1}, score: 0.5961240467039464)]",1.0,1.0,1.0,1.0,1.0 +When did new construction start in Melbourne?,late 1890s,1803,"[Document(id=ea0a06b9a04457ce2c807b8789e8cac9f6cb1392ea3c2c34eeac6524dc27a03a, content: ' With Great Britain having claimed the entire Australian continent east of the 135th meridian east i...', meta: {'name': 'Victoria_(Australia)', 'source_id': 'de2fb2860506705876798514a70ef6d34931928f0161bda737ae7b6c7f28a056', 'page_number': 1}, score: 0.5539068992531617)]",0.0,0.3583161234855652,0.0,0.0,0.0 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:10.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:10.csv new file mode 100644 index 0000000..69b6212 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:10.csv @@ -0,0 +1,120 @@ +questions,true_answers,predicted_answers,retrieved_docs,doc_mrr,sas,doc_map,doc_recall_single_hit,doc_recall_multi_hit +What were the earliest priesthoods? ,flamines,flamines,"[Document(id=ac8e9c286327aea24d00bceb002b08ad3fbc3007673c77da9b48b6b6b91aa76f, content: ' Some rites of the domus (household) were held in public places but were legally defined as privata ...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5865511627398434)]",1.0,1.0,1.0,1.0,1.0 +When was a third visit to Mnafeld scheduled? ,early 1546,August 1943,"[Document(id=e8b83e3a1a1c126abbafbdcc3a1032bec3194faa41994b581f26eaa7f9d0f307, content: ' The descriptions of these were published in Memoir III of the Australian Museum Sydney between 1896...', meta: {'name': 'Tuvalu', 'source_id': '4a4e309bc4b63f2a542d80bca7c9d33d9df6ccfa328612b01483ad5d2101621d', 'page_number': 1}, score: 0.3557680082127386)]",0.0,0.27104127407073975,0.0,0.0,0.0 +What was the name of the airfield where heavy raids took place?,Farnborough,Coventry,"[Document(id=d8fd97e153fa909813de22390c7c4607821a5041d6350b606b4f95c6304faa9d, content: ' ""Pathfinders"" from 12 Kampfgruppe 100 (Bomb Group 100 or KGr 100) led 437 bombers from KG 1, KG 3, ...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 0.5517882669038934)]",1.0,0.4347962737083435,1.0,1.0,1.0 +The Modern English ending -'s is derived from what ending in Old English?,-es,-es,"[Document(id=081239836dc280856dc2c1ab168f17d8fffffed12cbaeb50b62cd11b58a116b4, content: ' It maintains several distinct cases: the nominative, accusative, genitive, dative and (vestigially)...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.48628358032025165)]",1.0,0.9999998807907104,1.0,1.0,1.0 +In what area is it common for spiritual mentorship to be extremely high?,the West,Hinduism,"[Document(id=c1b15c9f6474425049e145f56eea6b22790074204f55cd26eb005fca608f63a8, content: ' This guidance is generally offered during a ceremony called the patriarchal blessing. +In Hinduism t...', meta: {'name': 'Teacher', 'source_id': '8d3b7bd3176a10380300a49fa713b8fffa5e77a181df51ea823d2bdeb46d4c7b', 'page_number': 1}, score: 0.47522311411982915)]",1.0,0.20152544975280762,1.0,1.0,1.0 +Who is the only Greek volleyball club to win European titles?,Olympiacos,"Panathinaikos, Olympiacos, Aris Thessaloniki, AEK Athens, P.A.O.K, Maroussi","[Document(id=76be05dc0de2dfbcf6d3d16e80abbfa4ee57f55d30696681b941ecd6ff700822, content: 'O.K. Greek basketball teams are the most successful in European basketball the last 25 years, having...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.6240392690147745)]",1.0,0.5849509239196777,1.0,1.0,1.0 +How many judges were on the panel for this season?,four,three,"[Document(id=17489faf6fb39343de67cc2bfe448e959559775109bb646aa0e4d02b716ddf77, content: ' Jennifer Lopez and Steven Tyler joined the judging panel in season ten, but both left after two sea...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 0.6295865021297492)]",1.0,0.8232484459877014,1.0,1.0,1.0 +"Along with a chemical process, what type of process is used by night vision devices to convert infrared into visible light?",electrical,electrical,"[Document(id=1c8a379dc092fa8d374927640e6d16f46286416581451c66a1da80fef1412acc, content: ' Infrared can also be used to detect protostars before they begin to emit visible light. Stars emit ...', meta: {'name': 'Infrared', 'source_id': 'ba9d9843065facd8aa75e472f0c7d760ae3d78712335a49f7abaf499acb21d1c', 'page_number': 1}, score: 0.7197352146596256)]",1.0,1.0000001192092896,1.0,1.0,1.0 +The second most prominent religion in New York is what?,Judaism,Judaism,"[Document(id=1eed45d4c946ec058384afdd313db9b847d2b384629f1872466e03c7bc13a2e5, content: ' +Christianity (59%), particularly Catholicism (33%), was the most prevalently practiced religion in ...', meta: {'name': 'New_York_City', 'source_id': '8a1509d057a51ed7a68b42ff87df5d459ac7f15f3d0f4f1685859ba197f9686d', 'page_number': 1}, score: 0.6574727180609481)]",1.0,1.0,1.0,1.0,1.0 +The Free Hanseatic City of Bremen includes which cities?,Bremen and Bremerhaven,Bremen and Bremerhaven,"[Document(id=0427081e9deb9c727f124b818203b3d0cc649efe97ddc37c49cee8cebfa2c3a1, content: 'Germany is a federal republic consisting of sixteen federal states (German: Bundesland, or Land).[a]...', meta: {'name': 'States_of_Germany', 'source_id': '60764e89141121f734e956aeeb48fcbe12c7e4fdec683c4f9fa29406a3f0579e', 'page_number': 1}, score: 0.5043829724168043)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What seeks to limit the use of polysyllabic characters?,CPC,"Since version 3.0, any precomposed characters that can be represented by a combining sequence of already existing characters can no longer be added to the standard in order to preserve interoperability between software using different versions of Unicode","[Document(id=8abb498ed2e51ecc6d89f4e5b9bcdce65b632039dd82bfefb02d5dec25036fbd, content: ' +Modern font technology provides a means to address the practical issue of needing to depict a unifi...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.5751440958199375)]",0.0,0.03272436559200287,0.0,0.0,0.0 +In what courts can federal habeas corpus suits be brought?,federal,lower federal courts,"[Document(id=af588e16cba35397799ed402e254a69a9445c63e2da06985a64c8440d5e111ea, content: ' Most often these are claims, such as ineffective assistance of counsel, which requires the court to...', meta: {'name': 'Capital_punishment_in_the_United_States', 'source_id': '0a4e4dda32f7be6a4f78da5ed9ee21d2cc7659112c0d087b2923a482bdc77b5b', 'page_number': 1}, score: 0.5401086806576698)]",1.0,0.5716047286987305,1.0,1.0,1.0 +What did Whitehead consider himself as a philosopher? ,rank amateur,rank amateur,"[Document(id=a022efa8ed739794e05008dbe44d9b83a87fc4f4b7a609ec096b9d437bc5089e, content: ' For Whitehead, education should be the exact opposite of the multidisciplinary, value-free school m...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 0.6345955416988933)]",1.0,1.0,1.0,1.0,1.0 +What condition does a person have to meet to be allowed by law on a coin?,deceased,borrow money on the credit of the United States,"[Document(id=9f240c943520891dcb4853fa0b77bb9da0c6144acd3f67acca4ff043b2823fb2, content: ' +Early releases of the Washington coin included error coins shipped primarily from the Philadelphia ...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.40843243316554156)]",1.0,0.019053475931286812,1.0,1.0,1.0 +Which region of the US experiences rain year round?,Pacific Northwest,sub-Tropical High Pressure Belt,"[Document(id=106db074c0cb96eb9dae93492a807c47bc87218f2ec096f0b6b51dba419575fc, content: ' +Typically the sub-Tropical High Pressure Belt, with frequent clear skies, provides more than 300 da...', meta: {'name': 'Namibia', 'source_id': 'b9f689c4f90121083289631dd033d301a051af9eb1e03125f1200942937a6318', 'page_number': 1}, score: 0.5784191859462218)]",0.0,0.19846832752227783,0.0,0.0,0.0 +What is the ability to recognize and adapt to new specific pathogens called?,Adaptive (or acquired) immunity,adaptive immune system,"[Document(id=329774fcd326068e3e3307eab728f9f7211002cdf9ddedab811c99d1a4968642, content: ' If pathogens successfully evade the innate response, vertebrates possess a second layer of protecti...', meta: {'name': 'Immune_system', 'source_id': 'fcb8c5092b2596a66d4f96587d653cbabda606874d86632c35ec40ef8f023051', 'page_number': 1}, score: 0.6230771628644063)]",1.0,0.8636709451675415,1.0,1.0,1.0 +Who allegedly hit a home run to the Center?,Babe Ruth,Babe Ruth,"[Document(id=bde5f6c2b4913151f9e46d6578f4f20345ade116f2e96b5ae76ef509e1e517dd, content: ' Because of the tie the Giants and Cubs ended up tied for first place. The Giants lost the ensuing o...', meta: {'name': 'Chicago_Cubs', 'source_id': '10bd5330115a2fe2b9ef60d21abe3942769709161783409af0c4444ccac129c3', 'page_number': 1}, score: 0.5515789425362991)]",1.0,1.0,1.0,1.0,1.0 +Which type of graduate from Imperial earned the second highest average starting salary after graduation?,Computing graduates,Computing,"[Document(id=9be1bf507c2859b61088d1f83c172682620003fe33104d080a9b6b3a2256570a, content: ' Students are encouraged to take these classes either for credit or in their own time, and in some d...', meta: {'name': 'Imperial_College_London', 'source_id': '8ff02d910ce681fdd72074b13ed322271625386966ee827dabf1c93ca556fd6e', 'page_number': 1}, score: 0.5930303365803692)]",1.0,0.6075230836868286,1.0,1.0,1.0 +In what year did Eisenhower propose his second civil rights act?,1960,1960,"[Document(id=78d1760ca2fa0904e5480a91d388ba9d6c37d737a65426e5cbb2488333bc1e1a, content: ' Eisenhower made clear his stance in his first State of the Union address in February 1953, saying ""...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 0.6669113603959826)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What is the current height limitation in Paris in central areas?,50 m,50 m (160 ft),"[Document(id=891cab83df456a1bb11d8e7c301e45b02e564d224d3be1f7bdb82d02edc8f221, content: ' Modern Paris owes much to its late 19th century Second Empire remodelling by the Baron Haussmann: m...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.7160253770681686)]",1.0,0.8085941672325134,1.0,1.0,1.0 +Income level was found to have a signification correlation to which outcome in the study?,nutrition literacy,inequality,"[Document(id=0088e02d1bc9f26df2c1dbc19e8cba8d1c8b418df18bf8e8cfeb9bdce1034e21, content: ' Thomas Sowell, in his book Knowledge and Decisions, claims that this difference is due to women not...', meta: {'name': 'Economic_inequality', 'source_id': '648ff32b8c146c974b23e115e3a61420b1cdbea9de1f5bf38442ce4c1d9caa42', 'page_number': 1}, score: 0.4178487217478476)]",0.0,0.07081164419651031,0.0,0.0,0.0 +How much did Viacom want in damages in their lawsuit against youtube?,$1 billion,$1 billion,"[Document(id=56ff4bdb4f543c61c1f516ba1e544e83e117ab5512bae58b2264fa6b6d86f26b, content: ' Three successful complaints for copyright infringement against a user account will result in the ac...', meta: {'name': 'YouTube', 'source_id': '4d18798daeceb997a6dcb04b575dbbdad768671ca9202116370611d944d510be', 'page_number': 1}, score: 0.7654878512589371)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who should be able to challenge administrative orders in court?,individuals,individuals,"[Document(id=5c5f6a0591170904a56f4c0417e667089761d8d8f013f55283aff164fdadd75b, content: ' Dicey's: “no man is punishable or can be lawfully made to suffer in body or goods except for a dist...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 0.5930262907390398)]",1.0,1.0,1.0,1.0,1.0 +The Slavs were under what name in the early 6th century?,the Antes and the Sclaveni,Old East Slavic,"[Document(id=7b6091c50ae3f1a95d4b376e2217d7e57a8ae5ce8ee142f464dc04a21b4d3032, content: ' The oldest documents written in Old Church Slavonic and dating from the 9th century attest Словѣне ...', meta: {'name': 'Slavs', 'source_id': 'a55731ff3cd03d8fe34d4e646dbfcdc2088f01190e5110c9ff0baf4e4b486847', 'page_number': 1}, score: 0.6072478255953464)]",1.0,0.3049790859222412,1.0,1.0,1.0 +How are the results simplified when using the SASO?,"into four summaries that look specifically at responses that correspond to either homosexuality, heterosexuality, bisexuality or asexuality.",four summaries,"[Document(id=c5da820627d1881a16bd936f9f4ee4b1126ca0003f08cdb0cd5303554431224f, content: ' Despite seven categories being able to provide a more accurate description of sexual orientation th...', meta: {'name': 'Sexual_orientation', 'source_id': 'f94279650936a403aa3d2b43ab1ad42b1fd14d05455aa86fbcda6cda978ffe5c', 'page_number': 1}, score: 0.38699930943871186)]",1.0,0.5629022717475891,1.0,1.0,1.0 +What socioeconomic class used the Florentine Tuscan language in Dante's time?,upper class,literate and upper class,"[Document(id=149c6390292392d4438c9191dca43bbe827c20a86bb7bfc502679813d7fcc108, content: ' Due to Eastern Lombard's status as a Gallo-Italic language, an Eastern Lombard speaker may, in fact...', meta: {'name': 'Dialect', 'source_id': '2e05b686e48c9512ae5466f1d15202b44fdc903ea32d0a42c5dbc9ef173d051d', 'page_number': 1}, score: 0.6197546116557725)]",1.0,0.7113619446754456,1.0,1.0,1.0 +"In addition to ammo proposals, what other need was articulated?",The need for some form of tracer or smoke trail,smaller-calibre ordnance,"[Document(id=650eb6f51f4e011cd8d19f5e1848ee5932e97eba7ae0fd82dc71d506c549db9e, content: ' Their needs could cogently be met with smaller-calibre ordnance beyond using the usual singly-mount...', meta: {'name': 'Anti-aircraft_warfare', 'source_id': 'c54639424f62ce369356ed8c4e1e9fab8769359dd28029aa89832f5aa994f45d', 'page_number': 1}, score: 0.40298211471750184)]",1.0,0.10714080184698105,1.0,1.0,1.0 +What is that power that enables us to love?,Prevenient grace,God,"[Document(id=b25294113fba2bccd0a9a138b7273d6f86f678ed0b436dc0dbaa101779bcbba6, content: ' There were two Papal committees and numerous independent experts looking into the latest advancemen...', meta: {'name': 'Pope_Paul_VI', 'source_id': 'fc82a40c990c0b3840b06b32643858d39984b66d78ae950561d809dc84d6edf8', 'page_number': 1}, score: 0.354683878905906)]",0.0,0.29289260506629944,0.0,0.0,0.0 +"Who was assassinated on September 22, 189 AD?",He Jin,He Jin,"[Document(id=1023d346fa09f68f024520eed11ceb8d146eebd929e4d93e19ec61ae32461e57, content: ' 189 AD), half-brother to Empress He (d. 189 AD), plotted with Yuan Shao (d. 202 AD) to overthrow th...', meta: {'name': 'Han_dynasty', 'source_id': '060d95b91f9c20e5c8cd8b8a032ab44aac38ab88ce62d3c302bea0865f533568', 'page_number': 1}, score: 0.442711163558856)]",1.0,1.0000001192092896,1.0,1.0,1.0 +How did Gaddafi change the legal system?,he introduced sharia as the basis for the legal system,introduced sharia,"[Document(id=e2a95241acaa96c9b3665ffd83614f229b121a721d01ae7e3a778b2ce1d5050d, content: ' Intent on pushing Libya towards ""Islamic socialism"", he introduced sharia as the basis for the lega...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.5626624054289608)]",1.0,0.7288561463356018,1.0,1.0,1.0 +What name was it assumed Elizabeth would take upon her marriage to Philip?,"Mountbatten,",Peter Townsend,"[Document(id=4b69cd784f30edcd0bcf343e88529300a3f5f46cab6549e05e32a9f6208375df, content: ' +Elizabeth and Philip were married on 20 November 1947 at Westminster Abbey. They received 2500 wedd...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.6765609310084981)]",1.0,0.1941501349210739,1.0,1.0,1.0 +What was the attack on the British weakness?,Battle of Fort Bull,British sea communications,"[Document(id=e3840aa293e2746e6b5de03b1b0e01e25edbe19aa5e08c07e0e041b7bcb110e3, content: ' In April 1941, when the targets were British ports, rifle production fell by 25%, filled-shell prod...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 0.5272865072304967)]",0.0,0.04938885569572449,0.0,0.0,0.0 +How much of South Tucson is Native American?,10%,16.9%,"[Document(id=f34e8297a274ac70a2a126825fc3935412353cb3a42fcc32db84b6e7e69baf97, content: '9% Asian, 0.2% Pacific Islander, 16.9% from other races, and 3.8% from two or more races. Hispanic o...', meta: {'name': 'Tucson,_Arizona', 'source_id': '1cd8368afbbb7156e4d37c16c9795a1cf7209fbd0036331b6759337ae62d7224', 'page_number': 1}, score: 0.5307301750521758)]",1.0,0.6942497491836548,1.0,1.0,1.0 +Whose theory claimed that species were unchanging and designed by God?,John Ray,John Ray,"[Document(id=67c87173e747d72d79bc92866e553902d56583aae14965ecaa3b2da507256976, content: ' After the turmoil of the English Civil War, the Royal Society wanted to show that science did not t...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 0.6184567229523368)]",1.0,1.000000238418579,1.0,1.0,1.0 +Who can restore a database that has been corrupted?,database administrator,a database administrator,"[Document(id=a3a5fb3ed16579bb6ea58603baf5e6a8ca709ead98470e3f1da0f97c43462650, content: ' When this state is needed, i.e., when it is decided by a database administrator to bring the databa...', meta: {'name': 'Database', 'source_id': '98b09743ece9be42ad021f0dc6ffab08c322e7eeace6aae0b98b4bbf67224bd3', 'page_number': 1}, score: 0.3669672787300863)]",1.0,0.9447140693664551,1.0,1.0,1.0 +What unpopular philosopher's text did Avicenna learn from?,Abu Abdullah Nateli,Abu Abdullah Nateli,"[Document(id=cbebf9a70685ea0e72bcf8c9f1dbbe9b7163ee26279b633446edf34314600fb3, content: ' Janssens demonstrated that Avicenna was a Sunni Hanafi. However, the 14th cenutry Shia faqih Nurull...', meta: {'name': 'Avicenna', 'source_id': '25cbb1011a29473e1dfa8d96ebe95bc8ead7393548de57e871f959624f7238f1', 'page_number': 1}, score: 0.6135459338143731)]",1.0,1.0,1.0,1.0,1.0 +What was the event called that caused Ayutthaya to reclaim lost territories ?,the Chinese threat,Ayutthaya recovered its territories by 1770,"[Document(id=f08f17188f751088b2b76f2c1a69ffc86f340979f05ea84b4b201def163ec6f3, content: ' Its trade and secular administrative reforms built a prosperous economy for more than 80 years. Fro...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.562789144621481)]",1.0,0.07690561562776566,1.0,1.0,1.0 +"Before the introduction of the Latin alphabet, how was Old English written?",runic system,a runic system,"[Document(id=e8db61efa2c902ccb7106c7f85b976fcd3c5e267fd62cf6003d23ef9f3a8b528, content: ' Macrons over vowels were originally used not to mark long vowels (as in modern editions), but to in...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.5674121980026754)]",1.0,0.9278781414031982,1.0,1.0,1.0 +"When groups that are antagonistic to each other form a coalition for hidden gain, it is called what?",unholy alliance,unholy alliance,"[Document(id=c54ac19342651f97c105c4825d0b82cbf05684cf1d7c4921e0d078a7212b4a21, content: ' A common type of embezzlement is that of personal use of entrusted government resources; for exampl...', meta: {'name': 'Political_corruption', 'source_id': 'e7c84fb03cb06b72c4ab51eb24218e9f2a10b95493e43d575342db0ffa3e9e1d', 'page_number': 1}, score: 0.5087219209237853)]",1.0,1.0,1.0,1.0,1.0 +What made a strong economic case for investing in low carbon technologies?,Stern Review,International Energy Agency,"[Document(id=e95b189deaf952c54a6d9d9b63c4e39a8ca3f10302437e822af426093ae93e9e, content: ' Some solar balloons are large enough for human flight, but usage is generally limited to the toy ma...', meta: {'name': 'Solar_energy', 'source_id': '4b26f9c886129869a3884289e417d88e1bc8a531535e048a63701a7b1784a8c1', 'page_number': 1}, score: 0.4775217647255643)]",0.0,0.12792061269283295,0.0,0.0,0.0 +"Who introduced the Latin version of the word ""insect""?",Pliny the Elder,Pliny the Elder,"[Document(id=de63707a025645c7fcc7a26e6dd37ddd29a27cd60ff187ce487e0d07217708ee, content: ' +The word ""insect"" comes from the Latin word insectum, meaning ""with a notched or divided body"", or ...', meta: {'name': 'Insect', 'source_id': 'e14215fdfb7e1394430dedb24549d773464f110b7f19b86558bc101f84126630', 'page_number': 1}, score: 0.7250091339820998)]",1.0,0.9999998807907104,1.0,1.0,1.0 +What was started by the Roman Catholic Church to dispense of the Cathars in Languedoc?,The Cathar Crusade,The Cathar Crusade,"[Document(id=3f50079b5ebfa4f0c771fad96712717b3bd3050f02fa3abfb19f87bc049689e1, content: ' By the 11th century, more organised groups such as the Patarini, the Dulcinians, the Waldensians an...', meta: {'name': 'Heresy', 'source_id': '8ba473262f8508a9c99699bb09cd022993896ece1fed1463a9cf4bd7eb0c3e68', 'page_number': 1}, score: 0.5133480868513103)]",1.0,1.0,1.0,1.0,1.0 +What music service is a huge competitor for Tidal?,Spotify,Spotify,"[Document(id=f0c6a5cd5408a0fc01c220e5910c03a2241fbe85099410f518592c1fb7098441, content: ' ""Creating a partnership with Beyoncé, one of the most hard-working and talented people in the world...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 0.6717989493314664)]",0.0,1.0,0.0,0.0,0.0 +How much was each copper token worth?,a halfpenny,legal tender at face value,"[Document(id=c5c8d55b2c3952d9e29c10530dc199fccdd3bfb5dc897ac40dd8ff1d01e6a97f, content: ' This has to do with Alexander Hamilton's suggestion to Congress of a fixed 15:1 ratio of silver to ...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.5587388197776981)]",0.0,0.16358545422554016,0.0,0.0,0.0 +What ships were attacked in 1852?,Glencoe and Sea Nymph,Glencoe and Sea Nymph,"[Document(id=353e78a8c5526319fefad1c37e2e3511ce4323f2f90be311b53a11227b5bd85c, content: ' In 1852 the San Francisco-based ships Glencoe and Sea Nymph were attacked and everyone aboard excep...', meta: {'name': 'Marshall_Islands', 'source_id': '762a4f6095de127ded5441c7a17d1c9c38f731ac9cb1ffe8de30af6573be7edf', 'page_number': 1}, score: 0.5028705016342824)]",1.0,1.0,1.0,1.0,1.0 +What was the first fossil to display both clearly traditional reptilian characteristics?,Archaeopteryx,Archaeopteryx,"[Document(id=0815d5b0f30156ea39d24a8a15062d5e21822317a105f659f2000892f8d657c8, content: ' +The consensus view in contemporary paleontology is that the flying theropods, or avialans, are the ...', meta: {'name': 'Bird', 'source_id': '76114e3e9c92caec1de2bcfd38b6f22e1b48487f665fa6392f914934a6748ae1', 'page_number': 1}, score: 0.5707345625652527)]",1.0,0.9999998807907104,1.0,1.0,1.0 +What are the theories that integrate molecular genetics with Darwinian evolution called?,the modern evolutionary synthesis,modern evolutionary synthesis,"[Document(id=e1b3872c9a56b024a609ebeaf211b3ea1b4799599e6935c3dd62e613f036575b, content: ' This dogma has since been shown to have exceptions, such as reverse transcription in retroviruses. ...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 0.5384574426271362)]",1.0,0.9789240956306458,1.0,1.0,1.0 +At what time did Tesla get dinner?,8:10 p.m,8:10 p.m,"[Document(id=2ed21a855190ee9369cd4351878edc528d570a2136e3a2612158d6108a59491e, content: ' until 6:00 p.m. or later, with dinner from exactly 8:10 p.m., at Delmonico's restaurant and later t...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.6844753001764978)]",1.0,1.0000001192092896,1.0,1.0,1.0 +In what year did the French reach Alsace?,1444,1444,"[Document(id=9dbf1201bfba9c8190d82357ee4f2f5a482d6d6711313439dac94a993edf3757, content: ' +Holy Roman Empire central power had begun to decline following years of imperial adventures in Ital...', meta: {'name': 'Alsace', 'source_id': '1105499d303196fb0b7ae4d26209720b0040e63376f114d458fd77f05836cbd4', 'page_number': 1}, score: 0.6132608572607257)]",1.0,0.9999998807907104,1.0,1.0,1.0 +Who was responsible for the handling of the Roman census?,two censors,censors,"[Document(id=c149e308ac4553abf8316efb49a5900ee5ff1e1eda0f20dd8b01137b5904cd33, content: ' Praetors administered civil law and commanded provincial armies. Every five years, two censors were...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.4873572047508343)]",1.0,0.8454219698905945,1.0,1.0,1.0 +When was a resolution agreed to about Chinese human rights issues in San Francisco?,"April 1, 2008","April 1, 2008","[Document(id=d16248de2a5e0d9e6bbb7e85b411bf0a4143206e20471ecab73dc7158ce992b1, content: ' The cost to the city for hosting the event was reported to be USD $726,400, nearly half of which ha...', meta: {'name': '2008_Summer_Olympics_torch_relay', 'source_id': 'a45e6ea09a1ff12d8a239a682c8ef3735ae894d5ab6427b50909053bb046e8b5', 'page_number': 1}, score: 0.5374786638713411)]",1.0,1.0,1.0,1.0,1.0 +Who was the last pope to be crowned?,Paul VI,"Angelo Giuseppe Roncalli,[a] Italian pronunciation: [ˈandʒelo dʒuˈzɛppe roŋˈkalli]; 25 November 1881 – 3 June 1963","[Document(id=2637c74f0c9d8013db6dd5e001b320fad50f7374ada702e24a314846612ef144, content: 'Pope Saint John XXIII (Latin: Ioannes XXIII; Italian: Giovanni XXIII) born Angelo Giuseppe Roncalli,...', meta: {'name': 'Pope_John_XXIII', 'source_id': '21b200b333306d54410b53cfd12e123951dabfc1f534196e5309132d00e8184d', 'page_number': 1}, score: 0.5965553455670057)]",0.0,0.26511240005493164,0.0,0.0,0.0 +Who handles serious medical emergencies on Norfolk Island?,the Royal Australian Air Force,Royal Australian Air Force,"[Document(id=c3327e32d8658307f8dca684bbdeb22895ca7a8d7ee8ad6ff0eba6200c5e6da2, content: ' All visitors to Norfolk Island, including Australians, are recommended to purchase travel insurance...', meta: {'name': 'Norfolk_Island', 'source_id': '8be872c9c3302a16163599d52ec60841ddff13042f8e8e2e0aa14de2c192e2ee', 'page_number': 1}, score: 0.6517133656496545)]",1.0,0.9866578578948975,1.0,1.0,1.0 +Were Roman soldiers expected to purchase their own equipment?,"wealthiest citizens, who were able to purchase the best equipment","They were armed with a sword and a small shield, as well as several light javelins","[Document(id=1983d91ab2ea433561865fb3ff1c494c8513349262f6ecf4a75e3747f0f18a6a, content: ' Their role in accompanying the army was primarily to supply any vacancies that might occur in the m...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.5060990867529696)]",1.0,0.15637509524822235,1.0,1.0,1.0 +Who did Nasser's group try to assassinate?,Hussein Sirri Amer,Israel,"[Document(id=1a7bb072fc739a204d2311fb1c7faab806a096efeb33e19be04be9eae7061a3d, content: ' Nasser refused the call upon determination that the air force lacked pilots and Amer's handpicked o...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 0.5484507326078425)]",1.0,0.2845463156700134,1.0,1.0,1.0 +What was AJ Ayer an important member of?,Logical empiricism,his clan,"[Document(id=0fa0d88bd1e937bdfda2ada2f0a26524ce558edae20b1537db3005c2c91407d3, content: ' Uthman placed some of the trusted members of his clan at prominent and strong positions throughout ...', meta: {'name': 'Umayyad_Caliphate', 'source_id': '704c4038f63f9c5a686218060cb31639c0333dda34e87e804be75e5fec05ec53', 'page_number': 1}, score: 0.36949795109946626)]",0.0,0.07835237681865692,0.0,0.0,0.0 +In which year was a settlement reached regarding GE's pollution of the Housatonic River?,1999,1999,"[Document(id=6c8a4bf97575a50cf435a385d35038780603d1e0468298d1d46906e5d6f5565e, content: ' Its main offices are located at 30 Rockefeller Plaza at Rockefeller Center in New York City, known ...', meta: {'name': 'General_Electric', 'source_id': '26d4044eb2d79191635af69e57c262af8041a70871e60e989128c458529f04cc', 'page_number': 1}, score: 0.5025808224824159)]",1.0,1.0,1.0,1.0,1.0 +What area in Charleston is considered the 'industrial area'?,the city's Neck area,Charleston–North Charleston–Summerville Metropolitan Statistical Area,"[Document(id=620f512c3df1eb1788fcecbd5ec4c0438953e005020fab4303e121d55369ade5, content: 'Charleston is the oldest and second-largest city in the U.S. state of South Carolina, the county sea...', meta: {'name': 'Charleston,_South_Carolina', 'source_id': '6135d76cf2064b0c4f70e685c5c7e15b8093869ebd10f9a74e8acb07e2173763', 'page_number': 1}, score: 0.5246972383754526)]",1.0,0.3331555128097534,1.0,1.0,1.0 +In what year did Frédéric obtain citizenship in France?,1835,1835,"[Document(id=39112deed098f10b50cfae893e4a07e0541a59e3ede2de2a696f87f4dd3a60cf, content: '"" When in September 1831 he learned, while travelling from Vienna to Paris, that the uprising had be...', meta: {'name': 'Frédéric_Chopin', 'source_id': '5e95b80d7ceb3febb2d1a7dfe09730771a231fc8d23c15cd74d3fa2327cfcb14', 'page_number': 1}, score: 0.6957076687039898)]",1.0,1.0,1.0,1.0,1.0 +What was Tesla afraid someone was trying to do with his invention?,steal the invention,steal the invention,"[Document(id=1011faa25e59a33b095480ce04c180eb0c71513aa2d54d760fe11160d7fa844b, content: '"" His records indicate that the device is based on a narrow stream of small tungsten pellets that ar...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.6696097359530666)]",1.0,1.0,1.0,1.0,1.0 +What is the group called that does not agree with government at all?,anarchists,Anti-Federalists,"[Document(id=ddb6f079d765c0097f37edeb70c0b91d00084682a1d1fd26a1e5fa0f636c0d31, content: ' Tensions between territories can still be found in federalist countries such as Canada and federati...', meta: {'name': 'Federalism', 'source_id': '05eb4479f6579b86eef28e7954d58c2044997504c63cad297935c95307861772', 'page_number': 1}, score: 0.42387948236305956)]",0.0,0.5036917924880981,0.0,0.0,0.0 +What was the name of the bookstore the authors met in?,Old Corner Bookstore,Lippincott,"[Document(id=e302bfc687b3918c099a0b5ca3053c72bfd1fccfaee0997a2e91ff94d201d3db, content: ' Hoping to be published, Lee presented her writing in 1957 to a literary agent recommended by Capote...', meta: {'name': 'To_Kill_a_Mockingbird', 'source_id': 'f1e375014d37d0eb57fcd3e14861d458b3adfe0e71162344652acf38ae9d1bbb', 'page_number': 1}, score: 0.46735501418952785)]",0.0,0.1679261326789856,0.0,0.0,0.0 +Time and space are both examples of what type of resource?,complexity resources,a systematic mental framework,"[Document(id=11495c74c8b11a9455df2289e0ff9463ec8bfc2283068403eab6563e5116917e, content: '26) by which we simultaneously grasp the past in memory, the present by attention, and the future by...', meta: {'name': 'Time', 'source_id': 'f3ee42c6d38306d65c6207a15a1b9c65ff08a69728d5d0197f9593548177b229', 'page_number': 1}, score: 0.4733092895012373)]",0.0,0.24103975296020508,0.0,0.0,0.0 +What had happened to Frederick's extra troops from Russia?,"Catherine, who ended Russia's alliance with Prussia and withdrew from the war",He also placed a corps of his own troops under Frederick's command,"[Document(id=c90b28034a392eb2832dcb9a2ae1e8b326e72b851e54b5c70c28b7477ac220bd, content: ' Then on 5 January 1762 the Russian Empress Elizabeth died. Her Prussophile successor, Peter III, at...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 0.7323095236946228)]",1.0,0.28628912568092346,1.0,1.0,1.0 +What percentage of its workforce did Dell terminate in from 2007 to 2008?,10%,10%,"[Document(id=0a1d789e1e3ae04146d38c10ae30c76bb73173ec2df854d0b0695309c7e8629a, content: '” +On April 23, 2008, Dell announced the closure of one of its biggest Canadian call-centers in Kanat...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 0.659275856771866)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What organization was Rome's official caretaker?,the senate,the senate,"[Document(id=4b02bca62f562d6c5bfda9faf0b29f77e95b817f971009b508742099d4c57423, content: ' None of these defixiones seem produced by, or on behalf of the elite, who had more immediate recour...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5735551960195144)]",1.0,1.0,1.0,1.0,1.0 +Who chose BYU as the location of the national Middle East Language Resource Center?,United States Department of Education,United States Department of Education,"[Document(id=0b3a599a54fac34c82f29d858c51bbf5ce4f37032b3539a76c2b8a6b918abc6e, content: ' Several of its language programs are the largest of their kind in the nation, the Russian program b...', meta: {'name': 'Brigham_Young_University', 'source_id': 'cb3b81b6f2c540426803106face67bc5fd4db7b1a9ef422ec3601456732b4785', 'page_number': 1}, score: 0.7016983231419771)]",1.0,1.0,1.0,1.0,1.0 +Who discovered Hydrogen gas?,Robert Boyle,Robert Boyle,"[Document(id=45ad6989b7801812895d43ea694bad169af9b5dd9024dd96b099fe071a196217, content: ' +In 1671, Robert Boyle discovered and described the reaction between iron filings and dilute acids, ...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.6552354556870785)]",1.0,1.0,1.0,1.0,1.0 +Where was Polybius forced to go as hostage?,Rome,Milan,"[Document(id=f3ddc00ec9151a3a78139029d06124f9dd3a5c7d358c3b467fee97a09cc00605, content: '."" (Pope Gregory I, would have full access to the Vatican Archives). +Constantius, ordered Liberius i...', meta: {'name': 'Athanasius_of_Alexandria', 'source_id': '2a8e7ec0a44075f1a567744982dcecf4bcea19f0065d4f73c742398a05fa1642', 'page_number': 1}, score: 0.5236542285143433)]",0.0,0.5594199895858765,0.0,0.0,0.0 +In what decade was disco popular?,1970s,1980s,"[Document(id=24440712083226dfd525d04e8584c667060d5e2b151f666d4bdfb4e2e523cdbd, content: ' Also significant was Black Sabbath's Paranoid (1970), which combined guitar riffs with dissonance a...', meta: {'name': 'Hard_rock', 'source_id': '8c928e0d3f021cd5a2c2ffff1882ada0df22da36e38c0e9a3cdfb92bc88423cc', 'page_number': 1}, score: 0.4995811254104975)]",0.0,0.7625939249992371,0.0,0.0,0.0 +What do the rivers form?,lakes,lakes,"[Document(id=c751375299aac2090fe706008704ed47f7358e09ebad2acbc0d54e2c5b0ab5d3, content: ' Cities such as Milan depend on 80 percent of water from Alpine runoff. Water from the rivers is use...', meta: {'name': 'Alps', 'source_id': 'd2eb3ad3af7d6110ae691bf40660ff7a79e4e9dbfd25785b4e55c7700bfcae79', 'page_number': 1}, score: 0.4909023721892361)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What were the first 256 code points of Unicode made identical to? ,ISO-8859-1,ISO-8859-1,"[Document(id=1ca3328892ba24004a75d8cfaab0f58ce32bed8ee034be6fa54e33e8b0a62eb5, content: ' UTF-16 extends UCS-2, using one 16-bit unit for the characters that were representable in UCS-2 and...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.6784456034023938)]",1.0,1.0,1.0,1.0,1.0 +"How many countries did her song ""Irreplaceable"" get number one status in?",five,20,"[Document(id=c85a8792121f517de1bc709c859ae73c062d480fc0c67923193c4a7ef924e5e2, content: ' She released her eighth studio album, Music, in September 2000. It featured elements from the elect...', meta: {'name': 'Madonna_(entertainer)', 'source_id': '0868d847bb768c30a7507239169d5e441bb3181b66c8b9353a863daa3d58e425', 'page_number': 1}, score: 0.44377844256800897)]",0.0,0.5910412073135376,0.0,0.0,0.0 +How many Examination Boards exist in India?,30,30,"[Document(id=16d8a70ea7a6d58f48e68b5c9072e65770d4a4053e9906c77dee6ff0c12c90a1, content: ' However, these vocational schools are not part of the German dual education system. Ergänzungsschul...', meta: {'name': 'Private_school', 'source_id': 'c62330ba95e9919513bc9fc65c0d21a748b4a518b9b1f905a113de7f37274e44', 'page_number': 1}, score: 0.4960366894753296)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What play showed an early depiction of civil disobedience?,Antigone,Antigone,"[Document(id=23b161a9804a8a11c97c4f3f14b6762b49fdf3b1fefb8ff1de76b00cfa00995a, content: 'One of its earliest massive implementations was brought about by Egyptians against the British occup...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 0.5247203837647456)]",1.0,1.0000001192092896,1.0,1.0,1.0 +"Once in conversation with the Queen, how is Elizabeth addressed?",Ma'am,Your Majesty and thereafter as Ma'am,"[Document(id=3d92674c03fa6fb7dff9bfacadbd449807bd3f50ac1fd93ba9ff982be85ddbf6, content: ' Under pressure from public opinion, she began to pay income tax for the first time, and Buckingham ...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.5270637531168535)]",1.0,0.6356295347213745,1.0,1.0,1.0 +In what century did Paris become largely into fashion?,19th,19th,"[Document(id=c37151361f1ebbd218e8e53e243abfe86d24fabddd8be6f3401cf56e9e98633b, content: ' Its name is said to have come in 1814 from the Russian soldiers who occupied the city; ""bistro"" mea...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.5637046717833719)]",1.0,1.0,1.0,1.0,1.0 +When did Alfred the Great begin his reign?,871,871,"[Document(id=5396376bb5221d60d6016a3ef2e8c6fb591b1eb2c9c6dab90677a7e0ac39a5a5, content: ' The speech of eastern and northern parts of England was subject to strong Old Norse influence due t...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.506357630655818)]",0.0,1.0,0.0,0.0,0.0 +What two methods can be used when using Load Testing?,data or a large number of users,load or endurance,"[Document(id=77ce7cf14e807c8808892e96984a50baad4f22bdca6b22034b2fb170f96240a2, content: ' Stability testing (often referred to as load or endurance testing) checks to see if the software ca...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.5004181695680803)]",1.0,0.22238580882549286,1.0,1.0,1.0 +what may possibly be a solution to net neutrality concerns? ,municipal broadband,municipal broadband,"[Document(id=bb0e180e009c490f3ae30fb55a9a91cbe512409c2e12ebae0203298a6f3efe78, content: 'S. Federal Communications Commission (FCC) was reported to be considering a new rule that will permi...', meta: {'name': 'Internet_service_provider', 'source_id': 'b6cdeebacab1c0fc99cc348ba10a7ad5f8a09ca4691c74f89374730939ffbbb4', 'page_number': 1}, score: 0.6289352889491475)]",1.0,1.0,1.0,1.0,1.0 +What are the ground-attack aircraft in the USAF usually deployed in support for? ,U.S. ground forces,U.S. ground forces,"[Document(id=cdfa1e6bea8fd42b00ce80468ba471689216a6195425c2435fb1141b0b8c7e6f, content: ' +The ground-attack aircraft of the USAF are designed to attack targets on the ground and are often d...', meta: {'name': 'United_States_Air_Force', 'source_id': '1bb222de66e5a2c81f1d5b75bf3eced066939aec82934a5cb176a607b4e8cc8f', 'page_number': 1}, score: 0.597249455201943)]",1.0,1.0,1.0,1.0,1.0 + In what year did the invasion of Russia take place?,1812,1854,"[Document(id=7c2b46d47135979280b3bd098e70f7cc34a4526459e2816700066af0071057b2, content: ' The Russians did nothing and he evacuated to Batum in February of the following year. +The Baltic wa...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 0.536293090337765)]",0.0,0.42288899421691895,0.0,0.0,0.0 +What was the practice of religion to the Romans?,personal choice,a part of daily life,"[Document(id=1a8c1e09e777c4e183dceed861053866bfd85f790fb0c2cd9b9aad59ad7dc5b2, content: ' Even the most skeptical among Rome's intellectual elite such as Cicero, who was an augur, saw relig...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.685217369665892)]",1.0,0.26575636863708496,1.0,1.0,1.0 +What is one example of failure testing?,Software fault injection,Software fault injection,"[Document(id=76ad3fb3d257a3812a169d749a5fb13f2e05383764e8f9dfc53895f08747b375, content: ' Beta versions can be made available to the open public to increase the feedback field to a maximal ...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.5599322632992683)]",1.0,1.0,1.0,1.0,1.0 +"Out of the maximum amount of players allowed, one must be a what?",goalkeeper,25-man squad of players aged over 21,"[Document(id=729a1a7b3183917b1dc01acb07f1b632efc33f4eea6aeffbe484b1530c2d94f0, content: ' By 2009, under 40% of the players in the Premier League were English. +In response to concerns that ...', meta: {'name': 'Premier_League', 'source_id': '369baa9a5a854704f16fadd35144d3a3150cf1e9ae1671d9b50a3448547b814b', 'page_number': 1}, score: 0.48538441845429137)]",0.0,0.2965299189090729,0.0,0.0,0.0 +In which year was the new constitution ratified?,1855,1982,"[Document(id=9cce84648c5334fdc1f9efada012ed443127a95a93562f83b2d5073b15d9ceda, content: 'C. Statehood Green Party. The nearest this movement ever came to success was in 1978, when Congress ...', meta: {'name': '51st_state', 'source_id': 'aa3341a0a65518fe53b022b57fd32c865f74c249a1b4d32919c38bfd1cead5b7', 'page_number': 1}, score: 0.4919496280025851)]",0.0,0.5184706449508667,0.0,0.0,0.0 +What vessel was attacked in 1845?,Naiad,Elias Andrews,"[Document(id=9607a56a3f0e1039bc16623e2029aae71f832540c9fb4cc233fb5b72ba157ab6, content: ' John's, bringing hundreds of Irish men into the port to operate inshore fishing boats. +The town's f...', meta: {'name': 'St._John%27s,_Newfoundland_and_Labrador', 'source_id': 'e83d1d1939d8ab88395721d6b32d75ceea714a323e61cf3dfe8e579afa707382', 'page_number': 1}, score: 0.5102813569652177)]",0.0,0.2918366491794586,0.0,0.0,0.0 +When did Travel + Leisure list Yale as one of the most beautiful campuses?,2011,2011,"[Document(id=92daacd08f6215b2b6c9a23e96940ae9ffa3af8f6a0300108c6bfcf840702db9, content: ' In western New Haven, the university holds 500 acres (2.0 km2) of athletic facilities, including th...', meta: {'name': 'Yale_University', 'source_id': 'f33638c4d6b8d7572ba500f3a8affca1729aae3046c83a94e2a7424dd50f6549', 'page_number': 1}, score: 0.6046670847188567)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What sort of variants did the Human Genome Project indicate that many of the measured transcripts were?,alternative variants,non-repetitive,"[Document(id=35aa1cb690df59867ee05e5097593f48e97261d2c2794a4fcd261522b374c202, content: ' In December 2013, scientists first sequenced the entire genome of a Neanderthal, an extinct species...', meta: {'name': 'Genome', 'source_id': '18d3410858e240e5596ce2f22db31a3f6dcd18aba98f4fb8d8e801b7e537aceb', 'page_number': 1}, score: 0.5600144783424263)]",0.0,0.37509000301361084,0.0,0.0,0.0 +During what year did the Italians invade Libya?,1911,1911,"[Document(id=1b5c167a9d5d609ea5fb8a97ac9f8e9db1b78d5c91d2f95320c7a861768e926e, content: ' He repeatedly expressed a preference for the desert over the city and retreated to the desert to me...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.584297128771873)]",1.0,1.0,1.0,1.0,1.0 +What did Davidson believe the God of Aristotle is synonymous with?,rational thought,rational thought,"[Document(id=707efe35d39bbe6f12111e251260c07bcfa48cea8000d7e2c9680c78ec3b9dcc, content: '"" The theory was indebted to Aristotle's pluralism and his concepts of Soul, the rational, living as...', meta: {'name': 'Idealism', 'source_id': '55a0bd42a4a737442a2d93fe0926424e236910de01b2406ed43c174f2225ff9d', 'page_number': 1}, score: 0.5783754080833273)]",1.0,1.0,1.0,1.0,1.0 +What percentage of students at Notre Dame are the children of former Notre Dame students?,21–24%,21–24%,"[Document(id=4a2399f100a536a4baf71c5dedf166c45f09b1b237a505e9a84faf1da7fd8a1b, content: ' +In 2014 the Notre Dame student body consisted of 12,179 students, with 8,448 undergraduates, 2,138 ...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 0.6002271102755674)]",1.0,1.0,1.0,1.0,1.0 +On what day did U.S> planes located Ozawa's fleet?,second,second,"[Document(id=718965c4d19f8aedea8f06ea42eb1b8aa90c1796050fa2b7fd1db899ab71ddbe, content: 'S. fleet encountered massive AA fire with proximity fuzes. Only one American warship was slightly da...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 0.6488385837345709)]",1.0,1.000000238418579,1.0,1.0,1.0 +Greek was crowned the European champions in what event?,Euro 2004,Euro 2004,"[Document(id=4b9cc1d29ed0f9a4cb783c7febb93802073dee536efd60ee53a2c3e5bec9cccf, content: ' Their best ever performance was in the 1896 Summer Olympics, when Greece finished second in the med...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.5942357834131707)]",1.0,1.0,1.0,1.0,1.0 +Which tense indicates that an action hasn't finished?,imperfective,past and present,"[Document(id=27c48c714f7a7d5c0daa9f28a8e02d3375d014da26ecdbb43d24c3a5f42b97ed, content: ' Typical of a Slavic language, Czech cardinal numbers one through four allow the nouns and adjective...', meta: {'name': 'Czech_language', 'source_id': 'b7b15f8c583c24627f86dae71c96a8b788adafb4e0606251ec9a06307cc37f6c', 'page_number': 1}, score: 0.3183698883943239)]",0.0,0.2961136996746063,0.0,0.0,0.0 +What acoustic element could be distorted by different audio settings in iPods prior to 2007?,Bass,bass instruments,"[Document(id=7308906ca1725346de1f8669d04745bd0719f93959d2b77a3e2eedd2aea218ba, content: ' This would happen for EQ settings like R&B, Rock, Acoustic, and Bass Booster, because the equalizer...', meta: {'name': 'IPod', 'source_id': 'f0324763387a7e11c235569bada6935891b2c5ba40dd065ab5acc1bb5de8dbed', 'page_number': 1}, score: 0.6227835662512028)]",1.0,0.8350290656089783,1.0,1.0,1.0 +In what way can Neutral triatomic hydrogen exist?,excited form,excited form,"[Document(id=814f2c2136f18b6a49318bf9a19fad148d9b272f987b2539771dc5dbd3beca10, content: ' +Under ordinary conditions on Earth, elemental hydrogen exists as the diatomic gas, H2. However, hyd...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.6317899747001257)]",1.0,1.0,1.0,1.0,1.0 +How are children used in the conflicts in Burma?,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements.,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements,"[Document(id=ff11a1c6102e2b293943cbcfb8c2935799487df6f465dea412c00844c1c7c655, content: '"" In July 2013, the Assistance Association for Political Prisoners indicated that there were approxi...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.5881745056471273)]",1.0,0.9815698862075806,1.0,1.0,1.0 +In what decades were Arsenal players stereotyped in a negative way?,1970s and 1980s,1970s and 1980s,"[Document(id=ac63b8357f80e413cededa7c5a37849ede32b9c8d7d3181d6e287572836f5f86, content: ' They formed the backdrop to one of the earliest football-related films, The Arsenal Stadium Mystery...', meta: {'name': 'Arsenal_F.C.', 'source_id': 'b197b14120aabbc3bf2073d4b3e99c1c51d56d5ac5248f8a97dadcfa056a6f7e', 'page_number': 1}, score: 0.5508357351080974)]",1.0,1.0,1.0,1.0,1.0 +When did new construction start in Melbourne?,late 1890s,1837,"[Document(id=02347b24d27e18d8b2928c6693c7aa2a7b9045acffc2554a2465ddcfd854f43d, content: ' +Batman's Treaty with the Aborigines was annulled by the New South Wales governor (who at the time g...', meta: {'name': 'Melbourne', 'source_id': 'e9cc00699ea29b72949874ec0e147efa277ecdaa2a727246f376aab1a8d342bd', 'page_number': 1}, score: 0.5670046274040195)]",1.0,0.4998088777065277,1.0,1.0,1.0 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:15.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:15.csv new file mode 100644 index 0000000..b89c199 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:15.csv @@ -0,0 +1,122 @@ +questions,true_answers,predicted_answers,retrieved_docs,doc_mrr,sas,doc_map,doc_recall_single_hit,doc_recall_multi_hit +What were the earliest priesthoods? ,flamines,flamines,"[Document(id=3c9b5484de418625b08e3d8f4e3c3398f40f2f5af0bbf48528ee46ea38c64e7c, content: ' In household cult, the paterfamilias functioned as priest, and members of his familia as acolytes a...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5133148969949781)]",1.0,1.0,1.0,1.0,1.0 +When was a third visit to Mnafeld scheduled? ,early 1546,11 May 1944,"[Document(id=c58958638fa47dad60cf7d7556802e28d387d9bdc8b406eb23f39fcb8c5a5b19, content: ' The descriptions of these were published in Memoir III of the Australian Museum Sydney between 1896...', meta: {'name': 'Tuvalu', 'source_id': '4a4e309bc4b63f2a542d80bca7c9d33d9df6ccfa328612b01483ad5d2101621d', 'page_number': 1}, score: 0.3557680082127386)]",0.0,0.3155817687511444,0.0,0.0,0.0 +What was the name of the airfield where heavy raids took place?,Farnborough,Battle of Britain Day,"[Document(id=431dd16953391b6c15afbac30de4f8b8985317cd0d88a29fb200a7d7cdaa5c4e, content: '..] then we will now drop [...] 300,000, 400,000, yes one million kilograms in a single night. And s...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 0.5206174872105196)]",1.0,0.3039698600769043,1.0,1.0,1.0 +The Modern English ending -'s is derived from what ending in Old English?,-es,-es,"[Document(id=e9007dae49d8af7b7fa7932aef22bbdc01132df7a83250c1516203d03c7a8b4b, content: ' It is most “important to recognize that in many words the English and Scandinavian language differe...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.5029416489492136)]",1.0,0.9999998807907104,1.0,1.0,1.0 +In what area is it common for spiritual mentorship to be extremely high?,the West,Hinduism,"[Document(id=b43e607046d3a9f2878f76721d6d75cb6c46d2bdb38f6b3fdc5f1b9a2f23a163, content: ' +In The Church of Jesus Christ of Latter-day Saints (LDS Church), the teacher is an office in the Aa...', meta: {'name': 'Teacher', 'source_id': '8d3b7bd3176a10380300a49fa713b8fffa5e77a181df51ea823d2bdeb46d4c7b', 'page_number': 1}, score: 0.48577817227393083)]",1.0,0.20152544975280762,1.0,1.0,1.0 +Who is the only Greek volleyball club to win European titles?,Olympiacos,Olympiacos,"[Document(id=4af194e6a549f3cca324fac69732006a675ddeb8e841c964604dff19122e4f16, content: 'O.K, Maroussi) have won 3 Triple Crowns, 5 Saporta Cups, 2 Korać Cups and 1 FIBA Europe Champions Cu...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.5204525008013012)]",1.0,1.0,1.0,1.0,1.0 +How many judges were on the panel for this season?,four,three,"[Document(id=0639a6470cdb94c436a07cbb24189b437a50f73fdb84ffa10dd2faeb282342a2, content: ' +The show had originally planned on having four judges following the Pop Idol format; however, only ...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 0.5454945986369781)]",1.0,0.8232484459877014,1.0,1.0,1.0 +"Along with a chemical process, what type of process is used by night vision devices to convert infrared into visible light?",electrical,electrical,"[Document(id=2296ed852027297f8181dc1f64af4fcbb66e36bb282c6197423888ce9d011df5, content: '9–14 μm) and produce images of that radiation. Since infrared radiation is emitted by all objects ba...', meta: {'name': 'Infrared', 'source_id': 'ba9d9843065facd8aa75e472f0c7d760ae3d78712335a49f7abaf499acb21d1c', 'page_number': 1}, score: 0.6525550298431104)]",1.0,1.0000001192092896,1.0,1.0,1.0 +The second most prominent religion in New York is what?,Judaism,Judaism,"[Document(id=5dac70957842f214d4c60f76564b27a7e657cd237c677ed236d1ed1ca76e360f, content: ' +Christianity (59%), particularly Catholicism (33%), was the most prevalently practiced religion in ...', meta: {'name': 'New_York_City', 'source_id': '8a1509d057a51ed7a68b42ff87df5d459ac7f15f3d0f4f1685859ba197f9686d', 'page_number': 1}, score: 0.6477764778542375)]",1.0,1.0,1.0,1.0,1.0 +The Free Hanseatic City of Bremen includes which cities?,Bremen and Bremerhaven,Bremen and Bremerhaven,"[Document(id=48379d700246108e0c3da307a3a8e5de24d8bee4044a5988144f1d9cd57b09c2, content: 'Germany is a federal republic consisting of sixteen federal states (German: Bundesland, or Land).[a]...', meta: {'name': 'States_of_Germany', 'source_id': '60764e89141121f734e956aeeb48fcbe12c7e4fdec683c4f9fa29406a3f0579e', 'page_number': 1}, score: 0.5043829724168043)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What seeks to limit the use of polysyllabic characters?,CPC,placing the glyph itself to the left or right of the left sidebearing,"[Document(id=1a26dbf7fc2f6d061e0b9cfbac5364517775557176c147e38168cf1391574c14, content: ' Thus, in many cases, users have multiple ways of encoding the same character. To deal with this, Un...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.5509833423446333)]",0.0,-0.015694376081228256,0.0,0.0,0.0 +In what courts can federal habeas corpus suits be brought?,federal,lower federal courts,"[Document(id=d356a5182977fa489e1e6eec51c082e5a99e5c3c38407e179300a795bb98de6e, content: ' These supplemental remedies are considered collateral review, that is, an avenue for upsetting judg...', meta: {'name': 'Capital_punishment_in_the_United_States', 'source_id': '0a4e4dda32f7be6a4f78da5ed9ee21d2cc7659112c0d087b2923a482bdc77b5b', 'page_number': 1}, score: 0.49507471145076914)]",1.0,0.5716046094894409,1.0,1.0,1.0 +What did Whitehead consider himself as a philosopher? ,rank amateur,one of the 20th century's foremost metaphysicians,"[Document(id=645c5e4f8126b7f8b45cf02dc6a818511eead3620792f162523eacd0e5908ee5, content: '"" Ironically, in later life Whitehead would become one of the 20th century's foremost metaphysicians...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 0.5103405273843459)]",1.0,0.14327546954154968,1.0,1.0,1.0 +What condition does a person have to meet to be allowed by law on a coin?,deceased,"""No state shall... make any thing but gold and silver coin a tender in payment of debts","[Document(id=68d14edc51b808820ba009fc52fe536383dc5d9fe285a432d114fdcd84ef9e86, content: ' The currency as we know it today did not get the faces they currently have until after the early 20...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.3801649071482328)]",1.0,0.08643913269042969,1.0,1.0,1.0 +Which region of the US experiences rain year round?,Pacific Northwest,Hawaii,"[Document(id=5cc5c90d3b7502d6e064b808c140d69c74657b0e5d9b60b7a5c3a8b76125d80b, content: 'S., with Quinault Rainforest in Washington having an average of 137 inches (348 cm). Hawaii receives...', meta: {'name': 'Geography_of_the_United_States', 'source_id': '6eef5e1e8e3a1d489b23e0304d3cc9472b84a44871b2fd5ecfc8ab3f12e8ccf0', 'page_number': 1}, score: 0.580297732561311)]",1.0,0.5410599708557129,1.0,1.0,1.0 +What is the ability to recognize and adapt to new specific pathogens called?,Adaptive (or acquired) immunity,Antigen specificity,"[Document(id=7f56f4718d147b0156e077e4853a6ac0250773335364d5e5cac1ff37df572de8, content: ' The adaptive immune response is antigen-specific and requires the recognition of specific ""non-self...', meta: {'name': 'Immune_system', 'source_id': 'fcb8c5092b2596a66d4f96587d653cbabda606874d86632c35ec40ef8f023051', 'page_number': 1}, score: 0.5447541169419035)]",1.0,0.4682602286338806,1.0,1.0,1.0 +Who allegedly hit a home run to the Center?,Babe Ruth,Babe Ruth,"[Document(id=9f4c20a2bce44cf9bd3af493be183ab333c4d80eef37f323c78a73218b2cb402, content: ' Because of the tie the Giants and Cubs ended up tied for first place. The Giants lost the ensuing o...', meta: {'name': 'Chicago_Cubs', 'source_id': '10bd5330115a2fe2b9ef60d21abe3942769709161783409af0c4444ccac129c3', 'page_number': 1}, score: 0.5515789425362991)]",1.0,1.0,1.0,1.0,1.0 +Which type of graduate from Imperial earned the second highest average starting salary after graduation?,Computing graduates,Computing,"[Document(id=724eb1fe8da5e0d4da8a448227c2eb83283ae94ad66b58e07296d26274b6a2b4, content: ' In terms of specific course salaries, the Sunday Times ranked Computing graduates from Imperial as ...', meta: {'name': 'Imperial_College_London', 'source_id': '8ff02d910ce681fdd72074b13ed322271625386966ee827dabf1c93ca556fd6e', 'page_number': 1}, score: 0.4505703094141891)]",1.0,0.6075230836868286,1.0,1.0,1.0 +In what year did Eisenhower propose his second civil rights act?,1960,1960,"[Document(id=0f23a3d664b2d9d293629ea9d692c835b9a8e93ffb30bc45de463cd9b9d93424, content: ' Therefore, Khrushchev would not take part in the summit. Up until this event, Eisenhower felt he ha...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 0.6092619061844369)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What is the current height limitation in Paris in central areas?,50 m,50 m (160 ft),"[Document(id=44b4daa4d3c4378951ca880516b3342520595caa477d61e2c5fc88de3627c8f0, content: ' Modern Paris owes much to its late 19th century Second Empire remodelling by the Baron Haussmann: m...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.7160253770681686)]",1.0,0.8085941672325134,1.0,1.0,1.0 +Income level was found to have a signification correlation to which outcome in the study?,nutrition literacy,duration of economic growth spells,"[Document(id=fba3e3c3e8001e6e2682424641a36dc63233372513150f4bf138ff4ceac88039, content: ' Thus, the marginal utility of wealth per person (""the additional dollar"") decreases as a person bec...', meta: {'name': 'Economic_inequality', 'source_id': '648ff32b8c146c974b23e115e3a61420b1cdbea9de1f5bf38442ce4c1d9caa42', 'page_number': 1}, score: 0.4046748955914073)]",0.0,0.06012151017785072,0.0,0.0,0.0 +How much did Viacom want in damages in their lawsuit against youtube?,$1 billion,$1 billion,"[Document(id=497c11f9fbbfb1f37be3dcd0a29d747c5f8f8967252a25e340b937417a080280, content: ' Three successful complaints for copyright infringement against a user account will result in the ac...', meta: {'name': 'YouTube', 'source_id': '4d18798daeceb997a6dcb04b575dbbdad768671ca9202116370611d944d510be', 'page_number': 1}, score: 0.7380069801719974)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who should be able to challenge administrative orders in court?,individuals,individuals,"[Document(id=8a0f220e6df4e50504002c6ad6312d289ff774c9c753db65e80fb1022f521be2, content: ' Dicey's: “no man is punishable or can be lawfully made to suffer in body or goods except for a dist...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 0.5930262907390398)]",1.0,1.0,1.0,1.0,1.0 +The Slavs were under what name in the early 6th century?,the Antes and the Sclaveni,the Antes and the Sclaveni,"[Document(id=198e2ff2b18d1e11637616cacf59f7db2e62fac307b8d4669b7694d128532573, content: ' The Old East Slavic Slavuta for the Dnieper River was argued by Henrich Bartek (1907–1986) to be de...', meta: {'name': 'Slavs', 'source_id': 'a55731ff3cd03d8fe34d4e646dbfcdc2088f01190e5110c9ff0baf4e4b486847', 'page_number': 1}, score: 0.5947934206180892)]",1.0,1.0000001192092896,1.0,1.0,1.0 +How are the results simplified when using the SASO?,"into four summaries that look specifically at responses that correspond to either homosexuality, heterosexuality, bisexuality or asexuality.",four summaries,"[Document(id=6d55c2f18c45503d524329d23b6d3edb48694f77be7d85064dd10de2b4e6fd9a, content: ' Despite seven categories being able to provide a more accurate description of sexual orientation th...', meta: {'name': 'Sexual_orientation', 'source_id': 'f94279650936a403aa3d2b43ab1ad42b1fd14d05455aa86fbcda6cda978ffe5c', 'page_number': 1}, score: 0.38699930943871186)]",1.0,0.5629022717475891,1.0,1.0,1.0 +What socioeconomic class used the Florentine Tuscan language in Dante's time?,upper class,the family or a village priest,"[Document(id=1ac361381724706acf7a12f934a8f6a399e50f7b7857405e6b825fba7c4f2236, content: ' Other countries codified their laws; legal codes were promulgated in Castile, Poland, and Lithuania...', meta: {'name': 'Middle_Ages', 'source_id': 'f13b345e20c54b6cc55363080d7492474df62715717172aa9f1029f286010ad9', 'page_number': 1}, score: 0.527310784861083)]",0.0,0.20006507635116577,0.0,0.0,0.0 +"In addition to ammo proposals, what other need was articulated?",The need for some form of tracer or smoke trail,smaller-calibre ordnance,"[Document(id=a376f05e7a6d61c6348c9a2d82a6646f62cd3d7afbcdeec823b97dcb5aa92bf2, content: ' Their needs could cogently be met with smaller-calibre ordnance beyond using the usual singly-mount...', meta: {'name': 'Anti-aircraft_warfare', 'source_id': 'c54639424f62ce369356ed8c4e1e9fab8769359dd28029aa89832f5aa994f45d', 'page_number': 1}, score: 0.40298211471750184)]",1.0,0.10714080184698105,1.0,1.0,1.0 +What is that power that enables us to love?,Prevenient grace,mercy of God and His care for His creation,"[Document(id=ab06859e83076577bc2a04088a7525ddee1f44dedfa9ffcf46872843c8c86632, content: ' They neither expected nor sought the appearance of the stigmata or any other physical manifestation...', meta: {'name': 'Dominican_Order', 'source_id': 'f55ba4e5b37c737e23256c74229a0bf3a5c697b70e171c6ce20c437d900b023e', 'page_number': 1}, score: 0.354906414247829)]",0.0,0.4893345832824707,0.0,0.0,0.0 +"Who was assassinated on September 22, 189 AD?",He Jin,Caesar,"[Document(id=dd09780a89c3259aae7f10ab2070b88e3687ba1267d0d7b037e76509ccd66648, content: ' Thus, the assemblies became powerless and were unable to oppose him. +Caesar was assassinated on Mar...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.4209174078831871)]",0.0,0.2528122663497925,0.0,0.0,0.0 +How did Gaddafi change the legal system?,he introduced sharia as the basis for the legal system,sharia,"[Document(id=4d48682f805f7a385767a5c2825f3fc2119d2b698a7fc9592571f5590731bae6, content: 'Muammar Muhammad Abu Minyar al-Gaddafi (Arabic: معمر محمد أبو منيار القذافي‎ Arabic pronunciation: [...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.5289569940133785)]",1.0,0.6216318011283875,1.0,1.0,1.0 +What name was it assumed Elizabeth would take upon her marriage to Philip?,"Mountbatten,",House of Mountbatten,"[Document(id=e315f255311d7dc4c756937a51f6602a99f1b8e3b0a145306a3d8c41c258bc57, content: ' She was educated privately at home. Her father acceded to the throne on the abdication of his broth...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.5852761260940276)]",1.0,0.7063884735107422,1.0,1.0,1.0 +What was the attack on the British weakness?,Battle of Fort Bull,UN intervention and US pressure forced Britain into a humiliating withdrawal of its forces,"[Document(id=542ad1aa0db336cf87e62cb8bfc961fa673941a90d497289bb4bdd2686c79d00, content: ' Another of Eisenhower's concerns was the possibility of a wider war with the Soviet Union after it ...', meta: {'name': 'British_Empire', 'source_id': '8f673e96ef153333a8fc1d3855be60f5be1829316ed1fcb58dd8590fdb083946', 'page_number': 1}, score: 0.5022001357807013)]",0.0,0.13994494080543518,0.0,0.0,0.0 +How much of South Tucson is Native American?,10%,2.7%,"[Document(id=95e1463ec5cbd41e6746a2396eee9d7fb5ba52847c057866e7e7fd5a7f376d91, content: ' The racial makeup of the city was 69.7% White (down from 94.8% in 1970), 5.0% Black or African-Amer...', meta: {'name': 'Tucson,_Arizona', 'source_id': '1cd8368afbbb7156e4d37c16c9795a1cf7209fbd0036331b6759337ae62d7224', 'page_number': 1}, score: 0.5245243631758744)]",1.0,0.6450710296630859,1.0,1.0,1.0 +Whose theory claimed that species were unchanging and designed by God?,John Ray,John Ray,"[Document(id=9a450124afc12f193f72ae1f8310b72ada7cbbf8f34bbd71f66eb32b0f47efbc, content: ' With the development of the modern evolutionary synthesis in the 1930s and 1940s, Darwin's concept ...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 0.6164412846430873)]",1.0,1.000000238418579,1.0,1.0,1.0 +Who can restore a database that has been corrupted?,database administrator,a database administrator,"[Document(id=a3a5fb3ed16579bb6ea58603baf5e6a8ca709ead98470e3f1da0f97c43462650, content: ' When this state is needed, i.e., when it is decided by a database administrator to bring the databa...', meta: {'name': 'Database', 'source_id': '98b09743ece9be42ad021f0dc6ffab08c322e7eeace6aae0b98b4bbf67224bd3', 'page_number': 1}, score: 0.34742409259180107)]",1.0,0.9447140693664551,1.0,1.0,1.0 +What unpopular philosopher's text did Avicenna learn from?,Abu Abdullah Nateli,Aristotle,"[Document(id=95bf4d41e9b5638e0c3ed64ddc4819df9e92a84aa145fcb6891ada977e4a1739, content: ' Avicenna discussed the issue of a proper methodology for scientific inquiry and the question of ""Ho...', meta: {'name': 'Avicenna', 'source_id': '25cbb1011a29473e1dfa8d96ebe95bc8ead7393548de57e871f959624f7238f1', 'page_number': 1}, score: 0.5492992507068948)]",1.0,0.2543175518512726,1.0,1.0,1.0 +What was the event called that caused Ayutthaya to reclaim lost territories ?,the Chinese threat,Ayutthaya recovered its territories by 1770,"[Document(id=391f8278a6d664b0fda2a91e05a75fcb178e560aca2778cf3383f74470691074, content: ' Its trade and secular administrative reforms built a prosperous economy for more than 80 years. Fro...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.562789144621481)]",1.0,0.07690561562776566,1.0,1.0,1.0 +"Before the introduction of the Latin alphabet, how was Old English written?",runic system,Roman cursive,"[Document(id=9b07ee8006e937b1f3fc72c78c329158c8ab48962e9e1ef4c8d375c50cf7a30b, content: ' Scripts using two separate cases are also called bicameral scripts. Many other writing systems make...', meta: {'name': 'Letter_case', 'source_id': 'f24785f407e55c52408fa13b0c7fd7fc6529cb973898992f6e98d5d28ea82d5a', 'page_number': 1}, score: 0.5399797841084327)]",0.0,0.06087443605065346,0.0,0.0,0.0 +"When groups that are antagonistic to each other form a coalition for hidden gain, it is called what?",unholy alliance,unholy alliance,"[Document(id=b281d8f2fa5c72bf9176af2f66fbd72725ba99c1c3e8e2e12044e694c6ee3bbe, content: ' This sum itself may be all or a portion of the difference between the actual (inflated) payment to ...', meta: {'name': 'Political_corruption', 'source_id': 'e7c84fb03cb06b72c4ab51eb24218e9f2a10b95493e43d575342db0ffa3e9e1d', 'page_number': 1}, score: 0.5025447478163751)]",1.0,1.0,1.0,1.0,1.0 +What made a strong economic case for investing in low carbon technologies?,Stern Review,International Solar Energy Society,"[Document(id=c793eed6f7e60651d2a76fcaf9711f0a39fe3f3343f81bd4df136c41ff320e80, content: ' +Lester Brown has argued that ""a world facing the prospect of economically disruptive climate change...', meta: {'name': 'Renewable_energy_commercialization', 'source_id': 'fa5f7904b8df5272f4f870ed401f610435c916171c87b47deed9a45e6bd594ea', 'page_number': 1}, score: 0.4503806030703587)]",1.0,0.06405393034219742,1.0,1.0,1.0 +"Who introduced the Latin version of the word ""insect""?",Pliny the Elder,Pliny the Elder,"[Document(id=d4ddb3955cf6ff9d6bed687f54220f695fef0951972c64309938d9cf7670f6ba, content: ' +The word ""insect"" comes from the Latin word insectum, meaning ""with a notched or divided body"", or ...', meta: {'name': 'Insect', 'source_id': 'e14215fdfb7e1394430dedb24549d773464f110b7f19b86558bc101f84126630', 'page_number': 1}, score: 0.7250091339820998)]",1.0,0.9999998807907104,1.0,1.0,1.0 +What was started by the Roman Catholic Church to dispense of the Cathars in Languedoc?,The Cathar Crusade,1893,"[Document(id=a7c55da50ca7f4964477eed8dc6fabc1c1878861bbc6edc9facb24308d77a9f2, content: ' It is at the École Biblique that the famed Jerusalem Bible (both editions) was prepared. +Doctrinal ...', meta: {'name': 'Dominican_Order', 'source_id': 'f55ba4e5b37c737e23256c74229a0bf3a5c697b70e171c6ce20c437d900b023e', 'page_number': 1}, score: 0.4189815124537153)]",0.0,0.1781020313501358,0.0,0.0,0.0 +What music service is a huge competitor for Tidal?,Spotify,Spotify,"[Document(id=dccc646845ca49e7358315767a80e7f3cbcc38b5a971e1af1ab775ad80542dee, content: ' Contemporary critics have noted the consistent minimalistic aesthetic exhibited throughout DONDA cr...', meta: {'name': 'Kanye_West', 'source_id': '4693bb2ae0f5b7c5ed3788607bba35813f42eada13da446f4c397c0715d2dc91', 'page_number': 1}, score: 0.5964939320176397)]",1.0,1.0,1.0,1.0,1.0 +How much was each copper token worth?,a halfpenny,"15,000 t","[Document(id=98b439fa147a97e5eae30c05b00e8ced8af2688f70d603a4a19bd0acee1fb227, content: ' Electroplating commonly uses copper as a base for other metals such as nickel. +Copper has been in u...', meta: {'name': 'Copper', 'source_id': 'de5c60a665b93f9b358be412d53a37ca43d90005e3ec9a553ac38a38f076fadc', 'page_number': 1}, score: 0.4860156794563682)]",0.0,0.08125006407499313,0.0,0.0,0.0 +What ships were attacked in 1852?,Glencoe and Sea Nymph,Glencoe and Sea Nymph,"[Document(id=c450c10c9cc7ec2362a5a8c23142549f03d77146379f9bb96ab788f8c91887e3, content: ' One year later, the American schooner Dolphin arrived and picked up two boys, the last survivors of...', meta: {'name': 'Marshall_Islands', 'source_id': '762a4f6095de127ded5441c7a17d1c9c38f731ac9cb1ffe8de30af6573be7edf', 'page_number': 1}, score: 0.5428866259942204)]",1.0,1.0,1.0,1.0,1.0 +What was the first fossil to display both clearly traditional reptilian characteristics?,Archaeopteryx,Tikitherium,"[Document(id=e46342d518bff02018f8d43556b540641d63dba71e0bc814f0993e18d70b0dba, content: ' +The early synapsid mammalian ancestors were sphenacodont pelycosaurs, a group that produced the non...', meta: {'name': 'Mammal', 'source_id': 'ce12abe98969c8fca9c93088950b36e820ec47c4c63f339bc203185f79f0242d', 'page_number': 1}, score: 0.5665906290089553)]",0.0,0.29620876908302307,0.0,0.0,0.0 +What are the theories that integrate molecular genetics with Darwinian evolution called?,the modern evolutionary synthesis,modern evolutionary synthesis,"[Document(id=7ff269861e7df34fcebf71c4907ef4694447d099e228c45550f4e2387812c2fd, content: ' This dogma has since been shown to have exceptions, such as reverse transcription in retroviruses. ...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 0.5467572858377108)]",1.0,0.9789240956306458,1.0,1.0,1.0 +At what time did Tesla get dinner?,8:10 p.m,"8:10 p.m., at Delmonico's restaurant and later the Waldorf-Astoria Hotel. Tesla would telephone his dinner order to the headwaiter, who also could be the only one to serve him. ""The meal was required to be ready at eight o'clock","[Document(id=bf93e5ca5e1091757426fa6d168a7b90f8c1d9dad833880ffcb9b06d8ad2b0f7, content: ' There are a minimum of 278 patents issued to Tesla in 26 countries that have been accounted for. Ma...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.6614342019294968)]",1.0,0.3650366961956024,1.0,1.0,1.0 +In what year did the French reach Alsace?,1444,1444,"[Document(id=f693c6f2598c5f0324deafec242b67b3598e0051d9b177df27f75a4c46de16d7, content: ' +Holy Roman Empire central power had begun to decline following years of imperial adventures in Ital...', meta: {'name': 'Alsace', 'source_id': '1105499d303196fb0b7ae4d26209720b0040e63376f114d458fd77f05836cbd4', 'page_number': 1}, score: 0.6132608572607257)]",1.0,0.9999998807907104,1.0,1.0,1.0 +Who was responsible for the handling of the Roman census?,two censors,two censors,"[Document(id=ce88f4b2acc2cd7986f0b63373b8505a79a6c1ae6aa86d23bd2b76d546f82d61, content: ' Consuls had supreme power in both civil and military matters. While in the city of Rome, the consul...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.5303996384286787)]",1.0,0.9999998807907104,1.0,1.0,1.0 +When was a resolution agreed to about Chinese human rights issues in San Francisco?,"April 1, 2008","April 1, 2008","[Document(id=1e41339b2f87d6ffb45a6e6882ca2d5cec93b2ada663205b1745d23021ff3c6d, content: ' The planned closing ceremony at Justin Herman Plaza was cancelled and instead, a ceremony was held ...', meta: {'name': '2008_Summer_Olympics_torch_relay', 'source_id': 'a45e6ea09a1ff12d8a239a682c8ef3735ae894d5ab6427b50909053bb046e8b5', 'page_number': 1}, score: 0.45183706257798184)]",1.0,1.0,1.0,1.0,1.0 +Who was the last pope to be crowned?,Paul VI,John XXIII,"[Document(id=91c9241e5089d7599cdf7e8e77417dd04317346da71a346d843fc01cf571885f, content: ' After the long pontificate of Pope Pius XII, the cardinals chose a man who – it was presumed becaus...', meta: {'name': 'Pope_John_XXIII', 'source_id': '21b200b333306d54410b53cfd12e123951dabfc1f534196e5309132d00e8184d', 'page_number': 1}, score: 0.5341809700581116)]",0.0,0.4223441183567047,0.0,0.0,0.0 +Who handles serious medical emergencies on Norfolk Island?,the Royal Australian Air Force,Royal Australian Air Force,"[Document(id=61583a75fdba243f6a812f3bf73c41e686e4f8374db81e4dd5f1a4d5a4430336, content: ' +Non-Australian citizens who are Australian permanent residents should be aware that during their st...', meta: {'name': 'Norfolk_Island', 'source_id': '8be872c9c3302a16163599d52ec60841ddff13042f8e8e2e0aa14de2c192e2ee', 'page_number': 1}, score: 0.6661485563876728)]",1.0,0.9866578578948975,1.0,1.0,1.0 +Were Roman soldiers expected to purchase their own equipment?,"wealthiest citizens, who were able to purchase the best equipment",Rome was forced to effectively ignore its long-standing principle that its soldiers must be both citizens and property owners,"[Document(id=e9b65cb0ed0d083f9f18e9d8a272362d48c238c27f4ed0b8150f2dec64d25a52, content: ' In the 4th century BC, the Romans abandoned the phalanx in favour of the more flexible manipular fo...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.47970363136972916)]",1.0,0.11697272956371307,1.0,1.0,1.0 +Who did Nasser's group try to assassinate?,Hussein Sirri Amer,Nuri al-Said,"[Document(id=c355e0f20036a60ada1e4711a62b4d11adebf07dce76a808203fdd588cc1168e, content: '"" Nasser ordered a crackdown against Syrian communists, dismissing many of them from their governmen...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 0.5839372061580861)]",1.0,0.3125278949737549,1.0,1.0,1.0 +What was AJ Ayer an important member of?,Logical empiricism,his clan,"[Document(id=2d32fb9800c9d78fd442f6428cf6f5fb797d26f09a98fe2e6bd0bbee11ab4e6e, content: ' Uthman placed some of the trusted members of his clan at prominent and strong positions throughout ...', meta: {'name': 'Umayyad_Caliphate', 'source_id': '704c4038f63f9c5a686218060cb31639c0333dda34e87e804be75e5fec05ec53', 'page_number': 1}, score: 0.36949795109946626)]",0.0,0.07835237681865692,0.0,0.0,0.0 +In which year was a settlement reached regarding GE's pollution of the Housatonic River?,1999,1977,"[Document(id=0869972e54922afd3497ba60a5f3e981d9c0ab240629036531512fe359bc4da8, content: ' Around the same time, General Electric's Canadian counterpart, Canadian General Electric, was forme...', meta: {'name': 'General_Electric', 'source_id': '26d4044eb2d79191635af69e57c262af8041a70871e60e989128c458529f04cc', 'page_number': 1}, score: 0.5524063253173699)]",1.0,0.5505511164665222,1.0,1.0,1.0 +What area in Charleston is considered the 'industrial area'?,the city's Neck area,Charleston–North Charleston–Summerville Metropolitan Statistical Area,"[Document(id=a00baea2441c87e38109afe86358b94ae591d905c9ff5c35c440986bd364ee66, content: 'Charleston is the oldest and second-largest city in the U.S. state of South Carolina, the county sea...', meta: {'name': 'Charleston,_South_Carolina', 'source_id': '6135d76cf2064b0c4f70e685c5c7e15b8093869ebd10f9a74e8acb07e2173763', 'page_number': 1}, score: 0.5246972383754526)]",1.0,0.3331555128097534,1.0,1.0,1.0 +In what year did Frédéric obtain citizenship in France?,1835,1835,"[Document(id=7b2e31c3f54b0c1a9b97db27512199c167f56012decb955c5fb2ad06a110d92e, content: 'Frédéric François Chopin (/ˈʃoʊpæn/; French pronunciation: ​[fʁe.de.ʁik fʁɑ̃.swa ʃɔ.pɛ̃]; 22 Februar...', meta: {'name': 'Frédéric_Chopin', 'source_id': '5e95b80d7ceb3febb2d1a7dfe09730771a231fc8d23c15cd74d3fa2327cfcb14', 'page_number': 1}, score: 0.6606191519489985)]",1.0,1.0,1.0,1.0,1.0 +What was Tesla afraid someone was trying to do with his invention?,steal the invention,"a minute particle would break off the cathode, pass out of the tube, and physically strike him","[Document(id=573fea08f788152dd4af1fc1bf1634092e04764cca438d0c4a0aaaeb5bbd0ee6, content: ' He believed early on that damage to the skin was not caused by the Roentgen rays, but by the ozone ...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.6370188047194767)]",1.0,0.21631601452827454,1.0,1.0,1.0 +What is the group called that does not agree with government at all?,anarchists,cordon sanitarian,"[Document(id=da081545e8e8020b03087fbbcde823b131a8ffeb227970cc6962ac2b8197ec87, content: ' When a party achieves enough seats in an election to form a majority, the party's frontbench become...', meta: {'name': 'Political_party', 'source_id': '476a9215de81b75a4f17737eed1679570973e55b5ed6175aa0ec1014b09d4632', 'page_number': 1}, score: 0.44697301723543903)]",0.0,0.2345580756664276,0.0,0.0,0.0 +What was the name of the bookstore the authors met in?,Old Corner Bookstore,Stationers' Hall,"[Document(id=8efeacba5808472fcf5765653cdacd88f38b8ca6c851464247d3804997f7fc32, content: ' His estimate that writing his ""big book"" would take five years was optimistic. +On the Origin of Spe...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 0.4229277754222375)]",0.0,0.28779083490371704,0.0,0.0,0.0 +Time and space are both examples of what type of resource?,complexity resources,systematic framework,"[Document(id=d76927397b700e2de75c7cb61d1ac04b6def03542f443a57cbea19a53d750a49, content: ' Without tangible notions of distance and size for correlation, sight can tell us next to nothing ab...', meta: {'name': 'Philosophy_of_space_and_time', 'source_id': '5578d6ef93e9e6f246ec225cb481a7decb69e22faa45aa7889ab826e1e2d4028', 'page_number': 1}, score: 0.4423514453852574)]",0.0,0.28883427381515503,0.0,0.0,0.0 +What had happened to Frederick's extra troops from Russia?,"Catherine, who ended Russia's alliance with Prussia and withdrew from the war",placed a corps of his own troops under Frederick's command,"[Document(id=d7342a0e3021ae2d3bba679cf10ecb72ddc69518be33b380ee6b9e8f5e4d03c9, content: ' +On the eastern front, progress was very slow. The Russian army was heavily dependent upon its main ...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 0.7072835413098615)]",1.0,0.28195881843566895,1.0,1.0,1.0 +What percentage of its workforce did Dell terminate in from 2007 to 2008?,10%,10%,"[Document(id=cea0f1307bbe685005f9642d975856221cf9a5dd6f18a7b87c780156408770d1, content: '” +On April 23, 2008, Dell announced the closure of one of its biggest Canadian call-centers in Kanat...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 0.659275856771866)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What organization was Rome's official caretaker?,the senate,public religion,"[Document(id=b80b0d7fa56110987db7fc274c40400c9afab47b60f93bd3980be6d29191c35f, content: 'The priesthoods of public religion were held by members of the elite classes. There was no principle...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5459488212709411)]",1.0,0.19829890131950378,1.0,1.0,1.0 +Who chose BYU as the location of the national Middle East Language Resource Center?,United States Department of Education,United States Department of Education,"[Document(id=73c809a12535348ffed165eff42c189f3aa95c6ed455c42b5e7932cd44167a8d, content: ' Several of its language programs are the largest of their kind in the nation, the Russian program b...', meta: {'name': 'Brigham_Young_University', 'source_id': 'cb3b81b6f2c540426803106face67bc5fd4db7b1a9ef422ec3601456732b4785', 'page_number': 1}, score: 0.6349932241694249)]",1.0,1.0,1.0,1.0,1.0 +Who discovered Hydrogen gas?,Robert Boyle,Robert Boyle,"[Document(id=b562ec53d471dfbb3ba9b12247b16c622039308f7ec5b5b5344127cfeb67e87d, content: ' +In 1671, Robert Boyle discovered and described the reaction between iron filings and dilute acids, ...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.6552354556870785)]",1.0,1.0,1.0,1.0,1.0 +Where was Polybius forced to go as hostage?,Rome,Milan,"[Document(id=423e38d8a6d6af214d9084d6c9c45752923b4fc73735c2d71dfaf17e2be72ac8, content: ' He sent him, indeed, five hundred pieces of gold ""to bear his charges"" but Liberius refused them, s...', meta: {'name': 'Athanasius_of_Alexandria', 'source_id': '2a8e7ec0a44075f1a567744982dcecf4bcea19f0065d4f73c742398a05fa1642', 'page_number': 1}, score: 0.5515330125237783)]",0.0,0.5594199895858765,0.0,0.0,0.0 +In what decade was disco popular?,1970s,1970s,"[Document(id=2a3289df636bba9b6194e462e8116e6249c5ae5ad18c639ded7e73b593b09e4a, content: ' Other performing arts venues in Miami include the Gusman Center for the Performing Arts, Coconut Gr...', meta: {'name': 'Miami', 'source_id': '39ab1ce422fd3baeedbb69a9a90b4c0e43c513f6161698478a96f09e5d9f7285', 'page_number': 1}, score: 0.5362407528966322)]",1.0,1.0,1.0,1.0,1.0 +What do the rivers form?,lakes,Nieuwe Maas,"[Document(id=c76f28ed2994796f7f3a5aeea9229958f03f574059e71701fc815116b1cba0e0, content: ' Since the Rhine contributes most of the water, the shorter term Rhine Delta is commonly used. Howev...', meta: {'name': 'Rhine', 'source_id': '23686e48a61504679a91dac10d83b287aef3dff7dabc0ab881c281059187ce6a', 'page_number': 1}, score: 0.47601402875176224)]",0.0,0.16586220264434814,0.0,0.0,0.0 +What were the first 256 code points of Unicode made identical to? ,ISO-8859-1,ISO-8859-1,"[Document(id=e5a1e75e17e8253dc7c01b0468f9e5c5d8a150581ae96fc8d4a0f6136ae9d79a, content: 'Unicode is a computing industry standard for the consistent encoding, representation, and handling o...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.653812371157898)]",1.0,1.0,1.0,1.0,1.0 +"How many countries did her song ""Irreplaceable"" get number one status in?",five,three,"[Document(id=ac63fedfc16f471fee9ece91ef2b1a184be66b48588208bdae014bc4b8fea7b8, content: ' Beyoncé was the third woman to have writing credits on three number one songs (""Irreplaceable"", ""Gr...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 0.48296640826682014)]",1.0,0.6790156364440918,1.0,1.0,1.0 +How many Examination Boards exist in India?,30,NIPER) and National Institute of Nutrition (NIN). Hyderabad has five,"[Document(id=2a60820f19599e70a11ffdf68cff487cf35e2e6c1fa7acfbb591eec5701ddc2b, content: ' Depending on the institution, students are required to sit the Secondary School Certificate or the ...', meta: {'name': 'Hyderabad', 'source_id': 'e032b0dc6babeab40ef746299b8fdda8c0f0f12976c0b900e8ed7ca60ce1802d', 'page_number': 1}, score: 0.4533602356519226)]",0.0,-0.009233260527253151,0.0,0.0,0.0 +What play showed an early depiction of civil disobedience?,Antigone,Antigone,"[Document(id=ad70291d4dec497bcd48d02a2bfbb4780dffd558cf0eee2d7c976f107f401335, content: 'One of its earliest massive implementations was brought about by Egyptians against the British occup...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 0.5247203837647456)]",1.0,1.0000001192092896,1.0,1.0,1.0 +"Once in conversation with the Queen, how is Elizabeth addressed?",Ma'am,"a touch of humour, gentleness and understanding"".","[Document(id=0a24a2ae13af1d4669b8a2f74791d8b6c9fcf28ba3103552f6b7f5c81e2ec8b3, content: ' Patriation removed the role of the British parliament from the Canadian constitution, but the monar...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.464836191618723)]",1.0,0.1287117451429367,1.0,1.0,1.0 +In what century did Paris become largely into fashion?,19th,19th,"[Document(id=faa03729332a5c07c1c9d6d87747d15af8b3043f87a9cff7389d9a1c23d97c7c, content: ' +Paris has been an international capital of high fashion since the 19th century, particularly in the...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.6398172942259656)]",1.0,1.0,1.0,1.0,1.0 +When did Alfred the Great begin his reign?,871,middle of the 10th century,"[Document(id=020b06606f9be0ef225d06a65fe75136a36d3c27c244908b82a80ac5c2785da3, content: ' 871–899) came to an agreement with the Viking invaders in the late 9th century, resulting in Danish...', meta: {'name': 'Middle_Ages', 'source_id': 'f13b345e20c54b6cc55363080d7492474df62715717172aa9f1029f286010ad9', 'page_number': 1}, score: 0.49354982401776937)]",1.0,-0.022137032821774483,1.0,1.0,1.0 +What two methods can be used when using Load Testing?,data or a large number of users,large quantities of data or a large number of users,"[Document(id=dd704f4bc758ab475db02844a8a24263dd4fdb32afbc7f7dc86b810f8fa62e4f, content: ' +Load testing is primarily concerned with testing that the system can continue to operate under a sp...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.6185313518586624)]",1.0,0.9246095418930054,1.0,1.0,1.0 +what may possibly be a solution to net neutrality concerns? ,municipal broadband,Title II (common carrier) of the Communications Act of 1934,"[Document(id=7aba2d10978318bc5abba467ce5b46b678a83c63b04c2770eb6bfdb5855d1dce, content: ' On 16 January 2015, Republicans presented legislation, in the form of a U.S. Congress H.R. discussi...', meta: {'name': 'Internet_service_provider', 'source_id': 'b6cdeebacab1c0fc99cc348ba10a7ad5f8a09ca4691c74f89374730939ffbbb4', 'page_number': 1}, score: 0.5236994820665182)]",1.0,0.3210926949977875,1.0,1.0,1.0 +What are the ground-attack aircraft in the USAF usually deployed in support for? ,U.S. ground forces,land forces,"[Document(id=2e4b3b475a65a6397953a11388e7fa166873ff680f98b6e7464deb399963430f, content: ' +In the US Air Force, the distinction between bombers, fighters that are actually fighter-bombers, a...', meta: {'name': 'United_States_Air_Force', 'source_id': '1bb222de66e5a2c81f1d5b75bf3eced066939aec82934a5cb176a607b4e8cc8f', 'page_number': 1}, score: 0.4276380085436715)]",1.0,0.641464352607727,1.0,1.0,1.0 + In what year did the invasion of Russia take place?,1812,1854,"[Document(id=32391672352be24938ffc4c30ab31229375147f89f81aa7ba80cc10250a1ac05, content: ' Aside from a minor skirmish at Constanța there was little for the allies to do. Karl Marx quipped t...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 0.5119894655681422)]",0.0,0.42288899421691895,0.0,0.0,0.0 +What was the practice of religion to the Romans?,personal choice,Private and personal worship,"[Document(id=28a81868c1313d070db0571acd383d74fc733b1b8a6ab4356ba3f8f9fcf74d1e, content: ' In the campus, the youth assembled to play and exercise, which included jumping, wrestling, boxing ...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.6164710395064059)]",0.0,0.2722780108451843,0.0,0.0,0.0 +What is one example of failure testing?,Software fault injection,"defects in dead code will never result in failures. A defect can turn into a failure when the environment is changed. Examples of these changes in environment include the software being run on a new computer hardware platform, alterations in source data, or interacting with different software","[Document(id=f10f419092a1f6acf685021bef11a7a6ebef797bdd44e3997bd6b4a635aa2a89, content: ' Not all defects will necessarily result in failures. For example, defects in dead code will never r...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.5932129675513269)]",1.0,0.5290452837944031,1.0,1.0,1.0 +"Out of the maximum amount of players allowed, one must be a what?",goalkeeper,home-grown players,"[Document(id=0d02d0727bbf3d538faf4d0e4b493fb1a575377aa15b678ef4c8a1fa1646ad91, content: ' By 2009, under 40% of the players in the Premier League were English. +In response to concerns that ...', meta: {'name': 'Premier_League', 'source_id': '369baa9a5a854704f16fadd35144d3a3150cf1e9ae1671d9b50a3448547b814b', 'page_number': 1}, score: 0.48538441845429137)]",0.0,0.250182181596756,0.0,0.0,0.0 +In which year was the new constitution ratified?,1855,1884 that produced a constitution ratified 3:1 by Montana citizens in November 1884,"[Document(id=10085273dcf2e8add6fd060b7e5ee171cced6c2e779468105fa3f236158226ca, content: ' Surveys in 1874, 1875 and 1876 helped spark the Great Sioux War of 1876. The transcontinental NPR w...', meta: {'name': 'Montana', 'source_id': '556dc408ae9a8cf251c8082c455043f3c354c7457c6fe7793c307babf666b651', 'page_number': 1}, score: 0.485443964150987)]",0.0,0.2675617039203644,0.0,0.0,0.0 +What vessel was attacked in 1845?,Naiad,Naiad,"[Document(id=c450c10c9cc7ec2362a5a8c23142549f03d77146379f9bb96ab788f8c91887e3, content: ' One year later, the American schooner Dolphin arrived and picked up two boys, the last survivors of...', meta: {'name': 'Marshall_Islands', 'source_id': '762a4f6095de127ded5441c7a17d1c9c38f731ac9cb1ffe8de30af6573be7edf', 'page_number': 1}, score: 0.5314080714125682)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did Travel + Leisure list Yale as one of the most beautiful campuses?,2011,1776,"[Document(id=f5827b618d21c5fdd0fdfea0fa921e34ade55b836359a5c79591189c457876ab, content: ' Yale's 1950s-era Ingalls Rink, designed by Eero Saarinen, was included on the America's Favorite Ar...', meta: {'name': 'New_Haven,_Connecticut', 'source_id': 'ea0cc34412d3cbaf6fdb294292fb788f6c16177b7940bdfbbbcfc2ad46cf157e', 'page_number': 1}, score: 0.49414859714086073)]",0.0,0.3020399212837219,0.0,0.0,0.0 +What sort of variants did the Human Genome Project indicate that many of the measured transcripts were?,alternative variants,moderate levels,"[Document(id=073df342a989619b1eec131e4197dffb009cfe81b1f8022d1a8cd8b13985d1db, content: ' The Human Genome Project states ""People who have lived in the same geographic region for many gener...', meta: {'name': 'Race_(human_categorization)', 'source_id': '1232f7fe18d6e2adb87627ce2042b468cfa3fb9ba0b6afb2165c345f440b5339', 'page_number': 1}, score: 0.5611322350404006)]",0.0,0.14990274608135223,0.0,0.0,0.0 +During what year did the Italians invade Libya?,1911,1911,"[Document(id=d5a42bacf8a08d601a973e5bad25b36c41015f76de0d512f9c3130d5e87ae4de, content: ' He repeatedly expressed a preference for the desert over the city and retreated to the desert to me...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.584297128771873)]",1.0,1.0,1.0,1.0,1.0 +What did Davidson believe the God of Aristotle is synonymous with?,rational thought,Ilion,"[Document(id=bfdb2859c4aba7b42fa442a55f6ab1b3ece70f89cdc51e7d48df4e96e7091e95, content: ' +As god of colonization, Apollo gave oracular guidance on colonies, especially during the height of ...', meta: {'name': 'Apollo', 'source_id': '2eaf6f8c7cfd741cc2ece4f65e91b75ad204489bacd14efebc4697a9395dbb00', 'page_number': 1}, score: 0.5127319887009952)]",0.0,0.06766562163829803,0.0,0.0,0.0 +What percentage of students at Notre Dame are the children of former Notre Dame students?,21–24%,61,"[Document(id=d56b5cb42c58714fa0207d30d40b2901fd597738eab19c08de33e3edca922bd1, content: ' Kelly succeeded Charlie Weis, who was fired in November 2009 after five seasons. Although Weis led ...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 0.47814885276980346)]",1.0,0.3738129138946533,1.0,1.0,1.0 +On what day did U.S> planes located Ozawa's fleet?,second,24 October,"[Document(id=67d402d488ad87fcda2d30f304cebc3d3ab52520c994807e5b06e892c3a0e1e1, content: ' The carriers had only 108 planes. The force was not spotted by the Allies until 16:40 on 24 October...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 0.694417436054523)]",1.0,0.24101078510284424,1.0,1.0,1.0 +Greek was crowned the European champions in what event?,Euro 2004,Euro 2004,"[Document(id=9c5e4e5ebe04f6d60e8e13bc3cfe5cd0c8fa7c41c1148c916c472a26ca7c677b, content: ' Their best ever performance was in the 1896 Summer Olympics, when Greece finished second in the med...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.5942357834131707)]",1.0,1.0,1.0,1.0,1.0 +Which tense indicates that an action hasn't finished?,imperfective,imperfective,"[Document(id=b086dbc67b80e6f26b1e823c6ec440f8f645661a3c528184598912bb2cd8d10b, content: ' On occasion of the publication’s 45th anniversary, the Croatian weekly journal Forum published the ...', meta: {'name': 'Serbo-Croatian', 'source_id': '2cc55814bc801258e8300ed82e003428dbf11715c74ad99297d61c5dd77023f4', 'page_number': 1}, score: 0.43658414340153945)]",1.0,1.0,1.0,1.0,1.0 +What acoustic element could be distorted by different audio settings in iPods prior to 2007?,Bass,bass instruments,"[Document(id=25a9235e2187510deb110e2fe84f6e0f7a2dbacbbd0bd491e44e8a83b3be8cbf, content: ' This would happen for EQ settings like R&B, Rock, Acoustic, and Bass Booster, because the equalizer...', meta: {'name': 'IPod', 'source_id': 'f0324763387a7e11c235569bada6935891b2c5ba40dd065ab5acc1bb5de8dbed', 'page_number': 1}, score: 0.6185783377431922)]",1.0,0.8350290656089783,1.0,1.0,1.0 +In what way can Neutral triatomic hydrogen exist?,excited form,excited form,"[Document(id=f649524cdd28fbe2ba5a3cadae832b66d8fef791e1b4613b5d6705e7ca2b49a9, content: ' As a plasma, hydrogen's electron and proton are not bound together, resulting in very high electric...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.6261915857181519)]",1.0,1.0,1.0,1.0,1.0 +How are children used in the conflicts in Burma?,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements.,All male children in Buddhist families are encouraged to be a novice (beginner for Buddhism) before the age of twenty and to be a monk after the age of twenty,"[Document(id=24b43e7abfd404fae93d249c1ea9b3f3291572fa9a977ea3adc12146d7d194a0, content: ' The arts, particularly literature, have historically been influenced by the local form of Theravada...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.5226686003616492)]",1.0,0.32406648993492126,1.0,1.0,1.0 +In what decades were Arsenal players stereotyped in a negative way?,1970s and 1980s,1930s,"[Document(id=db4ceb7a496a06e0d9cc63e221e6c8d2205d64c499149de6aa8c41e851b9cd5a, content: '3 billion in 2015. +Arsenal Football Club was formed as Dial Square in 1886 by workers at the Royal A...', meta: {'name': 'Arsenal_F.C.', 'source_id': 'b197b14120aabbc3bf2073d4b3e99c1c51d56d5ac5248f8a97dadcfa056a6f7e', 'page_number': 1}, score: 0.5057590998140661)]",1.0,0.5979937314987183,1.0,1.0,1.0 +When did new construction start in Melbourne?,late 1890s,The 1850s and 1860s,"[Document(id=59b11e1f477d6345b55d7c631b6690f8efcc7e8aabb31697c94914dd9e534a7c, content: ' On 30 August 1835 the party disembarked and established a settlement at the site of the current Mel...', meta: {'name': 'Melbourne', 'source_id': 'e9cc00699ea29b72949874ec0e147efa277ecdaa2a727246f376aab1a8d342bd', 'page_number': 1}, score: 0.5643502397335578)]",1.0,0.533630907535553,1.0,1.0,1.0 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:5.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:5.csv new file mode 100644 index 0000000..cca4578 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:2__chunk_size:5.csv @@ -0,0 +1,123 @@ +questions,true_answers,predicted_answers,retrieved_docs,doc_mrr,sas,doc_map,doc_recall_single_hit,doc_recall_multi_hit +What were the earliest priesthoods? ,flamines,flamines,"[Document(id=99729d5c09ec68a18e6cb9e326f59027b5c69bc74e05e8c8c0d825360370bf5e, content: ' In household cult, the paterfamilias functioned as priest, and members of his familia as acolytes a...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5457362610814793)]",1.0,1.0,1.0,1.0,1.0 +When was a third visit to Mnafeld scheduled? ,early 1546,February 1918,"[Document(id=0ef8f8c1cfa5365560e9c0600ddc6c5562db768c0ffefb5be0810d049e58a20c, content: 'S. entered World War I he immediately requested an overseas assignment but was again denied and then...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 0.3619775106609302)]",0.0,0.3045777678489685,0.0,0.0,0.0 +What was the name of the airfield where heavy raids took place?,Farnborough,Coventry,"[Document(id=8fc950b88e17fb84bfb26ca1cf8567786d02b1c73face329b95c2f7c7e1dda1b, content: ' ""Pathfinders"" from 12 Kampfgruppe 100 (Bomb Group 100 or KGr 100) led 437 bombers from KG 1, KG 3, ...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 0.5286291667203107)]",1.0,0.4347962439060211,1.0,1.0,1.0 +The Modern English ending -'s is derived from what ending in Old English?,-es,-es,"[Document(id=e5cb80c1093f573ce5c1d255322723cb12056865ea80e45b815e3467be2f33fd, content: ' It maintains several distinct cases: the nominative, accusative, genitive, dative and (vestigially)...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.5479583834144631)]",1.0,0.9999998807907104,1.0,1.0,1.0 +In what area is it common for spiritual mentorship to be extremely high?,the West,Hinduism,"[Document(id=f628cb30d18c1374b4351a233133ea8649eb78c607439c749e11f99f56c9bbac, content: ' This guidance is generally offered during a ceremony called the patriarchal blessing. +In Hinduism t...', meta: {'name': 'Teacher', 'source_id': '8d3b7bd3176a10380300a49fa713b8fffa5e77a181df51ea823d2bdeb46d4c7b', 'page_number': 1}, score: 0.4727434670791851)]",1.0,0.20152541995048523,1.0,1.0,1.0 +Who is the only Greek volleyball club to win European titles?,Olympiacos,Olympiacos,"[Document(id=927ee26533dc7f17be50a4cb7fc74bed43a4bc44825d916333e6dd9c5d18c958, content: ' The Greek league, the A1 Ethniki, is considered one of the top volleyball leagues in Europe and the...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.6800230578684324)]",1.0,1.0000001192092896,1.0,1.0,1.0 +How many judges were on the panel for this season?,four,four,"[Document(id=52a0fae35f124062c7009aeb57cfa6fb15f201644550104b1c1a83c6f0449b3a, content: ' This season's judging panel consisted of Randy Jackson, along with Mariah Carey, Keith Urban and Ni...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 0.6542733508503982)]",1.0,0.9999999403953552,1.0,1.0,1.0 +"Along with a chemical process, what type of process is used by night vision devices to convert infrared into visible light?",electrical,electrical,"[Document(id=05e7c56a6da1ed703309ec6f627bc69c369ec51519a9e5eb8c442673c62365e4, content: ' Infrared can also be used to detect protostars before they begin to emit visible light. Stars emit ...', meta: {'name': 'Infrared', 'source_id': 'ba9d9843065facd8aa75e472f0c7d760ae3d78712335a49f7abaf499acb21d1c', 'page_number': 1}, score: 0.7197798569813239)]",1.0,1.0000001192092896,1.0,1.0,1.0 +The second most prominent religion in New York is what?,Judaism,Judaism,"[Document(id=4437c778daec446c4362623ed3261e5ec4170314f4aa14f8381b8c7ee6eef3fe, content: ' +Christianity (59%), particularly Catholicism (33%), was the most prevalently practiced religion in ...', meta: {'name': 'New_York_City', 'source_id': '8a1509d057a51ed7a68b42ff87df5d459ac7f15f3d0f4f1685859ba197f9686d', 'page_number': 1}, score: 0.7362191709321027)]",1.0,1.0000001192092896,1.0,1.0,1.0 +The Free Hanseatic City of Bremen includes which cities?,Bremen and Bremerhaven,two urban districts,"[Document(id=a102b81b2963aaefdf1a83d1a1d96870be10e96bb78da447e6f4a0220bbf260f, content: ' The equivalent of the Minister-President is the Senatspräsident (President of the Senate) in Bremen...', meta: {'name': 'States_of_Germany', 'source_id': '60764e89141121f734e956aeeb48fcbe12c7e4fdec683c4f9fa29406a3f0579e', 'page_number': 1}, score: 0.5987085330383715)]",1.0,0.23566488921642303,1.0,1.0,1.0 +What seeks to limit the use of polysyllabic characters?,CPC,"Unicode are rarely used Kanji or Chinese characters, many of which are part of personal and place names, making them rarely used, but much more essential than envisioned in the original architecture of Unicode","[Document(id=342c330a27b48d875c7faab6489e31c23f8175b42e50bc55435524888ab8e70f, content: 'g., Egyptian Hieroglyphs) and thousands of rarely used or obsolete characters that had not been anti...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.5761358435107942)]",0.0,0.01901988312602043,0.0,0.0,0.0 +In what courts can federal habeas corpus suits be brought?,federal,federal,"[Document(id=be1e7e4a69e49687682a8ca0c606792dd44baba4b9d0f7e7871524099ae07fba, content: ' The second-largest mass execution was also a hanging: the execution of 13 African-American soldiers...', meta: {'name': 'Capital_punishment_in_the_United_States', 'source_id': '0a4e4dda32f7be6a4f78da5ed9ee21d2cc7659112c0d087b2923a482bdc77b5b', 'page_number': 1}, score: 0.5800851716370473)]",1.0,1.0,1.0,1.0,1.0 +What did Whitehead consider himself as a philosopher? ,rank amateur,rank amateur,"[Document(id=b0360cb0e79aa53fce9b6b987065d38127ef8c7392ded59d294e940b3fc8c15b, content: ' For Whitehead, education should be the exact opposite of the multidisciplinary, value-free school m...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 0.6416938602715427)]",1.0,1.0,1.0,1.0,1.0 +What condition does a person have to meet to be allowed by law on a coin?,deceased,Geithner,"[Document(id=72219d05f3a5eb2742c24339f535f703a1e2eab90cf567e58e7fabed15a9bd26, content: ' Geithner. Future minting of such coins will be made solely for collectors. +The Constitution provide...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.3853987952901218)]",1.0,0.30465614795684814,1.0,1.0,1.0 +Which region of the US experiences rain year round?,Pacific Northwest,nearly all U.S,"[Document(id=8a2739e7083a8933e34f34b82a0419e65f25241888f20dfc9f57614f1dbbb640, content: ' This reputation stems from the frequency of light precipitation in the fall, winter, and spring. In...', meta: {'name': 'Seattle', 'source_id': '1c1a54ff037a1e964929dc466a4a876fa6bb7609d7737ba7226bcd2ab49385cf', 'page_number': 1}, score: 0.6143793924794404)]",0.0,0.3211246728897095,0.0,0.0,0.0 +What is the ability to recognize and adapt to new specific pathogens called?,Adaptive (or acquired) immunity,Innate immune defenses,"[Document(id=a18e0058b343b5da0485ed348b299343056dca832078a909b95578e3a8fd7eb7, content: ' Conversely, non-self molecules are those recognized as foreign molecules. One class of non-self mol...', meta: {'name': 'Immune_system', 'source_id': 'fcb8c5092b2596a66d4f96587d653cbabda606874d86632c35ec40ef8f023051', 'page_number': 1}, score: 0.6306169395074717)]",1.0,0.6429639458656311,1.0,1.0,1.0 +Who allegedly hit a home run to the Center?,Babe Ruth,Kris Bryant,"[Document(id=d4ea7f9d753e5097de93cd59bafb975870c755859e3b41b8405196b35d15c608, content: ' Dave Kingman hit a shot in 1979 that hit the third porch roof on the east side of Kenmore, estimate...', meta: {'name': 'Chicago_Cubs', 'source_id': '10bd5330115a2fe2b9ef60d21abe3942769709161783409af0c4444ccac129c3', 'page_number': 1}, score: 0.551859834267042)]",1.0,0.4678385853767395,1.0,1.0,1.0 +Which type of graduate from Imperial earned the second highest average starting salary after graduation?,Computing graduates,Computing,"[Document(id=69167e0ce0c5b24f9a9e3827fc8644256bf41ebe37320544c994fc892aac55b6, content: ' In terms of specific course salaries, the Sunday Times ranked Computing graduates from Imperial as ...', meta: {'name': 'Imperial_College_London', 'source_id': '8ff02d910ce681fdd72074b13ed322271625386966ee827dabf1c93ca556fd6e', 'page_number': 1}, score: 0.48353147038749955)]",1.0,0.6075230836868286,1.0,1.0,1.0 +In what year did Eisenhower propose his second civil rights act?,1960,1957,"[Document(id=e20106e1401ce2522cd3d14d13fae9ea782170d84e4c3db914247513b098ce09, content: '.. a discrimination in the expenditure of those funds"". +Eisenhower told District of Columbia officia...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 0.6884836139820915)]",1.0,0.703947901725769,1.0,1.0,1.0 +What is the current height limitation in Paris in central areas?,50 m,50 m (160 ft),"[Document(id=d89cdeedbc8441f4e2c52a3575e8c530fcce225561404b37c7cdd108d1cc549f, content: ' Modern Paris owes much to its late 19th century Second Empire remodelling by the Baron Haussmann: m...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.7343823718020364)]",1.0,0.8085941672325134,1.0,1.0,1.0 +Income level was found to have a signification correlation to which outcome in the study?,nutrition literacy,lower levels of growth,"[Document(id=e62fe349ee4e73a3572e9f63b19d92af4d05f2e4ba5ba37814049ed91dfeb15b, content: '"" +In 1993, Galor and Zeira showed that inequality in the presence of credit market imperfections has...', meta: {'name': 'Economic_inequality', 'source_id': '648ff32b8c146c974b23e115e3a61420b1cdbea9de1f5bf38442ce4c1d9caa42', 'page_number': 1}, score: 0.4799174471270644)]",0.0,0.13175535202026367,0.0,0.0,0.0 +How much did Viacom want in damages in their lawsuit against youtube?,$1 billion,$1 billion,"[Document(id=720eb3795a623e31335108bcc29a4b774e33272bb625d5968983846dd3b35f68, content: ' Three successful complaints for copyright infringement against a user account will result in the ac...', meta: {'name': 'YouTube', 'source_id': '4d18798daeceb997a6dcb04b575dbbdad768671ca9202116370611d944d510be', 'page_number': 1}, score: 0.7553394085608023)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who should be able to challenge administrative orders in court?,individuals,individuals,"[Document(id=cf59c0f0619e715b979051e0a84530bf73463713fa11f2961866d233ac2537f5, content: ' Dicey's: “no man is punishable or can be lawfully made to suffer in body or goods except for a dist...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 0.6271917279684331)]",1.0,1.0,1.0,1.0,1.0 +The Slavs were under what name in the early 6th century?,the Antes and the Sclaveni,East Slavs,"[Document(id=701344efe2101165a535fe45e922b01123011116f4f123fcd5b1668edbb29898, content: 'Slavs are the largest Indo-European ethno-linguistic group in Europe. They inhabit Central Europe, E...', meta: {'name': 'Slavs', 'source_id': 'a55731ff3cd03d8fe34d4e646dbfcdc2088f01190e5110c9ff0baf4e4b486847', 'page_number': 1}, score: 0.6309663423432798)]",1.0,0.3395293653011322,1.0,1.0,1.0 +How are the results simplified when using the SASO?,"into four summaries that look specifically at responses that correspond to either homosexuality, heterosexuality, bisexuality or asexuality.",four summaries,"[Document(id=f86f4f0c6b96ee3b686893cc41683ca450066b225884d3390504ef3124153018, content: ' +The SASO consists of 12 questions. Six of these questions assess sexual attraction, four assess sex...', meta: {'name': 'Sexual_orientation', 'source_id': 'f94279650936a403aa3d2b43ab1ad42b1fd14d05455aa86fbcda6cda978ffe5c', 'page_number': 1}, score: 0.5156582100443352)]",1.0,0.5629022717475891,1.0,1.0,1.0 +What socioeconomic class used the Florentine Tuscan language in Dante's time?,upper class,literate and upper class,"[Document(id=d25b410d7ee734d39ca37d5429e9393300e12f25ab3ed13f9a91eb5740bf305f, content: ' Due to Eastern Lombard's status as a Gallo-Italic language, an Eastern Lombard speaker may, in fact...', meta: {'name': 'Dialect', 'source_id': '2e05b686e48c9512ae5466f1d15202b44fdc903ea32d0a42c5dbc9ef173d051d', 'page_number': 1}, score: 0.6070526656029314)]",1.0,0.7113619446754456,1.0,1.0,1.0 +"In addition to ammo proposals, what other need was articulated?",The need for some form of tracer or smoke trail,smaller-calibre ordnance,"[Document(id=0578917295f43992a211ded11b78bf62d55a950a27b45fde24ab1eabb2df79e0, content: ' Their needs could cogently be met with smaller-calibre ordnance beyond using the usual singly-mount...', meta: {'name': 'Anti-aircraft_warfare', 'source_id': 'c54639424f62ce369356ed8c4e1e9fab8769359dd28029aa89832f5aa994f45d', 'page_number': 1}, score: 0.4206405801057249)]",1.0,0.10714080184698105,1.0,1.0,1.0 +What is that power that enables us to love?,Prevenient grace,universal love,"[Document(id=a78f83359689c5324d38fe66319b2e022231f3eea49709cfa6adc4ff161463f3, content: ' Love led to spiritual growth that, in turn, reflected an increase in love for God and humanity. Thi...', meta: {'name': 'Dominican_Order', 'source_id': 'f55ba4e5b37c737e23256c74229a0bf3a5c697b70e171c6ce20c437d900b023e', 'page_number': 1}, score: 0.4322410896201257)]",0.0,0.33382272720336914,0.0,0.0,0.0 +"Who was assassinated on September 22, 189 AD?",He Jin,Caesar,"[Document(id=9251dafda92232db54143d3e1f9a589eeb4b79fd5873fc0b101379c7cfe3f5ba, content: ' Thus, the assemblies became powerless and were unable to oppose him. +Caesar was assassinated on Mar...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.4975724389201519)]",0.0,0.25281232595443726,0.0,0.0,0.0 +How did Gaddafi change the legal system?,he introduced sharia as the basis for the legal system,introduced sharia,"[Document(id=bdc01f662020adb880737d79c53bc6a192c293e64aaacad9c0bca0ebff7ff92d, content: ' Intent on pushing Libya towards ""Islamic socialism"", he introduced sharia as the basis for the lega...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.5802873287113512)]",1.0,0.7288561463356018,1.0,1.0,1.0 +What name was it assumed Elizabeth would take upon her marriage to Philip?,"Mountbatten,",heir presumptive,"[Document(id=d84cede505487aab2b7f8ca81ca6dc2ec7e647562d9fcef338a4a7bc8b2c2e2b, content: ' She was educated privately at home. Her father acceded to the throne on the abdication of his broth...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.6316186438786011)]",1.0,0.1620749980211258,1.0,1.0,1.0 +What was the attack on the British weakness?,Battle of Fort Bull,UN intervention and US pressure forced Britain into a humiliating withdrawal of its forces,"[Document(id=d09ab756c06e4e2126fc0655780e94923b99c11b88bd24e4b791726d7b43a296, content: ' Another of Eisenhower's concerns was the possibility of a wider war with the Soviet Union after it ...', meta: {'name': 'British_Empire', 'source_id': '8f673e96ef153333a8fc1d3855be60f5be1829316ed1fcb58dd8590fdb083946', 'page_number': 1}, score: 0.5456397414690248)]",0.0,0.13994494080543518,0.0,0.0,0.0 +How much of South Tucson is Native American?,10%,5%),"[Document(id=85479ceb6ee7474eee57b17473c24a59db101c6d892e6eb76c94e6a327c1f337, content: '5%) Native American, 96,405 (6.3%) Asian (2.0% Chinese, 1.2% Indian, 0.9% Vietnamese, 0.', meta: {'name': 'Philadelphia', 'source_id': '171d1ade27657b3e3fccd29b91877e3084f29a24e344651181523e2acec7739a', 'page_number': 1}, score: 0.5113436268756754)]",0.0,0.776621401309967,0.0,0.0,0.0 +Whose theory claimed that species were unchanging and designed by God?,John Ray,William Paley,"[Document(id=1de1b94fbb49d9d6bbc2cdcf71ef4f81cd3ec13b95fe79865c57979200ac9095, content: ' Lamarck thought there was an inherent progressive tendency driving organisms continuously towards g...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 0.64077301092669)]",1.0,0.2994629740715027,1.0,1.0,1.0 +Who can restore a database that has been corrupted?,database administrator,dedicated backup files,"[Document(id=d62f58c8737fe86268de0ebf0cb0ef4eae1f9ba5f33735f7cadb2e83beb9e377, content: 'g., cases when the database is found corrupted due to a software error, or if it has been updated wi...', meta: {'name': 'Database', 'source_id': '98b09743ece9be42ad021f0dc6ffab08c322e7eeace6aae0b98b4bbf67224bd3', 'page_number': 1}, score: 0.5250403765091296)]",1.0,0.12655818462371826,1.0,1.0,1.0 +What unpopular philosopher's text did Avicenna learn from?,Abu Abdullah Nateli,Abu Abdullah Nateli,"[Document(id=a3a017724e8f025ada3a4a92e98993ec44b17e0941134ce7e52db3191cd3ea08, content: ' He learned Indian arithmetic from an Indian greengrocer,ءMahmoud Massahi and he began to learn more...', meta: {'name': 'Avicenna', 'source_id': '25cbb1011a29473e1dfa8d96ebe95bc8ead7393548de57e871f959624f7238f1', 'page_number': 1}, score: 0.665399959042628)]",1.0,1.0,1.0,1.0,1.0 +What was the event called that caused Ayutthaya to reclaim lost territories ?,the Chinese threat,With Burma preoccupied by the Chinese threat,"[Document(id=a8730bd7201f8a080d3cffb4ba6d02e7cc82f409fe226dfe6987d18adaeb8596, content: ' Its trade and secular administrative reforms built a prosperous economy for more than 80 years. Fro...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.542541483221691)]",1.0,0.7524588704109192,1.0,1.0,1.0 +"Before the introduction of the Latin alphabet, how was Old English written?",runic system,runic system,"[Document(id=8825405ebdc1bb4daf623185ae693fe4f2cab9da4dcca330ccb2ab3f6b05d6e5, content: ' The modern cognates of original words have been used whenever practical to give a close approximati...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.7110396889326562)]",1.0,0.9999999403953552,1.0,1.0,1.0 +"When groups that are antagonistic to each other form a coalition for hidden gain, it is called what?",unholy alliance,unholy alliance,"[Document(id=f1fa43fc9c1c5bd46b9f5455701bf99b0e98d4579816d755389bcc2dbba1bcda, content: ' This sum itself may be all or a portion of the difference between the actual (inflated) payment to ...', meta: {'name': 'Political_corruption', 'source_id': 'e7c84fb03cb06b72c4ab51eb24218e9f2a10b95493e43d575342db0ffa3e9e1d', 'page_number': 1}, score: 0.5088500802288218)]",1.0,1.0,1.0,1.0,1.0 +What made a strong economic case for investing in low carbon technologies?,Stern Review,the Stern Review,"[Document(id=333c4da3ee155b7d7b4ddf91857cd1ec4e1c3cafb4159ebedda18347755b3f1f, content: ' In the US the main tool to track and stimulate voluntary actions is Green-e program managed by Cent...', meta: {'name': 'Renewable_energy_commercialization', 'source_id': 'fa5f7904b8df5272f4f870ed401f610435c916171c87b47deed9a45e6bd594ea', 'page_number': 1}, score: 0.5063254824004584)]",1.0,0.9567214250564575,1.0,1.0,1.0 +"Who introduced the Latin version of the word ""insect""?",Pliny the Elder,Pliny the Elder,"[Document(id=e8da082f5022b804801ddb2778f16c32226f0ecb59f3ed7f8fcdae17653f162c, content: ' +The word ""insect"" comes from the Latin word insectum, meaning ""with a notched or divided body"", or ...', meta: {'name': 'Insect', 'source_id': 'e14215fdfb7e1394430dedb24549d773464f110b7f19b86558bc101f84126630', 'page_number': 1}, score: 0.7250091339820998)]",1.0,0.9999998807907104,1.0,1.0,1.0 +What was started by the Roman Catholic Church to dispense of the Cathars in Languedoc?,The Cathar Crusade,The Cathar Crusade,"[Document(id=08215551b881c7cefad5bb5baaaddc4d6f9a5802a709d61e32985651e2115c85, content: ' By the 11th century, more organised groups such as the Patarini, the Dulcinians, the Waldensians an...', meta: {'name': 'Heresy', 'source_id': '8ba473262f8508a9c99699bb09cd022993896ece1fed1463a9cf4bd7eb0c3e68', 'page_number': 1}, score: 0.5927295914277724)]",1.0,1.0,1.0,1.0,1.0 +What music service is a huge competitor for Tidal?,Spotify,Aspiro,"[Document(id=ceb53e0be996e6acbe2836b14e1988152098397cb9e1cd2d54a2293b3ca833e7, content: ' ""Creating a partnership with Beyoncé, one of the most hard-working and talented people in the world...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 0.5982661567904872)]",0.0,0.20396557450294495,0.0,0.0,0.0 +How much was each copper token worth?,a halfpenny,"1.505 g) in 1837 (16:1 ratio).[citation needed] +Technically, all these coins are still legal tender at face value","[Document(id=8cba8d045af2ea2d04789f21782201aca9ae1bf67c20440df2f6dabfdb2b03d1, content: '22 grains (1.505 g) in 1837 (16:1 ratio).[citation needed] +Technically, all these coins are still le...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.5695327514337405)]",0.0,0.1435888707637787,0.0,0.0,0.0 +What ships were attacked in 1852?,Glencoe and Sea Nymph,Glencoe and Sea Nymph,"[Document(id=73cc59555437274633bfd424f9225eaa5b55cf814b3bbb82c89b0d9357a659f0, content: ' In 1852 the San Francisco-based ships Glencoe and Sea Nymph were attacked and everyone aboard excep...', meta: {'name': 'Marshall_Islands', 'source_id': '762a4f6095de127ded5441c7a17d1c9c38f731ac9cb1ffe8de30af6573be7edf', 'page_number': 1}, score: 0.5423896434650102)]",1.0,1.0,1.0,1.0,1.0 +What was the first fossil to display both clearly traditional reptilian characteristics?,Archaeopteryx,Archaeopteryx,"[Document(id=22e5680a1c41aa000971ff89fccd80f808c8f88ae7d0f5c0d81eb763ce026955, content: ' Unlike Archaeopteryx and the non-avialan feathered dinosaurs, who primarily ate meat, recent studie...', meta: {'name': 'Bird', 'source_id': '76114e3e9c92caec1de2bcfd38b6f22e1b48487f665fa6392f914934a6748ae1', 'page_number': 1}, score: 0.5813249416305144)]",1.0,0.9999998807907104,1.0,1.0,1.0 +What are the theories that integrate molecular genetics with Darwinian evolution called?,the modern evolutionary synthesis,the modern evolutionary synthesis,"[Document(id=54fb8e44f718e454d5f4aef7097036553aa91b829fc2ea08644a1f9fc17e50f5, content: ' +The theories developed in the 1930s and 1940s to integrate molecular genetics with Darwinian evolut...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 0.6825508197600287)]",1.0,1.0000001192092896,1.0,1.0,1.0 +At what time did Tesla get dinner?,8:10 p.m,eight o'clock,"[Document(id=5dfd5078daafbc2ef3695636e6d70b42db2abc79ac389c20eac0417969714c11, content: ' Tesla would telephone his dinner order to the headwaiter, who also could be the only one to serve h...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.7299236291347593)]",1.0,0.5878220796585083,1.0,1.0,1.0 +In what year did the French reach Alsace?,1444,1789,"[Document(id=9684be592dc60ece166ecacdd76c443b9c9ab890c4097e28f83c62b1b84e5397, content: ' France did endeavour to promote Catholicism; Strasbourg Cathedral, for example, which had been Luth...', meta: {'name': 'Alsace', 'source_id': '1105499d303196fb0b7ae4d26209720b0040e63376f114d458fd77f05836cbd4', 'page_number': 1}, score: 0.6567755301901245)]",1.0,0.2011210322380066,1.0,1.0,1.0 +Who was responsible for the handling of the Roman census?,two censors,censors,"[Document(id=3749ef6e14ea9af3893cfd84917e3a39acc2212480fc846e5a9231414a5599e6, content: ' Praetors administered civil law and commanded provincial armies. Every five years, two censors were...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.5462632729451639)]",1.0,0.8454219698905945,1.0,1.0,1.0 +When was a resolution agreed to about Chinese human rights issues in San Francisco?,"April 1, 2008","April 1, 2008","[Document(id=80f5e2e80143434aa6d2d78a16f411f7f6ab4f8ffffbd29914ad0c75e8bf31da, content: ' The cost to the city for hosting the event was reported to be USD $726,400, nearly half of which ha...', meta: {'name': '2008_Summer_Olympics_torch_relay', 'source_id': 'a45e6ea09a1ff12d8a239a682c8ef3735ae894d5ab6427b50909053bb046e8b5', 'page_number': 1}, score: 0.5523669903863153)]",1.0,1.0,1.0,1.0,1.0 +Who was the last pope to be crowned?,Paul VI,Angelo Giuseppe Roncalli,"[Document(id=d025eebf35da454623a00c5d71d009d27de02dc1bdc601d3e2b18ee0f2c3a436, content: 'Pope Saint John XXIII (Latin: Ioannes XXIII; Italian: Giovanni XXIII) born Angelo Giuseppe Roncalli,...', meta: {'name': 'Pope_John_XXIII', 'source_id': '21b200b333306d54410b53cfd12e123951dabfc1f534196e5309132d00e8184d', 'page_number': 1}, score: 0.6199415732514972)]",0.0,0.4155743420124054,0.0,0.0,0.0 +Who handles serious medical emergencies on Norfolk Island?,the Royal Australian Air Force,Royal Australian Air Force,"[Document(id=c983e8f9fccac478cd7d895c53ca7636f4291c3a87693dd68246b03e75a4be42, content: ' All visitors to Norfolk Island, including Australians, are recommended to purchase travel insurance...', meta: {'name': 'Norfolk_Island', 'source_id': '8be872c9c3302a16163599d52ec60841ddff13042f8e8e2e0aa14de2c192e2ee', 'page_number': 1}, score: 0.6392372140463647)]",1.0,0.9866578578948975,1.0,1.0,1.0 +Were Roman soldiers expected to purchase their own equipment?,"wealthiest citizens, who were able to purchase the best equipment",the Roman state was forced to arm its soldiers at the expense of the state,"[Document(id=f0adc94ea827dd57e6a4ed0151d80a56b2b699e6457dfd794787f7d9f9121082, content: ' As a result, the Roman state was forced to arm its soldiers at the expense of the state, which it h...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.521156964371783)]",1.0,0.02006305381655693,1.0,1.0,1.0 +Who did Nasser's group try to assassinate?,Hussein Sirri Amer,Israel,"[Document(id=e3eb8d73377a55465b398afb7b3b16c9d73a20ea7ff944883d6ce9f984b32ec6, content: ' Nasser refused the call upon determination that the air force lacked pilots and Amer's handpicked o...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 0.5755809019449938)]",1.0,0.28454625606536865,1.0,1.0,1.0 +What was AJ Ayer an important member of?,Logical empiricism,Quraysh tribe,"[Document(id=7fe3d60e77356f23e0d8721ed8155f6235831447e86e68263318c414add29bce, content: ' +After the assassination of Uthman in 656, Ali, a member of the Quraysh tribe and the cousin and son...', meta: {'name': 'Umayyad_Caliphate', 'source_id': '704c4038f63f9c5a686218060cb31639c0333dda34e87e804be75e5fec05ec53', 'page_number': 1}, score: 0.37085195047316144)]",0.0,0.030898451805114746,0.0,0.0,0.0 +In which year was a settlement reached regarding GE's pollution of the Housatonic River?,1999,1977,"[Document(id=f6a12178d43defb2a103deca0f4fb166a5e99e5a8e58be48c76be81bd5d2bd38, content: ' Around the same time, General Electric's Canadian counterpart, Canadian General Electric, was forme...', meta: {'name': 'General_Electric', 'source_id': '26d4044eb2d79191635af69e57c262af8041a70871e60e989128c458529f04cc', 'page_number': 1}, score: 0.5458091961847271)]",1.0,0.5505510568618774,1.0,1.0,1.0 +What area in Charleston is considered the 'industrial area'?,the city's Neck area,Charleston-North Charleston-Summerville Metropolitan Statistical Area,"[Document(id=d977a4cc7a4d3b3c59cbe399796de677701ac0e1ffb496b4eac87bdcb8e5aefb, content: '4 °F (20 to 22 °C). +The Charleston-North Charleston-Summerville Metropolitan Statistical Area consis...', meta: {'name': 'Charleston,_South_Carolina', 'source_id': '6135d76cf2064b0c4f70e685c5c7e15b8093869ebd10f9a74e8acb07e2173763', 'page_number': 1}, score: 0.5197195559087473)]",1.0,0.3598399758338928,1.0,1.0,1.0 +In what year did Frédéric obtain citizenship in France?,1835,1835,"[Document(id=2dc3b5cfb05bfeb4e2c0b072a5a5e6417b1109588ffcbd88ffd03346c8bc1975, content: ' In France he used the French versions of his given names, and after receiving French citizenship in...', meta: {'name': 'Frédéric_Chopin', 'source_id': '5e95b80d7ceb3febb2d1a7dfe09730771a231fc8d23c15cd74d3fa2327cfcb14', 'page_number': 1}, score: 0.7137269057215699)]",1.0,1.0,1.0,1.0,1.0 +What was Tesla afraid someone was trying to do with his invention?,steal the invention,intercontinental wireless transmission,"[Document(id=afd7fe515c3cf623fa2ada0193ab60872b5b691f58a0bbcb0bac444f28cd4304, content: ' +Tesla went on to pursue his ideas of wireless lighting and electricity distribution in his high-vol...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.6777985821727966)]",1.0,0.13340462744235992,1.0,1.0,1.0 +What is the group called that does not agree with government at all?,anarchists,federalists,"[Document(id=ee3cacc5dc20aac6702b7794c778f638a960315769b9f0cedcb2ca54a8e76a0a, content: ' The federalists, on the other hand, argued that it was impossible to list all the rights, and those...', meta: {'name': 'Federalism', 'source_id': '05eb4479f6579b86eef28e7954d58c2044997504c63cad297935c95307861772', 'page_number': 1}, score: 0.4538650357649971)]",0.0,0.5284585952758789,0.0,0.0,0.0 +What was the name of the bookstore the authors met in?,Old Corner Bookstore,J. B. Lippincott,"[Document(id=d8f8839165552c84949cd4d59c5e9e66f6b03f0cc0abd8a19d08417430c95f58, content: ' Hoping to be published, Lee presented her writing in 1957 to a literary agent recommended by Capote...', meta: {'name': 'To_Kill_a_Mockingbird', 'source_id': 'f1e375014d37d0eb57fcd3e14861d458b3adfe0e71162344652acf38ae9d1bbb', 'page_number': 1}, score: 0.4706522179139646)]",0.0,0.17331847548484802,0.0,0.0,0.0 +Time and space are both examples of what type of resource?,complexity resources,"operational definition of time, wherein one says that observing a certain number of repetitions of one or another standard cyclical event (such as the passage of a free-swinging pendulum) constitutes one standard unit such as the second, is highly useful in the conduct of both advanced experiments and everyday affairs of life. The operational definition leaves aside the question whether there is something called time, apart from the counting activity just mentioned, that flows and that can be measured","[Document(id=849ac9a1750eb7540fb92137ca4256c703db879b81e07bec1f34d0071ef5b30e, content: ' An operational definition of time, wherein one says that observing a certain number of repetitions ...', meta: {'name': 'Time', 'source_id': 'f3ee42c6d38306d65c6207a15a1b9c65ff08a69728d5d0197f9593548177b229', 'page_number': 1}, score: 0.5233312924187772)]",0.0,0.17993292212486267,0.0,0.0,0.0 +What had happened to Frederick's extra troops from Russia?,"Catherine, who ended Russia's alliance with Prussia and withdrew from the war",He also placed a corps of his own troops under Frederick's command,"[Document(id=ba197620bd7dce47cd5783da779f1718bab82d5ef9f7b4a77e908fd7397ad72a, content: ' Then on 5 January 1762 the Russian Empress Elizabeth died. Her Prussophile successor, Peter III, at...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 0.726811910218078)]",1.0,0.28628912568092346,1.0,1.0,1.0 +What percentage of its workforce did Dell terminate in from 2007 to 2008?,10%,10%,"[Document(id=a3b415fe93ba5b2bfab1307e12a6c3d0b910a94f573fedacc0fe6560757ac075, content: ' In total, Dell announced the ending of about 8,800 jobs in 2007–2008 — 10% of its workforce. +The co...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 0.6221439247250247)]",1.0,1.0,1.0,1.0,1.0 +What organization was Rome's official caretaker?,the senate,Latin League,"[Document(id=9ca7c0433d9b49e65fceb55924559b4d9540d1428ba52280dd6110b921c10d6e, content: ' +Each of Rome's legendary or semi-legendary kings was associated with one or more religious institut...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5784773472017329)]",1.0,0.2910771667957306,1.0,1.0,1.0 +Who chose BYU as the location of the national Middle East Language Resource Center?,United States Department of Education,United States Department of Education,"[Document(id=10406721e1bacc4b94568efb8105444ce33c23b507417efe2e032c93615c0677, content: ' Several of its language programs are the largest of their kind in the nation, the Russian program b...', meta: {'name': 'Brigham_Young_University', 'source_id': 'cb3b81b6f2c540426803106face67bc5fd4db7b1a9ef422ec3601456732b4785', 'page_number': 1}, score: 0.7174902149566916)]",1.0,1.0,1.0,1.0,1.0 +Who discovered Hydrogen gas?,Robert Boyle,Robert Boyle,"[Document(id=694b7ad0ec30f16563c095bfd8aa73a286298746fa8b5b6b568961e806fe7462, content: ' +In 1671, Robert Boyle discovered and described the reaction between iron filings and dilute acids, ...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.7181772030878829)]",1.0,1.0,1.0,1.0,1.0 +Where was Polybius forced to go as hostage?,Rome,Rome,"[Document(id=39971eda0fac39993fb2ad01159df52ea10276610c72fad19cdc5a8d781c70e0, content: ' The earliest and most credible surviving source for the Hellenistic period is Polybius of Megalopol...', meta: {'name': 'Hellenistic_period', 'source_id': '322682cbb61d31c17e6d21e06723e94efd0ef242a5c9f308888fa14209c4078b', 'page_number': 1}, score: 0.515861487810237)]",1.0,1.0,1.0,1.0,1.0 +In what decade was disco popular?,1970s,1970s,"[Document(id=901a9f1636faf9035626a9559750c7d346d38dbd3d60a92fc29a9aaf68f78dc7, content: ' Other performing arts venues in Miami include the Gusman Center for the Performing Arts, Coconut Gr...', meta: {'name': 'Miami', 'source_id': '39ab1ce422fd3baeedbb69a9a90b4c0e43c513f6161698478a96f09e5d9f7285', 'page_number': 1}, score: 0.5381765557556315)]",1.0,1.0,1.0,1.0,1.0 +What do the rivers form?,lakes,Nieuwe Maas,"[Document(id=2c02265ed1b53d17cbf8d8090f6a3dad995df9f6a8bbe12e06c780368660607e, content: ' The middle flow begins as Nederrijn, then changes into Lek, then joins the Noord, thereby forming N...', meta: {'name': 'Rhine', 'source_id': '23686e48a61504679a91dac10d83b287aef3dff7dabc0ab881c281059187ce6a', 'page_number': 1}, score: 0.5730174843057143)]",0.0,0.16586220264434814,0.0,0.0,0.0 +What were the first 256 code points of Unicode made identical to? ,ISO-8859-1,historic scripts,"[Document(id=df3636934a87dcaa1aa37c6146a219c020357b983841dcebcc4edf19d3146c4a, content: ' For example, the ""fullwidth forms"" section of code points encompasses a full Latin alphabet that is...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.7118722829389936)]",1.0,0.08416931331157684,1.0,1.0,1.0 +"How many countries did her song ""Irreplaceable"" get number one status in?",five,20,"[Document(id=7bd8798cd87379643365f7305e27ccc9e3b89266b2ba91439f47a12218f428ba, content: ' It has so many depth and layers that it's easily as self-aware and earnest as Ray of Light."" The al...', meta: {'name': 'Madonna_(entertainer)', 'source_id': '0868d847bb768c30a7507239169d5e441bb3181b66c8b9353a863daa3d58e425', 'page_number': 1}, score: 0.5071568670430149)]",0.0,0.5910412073135376,0.0,0.0,0.0 +How many Examination Boards exist in India?,30,30,"[Document(id=87a93f72cbb9c00ea6477b2a31d21c284a2ecf07733f93b0fb4ba7b277f5b8f5, content: ' It is within the power of both the union government and the state governments to govern schools sin...', meta: {'name': 'Private_school', 'source_id': 'c62330ba95e9919513bc9fc65c0d21a748b4a518b9b1f905a113de7f37274e44', 'page_number': 1}, score: 0.5949905096450057)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What play showed an early depiction of civil disobedience?,Antigone,Book of Exodus,"[Document(id=7494eaecd937f085419fe0fd2756a519eebdab4fc40df8775479885b329c3be9, content: ' But Stephen Eilmann argues that if it is necessary to disobey rules that conflict with morality, we...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 0.5443074411362279)]",1.0,0.20138104259967804,1.0,1.0,1.0 +"Once in conversation with the Queen, how is Elizabeth addressed?",Ma'am,Your Majesty,"[Document(id=6661c5b42cee4d9122296712db8297ce21db50225e9f1d2cad1e056d68ca32c8, content: ' Additional styles include Defender of the Faith and Duke of Lancaster. When in conversation with th...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.5861381499007268)]",1.0,0.3193480372428894,1.0,1.0,1.0 +In what century did Paris become largely into fashion?,19th,19th,"[Document(id=ff2ec1a3232f7d74c84b198defa77dd9879cdb65474a7dab2bf47da19675285e, content: ' +Paris has been an international capital of high fashion since the 19th century, particularly in the...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.6258372807721067)]",1.0,1.0,1.0,1.0,1.0 +When did Alfred the Great begin his reign?,871,871,"[Document(id=6f9dc46d6eac9c736c44b2255083b35d238c93547c3efcd0f85d83a3feef71b2, content: ' The speech of eastern and northern parts of England was subject to strong Old Norse influence due t...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.5426833021943858)]",0.0,1.0,0.0,0.0,0.0 +What two methods can be used when using Load Testing?,data or a large number of users,load or endurance,"[Document(id=adc4b66570c6ed9d13c88909bbdc957a40b544797d95742201330710a6ff6385, content: ' Stability testing (often referred to as load or endurance testing) checks to see if the software ca...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.5186147019911518)]",1.0,0.22238588333129883,1.0,1.0,1.0 +what may possibly be a solution to net neutrality concerns? ,municipal broadband,municipal broadband,"[Document(id=5f8dc7264853d3b0b88a03420e7878e16fa4c08ef70ab7cc5d722e889b21dda6, content: 'S. Federal Communications Commission (FCC) was reported to be considering a new rule that will permi...', meta: {'name': 'Internet_service_provider', 'source_id': 'b6cdeebacab1c0fc99cc348ba10a7ad5f8a09ca4691c74f89374730939ffbbb4', 'page_number': 1}, score: 0.6102544598734859)]",1.0,1.0,1.0,1.0,1.0 +What are the ground-attack aircraft in the USAF usually deployed in support for? ,U.S. ground forces,U.S. ground forces,"[Document(id=87ffdf42e04cd4e5aabc33e35caf9614ba61758e355a1d97a38a524ba0bedf46, content: ' +The ground-attack aircraft of the USAF are designed to attack targets on the ground and are often d...', meta: {'name': 'United_States_Air_Force', 'source_id': '1bb222de66e5a2c81f1d5b75bf3eced066939aec82934a5cb176a607b4e8cc8f', 'page_number': 1}, score: 0.7377392492190507)]",1.0,1.0,1.0,1.0,1.0 + In what year did the invasion of Russia take place?,1812,1853,"[Document(id=5e62f2afbd35341eab44e01cce599dfb08f5845fb82d0fb96bdbebed584d740b, content: ' As far as Britain was concerned, defeats inflicted by Russia on Persia and Turkey demonstrated its ...', meta: {'name': 'British_Empire', 'source_id': '8f673e96ef153333a8fc1d3855be60f5be1829316ed1fcb58dd8590fdb083946', 'page_number': 1}, score: 0.6239564400308288)]",0.0,0.462875097990036,0.0,0.0,0.0 +What was the practice of religion to the Romans?,personal choice,a part of daily life,"[Document(id=064c7c3a01bdf93ad9d4d1654337277537a969dc5714be1920555a00340b215e, content: ' Even the most skeptical among Rome's intellectual elite such as Cicero, who was an augur, saw relig...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.6812059913435389)]",1.0,0.26575636863708496,1.0,1.0,1.0 +What is one example of failure testing?,Software fault injection,Software fault injection,"[Document(id=827178e2fa13b1045cb5486ccca30dd5c2a87aa9e9f903cbdd2808ec1b93a098, content: ' Beta versions can be made available to the open public to increase the feedback field to a maximal ...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.6034773868609032)]",1.0,1.0,1.0,1.0,1.0 +"Out of the maximum amount of players allowed, one must be a what?",goalkeeper,goalkeeper,"[Document(id=e6ee12359c832f972207465515e8a9b305bdf92628324bb99b5470cc0b0b74b4, content: ' The same laws are designed to apply to all levels of football, although certain modifications for g...', meta: {'name': 'Association_football', 'source_id': 'f2b7305b09ad8a43b8389a5eb2d84b8ea4b4deaa7968d21fe9da9da9ecda2c6b', 'page_number': 1}, score: 0.56937587493132)]",1.0,1.0,1.0,1.0,1.0 +In which year was the new constitution ratified?,1855,1982,"[Document(id=7d6e1c6a8e83b4b29ce4b75833fc74ff72c585bad0dc7e4ea9a862be027f8038, content: 'C. Statehood Green Party. The nearest this movement ever came to success was in 1978, when Congress ...', meta: {'name': '51st_state', 'source_id': 'aa3341a0a65518fe53b022b57fd32c865f74c249a1b4d32919c38bfd1cead5b7', 'page_number': 1}, score: 0.5897939832060628)]",0.0,0.5184707045555115,0.0,0.0,0.0 +What vessel was attacked in 1845?,Naiad,Naiad,"[Document(id=06462bba1bbaf61b1e06ec260c9ebb15ac9f6984188e161293ac729b7950da5d, content: ' One year later, the American schooner Dolphin arrived and picked up two boys, the last survivors of...', meta: {'name': 'Marshall_Islands', 'source_id': '762a4f6095de127ded5441c7a17d1c9c38f731ac9cb1ffe8de30af6573be7edf', 'page_number': 1}, score: 0.5354329878787587)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did Travel + Leisure list Yale as one of the most beautiful campuses?,2011,2011,"[Document(id=1eb83a50e062eec7af6abab3de1bf0b5576a4c75ac168cc64f1b77a2e268c579, content: '7 km2) Yale-Myers Forest in Connecticut's Quiet Corner—and nature preserves including Horse Island. +...', meta: {'name': 'Yale_University', 'source_id': 'f33638c4d6b8d7572ba500f3a8affca1729aae3046c83a94e2a7424dd50f6549', 'page_number': 1}, score: 0.6826308918386579)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What sort of variants did the Human Genome Project indicate that many of the measured transcripts were?,alternative variants,expressed sequence tags,"[Document(id=c181af6d9e4133e2db3dd3214c28363e87a8f250668a4fc4d9c06fd0060f35eb, content: ' Conversely, plants can have extremely large genomes, with rice containing >46,000 protein-coding ge...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 0.516086017715844)]",1.0,0.21117305755615234,1.0,1.0,1.0 +During what year did the Italians invade Libya?,1911,1911,"[Document(id=3cdcefdaa320ecb0d100eef0c92063190f1030f98ee702385d5ad087a454afc6, content: ' He repeatedly expressed a preference for the desert over the city and retreated to the desert to me...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.6345208523493653)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What did Davidson believe the God of Aristotle is synonymous with?,rational thought,rational thought,"[Document(id=14435d1137e579f740d93ef8fabdf50640828e96d9dd732c7b8c6d4932ae6a58, content: '"" The theory was indebted to Aristotle's pluralism and his concepts of Soul, the rational, living as...', meta: {'name': 'Idealism', 'source_id': '55a0bd42a4a737442a2d93fe0926424e236910de01b2406ed43c174f2225ff9d', 'page_number': 1}, score: 0.6069175571756187)]",1.0,1.0,1.0,1.0,1.0 +What percentage of students at Notre Dame are the children of former Notre Dame students?,21–24%,"8,448 undergraduates, 2,138 graduate and professional and 1,593","[Document(id=1fa5bbb9afa8ebfbb9575bd6b27204acdf8f5bcb61e4e7771f62bb0b32428d62, content: ' +In 2014 the Notre Dame student body consisted of 12,179 students, with 8,448 undergraduates, 2,138 ...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 0.6858562650923519)]",1.0,0.25114506483078003,1.0,1.0,1.0 +On what day did U.S> planes located Ozawa's fleet?,second,midnight,"[Document(id=8f9a474db1bbc2a41725828917a296999dd03c7fd6c62c0e1adf58a5a2f207f9, content: 'S. Third Fleet was formidable – nine large carriers, eight light carriers, six battleships, 17 cruis...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 0.6451543648987118)]",1.0,0.23394635319709778,1.0,1.0,1.0 +Greek was crowned the European champions in what event?,Euro 2004,Euro 2004,"[Document(id=4cbbc741471265b8757147f853168935ce795df102abf69b11f058149ff15eff, content: ' Their best ever performance was in the 1896 Summer Olympics, when Greece finished second in the med...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.64135190830618)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Which tense indicates that an action hasn't finished?,imperfective,imperfective,"[Document(id=06048f854345fb7e2b6f12877040f57cf87279336165b7cb7893e29f87cc9222, content: ' On occasion of the publication’s 45th anniversary, the Croatian weekly journal Forum published the ...', meta: {'name': 'Serbo-Croatian', 'source_id': '2cc55814bc801258e8300ed82e003428dbf11715c74ad99297d61c5dd77023f4', 'page_number': 1}, score: 0.4510094094135552)]",1.0,1.0,1.0,1.0,1.0 +What acoustic element could be distorted by different audio settings in iPods prior to 2007?,Bass,bass instruments,"[Document(id=158a6b8443733e42d24dec31180770da4724f3bd14ed49e47a937624f7aeed47, content: ' This would happen for EQ settings like R&B, Rock, Acoustic, and Bass Booster, because the equalizer...', meta: {'name': 'IPod', 'source_id': 'f0324763387a7e11c235569bada6935891b2c5ba40dd065ab5acc1bb5de8dbed', 'page_number': 1}, score: 0.626275648011194)]",1.0,0.8350290656089783,1.0,1.0,1.0 +In what way can Neutral triatomic hydrogen exist?,excited form,an excited form,"[Document(id=e9b537adae4e88d5a591f33f2f06388da096013764cb94bfc4e3ea25bb2b2e23, content: ' This charged ion has also been observed in the upper atmosphere of the planet Jupiter. The ion is r...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.6153574752057729)]",1.0,0.9592359066009521,1.0,1.0,1.0 +How are children used in the conflicts in Burma?,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements.,child soldiers,"[Document(id=30c864dfe7ee098b254f1517149763909eae40b70a74f63f84fffd7a77fc3e1f, content: ' According to Samantha Power, a US delegation raised the issue of child soldiers with the government...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.53370525098552)]",1.0,0.6079859733581543,1.0,1.0,1.0 +In what decades were Arsenal players stereotyped in a negative way?,1970s and 1980s,1970s and 1980s,"[Document(id=1bdd87144e8c05c0c8820e3bd6186deb63a4be13d5810e39af795993e4721bb7, content: ' The book was twice adapted for the cinema – the 1997 British film focuses on Arsenal's 1988–89 titl...', meta: {'name': 'Arsenal_F.C.', 'source_id': 'b197b14120aabbc3bf2073d4b3e99c1c51d56d5ac5248f8a97dadcfa056a6f7e', 'page_number': 1}, score: 0.5961240467039464)]",1.0,1.0,1.0,1.0,1.0 +When did new construction start in Melbourne?,late 1890s,1880,"[Document(id=2d294433fe05c77c29a474d8dcbb3200d2f3e16b7a3e67f8eb76cb3489bc8045, content: ' +The city is recognised for its mix of modern architecture which intersects with an extensive range ...', meta: {'name': 'Melbourne', 'source_id': 'e9cc00699ea29b72949874ec0e147efa277ecdaa2a727246f376aab1a8d342bd', 'page_number': 1}, score: 0.5451667408212613)]",1.0,0.7989562749862671,1.0,1.0,1.0 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:10.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:10.csv new file mode 100644 index 0000000..51a18f6 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:10.csv @@ -0,0 +1,120 @@ +questions,true_answers,predicted_answers,retrieved_docs,doc_mrr,sas,doc_map,doc_recall_single_hit,doc_recall_multi_hit +What were the earliest priesthoods? ,flamines,flamines,"[Document(id=ac8e9c286327aea24d00bceb002b08ad3fbc3007673c77da9b48b6b6b91aa76f, content: ' Some rites of the domus (household) were held in public places but were legally defined as privata ...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5865511627398434)]",1.0,1.0,1.0,1.0,1.0 +When was a third visit to Mnafeld scheduled? ,early 1546,19 days,"[Document(id=8d74e9ba6a272d31f73dca87c354d85d0d499c9607da8ceb17a6cfbf85ded50d, content: ' Meanwhile, daylight advance parties scouted for the next bivouac site. During daylight activity or ...', meta: {'name': 'Korean_War', 'source_id': '4c27266656896ba19208a36b68ea8a5f7c7af5ab92647f478de2aae3811a92f1', 'page_number': 1}, score: 0.3338454254530471)]",0.0,0.10433438420295715,0.0,0.0,0.0 +What was the name of the airfield where heavy raids took place?,Farnborough,Coventry,"[Document(id=d8fd97e153fa909813de22390c7c4607821a5041d6350b606b4f95c6304faa9d, content: ' ""Pathfinders"" from 12 Kampfgruppe 100 (Bomb Group 100 or KGr 100) led 437 bombers from KG 1, KG 3, ...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 0.5517882669038934)]",1.0,0.4347962737083435,1.0,1.0,1.0 +The Modern English ending -'s is derived from what ending in Old English?,-es,-es,"[Document(id=081239836dc280856dc2c1ab168f17d8fffffed12cbaeb50b62cd11b58a116b4, content: ' It maintains several distinct cases: the nominative, accusative, genitive, dative and (vestigially)...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.48628358032025165)]",1.0,0.9999998807907104,1.0,1.0,1.0 +In what area is it common for spiritual mentorship to be extremely high?,the West,Hinduism,"[Document(id=c1b15c9f6474425049e145f56eea6b22790074204f55cd26eb005fca608f63a8, content: ' This guidance is generally offered during a ceremony called the patriarchal blessing. +In Hinduism t...', meta: {'name': 'Teacher', 'source_id': '8d3b7bd3176a10380300a49fa713b8fffa5e77a181df51ea823d2bdeb46d4c7b', 'page_number': 1}, score: 0.47522311411982915)]",1.0,0.20152544975280762,1.0,1.0,1.0 +Who is the only Greek volleyball club to win European titles?,Olympiacos,Olympiacos,"[Document(id=c6017a99c04926bab23928d5fed5517ccdf053e42b0a0376e97782a93ab5a6b4, content: ' The Greece men's national water polo team became the third best water polo team in the world in 200...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.5750743919748302)]",1.0,1.0,1.0,1.0,1.0 +How many judges were on the panel for this season?,four,three,"[Document(id=17489faf6fb39343de67cc2bfe448e959559775109bb646aa0e4d02b716ddf77, content: ' Jennifer Lopez and Steven Tyler joined the judging panel in season ten, but both left after two sea...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 0.6295865021297492)]",1.0,0.8232484459877014,1.0,1.0,1.0 +"Along with a chemical process, what type of process is used by night vision devices to convert infrared into visible light?",electrical,electrical,"[Document(id=1c8a379dc092fa8d374927640e6d16f46286416581451c66a1da80fef1412acc, content: ' Infrared can also be used to detect protostars before they begin to emit visible light. Stars emit ...', meta: {'name': 'Infrared', 'source_id': 'ba9d9843065facd8aa75e472f0c7d760ae3d78712335a49f7abaf499acb21d1c', 'page_number': 1}, score: 0.7197352146596256)]",1.0,1.0000001192092896,1.0,1.0,1.0 +The second most prominent religion in New York is what?,Judaism,Judaism,"[Document(id=1eed45d4c946ec058384afdd313db9b847d2b384629f1872466e03c7bc13a2e5, content: ' +Christianity (59%), particularly Catholicism (33%), was the most prevalently practiced religion in ...', meta: {'name': 'New_York_City', 'source_id': '8a1509d057a51ed7a68b42ff87df5d459ac7f15f3d0f4f1685859ba197f9686d', 'page_number': 1}, score: 0.6574727180609481)]",1.0,1.0,1.0,1.0,1.0 +The Free Hanseatic City of Bremen includes which cities?,Bremen and Bremerhaven,Bremen and Bremerhaven,"[Document(id=0427081e9deb9c727f124b818203b3d0cc649efe97ddc37c49cee8cebfa2c3a1, content: 'Germany is a federal republic consisting of sixteen federal states (German: Bundesland, or Land).[a]...', meta: {'name': 'States_of_Germany', 'source_id': '60764e89141121f734e956aeeb48fcbe12c7e4fdec683c4f9fa29406a3f0579e', 'page_number': 1}, score: 0.5043829724168043)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What seeks to limit the use of polysyllabic characters?,CPC,"Since version 3.0, any precomposed characters that can be represented by a combining sequence of already existing characters can no longer be added to the standard in order to preserve interoperability between software using different versions of Unicode","[Document(id=8abb498ed2e51ecc6d89f4e5b9bcdce65b632039dd82bfefb02d5dec25036fbd, content: ' +Modern font technology provides a means to address the practical issue of needing to depict a unifi...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.5751440958199375)]",0.0,0.03272436559200287,0.0,0.0,0.0 +In what courts can federal habeas corpus suits be brought?,federal,lower federal courts,"[Document(id=af588e16cba35397799ed402e254a69a9445c63e2da06985a64c8440d5e111ea, content: ' Most often these are claims, such as ineffective assistance of counsel, which requires the court to...', meta: {'name': 'Capital_punishment_in_the_United_States', 'source_id': '0a4e4dda32f7be6a4f78da5ed9ee21d2cc7659112c0d087b2923a482bdc77b5b', 'page_number': 1}, score: 0.5401086806576698)]",1.0,0.5716047286987305,1.0,1.0,1.0 +What did Whitehead consider himself as a philosopher? ,rank amateur,rank amateur,"[Document(id=a022efa8ed739794e05008dbe44d9b83a87fc4f4b7a609ec096b9d437bc5089e, content: ' For Whitehead, education should be the exact opposite of the multidisciplinary, value-free school m...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 0.6345955416988933)]",1.0,1.0,1.0,1.0,1.0 +What condition does a person have to meet to be allowed by law on a coin?,deceased,borrow money on the credit of the United States,"[Document(id=9f240c943520891dcb4853fa0b77bb9da0c6144acd3f67acca4ff043b2823fb2, content: ' +Early releases of the Washington coin included error coins shipped primarily from the Philadelphia ...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.40843243316554156)]",1.0,0.019053475931286812,1.0,1.0,1.0 +Which region of the US experiences rain year round?,Pacific Northwest,sub-Tropical High Pressure Belt,"[Document(id=106db074c0cb96eb9dae93492a807c47bc87218f2ec096f0b6b51dba419575fc, content: ' +Typically the sub-Tropical High Pressure Belt, with frequent clear skies, provides more than 300 da...', meta: {'name': 'Namibia', 'source_id': 'b9f689c4f90121083289631dd033d301a051af9eb1e03125f1200942937a6318', 'page_number': 1}, score: 0.5784191859462218)]",0.0,0.19846832752227783,0.0,0.0,0.0 +What is the ability to recognize and adapt to new specific pathogens called?,Adaptive (or acquired) immunity,Antigen specificity,"[Document(id=a4a5008fab761323a2771c50538a7e75c9cf4ed6426bbef3a6ae08b7edd123e4, content: ' The adaptive immune response is antigen-specific and requires the recognition of specific ""non-self...', meta: {'name': 'Immune_system', 'source_id': 'fcb8c5092b2596a66d4f96587d653cbabda606874d86632c35ec40ef8f023051', 'page_number': 1}, score: 0.53879306451721)]",1.0,0.4682602882385254,1.0,1.0,1.0 +Who allegedly hit a home run to the Center?,Babe Ruth,Babe Ruth,"[Document(id=bde5f6c2b4913151f9e46d6578f4f20345ade116f2e96b5ae76ef509e1e517dd, content: ' Because of the tie the Giants and Cubs ended up tied for first place. The Giants lost the ensuing o...', meta: {'name': 'Chicago_Cubs', 'source_id': '10bd5330115a2fe2b9ef60d21abe3942769709161783409af0c4444ccac129c3', 'page_number': 1}, score: 0.5515789425362991)]",1.0,1.0,1.0,1.0,1.0 +Which type of graduate from Imperial earned the second highest average starting salary after graduation?,Computing graduates,Computing,"[Document(id=9be1bf507c2859b61088d1f83c172682620003fe33104d080a9b6b3a2256570a, content: ' Students are encouraged to take these classes either for credit or in their own time, and in some d...', meta: {'name': 'Imperial_College_London', 'source_id': '8ff02d910ce681fdd72074b13ed322271625386966ee827dabf1c93ca556fd6e', 'page_number': 1}, score: 0.5930303365803692)]",1.0,0.6075230836868286,1.0,1.0,1.0 +In what year did Eisenhower propose his second civil rights act?,1960,1948,"[Document(id=5b7f53ebfe499268644e98a8173fdea4c95a0c4cc82319e3054edd1dcefbeb53, content: '"" The report called for an end to ""all discrimination and segregation based on race, color, creed, o...', meta: {'name': 'Affirmative_action_in_the_United_States', 'source_id': 'b7156a1660bb591e3abb4460028a7922c063ba8962ad6dce11678f8b08c5cdcb', 'page_number': 1}, score: 0.503787902767582)]",0.0,0.6602938175201416,0.0,0.0,0.0 +What is the current height limitation in Paris in central areas?,50 m,50 m (160 ft),"[Document(id=891cab83df456a1bb11d8e7c301e45b02e564d224d3be1f7bdb82d02edc8f221, content: ' Modern Paris owes much to its late 19th century Second Empire remodelling by the Baron Haussmann: m...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.7160253770681686)]",1.0,0.8085941672325134,1.0,1.0,1.0 +Income level was found to have a signification correlation to which outcome in the study?,nutrition literacy,inequality,"[Document(id=0088e02d1bc9f26df2c1dbc19e8cba8d1c8b418df18bf8e8cfeb9bdce1034e21, content: ' Thomas Sowell, in his book Knowledge and Decisions, claims that this difference is due to women not...', meta: {'name': 'Economic_inequality', 'source_id': '648ff32b8c146c974b23e115e3a61420b1cdbea9de1f5bf38442ce4c1d9caa42', 'page_number': 1}, score: 0.4178487217478476)]",0.0,0.07081164419651031,0.0,0.0,0.0 +How much did Viacom want in damages in their lawsuit against youtube?,$1 billion,$1 billion,"[Document(id=56ff4bdb4f543c61c1f516ba1e544e83e117ab5512bae58b2264fa6b6d86f26b, content: ' Three successful complaints for copyright infringement against a user account will result in the ac...', meta: {'name': 'YouTube', 'source_id': '4d18798daeceb997a6dcb04b575dbbdad768671ca9202116370611d944d510be', 'page_number': 1}, score: 0.7654878512589371)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who should be able to challenge administrative orders in court?,individuals,individuals,"[Document(id=5c5f6a0591170904a56f4c0417e667089761d8d8f013f55283aff164fdadd75b, content: ' Dicey's: “no man is punishable or can be lawfully made to suffer in body or goods except for a dist...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 0.5930262907390398)]",1.0,1.0,1.0,1.0,1.0 +The Slavs were under what name in the early 6th century?,the Antes and the Sclaveni,the Antes and the Sclaveni,"[Document(id=482a70caa7e3a14a061a5b91daa2e56647767ae57909e94ef7246b9a66675bd9, content: ' The Old East Slavic Slavuta for the Dnieper River was argued by Henrich Bartek (1907–1986) to be de...', meta: {'name': 'Slavs', 'source_id': 'a55731ff3cd03d8fe34d4e646dbfcdc2088f01190e5110c9ff0baf4e4b486847', 'page_number': 1}, score: 0.5947934206180892)]",1.0,1.0000001192092896,1.0,1.0,1.0 +How are the results simplified when using the SASO?,"into four summaries that look specifically at responses that correspond to either homosexuality, heterosexuality, bisexuality or asexuality.",four summaries,"[Document(id=c5da820627d1881a16bd936f9f4ee4b1126ca0003f08cdb0cd5303554431224f, content: ' Despite seven categories being able to provide a more accurate description of sexual orientation th...', meta: {'name': 'Sexual_orientation', 'source_id': 'f94279650936a403aa3d2b43ab1ad42b1fd14d05455aa86fbcda6cda978ffe5c', 'page_number': 1}, score: 0.38699930943871186)]",1.0,0.5629022717475891,1.0,1.0,1.0 +What socioeconomic class used the Florentine Tuscan language in Dante's time?,upper class,literate and upper class,"[Document(id=149c6390292392d4438c9191dca43bbe827c20a86bb7bfc502679813d7fcc108, content: ' Due to Eastern Lombard's status as a Gallo-Italic language, an Eastern Lombard speaker may, in fact...', meta: {'name': 'Dialect', 'source_id': '2e05b686e48c9512ae5466f1d15202b44fdc903ea32d0a42c5dbc9ef173d051d', 'page_number': 1}, score: 0.6197546116557725)]",1.0,0.7113619446754456,1.0,1.0,1.0 +"In addition to ammo proposals, what other need was articulated?",The need for some form of tracer or smoke trail,smaller-calibre ordnance,"[Document(id=650eb6f51f4e011cd8d19f5e1848ee5932e97eba7ae0fd82dc71d506c549db9e, content: ' Their needs could cogently be met with smaller-calibre ordnance beyond using the usual singly-mount...', meta: {'name': 'Anti-aircraft_warfare', 'source_id': 'c54639424f62ce369356ed8c4e1e9fab8769359dd28029aa89832f5aa994f45d', 'page_number': 1}, score: 0.40298211471750184)]",1.0,0.10714080184698105,1.0,1.0,1.0 +What is that power that enables us to love?,Prevenient grace,God,"[Document(id=b25294113fba2bccd0a9a138b7273d6f86f678ed0b436dc0dbaa101779bcbba6, content: ' There were two Papal committees and numerous independent experts looking into the latest advancemen...', meta: {'name': 'Pope_Paul_VI', 'source_id': 'fc82a40c990c0b3840b06b32643858d39984b66d78ae950561d809dc84d6edf8', 'page_number': 1}, score: 0.354683878905906)]",0.0,0.29289260506629944,0.0,0.0,0.0 +"Who was assassinated on September 22, 189 AD?",He Jin,He Jin,"[Document(id=1023d346fa09f68f024520eed11ceb8d146eebd929e4d93e19ec61ae32461e57, content: ' 189 AD), half-brother to Empress He (d. 189 AD), plotted with Yuan Shao (d. 202 AD) to overthrow th...', meta: {'name': 'Han_dynasty', 'source_id': '060d95b91f9c20e5c8cd8b8a032ab44aac38ab88ce62d3c302bea0865f533568', 'page_number': 1}, score: 0.442711163558856)]",1.0,1.0000001192092896,1.0,1.0,1.0 +How did Gaddafi change the legal system?,he introduced sharia as the basis for the legal system,abolished the monarchy and proclaimed the Republic. Ruling by decree,"[Document(id=242bcfd7cc78f498ff4abf96b525186f8ddb9d88565ef2d1ad2a1be9f3522584, content: 'Muammar Muhammad Abu Minyar al-Gaddafi (Arabic: معمر محمد أبو منيار القذافي‎ Arabic pronunciation: [...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.5289569940133785)]",1.0,0.2843249440193176,1.0,1.0,1.0 +What name was it assumed Elizabeth would take upon her marriage to Philip?,"Mountbatten,",Peter Townsend,"[Document(id=4b69cd784f30edcd0bcf343e88529300a3f5f46cab6549e05e32a9f6208375df, content: ' +Elizabeth and Philip were married on 20 November 1947 at Westminster Abbey. They received 2500 wedd...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.6765609310084981)]",1.0,0.1941501349210739,1.0,1.0,1.0 +What was the attack on the British weakness?,Battle of Fort Bull,British sea communications,"[Document(id=e3840aa293e2746e6b5de03b1b0e01e25edbe19aa5e08c07e0e041b7bcb110e3, content: ' In April 1941, when the targets were British ports, rifle production fell by 25%, filled-shell prod...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 0.5272865072304967)]",0.0,0.04938885569572449,0.0,0.0,0.0 +How much of South Tucson is Native American?,10%,10%,"[Document(id=e834bcf1ef63c7b72f9bf36d447e483da5003c1c77f033ef064e514b7082f587, content: ' To the south, N. Highland Avenue leads to the Barraza-Aviation Parkway bicycle path. +South Tucson i...', meta: {'name': 'Tucson,_Arizona', 'source_id': '1cd8368afbbb7156e4d37c16c9795a1cf7209fbd0036331b6759337ae62d7224', 'page_number': 1}, score: 0.49812831896954624)]",1.0,1.0,1.0,1.0,1.0 +Whose theory claimed that species were unchanging and designed by God?,John Ray,John Ray,"[Document(id=67c87173e747d72d79bc92866e553902d56583aae14965ecaa3b2da507256976, content: ' After the turmoil of the English Civil War, the Royal Society wanted to show that science did not t...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 0.6184567229523368)]",1.0,1.000000238418579,1.0,1.0,1.0 +Who can restore a database that has been corrupted?,database administrator,Database transactions,"[Document(id=5baf5b500fc870a57b1f1a3f45f13e9b554e1d91359189ce3677cff042e60b2d, content: ' Data security prevents unauthorized users from viewing or updating the database. Using passwords, u...', meta: {'name': 'Database', 'source_id': '98b09743ece9be42ad021f0dc6ffab08c322e7eeace6aae0b98b4bbf67224bd3', 'page_number': 1}, score: 0.3602782504910119)]",1.0,0.4331531822681427,1.0,1.0,1.0 +What unpopular philosopher's text did Avicenna learn from?,Abu Abdullah Nateli,Abu Abdullah Nateli,"[Document(id=cbebf9a70685ea0e72bcf8c9f1dbbe9b7163ee26279b633446edf34314600fb3, content: ' Janssens demonstrated that Avicenna was a Sunni Hanafi. However, the 14th cenutry Shia faqih Nurull...', meta: {'name': 'Avicenna', 'source_id': '25cbb1011a29473e1dfa8d96ebe95bc8ead7393548de57e871f959624f7238f1', 'page_number': 1}, score: 0.6135459338143731)]",1.0,1.0,1.0,1.0,1.0 +What was the event called that caused Ayutthaya to reclaim lost territories ?,the Chinese threat,Ayutthaya recovered its territories by 1770,"[Document(id=f08f17188f751088b2b76f2c1a69ffc86f340979f05ea84b4b201def163ec6f3, content: ' Its trade and secular administrative reforms built a prosperous economy for more than 80 years. Fro...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.562789144621481)]",1.0,0.07690561562776566,1.0,1.0,1.0 +"Before the introduction of the Latin alphabet, how was Old English written?",runic system,runic system,"[Document(id=568b9944b8e99493b93780736bc40a387965c8d30cebea94a19881b28bc86012, content: ' More entered the language when the Anglo-Saxons were converted to Christianity and Latin-speaking p...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.503328448699456)]",1.0,0.9999999403953552,1.0,1.0,1.0 +"When groups that are antagonistic to each other form a coalition for hidden gain, it is called what?",unholy alliance,unholy alliance,"[Document(id=c54ac19342651f97c105c4825d0b82cbf05684cf1d7c4921e0d078a7212b4a21, content: ' A common type of embezzlement is that of personal use of entrusted government resources; for exampl...', meta: {'name': 'Political_corruption', 'source_id': 'e7c84fb03cb06b72c4ab51eb24218e9f2a10b95493e43d575342db0ffa3e9e1d', 'page_number': 1}, score: 0.5087219209237853)]",1.0,1.0,1.0,1.0,1.0 +What made a strong economic case for investing in low carbon technologies?,Stern Review,the Stern Review,"[Document(id=83d98fb8af02ba2d35942c098935050048e1daa28d2d603b2592cee6dafe38b2, content: ' The energy sector regulator is the natural advocate for efficiency and cost-containment throughout ...', meta: {'name': 'Renewable_energy_commercialization', 'source_id': 'fa5f7904b8df5272f4f870ed401f610435c916171c87b47deed9a45e6bd594ea', 'page_number': 1}, score: 0.4629987651724192)]",1.0,0.9567214250564575,1.0,1.0,1.0 +"Who introduced the Latin version of the word ""insect""?",Pliny the Elder,Pliny the Elder,"[Document(id=de63707a025645c7fcc7a26e6dd37ddd29a27cd60ff187ce487e0d07217708ee, content: ' +The word ""insect"" comes from the Latin word insectum, meaning ""with a notched or divided body"", or ...', meta: {'name': 'Insect', 'source_id': 'e14215fdfb7e1394430dedb24549d773464f110b7f19b86558bc101f84126630', 'page_number': 1}, score: 0.7250091339820998)]",1.0,0.9999998807907104,1.0,1.0,1.0 +What was started by the Roman Catholic Church to dispense of the Cathars in Languedoc?,The Cathar Crusade,The Cathar Crusade,"[Document(id=3f50079b5ebfa4f0c771fad96712717b3bd3050f02fa3abfb19f87bc049689e1, content: ' By the 11th century, more organised groups such as the Patarini, the Dulcinians, the Waldensians an...', meta: {'name': 'Heresy', 'source_id': '8ba473262f8508a9c99699bb09cd022993896ece1fed1463a9cf4bd7eb0c3e68', 'page_number': 1}, score: 0.5133480868513103)]",1.0,1.0,1.0,1.0,1.0 +What music service is a huge competitor for Tidal?,Spotify,Spotify,"[Document(id=f0c6a5cd5408a0fc01c220e5910c03a2241fbe85099410f518592c1fb7098441, content: ' ""Creating a partnership with Beyoncé, one of the most hard-working and talented people in the world...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 0.6717989493314664)]",0.0,1.0,0.0,0.0,0.0 +How much was each copper token worth?,a halfpenny,legal tender at face value,"[Document(id=c5c8d55b2c3952d9e29c10530dc199fccdd3bfb5dc897ac40dd8ff1d01e6a97f, content: ' This has to do with Alexander Hamilton's suggestion to Congress of a fixed 15:1 ratio of silver to ...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.5587388197776981)]",0.0,0.16358545422554016,0.0,0.0,0.0 +What ships were attacked in 1852?,Glencoe and Sea Nymph,Glencoe and Sea Nymph,"[Document(id=353e78a8c5526319fefad1c37e2e3511ce4323f2f90be311b53a11227b5bd85c, content: ' In 1852 the San Francisco-based ships Glencoe and Sea Nymph were attacked and everyone aboard excep...', meta: {'name': 'Marshall_Islands', 'source_id': '762a4f6095de127ded5441c7a17d1c9c38f731ac9cb1ffe8de30af6573be7edf', 'page_number': 1}, score: 0.5028705016342824)]",1.0,1.0,1.0,1.0,1.0 +What was the first fossil to display both clearly traditional reptilian characteristics?,Archaeopteryx,Archaeopteryx,"[Document(id=0815d5b0f30156ea39d24a8a15062d5e21822317a105f659f2000892f8d657c8, content: ' +The consensus view in contemporary paleontology is that the flying theropods, or avialans, are the ...', meta: {'name': 'Bird', 'source_id': '76114e3e9c92caec1de2bcfd38b6f22e1b48487f665fa6392f914934a6748ae1', 'page_number': 1}, score: 0.5707345625652527)]",1.0,0.9999998807907104,1.0,1.0,1.0 +What are the theories that integrate molecular genetics with Darwinian evolution called?,the modern evolutionary synthesis,modern evolutionary synthesis,"[Document(id=e1b3872c9a56b024a609ebeaf211b3ea1b4799599e6935c3dd62e613f036575b, content: ' This dogma has since been shown to have exceptions, such as reverse transcription in retroviruses. ...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 0.5384574426271362)]",1.0,0.9789240956306458,1.0,1.0,1.0 +At what time did Tesla get dinner?,8:10 p.m,8:10 p.m,"[Document(id=2ed21a855190ee9369cd4351878edc528d570a2136e3a2612158d6108a59491e, content: ' until 6:00 p.m. or later, with dinner from exactly 8:10 p.m., at Delmonico's restaurant and later t...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.6844753001764978)]",1.0,1.0000001192092896,1.0,1.0,1.0 +In what year did the French reach Alsace?,1444,1444,"[Document(id=9dbf1201bfba9c8190d82357ee4f2f5a482d6d6711313439dac94a993edf3757, content: ' +Holy Roman Empire central power had begun to decline following years of imperial adventures in Ital...', meta: {'name': 'Alsace', 'source_id': '1105499d303196fb0b7ae4d26209720b0040e63376f114d458fd77f05836cbd4', 'page_number': 1}, score: 0.6132608572607257)]",1.0,0.9999998807907104,1.0,1.0,1.0 +Who was responsible for the handling of the Roman census?,two censors,censors,"[Document(id=c149e308ac4553abf8316efb49a5900ee5ff1e1eda0f20dd8b01137b5904cd33, content: ' Praetors administered civil law and commanded provincial armies. Every five years, two censors were...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.4873572047508343)]",1.0,0.8454219698905945,1.0,1.0,1.0 +When was a resolution agreed to about Chinese human rights issues in San Francisco?,"April 1, 2008","April 1, 2008","[Document(id=d16248de2a5e0d9e6bbb7e85b411bf0a4143206e20471ecab73dc7158ce992b1, content: ' The cost to the city for hosting the event was reported to be USD $726,400, nearly half of which ha...', meta: {'name': '2008_Summer_Olympics_torch_relay', 'source_id': 'a45e6ea09a1ff12d8a239a682c8ef3735ae894d5ab6427b50909053bb046e8b5', 'page_number': 1}, score: 0.5374786638713411)]",1.0,1.0,1.0,1.0,1.0 +Who was the last pope to be crowned?,Paul VI,"Angelo Giuseppe Roncalli,[a] Italian pronunciation: [ˈandʒelo dʒuˈzɛppe roŋˈkalli]; 25 November 1881 – 3 June 1963","[Document(id=2637c74f0c9d8013db6dd5e001b320fad50f7374ada702e24a314846612ef144, content: 'Pope Saint John XXIII (Latin: Ioannes XXIII; Italian: Giovanni XXIII) born Angelo Giuseppe Roncalli,...', meta: {'name': 'Pope_John_XXIII', 'source_id': '21b200b333306d54410b53cfd12e123951dabfc1f534196e5309132d00e8184d', 'page_number': 1}, score: 0.5965553455670057)]",0.0,0.26511240005493164,0.0,0.0,0.0 +Who handles serious medical emergencies on Norfolk Island?,the Royal Australian Air Force,Royal Australian Air Force,"[Document(id=c3327e32d8658307f8dca684bbdeb22895ca7a8d7ee8ad6ff0eba6200c5e6da2, content: ' All visitors to Norfolk Island, including Australians, are recommended to purchase travel insurance...', meta: {'name': 'Norfolk_Island', 'source_id': '8be872c9c3302a16163599d52ec60841ddff13042f8e8e2e0aa14de2c192e2ee', 'page_number': 1}, score: 0.6517133656496545)]",1.0,0.9866578578948975,1.0,1.0,1.0 +Were Roman soldiers expected to purchase their own equipment?,"wealthiest citizens, who were able to purchase the best equipment",the Roman state was forced to arm its soldiers at the expense of the state,"[Document(id=3f0396346c47d3ea53931dd05016eb360d6261c1b5ab0c7a977a5bbf0df1be01, content: ' As a result, the Roman state was forced to arm its soldiers at the expense of the state, which it h...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.49308542181814496)]",1.0,0.02006305381655693,1.0,1.0,1.0 +Who did Nasser's group try to assassinate?,Hussein Sirri Amer,Israel,"[Document(id=1a7bb072fc739a204d2311fb1c7faab806a096efeb33e19be04be9eae7061a3d, content: ' Nasser refused the call upon determination that the air force lacked pilots and Amer's handpicked o...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 0.5484507326078425)]",1.0,0.2845463156700134,1.0,1.0,1.0 +What was AJ Ayer an important member of?,Logical empiricism,his clan,"[Document(id=0fa0d88bd1e937bdfda2ada2f0a26524ce558edae20b1537db3005c2c91407d3, content: ' Uthman placed some of the trusted members of his clan at prominent and strong positions throughout ...', meta: {'name': 'Umayyad_Caliphate', 'source_id': '704c4038f63f9c5a686218060cb31639c0333dda34e87e804be75e5fec05ec53', 'page_number': 1}, score: 0.36949795109946626)]",0.0,0.07835237681865692,0.0,0.0,0.0 +In which year was a settlement reached regarding GE's pollution of the Housatonic River?,1999,1999,"[Document(id=6c8a4bf97575a50cf435a385d35038780603d1e0468298d1d46906e5d6f5565e, content: ' Its main offices are located at 30 Rockefeller Plaza at Rockefeller Center in New York City, known ...', meta: {'name': 'General_Electric', 'source_id': '26d4044eb2d79191635af69e57c262af8041a70871e60e989128c458529f04cc', 'page_number': 1}, score: 0.5025808224824159)]",1.0,1.0,1.0,1.0,1.0 +What area in Charleston is considered the 'industrial area'?,the city's Neck area,Charleston–North Charleston–Summerville Metropolitan Statistical Area,"[Document(id=620f512c3df1eb1788fcecbd5ec4c0438953e005020fab4303e121d55369ade5, content: 'Charleston is the oldest and second-largest city in the U.S. state of South Carolina, the county sea...', meta: {'name': 'Charleston,_South_Carolina', 'source_id': '6135d76cf2064b0c4f70e685c5c7e15b8093869ebd10f9a74e8acb07e2173763', 'page_number': 1}, score: 0.5246972383754526)]",1.0,0.3331555128097534,1.0,1.0,1.0 +In what year did Frédéric obtain citizenship in France?,1835,1835,"[Document(id=045e43faf01d8bc0ffb270276761f66a16a046f15c4b2bc3e7ce701f54916e5c, content: ' He supported himself by selling his compositions and teaching piano, for which he was in high deman...', meta: {'name': 'Frédéric_Chopin', 'source_id': '5e95b80d7ceb3febb2d1a7dfe09730771a231fc8d23c15cd74d3fa2327cfcb14', 'page_number': 1}, score: 0.5571432939445757)]",1.0,1.0,1.0,1.0,1.0 +What was Tesla afraid someone was trying to do with his invention?,steal the invention,steal the invention,"[Document(id=1011faa25e59a33b095480ce04c180eb0c71513aa2d54d760fe11160d7fa844b, content: '"" His records indicate that the device is based on a narrow stream of small tungsten pellets that ar...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.6696097359530666)]",1.0,1.0,1.0,1.0,1.0 +What is the group called that does not agree with government at all?,anarchists,Anti-Federalists,"[Document(id=ddb6f079d765c0097f37edeb70c0b91d00084682a1d1fd26a1e5fa0f636c0d31, content: ' Tensions between territories can still be found in federalist countries such as Canada and federati...', meta: {'name': 'Federalism', 'source_id': '05eb4479f6579b86eef28e7954d58c2044997504c63cad297935c95307861772', 'page_number': 1}, score: 0.42387948236305956)]",0.0,0.5036917924880981,0.0,0.0,0.0 +What was the name of the bookstore the authors met in?,Old Corner Bookstore,Lippincott,"[Document(id=e302bfc687b3918c099a0b5ca3053c72bfd1fccfaee0997a2e91ff94d201d3db, content: ' Hoping to be published, Lee presented her writing in 1957 to a literary agent recommended by Capote...', meta: {'name': 'To_Kill_a_Mockingbird', 'source_id': 'f1e375014d37d0eb57fcd3e14861d458b3adfe0e71162344652acf38ae9d1bbb', 'page_number': 1}, score: 0.46735501418952785)]",0.0,0.1679261326789856,0.0,0.0,0.0 +Time and space are both examples of what type of resource?,complexity resources,a systematic mental framework,"[Document(id=11495c74c8b11a9455df2289e0ff9463ec8bfc2283068403eab6563e5116917e, content: '26) by which we simultaneously grasp the past in memory, the present by attention, and the future by...', meta: {'name': 'Time', 'source_id': 'f3ee42c6d38306d65c6207a15a1b9c65ff08a69728d5d0197f9593548177b229', 'page_number': 1}, score: 0.4733092895012373)]",0.0,0.24103975296020508,0.0,0.0,0.0 +What had happened to Frederick's extra troops from Russia?,"Catherine, who ended Russia's alliance with Prussia and withdrew from the war",He also placed a corps of his own troops under Frederick's command,"[Document(id=c90b28034a392eb2832dcb9a2ae1e8b326e72b851e54b5c70c28b7477ac220bd, content: ' Then on 5 January 1762 the Russian Empress Elizabeth died. Her Prussophile successor, Peter III, at...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 0.7323095236946228)]",1.0,0.28628912568092346,1.0,1.0,1.0 +What percentage of its workforce did Dell terminate in from 2007 to 2008?,10%,10%,"[Document(id=0a1d789e1e3ae04146d38c10ae30c76bb73173ec2df854d0b0695309c7e8629a, content: '” +On April 23, 2008, Dell announced the closure of one of its biggest Canadian call-centers in Kanat...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 0.659275856771866)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What organization was Rome's official caretaker?,the senate,the senate,"[Document(id=4b02bca62f562d6c5bfda9faf0b29f77e95b817f971009b508742099d4c57423, content: ' None of these defixiones seem produced by, or on behalf of the elite, who had more immediate recour...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5735551960195144)]",1.0,1.0,1.0,1.0,1.0 +Who chose BYU as the location of the national Middle East Language Resource Center?,United States Department of Education,United States Department of Education,"[Document(id=0b3a599a54fac34c82f29d858c51bbf5ce4f37032b3539a76c2b8a6b918abc6e, content: ' Several of its language programs are the largest of their kind in the nation, the Russian program b...', meta: {'name': 'Brigham_Young_University', 'source_id': 'cb3b81b6f2c540426803106face67bc5fd4db7b1a9ef422ec3601456732b4785', 'page_number': 1}, score: 0.7016983231419771)]",1.0,1.0,1.0,1.0,1.0 +Who discovered Hydrogen gas?,Robert Boyle,Robert Boyle,"[Document(id=45ad6989b7801812895d43ea694bad169af9b5dd9024dd96b099fe071a196217, content: ' +In 1671, Robert Boyle discovered and described the reaction between iron filings and dilute acids, ...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.6552354556870785)]",1.0,1.0,1.0,1.0,1.0 +Where was Polybius forced to go as hostage?,Rome,Rome,"[Document(id=97686de9c4f8fed507eaca869b9537500fb0e8c613b130c6f388bc83e9983444, content: ' Some areas of the conquered world were more affected by Greek influences than others. The term Hell...', meta: {'name': 'Hellenistic_period', 'source_id': '322682cbb61d31c17e6d21e06723e94efd0ef242a5c9f308888fa14209c4078b', 'page_number': 1}, score: 0.5203969874883405)]",1.0,1.0,1.0,1.0,1.0 +In what decade was disco popular?,1970s,1980s,"[Document(id=24440712083226dfd525d04e8584c667060d5e2b151f666d4bdfb4e2e523cdbd, content: ' Also significant was Black Sabbath's Paranoid (1970), which combined guitar riffs with dissonance a...', meta: {'name': 'Hard_rock', 'source_id': '8c928e0d3f021cd5a2c2ffff1882ada0df22da36e38c0e9a3cdfb92bc88423cc', 'page_number': 1}, score: 0.4995811254104975)]",0.0,0.7625939249992371,0.0,0.0,0.0 +What do the rivers form?,lakes,lakes,"[Document(id=c751375299aac2090fe706008704ed47f7358e09ebad2acbc0d54e2c5b0ab5d3, content: ' Cities such as Milan depend on 80 percent of water from Alpine runoff. Water from the rivers is use...', meta: {'name': 'Alps', 'source_id': 'd2eb3ad3af7d6110ae691bf40660ff7a79e4e9dbfd25785b4e55c7700bfcae79', 'page_number': 1}, score: 0.4909023721892361)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What were the first 256 code points of Unicode made identical to? ,ISO-8859-1,ISO-8859-1,"[Document(id=1ca3328892ba24004a75d8cfaab0f58ce32bed8ee034be6fa54e33e8b0a62eb5, content: ' UTF-16 extends UCS-2, using one 16-bit unit for the characters that were representable in UCS-2 and...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.6784456034023938)]",1.0,1.0,1.0,1.0,1.0 +"How many countries did her song ""Irreplaceable"" get number one status in?",five,20,"[Document(id=c85a8792121f517de1bc709c859ae73c062d480fc0c67923193c4a7ef924e5e2, content: ' She released her eighth studio album, Music, in September 2000. It featured elements from the elect...', meta: {'name': 'Madonna_(entertainer)', 'source_id': '0868d847bb768c30a7507239169d5e441bb3181b66c8b9353a863daa3d58e425', 'page_number': 1}, score: 0.44377832513852783)]",0.0,0.5910412073135376,0.0,0.0,0.0 +How many Examination Boards exist in India?,30,30,"[Document(id=16d8a70ea7a6d58f48e68b5c9072e65770d4a4053e9906c77dee6ff0c12c90a1, content: ' However, these vocational schools are not part of the German dual education system. Ergänzungsschul...', meta: {'name': 'Private_school', 'source_id': 'c62330ba95e9919513bc9fc65c0d21a748b4a518b9b1f905a113de7f37274e44', 'page_number': 1}, score: 0.4960366894753296)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What play showed an early depiction of civil disobedience?,Antigone,Antigone,"[Document(id=23b161a9804a8a11c97c4f3f14b6762b49fdf3b1fefb8ff1de76b00cfa00995a, content: 'One of its earliest massive implementations was brought about by Egyptians against the British occup...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 0.5247203837647456)]",1.0,1.0000001192092896,1.0,1.0,1.0 +"Once in conversation with the Queen, how is Elizabeth addressed?",Ma'am,Your Majesty and thereafter as Ma'am,"[Document(id=3d92674c03fa6fb7dff9bfacadbd449807bd3f50ac1fd93ba9ff982be85ddbf6, content: ' Under pressure from public opinion, she began to pay income tax for the first time, and Buckingham ...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.5270637531168535)]",1.0,0.6356295347213745,1.0,1.0,1.0 +In what century did Paris become largely into fashion?,19th,19th,"[Document(id=c37151361f1ebbd218e8e53e243abfe86d24fabddd8be6f3401cf56e9e98633b, content: ' Its name is said to have come in 1814 from the Russian soldiers who occupied the city; ""bistro"" mea...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.5637046717833719)]",1.0,1.0,1.0,1.0,1.0 +When did Alfred the Great begin his reign?,871,871,"[Document(id=5396376bb5221d60d6016a3ef2e8c6fb591b1eb2c9c6dab90677a7e0ac39a5a5, content: ' The speech of eastern and northern parts of England was subject to strong Old Norse influence due t...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.506357630655818)]",0.0,1.0,0.0,0.0,0.0 +What two methods can be used when using Load Testing?,data or a large number of users,load or endurance,"[Document(id=77ce7cf14e807c8808892e96984a50baad4f22bdca6b22034b2fb170f96240a2, content: ' Stability testing (often referred to as load or endurance testing) checks to see if the software ca...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.5004181695680803)]",1.0,0.22238580882549286,1.0,1.0,1.0 +what may possibly be a solution to net neutrality concerns? ,municipal broadband,municipal broadband,"[Document(id=bb0e180e009c490f3ae30fb55a9a91cbe512409c2e12ebae0203298a6f3efe78, content: 'S. Federal Communications Commission (FCC) was reported to be considering a new rule that will permi...', meta: {'name': 'Internet_service_provider', 'source_id': 'b6cdeebacab1c0fc99cc348ba10a7ad5f8a09ca4691c74f89374730939ffbbb4', 'page_number': 1}, score: 0.6289352889491475)]",1.0,1.0,1.0,1.0,1.0 +What are the ground-attack aircraft in the USAF usually deployed in support for? ,U.S. ground forces,U.S. ground forces,"[Document(id=cdfa1e6bea8fd42b00ce80468ba471689216a6195425c2435fb1141b0b8c7e6f, content: ' +The ground-attack aircraft of the USAF are designed to attack targets on the ground and are often d...', meta: {'name': 'United_States_Air_Force', 'source_id': '1bb222de66e5a2c81f1d5b75bf3eced066939aec82934a5cb176a607b4e8cc8f', 'page_number': 1}, score: 0.597249455201943)]",1.0,1.0,1.0,1.0,1.0 + In what year did the invasion of Russia take place?,1812,1854,"[Document(id=7c2b46d47135979280b3bd098e70f7cc34a4526459e2816700066af0071057b2, content: ' The Russians did nothing and he evacuated to Batum in February of the following year. +The Baltic wa...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 0.536293090337765)]",0.0,0.42288899421691895,0.0,0.0,0.0 +What was the practice of religion to the Romans?,personal choice,a part of daily life,"[Document(id=1a8c1e09e777c4e183dceed861053866bfd85f790fb0c2cd9b9aad59ad7dc5b2, content: ' Even the most skeptical among Rome's intellectual elite such as Cicero, who was an augur, saw relig...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.685217369665892)]",1.0,0.26575636863708496,1.0,1.0,1.0 +What is one example of failure testing?,Software fault injection,Software fault injection,"[Document(id=76ad3fb3d257a3812a169d749a5fb13f2e05383764e8f9dfc53895f08747b375, content: ' Beta versions can be made available to the open public to increase the feedback field to a maximal ...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.5599322632992683)]",1.0,1.0,1.0,1.0,1.0 +"Out of the maximum amount of players allowed, one must be a what?",goalkeeper,25-man squad of players aged over 21,"[Document(id=729a1a7b3183917b1dc01acb07f1b632efc33f4eea6aeffbe484b1530c2d94f0, content: ' By 2009, under 40% of the players in the Premier League were English. +In response to concerns that ...', meta: {'name': 'Premier_League', 'source_id': '369baa9a5a854704f16fadd35144d3a3150cf1e9ae1671d9b50a3448547b814b', 'page_number': 1}, score: 0.48538441845429137)]",0.0,0.2965299189090729,0.0,0.0,0.0 +In which year was the new constitution ratified?,1855,1982,"[Document(id=9cce84648c5334fdc1f9efada012ed443127a95a93562f83b2d5073b15d9ceda, content: 'C. Statehood Green Party. The nearest this movement ever came to success was in 1978, when Congress ...', meta: {'name': '51st_state', 'source_id': 'aa3341a0a65518fe53b022b57fd32c865f74c249a1b4d32919c38bfd1cead5b7', 'page_number': 1}, score: 0.4919496280025851)]",0.0,0.5184706449508667,0.0,0.0,0.0 +What vessel was attacked in 1845?,Naiad,Naiad,"[Document(id=08055008121b5a6458e19916078dd3cd8426e9a5062270805846dc3cf7307fae, content: ' +Captain John Charles Marshall and Thomas Gilbert visited the islands in 1788. The islands were name...', meta: {'name': 'Marshall_Islands', 'source_id': '762a4f6095de127ded5441c7a17d1c9c38f731ac9cb1ffe8de30af6573be7edf', 'page_number': 1}, score: 0.46400769475009856)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did Travel + Leisure list Yale as one of the most beautiful campuses?,2011,2011,"[Document(id=92daacd08f6215b2b6c9a23e96940ae9ffa3af8f6a0300108c6bfcf840702db9, content: ' In western New Haven, the university holds 500 acres (2.0 km2) of athletic facilities, including th...', meta: {'name': 'Yale_University', 'source_id': 'f33638c4d6b8d7572ba500f3a8affca1729aae3046c83a94e2a7424dd50f6549', 'page_number': 1}, score: 0.6046670847188567)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What sort of variants did the Human Genome Project indicate that many of the measured transcripts were?,alternative variants,moderate levels,"[Document(id=baf30882ad613bf572c42f281358c802f0ba206e836231f89d46536c7b5eb6b2, content: '25 represent very great genetic variation and that an FST of 0.15–0.25 represented great variation. ...', meta: {'name': 'Race_(human_categorization)', 'source_id': '1232f7fe18d6e2adb87627ce2042b468cfa3fb9ba0b6afb2165c345f440b5339', 'page_number': 1}, score: 0.4730994602567792)]",0.0,0.14990274608135223,0.0,0.0,0.0 +During what year did the Italians invade Libya?,1911,1911,"[Document(id=1b5c167a9d5d609ea5fb8a97ac9f8e9db1b78d5c91d2f95320c7a861768e926e, content: ' He repeatedly expressed a preference for the desert over the city and retreated to the desert to me...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.584297128771873)]",1.0,1.0,1.0,1.0,1.0 +What did Davidson believe the God of Aristotle is synonymous with?,rational thought,rational thought,"[Document(id=707efe35d39bbe6f12111e251260c07bcfa48cea8000d7e2c9680c78ec3b9dcc, content: '"" The theory was indebted to Aristotle's pluralism and his concepts of Soul, the rational, living as...', meta: {'name': 'Idealism', 'source_id': '55a0bd42a4a737442a2d93fe0926424e236910de01b2406ed43c174f2225ff9d', 'page_number': 1}, score: 0.5783754080833273)]",1.0,1.0,1.0,1.0,1.0 +What percentage of students at Notre Dame are the children of former Notre Dame students?,21–24%,21–24%,"[Document(id=4a2399f100a536a4baf71c5dedf166c45f09b1b237a505e9a84faf1da7fd8a1b, content: ' +In 2014 the Notre Dame student body consisted of 12,179 students, with 8,448 undergraduates, 2,138 ...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 0.6002271102755674)]",1.0,1.0,1.0,1.0,1.0 +On what day did U.S> planes located Ozawa's fleet?,second,24 October,"[Document(id=9581bfa82ec59693593b37d324dd3202f11195fecf1f113578c324318b9772ee, content: ' The carriers had only 108 planes. The force was not spotted by the Allies until 16:40 on 24 October...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 0.6129253433479325)]",1.0,0.24101078510284424,1.0,1.0,1.0 +Greek was crowned the European champions in what event?,Euro 2004,Euro 2004,"[Document(id=4b9cc1d29ed0f9a4cb783c7febb93802073dee536efd60ee53a2c3e5bec9cccf, content: ' Their best ever performance was in the 1896 Summer Olympics, when Greece finished second in the med...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.5942357834131707)]",1.0,1.0,1.0,1.0,1.0 +Which tense indicates that an action hasn't finished?,imperfective,past and present,"[Document(id=27c48c714f7a7d5c0daa9f28a8e02d3375d014da26ecdbb43d24c3a5f42b97ed, content: ' Typical of a Slavic language, Czech cardinal numbers one through four allow the nouns and adjective...', meta: {'name': 'Czech_language', 'source_id': 'b7b15f8c583c24627f86dae71c96a8b788adafb4e0606251ec9a06307cc37f6c', 'page_number': 1}, score: 0.3183698883943239)]",0.0,0.2961136996746063,0.0,0.0,0.0 +What acoustic element could be distorted by different audio settings in iPods prior to 2007?,Bass,bass instruments,"[Document(id=7308906ca1725346de1f8669d04745bd0719f93959d2b77a3e2eedd2aea218ba, content: ' This would happen for EQ settings like R&B, Rock, Acoustic, and Bass Booster, because the equalizer...', meta: {'name': 'IPod', 'source_id': 'f0324763387a7e11c235569bada6935891b2c5ba40dd065ab5acc1bb5de8dbed', 'page_number': 1}, score: 0.6227835662512028)]",1.0,0.8350290656089783,1.0,1.0,1.0 +In what way can Neutral triatomic hydrogen exist?,excited form,excited form,"[Document(id=814f2c2136f18b6a49318bf9a19fad148d9b272f987b2539771dc5dbd3beca10, content: ' +Under ordinary conditions on Earth, elemental hydrogen exists as the diatomic gas, H2. However, hyd...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.6317899747001257)]",1.0,1.0,1.0,1.0,1.0 +How are children used in the conflicts in Burma?,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements.,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements,"[Document(id=ff11a1c6102e2b293943cbcfb8c2935799487df6f465dea412c00844c1c7c655, content: '"" In July 2013, the Assistance Association for Political Prisoners indicated that there were approxi...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.5881745056471273)]",1.0,0.9815698862075806,1.0,1.0,1.0 +In what decades were Arsenal players stereotyped in a negative way?,1970s and 1980s,1970s and 1980s,"[Document(id=ac63b8357f80e413cededa7c5a37849ede32b9c8d7d3181d6e287572836f5f86, content: ' They formed the backdrop to one of the earliest football-related films, The Arsenal Stadium Mystery...', meta: {'name': 'Arsenal_F.C.', 'source_id': 'b197b14120aabbc3bf2073d4b3e99c1c51d56d5ac5248f8a97dadcfa056a6f7e', 'page_number': 1}, score: 0.5508357351080974)]",1.0,1.0,1.0,1.0,1.0 +When did new construction start in Melbourne?,late 1890s,1837,"[Document(id=02347b24d27e18d8b2928c6693c7aa2a7b9045acffc2554a2465ddcfd854f43d, content: ' +Batman's Treaty with the Aborigines was annulled by the New South Wales governor (who at the time g...', meta: {'name': 'Melbourne', 'source_id': 'e9cc00699ea29b72949874ec0e147efa277ecdaa2a727246f376aab1a8d342bd', 'page_number': 1}, score: 0.5670046274040195)]",1.0,0.4998088777065277,1.0,1.0,1.0 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:15.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:15.csv new file mode 100644 index 0000000..a800a5e --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:15.csv @@ -0,0 +1,125 @@ +questions,true_answers,predicted_answers,retrieved_docs,doc_mrr,sas,doc_map,doc_recall_single_hit,doc_recall_multi_hit +What were the earliest priesthoods? ,flamines,flamines,"[Document(id=3c9b5484de418625b08e3d8f4e3c3398f40f2f5af0bbf48528ee46ea38c64e7c, content: ' In household cult, the paterfamilias functioned as priest, and members of his familia as acolytes a...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5133148969949781)]",1.0,1.0,1.0,1.0,1.0 +When was a third visit to Mnafeld scheduled? ,early 1546,11 May 1944,"[Document(id=c58958638fa47dad60cf7d7556802e28d387d9bdc8b406eb23f39fcb8c5a5b19, content: ' The descriptions of these were published in Memoir III of the Australian Museum Sydney between 1896...', meta: {'name': 'Tuvalu', 'source_id': '4a4e309bc4b63f2a542d80bca7c9d33d9df6ccfa328612b01483ad5d2101621d', 'page_number': 1}, score: 0.3557680082127386)]",0.0,0.31558167934417725,0.0,0.0,0.0 +What was the name of the airfield where heavy raids took place?,Farnborough,KG 54,"[Document(id=73475bbe2b26a763da172286a9367523edc185064dbbee5b34d0c365b35a629c, content: ' +Five nights later, Birmingham was hit by 369 bombers from KG 54, KG 26, and KG 55. By the end of No...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 0.5086119098924304)]",1.0,0.07915603369474411,1.0,1.0,1.0 +The Modern English ending -'s is derived from what ending in Old English?,-es,-es,"[Document(id=e9007dae49d8af7b7fa7932aef22bbdc01132df7a83250c1516203d03c7a8b4b, content: ' It is most “important to recognize that in many words the English and Scandinavian language differe...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.5029416489492136)]",1.0,0.9999998807907104,1.0,1.0,1.0 +In what area is it common for spiritual mentorship to be extremely high?,the West,Hinduism,"[Document(id=b43e607046d3a9f2878f76721d6d75cb6c46d2bdb38f6b3fdc5f1b9a2f23a163, content: ' +In The Church of Jesus Christ of Latter-day Saints (LDS Church), the teacher is an office in the Aa...', meta: {'name': 'Teacher', 'source_id': '8d3b7bd3176a10380300a49fa713b8fffa5e77a181df51ea823d2bdeb46d4c7b', 'page_number': 1}, score: 0.48577817227393083)]",1.0,0.20152544975280762,1.0,1.0,1.0 +Who is the only Greek volleyball club to win European titles?,Olympiacos,Olympiacos,"[Document(id=4af194e6a549f3cca324fac69732006a675ddeb8e841c964604dff19122e4f16, content: 'O.K, Maroussi) have won 3 Triple Crowns, 5 Saporta Cups, 2 Korać Cups and 1 FIBA Europe Champions Cu...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.5204525008013012)]",1.0,1.0000001192092896,1.0,1.0,1.0 +How many judges were on the panel for this season?,four,three,"[Document(id=0639a6470cdb94c436a07cbb24189b437a50f73fdb84ffa10dd2faeb282342a2, content: ' +The show had originally planned on having four judges following the Pop Idol format; however, only ...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 0.5454945986369781)]",1.0,0.8232484459877014,1.0,1.0,1.0 +"Along with a chemical process, what type of process is used by night vision devices to convert infrared into visible light?",electrical,electrical,"[Document(id=2296ed852027297f8181dc1f64af4fcbb66e36bb282c6197423888ce9d011df5, content: '9–14 μm) and produce images of that radiation. Since infrared radiation is emitted by all objects ba...', meta: {'name': 'Infrared', 'source_id': 'ba9d9843065facd8aa75e472f0c7d760ae3d78712335a49f7abaf499acb21d1c', 'page_number': 1}, score: 0.6525550298431104)]",1.0,1.0000001192092896,1.0,1.0,1.0 +The second most prominent religion in New York is what?,Judaism,Judaism,"[Document(id=5dac70957842f214d4c60f76564b27a7e657cd237c677ed236d1ed1ca76e360f, content: ' +Christianity (59%), particularly Catholicism (33%), was the most prevalently practiced religion in ...', meta: {'name': 'New_York_City', 'source_id': '8a1509d057a51ed7a68b42ff87df5d459ac7f15f3d0f4f1685859ba197f9686d', 'page_number': 1}, score: 0.6477764778542375)]",1.0,1.0,1.0,1.0,1.0 +The Free Hanseatic City of Bremen includes which cities?,Bremen and Bremerhaven,Bremen and Bremerhaven,"[Document(id=48379d700246108e0c3da307a3a8e5de24d8bee4044a5988144f1d9cd57b09c2, content: 'Germany is a federal republic consisting of sixteen federal states (German: Bundesland, or Land).[a]...', meta: {'name': 'States_of_Germany', 'source_id': '60764e89141121f734e956aeeb48fcbe12c7e4fdec683c4f9fa29406a3f0579e', 'page_number': 1}, score: 0.5043829724168043)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What seeks to limit the use of polysyllabic characters?,CPC,placing the glyph itself to the left or right of the left sidebearing,"[Document(id=1a26dbf7fc2f6d061e0b9cfbac5364517775557176c147e38168cf1391574c14, content: ' Thus, in many cases, users have multiple ways of encoding the same character. To deal with this, Un...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.5509833423446333)]",0.0,-0.015694376081228256,0.0,0.0,0.0 +In what courts can federal habeas corpus suits be brought?,federal,lower federal courts,"[Document(id=d356a5182977fa489e1e6eec51c082e5a99e5c3c38407e179300a795bb98de6e, content: ' These supplemental remedies are considered collateral review, that is, an avenue for upsetting judg...', meta: {'name': 'Capital_punishment_in_the_United_States', 'source_id': '0a4e4dda32f7be6a4f78da5ed9ee21d2cc7659112c0d087b2923a482bdc77b5b', 'page_number': 1}, score: 0.49507471145076914)]",1.0,0.5716047286987305,1.0,1.0,1.0 +What did Whitehead consider himself as a philosopher? ,rank amateur,one of the 20th century's foremost metaphysicians,"[Document(id=645c5e4f8126b7f8b45cf02dc6a818511eead3620792f162523eacd0e5908ee5, content: '"" Ironically, in later life Whitehead would become one of the 20th century's foremost metaphysicians...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 0.5103405273843459)]",1.0,0.14327546954154968,1.0,1.0,1.0 +What condition does a person have to meet to be allowed by law on a coin?,deceased,"""No state shall... make any thing but gold and silver coin a tender in payment of debts","[Document(id=68d14edc51b808820ba009fc52fe536383dc5d9fe285a432d114fdcd84ef9e86, content: ' The currency as we know it today did not get the faces they currently have until after the early 20...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.3801649071482328)]",1.0,0.08643913269042969,1.0,1.0,1.0 +Which region of the US experiences rain year round?,Pacific Northwest,Pacific Northwest,"[Document(id=6acdafac063b4c532b8491aead2266ee3880a9ecef0a8eac6e645a6d8ae35346, content: ' Precipitation averages less than 15 inches (38 cm). The Southwest is a hot desert, with temperature...', meta: {'name': 'Geography_of_the_United_States', 'source_id': '6eef5e1e8e3a1d489b23e0304d3cc9472b84a44871b2fd5ecfc8ab3f12e8ccf0', 'page_number': 1}, score: 0.5779231452642606)]",1.0,1.0,1.0,1.0,1.0 +What is the ability to recognize and adapt to new specific pathogens called?,Adaptive (or acquired) immunity,Antigen specificity,"[Document(id=7f56f4718d147b0156e077e4853a6ac0250773335364d5e5cac1ff37df572de8, content: ' The adaptive immune response is antigen-specific and requires the recognition of specific ""non-self...', meta: {'name': 'Immune_system', 'source_id': 'fcb8c5092b2596a66d4f96587d653cbabda606874d86632c35ec40ef8f023051', 'page_number': 1}, score: 0.5447541169419035)]",1.0,0.4682601988315582,1.0,1.0,1.0 +Who allegedly hit a home run to the Center?,Babe Ruth,Babe Ruth,"[Document(id=9f4c20a2bce44cf9bd3af493be183ab333c4d80eef37f323c78a73218b2cb402, content: ' Because of the tie the Giants and Cubs ended up tied for first place. The Giants lost the ensuing o...', meta: {'name': 'Chicago_Cubs', 'source_id': '10bd5330115a2fe2b9ef60d21abe3942769709161783409af0c4444ccac129c3', 'page_number': 1}, score: 0.5515789425362991)]",1.0,1.0,1.0,1.0,1.0 +Which type of graduate from Imperial earned the second highest average starting salary after graduation?,Computing graduates,Computing,"[Document(id=724eb1fe8da5e0d4da8a448227c2eb83283ae94ad66b58e07296d26274b6a2b4, content: ' In terms of specific course salaries, the Sunday Times ranked Computing graduates from Imperial as ...', meta: {'name': 'Imperial_College_London', 'source_id': '8ff02d910ce681fdd72074b13ed322271625386966ee827dabf1c93ca556fd6e', 'page_number': 1}, score: 0.4505703094141891)]",1.0,0.6075230836868286,1.0,1.0,1.0 +In what year did Eisenhower propose his second civil rights act?,1960,1948,"[Document(id=f0f90784d9642a8e4529bd9b3248d2b1755499c1727299e617e58e392a3cdbe7, content: '"" The report called for an end to ""all discrimination and segregation based on race, color, creed, o...', meta: {'name': 'Affirmative_action_in_the_United_States', 'source_id': 'b7156a1660bb591e3abb4460028a7922c063ba8962ad6dce11678f8b08c5cdcb', 'page_number': 1}, score: 0.48052786159944016)]",0.0,0.6602938175201416,0.0,0.0,0.0 +What is the current height limitation in Paris in central areas?,50 m,50 m (160 ft),"[Document(id=44b4daa4d3c4378951ca880516b3342520595caa477d61e2c5fc88de3627c8f0, content: ' Modern Paris owes much to its late 19th century Second Empire remodelling by the Baron Haussmann: m...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.7160253770681686)]",1.0,0.8085941076278687,1.0,1.0,1.0 +Income level was found to have a signification correlation to which outcome in the study?,nutrition literacy,duration of economic growth spells,"[Document(id=fba3e3c3e8001e6e2682424641a36dc63233372513150f4bf138ff4ceac88039, content: ' Thus, the marginal utility of wealth per person (""the additional dollar"") decreases as a person bec...', meta: {'name': 'Economic_inequality', 'source_id': '648ff32b8c146c974b23e115e3a61420b1cdbea9de1f5bf38442ce4c1d9caa42', 'page_number': 1}, score: 0.4046748955914073)]",0.0,0.06012151017785072,0.0,0.0,0.0 +How much did Viacom want in damages in their lawsuit against youtube?,$1 billion,$1 billion,"[Document(id=497c11f9fbbfb1f37be3dcd0a29d747c5f8f8967252a25e340b937417a080280, content: ' Three successful complaints for copyright infringement against a user account will result in the ac...', meta: {'name': 'YouTube', 'source_id': '4d18798daeceb997a6dcb04b575dbbdad768671ca9202116370611d944d510be', 'page_number': 1}, score: 0.7380069801719974)]",1.0,1.0,1.0,1.0,1.0 +Who should be able to challenge administrative orders in court?,individuals,individuals,"[Document(id=8a0f220e6df4e50504002c6ad6312d289ff774c9c753db65e80fb1022f521be2, content: ' Dicey's: “no man is punishable or can be lawfully made to suffer in body or goods except for a dist...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 0.5930262907390398)]",1.0,1.0,1.0,1.0,1.0 +The Slavs were under what name in the early 6th century?,the Antes and the Sclaveni,the Antes and the Sclaveni,"[Document(id=198e2ff2b18d1e11637616cacf59f7db2e62fac307b8d4669b7694d128532573, content: ' The Old East Slavic Slavuta for the Dnieper River was argued by Henrich Bartek (1907–1986) to be de...', meta: {'name': 'Slavs', 'source_id': 'a55731ff3cd03d8fe34d4e646dbfcdc2088f01190e5110c9ff0baf4e4b486847', 'page_number': 1}, score: 0.5947934206180892)]",1.0,1.0000001192092896,1.0,1.0,1.0 +How are the results simplified when using the SASO?,"into four summaries that look specifically at responses that correspond to either homosexuality, heterosexuality, bisexuality or asexuality.",four summaries,"[Document(id=6d55c2f18c45503d524329d23b6d3edb48694f77be7d85064dd10de2b4e6fd9a, content: ' Despite seven categories being able to provide a more accurate description of sexual orientation th...', meta: {'name': 'Sexual_orientation', 'source_id': 'f94279650936a403aa3d2b43ab1ad42b1fd14d05455aa86fbcda6cda978ffe5c', 'page_number': 1}, score: 0.38699930943871186)]",1.0,0.5629023313522339,1.0,1.0,1.0 +What socioeconomic class used the Florentine Tuscan language in Dante's time?,upper class,the family or a village priest,"[Document(id=1ac361381724706acf7a12f934a8f6a399e50f7b7857405e6b825fba7c4f2236, content: ' Other countries codified their laws; legal codes were promulgated in Castile, Poland, and Lithuania...', meta: {'name': 'Middle_Ages', 'source_id': 'f13b345e20c54b6cc55363080d7492474df62715717172aa9f1029f286010ad9', 'page_number': 1}, score: 0.527310784861083)]",0.0,0.20006507635116577,0.0,0.0,0.0 +"In addition to ammo proposals, what other need was articulated?",The need for some form of tracer or smoke trail,smaller-calibre ordnance,"[Document(id=a376f05e7a6d61c6348c9a2d82a6646f62cd3d7afbcdeec823b97dcb5aa92bf2, content: ' Their needs could cogently be met with smaller-calibre ordnance beyond using the usual singly-mount...', meta: {'name': 'Anti-aircraft_warfare', 'source_id': 'c54639424f62ce369356ed8c4e1e9fab8769359dd28029aa89832f5aa994f45d', 'page_number': 1}, score: 0.40298211471750184)]",1.0,0.10714080184698105,1.0,1.0,1.0 +What is that power that enables us to love?,Prevenient grace,mercy of God and His care for His creation,"[Document(id=ab06859e83076577bc2a04088a7525ddee1f44dedfa9ffcf46872843c8c86632, content: ' They neither expected nor sought the appearance of the stigmata or any other physical manifestation...', meta: {'name': 'Dominican_Order', 'source_id': 'f55ba4e5b37c737e23256c74229a0bf3a5c697b70e171c6ce20c437d900b023e', 'page_number': 1}, score: 0.354906414247829)]",0.0,0.4893345832824707,0.0,0.0,0.0 +"Who was assassinated on September 22, 189 AD?",He Jin,He Jin,"[Document(id=232041b4afb4ca71e3d1d3697502447b0c4bfa12f1eccde1a6f36b432b833244, content: ' 189 AD), half-brother to Empress He (d. 189 AD), plotted with Yuan Shao (d. 202 AD) to overthrow th...', meta: {'name': 'Han_dynasty', 'source_id': '060d95b91f9c20e5c8cd8b8a032ab44aac38ab88ce62d3c302bea0865f533568', 'page_number': 1}, score: 0.3925329102651024)]",1.0,1.0000001192092896,1.0,1.0,1.0 +How did Gaddafi change the legal system?,he introduced sharia as the basis for the legal system,sharia,"[Document(id=4d48682f805f7a385767a5c2825f3fc2119d2b698a7fc9592571f5590731bae6, content: 'Muammar Muhammad Abu Minyar al-Gaddafi (Arabic: معمر محمد أبو منيار القذافي‎ Arabic pronunciation: [...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.5289569940133785)]",1.0,0.6216318011283875,1.0,1.0,1.0 +What name was it assumed Elizabeth would take upon her marriage to Philip?,"Mountbatten,",House of Mountbatten,"[Document(id=e315f255311d7dc4c756937a51f6602a99f1b8e3b0a145306a3d8c41c258bc57, content: ' She was educated privately at home. Her father acceded to the throne on the abdication of his broth...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.5852761260940276)]",1.0,0.7063884735107422,1.0,1.0,1.0 +What was the attack on the British weakness?,Battle of Fort Bull,the Germans failed to prevent maritime movements or cripple industry in the regions,"[Document(id=1a4ba8fa507fb2fb6dac9df67abbdff76568d8f7a2e9cf8a578677e271a7190c, content: ' +In the north, substantial efforts were made against Newcastle-upon-Tyne and Sunderland, which were ...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 0.4877685374997804)]",0.0,0.015689663589000702,0.0,0.0,0.0 +How much of South Tucson is Native American?,10%,2.7%,"[Document(id=95e1463ec5cbd41e6746a2396eee9d7fb5ba52847c057866e7e7fd5a7f376d91, content: ' The racial makeup of the city was 69.7% White (down from 94.8% in 1970), 5.0% Black or African-Amer...', meta: {'name': 'Tucson,_Arizona', 'source_id': '1cd8368afbbb7156e4d37c16c9795a1cf7209fbd0036331b6759337ae62d7224', 'page_number': 1}, score: 0.5245243631758744)]",1.0,0.6450710296630859,1.0,1.0,1.0 +Whose theory claimed that species were unchanging and designed by God?,John Ray,John Ray,"[Document(id=9a450124afc12f193f72ae1f8310b72ada7cbbf8f34bbd71f66eb32b0f47efbc, content: ' With the development of the modern evolutionary synthesis in the 1930s and 1940s, Darwin's concept ...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 0.6164412846430873)]",1.0,1.000000238418579,1.0,1.0,1.0 +Who can restore a database that has been corrupted?,database administrator,software error,"[Document(id=493262de0ba96f4bed9bc4d9456c8abbffaff489696816e1875ca710ac0d5465, content: ' The migration involves the database's transformation from one DBMS type to another. The transformat...', meta: {'name': 'Database', 'source_id': '98b09743ece9be42ad021f0dc6ffab08c322e7eeace6aae0b98b4bbf67224bd3', 'page_number': 1}, score: 0.3370368984389711)]",1.0,0.25782641768455505,1.0,1.0,1.0 +What unpopular philosopher's text did Avicenna learn from?,Abu Abdullah Nateli,Abu Abdullah Nateli,"[Document(id=ec70e294a8a1203ecdbb9fe95270466a0129a5d607fe306df146559206aa5b31, content: ' Avicenna first began to learn the Quran and literature in such a way that when he was ten years old...', meta: {'name': 'Avicenna', 'source_id': '25cbb1011a29473e1dfa8d96ebe95bc8ead7393548de57e871f959624f7238f1', 'page_number': 1}, score: 0.5059370116900019)]",1.0,1.0,1.0,1.0,1.0 +What was the event called that caused Ayutthaya to reclaim lost territories ?,the Chinese threat,Ayutthaya recovered its territories by 1770,"[Document(id=391f8278a6d664b0fda2a91e05a75fcb178e560aca2778cf3383f74470691074, content: ' Its trade and secular administrative reforms built a prosperous economy for more than 80 years. Fro...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.562789144621481)]",1.0,0.07690561562776566,1.0,1.0,1.0 +"Before the introduction of the Latin alphabet, how was Old English written?",runic system,Roman cursive,"[Document(id=9b07ee8006e937b1f3fc72c78c329158c8ab48962e9e1ef4c8d375c50cf7a30b, content: ' Scripts using two separate cases are also called bicameral scripts. Many other writing systems make...', meta: {'name': 'Letter_case', 'source_id': 'f24785f407e55c52408fa13b0c7fd7fc6529cb973898992f6e98d5d28ea82d5a', 'page_number': 1}, score: 0.5399797841084327)]",0.0,0.06087452173233032,0.0,0.0,0.0 +"When groups that are antagonistic to each other form a coalition for hidden gain, it is called what?",unholy alliance,unholy alliance,"[Document(id=b281d8f2fa5c72bf9176af2f66fbd72725ba99c1c3e8e2e12044e694c6ee3bbe, content: ' This sum itself may be all or a portion of the difference between the actual (inflated) payment to ...', meta: {'name': 'Political_corruption', 'source_id': 'e7c84fb03cb06b72c4ab51eb24218e9f2a10b95493e43d575342db0ffa3e9e1d', 'page_number': 1}, score: 0.5025447478163751)]",1.0,1.0,1.0,1.0,1.0 +What made a strong economic case for investing in low carbon technologies?,Stern Review,International Solar Energy Society,"[Document(id=c793eed6f7e60651d2a76fcaf9711f0a39fe3f3343f81bd4df136c41ff320e80, content: ' +Lester Brown has argued that ""a world facing the prospect of economically disruptive climate change...', meta: {'name': 'Renewable_energy_commercialization', 'source_id': 'fa5f7904b8df5272f4f870ed401f610435c916171c87b47deed9a45e6bd594ea', 'page_number': 1}, score: 0.4503806030703587)]",1.0,0.06405393034219742,1.0,1.0,1.0 +"Who introduced the Latin version of the word ""insect""?",Pliny the Elder,Pliny the Elder,"[Document(id=d4ddb3955cf6ff9d6bed687f54220f695fef0951972c64309938d9cf7670f6ba, content: ' +The word ""insect"" comes from the Latin word insectum, meaning ""with a notched or divided body"", or ...', meta: {'name': 'Insect', 'source_id': 'e14215fdfb7e1394430dedb24549d773464f110b7f19b86558bc101f84126630', 'page_number': 1}, score: 0.7250091339820998)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What was started by the Roman Catholic Church to dispense of the Cathars in Languedoc?,The Cathar Crusade,Larger Catechism,"[Document(id=c601d900bbed4182e4c31a385ec4879c196b3acf05808d506aeb8ff34d3bf65c, content: '.. and unfortunately many pastors are well-nigh unskilled and incapable of teaching."" +Luther devised...', meta: {'name': 'Martin_Luther', 'source_id': 'a3fdaefe076e6af394caf9fb8bf70d1d62cf6d16465d7e4d57f44fd99dde72d4', 'page_number': 1}, score: 0.41549565135076816)]",0.0,0.32058510184288025,0.0,0.0,0.0 +What music service is a huge competitor for Tidal?,Spotify,Spotify,"[Document(id=dccc646845ca49e7358315767a80e7f3cbcc38b5a971e1af1ab775ad80542dee, content: ' Contemporary critics have noted the consistent minimalistic aesthetic exhibited throughout DONDA cr...', meta: {'name': 'Kanye_West', 'source_id': '4693bb2ae0f5b7c5ed3788607bba35813f42eada13da446f4c397c0715d2dc91', 'page_number': 1}, score: 0.5964939320176397)]",1.0,1.0,1.0,1.0,1.0 +How much was each copper token worth?,a halfpenny,"15,000 t","[Document(id=98b439fa147a97e5eae30c05b00e8ced8af2688f70d603a4a19bd0acee1fb227, content: ' Electroplating commonly uses copper as a base for other metals such as nickel. +Copper has been in u...', meta: {'name': 'Copper', 'source_id': 'de5c60a665b93f9b358be412d53a37ca43d90005e3ec9a553ac38a38f076fadc', 'page_number': 1}, score: 0.4860156794563682)]",0.0,0.08125006407499313,0.0,0.0,0.0 +What ships were attacked in 1852?,Glencoe and Sea Nymph,Glencoe and Sea Nymph,"[Document(id=c450c10c9cc7ec2362a5a8c23142549f03d77146379f9bb96ab788f8c91887e3, content: ' One year later, the American schooner Dolphin arrived and picked up two boys, the last survivors of...', meta: {'name': 'Marshall_Islands', 'source_id': '762a4f6095de127ded5441c7a17d1c9c38f731ac9cb1ffe8de30af6573be7edf', 'page_number': 1}, score: 0.5428866259942204)]",1.0,1.0,1.0,1.0,1.0 +What was the first fossil to display both clearly traditional reptilian characteristics?,Archaeopteryx,Dilophosaurus,"[Document(id=47da15856e3472d29da1e5ea8564e2a9481cff64494e743d46ae067463a71669, content: ' The formation of new crust beneath the surface displaced ocean waters by as much as 200 m (656 ft) ...', meta: {'name': 'Mesozoic', 'source_id': '00ff4195df617950fbd745afb7161dc8ed971ec038d2f00639cec8848b91e69c', 'page_number': 1}, score: 0.5368359622583831)]",0.0,0.4152252674102783,0.0,0.0,0.0 +What are the theories that integrate molecular genetics with Darwinian evolution called?,the modern evolutionary synthesis,modern evolutionary synthesis,"[Document(id=7ff269861e7df34fcebf71c4907ef4694447d099e228c45550f4e2387812c2fd, content: ' This dogma has since been shown to have exceptions, such as reverse transcription in retroviruses. ...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 0.5467572858377108)]",1.0,0.9789240956306458,1.0,1.0,1.0 +At what time did Tesla get dinner?,8:10 p.m,"8:10 p.m., at Delmonico's restaurant and later the Waldorf-Astoria Hotel. Tesla would telephone his dinner order to the headwaiter, who also could be the only one to serve him. ""The meal was required to be ready at eight o'clock","[Document(id=bf93e5ca5e1091757426fa6d168a7b90f8c1d9dad833880ffcb9b06d8ad2b0f7, content: ' There are a minimum of 278 patents issued to Tesla in 26 countries that have been accounted for. Ma...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.6614342019294968)]",1.0,0.3650366961956024,1.0,1.0,1.0 +In what year did the French reach Alsace?,1444,1444,"[Document(id=f693c6f2598c5f0324deafec242b67b3598e0051d9b177df27f75a4c46de16d7, content: ' +Holy Roman Empire central power had begun to decline following years of imperial adventures in Ital...', meta: {'name': 'Alsace', 'source_id': '1105499d303196fb0b7ae4d26209720b0040e63376f114d458fd77f05836cbd4', 'page_number': 1}, score: 0.6132608572607257)]",1.0,0.9999998807907104,1.0,1.0,1.0 +Who was responsible for the handling of the Roman census?,two censors,two censors,"[Document(id=ce88f4b2acc2cd7986f0b63373b8505a79a6c1ae6aa86d23bd2b76d546f82d61, content: ' Consuls had supreme power in both civil and military matters. While in the city of Rome, the consul...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.5303996384286787)]",1.0,1.0,1.0,1.0,1.0 +When was a resolution agreed to about Chinese human rights issues in San Francisco?,"April 1, 2008","April 1, 2008","[Document(id=1e41339b2f87d6ffb45a6e6882ca2d5cec93b2ada663205b1745d23021ff3c6d, content: ' The planned closing ceremony at Justin Herman Plaza was cancelled and instead, a ceremony was held ...', meta: {'name': '2008_Summer_Olympics_torch_relay', 'source_id': 'a45e6ea09a1ff12d8a239a682c8ef3735ae894d5ab6427b50909053bb046e8b5', 'page_number': 1}, score: 0.45183706257798184)]",1.0,1.0,1.0,1.0,1.0 +Who was the last pope to be crowned?,Paul VI,Pope Pius X,"[Document(id=0656aafee4ca0776df2863b2507d041fd1b72a5b5232c1059e55d128bf2b1909, content: ' In this exalted office he was still the gentle pastor. He believed in discussion and persuasion. He...', meta: {'name': 'Pope_John_XXIII', 'source_id': '21b200b333306d54410b53cfd12e123951dabfc1f534196e5309132d00e8184d', 'page_number': 1}, score: 0.5309331615663739)]",0.0,0.3954741954803467,0.0,0.0,0.0 +Who handles serious medical emergencies on Norfolk Island?,the Royal Australian Air Force,Royal Australian Air Force,"[Document(id=61583a75fdba243f6a812f3bf73c41e686e4f8374db81e4dd5f1a4d5a4430336, content: ' +Non-Australian citizens who are Australian permanent residents should be aware that during their st...', meta: {'name': 'Norfolk_Island', 'source_id': '8be872c9c3302a16163599d52ec60841ddff13042f8e8e2e0aa14de2c192e2ee', 'page_number': 1}, score: 0.6661485563876728)]",1.0,0.9866578578948975,1.0,1.0,1.0 +Were Roman soldiers expected to purchase their own equipment?,"wealthiest citizens, who were able to purchase the best equipment",the Roman state was forced to arm its soldiers at the expense of the state,"[Document(id=1c55594460ba4e27d8751041858090c94678420d1701172511f1bad1a1d70154, content: ' As a result, the Roman state was forced to arm its soldiers at the expense of the state, which it h...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.4733644228229093)]",1.0,0.02006305381655693,1.0,1.0,1.0 +Who did Nasser's group try to assassinate?,Hussein Sirri Amer,Nuri al-Said,"[Document(id=c355e0f20036a60ada1e4711a62b4d11adebf07dce76a808203fdd588cc1168e, content: '"" Nasser ordered a crackdown against Syrian communists, dismissing many of them from their governmen...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 0.5839372061580861)]",1.0,0.3125278353691101,1.0,1.0,1.0 +What was AJ Ayer an important member of?,Logical empiricism,his clan,"[Document(id=2d32fb9800c9d78fd442f6428cf6f5fb797d26f09a98fe2e6bd0bbee11ab4e6e, content: ' Uthman placed some of the trusted members of his clan at prominent and strong positions throughout ...', meta: {'name': 'Umayyad_Caliphate', 'source_id': '704c4038f63f9c5a686218060cb31639c0333dda34e87e804be75e5fec05ec53', 'page_number': 1}, score: 0.36949795109946626)]",0.0,0.07835237681865692,0.0,0.0,0.0 +In which year was a settlement reached regarding GE's pollution of the Housatonic River?,1999,1977,"[Document(id=0869972e54922afd3497ba60a5f3e981d9c0ab240629036531512fe359bc4da8, content: ' Around the same time, General Electric's Canadian counterpart, Canadian General Electric, was forme...', meta: {'name': 'General_Electric', 'source_id': '26d4044eb2d79191635af69e57c262af8041a70871e60e989128c458529f04cc', 'page_number': 1}, score: 0.5524063253173699)]",1.0,0.5505510568618774,1.0,1.0,1.0 +What area in Charleston is considered the 'industrial area'?,the city's Neck area,downtown medical district,"[Document(id=f2ed49ccc8f85dd664fcdca57c3499ff0b7c7b7afb1177548f6b0038c9e3b49f, content: ' The downtown medical district is experiencing rapid growth of biotechnology and medical research in...', meta: {'name': 'Charleston,_South_Carolina', 'source_id': '6135d76cf2064b0c4f70e685c5c7e15b8093869ebd10f9a74e8acb07e2173763', 'page_number': 1}, score: 0.4654217451880065)]",1.0,0.41251468658447266,1.0,1.0,1.0 +In what year did Frédéric obtain citizenship in France?,1835,1835,"[Document(id=7b2e31c3f54b0c1a9b97db27512199c167f56012decb955c5fb2ad06a110d92e, content: 'Frédéric François Chopin (/ˈʃoʊpæn/; French pronunciation: ​[fʁe.de.ʁik fʁɑ̃.swa ʃɔ.pɛ̃]; 22 Februar...', meta: {'name': 'Frédéric_Chopin', 'source_id': '5e95b80d7ceb3febb2d1a7dfe09730771a231fc8d23c15cd74d3fa2327cfcb14', 'page_number': 1}, score: 0.6606191519489985)]",1.0,1.0,1.0,1.0,1.0 +What was Tesla afraid someone was trying to do with his invention?,steal the invention,captured an X-ray image,"[Document(id=3493f2fd6679a18fbb53f3394d72c496d8973c7aa3ec8881fb4c2b3145e673bf, content: ' +Tesla served as a vice president of the American Institute of Electrical Engineers, the forerunner ...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.6264878711447988)]",1.0,0.15714535117149353,1.0,1.0,1.0 +What is the group called that does not agree with government at all?,anarchists,cordon sanitarian,"[Document(id=da081545e8e8020b03087fbbcde823b131a8ffeb227970cc6962ac2b8197ec87, content: ' When a party achieves enough seats in an election to form a majority, the party's frontbench become...', meta: {'name': 'Political_party', 'source_id': '476a9215de81b75a4f17737eed1679570973e55b5ed6175aa0ec1014b09d4632', 'page_number': 1}, score: 0.44697301723543903)]",0.0,0.23455810546875,0.0,0.0,0.0 +What was the name of the bookstore the authors met in?,Old Corner Bookstore,Bibliothèque Bleue,"[Document(id=431a21bc2021ab05bedf5f9c62189ca16d692858fb84414f96b2f293034850da, content: '"" Women, as salonnières, were ""the legitimate governors of [the] potentially unruly discourse"" that ...', meta: {'name': 'Age_of_Enlightenment', 'source_id': 'd6935b64f79e30b7c3de4c952b643a55674c548cd0bb703870a094fd3e646d84', 'page_number': 1}, score: 0.4091252915982114)]",0.0,0.3384479880332947,0.0,0.0,0.0 +Time and space are both examples of what type of resource?,complexity resources,systematic framework,"[Document(id=d76927397b700e2de75c7cb61d1ac04b6def03542f443a57cbea19a53d750a49, content: ' Without tangible notions of distance and size for correlation, sight can tell us next to nothing ab...', meta: {'name': 'Philosophy_of_space_and_time', 'source_id': '5578d6ef93e9e6f246ec225cb481a7decb69e22faa45aa7889ab826e1e2d4028', 'page_number': 1}, score: 0.4423514453852574)]",0.0,0.28883427381515503,0.0,0.0,0.0 +What had happened to Frederick's extra troops from Russia?,"Catherine, who ended Russia's alliance with Prussia and withdrew from the war",placed a corps of his own troops under Frederick's command,"[Document(id=d7342a0e3021ae2d3bba679cf10ecb72ddc69518be33b380ee6b9e8f5e4d03c9, content: ' +On the eastern front, progress was very slow. The Russian army was heavily dependent upon its main ...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 0.7072835413098615)]",1.0,0.28195881843566895,1.0,1.0,1.0 +What percentage of its workforce did Dell terminate in from 2007 to 2008?,10%,10%,"[Document(id=cea0f1307bbe685005f9642d975856221cf9a5dd6f18a7b87c780156408770d1, content: '” +On April 23, 2008, Dell announced the closure of one of its biggest Canadian call-centers in Kanat...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 0.659275856771866)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What organization was Rome's official caretaker?,the senate,pontifex maximus,"[Document(id=71718b05e0ad88a856f11532834265f8f64091425308e7bc717ce93437673530, content: ' +Towards the end of the Republic, religious and political offices became more closely intertwined; t...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5438163925798216)]",1.0,0.2345559000968933,1.0,1.0,1.0 +Who chose BYU as the location of the national Middle East Language Resource Center?,United States Department of Education,United States Department of Education,"[Document(id=73c809a12535348ffed165eff42c189f3aa95c6ed455c42b5e7932cd44167a8d, content: ' Several of its language programs are the largest of their kind in the nation, the Russian program b...', meta: {'name': 'Brigham_Young_University', 'source_id': 'cb3b81b6f2c540426803106face67bc5fd4db7b1a9ef422ec3601456732b4785', 'page_number': 1}, score: 0.6349932241694249)]",1.0,1.0,1.0,1.0,1.0 +Who discovered Hydrogen gas?,Robert Boyle,Robert Boyle,"[Document(id=b562ec53d471dfbb3ba9b12247b16c622039308f7ec5b5b5344127cfeb67e87d, content: ' +In 1671, Robert Boyle discovered and described the reaction between iron filings and dilute acids, ...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.6552354556870785)]",1.0,1.0,1.0,1.0,1.0 +Where was Polybius forced to go as hostage?,Rome,Rome,"[Document(id=b5c785a8e22edd8d9d6cb5708ef03b91b1dd4137c66bb49c28d760b75bfb84be, content: ' Some areas of the conquered world were more affected by Greek influences than others. The term Hell...', meta: {'name': 'Hellenistic_period', 'source_id': '322682cbb61d31c17e6d21e06723e94efd0ef242a5c9f308888fa14209c4078b', 'page_number': 1}, score: 0.5203969874883405)]",1.0,1.0,1.0,1.0,1.0 +In what decade was disco popular?,1970s,1970s,"[Document(id=0d7c459ee83f27ed65b7fe4763b61e5389f0abd569a3ea7d9964e56342cb94bc, content: ' +Hard rock had been established as a mainstream genre by 1965. From the end of the 1960s, it became ...', meta: {'name': 'Adult_contemporary_music', 'source_id': '97671df8695f447ceee464d87be9f360589d67646ccf18f2eb61b6153865369a', 'page_number': 1}, score: 0.46217621335930664)]",0.0,1.0,0.0,0.0,0.0 +What do the rivers form?,lakes,Nieuwe Maas,"[Document(id=c76f28ed2994796f7f3a5aeea9229958f03f574059e71701fc815116b1cba0e0, content: ' Since the Rhine contributes most of the water, the shorter term Rhine Delta is commonly used. Howev...', meta: {'name': 'Rhine', 'source_id': '23686e48a61504679a91dac10d83b287aef3dff7dabc0ab881c281059187ce6a', 'page_number': 1}, score: 0.47601402875176224)]",0.0,0.16586217284202576,0.0,0.0,0.0 +What were the first 256 code points of Unicode made identical to? ,ISO-8859-1,ISO-8859-1,"[Document(id=e5a1e75e17e8253dc7c01b0468f9e5c5d8a150581ae96fc8d4a0f6136ae9d79a, content: 'Unicode is a computing industry standard for the consistent encoding, representation, and handling o...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.653812371157898)]",1.0,1.0,1.0,1.0,1.0 +"How many countries did her song ""Irreplaceable"" get number one status in?",five,three,"[Document(id=ac63fedfc16f471fee9ece91ef2b1a184be66b48588208bdae014bc4b8fea7b8, content: ' Beyoncé was the third woman to have writing credits on three number one songs (""Irreplaceable"", ""Gr...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 0.48296640826682014)]",1.0,0.6790156364440918,1.0,1.0,1.0 +How many Examination Boards exist in India?,30,NIPER) and National Institute of Nutrition (NIN). Hyderabad has five,"[Document(id=2a60820f19599e70a11ffdf68cff487cf35e2e6c1fa7acfbb591eec5701ddc2b, content: ' Depending on the institution, students are required to sit the Secondary School Certificate or the ...', meta: {'name': 'Hyderabad', 'source_id': 'e032b0dc6babeab40ef746299b8fdda8c0f0f12976c0b900e8ed7ca60ce1802d', 'page_number': 1}, score: 0.4533602356519226)]",0.0,-0.009233260527253151,0.0,0.0,0.0 +What play showed an early depiction of civil disobedience?,Antigone,Antigone,"[Document(id=ad70291d4dec497bcd48d02a2bfbb4780dffd558cf0eee2d7c976f107f401335, content: 'One of its earliest massive implementations was brought about by Egyptians against the British occup...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 0.5247203837647456)]",1.0,1.0000001192092896,1.0,1.0,1.0 +"Once in conversation with the Queen, how is Elizabeth addressed?",Ma'am,fight her,"[Document(id=084ce89bc02da15f20a402a3c5471b5ea5fdc1e021b5fb308f8758cd47a58cfa, content: ' +Hopkins' column also drew criticism on Twitter, including from Russell Brand, to whom Hopkins respo...', meta: {'name': 'The_Sun_(United_Kingdom)', 'source_id': 'f3e43af95385a83926c5f55e334fdf85194f862cc4b324d6cfc50b85c0244bbd', 'page_number': 1}, score: 0.4472458037405809)]",0.0,0.16980358958244324,0.0,0.0,0.0 +In what century did Paris become largely into fashion?,19th,19th,"[Document(id=faa03729332a5c07c1c9d6d87747d15af8b3043f87a9cff7389d9a1c23d97c7c, content: ' +Paris has been an international capital of high fashion since the 19th century, particularly in the...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.6398172942259656)]",1.0,1.0,1.0,1.0,1.0 +When did Alfred the Great begin his reign?,871,1155,"[Document(id=c6a4d846401a00cf0b7f7a7d4778c810ed368749fb552c96d9809a207a1269f3, content: ' His successors continued to struggle against the papacy as well as the German nobility. A period of...', meta: {'name': 'Middle_Ages', 'source_id': 'f13b345e20c54b6cc55363080d7492474df62715717172aa9f1029f286010ad9', 'page_number': 1}, score: 0.40635490627476634)]",1.0,0.37948405742645264,1.0,1.0,1.0 +What two methods can be used when using Load Testing?,data or a large number of users,large quantities of data or a large number of users,"[Document(id=dd704f4bc758ab475db02844a8a24263dd4fdb32afbc7f7dc86b810f8fa62e4f, content: ' +Load testing is primarily concerned with testing that the system can continue to operate under a sp...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.6185313518586624)]",1.0,0.9246095418930054,1.0,1.0,1.0 +what may possibly be a solution to net neutrality concerns? ,municipal broadband,municipal broadband,"[Document(id=0e27e0ca72a74505db588413835ac7002514412100758a45474267af68c71a90, content: 'An Internet service provider (ISP) is an organization that provides services for accessing, using, t...', meta: {'name': 'Internet_service_provider', 'source_id': 'b6cdeebacab1c0fc99cc348ba10a7ad5f8a09ca4691c74f89374730939ffbbb4', 'page_number': 1}, score: 0.5086086563804515)]",1.0,1.0,1.0,1.0,1.0 +What are the ground-attack aircraft in the USAF usually deployed in support for? ,U.S. ground forces,land forces,"[Document(id=2e4b3b475a65a6397953a11388e7fa166873ff680f98b6e7464deb399963430f, content: ' +In the US Air Force, the distinction between bombers, fighters that are actually fighter-bombers, a...', meta: {'name': 'United_States_Air_Force', 'source_id': '1bb222de66e5a2c81f1d5b75bf3eced066939aec82934a5cb176a607b4e8cc8f', 'page_number': 1}, score: 0.4276380085436715)]",1.0,0.6414642333984375,1.0,1.0,1.0 + In what year did the invasion of Russia take place?,1812,1854,"[Document(id=32391672352be24938ffc4c30ab31229375147f89f81aa7ba80cc10250a1ac05, content: ' Aside from a minor skirmish at Constanța there was little for the allies to do. Karl Marx quipped t...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 0.5119894655681422)]",0.0,0.42288899421691895,0.0,0.0,0.0 +What was the practice of religion to the Romans?,personal choice,Private and personal worship,"[Document(id=28a81868c1313d070db0571acd383d74fc733b1b8a6ab4356ba3f8f9fcf74d1e, content: ' In the campus, the youth assembled to play and exercise, which included jumping, wrestling, boxing ...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.6164710395064059)]",0.0,0.2722780108451843,0.0,0.0,0.0 +What is one example of failure testing?,Software fault injection,Software fault injection,"[Document(id=6a27e4d4d04355954ac79528dacb365ee655dae450d99f9b5d6cd61e557bc994, content: ' Specifically, it seeks to uncover software regressions, as degraded or lost features, including old...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.46880236044773604)]",1.0,1.0,1.0,1.0,1.0 +"Out of the maximum amount of players allowed, one must be a what?",goalkeeper,home-grown players,"[Document(id=0d02d0727bbf3d538faf4d0e4b493fb1a575377aa15b678ef4c8a1fa1646ad91, content: ' By 2009, under 40% of the players in the Premier League were English. +In response to concerns that ...', meta: {'name': 'Premier_League', 'source_id': '369baa9a5a854704f16fadd35144d3a3150cf1e9ae1671d9b50a3448547b814b', 'page_number': 1}, score: 0.48538441845429137)]",0.0,0.2501821517944336,0.0,0.0,0.0 +In which year was the new constitution ratified?,1855,1884 that produced a constitution ratified 3:1 by Montana citizens in November 1884,"[Document(id=10085273dcf2e8add6fd060b7e5ee171cced6c2e779468105fa3f236158226ca, content: ' Surveys in 1874, 1875 and 1876 helped spark the Great Sioux War of 1876. The transcontinental NPR w...', meta: {'name': 'Montana', 'source_id': '556dc408ae9a8cf251c8082c455043f3c354c7457c6fe7793c307babf666b651', 'page_number': 1}, score: 0.485443964150987)]",0.0,0.2675617039203644,0.0,0.0,0.0 +What vessel was attacked in 1845?,Naiad,Naiad,"[Document(id=c450c10c9cc7ec2362a5a8c23142549f03d77146379f9bb96ab788f8c91887e3, content: ' One year later, the American schooner Dolphin arrived and picked up two boys, the last survivors of...', meta: {'name': 'Marshall_Islands', 'source_id': '762a4f6095de127ded5441c7a17d1c9c38f731ac9cb1ffe8de30af6573be7edf', 'page_number': 1}, score: 0.5314080714125682)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did Travel + Leisure list Yale as one of the most beautiful campuses?,2011,2011,"[Document(id=afdbd08089615eb2bbb9dd9a5945a7e094c57e1f63a4d1f083e6183556a9f3e7, content: ' Moreover, Yale has been accused of failing to treat workers with respect. In a 2003 strike, however...', meta: {'name': 'Yale_University', 'source_id': 'f33638c4d6b8d7572ba500f3a8affca1729aae3046c83a94e2a7424dd50f6549', 'page_number': 1}, score: 0.46579983807248415)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What sort of variants did the Human Genome Project indicate that many of the measured transcripts were?,alternative variants,alternative variants,"[Document(id=74c523cc7b77506967566fb8161bf4043d30ccf8c68e884af6cf2b9d24106e71, content: ' Conversely, plants can have extremely large genomes, with rice containing >46,000 protein-coding ge...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 0.5387776043980725)]",1.0,1.0,1.0,1.0,1.0 +During what year did the Italians invade Libya?,1911,1911,"[Document(id=d5a42bacf8a08d601a973e5bad25b36c41015f76de0d512f9c3130d5e87ae4de, content: ' He repeatedly expressed a preference for the desert over the city and retreated to the desert to me...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.584297128771873)]",1.0,1.0,1.0,1.0,1.0 +What did Davidson believe the God of Aristotle is synonymous with?,rational thought,Ilion,"[Document(id=bfdb2859c4aba7b42fa442a55f6ab1b3ece70f89cdc51e7d48df4e96e7091e95, content: ' +As god of colonization, Apollo gave oracular guidance on colonies, especially during the height of ...', meta: {'name': 'Apollo', 'source_id': '2eaf6f8c7cfd741cc2ece4f65e91b75ad204489bacd14efebc4697a9395dbb00', 'page_number': 1}, score: 0.5127319887009952)]",0.0,0.06766562163829803,0.0,0.0,0.0 +What percentage of students at Notre Dame are the children of former Notre Dame students?,21–24%,39.1%),"[Document(id=fc88c82bbe8a5d66ca954ba928957f174fc7fd55fc1c67f366e483ee9b7f3159, content: ' Theodore M. Hesburgh CSC, President Emeritus of the University of Notre Dame. The institute has con...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 0.47693589966042116)]",1.0,0.6455903053283691,1.0,1.0,1.0 +On what day did U.S> planes located Ozawa's fleet?,second,24 October,"[Document(id=67d402d488ad87fcda2d30f304cebc3d3ab52520c994807e5b06e892c3a0e1e1, content: ' The carriers had only 108 planes. The force was not spotted by the Allies until 16:40 on 24 October...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 0.694417436054523)]",1.0,0.241010844707489,1.0,1.0,1.0 +Greek was crowned the European champions in what event?,Euro 2004,Euro 2004,"[Document(id=9c5e4e5ebe04f6d60e8e13bc3cfe5cd0c8fa7c41c1148c916c472a26ca7c677b, content: ' Their best ever performance was in the 1896 Summer Olympics, when Greece finished second in the med...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.5942357834131707)]",1.0,1.0,1.0,1.0,1.0 +Which tense indicates that an action hasn't finished?,imperfective,imperfective,"[Document(id=b086dbc67b80e6f26b1e823c6ec440f8f645661a3c528184598912bb2cd8d10b, content: ' On occasion of the publication’s 45th anniversary, the Croatian weekly journal Forum published the ...', meta: {'name': 'Serbo-Croatian', 'source_id': '2cc55814bc801258e8300ed82e003428dbf11715c74ad99297d61c5dd77023f4', 'page_number': 1}, score: 0.43658414340153945)]",1.0,1.0,1.0,1.0,1.0 +What acoustic element could be distorted by different audio settings in iPods prior to 2007?,Bass,bass instruments,"[Document(id=25a9235e2187510deb110e2fe84f6e0f7a2dbacbbd0bd491e44e8a83b3be8cbf, content: ' This would happen for EQ settings like R&B, Rock, Acoustic, and Bass Booster, because the equalizer...', meta: {'name': 'IPod', 'source_id': 'f0324763387a7e11c235569bada6935891b2c5ba40dd065ab5acc1bb5de8dbed', 'page_number': 1}, score: 0.6185783377431922)]",1.0,0.8350290060043335,1.0,1.0,1.0 +In what way can Neutral triatomic hydrogen exist?,excited form,excited form,"[Document(id=f649524cdd28fbe2ba5a3cadae832b66d8fef791e1b4613b5d6705e7ca2b49a9, content: ' As a plasma, hydrogen's electron and proton are not bound together, resulting in very high electric...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.6261915857181519)]",1.0,1.0,1.0,1.0,1.0 +How are children used in the conflicts in Burma?,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements.,child soldiers,"[Document(id=d0c37a34ef187eefa48aed48ab2f9ed0e96f3a67771e8a88b8c3da762a3a37fb, content: ' According to Samantha Power, a US delegation raised the issue of child soldiers with the government...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.49752164214332073)]",1.0,0.6079859137535095,1.0,1.0,1.0 +In what decades were Arsenal players stereotyped in a negative way?,1970s and 1980s,1930s,"[Document(id=db4ceb7a496a06e0d9cc63e221e6c8d2205d64c499149de6aa8c41e851b9cd5a, content: '3 billion in 2015. +Arsenal Football Club was formed as Dial Square in 1886 by workers at the Royal A...', meta: {'name': 'Arsenal_F.C.', 'source_id': 'b197b14120aabbc3bf2073d4b3e99c1c51d56d5ac5248f8a97dadcfa056a6f7e', 'page_number': 1}, score: 0.5057590998140661)]",1.0,0.5979937314987183,1.0,1.0,1.0 +When did new construction start in Melbourne?,late 1890s,The 1850s and 1860s,"[Document(id=59b11e1f477d6345b55d7c631b6690f8efcc7e8aabb31697c94914dd9e534a7c, content: ' On 30 August 1835 the party disembarked and established a settlement at the site of the current Mel...', meta: {'name': 'Melbourne', 'source_id': 'e9cc00699ea29b72949874ec0e147efa277ecdaa2a727246f376aab1a8d342bd', 'page_number': 1}, score: 0.5643502397335578)]",1.0,0.5336307883262634,1.0,1.0,1.0 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:5.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:5.csv new file mode 100644 index 0000000..be8ab03 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-MiniLM-L6-v2__top_k:3__chunk_size:5.csv @@ -0,0 +1,117 @@ +questions,true_answers,predicted_answers,retrieved_docs,doc_mrr,sas,doc_map,doc_recall_single_hit,doc_recall_multi_hit +What were the earliest priesthoods? ,flamines,flamines,"[Document(id=99729d5c09ec68a18e6cb9e326f59027b5c69bc74e05e8c8c0d825360370bf5e, content: ' In household cult, the paterfamilias functioned as priest, and members of his familia as acolytes a...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5457362610814793)]",1.0,1.0,1.0,1.0,1.0 +When was a third visit to Mnafeld scheduled? ,early 1546,August 1943,"[Document(id=1a1a81964a22093ecafe5f8c0981cba27f2310f646ee184fd2e0949a10a9eca1, content: ' The United States Marine Corps landed on Funafuti on 2 October 1942 and on Nanumea and Nukufetau in...', meta: {'name': 'Tuvalu', 'source_id': '4a4e309bc4b63f2a542d80bca7c9d33d9df6ccfa328612b01483ad5d2101621d', 'page_number': 1}, score: 0.3552591171817888)]",0.0,0.27104127407073975,0.0,0.0,0.0 +What was the name of the airfield where heavy raids took place?,Farnborough,Coventry,"[Document(id=8fc950b88e17fb84bfb26ca1cf8567786d02b1c73face329b95c2f7c7e1dda1b, content: ' ""Pathfinders"" from 12 Kampfgruppe 100 (Bomb Group 100 or KGr 100) led 437 bombers from KG 1, KG 3, ...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 0.5286291667203107)]",1.0,0.4347962439060211,1.0,1.0,1.0 +The Modern English ending -'s is derived from what ending in Old English?,-es,-es,"[Document(id=e5cb80c1093f573ce5c1d255322723cb12056865ea80e45b815e3467be2f33fd, content: ' It maintains several distinct cases: the nominative, accusative, genitive, dative and (vestigially)...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.5479583834144631)]",1.0,0.9999998807907104,1.0,1.0,1.0 +In what area is it common for spiritual mentorship to be extremely high?,the West,Hinduism,"[Document(id=f628cb30d18c1374b4351a233133ea8649eb78c607439c749e11f99f56c9bbac, content: ' This guidance is generally offered during a ceremony called the patriarchal blessing. +In Hinduism t...', meta: {'name': 'Teacher', 'source_id': '8d3b7bd3176a10380300a49fa713b8fffa5e77a181df51ea823d2bdeb46d4c7b', 'page_number': 1}, score: 0.4727434670791851)]",1.0,0.20152541995048523,1.0,1.0,1.0 +Who is the only Greek volleyball club to win European titles?,Olympiacos,Olympiacos,"[Document(id=927ee26533dc7f17be50a4cb7fc74bed43a4bc44825d916333e6dd9c5d18c958, content: ' The Greek league, the A1 Ethniki, is considered one of the top volleyball leagues in Europe and the...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.6800230578684324)]",1.0,1.0,1.0,1.0,1.0 +How many judges were on the panel for this season?,four,four,"[Document(id=52a0fae35f124062c7009aeb57cfa6fb15f201644550104b1c1a83c6f0449b3a, content: ' This season's judging panel consisted of Randy Jackson, along with Mariah Carey, Keith Urban and Ni...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 0.6542733508503982)]",1.0,0.9999999403953552,1.0,1.0,1.0 +"Along with a chemical process, what type of process is used by night vision devices to convert infrared into visible light?",electrical,electrical,"[Document(id=05e7c56a6da1ed703309ec6f627bc69c369ec51519a9e5eb8c442673c62365e4, content: ' Infrared can also be used to detect protostars before they begin to emit visible light. Stars emit ...', meta: {'name': 'Infrared', 'source_id': 'ba9d9843065facd8aa75e472f0c7d760ae3d78712335a49f7abaf499acb21d1c', 'page_number': 1}, score: 0.7197798569813239)]",1.0,1.0000001192092896,1.0,1.0,1.0 +The second most prominent religion in New York is what?,Judaism,Judaism,"[Document(id=4437c778daec446c4362623ed3261e5ec4170314f4aa14f8381b8c7ee6eef3fe, content: ' +Christianity (59%), particularly Catholicism (33%), was the most prevalently practiced religion in ...', meta: {'name': 'New_York_City', 'source_id': '8a1509d057a51ed7a68b42ff87df5d459ac7f15f3d0f4f1685859ba197f9686d', 'page_number': 1}, score: 0.7362191709321027)]",1.0,1.0000001192092896,1.0,1.0,1.0 +The Free Hanseatic City of Bremen includes which cities?,Bremen and Bremerhaven,Bremen and Bremerhaven,"[Document(id=ac9d347e11b4785380e74afe9cd32fa785d7cbce65e3fb57700d59e39d13341f, content: 'Germany is a federal republic consisting of sixteen federal states (German: Bundesland, or Land).[a]...', meta: {'name': 'States_of_Germany', 'source_id': '60764e89141121f734e956aeeb48fcbe12c7e4fdec683c4f9fa29406a3f0579e', 'page_number': 1}, score: 0.5273838915442326)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What seeks to limit the use of polysyllabic characters?,CPC,"Unicode are rarely used Kanji or Chinese characters, many of which are part of personal and place names, making them rarely used, but much more essential than envisioned in the original architecture of Unicode","[Document(id=342c330a27b48d875c7faab6489e31c23f8175b42e50bc55435524888ab8e70f, content: 'g., Egyptian Hieroglyphs) and thousands of rarely used or obsolete characters that had not been anti...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.5761358435107942)]",0.0,0.01901988312602043,0.0,0.0,0.0 +In what courts can federal habeas corpus suits be brought?,federal,federal,"[Document(id=be1e7e4a69e49687682a8ca0c606792dd44baba4b9d0f7e7871524099ae07fba, content: ' The second-largest mass execution was also a hanging: the execution of 13 African-American soldiers...', meta: {'name': 'Capital_punishment_in_the_United_States', 'source_id': '0a4e4dda32f7be6a4f78da5ed9ee21d2cc7659112c0d087b2923a482bdc77b5b', 'page_number': 1}, score: 0.5800851716370473)]",1.0,1.0,1.0,1.0,1.0 +What did Whitehead consider himself as a philosopher? ,rank amateur,rank amateur,"[Document(id=b0360cb0e79aa53fce9b6b987065d38127ef8c7392ded59d294e940b3fc8c15b, content: ' For Whitehead, education should be the exact opposite of the multidisciplinary, value-free school m...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 0.6416938602715427)]",1.0,1.0,1.0,1.0,1.0 +What condition does a person have to meet to be allowed by law on a coin?,deceased,Geithner,"[Document(id=72219d05f3a5eb2742c24339f535f703a1e2eab90cf567e58e7fabed15a9bd26, content: ' Geithner. Future minting of such coins will be made solely for collectors. +The Constitution provide...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.3853987952901218)]",1.0,0.30465614795684814,1.0,1.0,1.0 +Which region of the US experiences rain year round?,Pacific Northwest,south-west,"[Document(id=a92a5ad62beb509c68bef27d232b583200e61a189db7823ad4d761f42e53ddc5, content: '1 cm) would lie at any one time due to melt. Over the 1961–1990 period, annual snowfall accumulation...', meta: {'name': 'Plymouth', 'source_id': '537a26c6a7e7589263422cb8b9e6b2a351bf563a6dcb44c80db326979f7a2b60', 'page_number': 1}, score: 0.5886305078512428)]",0.0,0.594167172908783,0.0,0.0,0.0 +What is the ability to recognize and adapt to new specific pathogens called?,Adaptive (or acquired) immunity,Innate immune defenses,"[Document(id=a18e0058b343b5da0485ed348b299343056dca832078a909b95578e3a8fd7eb7, content: ' Conversely, non-self molecules are those recognized as foreign molecules. One class of non-self mol...', meta: {'name': 'Immune_system', 'source_id': 'fcb8c5092b2596a66d4f96587d653cbabda606874d86632c35ec40ef8f023051', 'page_number': 1}, score: 0.6306169395074717)]",1.0,0.6429639458656311,1.0,1.0,1.0 +Who allegedly hit a home run to the Center?,Babe Ruth,Babe Ruth,"[Document(id=2abfa717cced74cfc35f5836efd511c9dcad606c2013e9cf62940c0910458753, content: ' Because of the tie the Giants and Cubs ended up tied for first place. The Giants lost the ensuing o...', meta: {'name': 'Chicago_Cubs', 'source_id': '10bd5330115a2fe2b9ef60d21abe3942769709161783409af0c4444ccac129c3', 'page_number': 1}, score: 0.5503058004670902)]",1.0,1.0,1.0,1.0,1.0 +Which type of graduate from Imperial earned the second highest average starting salary after graduation?,Computing graduates,Computing,"[Document(id=69167e0ce0c5b24f9a9e3827fc8644256bf41ebe37320544c994fc892aac55b6, content: ' In terms of specific course salaries, the Sunday Times ranked Computing graduates from Imperial as ...', meta: {'name': 'Imperial_College_London', 'source_id': '8ff02d910ce681fdd72074b13ed322271625386966ee827dabf1c93ca556fd6e', 'page_number': 1}, score: 0.48353147038749955)]",1.0,0.6075230836868286,1.0,1.0,1.0 +In what year did Eisenhower propose his second civil rights act?,1960,1957,"[Document(id=e20106e1401ce2522cd3d14d13fae9ea782170d84e4c3db914247513b098ce09, content: '.. a discrimination in the expenditure of those funds"". +Eisenhower told District of Columbia officia...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 0.6884836139820915)]",1.0,0.703947901725769,1.0,1.0,1.0 +What is the current height limitation in Paris in central areas?,50 m,50 m (160 ft),"[Document(id=d89cdeedbc8441f4e2c52a3575e8c530fcce225561404b37c7cdd108d1cc549f, content: ' Modern Paris owes much to its late 19th century Second Empire remodelling by the Baron Haussmann: m...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.7343823718020364)]",1.0,0.8085941672325134,1.0,1.0,1.0 +Income level was found to have a signification correlation to which outcome in the study?,nutrition literacy,lower levels of growth,"[Document(id=e62fe349ee4e73a3572e9f63b19d92af4d05f2e4ba5ba37814049ed91dfeb15b, content: '"" +In 1993, Galor and Zeira showed that inequality in the presence of credit market imperfections has...', meta: {'name': 'Economic_inequality', 'source_id': '648ff32b8c146c974b23e115e3a61420b1cdbea9de1f5bf38442ce4c1d9caa42', 'page_number': 1}, score: 0.47991742167236073)]",0.0,0.13175535202026367,0.0,0.0,0.0 +How much did Viacom want in damages in their lawsuit against youtube?,$1 billion,$1 billion,"[Document(id=720eb3795a623e31335108bcc29a4b774e33272bb625d5968983846dd3b35f68, content: ' Three successful complaints for copyright infringement against a user account will result in the ac...', meta: {'name': 'YouTube', 'source_id': '4d18798daeceb997a6dcb04b575dbbdad768671ca9202116370611d944d510be', 'page_number': 1}, score: 0.7553394085608023)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who should be able to challenge administrative orders in court?,individuals,individuals,"[Document(id=cf59c0f0619e715b979051e0a84530bf73463713fa11f2961866d233ac2537f5, content: ' Dicey's: “no man is punishable or can be lawfully made to suffer in body or goods except for a dist...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 0.6271917279684331)]",1.0,1.0,1.0,1.0,1.0 +The Slavs were under what name in the early 6th century?,the Antes and the Sclaveni,the Antes and the Sclaveni,"[Document(id=47d265d7c6c10ca0d6f93b7f8d3f5cc58da5eeb0104c993e0a00be57db28de17, content: ' The Old East Slavic Slavuta for the Dnieper River was argued by Henrich Bartek (1907–1986) to be de...', meta: {'name': 'Slavs', 'source_id': 'a55731ff3cd03d8fe34d4e646dbfcdc2088f01190e5110c9ff0baf4e4b486847', 'page_number': 1}, score: 0.626136363966151)]",1.0,1.0000001192092896,1.0,1.0,1.0 +How are the results simplified when using the SASO?,"into four summaries that look specifically at responses that correspond to either homosexuality, heterosexuality, bisexuality or asexuality.",four summaries,"[Document(id=f86f4f0c6b96ee3b686893cc41683ca450066b225884d3390504ef3124153018, content: ' +The SASO consists of 12 questions. Six of these questions assess sexual attraction, four assess sex...', meta: {'name': 'Sexual_orientation', 'source_id': 'f94279650936a403aa3d2b43ab1ad42b1fd14d05455aa86fbcda6cda978ffe5c', 'page_number': 1}, score: 0.5156582100443352)]",1.0,0.5629022717475891,1.0,1.0,1.0 +What socioeconomic class used the Florentine Tuscan language in Dante's time?,upper class,literate and upper class,"[Document(id=d25b410d7ee734d39ca37d5429e9393300e12f25ab3ed13f9a91eb5740bf305f, content: ' Due to Eastern Lombard's status as a Gallo-Italic language, an Eastern Lombard speaker may, in fact...', meta: {'name': 'Dialect', 'source_id': '2e05b686e48c9512ae5466f1d15202b44fdc903ea32d0a42c5dbc9ef173d051d', 'page_number': 1}, score: 0.6070526656029314)]",1.0,0.7113619446754456,1.0,1.0,1.0 +"In addition to ammo proposals, what other need was articulated?",The need for some form of tracer or smoke trail,smaller-calibre ordnance,"[Document(id=0578917295f43992a211ded11b78bf62d55a950a27b45fde24ab1eabb2df79e0, content: ' Their needs could cogently be met with smaller-calibre ordnance beyond using the usual singly-mount...', meta: {'name': 'Anti-aircraft_warfare', 'source_id': 'c54639424f62ce369356ed8c4e1e9fab8769359dd28029aa89832f5aa994f45d', 'page_number': 1}, score: 0.4206405801057249)]",1.0,0.10714080184698105,1.0,1.0,1.0 +What is that power that enables us to love?,Prevenient grace,universal love,"[Document(id=a78f83359689c5324d38fe66319b2e022231f3eea49709cfa6adc4ff161463f3, content: ' Love led to spiritual growth that, in turn, reflected an increase in love for God and humanity. Thi...', meta: {'name': 'Dominican_Order', 'source_id': 'f55ba4e5b37c737e23256c74229a0bf3a5c697b70e171c6ce20c437d900b023e', 'page_number': 1}, score: 0.4322410896201257)]",0.0,0.33382272720336914,0.0,0.0,0.0 +"Who was assassinated on September 22, 189 AD?",He Jin,eunuchs,"[Document(id=d97173c71bdb99d0b5d605d7f0a437d175c10a44abace4a16982f9d450595223, content: ' 189 AD), half-brother to Empress He (d. 189 AD), plotted with Yuan Shao (d. 202 AD) to overthrow th...', meta: {'name': 'Han_dynasty', 'source_id': '060d95b91f9c20e5c8cd8b8a032ab44aac38ab88ce62d3c302bea0865f533568', 'page_number': 1}, score: 0.47388412521933254)]",1.0,0.23481109738349915,1.0,1.0,1.0 +How did Gaddafi change the legal system?,he introduced sharia as the basis for the legal system,introduced sharia,"[Document(id=bdc01f662020adb880737d79c53bc6a192c293e64aaacad9c0bca0ebff7ff92d, content: ' Intent on pushing Libya towards ""Islamic socialism"", he introduced sharia as the basis for the lega...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.5802873287113512)]",1.0,0.7288561463356018,1.0,1.0,1.0 +What name was it assumed Elizabeth would take upon her marriage to Philip?,"Mountbatten,",heir presumptive,"[Document(id=d84cede505487aab2b7f8ca81ca6dc2ec7e647562d9fcef338a4a7bc8b2c2e2b, content: ' She was educated privately at home. Her father acceded to the throne on the abdication of his broth...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.6316186438786011)]",1.0,0.16207502782344818,1.0,1.0,1.0 +What was the attack on the British weakness?,Battle of Fort Bull,UN intervention and US pressure forced Britain into a humiliating withdrawal of its forces,"[Document(id=d09ab756c06e4e2126fc0655780e94923b99c11b88bd24e4b791726d7b43a296, content: ' Another of Eisenhower's concerns was the possibility of a wider war with the Soviet Union after it ...', meta: {'name': 'British_Empire', 'source_id': '8f673e96ef153333a8fc1d3855be60f5be1829316ed1fcb58dd8590fdb083946', 'page_number': 1}, score: 0.5456397414690248)]",0.0,0.13994494080543518,0.0,0.0,0.0 +How much of South Tucson is Native American?,10%,5%),"[Document(id=85479ceb6ee7474eee57b17473c24a59db101c6d892e6eb76c94e6a327c1f337, content: '5%) Native American, 96,405 (6.3%) Asian (2.0% Chinese, 1.2% Indian, 0.9% Vietnamese, 0.', meta: {'name': 'Philadelphia', 'source_id': '171d1ade27657b3e3fccd29b91877e3084f29a24e344651181523e2acec7739a', 'page_number': 1}, score: 0.5113436268756754)]",0.0,0.776621401309967,0.0,0.0,0.0 +Whose theory claimed that species were unchanging and designed by God?,John Ray,William Paley,"[Document(id=1de1b94fbb49d9d6bbc2cdcf71ef4f81cd3ec13b95fe79865c57979200ac9095, content: ' Lamarck thought there was an inherent progressive tendency driving organisms continuously towards g...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 0.6407730208950675)]",1.0,0.2994629740715027,1.0,1.0,1.0 +Who can restore a database that has been corrupted?,database administrator,database administrator,"[Document(id=4bd715bf27179ff827b2c0c78fe9be74c7197adc66c8c14455422d407b249958, content: ' When this state is needed, i.e., when it is decided by a database administrator to bring the databa...', meta: {'name': 'Database', 'source_id': '98b09743ece9be42ad021f0dc6ffab08c322e7eeace6aae0b98b4bbf67224bd3', 'page_number': 1}, score: 0.4096253190251735)]",1.0,1.0,1.0,1.0,1.0 +What unpopular philosopher's text did Avicenna learn from?,Abu Abdullah Nateli,Abu Abdullah Nateli,"[Document(id=a3a017724e8f025ada3a4a92e98993ec44b17e0941134ce7e52db3191cd3ea08, content: ' He learned Indian arithmetic from an Indian greengrocer,ءMahmoud Massahi and he began to learn more...', meta: {'name': 'Avicenna', 'source_id': '25cbb1011a29473e1dfa8d96ebe95bc8ead7393548de57e871f959624f7238f1', 'page_number': 1}, score: 0.665399959042628)]",1.0,1.0,1.0,1.0,1.0 +What was the event called that caused Ayutthaya to reclaim lost territories ?,the Chinese threat,With Burma preoccupied by the Chinese threat,"[Document(id=a8730bd7201f8a080d3cffb4ba6d02e7cc82f409fe226dfe6987d18adaeb8596, content: ' Its trade and secular administrative reforms built a prosperous economy for more than 80 years. Fro...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.542541483221691)]",1.0,0.7524588704109192,1.0,1.0,1.0 +"Before the introduction of the Latin alphabet, how was Old English written?",runic system,runic system,"[Document(id=8825405ebdc1bb4daf623185ae693fe4f2cab9da4dcca330ccb2ab3f6b05d6e5, content: ' The modern cognates of original words have been used whenever practical to give a close approximati...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.7110396889326562)]",1.0,0.9999999403953552,1.0,1.0,1.0 +"When groups that are antagonistic to each other form a coalition for hidden gain, it is called what?",unholy alliance,unholy alliance,"[Document(id=f1fa43fc9c1c5bd46b9f5455701bf99b0e98d4579816d755389bcc2dbba1bcda, content: ' This sum itself may be all or a portion of the difference between the actual (inflated) payment to ...', meta: {'name': 'Political_corruption', 'source_id': 'e7c84fb03cb06b72c4ab51eb24218e9f2a10b95493e43d575342db0ffa3e9e1d', 'page_number': 1}, score: 0.5088500802288218)]",1.0,1.0,1.0,1.0,1.0 +What made a strong economic case for investing in low carbon technologies?,Stern Review,the Stern Review,"[Document(id=333c4da3ee155b7d7b4ddf91857cd1ec4e1c3cafb4159ebedda18347755b3f1f, content: ' In the US the main tool to track and stimulate voluntary actions is Green-e program managed by Cent...', meta: {'name': 'Renewable_energy_commercialization', 'source_id': 'fa5f7904b8df5272f4f870ed401f610435c916171c87b47deed9a45e6bd594ea', 'page_number': 1}, score: 0.5063254824651507)]",1.0,0.9567214846611023,1.0,1.0,1.0 +"Who introduced the Latin version of the word ""insect""?",Pliny the Elder,Pliny the Elder,"[Document(id=e8da082f5022b804801ddb2778f16c32226f0ecb59f3ed7f8fcdae17653f162c, content: ' +The word ""insect"" comes from the Latin word insectum, meaning ""with a notched or divided body"", or ...', meta: {'name': 'Insect', 'source_id': 'e14215fdfb7e1394430dedb24549d773464f110b7f19b86558bc101f84126630', 'page_number': 1}, score: 0.7250091339820998)]",1.0,0.9999998807907104,1.0,1.0,1.0 +What was started by the Roman Catholic Church to dispense of the Cathars in Languedoc?,The Cathar Crusade,The Cathar Crusade,"[Document(id=08215551b881c7cefad5bb5baaaddc4d6f9a5802a709d61e32985651e2115c85, content: ' By the 11th century, more organised groups such as the Patarini, the Dulcinians, the Waldensians an...', meta: {'name': 'Heresy', 'source_id': '8ba473262f8508a9c99699bb09cd022993896ece1fed1463a9cf4bd7eb0c3e68', 'page_number': 1}, score: 0.5927295914277724)]",1.0,1.0,1.0,1.0,1.0 +What music service is a huge competitor for Tidal?,Spotify,Rhapsody,"[Document(id=5d9e051089794041608912607c441ece721f90a2d16dfd9c9daf30030cabea40, content: ' +Unauthorized MP3 file sharing continues on next-generation peer-to-peer networks. Some authorized s...', meta: {'name': 'MP3', 'source_id': '897f83fee23a44ef089f12ecec47310c2a429f933b038ca6584b19da53d1307d', 'page_number': 1}, score: 0.4703211745807585)]",0.0,0.4572443962097168,0.0,0.0,0.0 +How much was each copper token worth?,a halfpenny,$100,"[Document(id=1d82522f504285e7d6da03df31727546ed52678efdb9599878b409e6371fa7d4, content: '599⁄10. When currently issued in circulating form, denominations equal to or less than a dollar are ...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.523248963331033)]",0.0,0.22496077418327332,0.0,0.0,0.0 +What ships were attacked in 1852?,Glencoe and Sea Nymph,Glencoe and Sea Nymph,"[Document(id=73cc59555437274633bfd424f9225eaa5b55cf814b3bbb82c89b0d9357a659f0, content: ' In 1852 the San Francisco-based ships Glencoe and Sea Nymph were attacked and everyone aboard excep...', meta: {'name': 'Marshall_Islands', 'source_id': '762a4f6095de127ded5441c7a17d1c9c38f731ac9cb1ffe8de30af6573be7edf', 'page_number': 1}, score: 0.5423896434650102)]",1.0,1.0,1.0,1.0,1.0 +What was the first fossil to display both clearly traditional reptilian characteristics?,Archaeopteryx,Archaeopteryx,"[Document(id=22e5680a1c41aa000971ff89fccd80f808c8f88ae7d0f5c0d81eb763ce026955, content: ' Unlike Archaeopteryx and the non-avialan feathered dinosaurs, who primarily ate meat, recent studie...', meta: {'name': 'Bird', 'source_id': '76114e3e9c92caec1de2bcfd38b6f22e1b48487f665fa6392f914934a6748ae1', 'page_number': 1}, score: 0.5813249416305144)]",1.0,0.9999998807907104,1.0,1.0,1.0 +What are the theories that integrate molecular genetics with Darwinian evolution called?,the modern evolutionary synthesis,the modern evolutionary synthesis,"[Document(id=54fb8e44f718e454d5f4aef7097036553aa91b829fc2ea08644a1f9fc17e50f5, content: ' +The theories developed in the 1930s and 1940s to integrate molecular genetics with Darwinian evolut...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 0.6825508197600287)]",1.0,1.0000001192092896,1.0,1.0,1.0 +At what time did Tesla get dinner?,8:10 p.m,eight o'clock,"[Document(id=5dfd5078daafbc2ef3695636e6d70b42db2abc79ac389c20eac0417969714c11, content: ' Tesla would telephone his dinner order to the headwaiter, who also could be the only one to serve h...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.7299236291347593)]",1.0,0.5878220796585083,1.0,1.0,1.0 +In what year did the French reach Alsace?,1444,1789,"[Document(id=9684be592dc60ece166ecacdd76c443b9c9ab890c4097e28f83c62b1b84e5397, content: ' France did endeavour to promote Catholicism; Strasbourg Cathedral, for example, which had been Luth...', meta: {'name': 'Alsace', 'source_id': '1105499d303196fb0b7ae4d26209720b0040e63376f114d458fd77f05836cbd4', 'page_number': 1}, score: 0.6567755301901245)]",1.0,0.2011210322380066,1.0,1.0,1.0 +Who was responsible for the handling of the Roman census?,two censors,censors,"[Document(id=3749ef6e14ea9af3893cfd84917e3a39acc2212480fc846e5a9231414a5599e6, content: ' Praetors administered civil law and commanded provincial armies. Every five years, two censors were...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.5462632729451639)]",1.0,0.8454219698905945,1.0,1.0,1.0 +When was a resolution agreed to about Chinese human rights issues in San Francisco?,"April 1, 2008","April 1, 2008","[Document(id=80f5e2e80143434aa6d2d78a16f411f7f6ab4f8ffffbd29914ad0c75e8bf31da, content: ' The cost to the city for hosting the event was reported to be USD $726,400, nearly half of which ha...', meta: {'name': '2008_Summer_Olympics_torch_relay', 'source_id': 'a45e6ea09a1ff12d8a239a682c8ef3735ae894d5ab6427b50909053bb046e8b5', 'page_number': 1}, score: 0.5523669903863153)]",1.0,1.0,1.0,1.0,1.0 +Who was the last pope to be crowned?,Paul VI,John XXIII,"[Document(id=dddf03b5e89f37a3f4e39706bd8e14f1268e93fe1d6691b53cebb466faffbabf, content: ' This was the first time in over 500 years that this name had been chosen; previous popes had avoide...', meta: {'name': 'Pope_John_XXIII', 'source_id': '21b200b333306d54410b53cfd12e123951dabfc1f534196e5309132d00e8184d', 'page_number': 1}, score: 0.5775477784756959)]",0.0,0.4223441183567047,0.0,0.0,0.0 +Who handles serious medical emergencies on Norfolk Island?,the Royal Australian Air Force,Royal Australian Air Force,"[Document(id=c983e8f9fccac478cd7d895c53ca7636f4291c3a87693dd68246b03e75a4be42, content: ' All visitors to Norfolk Island, including Australians, are recommended to purchase travel insurance...', meta: {'name': 'Norfolk_Island', 'source_id': '8be872c9c3302a16163599d52ec60841ddff13042f8e8e2e0aa14de2c192e2ee', 'page_number': 1}, score: 0.6392372140463647)]",1.0,0.9866578578948975,1.0,1.0,1.0 +Were Roman soldiers expected to purchase their own equipment?,"wealthiest citizens, who were able to purchase the best equipment",the Roman state was forced to arm its soldiers at the expense of the state,"[Document(id=f0adc94ea827dd57e6a4ed0151d80a56b2b699e6457dfd794787f7d9f9121082, content: ' As a result, the Roman state was forced to arm its soldiers at the expense of the state, which it h...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.521156964371783)]",1.0,0.02006305381655693,1.0,1.0,1.0 +Who did Nasser's group try to assassinate?,Hussein Sirri Amer,Israel,"[Document(id=e3eb8d73377a55465b398afb7b3b16c9d73a20ea7ff944883d6ce9f984b32ec6, content: ' Nasser refused the call upon determination that the air force lacked pilots and Amer's handpicked o...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 0.5755809019449938)]",1.0,0.28454625606536865,1.0,1.0,1.0 +What was AJ Ayer an important member of?,Logical empiricism,Quraysh tribe,"[Document(id=7fe3d60e77356f23e0d8721ed8155f6235831447e86e68263318c414add29bce, content: ' +After the assassination of Uthman in 656, Ali, a member of the Quraysh tribe and the cousin and son...', meta: {'name': 'Umayyad_Caliphate', 'source_id': '704c4038f63f9c5a686218060cb31639c0333dda34e87e804be75e5fec05ec53', 'page_number': 1}, score: 0.37085195047316144)]",0.0,0.030898451805114746,0.0,0.0,0.0 +In which year was a settlement reached regarding GE's pollution of the Housatonic River?,1999,1999,"[Document(id=ff4e5f9b67b28efe1d784938ea3eabd7391e59030ee2fef37239be9884eafceb, content: ' In 1999, the company agreed to pay a $250 million settlement in connection with claims it polluted ...', meta: {'name': 'General_Electric', 'source_id': '26d4044eb2d79191635af69e57c262af8041a70871e60e989128c458529f04cc', 'page_number': 1}, score: 0.46391543194301055)]",1.0,1.0,1.0,1.0,1.0 +What area in Charleston is considered the 'industrial area'?,the city's Neck area,Charleston-North Charleston-Summerville Metropolitan Statistical Area,"[Document(id=d977a4cc7a4d3b3c59cbe399796de677701ac0e1ffb496b4eac87bdcb8e5aefb, content: '4 °F (20 to 22 °C). +The Charleston-North Charleston-Summerville Metropolitan Statistical Area consis...', meta: {'name': 'Charleston,_South_Carolina', 'source_id': '6135d76cf2064b0c4f70e685c5c7e15b8093869ebd10f9a74e8acb07e2173763', 'page_number': 1}, score: 0.5197195559087473)]",1.0,0.3598399758338928,1.0,1.0,1.0 +In what year did Frédéric obtain citizenship in France?,1835,1835,"[Document(id=2dc3b5cfb05bfeb4e2c0b072a5a5e6417b1109588ffcbd88ffd03346c8bc1975, content: ' In France he used the French versions of his given names, and after receiving French citizenship in...', meta: {'name': 'Frédéric_Chopin', 'source_id': '5e95b80d7ceb3febb2d1a7dfe09730771a231fc8d23c15cd74d3fa2327cfcb14', 'page_number': 1}, score: 0.7137269057215699)]",1.0,1.0,1.0,1.0,1.0 +What was Tesla afraid someone was trying to do with his invention?,steal the invention,steal the invention,"[Document(id=4d3444ecb9a546c18669ac8c8405c5f0787407509b989960d32c3dae162fd6f1, content: ' Tesla tried to interest the US War Department, the United Kingdom, the Soviet Union, and Yugoslavia...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.6633406928003599)]",1.0,1.0,1.0,1.0,1.0 +What is the group called that does not agree with government at all?,anarchists,cordon sanitarian,"[Document(id=cb8ff1f80bb8afd12da5018a30a88e68a05aa1e3af1e27035c696cded3739c3e, content: ' When a party achieves enough seats in an election to form a majority, the party's frontbench become...', meta: {'name': 'Political_party', 'source_id': '476a9215de81b75a4f17737eed1679570973e55b5ed6175aa0ec1014b09d4632', 'page_number': 1}, score: 0.44697301723543903)]",0.0,0.23455810546875,0.0,0.0,0.0 +What was the name of the bookstore the authors met in?,Old Corner Bookstore,Old Corner Bookstore,"[Document(id=eaef61513b212f4992997f625d63d4d26977ecf06a770cd2fa38fcfdabfdb986, content: ' Irish Americans are a major influence on Boston's politics and religious institutions. Boston also ...', meta: {'name': 'Boston', 'source_id': '44b3642e0403a5651f6e670019c559e255665af6d80d967a0700be9cc365a0bb', 'page_number': 1}, score: 0.4665390735579297)]",1.0,0.9999999403953552,1.0,1.0,1.0 +Time and space are both examples of what type of resource?,complexity resources,"operational definition of time, wherein one says that observing a certain number of repetitions of one or another standard cyclical event (such as the passage of a free-swinging pendulum) constitutes one standard unit such as the second, is highly useful in the conduct of both advanced experiments and everyday affairs of life. The operational definition leaves aside the question whether there is something called time, apart from the counting activity just mentioned, that flows and that can be measured","[Document(id=849ac9a1750eb7540fb92137ca4256c703db879b81e07bec1f34d0071ef5b30e, content: ' An operational definition of time, wherein one says that observing a certain number of repetitions ...', meta: {'name': 'Time', 'source_id': 'f3ee42c6d38306d65c6207a15a1b9c65ff08a69728d5d0197f9593548177b229', 'page_number': 1}, score: 0.5233312924187772)]",0.0,0.17993292212486267,0.0,0.0,0.0 +What had happened to Frederick's extra troops from Russia?,"Catherine, who ended Russia's alliance with Prussia and withdrew from the war",He also placed a corps of his own troops under Frederick's command,"[Document(id=ba197620bd7dce47cd5783da779f1718bab82d5ef9f7b4a77e908fd7397ad72a, content: ' Then on 5 January 1762 the Russian Empress Elizabeth died. Her Prussophile successor, Peter III, at...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 0.726811910218078)]",1.0,0.28628912568092346,1.0,1.0,1.0 +What percentage of its workforce did Dell terminate in from 2007 to 2008?,10%,10%,"[Document(id=a3b415fe93ba5b2bfab1307e12a6c3d0b910a94f573fedacc0fe6560757ac075, content: ' In total, Dell announced the ending of about 8,800 jobs in 2007–2008 — 10% of its workforce. +The co...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 0.6221438501655498)]",1.0,1.0,1.0,1.0,1.0 +What organization was Rome's official caretaker?,the senate,consuls,"[Document(id=e80397be99a173e16bbb388b99296740ebe497315e26491c8927ee1904e1464c, content: ' Consuls had supreme power in both civil and military matters. While in the city of Rome, the consul...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.5694500732954679)]",0.0,0.3664471507072449,0.0,0.0,0.0 +Who chose BYU as the location of the national Middle East Language Resource Center?,United States Department of Education,United States Department of Education,"[Document(id=10406721e1bacc4b94568efb8105444ce33c23b507417efe2e032c93615c0677, content: ' Several of its language programs are the largest of their kind in the nation, the Russian program b...', meta: {'name': 'Brigham_Young_University', 'source_id': 'cb3b81b6f2c540426803106face67bc5fd4db7b1a9ef422ec3601456732b4785', 'page_number': 1}, score: 0.7174902149566916)]",1.0,1.0,1.0,1.0,1.0 +Who discovered Hydrogen gas?,Robert Boyle,Robert Boyle,"[Document(id=694b7ad0ec30f16563c095bfd8aa73a286298746fa8b5b6b568961e806fe7462, content: ' +In 1671, Robert Boyle discovered and described the reaction between iron filings and dilute acids, ...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.7181772030878829)]",1.0,1.0,1.0,1.0,1.0 +Where was Polybius forced to go as hostage?,Rome,Rome,"[Document(id=39971eda0fac39993fb2ad01159df52ea10276610c72fad19cdc5a8d781c70e0, content: ' The earliest and most credible surviving source for the Hellenistic period is Polybius of Megalopol...', meta: {'name': 'Hellenistic_period', 'source_id': '322682cbb61d31c17e6d21e06723e94efd0ef242a5c9f308888fa14209c4078b', 'page_number': 1}, score: 0.515861487810237)]",1.0,1.0,1.0,1.0,1.0 +In what decade was disco popular?,1970s,1970s,"[Document(id=901a9f1636faf9035626a9559750c7d346d38dbd3d60a92fc29a9aaf68f78dc7, content: ' Other performing arts venues in Miami include the Gusman Center for the Performing Arts, Coconut Gr...', meta: {'name': 'Miami', 'source_id': '39ab1ce422fd3baeedbb69a9a90b4c0e43c513f6161698478a96f09e5d9f7285', 'page_number': 1}, score: 0.5381765557556315)]",1.0,1.0,1.0,1.0,1.0 +What do the rivers form?,lakes,Nieuwe Maas,"[Document(id=2c02265ed1b53d17cbf8d8090f6a3dad995df9f6a8bbe12e06c780368660607e, content: ' The middle flow begins as Nederrijn, then changes into Lek, then joins the Noord, thereby forming N...', meta: {'name': 'Rhine', 'source_id': '23686e48a61504679a91dac10d83b287aef3dff7dabc0ab881c281059187ce6a', 'page_number': 1}, score: 0.5730175210990665)]",0.0,0.16586220264434814,0.0,0.0,0.0 +What were the first 256 code points of Unicode made identical to? ,ISO-8859-1,historic scripts,"[Document(id=df3636934a87dcaa1aa37c6146a219c020357b983841dcebcc4edf19d3146c4a, content: ' For example, the ""fullwidth forms"" section of code points encompasses a full Latin alphabet that is...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.7118722829389936)]",1.0,0.08416931331157684,1.0,1.0,1.0 +"How many countries did her song ""Irreplaceable"" get number one status in?",five,20,"[Document(id=7bd8798cd87379643365f7305e27ccc9e3b89266b2ba91439f47a12218f428ba, content: ' It has so many depth and layers that it's easily as self-aware and earnest as Ray of Light."" The al...', meta: {'name': 'Madonna_(entertainer)', 'source_id': '0868d847bb768c30a7507239169d5e441bb3181b66c8b9353a863daa3d58e425', 'page_number': 1}, score: 0.5071568670430149)]",0.0,0.5910412073135376,0.0,0.0,0.0 +How many Examination Boards exist in India?,30,30,"[Document(id=87a93f72cbb9c00ea6477b2a31d21c284a2ecf07733f93b0fb4ba7b277f5b8f5, content: ' It is within the power of both the union government and the state governments to govern schools sin...', meta: {'name': 'Private_school', 'source_id': 'c62330ba95e9919513bc9fc65c0d21a748b4a518b9b1f905a113de7f37274e44', 'page_number': 1}, score: 0.5949905096450057)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What play showed an early depiction of civil disobedience?,Antigone,Antigone,"[Document(id=99743eb7e7277e11f539b5449857c5f012a842facd618f6a75ff178a89fc1f76, content: 'One of its earliest massive implementations was brought about by Egyptians against the British occup...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 0.5387064888379755)]",1.0,1.0,1.0,1.0,1.0 +"Once in conversation with the Queen, how is Elizabeth addressed?",Ma'am,Your Majesty,"[Document(id=6661c5b42cee4d9122296712db8297ce21db50225e9f1d2cad1e056d68ca32c8, content: ' Additional styles include Defender of the Faith and Duke of Lancaster. When in conversation with th...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.5861381499007268)]",1.0,0.3193480372428894,1.0,1.0,1.0 +In what century did Paris become largely into fashion?,19th,19th,"[Document(id=ff2ec1a3232f7d74c84b198defa77dd9879cdb65474a7dab2bf47da19675285e, content: ' +Paris has been an international capital of high fashion since the 19th century, particularly in the...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.6258372807721067)]",1.0,1.0,1.0,1.0,1.0 +When did Alfred the Great begin his reign?,871,871,"[Document(id=6f9dc46d6eac9c736c44b2255083b35d238c93547c3efcd0f85d83a3feef71b2, content: ' The speech of eastern and northern parts of England was subject to strong Old Norse influence due t...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.5426833367318926)]",0.0,1.0,0.0,0.0,0.0 +What two methods can be used when using Load Testing?,data or a large number of users,load or endurance,"[Document(id=adc4b66570c6ed9d13c88909bbdc957a40b544797d95742201330710a6ff6385, content: ' Stability testing (often referred to as load or endurance testing) checks to see if the software ca...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.5186147019911518)]",1.0,0.22238588333129883,1.0,1.0,1.0 +what may possibly be a solution to net neutrality concerns? ,municipal broadband,municipal broadband,"[Document(id=5f8dc7264853d3b0b88a03420e7878e16fa4c08ef70ab7cc5d722e889b21dda6, content: 'S. Federal Communications Commission (FCC) was reported to be considering a new rule that will permi...', meta: {'name': 'Internet_service_provider', 'source_id': 'b6cdeebacab1c0fc99cc348ba10a7ad5f8a09ca4691c74f89374730939ffbbb4', 'page_number': 1}, score: 0.6102544598734859)]",1.0,1.0,1.0,1.0,1.0 +What are the ground-attack aircraft in the USAF usually deployed in support for? ,U.S. ground forces,U.S. ground forces,"[Document(id=87ffdf42e04cd4e5aabc33e35caf9614ba61758e355a1d97a38a524ba0bedf46, content: ' +The ground-attack aircraft of the USAF are designed to attack targets on the ground and are often d...', meta: {'name': 'United_States_Air_Force', 'source_id': '1bb222de66e5a2c81f1d5b75bf3eced066939aec82934a5cb176a607b4e8cc8f', 'page_number': 1}, score: 0.7377392492190507)]",1.0,1.0,1.0,1.0,1.0 + In what year did the invasion of Russia take place?,1812,1853,"[Document(id=5e62f2afbd35341eab44e01cce599dfb08f5845fb82d0fb96bdbebed584d740b, content: ' As far as Britain was concerned, defeats inflicted by Russia on Persia and Turkey demonstrated its ...', meta: {'name': 'British_Empire', 'source_id': '8f673e96ef153333a8fc1d3855be60f5be1829316ed1fcb58dd8590fdb083946', 'page_number': 1}, score: 0.6239564400308288)]",0.0,0.462875097990036,0.0,0.0,0.0 +What was the practice of religion to the Romans?,personal choice,a part of daily life,"[Document(id=064c7c3a01bdf93ad9d4d1654337277537a969dc5714be1920555a00340b215e, content: ' Even the most skeptical among Rome's intellectual elite such as Cicero, who was an augur, saw relig...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.6812059913435389)]",1.0,0.26575636863708496,1.0,1.0,1.0 +What is one example of failure testing?,Software fault injection,Software fault injection,"[Document(id=827178e2fa13b1045cb5486ccca30dd5c2a87aa9e9f903cbdd2808ec1b93a098, content: ' Beta versions can be made available to the open public to increase the feedback field to a maximal ...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.6034773868609032)]",1.0,1.0,1.0,1.0,1.0 +"Out of the maximum amount of players allowed, one must be a what?",goalkeeper,goalkeeper,"[Document(id=e6ee12359c832f972207465515e8a9b305bdf92628324bb99b5470cc0b0b74b4, content: ' The same laws are designed to apply to all levels of football, although certain modifications for g...', meta: {'name': 'Association_football', 'source_id': 'f2b7305b09ad8a43b8389a5eb2d84b8ea4b4deaa7968d21fe9da9da9ecda2c6b', 'page_number': 1}, score: 0.56937587493132)]",1.0,1.0,1.0,1.0,1.0 +In which year was the new constitution ratified?,1855,1982,"[Document(id=7d6e1c6a8e83b4b29ce4b75833fc74ff72c585bad0dc7e4ea9a862be027f8038, content: 'C. Statehood Green Party. The nearest this movement ever came to success was in 1978, when Congress ...', meta: {'name': '51st_state', 'source_id': 'aa3341a0a65518fe53b022b57fd32c865f74c249a1b4d32919c38bfd1cead5b7', 'page_number': 1}, score: 0.5897939832060628)]",0.0,0.5184707045555115,0.0,0.0,0.0 +What vessel was attacked in 1845?,Naiad,Naiad,"[Document(id=06462bba1bbaf61b1e06ec260c9ebb15ac9f6984188e161293ac729b7950da5d, content: ' One year later, the American schooner Dolphin arrived and picked up two boys, the last survivors of...', meta: {'name': 'Marshall_Islands', 'source_id': '762a4f6095de127ded5441c7a17d1c9c38f731ac9cb1ffe8de30af6573be7edf', 'page_number': 1}, score: 0.5354329878787587)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did Travel + Leisure list Yale as one of the most beautiful campuses?,2011,2011,"[Document(id=1eb83a50e062eec7af6abab3de1bf0b5576a4c75ac168cc64f1b77a2e268c579, content: '7 km2) Yale-Myers Forest in Connecticut's Quiet Corner—and nature preserves including Horse Island. +...', meta: {'name': 'Yale_University', 'source_id': 'f33638c4d6b8d7572ba500f3a8affca1729aae3046c83a94e2a7424dd50f6549', 'page_number': 1}, score: 0.6826308918386579)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What sort of variants did the Human Genome Project indicate that many of the measured transcripts were?,alternative variants,expressed sequence tags,"[Document(id=c181af6d9e4133e2db3dd3214c28363e87a8f250668a4fc4d9c06fd0060f35eb, content: ' Conversely, plants can have extremely large genomes, with rice containing >46,000 protein-coding ge...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 0.516086017715844)]",1.0,0.21117305755615234,1.0,1.0,1.0 +During what year did the Italians invade Libya?,1911,1911,"[Document(id=3cdcefdaa320ecb0d100eef0c92063190f1030f98ee702385d5ad087a454afc6, content: ' He repeatedly expressed a preference for the desert over the city and retreated to the desert to me...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.6345208523493653)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What did Davidson believe the God of Aristotle is synonymous with?,rational thought,rational thought,"[Document(id=14435d1137e579f740d93ef8fabdf50640828e96d9dd732c7b8c6d4932ae6a58, content: '"" The theory was indebted to Aristotle's pluralism and his concepts of Soul, the rational, living as...', meta: {'name': 'Idealism', 'source_id': '55a0bd42a4a737442a2d93fe0926424e236910de01b2406ed43c174f2225ff9d', 'page_number': 1}, score: 0.6069175571756187)]",1.0,1.0,1.0,1.0,1.0 +What percentage of students at Notre Dame are the children of former Notre Dame students?,21–24%,25%,"[Document(id=7b16015c2a711597133e8b72a34ec9ddbe7fa1d416026f927c41b10d82dfa759, content: ' While all entering students begin in the College of the First Year of Studies, 25% have indicated t...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 0.5862371777189133)]",1.0,0.7584383487701416,1.0,1.0,1.0 +On what day did U.S> planes located Ozawa's fleet?,second,midnight,"[Document(id=8f9a474db1bbc2a41725828917a296999dd03c7fd6c62c0e1adf58a5a2f207f9, content: 'S. Third Fleet was formidable – nine large carriers, eight light carriers, six battleships, 17 cruis...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 0.6451543648987118)]",1.0,0.23394635319709778,1.0,1.0,1.0 +Greek was crowned the European champions in what event?,Euro 2004,Euro 2004,"[Document(id=4cbbc741471265b8757147f853168935ce795df102abf69b11f058149ff15eff, content: ' Their best ever performance was in the 1896 Summer Olympics, when Greece finished second in the med...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.64135190830618)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Which tense indicates that an action hasn't finished?,imperfective,imperfective,"[Document(id=06048f854345fb7e2b6f12877040f57cf87279336165b7cb7893e29f87cc9222, content: ' On occasion of the publication’s 45th anniversary, the Croatian weekly journal Forum published the ...', meta: {'name': 'Serbo-Croatian', 'source_id': '2cc55814bc801258e8300ed82e003428dbf11715c74ad99297d61c5dd77023f4', 'page_number': 1}, score: 0.4510094094135552)]",1.0,1.0,1.0,1.0,1.0 +What acoustic element could be distorted by different audio settings in iPods prior to 2007?,Bass,bass instruments,"[Document(id=158a6b8443733e42d24dec31180770da4724f3bd14ed49e47a937624f7aeed47, content: ' This would happen for EQ settings like R&B, Rock, Acoustic, and Bass Booster, because the equalizer...', meta: {'name': 'IPod', 'source_id': 'f0324763387a7e11c235569bada6935891b2c5ba40dd065ab5acc1bb5de8dbed', 'page_number': 1}, score: 0.626275648011194)]",1.0,0.8350290656089783,1.0,1.0,1.0 +In what way can Neutral triatomic hydrogen exist?,excited form,an excited form,"[Document(id=e9b537adae4e88d5a591f33f2f06388da096013764cb94bfc4e3ea25bb2b2e23, content: ' This charged ion has also been observed in the upper atmosphere of the planet Jupiter. The ion is r...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.6153574752057729)]",1.0,0.9592359066009521,1.0,1.0,1.0 +How are children used in the conflicts in Burma?,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements.,child soldiers,"[Document(id=30c864dfe7ee098b254f1517149763909eae40b70a74f63f84fffd7a77fc3e1f, content: ' According to Samantha Power, a US delegation raised the issue of child soldiers with the government...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.53370525098552)]",1.0,0.6079859733581543,1.0,1.0,1.0 +In what decades were Arsenal players stereotyped in a negative way?,1970s and 1980s,1970s and 1980s,"[Document(id=1bdd87144e8c05c0c8820e3bd6186deb63a4be13d5810e39af795993e4721bb7, content: ' The book was twice adapted for the cinema – the 1997 British film focuses on Arsenal's 1988–89 titl...', meta: {'name': 'Arsenal_F.C.', 'source_id': 'b197b14120aabbc3bf2073d4b3e99c1c51d56d5ac5248f8a97dadcfa056a6f7e', 'page_number': 1}, score: 0.5961240467039464)]",1.0,1.0,1.0,1.0,1.0 +When did new construction start in Melbourne?,late 1890s,2012,"[Document(id=811855915f2ce9785fa299ca9ab7e62030bd03c861b34240bfa6a8b83aafca1d, content: ' Air pollution, by world standards, is classified as being good. Summer and autumn are the worst tim...', meta: {'name': 'Melbourne', 'source_id': 'e9cc00699ea29b72949874ec0e147efa277ecdaa2a727246f376aab1a8d342bd', 'page_number': 1}, score: 0.5440868087434042)]",1.0,0.2986200153827667,1.0,1.0,1.0 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:1__chunk_size:10.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:1__chunk_size:10.csv new file mode 100644 index 0000000..ae34b46 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:1__chunk_size:10.csv @@ -0,0 +1,122 @@ +questions,true_answers,predicted_answers,retrieved_docs,doc_mrr,sas,doc_map,doc_recall_single_hit,doc_recall_multi_hit +What were the earliest priesthoods? ,flamines,flamines,"[Document(id=ac8e9c286327aea24d00bceb002b08ad3fbc3007673c77da9b48b6b6b91aa76f, content: ' Some rites of the domus (household) were held in public places but were legally defined as privata ...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5545176647418332)]",1.0,1.000000238418579,1.0,1.0,1.0 +When was a third visit to Mnafeld scheduled? ,early 1546,1992,"[Document(id=362969eae3f46b97e578fb1b65143f839fc2a787fe322e1e83b8270c0ac09c94, content: ' Bush's 2007 European tour, Edi Rama, Tirana's mayor and leader of the opposition Socialists, said: ...', meta: {'name': '51st_state', 'source_id': 'aa3341a0a65518fe53b022b57fd32c865f74c249a1b4d32919c38bfd1cead5b7', 'page_number': 1}, score: 0.38915419615303337)]",0.0,0.34838998317718506,0.0,0.0,0.0 +What was the name of the airfield where heavy raids took place?,Farnborough,Bomber Command,"[Document(id=67346696a72eb98fc01faf97de2c3a68cf161364d6413eefe068134d10fcf3de, content: ' Attacks on Birmingham and Coventry were subject to 500 short tons (450 t) of bombs between them in ...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 0.600247916969115)]",1.0,0.2595972716808319,1.0,1.0,1.0 +The Modern English ending -'s is derived from what ending in Old English?,-es,-es,"[Document(id=081239836dc280856dc2c1ab168f17d8fffffed12cbaeb50b62cd11b58a116b4, content: ' It maintains several distinct cases: the nominative, accusative, genitive, dative and (vestigially)...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.6403058641147197)]",1.0,1.0000001192092896,1.0,1.0,1.0 +In what area is it common for spiritual mentorship to be extremely high?,the West,"low-church"" traditions of Protestantism","[Document(id=70acdc5f12009efe2ebd2dd786809c97eacf0dc441dc5d0ed86857bd7db13270, content: ' Also, with the advent of the internet, many teachers are now selling their lesson plans to other te...', meta: {'name': 'Teacher', 'source_id': '8d3b7bd3176a10380300a49fa713b8fffa5e77a181df51ea823d2bdeb46d4c7b', 'page_number': 1}, score: 0.5329079416622291)]",1.0,0.1646253764629364,1.0,1.0,1.0 +Who is the only Greek volleyball club to win European titles?,Olympiacos,Olympiacos,"[Document(id=c6017a99c04926bab23928d5fed5517ccdf053e42b0a0376e97782a93ab5a6b4, content: ' The Greece men's national water polo team became the third best water polo team in the world in 200...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.6852555389578893)]",1.0,1.0,1.0,1.0,1.0 +How many judges were on the panel for this season?,four,three,"[Document(id=17489faf6fb39343de67cc2bfe448e959559775109bb646aa0e4d02b716ddf77, content: ' Jennifer Lopez and Steven Tyler joined the judging panel in season ten, but both left after two sea...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 0.6149654088231247)]",1.0,0.8083884716033936,1.0,1.0,1.0 +"Along with a chemical process, what type of process is used by night vision devices to convert infrared into visible light?",electrical,electrical,"[Document(id=1c8a379dc092fa8d374927640e6d16f46286416581451c66a1da80fef1412acc, content: ' Infrared can also be used to detect protostars before they begin to emit visible light. Stars emit ...', meta: {'name': 'Infrared', 'source_id': 'ba9d9843065facd8aa75e472f0c7d760ae3d78712335a49f7abaf499acb21d1c', 'page_number': 1}, score: 0.6642589447541861)]",1.0,1.0,1.0,1.0,1.0 +The second most prominent religion in New York is what?,Judaism,Judaism,"[Document(id=1eed45d4c946ec058384afdd313db9b847d2b384629f1872466e03c7bc13a2e5, content: ' +Christianity (59%), particularly Catholicism (33%), was the most prevalently practiced religion in ...', meta: {'name': 'New_York_City', 'source_id': '8a1509d057a51ed7a68b42ff87df5d459ac7f15f3d0f4f1685859ba197f9686d', 'page_number': 1}, score: 0.6168396798925952)]",1.0,1.0000001192092896,1.0,1.0,1.0 +The Free Hanseatic City of Bremen includes which cities?,Bremen and Bremerhaven,two urban districts,"[Document(id=b99c33e3f1152b05cb402034099d36d6b8fc3942c6deb97d0de9d7f47d56c738, content: ' The equivalent of the Minister-President is the Senatspräsident (President of the Senate) in Bremen...', meta: {'name': 'States_of_Germany', 'source_id': '60764e89141121f734e956aeeb48fcbe12c7e4fdec683c4f9fa29406a3f0579e', 'page_number': 1}, score: 0.5237965717088976)]",1.0,0.39612674713134766,1.0,1.0,1.0 +What seeks to limit the use of polysyllabic characters?,CPC,Unicode,"[Document(id=1ca3328892ba24004a75d8cfaab0f58ce32bed8ee034be6fa54e33e8b0a62eb5, content: ' UTF-16 extends UCS-2, using one 16-bit unit for the characters that were representable in UCS-2 and...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.5571209200423095)]",0.0,0.1403539478778839,0.0,0.0,0.0 +In what courts can federal habeas corpus suits be brought?,federal,federal,"[Document(id=478de5bfd3be02e5795166dfaf36bf31f55ed01f66e8d8ff978214365dc0f179, content: ' Further, the Court's decision in Baze v. Rees, upholding a lethal injection method used by many sta...', meta: {'name': 'Capital_punishment_in_the_United_States', 'source_id': '0a4e4dda32f7be6a4f78da5ed9ee21d2cc7659112c0d087b2923a482bdc77b5b', 'page_number': 1}, score: 0.5154552807306718)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What did Whitehead consider himself as a philosopher? ,rank amateur,rank amateur,"[Document(id=a022efa8ed739794e05008dbe44d9b83a87fc4f4b7a609ec096b9d437bc5089e, content: ' For Whitehead, education should be the exact opposite of the multidisciplinary, value-free school m...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 0.5968778075657488)]",1.0,1.0,1.0,1.0,1.0 +What condition does a person have to meet to be allowed by law on a coin?,deceased,borrow money on the credit of the United States,"[Document(id=9f240c943520891dcb4853fa0b77bb9da0c6144acd3f67acca4ff043b2823fb2, content: ' +Early releases of the Washington coin included error coins shipped primarily from the Philadelphia ...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.5126320755658378)]",1.0,0.023210063576698303,1.0,1.0,1.0 +Which region of the US experiences rain year round?,Pacific Northwest,California,"[Document(id=01bc00b0443b909364f804d3db4980f9c28a48894e7efe45a10b1d66bb4f6002, content: ' The uplands towards the Appalachians were included in the great eastern forested area, while the we...', meta: {'name': 'Geography_of_the_United_States', 'source_id': '6eef5e1e8e3a1d489b23e0304d3cc9472b84a44871b2fd5ecfc8ab3f12e8ccf0', 'page_number': 1}, score: 0.678538481868027)]",1.0,0.5433623194694519,1.0,1.0,1.0 +What is the ability to recognize and adapt to new specific pathogens called?,Adaptive (or acquired) immunity,Adaptive (or acquired) immunity,"[Document(id=4f2fba1eb5467425d8e57ae2a4af5c24920de101064b738c048036a68ef3852a, content: 'The immune system is a system of many biological structures and processes within an organism that pr...', meta: {'name': 'Immune_system', 'source_id': 'fcb8c5092b2596a66d4f96587d653cbabda606874d86632c35ec40ef8f023051', 'page_number': 1}, score: 0.6805778943142242)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who allegedly hit a home run to the Center?,Babe Ruth,Kyle Schwarber,"[Document(id=e7143fa77dfb51a3ba85804cccd3bf347ba3ba5227422dbc11b406b7a7ce7610, content: ' +On May 11, 2000, Glenallen Hill, facing Brewers starter Steve Woodard, became the first, and thus f...', meta: {'name': 'Chicago_Cubs', 'source_id': '10bd5330115a2fe2b9ef60d21abe3942769709161783409af0c4444ccac129c3', 'page_number': 1}, score: 0.5046003190275887)]",1.0,0.5717676877975464,1.0,1.0,1.0 +Which type of graduate from Imperial earned the second highest average starting salary after graduation?,Computing graduates,3.36,"[Document(id=8190e2f3cfba3c67fc319c1eba30ec986d8174be2decff7ed0bb12a364fb52bb, content: ' +The College's endowment is sub-divided into three distinct portfolios: (i) Unitised Scheme – a unit...', meta: {'name': 'Imperial_College_London', 'source_id': '8ff02d910ce681fdd72074b13ed322271625386966ee827dabf1c93ca556fd6e', 'page_number': 1}, score: 0.49844352438506834)]",1.0,0.09059444814920425,1.0,1.0,1.0 +In what year did Eisenhower propose his second civil rights act?,1960,1960,"[Document(id=78d1760ca2fa0904e5480a91d388ba9d6c37d737a65426e5cbb2488333bc1e1a, content: ' Eisenhower made clear his stance in his first State of the Union address in February 1953, saying ""...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 0.6915934272370499)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What is the current height limitation in Paris in central areas?,50 m,50 m (160 ft),"[Document(id=891cab83df456a1bb11d8e7c301e45b02e564d224d3be1f7bdb82d02edc8f221, content: ' Modern Paris owes much to its late 19th century Second Empire remodelling by the Baron Haussmann: m...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.66910672984007)]",1.0,0.804789662361145,1.0,1.0,1.0 +Income level was found to have a signification correlation to which outcome in the study?,nutrition literacy,Crime rate,"[Document(id=cfc35d312a7f860ff8772433762d5f27e606e10585399783dd129af1d4d74ed4, content: ' Other studies have confirmed this relationship. The UNICEF index of ""child well-being in rich count...', meta: {'name': 'Economic_inequality', 'source_id': '648ff32b8c146c974b23e115e3a61420b1cdbea9de1f5bf38442ce4c1d9caa42', 'page_number': 1}, score: 0.5542598259336555)]",0.0,0.0639934316277504,0.0,0.0,0.0 +How much did Viacom want in damages in their lawsuit against youtube?,$1 billion,$1 billion,"[Document(id=56ff4bdb4f543c61c1f516ba1e544e83e117ab5512bae58b2264fa6b6d86f26b, content: ' Three successful complaints for copyright infringement against a user account will result in the ac...', meta: {'name': 'YouTube', 'source_id': '4d18798daeceb997a6dcb04b575dbbdad768671ca9202116370611d944d510be', 'page_number': 1}, score: 0.740709470725756)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who should be able to challenge administrative orders in court?,individuals,individuals,"[Document(id=5c5f6a0591170904a56f4c0417e667089761d8d8f013f55283aff164fdadd75b, content: ' Dicey's: “no man is punishable or can be lawfully made to suffer in body or goods except for a dist...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 0.5566146748558449)]",1.0,1.000000238418579,1.0,1.0,1.0 +The Slavs were under what name in the early 6th century?,the Antes and the Sclaveni,Veneti,"[Document(id=4236cbbf71506fe35f0da033ea9b465edf3cfbfdd8e6bac2b259b18835cc0408, content: ' +The relationship between the Slavs and a tribe called the Veneti east of the River Vistula in the R...', meta: {'name': 'Slavs', 'source_id': 'a55731ff3cd03d8fe34d4e646dbfcdc2088f01190e5110c9ff0baf4e4b486847', 'page_number': 1}, score: 0.6927475328468398)]",1.0,0.2406373918056488,1.0,1.0,1.0 +How are the results simplified when using the SASO?,"into four summaries that look specifically at responses that correspond to either homosexuality, heterosexuality, bisexuality or asexuality.","measures sexual orientation on a continuum, considers various dimensions of sexual orientation, and considers homosexuality and heterosexuality separately","[Document(id=04d7cfe710801483b2e5c7c67b89cf9966fc92203783ea424fccf90e1b51520c, content: ' The classification scheme works such that individuals in the same category show the same balance be...', meta: {'name': 'Sexual_orientation', 'source_id': 'f94279650936a403aa3d2b43ab1ad42b1fd14d05455aa86fbcda6cda978ffe5c', 'page_number': 1}, score: 0.3825635282360615)]",1.0,0.6333848237991333,1.0,1.0,1.0 +What socioeconomic class used the Florentine Tuscan language in Dante's time?,upper class,literate and upper class,"[Document(id=149c6390292392d4438c9191dca43bbe827c20a86bb7bfc502679813d7fcc108, content: ' Due to Eastern Lombard's status as a Gallo-Italic language, an Eastern Lombard speaker may, in fact...', meta: {'name': 'Dialect', 'source_id': '2e05b686e48c9512ae5466f1d15202b44fdc903ea32d0a42c5dbc9ef173d051d', 'page_number': 1}, score: 0.666760845084105)]",1.0,0.7628111839294434,1.0,1.0,1.0 +"In addition to ammo proposals, what other need was articulated?",The need for some form of tracer or smoke trail,ground based air defence,"[Document(id=02cb335221aaee4ff1ec0fda1bf1d119c70f55fc89a407c08c8d2b31e60cf104, content: ' The M1 version was approved in 1940. During the 1920s there was some work on a 4.7-inch which lapse...', meta: {'name': 'Anti-aircraft_warfare', 'source_id': 'c54639424f62ce369356ed8c4e1e9fab8769359dd28029aa89832f5aa994f45d', 'page_number': 1}, score: 0.39528168687505916)]",1.0,0.24992957711219788,1.0,1.0,1.0 +What is that power that enables us to love?,Prevenient grace,Love in the soul is both the cause and result of true understanding and judgement,"[Document(id=3b09da7999a543e2dd4e11715dbc298468750ad12bafc0459e403691c4c7015a, content: ' Thus, it is easier to state what God is not, than to state what God is: ""... we affirm things of Go...', meta: {'name': 'Dominican_Order', 'source_id': 'f55ba4e5b37c737e23256c74229a0bf3a5c697b70e171c6ce20c437d900b023e', 'page_number': 1}, score: 0.47203679374271235)]",0.0,0.31394249200820923,0.0,0.0,0.0 +"Who was assassinated on September 22, 189 AD?",He Jin,Caesar,"[Document(id=4f17ed621ff3df1e2ab21fa16d21495b6fcfc7630eb0a8ebe5cdfa2aa939ba81, content: ' This made his person sacrosanct, gave him the power to veto the senate, and allowed him to dominate...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.4484555871081836)]",0.0,0.18531587719917297,0.0,0.0,0.0 +How did Gaddafi change the legal system?,he introduced sharia as the basis for the legal system,sharia law,"[Document(id=6d61cd8c91a3530f637646f434785421d170c3d742a9e1e779b8ad38390b3f4f, content: ' Gaddafi proclaimed that the People's Congresses provided for Libya's every political need, renderin...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.605111406774985)]",1.0,0.6366350650787354,1.0,1.0,1.0 +What name was it assumed Elizabeth would take upon her marriage to Philip?,"Mountbatten,",Peter Townsend,"[Document(id=4b69cd784f30edcd0bcf343e88529300a3f5f46cab6549e05e32a9f6208375df, content: ' +Elizabeth and Philip were married on 20 November 1947 at Westminster Abbey. They received 2500 wedd...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.6031312122817887)]",1.0,0.17705275118350983,1.0,1.0,1.0 +What was the attack on the British weakness?,Battle of Fort Bull,Minorca in the Mediterranean,"[Document(id=8f27b8606c29f864e870a0c3ebc173abdd66686eabf5bdc41d7e51673bbdbc31, content: ' As prime minister Pitt committed Britain to a grand strategy of seizing the entire French Empire, e...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 0.601896930875277)]",0.0,0.0038108360022306442,0.0,0.0,0.0 +How much of South Tucson is Native American?,10%,Paleo-Indians,"[Document(id=ca6350a9a8c38782a1850582e37efb990f91441f3354b7b61cf884e4720f16a2, content: 'Tucson (/ˈtuːsɒn/ /tuːˈsɒn/) is a city and the county seat of Pima County, Arizona, United States, a...', meta: {'name': 'Tucson,_Arizona', 'source_id': '1cd8368afbbb7156e4d37c16c9795a1cf7209fbd0036331b6759337ae62d7224', 'page_number': 1}, score: 0.5982970062118942)]",1.0,0.08434860408306122,1.0,1.0,1.0 +Whose theory claimed that species were unchanging and designed by God?,John Ray,John Ray,"[Document(id=67c87173e747d72d79bc92866e553902d56583aae14965ecaa3b2da507256976, content: ' After the turmoil of the English Civil War, the Royal Society wanted to show that science did not t...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 0.6990559439833335)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who can restore a database that has been corrupted?,database administrator,software error,"[Document(id=6c8c749813cf91978d96464cce374f7a04d7464bb50d66f4cda7f08549bc9fae, content: ' It may be desired that also some aspects of the architecture internal level are maintained. A compl...', meta: {'name': 'Database', 'source_id': '98b09743ece9be42ad021f0dc6ffab08c322e7eeace6aae0b98b4bbf67224bd3', 'page_number': 1}, score: 0.4415903158428742)]",1.0,0.23398324847221375,1.0,1.0,1.0 +What unpopular philosopher's text did Avicenna learn from?,Abu Abdullah Nateli,Abu Abdullah Nateli,"[Document(id=cbebf9a70685ea0e72bcf8c9f1dbbe9b7163ee26279b633446edf34314600fb3, content: ' Janssens demonstrated that Avicenna was a Sunni Hanafi. However, the 14th cenutry Shia faqih Nurull...', meta: {'name': 'Avicenna', 'source_id': '25cbb1011a29473e1dfa8d96ebe95bc8ead7393548de57e871f959624f7238f1', 'page_number': 1}, score: 0.7413625193934965)]",1.0,1.0,1.0,1.0,1.0 +What was the event called that caused Ayutthaya to reclaim lost territories ?,the Chinese threat,Ayutthaya recovered its territories by 1770,"[Document(id=f08f17188f751088b2b76f2c1a69ffc86f340979f05ea84b4b201def163ec6f3, content: ' Its trade and secular administrative reforms built a prosperous economy for more than 80 years. Fro...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.5241313195161235)]",1.0,0.10474255681037903,1.0,1.0,1.0 +"Before the introduction of the Latin alphabet, how was Old English written?",runic system,runic system,"[Document(id=568b9944b8e99493b93780736bc40a387965c8d30cebea94a19881b28bc86012, content: ' More entered the language when the Anglo-Saxons were converted to Christianity and Latin-speaking p...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.6607325652412197)]",1.0,1.000000238418579,1.0,1.0,1.0 +"When groups that are antagonistic to each other form a coalition for hidden gain, it is called what?",unholy alliance,unholy alliance,"[Document(id=c54ac19342651f97c105c4825d0b82cbf05684cf1d7c4921e0d078a7212b4a21, content: ' A common type of embezzlement is that of personal use of entrusted government resources; for exampl...', meta: {'name': 'Political_corruption', 'source_id': 'e7c84fb03cb06b72c4ab51eb24218e9f2a10b95493e43d575342db0ffa3e9e1d', 'page_number': 1}, score: 0.49411751006790716)]",1.0,1.0,1.0,1.0,1.0 +What made a strong economic case for investing in low carbon technologies?,Stern Review,Clean Edge,"[Document(id=639620812442614c676be21cf936d052650009771bc4d08df98e8056e4441e38, content: ' +New government spending, regulation, and policies helped the industry weather the 2009 economic cri...', meta: {'name': 'Renewable_energy_commercialization', 'source_id': 'fa5f7904b8df5272f4f870ed401f610435c916171c87b47deed9a45e6bd594ea', 'page_number': 1}, score: 0.5498180514882574)]",1.0,0.10936959832906723,1.0,1.0,1.0 +"Who introduced the Latin version of the word ""insect""?",Pliny the Elder,Pliny the Elder,"[Document(id=de63707a025645c7fcc7a26e6dd37ddd29a27cd60ff187ce487e0d07217708ee, content: ' +The word ""insect"" comes from the Latin word insectum, meaning ""with a notched or divided body"", or ...', meta: {'name': 'Insect', 'source_id': 'e14215fdfb7e1394430dedb24549d773464f110b7f19b86558bc101f84126630', 'page_number': 1}, score: 0.8069591810257106)]",1.0,1.0,1.0,1.0,1.0 +What was started by the Roman Catholic Church to dispense of the Cathars in Languedoc?,The Cathar Crusade,The Cathar Crusade,"[Document(id=3f50079b5ebfa4f0c771fad96712717b3bd3050f02fa3abfb19f87bc049689e1, content: ' By the 11th century, more organised groups such as the Patarini, the Dulcinians, the Waldensians an...', meta: {'name': 'Heresy', 'source_id': '8ba473262f8508a9c99699bb09cd022993896ece1fed1463a9cf4bd7eb0c3e68', 'page_number': 1}, score: 0.4953807364007933)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What music service is a huge competitor for Tidal?,Spotify,Spotify,"[Document(id=f0c6a5cd5408a0fc01c220e5910c03a2241fbe85099410f518592c1fb7098441, content: ' ""Creating a partnership with Beyoncé, one of the most hard-working and talented people in the world...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 0.5224108713853148)]",0.0,0.9999999403953552,0.0,0.0,0.0 +How much was each copper token worth?,a halfpenny,legal tender at face value,"[Document(id=c5c8d55b2c3952d9e29c10530dc199fccdd3bfb5dc897ac40dd8ff1d01e6a97f, content: ' This has to do with Alexander Hamilton's suggestion to Congress of a fixed 15:1 ratio of silver to ...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.44825442543903155)]",0.0,0.4468753933906555,0.0,0.0,0.0 +What ships were attacked in 1852?,Glencoe and Sea Nymph,a Russian fleet attacked an Ottoman force,"[Document(id=34bd168edceec4d845ba383e25526ec637cdd2a67e74fbe1494f27559701c396, content: ' The clash came on 30 November 1853 when a Russian fleet attacked an Ottoman force in the harbour at...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 0.5355975216123309)]",0.0,0.07638347893953323,0.0,0.0,0.0 +What was the first fossil to display both clearly traditional reptilian characteristics?,Archaeopteryx,Archaeopteryx,"[Document(id=0815d5b0f30156ea39d24a8a15062d5e21822317a105f659f2000892f8d657c8, content: ' +The consensus view in contemporary paleontology is that the flying theropods, or avialans, are the ...', meta: {'name': 'Bird', 'source_id': '76114e3e9c92caec1de2bcfd38b6f22e1b48487f665fa6392f914934a6748ae1', 'page_number': 1}, score: 0.5702787263670992)]",1.0,1.0,1.0,1.0,1.0 +What are the theories that integrate molecular genetics with Darwinian evolution called?,the modern evolutionary synthesis,modern evolutionary synthesis,"[Document(id=e1b3872c9a56b024a609ebeaf211b3ea1b4799599e6935c3dd62e613f036575b, content: ' This dogma has since been shown to have exceptions, such as reverse transcription in retroviruses. ...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 0.5969093624625235)]",1.0,0.9375607371330261,1.0,1.0,1.0 +At what time did Tesla get dinner?,8:10 p.m,8:10 p.m,"[Document(id=2ed21a855190ee9369cd4351878edc528d570a2136e3a2612158d6108a59491e, content: ' until 6:00 p.m. or later, with dinner from exactly 8:10 p.m., at Delmonico's restaurant and later t...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.8054284554320554)]",1.0,0.9999999403953552,1.0,1.0,1.0 +In what year did the French reach Alsace?,1444,1789,"[Document(id=93996e15e538ef99cbc1f14e3a6eca7f547c782d02004cfe4d90863b09e3c9ba, content: ' When hostilities were concluded in 1648 with the Treaty of Westphalia, most of Alsace was recognize...', meta: {'name': 'Alsace', 'source_id': '1105499d303196fb0b7ae4d26209720b0040e63376f114d458fd77f05836cbd4', 'page_number': 1}, score: 0.6624069423136786)]",1.0,0.5213748812675476,1.0,1.0,1.0 +Who was responsible for the handling of the Roman census?,two censors,Comitia Centuriata,"[Document(id=409efc7d73eb1e4b7778886a27796612a2da5b9f46d8dd07e3972f8290409c4f, content: ' It also elected censors. Only the Comitia Centuriata could declare war, and ratify the results of a...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.5138090672272254)]",1.0,0.13780222833156586,1.0,1.0,1.0 +When was a resolution agreed to about Chinese human rights issues in San Francisco?,"April 1, 2008","April 1, 2008","[Document(id=d16248de2a5e0d9e6bbb7e85b411bf0a4143206e20471ecab73dc7158ce992b1, content: ' The cost to the city for hosting the event was reported to be USD $726,400, nearly half of which ha...', meta: {'name': '2008_Summer_Olympics_torch_relay', 'source_id': 'a45e6ea09a1ff12d8a239a682c8ef3735ae894d5ab6427b50909053bb046e8b5', 'page_number': 1}, score: 0.5365703485459956)]",1.0,1.0,1.0,1.0,1.0 +Who was the last pope to be crowned?,Paul VI,Joseph Ratzinger,"[Document(id=569b2ac7d468efc1995e7597b1a5c8cce2fb04e8e0523f0b442cd6d658321f84, content: ' He welcomed the announcement of Pope Paul VI to celebrate the 1900th anniversary of the death of th...', meta: {'name': 'Pope_Paul_VI', 'source_id': 'fc82a40c990c0b3840b06b32643858d39984b66d78ae950561d809dc84d6edf8', 'page_number': 1}, score: 0.5395101684371744)]",1.0,0.407831609249115,1.0,1.0,1.0 +Who handles serious medical emergencies on Norfolk Island?,the Royal Australian Air Force,Royal Australian Air Force,"[Document(id=c3327e32d8658307f8dca684bbdeb22895ca7a8d7ee8ad6ff0eba6200c5e6da2, content: ' All visitors to Norfolk Island, including Australians, are recommended to purchase travel insurance...', meta: {'name': 'Norfolk_Island', 'source_id': '8be872c9c3302a16163599d52ec60841ddff13042f8e8e2e0aa14de2c192e2ee', 'page_number': 1}, score: 0.6820248727155205)]",1.0,0.9528100490570068,1.0,1.0,1.0 +Were Roman soldiers expected to purchase their own equipment?,"wealthiest citizens, who were able to purchase the best equipment","They were armed with a sword and a small shield, as well as several light javelins","[Document(id=1983d91ab2ea433561865fb3ff1c494c8513349262f6ecf4a75e3747f0f18a6a, content: ' Their role in accompanying the army was primarily to supply any vacancies that might occur in the m...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.553412580040751)]",1.0,0.21872466802597046,1.0,1.0,1.0 +Who did Nasser's group try to assassinate?,Hussein Sirri Amer,Nuri al-Said,"[Document(id=0feca297c7e24ea29eb0b0ec3f9cea1e28b5bba699cb08db0269994093ec02c1, content: ' Nasser gave each of the provinces two vice-presidents: Boghdadi and Amer in Egypt, and Sabri al-Asa...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 0.6261596905435558)]",1.0,0.5893326997756958,1.0,1.0,1.0 +What was AJ Ayer an important member of?,Logical empiricism,record labels,"[Document(id=7742716fffd906e0d6edf74500caef115b60f88285d7f246abeb00e404d5d0f3, content: ' The network of supportive record labels that included Industrial, Fast, E.G., Mute, Axis/4AD and Gl...', meta: {'name': 'Post-punk', 'source_id': 'bee90cece10cbdebcf39c6ca24886eb874551f866ed672c4f1f297c137963f75', 'page_number': 1}, score: 0.36459114720421903)]",0.0,0.03603558987379074,0.0,0.0,0.0 +In which year was a settlement reached regarding GE's pollution of the Housatonic River?,1999,1977,"[Document(id=754901c14e07fd836481f4df6a9e1c790f259897d11a92d8f4449320bf432be1, content: ' Around the same time, General Electric's Canadian counterpart, Canadian General Electric, was forme...', meta: {'name': 'General_Electric', 'source_id': '26d4044eb2d79191635af69e57c262af8041a70871e60e989128c458529f04cc', 'page_number': 1}, score: 0.5578163934895837)]",1.0,0.5353438854217529,1.0,1.0,1.0 +What area in Charleston is considered the 'industrial area'?,the city's Neck area,Charleston–North Charleston–Summerville Metropolitan Statistical Area,"[Document(id=620f512c3df1eb1788fcecbd5ec4c0438953e005020fab4303e121d55369ade5, content: 'Charleston is the oldest and second-largest city in the U.S. state of South Carolina, the county sea...', meta: {'name': 'Charleston,_South_Carolina', 'source_id': '6135d76cf2064b0c4f70e685c5c7e15b8093869ebd10f9a74e8acb07e2173763', 'page_number': 1}, score: 0.5474995153501447)]",1.0,0.40961262583732605,1.0,1.0,1.0 +In what year did Frédéric obtain citizenship in France?,1835,1835,"[Document(id=39112deed098f10b50cfae893e4a07e0541a59e3ede2de2a696f87f4dd3a60cf, content: '"" When in September 1831 he learned, while travelling from Vienna to Paris, that the uprising had be...', meta: {'name': 'Frédéric_Chopin', 'source_id': '5e95b80d7ceb3febb2d1a7dfe09730771a231fc8d23c15cd74d3fa2327cfcb14', 'page_number': 1}, score: 0.650285022879904)]",1.0,1.0,1.0,1.0,1.0 +What was Tesla afraid someone was trying to do with his invention?,steal the invention,conversion of matter into energy,"[Document(id=a821c6f7864ad547d77613dfb820fb3325460eb2a1db7dd77d77b7831d007952, content: ' Twain notably described Tesla's induction motor invention as ""the most valuable patent since the te...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.65038332569829)]",1.0,0.16155940294265747,1.0,1.0,1.0 +What is the group called that does not agree with government at all?,anarchists,al-Qaeda and the Egyptian Islamic Jihad,"[Document(id=83d3d4ff2abd52d1481391f4a70166aae238e8633826a7d0796ff63840ed230c, content: 'Islamism, also known as Political Islam (Arabic: إسلام سياسي‎ islām siyāsī), is an Islamic revival m...', meta: {'name': 'Islamism', 'source_id': 'b18ef6af50cd5612358df7ee136f3b80ef65a22ebd6f502f95f8b0dec581619a', 'page_number': 1}, score: 0.4780907972523595)]",0.0,0.3834308385848999,0.0,0.0,0.0 +What was the name of the bookstore the authors met in?,Old Corner Bookstore,.,"[Document(id=71628ca153c5dbecf9cfbccb021f738ea191ea90a7de4318b12b17766a3c6520, content: ' It also houses a library.', meta: {'name': 'Saint_Barth%C3%A9lemy', 'source_id': '0e1cdc9aed7071dc6555b8a19b3cdc57f25dee3b2feea27a85c1cb142f1638a4', 'page_number': 1}, score: 0.46255752232824643)]",0.0,0.1316603422164917,0.0,0.0,0.0 +Time and space are both examples of what type of resource?,complexity resources,fundamental intellectual structure,"[Document(id=dde733057987b7656d54f0140cdaa3de2ee1789c63c74f1f5c17cb1f61c82cd4, content: 'Time has long been a major subject of study in religion, philosophy, and science, but defining it in...', meta: {'name': 'Time', 'source_id': 'f3ee42c6d38306d65c6207a15a1b9c65ff08a69728d5d0197f9593548177b229', 'page_number': 1}, score: 0.5137300757270435)]",0.0,0.3276873826980591,0.0,0.0,0.0 +What had happened to Frederick's extra troops from Russia?,"Catherine, who ended Russia's alliance with Prussia and withdrew from the war",Deprived of men,"[Document(id=7e90c7180c2320c1f870ba8d1ee74a7643ed7ff6a878ec6fc37dbe9adbbb1ea9, content: ' +On the eastern front, progress was very slow. The Russian army was heavily dependent upon its main ...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 0.6735388214817897)]",1.0,0.06652583181858063,1.0,1.0,1.0 +What percentage of its workforce did Dell terminate in from 2007 to 2008?,10%,12,"[Document(id=0c5f3492dfe6b6838bc7113688331de615d79f7950a305ca235db36174dba218, content: ' With the return of Michael Dell as CEO, the company saw immediate changes in operations, the exodus...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 0.6340834355511753)]",1.0,0.29873126745224,1.0,1.0,1.0 +What organization was Rome's official caretaker?,the senate,Latin League,"[Document(id=f92f538baf53c616bb6d302c094ff784648439dd5a99ec33660dd665e0c6e57a, content: ' +Each of Rome's legendary or semi-legendary kings was associated with one or more religious institut...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5743521855580911)]",1.0,0.193890780210495,1.0,1.0,1.0 +Who chose BYU as the location of the national Middle East Language Resource Center?,United States Department of Education,United States Department of Education,"[Document(id=0b3a599a54fac34c82f29d858c51bbf5ce4f37032b3539a76c2b8a6b918abc6e, content: ' Several of its language programs are the largest of their kind in the nation, the Russian program b...', meta: {'name': 'Brigham_Young_University', 'source_id': 'cb3b81b6f2c540426803106face67bc5fd4db7b1a9ef422ec3601456732b4785', 'page_number': 1}, score: 0.7432106125866814)]",1.0,1.0,1.0,1.0,1.0 +Who discovered Hydrogen gas?,Robert Boyle,Robert Boyle,"[Document(id=45ad6989b7801812895d43ea694bad169af9b5dd9024dd96b099fe071a196217, content: ' +In 1671, Robert Boyle discovered and described the reaction between iron filings and dilute acids, ...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.7008250803733801)]",1.0,1.0,1.0,1.0,1.0 +Where was Polybius forced to go as hostage?,Rome,Rome,"[Document(id=97686de9c4f8fed507eaca869b9537500fb0e8c613b130c6f388bc83e9983444, content: ' Some areas of the conquered world were more affected by Greek influences than others. The term Hell...', meta: {'name': 'Hellenistic_period', 'source_id': '322682cbb61d31c17e6d21e06723e94efd0ef242a5c9f308888fa14209c4078b', 'page_number': 1}, score: 0.5478172274353506)]",1.0,1.0000001192092896,1.0,1.0,1.0 +In what decade was disco popular?,1970s,1980s,"[Document(id=ac88b34c02a837bc0a4387ec54f7df72e84a365c85acf3ea8c8abdf1bf22c767, content: ' Emphasizing glamour, fashion, and escapism in distinction to the experimental seriousness of earlie...', meta: {'name': 'Post-punk', 'source_id': 'bee90cece10cbdebcf39c6ca24886eb874551f866ed672c4f1f297c137963f75', 'page_number': 1}, score: 0.5462718569085085)]",0.0,0.8399688601493835,0.0,0.0,0.0 +What do the rivers form?,lakes,Nieuwe Maas,"[Document(id=d4f00e0321e7bd0e40e3988f483dbb9e724f85a7fec1890682781c70d19fdf9a, content: ' The middle flow begins as Nederrijn, then changes into Lek, then joins the Noord, thereby forming N...', meta: {'name': 'Rhine', 'source_id': '23686e48a61504679a91dac10d83b287aef3dff7dabc0ab881c281059187ce6a', 'page_number': 1}, score: 0.5195132686870181)]",0.0,0.2038368135690689,0.0,0.0,0.0 +What were the first 256 code points of Unicode made identical to? ,ISO-8859-1,ISO-8859-1,"[Document(id=1ca3328892ba24004a75d8cfaab0f58ce32bed8ee034be6fa54e33e8b0a62eb5, content: ' UTF-16 extends UCS-2, using one 16-bit unit for the characters that were representable in UCS-2 and...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.6513962855269255)]",1.0,1.0000001192092896,1.0,1.0,1.0 +"How many countries did her song ""Irreplaceable"" get number one status in?",five,2013,"[Document(id=d973663d7b373b969d29c8bb0584e676ae0839b0a85dcfe214f9439bae9553e9, content: '"" In 2013, Beyoncé made the Time 100 list, Baz Luhrmann writing ""no one has that voice, no one moves...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 0.49332506233862033)]",1.0,0.1711360514163971,1.0,1.0,1.0 +How many Examination Boards exist in India?,30,30,"[Document(id=16d8a70ea7a6d58f48e68b5c9072e65770d4a4053e9906c77dee6ff0c12c90a1, content: ' However, these vocational schools are not part of the German dual education system. Ergänzungsschul...', meta: {'name': 'Private_school', 'source_id': 'c62330ba95e9919513bc9fc65c0d21a748b4a518b9b1f905a113de7f37274e44', 'page_number': 1}, score: 0.5124628146490905)]",1.0,1.0,1.0,1.0,1.0 +What play showed an early depiction of civil disobedience?,Antigone,Antigone,"[Document(id=23b161a9804a8a11c97c4f3f14b6762b49fdf3b1fefb8ff1de76b00cfa00995a, content: 'One of its earliest massive implementations was brought about by Egyptians against the British occup...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 0.6589027090165251)]",1.0,1.0,1.0,1.0,1.0 +"Once in conversation with the Queen, how is Elizabeth addressed?",Ma'am,Your Majesty and thereafter as Ma'am,"[Document(id=3d92674c03fa6fb7dff9bfacadbd449807bd3f50ac1fd93ba9ff982be85ddbf6, content: ' Under pressure from public opinion, she began to pay income tax for the first time, and Buckingham ...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.5990400446359011)]",1.0,0.6777033805847168,1.0,1.0,1.0 +In what century did Paris become largely into fashion?,19th,19th,"[Document(id=4c486afe5bb2b5636b50e59b9e75919d81c0ac84dee15003c77f04be72177646, content: 'Since the 19th century, the built-up area of Paris has grown far beyond its administrative borders; ...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.6055035497429572)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did Alfred the Great begin his reign?,871,871,"[Document(id=5396376bb5221d60d6016a3ef2e8c6fb591b1eb2c9c6dab90677a7e0ac39a5a5, content: ' The speech of eastern and northern parts of England was subject to strong Old Norse influence due t...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.4737880350354553)]",0.0,1.0,0.0,0.0,0.0 +What two methods can be used when using Load Testing?,data or a large number of users,"input validation and error-management routines.[citation needed] Software fault injection, in the form of fuzzing, is an example of failure testing. Various commercial non-functional testing tools are linked from the software fault injection page; there are also numerous open-source and free software tools available that perform destructive testing. +Load testing is primarily concerned with testing that the system can continue to operate under a specific load, whether that be large quantities of data or a large number of users","[Document(id=76ad3fb3d257a3812a169d749a5fb13f2e05383764e8f9dfc53895f08747b375, content: ' Beta versions can be made available to the open public to increase the feedback field to a maximal ...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.5705638206179092)]",1.0,0.16032397747039795,1.0,1.0,1.0 +what may possibly be a solution to net neutrality concerns? ,municipal broadband,municipal broadband,"[Document(id=bb0e180e009c490f3ae30fb55a9a91cbe512409c2e12ebae0203298a6f3efe78, content: 'S. Federal Communications Commission (FCC) was reported to be considering a new rule that will permi...', meta: {'name': 'Internet_service_provider', 'source_id': 'b6cdeebacab1c0fc99cc348ba10a7ad5f8a09ca4691c74f89374730939ffbbb4', 'page_number': 1}, score: 0.6359423909104869)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What are the ground-attack aircraft in the USAF usually deployed in support for? ,U.S. ground forces,U.S. ground forces,"[Document(id=cdfa1e6bea8fd42b00ce80468ba471689216a6195425c2435fb1141b0b8c7e6f, content: ' +The ground-attack aircraft of the USAF are designed to attack targets on the ground and are often d...', meta: {'name': 'United_States_Air_Force', 'source_id': '1bb222de66e5a2c81f1d5b75bf3eced066939aec82934a5cb176a607b4e8cc8f', 'page_number': 1}, score: 0.6595684899907257)]",1.0,1.0,1.0,1.0,1.0 + In what year did the invasion of Russia take place?,1812,1917,"[Document(id=5cf39c40640013969626bfa9dcc17e86c891033c6881c53d41dfced842a316bd, content: ' +The Russian Revolution is the series of revolutions in Russia in 1917, which destroyed the Tsarist ...', meta: {'name': 'Modern_history', 'source_id': '59c9b273f6f85af9d336afaf5154a702230bef3726363ecbf5c8378f5d8798c7', 'page_number': 1}, score: 0.5696587984574364)]",0.0,0.6672266125679016,0.0,0.0,0.0 +What was the practice of religion to the Romans?,personal choice,augury,"[Document(id=483a4d6e20a90300dacd5b488981283a0e3e48e19133aeae709d43fd9599dbac, content: ' Etruscan religion was also a major influence, particularly on the practice of augury. +Imported myst...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.6798432924358846)]",1.0,0.12443841248750687,1.0,1.0,1.0 +What is one example of failure testing?,Software fault injection,Software fault injection,"[Document(id=76ad3fb3d257a3812a169d749a5fb13f2e05383764e8f9dfc53895f08747b375, content: ' Beta versions can be made available to the open public to increase the feedback field to a maximal ...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.5810701579387141)]",1.0,1.0000001192092896,1.0,1.0,1.0 +"Out of the maximum amount of players allowed, one must be a what?",goalkeeper,three,"[Document(id=54ac8a4bef87eec4f4c4bb18b0298a72660993d40a0085ae08f0804872097969, content: ' +The governing bodies in each country operate league systems in a domestic season, normally comprisi...', meta: {'name': 'Association_football', 'source_id': 'f2b7305b09ad8a43b8389a5eb2d84b8ea4b4deaa7968d21fe9da9da9ecda2c6b', 'page_number': 1}, score: 0.47496159838396923)]",1.0,0.18245229125022888,1.0,1.0,1.0 +In which year was the new constitution ratified?,1855,1884 that produced a constitution ratified 3:1 by Montana citizens in November 1884,"[Document(id=012172d30b9790cfd5a5f1acf26703580b6738cfd860ad8388391dcbc296f44a, content: ' Surveys in 1874, 1875 and 1876 helped spark the Great Sioux War of 1876. The transcontinental NPR w...', meta: {'name': 'Montana', 'source_id': '556dc408ae9a8cf251c8082c455043f3c354c7457c6fe7793c307babf666b651', 'page_number': 1}, score: 0.49764851422200085)]",0.0,0.22884264588356018,0.0,0.0,0.0 +What vessel was attacked in 1845?,Naiad,Fancy,"[Document(id=c73f91cfdbff455b566d44f1e5e2b3a45e69e1706373ebb639e5a56f60c3546d, content: ' +In September 1695, Captain Henry Every, an English pirate on board the Fancy, reached the Straits o...', meta: {'name': 'East_India_Company', 'source_id': 'e06fca07272816e944b588c975118bc6baa76ae98cc186fe22f94dfd06cc38cb', 'page_number': 1}, score: 0.5417964631676105)]",0.0,0.18923906981945038,0.0,0.0,0.0 +When did Travel + Leisure list Yale as one of the most beautiful campuses?,2011,2011,"[Document(id=92daacd08f6215b2b6c9a23e96940ae9ffa3af8f6a0300108c6bfcf840702db9, content: ' In western New Haven, the university holds 500 acres (2.0 km2) of athletic facilities, including th...', meta: {'name': 'Yale_University', 'source_id': 'f33638c4d6b8d7572ba500f3a8affca1729aae3046c83a94e2a7424dd50f6549', 'page_number': 1}, score: 0.5900821044427363)]",1.0,1.0,1.0,1.0,1.0 +What sort of variants did the Human Genome Project indicate that many of the measured transcripts were?,alternative variants,non-repetitive,"[Document(id=35aa1cb690df59867ee05e5097593f48e97261d2c2794a4fcd261522b374c202, content: ' In December 2013, scientists first sequenced the entire genome of a Neanderthal, an extinct species...', meta: {'name': 'Genome', 'source_id': '18d3410858e240e5596ce2f22db31a3f6dcd18aba98f4fb8d8e801b7e537aceb', 'page_number': 1}, score: 0.5173390507269158)]",0.0,0.2370980978012085,0.0,0.0,0.0 +During what year did the Italians invade Libya?,1911,1911,"[Document(id=1b5c167a9d5d609ea5fb8a97ac9f8e9db1b78d5c91d2f95320c7a861768e926e, content: ' He repeatedly expressed a preference for the desert over the city and retreated to the desert to me...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.610828310088769)]",1.0,1.000000238418579,1.0,1.0,1.0 +What did Davidson believe the God of Aristotle is synonymous with?,rational thought,rational thought,"[Document(id=707efe35d39bbe6f12111e251260c07bcfa48cea8000d7e2c9680c78ec3b9dcc, content: '"" The theory was indebted to Aristotle's pluralism and his concepts of Soul, the rational, living as...', meta: {'name': 'Idealism', 'source_id': '55a0bd42a4a737442a2d93fe0926424e236910de01b2406ed43c174f2225ff9d', 'page_number': 1}, score: 0.530264670750077)]",1.0,1.0,1.0,1.0,1.0 +What percentage of students at Notre Dame are the children of former Notre Dame students?,21–24%,21–24%,"[Document(id=4a2399f100a536a4baf71c5dedf166c45f09b1b237a505e9a84faf1da7fd8a1b, content: ' +In 2014 the Notre Dame student body consisted of 12,179 students, with 8,448 undergraduates, 2,138 ...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 0.6933619621704381)]",1.0,1.0,1.0,1.0,1.0 +On what day did U.S> planes located Ozawa's fleet?,second,25 October,"[Document(id=f338e2189fa194b23ca50a67ebd0647e84f119f03f303e8a80c0ee355a155d72, content: ' commanders ignored reports that Kurita had turned back towards San Bernardino Strait. They had take...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 0.7028943836880425)]",1.0,0.24027208983898163,1.0,1.0,1.0 +Greek was crowned the European champions in what event?,Euro 2004,2005 European Championship,"[Document(id=76be05dc0de2dfbcf6d3d16e80abbfa4ee57f55d30696681b941ecd6ff700822, content: 'O.K. Greek basketball teams are the most successful in European basketball the last 25 years, having...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.6882304367381349)]",1.0,0.7931281328201294,1.0,1.0,1.0 +Which tense indicates that an action hasn't finished?,imperfective,past and present,"[Document(id=27c48c714f7a7d5c0daa9f28a8e02d3375d014da26ecdbb43d24c3a5f42b97ed, content: ' Typical of a Slavic language, Czech cardinal numbers one through four allow the nouns and adjective...', meta: {'name': 'Czech_language', 'source_id': 'b7b15f8c583c24627f86dae71c96a8b788adafb4e0606251ec9a06307cc37f6c', 'page_number': 1}, score: 0.35658768086529163)]",0.0,0.22152823209762573,0.0,0.0,0.0 +What acoustic element could be distorted by different audio settings in iPods prior to 2007?,Bass,bass,"[Document(id=2a2aed1469e1865e2816d1c4cee4f53887b00f04578d64882e7a5207977b4a70, content: ' in 2005. +In mid-2015, several new color schemes for all of the current iPod models were spotted in ...', meta: {'name': 'IPod', 'source_id': 'f0324763387a7e11c235569bada6935891b2c5ba40dd065ab5acc1bb5de8dbed', 'page_number': 1}, score: 0.657611807312876)]",1.0,1.0,1.0,1.0,1.0 +In what way can Neutral triatomic hydrogen exist?,excited form,excited form,"[Document(id=814f2c2136f18b6a49318bf9a19fad148d9b272f987b2539771dc5dbd3beca10, content: ' +Under ordinary conditions on Earth, elemental hydrogen exists as the diatomic gas, H2. However, hyd...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.5947984648702281)]",1.0,1.0,1.0,1.0,1.0 +How are children used in the conflicts in Burma?,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements.,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements,"[Document(id=ff11a1c6102e2b293943cbcfb8c2935799487df6f465dea412c00844c1c7c655, content: '"" In July 2013, the Assistance Association for Political Prisoners indicated that there were approxi...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.6589232659427924)]",1.0,0.9912453889846802,1.0,1.0,1.0 +In what decades were Arsenal players stereotyped in a negative way?,1970s and 1980s,1970s and 1980s,"[Document(id=ac63b8357f80e413cededa7c5a37849ede32b9c8d7d3181d6e287572836f5f86, content: ' They formed the backdrop to one of the earliest football-related films, The Arsenal Stadium Mystery...', meta: {'name': 'Arsenal_F.C.', 'source_id': 'b197b14120aabbc3bf2073d4b3e99c1c51d56d5ac5248f8a97dadcfa056a6f7e', 'page_number': 1}, score: 0.5470057790543041)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did new construction start in Melbourne?,late 1890s,late 1890s,"[Document(id=e7a2f32f10a857b188ae10953333d02317612155cbae0e2ac8e0bf5365da90e4, content: ' The Melbourne financial crisis was a contributing factor in the Australian economic depression of t...', meta: {'name': 'Melbourne', 'source_id': 'e9cc00699ea29b72949874ec0e147efa277ecdaa2a727246f376aab1a8d342bd', 'page_number': 1}, score: 0.5958355202000913)]",1.0,1.0,1.0,1.0,1.0 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:1__chunk_size:15.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:1__chunk_size:15.csv new file mode 100644 index 0000000..27523be --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:1__chunk_size:15.csv @@ -0,0 +1,121 @@ +questions,true_answers,predicted_answers,retrieved_docs,doc_mrr,sas,doc_map,doc_recall_single_hit,doc_recall_multi_hit +What were the earliest priesthoods? ,flamines,flamines,"[Document(id=3c9b5484de418625b08e3d8f4e3c3398f40f2f5af0bbf48528ee46ea38c64e7c, content: ' In household cult, the paterfamilias functioned as priest, and members of his familia as acolytes a...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5597276623459511)]",1.0,1.000000238418579,1.0,1.0,1.0 +When was a third visit to Mnafeld scheduled? ,early 1546,1992,"[Document(id=aad3198679bdfc06be9a6021094c2cc36d80d09f77a88a1410392cd2e76ab336, content: '"" At the time of ex-Secretary of State James Baker's visit in 1992, there was even a move to hold a ...', meta: {'name': '51st_state', 'source_id': 'aa3341a0a65518fe53b022b57fd32c865f74c249a1b4d32919c38bfd1cead5b7', 'page_number': 1}, score: 0.35516591888002275)]",0.0,0.34839001297950745,0.0,0.0,0.0 +What was the name of the airfield where heavy raids took place?,Farnborough,Coventry,"[Document(id=93c78551f27ece2b380d8a30b1c8852063e2afe373bf66ac0ff8fc705d37f96c, content: ' On 19 November 1940 the famous RAF night fighter ace John Cunningham shot down a Ju 88 bomber using...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 0.5731025412729198)]",1.0,0.5261203050613403,1.0,1.0,1.0 +The Modern English ending -'s is derived from what ending in Old English?,-es,-es,"[Document(id=e9007dae49d8af7b7fa7932aef22bbdc01132df7a83250c1516203d03c7a8b4b, content: ' It is most “important to recognize that in many words the English and Scandinavian language differe...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.6303054320453825)]",1.0,1.0000001192092896,1.0,1.0,1.0 +In what area is it common for spiritual mentorship to be extremely high?,the West,Hinduism,"[Document(id=b43e607046d3a9f2878f76721d6d75cb6c46d2bdb38f6b3fdc5f1b9a2f23a163, content: ' +In The Church of Jesus Christ of Latter-day Saints (LDS Church), the teacher is an office in the Aa...', meta: {'name': 'Teacher', 'source_id': '8d3b7bd3176a10380300a49fa713b8fffa5e77a181df51ea823d2bdeb46d4c7b', 'page_number': 1}, score: 0.5091397083739027)]",1.0,0.4012155532836914,1.0,1.0,1.0 +Who is the only Greek volleyball club to win European titles?,Olympiacos,Olympiacos,"[Document(id=4af194e6a549f3cca324fac69732006a675ddeb8e841c964604dff19122e4f16, content: 'O.K, Maroussi) have won 3 Triple Crowns, 5 Saporta Cups, 2 Korać Cups and 1 FIBA Europe Champions Cu...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.6223667259906489)]",1.0,1.0,1.0,1.0,1.0 +How many judges were on the panel for this season?,four,three,"[Document(id=0639a6470cdb94c436a07cbb24189b437a50f73fdb84ffa10dd2faeb282342a2, content: ' +The show had originally planned on having four judges following the Pop Idol format; however, only ...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 0.5792649097039392)]",1.0,0.8083884716033936,1.0,1.0,1.0 +"Along with a chemical process, what type of process is used by night vision devices to convert infrared into visible light?",electrical,electrical,"[Document(id=2296ed852027297f8181dc1f64af4fcbb66e36bb282c6197423888ce9d011df5, content: '9–14 μm) and produce images of that radiation. Since infrared radiation is emitted by all objects ba...', meta: {'name': 'Infrared', 'source_id': 'ba9d9843065facd8aa75e472f0c7d760ae3d78712335a49f7abaf499acb21d1c', 'page_number': 1}, score: 0.6292707894619731)]",1.0,1.0,1.0,1.0,1.0 +The second most prominent religion in New York is what?,Judaism,Judaism,"[Document(id=5dac70957842f214d4c60f76564b27a7e657cd237c677ed236d1ed1ca76e360f, content: ' +Christianity (59%), particularly Catholicism (33%), was the most prevalently practiced religion in ...', meta: {'name': 'New_York_City', 'source_id': '8a1509d057a51ed7a68b42ff87df5d459ac7f15f3d0f4f1685859ba197f9686d', 'page_number': 1}, score: 0.6064028863553603)]",1.0,1.0000001192092896,1.0,1.0,1.0 +The Free Hanseatic City of Bremen includes which cities?,Bremen and Bremerhaven,"Bergen (in Norway), Stockholm (in Sweden), and Vyborg","[Document(id=6a3f5e0630db73ff1550601fa1d1616875dc0e070f4b7057108481da4656c6c8, content: ' The former German state of Prussia took its name from the Baltic Prussians, although it was led by ...', meta: {'name': 'Germans', 'source_id': '87c5e28d01065fdd517ee7c4c4b9b83b8b40b35a4d9ba5664f523e4485346f24', 'page_number': 1}, score: 0.5104194547915891)]",0.0,0.4704947769641876,0.0,0.0,0.0 +What seeks to limit the use of polysyllabic characters?,CPC,UTF-8 standard,"[Document(id=0f06154ad167e0c62a1732910968dd4068a0d290baf6698dd674b8e6df290ce5, content: ' However RFC 3629, the UTF-8 standard, recommends that byte order marks be forbidden in protocols us...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.5576395998086606)]",0.0,0.07299263030290604,0.0,0.0,0.0 +In what courts can federal habeas corpus suits be brought?,federal,federal,"[Document(id=18fb3c7c246cfed9c7f86df4b14156139de2cefac2e61a9bbd26c8653739ab9b, content: ' Further, the Court's decision in Baze v. Rees, upholding a lethal injection method used by many sta...', meta: {'name': 'Capital_punishment_in_the_United_States', 'source_id': '0a4e4dda32f7be6a4f78da5ed9ee21d2cc7659112c0d087b2923a482bdc77b5b', 'page_number': 1}, score: 0.5379543078595055)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What did Whitehead consider himself as a philosopher? ,rank amateur,"last great Anglo-American philosopher before Wittgenstein's disciples spread their misty confusion, sufficiency, and terror."" French sociologist and anthropologist Bruno Latour even went so far as to call Whitehead ""the greatest philosopher of the 20th century","[Document(id=777192ecd0b066a38e00224d5a2269174c0dfa158ccc4e8bc7395644c0b99822, content: 'g., a religion might encourage the violent extermination of a rival religion's adherents). +However, ...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 0.5657093946225062)]",1.0,0.11629929393529892,1.0,1.0,1.0 +What condition does a person have to meet to be allowed by law on a coin?,deceased,legal tender,"[Document(id=20272f837f8ae191cb2ca1c0c96912e627a2509f85e2d6f9240c270f8fbec035, content: ' +Early releases of the Washington coin included error coins shipped primarily from the Philadelphia ...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.5122653770702773)]",1.0,0.11276385933160782,1.0,1.0,1.0 +Which region of the US experiences rain year round?,Pacific Northwest,Hawaii,"[Document(id=5cc5c90d3b7502d6e064b808c140d69c74657b0e5d9b60b7a5c3a8b76125d80b, content: 'S., with Quinault Rainforest in Washington having an average of 137 inches (348 cm). Hawaii receives...', meta: {'name': 'Geography_of_the_United_States', 'source_id': '6eef5e1e8e3a1d489b23e0304d3cc9472b84a44871b2fd5ecfc8ab3f12e8ccf0', 'page_number': 1}, score: 0.6797168161958604)]",1.0,0.5233805179595947,1.0,1.0,1.0 +What is the ability to recognize and adapt to new specific pathogens called?,Adaptive (or acquired) immunity,Adaptive (or acquired) immunity,"[Document(id=ad1b87ebcacf263e09bd48e7e71e798fd5bc8ca82f8a1917807172a097a552ae, content: 'The immune system is a system of many biological structures and processes within an organism that pr...', meta: {'name': 'Immune_system', 'source_id': 'fcb8c5092b2596a66d4f96587d653cbabda606874d86632c35ec40ef8f023051', 'page_number': 1}, score: 0.6578931511774535)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who allegedly hit a home run to the Center?,Babe Ruth,Babe Ruth,"[Document(id=9f4c20a2bce44cf9bd3af493be183ab333c4d80eef37f323c78a73218b2cb402, content: ' Because of the tie the Giants and Cubs ended up tied for first place. The Giants lost the ensuing o...', meta: {'name': 'Chicago_Cubs', 'source_id': '10bd5330115a2fe2b9ef60d21abe3942769709161783409af0c4444ccac129c3', 'page_number': 1}, score: 0.5064623445443148)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Which type of graduate from Imperial earned the second highest average starting salary after graduation?,Computing graduates,female,"[Document(id=2c88cea895d41eff8b543d55b9d2a6d4ccc9891814b7888bf4a979fddbfdc2ce, content: ' The Trust is currently the largest in the UK and has an annual turnover of £800 million, treating m...', meta: {'name': 'Imperial_College_London', 'source_id': '8ff02d910ce681fdd72074b13ed322271625386966ee827dabf1c93ca556fd6e', 'page_number': 1}, score: 0.4718451464351809)]",1.0,0.1364639550447464,1.0,1.0,1.0 +In what year did Eisenhower propose his second civil rights act?,1960,1960,"[Document(id=0f23a3d664b2d9d293629ea9d692c835b9a8e93ffb30bc45de463cd9b9d93424, content: ' Therefore, Khrushchev would not take part in the summit. Up until this event, Eisenhower felt he ha...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 0.642219936436536)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What is the current height limitation in Paris in central areas?,50 m,50 m (160 ft),"[Document(id=44b4daa4d3c4378951ca880516b3342520595caa477d61e2c5fc88de3627c8f0, content: ' Modern Paris owes much to its late 19th century Second Empire remodelling by the Baron Haussmann: m...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.66910672984007)]",1.0,0.804789662361145,1.0,1.0,1.0 +Income level was found to have a signification correlation to which outcome in the study?,nutrition literacy,Crime rate,"[Document(id=e3d7ca757aed12075bbf76ea9a1e24a5322e40b9e0288850c49ef2d02dc252a9, content: ' Other studies have confirmed this relationship. The UNICEF index of ""child well-being in rich count...', meta: {'name': 'Economic_inequality', 'source_id': '648ff32b8c146c974b23e115e3a61420b1cdbea9de1f5bf38442ce4c1d9caa42', 'page_number': 1}, score: 0.5671652046426924)]",0.0,0.0639934316277504,0.0,0.0,0.0 +How much did Viacom want in damages in their lawsuit against youtube?,$1 billion,$1 billion,"[Document(id=497c11f9fbbfb1f37be3dcd0a29d747c5f8f8967252a25e340b937417a080280, content: ' Three successful complaints for copyright infringement against a user account will result in the ac...', meta: {'name': 'YouTube', 'source_id': '4d18798daeceb997a6dcb04b575dbbdad768671ca9202116370611d944d510be', 'page_number': 1}, score: 0.6587906616356036)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who should be able to challenge administrative orders in court?,individuals,individuals,"[Document(id=8a0f220e6df4e50504002c6ad6312d289ff774c9c753db65e80fb1022f521be2, content: ' Dicey's: “no man is punishable or can be lawfully made to suffer in body or goods except for a dist...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 0.5446938087977631)]",1.0,1.000000238418579,1.0,1.0,1.0 +The Slavs were under what name in the early 6th century?,the Antes and the Sclaveni,Sklaboi,"[Document(id=8ad39bb1347ca9057c45116b007a845b2f566c20ddd8d7fb64626c78a4750a3c, content: 'Slavs are the largest Indo-European ethno-linguistic group in Europe. They inhabit Central Europe, E...', meta: {'name': 'Slavs', 'source_id': 'a55731ff3cd03d8fe34d4e646dbfcdc2088f01190e5110c9ff0baf4e4b486847', 'page_number': 1}, score: 0.7195013371244505)]",1.0,0.27494680881500244,1.0,1.0,1.0 +How are the results simplified when using the SASO?,"into four summaries that look specifically at responses that correspond to either homosexuality, heterosexuality, bisexuality or asexuality.","measures sexual orientation on a continuum, considers various dimensions of sexual orientation, and considers homosexuality and heterosexuality separately","[Document(id=3638fb16add213ce4e749139ce1852d845838bce357ac5b46f8f2163ed664217, content: ' The classification scheme works such that individuals in the same category show the same balance be...', meta: {'name': 'Sexual_orientation', 'source_id': 'f94279650936a403aa3d2b43ab1ad42b1fd14d05455aa86fbcda6cda978ffe5c', 'page_number': 1}, score: 0.3776180459315529)]",1.0,0.6333848237991333,1.0,1.0,1.0 +What socioeconomic class used the Florentine Tuscan language in Dante's time?,upper class,the family or a village priest,"[Document(id=1ac361381724706acf7a12f934a8f6a399e50f7b7857405e6b825fba7c4f2236, content: ' Other countries codified their laws; legal codes were promulgated in Castile, Poland, and Lithuania...', meta: {'name': 'Middle_Ages', 'source_id': 'f13b345e20c54b6cc55363080d7492474df62715717172aa9f1029f286010ad9', 'page_number': 1}, score: 0.5257922905676446)]",0.0,0.22852304577827454,0.0,0.0,0.0 +"In addition to ammo proposals, what other need was articulated?",The need for some form of tracer or smoke trail,high rate of fire,"[Document(id=45532502a93fb403d23d2c78d298cc7d788b57372d92095d78031ff043962a4d, content: ' Under these circumstances, the Germans had to continue the war, yet could not pin down any proper m...', meta: {'name': 'Franco-Prussian_War', 'source_id': 'a0061af5e8954755f51ba336d899763a228676b667c28764a7ea7e6303f0d79c', 'page_number': 1}, score: 0.38680520179324923)]",0.0,0.26763296127319336,0.0,0.0,0.0 +What is that power that enables us to love?,Prevenient grace,"wisdom and understanding enhance one's faith in God. According to him, these are the tools that God uses to commune with a contemplative. Love in the soul is both the cause and result of true understanding and judgement","[Document(id=9f0a0b71abae404c8342fedf70ff8f016c3a6b4ec47cc4fee1252fcf9d7adea4, content: ' +One of Albert's greatest contributions was his study of Dionysus the Areopagite, a mystical theolog...', meta: {'name': 'Dominican_Order', 'source_id': 'f55ba4e5b37c737e23256c74229a0bf3a5c697b70e171c6ce20c437d900b023e', 'page_number': 1}, score: 0.449116765433877)]",0.0,0.29284828901290894,0.0,0.0,0.0 +"Who was assassinated on September 22, 189 AD?",He Jin,Caesar,"[Document(id=dd09780a89c3259aae7f10ab2070b88e3687ba1267d0d7b037e76509ccd66648, content: ' Thus, the assemblies became powerless and were unable to oppose him. +Caesar was assassinated on Mar...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.44714711562407566)]",0.0,0.1853158324956894,0.0,0.0,0.0 +How did Gaddafi change the legal system?,he introduced sharia as the basis for the legal system,sharia,"[Document(id=4d48682f805f7a385767a5c2825f3fc2119d2b698a7fc9592571f5590731bae6, content: 'Muammar Muhammad Abu Minyar al-Gaddafi (Arabic: معمر محمد أبو منيار القذافي‎ Arabic pronunciation: [...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.6177406623423897)]",1.0,0.6138635277748108,1.0,1.0,1.0 +What name was it assumed Elizabeth would take upon her marriage to Philip?,"Mountbatten,",House of Mountbatten,"[Document(id=e315f255311d7dc4c756937a51f6602a99f1b8e3b0a145306a3d8c41c258bc57, content: ' She was educated privately at home. Her father acceded to the throne on the abdication of his broth...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.6146287331638632)]",1.0,0.7635930776596069,1.0,1.0,1.0 +What was the attack on the British weakness?,Battle of Fort Bull,Minorca in the Mediterranean,"[Document(id=19aa0d6dee9980002e943f9e1d31d63db517fa0aae06cfc59222ce379dddd1bd, content: ' As prime minister Pitt committed Britain to a grand strategy of seizing the entire French Empire, e...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 0.5842505432740723)]",0.0,0.0038108360022306442,0.0,0.0,0.0 +How much of South Tucson is Native American?,10%,Hohokam,"[Document(id=566e0e1e8fea2b2d4f4780311f659535816e073357090acf35a5060d287a12d1, content: 'Tucson (/ˈtuːsɒn/ /tuːˈsɒn/) is a city and the county seat of Pima County, Arizona, United States, a...', meta: {'name': 'Tucson,_Arizona', 'source_id': '1cd8368afbbb7156e4d37c16c9795a1cf7209fbd0036331b6759337ae62d7224', 'page_number': 1}, score: 0.6144746452329479)]",1.0,0.08153045177459717,1.0,1.0,1.0 +Whose theory claimed that species were unchanging and designed by God?,John Ray,John Ray,"[Document(id=9a450124afc12f193f72ae1f8310b72ada7cbbf8f34bbd71f66eb32b0f47efbc, content: ' With the development of the modern evolutionary synthesis in the 1930s and 1940s, Darwin's concept ...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 0.6905690706074652)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who can restore a database that has been corrupted?,database administrator,RAID,"[Document(id=a80859df82d1d384a197f48e26a3b04228abfce695b712003b1161d5e181e6f7, content: 'A database management system (DBMS) is a computer software application that interacts with the user,...', meta: {'name': 'Database', 'source_id': '98b09743ece9be42ad021f0dc6ffab08c322e7eeace6aae0b98b4bbf67224bd3', 'page_number': 1}, score: 0.4148625502062699)]",1.0,0.23673129081726074,1.0,1.0,1.0 +What unpopular philosopher's text did Avicenna learn from?,Abu Abdullah Nateli,Abu Abdullah Nateli,"[Document(id=ec70e294a8a1203ecdbb9fe95270466a0129a5d607fe306df146559206aa5b31, content: ' Avicenna first began to learn the Quran and literature in such a way that when he was ten years old...', meta: {'name': 'Avicenna', 'source_id': '25cbb1011a29473e1dfa8d96ebe95bc8ead7393548de57e871f959624f7238f1', 'page_number': 1}, score: 0.7299585588069163)]",1.0,1.0,1.0,1.0,1.0 +What was the event called that caused Ayutthaya to reclaim lost territories ?,the Chinese threat,Ayutthaya recovered its territories by 1770,"[Document(id=391f8278a6d664b0fda2a91e05a75fcb178e560aca2778cf3383f74470691074, content: ' Its trade and secular administrative reforms built a prosperous economy for more than 80 years. Fro...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.49740108859030846)]",1.0,0.10474255681037903,1.0,1.0,1.0 +"Before the introduction of the Latin alphabet, how was Old English written?",runic system,runic system,"[Document(id=540a5b537ec7e1fdea905baae428e9a4c76167e7be305ec2dac494ec1ac16d1f, content: 'Old English (Ænglisc, Anglisc, Englisc) or Anglo-Saxon is the earliest historical form of the Englis...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.6427731581432499)]",1.0,1.000000238418579,1.0,1.0,1.0 +"When groups that are antagonistic to each other form a coalition for hidden gain, it is called what?",unholy alliance,unholy alliance,"[Document(id=b281d8f2fa5c72bf9176af2f66fbd72725ba99c1c3e8e2e12044e694c6ee3bbe, content: ' This sum itself may be all or a portion of the difference between the actual (inflated) payment to ...', meta: {'name': 'Political_corruption', 'source_id': 'e7c84fb03cb06b72c4ab51eb24218e9f2a10b95493e43d575342db0ffa3e9e1d', 'page_number': 1}, score: 0.5003888561596567)]",1.0,1.0,1.0,1.0,1.0 +What made a strong economic case for investing in low carbon technologies?,Stern Review,The Stern Review,"[Document(id=d8e9c2fbdeffaa4d8556deecba15bceb380011371746091dbaeb7542415289ef, content: ' Tax and subsidy shifting can help overcome these problems, though is also problematic to combine di...', meta: {'name': 'Renewable_energy_commercialization', 'source_id': 'fa5f7904b8df5272f4f870ed401f610435c916171c87b47deed9a45e6bd594ea', 'page_number': 1}, score: 0.5684680461699898)]",1.0,0.8556329011917114,1.0,1.0,1.0 +"Who introduced the Latin version of the word ""insect""?",Pliny the Elder,Pliny the Elder,"[Document(id=d4ddb3955cf6ff9d6bed687f54220f695fef0951972c64309938d9cf7670f6ba, content: ' +The word ""insect"" comes from the Latin word insectum, meaning ""with a notched or divided body"", or ...', meta: {'name': 'Insect', 'source_id': 'e14215fdfb7e1394430dedb24549d773464f110b7f19b86558bc101f84126630', 'page_number': 1}, score: 0.8069591810257106)]",1.0,1.0,1.0,1.0,1.0 +What was started by the Roman Catholic Church to dispense of the Cathars in Languedoc?,The Cathar Crusade,Latin,"[Document(id=849449c1450a95f93162f06fa31dc5068c49e9309fa9a5571acb08b712097028, content: ' Several texts proposed for his approval had to be changed. But all texts were finally agreed upon. ...', meta: {'name': 'Pope_Paul_VI', 'source_id': 'fc82a40c990c0b3840b06b32643858d39984b66d78ae950561d809dc84d6edf8', 'page_number': 1}, score: 0.5101620714643623)]",0.0,0.21103423833847046,0.0,0.0,0.0 +What music service is a huge competitor for Tidal?,Spotify,Spotify,"[Document(id=ea56a7c43f1347c76ec07affd8ad20729d040a8d58a36f56583b537f903e8142, content: ' +The release of a video-game Starpower: Beyoncé was cancelled after Beyoncé pulled out of a $100 mil...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 0.5759942076459466)]",0.0,1.0,0.0,0.0,0.0 +How much was each copper token worth?,a halfpenny,5-cent,"[Document(id=842ad1ee7799e7ecbfdfb163f8afe265cf62972cba3da0e97be6f505cb10fc03, content: ' In North America, copper mining began with marginal workings by Native Americans. Native copper is ...', meta: {'name': 'Copper', 'source_id': 'de5c60a665b93f9b358be412d53a37ca43d90005e3ec9a553ac38a38f076fadc', 'page_number': 1}, score: 0.47993080401424804)]",0.0,0.7035895586013794,0.0,0.0,0.0 +What ships were attacked in 1852?,Glencoe and Sea Nymph,HMS Jasper,"[Document(id=91f5b594d5423035cb6299d6343f01bf4eb248b5aa8de432e2f6c51ddedca474, content: ' On 21 May 1855, the gunboats and armed steamers attacked the seaport of Taganrog, the most importan...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 0.5363870553469913)]",0.0,0.3474302887916565,0.0,0.0,0.0 +What was the first fossil to display both clearly traditional reptilian characteristics?,Archaeopteryx,Archaeopteryx,"[Document(id=5b537a30444228b3b5e39f8e28a321a8e774f6051560dcab29bb0ece8b5ae99e, content: ' Unlike Archaeopteryx and the non-avialan feathered dinosaurs, who primarily ate meat, recent studie...', meta: {'name': 'Bird', 'source_id': '76114e3e9c92caec1de2bcfd38b6f22e1b48487f665fa6392f914934a6748ae1', 'page_number': 1}, score: 0.5631311233054687)]",1.0,1.0,1.0,1.0,1.0 +What are the theories that integrate molecular genetics with Darwinian evolution called?,the modern evolutionary synthesis,modern evolutionary synthesis,"[Document(id=7ff269861e7df34fcebf71c4907ef4694447d099e228c45550f4e2387812c2fd, content: ' This dogma has since been shown to have exceptions, such as reverse transcription in retroviruses. ...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 0.5781950352097195)]",1.0,0.9375607371330261,1.0,1.0,1.0 +At what time did Tesla get dinner?,8:10 p.m,"8:10 p.m., at Delmonico's restaurant and later the Waldorf-Astoria Hotel. Tesla would telephone his dinner order to the headwaiter, who also could be the only one to serve him. ""The meal was required to be ready at eight o'clock","[Document(id=bf93e5ca5e1091757426fa6d168a7b90f8c1d9dad833880ffcb9b06d8ad2b0f7, content: ' There are a minimum of 278 patents issued to Tesla in 26 countries that have been accounted for. Ma...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.687530275591786)]",1.0,0.311399906873703,1.0,1.0,1.0 +In what year did the French reach Alsace?,1444,1789,"[Document(id=5ba09395255feae4ebf751bb80a7a4fa5d79691518f143a0bddc36e8712f8fc6, content: ' As a result, Alsace was transformed into a mosaic of Catholic and Protestant territories. On the ot...', meta: {'name': 'Alsace', 'source_id': '1105499d303196fb0b7ae4d26209720b0040e63376f114d458fd77f05836cbd4', 'page_number': 1}, score: 0.6855752258831345)]",1.0,0.5213750004768372,1.0,1.0,1.0 +Who was responsible for the handling of the Roman census?,two censors,Comitia Tributa,"[Document(id=6bfe26157bc8b93a3c22f1f4d5b8228d372139a27dc2848c3d256ad4386f3b4f, content: ' the citizens of Rome), the Comitia Tributa, was presided over by a consul, and was composed of 35 t...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.4990089103457631)]",1.0,0.14773741364479065,1.0,1.0,1.0 +When was a resolution agreed to about Chinese human rights issues in San Francisco?,"April 1, 2008","April 1, 2008","[Document(id=1e41339b2f87d6ffb45a6e6882ca2d5cec93b2ada663205b1745d23021ff3c6d, content: ' The planned closing ceremony at Justin Herman Plaza was cancelled and instead, a ceremony was held ...', meta: {'name': '2008_Summer_Olympics_torch_relay', 'source_id': 'a45e6ea09a1ff12d8a239a682c8ef3735ae894d5ab6427b50909053bb046e8b5', 'page_number': 1}, score: 0.5200463937587196)]",1.0,1.0,1.0,1.0,1.0 +Who was the last pope to be crowned?,Paul VI,Pius XII,"[Document(id=84e7b362d02db50bb7dd66920598b2f7bcfeaad549dacdf2dc9aa4ad9c16171f, content: ' John XXIII became the first Pope to receive the title, followed by John Paul II in 1994 and Francis...', meta: {'name': 'Pope_John_XXIII', 'source_id': '21b200b333306d54410b53cfd12e123951dabfc1f534196e5309132d00e8184d', 'page_number': 1}, score: 0.5445188707405088)]",0.0,0.6628619432449341,0.0,0.0,0.0 +Who handles serious medical emergencies on Norfolk Island?,the Royal Australian Air Force,Royal Australian Air Force,"[Document(id=61583a75fdba243f6a812f3bf73c41e686e4f8374db81e4dd5f1a4d5a4430336, content: ' +Non-Australian citizens who are Australian permanent residents should be aware that during their st...', meta: {'name': 'Norfolk_Island', 'source_id': '8be872c9c3302a16163599d52ec60841ddff13042f8e8e2e0aa14de2c192e2ee', 'page_number': 1}, score: 0.6689246760470762)]",1.0,0.9528100490570068,1.0,1.0,1.0 +Were Roman soldiers expected to purchase their own equipment?,"wealthiest citizens, who were able to purchase the best equipment",the Roman state was forced to arm its soldiers at the expense of the state,"[Document(id=1c55594460ba4e27d8751041858090c94678420d1701172511f1bad1a1d70154, content: ' As a result, the Roman state was forced to arm its soldiers at the expense of the state, which it h...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.5434537812989179)]",1.0,0.255912184715271,1.0,1.0,1.0 +Who did Nasser's group try to assassinate?,Hussein Sirri Amer,Nuri al-Said,"[Document(id=c355e0f20036a60ada1e4711a62b4d11adebf07dce76a808203fdd588cc1168e, content: '"" Nasser ordered a crackdown against Syrian communists, dismissing many of them from their governmen...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 0.6408207539725228)]",1.0,0.5893326997756958,1.0,1.0,1.0 +What was AJ Ayer an important member of?,Logical empiricism,computer pioneer and inventor,"[Document(id=f7e96505ccf7689f7e371e32dcd7a1dc9fa715eb5b41f801d29a38043eb97ec3, content: ' +Other: Civil rights leader W. E. B. Du Bois; philosopher Henry David Thoreau; authors Ralph Waldo E...', meta: {'name': 'Harvard_University', 'source_id': '7f91a9284c2db4a8d1147861fa2c02c6a25281db1cbc78097950be011c0e6269', 'page_number': 1}, score: 0.37024883815166565)]",0.0,0.10659181326627731,0.0,0.0,0.0 +In which year was a settlement reached regarding GE's pollution of the Housatonic River?,1999,1977,"[Document(id=0869972e54922afd3497ba60a5f3e981d9c0ab240629036531512fe359bc4da8, content: ' Around the same time, General Electric's Canadian counterpart, Canadian General Electric, was forme...', meta: {'name': 'General_Electric', 'source_id': '26d4044eb2d79191635af69e57c262af8041a70871e60e989128c458529f04cc', 'page_number': 1}, score: 0.5146685965557382)]",1.0,0.5353437662124634,1.0,1.0,1.0 +What area in Charleston is considered the 'industrial area'?,the city's Neck area,Charleston–North Charleston–Summerville Metropolitan Statistical Area,"[Document(id=a00baea2441c87e38109afe86358b94ae591d905c9ff5c35c440986bd364ee66, content: 'Charleston is the oldest and second-largest city in the U.S. state of South Carolina, the county sea...', meta: {'name': 'Charleston,_South_Carolina', 'source_id': '6135d76cf2064b0c4f70e685c5c7e15b8093869ebd10f9a74e8acb07e2173763', 'page_number': 1}, score: 0.5451335165669611)]",1.0,0.40961262583732605,1.0,1.0,1.0 +In what year did Frédéric obtain citizenship in France?,1835,1835,"[Document(id=e4be5f97eecad8f13742697394da51c6177446b3b3d0eac7d06873a3b5294293, content: '"" When in September 1831 he learned, while travelling from Vienna to Paris, that the uprising had be...', meta: {'name': 'Frédéric_Chopin', 'source_id': '5e95b80d7ceb3febb2d1a7dfe09730771a231fc8d23c15cd74d3fa2327cfcb14', 'page_number': 1}, score: 0.6520729537358383)]",1.0,1.0,1.0,1.0,1.0 +What was Tesla afraid someone was trying to do with his invention?,steal the invention,conversion of matter into energy,"[Document(id=f022f65e88fb872df34eeee407d099b2d054f887896be19fd1acedc04bb01605, content: ' Robert Underwood Johnson described him as attaining a ""distinguished sweetness, sincerity, modesty,...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.6690885440537078)]",1.0,0.16155940294265747,1.0,1.0,1.0 +What is the group called that does not agree with government at all?,anarchists,Non-revolutionary civil disobedience,"[Document(id=172d5ae15d23469aad64106d5290dbb9c81377ad9262463cdd5058509187a788, content: ' +Non-revolutionary civil disobedience is a simple disobedience of laws on the grounds that they are ...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 0.4826805294935842)]",1.0,0.48217201232910156,1.0,1.0,1.0 +What was the name of the bookstore the authors met in?,Old Corner Bookstore,Notre-Dame de Paris,"[Document(id=48c7a502ced16688a82783fa43fd5166e7b536c4c0669689ee2311430aa6ece2, content: ' Victor Hugo's The Hunchback of Notre Dame inspired the renovation of its setting, the Notre-Dame de...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.4588841743332801)]",0.0,0.27070188522338867,0.0,0.0,0.0 +Time and space are both examples of what type of resource?,complexity resources,fundamental intellectual structure,"[Document(id=dd4f4d9a87883e0aa9f9e24e4db2a323a2a0555ed35dcc2ef63f816e15bbcd52, content: 'Time has long been a major subject of study in religion, philosophy, and science, but defining it in...', meta: {'name': 'Time', 'source_id': 'f3ee42c6d38306d65c6207a15a1b9c65ff08a69728d5d0197f9593548177b229', 'page_number': 1}, score: 0.50831139766181)]",0.0,0.3276873826980591,0.0,0.0,0.0 +What had happened to Frederick's extra troops from Russia?,"Catherine, who ended Russia's alliance with Prussia and withdrew from the war",placed a corps of his own troops under Frederick's command,"[Document(id=d7342a0e3021ae2d3bba679cf10ecb72ddc69518be33b380ee6b9e8f5e4d03c9, content: ' +On the eastern front, progress was very slow. The Russian army was heavily dependent upon its main ...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 0.6968056158780116)]",1.0,0.311606764793396,1.0,1.0,1.0 +What percentage of its workforce did Dell terminate in from 2007 to 2008?,10%,12,"[Document(id=67fda94cfa124d7c449d0e5d6525ee64bb5132e4e8255f2a8c0298097627fabb, content: ' Increasing spending on R&D would have cut into the operating margins that the company emphasized. D...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 0.6258874375525161)]",1.0,0.29873126745224,1.0,1.0,1.0 +What organization was Rome's official caretaker?,the senate,pontifex maximus,"[Document(id=71718b05e0ad88a856f11532834265f8f64091425308e7bc717ce93437673530, content: ' +Towards the end of the Republic, religious and political offices became more closely intertwined; t...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5607277962054709)]",1.0,0.18580681085586548,1.0,1.0,1.0 +Who chose BYU as the location of the national Middle East Language Resource Center?,United States Department of Education,United States Department of Education,"[Document(id=73c809a12535348ffed165eff42c189f3aa95c6ed455c42b5e7932cd44167a8d, content: ' Several of its language programs are the largest of their kind in the nation, the Russian program b...', meta: {'name': 'Brigham_Young_University', 'source_id': 'cb3b81b6f2c540426803106face67bc5fd4db7b1a9ef422ec3601456732b4785', 'page_number': 1}, score: 0.704733357632634)]",1.0,1.0,1.0,1.0,1.0 +Who discovered Hydrogen gas?,Robert Boyle,Robert Boyle,"[Document(id=b562ec53d471dfbb3ba9b12247b16c622039308f7ec5b5b5344127cfeb67e87d, content: ' +In 1671, Robert Boyle discovered and described the reaction between iron filings and dilute acids, ...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.7076322081359558)]",1.0,1.0,1.0,1.0,1.0 +Where was Polybius forced to go as hostage?,Rome,Rome,"[Document(id=b5c785a8e22edd8d9d6cb5708ef03b91b1dd4137c66bb49c28d760b75bfb84be, content: ' Some areas of the conquered world were more affected by Greek influences than others. The term Hell...', meta: {'name': 'Hellenistic_period', 'source_id': '322682cbb61d31c17e6d21e06723e94efd0ef242a5c9f308888fa14209c4078b', 'page_number': 1}, score: 0.5545614504809487)]",1.0,1.0000001192092896,1.0,1.0,1.0 +In what decade was disco popular?,1970s,1980s,"[Document(id=a1ebb6111bad62b247dd1b756728c9569fa6d4b9a446c19b1950e68e79933994, content: ' As of 2015, house music remains extremely popular in both clubs and in the mainstream pop scene whi...', meta: {'name': 'House_music', 'source_id': 'c556f4a459e8a99b6d2065bab8fd6a918d0a00f13291aad2473922492e4abf2e', 'page_number': 1}, score: 0.5521305810998134)]",0.0,0.8399688005447388,0.0,0.0,0.0 +What do the rivers form?,lakes,Nieuwe Maas,"[Document(id=c76f28ed2994796f7f3a5aeea9229958f03f574059e71701fc815116b1cba0e0, content: ' Since the Rhine contributes most of the water, the shorter term Rhine Delta is commonly used. Howev...', meta: {'name': 'Rhine', 'source_id': '23686e48a61504679a91dac10d83b287aef3dff7dabc0ab881c281059187ce6a', 'page_number': 1}, score: 0.5163154414268505)]",0.0,0.2038368135690689,0.0,0.0,0.0 +What were the first 256 code points of Unicode made identical to? ,ISO-8859-1,CJK ideographs,"[Document(id=8c55128e915db91be79f675390a7251853ff83d2c6945bcbba98c2fef9e723ac, content: ' For example, the ""fullwidth forms"" section of code points encompasses a full Latin alphabet that is...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.6460142541452026)]",1.0,0.31672221422195435,1.0,1.0,1.0 +"How many countries did her song ""Irreplaceable"" get number one status in?",five,"Country Strong. Nicki Minaj has stated that seeing Beyoncé's Pepsi commercial influenced her decision to appear in the company's 2012 global campaign. +Her debut single, ""Crazy in Love"" was named VH1's ""Greatest Song of the 2000s"", NME's ""Best Track of the 00s"" and ""Pop Song of the Century"", considered by Rolling Stone to be one of the 500","[Document(id=215c36d44c2fcb6da6e34fc2acdaa14ed7ad663ac752949d8d3690e0c72d499c, content: '"" In 2013, Beyoncé made the Time 100 list, Baz Luhrmann writing ""no one has that voice, no one moves...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 0.48406363409287023)]",1.0,0.05507790297269821,1.0,1.0,1.0 +How many Examination Boards exist in India?,30,NIPER) and National Institute of Nutrition (NIN). Hyderabad has five,"[Document(id=2a60820f19599e70a11ffdf68cff487cf35e2e6c1fa7acfbb591eec5701ddc2b, content: ' Depending on the institution, students are required to sit the Secondary School Certificate or the ...', meta: {'name': 'Hyderabad', 'source_id': 'e032b0dc6babeab40ef746299b8fdda8c0f0f12976c0b900e8ed7ca60ce1802d', 'page_number': 1}, score: 0.46785569600226)]",0.0,0.061030175536870956,0.0,0.0,0.0 +What play showed an early depiction of civil disobedience?,Antigone,Antigone,"[Document(id=ad70291d4dec497bcd48d02a2bfbb4780dffd558cf0eee2d7c976f107f401335, content: 'One of its earliest massive implementations was brought about by Egyptians against the British occup...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 0.6589027090165251)]",1.0,1.0,1.0,1.0,1.0 +"Once in conversation with the Queen, how is Elizabeth addressed?",Ma'am,Ma'am,"[Document(id=7e78ea0e95b5bd788108ac6801d926bfe2bab5d8c8ee7ca718215a801c9c95ef, content: ' She was celebrated in Canada as the ""longest-reigning sovereign in Canada's modern era"". (King Loui...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.5698498562293763)]",1.0,1.0,1.0,1.0,1.0 +In what century did Paris become largely into fashion?,19th,19th,"[Document(id=faa03729332a5c07c1c9d6d87747d15af8b3043f87a9cff7389d9a1c23d97c7c, content: ' +Paris has been an international capital of high fashion since the 19th century, particularly in the...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.5976425209098843)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did Alfred the Great begin his reign?,871,Anglo-Saxon England,"[Document(id=71c630c76acc63a450092ff63290ae196ef5301dff3d6d769d1f3545e22ff2a4, content: ' Charlemagne's grandsons and great-grandsons divided their kingdoms between their descendants, event...', meta: {'name': 'Middle_Ages', 'source_id': 'f13b345e20c54b6cc55363080d7492474df62715717172aa9f1029f286010ad9', 'page_number': 1}, score: 0.4394720937621911)]",1.0,0.07440971583127975,1.0,1.0,1.0 +What two methods can be used when using Load Testing?,data or a large number of users,large quantities of data or a large number of users,"[Document(id=dd704f4bc758ab475db02844a8a24263dd4fdb32afbc7f7dc86b810f8fa62e4f, content: ' +Load testing is primarily concerned with testing that the system can continue to operate under a sp...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.6507854161834614)]",1.0,0.9289779663085938,1.0,1.0,1.0 +what may possibly be a solution to net neutrality concerns? ,municipal broadband,Title II (common carrier) of the Communications Act of 1934,"[Document(id=7aba2d10978318bc5abba467ce5b46b678a83c63b04c2770eb6bfdb5855d1dce, content: ' On 16 January 2015, Republicans presented legislation, in the form of a U.S. Congress H.R. discussi...', meta: {'name': 'Internet_service_provider', 'source_id': 'b6cdeebacab1c0fc99cc348ba10a7ad5f8a09ca4691c74f89374730939ffbbb4', 'page_number': 1}, score: 0.5902996828694462)]",1.0,0.3202260136604309,1.0,1.0,1.0 +What are the ground-attack aircraft in the USAF usually deployed in support for? ,U.S. ground forces,"interception of bombers and other fighters, reconnaissance, and patrol","[Document(id=d2709fd4938aadfc321fd75534f338a1182dec7bcf787b2638ddb43d1a59b926, content: ' Special Operations Command (USSOCOM). It conducts long-range, special operations missions, and is e...', meta: {'name': 'United_States_Air_Force', 'source_id': '1bb222de66e5a2c81f1d5b75bf3eced066939aec82934a5cb176a607b4e8cc8f', 'page_number': 1}, score: 0.6432047475010534)]",1.0,0.4177878797054291,1.0,1.0,1.0 + In what year did the invasion of Russia take place?,1812,1853,"[Document(id=e2b0b7fb7245406ebe95cec6e89cc25fccdc9386ef0e980ff8bd1bb4c36f9a9c, content: 'The Crimean War was a military conflict fought between October 1853 – March 1856 in which Russia los...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 0.583131325566129)]",0.0,0.672028660774231,0.0,0.0,0.0 +What was the practice of religion to the Romans?,personal choice,to absorb the deities and cults of other peoples,"[Document(id=876b4580a64fd77fb8816f9aef13a054a2ac16b7e85433b15298374090048432, content: ' Women, slaves, and children all participated in a range of religious activities. Some public ritual...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.6999203808475947)]",1.0,0.13120442628860474,1.0,1.0,1.0 +What is one example of failure testing?,Software fault injection,A programmer makes an error (mistake,"[Document(id=b6392c280d0afea37544d7517bbd9644cc64cc62815ef9e4f6ae7774c9575aff, content: 'As the number of possible tests for even simple software components is practically infinite, all sof...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.5737161988389456)]",1.0,0.3859443664550781,1.0,1.0,1.0 +"Out of the maximum amount of players allowed, one must be a what?",goalkeeper,three,"[Document(id=85f535796cef4ec10db8a4fd4e565960608be55322b93bae284e720192b9a26d, content: ' +A number of players may be replaced by substitutes during the course of the game. The maximum numbe...', meta: {'name': 'Association_football', 'source_id': 'f2b7305b09ad8a43b8389a5eb2d84b8ea4b4deaa7968d21fe9da9da9ecda2c6b', 'page_number': 1}, score: 0.4497446649281802)]",1.0,0.18245230615139008,1.0,1.0,1.0 +In which year was the new constitution ratified?,1855,1899,"[Document(id=e93f1498d324a356d0a273aea97c15c32b0eec4faf0660735b4c27f7c165effa, content: ' The white conservative Democrats regained control of the state legislature in 1870, in part by Ku K...', meta: {'name': 'North_Carolina', 'source_id': '3282e3dedcf0f1923e66ebf5b79cf9b9deec27c8ca1df8908e03770b02b38126', 'page_number': 1}, score: 0.49302690301412233)]",0.0,0.5596075057983398,0.0,0.0,0.0 +What vessel was attacked in 1845?,Naiad,Naiad,"[Document(id=c450c10c9cc7ec2362a5a8c23142549f03d77146379f9bb96ab788f8c91887e3, content: ' One year later, the American schooner Dolphin arrived and picked up two boys, the last survivors of...', meta: {'name': 'Marshall_Islands', 'source_id': '762a4f6095de127ded5441c7a17d1c9c38f731ac9cb1ffe8de30af6573be7edf', 'page_number': 1}, score: 0.5136154001583656)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did Travel + Leisure list Yale as one of the most beautiful campuses?,2011,2011,"[Document(id=afdbd08089615eb2bbb9dd9a5945a7e094c57e1f63a4d1f083e6183556a9f3e7, content: ' Moreover, Yale has been accused of failing to treat workers with respect. In a 2003 strike, however...', meta: {'name': 'Yale_University', 'source_id': 'f33638c4d6b8d7572ba500f3a8affca1729aae3046c83a94e2a7424dd50f6549', 'page_number': 1}, score: 0.5912663052349812)]",1.0,1.0,1.0,1.0,1.0 +What sort of variants did the Human Genome Project indicate that many of the measured transcripts were?,alternative variants,non-repetitive,"[Document(id=d88785419c04d2e3be28c54e0214054f95e1a7a26e5f7345487f082329682c93, content: ' In December 2013, scientists first sequenced the entire genome of a Neanderthal, an extinct species...', meta: {'name': 'Genome', 'source_id': '18d3410858e240e5596ce2f22db31a3f6dcd18aba98f4fb8d8e801b7e537aceb', 'page_number': 1}, score: 0.5257621576905613)]",0.0,0.2370980978012085,0.0,0.0,0.0 +During what year did the Italians invade Libya?,1911,1911,"[Document(id=d5a42bacf8a08d601a973e5bad25b36c41015f76de0d512f9c3130d5e87ae4de, content: ' He repeatedly expressed a preference for the desert over the city and retreated to the desert to me...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.5997149229460172)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What did Davidson believe the God of Aristotle is synonymous with?,rational thought,rational thought,"[Document(id=3c9b21a4d224bb82a4e4fc744a7bd671552ea8d96f2928fcf811163166442019, content: ' The Nature of Existence (1927) contained his arguments that space, time, and matter cannot possibly...', meta: {'name': 'Idealism', 'source_id': '55a0bd42a4a737442a2d93fe0926424e236910de01b2406ed43c174f2225ff9d', 'page_number': 1}, score: 0.5788858628692617)]",1.0,1.0,1.0,1.0,1.0 +What percentage of students at Notre Dame are the children of former Notre Dame students?,21–24%,21–24%,"[Document(id=bd4681af4e65cd44fb09639ab1c33814925a7f0631458709bac52a44932b0dab, content: ' Around 21–24% of students are children of alumni, and although 37% of students come from the Midwes...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 0.5931308200019992)]",1.0,1.0,1.0,1.0,1.0 +On what day did U.S> planes located Ozawa's fleet?,second,24 October,"[Document(id=67d402d488ad87fcda2d30f304cebc3d3ab52520c994807e5b06e892c3a0e1e1, content: ' The carriers had only 108 planes. The force was not spotted by the Allies until 16:40 on 24 October...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 0.6829916731914795)]",1.0,0.2516951560974121,1.0,1.0,1.0 +Greek was crowned the European champions in what event?,Euro 2004,basketball,"[Document(id=4af194e6a549f3cca324fac69732006a675ddeb8e841c964604dff19122e4f16, content: 'O.K, Maroussi) have won 3 Triple Crowns, 5 Saporta Cups, 2 Korać Cups and 1 FIBA Europe Champions Cu...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.6624614023398475)]",1.0,0.34321069717407227,1.0,1.0,1.0 +Which tense indicates that an action hasn't finished?,imperfective,imperfective,"[Document(id=b086dbc67b80e6f26b1e823c6ec440f8f645661a3c528184598912bb2cd8d10b, content: ' On occasion of the publication’s 45th anniversary, the Croatian weekly journal Forum published the ...', meta: {'name': 'Serbo-Croatian', 'source_id': '2cc55814bc801258e8300ed82e003428dbf11715c74ad99297d61c5dd77023f4', 'page_number': 1}, score: 0.31092698913374395)]",1.0,1.0,1.0,1.0,1.0 +What acoustic element could be distorted by different audio settings in iPods prior to 2007?,Bass,bass,"[Document(id=9981dc70836c47f0a547f15b216e9fb496fc8ea7433b9c52bb49fac4510a3bef, content: ' Grasso of New Jersey had originally listed an ""iPod"" trademark with the U.S. Patent and Trademark O...', meta: {'name': 'IPod', 'source_id': 'f0324763387a7e11c235569bada6935891b2c5ba40dd065ab5acc1bb5de8dbed', 'page_number': 1}, score: 0.608858855032559)]",1.0,1.0000001192092896,1.0,1.0,1.0 +In what way can Neutral triatomic hydrogen exist?,excited form,excited form,"[Document(id=f649524cdd28fbe2ba5a3cadae832b66d8fef791e1b4613b5d6705e7ca2b49a9, content: ' As a plasma, hydrogen's electron and proton are not bound together, resulting in very high electric...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.6038299428337875)]",1.0,1.0,1.0,1.0,1.0 +How are children used in the conflicts in Burma?,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements.,"Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements. The Independent reported in June 2012 that ""Children are being sold as conscripts","[Document(id=e39a424de393c7814379d5771c4abc998c371eadee31f5a4692a573651cdce01, content: ' South Africa also voted against the resolution. +There is consensus that the military regime in Myan...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.6046787751748843)]",1.0,0.8568992018699646,1.0,1.0,1.0 +In what decades were Arsenal players stereotyped in a negative way?,1970s and 1980s,1970s and 1980s,"[Document(id=23ce9b5b052679777e1f4db02c71a154ef112d1634274e17ec804b5f383f1134, content: ' On 22 January 1927, their match at Highbury against Sheffield United was the first English League m...', meta: {'name': 'Arsenal_F.C.', 'source_id': 'b197b14120aabbc3bf2073d4b3e99c1c51d56d5ac5248f8a97dadcfa056a6f7e', 'page_number': 1}, score: 0.5498172548089693)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did new construction start in Melbourne?,late 1890s,2000s,"[Document(id=c35394598ae512de45396f20eb9657cd39aa8feaea7505f76ad712e7cf8a9c19, content: ' According to the Australian Bureau of Statistics, Melbourne sustained the highest population increa...', meta: {'name': 'Melbourne', 'source_id': 'e9cc00699ea29b72949874ec0e147efa277ecdaa2a727246f376aab1a8d342bd', 'page_number': 1}, score: 0.6020077079508841)]",1.0,0.46762651205062866,1.0,1.0,1.0 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:1__chunk_size:5.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:1__chunk_size:5.csv new file mode 100644 index 0000000..2fe74c3 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:1__chunk_size:5.csv @@ -0,0 +1,125 @@ +questions,true_answers,predicted_answers,retrieved_docs,doc_mrr,sas,doc_map,doc_recall_single_hit,doc_recall_multi_hit +What were the earliest priesthoods? ,flamines,flamines,"[Document(id=99729d5c09ec68a18e6cb9e326f59027b5c69bc74e05e8c8c0d825360370bf5e, content: ' In household cult, the paterfamilias functioned as priest, and members of his familia as acolytes a...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5674900599530198)]",1.0,1.0,1.0,1.0,1.0 +When was a third visit to Mnafeld scheduled? ,early 1546,Musharraf agreed to give the U.S. the use of three airbases for Operation Enduring Freedom. United States Secretary of State Colin Powell,"[Document(id=b125a71d25ec0dbd53a36a18b59518a04fd4d2eabf3279289747c0ff9d95561b, content: ' Musharraf agreed to give the U.S. the use of three airbases for Operation Enduring Freedom. United ...', meta: {'name': 'War_on_Terror', 'source_id': 'b9ac47a39d382362992253f0ccfa0072ea6216fe1daa748c4899980b467292f4', 'page_number': 1}, score: 0.43165641729518267)]",0.0,-0.005649004131555557,0.0,0.0,0.0 +What was the name of the airfield where heavy raids took place?,Farnborough,Rotterdam,"[Document(id=3fe9edc56bd80e018f3b5d2c1a425a36eeeae2507d5266fa8dcf4b61ff983f68, content: ' Thereafter bombing operations were against ports and shipping and propaganda leaflet drops. Operati...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 0.5911931526438963)]",1.0,0.3784560561180115,1.0,1.0,1.0 +The Modern English ending -'s is derived from what ending in Old English?,-es,-es,"[Document(id=e5cb80c1093f573ce5c1d255322723cb12056865ea80e45b815e3467be2f33fd, content: ' It maintains several distinct cases: the nominative, accusative, genitive, dative and (vestigially)...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.6521761611401243)]",1.0,1.0000001192092896,1.0,1.0,1.0 +In what area is it common for spiritual mentorship to be extremely high?,the West,Protestant and Non-Denominational traditions,"[Document(id=540182e8bbf642679e675c620838f54822163c84a6963cd26e2c6bb9b8f209e7, content: ' Also, with the advent of the internet, many teachers are now selling their lesson plans to other te...', meta: {'name': 'Teacher', 'source_id': '8d3b7bd3176a10380300a49fa713b8fffa5e77a181df51ea823d2bdeb46d4c7b', 'page_number': 1}, score: 0.5329079416622291)]",1.0,0.1965978592634201,1.0,1.0,1.0 +Who is the only Greek volleyball club to win European titles?,Olympiacos,Panathinaikos,"[Document(id=026227f3702b0787154427ea73d148dcd6a4f18ee99d18c27d07b0ef11b65e25, content: 'O.K. Greek basketball teams are the most successful in European basketball the last 25 years, having...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.6736603406595517)]",1.0,0.7261656522750854,1.0,1.0,1.0 +How many judges were on the panel for this season?,four,three,"[Document(id=5a76d0c122f92ef189448d61efc0333c0e7eb4caa25ac15e516eca3fccbdd7aa, content: ' +The show had originally planned on having four judges following the Pop Idol format; however, only ...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 0.6322883929113561)]",1.0,0.8083884716033936,1.0,1.0,1.0 +"Along with a chemical process, what type of process is used by night vision devices to convert infrared into visible light?",electrical,infrared light-emitting diodes,"[Document(id=d44adf593e34d3121d568db8ecc9e819e5b21894cde7cc7fac048fc3c5d2016f, content: ' Infrared light sources can be used to augment the available ambient light for conversion by night v...', meta: {'name': 'Infrared', 'source_id': 'ba9d9843065facd8aa75e472f0c7d760ae3d78712335a49f7abaf499acb21d1c', 'page_number': 1}, score: 0.6637731014692062)]",1.0,0.26119592785835266,1.0,1.0,1.0 +The second most prominent religion in New York is what?,Judaism,Judaism,"[Document(id=4437c778daec446c4362623ed3261e5ec4170314f4aa14f8381b8c7ee6eef3fe, content: ' +Christianity (59%), particularly Catholicism (33%), was the most prevalently practiced religion in ...', meta: {'name': 'New_York_City', 'source_id': '8a1509d057a51ed7a68b42ff87df5d459ac7f15f3d0f4f1685859ba197f9686d', 'page_number': 1}, score: 0.6643419003251825)]",1.0,1.0000001192092896,1.0,1.0,1.0 +The Free Hanseatic City of Bremen includes which cities?,Bremen and Bremerhaven,"Bergen (in Norway), Stockholm (in Sweden), and Vyborg","[Document(id=5cf9bba4037ffb12d79f2ab44ea7b22d5556d1f47b8e6bb302f92f2d93799518, content: ' Along the trade routes, Hanseatic trade stations became centers of the German culture. German town ...', meta: {'name': 'Germans', 'source_id': '87c5e28d01065fdd517ee7c4c4b9b83b8b40b35a4d9ba5664f523e4485346f24', 'page_number': 1}, score: 0.6015261078515344)]",0.0,0.4704947769641876,0.0,0.0,0.0 +What seeks to limit the use of polysyllabic characters?,CPC,Unicode,"[Document(id=a2704816695442784b882622291610903319372e642887790e8945f14945ffa6, content: ' Some programming languages, such as Seed7, use UTF-32 as internal representation for strings and ch...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.577312415921709)]",0.0,0.14035390317440033,0.0,0.0,0.0 +In what courts can federal habeas corpus suits be brought?,federal,"appellate court finds that significant legal errors did occur, then it will reverse the judgment, or nullify the sentence and order a new capital sentencing hearing. Lastly, if the appellate court finds that no reasonable juror could find the defendant eligible for the death penalty, a rarity, then it will order the defendant acquitted, or not guilty, of the crime for which he/she was given the death penalty, and order him sentenced to the next most severe punishment for which the offense is eligible. About 60 percent survive the process of direct review intact. +Under the Antiterrorism and Effective Death Penalty Act of 1996, a state prisoner is ordinarily only allowed one suit for habeas corpus in federal court","[Document(id=7e0d5332e60a37e652cd2a51cc22044fd5e6ae3218102012d3b17416fb99fc7e, content: ' If the appellate court finds that significant legal errors did occur, then it will reverse the judg...', meta: {'name': 'Capital_punishment_in_the_United_States', 'source_id': '0a4e4dda32f7be6a4f78da5ed9ee21d2cc7659112c0d087b2923a482bdc77b5b', 'page_number': 1}, score: 0.5589142109780263)]",1.0,0.17290771007537842,1.0,1.0,1.0 +What did Whitehead consider himself as a philosopher? ,rank amateur,rank amateur,"[Document(id=b0360cb0e79aa53fce9b6b987065d38127ef8c7392ded59d294e940b3fc8c15b, content: ' For Whitehead, education should be the exact opposite of the multidisciplinary, value-free school m...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 0.6532748672096498)]",1.0,1.0,1.0,1.0,1.0 +What condition does a person have to meet to be allowed by law on a coin?,deceased,payment of debts,"[Document(id=4f60e5dd26c7b2626d120c6cbdaee951099743a32fe85550cdf8b453dffe6e1c, content: ' make any thing but gold and silver coin a tender in payment of debts"" clause in article 1, section ...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.5093472945454142)]",1.0,0.1231364756822586,1.0,1.0,1.0 +Which region of the US experiences rain year round?,Pacific Northwest,Great Plains,"[Document(id=0c9477ecd40a6b89942238d08ca8bdbf7ec3347c54580c53f9612bc7f972ea70, content: ' The uplands towards the Appalachians were included in the great eastern forested area, while the we...', meta: {'name': 'Geography_of_the_United_States', 'source_id': '6eef5e1e8e3a1d489b23e0304d3cc9472b84a44871b2fd5ecfc8ab3f12e8ccf0', 'page_number': 1}, score: 0.6291648476958775)]",1.0,0.513728141784668,1.0,1.0,1.0 +What is the ability to recognize and adapt to new specific pathogens called?,Adaptive (or acquired) immunity,Adaptive (or acquired) immunity,"[Document(id=af9fa957dfd959c9905dc3b94a52b8ecc9ecb9c39df2fdb2c000fd9ec4597970, content: ' Even simple unicellular organisms such as bacteria possess a rudimentary immune system, in the form...', meta: {'name': 'Immune_system', 'source_id': 'fcb8c5092b2596a66d4f96587d653cbabda606874d86632c35ec40ef8f023051', 'page_number': 1}, score: 0.654858513560027)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who allegedly hit a home run to the Center?,Babe Ruth,Sam Snead,"[Document(id=6929c42ad466bed7c14eac422c4d021a4f3e192fc8b2852b17a71ac3d6c8106e, content: ' +On May 11, 2000, Glenallen Hill, facing Brewers starter Steve Woodard, became the first, and thus f...', meta: {'name': 'Chicago_Cubs', 'source_id': '10bd5330115a2fe2b9ef60d21abe3942769709161783409af0c4444ccac129c3', 'page_number': 1}, score: 0.5406649611698102)]",1.0,0.40077218413352966,1.0,1.0,1.0 +Which type of graduate from Imperial earned the second highest average starting salary after graduation?,Computing graduates,Ph.D.,"[Document(id=8d74cf3e7a3897f2c2f0163d507374839ffb8775af40004e60fc174f1aea1f7b, content: '), Ph.D. and D.Sc.(Tech.', meta: {'name': 'Institute_of_technology', 'source_id': 'bd3c1ce78fe4328bf8eb2cbd8027759a2a927528ce7e496ce3ebcf9675bc060e', 'page_number': 1}, score: 0.517905680620544)]",0.0,0.45661261677742004,0.0,0.0,0.0 +In what year did Eisenhower propose his second civil rights act?,1960,1957,"[Document(id=e20106e1401ce2522cd3d14d13fae9ea782170d84e4c3db914247513b098ce09, content: '.. a discrimination in the expenditure of those funds"". +Eisenhower told District of Columbia officia...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 0.7159989523516124)]",1.0,0.7518350481987,1.0,1.0,1.0 +What is the current height limitation in Paris in central areas?,50 m,50 m (160 ft),"[Document(id=d89cdeedbc8441f4e2c52a3575e8c530fcce225561404b37c7cdd108d1cc549f, content: ' Modern Paris owes much to its late 19th century Second Empire remodelling by the Baron Haussmann: m...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.7434933599873272)]",1.0,0.804789662361145,1.0,1.0,1.0 +Income level was found to have a signification correlation to which outcome in the study?,nutrition literacy,Crime rate,"[Document(id=dcd8da4b243b1a8d91858407ae3ae6dac8e86b8331f09a52ba058f6361484420, content: ' Other studies have confirmed this relationship. The UNICEF index of ""child well-being in rich count...', meta: {'name': 'Economic_inequality', 'source_id': '648ff32b8c146c974b23e115e3a61420b1cdbea9de1f5bf38442ce4c1d9caa42', 'page_number': 1}, score: 0.5496006903263113)]",0.0,0.0639934316277504,0.0,0.0,0.0 +How much did Viacom want in damages in their lawsuit against youtube?,$1 billion,$1 billion,"[Document(id=720eb3795a623e31335108bcc29a4b774e33272bb625d5968983846dd3b35f68, content: ' Three successful complaints for copyright infringement against a user account will result in the ac...', meta: {'name': 'YouTube', 'source_id': '4d18798daeceb997a6dcb04b575dbbdad768671ca9202116370611d944d510be', 'page_number': 1}, score: 0.710128454829141)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who should be able to challenge administrative orders in court?,individuals,individuals,"[Document(id=cf59c0f0619e715b979051e0a84530bf73463713fa11f2961866d233ac2537f5, content: ' Dicey's: “no man is punishable or can be lawfully made to suffer in body or goods except for a dist...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 0.6138198489552974)]",1.0,1.000000238418579,1.0,1.0,1.0 +The Slavs were under what name in the early 6th century?,the Antes and the Sclaveni,East Slavs,"[Document(id=701344efe2101165a535fe45e922b01123011116f4f123fcd5b1668edbb29898, content: 'Slavs are the largest Indo-European ethno-linguistic group in Europe. They inhabit Central Europe, E...', meta: {'name': 'Slavs', 'source_id': 'a55731ff3cd03d8fe34d4e646dbfcdc2088f01190e5110c9ff0baf4e4b486847', 'page_number': 1}, score: 0.70879690862351)]",1.0,0.44139364361763,1.0,1.0,1.0 +How are the results simplified when using the SASO?,"into four summaries that look specifically at responses that correspond to either homosexuality, heterosexuality, bisexuality or asexuality.",four summaries,"[Document(id=f86f4f0c6b96ee3b686893cc41683ca450066b225884d3390504ef3124153018, content: ' +The SASO consists of 12 questions. Six of these questions assess sexual attraction, four assess sex...', meta: {'name': 'Sexual_orientation', 'source_id': 'f94279650936a403aa3d2b43ab1ad42b1fd14d05455aa86fbcda6cda978ffe5c', 'page_number': 1}, score: 0.45192740188947417)]",1.0,0.47273075580596924,1.0,1.0,1.0 +What socioeconomic class used the Florentine Tuscan language in Dante's time?,upper class,literate and upper class,"[Document(id=d25b410d7ee734d39ca37d5429e9393300e12f25ab3ed13f9a91eb5740bf305f, content: ' Due to Eastern Lombard's status as a Gallo-Italic language, an Eastern Lombard speaker may, in fact...', meta: {'name': 'Dialect', 'source_id': '2e05b686e48c9512ae5466f1d15202b44fdc903ea32d0a42c5dbc9ef173d051d', 'page_number': 1}, score: 0.6914606586923586)]",1.0,0.7628111839294434,1.0,1.0,1.0 +"In addition to ammo proposals, what other need was articulated?",The need for some form of tracer or smoke trail,time types,"[Document(id=9d1747bc11e311df0105a0308df3eb29ec64ee3c1e3b7335356131a26f7421d1, content: ' Fuzing options were also examined, both impact and time types. Mountings were generally pedestal ty...', meta: {'name': 'Anti-aircraft_warfare', 'source_id': 'c54639424f62ce369356ed8c4e1e9fab8769359dd28029aa89832f5aa994f45d', 'page_number': 1}, score: 0.43523273125444223)]",1.0,0.007751554250717163,1.0,1.0,1.0 +What is that power that enables us to love?,Prevenient grace,true understanding and judgement,"[Document(id=dd10a5e31ac16d363b33b098b3febd4a104f6e071593bea34da863f68a17ee8f, content: ' It is not contradictory to say that someone is white-toothed and not white"". +Albert the Great wrote...', meta: {'name': 'Dominican_Order', 'source_id': 'f55ba4e5b37c737e23256c74229a0bf3a5c697b70e171c6ce20c437d900b023e', 'page_number': 1}, score: 0.48367023733439174)]",0.0,0.39556270837783813,0.0,0.0,0.0 +"Who was assassinated on September 22, 189 AD?",He Jin,Caesar,"[Document(id=9251dafda92232db54143d3e1f9a589eeb4b79fd5873fc0b101379c7cfe3f5ba, content: ' Thus, the assemblies became powerless and were unable to oppose him. +Caesar was assassinated on Mar...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.5181739480686895)]",0.0,0.1853158324956894,0.0,0.0,0.0 +How did Gaddafi change the legal system?,he introduced sharia as the basis for the legal system,sharia law,"[Document(id=1b2e436fd42f2c0f0d0cc510814e434b693f9c004a137710dd97898a0cc1af7f, content: ' Gaddafi proclaimed that the People's Congresses provided for Libya's every political need, renderin...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.6066777348111518)]",1.0,0.6366350650787354,1.0,1.0,1.0 +What name was it assumed Elizabeth would take upon her marriage to Philip?,"Mountbatten,",Elizabeth,"[Document(id=e654e9ff005029974fb352ae5cb703e1734edb2deffb25e38aadc6d3624aea0c, content: ' On 6 February 1952, they had just returned to their Kenyan home, Sagana Lodge, after a night spent ...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.6176147417529756)]",1.0,0.27963173389434814,1.0,1.0,1.0 +What was the attack on the British weakness?,Battle of Fort Bull,Minorca in the Mediterranean,"[Document(id=ec4515c9557988b0fec18967610f700f87d76c31c093e709fb322770706f03b8, content: ' It proved completely successful. Pitt had a clear appreciation of the enormous value of imperial po...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 0.5872765017217323)]",0.0,0.0038108360022306442,0.0,0.0,0.0 +How much of South Tucson is Native American?,10%,10%,"[Document(id=19e07536fc4d6a422f717376ebb5c165dc66d2b84c2b4069db4dbb4757a892d6, content: ' It was first incorporated in 1936, and later reincorporated in 1940. The population consists of abo...', meta: {'name': 'Tucson,_Arizona', 'source_id': '1cd8368afbbb7156e4d37c16c9795a1cf7209fbd0036331b6759337ae62d7224', 'page_number': 1}, score: 0.6000612210112335)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Whose theory claimed that species were unchanging and designed by God?,John Ray,Jean-Baptiste Lamarck,"[Document(id=1e7ee2b8973cbe904852b74345906a810d752f622915c33c467ffca1e65e0dc0, content: ' The Ussher chronology of the 1650s had calculated creation at 4004 BC, but by the 1780s geologists ...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 0.6845480977766352)]",1.0,0.36553826928138733,1.0,1.0,1.0 +Who can restore a database that has been corrupted?,database administrator,dedicated backup files,"[Document(id=d62f58c8737fe86268de0ebf0cb0ef4eae1f9ba5f33735f7cadb2e83beb9e377, content: 'g., cases when the database is found corrupted due to a software error, or if it has been updated wi...', meta: {'name': 'Database', 'source_id': '98b09743ece9be42ad021f0dc6ffab08c322e7eeace6aae0b98b4bbf67224bd3', 'page_number': 1}, score: 0.4885855296732807)]",1.0,0.11919853091239929,1.0,1.0,1.0 +What unpopular philosopher's text did Avicenna learn from?,Abu Abdullah Nateli,Quran,"[Document(id=4929afcbf8a25329cc28344a59a6da0673a0b29592a46e295b09e09e3666b249, content: ' Avicenna's views on Islamic theology (and philosophy) were enormously influential, forming part of ...', meta: {'name': 'Avicenna', 'source_id': '25cbb1011a29473e1dfa8d96ebe95bc8ead7393548de57e871f959624f7238f1', 'page_number': 1}, score: 0.7274513909923354)]",1.0,0.34279826283454895,1.0,1.0,1.0 +What was the event called that caused Ayutthaya to reclaim lost territories ?,the Chinese threat,Ayutthaya seized Tenasserim and Lan Na,"[Document(id=ca6db29fb81cf6a69632107c67ace9bdd1fdd8f61142978a0684c6cd6f540122, content: ' Ayutthaya seized Tenasserim and Lan Na, and Portuguese mercenaries established Portuguese rule at T...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.613164344421885)]",1.0,0.22248710691928864,1.0,1.0,1.0 +"Before the introduction of the Latin alphabet, how was Old English written?",runic system,runic system,"[Document(id=8825405ebdc1bb4daf623185ae693fe4f2cab9da4dcca330ccb2ab3f6b05d6e5, content: ' The modern cognates of original words have been used whenever practical to give a close approximati...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.6676853832427841)]",1.0,1.000000238418579,1.0,1.0,1.0 +"When groups that are antagonistic to each other form a coalition for hidden gain, it is called what?",unholy alliance,unholy alliance,"[Document(id=f1fa43fc9c1c5bd46b9f5455701bf99b0e98d4579816d755389bcc2dbba1bcda, content: ' This sum itself may be all or a portion of the difference between the actual (inflated) payment to ...', meta: {'name': 'Political_corruption', 'source_id': 'e7c84fb03cb06b72c4ab51eb24218e9f2a10b95493e43d575342db0ffa3e9e1d', 'page_number': 1}, score: 0.5395585598917987)]",1.0,1.0,1.0,1.0,1.0 +What made a strong economic case for investing in low carbon technologies?,Stern Review,the Stern Review,"[Document(id=333c4da3ee155b7d7b4ddf91857cd1ec4e1c3cafb4159ebedda18347755b3f1f, content: ' In the US the main tool to track and stimulate voluntary actions is Green-e program managed by Cent...', meta: {'name': 'Renewable_energy_commercialization', 'source_id': 'fa5f7904b8df5272f4f870ed401f610435c916171c87b47deed9a45e6bd594ea', 'page_number': 1}, score: 0.5373316142063316)]",1.0,0.8556329011917114,1.0,1.0,1.0 +"Who introduced the Latin version of the word ""insect""?",Pliny the Elder,Pliny the Elder,"[Document(id=e8da082f5022b804801ddb2778f16c32226f0ecb59f3ed7f8fcdae17653f162c, content: ' +The word ""insect"" comes from the Latin word insectum, meaning ""with a notched or divided body"", or ...', meta: {'name': 'Insect', 'source_id': 'e14215fdfb7e1394430dedb24549d773464f110b7f19b86558bc101f84126630', 'page_number': 1}, score: 0.8359677035541203)]",1.0,1.0,1.0,1.0,1.0 +What was started by the Roman Catholic Church to dispense of the Cathars in Languedoc?,The Cathar Crusade,The Cathar Crusade,"[Document(id=08215551b881c7cefad5bb5baaaddc4d6f9a5802a709d61e32985651e2115c85, content: ' By the 11th century, more organised groups such as the Patarini, the Dulcinians, the Waldensians an...', meta: {'name': 'Heresy', 'source_id': '8ba473262f8508a9c99699bb09cd022993896ece1fed1463a9cf4bd7eb0c3e68', 'page_number': 1}, score: 0.5551851931173127)]",1.0,1.0,1.0,1.0,1.0 +What music service is a huge competitor for Tidal?,Spotify,Aspiro,"[Document(id=ef9c78ac21a913a9aad74111f50f9247dd1e578b598b35e509b5dfb85cdd75a6, content: ' Contemporary critics have noted the consistent minimalistic aesthetic exhibited throughout DONDA cr...', meta: {'name': 'Kanye_West', 'source_id': '4693bb2ae0f5b7c5ed3788607bba35813f42eada13da446f4c397c0715d2dc91', 'page_number': 1}, score: 0.5907996679081687)]",1.0,0.2630590498447418,1.0,1.0,1.0 +How much was each copper token worth?,a halfpenny,$20.67,"[Document(id=6152e2b000a84ff02363a09c1ff7cf6d282c98d7934d1b527c81592756150e30, content: '22 grains (1.505 g) of gold, equivalent to setting the price of 1 troy ounce of gold at $20.67. Silv...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.5022121865285567)]",0.0,0.47537320852279663,0.0,0.0,0.0 +What ships were attacked in 1852?,Glencoe and Sea Nymph,A number of vessels visiting the islands,"[Document(id=06462bba1bbaf61b1e06ec260c9ebb15ac9f6984188e161293ac729b7950da5d, content: ' One year later, the American schooner Dolphin arrived and picked up two boys, the last survivors of...', meta: {'name': 'Marshall_Islands', 'source_id': '762a4f6095de127ded5441c7a17d1c9c38f731ac9cb1ffe8de30af6573be7edf', 'page_number': 1}, score: 0.5445998527389424)]",1.0,0.2269967794418335,1.0,1.0,1.0 +What was the first fossil to display both clearly traditional reptilian characteristics?,Archaeopteryx,Archaeopteryx,"[Document(id=22e5680a1c41aa000971ff89fccd80f808c8f88ae7d0f5c0d81eb763ce026955, content: ' Unlike Archaeopteryx and the non-avialan feathered dinosaurs, who primarily ate meat, recent studie...', meta: {'name': 'Bird', 'source_id': '76114e3e9c92caec1de2bcfd38b6f22e1b48487f665fa6392f914934a6748ae1', 'page_number': 1}, score: 0.6138421968979502)]",1.0,1.0,1.0,1.0,1.0 +What are the theories that integrate molecular genetics with Darwinian evolution called?,the modern evolutionary synthesis,the modern evolutionary synthesis,"[Document(id=54fb8e44f718e454d5f4aef7097036553aa91b829fc2ea08644a1f9fc17e50f5, content: ' +The theories developed in the 1930s and 1940s to integrate molecular genetics with Darwinian evolut...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 0.6807694823421528)]",1.0,0.9999998807907104,1.0,1.0,1.0 +At what time did Tesla get dinner?,8:10 p.m,eight o'clock,"[Document(id=5dfd5078daafbc2ef3695636e6d70b42db2abc79ac389c20eac0417969714c11, content: ' Tesla would telephone his dinner order to the headwaiter, who also could be the only one to serve h...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.7466485072261548)]",1.0,0.7012972831726074,1.0,1.0,1.0 +In what year did the French reach Alsace?,1444,1444,"[Document(id=18dbb6f8a4452ee88a36944ed7b5a77c2233677f5c2c84c6d98113a4c1519651, content: ' After the conclusion of the war, France was again free to pursue its desire to reach the Rhine and ...', meta: {'name': 'Alsace', 'source_id': '1105499d303196fb0b7ae4d26209720b0040e63376f114d458fd77f05836cbd4', 'page_number': 1}, score: 0.639941686896402)]",1.0,1.0,1.0,1.0,1.0 +Who was responsible for the handling of the Roman census?,two censors,censors,"[Document(id=3749ef6e14ea9af3893cfd84917e3a39acc2212480fc846e5a9231414a5599e6, content: ' Praetors administered civil law and commanded provincial armies. Every five years, two censors were...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.5740412708539135)]",1.0,0.806065022945404,1.0,1.0,1.0 +When was a resolution agreed to about Chinese human rights issues in San Francisco?,"April 1, 2008","April 1, 2008","[Document(id=80f5e2e80143434aa6d2d78a16f411f7f6ab4f8ffffbd29914ad0c75e8bf31da, content: ' The cost to the city for hosting the event was reported to be USD $726,400, nearly half of which ha...', meta: {'name': '2008_Summer_Olympics_torch_relay', 'source_id': 'a45e6ea09a1ff12d8a239a682c8ef3735ae894d5ab6427b50909053bb046e8b5', 'page_number': 1}, score: 0.594644661480704)]",1.0,1.0,1.0,1.0,1.0 +Who was the last pope to be crowned?,Paul VI,Angelo Giuseppe Roncalli,"[Document(id=d025eebf35da454623a00c5d71d009d27de02dc1bdc601d3e2b18ee0f2c3a436, content: 'Pope Saint John XXIII (Latin: Ioannes XXIII; Italian: Giovanni XXIII) born Angelo Giuseppe Roncalli,...', meta: {'name': 'Pope_John_XXIII', 'source_id': '21b200b333306d54410b53cfd12e123951dabfc1f534196e5309132d00e8184d', 'page_number': 1}, score: 0.5774459991618778)]",0.0,0.38023999333381653,0.0,0.0,0.0 +Who handles serious medical emergencies on Norfolk Island?,the Royal Australian Air Force,Royal Australian Air Force,"[Document(id=c983e8f9fccac478cd7d895c53ca7636f4291c3a87693dd68246b03e75a4be42, content: ' All visitors to Norfolk Island, including Australians, are recommended to purchase travel insurance...', meta: {'name': 'Norfolk_Island', 'source_id': '8be872c9c3302a16163599d52ec60841ddff13042f8e8e2e0aa14de2c192e2ee', 'page_number': 1}, score: 0.7097080153729567)]",1.0,0.9528100490570068,1.0,1.0,1.0 +Were Roman soldiers expected to purchase their own equipment?,"wealthiest citizens, who were able to purchase the best equipment","They were armed with a sword and a small shield, as well as several light javelins","[Document(id=33fcf4581f5ab78359aa184280cfd3d832482718892d7c7caea5758b15dcd90b, content: ' Their role in accompanying the army was primarily to supply any vacancies that might occur in the m...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.5265809360022261)]",1.0,0.21872466802597046,1.0,1.0,1.0 +Who did Nasser's group try to assassinate?,Hussein Sirri Amer,King Saud,"[Document(id=5d0ec0248e1b31849b4fcf2b6de857e2ffd286b834f600afc95bf4d1111c7ae6, content: '"" Nasser ordered a crackdown against Syrian communists, dismissing many of them from their governmen...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 0.6708211510684319)]",1.0,0.5504575967788696,1.0,1.0,1.0 +What was AJ Ayer an important member of?,Logical empiricism,educators,"[Document(id=f0501a0350c5212f0b765e4f2908281937b33d70767d79809051b0aab43bc078, content: ' +Other: Civil rights leader W. E. B. Du Bois; philosopher Henry David Thoreau; authors Ralph Waldo E...', meta: {'name': 'Harvard_University', 'source_id': '7f91a9284c2db4a8d1147861fa2c02c6a25281db1cbc78097950be011c0e6269', 'page_number': 1}, score: 0.377496105816545)]",0.0,0.2606774866580963,0.0,0.0,0.0 +In which year was a settlement reached regarding GE's pollution of the Housatonic River?,1999,1977,"[Document(id=f6a12178d43defb2a103deca0f4fb166a5e99e5a8e58be48c76be81bd5d2bd38, content: ' Around the same time, General Electric's Canadian counterpart, Canadian General Electric, was forme...', meta: {'name': 'General_Electric', 'source_id': '26d4044eb2d79191635af69e57c262af8041a70871e60e989128c458529f04cc', 'page_number': 1}, score: 0.544965494522029)]",1.0,0.5353437662124634,1.0,1.0,1.0 +What area in Charleston is considered the 'industrial area'?,the city's Neck area,downtown medical district,"[Document(id=0b7915915ef0141c882fe46817cee7fb758759c0f34482f57922cd19e5f5e512, content: ' The downtown medical district is experiencing rapid growth of biotechnology and medical research in...', meta: {'name': 'Charleston,_South_Carolina', 'source_id': '6135d76cf2064b0c4f70e685c5c7e15b8093869ebd10f9a74e8acb07e2173763', 'page_number': 1}, score: 0.5525530182515901)]",1.0,0.47365856170654297,1.0,1.0,1.0 +In what year did Frédéric obtain citizenship in France?,1835,1835,"[Document(id=2dc3b5cfb05bfeb4e2c0b072a5a5e6417b1109588ffcbd88ffd03346c8bc1975, content: ' In France he used the French versions of his given names, and after receiving French citizenship in...', meta: {'name': 'Frédéric_Chopin', 'source_id': '5e95b80d7ceb3febb2d1a7dfe09730771a231fc8d23c15cd74d3fa2327cfcb14', 'page_number': 1}, score: 0.6958497122775275)]",1.0,1.0,1.0,1.0,1.0 +What was Tesla afraid someone was trying to do with his invention?,steal the invention,steal the invention,"[Document(id=4d3444ecb9a546c18669ac8c8405c5f0787407509b989960d32c3dae162fd6f1, content: ' Tesla tried to interest the US War Department, the United Kingdom, the Soviet Union, and Yugoslavia...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.6860750023448956)]",1.0,1.0,1.0,1.0,1.0 +What is the group called that does not agree with government at all?,anarchists,U.S,"[Document(id=859e7515be05f408d44ec6f6949d11ce5d273b01383fe1e90b7f80b296f0d340, content: ' Bush administration, called for a new agency focused on the nonviolent practice of ""political warfa...', meta: {'name': 'Islamism', 'source_id': 'b18ef6af50cd5612358df7ee136f3b80ef65a22ebd6f502f95f8b0dec581619a', 'page_number': 1}, score: 0.5170605709380862)]",0.0,0.2188938856124878,0.0,0.0,0.0 +What was the name of the bookstore the authors met in?,Old Corner Bookstore,"The Reverend Alexander Dyce was another benefactor of the library, leaving over 14,000 books to the museum in 1869. Amongst the books he collected are early editions in Greek and Latin of the poets and playwrights Aeschylus, Aristotle, Homer, Livy, Ovid, Pindar, Sophocles and Virgil. More recent authors include Giovanni Boccaccio, Dante, Racine, Rabelais and Molière. +Writers whose papers are in the library are as diverse as Charles Dickens and Beatrix Potter. Illuminated manuscripts in the library dating from the 12th to 16th centuries include: the Eadwine Psalter[citation needed], Canterbury; Pocket Book of Hours, Reims; Missal from the Royal Abbey of Saint Denis","[Document(id=c4e299761e10f4cdbe2878ec63543541144638c306cbf9461b4fa00ff423a2e6, content: ' The Reverend Alexander Dyce was another benefactor of the library, leaving over 14,000 books to the...', meta: {'name': 'Victoria_and_Albert_Museum', 'source_id': '202e0997b99bb8f0aaa6dd9c09c05e008cf8809c92549b6c9f073b5d26d1a3dd', 'page_number': 1}, score: 0.5087816256203566)]",0.0,0.3749416172504425,0.0,0.0,0.0 +Time and space are both examples of what type of resource?,complexity resources,a systematic mental framework,"[Document(id=14e1565f9e55beb6f0a6a436ef63987e7ecfcabcded323a31ce3f0c38af15c48, content: '26) by which we simultaneously grasp the past in memory, the present by attention, and the future by...', meta: {'name': 'Time', 'source_id': 'f3ee42c6d38306d65c6207a15a1b9c65ff08a69728d5d0197f9593548177b229', 'page_number': 1}, score: 0.5150513533287606)]",0.0,0.29695814847946167,0.0,0.0,0.0 +What had happened to Frederick's extra troops from Russia?,"Catherine, who ended Russia's alliance with Prussia and withdrew from the war",Frederick moved the bulk of his eastern forces to Pomerania,"[Document(id=2e241d6a2d549d7fb7ef44a92718ad0b9c17de781fc1ddf4c4c4311b6193515f, content: ' Frederick sent urgent requests to Britain for more substantial assistance, as he was now without an...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 0.6627893603410437)]",1.0,0.37803566455841064,1.0,1.0,1.0 +What percentage of its workforce did Dell terminate in from 2007 to 2008?,10%,500,"[Document(id=8d7bd268e9b8cfd6057d2e498dc7571a13c5a996c79222e58712c761e9056cff, content: '” +On April 23, 2008, Dell announced the closure of one of its biggest Canadian call-centers in Kanat...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 0.620125362778158)]",1.0,0.34641900658607483,1.0,1.0,1.0 +What organization was Rome's official caretaker?,the senate,Latin League,"[Document(id=9ca7c0433d9b49e65fceb55924559b4d9540d1428ba52280dd6110b921c10d6e, content: ' +Each of Rome's legendary or semi-legendary kings was associated with one or more religious institut...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5396862807840719)]",1.0,0.193890780210495,1.0,1.0,1.0 +Who chose BYU as the location of the national Middle East Language Resource Center?,United States Department of Education,United States Department of Education,"[Document(id=10406721e1bacc4b94568efb8105444ce33c23b507417efe2e032c93615c0677, content: ' Several of its language programs are the largest of their kind in the nation, the Russian program b...', meta: {'name': 'Brigham_Young_University', 'source_id': 'cb3b81b6f2c540426803106face67bc5fd4db7b1a9ef422ec3601456732b4785', 'page_number': 1}, score: 0.7151687082822364)]",1.0,1.0,1.0,1.0,1.0 +Who discovered Hydrogen gas?,Robert Boyle,Robert Boyle,"[Document(id=694b7ad0ec30f16563c095bfd8aa73a286298746fa8b5b6b568961e806fe7462, content: ' +In 1671, Robert Boyle discovered and described the reaction between iron filings and dilute acids, ...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.7151291941894227)]",1.0,1.0,1.0,1.0,1.0 +Where was Polybius forced to go as hostage?,Rome,Rome,"[Document(id=39971eda0fac39993fb2ad01159df52ea10276610c72fad19cdc5a8d781c70e0, content: ' The earliest and most credible surviving source for the Hellenistic period is Polybius of Megalopol...', meta: {'name': 'Hellenistic_period', 'source_id': '322682cbb61d31c17e6d21e06723e94efd0ef242a5c9f308888fa14209c4078b', 'page_number': 1}, score: 0.5701725567412796)]",1.0,1.0000001192092896,1.0,1.0,1.0 +In what decade was disco popular?,1970s,1970s,"[Document(id=cd5a9b2c855f8451d360289cae3ca85bf1cf05c31eaf40598786adac8910f4b0, content: ' As of 2015, house music remains extremely popular in both clubs and in the mainstream pop scene whi...', meta: {'name': 'House_music', 'source_id': 'c556f4a459e8a99b6d2065bab8fd6a918d0a00f13291aad2473922492e4abf2e', 'page_number': 1}, score: 0.5505224094732637)]",0.0,1.0,0.0,0.0,0.0 +What do the rivers form?,lakes,valleys,"[Document(id=8b0a4ea594fc5c5b1b8e52042fe1a5990b9ecab79c49cf8072acc31c6bbb9112, content: ' +Valleys and plains are found along the coastline and rivers. The north of the province lies just so...', meta: {'name': 'Zhejiang', 'source_id': '640d81c4b9551176f92d59bc173460684a56171910762610e2ec62f3aa1948d4', 'page_number': 1}, score: 0.5334139839820395)]",0.0,0.5373156666755676,0.0,0.0,0.0 +What were the first 256 code points of Unicode made identical to? ,ISO-8859-1,historic scripts,"[Document(id=df3636934a87dcaa1aa37c6146a219c020357b983841dcebcc4edf19d3146c4a, content: ' For example, the ""fullwidth forms"" section of code points encompasses a full Latin alphabet that is...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.6591597866231519)]",1.0,0.24151155352592468,1.0,1.0,1.0 +"How many countries did her song ""Irreplaceable"" get number one status in?",five,44,"[Document(id=b8e4c4f73e595a4bf934f4743c25ffdd012fa0b8bdf2c0ce7ffd0ad37403f5dd, content: ' +Madonna holds the record for the most number-ones on all combined Billboard charts, including twelv...', meta: {'name': 'Madonna_(entertainer)', 'source_id': '0868d847bb768c30a7507239169d5e441bb3181b66c8b9353a863daa3d58e425', 'page_number': 1}, score: 0.550717575548617)]",0.0,0.4409554600715637,0.0,0.0,0.0 +How many Examination Boards exist in India?,30,30,"[Document(id=87a93f72cbb9c00ea6477b2a31d21c284a2ecf07733f93b0fb4ba7b277f5b8f5, content: ' It is within the power of both the union government and the state governments to govern schools sin...', meta: {'name': 'Private_school', 'source_id': 'c62330ba95e9919513bc9fc65c0d21a748b4a518b9b1f905a113de7f37274e44', 'page_number': 1}, score: 0.5680934731225737)]",1.0,1.0,1.0,1.0,1.0 +What play showed an early depiction of civil disobedience?,Antigone,Antigone,"[Document(id=99743eb7e7277e11f539b5449857c5f012a842facd618f6a75ff178a89fc1f76, content: 'One of its earliest massive implementations was brought about by Egyptians against the British occup...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 0.6792855736042169)]",1.0,1.0,1.0,1.0,1.0 +"Once in conversation with the Queen, how is Elizabeth addressed?",Ma'am,Your Majesty,"[Document(id=6661c5b42cee4d9122296712db8297ce21db50225e9f1d2cad1e056d68ca32c8, content: ' Additional styles include Defender of the Faith and Duke of Lancaster. When in conversation with th...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.7342183996028037)]",1.0,0.5040283799171448,1.0,1.0,1.0 +In what century did Paris become largely into fashion?,19th,19th,"[Document(id=ff2ec1a3232f7d74c84b198defa77dd9879cdb65474a7dab2bf47da19675285e, content: ' +Paris has been an international capital of high fashion since the 19th century, particularly in the...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.6173766295402898)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did Alfred the Great begin his reign?,871,871,"[Document(id=6f9dc46d6eac9c736c44b2255083b35d238c93547c3efcd0f85d83a3feef71b2, content: ' The speech of eastern and northern parts of England was subject to strong Old Norse influence due t...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.5422845553260691)]",0.0,1.0,0.0,0.0,0.0 +What two methods can be used when using Load Testing?,data or a large number of users,large quantities of data or a large number of users,"[Document(id=4753cc282d43999b382172589cceb076cee5e8185383911435f640a568593cbe, content: ' +Load testing is primarily concerned with testing that the system can continue to operate under a sp...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.7546814504937169)]",1.0,0.9289779663085938,1.0,1.0,1.0 +what may possibly be a solution to net neutrality concerns? ,municipal broadband,a U.S. Congress H.R. discussion draft bill,"[Document(id=01af45e308bdc846c2017ba251456d022418d5b9aed2894db485930de32c1380, content: ' On 16 January 2015, Republicans presented legislation, in the form of a U.S. Congress H.R. discussi...', meta: {'name': 'Internet_service_provider', 'source_id': 'b6cdeebacab1c0fc99cc348ba10a7ad5f8a09ca4691c74f89374730939ffbbb4', 'page_number': 1}, score: 0.6538977153120153)]",1.0,0.12566259503364563,1.0,1.0,1.0 +What are the ground-attack aircraft in the USAF usually deployed in support for? ,U.S. ground forces,U.S. ground forces,"[Document(id=87ffdf42e04cd4e5aabc33e35caf9614ba61758e355a1d97a38a524ba0bedf46, content: ' +The ground-attack aircraft of the USAF are designed to attack targets on the ground and are often d...', meta: {'name': 'United_States_Air_Force', 'source_id': '1bb222de66e5a2c81f1d5b75bf3eced066939aec82934a5cb176a607b4e8cc8f', 'page_number': 1}, score: 0.8271269612162027)]",1.0,1.0,1.0,1.0,1.0 + In what year did the invasion of Russia take place?,1812,1917,"[Document(id=d5e55f6ee93ac4f8481ec5c6635509a280f040b7e22be245ec7df77cc921db31, content: ' +The Russian Revolution is the series of revolutions in Russia in 1917, which destroyed the Tsarist ...', meta: {'name': 'Modern_history', 'source_id': '59c9b273f6f85af9d336afaf5154a702230bef3726363ecbf5c8378f5d8798c7', 'page_number': 1}, score: 0.5797152310314349)]",0.0,0.6672266125679016,0.0,0.0,0.0 +What was the practice of religion to the Romans?,personal choice,Private and personal worship,"[Document(id=8cb202622013f5234d79ec3a4275501aaa91b4fdefe32dbbccfd335829f7ffa1, content: '[citation needed] +Roman religious beliefs date back to the founding of Rome, around 800 BC. However,...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.7022407140076495)]",0.0,0.404468834400177,0.0,0.0,0.0 +What is one example of failure testing?,Software fault injection,Software fault injection,"[Document(id=827178e2fa13b1045cb5486ccca30dd5c2a87aa9e9f903cbdd2808ec1b93a098, content: ' Beta versions can be made available to the open public to increase the feedback field to a maximal ...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.5849168855723661)]",1.0,1.0000001192092896,1.0,1.0,1.0 +"Out of the maximum amount of players allowed, one must be a what?",goalkeeper,Goalkeepers,"[Document(id=dde1f3d5248e35f7ba793940bcbe48c24b20edc95ec4f306ad8006ccc3bb1c73, content: ' Competition rules may state a minimum number of players required to constitute a team, which is usu...', meta: {'name': 'Association_football', 'source_id': 'f2b7305b09ad8a43b8389a5eb2d84b8ea4b4deaa7968d21fe9da9da9ecda2c6b', 'page_number': 1}, score: 0.505336072398103)]",1.0,0.8311953544616699,1.0,1.0,1.0 +In which year was the new constitution ratified?,1855,VIII,"[Document(id=f4d30a1b3706aab7ed3995195fa028b93e0c85f66d5342bb26000df0f76bdf32, content: ' His power was confirmed by the new ""Constitution of the Year VIII"", originally devised by Sieyès to...', meta: {'name': 'Napoleon', 'source_id': 'bb5f9d936d10f96b3a68427475018c6441f2d3f372f7b44da8d9caa5ea0b9a9b', 'page_number': 1}, score: 0.5336785666881763)]",0.0,0.30888205766677856,0.0,0.0,0.0 +What vessel was attacked in 1845?,Naiad,Naiad,"[Document(id=06462bba1bbaf61b1e06ec260c9ebb15ac9f6984188e161293ac729b7950da5d, content: ' One year later, the American schooner Dolphin arrived and picked up two boys, the last survivors of...', meta: {'name': 'Marshall_Islands', 'source_id': '762a4f6095de127ded5441c7a17d1c9c38f731ac9cb1ffe8de30af6573be7edf', 'page_number': 1}, score: 0.6002989836151421)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did Travel + Leisure list Yale as one of the most beautiful campuses?,2011,2011,"[Document(id=1eb83a50e062eec7af6abab3de1bf0b5576a4c75ac168cc64f1b77a2e268c579, content: '7 km2) Yale-Myers Forest in Connecticut's Quiet Corner—and nature preserves including Horse Island. +...', meta: {'name': 'Yale_University', 'source_id': 'f33638c4d6b8d7572ba500f3a8affca1729aae3046c83a94e2a7424dd50f6549', 'page_number': 1}, score: 0.6034040579940345)]",1.0,1.0,1.0,1.0,1.0 +What sort of variants did the Human Genome Project indicate that many of the measured transcripts were?,alternative variants,alternative variants of the same genes,"[Document(id=616efe41146a20d96fcfa37d16fa50ce52cca8acf43b362e86a15a714f86ddff, content: ' Subsequently, the sequencing in the Human Genome Project indicated that many of these transcripts w...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 0.5241570974633485)]",1.0,0.5312574505805969,1.0,1.0,1.0 +During what year did the Italians invade Libya?,1911,1911,"[Document(id=3cdcefdaa320ecb0d100eef0c92063190f1030f98ee702385d5ad087a454afc6, content: ' He repeatedly expressed a preference for the desert over the city and retreated to the desert to me...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.6646044035591563)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What did Davidson believe the God of Aristotle is synonymous with?,rational thought,rational thought,"[Document(id=14435d1137e579f740d93ef8fabdf50640828e96d9dd732c7b8c6d4932ae6a58, content: '"" The theory was indebted to Aristotle's pluralism and his concepts of Soul, the rational, living as...', meta: {'name': 'Idealism', 'source_id': '55a0bd42a4a737442a2d93fe0926424e236910de01b2406ed43c174f2225ff9d', 'page_number': 1}, score: 0.5719476914223833)]",1.0,1.0,1.0,1.0,1.0 +What percentage of students at Notre Dame are the children of former Notre Dame students?,21–24%,21–24%,"[Document(id=2b6b9afae589749ea7e615fd1e236b006b90acb763735662b8fec51a24d455bd, content: ' Around 21–24% of students are children of alumni, and although 37% of students come from the Midwes...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 0.6697027777597087)]",1.0,1.0,1.0,1.0,1.0 +On what day did U.S> planes located Ozawa's fleet?,second,25 October,"[Document(id=7ade51eb3e5a17ee31e4514e28638a45a8061a9ee1a9b4f97cc1e6d9985e197e, content: ' commanders ignored reports that Kurita had turned back towards San Bernardino Strait. They had take...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 0.6893631935380231)]",1.0,0.24027208983898163,1.0,1.0,1.0 +Greek was crowned the European champions in what event?,Euro 2004,basketball,"[Document(id=12d9fa7ff9f514e66e336f50a70a372c3162770635ba732cc4335141fd65ee50, content: 'O.K, Maroussi) have won 3 Triple Crowns, 5 Saporta Cups, 2 Korać Cups and 1 FIBA Europe Champions Cu...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.6834429597884792)]",1.0,0.34321069717407227,1.0,1.0,1.0 +Which tense indicates that an action hasn't finished?,imperfective,past and present tense,"[Document(id=12d35b1cc10bfec8db3184a99e8f8840a8a60da980241fbd47ee8d235ed0fab1, content: ' This is distinct from past and present tense, and any Czech verb of either aspect can be conjugated...', meta: {'name': 'Czech_language', 'source_id': 'b7b15f8c583c24627f86dae71c96a8b788adafb4e0606251ec9a06307cc37f6c', 'page_number': 1}, score: 0.42385807190569236)]",0.0,0.2686117887496948,0.0,0.0,0.0 +What acoustic element could be distorted by different audio settings in iPods prior to 2007?,Bass,bass,"[Document(id=fdcc0196b4b8a411c852d4317f4371bd9d7338e17b11d3bebb1bcb7848dd8ddf, content: ' The combination of the undersized DC-blocking capacitors and the typical low-impedance of most cons...', meta: {'name': 'IPod', 'source_id': 'f0324763387a7e11c235569bada6935891b2c5ba40dd065ab5acc1bb5de8dbed', 'page_number': 1}, score: 0.6185294524972531)]",1.0,1.0000001192092896,1.0,1.0,1.0 +In what way can Neutral triatomic hydrogen exist?,excited form,an excited form,"[Document(id=e9b537adae4e88d5a591f33f2f06388da096013764cb94bfc4e3ea25bb2b2e23, content: ' This charged ion has also been observed in the upper atmosphere of the planet Jupiter. The ion is r...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.6014010412569942)]",1.0,0.9052941799163818,1.0,1.0,1.0 +How are children used in the conflicts in Burma?,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements.,Children are being sold as conscripts,"[Document(id=0088f05f01a71535c2cb1e5b41db4930183fdb107aa5d9b1b2402b982ed6e8a4, content: '"" In July 2013, the Assistance Association for Political Prisoners indicated that there were approxi...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.6603176163083229)]",1.0,0.5117864608764648,1.0,1.0,1.0 +In what decades were Arsenal players stereotyped in a negative way?,1970s and 1980s,1970s and 1980s,"[Document(id=1bdd87144e8c05c0c8820e3bd6186deb63a4be13d5810e39af795993e4721bb7, content: ' The book was twice adapted for the cinema – the 1997 British film focuses on Arsenal's 1988–89 titl...', meta: {'name': 'Arsenal_F.C.', 'source_id': 'b197b14120aabbc3bf2073d4b3e99c1c51d56d5ac5248f8a97dadcfa056a6f7e', 'page_number': 1}, score: 0.5193618971746868)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did new construction start in Melbourne?,late 1890s,The 1850s and 1860s,"[Document(id=5065b911750db5485f2cb49749f8e95f270f3e0d705842ca5e8f26d61da4d4ce, content: ' The city centre, Docklands, St. Kilda Road and Southbank areas feature high-density forms. +With the...', meta: {'name': 'Melbourne', 'source_id': 'e9cc00699ea29b72949874ec0e147efa277ecdaa2a727246f376aab1a8d342bd', 'page_number': 1}, score: 0.624985416021912)]",1.0,0.703935980796814,1.0,1.0,1.0 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:2__chunk_size:10.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:2__chunk_size:10.csv new file mode 100644 index 0000000..b2eb318 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:2__chunk_size:10.csv @@ -0,0 +1,121 @@ +questions,true_answers,predicted_answers,retrieved_docs,doc_mrr,sas,doc_map,doc_recall_single_hit,doc_recall_multi_hit +What were the earliest priesthoods? ,flamines,flamines,"[Document(id=ac8e9c286327aea24d00bceb002b08ad3fbc3007673c77da9b48b6b6b91aa76f, content: ' Some rites of the domus (household) were held in public places but were legally defined as privata ...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5545176647418332)]",1.0,1.000000238418579,1.0,1.0,1.0 +When was a third visit to Mnafeld scheduled? ,early 1546,19 September 2001,"[Document(id=7244ab2e4f16b1e4573f9562f2be390b8f796d721387df92845e66279cc9a354, content: ' Musharraf agreed to give the U.S. the use of three airbases for Operation Enduring Freedom. United ...', meta: {'name': 'War_on_Terror', 'source_id': 'b9ac47a39d382362992253f0ccfa0072ea6216fe1daa748c4899980b467292f4', 'page_number': 1}, score: 0.3588364752821929)]",0.0,0.3735787272453308,0.0,0.0,0.0 +What was the name of the airfield where heavy raids took place?,Farnborough,Bomber Command,"[Document(id=67346696a72eb98fc01faf97de2c3a68cf161364d6413eefe068134d10fcf3de, content: ' Attacks on Birmingham and Coventry were subject to 500 short tons (450 t) of bombs between them in ...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 0.600247916969115)]",1.0,0.2595972716808319,1.0,1.0,1.0 +The Modern English ending -'s is derived from what ending in Old English?,-es,-es,"[Document(id=081239836dc280856dc2c1ab168f17d8fffffed12cbaeb50b62cd11b58a116b4, content: ' It maintains several distinct cases: the nominative, accusative, genitive, dative and (vestigially)...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.6403058641147197)]",1.0,1.0000001192092896,1.0,1.0,1.0 +In what area is it common for spiritual mentorship to be extremely high?,the West,Hinduism,"[Document(id=c1b15c9f6474425049e145f56eea6b22790074204f55cd26eb005fca608f63a8, content: ' This guidance is generally offered during a ceremony called the patriarchal blessing. +In Hinduism t...', meta: {'name': 'Teacher', 'source_id': '8d3b7bd3176a10380300a49fa713b8fffa5e77a181df51ea823d2bdeb46d4c7b', 'page_number': 1}, score: 0.46201672067809485)]",1.0,0.40121543407440186,1.0,1.0,1.0 +Who is the only Greek volleyball club to win European titles?,Olympiacos,Olympiacos,"[Document(id=c6017a99c04926bab23928d5fed5517ccdf053e42b0a0376e97782a93ab5a6b4, content: ' The Greece men's national water polo team became the third best water polo team in the world in 200...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.6852555389578893)]",1.0,1.0,1.0,1.0,1.0 +How many judges were on the panel for this season?,four,three,"[Document(id=17489faf6fb39343de67cc2bfe448e959559775109bb646aa0e4d02b716ddf77, content: ' Jennifer Lopez and Steven Tyler joined the judging panel in season ten, but both left after two sea...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 0.6149654088231247)]",1.0,0.8083884716033936,1.0,1.0,1.0 +"Along with a chemical process, what type of process is used by night vision devices to convert infrared into visible light?",electrical,electrical,"[Document(id=1c8a379dc092fa8d374927640e6d16f46286416581451c66a1da80fef1412acc, content: ' Infrared can also be used to detect protostars before they begin to emit visible light. Stars emit ...', meta: {'name': 'Infrared', 'source_id': 'ba9d9843065facd8aa75e472f0c7d760ae3d78712335a49f7abaf499acb21d1c', 'page_number': 1}, score: 0.6642589447541861)]",1.0,1.0,1.0,1.0,1.0 +The second most prominent religion in New York is what?,Judaism,Judaism,"[Document(id=1eed45d4c946ec058384afdd313db9b847d2b384629f1872466e03c7bc13a2e5, content: ' +Christianity (59%), particularly Catholicism (33%), was the most prevalently practiced religion in ...', meta: {'name': 'New_York_City', 'source_id': '8a1509d057a51ed7a68b42ff87df5d459ac7f15f3d0f4f1685859ba197f9686d', 'page_number': 1}, score: 0.6168396798925952)]",1.0,1.0000001192092896,1.0,1.0,1.0 +The Free Hanseatic City of Bremen includes which cities?,Bremen and Bremerhaven,Bremen and Bremerhaven,"[Document(id=0427081e9deb9c727f124b818203b3d0cc649efe97ddc37c49cee8cebfa2c3a1, content: 'Germany is a federal republic consisting of sixteen federal states (German: Bundesland, or Land).[a]...', meta: {'name': 'States_of_Germany', 'source_id': '60764e89141121f734e956aeeb48fcbe12c7e4fdec683c4f9fa29406a3f0579e', 'page_number': 1}, score: 0.5087021851568686)]",1.0,1.0,1.0,1.0,1.0 +What seeks to limit the use of polysyllabic characters?,CPC,Unicode,"[Document(id=1ca3328892ba24004a75d8cfaab0f58ce32bed8ee034be6fa54e33e8b0a62eb5, content: ' UTF-16 extends UCS-2, using one 16-bit unit for the characters that were representable in UCS-2 and...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.5571209200423095)]",0.0,0.1403539478778839,0.0,0.0,0.0 +In what courts can federal habeas corpus suits be brought?,federal,federal,"[Document(id=478de5bfd3be02e5795166dfaf36bf31f55ed01f66e8d8ff978214365dc0f179, content: ' Further, the Court's decision in Baze v. Rees, upholding a lethal injection method used by many sta...', meta: {'name': 'Capital_punishment_in_the_United_States', 'source_id': '0a4e4dda32f7be6a4f78da5ed9ee21d2cc7659112c0d087b2923a482bdc77b5b', 'page_number': 1}, score: 0.5154552807306718)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What did Whitehead consider himself as a philosopher? ,rank amateur,rank amateur,"[Document(id=a022efa8ed739794e05008dbe44d9b83a87fc4f4b7a609ec096b9d437bc5089e, content: ' For Whitehead, education should be the exact opposite of the multidisciplinary, value-free school m...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 0.5968778075657488)]",1.0,1.0,1.0,1.0,1.0 +What condition does a person have to meet to be allowed by law on a coin?,deceased,payment of debts,"[Document(id=fecf8dec4eebf70e22436a1b01f96b62f6beb9466dedd822db56f659f010d7bd, content: ' The use of paper money not backed by precious metals had also occurred under the Articles of Confed...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.48692030873140724)]",1.0,0.1231364756822586,1.0,1.0,1.0 +Which region of the US experiences rain year round?,Pacific Northwest,Southwest,"[Document(id=ef3b24124d6963ab10e31070a01eb903bae0b374f75f9a00a0da61890b7d8c80, content: ', tornadoes are more common than anywhere else on Earth and touch down most commonly in the spring a...', meta: {'name': 'Geography_of_the_United_States', 'source_id': '6eef5e1e8e3a1d489b23e0304d3cc9472b84a44871b2fd5ecfc8ab3f12e8ccf0', 'page_number': 1}, score: 0.613377010514842)]",1.0,0.4576212167739868,1.0,1.0,1.0 +What is the ability to recognize and adapt to new specific pathogens called?,Adaptive (or acquired) immunity,Adaptive (or acquired) immunity,"[Document(id=4f2fba1eb5467425d8e57ae2a4af5c24920de101064b738c048036a68ef3852a, content: 'The immune system is a system of many biological structures and processes within an organism that pr...', meta: {'name': 'Immune_system', 'source_id': 'fcb8c5092b2596a66d4f96587d653cbabda606874d86632c35ec40ef8f023051', 'page_number': 1}, score: 0.6805778943142242)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who allegedly hit a home run to the Center?,Babe Ruth,Luis Castillo,"[Document(id=20206deac29e5b4bc6d2d7bfd70810f6e421fd0dfa03d5a7cf710b840fbba782, content: ' +The Cubs had high expectations in 2002, but the squad played poorly. On July 5, 2002 the Cubs promo...', meta: {'name': 'Chicago_Cubs', 'source_id': '10bd5330115a2fe2b9ef60d21abe3942769709161783409af0c4444ccac129c3', 'page_number': 1}, score: 0.47699996405548506)]",1.0,0.4449738562107086,1.0,1.0,1.0 +Which type of graduate from Imperial earned the second highest average starting salary after graduation?,Computing graduates,3.36,"[Document(id=8190e2f3cfba3c67fc319c1eba30ec986d8174be2decff7ed0bb12a364fb52bb, content: ' +The College's endowment is sub-divided into three distinct portfolios: (i) Unitised Scheme – a unit...', meta: {'name': 'Imperial_College_London', 'source_id': '8ff02d910ce681fdd72074b13ed322271625386966ee827dabf1c93ca556fd6e', 'page_number': 1}, score: 0.49844352438506834)]",1.0,0.09059444814920425,1.0,1.0,1.0 +In what year did Eisenhower propose his second civil rights act?,1960,1960,"[Document(id=78d1760ca2fa0904e5480a91d388ba9d6c37d737a65426e5cbb2488333bc1e1a, content: ' Eisenhower made clear his stance in his first State of the Union address in February 1953, saying ""...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 0.6915934272370499)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What is the current height limitation in Paris in central areas?,50 m,50 m (160 ft),"[Document(id=891cab83df456a1bb11d8e7c301e45b02e564d224d3be1f7bdb82d02edc8f221, content: ' Modern Paris owes much to its late 19th century Second Empire remodelling by the Baron Haussmann: m...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.66910672984007)]",1.0,0.804789662361145,1.0,1.0,1.0 +Income level was found to have a signification correlation to which outcome in the study?,nutrition literacy,Crime rate,"[Document(id=cfc35d312a7f860ff8772433762d5f27e606e10585399783dd129af1d4d74ed4, content: ' Other studies have confirmed this relationship. The UNICEF index of ""child well-being in rich count...', meta: {'name': 'Economic_inequality', 'source_id': '648ff32b8c146c974b23e115e3a61420b1cdbea9de1f5bf38442ce4c1d9caa42', 'page_number': 1}, score: 0.5542598259336555)]",0.0,0.0639934316277504,0.0,0.0,0.0 +How much did Viacom want in damages in their lawsuit against youtube?,$1 billion,$1 billion,"[Document(id=56ff4bdb4f543c61c1f516ba1e544e83e117ab5512bae58b2264fa6b6d86f26b, content: ' Three successful complaints for copyright infringement against a user account will result in the ac...', meta: {'name': 'YouTube', 'source_id': '4d18798daeceb997a6dcb04b575dbbdad768671ca9202116370611d944d510be', 'page_number': 1}, score: 0.740709470725756)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who should be able to challenge administrative orders in court?,individuals,individuals,"[Document(id=5c5f6a0591170904a56f4c0417e667089761d8d8f013f55283aff164fdadd75b, content: ' Dicey's: “no man is punishable or can be lawfully made to suffer in body or goods except for a dist...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 0.5566146748558449)]",1.0,1.000000238418579,1.0,1.0,1.0 +The Slavs were under what name in the early 6th century?,the Antes and the Sclaveni,Veneti,"[Document(id=4236cbbf71506fe35f0da033ea9b465edf3cfbfdd8e6bac2b259b18835cc0408, content: ' +The relationship between the Slavs and a tribe called the Veneti east of the River Vistula in the R...', meta: {'name': 'Slavs', 'source_id': 'a55731ff3cd03d8fe34d4e646dbfcdc2088f01190e5110c9ff0baf4e4b486847', 'page_number': 1}, score: 0.6927475328468398)]",1.0,0.2406373918056488,1.0,1.0,1.0 +How are the results simplified when using the SASO?,"into four summaries that look specifically at responses that correspond to either homosexuality, heterosexuality, bisexuality or asexuality.","measures sexual orientation on a continuum, considers various dimensions of sexual orientation, and considers homosexuality and heterosexuality separately","[Document(id=04d7cfe710801483b2e5c7c67b89cf9966fc92203783ea424fccf90e1b51520c, content: ' The classification scheme works such that individuals in the same category show the same balance be...', meta: {'name': 'Sexual_orientation', 'source_id': 'f94279650936a403aa3d2b43ab1ad42b1fd14d05455aa86fbcda6cda978ffe5c', 'page_number': 1}, score: 0.3825635282360615)]",1.0,0.6333848237991333,1.0,1.0,1.0 +What socioeconomic class used the Florentine Tuscan language in Dante's time?,upper class,literate and upper class,"[Document(id=149c6390292392d4438c9191dca43bbe827c20a86bb7bfc502679813d7fcc108, content: ' Due to Eastern Lombard's status as a Gallo-Italic language, an Eastern Lombard speaker may, in fact...', meta: {'name': 'Dialect', 'source_id': '2e05b686e48c9512ae5466f1d15202b44fdc903ea32d0a42c5dbc9ef173d051d', 'page_number': 1}, score: 0.666760845084105)]",1.0,0.7628111839294434,1.0,1.0,1.0 +"In addition to ammo proposals, what other need was articulated?",The need for some form of tracer or smoke trail,desegregation of the Armed Forces,"[Document(id=94d45523552487be55666f4943bee9647a9f972593aae08c707a9b3bd1ba479e, content: '"" The report discussed and demonstrated racial discrimination in basic freedoms, education, public f...', meta: {'name': 'Affirmative_action_in_the_United_States', 'source_id': 'b7156a1660bb591e3abb4460028a7922c063ba8962ad6dce11678f8b08c5cdcb', 'page_number': 1}, score: 0.38881723110881794)]",0.0,-0.04560529813170433,0.0,0.0,0.0 +What is that power that enables us to love?,Prevenient grace,kindness meditation,"[Document(id=8aa1cc5c18eec7cb85e8dd3bc0786a603a405db09821c3852c577c0c1ee5d912, content: '[note 7] +The Noble Eightfold Path—the fourth of the Buddha's Noble Truths—consists of a set of eight...', meta: {'name': 'Buddhism', 'source_id': '7d629dc535ee0ab8e7a8ec238f72eca067cf563981907b159dd5db0c2c4a2fad', 'page_number': 1}, score: 0.3807221391984495)]",0.0,0.34043627977371216,0.0,0.0,0.0 +"Who was assassinated on September 22, 189 AD?",He Jin,Caesar,"[Document(id=4f17ed621ff3df1e2ab21fa16d21495b6fcfc7630eb0a8ebe5cdfa2aa939ba81, content: ' This made his person sacrosanct, gave him the power to veto the senate, and allowed him to dominate...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.4484555871081836)]",0.0,0.18531587719917297,0.0,0.0,0.0 +How did Gaddafi change the legal system?,he introduced sharia as the basis for the legal system,introduced sharia,"[Document(id=e2a95241acaa96c9b3665ffd83614f229b121a721d01ae7e3a778b2ce1d5050d, content: ' Intent on pushing Libya towards ""Islamic socialism"", he introduced sharia as the basis for the lega...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.6028496968614645)]",1.0,0.7905094623565674,1.0,1.0,1.0 +What name was it assumed Elizabeth would take upon her marriage to Philip?,"Mountbatten,",Elizabeth,"[Document(id=31de347482f6e76ab8d778bc1648539499d1dcc796460f1482425b5ee879fe5c, content: ' When she toured Canada and visited President Harry S. Truman in Washington, D.C., in October 1951, ...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.5909742966356426)]",1.0,0.27963173389434814,1.0,1.0,1.0 +What was the attack on the British weakness?,Battle of Fort Bull,sea communications,"[Document(id=2982b60b277d226d31c0a9fe90582fab11b0c6ff0f2e5a54d4e342474fbc3795, content: ' +In 1941, the Luftwaffe shifted strategy again. Erich Raeder—commander-in-chief of the Kriegsmarine—...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 0.5488736596606179)]",0.0,0.10573846101760864,0.0,0.0,0.0 +How much of South Tucson is Native American?,10%,10%,"[Document(id=e834bcf1ef63c7b72f9bf36d447e483da5003c1c77f033ef064e514b7082f587, content: ' To the south, N. Highland Avenue leads to the Barraza-Aviation Parkway bicycle path. +South Tucson i...', meta: {'name': 'Tucson,_Arizona', 'source_id': '1cd8368afbbb7156e4d37c16c9795a1cf7209fbd0036331b6759337ae62d7224', 'page_number': 1}, score: 0.5734801185966978)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Whose theory claimed that species were unchanging and designed by God?,John Ray,John Ray,"[Document(id=67c87173e747d72d79bc92866e553902d56583aae14965ecaa3b2da507256976, content: ' After the turmoil of the English Civil War, the Royal Society wanted to show that science did not t...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 0.6990559439833335)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who can restore a database that has been corrupted?,database administrator,Database transactions,"[Document(id=5baf5b500fc870a57b1f1a3f45f13e9b554e1d91359189ce3677cff042e60b2d, content: ' Data security prevents unauthorized users from viewing or updating the database. Using passwords, u...', meta: {'name': 'Database', 'source_id': '98b09743ece9be42ad021f0dc6ffab08c322e7eeace6aae0b98b4bbf67224bd3', 'page_number': 1}, score: 0.4012562109697081)]",1.0,0.4623287320137024,1.0,1.0,1.0 +What unpopular philosopher's text did Avicenna learn from?,Abu Abdullah Nateli,Abu Abdullah Nateli,"[Document(id=cbebf9a70685ea0e72bcf8c9f1dbbe9b7163ee26279b633446edf34314600fb3, content: ' Janssens demonstrated that Avicenna was a Sunni Hanafi. However, the 14th cenutry Shia faqih Nurull...', meta: {'name': 'Avicenna', 'source_id': '25cbb1011a29473e1dfa8d96ebe95bc8ead7393548de57e871f959624f7238f1', 'page_number': 1}, score: 0.7413625193934965)]",1.0,1.0,1.0,1.0,1.0 +What was the event called that caused Ayutthaya to reclaim lost territories ?,the Chinese threat,Ayutthaya recovered its territories by 1770,"[Document(id=f08f17188f751088b2b76f2c1a69ffc86f340979f05ea84b4b201def163ec6f3, content: ' Its trade and secular administrative reforms built a prosperous economy for more than 80 years. Fro...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.5241313195161235)]",1.0,0.10474255681037903,1.0,1.0,1.0 +"Before the introduction of the Latin alphabet, how was Old English written?",runic system,runic system,"[Document(id=568b9944b8e99493b93780736bc40a387965c8d30cebea94a19881b28bc86012, content: ' More entered the language when the Anglo-Saxons were converted to Christianity and Latin-speaking p...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.6607325652412197)]",1.0,1.000000238418579,1.0,1.0,1.0 +"When groups that are antagonistic to each other form a coalition for hidden gain, it is called what?",unholy alliance,unholy alliance,"[Document(id=c54ac19342651f97c105c4825d0b82cbf05684cf1d7c4921e0d078a7212b4a21, content: ' A common type of embezzlement is that of personal use of entrusted government resources; for exampl...', meta: {'name': 'Political_corruption', 'source_id': 'e7c84fb03cb06b72c4ab51eb24218e9f2a10b95493e43d575342db0ffa3e9e1d', 'page_number': 1}, score: 0.49411751006790716)]",1.0,1.0,1.0,1.0,1.0 +What made a strong economic case for investing in low carbon technologies?,Stern Review,Clean Edge,"[Document(id=639620812442614c676be21cf936d052650009771bc4d08df98e8056e4441e38, content: ' +New government spending, regulation, and policies helped the industry weather the 2009 economic cri...', meta: {'name': 'Renewable_energy_commercialization', 'source_id': 'fa5f7904b8df5272f4f870ed401f610435c916171c87b47deed9a45e6bd594ea', 'page_number': 1}, score: 0.5498180514882574)]",1.0,0.10936959832906723,1.0,1.0,1.0 +"Who introduced the Latin version of the word ""insect""?",Pliny the Elder,Pliny the Elder,"[Document(id=de63707a025645c7fcc7a26e6dd37ddd29a27cd60ff187ce487e0d07217708ee, content: ' +The word ""insect"" comes from the Latin word insectum, meaning ""with a notched or divided body"", or ...', meta: {'name': 'Insect', 'source_id': 'e14215fdfb7e1394430dedb24549d773464f110b7f19b86558bc101f84126630', 'page_number': 1}, score: 0.8069591810257106)]",1.0,1.0,1.0,1.0,1.0 +What was started by the Roman Catholic Church to dispense of the Cathars in Languedoc?,The Cathar Crusade,The Cathar Crusade,"[Document(id=3f50079b5ebfa4f0c771fad96712717b3bd3050f02fa3abfb19f87bc049689e1, content: ' By the 11th century, more organised groups such as the Patarini, the Dulcinians, the Waldensians an...', meta: {'name': 'Heresy', 'source_id': '8ba473262f8508a9c99699bb09cd022993896ece1fed1463a9cf4bd7eb0c3e68', 'page_number': 1}, score: 0.4953807364007933)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What music service is a huge competitor for Tidal?,Spotify,Spotify,"[Document(id=f0c6a5cd5408a0fc01c220e5910c03a2241fbe85099410f518592c1fb7098441, content: ' ""Creating a partnership with Beyoncé, one of the most hard-working and talented people in the world...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 0.5224108713853148)]",0.0,0.9999999403953552,0.0,0.0,0.0 +How much was each copper token worth?,a halfpenny,legal tender at face value,"[Document(id=c5c8d55b2c3952d9e29c10530dc199fccdd3bfb5dc897ac40dd8ff1d01e6a97f, content: ' This has to do with Alexander Hamilton's suggestion to Congress of a fixed 15:1 ratio of silver to ...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.44825442543903155)]",0.0,0.4468753933906555,0.0,0.0,0.0 +What ships were attacked in 1852?,Glencoe and Sea Nymph,a Russian fleet attacked an Ottoman force,"[Document(id=34bd168edceec4d845ba383e25526ec637cdd2a67e74fbe1494f27559701c396, content: ' The clash came on 30 November 1853 when a Russian fleet attacked an Ottoman force in the harbour at...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 0.5355975216123309)]",0.0,0.07638347893953323,0.0,0.0,0.0 +What was the first fossil to display both clearly traditional reptilian characteristics?,Archaeopteryx,Archaeopteryx,"[Document(id=0815d5b0f30156ea39d24a8a15062d5e21822317a105f659f2000892f8d657c8, content: ' +The consensus view in contemporary paleontology is that the flying theropods, or avialans, are the ...', meta: {'name': 'Bird', 'source_id': '76114e3e9c92caec1de2bcfd38b6f22e1b48487f665fa6392f914934a6748ae1', 'page_number': 1}, score: 0.5702787263670992)]",1.0,1.0,1.0,1.0,1.0 +What are the theories that integrate molecular genetics with Darwinian evolution called?,the modern evolutionary synthesis,modern evolutionary synthesis,"[Document(id=e1b3872c9a56b024a609ebeaf211b3ea1b4799599e6935c3dd62e613f036575b, content: ' This dogma has since been shown to have exceptions, such as reverse transcription in retroviruses. ...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 0.5969093624625235)]",1.0,0.9375607371330261,1.0,1.0,1.0 +At what time did Tesla get dinner?,8:10 p.m,8:10 p.m,"[Document(id=2ed21a855190ee9369cd4351878edc528d570a2136e3a2612158d6108a59491e, content: ' until 6:00 p.m. or later, with dinner from exactly 8:10 p.m., at Delmonico's restaurant and later t...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.8054284554320554)]",1.0,0.9999999403953552,1.0,1.0,1.0 +In what year did the French reach Alsace?,1444,1789,"[Document(id=93996e15e538ef99cbc1f14e3a6eca7f547c782d02004cfe4d90863b09e3c9ba, content: ' When hostilities were concluded in 1648 with the Treaty of Westphalia, most of Alsace was recognize...', meta: {'name': 'Alsace', 'source_id': '1105499d303196fb0b7ae4d26209720b0040e63376f114d458fd77f05836cbd4', 'page_number': 1}, score: 0.6624069423136786)]",1.0,0.5213748812675476,1.0,1.0,1.0 +Who was responsible for the handling of the Roman census?,two censors,Comitia Centuriata,"[Document(id=409efc7d73eb1e4b7778886a27796612a2da5b9f46d8dd07e3972f8290409c4f, content: ' It also elected censors. Only the Comitia Centuriata could declare war, and ratify the results of a...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.523229154674441)]",1.0,0.13780222833156586,1.0,1.0,1.0 +When was a resolution agreed to about Chinese human rights issues in San Francisco?,"April 1, 2008","April 1, 2008","[Document(id=d16248de2a5e0d9e6bbb7e85b411bf0a4143206e20471ecab73dc7158ce992b1, content: ' The cost to the city for hosting the event was reported to be USD $726,400, nearly half of which ha...', meta: {'name': '2008_Summer_Olympics_torch_relay', 'source_id': 'a45e6ea09a1ff12d8a239a682c8ef3735ae894d5ab6427b50909053bb046e8b5', 'page_number': 1}, score: 0.5365703485459956)]",1.0,1.0,1.0,1.0,1.0 +Who was the last pope to be crowned?,Paul VI,Pope John Paul I,"[Document(id=18ed06b8889609136231a06d99465ac7da3164b444e51e58cace4a1e98ddebcb, content: ' He was the last pope to date to be crowned; his successor Pope John Paul I replaced the Papal Coron...', meta: {'name': 'Pope_Paul_VI', 'source_id': 'fc82a40c990c0b3840b06b32643858d39984b66d78ae950561d809dc84d6edf8', 'page_number': 1}, score: 0.5372751222409363)]",1.0,0.7952640056610107,1.0,1.0,1.0 +Who handles serious medical emergencies on Norfolk Island?,the Royal Australian Air Force,Royal Australian Air Force,"[Document(id=c3327e32d8658307f8dca684bbdeb22895ca7a8d7ee8ad6ff0eba6200c5e6da2, content: ' All visitors to Norfolk Island, including Australians, are recommended to purchase travel insurance...', meta: {'name': 'Norfolk_Island', 'source_id': '8be872c9c3302a16163599d52ec60841ddff13042f8e8e2e0aa14de2c192e2ee', 'page_number': 1}, score: 0.6820248727155205)]",1.0,0.9528100490570068,1.0,1.0,1.0 +Were Roman soldiers expected to purchase their own equipment?,"wealthiest citizens, who were able to purchase the best equipment",the Roman state was forced to arm its soldiers at the expense of the state,"[Document(id=3f0396346c47d3ea53931dd05016eb360d6261c1b5ab0c7a977a5bbf0df1be01, content: ' As a result, the Roman state was forced to arm its soldiers at the expense of the state, which it h...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.5248291958675437)]",1.0,0.255912184715271,1.0,1.0,1.0 +Who did Nasser's group try to assassinate?,Hussein Sirri Amer,Hussein Sirri Amer,"[Document(id=9a67277c77a4ce4cd3c2095f48d3612e079b639eb7e063071b54fd0bb0e70e3f, content: ' The interrogation pushed Nasser to speed up his group's activities. +In the 1950 parliamentary elect...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 0.6046152857550448)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What was AJ Ayer an important member of?,Logical empiricism,Basketball,"[Document(id=d7f6f0383459fe08c23a94fb73e1f2386715e112d5e554aee39542da48c23478, content: ' With the university having high profile sports teams itself, a number of alumni went on to become i...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 0.3389550585749015)]",0.0,0.12688907980918884,0.0,0.0,0.0 +In which year was a settlement reached regarding GE's pollution of the Housatonic River?,1999,1999,"[Document(id=6c8a4bf97575a50cf435a385d35038780603d1e0468298d1d46906e5d6f5565e, content: ' Its main offices are located at 30 Rockefeller Plaza at Rockefeller Center in New York City, known ...', meta: {'name': 'General_Electric', 'source_id': '26d4044eb2d79191635af69e57c262af8041a70871e60e989128c458529f04cc', 'page_number': 1}, score: 0.4865949646927139)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What area in Charleston is considered the 'industrial area'?,the city's Neck area,Charleston–North Charleston–Summerville Metropolitan Statistical Area,"[Document(id=620f512c3df1eb1788fcecbd5ec4c0438953e005020fab4303e121d55369ade5, content: 'Charleston is the oldest and second-largest city in the U.S. state of South Carolina, the county sea...', meta: {'name': 'Charleston,_South_Carolina', 'source_id': '6135d76cf2064b0c4f70e685c5c7e15b8093869ebd10f9a74e8acb07e2173763', 'page_number': 1}, score: 0.5474995153501447)]",1.0,0.40961262583732605,1.0,1.0,1.0 +In what year did Frédéric obtain citizenship in France?,1835,1835,"[Document(id=39112deed098f10b50cfae893e4a07e0541a59e3ede2de2a696f87f4dd3a60cf, content: '"" When in September 1831 he learned, while travelling from Vienna to Paris, that the uprising had be...', meta: {'name': 'Frédéric_Chopin', 'source_id': '5e95b80d7ceb3febb2d1a7dfe09730771a231fc8d23c15cd74d3fa2327cfcb14', 'page_number': 1}, score: 0.650285022879904)]",1.0,1.0,1.0,1.0,1.0 +What was Tesla afraid someone was trying to do with his invention?,steal the invention,bad publicity,"[Document(id=6da0a379194f1e5cc0824b3bc6b405cb6290228e4f9e43f1d1060a34ef127630, content: '[improper synthesis?] +Starting in 1934, the Westinghouse Electric & Manufacturing Company began payi...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.6402881206087123)]",1.0,0.10993127524852753,1.0,1.0,1.0 +What is the group called that does not agree with government at all?,anarchists,cordon sanitarian,"[Document(id=eb6af6324b267f8ddca82d92abfdcd76a1aa66a122f874959b3e79f50982c1fb, content: 'S. will be the president, if the party holds that office, or a prominent member of Congress in oppos...', meta: {'name': 'Political_party', 'source_id': '476a9215de81b75a4f17737eed1679570973e55b5ed6175aa0ec1014b09d4632', 'page_number': 1}, score: 0.4507376320029649)]",0.0,0.33983707427978516,0.0,0.0,0.0 +What was the name of the bookstore the authors met in?,Old Corner Bookstore,Notre-Dame,"[Document(id=12b1c266463d473c793c31afc2fbee94dcee8a5deaa16811d1cf1b031c81f83a, content: ' +Paris is a city of books and bookstores. In the 1970s, 80 percent of French-language publishing hou...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.4563788336509233)]",0.0,0.10626249015331268,0.0,0.0,0.0 +Time and space are both examples of what type of resource?,complexity resources,fundamental intellectual structure,"[Document(id=dde733057987b7656d54f0140cdaa3de2ee1789c63c74f1f5c17cb1f61c82cd4, content: 'Time has long been a major subject of study in religion, philosophy, and science, but defining it in...', meta: {'name': 'Time', 'source_id': 'f3ee42c6d38306d65c6207a15a1b9c65ff08a69728d5d0197f9593548177b229', 'page_number': 1}, score: 0.5137300757270435)]",0.0,0.3276873826980591,0.0,0.0,0.0 +What had happened to Frederick's extra troops from Russia?,"Catherine, who ended Russia's alliance with Prussia and withdrew from the war",Frederick lost much of his artillery,"[Document(id=857bca7f9727736940866286b455a1ffd1ff6ebfe14cf952afb4967e2619748f, content: ' +By this point Frederick was increasingly concerned by the Russian advance from the east and marched...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 0.6657266735791849)]",1.0,0.31146275997161865,1.0,1.0,1.0 +What percentage of its workforce did Dell terminate in from 2007 to 2008?,10%,12,"[Document(id=0c5f3492dfe6b6838bc7113688331de615d79f7950a305ca235db36174dba218, content: ' With the return of Michael Dell as CEO, the company saw immediate changes in operations, the exodus...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 0.6340834355511753)]",1.0,0.29873126745224,1.0,1.0,1.0 +What organization was Rome's official caretaker?,the senate,the senate,"[Document(id=4b02bca62f562d6c5bfda9faf0b29f77e95b817f971009b508742099d4c57423, content: ' None of these defixiones seem produced by, or on behalf of the elite, who had more immediate recour...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5584392009151684)]",1.0,1.0,1.0,1.0,1.0 +Who chose BYU as the location of the national Middle East Language Resource Center?,United States Department of Education,United States Department of Education,"[Document(id=0b3a599a54fac34c82f29d858c51bbf5ce4f37032b3539a76c2b8a6b918abc6e, content: ' Several of its language programs are the largest of their kind in the nation, the Russian program b...', meta: {'name': 'Brigham_Young_University', 'source_id': 'cb3b81b6f2c540426803106face67bc5fd4db7b1a9ef422ec3601456732b4785', 'page_number': 1}, score: 0.7432106125866814)]",1.0,1.0,1.0,1.0,1.0 +Who discovered Hydrogen gas?,Robert Boyle,Robert Boyle,"[Document(id=45ad6989b7801812895d43ea694bad169af9b5dd9024dd96b099fe071a196217, content: ' +In 1671, Robert Boyle discovered and described the reaction between iron filings and dilute acids, ...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.7008250803733801)]",1.0,1.0,1.0,1.0,1.0 +Where was Polybius forced to go as hostage?,Rome,Rome,"[Document(id=97686de9c4f8fed507eaca869b9537500fb0e8c613b130c6f388bc83e9983444, content: ' Some areas of the conquered world were more affected by Greek influences than others. The term Hell...', meta: {'name': 'Hellenistic_period', 'source_id': '322682cbb61d31c17e6d21e06723e94efd0ef242a5c9f308888fa14209c4078b', 'page_number': 1}, score: 0.5478172274353506)]",1.0,1.0000001192092896,1.0,1.0,1.0 +In what decade was disco popular?,1970s,1980s,"[Document(id=ac88b34c02a837bc0a4387ec54f7df72e84a365c85acf3ea8c8abdf1bf22c767, content: ' Emphasizing glamour, fashion, and escapism in distinction to the experimental seriousness of earlie...', meta: {'name': 'Post-punk', 'source_id': 'bee90cece10cbdebcf39c6ca24886eb874551f866ed672c4f1f297c137963f75', 'page_number': 1}, score: 0.5462718569085085)]",0.0,0.8399688601493835,0.0,0.0,0.0 +What do the rivers form?,lakes,Nieuwe Maas,"[Document(id=d4f00e0321e7bd0e40e3988f483dbb9e724f85a7fec1890682781c70d19fdf9a, content: ' The middle flow begins as Nederrijn, then changes into Lek, then joins the Noord, thereby forming N...', meta: {'name': 'Rhine', 'source_id': '23686e48a61504679a91dac10d83b287aef3dff7dabc0ab881c281059187ce6a', 'page_number': 1}, score: 0.5195132686870181)]",0.0,0.2038368135690689,0.0,0.0,0.0 +What were the first 256 code points of Unicode made identical to? ,ISO-8859-1,ISO-8859-1,"[Document(id=1ca3328892ba24004a75d8cfaab0f58ce32bed8ee034be6fa54e33e8b0a62eb5, content: ' UTF-16 extends UCS-2, using one 16-bit unit for the characters that were representable in UCS-2 and...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.6513962855269255)]",1.0,1.0000001192092896,1.0,1.0,1.0 +"How many countries did her song ""Irreplaceable"" get number one status in?",five,three,"[Document(id=1458da8434cabde78aa443222423f12deeb21dfbf444ec9694b97cfdf73b8417, content: ' Beyoncé was the third woman to have writing credits on three number one songs (""Irreplaceable"", ""Gr...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 0.48544494375755437)]",1.0,0.6437669396400452,1.0,1.0,1.0 +How many Examination Boards exist in India?,30,30,"[Document(id=16d8a70ea7a6d58f48e68b5c9072e65770d4a4053e9906c77dee6ff0c12c90a1, content: ' However, these vocational schools are not part of the German dual education system. Ergänzungsschul...', meta: {'name': 'Private_school', 'source_id': 'c62330ba95e9919513bc9fc65c0d21a748b4a518b9b1f905a113de7f37274e44', 'page_number': 1}, score: 0.5124628146490905)]",1.0,1.0,1.0,1.0,1.0 +What play showed an early depiction of civil disobedience?,Antigone,Antigone,"[Document(id=23b161a9804a8a11c97c4f3f14b6762b49fdf3b1fefb8ff1de76b00cfa00995a, content: 'One of its earliest massive implementations was brought about by Egyptians against the British occup...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 0.6589027090165251)]",1.0,1.0,1.0,1.0,1.0 +"Once in conversation with the Queen, how is Elizabeth addressed?",Ma'am,Your Majesty and thereafter as Ma'am,"[Document(id=3d92674c03fa6fb7dff9bfacadbd449807bd3f50ac1fd93ba9ff982be85ddbf6, content: ' Under pressure from public opinion, she began to pay income tax for the first time, and Buckingham ...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.5990400446359011)]",1.0,0.6777033805847168,1.0,1.0,1.0 +In what century did Paris become largely into fashion?,19th,19th,"[Document(id=c37151361f1ebbd218e8e53e243abfe86d24fabddd8be6f3401cf56e9e98633b, content: ' Its name is said to have come in 1814 from the Russian soldiers who occupied the city; ""bistro"" mea...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.5561873109919271)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did Alfred the Great begin his reign?,871,871,"[Document(id=5396376bb5221d60d6016a3ef2e8c6fb591b1eb2c9c6dab90677a7e0ac39a5a5, content: ' The speech of eastern and northern parts of England was subject to strong Old Norse influence due t...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.4737880350354553)]",0.0,1.0,0.0,0.0,0.0 +What two methods can be used when using Load Testing?,data or a large number of users,load or endurance,"[Document(id=77ce7cf14e807c8808892e96984a50baad4f22bdca6b22034b2fb170f96240a2, content: ' Stability testing (often referred to as load or endurance testing) checks to see if the software ca...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.5191316946425397)]",1.0,0.19878101348876953,1.0,1.0,1.0 +what may possibly be a solution to net neutrality concerns? ,municipal broadband,municipal broadband,"[Document(id=bb0e180e009c490f3ae30fb55a9a91cbe512409c2e12ebae0203298a6f3efe78, content: 'S. Federal Communications Commission (FCC) was reported to be considering a new rule that will permi...', meta: {'name': 'Internet_service_provider', 'source_id': 'b6cdeebacab1c0fc99cc348ba10a7ad5f8a09ca4691c74f89374730939ffbbb4', 'page_number': 1}, score: 0.6359423909104869)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What are the ground-attack aircraft in the USAF usually deployed in support for? ,U.S. ground forces,U.S. ground forces,"[Document(id=cdfa1e6bea8fd42b00ce80468ba471689216a6195425c2435fb1141b0b8c7e6f, content: ' +The ground-attack aircraft of the USAF are designed to attack targets on the ground and are often d...', meta: {'name': 'United_States_Air_Force', 'source_id': '1bb222de66e5a2c81f1d5b75bf3eced066939aec82934a5cb176a607b4e8cc8f', 'page_number': 1}, score: 0.6595684899907257)]",1.0,1.0,1.0,1.0,1.0 + In what year did the invasion of Russia take place?,1812,1853,"[Document(id=4b98a71cc257b51798b5ea0d563d064d9fb226d48c8fe7f85873c68abd7f0818, content: 'The Crimean War was a military conflict fought between October 1853 – March 1856 in which Russia los...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 0.5591400200208592)]",0.0,0.672028660774231,0.0,0.0,0.0 +What was the practice of religion to the Romans?,personal choice,a part of daily life,"[Document(id=1a8c1e09e777c4e183dceed861053866bfd85f790fb0c2cd9b9aad59ad7dc5b2, content: ' Even the most skeptical among Rome's intellectual elite such as Cicero, who was an augur, saw relig...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.6737981139329287)]",1.0,0.2831001281738281,1.0,1.0,1.0 +What is one example of failure testing?,Software fault injection,Software fault injection,"[Document(id=76ad3fb3d257a3812a169d749a5fb13f2e05383764e8f9dfc53895f08747b375, content: ' Beta versions can be made available to the open public to increase the feedback field to a maximal ...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.5810701579387141)]",1.0,1.0000001192092896,1.0,1.0,1.0 +"Out of the maximum amount of players allowed, one must be a what?",goalkeeper,goalkeeper,"[Document(id=5888e9054f80f5ec8de6b82cb835f9c0f204a0b02a1dbcba52a8c08b7fc1e5d9, content: ' +In game play, players attempt to create goal-scoring opportunities through individual control of th...', meta: {'name': 'Association_football', 'source_id': 'f2b7305b09ad8a43b8389a5eb2d84b8ea4b4deaa7968d21fe9da9da9ecda2c6b', 'page_number': 1}, score: 0.46009544332231944)]",1.0,0.9999999403953552,1.0,1.0,1.0 +In which year was the new constitution ratified?,1855,1899,"[Document(id=aa96f32d4433624ee85129321e58c7252d53946f3e756da84c43722a5dfecef4, content: ' The white conservative Democrats regained control of the state legislature in 1870, in part by Ku K...', meta: {'name': 'North_Carolina', 'source_id': '3282e3dedcf0f1923e66ebf5b79cf9b9deec27c8ca1df8908e03770b02b38126', 'page_number': 1}, score: 0.4965179351176006)]",0.0,0.5596075057983398,0.0,0.0,0.0 +What vessel was attacked in 1845?,Naiad,Glencoe and Sea Nymph,"[Document(id=353e78a8c5526319fefad1c37e2e3511ce4323f2f90be311b53a11227b5bd85c, content: ' In 1852 the San Francisco-based ships Glencoe and Sea Nymph were attacked and everyone aboard excep...', meta: {'name': 'Marshall_Islands', 'source_id': '762a4f6095de127ded5441c7a17d1c9c38f731ac9cb1ffe8de30af6573be7edf', 'page_number': 1}, score: 0.5135568769234267)]",1.0,0.3519451916217804,1.0,1.0,1.0 +When did Travel + Leisure list Yale as one of the most beautiful campuses?,2011,2011,"[Document(id=92daacd08f6215b2b6c9a23e96940ae9ffa3af8f6a0300108c6bfcf840702db9, content: ' In western New Haven, the university holds 500 acres (2.0 km2) of athletic facilities, including th...', meta: {'name': 'Yale_University', 'source_id': 'f33638c4d6b8d7572ba500f3a8affca1729aae3046c83a94e2a7424dd50f6549', 'page_number': 1}, score: 0.5900821044427363)]",1.0,1.0,1.0,1.0,1.0 +What sort of variants did the Human Genome Project indicate that many of the measured transcripts were?,alternative variants,non-repetitive,"[Document(id=35aa1cb690df59867ee05e5097593f48e97261d2c2794a4fcd261522b374c202, content: ' In December 2013, scientists first sequenced the entire genome of a Neanderthal, an extinct species...', meta: {'name': 'Genome', 'source_id': '18d3410858e240e5596ce2f22db31a3f6dcd18aba98f4fb8d8e801b7e537aceb', 'page_number': 1}, score: 0.5173390507269158)]",0.0,0.2370980978012085,0.0,0.0,0.0 +During what year did the Italians invade Libya?,1911,1911,"[Document(id=1b5c167a9d5d609ea5fb8a97ac9f8e9db1b78d5c91d2f95320c7a861768e926e, content: ' He repeatedly expressed a preference for the desert over the city and retreated to the desert to me...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.610828310088769)]",1.0,1.000000238418579,1.0,1.0,1.0 +What did Davidson believe the God of Aristotle is synonymous with?,rational thought,rational thought,"[Document(id=707efe35d39bbe6f12111e251260c07bcfa48cea8000d7e2c9680c78ec3b9dcc, content: '"" The theory was indebted to Aristotle's pluralism and his concepts of Soul, the rational, living as...', meta: {'name': 'Idealism', 'source_id': '55a0bd42a4a737442a2d93fe0926424e236910de01b2406ed43c174f2225ff9d', 'page_number': 1}, score: 0.530264670750077)]",1.0,1.0,1.0,1.0,1.0 +What percentage of students at Notre Dame are the children of former Notre Dame students?,21–24%,21–24%,"[Document(id=4a2399f100a536a4baf71c5dedf166c45f09b1b237a505e9a84faf1da7fd8a1b, content: ' +In 2014 the Notre Dame student body consisted of 12,179 students, with 8,448 undergraduates, 2,138 ...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 0.6933619621704381)]",1.0,1.0,1.0,1.0,1.0 +On what day did U.S> planes located Ozawa's fleet?,second,second,"[Document(id=718965c4d19f8aedea8f06ea42eb1b8aa90c1796050fa2b7fd1db899ab71ddbe, content: 'S. fleet encountered massive AA fire with proximity fuzes. Only one American warship was slightly da...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 0.6812274997788408)]",1.0,1.0,1.0,1.0,1.0 +Greek was crowned the European champions in what event?,Euro 2004,2005 European Championship,"[Document(id=76be05dc0de2dfbcf6d3d16e80abbfa4ee57f55d30696681b941ecd6ff700822, content: 'O.K. Greek basketball teams are the most successful in European basketball the last 25 years, having...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.6882304367381349)]",1.0,0.7931281328201294,1.0,1.0,1.0 +Which tense indicates that an action hasn't finished?,imperfective,imperfective,"[Document(id=bb36de75da71a69959981a8c0923df0cc0a0f4248e05fecc303139076fc6aa2d, content: ' The evidence supporting this claim is patchy: Croatian linguist Stjepan Babić complained that the t...', meta: {'name': 'Serbo-Croatian', 'source_id': '2cc55814bc801258e8300ed82e003428dbf11715c74ad99297d61c5dd77023f4', 'page_number': 1}, score: 0.3154931739562564)]",1.0,1.0,1.0,1.0,1.0 +What acoustic element could be distorted by different audio settings in iPods prior to 2007?,Bass,bass instruments,"[Document(id=7308906ca1725346de1f8669d04745bd0719f93959d2b77a3e2eedd2aea218ba, content: ' This would happen for EQ settings like R&B, Rock, Acoustic, and Bass Booster, because the equalizer...', meta: {'name': 'IPod', 'source_id': 'f0324763387a7e11c235569bada6935891b2c5ba40dd065ab5acc1bb5de8dbed', 'page_number': 1}, score: 0.5572792387087808)]",1.0,0.8207652568817139,1.0,1.0,1.0 +In what way can Neutral triatomic hydrogen exist?,excited form,excited form,"[Document(id=814f2c2136f18b6a49318bf9a19fad148d9b272f987b2539771dc5dbd3beca10, content: ' +Under ordinary conditions on Earth, elemental hydrogen exists as the diatomic gas, H2. However, hyd...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.5947984648702281)]",1.0,1.0,1.0,1.0,1.0 +How are children used in the conflicts in Burma?,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements.,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements,"[Document(id=ff11a1c6102e2b293943cbcfb8c2935799487df6f465dea412c00844c1c7c655, content: '"" In July 2013, the Assistance Association for Political Prisoners indicated that there were approxi...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.6589232659427924)]",1.0,0.9912453889846802,1.0,1.0,1.0 +In what decades were Arsenal players stereotyped in a negative way?,1970s and 1980s,1970s and 1980s,"[Document(id=ac63b8357f80e413cededa7c5a37849ede32b9c8d7d3181d6e287572836f5f86, content: ' They formed the backdrop to one of the earliest football-related films, The Arsenal Stadium Mystery...', meta: {'name': 'Arsenal_F.C.', 'source_id': 'b197b14120aabbc3bf2073d4b3e99c1c51d56d5ac5248f8a97dadcfa056a6f7e', 'page_number': 1}, score: 0.5470057790543041)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did new construction start in Melbourne?,late 1890s,late 1890s,"[Document(id=e7a2f32f10a857b188ae10953333d02317612155cbae0e2ac8e0bf5365da90e4, content: ' The Melbourne financial crisis was a contributing factor in the Australian economic depression of t...', meta: {'name': 'Melbourne', 'source_id': 'e9cc00699ea29b72949874ec0e147efa277ecdaa2a727246f376aab1a8d342bd', 'page_number': 1}, score: 0.5958355202000913)]",1.0,1.0,1.0,1.0,1.0 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:2__chunk_size:15.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:2__chunk_size:15.csv new file mode 100644 index 0000000..22e135c --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:2__chunk_size:15.csv @@ -0,0 +1,121 @@ +questions,true_answers,predicted_answers,retrieved_docs,doc_mrr,sas,doc_map,doc_recall_single_hit,doc_recall_multi_hit +What were the earliest priesthoods? ,flamines,flamines,"[Document(id=3c9b5484de418625b08e3d8f4e3c3398f40f2f5af0bbf48528ee46ea38c64e7c, content: ' In household cult, the paterfamilias functioned as priest, and members of his familia as acolytes a...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5597276623459511)]",1.0,1.0,1.0,1.0,1.0 +When was a third visit to Mnafeld scheduled? ,early 1546,15 days later,"[Document(id=b1a7bf3d4ede00a180df1d10e5220cf7adfe31e6c297fec3813651acf8043c8f, content: ' In May 1957 Diem, then President of South Vietnam, made a state visit to the United States for ten ...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 0.342617954858916)]",0.0,0.24934425950050354,0.0,0.0,0.0 +What was the name of the airfield where heavy raids took place?,Farnborough,Coventry,"[Document(id=93c78551f27ece2b380d8a30b1c8852063e2afe373bf66ac0ff8fc705d37f96c, content: ' On 19 November 1940 the famous RAF night fighter ace John Cunningham shot down a Ju 88 bomber using...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 0.5731025412729198)]",1.0,0.5261203050613403,1.0,1.0,1.0 +The Modern English ending -'s is derived from what ending in Old English?,-es,-es,"[Document(id=e9007dae49d8af7b7fa7932aef22bbdc01132df7a83250c1516203d03c7a8b4b, content: ' It is most “important to recognize that in many words the English and Scandinavian language differe...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.6303054320453825)]",1.0,1.0000001192092896,1.0,1.0,1.0 +In what area is it common for spiritual mentorship to be extremely high?,the West,Hinduism,"[Document(id=b43e607046d3a9f2878f76721d6d75cb6c46d2bdb38f6b3fdc5f1b9a2f23a163, content: ' +In The Church of Jesus Christ of Latter-day Saints (LDS Church), the teacher is an office in the Aa...', meta: {'name': 'Teacher', 'source_id': '8d3b7bd3176a10380300a49fa713b8fffa5e77a181df51ea823d2bdeb46d4c7b', 'page_number': 1}, score: 0.5091397083739027)]",1.0,0.4012155532836914,1.0,1.0,1.0 +Who is the only Greek volleyball club to win European titles?,Olympiacos,Olympiacos,"[Document(id=4af194e6a549f3cca324fac69732006a675ddeb8e841c964604dff19122e4f16, content: 'O.K, Maroussi) have won 3 Triple Crowns, 5 Saporta Cups, 2 Korać Cups and 1 FIBA Europe Champions Cu...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.6223667259906489)]",1.0,1.0,1.0,1.0,1.0 +How many judges were on the panel for this season?,four,four,"[Document(id=64448bd26ee757122a322e3c87d03eaf23b7b9bfaa5b939f12b70c7a629dd6d7, content: ' This season's judging panel consisted of Randy Jackson, along with Mariah Carey, Keith Urban and Ni...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 0.4864700329068361)]",1.0,1.0,1.0,1.0,1.0 +"Along with a chemical process, what type of process is used by night vision devices to convert infrared into visible light?",electrical,electrical,"[Document(id=2296ed852027297f8181dc1f64af4fcbb66e36bb282c6197423888ce9d011df5, content: '9–14 μm) and produce images of that radiation. Since infrared radiation is emitted by all objects ba...', meta: {'name': 'Infrared', 'source_id': 'ba9d9843065facd8aa75e472f0c7d760ae3d78712335a49f7abaf499acb21d1c', 'page_number': 1}, score: 0.6292707894619731)]",1.0,1.0,1.0,1.0,1.0 +The second most prominent religion in New York is what?,Judaism,Judaism,"[Document(id=5dac70957842f214d4c60f76564b27a7e657cd237c677ed236d1ed1ca76e360f, content: ' +Christianity (59%), particularly Catholicism (33%), was the most prevalently practiced religion in ...', meta: {'name': 'New_York_City', 'source_id': '8a1509d057a51ed7a68b42ff87df5d459ac7f15f3d0f4f1685859ba197f9686d', 'page_number': 1}, score: 0.6064028863553603)]",1.0,1.0000001192092896,1.0,1.0,1.0 +The Free Hanseatic City of Bremen includes which cities?,Bremen and Bremerhaven,Bremen and Bremerhaven,"[Document(id=48379d700246108e0c3da307a3a8e5de24d8bee4044a5988144f1d9cd57b09c2, content: 'Germany is a federal republic consisting of sixteen federal states (German: Bundesland, or Land).[a]...', meta: {'name': 'States_of_Germany', 'source_id': '60764e89141121f734e956aeeb48fcbe12c7e4fdec683c4f9fa29406a3f0579e', 'page_number': 1}, score: 0.5087021851568686)]",1.0,1.0,1.0,1.0,1.0 +What seeks to limit the use of polysyllabic characters?,CPC,fullwidth forms,"[Document(id=8c55128e915db91be79f675390a7251853ff83d2c6945bcbba98c2fef9e723ac, content: ' For example, the ""fullwidth forms"" section of code points encompasses a full Latin alphabet that is...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.536255021263228)]",0.0,0.06494750082492828,0.0,0.0,0.0 +In what courts can federal habeas corpus suits be brought?,federal,federal,"[Document(id=18fb3c7c246cfed9c7f86df4b14156139de2cefac2e61a9bbd26c8653739ab9b, content: ' Further, the Court's decision in Baze v. Rees, upholding a lethal injection method used by many sta...', meta: {'name': 'Capital_punishment_in_the_United_States', 'source_id': '0a4e4dda32f7be6a4f78da5ed9ee21d2cc7659112c0d087b2923a482bdc77b5b', 'page_number': 1}, score: 0.5379543078595055)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What did Whitehead consider himself as a philosopher? ,rank amateur,"last great Anglo-American philosopher before Wittgenstein's disciples spread their misty confusion, sufficiency, and terror."" French sociologist and anthropologist Bruno Latour even went so far as to call Whitehead ""the greatest philosopher of the 20th century","[Document(id=777192ecd0b066a38e00224d5a2269174c0dfa158ccc4e8bc7395644c0b99822, content: 'g., a religion might encourage the violent extermination of a rival religion's adherents). +However, ...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 0.5657093946225062)]",1.0,0.11629921197891235,1.0,1.0,1.0 +What condition does a person have to meet to be allowed by law on a coin?,deceased,"""No state shall... make any thing but gold and silver coin a tender in payment of debts","[Document(id=68d14edc51b808820ba009fc52fe536383dc5d9fe285a432d114fdcd84ef9e86, content: ' The currency as we know it today did not get the faces they currently have until after the early 20...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.5031800963235427)]",1.0,0.03460206836462021,1.0,1.0,1.0 +Which region of the US experiences rain year round?,Pacific Northwest,Pacific Northwest,"[Document(id=6acdafac063b4c532b8491aead2266ee3880a9ecef0a8eac6e645a6d8ae35346, content: ' Precipitation averages less than 15 inches (38 cm). The Southwest is a hot desert, with temperature...', meta: {'name': 'Geography_of_the_United_States', 'source_id': '6eef5e1e8e3a1d489b23e0304d3cc9472b84a44871b2fd5ecfc8ab3f12e8ccf0', 'page_number': 1}, score: 0.6014007834873065)]",1.0,1.0,1.0,1.0,1.0 +What is the ability to recognize and adapt to new specific pathogens called?,Adaptive (or acquired) immunity,Adaptive (or acquired) immunity,"[Document(id=ad1b87ebcacf263e09bd48e7e71e798fd5bc8ca82f8a1917807172a097a552ae, content: 'The immune system is a system of many biological structures and processes within an organism that pr...', meta: {'name': 'Immune_system', 'source_id': 'fcb8c5092b2596a66d4f96587d653cbabda606874d86632c35ec40ef8f023051', 'page_number': 1}, score: 0.6578931511774535)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who allegedly hit a home run to the Center?,Babe Ruth,Babe Ruth,"[Document(id=9f4c20a2bce44cf9bd3af493be183ab333c4d80eef37f323c78a73218b2cb402, content: ' Because of the tie the Giants and Cubs ended up tied for first place. The Giants lost the ensuing o...', meta: {'name': 'Chicago_Cubs', 'source_id': '10bd5330115a2fe2b9ef60d21abe3942769709161783409af0c4444ccac129c3', 'page_number': 1}, score: 0.5064623445443148)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Which type of graduate from Imperial earned the second highest average starting salary after graduation?,Computing graduates,Computing,"[Document(id=724eb1fe8da5e0d4da8a448227c2eb83283ae94ad66b58e07296d26274b6a2b4, content: ' In terms of specific course salaries, the Sunday Times ranked Computing graduates from Imperial as ...', meta: {'name': 'Imperial_College_London', 'source_id': '8ff02d910ce681fdd72074b13ed322271625386966ee827dabf1c93ca556fd6e', 'page_number': 1}, score: 0.47044950560227883)]",1.0,0.5292261838912964,1.0,1.0,1.0 +In what year did Eisenhower propose his second civil rights act?,1960,1960,"[Document(id=0f23a3d664b2d9d293629ea9d692c835b9a8e93ffb30bc45de463cd9b9d93424, content: ' Therefore, Khrushchev would not take part in the summit. Up until this event, Eisenhower felt he ha...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 0.642219936436536)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What is the current height limitation in Paris in central areas?,50 m,50 m (160 ft),"[Document(id=44b4daa4d3c4378951ca880516b3342520595caa477d61e2c5fc88de3627c8f0, content: ' Modern Paris owes much to its late 19th century Second Empire remodelling by the Baron Haussmann: m...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.66910672984007)]",1.0,0.804789662361145,1.0,1.0,1.0 +Income level was found to have a signification correlation to which outcome in the study?,nutrition literacy,nutrition literacy,"[Document(id=2e96d54b758a50cdd3b44fcb12856a2d073a3d1e742653bff90785cf4fe42d77, content: 'S. NAAL introduced the first ever measure of ""the degree to which individuals have the capacity to o...', meta: {'name': 'Nutrition', 'source_id': '861936066c6bb3f32c6021d1ede86c245833c0ef6507763ae6dc2bf67853e44a', 'page_number': 1}, score: 0.4825331385543225)]",1.0,1.0,1.0,1.0,1.0 +How much did Viacom want in damages in their lawsuit against youtube?,$1 billion,$1 billion,"[Document(id=497c11f9fbbfb1f37be3dcd0a29d747c5f8f8967252a25e340b937417a080280, content: ' Three successful complaints for copyright infringement against a user account will result in the ac...', meta: {'name': 'YouTube', 'source_id': '4d18798daeceb997a6dcb04b575dbbdad768671ca9202116370611d944d510be', 'page_number': 1}, score: 0.6587906616356036)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who should be able to challenge administrative orders in court?,individuals,individuals,"[Document(id=8a0f220e6df4e50504002c6ad6312d289ff774c9c753db65e80fb1022f521be2, content: ' Dicey's: “no man is punishable or can be lawfully made to suffer in body or goods except for a dist...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 0.5446938087977631)]",1.0,1.000000238418579,1.0,1.0,1.0 +The Slavs were under what name in the early 6th century?,the Antes and the Sclaveni,Sklaboi,"[Document(id=8ad39bb1347ca9057c45116b007a845b2f566c20ddd8d7fb64626c78a4750a3c, content: 'Slavs are the largest Indo-European ethno-linguistic group in Europe. They inhabit Central Europe, E...', meta: {'name': 'Slavs', 'source_id': 'a55731ff3cd03d8fe34d4e646dbfcdc2088f01190e5110c9ff0baf4e4b486847', 'page_number': 1}, score: 0.7195013371244505)]",1.0,0.27494680881500244,1.0,1.0,1.0 +How are the results simplified when using the SASO?,"into four summaries that look specifically at responses that correspond to either homosexuality, heterosexuality, bisexuality or asexuality.","measures sexual orientation on a continuum, considers various dimensions of sexual orientation, and considers homosexuality and heterosexuality separately","[Document(id=3638fb16add213ce4e749139ce1852d845838bce357ac5b46f8f2163ed664217, content: ' The classification scheme works such that individuals in the same category show the same balance be...', meta: {'name': 'Sexual_orientation', 'source_id': 'f94279650936a403aa3d2b43ab1ad42b1fd14d05455aa86fbcda6cda978ffe5c', 'page_number': 1}, score: 0.3776180459315529)]",1.0,0.6333848237991333,1.0,1.0,1.0 +What socioeconomic class used the Florentine Tuscan language in Dante's time?,upper class,lower class,"[Document(id=f236af80c5acc5149304af2ef853189222da8b519384aeba0e93844202f80d55, content: ' +In the early 20th century, the vast conscription of Italian men from all throughout Italy during Wo...', meta: {'name': 'Dialect', 'source_id': '2e05b686e48c9512ae5466f1d15202b44fdc903ea32d0a42c5dbc9ef173d051d', 'page_number': 1}, score: 0.5253530117738974)]",1.0,0.776959240436554,1.0,1.0,1.0 +"In addition to ammo proposals, what other need was articulated?",The need for some form of tracer or smoke trail,anti-communist Russia policy,"[Document(id=0a8265dd4cca4efea06dcabb2e88acb97e00e621b009a5c17aa4cda38acbbb4f, content: ' He also upgraded the role of the National Security Council in planning all phases of the Cold War. +...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 0.36376947224784034)]",0.0,0.03253399580717087,0.0,0.0,0.0 +What is that power that enables us to love?,Prevenient grace,"wisdom and understanding enhance one's faith in God. According to him, these are the tools that God uses to commune with a contemplative. Love in the soul is both the cause and result of true understanding and judgement","[Document(id=9f0a0b71abae404c8342fedf70ff8f016c3a6b4ec47cc4fee1252fcf9d7adea4, content: ' +One of Albert's greatest contributions was his study of Dionysus the Areopagite, a mystical theolog...', meta: {'name': 'Dominican_Order', 'source_id': 'f55ba4e5b37c737e23256c74229a0bf3a5c697b70e171c6ce20c437d900b023e', 'page_number': 1}, score: 0.449116765433877)]",0.0,0.29284828901290894,0.0,0.0,0.0 +"Who was assassinated on September 22, 189 AD?",He Jin,Caesar,"[Document(id=dd09780a89c3259aae7f10ab2070b88e3687ba1267d0d7b037e76509ccd66648, content: ' Thus, the assemblies became powerless and were unable to oppose him. +Caesar was assassinated on Mar...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.44714711562407566)]",0.0,0.1853158324956894,0.0,0.0,0.0 +How did Gaddafi change the legal system?,he introduced sharia as the basis for the legal system,sharia,"[Document(id=4d48682f805f7a385767a5c2825f3fc2119d2b698a7fc9592571f5590731bae6, content: 'Muammar Muhammad Abu Minyar al-Gaddafi (Arabic: معمر محمد أبو منيار القذافي‎ Arabic pronunciation: [...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.6177406623423897)]",1.0,0.6138635277748108,1.0,1.0,1.0 +What name was it assumed Elizabeth would take upon her marriage to Philip?,"Mountbatten,",House of Mountbatten,"[Document(id=e315f255311d7dc4c756937a51f6602a99f1b8e3b0a145306a3d8c41c258bc57, content: ' She was educated privately at home. Her father acceded to the throne on the abdication of his broth...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.6146287331638632)]",1.0,0.7635930776596069,1.0,1.0,1.0 +What was the attack on the British weakness?,Battle of Fort Bull,Minorca in the Mediterranean,"[Document(id=19aa0d6dee9980002e943f9e1d31d63db517fa0aae06cfc59222ce379dddd1bd, content: ' As prime minister Pitt committed Britain to a grand strategy of seizing the entire French Empire, e...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 0.5842505432740723)]",0.0,0.0038108360022306442,0.0,0.0,0.0 +How much of South Tucson is Native American?,10%,2.7%,"[Document(id=95e1463ec5cbd41e6746a2396eee9d7fb5ba52847c057866e7e7fd5a7f376d91, content: ' The racial makeup of the city was 69.7% White (down from 94.8% in 1970), 5.0% Black or African-Amer...', meta: {'name': 'Tucson,_Arizona', 'source_id': '1cd8368afbbb7156e4d37c16c9795a1cf7209fbd0036331b6759337ae62d7224', 'page_number': 1}, score: 0.5508171690306347)]",1.0,0.7231398820877075,1.0,1.0,1.0 +Whose theory claimed that species were unchanging and designed by God?,John Ray,John Ray,"[Document(id=9a450124afc12f193f72ae1f8310b72ada7cbbf8f34bbd71f66eb32b0f47efbc, content: ' With the development of the modern evolutionary synthesis in the 1930s and 1940s, Darwin's concept ...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 0.6905690706074652)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who can restore a database that has been corrupted?,database administrator,Database transactions,"[Document(id=64793ba16b3dcf1ad2ea402349708087ba8524851dc937aa0c8517e629e95067, content: ' Data security prevents unauthorized users from viewing or updating the database. Using passwords, u...', meta: {'name': 'Database', 'source_id': '98b09743ece9be42ad021f0dc6ffab08c322e7eeace6aae0b98b4bbf67224bd3', 'page_number': 1}, score: 0.40743747811480446)]",1.0,0.4623287320137024,1.0,1.0,1.0 +What unpopular philosopher's text did Avicenna learn from?,Abu Abdullah Nateli,Abu Abdullah Nateli,"[Document(id=ec70e294a8a1203ecdbb9fe95270466a0129a5d607fe306df146559206aa5b31, content: ' Avicenna first began to learn the Quran and literature in such a way that when he was ten years old...', meta: {'name': 'Avicenna', 'source_id': '25cbb1011a29473e1dfa8d96ebe95bc8ead7393548de57e871f959624f7238f1', 'page_number': 1}, score: 0.7299585588069163)]",1.0,1.0,1.0,1.0,1.0 +What was the event called that caused Ayutthaya to reclaim lost territories ?,the Chinese threat,Ayutthaya recovered its territories by 1770,"[Document(id=391f8278a6d664b0fda2a91e05a75fcb178e560aca2778cf3383f74470691074, content: ' Its trade and secular administrative reforms built a prosperous economy for more than 80 years. Fro...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.49740108859030846)]",1.0,0.10474255681037903,1.0,1.0,1.0 +"Before the introduction of the Latin alphabet, how was Old English written?",runic system,runic system,"[Document(id=540a5b537ec7e1fdea905baae428e9a4c76167e7be305ec2dac494ec1ac16d1f, content: 'Old English (Ænglisc, Anglisc, Englisc) or Anglo-Saxon is the earliest historical form of the Englis...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.6427731581432499)]",1.0,1.000000238418579,1.0,1.0,1.0 +"When groups that are antagonistic to each other form a coalition for hidden gain, it is called what?",unholy alliance,unholy alliance,"[Document(id=b281d8f2fa5c72bf9176af2f66fbd72725ba99c1c3e8e2e12044e694c6ee3bbe, content: ' This sum itself may be all or a portion of the difference between the actual (inflated) payment to ...', meta: {'name': 'Political_corruption', 'source_id': 'e7c84fb03cb06b72c4ab51eb24218e9f2a10b95493e43d575342db0ffa3e9e1d', 'page_number': 1}, score: 0.5003888561596567)]",1.0,1.0,1.0,1.0,1.0 +What made a strong economic case for investing in low carbon technologies?,Stern Review,Clean Edge,"[Document(id=7726f77f73261125d28fc6db733db2a8ae6e451dfc266962ad1ddc5ccf22d22f, content: ' +New government spending, regulation, and policies helped the industry weather the 2009 economic cri...', meta: {'name': 'Renewable_energy_commercialization', 'source_id': 'fa5f7904b8df5272f4f870ed401f610435c916171c87b47deed9a45e6bd594ea', 'page_number': 1}, score: 0.5105360280339252)]",1.0,0.10936959832906723,1.0,1.0,1.0 +"Who introduced the Latin version of the word ""insect""?",Pliny the Elder,Pliny the Elder,"[Document(id=d4ddb3955cf6ff9d6bed687f54220f695fef0951972c64309938d9cf7670f6ba, content: ' +The word ""insect"" comes from the Latin word insectum, meaning ""with a notched or divided body"", or ...', meta: {'name': 'Insect', 'source_id': 'e14215fdfb7e1394430dedb24549d773464f110b7f19b86558bc101f84126630', 'page_number': 1}, score: 0.8069591810257106)]",1.0,1.0,1.0,1.0,1.0 +What was started by the Roman Catholic Church to dispense of the Cathars in Languedoc?,The Cathar Crusade,Roman Saturnalia,"[Document(id=092570f05e27505e1e18953896df0dd6bc5b48661c76667e0b9837f68372133a, content: ' The Lenten period of the Liturgical calendar, the six weeks directly before Easter, was originally ...', meta: {'name': 'Carnival', 'source_id': '6276aebf7ba195a529455cfda1ccf9d9aa6ddf58fac44b707047b161538ef46e', 'page_number': 1}, score: 0.49560710656928925)]",0.0,0.22954773902893066,0.0,0.0,0.0 +What music service is a huge competitor for Tidal?,Spotify,Spotify,"[Document(id=ea56a7c43f1347c76ec07affd8ad20729d040a8d58a36f56583b537f903e8142, content: ' +The release of a video-game Starpower: Beyoncé was cancelled after Beyoncé pulled out of a $100 mil...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 0.5759942076459466)]",0.0,1.0,0.0,0.0,0.0 +How much was each copper token worth?,a halfpenny,5-cent,"[Document(id=842ad1ee7799e7ecbfdfb163f8afe265cf62972cba3da0e97be6f505cb10fc03, content: ' In North America, copper mining began with marginal workings by Native Americans. Native copper is ...', meta: {'name': 'Copper', 'source_id': 'de5c60a665b93f9b358be412d53a37ca43d90005e3ec9a553ac38a38f076fadc', 'page_number': 1}, score: 0.47993080401424804)]",0.0,0.7035895586013794,0.0,0.0,0.0 +What ships were attacked in 1852?,Glencoe and Sea Nymph,HMS Jasper,"[Document(id=91f5b594d5423035cb6299d6343f01bf4eb248b5aa8de432e2f6c51ddedca474, content: ' On 21 May 1855, the gunboats and armed steamers attacked the seaport of Taganrog, the most importan...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 0.5363870553469913)]",0.0,0.3474302887916565,0.0,0.0,0.0 +What was the first fossil to display both clearly traditional reptilian characteristics?,Archaeopteryx,Archaeopteryx,"[Document(id=5b537a30444228b3b5e39f8e28a321a8e774f6051560dcab29bb0ece8b5ae99e, content: ' Unlike Archaeopteryx and the non-avialan feathered dinosaurs, who primarily ate meat, recent studie...', meta: {'name': 'Bird', 'source_id': '76114e3e9c92caec1de2bcfd38b6f22e1b48487f665fa6392f914934a6748ae1', 'page_number': 1}, score: 0.5631311233054687)]",1.0,1.0,1.0,1.0,1.0 +What are the theories that integrate molecular genetics with Darwinian evolution called?,the modern evolutionary synthesis,modern evolutionary synthesis,"[Document(id=7ff269861e7df34fcebf71c4907ef4694447d099e228c45550f4e2387812c2fd, content: ' This dogma has since been shown to have exceptions, such as reverse transcription in retroviruses. ...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 0.5781950352097195)]",1.0,0.9375607371330261,1.0,1.0,1.0 +At what time did Tesla get dinner?,8:10 p.m,"8:10 p.m., at Delmonico's restaurant and later the Waldorf-Astoria Hotel. Tesla would telephone his dinner order to the headwaiter, who also could be the only one to serve him. ""The meal was required to be ready at eight o'clock","[Document(id=bf93e5ca5e1091757426fa6d168a7b90f8c1d9dad833880ffcb9b06d8ad2b0f7, content: ' There are a minimum of 278 patents issued to Tesla in 26 countries that have been accounted for. Ma...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.687530275591786)]",1.0,0.31139981746673584,1.0,1.0,1.0 +In what year did the French reach Alsace?,1444,1789,"[Document(id=5ba09395255feae4ebf751bb80a7a4fa5d79691518f143a0bddc36e8712f8fc6, content: ' As a result, Alsace was transformed into a mosaic of Catholic and Protestant territories. On the ot...', meta: {'name': 'Alsace', 'source_id': '1105499d303196fb0b7ae4d26209720b0040e63376f114d458fd77f05836cbd4', 'page_number': 1}, score: 0.6855752258831345)]",1.0,0.5213750004768372,1.0,1.0,1.0 +Who was responsible for the handling of the Roman census?,two censors,Comitia Tributa,"[Document(id=6bfe26157bc8b93a3c22f1f4d5b8228d372139a27dc2848c3d256ad4386f3b4f, content: ' the citizens of Rome), the Comitia Tributa, was presided over by a consul, and was composed of 35 t...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.4990089103457631)]",1.0,0.14773741364479065,1.0,1.0,1.0 +When was a resolution agreed to about Chinese human rights issues in San Francisco?,"April 1, 2008","April 1, 2008","[Document(id=1e41339b2f87d6ffb45a6e6882ca2d5cec93b2ada663205b1745d23021ff3c6d, content: ' The planned closing ceremony at Justin Herman Plaza was cancelled and instead, a ceremony was held ...', meta: {'name': '2008_Summer_Olympics_torch_relay', 'source_id': 'a45e6ea09a1ff12d8a239a682c8ef3735ae894d5ab6427b50909053bb046e8b5', 'page_number': 1}, score: 0.5200463937587196)]",1.0,1.0,1.0,1.0,1.0 +Who was the last pope to be crowned?,Paul VI,Pius XII,"[Document(id=84e7b362d02db50bb7dd66920598b2f7bcfeaad549dacdf2dc9aa4ad9c16171f, content: ' John XXIII became the first Pope to receive the title, followed by John Paul II in 1994 and Francis...', meta: {'name': 'Pope_John_XXIII', 'source_id': '21b200b333306d54410b53cfd12e123951dabfc1f534196e5309132d00e8184d', 'page_number': 1}, score: 0.5445188707405088)]",0.0,0.6628619432449341,0.0,0.0,0.0 +Who handles serious medical emergencies on Norfolk Island?,the Royal Australian Air Force,Royal Australian Air Force,"[Document(id=61583a75fdba243f6a812f3bf73c41e686e4f8374db81e4dd5f1a4d5a4430336, content: ' +Non-Australian citizens who are Australian permanent residents should be aware that during their st...', meta: {'name': 'Norfolk_Island', 'source_id': '8be872c9c3302a16163599d52ec60841ddff13042f8e8e2e0aa14de2c192e2ee', 'page_number': 1}, score: 0.6689246760470762)]",1.0,0.9528100490570068,1.0,1.0,1.0 +Were Roman soldiers expected to purchase their own equipment?,"wealthiest citizens, who were able to purchase the best equipment",the Roman state was forced to arm its soldiers at the expense of the state,"[Document(id=1c55594460ba4e27d8751041858090c94678420d1701172511f1bad1a1d70154, content: ' As a result, the Roman state was forced to arm its soldiers at the expense of the state, which it h...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.5434537812989179)]",1.0,0.255912184715271,1.0,1.0,1.0 +Who did Nasser's group try to assassinate?,Hussein Sirri Amer,Nuri al-Said,"[Document(id=c355e0f20036a60ada1e4711a62b4d11adebf07dce76a808203fdd588cc1168e, content: '"" Nasser ordered a crackdown against Syrian communists, dismissing many of them from their governmen...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 0.6408207539725228)]",1.0,0.5893326997756958,1.0,1.0,1.0 +What was AJ Ayer an important member of?,Logical empiricism,computer pioneer and inventor,"[Document(id=f7e96505ccf7689f7e371e32dcd7a1dc9fa715eb5b41f801d29a38043eb97ec3, content: ' +Other: Civil rights leader W. E. B. Du Bois; philosopher Henry David Thoreau; authors Ralph Waldo E...', meta: {'name': 'Harvard_University', 'source_id': '7f91a9284c2db4a8d1147861fa2c02c6a25281db1cbc78097950be011c0e6269', 'page_number': 1}, score: 0.37024883815166565)]",0.0,0.10659181326627731,0.0,0.0,0.0 +In which year was a settlement reached regarding GE's pollution of the Housatonic River?,1999,1999,"[Document(id=66a9fb4c9d1cbb2bdc754757cfcbce6fa169d08ab7340e60e7b05bd65c9e5d65, content: ' In June 2014 a formal offer From GE worth $17 billion was agreed by the Alstom board. Part of the t...', meta: {'name': 'General_Electric', 'source_id': '26d4044eb2d79191635af69e57c262af8041a70871e60e989128c458529f04cc', 'page_number': 1}, score: 0.49639086782035935)]",1.0,1.0,1.0,1.0,1.0 +What area in Charleston is considered the 'industrial area'?,the city's Neck area,Charleston–North Charleston–Summerville Metropolitan Statistical Area,"[Document(id=a00baea2441c87e38109afe86358b94ae591d905c9ff5c35c440986bd364ee66, content: 'Charleston is the oldest and second-largest city in the U.S. state of South Carolina, the county sea...', meta: {'name': 'Charleston,_South_Carolina', 'source_id': '6135d76cf2064b0c4f70e685c5c7e15b8093869ebd10f9a74e8acb07e2173763', 'page_number': 1}, score: 0.5451335165669611)]",1.0,0.40961262583732605,1.0,1.0,1.0 +In what year did Frédéric obtain citizenship in France?,1835,1835,"[Document(id=7b2e31c3f54b0c1a9b97db27512199c167f56012decb955c5fb2ad06a110d92e, content: 'Frédéric François Chopin (/ˈʃoʊpæn/; French pronunciation: ​[fʁe.de.ʁik fʁɑ̃.swa ʃɔ.pɛ̃]; 22 Februar...', meta: {'name': 'Frédéric_Chopin', 'source_id': '5e95b80d7ceb3febb2d1a7dfe09730771a231fc8d23c15cd74d3fa2327cfcb14', 'page_number': 1}, score: 0.5926069778418124)]",1.0,1.0,1.0,1.0,1.0 +What was Tesla afraid someone was trying to do with his invention?,steal the invention,steal the invention,"[Document(id=a7e7642fe4191fff00d610d3d0c95bcb299ff890393b3b9dda127bc3cc40623b, content: ' Tesla tried to interest the US War Department, the United Kingdom, the Soviet Union, and Yugoslavia...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.6408155390011724)]",1.0,1.0,1.0,1.0,1.0 +What is the group called that does not agree with government at all?,anarchists,Non-revolutionary civil disobedience,"[Document(id=172d5ae15d23469aad64106d5290dbb9c81377ad9262463cdd5058509187a788, content: ' +Non-revolutionary civil disobedience is a simple disobedience of laws on the grounds that they are ...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 0.4826805294935842)]",1.0,0.48217201232910156,1.0,1.0,1.0 +What was the name of the bookstore the authors met in?,Old Corner Bookstore,Bibliothèque Bleue,"[Document(id=431a21bc2021ab05bedf5f9c62189ca16d692858fb84414f96b2f293034850da, content: '"" Women, as salonnières, were ""the legitimate governors of [the] potentially unruly discourse"" that ...', meta: {'name': 'Age_of_Enlightenment', 'source_id': 'd6935b64f79e30b7c3de4c952b643a55674c548cd0bb703870a094fd3e646d84', 'page_number': 1}, score: 0.421877163066887)]",0.0,0.32271450757980347,0.0,0.0,0.0 +Time and space are both examples of what type of resource?,complexity resources,fundamental intellectual structure,"[Document(id=dd4f4d9a87883e0aa9f9e24e4db2a323a2a0555ed35dcc2ef63f816e15bbcd52, content: 'Time has long been a major subject of study in religion, philosophy, and science, but defining it in...', meta: {'name': 'Time', 'source_id': 'f3ee42c6d38306d65c6207a15a1b9c65ff08a69728d5d0197f9593548177b229', 'page_number': 1}, score: 0.50831139766181)]",0.0,0.3276873826980591,0.0,0.0,0.0 +What had happened to Frederick's extra troops from Russia?,"Catherine, who ended Russia's alliance with Prussia and withdrew from the war",placed a corps of his own troops under Frederick's command,"[Document(id=d7342a0e3021ae2d3bba679cf10ecb72ddc69518be33b380ee6b9e8f5e4d03c9, content: ' +On the eastern front, progress was very slow. The Russian army was heavily dependent upon its main ...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 0.6968056158780116)]",1.0,0.311606764793396,1.0,1.0,1.0 +What percentage of its workforce did Dell terminate in from 2007 to 2008?,10%,12,"[Document(id=67fda94cfa124d7c449d0e5d6525ee64bb5132e4e8255f2a8c0298097627fabb, content: ' Increasing spending on R&D would have cut into the operating margins that the company emphasized. D...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 0.6258874375525161)]",1.0,0.29873126745224,1.0,1.0,1.0 +What organization was Rome's official caretaker?,the senate,pontifex maximus,"[Document(id=71718b05e0ad88a856f11532834265f8f64091425308e7bc717ce93437673530, content: ' +Towards the end of the Republic, religious and political offices became more closely intertwined; t...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5607277962054709)]",1.0,0.1858067661523819,1.0,1.0,1.0 +Who chose BYU as the location of the national Middle East Language Resource Center?,United States Department of Education,United States Department of Education,"[Document(id=73c809a12535348ffed165eff42c189f3aa95c6ed455c42b5e7932cd44167a8d, content: ' Several of its language programs are the largest of their kind in the nation, the Russian program b...', meta: {'name': 'Brigham_Young_University', 'source_id': 'cb3b81b6f2c540426803106face67bc5fd4db7b1a9ef422ec3601456732b4785', 'page_number': 1}, score: 0.704733357632634)]",1.0,1.0,1.0,1.0,1.0 +Who discovered Hydrogen gas?,Robert Boyle,Robert Boyle,"[Document(id=b562ec53d471dfbb3ba9b12247b16c622039308f7ec5b5b5344127cfeb67e87d, content: ' +In 1671, Robert Boyle discovered and described the reaction between iron filings and dilute acids, ...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.7076322081359558)]",1.0,1.0,1.0,1.0,1.0 +Where was Polybius forced to go as hostage?,Rome,Rome,"[Document(id=b5c785a8e22edd8d9d6cb5708ef03b91b1dd4137c66bb49c28d760b75bfb84be, content: ' Some areas of the conquered world were more affected by Greek influences than others. The term Hell...', meta: {'name': 'Hellenistic_period', 'source_id': '322682cbb61d31c17e6d21e06723e94efd0ef242a5c9f308888fa14209c4078b', 'page_number': 1}, score: 0.5545614504809487)]",1.0,1.0000001192092896,1.0,1.0,1.0 +In what decade was disco popular?,1970s,1980s,"[Document(id=a1ebb6111bad62b247dd1b756728c9569fa6d4b9a446c19b1950e68e79933994, content: ' As of 2015, house music remains extremely popular in both clubs and in the mainstream pop scene whi...', meta: {'name': 'House_music', 'source_id': 'c556f4a459e8a99b6d2065bab8fd6a918d0a00f13291aad2473922492e4abf2e', 'page_number': 1}, score: 0.5521305810998134)]",0.0,0.8399688005447388,0.0,0.0,0.0 +What do the rivers form?,lakes,lakes,"[Document(id=dfdb4032d57d026d1e52c68a1cff13d84af492b1c4c23fee3c33111d08105adf, content: ' The Rhine originates in a 30 square kilometre area in Switzerland and represents almost 60 percent ...', meta: {'name': 'Alps', 'source_id': 'd2eb3ad3af7d6110ae691bf40660ff7a79e4e9dbfd25785b4e55c7700bfcae79', 'page_number': 1}, score: 0.49814621522653785)]",1.0,1.0,1.0,1.0,1.0 +What were the first 256 code points of Unicode made identical to? ,ISO-8859-1,BMP,"[Document(id=bdee99e0e034bb0947d6790e47e76dc1df567ff49f8c8b43d455c43aa290bf76, content: ' Often, other properties must be used to specify the characteristics and behaviour of a code point. ...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.6428002363940893)]",1.0,0.11559435725212097,1.0,1.0,1.0 +"How many countries did her song ""Irreplaceable"" get number one status in?",five,"Australia, Hungary, Ireland, New Zealand and the United States","[Document(id=e5bfa62fc564ffffb338c1e6042c55a951d678b61fad10f5f106240c14fb440a, content: ' On February 1, 2004, Beyoncé performed the American national anthem at Super Bowl XXXVIII, at the R...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 0.48170959775240163)]",1.0,0.14340342581272125,1.0,1.0,1.0 +How many Examination Boards exist in India?,30,30,"[Document(id=9b7d52cbf72d7c989a4a9357093025a9bd62e4fb3573ea1abab63735c50cb79e, content: ' However, it is not possible to finance these schools with such low tuition fees, which is why all G...', meta: {'name': 'Private_school', 'source_id': 'c62330ba95e9919513bc9fc65c0d21a748b4a518b9b1f905a113de7f37274e44', 'page_number': 1}, score: 0.4656468986809146)]",1.0,1.0,1.0,1.0,1.0 +What play showed an early depiction of civil disobedience?,Antigone,Antigone,"[Document(id=ad70291d4dec497bcd48d02a2bfbb4780dffd558cf0eee2d7c976f107f401335, content: 'One of its earliest massive implementations was brought about by Egyptians against the British occup...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 0.6589027090165251)]",1.0,1.0,1.0,1.0,1.0 +"Once in conversation with the Queen, how is Elizabeth addressed?",Ma'am,Ma'am,"[Document(id=7e78ea0e95b5bd788108ac6801d926bfe2bab5d8c8ee7ca718215a801c9c95ef, content: ' She was celebrated in Canada as the ""longest-reigning sovereign in Canada's modern era"". (King Loui...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.5698498562293763)]",1.0,1.0,1.0,1.0,1.0 +In what century did Paris become largely into fashion?,19th,19th,"[Document(id=faa03729332a5c07c1c9d6d87747d15af8b3043f87a9cff7389d9a1c23d97c7c, content: ' +Paris has been an international capital of high fashion since the 19th century, particularly in the...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.5976425209098843)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did Alfred the Great begin his reign?,871,871,"[Document(id=4c48861d4673f21b2c8302b3b5239fa9bd1cfd1c2e226f06fb3ec71889c9e319, content: ' Cædmon, the earliest English poet we know by name, served as a lay brother in the monastery at Whit...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.4259535276693956)]",0.0,1.0,0.0,0.0,0.0 +What two methods can be used when using Load Testing?,data or a large number of users,large quantities of data or a large number of users,"[Document(id=dd704f4bc758ab475db02844a8a24263dd4fdb32afbc7f7dc86b810f8fa62e4f, content: ' +Load testing is primarily concerned with testing that the system can continue to operate under a sp...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.6507854161834614)]",1.0,0.9289779663085938,1.0,1.0,1.0 +what may possibly be a solution to net neutrality concerns? ,municipal broadband,municipal broadband,"[Document(id=0e27e0ca72a74505db588413835ac7002514412100758a45474267af68c71a90, content: 'An Internet service provider (ISP) is an organization that provides services for accessing, using, t...', meta: {'name': 'Internet_service_provider', 'source_id': 'b6cdeebacab1c0fc99cc348ba10a7ad5f8a09ca4691c74f89374730939ffbbb4', 'page_number': 1}, score: 0.5580314549952922)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What are the ground-attack aircraft in the USAF usually deployed in support for? ,U.S. ground forces,land forces,"[Document(id=2e4b3b475a65a6397953a11388e7fa166873ff680f98b6e7464deb399963430f, content: ' +In the US Air Force, the distinction between bombers, fighters that are actually fighter-bombers, a...', meta: {'name': 'United_States_Air_Force', 'source_id': '1bb222de66e5a2c81f1d5b75bf3eced066939aec82934a5cb176a607b4e8cc8f', 'page_number': 1}, score: 0.5490348511903206)]",1.0,0.7403548955917358,1.0,1.0,1.0 + In what year did the invasion of Russia take place?,1812,1853,"[Document(id=e2b0b7fb7245406ebe95cec6e89cc25fccdc9386ef0e980ff8bd1bb4c36f9a9c, content: 'The Crimean War was a military conflict fought between October 1853 – March 1856 in which Russia los...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 0.583131325566129)]",0.0,0.672028660774231,0.0,0.0,0.0 +What was the practice of religion to the Romans?,personal choice,practical and contractual,"[Document(id=b80b0d7fa56110987db7fc274c40400c9afab47b60f93bd3980be6d29191c35f, content: 'The priesthoods of public religion were held by members of the elite classes. There was no principle...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.6665384745348288)]",1.0,0.22432157397270203,1.0,1.0,1.0 +What is one example of failure testing?,Software fault injection,A programmer makes an error (mistake,"[Document(id=b6392c280d0afea37544d7517bbd9644cc64cc62815ef9e4f6ae7774c9575aff, content: 'As the number of possible tests for even simple software components is practically infinite, all sof...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.5737161988389456)]",1.0,0.3859443664550781,1.0,1.0,1.0 +"Out of the maximum amount of players allowed, one must be a what?",goalkeeper,three,"[Document(id=85f535796cef4ec10db8a4fd4e565960608be55322b93bae284e720192b9a26d, content: ' +A number of players may be replaced by substitutes during the course of the game. The maximum numbe...', meta: {'name': 'Association_football', 'source_id': 'f2b7305b09ad8a43b8389a5eb2d84b8ea4b4deaa7968d21fe9da9da9ecda2c6b', 'page_number': 1}, score: 0.4497446649281802)]",1.0,0.18245230615139008,1.0,1.0,1.0 +In which year was the new constitution ratified?,1855,1899,"[Document(id=e93f1498d324a356d0a273aea97c15c32b0eec4faf0660735b4c27f7c165effa, content: ' The white conservative Democrats regained control of the state legislature in 1870, in part by Ku K...', meta: {'name': 'North_Carolina', 'source_id': '3282e3dedcf0f1923e66ebf5b79cf9b9deec27c8ca1df8908e03770b02b38126', 'page_number': 1}, score: 0.49302690301412233)]",0.0,0.5596075057983398,0.0,0.0,0.0 +What vessel was attacked in 1845?,Naiad,Naiad,"[Document(id=c450c10c9cc7ec2362a5a8c23142549f03d77146379f9bb96ab788f8c91887e3, content: ' One year later, the American schooner Dolphin arrived and picked up two boys, the last survivors of...', meta: {'name': 'Marshall_Islands', 'source_id': '762a4f6095de127ded5441c7a17d1c9c38f731ac9cb1ffe8de30af6573be7edf', 'page_number': 1}, score: 0.5136154001583656)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did Travel + Leisure list Yale as one of the most beautiful campuses?,2011,2011,"[Document(id=afdbd08089615eb2bbb9dd9a5945a7e094c57e1f63a4d1f083e6183556a9f3e7, content: ' Moreover, Yale has been accused of failing to treat workers with respect. In a 2003 strike, however...', meta: {'name': 'Yale_University', 'source_id': 'f33638c4d6b8d7572ba500f3a8affca1729aae3046c83a94e2a7424dd50f6549', 'page_number': 1}, score: 0.5912663052349812)]",1.0,1.0,1.0,1.0,1.0 +What sort of variants did the Human Genome Project indicate that many of the measured transcripts were?,alternative variants,moderate levels,"[Document(id=073df342a989619b1eec131e4197dffb009cfe81b1f8022d1a8cd8b13985d1db, content: ' The Human Genome Project states ""People who have lived in the same geographic region for many gener...', meta: {'name': 'Race_(human_categorization)', 'source_id': '1232f7fe18d6e2adb87627ce2042b468cfa3fb9ba0b6afb2165c345f440b5339', 'page_number': 1}, score: 0.4741344601997052)]",0.0,0.14409571886062622,0.0,0.0,0.0 +During what year did the Italians invade Libya?,1911,1911,"[Document(id=d5a42bacf8a08d601a973e5bad25b36c41015f76de0d512f9c3130d5e87ae4de, content: ' He repeatedly expressed a preference for the desert over the city and retreated to the desert to me...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.5997149229460172)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What did Davidson believe the God of Aristotle is synonymous with?,rational thought,rational thought,"[Document(id=3c9b21a4d224bb82a4e4fc744a7bd671552ea8d96f2928fcf811163166442019, content: ' The Nature of Existence (1927) contained his arguments that space, time, and matter cannot possibly...', meta: {'name': 'Idealism', 'source_id': '55a0bd42a4a737442a2d93fe0926424e236910de01b2406ed43c174f2225ff9d', 'page_number': 1}, score: 0.5788858628692617)]",1.0,1.0,1.0,1.0,1.0 +What percentage of students at Notre Dame are the children of former Notre Dame students?,21–24%,21–24%,"[Document(id=bd4681af4e65cd44fb09639ab1c33814925a7f0631458709bac52a44932b0dab, content: ' Around 21–24% of students are children of alumni, and although 37% of students come from the Midwes...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 0.5931308200019992)]",1.0,1.0,1.0,1.0,1.0 +On what day did U.S> planes located Ozawa's fleet?,second,24 October,"[Document(id=67d402d488ad87fcda2d30f304cebc3d3ab52520c994807e5b06e892c3a0e1e1, content: ' The carriers had only 108 planes. The force was not spotted by the Allies until 16:40 on 24 October...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 0.6829916731914795)]",1.0,0.2516951560974121,1.0,1.0,1.0 +Greek was crowned the European champions in what event?,Euro 2004,Euro 2004,"[Document(id=9c5e4e5ebe04f6d60e8e13bc3cfe5cd0c8fa7c41c1148c916c472a26ca7c677b, content: ' Their best ever performance was in the 1896 Summer Olympics, when Greece finished second in the med...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.6102245016864454)]",1.0,1.000000238418579,1.0,1.0,1.0 +Which tense indicates that an action hasn't finished?,imperfective,imperfective,"[Document(id=b086dbc67b80e6f26b1e823c6ec440f8f645661a3c528184598912bb2cd8d10b, content: ' On occasion of the publication’s 45th anniversary, the Croatian weekly journal Forum published the ...', meta: {'name': 'Serbo-Croatian', 'source_id': '2cc55814bc801258e8300ed82e003428dbf11715c74ad99297d61c5dd77023f4', 'page_number': 1}, score: 0.31092698913374395)]",1.0,1.0,1.0,1.0,1.0 +What acoustic element could be distorted by different audio settings in iPods prior to 2007?,Bass,bass instruments,"[Document(id=25a9235e2187510deb110e2fe84f6e0f7a2dbacbbd0bd491e44e8a83b3be8cbf, content: ' This would happen for EQ settings like R&B, Rock, Acoustic, and Bass Booster, because the equalizer...', meta: {'name': 'IPod', 'source_id': 'f0324763387a7e11c235569bada6935891b2c5ba40dd065ab5acc1bb5de8dbed', 'page_number': 1}, score: 0.5267817991196269)]",1.0,0.8207652568817139,1.0,1.0,1.0 +In what way can Neutral triatomic hydrogen exist?,excited form,excited form,"[Document(id=f649524cdd28fbe2ba5a3cadae832b66d8fef791e1b4613b5d6705e7ca2b49a9, content: ' As a plasma, hydrogen's electron and proton are not bound together, resulting in very high electric...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.6038299428337875)]",1.0,1.0,1.0,1.0,1.0 +How are children used in the conflicts in Burma?,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements.,child soldiers,"[Document(id=d0c37a34ef187eefa48aed48ab2f9ed0e96f3a67771e8a88b8c3da762a3a37fb, content: ' According to Samantha Power, a US delegation raised the issue of child soldiers with the government...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.5711083259937013)]",1.0,0.5706413388252258,1.0,1.0,1.0 +In what decades were Arsenal players stereotyped in a negative way?,1970s and 1980s,1970s and 1980s,"[Document(id=23ce9b5b052679777e1f4db02c71a154ef112d1634274e17ec804b5f383f1134, content: ' On 22 January 1927, their match at Highbury against Sheffield United was the first English League m...', meta: {'name': 'Arsenal_F.C.', 'source_id': 'b197b14120aabbc3bf2073d4b3e99c1c51d56d5ac5248f8a97dadcfa056a6f7e', 'page_number': 1}, score: 0.5498172548089693)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did new construction start in Melbourne?,late 1890s,2000s,"[Document(id=c35394598ae512de45396f20eb9657cd39aa8feaea7505f76ad712e7cf8a9c19, content: ' According to the Australian Bureau of Statistics, Melbourne sustained the highest population increa...', meta: {'name': 'Melbourne', 'source_id': 'e9cc00699ea29b72949874ec0e147efa277ecdaa2a727246f376aab1a8d342bd', 'page_number': 1}, score: 0.6020077079508841)]",1.0,0.46762651205062866,1.0,1.0,1.0 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:2__chunk_size:5.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:2__chunk_size:5.csv new file mode 100644 index 0000000..3eaf815 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:2__chunk_size:5.csv @@ -0,0 +1,122 @@ +questions,true_answers,predicted_answers,retrieved_docs,doc_mrr,sas,doc_map,doc_recall_single_hit,doc_recall_multi_hit +What were the earliest priesthoods? ,flamines,flamines,"[Document(id=99729d5c09ec68a18e6cb9e326f59027b5c69bc74e05e8c8c0d825360370bf5e, content: ' In household cult, the paterfamilias functioned as priest, and members of his familia as acolytes a...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5674900599530198)]",1.0,1.0,1.0,1.0,1.0 +When was a third visit to Mnafeld scheduled? ,early 1546,7 April 1997,"[Document(id=6f5469051c1e9452ac1f90d1113888497a72a12f5bcb94b63f6e73f564dcc76d, content: ' To date, the Peace Corps has not returned to the Central African Republic. The Bangui Agreements, s...', meta: {'name': 'Central_African_Republic', 'source_id': '97385de9e4cd05b9e1952b83230d3dfea313a8ee7d900de0adc9bae2d951be24', 'page_number': 1}, score: 0.40080580103576613)]",0.0,0.32845982909202576,0.0,0.0,0.0 +What was the name of the airfield where heavy raids took place?,Farnborough,KG 55,"[Document(id=90134b283b25268de3e509cb8a548e6a73007ffb1b313d9653603fc2bb9c3b55, content: ' On 19 November 1940 the famous RAF night fighter ace John Cunningham shot down a Ju 88 bomber using...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 0.5773304984390446)]",1.0,0.11742228269577026,1.0,1.0,1.0 +The Modern English ending -'s is derived from what ending in Old English?,-es,-es,"[Document(id=e5cb80c1093f573ce5c1d255322723cb12056865ea80e45b815e3467be2f33fd, content: ' It maintains several distinct cases: the nominative, accusative, genitive, dative and (vestigially)...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.6521761611401243)]",1.0,1.0000001192092896,1.0,1.0,1.0 +In what area is it common for spiritual mentorship to be extremely high?,the West,"low-church"" traditions of Protestantism","[Document(id=bbad1043bc2ff983eeca01fd03f84a8577a718a20a14adc37febb6fddb52936a, content: ' +In The Church of Jesus Christ of Latter-day Saints (LDS Church), the teacher is an office in the Aa...', meta: {'name': 'Teacher', 'source_id': '8d3b7bd3176a10380300a49fa713b8fffa5e77a181df51ea823d2bdeb46d4c7b', 'page_number': 1}, score: 0.46709569635711634)]",1.0,0.1646253764629364,1.0,1.0,1.0 +Who is the only Greek volleyball club to win European titles?,Olympiacos,Olympiacos,"[Document(id=5f851cd4f49c06ae30150b3dfe14947705e0cd2b835b8ea3ef91b1ef1bf833b5, content: ' The Greece men's national water polo team became the third best water polo team in the world in 200...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.663954529813995)]",1.0,1.0,1.0,1.0,1.0 +How many judges were on the panel for this season?,four,three,"[Document(id=dddbddd251efc34f4b3d3c9022140571b04cbc8835dace1cf407b1ef1ea9dd5d, content: ' Jennifer Lopez and Steven Tyler joined the judging panel in season ten, but both left after two sea...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 0.6261650029354624)]",1.0,0.8083884716033936,1.0,1.0,1.0 +"Along with a chemical process, what type of process is used by night vision devices to convert infrared into visible light?",electrical,electrical,"[Document(id=05e7c56a6da1ed703309ec6f627bc69c369ec51519a9e5eb8c442673c62365e4, content: ' Infrared can also be used to detect protostars before they begin to emit visible light. Stars emit ...', meta: {'name': 'Infrared', 'source_id': 'ba9d9843065facd8aa75e472f0c7d760ae3d78712335a49f7abaf499acb21d1c', 'page_number': 1}, score: 0.6347191994948904)]",1.0,1.0,1.0,1.0,1.0 +The second most prominent religion in New York is what?,Judaism,Judaism,"[Document(id=4437c778daec446c4362623ed3261e5ec4170314f4aa14f8381b8c7ee6eef3fe, content: ' +Christianity (59%), particularly Catholicism (33%), was the most prevalently practiced religion in ...', meta: {'name': 'New_York_City', 'source_id': '8a1509d057a51ed7a68b42ff87df5d459ac7f15f3d0f4f1685859ba197f9686d', 'page_number': 1}, score: 0.6643419003251825)]",1.0,1.0000001192092896,1.0,1.0,1.0 +The Free Hanseatic City of Bremen includes which cities?,Bremen and Bremerhaven,"Bergen (in Norway), Stockholm (in Sweden), and Vyborg","[Document(id=5cf9bba4037ffb12d79f2ab44ea7b22d5556d1f47b8e6bb302f92f2d93799518, content: ' Along the trade routes, Hanseatic trade stations became centers of the German culture. German town ...', meta: {'name': 'Germans', 'source_id': '87c5e28d01065fdd517ee7c4c4b9b83b8b40b35a4d9ba5664f523e4485346f24', 'page_number': 1}, score: 0.6045755481314335)]",0.0,0.4704947769641876,0.0,0.0,0.0 +What seeks to limit the use of polysyllabic characters?,CPC,ISO 8859 standard,"[Document(id=5feafea02365ea3cabb4a0047d23f7b415679dd6971d7587bf3ebbc903629e6e, content: ' UTF-16 extends UCS-2, using one 16-bit unit for the characters that were representable in UCS-2 and...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.5413251020263476)]",0.0,0.1315920501947403,0.0,0.0,0.0 +In what courts can federal habeas corpus suits be brought?,federal,federal,"[Document(id=be1e7e4a69e49687682a8ca0c606792dd44baba4b9d0f7e7871524099ae07fba, content: ' The second-largest mass execution was also a hanging: the execution of 13 African-American soldiers...', meta: {'name': 'Capital_punishment_in_the_United_States', 'source_id': '0a4e4dda32f7be6a4f78da5ed9ee21d2cc7659112c0d087b2923a482bdc77b5b', 'page_number': 1}, score: 0.5448069890747275)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What did Whitehead consider himself as a philosopher? ,rank amateur,rank amateur,"[Document(id=b0360cb0e79aa53fce9b6b987065d38127ef8c7392ded59d294e940b3fc8c15b, content: ' For Whitehead, education should be the exact opposite of the multidisciplinary, value-free school m...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 0.6532748672096498)]",1.0,1.0,1.0,1.0,1.0 +What condition does a person have to meet to be allowed by law on a coin?,deceased,payment of debts,"[Document(id=4f60e5dd26c7b2626d120c6cbdaee951099743a32fe85550cdf8b453dffe6e1c, content: ' make any thing but gold and silver coin a tender in payment of debts"" clause in article 1, section ...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.5093472945454142)]",1.0,0.12313651293516159,1.0,1.0,1.0 +Which region of the US experiences rain year round?,Pacific Northwest,nearly all U.S,"[Document(id=8a2739e7083a8933e34f34b82a0419e65f25241888f20dfc9f57614f1dbbb640, content: ' This reputation stems from the frequency of light precipitation in the fall, winter, and spring. In...', meta: {'name': 'Seattle', 'source_id': '1c1a54ff037a1e964929dc466a4a876fa6bb7609d7737ba7226bcd2ab49385cf', 'page_number': 1}, score: 0.6166568410170494)]",0.0,0.3169293701648712,0.0,0.0,0.0 +What is the ability to recognize and adapt to new specific pathogens called?,Adaptive (or acquired) immunity,Adaptive (or acquired) immunity,"[Document(id=af9fa957dfd959c9905dc3b94a52b8ecc9ecb9c39df2fdb2c000fd9ec4597970, content: ' Even simple unicellular organisms such as bacteria possess a rudimentary immune system, in the form...', meta: {'name': 'Immune_system', 'source_id': 'fcb8c5092b2596a66d4f96587d653cbabda606874d86632c35ec40ef8f023051', 'page_number': 1}, score: 0.6509501973179057)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who allegedly hit a home run to the Center?,Babe Ruth,Moose McCormick,"[Document(id=77c29047f332b94600c60411ddf2e8c78097d4dbe8cb5f95cbf5d6e23f8473be, content: ' The two clubs were tied in the bottom of the ninth inning at the Polo Grounds, and N.Y. had runners...', meta: {'name': 'Chicago_Cubs', 'source_id': '10bd5330115a2fe2b9ef60d21abe3942769709161783409af0c4444ccac129c3', 'page_number': 1}, score: 0.5091546444664042)]",1.0,0.31572943925857544,1.0,1.0,1.0 +Which type of graduate from Imperial earned the second highest average starting salary after graduation?,Computing graduates,Ph.D.,"[Document(id=8d74cf3e7a3897f2c2f0163d507374839ffb8775af40004e60fc174f1aea1f7b, content: '), Ph.D. and D.Sc.(Tech.', meta: {'name': 'Institute_of_technology', 'source_id': 'bd3c1ce78fe4328bf8eb2cbd8027759a2a927528ce7e496ce3ebcf9675bc060e', 'page_number': 1}, score: 0.517905680620544)]",0.0,0.45661261677742004,0.0,0.0,0.0 +In what year did Eisenhower propose his second civil rights act?,1960,1957,"[Document(id=e20106e1401ce2522cd3d14d13fae9ea782170d84e4c3db914247513b098ce09, content: '.. a discrimination in the expenditure of those funds"". +Eisenhower told District of Columbia officia...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 0.7159989523516124)]",1.0,0.7518350481987,1.0,1.0,1.0 +What is the current height limitation in Paris in central areas?,50 m,50 m (160 ft),"[Document(id=d89cdeedbc8441f4e2c52a3575e8c530fcce225561404b37c7cdd108d1cc549f, content: ' Modern Paris owes much to its late 19th century Second Empire remodelling by the Baron Haussmann: m...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.7434933599873272)]",1.0,0.804789662361145,1.0,1.0,1.0 +Income level was found to have a signification correlation to which outcome in the study?,nutrition literacy,economic growth,"[Document(id=2fe027a6a83389bb16ea06d04ab9b56d477f3f1adfcac2e27b85393a9c0eb105, content: ' Policies aiming at controlling unemployment and in particular at reducing its inequality-associated...', meta: {'name': 'Economic_inequality', 'source_id': '648ff32b8c146c974b23e115e3a61420b1cdbea9de1f5bf38442ce4c1d9caa42', 'page_number': 1}, score: 0.5218842741538204)]",0.0,0.17941543459892273,0.0,0.0,0.0 +How much did Viacom want in damages in their lawsuit against youtube?,$1 billion,$1 billion,"[Document(id=720eb3795a623e31335108bcc29a4b774e33272bb625d5968983846dd3b35f68, content: ' Three successful complaints for copyright infringement against a user account will result in the ac...', meta: {'name': 'YouTube', 'source_id': '4d18798daeceb997a6dcb04b575dbbdad768671ca9202116370611d944d510be', 'page_number': 1}, score: 0.7102647470573767)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who should be able to challenge administrative orders in court?,individuals,individuals,"[Document(id=cf59c0f0619e715b979051e0a84530bf73463713fa11f2961866d233ac2537f5, content: ' Dicey's: “no man is punishable or can be lawfully made to suffer in body or goods except for a dist...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 0.6138198489552974)]",1.0,1.000000238418579,1.0,1.0,1.0 +The Slavs were under what name in the early 6th century?,the Antes and the Sclaveni,East Slavs,"[Document(id=701344efe2101165a535fe45e922b01123011116f4f123fcd5b1668edbb29898, content: 'Slavs are the largest Indo-European ethno-linguistic group in Europe. They inhabit Central Europe, E...', meta: {'name': 'Slavs', 'source_id': 'a55731ff3cd03d8fe34d4e646dbfcdc2088f01190e5110c9ff0baf4e4b486847', 'page_number': 1}, score: 0.70879690862351)]",1.0,0.44139364361763,1.0,1.0,1.0 +How are the results simplified when using the SASO?,"into four summaries that look specifically at responses that correspond to either homosexuality, heterosexuality, bisexuality or asexuality.",four summaries,"[Document(id=f86f4f0c6b96ee3b686893cc41683ca450066b225884d3390504ef3124153018, content: ' +The SASO consists of 12 questions. Six of these questions assess sexual attraction, four assess sex...', meta: {'name': 'Sexual_orientation', 'source_id': 'f94279650936a403aa3d2b43ab1ad42b1fd14d05455aa86fbcda6cda978ffe5c', 'page_number': 1}, score: 0.45192740188947417)]",1.0,0.47273075580596924,1.0,1.0,1.0 +What socioeconomic class used the Florentine Tuscan language in Dante's time?,upper class,educated and powerful,"[Document(id=ad3cf9694a2a85eaf132042c8ae0262f84f59777a3c746653e4e48474c28a3a2, content: ' The economic prowess and cultural and artistic importance of Tuscany in the Late Middle Ages and th...', meta: {'name': 'Dialect', 'source_id': '2e05b686e48c9512ae5466f1d15202b44fdc903ea32d0a42c5dbc9ef173d051d', 'page_number': 1}, score: 0.5544002922926412)]",1.0,0.5528169870376587,1.0,1.0,1.0 +"In addition to ammo proposals, what other need was articulated?",The need for some form of tracer or smoke trail,time types,"[Document(id=9d1747bc11e311df0105a0308df3eb29ec64ee3c1e3b7335356131a26f7421d1, content: ' Fuzing options were also examined, both impact and time types. Mountings were generally pedestal ty...', meta: {'name': 'Anti-aircraft_warfare', 'source_id': 'c54639424f62ce369356ed8c4e1e9fab8769359dd28029aa89832f5aa994f45d', 'page_number': 1}, score: 0.43523273125444223)]",1.0,0.007751505821943283,1.0,1.0,1.0 +What is that power that enables us to love?,Prevenient grace,universal love,"[Document(id=a78f83359689c5324d38fe66319b2e022231f3eea49709cfa6adc4ff161463f3, content: ' Love led to spiritual growth that, in turn, reflected an increase in love for God and humanity. Thi...', meta: {'name': 'Dominican_Order', 'source_id': 'f55ba4e5b37c737e23256c74229a0bf3a5c697b70e171c6ce20c437d900b023e', 'page_number': 1}, score: 0.4610609739733563)]",0.0,0.32447677850723267,0.0,0.0,0.0 +"Who was assassinated on September 22, 189 AD?",He Jin,Caesar,"[Document(id=3fbeb6b6e7e9e345e223b12ac0cf1796c2923e45de7bc622585e09312d4d60ef, content: ' +Caesar was now the primary figure of the Roman state, enforcing and entrenching his powers. His ene...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.43051288719404746)]",0.0,0.1853158324956894,0.0,0.0,0.0 +How did Gaddafi change the legal system?,he introduced sharia as the basis for the legal system,introduced sharia,"[Document(id=bdc01f662020adb880737d79c53bc6a192c293e64aaacad9c0bca0ebff7ff92d, content: ' Intent on pushing Libya towards ""Islamic socialism"", he introduced sharia as the basis for the lega...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.5944495251309094)]",1.0,0.7905094623565674,1.0,1.0,1.0 +What name was it assumed Elizabeth would take upon her marriage to Philip?,"Mountbatten,",Elizabeth,"[Document(id=e654e9ff005029974fb352ae5cb703e1734edb2deffb25e38aadc6d3624aea0c, content: ' On 6 February 1952, they had just returned to their Kenyan home, Sagana Lodge, after a night spent ...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.6176147417529756)]",1.0,0.27963170409202576,1.0,1.0,1.0 +What was the attack on the British weakness?,Battle of Fort Bull,Minorca in the Mediterranean,"[Document(id=ec4515c9557988b0fec18967610f700f87d76c31c093e709fb322770706f03b8, content: ' It proved completely successful. Pitt had a clear appreciation of the enormous value of imperial po...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 0.5872765017217323)]",0.0,0.0038108360022306442,0.0,0.0,0.0 +How much of South Tucson is Native American?,10%,10%,"[Document(id=19e07536fc4d6a422f717376ebb5c165dc66d2b84c2b4069db4dbb4757a892d6, content: ' It was first incorporated in 1936, and later reincorporated in 1940. The population consists of abo...', meta: {'name': 'Tucson,_Arizona', 'source_id': '1cd8368afbbb7156e4d37c16c9795a1cf7209fbd0036331b6759337ae62d7224', 'page_number': 1}, score: 0.6162341958049967)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Whose theory claimed that species were unchanging and designed by God?,John Ray,Jean-Baptiste Lamarck,"[Document(id=1e7ee2b8973cbe904852b74345906a810d752f622915c33c467ffca1e65e0dc0, content: ' The Ussher chronology of the 1650s had calculated creation at 4004 BC, but by the 1780s geologists ...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 0.6828833060111466)]",1.0,0.36553826928138733,1.0,1.0,1.0 +Who can restore a database that has been corrupted?,database administrator,dedicated backup files,"[Document(id=d62f58c8737fe86268de0ebf0cb0ef4eae1f9ba5f33735f7cadb2e83beb9e377, content: 'g., cases when the database is found corrupted due to a software error, or if it has been updated wi...', meta: {'name': 'Database', 'source_id': '98b09743ece9be42ad021f0dc6ffab08c322e7eeace6aae0b98b4bbf67224bd3', 'page_number': 1}, score: 0.4885855296732807)]",1.0,0.11919853091239929,1.0,1.0,1.0 +What unpopular philosopher's text did Avicenna learn from?,Abu Abdullah Nateli,Abu Abdullah Nateli,"[Document(id=a3a017724e8f025ada3a4a92e98993ec44b17e0941134ce7e52db3191cd3ea08, content: ' He learned Indian arithmetic from an Indian greengrocer,ءMahmoud Massahi and he began to learn more...', meta: {'name': 'Avicenna', 'source_id': '25cbb1011a29473e1dfa8d96ebe95bc8ead7393548de57e871f959624f7238f1', 'page_number': 1}, score: 0.7240616742271706)]",1.0,1.0,1.0,1.0,1.0 +What was the event called that caused Ayutthaya to reclaim lost territories ?,the Chinese threat,With Burma preoccupied by the Chinese threat,"[Document(id=a8730bd7201f8a080d3cffb4ba6d02e7cc82f409fe226dfe6987d18adaeb8596, content: ' Its trade and secular administrative reforms built a prosperous economy for more than 80 years. Fro...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.5260146870162367)]",1.0,0.6175083518028259,1.0,1.0,1.0 +"Before the introduction of the Latin alphabet, how was Old English written?",runic system,runic system,"[Document(id=8825405ebdc1bb4daf623185ae693fe4f2cab9da4dcca330ccb2ab3f6b05d6e5, content: ' The modern cognates of original words have been used whenever practical to give a close approximati...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.6676853832427841)]",1.0,1.000000238418579,1.0,1.0,1.0 +"When groups that are antagonistic to each other form a coalition for hidden gain, it is called what?",unholy alliance,unholy alliance,"[Document(id=f1fa43fc9c1c5bd46b9f5455701bf99b0e98d4579816d755389bcc2dbba1bcda, content: ' This sum itself may be all or a portion of the difference between the actual (inflated) payment to ...', meta: {'name': 'Political_corruption', 'source_id': 'e7c84fb03cb06b72c4ab51eb24218e9f2a10b95493e43d575342db0ffa3e9e1d', 'page_number': 1}, score: 0.5395585598917987)]",1.0,1.0,1.0,1.0,1.0 +What made a strong economic case for investing in low carbon technologies?,Stern Review,the Stern Review,"[Document(id=333c4da3ee155b7d7b4ddf91857cd1ec4e1c3cafb4159ebedda18347755b3f1f, content: ' In the US the main tool to track and stimulate voluntary actions is Green-e program managed by Cent...', meta: {'name': 'Renewable_energy_commercialization', 'source_id': 'fa5f7904b8df5272f4f870ed401f610435c916171c87b47deed9a45e6bd594ea', 'page_number': 1}, score: 0.5373316142063316)]",1.0,0.8556329011917114,1.0,1.0,1.0 +"Who introduced the Latin version of the word ""insect""?",Pliny the Elder,Pliny the Elder,"[Document(id=e8da082f5022b804801ddb2778f16c32226f0ecb59f3ed7f8fcdae17653f162c, content: ' +The word ""insect"" comes from the Latin word insectum, meaning ""with a notched or divided body"", or ...', meta: {'name': 'Insect', 'source_id': 'e14215fdfb7e1394430dedb24549d773464f110b7f19b86558bc101f84126630', 'page_number': 1}, score: 0.8359677035541203)]",1.0,1.0,1.0,1.0,1.0 +What was started by the Roman Catholic Church to dispense of the Cathars in Languedoc?,The Cathar Crusade,The Cathar Crusade,"[Document(id=08215551b881c7cefad5bb5baaaddc4d6f9a5802a709d61e32985651e2115c85, content: ' By the 11th century, more organised groups such as the Patarini, the Dulcinians, the Waldensians an...', meta: {'name': 'Heresy', 'source_id': '8ba473262f8508a9c99699bb09cd022993896ece1fed1463a9cf4bd7eb0c3e68', 'page_number': 1}, score: 0.5435941585791171)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What music service is a huge competitor for Tidal?,Spotify,Aspiro,"[Document(id=ceb53e0be996e6acbe2836b14e1988152098397cb9e1cd2d54a2293b3ca833e7, content: ' ""Creating a partnership with Beyoncé, one of the most hard-working and talented people in the world...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 0.588519582302008)]",0.0,0.2630590498447418,0.0,0.0,0.0 +How much was each copper token worth?,a halfpenny,"1.505 g) in 1837 (16:1 ratio).[citation needed] +Technically, all these coins are still legal tender at face value","[Document(id=8cba8d045af2ea2d04789f21782201aca9ae1bf67c20440df2f6dabfdb2b03d1, content: '22 grains (1.505 g) in 1837 (16:1 ratio).[citation needed] +Technically, all these coins are still le...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.4982906925290043)]",0.0,0.43563181161880493,0.0,0.0,0.0 +What ships were attacked in 1852?,Glencoe and Sea Nymph,U.S.S. Philadelphia,"[Document(id=983088339e9fd3b328c3d15a378f5b661c816794c503aabe08d492a7396e3023, content: ' paid protection money until 1801, when President Thomas Jefferson refused to pay and sent in the Na...', meta: {'name': 'Military_history_of_the_United_States', 'source_id': 'e80a3db424753f4e406b9cfac948a9c9640a8278fb51e7ba7c06781d3f1813f4', 'page_number': 1}, score: 0.5390259322671749)]",0.0,0.2765033543109894,0.0,0.0,0.0 +What was the first fossil to display both clearly traditional reptilian characteristics?,Archaeopteryx,Archaeopteryx,"[Document(id=22e5680a1c41aa000971ff89fccd80f808c8f88ae7d0f5c0d81eb763ce026955, content: ' Unlike Archaeopteryx and the non-avialan feathered dinosaurs, who primarily ate meat, recent studie...', meta: {'name': 'Bird', 'source_id': '76114e3e9c92caec1de2bcfd38b6f22e1b48487f665fa6392f914934a6748ae1', 'page_number': 1}, score: 0.6138421968979502)]",1.0,1.0,1.0,1.0,1.0 +What are the theories that integrate molecular genetics with Darwinian evolution called?,the modern evolutionary synthesis,the modern evolutionary synthesis,"[Document(id=54fb8e44f718e454d5f4aef7097036553aa91b829fc2ea08644a1f9fc17e50f5, content: ' +The theories developed in the 1930s and 1940s to integrate molecular genetics with Darwinian evolut...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 0.6807694823421528)]",1.0,0.9999998807907104,1.0,1.0,1.0 +At what time did Tesla get dinner?,8:10 p.m,eight o'clock,"[Document(id=5dfd5078daafbc2ef3695636e6d70b42db2abc79ac389c20eac0417969714c11, content: ' Tesla would telephone his dinner order to the headwaiter, who also could be the only one to serve h...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.7466485072261548)]",1.0,0.7012972831726074,1.0,1.0,1.0 +In what year did the French reach Alsace?,1444,1444,"[Document(id=18dbb6f8a4452ee88a36944ed7b5a77c2233677f5c2c84c6d98113a4c1519651, content: ' After the conclusion of the war, France was again free to pursue its desire to reach the Rhine and ...', meta: {'name': 'Alsace', 'source_id': '1105499d303196fb0b7ae4d26209720b0040e63376f114d458fd77f05836cbd4', 'page_number': 1}, score: 0.639941686896402)]",1.0,1.0,1.0,1.0,1.0 +Who was responsible for the handling of the Roman census?,two censors,censors,"[Document(id=3749ef6e14ea9af3893cfd84917e3a39acc2212480fc846e5a9231414a5599e6, content: ' Praetors administered civil law and commanded provincial armies. Every five years, two censors were...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.5740412708539135)]",1.0,0.806065022945404,1.0,1.0,1.0 +When was a resolution agreed to about Chinese human rights issues in San Francisco?,"April 1, 2008","April 1, 2008","[Document(id=80f5e2e80143434aa6d2d78a16f411f7f6ab4f8ffffbd29914ad0c75e8bf31da, content: ' The cost to the city for hosting the event was reported to be USD $726,400, nearly half of which ha...', meta: {'name': '2008_Summer_Olympics_torch_relay', 'source_id': 'a45e6ea09a1ff12d8a239a682c8ef3735ae894d5ab6427b50909053bb046e8b5', 'page_number': 1}, score: 0.594644661480704)]",1.0,1.0,1.0,1.0,1.0 +Who was the last pope to be crowned?,Paul VI,Pope John Paul I,"[Document(id=2e3e26be27c5d3b2bb86e2f100b3f2cd4047a32575cd51b567e889f10d092d6d, content: ' In the past, during papal coronations, the proto-deacon also had the honor of bestowing the pallium...', meta: {'name': 'Cardinal_(Catholicism)', 'source_id': '142a323ba4130f1058b1e8dc0dbaebc921fa4f79f8e12e1e235d8d8600d3add8', 'page_number': 1}, score: 0.5572633689378403)]",0.0,0.7952640056610107,0.0,0.0,0.0 +Who handles serious medical emergencies on Norfolk Island?,the Royal Australian Air Force,Royal Australian Air Force,"[Document(id=c983e8f9fccac478cd7d895c53ca7636f4291c3a87693dd68246b03e75a4be42, content: ' All visitors to Norfolk Island, including Australians, are recommended to purchase travel insurance...', meta: {'name': 'Norfolk_Island', 'source_id': '8be872c9c3302a16163599d52ec60841ddff13042f8e8e2e0aa14de2c192e2ee', 'page_number': 1}, score: 0.7097080153729567)]",1.0,0.9528100490570068,1.0,1.0,1.0 +Were Roman soldiers expected to purchase their own equipment?,"wealthiest citizens, who were able to purchase the best equipment",the Roman state was forced to arm its soldiers at the expense of the state,"[Document(id=f0adc94ea827dd57e6a4ed0151d80a56b2b699e6457dfd794787f7d9f9121082, content: ' As a result, the Roman state was forced to arm its soldiers at the expense of the state, which it h...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.5204352783694779)]",1.0,0.255912184715271,1.0,1.0,1.0 +Who did Nasser's group try to assassinate?,Hussein Sirri Amer,King Saud,"[Document(id=5d0ec0248e1b31849b4fcf2b6de857e2ffd286b834f600afc95bf4d1111c7ae6, content: '"" Nasser ordered a crackdown against Syrian communists, dismissing many of them from their governmen...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 0.6708211510684319)]",1.0,0.5504575967788696,1.0,1.0,1.0 +What was AJ Ayer an important member of?,Logical empiricism,educators,"[Document(id=f0501a0350c5212f0b765e4f2908281937b33d70767d79809051b0aab43bc078, content: ' +Other: Civil rights leader W. E. B. Du Bois; philosopher Henry David Thoreau; authors Ralph Waldo E...', meta: {'name': 'Harvard_University', 'source_id': '7f91a9284c2db4a8d1147861fa2c02c6a25281db1cbc78097950be011c0e6269', 'page_number': 1}, score: 0.377496105816545)]",0.0,0.2606774866580963,0.0,0.0,0.0 +In which year was a settlement reached regarding GE's pollution of the Housatonic River?,1999,1999,"[Document(id=ff4e5f9b67b28efe1d784938ea3eabd7391e59030ee2fef37239be9884eafceb, content: ' In 1999, the company agreed to pay a $250 million settlement in connection with claims it polluted ...', meta: {'name': 'General_Electric', 'source_id': '26d4044eb2d79191635af69e57c262af8041a70871e60e989128c458529f04cc', 'page_number': 1}, score: 0.5382385055230676)]",1.0,1.0,1.0,1.0,1.0 +What area in Charleston is considered the 'industrial area'?,the city's Neck area,downtown medical district,"[Document(id=0b7915915ef0141c882fe46817cee7fb758759c0f34482f57922cd19e5f5e512, content: ' The downtown medical district is experiencing rapid growth of biotechnology and medical research in...', meta: {'name': 'Charleston,_South_Carolina', 'source_id': '6135d76cf2064b0c4f70e685c5c7e15b8093869ebd10f9a74e8acb07e2173763', 'page_number': 1}, score: 0.5525530182515901)]",1.0,0.47365856170654297,1.0,1.0,1.0 +In what year did Frédéric obtain citizenship in France?,1835,1835,"[Document(id=2dc3b5cfb05bfeb4e2c0b072a5a5e6417b1109588ffcbd88ffd03346c8bc1975, content: ' In France he used the French versions of his given names, and after receiving French citizenship in...', meta: {'name': 'Frédéric_Chopin', 'source_id': '5e95b80d7ceb3febb2d1a7dfe09730771a231fc8d23c15cd74d3fa2327cfcb14', 'page_number': 1}, score: 0.6752531486676878)]",1.0,1.0,1.0,1.0,1.0 +What was Tesla afraid someone was trying to do with his invention?,steal the invention,steal the invention,"[Document(id=4d3444ecb9a546c18669ac8c8405c5f0787407509b989960d32c3dae162fd6f1, content: ' Tesla tried to interest the US War Department, the United Kingdom, the Soviet Union, and Yugoslavia...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.6860750023448956)]",1.0,1.0,1.0,1.0,1.0 +What is the group called that does not agree with government at all?,anarchists,trade unions,"[Document(id=1526b73050d79bafee572465264f2597aa4189406427564019a656dac65edfb0, content: ' The majority may be powerful but it is not necessarily right. What, then, is the proper relationshi...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 0.48148543902367125)]",1.0,0.5192486047744751,1.0,1.0,1.0 +What was the name of the bookstore the authors met in?,Old Corner Bookstore,Paris,"[Document(id=79a18d86b70f7d064e2861983eacef8c4aaea013b88e70e045d4f5e02765b10f, content: ' +Paris is a city of books and bookstores. In the 1970s, 80 percent of French-language publishing hou...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.4805254691287104)]",0.0,0.2811535596847534,0.0,0.0,0.0 +Time and space are both examples of what type of resource?,complexity resources,a systematic mental framework,"[Document(id=14e1565f9e55beb6f0a6a436ef63987e7ecfcabcded323a31ce3f0c38af15c48, content: '26) by which we simultaneously grasp the past in memory, the present by attention, and the future by...', meta: {'name': 'Time', 'source_id': 'f3ee42c6d38306d65c6207a15a1b9c65ff08a69728d5d0197f9593548177b229', 'page_number': 1}, score: 0.5150513533287606)]",0.0,0.29695814847946167,0.0,0.0,0.0 +What had happened to Frederick's extra troops from Russia?,"Catherine, who ended Russia's alliance with Prussia and withdrew from the war",Frederick moved the bulk of his eastern forces to Pomerania,"[Document(id=2e241d6a2d549d7fb7ef44a92718ad0b9c17de781fc1ddf4c4c4311b6193515f, content: ' Frederick sent urgent requests to Britain for more substantial assistance, as he was now without an...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 0.6627893603410437)]",1.0,0.37803566455841064,1.0,1.0,1.0 +What percentage of its workforce did Dell terminate in from 2007 to 2008?,10%,500,"[Document(id=8d7bd268e9b8cfd6057d2e498dc7571a13c5a996c79222e58712c761e9056cff, content: '” +On April 23, 2008, Dell announced the closure of one of its biggest Canadian call-centers in Kanat...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 0.620125362778158)]",1.0,0.34641900658607483,1.0,1.0,1.0 +What organization was Rome's official caretaker?,the senate,Sabratha,"[Document(id=d5925fe167b21550b3b94ce08e21d5d09d8ec2f69b4e80810ada3dd38cd002d8, content: ' This remained a primary religious and social duty of emperors. +The Roman Empire expanded to include...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5396579508662903)]",1.0,0.21925941109657288,1.0,1.0,1.0 +Who chose BYU as the location of the national Middle East Language Resource Center?,United States Department of Education,United States Department of Education,"[Document(id=10406721e1bacc4b94568efb8105444ce33c23b507417efe2e032c93615c0677, content: ' Several of its language programs are the largest of their kind in the nation, the Russian program b...', meta: {'name': 'Brigham_Young_University', 'source_id': 'cb3b81b6f2c540426803106face67bc5fd4db7b1a9ef422ec3601456732b4785', 'page_number': 1}, score: 0.7040351183958434)]",1.0,1.0,1.0,1.0,1.0 +Who discovered Hydrogen gas?,Robert Boyle,Robert Boyle,"[Document(id=694b7ad0ec30f16563c095bfd8aa73a286298746fa8b5b6b568961e806fe7462, content: ' +In 1671, Robert Boyle discovered and described the reaction between iron filings and dilute acids, ...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.7264078227319748)]",1.0,1.0,1.0,1.0,1.0 +Where was Polybius forced to go as hostage?,Rome,Rome,"[Document(id=39971eda0fac39993fb2ad01159df52ea10276610c72fad19cdc5a8d781c70e0, content: ' The earliest and most credible surviving source for the Hellenistic period is Polybius of Megalopol...', meta: {'name': 'Hellenistic_period', 'source_id': '322682cbb61d31c17e6d21e06723e94efd0ef242a5c9f308888fa14209c4078b', 'page_number': 1}, score: 0.5701725567412796)]",1.0,1.0000001192092896,1.0,1.0,1.0 +In what decade was disco popular?,1970s,'80's,"[Document(id=b2326a9f01013d0aa1f527d2f622edea66373dafe7b9251551f0f5e0ccc15bd6, content: ' +Before emerging as a pop star, Madonna has spent her early years in rock music alongside her bands,...', meta: {'name': 'Madonna_(entertainer)', 'source_id': '0868d847bb768c30a7507239169d5e441bb3181b66c8b9353a863daa3d58e425', 'page_number': 1}, score: 0.5110445276841755)]",0.0,0.6287059783935547,0.0,0.0,0.0 +What do the rivers form?,lakes,Nieuwe Maas,"[Document(id=2c02265ed1b53d17cbf8d8090f6a3dad995df9f6a8bbe12e06c780368660607e, content: ' The middle flow begins as Nederrijn, then changes into Lek, then joins the Noord, thereby forming N...', meta: {'name': 'Rhine', 'source_id': '23686e48a61504679a91dac10d83b287aef3dff7dabc0ab881c281059187ce6a', 'page_number': 1}, score: 0.5238157166111579)]",0.0,0.2038368135690689,0.0,0.0,0.0 +What were the first 256 code points of Unicode made identical to? ,ISO-8859-1,ISO-8859-1,"[Document(id=5feafea02365ea3cabb4a0047d23f7b415679dd6971d7587bf3ebbc903629e6e, content: ' UTF-16 extends UCS-2, using one 16-bit unit for the characters that were representable in UCS-2 and...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.6118382911790479)]",1.0,1.0000001192092896,1.0,1.0,1.0 +"How many countries did her song ""Irreplaceable"" get number one status in?",five,44,"[Document(id=b8e4c4f73e595a4bf934f4743c25ffdd012fa0b8bdf2c0ce7ffd0ad37403f5dd, content: ' +Madonna holds the record for the most number-ones on all combined Billboard charts, including twelv...', meta: {'name': 'Madonna_(entertainer)', 'source_id': '0868d847bb768c30a7507239169d5e441bb3181b66c8b9353a863daa3d58e425', 'page_number': 1}, score: 0.550717575548617)]",0.0,0.4409554600715637,0.0,0.0,0.0 +How many Examination Boards exist in India?,30,30,"[Document(id=87a93f72cbb9c00ea6477b2a31d21c284a2ecf07733f93b0fb4ba7b277f5b8f5, content: ' It is within the power of both the union government and the state governments to govern schools sin...', meta: {'name': 'Private_school', 'source_id': 'c62330ba95e9919513bc9fc65c0d21a748b4a518b9b1f905a113de7f37274e44', 'page_number': 1}, score: 0.5680934731225737)]",1.0,1.0,1.0,1.0,1.0 +What play showed an early depiction of civil disobedience?,Antigone,Antigone,"[Document(id=99743eb7e7277e11f539b5449857c5f012a842facd618f6a75ff178a89fc1f76, content: 'One of its earliest massive implementations was brought about by Egyptians against the British occup...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 0.6792855736042169)]",1.0,1.0,1.0,1.0,1.0 +"Once in conversation with the Queen, how is Elizabeth addressed?",Ma'am,Your Majesty,"[Document(id=6661c5b42cee4d9122296712db8297ce21db50225e9f1d2cad1e056d68ca32c8, content: ' Additional styles include Defender of the Faith and Duke of Lancaster. When in conversation with th...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.7342183996028037)]",1.0,0.5040283799171448,1.0,1.0,1.0 +In what century did Paris become largely into fashion?,19th,19th,"[Document(id=ff2ec1a3232f7d74c84b198defa77dd9879cdb65474a7dab2bf47da19675285e, content: ' +Paris has been an international capital of high fashion since the 19th century, particularly in the...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.6173766295402898)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did Alfred the Great begin his reign?,871,871,"[Document(id=6f9dc46d6eac9c736c44b2255083b35d238c93547c3efcd0f85d83a3feef71b2, content: ' The speech of eastern and northern parts of England was subject to strong Old Norse influence due t...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.5422845553260691)]",0.0,1.0,0.0,0.0,0.0 +What two methods can be used when using Load Testing?,data or a large number of users,load or endurance,"[Document(id=adc4b66570c6ed9d13c88909bbdc957a40b544797d95742201330710a6ff6385, content: ' Stability testing (often referred to as load or endurance testing) checks to see if the software ca...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.5499507239220379)]",1.0,0.19878104329109192,1.0,1.0,1.0 +what may possibly be a solution to net neutrality concerns? ,municipal broadband,municipal broadband,"[Document(id=5f8dc7264853d3b0b88a03420e7878e16fa4c08ef70ab7cc5d722e889b21dda6, content: 'S. Federal Communications Commission (FCC) was reported to be considering a new rule that will permi...', meta: {'name': 'Internet_service_provider', 'source_id': 'b6cdeebacab1c0fc99cc348ba10a7ad5f8a09ca4691c74f89374730939ffbbb4', 'page_number': 1}, score: 0.6158295658195905)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What are the ground-attack aircraft in the USAF usually deployed in support for? ,U.S. ground forces,U.S. ground forces,"[Document(id=87ffdf42e04cd4e5aabc33e35caf9614ba61758e355a1d97a38a524ba0bedf46, content: ' +The ground-attack aircraft of the USAF are designed to attack targets on the ground and are often d...', meta: {'name': 'United_States_Air_Force', 'source_id': '1bb222de66e5a2c81f1d5b75bf3eced066939aec82934a5cb176a607b4e8cc8f', 'page_number': 1}, score: 0.8271269612162027)]",1.0,1.0,1.0,1.0,1.0 + In what year did the invasion of Russia take place?,1812,1856,"[Document(id=fcb4369249e697b0fbc49a777c7140b22fe82eef90e450df2c7fe8b613db85c0, content: ' A second counterattack, ordered personally by Nicholas, was defeated by Omar Pasha. The front settl...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 0.5795463116891318)]",0.0,0.702621579170227,0.0,0.0,0.0 +What was the practice of religion to the Romans?,personal choice,Private and personal worship,"[Document(id=8cb202622013f5234d79ec3a4275501aaa91b4fdefe32dbbccfd335829f7ffa1, content: '[citation needed] +Roman religious beliefs date back to the founding of Rome, around 800 BC. However,...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.7022407140076495)]",0.0,0.404468834400177,0.0,0.0,0.0 +What is one example of failure testing?,Software fault injection,Software fault injection,"[Document(id=827178e2fa13b1045cb5486ccca30dd5c2a87aa9e9f903cbdd2808ec1b93a098, content: ' Beta versions can be made available to the open public to increase the feedback field to a maximal ...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.5885062293441778)]",1.0,1.0000001192092896,1.0,1.0,1.0 +"Out of the maximum amount of players allowed, one must be a what?",goalkeeper,goalkeeper,"[Document(id=e6ee12359c832f972207465515e8a9b305bdf92628324bb99b5470cc0b0b74b4, content: ' The same laws are designed to apply to all levels of football, although certain modifications for g...', meta: {'name': 'Association_football', 'source_id': 'f2b7305b09ad8a43b8389a5eb2d84b8ea4b4deaa7968d21fe9da9da9ecda2c6b', 'page_number': 1}, score: 0.5034902789294909)]",1.0,0.9999999403953552,1.0,1.0,1.0 +In which year was the new constitution ratified?,1855,1899,"[Document(id=71b3f8496f110bfe85645777f313b2eb556ef37714f3cfcd0d146aa41835ef0a, content: ' The Democrats regained control of the legislature in 1896 and passed laws to impose Jim Crow and ra...', meta: {'name': 'North_Carolina', 'source_id': '3282e3dedcf0f1923e66ebf5b79cf9b9deec27c8ca1df8908e03770b02b38126', 'page_number': 1}, score: 0.5348079876152125)]",0.0,0.5596075057983398,0.0,0.0,0.0 +What vessel was attacked in 1845?,Naiad,Naiad,"[Document(id=06462bba1bbaf61b1e06ec260c9ebb15ac9f6984188e161293ac729b7950da5d, content: ' One year later, the American schooner Dolphin arrived and picked up two boys, the last survivors of...', meta: {'name': 'Marshall_Islands', 'source_id': '762a4f6095de127ded5441c7a17d1c9c38f731ac9cb1ffe8de30af6573be7edf', 'page_number': 1}, score: 0.6002989836151421)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did Travel + Leisure list Yale as one of the most beautiful campuses?,2011,2011,"[Document(id=1eb83a50e062eec7af6abab3de1bf0b5576a4c75ac168cc64f1b77a2e268c579, content: '7 km2) Yale-Myers Forest in Connecticut's Quiet Corner—and nature preserves including Horse Island. +...', meta: {'name': 'Yale_University', 'source_id': 'f33638c4d6b8d7572ba500f3a8affca1729aae3046c83a94e2a7424dd50f6549', 'page_number': 1}, score: 0.6034040579940345)]",1.0,1.0,1.0,1.0,1.0 +What sort of variants did the Human Genome Project indicate that many of the measured transcripts were?,alternative variants,alternative variants of the same genes,"[Document(id=616efe41146a20d96fcfa37d16fa50ce52cca8acf43b362e86a15a714f86ddff, content: ' Subsequently, the sequencing in the Human Genome Project indicated that many of these transcripts w...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 0.5108261562125103)]",1.0,0.5312574505805969,1.0,1.0,1.0 +During what year did the Italians invade Libya?,1911,1911,"[Document(id=3cdcefdaa320ecb0d100eef0c92063190f1030f98ee702385d5ad087a454afc6, content: ' He repeatedly expressed a preference for the desert over the city and retreated to the desert to me...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.6457983818312312)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What did Davidson believe the God of Aristotle is synonymous with?,rational thought,rational thought,"[Document(id=14435d1137e579f740d93ef8fabdf50640828e96d9dd732c7b8c6d4932ae6a58, content: '"" The theory was indebted to Aristotle's pluralism and his concepts of Soul, the rational, living as...', meta: {'name': 'Idealism', 'source_id': '55a0bd42a4a737442a2d93fe0926424e236910de01b2406ed43c174f2225ff9d', 'page_number': 1}, score: 0.5719476914223833)]",1.0,1.0,1.0,1.0,1.0 +What percentage of students at Notre Dame are the children of former Notre Dame students?,21–24%,21–24%,"[Document(id=2b6b9afae589749ea7e615fd1e236b006b90acb763735662b8fec51a24d455bd, content: ' Around 21–24% of students are children of alumni, and although 37% of students come from the Midwes...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 0.6697027777597087)]",1.0,1.0,1.0,1.0,1.0 +On what day did U.S> planes located Ozawa's fleet?,second,25 October,"[Document(id=7ade51eb3e5a17ee31e4514e28638a45a8061a9ee1a9b4f97cc1e6d9985e197e, content: ' commanders ignored reports that Kurita had turned back towards San Bernardino Strait. They had take...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 0.6893631935380231)]",1.0,0.24027208983898163,1.0,1.0,1.0 +Greek was crowned the European champions in what event?,Euro 2004,basketball,"[Document(id=12d9fa7ff9f514e66e336f50a70a372c3162770635ba732cc4335141fd65ee50, content: 'O.K, Maroussi) have won 3 Triple Crowns, 5 Saporta Cups, 2 Korać Cups and 1 FIBA Europe Champions Cu...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.6834429597884792)]",1.0,0.3432108163833618,1.0,1.0,1.0 +Which tense indicates that an action hasn't finished?,imperfective,imperfective,"[Document(id=06048f854345fb7e2b6f12877040f57cf87279336165b7cb7893e29f87cc9222, content: ' On occasion of the publication’s 45th anniversary, the Croatian weekly journal Forum published the ...', meta: {'name': 'Serbo-Croatian', 'source_id': '2cc55814bc801258e8300ed82e003428dbf11715c74ad99297d61c5dd77023f4', 'page_number': 1}, score: 0.4237734674781286)]",1.0,1.0,1.0,1.0,1.0 +What acoustic element could be distorted by different audio settings in iPods prior to 2007?,Bass,bass instruments,"[Document(id=158a6b8443733e42d24dec31180770da4724f3bd14ed49e47a937624f7aeed47, content: ' This would happen for EQ settings like R&B, Rock, Acoustic, and Bass Booster, because the equalizer...', meta: {'name': 'IPod', 'source_id': 'f0324763387a7e11c235569bada6935891b2c5ba40dd065ab5acc1bb5de8dbed', 'page_number': 1}, score: 0.5718641828766279)]",1.0,0.8207652568817139,1.0,1.0,1.0 +In what way can Neutral triatomic hydrogen exist?,excited form,an excited form,"[Document(id=e9b537adae4e88d5a591f33f2f06388da096013764cb94bfc4e3ea25bb2b2e23, content: ' This charged ion has also been observed in the upper atmosphere of the planet Jupiter. The ion is r...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.6014010412569942)]",1.0,0.9052941799163818,1.0,1.0,1.0 +How are children used in the conflicts in Burma?,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements.,child soldiers,"[Document(id=30c864dfe7ee098b254f1517149763909eae40b70a74f63f84fffd7a77fc3e1f, content: ' According to Samantha Power, a US delegation raised the issue of child soldiers with the government...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.5763265562773263)]",1.0,0.5706413388252258,1.0,1.0,1.0 +In what decades were Arsenal players stereotyped in a negative way?,1970s and 1980s,1970s and 1980s,"[Document(id=1bdd87144e8c05c0c8820e3bd6186deb63a4be13d5810e39af795993e4721bb7, content: ' The book was twice adapted for the cinema – the 1997 British film focuses on Arsenal's 1988–89 titl...', meta: {'name': 'Arsenal_F.C.', 'source_id': 'b197b14120aabbc3bf2073d4b3e99c1c51d56d5ac5248f8a97dadcfa056a6f7e', 'page_number': 1}, score: 0.5193618971746868)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did new construction start in Melbourne?,late 1890s,The 1850s and 1860s,"[Document(id=5065b911750db5485f2cb49749f8e95f270f3e0d705842ca5e8f26d61da4d4ce, content: ' The city centre, Docklands, St. Kilda Road and Southbank areas feature high-density forms. +With the...', meta: {'name': 'Melbourne', 'source_id': 'e9cc00699ea29b72949874ec0e147efa277ecdaa2a727246f376aab1a8d342bd', 'page_number': 1}, score: 0.616751042484272)]",1.0,0.703935980796814,1.0,1.0,1.0 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:3__chunk_size:10.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:3__chunk_size:10.csv new file mode 100644 index 0000000..f1c0d77 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:3__chunk_size:10.csv @@ -0,0 +1,117 @@ +questions,true_answers,predicted_answers,retrieved_docs,doc_mrr,sas,doc_map,doc_recall_single_hit,doc_recall_multi_hit +What were the earliest priesthoods? ,flamines,flamines,"[Document(id=ac8e9c286327aea24d00bceb002b08ad3fbc3007673c77da9b48b6b6b91aa76f, content: ' Some rites of the domus (household) were held in public places but were legally defined as privata ...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5545176647418332)]",1.0,1.000000238418579,1.0,1.0,1.0 +When was a third visit to Mnafeld scheduled? ,early 1546,19 September 2001,"[Document(id=7244ab2e4f16b1e4573f9562f2be390b8f796d721387df92845e66279cc9a354, content: ' Musharraf agreed to give the U.S. the use of three airbases for Operation Enduring Freedom. United ...', meta: {'name': 'War_on_Terror', 'source_id': 'b9ac47a39d382362992253f0ccfa0072ea6216fe1daa748c4899980b467292f4', 'page_number': 1}, score: 0.3588364752821929)]",0.0,0.37357860803604126,0.0,0.0,0.0 +What was the name of the airfield where heavy raids took place?,Farnborough,Bomber Command,"[Document(id=67346696a72eb98fc01faf97de2c3a68cf161364d6413eefe068134d10fcf3de, content: ' Attacks on Birmingham and Coventry were subject to 500 short tons (450 t) of bombs between them in ...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 0.600247916969115)]",1.0,0.2595972716808319,1.0,1.0,1.0 +The Modern English ending -'s is derived from what ending in Old English?,-es,-es,"[Document(id=081239836dc280856dc2c1ab168f17d8fffffed12cbaeb50b62cd11b58a116b4, content: ' It maintains several distinct cases: the nominative, accusative, genitive, dative and (vestigially)...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.6403058641147197)]",1.0,1.0000001192092896,1.0,1.0,1.0 +In what area is it common for spiritual mentorship to be extremely high?,the West,Hinduism,"[Document(id=c1b15c9f6474425049e145f56eea6b22790074204f55cd26eb005fca608f63a8, content: ' This guidance is generally offered during a ceremony called the patriarchal blessing. +In Hinduism t...', meta: {'name': 'Teacher', 'source_id': '8d3b7bd3176a10380300a49fa713b8fffa5e77a181df51ea823d2bdeb46d4c7b', 'page_number': 1}, score: 0.46201672067809485)]",1.0,0.40121543407440186,1.0,1.0,1.0 +Who is the only Greek volleyball club to win European titles?,Olympiacos,Olympiacos,"[Document(id=c6017a99c04926bab23928d5fed5517ccdf053e42b0a0376e97782a93ab5a6b4, content: ' The Greece men's national water polo team became the third best water polo team in the world in 200...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.6852555389578893)]",1.0,1.0,1.0,1.0,1.0 +How many judges were on the panel for this season?,four,three,"[Document(id=17489faf6fb39343de67cc2bfe448e959559775109bb646aa0e4d02b716ddf77, content: ' Jennifer Lopez and Steven Tyler joined the judging panel in season ten, but both left after two sea...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 0.6149654088231247)]",1.0,0.8083884716033936,1.0,1.0,1.0 +"Along with a chemical process, what type of process is used by night vision devices to convert infrared into visible light?",electrical,electrical,"[Document(id=1c8a379dc092fa8d374927640e6d16f46286416581451c66a1da80fef1412acc, content: ' Infrared can also be used to detect protostars before they begin to emit visible light. Stars emit ...', meta: {'name': 'Infrared', 'source_id': 'ba9d9843065facd8aa75e472f0c7d760ae3d78712335a49f7abaf499acb21d1c', 'page_number': 1}, score: 0.6642589447541861)]",1.0,1.0,1.0,1.0,1.0 +The second most prominent religion in New York is what?,Judaism,Judaism,"[Document(id=1eed45d4c946ec058384afdd313db9b847d2b384629f1872466e03c7bc13a2e5, content: ' +Christianity (59%), particularly Catholicism (33%), was the most prevalently practiced religion in ...', meta: {'name': 'New_York_City', 'source_id': '8a1509d057a51ed7a68b42ff87df5d459ac7f15f3d0f4f1685859ba197f9686d', 'page_number': 1}, score: 0.6168396798925952)]",1.0,1.0000001192092896,1.0,1.0,1.0 +The Free Hanseatic City of Bremen includes which cities?,Bremen and Bremerhaven,Bremen and Bremerhaven,"[Document(id=0427081e9deb9c727f124b818203b3d0cc649efe97ddc37c49cee8cebfa2c3a1, content: 'Germany is a federal republic consisting of sixteen federal states (German: Bundesland, or Land).[a]...', meta: {'name': 'States_of_Germany', 'source_id': '60764e89141121f734e956aeeb48fcbe12c7e4fdec683c4f9fa29406a3f0579e', 'page_number': 1}, score: 0.5087021851568686)]",1.0,1.0,1.0,1.0,1.0 +What seeks to limit the use of polysyllabic characters?,CPC,Unicode,"[Document(id=1ca3328892ba24004a75d8cfaab0f58ce32bed8ee034be6fa54e33e8b0a62eb5, content: ' UTF-16 extends UCS-2, using one 16-bit unit for the characters that were representable in UCS-2 and...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.5571209200423095)]",0.0,0.1403539478778839,0.0,0.0,0.0 +In what courts can federal habeas corpus suits be brought?,federal,federal,"[Document(id=478de5bfd3be02e5795166dfaf36bf31f55ed01f66e8d8ff978214365dc0f179, content: ' Further, the Court's decision in Baze v. Rees, upholding a lethal injection method used by many sta...', meta: {'name': 'Capital_punishment_in_the_United_States', 'source_id': '0a4e4dda32f7be6a4f78da5ed9ee21d2cc7659112c0d087b2923a482bdc77b5b', 'page_number': 1}, score: 0.5154552807306718)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What did Whitehead consider himself as a philosopher? ,rank amateur,rank amateur,"[Document(id=a022efa8ed739794e05008dbe44d9b83a87fc4f4b7a609ec096b9d437bc5089e, content: ' For Whitehead, education should be the exact opposite of the multidisciplinary, value-free school m...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 0.5968778075657488)]",1.0,1.0,1.0,1.0,1.0 +What condition does a person have to meet to be allowed by law on a coin?,deceased,payment of debts,"[Document(id=fecf8dec4eebf70e22436a1b01f96b62f6beb9466dedd822db56f659f010d7bd, content: ' The use of paper money not backed by precious metals had also occurred under the Articles of Confed...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.48692030873140724)]",1.0,0.1231364756822586,1.0,1.0,1.0 +Which region of the US experiences rain year round?,Pacific Northwest,Hawaii,"[Document(id=f1b319ccf6f84a8b9a658c8ac91d2b985ece87397b88fdbc6596e119ab5fed80, content: 'S., with Quinault Rainforest in Washington having an average of 137 inches (348 cm). Hawaii receives...', meta: {'name': 'Geography_of_the_United_States', 'source_id': '6eef5e1e8e3a1d489b23e0304d3cc9472b84a44871b2fd5ecfc8ab3f12e8ccf0', 'page_number': 1}, score: 0.5979786106604166)]",1.0,0.5233805179595947,1.0,1.0,1.0 +What is the ability to recognize and adapt to new specific pathogens called?,Adaptive (or acquired) immunity,Antigen specificity,"[Document(id=a4a5008fab761323a2771c50538a7e75c9cf4ed6426bbef3a6ae08b7edd123e4, content: ' The adaptive immune response is antigen-specific and requires the recognition of specific ""non-self...', meta: {'name': 'Immune_system', 'source_id': 'fcb8c5092b2596a66d4f96587d653cbabda606874d86632c35ec40ef8f023051', 'page_number': 1}, score: 0.5868653565168047)]",1.0,0.448192298412323,1.0,1.0,1.0 +Who allegedly hit a home run to the Center?,Babe Ruth,Luis Castillo,"[Document(id=20206deac29e5b4bc6d2d7bfd70810f6e421fd0dfa03d5a7cf710b840fbba782, content: ' +The Cubs had high expectations in 2002, but the squad played poorly. On July 5, 2002 the Cubs promo...', meta: {'name': 'Chicago_Cubs', 'source_id': '10bd5330115a2fe2b9ef60d21abe3942769709161783409af0c4444ccac129c3', 'page_number': 1}, score: 0.47699996405548506)]",1.0,0.4449738562107086,1.0,1.0,1.0 +Which type of graduate from Imperial earned the second highest average starting salary after graduation?,Computing graduates,Computing,"[Document(id=9be1bf507c2859b61088d1f83c172682620003fe33104d080a9b6b3a2256570a, content: ' Students are encouraged to take these classes either for credit or in their own time, and in some d...', meta: {'name': 'Imperial_College_London', 'source_id': '8ff02d910ce681fdd72074b13ed322271625386966ee827dabf1c93ca556fd6e', 'page_number': 1}, score: 0.4899747753246123)]",1.0,0.5292261838912964,1.0,1.0,1.0 +In what year did Eisenhower propose his second civil rights act?,1960,1960,"[Document(id=78d1760ca2fa0904e5480a91d388ba9d6c37d737a65426e5cbb2488333bc1e1a, content: ' Eisenhower made clear his stance in his first State of the Union address in February 1953, saying ""...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 0.6915934272370499)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What is the current height limitation in Paris in central areas?,50 m,50 m (160 ft),"[Document(id=891cab83df456a1bb11d8e7c301e45b02e564d224d3be1f7bdb82d02edc8f221, content: ' Modern Paris owes much to its late 19th century Second Empire remodelling by the Baron Haussmann: m...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.66910672984007)]",1.0,0.804789662361145,1.0,1.0,1.0 +Income level was found to have a signification correlation to which outcome in the study?,nutrition literacy,health,"[Document(id=873a8ae60b922fbd4c5960cb05388ab4415c59c994a541870c1a7488d1acef1c, content: ' Policies aiming at controlling unemployment and in particular at reducing its inequality-associated...', meta: {'name': 'Economic_inequality', 'source_id': '648ff32b8c146c974b23e115e3a61420b1cdbea9de1f5bf38442ce4c1d9caa42', 'page_number': 1}, score: 0.5147040807911953)]",0.0,0.38994866609573364,0.0,0.0,0.0 +How much did Viacom want in damages in their lawsuit against youtube?,$1 billion,$1 billion,"[Document(id=56ff4bdb4f543c61c1f516ba1e544e83e117ab5512bae58b2264fa6b6d86f26b, content: ' Three successful complaints for copyright infringement against a user account will result in the ac...', meta: {'name': 'YouTube', 'source_id': '4d18798daeceb997a6dcb04b575dbbdad768671ca9202116370611d944d510be', 'page_number': 1}, score: 0.740709470725756)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who should be able to challenge administrative orders in court?,individuals,individuals,"[Document(id=5c5f6a0591170904a56f4c0417e667089761d8d8f013f55283aff164fdadd75b, content: ' Dicey's: “no man is punishable or can be lawfully made to suffer in body or goods except for a dist...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 0.5566146748558449)]",1.0,1.000000238418579,1.0,1.0,1.0 +The Slavs were under what name in the early 6th century?,the Antes and the Sclaveni,the Antes and the Sclaveni,"[Document(id=482a70caa7e3a14a061a5b91daa2e56647767ae57909e94ef7246b9a66675bd9, content: ' The Old East Slavic Slavuta for the Dnieper River was argued by Henrich Bartek (1907–1986) to be de...', meta: {'name': 'Slavs', 'source_id': 'a55731ff3cd03d8fe34d4e646dbfcdc2088f01190e5110c9ff0baf4e4b486847', 'page_number': 1}, score: 0.6245358035969113)]",1.0,1.0,1.0,1.0,1.0 +How are the results simplified when using the SASO?,"into four summaries that look specifically at responses that correspond to either homosexuality, heterosexuality, bisexuality or asexuality.",four summaries,"[Document(id=c5da820627d1881a16bd936f9f4ee4b1126ca0003f08cdb0cd5303554431224f, content: ' Despite seven categories being able to provide a more accurate description of sexual orientation th...', meta: {'name': 'Sexual_orientation', 'source_id': 'f94279650936a403aa3d2b43ab1ad42b1fd14d05455aa86fbcda6cda978ffe5c', 'page_number': 1}, score: 0.26704233607027905)]",1.0,0.47273075580596924,1.0,1.0,1.0 +What socioeconomic class used the Florentine Tuscan language in Dante's time?,upper class,literate and upper class,"[Document(id=149c6390292392d4438c9191dca43bbe827c20a86bb7bfc502679813d7fcc108, content: ' Due to Eastern Lombard's status as a Gallo-Italic language, an Eastern Lombard speaker may, in fact...', meta: {'name': 'Dialect', 'source_id': '2e05b686e48c9512ae5466f1d15202b44fdc903ea32d0a42c5dbc9ef173d051d', 'page_number': 1}, score: 0.666760845084105)]",1.0,0.7628111839294434,1.0,1.0,1.0 +"In addition to ammo proposals, what other need was articulated?",The need for some form of tracer or smoke trail,desegregation of the Armed Forces,"[Document(id=94d45523552487be55666f4943bee9647a9f972593aae08c707a9b3bd1ba479e, content: '"" The report discussed and demonstrated racial discrimination in basic freedoms, education, public f...', meta: {'name': 'Affirmative_action_in_the_United_States', 'source_id': 'b7156a1660bb591e3abb4460028a7922c063ba8962ad6dce11678f8b08c5cdcb', 'page_number': 1}, score: 0.38881723110881794)]",0.0,-0.04560529813170433,0.0,0.0,0.0 +What is that power that enables us to love?,Prevenient grace,kindness meditation,"[Document(id=8aa1cc5c18eec7cb85e8dd3bc0786a603a405db09821c3852c577c0c1ee5d912, content: '[note 7] +The Noble Eightfold Path—the fourth of the Buddha's Noble Truths—consists of a set of eight...', meta: {'name': 'Buddhism', 'source_id': '7d629dc535ee0ab8e7a8ec238f72eca067cf563981907b159dd5db0c2c4a2fad', 'page_number': 1}, score: 0.3807221391984495)]",0.0,0.34043627977371216,0.0,0.0,0.0 +"Who was assassinated on September 22, 189 AD?",He Jin,Caesar,"[Document(id=da1f39f5c95c3d864aca4159d3d002043099e54c47348f0625401f54756ee30d, content: ' +Caesar was now the primary figure of the Roman state, enforcing and entrenching his powers. His ene...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.4098185524055718)]",0.0,0.18531587719917297,0.0,0.0,0.0 +How did Gaddafi change the legal system?,he introduced sharia as the basis for the legal system,abolished the monarchy and proclaimed the Republic. Ruling by decree,"[Document(id=242bcfd7cc78f498ff4abf96b525186f8ddb9d88565ef2d1ad2a1be9f3522584, content: 'Muammar Muhammad Abu Minyar al-Gaddafi (Arabic: معمر محمد أبو منيار القذافي‎ Arabic pronunciation: [...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.578814645482204)]",1.0,0.4175863265991211,1.0,1.0,1.0 +What name was it assumed Elizabeth would take upon her marriage to Philip?,"Mountbatten,",Elizabeth,"[Document(id=31de347482f6e76ab8d778bc1648539499d1dcc796460f1482425b5ee879fe5c, content: ' When she toured Canada and visited President Harry S. Truman in Washington, D.C., in October 1951, ...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.5909742966356426)]",1.0,0.27963173389434814,1.0,1.0,1.0 +What was the attack on the British weakness?,Battle of Fort Bull,sea communications,"[Document(id=2982b60b277d226d31c0a9fe90582fab11b0c6ff0f2e5a54d4e342474fbc3795, content: ' +In 1941, the Luftwaffe shifted strategy again. Erich Raeder—commander-in-chief of the Kriegsmarine—...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 0.5488736596606179)]",0.0,0.10573846101760864,0.0,0.0,0.0 +How much of South Tucson is Native American?,10%,10%,"[Document(id=e834bcf1ef63c7b72f9bf36d447e483da5003c1c77f033ef064e514b7082f587, content: ' To the south, N. Highland Avenue leads to the Barraza-Aviation Parkway bicycle path. +South Tucson i...', meta: {'name': 'Tucson,_Arizona', 'source_id': '1cd8368afbbb7156e4d37c16c9795a1cf7209fbd0036331b6759337ae62d7224', 'page_number': 1}, score: 0.5734801185966978)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Whose theory claimed that species were unchanging and designed by God?,John Ray,John Ray,"[Document(id=67c87173e747d72d79bc92866e553902d56583aae14965ecaa3b2da507256976, content: ' After the turmoil of the English Civil War, the Royal Society wanted to show that science did not t...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 0.6990559439833335)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who can restore a database that has been corrupted?,database administrator,Database transactions,"[Document(id=5baf5b500fc870a57b1f1a3f45f13e9b554e1d91359189ce3677cff042e60b2d, content: ' Data security prevents unauthorized users from viewing or updating the database. Using passwords, u...', meta: {'name': 'Database', 'source_id': '98b09743ece9be42ad021f0dc6ffab08c322e7eeace6aae0b98b4bbf67224bd3', 'page_number': 1}, score: 0.4012562109697081)]",1.0,0.4623287320137024,1.0,1.0,1.0 +What unpopular philosopher's text did Avicenna learn from?,Abu Abdullah Nateli,Abu Abdullah Nateli,"[Document(id=cbebf9a70685ea0e72bcf8c9f1dbbe9b7163ee26279b633446edf34314600fb3, content: ' Janssens demonstrated that Avicenna was a Sunni Hanafi. However, the 14th cenutry Shia faqih Nurull...', meta: {'name': 'Avicenna', 'source_id': '25cbb1011a29473e1dfa8d96ebe95bc8ead7393548de57e871f959624f7238f1', 'page_number': 1}, score: 0.7413625193934965)]",1.0,1.0,1.0,1.0,1.0 +What was the event called that caused Ayutthaya to reclaim lost territories ?,the Chinese threat,Ayutthaya recovered its territories by 1770,"[Document(id=f08f17188f751088b2b76f2c1a69ffc86f340979f05ea84b4b201def163ec6f3, content: ' Its trade and secular administrative reforms built a prosperous economy for more than 80 years. Fro...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.5241313195161235)]",1.0,0.10474255681037903,1.0,1.0,1.0 +"Before the introduction of the Latin alphabet, how was Old English written?",runic system,runic system,"[Document(id=568b9944b8e99493b93780736bc40a387965c8d30cebea94a19881b28bc86012, content: ' More entered the language when the Anglo-Saxons were converted to Christianity and Latin-speaking p...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.6607325652412197)]",1.0,1.000000238418579,1.0,1.0,1.0 +"When groups that are antagonistic to each other form a coalition for hidden gain, it is called what?",unholy alliance,unholy alliance,"[Document(id=c54ac19342651f97c105c4825d0b82cbf05684cf1d7c4921e0d078a7212b4a21, content: ' A common type of embezzlement is that of personal use of entrusted government resources; for exampl...', meta: {'name': 'Political_corruption', 'source_id': 'e7c84fb03cb06b72c4ab51eb24218e9f2a10b95493e43d575342db0ffa3e9e1d', 'page_number': 1}, score: 0.49411751006790716)]",1.0,1.0,1.0,1.0,1.0 +What made a strong economic case for investing in low carbon technologies?,Stern Review,the Stern Review,"[Document(id=83d98fb8af02ba2d35942c098935050048e1daa28d2d603b2592cee6dafe38b2, content: ' The energy sector regulator is the natural advocate for efficiency and cost-containment throughout ...', meta: {'name': 'Renewable_energy_commercialization', 'source_id': 'fa5f7904b8df5272f4f870ed401f610435c916171c87b47deed9a45e6bd594ea', 'page_number': 1}, score: 0.5205592477973444)]",1.0,0.8556329011917114,1.0,1.0,1.0 +"Who introduced the Latin version of the word ""insect""?",Pliny the Elder,Pliny the Elder,"[Document(id=de63707a025645c7fcc7a26e6dd37ddd29a27cd60ff187ce487e0d07217708ee, content: ' +The word ""insect"" comes from the Latin word insectum, meaning ""with a notched or divided body"", or ...', meta: {'name': 'Insect', 'source_id': 'e14215fdfb7e1394430dedb24549d773464f110b7f19b86558bc101f84126630', 'page_number': 1}, score: 0.8069591810257106)]",1.0,1.0,1.0,1.0,1.0 +What was started by the Roman Catholic Church to dispense of the Cathars in Languedoc?,The Cathar Crusade,The Cathar Crusade,"[Document(id=3f50079b5ebfa4f0c771fad96712717b3bd3050f02fa3abfb19f87bc049689e1, content: ' By the 11th century, more organised groups such as the Patarini, the Dulcinians, the Waldensians an...', meta: {'name': 'Heresy', 'source_id': '8ba473262f8508a9c99699bb09cd022993896ece1fed1463a9cf4bd7eb0c3e68', 'page_number': 1}, score: 0.4953807364007933)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What music service is a huge competitor for Tidal?,Spotify,Spotify,"[Document(id=f0c6a5cd5408a0fc01c220e5910c03a2241fbe85099410f518592c1fb7098441, content: ' ""Creating a partnership with Beyoncé, one of the most hard-working and talented people in the world...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 0.5224108713853148)]",0.0,0.9999999403953552,0.0,0.0,0.0 +How much was each copper token worth?,a halfpenny,legal tender at face value,"[Document(id=c5c8d55b2c3952d9e29c10530dc199fccdd3bfb5dc897ac40dd8ff1d01e6a97f, content: ' This has to do with Alexander Hamilton's suggestion to Congress of a fixed 15:1 ratio of silver to ...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.44825442543903155)]",0.0,0.4468753933906555,0.0,0.0,0.0 +What ships were attacked in 1852?,Glencoe and Sea Nymph,HMS Jasper,"[Document(id=399d610d5ad2cf45b4f82b2b6b4a1f496d7c04c7b5c676a8d108be1bd305ca1c, content: ' On 5 September, another French bombardment (the sixth) was followed by an assault by the French Arm...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 0.5000244950709762)]",0.0,0.3474302887916565,0.0,0.0,0.0 +What was the first fossil to display both clearly traditional reptilian characteristics?,Archaeopteryx,Archaeopteryx,"[Document(id=0815d5b0f30156ea39d24a8a15062d5e21822317a105f659f2000892f8d657c8, content: ' +The consensus view in contemporary paleontology is that the flying theropods, or avialans, are the ...', meta: {'name': 'Bird', 'source_id': '76114e3e9c92caec1de2bcfd38b6f22e1b48487f665fa6392f914934a6748ae1', 'page_number': 1}, score: 0.5702787263670992)]",1.0,1.0,1.0,1.0,1.0 +What are the theories that integrate molecular genetics with Darwinian evolution called?,the modern evolutionary synthesis,modern evolutionary synthesis,"[Document(id=e1b3872c9a56b024a609ebeaf211b3ea1b4799599e6935c3dd62e613f036575b, content: ' This dogma has since been shown to have exceptions, such as reverse transcription in retroviruses. ...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 0.5969093624625235)]",1.0,0.9375607371330261,1.0,1.0,1.0 +At what time did Tesla get dinner?,8:10 p.m,8:10 p.m,"[Document(id=2ed21a855190ee9369cd4351878edc528d570a2136e3a2612158d6108a59491e, content: ' until 6:00 p.m. or later, with dinner from exactly 8:10 p.m., at Delmonico's restaurant and later t...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.8054284554320554)]",1.0,0.9999999403953552,1.0,1.0,1.0 +In what year did the French reach Alsace?,1444,1639,"[Document(id=013200ecd488f27b33849214ab1b40ff36933ae1c7d8a744711dad96ab0da2d0, content: ' The latter was able to use this tax and a dynastic marriage to his advantage to gain back full cont...', meta: {'name': 'Alsace', 'source_id': '1105499d303196fb0b7ae4d26209720b0040e63376f114d458fd77f05836cbd4', 'page_number': 1}, score: 0.6070325432940982)]",1.0,0.6075954437255859,1.0,1.0,1.0 +Who was responsible for the handling of the Roman census?,two censors,censors,"[Document(id=c149e308ac4553abf8316efb49a5900ee5ff1e1eda0f20dd8b01137b5904cd33, content: ' Praetors administered civil law and commanded provincial armies. Every five years, two censors were...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.48978974934463204)]",1.0,0.8060649633407593,1.0,1.0,1.0 +When was a resolution agreed to about Chinese human rights issues in San Francisco?,"April 1, 2008","April 1, 2008","[Document(id=d16248de2a5e0d9e6bbb7e85b411bf0a4143206e20471ecab73dc7158ce992b1, content: ' The cost to the city for hosting the event was reported to be USD $726,400, nearly half of which ha...', meta: {'name': '2008_Summer_Olympics_torch_relay', 'source_id': 'a45e6ea09a1ff12d8a239a682c8ef3735ae894d5ab6427b50909053bb046e8b5', 'page_number': 1}, score: 0.5365703485459956)]",1.0,1.0,1.0,1.0,1.0 +Who was the last pope to be crowned?,Paul VI,Pope John Paul I,"[Document(id=18ed06b8889609136231a06d99465ac7da3164b444e51e58cace4a1e98ddebcb, content: ' He was the last pope to date to be crowned; his successor Pope John Paul I replaced the Papal Coron...', meta: {'name': 'Pope_Paul_VI', 'source_id': 'fc82a40c990c0b3840b06b32643858d39984b66d78ae950561d809dc84d6edf8', 'page_number': 1}, score: 0.5372751222409363)]",1.0,0.7952640056610107,1.0,1.0,1.0 +Who handles serious medical emergencies on Norfolk Island?,the Royal Australian Air Force,Royal Australian Air Force,"[Document(id=c3327e32d8658307f8dca684bbdeb22895ca7a8d7ee8ad6ff0eba6200c5e6da2, content: ' All visitors to Norfolk Island, including Australians, are recommended to purchase travel insurance...', meta: {'name': 'Norfolk_Island', 'source_id': '8be872c9c3302a16163599d52ec60841ddff13042f8e8e2e0aa14de2c192e2ee', 'page_number': 1}, score: 0.6820248727155205)]",1.0,0.9528100490570068,1.0,1.0,1.0 +Were Roman soldiers expected to purchase their own equipment?,"wealthiest citizens, who were able to purchase the best equipment",the Roman state was forced to arm its soldiers at the expense of the state,"[Document(id=3f0396346c47d3ea53931dd05016eb360d6261c1b5ab0c7a977a5bbf0df1be01, content: ' As a result, the Roman state was forced to arm its soldiers at the expense of the state, which it h...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.5248291958675437)]",1.0,0.255912184715271,1.0,1.0,1.0 +Who did Nasser's group try to assassinate?,Hussein Sirri Amer,Hussein Sirri Amer,"[Document(id=9a67277c77a4ce4cd3c2095f48d3612e079b639eb7e063071b54fd0bb0e70e3f, content: ' The interrogation pushed Nasser to speed up his group's activities. +In the 1950 parliamentary elect...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 0.6046152857550448)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What was AJ Ayer an important member of?,Logical empiricism,Basketball,"[Document(id=d7f6f0383459fe08c23a94fb73e1f2386715e112d5e554aee39542da48c23478, content: ' With the university having high profile sports teams itself, a number of alumni went on to become i...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 0.3389550585749015)]",0.0,0.12688907980918884,0.0,0.0,0.0 +In which year was a settlement reached regarding GE's pollution of the Housatonic River?,1999,1999,"[Document(id=6c8a4bf97575a50cf435a385d35038780603d1e0468298d1d46906e5d6f5565e, content: ' Its main offices are located at 30 Rockefeller Plaza at Rockefeller Center in New York City, known ...', meta: {'name': 'General_Electric', 'source_id': '26d4044eb2d79191635af69e57c262af8041a70871e60e989128c458529f04cc', 'page_number': 1}, score: 0.4865949646927139)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What area in Charleston is considered the 'industrial area'?,the city's Neck area,Charleston–North Charleston–Summerville Metropolitan Statistical Area,"[Document(id=620f512c3df1eb1788fcecbd5ec4c0438953e005020fab4303e121d55369ade5, content: 'Charleston is the oldest and second-largest city in the U.S. state of South Carolina, the county sea...', meta: {'name': 'Charleston,_South_Carolina', 'source_id': '6135d76cf2064b0c4f70e685c5c7e15b8093869ebd10f9a74e8acb07e2173763', 'page_number': 1}, score: 0.5474995153501447)]",1.0,0.40961262583732605,1.0,1.0,1.0 +In what year did Frédéric obtain citizenship in France?,1835,1835,"[Document(id=39112deed098f10b50cfae893e4a07e0541a59e3ede2de2a696f87f4dd3a60cf, content: '"" When in September 1831 he learned, while travelling from Vienna to Paris, that the uprising had be...', meta: {'name': 'Frédéric_Chopin', 'source_id': '5e95b80d7ceb3febb2d1a7dfe09730771a231fc8d23c15cd74d3fa2327cfcb14', 'page_number': 1}, score: 0.650285022879904)]",1.0,1.0,1.0,1.0,1.0 +What was Tesla afraid someone was trying to do with his invention?,steal the invention,bad publicity,"[Document(id=6da0a379194f1e5cc0824b3bc6b405cb6290228e4f9e43f1d1060a34ef127630, content: '[improper synthesis?] +Starting in 1934, the Westinghouse Electric & Manufacturing Company began payi...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.6402881206087123)]",1.0,0.10993127524852753,1.0,1.0,1.0 +What is the group called that does not agree with government at all?,anarchists,cordon sanitarian,"[Document(id=eb6af6324b267f8ddca82d92abfdcd76a1aa66a122f874959b3e79f50982c1fb, content: 'S. will be the president, if the party holds that office, or a prominent member of Congress in oppos...', meta: {'name': 'Political_party', 'source_id': '476a9215de81b75a4f17737eed1679570973e55b5ed6175aa0ec1014b09d4632', 'page_number': 1}, score: 0.4507376320029649)]",0.0,0.33983707427978516,0.0,0.0,0.0 +What was the name of the bookstore the authors met in?,Old Corner Bookstore,Old Corner Bookstore,"[Document(id=65f32cb8a9d06cb1d46b80450df1eedc1f84baa40c22a9bf793cc3c450086a5d, content: ' Irish Americans are a major influence on Boston's politics and religious institutions. Boston also ...', meta: {'name': 'Boston', 'source_id': '44b3642e0403a5651f6e670019c559e255665af6d80d967a0700be9cc365a0bb', 'page_number': 1}, score: 0.4352219870306881)]",1.0,1.0,1.0,1.0,1.0 +Time and space are both examples of what type of resource?,complexity resources,a systematic mental framework,"[Document(id=11495c74c8b11a9455df2289e0ff9463ec8bfc2283068403eab6563e5116917e, content: '26) by which we simultaneously grasp the past in memory, the present by attention, and the future by...', meta: {'name': 'Time', 'source_id': 'f3ee42c6d38306d65c6207a15a1b9c65ff08a69728d5d0197f9593548177b229', 'page_number': 1}, score: 0.4736020301266076)]",0.0,0.29695814847946167,0.0,0.0,0.0 +What had happened to Frederick's extra troops from Russia?,"Catherine, who ended Russia's alliance with Prussia and withdrew from the war",Frederick moved the bulk of his eastern forces to Pomerania,"[Document(id=365315b6f4505a199f55ac7205b59811514e5abd3df99d0fc254fd9b9dce3a39, content: ' First, Frederick devastated Prince Soubise's French force at the Battle of Rossbach on 5 November 1...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 0.6440337434750146)]",1.0,0.37803566455841064,1.0,1.0,1.0 +What percentage of its workforce did Dell terminate in from 2007 to 2008?,10%,12,"[Document(id=0c5f3492dfe6b6838bc7113688331de615d79f7950a305ca235db36174dba218, content: ' With the return of Michael Dell as CEO, the company saw immediate changes in operations, the exodus...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 0.6340834355511753)]",1.0,0.29873126745224,1.0,1.0,1.0 +What organization was Rome's official caretaker?,the senate,the senate,"[Document(id=4b02bca62f562d6c5bfda9faf0b29f77e95b817f971009b508742099d4c57423, content: ' None of these defixiones seem produced by, or on behalf of the elite, who had more immediate recour...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5570392530581753)]",1.0,1.0,1.0,1.0,1.0 +Who chose BYU as the location of the national Middle East Language Resource Center?,United States Department of Education,United States Department of Education,"[Document(id=0b3a599a54fac34c82f29d858c51bbf5ce4f37032b3539a76c2b8a6b918abc6e, content: ' Several of its language programs are the largest of their kind in the nation, the Russian program b...', meta: {'name': 'Brigham_Young_University', 'source_id': 'cb3b81b6f2c540426803106face67bc5fd4db7b1a9ef422ec3601456732b4785', 'page_number': 1}, score: 0.7432106125866814)]",1.0,1.0,1.0,1.0,1.0 +Who discovered Hydrogen gas?,Robert Boyle,Robert Boyle,"[Document(id=45ad6989b7801812895d43ea694bad169af9b5dd9024dd96b099fe071a196217, content: ' +In 1671, Robert Boyle discovered and described the reaction between iron filings and dilute acids, ...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.7008250803733801)]",1.0,1.0,1.0,1.0,1.0 +Where was Polybius forced to go as hostage?,Rome,Rome,"[Document(id=97686de9c4f8fed507eaca869b9537500fb0e8c613b130c6f388bc83e9983444, content: ' Some areas of the conquered world were more affected by Greek influences than others. The term Hell...', meta: {'name': 'Hellenistic_period', 'source_id': '322682cbb61d31c17e6d21e06723e94efd0ef242a5c9f308888fa14209c4078b', 'page_number': 1}, score: 0.5478172274353506)]",1.0,1.0000001192092896,1.0,1.0,1.0 +In what decade was disco popular?,1970s,1970s,"[Document(id=9a308ea0c6c403ea96ab95ff48edb33ba23832969d32fceece39f77a4f1b9d67, content: ' The addition of major radio stars such as Dan Daniel, Steve O'Brien, Dick Summers, Don Bleu and Tom...', meta: {'name': 'Adult_contemporary_music', 'source_id': '97671df8695f447ceee464d87be9f360589d67646ccf18f2eb61b6153865369a', 'page_number': 1}, score: 0.44005014954968813)]",0.0,1.000000238418579,0.0,0.0,0.0 +What do the rivers form?,lakes,Nieuwe Maas,"[Document(id=d4f00e0321e7bd0e40e3988f483dbb9e724f85a7fec1890682781c70d19fdf9a, content: ' The middle flow begins as Nederrijn, then changes into Lek, then joins the Noord, thereby forming N...', meta: {'name': 'Rhine', 'source_id': '23686e48a61504679a91dac10d83b287aef3dff7dabc0ab881c281059187ce6a', 'page_number': 1}, score: 0.5195132686870181)]",0.0,0.2038368135690689,0.0,0.0,0.0 +What were the first 256 code points of Unicode made identical to? ,ISO-8859-1,ISO-8859-1,"[Document(id=1ca3328892ba24004a75d8cfaab0f58ce32bed8ee034be6fa54e33e8b0a62eb5, content: ' UTF-16 extends UCS-2, using one 16-bit unit for the characters that were representable in UCS-2 and...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.6513962855269255)]",1.0,1.0000001192092896,1.0,1.0,1.0 +"How many countries did her song ""Irreplaceable"" get number one status in?",five,three,"[Document(id=1458da8434cabde78aa443222423f12deeb21dfbf444ec9694b97cfdf73b8417, content: ' Beyoncé was the third woman to have writing credits on three number one songs (""Irreplaceable"", ""Gr...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 0.48544494375755437)]",1.0,0.6437669396400452,1.0,1.0,1.0 +How many Examination Boards exist in India?,30,30,"[Document(id=16d8a70ea7a6d58f48e68b5c9072e65770d4a4053e9906c77dee6ff0c12c90a1, content: ' However, these vocational schools are not part of the German dual education system. Ergänzungsschul...', meta: {'name': 'Private_school', 'source_id': 'c62330ba95e9919513bc9fc65c0d21a748b4a518b9b1f905a113de7f37274e44', 'page_number': 1}, score: 0.5124628146490905)]",1.0,1.0,1.0,1.0,1.0 +What play showed an early depiction of civil disobedience?,Antigone,Antigone,"[Document(id=23b161a9804a8a11c97c4f3f14b6762b49fdf3b1fefb8ff1de76b00cfa00995a, content: 'One of its earliest massive implementations was brought about by Egyptians against the British occup...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 0.6589027090165251)]",1.0,1.0,1.0,1.0,1.0 +"Once in conversation with the Queen, how is Elizabeth addressed?",Ma'am,Your Majesty and thereafter as Ma'am,"[Document(id=3d92674c03fa6fb7dff9bfacadbd449807bd3f50ac1fd93ba9ff982be85ddbf6, content: ' Under pressure from public opinion, she began to pay income tax for the first time, and Buckingham ...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.6122711859695364)]",1.0,0.6777033805847168,1.0,1.0,1.0 +In what century did Paris become largely into fashion?,19th,19th,"[Document(id=c37151361f1ebbd218e8e53e243abfe86d24fabddd8be6f3401cf56e9e98633b, content: ' Its name is said to have come in 1814 from the Russian soldiers who occupied the city; ""bistro"" mea...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.5561873109919271)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did Alfred the Great begin his reign?,871,871,"[Document(id=5396376bb5221d60d6016a3ef2e8c6fb591b1eb2c9c6dab90677a7e0ac39a5a5, content: ' The speech of eastern and northern parts of England was subject to strong Old Norse influence due t...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.4737880350354553)]",0.0,1.0,0.0,0.0,0.0 +What two methods can be used when using Load Testing?,data or a large number of users,load or endurance,"[Document(id=77ce7cf14e807c8808892e96984a50baad4f22bdca6b22034b2fb170f96240a2, content: ' Stability testing (often referred to as load or endurance testing) checks to see if the software ca...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.5191316946425397)]",1.0,0.19878104329109192,1.0,1.0,1.0 +what may possibly be a solution to net neutrality concerns? ,municipal broadband,municipal broadband,"[Document(id=bb0e180e009c490f3ae30fb55a9a91cbe512409c2e12ebae0203298a6f3efe78, content: 'S. Federal Communications Commission (FCC) was reported to be considering a new rule that will permi...', meta: {'name': 'Internet_service_provider', 'source_id': 'b6cdeebacab1c0fc99cc348ba10a7ad5f8a09ca4691c74f89374730939ffbbb4', 'page_number': 1}, score: 0.6359423909104869)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What are the ground-attack aircraft in the USAF usually deployed in support for? ,U.S. ground forces,U.S. ground forces,"[Document(id=cdfa1e6bea8fd42b00ce80468ba471689216a6195425c2435fb1141b0b8c7e6f, content: ' +The ground-attack aircraft of the USAF are designed to attack targets on the ground and are often d...', meta: {'name': 'United_States_Air_Force', 'source_id': '1bb222de66e5a2c81f1d5b75bf3eced066939aec82934a5cb176a607b4e8cc8f', 'page_number': 1}, score: 0.6595684899907257)]",1.0,1.0,1.0,1.0,1.0 + In what year did the invasion of Russia take place?,1812,1853,"[Document(id=4b98a71cc257b51798b5ea0d563d064d9fb226d48c8fe7f85873c68abd7f0818, content: 'The Crimean War was a military conflict fought between October 1853 – March 1856 in which Russia los...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 0.5591400200208592)]",0.0,0.672028660774231,0.0,0.0,0.0 +What was the practice of religion to the Romans?,personal choice,a part of daily life,"[Document(id=1a8c1e09e777c4e183dceed861053866bfd85f790fb0c2cd9b9aad59ad7dc5b2, content: ' Even the most skeptical among Rome's intellectual elite such as Cicero, who was an augur, saw relig...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.6737981139329287)]",1.0,0.2831001281738281,1.0,1.0,1.0 +What is one example of failure testing?,Software fault injection,Software fault injection,"[Document(id=76ad3fb3d257a3812a169d749a5fb13f2e05383764e8f9dfc53895f08747b375, content: ' Beta versions can be made available to the open public to increase the feedback field to a maximal ...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.5810701579387141)]",1.0,1.0000001192092896,1.0,1.0,1.0 +"Out of the maximum amount of players allowed, one must be a what?",goalkeeper,goalkeeper,"[Document(id=5888e9054f80f5ec8de6b82cb835f9c0f204a0b02a1dbcba52a8c08b7fc1e5d9, content: ' +In game play, players attempt to create goal-scoring opportunities through individual control of th...', meta: {'name': 'Association_football', 'source_id': 'f2b7305b09ad8a43b8389a5eb2d84b8ea4b4deaa7968d21fe9da9da9ecda2c6b', 'page_number': 1}, score: 0.46009544332231944)]",1.0,0.9999999403953552,1.0,1.0,1.0 +In which year was the new constitution ratified?,1855,1855,"[Document(id=389828aedbe48534f7b7e38bfd9cc07d38a8b4d3b3f916c9ebd24c3283281066, content: ' At the same time, the state liberals and conservatives compromised to allow the popular Ángel Trías...', meta: {'name': 'Chihuahua_(state)', 'source_id': '5cdba5101b5b67de284f8ec7f24cba018153f78d5d347824d92b73e038b78dd2', 'page_number': 1}, score: 0.49185137587424504)]",1.0,1.0,1.0,1.0,1.0 +What vessel was attacked in 1845?,Naiad,Glencoe and Sea Nymph,"[Document(id=353e78a8c5526319fefad1c37e2e3511ce4323f2f90be311b53a11227b5bd85c, content: ' In 1852 the San Francisco-based ships Glencoe and Sea Nymph were attacked and everyone aboard excep...', meta: {'name': 'Marshall_Islands', 'source_id': '762a4f6095de127ded5441c7a17d1c9c38f731ac9cb1ffe8de30af6573be7edf', 'page_number': 1}, score: 0.5135568769234267)]",1.0,0.3519451916217804,1.0,1.0,1.0 +When did Travel + Leisure list Yale as one of the most beautiful campuses?,2011,2011,"[Document(id=92daacd08f6215b2b6c9a23e96940ae9ffa3af8f6a0300108c6bfcf840702db9, content: ' In western New Haven, the university holds 500 acres (2.0 km2) of athletic facilities, including th...', meta: {'name': 'Yale_University', 'source_id': 'f33638c4d6b8d7572ba500f3a8affca1729aae3046c83a94e2a7424dd50f6549', 'page_number': 1}, score: 0.5900821044427363)]",1.0,1.0,1.0,1.0,1.0 +What sort of variants did the Human Genome Project indicate that many of the measured transcripts were?,alternative variants,1–3%,"[Document(id=1f5a83934868fb58cdc4206eab7a18d57dd1b714f1c2d2b985185f53c335c832, content: ' This questioning gained momentum in the 1960s during the U.S. civil rights movement and the emergen...', meta: {'name': 'Race_(human_categorization)', 'source_id': '1232f7fe18d6e2adb87627ce2042b468cfa3fb9ba0b6afb2165c345f440b5339', 'page_number': 1}, score: 0.4513507692385032)]",0.0,0.11423512548208237,0.0,0.0,0.0 +During what year did the Italians invade Libya?,1911,1911,"[Document(id=1b5c167a9d5d609ea5fb8a97ac9f8e9db1b78d5c91d2f95320c7a861768e926e, content: ' He repeatedly expressed a preference for the desert over the city and retreated to the desert to me...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.610828310088769)]",1.0,1.000000238418579,1.0,1.0,1.0 +What did Davidson believe the God of Aristotle is synonymous with?,rational thought,rational thought,"[Document(id=707efe35d39bbe6f12111e251260c07bcfa48cea8000d7e2c9680c78ec3b9dcc, content: '"" The theory was indebted to Aristotle's pluralism and his concepts of Soul, the rational, living as...', meta: {'name': 'Idealism', 'source_id': '55a0bd42a4a737442a2d93fe0926424e236910de01b2406ed43c174f2225ff9d', 'page_number': 1}, score: 0.530264670750077)]",1.0,1.0,1.0,1.0,1.0 +What percentage of students at Notre Dame are the children of former Notre Dame students?,21–24%,21–24%,"[Document(id=4a2399f100a536a4baf71c5dedf166c45f09b1b237a505e9a84faf1da7fd8a1b, content: ' +In 2014 the Notre Dame student body consisted of 12,179 students, with 8,448 undergraduates, 2,138 ...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 0.6933619621704381)]",1.0,1.0,1.0,1.0,1.0 +On what day did U.S> planes located Ozawa's fleet?,second,24 October,"[Document(id=9581bfa82ec59693593b37d324dd3202f11195fecf1f113578c324318b9772ee, content: ' The carriers had only 108 planes. The force was not spotted by the Allies until 16:40 on 24 October...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 0.6472377350236153)]",1.0,0.2516951560974121,1.0,1.0,1.0 +Greek was crowned the European champions in what event?,Euro 2004,Euro 2004,"[Document(id=4b9cc1d29ed0f9a4cb783c7febb93802073dee536efd60ee53a2c3e5bec9cccf, content: ' Their best ever performance was in the 1896 Summer Olympics, when Greece finished second in the med...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.6089269199243204)]",1.0,1.000000238418579,1.0,1.0,1.0 +Which tense indicates that an action hasn't finished?,imperfective,imperfective,"[Document(id=bb36de75da71a69959981a8c0923df0cc0a0f4248e05fecc303139076fc6aa2d, content: ' The evidence supporting this claim is patchy: Croatian linguist Stjepan Babić complained that the t...', meta: {'name': 'Serbo-Croatian', 'source_id': '2cc55814bc801258e8300ed82e003428dbf11715c74ad99297d61c5dd77023f4', 'page_number': 1}, score: 0.3154931739562564)]",1.0,1.0,1.0,1.0,1.0 +What acoustic element could be distorted by different audio settings in iPods prior to 2007?,Bass,bass instruments,"[Document(id=7308906ca1725346de1f8669d04745bd0719f93959d2b77a3e2eedd2aea218ba, content: ' This would happen for EQ settings like R&B, Rock, Acoustic, and Bass Booster, because the equalizer...', meta: {'name': 'IPod', 'source_id': 'f0324763387a7e11c235569bada6935891b2c5ba40dd065ab5acc1bb5de8dbed', 'page_number': 1}, score: 0.5572792387087808)]",1.0,0.8207652568817139,1.0,1.0,1.0 +In what way can Neutral triatomic hydrogen exist?,excited form,excited form,"[Document(id=814f2c2136f18b6a49318bf9a19fad148d9b272f987b2539771dc5dbd3beca10, content: ' +Under ordinary conditions on Earth, elemental hydrogen exists as the diatomic gas, H2. However, hyd...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.5947984648702281)]",1.0,1.0,1.0,1.0,1.0 +How are children used in the conflicts in Burma?,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements.,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements,"[Document(id=ff11a1c6102e2b293943cbcfb8c2935799487df6f465dea412c00844c1c7c655, content: '"" In July 2013, the Assistance Association for Political Prisoners indicated that there were approxi...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.6589232659427924)]",1.0,0.9912453889846802,1.0,1.0,1.0 +In what decades were Arsenal players stereotyped in a negative way?,1970s and 1980s,1970s and 1980s,"[Document(id=ac63b8357f80e413cededa7c5a37849ede32b9c8d7d3181d6e287572836f5f86, content: ' They formed the backdrop to one of the earliest football-related films, The Arsenal Stadium Mystery...', meta: {'name': 'Arsenal_F.C.', 'source_id': 'b197b14120aabbc3bf2073d4b3e99c1c51d56d5ac5248f8a97dadcfa056a6f7e', 'page_number': 1}, score: 0.5470057790543041)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did new construction start in Melbourne?,late 1890s,late 1890s,"[Document(id=e7a2f32f10a857b188ae10953333d02317612155cbae0e2ac8e0bf5365da90e4, content: ' The Melbourne financial crisis was a contributing factor in the Australian economic depression of t...', meta: {'name': 'Melbourne', 'source_id': 'e9cc00699ea29b72949874ec0e147efa277ecdaa2a727246f376aab1a8d342bd', 'page_number': 1}, score: 0.5958355202000913)]",1.0,1.0,1.0,1.0,1.0 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:3__chunk_size:15.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:3__chunk_size:15.csv new file mode 100644 index 0000000..4304bbb --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:3__chunk_size:15.csv @@ -0,0 +1,121 @@ +questions,true_answers,predicted_answers,retrieved_docs,doc_mrr,sas,doc_map,doc_recall_single_hit,doc_recall_multi_hit +What were the earliest priesthoods? ,flamines,flamines,"[Document(id=3c9b5484de418625b08e3d8f4e3c3398f40f2f5af0bbf48528ee46ea38c64e7c, content: ' In household cult, the paterfamilias functioned as priest, and members of his familia as acolytes a...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5597276623459511)]",1.0,1.0,1.0,1.0,1.0 +When was a third visit to Mnafeld scheduled? ,early 1546,15 days later,"[Document(id=b1a7bf3d4ede00a180df1d10e5220cf7adfe31e6c297fec3813651acf8043c8f, content: ' In May 1957 Diem, then President of South Vietnam, made a state visit to the United States for ten ...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 0.342617954858916)]",0.0,0.24934425950050354,0.0,0.0,0.0 +What was the name of the airfield where heavy raids took place?,Farnborough,Coventry,"[Document(id=93c78551f27ece2b380d8a30b1c8852063e2afe373bf66ac0ff8fc705d37f96c, content: ' On 19 November 1940 the famous RAF night fighter ace John Cunningham shot down a Ju 88 bomber using...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 0.5731025412729198)]",1.0,0.5261203050613403,1.0,1.0,1.0 +The Modern English ending -'s is derived from what ending in Old English?,-es,-es,"[Document(id=e9007dae49d8af7b7fa7932aef22bbdc01132df7a83250c1516203d03c7a8b4b, content: ' It is most “important to recognize that in many words the English and Scandinavian language differe...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.6303054320453825)]",1.0,1.0000001192092896,1.0,1.0,1.0 +In what area is it common for spiritual mentorship to be extremely high?,the West,Hinduism,"[Document(id=b43e607046d3a9f2878f76721d6d75cb6c46d2bdb38f6b3fdc5f1b9a2f23a163, content: ' +In The Church of Jesus Christ of Latter-day Saints (LDS Church), the teacher is an office in the Aa...', meta: {'name': 'Teacher', 'source_id': '8d3b7bd3176a10380300a49fa713b8fffa5e77a181df51ea823d2bdeb46d4c7b', 'page_number': 1}, score: 0.5091397083739027)]",1.0,0.4012155532836914,1.0,1.0,1.0 +Who is the only Greek volleyball club to win European titles?,Olympiacos,Olympiacos,"[Document(id=4af194e6a549f3cca324fac69732006a675ddeb8e841c964604dff19122e4f16, content: 'O.K, Maroussi) have won 3 Triple Crowns, 5 Saporta Cups, 2 Korać Cups and 1 FIBA Europe Champions Cu...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.6223667259906489)]",1.0,1.0,1.0,1.0,1.0 +How many judges were on the panel for this season?,four,four,"[Document(id=64448bd26ee757122a322e3c87d03eaf23b7b9bfaa5b939f12b70c7a629dd6d7, content: ' This season's judging panel consisted of Randy Jackson, along with Mariah Carey, Keith Urban and Ni...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 0.4864700329068361)]",1.0,1.0,1.0,1.0,1.0 +"Along with a chemical process, what type of process is used by night vision devices to convert infrared into visible light?",electrical,electrical,"[Document(id=2296ed852027297f8181dc1f64af4fcbb66e36bb282c6197423888ce9d011df5, content: '9–14 μm) and produce images of that radiation. Since infrared radiation is emitted by all objects ba...', meta: {'name': 'Infrared', 'source_id': 'ba9d9843065facd8aa75e472f0c7d760ae3d78712335a49f7abaf499acb21d1c', 'page_number': 1}, score: 0.6292707894619731)]",1.0,1.0,1.0,1.0,1.0 +The second most prominent religion in New York is what?,Judaism,Judaism,"[Document(id=5dac70957842f214d4c60f76564b27a7e657cd237c677ed236d1ed1ca76e360f, content: ' +Christianity (59%), particularly Catholicism (33%), was the most prevalently practiced religion in ...', meta: {'name': 'New_York_City', 'source_id': '8a1509d057a51ed7a68b42ff87df5d459ac7f15f3d0f4f1685859ba197f9686d', 'page_number': 1}, score: 0.6064028863553603)]",1.0,1.0000001192092896,1.0,1.0,1.0 +The Free Hanseatic City of Bremen includes which cities?,Bremen and Bremerhaven,Bremen and Bremerhaven,"[Document(id=48379d700246108e0c3da307a3a8e5de24d8bee4044a5988144f1d9cd57b09c2, content: 'Germany is a federal republic consisting of sixteen federal states (German: Bundesland, or Land).[a]...', meta: {'name': 'States_of_Germany', 'source_id': '60764e89141121f734e956aeeb48fcbe12c7e4fdec683c4f9fa29406a3f0579e', 'page_number': 1}, score: 0.5087021851568686)]",1.0,1.0,1.0,1.0,1.0 +What seeks to limit the use of polysyllabic characters?,CPC,fullwidth forms,"[Document(id=8c55128e915db91be79f675390a7251853ff83d2c6945bcbba98c2fef9e723ac, content: ' For example, the ""fullwidth forms"" section of code points encompasses a full Latin alphabet that is...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.536255021263228)]",0.0,0.06494750082492828,0.0,0.0,0.0 +In what courts can federal habeas corpus suits be brought?,federal,federal,"[Document(id=18fb3c7c246cfed9c7f86df4b14156139de2cefac2e61a9bbd26c8653739ab9b, content: ' Further, the Court's decision in Baze v. Rees, upholding a lethal injection method used by many sta...', meta: {'name': 'Capital_punishment_in_the_United_States', 'source_id': '0a4e4dda32f7be6a4f78da5ed9ee21d2cc7659112c0d087b2923a482bdc77b5b', 'page_number': 1}, score: 0.5379543078595055)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What did Whitehead consider himself as a philosopher? ,rank amateur,"last great Anglo-American philosopher before Wittgenstein's disciples spread their misty confusion, sufficiency, and terror."" French sociologist and anthropologist Bruno Latour even went so far as to call Whitehead ""the greatest philosopher of the 20th century","[Document(id=777192ecd0b066a38e00224d5a2269174c0dfa158ccc4e8bc7395644c0b99822, content: 'g., a religion might encourage the violent extermination of a rival religion's adherents). +However, ...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 0.5657093946225062)]",1.0,0.11629921197891235,1.0,1.0,1.0 +What condition does a person have to meet to be allowed by law on a coin?,deceased,"""No state shall... make any thing but gold and silver coin a tender in payment of debts","[Document(id=68d14edc51b808820ba009fc52fe536383dc5d9fe285a432d114fdcd84ef9e86, content: ' The currency as we know it today did not get the faces they currently have until after the early 20...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.5031800963235427)]",1.0,0.03460206836462021,1.0,1.0,1.0 +Which region of the US experiences rain year round?,Pacific Northwest,Pacific Northwest,"[Document(id=6acdafac063b4c532b8491aead2266ee3880a9ecef0a8eac6e645a6d8ae35346, content: ' Precipitation averages less than 15 inches (38 cm). The Southwest is a hot desert, with temperature...', meta: {'name': 'Geography_of_the_United_States', 'source_id': '6eef5e1e8e3a1d489b23e0304d3cc9472b84a44871b2fd5ecfc8ab3f12e8ccf0', 'page_number': 1}, score: 0.6014007834873065)]",1.0,1.0,1.0,1.0,1.0 +What is the ability to recognize and adapt to new specific pathogens called?,Adaptive (or acquired) immunity,Antigen specificity,"[Document(id=7f56f4718d147b0156e077e4853a6ac0250773335364d5e5cac1ff37df572de8, content: ' The adaptive immune response is antigen-specific and requires the recognition of specific ""non-self...', meta: {'name': 'Immune_system', 'source_id': 'fcb8c5092b2596a66d4f96587d653cbabda606874d86632c35ec40ef8f023051', 'page_number': 1}, score: 0.5509301004548758)]",1.0,0.448192298412323,1.0,1.0,1.0 +Who allegedly hit a home run to the Center?,Babe Ruth,Babe Ruth,"[Document(id=9f4c20a2bce44cf9bd3af493be183ab333c4d80eef37f323c78a73218b2cb402, content: ' Because of the tie the Giants and Cubs ended up tied for first place. The Giants lost the ensuing o...', meta: {'name': 'Chicago_Cubs', 'source_id': '10bd5330115a2fe2b9ef60d21abe3942769709161783409af0c4444ccac129c3', 'page_number': 1}, score: 0.5064623445443148)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Which type of graduate from Imperial earned the second highest average starting salary after graduation?,Computing graduates,Computing,"[Document(id=724eb1fe8da5e0d4da8a448227c2eb83283ae94ad66b58e07296d26274b6a2b4, content: ' In terms of specific course salaries, the Sunday Times ranked Computing graduates from Imperial as ...', meta: {'name': 'Imperial_College_London', 'source_id': '8ff02d910ce681fdd72074b13ed322271625386966ee827dabf1c93ca556fd6e', 'page_number': 1}, score: 0.47044950560227883)]",1.0,0.5292261242866516,1.0,1.0,1.0 +In what year did Eisenhower propose his second civil rights act?,1960,1960,"[Document(id=0f23a3d664b2d9d293629ea9d692c835b9a8e93ffb30bc45de463cd9b9d93424, content: ' Therefore, Khrushchev would not take part in the summit. Up until this event, Eisenhower felt he ha...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 0.642219936436536)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What is the current height limitation in Paris in central areas?,50 m,50 m (160 ft),"[Document(id=44b4daa4d3c4378951ca880516b3342520595caa477d61e2c5fc88de3627c8f0, content: ' Modern Paris owes much to its late 19th century Second Empire remodelling by the Baron Haussmann: m...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.66910672984007)]",1.0,0.804789662361145,1.0,1.0,1.0 +Income level was found to have a signification correlation to which outcome in the study?,nutrition literacy,nutrition literacy,"[Document(id=2e96d54b758a50cdd3b44fcb12856a2d073a3d1e742653bff90785cf4fe42d77, content: 'S. NAAL introduced the first ever measure of ""the degree to which individuals have the capacity to o...', meta: {'name': 'Nutrition', 'source_id': '861936066c6bb3f32c6021d1ede86c245833c0ef6507763ae6dc2bf67853e44a', 'page_number': 1}, score: 0.4825331385543225)]",1.0,1.0,1.0,1.0,1.0 +How much did Viacom want in damages in their lawsuit against youtube?,$1 billion,$1 billion,"[Document(id=497c11f9fbbfb1f37be3dcd0a29d747c5f8f8967252a25e340b937417a080280, content: ' Three successful complaints for copyright infringement against a user account will result in the ac...', meta: {'name': 'YouTube', 'source_id': '4d18798daeceb997a6dcb04b575dbbdad768671ca9202116370611d944d510be', 'page_number': 1}, score: 0.6587906616356036)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who should be able to challenge administrative orders in court?,individuals,individuals,"[Document(id=8a0f220e6df4e50504002c6ad6312d289ff774c9c753db65e80fb1022f521be2, content: ' Dicey's: “no man is punishable or can be lawfully made to suffer in body or goods except for a dist...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 0.5446938087977631)]",1.0,1.000000238418579,1.0,1.0,1.0 +The Slavs were under what name in the early 6th century?,the Antes and the Sclaveni,the Antes and the Sclaveni,"[Document(id=198e2ff2b18d1e11637616cacf59f7db2e62fac307b8d4669b7694d128532573, content: ' The Old East Slavic Slavuta for the Dnieper River was argued by Henrich Bartek (1907–1986) to be de...', meta: {'name': 'Slavs', 'source_id': 'a55731ff3cd03d8fe34d4e646dbfcdc2088f01190e5110c9ff0baf4e4b486847', 'page_number': 1}, score: 0.6420809543939001)]",1.0,1.0,1.0,1.0,1.0 +How are the results simplified when using the SASO?,"into four summaries that look specifically at responses that correspond to either homosexuality, heterosexuality, bisexuality or asexuality.","measures sexual orientation on a continuum, considers various dimensions of sexual orientation, and considers homosexuality and heterosexuality separately","[Document(id=3638fb16add213ce4e749139ce1852d845838bce357ac5b46f8f2163ed664217, content: ' The classification scheme works such that individuals in the same category show the same balance be...', meta: {'name': 'Sexual_orientation', 'source_id': 'f94279650936a403aa3d2b43ab1ad42b1fd14d05455aa86fbcda6cda978ffe5c', 'page_number': 1}, score: 0.3776180459315529)]",1.0,0.6333848237991333,1.0,1.0,1.0 +What socioeconomic class used the Florentine Tuscan language in Dante's time?,upper class,literate and upper class,"[Document(id=ae574192dddc12b91cee4486bf241bf1e1742369b1f59be7298f2d518e33a433, content: ' However, the majority of the regional languages in Italy are in fact not actually ""dialects"" of sta...', meta: {'name': 'Dialect', 'source_id': '2e05b686e48c9512ae5466f1d15202b44fdc903ea32d0a42c5dbc9ef173d051d', 'page_number': 1}, score: 0.48844579621903084)]",1.0,0.7628111839294434,1.0,1.0,1.0 +"In addition to ammo proposals, what other need was articulated?",The need for some form of tracer or smoke trail,anti-communist Russia policy,"[Document(id=0a8265dd4cca4efea06dcabb2e88acb97e00e621b009a5c17aa4cda38acbbb4f, content: ' He also upgraded the role of the National Security Council in planning all phases of the Cold War. +...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 0.36376947224784034)]",0.0,0.03253399580717087,0.0,0.0,0.0 +What is that power that enables us to love?,Prevenient grace,"wisdom and understanding enhance one's faith in God. According to him, these are the tools that God uses to commune with a contemplative. Love in the soul is both the cause and result of true understanding and judgement","[Document(id=9f0a0b71abae404c8342fedf70ff8f016c3a6b4ec47cc4fee1252fcf9d7adea4, content: ' +One of Albert's greatest contributions was his study of Dionysus the Areopagite, a mystical theolog...', meta: {'name': 'Dominican_Order', 'source_id': 'f55ba4e5b37c737e23256c74229a0bf3a5c697b70e171c6ce20c437d900b023e', 'page_number': 1}, score: 0.449116765433877)]",0.0,0.29284828901290894,0.0,0.0,0.0 +"Who was assassinated on September 22, 189 AD?",He Jin,Caesar,"[Document(id=dd09780a89c3259aae7f10ab2070b88e3687ba1267d0d7b037e76509ccd66648, content: ' Thus, the assemblies became powerless and were unable to oppose him. +Caesar was assassinated on Mar...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.44714711562407566)]",0.0,0.1853158324956894,0.0,0.0,0.0 +How did Gaddafi change the legal system?,he introduced sharia as the basis for the legal system,sharia,"[Document(id=4d48682f805f7a385767a5c2825f3fc2119d2b698a7fc9592571f5590731bae6, content: 'Muammar Muhammad Abu Minyar al-Gaddafi (Arabic: معمر محمد أبو منيار القذافي‎ Arabic pronunciation: [...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.6177406623423897)]",1.0,0.6138635277748108,1.0,1.0,1.0 +What name was it assumed Elizabeth would take upon her marriage to Philip?,"Mountbatten,",House of Mountbatten,"[Document(id=e315f255311d7dc4c756937a51f6602a99f1b8e3b0a145306a3d8c41c258bc57, content: ' She was educated privately at home. Her father acceded to the throne on the abdication of his broth...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.6146287331638632)]",1.0,0.7635930776596069,1.0,1.0,1.0 +What was the attack on the British weakness?,Battle of Fort Bull,the Germans failed to prevent maritime movements or cripple industry in the regions,"[Document(id=1a4ba8fa507fb2fb6dac9df67abbdff76568d8f7a2e9cf8a578677e271a7190c, content: ' +In the north, substantial efforts were made against Newcastle-upon-Tyne and Sunderland, which were ...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 0.5247905975900863)]",0.0,0.17865215241909027,0.0,0.0,0.0 +How much of South Tucson is Native American?,10%,2.7%,"[Document(id=95e1463ec5cbd41e6746a2396eee9d7fb5ba52847c057866e7e7fd5a7f376d91, content: ' The racial makeup of the city was 69.7% White (down from 94.8% in 1970), 5.0% Black or African-Amer...', meta: {'name': 'Tucson,_Arizona', 'source_id': '1cd8368afbbb7156e4d37c16c9795a1cf7209fbd0036331b6759337ae62d7224', 'page_number': 1}, score: 0.5508171690306347)]",1.0,0.7231398820877075,1.0,1.0,1.0 +Whose theory claimed that species were unchanging and designed by God?,John Ray,John Ray,"[Document(id=9a450124afc12f193f72ae1f8310b72ada7cbbf8f34bbd71f66eb32b0f47efbc, content: ' With the development of the modern evolutionary synthesis in the 1930s and 1940s, Darwin's concept ...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 0.6905690706074652)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who can restore a database that has been corrupted?,database administrator,software error,"[Document(id=493262de0ba96f4bed9bc4d9456c8abbffaff489696816e1875ca710ac0d5465, content: ' The migration involves the database's transformation from one DBMS type to another. The transformat...', meta: {'name': 'Database', 'source_id': '98b09743ece9be42ad021f0dc6ffab08c322e7eeace6aae0b98b4bbf67224bd3', 'page_number': 1}, score: 0.3955258112165776)]",1.0,0.23398324847221375,1.0,1.0,1.0 +What unpopular philosopher's text did Avicenna learn from?,Abu Abdullah Nateli,Abu Abdullah Nateli,"[Document(id=ec70e294a8a1203ecdbb9fe95270466a0129a5d607fe306df146559206aa5b31, content: ' Avicenna first began to learn the Quran and literature in such a way that when he was ten years old...', meta: {'name': 'Avicenna', 'source_id': '25cbb1011a29473e1dfa8d96ebe95bc8ead7393548de57e871f959624f7238f1', 'page_number': 1}, score: 0.7299585588069163)]",1.0,1.0,1.0,1.0,1.0 +What was the event called that caused Ayutthaya to reclaim lost territories ?,the Chinese threat,Ayutthaya recovered its territories by 1770,"[Document(id=391f8278a6d664b0fda2a91e05a75fcb178e560aca2778cf3383f74470691074, content: ' Its trade and secular administrative reforms built a prosperous economy for more than 80 years. Fro...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.49740108859030846)]",1.0,0.10474255681037903,1.0,1.0,1.0 +"Before the introduction of the Latin alphabet, how was Old English written?",runic system,runic system,"[Document(id=540a5b537ec7e1fdea905baae428e9a4c76167e7be305ec2dac494ec1ac16d1f, content: 'Old English (Ænglisc, Anglisc, Englisc) or Anglo-Saxon is the earliest historical form of the Englis...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.6427731581432499)]",1.0,1.000000238418579,1.0,1.0,1.0 +"When groups that are antagonistic to each other form a coalition for hidden gain, it is called what?",unholy alliance,unholy alliance,"[Document(id=b281d8f2fa5c72bf9176af2f66fbd72725ba99c1c3e8e2e12044e694c6ee3bbe, content: ' This sum itself may be all or a portion of the difference between the actual (inflated) payment to ...', meta: {'name': 'Political_corruption', 'source_id': 'e7c84fb03cb06b72c4ab51eb24218e9f2a10b95493e43d575342db0ffa3e9e1d', 'page_number': 1}, score: 0.5003888561596567)]",1.0,1.0,1.0,1.0,1.0 +What made a strong economic case for investing in low carbon technologies?,Stern Review,Clean Edge,"[Document(id=7726f77f73261125d28fc6db733db2a8ae6e451dfc266962ad1ddc5ccf22d22f, content: ' +New government spending, regulation, and policies helped the industry weather the 2009 economic cri...', meta: {'name': 'Renewable_energy_commercialization', 'source_id': 'fa5f7904b8df5272f4f870ed401f610435c916171c87b47deed9a45e6bd594ea', 'page_number': 1}, score: 0.5105360280339252)]",1.0,0.10936959832906723,1.0,1.0,1.0 +"Who introduced the Latin version of the word ""insect""?",Pliny the Elder,Pliny the Elder,"[Document(id=d4ddb3955cf6ff9d6bed687f54220f695fef0951972c64309938d9cf7670f6ba, content: ' +The word ""insect"" comes from the Latin word insectum, meaning ""with a notched or divided body"", or ...', meta: {'name': 'Insect', 'source_id': 'e14215fdfb7e1394430dedb24549d773464f110b7f19b86558bc101f84126630', 'page_number': 1}, score: 0.8069591810257106)]",1.0,1.0,1.0,1.0,1.0 +What was started by the Roman Catholic Church to dispense of the Cathars in Languedoc?,The Cathar Crusade,Roman Saturnalia,"[Document(id=092570f05e27505e1e18953896df0dd6bc5b48661c76667e0b9837f68372133a, content: ' The Lenten period of the Liturgical calendar, the six weeks directly before Easter, was originally ...', meta: {'name': 'Carnival', 'source_id': '6276aebf7ba195a529455cfda1ccf9d9aa6ddf58fac44b707047b161538ef46e', 'page_number': 1}, score: 0.49560710656928925)]",0.0,0.22954773902893066,0.0,0.0,0.0 +What music service is a huge competitor for Tidal?,Spotify,Spotify,"[Document(id=ea56a7c43f1347c76ec07affd8ad20729d040a8d58a36f56583b537f903e8142, content: ' +The release of a video-game Starpower: Beyoncé was cancelled after Beyoncé pulled out of a $100 mil...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 0.5759942076459466)]",0.0,1.0,0.0,0.0,0.0 +How much was each copper token worth?,a halfpenny,5-cent,"[Document(id=842ad1ee7799e7ecbfdfb163f8afe265cf62972cba3da0e97be6f505cb10fc03, content: ' In North America, copper mining began with marginal workings by Native Americans. Native copper is ...', meta: {'name': 'Copper', 'source_id': 'de5c60a665b93f9b358be412d53a37ca43d90005e3ec9a553ac38a38f076fadc', 'page_number': 1}, score: 0.47993080401424804)]",0.0,0.7035895586013794,0.0,0.0,0.0 +What ships were attacked in 1852?,Glencoe and Sea Nymph,HMS Jasper,"[Document(id=91f5b594d5423035cb6299d6343f01bf4eb248b5aa8de432e2f6c51ddedca474, content: ' On 21 May 1855, the gunboats and armed steamers attacked the seaport of Taganrog, the most importan...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 0.5363870553469913)]",0.0,0.3474302887916565,0.0,0.0,0.0 +What was the first fossil to display both clearly traditional reptilian characteristics?,Archaeopteryx,Archaeopteryx,"[Document(id=5b537a30444228b3b5e39f8e28a321a8e774f6051560dcab29bb0ece8b5ae99e, content: ' Unlike Archaeopteryx and the non-avialan feathered dinosaurs, who primarily ate meat, recent studie...', meta: {'name': 'Bird', 'source_id': '76114e3e9c92caec1de2bcfd38b6f22e1b48487f665fa6392f914934a6748ae1', 'page_number': 1}, score: 0.5631311233054687)]",1.0,1.0,1.0,1.0,1.0 +What are the theories that integrate molecular genetics with Darwinian evolution called?,the modern evolutionary synthesis,modern evolutionary synthesis,"[Document(id=7ff269861e7df34fcebf71c4907ef4694447d099e228c45550f4e2387812c2fd, content: ' This dogma has since been shown to have exceptions, such as reverse transcription in retroviruses. ...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 0.5781950352097195)]",1.0,0.9375607371330261,1.0,1.0,1.0 +At what time did Tesla get dinner?,8:10 p.m,"8:10 p.m., at Delmonico's restaurant and later the Waldorf-Astoria Hotel. Tesla would telephone his dinner order to the headwaiter, who also could be the only one to serve him. ""The meal was required to be ready at eight o'clock","[Document(id=bf93e5ca5e1091757426fa6d168a7b90f8c1d9dad833880ffcb9b06d8ad2b0f7, content: ' There are a minimum of 278 patents issued to Tesla in 26 countries that have been accounted for. Ma...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.687530275591786)]",1.0,0.31139981746673584,1.0,1.0,1.0 +In what year did the French reach Alsace?,1444,1444,"[Document(id=f693c6f2598c5f0324deafec242b67b3598e0051d9b177df27f75a4c46de16d7, content: ' +Holy Roman Empire central power had begun to decline following years of imperial adventures in Ital...', meta: {'name': 'Alsace', 'source_id': '1105499d303196fb0b7ae4d26209720b0040e63376f114d458fd77f05836cbd4', 'page_number': 1}, score: 0.5827490138884875)]",1.0,1.0,1.0,1.0,1.0 +Who was responsible for the handling of the Roman census?,two censors,two censors,"[Document(id=ce88f4b2acc2cd7986f0b63373b8505a79a6c1ae6aa86d23bd2b76d546f82d61, content: ' Consuls had supreme power in both civil and military matters. While in the city of Rome, the consul...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.48872733295186066)]",1.0,1.0,1.0,1.0,1.0 +When was a resolution agreed to about Chinese human rights issues in San Francisco?,"April 1, 2008","April 1, 2008","[Document(id=1e41339b2f87d6ffb45a6e6882ca2d5cec93b2ada663205b1745d23021ff3c6d, content: ' The planned closing ceremony at Justin Herman Plaza was cancelled and instead, a ceremony was held ...', meta: {'name': '2008_Summer_Olympics_torch_relay', 'source_id': 'a45e6ea09a1ff12d8a239a682c8ef3735ae894d5ab6427b50909053bb046e8b5', 'page_number': 1}, score: 0.5200463937587196)]",1.0,1.0,1.0,1.0,1.0 +Who was the last pope to be crowned?,Paul VI,Pius XII,"[Document(id=84e7b362d02db50bb7dd66920598b2f7bcfeaad549dacdf2dc9aa4ad9c16171f, content: ' John XXIII became the first Pope to receive the title, followed by John Paul II in 1994 and Francis...', meta: {'name': 'Pope_John_XXIII', 'source_id': '21b200b333306d54410b53cfd12e123951dabfc1f534196e5309132d00e8184d', 'page_number': 1}, score: 0.5445188707405088)]",0.0,0.6628619432449341,0.0,0.0,0.0 +Who handles serious medical emergencies on Norfolk Island?,the Royal Australian Air Force,Royal Australian Air Force,"[Document(id=61583a75fdba243f6a812f3bf73c41e686e4f8374db81e4dd5f1a4d5a4430336, content: ' +Non-Australian citizens who are Australian permanent residents should be aware that during their st...', meta: {'name': 'Norfolk_Island', 'source_id': '8be872c9c3302a16163599d52ec60841ddff13042f8e8e2e0aa14de2c192e2ee', 'page_number': 1}, score: 0.6689246760470762)]",1.0,0.9528100490570068,1.0,1.0,1.0 +Were Roman soldiers expected to purchase their own equipment?,"wealthiest citizens, who were able to purchase the best equipment",the Roman state was forced to arm its soldiers at the expense of the state,"[Document(id=1c55594460ba4e27d8751041858090c94678420d1701172511f1bad1a1d70154, content: ' As a result, the Roman state was forced to arm its soldiers at the expense of the state, which it h...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.5434537812989179)]",1.0,0.255912184715271,1.0,1.0,1.0 +Who did Nasser's group try to assassinate?,Hussein Sirri Amer,Nuri al-Said,"[Document(id=c355e0f20036a60ada1e4711a62b4d11adebf07dce76a808203fdd588cc1168e, content: '"" Nasser ordered a crackdown against Syrian communists, dismissing many of them from their governmen...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 0.6408207539725228)]",1.0,0.5893326997756958,1.0,1.0,1.0 +What was AJ Ayer an important member of?,Logical empiricism,Basketball,"[Document(id=d7f6f0383459fe08c23a94fb73e1f2386715e112d5e554aee39542da48c23478, content: ' With the university having high profile sports teams itself, a number of alumni went on to become i...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 0.32824119379813876)]",0.0,0.12688907980918884,0.0,0.0,0.0 +In which year was a settlement reached regarding GE's pollution of the Housatonic River?,1999,1999,"[Document(id=66a9fb4c9d1cbb2bdc754757cfcbce6fa169d08ab7340e60e7b05bd65c9e5d65, content: ' In June 2014 a formal offer From GE worth $17 billion was agreed by the Alstom board. Part of the t...', meta: {'name': 'General_Electric', 'source_id': '26d4044eb2d79191635af69e57c262af8041a70871e60e989128c458529f04cc', 'page_number': 1}, score: 0.49639086782035935)]",1.0,1.0,1.0,1.0,1.0 +What area in Charleston is considered the 'industrial area'?,the city's Neck area,downtown medical district,"[Document(id=f2ed49ccc8f85dd664fcdca57c3499ff0b7c7b7afb1177548f6b0038c9e3b49f, content: ' The downtown medical district is experiencing rapid growth of biotechnology and medical research in...', meta: {'name': 'Charleston,_South_Carolina', 'source_id': '6135d76cf2064b0c4f70e685c5c7e15b8093869ebd10f9a74e8acb07e2173763', 'page_number': 1}, score: 0.5215976359708336)]",1.0,0.47365856170654297,1.0,1.0,1.0 +In what year did Frédéric obtain citizenship in France?,1835,1835,"[Document(id=7b2e31c3f54b0c1a9b97db27512199c167f56012decb955c5fb2ad06a110d92e, content: 'Frédéric François Chopin (/ˈʃoʊpæn/; French pronunciation: ​[fʁe.de.ʁik fʁɑ̃.swa ʃɔ.pɛ̃]; 22 Februar...', meta: {'name': 'Frédéric_Chopin', 'source_id': '5e95b80d7ceb3febb2d1a7dfe09730771a231fc8d23c15cd74d3fa2327cfcb14', 'page_number': 1}, score: 0.5926069778418124)]",1.0,1.0,1.0,1.0,1.0 +What was Tesla afraid someone was trying to do with his invention?,steal the invention,steal the invention,"[Document(id=a7e7642fe4191fff00d610d3d0c95bcb299ff890393b3b9dda127bc3cc40623b, content: ' Tesla tried to interest the US War Department, the United Kingdom, the Soviet Union, and Yugoslavia...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.6408155390011724)]",1.0,1.0,1.0,1.0,1.0 +What is the group called that does not agree with government at all?,anarchists,Non-revolutionary civil disobedience,"[Document(id=172d5ae15d23469aad64106d5290dbb9c81377ad9262463cdd5058509187a788, content: ' +Non-revolutionary civil disobedience is a simple disobedience of laws on the grounds that they are ...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 0.4826805294935842)]",1.0,0.48217201232910156,1.0,1.0,1.0 +What was the name of the bookstore the authors met in?,Old Corner Bookstore,Bibliothèque Bleue,"[Document(id=431a21bc2021ab05bedf5f9c62189ca16d692858fb84414f96b2f293034850da, content: '"" Women, as salonnières, were ""the legitimate governors of [the] potentially unruly discourse"" that ...', meta: {'name': 'Age_of_Enlightenment', 'source_id': 'd6935b64f79e30b7c3de4c952b643a55674c548cd0bb703870a094fd3e646d84', 'page_number': 1}, score: 0.421877163066887)]",0.0,0.32271450757980347,0.0,0.0,0.0 +Time and space are both examples of what type of resource?,complexity resources,fundamental intellectual structure,"[Document(id=dd4f4d9a87883e0aa9f9e24e4db2a323a2a0555ed35dcc2ef63f816e15bbcd52, content: 'Time has long been a major subject of study in religion, philosophy, and science, but defining it in...', meta: {'name': 'Time', 'source_id': 'f3ee42c6d38306d65c6207a15a1b9c65ff08a69728d5d0197f9593548177b229', 'page_number': 1}, score: 0.50831139766181)]",0.0,0.3276873826980591,0.0,0.0,0.0 +What had happened to Frederick's extra troops from Russia?,"Catherine, who ended Russia's alliance with Prussia and withdrew from the war",placed a corps of his own troops under Frederick's command,"[Document(id=d7342a0e3021ae2d3bba679cf10ecb72ddc69518be33b380ee6b9e8f5e4d03c9, content: ' +On the eastern front, progress was very slow. The Russian army was heavily dependent upon its main ...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 0.6968056158780116)]",1.0,0.311606764793396,1.0,1.0,1.0 +What percentage of its workforce did Dell terminate in from 2007 to 2008?,10%,10%,"[Document(id=cea0f1307bbe685005f9642d975856221cf9a5dd6f18a7b87c780156408770d1, content: '” +On April 23, 2008, Dell announced the closure of one of its biggest Canadian call-centers in Kanat...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 0.6064226171895029)]",1.0,0.9999999403953552,1.0,1.0,1.0 +What organization was Rome's official caretaker?,the senate,pontifex maximus,"[Document(id=71718b05e0ad88a856f11532834265f8f64091425308e7bc717ce93437673530, content: ' +Towards the end of the Republic, religious and political offices became more closely intertwined; t...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5607277962054709)]",1.0,0.18580681085586548,1.0,1.0,1.0 +Who chose BYU as the location of the national Middle East Language Resource Center?,United States Department of Education,United States Department of Education,"[Document(id=73c809a12535348ffed165eff42c189f3aa95c6ed455c42b5e7932cd44167a8d, content: ' Several of its language programs are the largest of their kind in the nation, the Russian program b...', meta: {'name': 'Brigham_Young_University', 'source_id': 'cb3b81b6f2c540426803106face67bc5fd4db7b1a9ef422ec3601456732b4785', 'page_number': 1}, score: 0.704733357632634)]",1.0,1.0,1.0,1.0,1.0 +Who discovered Hydrogen gas?,Robert Boyle,Robert Boyle,"[Document(id=b562ec53d471dfbb3ba9b12247b16c622039308f7ec5b5b5344127cfeb67e87d, content: ' +In 1671, Robert Boyle discovered and described the reaction between iron filings and dilute acids, ...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.7076322081359558)]",1.0,1.0,1.0,1.0,1.0 +Where was Polybius forced to go as hostage?,Rome,Rome,"[Document(id=b5c785a8e22edd8d9d6cb5708ef03b91b1dd4137c66bb49c28d760b75bfb84be, content: ' Some areas of the conquered world were more affected by Greek influences than others. The term Hell...', meta: {'name': 'Hellenistic_period', 'source_id': '322682cbb61d31c17e6d21e06723e94efd0ef242a5c9f308888fa14209c4078b', 'page_number': 1}, score: 0.5545614504809487)]",1.0,1.0000001192092896,1.0,1.0,1.0 +In what decade was disco popular?,1970s,1980s,"[Document(id=a1ebb6111bad62b247dd1b756728c9569fa6d4b9a446c19b1950e68e79933994, content: ' As of 2015, house music remains extremely popular in both clubs and in the mainstream pop scene whi...', meta: {'name': 'House_music', 'source_id': 'c556f4a459e8a99b6d2065bab8fd6a918d0a00f13291aad2473922492e4abf2e', 'page_number': 1}, score: 0.5521305810998134)]",0.0,0.8399688005447388,0.0,0.0,0.0 +What do the rivers form?,lakes,lakes,"[Document(id=dfdb4032d57d026d1e52c68a1cff13d84af492b1c4c23fee3c33111d08105adf, content: ' The Rhine originates in a 30 square kilometre area in Switzerland and represents almost 60 percent ...', meta: {'name': 'Alps', 'source_id': 'd2eb3ad3af7d6110ae691bf40660ff7a79e4e9dbfd25785b4e55c7700bfcae79', 'page_number': 1}, score: 0.49814621522653785)]",1.0,1.0,1.0,1.0,1.0 +What were the first 256 code points of Unicode made identical to? ,ISO-8859-1,an ASCII file,"[Document(id=39d421bf1b8a6f42626cef116937dc028d1e423b078f59d7429a3dc5b9a4264c, content: 'Originally based on the English alphabet, ASCII encodes 128 specified characters into seven-bit inte...', meta: {'name': 'ASCII', 'source_id': '4bd7e27c39431d5c4712c32c534ed40f7df89f7b80f6eccb22975178fb7b1be2', 'page_number': 1}, score: 0.59409230149713)]",0.0,0.37076878547668457,0.0,0.0,0.0 +"How many countries did her song ""Irreplaceable"" get number one status in?",five,"Australia, Hungary, Ireland, New Zealand and the United States","[Document(id=e5bfa62fc564ffffb338c1e6042c55a951d678b61fad10f5f106240c14fb440a, content: ' On February 1, 2004, Beyoncé performed the American national anthem at Super Bowl XXXVIII, at the R...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 0.48170959775240163)]",1.0,0.14340342581272125,1.0,1.0,1.0 +How many Examination Boards exist in India?,30,30,"[Document(id=9b7d52cbf72d7c989a4a9357093025a9bd62e4fb3573ea1abab63735c50cb79e, content: ' However, it is not possible to finance these schools with such low tuition fees, which is why all G...', meta: {'name': 'Private_school', 'source_id': 'c62330ba95e9919513bc9fc65c0d21a748b4a518b9b1f905a113de7f37274e44', 'page_number': 1}, score: 0.4656468986809146)]",1.0,1.0,1.0,1.0,1.0 +What play showed an early depiction of civil disobedience?,Antigone,Antigone,"[Document(id=ad70291d4dec497bcd48d02a2bfbb4780dffd558cf0eee2d7c976f107f401335, content: 'One of its earliest massive implementations was brought about by Egyptians against the British occup...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 0.6589027090165251)]",1.0,1.0,1.0,1.0,1.0 +"Once in conversation with the Queen, how is Elizabeth addressed?",Ma'am,Ma'am,"[Document(id=7e78ea0e95b5bd788108ac6801d926bfe2bab5d8c8ee7ca718215a801c9c95ef, content: ' She was celebrated in Canada as the ""longest-reigning sovereign in Canada's modern era"". (King Loui...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.5698498562293763)]",1.0,1.0,1.0,1.0,1.0 +In what century did Paris become largely into fashion?,19th,19th,"[Document(id=faa03729332a5c07c1c9d6d87747d15af8b3043f87a9cff7389d9a1c23d97c7c, content: ' +Paris has been an international capital of high fashion since the 19th century, particularly in the...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.5976425209098843)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did Alfred the Great begin his reign?,871,871,"[Document(id=4c48861d4673f21b2c8302b3b5239fa9bd1cfd1c2e226f06fb3ec71889c9e319, content: ' Cædmon, the earliest English poet we know by name, served as a lay brother in the monastery at Whit...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.4259535276693956)]",0.0,1.0,0.0,0.0,0.0 +What two methods can be used when using Load Testing?,data or a large number of users,large quantities of data or a large number of users,"[Document(id=dd704f4bc758ab475db02844a8a24263dd4fdb32afbc7f7dc86b810f8fa62e4f, content: ' +Load testing is primarily concerned with testing that the system can continue to operate under a sp...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.6507854161834614)]",1.0,0.9289779663085938,1.0,1.0,1.0 +what may possibly be a solution to net neutrality concerns? ,municipal broadband,municipal broadband,"[Document(id=0e27e0ca72a74505db588413835ac7002514412100758a45474267af68c71a90, content: 'An Internet service provider (ISP) is an organization that provides services for accessing, using, t...', meta: {'name': 'Internet_service_provider', 'source_id': 'b6cdeebacab1c0fc99cc348ba10a7ad5f8a09ca4691c74f89374730939ffbbb4', 'page_number': 1}, score: 0.5580314549952922)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What are the ground-attack aircraft in the USAF usually deployed in support for? ,U.S. ground forces,land forces,"[Document(id=2e4b3b475a65a6397953a11388e7fa166873ff680f98b6e7464deb399963430f, content: ' +In the US Air Force, the distinction between bombers, fighters that are actually fighter-bombers, a...', meta: {'name': 'United_States_Air_Force', 'source_id': '1bb222de66e5a2c81f1d5b75bf3eced066939aec82934a5cb176a607b4e8cc8f', 'page_number': 1}, score: 0.5490348511903206)]",1.0,0.7403548955917358,1.0,1.0,1.0 + In what year did the invasion of Russia take place?,1812,1853,"[Document(id=e2b0b7fb7245406ebe95cec6e89cc25fccdc9386ef0e980ff8bd1bb4c36f9a9c, content: 'The Crimean War was a military conflict fought between October 1853 – March 1856 in which Russia los...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 0.583131325566129)]",0.0,0.672028660774231,0.0,0.0,0.0 +What was the practice of religion to the Romans?,personal choice,practical and contractual,"[Document(id=b80b0d7fa56110987db7fc274c40400c9afab47b60f93bd3980be6d29191c35f, content: 'The priesthoods of public religion were held by members of the elite classes. There was no principle...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.6665384745348288)]",1.0,0.22432157397270203,1.0,1.0,1.0 +What is one example of failure testing?,Software fault injection,"defects in dead code will never result in failures. A defect can turn into a failure when the environment is changed. Examples of these changes in environment include the software being run on a new computer hardware platform, alterations in source data, or interacting with different software","[Document(id=f10f419092a1f6acf685021bef11a7a6ebef797bdd44e3997bd6b4a635aa2a89, content: ' Not all defects will necessarily result in failures. For example, defects in dead code will never r...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.4963904181967126)]",1.0,0.4819352328777313,1.0,1.0,1.0 +"Out of the maximum amount of players allowed, one must be a what?",goalkeeper,goalkeeper,"[Document(id=b2c989e4cf587acb5796a5b0366d6e8d6d6f6853aa4d279baf19c0c5eca65217, content: ' The team that has scored more goals at the end of the game is the winner; if both teams have scored...', meta: {'name': 'Association_football', 'source_id': 'f2b7305b09ad8a43b8389a5eb2d84b8ea4b4deaa7968d21fe9da9da9ecda2c6b', 'page_number': 1}, score: 0.40507022305233165)]",1.0,0.9999999403953552,1.0,1.0,1.0 +In which year was the new constitution ratified?,1855,1899,"[Document(id=e93f1498d324a356d0a273aea97c15c32b0eec4faf0660735b4c27f7c165effa, content: ' The white conservative Democrats regained control of the state legislature in 1870, in part by Ku K...', meta: {'name': 'North_Carolina', 'source_id': '3282e3dedcf0f1923e66ebf5b79cf9b9deec27c8ca1df8908e03770b02b38126', 'page_number': 1}, score: 0.49302690301412233)]",0.0,0.5596075057983398,0.0,0.0,0.0 +What vessel was attacked in 1845?,Naiad,Naiad,"[Document(id=c450c10c9cc7ec2362a5a8c23142549f03d77146379f9bb96ab788f8c91887e3, content: ' One year later, the American schooner Dolphin arrived and picked up two boys, the last survivors of...', meta: {'name': 'Marshall_Islands', 'source_id': '762a4f6095de127ded5441c7a17d1c9c38f731ac9cb1ffe8de30af6573be7edf', 'page_number': 1}, score: 0.5136154001583656)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did Travel + Leisure list Yale as one of the most beautiful campuses?,2011,2011,"[Document(id=afdbd08089615eb2bbb9dd9a5945a7e094c57e1f63a4d1f083e6183556a9f3e7, content: ' Moreover, Yale has been accused of failing to treat workers with respect. In a 2003 strike, however...', meta: {'name': 'Yale_University', 'source_id': 'f33638c4d6b8d7572ba500f3a8affca1729aae3046c83a94e2a7424dd50f6549', 'page_number': 1}, score: 0.5912663052349812)]",1.0,1.0,1.0,1.0,1.0 +What sort of variants did the Human Genome Project indicate that many of the measured transcripts were?,alternative variants,moderate levels,"[Document(id=073df342a989619b1eec131e4197dffb009cfe81b1f8022d1a8cd8b13985d1db, content: ' The Human Genome Project states ""People who have lived in the same geographic region for many gener...', meta: {'name': 'Race_(human_categorization)', 'source_id': '1232f7fe18d6e2adb87627ce2042b468cfa3fb9ba0b6afb2165c345f440b5339', 'page_number': 1}, score: 0.4741344601997052)]",0.0,0.14409571886062622,0.0,0.0,0.0 +During what year did the Italians invade Libya?,1911,1911,"[Document(id=d5a42bacf8a08d601a973e5bad25b36c41015f76de0d512f9c3130d5e87ae4de, content: ' He repeatedly expressed a preference for the desert over the city and retreated to the desert to me...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.5997149229460172)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What did Davidson believe the God of Aristotle is synonymous with?,rational thought,rational thought,"[Document(id=3c9b21a4d224bb82a4e4fc744a7bd671552ea8d96f2928fcf811163166442019, content: ' The Nature of Existence (1927) contained his arguments that space, time, and matter cannot possibly...', meta: {'name': 'Idealism', 'source_id': '55a0bd42a4a737442a2d93fe0926424e236910de01b2406ed43c174f2225ff9d', 'page_number': 1}, score: 0.5788858628692617)]",1.0,1.0,1.0,1.0,1.0 +What percentage of students at Notre Dame are the children of former Notre Dame students?,21–24%,21–24%,"[Document(id=bd4681af4e65cd44fb09639ab1c33814925a7f0631458709bac52a44932b0dab, content: ' Around 21–24% of students are children of alumni, and although 37% of students come from the Midwes...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 0.5931308200019992)]",1.0,1.0,1.0,1.0,1.0 +On what day did U.S> planes located Ozawa's fleet?,second,24 October,"[Document(id=67d402d488ad87fcda2d30f304cebc3d3ab52520c994807e5b06e892c3a0e1e1, content: ' The carriers had only 108 planes. The force was not spotted by the Allies until 16:40 on 24 October...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 0.6829916731914795)]",1.0,0.2516951560974121,1.0,1.0,1.0 +Greek was crowned the European champions in what event?,Euro 2004,Euro 2004,"[Document(id=9c5e4e5ebe04f6d60e8e13bc3cfe5cd0c8fa7c41c1148c916c472a26ca7c677b, content: ' Their best ever performance was in the 1896 Summer Olympics, when Greece finished second in the med...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.6102245016864454)]",1.0,1.000000238418579,1.0,1.0,1.0 +Which tense indicates that an action hasn't finished?,imperfective,imperfective,"[Document(id=b086dbc67b80e6f26b1e823c6ec440f8f645661a3c528184598912bb2cd8d10b, content: ' On occasion of the publication’s 45th anniversary, the Croatian weekly journal Forum published the ...', meta: {'name': 'Serbo-Croatian', 'source_id': '2cc55814bc801258e8300ed82e003428dbf11715c74ad99297d61c5dd77023f4', 'page_number': 1}, score: 0.31092698913374395)]",1.0,1.0,1.0,1.0,1.0 +What acoustic element could be distorted by different audio settings in iPods prior to 2007?,Bass,bass instruments,"[Document(id=25a9235e2187510deb110e2fe84f6e0f7a2dbacbbd0bd491e44e8a83b3be8cbf, content: ' This would happen for EQ settings like R&B, Rock, Acoustic, and Bass Booster, because the equalizer...', meta: {'name': 'IPod', 'source_id': 'f0324763387a7e11c235569bada6935891b2c5ba40dd065ab5acc1bb5de8dbed', 'page_number': 1}, score: 0.5267817991196269)]",1.0,0.8207652568817139,1.0,1.0,1.0 +In what way can Neutral triatomic hydrogen exist?,excited form,excited form,"[Document(id=f649524cdd28fbe2ba5a3cadae832b66d8fef791e1b4613b5d6705e7ca2b49a9, content: ' As a plasma, hydrogen's electron and proton are not bound together, resulting in very high electric...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.6038299428337875)]",1.0,1.0,1.0,1.0,1.0 +How are children used in the conflicts in Burma?,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements.,child soldiers,"[Document(id=d0c37a34ef187eefa48aed48ab2f9ed0e96f3a67771e8a88b8c3da762a3a37fb, content: ' According to Samantha Power, a US delegation raised the issue of child soldiers with the government...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.5711083259937013)]",1.0,0.5706413388252258,1.0,1.0,1.0 +In what decades were Arsenal players stereotyped in a negative way?,1970s and 1980s,1970s and 1980s,"[Document(id=23ce9b5b052679777e1f4db02c71a154ef112d1634274e17ec804b5f383f1134, content: ' On 22 January 1927, their match at Highbury against Sheffield United was the first English League m...', meta: {'name': 'Arsenal_F.C.', 'source_id': 'b197b14120aabbc3bf2073d4b3e99c1c51d56d5ac5248f8a97dadcfa056a6f7e', 'page_number': 1}, score: 0.5498172548089693)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did new construction start in Melbourne?,late 1890s,1887,"[Document(id=6b6aefb6efdad5c3ac3a693e89cba02ce5df9e3ad9ecfccc30b54f3ce25a3dba, content: ' Notable writers and poets from Melbourne include Thomas Browne, C. J. Dennis, Germaine Greer and Pe...', meta: {'name': 'Melbourne', 'source_id': 'e9cc00699ea29b72949874ec0e147efa277ecdaa2a727246f376aab1a8d342bd', 'page_number': 1}, score: 0.5695785950450675)]",1.0,0.6789652109146118,1.0,1.0,1.0 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:3__chunk_size:5.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:3__chunk_size:5.csv new file mode 100644 index 0000000..e827584 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_all-mpnet-base-v2__top_k:3__chunk_size:5.csv @@ -0,0 +1,119 @@ +questions,true_answers,predicted_answers,retrieved_docs,doc_mrr,sas,doc_map,doc_recall_single_hit,doc_recall_multi_hit +What were the earliest priesthoods? ,flamines,flamines,"[Document(id=99729d5c09ec68a18e6cb9e326f59027b5c69bc74e05e8c8c0d825360370bf5e, content: ' In household cult, the paterfamilias functioned as priest, and members of his familia as acolytes a...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5674900599530198)]",1.0,1.0,1.0,1.0,1.0 +When was a third visit to Mnafeld scheduled? ,early 1546,"1756. In addition to renewing the efforts to capture Niagara, Crown Point and Duquesne, he proposed attacks on Fort Frontenac on the north shore of Lake Ontario and an expedition through the wilderness of the Maine district and down the Chaudière River to attack the city of Quebec. Bogged down by disagreements and disputes with others, including William Johnson and New York's Governor Sir Charles Hardy, Shirley's plan had little support. +Newcastle replaced him in January 1756","[Document(id=a2f39d9e2ce2dfcafd25d6c7f200598374d660bf4614d701a64ea387d77c9243, content: ' At a meeting in Albany in December 1755, he laid out his plans for 1756. In addition to renewing th...', meta: {'name': 'French_and_Indian_War', 'source_id': 'c41417e63bd9856864be613d4de392da1f213eb8f61402b7f6d037bd8b6259af', 'page_number': 1}, score: 0.39800401904562666)]",0.0,0.08425242453813553,0.0,0.0,0.0 +What was the name of the airfield where heavy raids took place?,Farnborough,KG 55,"[Document(id=90134b283b25268de3e509cb8a548e6a73007ffb1b313d9653603fc2bb9c3b55, content: ' On 19 November 1940 the famous RAF night fighter ace John Cunningham shot down a Ju 88 bomber using...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 0.5773304984390446)]",1.0,0.11742228269577026,1.0,1.0,1.0 +The Modern English ending -'s is derived from what ending in Old English?,-es,-es,"[Document(id=e5cb80c1093f573ce5c1d255322723cb12056865ea80e45b815e3467be2f33fd, content: ' It maintains several distinct cases: the nominative, accusative, genitive, dative and (vestigially)...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.6521761611401243)]",1.0,1.0000001192092896,1.0,1.0,1.0 +In what area is it common for spiritual mentorship to be extremely high?,the West,Hinduism,"[Document(id=f628cb30d18c1374b4351a233133ea8649eb78c607439c749e11f99f56c9bbac, content: ' This guidance is generally offered during a ceremony called the patriarchal blessing. +In Hinduism t...', meta: {'name': 'Teacher', 'source_id': '8d3b7bd3176a10380300a49fa713b8fffa5e77a181df51ea823d2bdeb46d4c7b', 'page_number': 1}, score: 0.45061060826792)]",1.0,0.4012155532836914,1.0,1.0,1.0 +Who is the only Greek volleyball club to win European titles?,Olympiacos,Olympiacos,"[Document(id=927ee26533dc7f17be50a4cb7fc74bed43a4bc44825d916333e6dd9c5d18c958, content: ' The Greek league, the A1 Ethniki, is considered one of the top volleyball leagues in Europe and the...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.6558673131430859)]",1.0,1.0,1.0,1.0,1.0 +How many judges were on the panel for this season?,four,four,"[Document(id=52a0fae35f124062c7009aeb57cfa6fb15f201644550104b1c1a83c6f0449b3a, content: ' This season's judging panel consisted of Randy Jackson, along with Mariah Carey, Keith Urban and Ni...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 0.6008725039019518)]",1.0,1.0,1.0,1.0,1.0 +"Along with a chemical process, what type of process is used by night vision devices to convert infrared into visible light?",electrical,electrical,"[Document(id=05e7c56a6da1ed703309ec6f627bc69c369ec51519a9e5eb8c442673c62365e4, content: ' Infrared can also be used to detect protostars before they begin to emit visible light. Stars emit ...', meta: {'name': 'Infrared', 'source_id': 'ba9d9843065facd8aa75e472f0c7d760ae3d78712335a49f7abaf499acb21d1c', 'page_number': 1}, score: 0.6347191994948904)]",1.0,1.0,1.0,1.0,1.0 +The second most prominent religion in New York is what?,Judaism,Judaism,"[Document(id=4437c778daec446c4362623ed3261e5ec4170314f4aa14f8381b8c7ee6eef3fe, content: ' +Christianity (59%), particularly Catholicism (33%), was the most prevalently practiced religion in ...', meta: {'name': 'New_York_City', 'source_id': '8a1509d057a51ed7a68b42ff87df5d459ac7f15f3d0f4f1685859ba197f9686d', 'page_number': 1}, score: 0.6643419003251825)]",1.0,1.0000001192092896,1.0,1.0,1.0 +The Free Hanseatic City of Bremen includes which cities?,Bremen and Bremerhaven,two urban districts,"[Document(id=a102b81b2963aaefdf1a83d1a1d96870be10e96bb78da447e6f4a0220bbf260f, content: ' The equivalent of the Minister-President is the Senatspräsident (President of the Senate) in Bremen...', meta: {'name': 'States_of_Germany', 'source_id': '60764e89141121f734e956aeeb48fcbe12c7e4fdec683c4f9fa29406a3f0579e', 'page_number': 1}, score: 0.5297784205781209)]",1.0,0.39612674713134766,1.0,1.0,1.0 +What seeks to limit the use of polysyllabic characters?,CPC,ISO 8859 standard,"[Document(id=5feafea02365ea3cabb4a0047d23f7b415679dd6971d7587bf3ebbc903629e6e, content: ' UTF-16 extends UCS-2, using one 16-bit unit for the characters that were representable in UCS-2 and...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.5413251020263476)]",0.0,0.1315920501947403,0.0,0.0,0.0 +In what courts can federal habeas corpus suits be brought?,federal,federal,"[Document(id=be1e7e4a69e49687682a8ca0c606792dd44baba4b9d0f7e7871524099ae07fba, content: ' The second-largest mass execution was also a hanging: the execution of 13 African-American soldiers...', meta: {'name': 'Capital_punishment_in_the_United_States', 'source_id': '0a4e4dda32f7be6a4f78da5ed9ee21d2cc7659112c0d087b2923a482bdc77b5b', 'page_number': 1}, score: 0.5448069890747275)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What did Whitehead consider himself as a philosopher? ,rank amateur,rank amateur,"[Document(id=b0360cb0e79aa53fce9b6b987065d38127ef8c7392ded59d294e940b3fc8c15b, content: ' For Whitehead, education should be the exact opposite of the multidisciplinary, value-free school m...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 0.6532748672096498)]",1.0,1.0,1.0,1.0,1.0 +What condition does a person have to meet to be allowed by law on a coin?,deceased,visually impaired,"[Document(id=044e60f4e15347185cd183d3fb92536dd665d6e605b3f1574811fee862315a1c, content: ' +Though still predominantly green, post-2004 series incorporate other colors to better distinguish d...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.46654778370237093)]",1.0,0.23444828391075134,1.0,1.0,1.0 +Which region of the US experiences rain year round?,Pacific Northwest,nearly all U.S,"[Document(id=8a2739e7083a8933e34f34b82a0419e65f25241888f20dfc9f57614f1dbbb640, content: ' This reputation stems from the frequency of light precipitation in the fall, winter, and spring. In...', meta: {'name': 'Seattle', 'source_id': '1c1a54ff037a1e964929dc466a4a876fa6bb7609d7737ba7226bcd2ab49385cf', 'page_number': 1}, score: 0.6166568410170494)]",0.0,0.3169293701648712,0.0,0.0,0.0 +What is the ability to recognize and adapt to new specific pathogens called?,Adaptive (or acquired) immunity,Adaptive (or acquired) immunity,"[Document(id=af9fa957dfd959c9905dc3b94a52b8ecc9ecb9c39df2fdb2c000fd9ec4597970, content: ' Even simple unicellular organisms such as bacteria possess a rudimentary immune system, in the form...', meta: {'name': 'Immune_system', 'source_id': 'fcb8c5092b2596a66d4f96587d653cbabda606874d86632c35ec40ef8f023051', 'page_number': 1}, score: 0.6517149069258124)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who allegedly hit a home run to the Center?,Babe Ruth,Moose McCormick,"[Document(id=77c29047f332b94600c60411ddf2e8c78097d4dbe8cb5f95cbf5d6e23f8473be, content: ' The two clubs were tied in the bottom of the ninth inning at the Polo Grounds, and N.Y. had runners...', meta: {'name': 'Chicago_Cubs', 'source_id': '10bd5330115a2fe2b9ef60d21abe3942769709161783409af0c4444ccac129c3', 'page_number': 1}, score: 0.5091546444664042)]",1.0,0.31572943925857544,1.0,1.0,1.0 +Which type of graduate from Imperial earned the second highest average starting salary after graduation?,Computing graduates,Ph.D.,"[Document(id=8d74cf3e7a3897f2c2f0163d507374839ffb8775af40004e60fc174f1aea1f7b, content: '), Ph.D. and D.Sc.(Tech.', meta: {'name': 'Institute_of_technology', 'source_id': 'bd3c1ce78fe4328bf8eb2cbd8027759a2a927528ce7e496ce3ebcf9675bc060e', 'page_number': 1}, score: 0.517905680620544)]",0.0,0.45661261677742004,0.0,0.0,0.0 +In what year did Eisenhower propose his second civil rights act?,1960,1957,"[Document(id=e20106e1401ce2522cd3d14d13fae9ea782170d84e4c3db914247513b098ce09, content: '.. a discrimination in the expenditure of those funds"". +Eisenhower told District of Columbia officia...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 0.7159989523516124)]",1.0,0.7518350481987,1.0,1.0,1.0 +What is the current height limitation in Paris in central areas?,50 m,50 m (160 ft),"[Document(id=d89cdeedbc8441f4e2c52a3575e8c530fcce225561404b37c7cdd108d1cc549f, content: ' Modern Paris owes much to its late 19th century Second Empire remodelling by the Baron Haussmann: m...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.7434933599873272)]",1.0,0.804789662361145,1.0,1.0,1.0 +Income level was found to have a signification correlation to which outcome in the study?,nutrition literacy,economic growth,"[Document(id=2fe027a6a83389bb16ea06d04ab9b56d477f3f1adfcac2e27b85393a9c0eb105, content: ' Policies aiming at controlling unemployment and in particular at reducing its inequality-associated...', meta: {'name': 'Economic_inequality', 'source_id': '648ff32b8c146c974b23e115e3a61420b1cdbea9de1f5bf38442ce4c1d9caa42', 'page_number': 1}, score: 0.5218842741538204)]",0.0,0.17941543459892273,0.0,0.0,0.0 +How much did Viacom want in damages in their lawsuit against youtube?,$1 billion,$1 billion,"[Document(id=720eb3795a623e31335108bcc29a4b774e33272bb625d5968983846dd3b35f68, content: ' Three successful complaints for copyright infringement against a user account will result in the ac...', meta: {'name': 'YouTube', 'source_id': '4d18798daeceb997a6dcb04b575dbbdad768671ca9202116370611d944d510be', 'page_number': 1}, score: 0.7102647470573767)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who should be able to challenge administrative orders in court?,individuals,individuals,"[Document(id=cf59c0f0619e715b979051e0a84530bf73463713fa11f2961866d233ac2537f5, content: ' Dicey's: “no man is punishable or can be lawfully made to suffer in body or goods except for a dist...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 0.6138198489552974)]",1.0,1.000000238418579,1.0,1.0,1.0 +The Slavs were under what name in the early 6th century?,the Antes and the Sclaveni,East Slavs,"[Document(id=701344efe2101165a535fe45e922b01123011116f4f123fcd5b1668edbb29898, content: 'Slavs are the largest Indo-European ethno-linguistic group in Europe. They inhabit Central Europe, E...', meta: {'name': 'Slavs', 'source_id': 'a55731ff3cd03d8fe34d4e646dbfcdc2088f01190e5110c9ff0baf4e4b486847', 'page_number': 1}, score: 0.70879690862351)]",1.0,0.44139364361763,1.0,1.0,1.0 +How are the results simplified when using the SASO?,"into four summaries that look specifically at responses that correspond to either homosexuality, heterosexuality, bisexuality or asexuality.",four summaries,"[Document(id=f86f4f0c6b96ee3b686893cc41683ca450066b225884d3390504ef3124153018, content: ' +The SASO consists of 12 questions. Six of these questions assess sexual attraction, four assess sex...', meta: {'name': 'Sexual_orientation', 'source_id': 'f94279650936a403aa3d2b43ab1ad42b1fd14d05455aa86fbcda6cda978ffe5c', 'page_number': 1}, score: 0.45192740188947417)]",1.0,0.47273075580596924,1.0,1.0,1.0 +What socioeconomic class used the Florentine Tuscan language in Dante's time?,upper class,educated and powerful,"[Document(id=ad3cf9694a2a85eaf132042c8ae0262f84f59777a3c746653e4e48474c28a3a2, content: ' The economic prowess and cultural and artistic importance of Tuscany in the Late Middle Ages and th...', meta: {'name': 'Dialect', 'source_id': '2e05b686e48c9512ae5466f1d15202b44fdc903ea32d0a42c5dbc9ef173d051d', 'page_number': 1}, score: 0.5544002922926412)]",1.0,0.5528169870376587,1.0,1.0,1.0 +"In addition to ammo proposals, what other need was articulated?",The need for some form of tracer or smoke trail,time types,"[Document(id=9d1747bc11e311df0105a0308df3eb29ec64ee3c1e3b7335356131a26f7421d1, content: ' Fuzing options were also examined, both impact and time types. Mountings were generally pedestal ty...', meta: {'name': 'Anti-aircraft_warfare', 'source_id': 'c54639424f62ce369356ed8c4e1e9fab8769359dd28029aa89832f5aa994f45d', 'page_number': 1}, score: 0.43523273125444223)]",1.0,0.007751554250717163,1.0,1.0,1.0 +What is that power that enables us to love?,Prevenient grace,universal love,"[Document(id=a78f83359689c5324d38fe66319b2e022231f3eea49709cfa6adc4ff161463f3, content: ' Love led to spiritual growth that, in turn, reflected an increase in love for God and humanity. Thi...', meta: {'name': 'Dominican_Order', 'source_id': 'f55ba4e5b37c737e23256c74229a0bf3a5c697b70e171c6ce20c437d900b023e', 'page_number': 1}, score: 0.46093232704322956)]",0.0,0.32447677850723267,0.0,0.0,0.0 +"Who was assassinated on September 22, 189 AD?",He Jin,Caesar,"[Document(id=3fbeb6b6e7e9e345e223b12ac0cf1796c2923e45de7bc622585e09312d4d60ef, content: ' +Caesar was now the primary figure of the Roman state, enforcing and entrenching his powers. His ene...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.43051288719404746)]",0.0,0.1853158324956894,0.0,0.0,0.0 +How did Gaddafi change the legal system?,he introduced sharia as the basis for the legal system,introduced sharia,"[Document(id=bdc01f662020adb880737d79c53bc6a192c293e64aaacad9c0bca0ebff7ff92d, content: ' Intent on pushing Libya towards ""Islamic socialism"", he introduced sharia as the basis for the lega...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.5944495251309094)]",1.0,0.7905094623565674,1.0,1.0,1.0 +What name was it assumed Elizabeth would take upon her marriage to Philip?,"Mountbatten,",Elizabeth,"[Document(id=e654e9ff005029974fb352ae5cb703e1734edb2deffb25e38aadc6d3624aea0c, content: ' On 6 February 1952, they had just returned to their Kenyan home, Sagana Lodge, after a night spent ...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.6176147417529756)]",1.0,0.27963170409202576,1.0,1.0,1.0 +What was the attack on the British weakness?,Battle of Fort Bull,Minorca in the Mediterranean,"[Document(id=ec4515c9557988b0fec18967610f700f87d76c31c093e709fb322770706f03b8, content: ' It proved completely successful. Pitt had a clear appreciation of the enormous value of imperial po...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 0.5872765017217323)]",0.0,0.0038108360022306442,0.0,0.0,0.0 +How much of South Tucson is Native American?,10%,10%,"[Document(id=19e07536fc4d6a422f717376ebb5c165dc66d2b84c2b4069db4dbb4757a892d6, content: ' It was first incorporated in 1936, and later reincorporated in 1940. The population consists of abo...', meta: {'name': 'Tucson,_Arizona', 'source_id': '1cd8368afbbb7156e4d37c16c9795a1cf7209fbd0036331b6759337ae62d7224', 'page_number': 1}, score: 0.615846351596201)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Whose theory claimed that species were unchanging and designed by God?,John Ray,William Paley,"[Document(id=1de1b94fbb49d9d6bbc2cdcf71ef4f81cd3ec13b95fe79865c57979200ac9095, content: ' Lamarck thought there was an inherent progressive tendency driving organisms continuously towards g...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 0.646918697976492)]",1.0,0.32971838116645813,1.0,1.0,1.0 +Who can restore a database that has been corrupted?,database administrator,dedicated backup files,"[Document(id=d62f58c8737fe86268de0ebf0cb0ef4eae1f9ba5f33735f7cadb2e83beb9e377, content: 'g., cases when the database is found corrupted due to a software error, or if it has been updated wi...', meta: {'name': 'Database', 'source_id': '98b09743ece9be42ad021f0dc6ffab08c322e7eeace6aae0b98b4bbf67224bd3', 'page_number': 1}, score: 0.4885855296732807)]",1.0,0.11919853091239929,1.0,1.0,1.0 +What unpopular philosopher's text did Avicenna learn from?,Abu Abdullah Nateli,Abu Abdullah Nateli,"[Document(id=a3a017724e8f025ada3a4a92e98993ec44b17e0941134ce7e52db3191cd3ea08, content: ' He learned Indian arithmetic from an Indian greengrocer,ءMahmoud Massahi and he began to learn more...', meta: {'name': 'Avicenna', 'source_id': '25cbb1011a29473e1dfa8d96ebe95bc8ead7393548de57e871f959624f7238f1', 'page_number': 1}, score: 0.7241002508861939)]",1.0,1.0,1.0,1.0,1.0 +What was the event called that caused Ayutthaya to reclaim lost territories ?,the Chinese threat,With Burma preoccupied by the Chinese threat,"[Document(id=a8730bd7201f8a080d3cffb4ba6d02e7cc82f409fe226dfe6987d18adaeb8596, content: ' Its trade and secular administrative reforms built a prosperous economy for more than 80 years. Fro...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.5260146870162367)]",1.0,0.6175083518028259,1.0,1.0,1.0 +"Before the introduction of the Latin alphabet, how was Old English written?",runic system,runic system,"[Document(id=bdf4733a7528116f101a96db4ba45ead494553faf3d56d886cb519e157bb2375, content: ' More entered the language when the Anglo-Saxons were converted to Christianity and Latin-speaking p...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.593625436130294)]",1.0,1.000000238418579,1.0,1.0,1.0 +"When groups that are antagonistic to each other form a coalition for hidden gain, it is called what?",unholy alliance,unholy alliance,"[Document(id=f1fa43fc9c1c5bd46b9f5455701bf99b0e98d4579816d755389bcc2dbba1bcda, content: ' This sum itself may be all or a portion of the difference between the actual (inflated) payment to ...', meta: {'name': 'Political_corruption', 'source_id': 'e7c84fb03cb06b72c4ab51eb24218e9f2a10b95493e43d575342db0ffa3e9e1d', 'page_number': 1}, score: 0.5395585598917987)]",1.0,1.0,1.0,1.0,1.0 +What made a strong economic case for investing in low carbon technologies?,Stern Review,the Stern Review,"[Document(id=333c4da3ee155b7d7b4ddf91857cd1ec4e1c3cafb4159ebedda18347755b3f1f, content: ' In the US the main tool to track and stimulate voluntary actions is Green-e program managed by Cent...', meta: {'name': 'Renewable_energy_commercialization', 'source_id': 'fa5f7904b8df5272f4f870ed401f610435c916171c87b47deed9a45e6bd594ea', 'page_number': 1}, score: 0.5373316142063316)]",1.0,0.8556329011917114,1.0,1.0,1.0 +"Who introduced the Latin version of the word ""insect""?",Pliny the Elder,Pliny the Elder,"[Document(id=e8da082f5022b804801ddb2778f16c32226f0ecb59f3ed7f8fcdae17653f162c, content: ' +The word ""insect"" comes from the Latin word insectum, meaning ""with a notched or divided body"", or ...', meta: {'name': 'Insect', 'source_id': 'e14215fdfb7e1394430dedb24549d773464f110b7f19b86558bc101f84126630', 'page_number': 1}, score: 0.8359677035541203)]",1.0,1.0,1.0,1.0,1.0 +What was started by the Roman Catholic Church to dispense of the Cathars in Languedoc?,The Cathar Crusade,The Cathar Crusade,"[Document(id=08215551b881c7cefad5bb5baaaddc4d6f9a5802a709d61e32985651e2115c85, content: ' By the 11th century, more organised groups such as the Patarini, the Dulcinians, the Waldensians an...', meta: {'name': 'Heresy', 'source_id': '8ba473262f8508a9c99699bb09cd022993896ece1fed1463a9cf4bd7eb0c3e68', 'page_number': 1}, score: 0.5435941585791171)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What music service is a huge competitor for Tidal?,Spotify,Spotify,"[Document(id=2fca0ca848c224a8494e3eead28c715beb0ffe86531f3f21ea833479b2f3f978, content: ' Including Beyoncé and Jay-Z, sixteen artist stakeholders (such as Kanye West, Rihanna, Madonna, Chr...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 0.505039751752655)]",0.0,1.0,0.0,0.0,0.0 +How much was each copper token worth?,a halfpenny,$100,"[Document(id=1d82522f504285e7d6da03df31727546ed52678efdb9599878b409e6371fa7d4, content: '599⁄10. When currently issued in circulating form, denominations equal to or less than a dollar are ...', meta: {'name': 'United_States_dollar', 'source_id': '9ef37368424d1caed6a07c85856757a87fc5eb888868448328b70e21e5c46a4a', 'page_number': 1}, score: 0.48399488729273094)]",0.0,0.5045638680458069,0.0,0.0,0.0 +What ships were attacked in 1852?,Glencoe and Sea Nymph,American warship,"[Document(id=54d4a2af7381927f4f0f8f5873e297e6c594cc1da6c925d2424a6630c74dd945, content: 'S. fleet encountered massive AA fire with proximity fuzes. Only one American warship was slightly da...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 0.5361096474177641)]",0.0,0.25197282433509827,0.0,0.0,0.0 +What was the first fossil to display both clearly traditional reptilian characteristics?,Archaeopteryx,Archaeopteryx,"[Document(id=22e5680a1c41aa000971ff89fccd80f808c8f88ae7d0f5c0d81eb763ce026955, content: ' Unlike Archaeopteryx and the non-avialan feathered dinosaurs, who primarily ate meat, recent studie...', meta: {'name': 'Bird', 'source_id': '76114e3e9c92caec1de2bcfd38b6f22e1b48487f665fa6392f914934a6748ae1', 'page_number': 1}, score: 0.6138421968979502)]",1.0,1.0,1.0,1.0,1.0 +What are the theories that integrate molecular genetics with Darwinian evolution called?,the modern evolutionary synthesis,the modern evolutionary synthesis,"[Document(id=54fb8e44f718e454d5f4aef7097036553aa91b829fc2ea08644a1f9fc17e50f5, content: ' +The theories developed in the 1930s and 1940s to integrate molecular genetics with Darwinian evolut...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 0.6807694823421528)]",1.0,0.9999998807907104,1.0,1.0,1.0 +At what time did Tesla get dinner?,8:10 p.m,eight o'clock,"[Document(id=5dfd5078daafbc2ef3695636e6d70b42db2abc79ac389c20eac0417969714c11, content: ' Tesla would telephone his dinner order to the headwaiter, who also could be the only one to serve h...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.7466485072261548)]",1.0,0.7012972831726074,1.0,1.0,1.0 +In what year did the French reach Alsace?,1444,1444,"[Document(id=18dbb6f8a4452ee88a36944ed7b5a77c2233677f5c2c84c6d98113a4c1519651, content: ' After the conclusion of the war, France was again free to pursue its desire to reach the Rhine and ...', meta: {'name': 'Alsace', 'source_id': '1105499d303196fb0b7ae4d26209720b0040e63376f114d458fd77f05836cbd4', 'page_number': 1}, score: 0.639941686896402)]",1.0,1.0,1.0,1.0,1.0 +Who was responsible for the handling of the Roman census?,two censors,censors,"[Document(id=3749ef6e14ea9af3893cfd84917e3a39acc2212480fc846e5a9231414a5599e6, content: ' Praetors administered civil law and commanded provincial armies. Every five years, two censors were...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.5740412708539135)]",1.0,0.806065022945404,1.0,1.0,1.0 +When was a resolution agreed to about Chinese human rights issues in San Francisco?,"April 1, 2008","April 1, 2008","[Document(id=80f5e2e80143434aa6d2d78a16f411f7f6ab4f8ffffbd29914ad0c75e8bf31da, content: ' The cost to the city for hosting the event was reported to be USD $726,400, nearly half of which ha...', meta: {'name': '2008_Summer_Olympics_torch_relay', 'source_id': 'a45e6ea09a1ff12d8a239a682c8ef3735ae894d5ab6427b50909053bb046e8b5', 'page_number': 1}, score: 0.594644661480704)]",1.0,1.0,1.0,1.0,1.0 +Who was the last pope to be crowned?,Paul VI,Pope John Paul I,"[Document(id=dd784bbd39741346a23d0cf3feeacc82ec660daed03b91ed8b39035d65072e43, content: ' He was the last pope to date to be crowned; his successor Pope John Paul I replaced the Papal Coron...', meta: {'name': 'Pope_Paul_VI', 'source_id': 'fc82a40c990c0b3840b06b32643858d39984b66d78ae950561d809dc84d6edf8', 'page_number': 1}, score: 0.5499108306714565)]",1.0,0.7952640056610107,1.0,1.0,1.0 +Who handles serious medical emergencies on Norfolk Island?,the Royal Australian Air Force,Royal Australian Air Force,"[Document(id=c983e8f9fccac478cd7d895c53ca7636f4291c3a87693dd68246b03e75a4be42, content: ' All visitors to Norfolk Island, including Australians, are recommended to purchase travel insurance...', meta: {'name': 'Norfolk_Island', 'source_id': '8be872c9c3302a16163599d52ec60841ddff13042f8e8e2e0aa14de2c192e2ee', 'page_number': 1}, score: 0.7097080153729567)]",1.0,0.9528100490570068,1.0,1.0,1.0 +Were Roman soldiers expected to purchase their own equipment?,"wealthiest citizens, who were able to purchase the best equipment",the Roman state was forced to arm its soldiers at the expense of the state,"[Document(id=f0adc94ea827dd57e6a4ed0151d80a56b2b699e6457dfd794787f7d9f9121082, content: ' As a result, the Roman state was forced to arm its soldiers at the expense of the state, which it h...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 0.5204352783694779)]",1.0,0.255912184715271,1.0,1.0,1.0 +Who did Nasser's group try to assassinate?,Hussein Sirri Amer,Prime Minister Hussein Sirri Pasha,"[Document(id=25d98a87bc4019e4527fe3dbf903b4491eb5dbfb5600c6d91214b870a2dbf0f9, content: ' It was here that Nasser and his closest comrades, including Sadat and Amer, first discussed their d...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 0.6302125054603774)]",1.0,0.7236297130584717,1.0,1.0,1.0 +What was AJ Ayer an important member of?,Logical empiricism,educators,"[Document(id=f0501a0350c5212f0b765e4f2908281937b33d70767d79809051b0aab43bc078, content: ' +Other: Civil rights leader W. E. B. Du Bois; philosopher Henry David Thoreau; authors Ralph Waldo E...', meta: {'name': 'Harvard_University', 'source_id': '7f91a9284c2db4a8d1147861fa2c02c6a25281db1cbc78097950be011c0e6269', 'page_number': 1}, score: 0.377496105816545)]",0.0,0.2606774866580963,0.0,0.0,0.0 +In which year was a settlement reached regarding GE's pollution of the Housatonic River?,1999,1999,"[Document(id=ff4e5f9b67b28efe1d784938ea3eabd7391e59030ee2fef37239be9884eafceb, content: ' In 1999, the company agreed to pay a $250 million settlement in connection with claims it polluted ...', meta: {'name': 'General_Electric', 'source_id': '26d4044eb2d79191635af69e57c262af8041a70871e60e989128c458529f04cc', 'page_number': 1}, score: 0.5382385055230676)]",1.0,1.0,1.0,1.0,1.0 +What area in Charleston is considered the 'industrial area'?,the city's Neck area,downtown medical district,"[Document(id=0b7915915ef0141c882fe46817cee7fb758759c0f34482f57922cd19e5f5e512, content: ' The downtown medical district is experiencing rapid growth of biotechnology and medical research in...', meta: {'name': 'Charleston,_South_Carolina', 'source_id': '6135d76cf2064b0c4f70e685c5c7e15b8093869ebd10f9a74e8acb07e2173763', 'page_number': 1}, score: 0.5525530182515901)]",1.0,0.47365856170654297,1.0,1.0,1.0 +In what year did Frédéric obtain citizenship in France?,1835,1835,"[Document(id=2dc3b5cfb05bfeb4e2c0b072a5a5e6417b1109588ffcbd88ffd03346c8bc1975, content: ' In France he used the French versions of his given names, and after receiving French citizenship in...', meta: {'name': 'Frédéric_Chopin', 'source_id': '5e95b80d7ceb3febb2d1a7dfe09730771a231fc8d23c15cd74d3fa2327cfcb14', 'page_number': 1}, score: 0.6787394879994724)]",1.0,1.0,1.0,1.0,1.0 +What was Tesla afraid someone was trying to do with his invention?,steal the invention,steal the invention,"[Document(id=4d3444ecb9a546c18669ac8c8405c5f0787407509b989960d32c3dae162fd6f1, content: ' Tesla tried to interest the US War Department, the United Kingdom, the Soviet Union, and Yugoslavia...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 0.6860750023448956)]",1.0,1.0,1.0,1.0,1.0 +What is the group called that does not agree with government at all?,anarchists,the Taliban,"[Document(id=e89c703302c77eab7bfdec8231e6d3e70c21730080a2c0b71e7ff29c7fe0dce2, content: ' Jamaat-e-Islami of Pakistan is basically a socio-political and democratic Vanguard party but has al...', meta: {'name': 'Islamism', 'source_id': 'b18ef6af50cd5612358df7ee136f3b80ef65a22ebd6f502f95f8b0dec581619a', 'page_number': 1}, score: 0.47644565542225814)]",0.0,0.5441292524337769,0.0,0.0,0.0 +What was the name of the bookstore the authors met in?,Old Corner Bookstore,Paris,"[Document(id=79a18d86b70f7d064e2861983eacef8c4aaea013b88e70e045d4f5e02765b10f, content: ' +Paris is a city of books and bookstores. In the 1970s, 80 percent of French-language publishing hou...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.4805254691287104)]",0.0,0.2811535596847534,0.0,0.0,0.0 +Time and space are both examples of what type of resource?,complexity resources,a systematic mental framework,"[Document(id=14e1565f9e55beb6f0a6a436ef63987e7ecfcabcded323a31ce3f0c38af15c48, content: '26) by which we simultaneously grasp the past in memory, the present by attention, and the future by...', meta: {'name': 'Time', 'source_id': 'f3ee42c6d38306d65c6207a15a1b9c65ff08a69728d5d0197f9593548177b229', 'page_number': 1}, score: 0.5150513533287606)]",0.0,0.29695814847946167,0.0,0.0,0.0 +What had happened to Frederick's extra troops from Russia?,"Catherine, who ended Russia's alliance with Prussia and withdrew from the war",Frederick moved the bulk of his eastern forces to Pomerania,"[Document(id=2e241d6a2d549d7fb7ef44a92718ad0b9c17de781fc1ddf4c4c4311b6193515f, content: ' Frederick sent urgent requests to Britain for more substantial assistance, as he was now without an...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 0.6627893603410437)]",1.0,0.37803566455841064,1.0,1.0,1.0 +What percentage of its workforce did Dell terminate in from 2007 to 2008?,10%,10%,"[Document(id=a3b415fe93ba5b2bfab1307e12a6c3d0b910a94f573fedacc0fe6560757ac075, content: ' In total, Dell announced the ending of about 8,800 jobs in 2007–2008 — 10% of its workforce. +The co...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 0.6007811226441673)]",1.0,0.9999999403953552,1.0,1.0,1.0 +What organization was Rome's official caretaker?,the senate,Sabratha,"[Document(id=d5925fe167b21550b3b94ce08e21d5d09d8ec2f69b4e80810ada3dd38cd002d8, content: ' This remained a primary religious and social duty of emperors. +The Roman Empire expanded to include...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.5396579508662903)]",1.0,0.21925941109657288,1.0,1.0,1.0 +Who chose BYU as the location of the national Middle East Language Resource Center?,United States Department of Education,United States Department of Education,"[Document(id=10406721e1bacc4b94568efb8105444ce33c23b507417efe2e032c93615c0677, content: ' Several of its language programs are the largest of their kind in the nation, the Russian program b...', meta: {'name': 'Brigham_Young_University', 'source_id': 'cb3b81b6f2c540426803106face67bc5fd4db7b1a9ef422ec3601456732b4785', 'page_number': 1}, score: 0.7151687082822364)]",1.0,1.0,1.0,1.0,1.0 +Who discovered Hydrogen gas?,Robert Boyle,Robert Boyle,"[Document(id=694b7ad0ec30f16563c095bfd8aa73a286298746fa8b5b6b568961e806fe7462, content: ' +In 1671, Robert Boyle discovered and described the reaction between iron filings and dilute acids, ...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.7264078227319748)]",1.0,1.0,1.0,1.0,1.0 +Where was Polybius forced to go as hostage?,Rome,Rome,"[Document(id=39971eda0fac39993fb2ad01159df52ea10276610c72fad19cdc5a8d781c70e0, content: ' The earliest and most credible surviving source for the Hellenistic period is Polybius of Megalopol...', meta: {'name': 'Hellenistic_period', 'source_id': '322682cbb61d31c17e6d21e06723e94efd0ef242a5c9f308888fa14209c4078b', 'page_number': 1}, score: 0.5701725567412796)]",1.0,1.0000001192092896,1.0,1.0,1.0 +In what decade was disco popular?,1970s,1980s,"[Document(id=f73eacb187791d5a8d696649438e6ccf78f199ed312381153128690ac760a712, content: ' Emphasizing glamour, fashion, and escapism in distinction to the experimental seriousness of earlie...', meta: {'name': 'Post-punk', 'source_id': 'bee90cece10cbdebcf39c6ca24886eb874551f866ed672c4f1f297c137963f75', 'page_number': 1}, score: 0.5077274668295635)]",0.0,0.8399688005447388,0.0,0.0,0.0 +What do the rivers form?,lakes,Nieuwe Maas,"[Document(id=2c02265ed1b53d17cbf8d8090f6a3dad995df9f6a8bbe12e06c780368660607e, content: ' The middle flow begins as Nederrijn, then changes into Lek, then joins the Noord, thereby forming N...', meta: {'name': 'Rhine', 'source_id': '23686e48a61504679a91dac10d83b287aef3dff7dabc0ab881c281059187ce6a', 'page_number': 1}, score: 0.5238157166111579)]",0.0,0.2038368135690689,0.0,0.0,0.0 +What were the first 256 code points of Unicode made identical to? ,ISO-8859-1,ISO-8859-1,"[Document(id=5feafea02365ea3cabb4a0047d23f7b415679dd6971d7587bf3ebbc903629e6e, content: ' UTF-16 extends UCS-2, using one 16-bit unit for the characters that were representable in UCS-2 and...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 0.6118382911790479)]",1.0,1.0000001192092896,1.0,1.0,1.0 +"How many countries did her song ""Irreplaceable"" get number one status in?",five,third,"[Document(id=7d15073ce76bfdf7dda1cd559c894ff0db46c284e6ad98cb2d9075367a409177, content: ' Beyoncé was the third woman to have writing credits on three number one songs (""Irreplaceable"", ""Gr...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 0.5390347865865979)]",1.0,0.288579523563385,1.0,1.0,1.0 +How many Examination Boards exist in India?,30,30,"[Document(id=87a93f72cbb9c00ea6477b2a31d21c284a2ecf07733f93b0fb4ba7b277f5b8f5, content: ' It is within the power of both the union government and the state governments to govern schools sin...', meta: {'name': 'Private_school', 'source_id': 'c62330ba95e9919513bc9fc65c0d21a748b4a518b9b1f905a113de7f37274e44', 'page_number': 1}, score: 0.5680934731225737)]",1.0,1.0,1.0,1.0,1.0 +What play showed an early depiction of civil disobedience?,Antigone,Antigone,"[Document(id=99743eb7e7277e11f539b5449857c5f012a842facd618f6a75ff178a89fc1f76, content: 'One of its earliest massive implementations was brought about by Egyptians against the British occup...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 0.6792855736042169)]",1.0,1.0,1.0,1.0,1.0 +"Once in conversation with the Queen, how is Elizabeth addressed?",Ma'am,Your Majesty,"[Document(id=6661c5b42cee4d9122296712db8297ce21db50225e9f1d2cad1e056d68ca32c8, content: ' Additional styles include Defender of the Faith and Duke of Lancaster. When in conversation with th...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 0.7342183996028037)]",1.0,0.5040283799171448,1.0,1.0,1.0 +In what century did Paris become largely into fashion?,19th,19th,"[Document(id=ff2ec1a3232f7d74c84b198defa77dd9879cdb65474a7dab2bf47da19675285e, content: ' +Paris has been an international capital of high fashion since the 19th century, particularly in the...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 0.6173766295402898)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did Alfred the Great begin his reign?,871,871,"[Document(id=6f9dc46d6eac9c736c44b2255083b35d238c93547c3efcd0f85d83a3feef71b2, content: ' The speech of eastern and northern parts of England was subject to strong Old Norse influence due t...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 0.5422845553260691)]",0.0,1.0,0.0,0.0,0.0 +What two methods can be used when using Load Testing?,data or a large number of users,load or endurance,"[Document(id=adc4b66570c6ed9d13c88909bbdc957a40b544797d95742201330710a6ff6385, content: ' Stability testing (often referred to as load or endurance testing) checks to see if the software ca...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.5548886464218801)]",1.0,0.19878104329109192,1.0,1.0,1.0 +what may possibly be a solution to net neutrality concerns? ,municipal broadband,municipal broadband,"[Document(id=5f8dc7264853d3b0b88a03420e7878e16fa4c08ef70ab7cc5d722e889b21dda6, content: 'S. Federal Communications Commission (FCC) was reported to be considering a new rule that will permi...', meta: {'name': 'Internet_service_provider', 'source_id': 'b6cdeebacab1c0fc99cc348ba10a7ad5f8a09ca4691c74f89374730939ffbbb4', 'page_number': 1}, score: 0.6158295658195905)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What are the ground-attack aircraft in the USAF usually deployed in support for? ,U.S. ground forces,U.S. ground forces,"[Document(id=87ffdf42e04cd4e5aabc33e35caf9614ba61758e355a1d97a38a524ba0bedf46, content: ' +The ground-attack aircraft of the USAF are designed to attack targets on the ground and are often d...', meta: {'name': 'United_States_Air_Force', 'source_id': '1bb222de66e5a2c81f1d5b75bf3eced066939aec82934a5cb176a607b4e8cc8f', 'page_number': 1}, score: 0.8271269612162027)]",1.0,1.0,1.0,1.0,1.0 + In what year did the invasion of Russia take place?,1812,1856,"[Document(id=fcb4369249e697b0fbc49a777c7140b22fe82eef90e450df2c7fe8b613db85c0, content: ' A second counterattack, ordered personally by Nicholas, was defeated by Omar Pasha. The front settl...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 0.5795463116891318)]",0.0,0.702621579170227,0.0,0.0,0.0 +What was the practice of religion to the Romans?,personal choice,a part of daily life,"[Document(id=064c7c3a01bdf93ad9d4d1654337277537a969dc5714be1920555a00340b215e, content: ' Even the most skeptical among Rome's intellectual elite such as Cicero, who was an augur, saw relig...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 0.6620222938777092)]",1.0,0.2831001281738281,1.0,1.0,1.0 +What is one example of failure testing?,Software fault injection,Software fault injection,"[Document(id=827178e2fa13b1045cb5486ccca30dd5c2a87aa9e9f903cbdd2808ec1b93a098, content: ' Beta versions can be made available to the open public to increase the feedback field to a maximal ...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 0.5885062293441778)]",1.0,1.0000001192092896,1.0,1.0,1.0 +"Out of the maximum amount of players allowed, one must be a what?",goalkeeper,goalkeeper,"[Document(id=e6ee12359c832f972207465515e8a9b305bdf92628324bb99b5470cc0b0b74b4, content: ' The same laws are designed to apply to all levels of football, although certain modifications for g...', meta: {'name': 'Association_football', 'source_id': 'f2b7305b09ad8a43b8389a5eb2d84b8ea4b4deaa7968d21fe9da9da9ecda2c6b', 'page_number': 1}, score: 0.5034902789294909)]",1.0,0.9999999403953552,1.0,1.0,1.0 +In which year was the new constitution ratified?,1855,1899,"[Document(id=71b3f8496f110bfe85645777f313b2eb556ef37714f3cfcd0d146aa41835ef0a, content: ' The Democrats regained control of the legislature in 1896 and passed laws to impose Jim Crow and ra...', meta: {'name': 'North_Carolina', 'source_id': '3282e3dedcf0f1923e66ebf5b79cf9b9deec27c8ca1df8908e03770b02b38126', 'page_number': 1}, score: 0.5270840905961176)]",0.0,0.5596075057983398,0.0,0.0,0.0 +What vessel was attacked in 1845?,Naiad,Naiad,"[Document(id=06462bba1bbaf61b1e06ec260c9ebb15ac9f6984188e161293ac729b7950da5d, content: ' One year later, the American schooner Dolphin arrived and picked up two boys, the last survivors of...', meta: {'name': 'Marshall_Islands', 'source_id': '762a4f6095de127ded5441c7a17d1c9c38f731ac9cb1ffe8de30af6573be7edf', 'page_number': 1}, score: 0.6002989836151421)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did Travel + Leisure list Yale as one of the most beautiful campuses?,2011,2011,"[Document(id=1eb83a50e062eec7af6abab3de1bf0b5576a4c75ac168cc64f1b77a2e268c579, content: '7 km2) Yale-Myers Forest in Connecticut's Quiet Corner—and nature preserves including Horse Island. +...', meta: {'name': 'Yale_University', 'source_id': 'f33638c4d6b8d7572ba500f3a8affca1729aae3046c83a94e2a7424dd50f6549', 'page_number': 1}, score: 0.6034040579940345)]",1.0,1.0,1.0,1.0,1.0 +What sort of variants did the Human Genome Project indicate that many of the measured transcripts were?,alternative variants,alternative variants of the same genes,"[Document(id=616efe41146a20d96fcfa37d16fa50ce52cca8acf43b362e86a15a714f86ddff, content: ' Subsequently, the sequencing in the Human Genome Project indicated that many of these transcripts w...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 0.5241570974633485)]",1.0,0.5312574505805969,1.0,1.0,1.0 +During what year did the Italians invade Libya?,1911,1911,"[Document(id=3cdcefdaa320ecb0d100eef0c92063190f1030f98ee702385d5ad087a454afc6, content: ' He repeatedly expressed a preference for the desert over the city and retreated to the desert to me...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 0.6479849215752526)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What did Davidson believe the God of Aristotle is synonymous with?,rational thought,rational thought,"[Document(id=14435d1137e579f740d93ef8fabdf50640828e96d9dd732c7b8c6d4932ae6a58, content: '"" The theory was indebted to Aristotle's pluralism and his concepts of Soul, the rational, living as...', meta: {'name': 'Idealism', 'source_id': '55a0bd42a4a737442a2d93fe0926424e236910de01b2406ed43c174f2225ff9d', 'page_number': 1}, score: 0.5719476914223833)]",1.0,1.0,1.0,1.0,1.0 +What percentage of students at Notre Dame are the children of former Notre Dame students?,21–24%,21–24%,"[Document(id=2b6b9afae589749ea7e615fd1e236b006b90acb763735662b8fec51a24d455bd, content: ' Around 21–24% of students are children of alumni, and although 37% of students come from the Midwes...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 0.6697027777597087)]",1.0,1.0,1.0,1.0,1.0 +On what day did U.S> planes located Ozawa's fleet?,second,25 October,"[Document(id=7ade51eb3e5a17ee31e4514e28638a45a8061a9ee1a9b4f97cc1e6d9985e197e, content: ' commanders ignored reports that Kurita had turned back towards San Bernardino Strait. They had take...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 0.6893631935380231)]",1.0,0.24027210474014282,1.0,1.0,1.0 +Greek was crowned the European champions in what event?,Euro 2004,basketball,"[Document(id=12d9fa7ff9f514e66e336f50a70a372c3162770635ba732cc4335141fd65ee50, content: 'O.K, Maroussi) have won 3 Triple Crowns, 5 Saporta Cups, 2 Korać Cups and 1 FIBA Europe Champions Cu...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 0.6834429597884792)]",1.0,0.34321069717407227,1.0,1.0,1.0 +Which tense indicates that an action hasn't finished?,imperfective,imperfective,"[Document(id=06048f854345fb7e2b6f12877040f57cf87279336165b7cb7893e29f87cc9222, content: ' On occasion of the publication’s 45th anniversary, the Croatian weekly journal Forum published the ...', meta: {'name': 'Serbo-Croatian', 'source_id': '2cc55814bc801258e8300ed82e003428dbf11715c74ad99297d61c5dd77023f4', 'page_number': 1}, score: 0.42174331016005523)]",1.0,1.0,1.0,1.0,1.0 +What acoustic element could be distorted by different audio settings in iPods prior to 2007?,Bass,bass instruments,"[Document(id=158a6b8443733e42d24dec31180770da4724f3bd14ed49e47a937624f7aeed47, content: ' This would happen for EQ settings like R&B, Rock, Acoustic, and Bass Booster, because the equalizer...', meta: {'name': 'IPod', 'source_id': 'f0324763387a7e11c235569bada6935891b2c5ba40dd065ab5acc1bb5de8dbed', 'page_number': 1}, score: 0.5718687004083083)]",1.0,0.8207652568817139,1.0,1.0,1.0 +In what way can Neutral triatomic hydrogen exist?,excited form,an excited form,"[Document(id=e9b537adae4e88d5a591f33f2f06388da096013764cb94bfc4e3ea25bb2b2e23, content: ' This charged ion has also been observed in the upper atmosphere of the planet Jupiter. The ion is r...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 0.6014010412569942)]",1.0,0.9052941799163818,1.0,1.0,1.0 +How are children used in the conflicts in Burma?,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements.,child soldiers,"[Document(id=30c864dfe7ee098b254f1517149763909eae40b70a74f63f84fffd7a77fc3e1f, content: ' According to Samantha Power, a US delegation raised the issue of child soldiers with the government...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 0.5763265562773263)]",1.0,0.5706413388252258,1.0,1.0,1.0 +In what decades were Arsenal players stereotyped in a negative way?,1970s and 1980s,1970s and 1980s,"[Document(id=1bdd87144e8c05c0c8820e3bd6186deb63a4be13d5810e39af795993e4721bb7, content: ' The book was twice adapted for the cinema – the 1997 British film focuses on Arsenal's 1988–89 titl...', meta: {'name': 'Arsenal_F.C.', 'source_id': 'b197b14120aabbc3bf2073d4b3e99c1c51d56d5ac5248f8a97dadcfa056a6f7e', 'page_number': 1}, score: 0.5193618971746868)]",1.0,1.0000001192092896,1.0,1.0,1.0 +When did new construction start in Melbourne?,late 1890s,The 1850s and 1860s,"[Document(id=5065b911750db5485f2cb49749f8e95f270f3e0d705842ca5e8f26d61da4d4ce, content: ' The city centre, Docklands, St. Kilda Road and Southbank areas feature high-density forms. +With the...', meta: {'name': 'Melbourne', 'source_id': 'e9cc00699ea29b72949874ec0e147efa277ecdaa2a727246f376aab1a8d342bd', 'page_number': 1}, score: 0.6175571430131412)]",1.0,0.703935980796814,1.0,1.0,1.0 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:10.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:10.csv new file mode 100644 index 0000000..53c4aec --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:10.csv @@ -0,0 +1,133 @@ +questions,true_answers,predicted_answers,retrieved_docs,doc_mrr,sas,doc_map,doc_recall_single_hit,doc_recall_multi_hit +What were the earliest priesthoods? ,flamines,flamines,"[Document(id=ac8e9c286327aea24d00bceb002b08ad3fbc3007673c77da9b48b6b6b91aa76f, content: ' Some rites of the domus (household) were held in public places but were legally defined as privata ...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 172.20880681428673)]",1.0,0.9999999403953552,1.0,1.0,1.0 +When was a third visit to Mnafeld scheduled? ,early 1546,January 2010,"[Document(id=5987439923479496ef29d3e07778d456397d370fee0aa96398bfcb29d803683b, content: ' On January 8, 2009, Dell announced that it would move all Dell manufacturing in Limerick to Dell's ...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 87.33112846472943)]",0.0,0.3453689515590668,0.0,0.0,0.0 +What was the name of the airfield where heavy raids took place?,Farnborough,Tora Bora,"[Document(id=edf1d880f9644a86ef60cf78769e6b01b8387a648936eb886b2a89ea73f13424, content: ' On 7 October 2001, the official invasion began with British and U.S. forces conducting airstrike ca...', meta: {'name': 'War_on_Terror', 'source_id': 'b9ac47a39d382362992253f0ccfa0072ea6216fe1daa748c4899980b467292f4', 'page_number': 1}, score: 170.61233506154028)]",0.0,0.154091477394104,0.0,0.0,0.0 +The Modern English ending -'s is derived from what ending in Old English?,-es,"those that survived, to be sure, are basic elements of Modern English vocabulary","[Document(id=e2e4a1a8f8ce00819a653f82506b51f73af5d08aa2aac746913cc487ab8f02eb, content: ' +A later literary standard, dating from the later 10th century, arose under the influence of Bishop ...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 111.43807567202998)]",1.0,-0.006157325580716133,1.0,1.0,1.0 +In what area is it common for spiritual mentorship to be extremely high?,the West,Bible reading,"[Document(id=3ea5248487eae73641b86b07dcbc211849c33282b52b0559d15d46b39cdee5e6, content: ' The religion's leadership, while disclaiming divine inspiration and infallibility, is said to provi...', meta: {'name': 'Jehovah%27s_Witnesses', 'source_id': '24c4ceb07a03deed95302a705611ef12a9f83e147ff32a82bd8e3659b73793ec', 'page_number': 1}, score: 112.09092051382143)]",0.0,-0.047552794218063354,0.0,0.0,0.0 +Who is the only Greek volleyball club to win European titles?,Olympiacos,"Olympiacos, Panathinaikos, AEK Athens and PAOK. +The Greek national basketball team has a decades-long tradition of excellence in the sport, being considered among the world's top basketball powers. As of 2012, it ranked 4th in the world and 2nd in Europe. They have won the European Championship twice in 1987 and 2005, and have reached the final four in two of the last four FIBA World Championships, taking the second place in the world in 2006 FIBA World Championship, after a spectacular 101–95 win against Team USA in the tournament's semifinal. The domestic top basketball league, A1 Ethniki, is composed of fourteen teams. The most successful Greek teams are Olympiacos, Panathinaikos, Aris Thessaloniki","[Document(id=4b9cc1d29ed0f9a4cb783c7febb93802073dee536efd60ee53a2c3e5bec9cccf, content: ' Their best ever performance was in the 1896 Summer Olympics, when Greece finished second in the med...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 160.75179441488754)]",1.0,0.4144136607646942,1.0,1.0,1.0 +How many judges were on the panel for this season?,four,three,"[Document(id=69502c6963b83079686ca2e50db3e2ea9735a99804867a88ff5e897e898803b2, content: ' +The show had originally planned on having four judges following the Pop Idol format; however, only ...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 124.18059315671337)]",1.0,0.7108100056648254,1.0,1.0,1.0 +"Along with a chemical process, what type of process is used by night vision devices to convert infrared into visible light?",electrical,electrical,"[Document(id=1c8a379dc092fa8d374927640e6d16f46286416581451c66a1da80fef1412acc, content: ' Infrared can also be used to detect protostars before they begin to emit visible light. Stars emit ...', meta: {'name': 'Infrared', 'source_id': 'ba9d9843065facd8aa75e472f0c7d760ae3d78712335a49f7abaf499acb21d1c', 'page_number': 1}, score: 129.0274963398839)]",1.0,1.0,1.0,1.0,1.0 +The second most prominent religion in New York is what?,Judaism,Judaism,"[Document(id=1eed45d4c946ec058384afdd313db9b847d2b384629f1872466e03c7bc13a2e5, content: ' +Christianity (59%), particularly Catholicism (33%), was the most prevalently practiced religion in ...', meta: {'name': 'New_York_City', 'source_id': '8a1509d057a51ed7a68b42ff87df5d459ac7f15f3d0f4f1685859ba197f9686d', 'page_number': 1}, score: 173.3412040228327)]",1.0,1.0,1.0,1.0,1.0 +The Free Hanseatic City of Bremen includes which cities?,Bremen and Bremerhaven,two urban districts,"[Document(id=b99c33e3f1152b05cb402034099d36d6b8fc3942c6deb97d0de9d7f47d56c738, content: ' The equivalent of the Minister-President is the Senatspräsident (President of the Senate) in Bremen...', meta: {'name': 'States_of_Germany', 'source_id': '60764e89141121f734e956aeeb48fcbe12c7e4fdec683c4f9fa29406a3f0579e', 'page_number': 1}, score: 166.68434734067063)]",1.0,0.12825778126716614,1.0,1.0,1.0 +What seeks to limit the use of polysyllabic characters?,CPC,"Lipopolysaccharides, also called endotoxins, are composed of polysaccharides and lipid A that is responsible for much of the toxicity of gram-negative bacteria. Most bacteria have the gram-negative cell wall, and only the Firmicutes and Actinobacteria have the alternative gram-positive arrangement. These two groups were previously known as the low G+C and high G+C Gram-positive bacteria, respectively. These differences in structure can produce differences in antibiotic susceptibility; for instance, vancomycin can kill only gram-positive bacteria and is ineffective against gram-negative pathogens, such as Haemophilus influenzae or Pseudomonas aeruginosa. If the bacterial cell wall is entirely removed, it is called a protoplast, whereas if it is partially removed, it is called a spheroplast. β-Lactam antibiotics, such as penicillin, inhibit the formation of peptidoglycan cross-links in the bacterial cell wall. The enzyme lysozyme, found in human tears, also digests the cell wall of bacteria and is the body's main defense against eye infections. +Acid-fast bacteria, such as Mycobacteria, are resistant to decolorization by acids during staining procedures","[Document(id=a93c46780eb38a9b8195740a871eddb9a85c482bf7d89e72d9247c60999aed47, content: ' Lipopolysaccharides, also called endotoxins, are composed of polysaccharides and lipid A that is re...', meta: {'name': 'Bacteria', 'source_id': 'f8e9d365bfe51780e8f42f3a07cb69dd7a6df46bc7c67030d0853bdb13eeb390', 'page_number': 1}, score: 67.86823728702234)]",0.0,0.16699571907520294,0.0,0.0,0.0 +In what courts can federal habeas corpus suits be brought?,federal,lower federal courts,"[Document(id=af588e16cba35397799ed402e254a69a9445c63e2da06985a64c8440d5e111ea, content: ' Most often these are claims, such as ineffective assistance of counsel, which requires the court to...', meta: {'name': 'Capital_punishment_in_the_United_States', 'source_id': '0a4e4dda32f7be6a4f78da5ed9ee21d2cc7659112c0d087b2923a482bdc77b5b', 'page_number': 1}, score: 107.60043105585532)]",1.0,0.5048620700836182,1.0,1.0,1.0 +What did Whitehead consider himself as a philosopher? ,rank amateur,rank amateur,"[Document(id=a022efa8ed739794e05008dbe44d9b83a87fc4f4b7a609ec096b9d437bc5089e, content: ' For Whitehead, education should be the exact opposite of the multidisciplinary, value-free school m...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 210.30818292133372)]",1.0,1.0,1.0,1.0,1.0 +What condition does a person have to meet to be allowed by law on a coin?,deceased,divine right of kings,"[Document(id=607d109ecbf4caf0f219894e385aa017b2949009a0d7f78fe76e40014bbfa0af, content: 'The rule of law is the legal principle that law should govern a nation, as opposed to being governed...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 89.06030151326769)]",0.0,0.11411236971616745,0.0,0.0,0.0 +Which region of the US experiences rain year round?,Pacific Northwest,Southwest,"[Document(id=ef3b24124d6963ab10e31070a01eb903bae0b374f75f9a00a0da61890b7d8c80, content: ', tornadoes are more common than anywhere else on Earth and touch down most commonly in the spring a...', meta: {'name': 'Geography_of_the_United_States', 'source_id': '6eef5e1e8e3a1d489b23e0304d3cc9472b84a44871b2fd5ecfc8ab3f12e8ccf0', 'page_number': 1}, score: 182.51279558274814)]",1.0,0.6455833315849304,1.0,1.0,1.0 +What is the ability to recognize and adapt to new specific pathogens called?,Adaptive (or acquired) immunity,Antigen specificity,"[Document(id=a4a5008fab761323a2771c50538a7e75c9cf4ed6426bbef3a6ae08b7edd123e4, content: ' The adaptive immune response is antigen-specific and requires the recognition of specific ""non-self...', meta: {'name': 'Immune_system', 'source_id': 'fcb8c5092b2596a66d4f96587d653cbabda606874d86632c35ec40ef8f023051', 'page_number': 1}, score: 159.91248406200884)]",1.0,0.2857634127140045,1.0,1.0,1.0 +Who allegedly hit a home run to the Center?,Babe Ruth,Babe Ruth,"[Document(id=bde5f6c2b4913151f9e46d6578f4f20345ade116f2e96b5ae76ef509e1e517dd, content: ' Because of the tie the Giants and Cubs ended up tied for first place. The Giants lost the ensuing o...', meta: {'name': 'Chicago_Cubs', 'source_id': '10bd5330115a2fe2b9ef60d21abe3942769709161783409af0c4444ccac129c3', 'page_number': 1}, score: 164.45063393002806)]",1.0,1.0,1.0,1.0,1.0 +Which type of graduate from Imperial earned the second highest average starting salary after graduation?,Computing graduates,3.36,"[Document(id=8190e2f3cfba3c67fc319c1eba30ec986d8174be2decff7ed0bb12a364fb52bb, content: ' +The College's endowment is sub-divided into three distinct portfolios: (i) Unitised Scheme – a unit...', meta: {'name': 'Imperial_College_London', 'source_id': '8ff02d910ce681fdd72074b13ed322271625386966ee827dabf1c93ca556fd6e', 'page_number': 1}, score: 140.20154888024658)]",1.0,-0.009853528812527657,1.0,1.0,1.0 +In what year did Eisenhower propose his second civil rights act?,1960,1955,"[Document(id=0d88cb30320f0b038a71e1d9aa43a891187ded88e8298726341208761ff3f32a, content: ' Congress agreed to his request in 1955 for the Formosa Resolution, which obliged the U.S. to milita...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 185.40736255863564)]",1.0,0.6117545962333679,1.0,1.0,1.0 +What is the current height limitation in Paris in central areas?,50 m,"2,300 km2","[Document(id=b1a257cabbb0177c72b7e21764d67b5fde052b256f5fa1b3a533adabf91c13c0, content: ' Overall, the city is relatively flat, and the lowest point is 35 m (115 ft) above sea level. Paris ...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 183.58814741162405)]",1.0,0.39352455735206604,1.0,1.0,1.0 +Income level was found to have a signification correlation to which outcome in the study?,nutrition literacy,Crime rate,"[Document(id=cfc35d312a7f860ff8772433762d5f27e606e10585399783dd129af1d4d74ed4, content: ' Other studies have confirmed this relationship. The UNICEF index of ""child well-being in rich count...', meta: {'name': 'Economic_inequality', 'source_id': '648ff32b8c146c974b23e115e3a61420b1cdbea9de1f5bf38442ce4c1d9caa42', 'page_number': 1}, score: 94.23477686141754)]",0.0,0.12108045816421509,0.0,0.0,0.0 +How much did Viacom want in damages in their lawsuit against youtube?,$1 billion,$1 billion,"[Document(id=56ff4bdb4f543c61c1f516ba1e544e83e117ab5512bae58b2264fa6b6d86f26b, content: ' Three successful complaints for copyright infringement against a user account will result in the ac...', meta: {'name': 'YouTube', 'source_id': '4d18798daeceb997a6dcb04b575dbbdad768671ca9202116370611d944d510be', 'page_number': 1}, score: 173.34560648176648)]",1.0,0.9999998807907104,1.0,1.0,1.0 +Who should be able to challenge administrative orders in court?,individuals,individuals,"[Document(id=5c5f6a0591170904a56f4c0417e667089761d8d8f013f55283aff164fdadd75b, content: ' Dicey's: “no man is punishable or can be lawfully made to suffer in body or goods except for a dist...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 116.19394976028495)]",1.0,1.0,1.0,1.0,1.0 +The Slavs were under what name in the early 6th century?,the Antes and the Sclaveni,Veneti,"[Document(id=4236cbbf71506fe35f0da033ea9b465edf3cfbfdd8e6bac2b259b18835cc0408, content: ' +The relationship between the Slavs and a tribe called the Veneti east of the River Vistula in the R...', meta: {'name': 'Slavs', 'source_id': 'a55731ff3cd03d8fe34d4e646dbfcdc2088f01190e5110c9ff0baf4e4b486847', 'page_number': 1}, score: 210.39712772144958)]",1.0,0.36141833662986755,1.0,1.0,1.0 +How are the results simplified when using the SASO?,"into four summaries that look specifically at responses that correspond to either homosexuality, heterosexuality, bisexuality or asexuality.",rounded upwards to a standard value,"[Document(id=8ae5bc77fffde73dcdcec12fc78bbf57d94500975ce0e61f00c40591b931f6f3, content: ' If the noise-based speed (40:1) is lower than the saturation-based speed, or undefined because of h...', meta: {'name': 'Film_speed', 'source_id': 'cd350045b0fbc5c253411cbf952460337caeec603c6f046e13f60bcc9428df61', 'page_number': 1}, score: 95.309555843697)]",0.0,0.10711224377155304,0.0,0.0,0.0 +What socioeconomic class used the Florentine Tuscan language in Dante's time?,upper class,literate and upper class,"[Document(id=149c6390292392d4438c9191dca43bbe827c20a86bb7bfc502679813d7fcc108, content: ' Due to Eastern Lombard's status as a Gallo-Italic language, an Eastern Lombard speaker may, in fact...', meta: {'name': 'Dialect', 'source_id': '2e05b686e48c9512ae5466f1d15202b44fdc903ea32d0a42c5dbc9ef173d051d', 'page_number': 1}, score: 177.45530984259983)]",1.0,0.8130882978439331,1.0,1.0,1.0 +"In addition to ammo proposals, what other need was articulated?",The need for some form of tracer or smoke trail,defense budget,"[Document(id=6dc8ce5b8fdef387c2de164c01dcffc0e922afdd3d2159709f5aea4e8f1b3317, content: 'N. speech was well received but the Soviets never acted upon it, due to an overarching concern for t...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 82.36219845719191)]",0.0,-0.04582870751619339,0.0,0.0,0.0 +What is that power that enables us to love?,Prevenient grace,Long-term relationships allow adolescents to gain the skills necessary for high-quality relationships later in life and develop feelings of self-worth,"[Document(id=9df0e87f01c1b2c8ac7969d77fe5940944b4885a75655d3b107fbb3e960b582b, content: '0 for males and 17.3 for females. The typical duration of relationships increases throughout the tee...', meta: {'name': 'Adolescence', 'source_id': '4687b887b5783807c17e352eca017193687de7d3428929359d466eaed34ebbe4', 'page_number': 1}, score: 99.769775039754)]",0.0,0.00461157038807869,0.0,0.0,0.0 +"Who was assassinated on September 22, 189 AD?",He Jin,He Jin,"[Document(id=1023d346fa09f68f024520eed11ceb8d146eebd929e4d93e19ec61ae32461e57, content: ' 189 AD), half-brother to Empress He (d. 189 AD), plotted with Yuan Shao (d. 202 AD) to overthrow th...', meta: {'name': 'Han_dynasty', 'source_id': '060d95b91f9c20e5c8cd8b8a032ab44aac38ab88ce62d3c302bea0865f533568', 'page_number': 1}, score: 149.4057899599862)]",1.0,1.0,1.0,1.0,1.0 +How did Gaddafi change the legal system?,he introduced sharia as the basis for the legal system,sharia law,"[Document(id=6d61cd8c91a3530f637646f434785421d170c3d742a9e1e779b8ad38390b3f4f, content: ' Gaddafi proclaimed that the People's Congresses provided for Libya's every political need, renderin...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 209.24689365314305)]",1.0,0.6846116185188293,1.0,1.0,1.0 +What name was it assumed Elizabeth would take upon her marriage to Philip?,"Mountbatten,",Prince of Wales,"[Document(id=674be8f4e2b3d36a3dc034fda7b76c2fb89a2227d149ad2cfedc89552d0e55cb, content: ' She was educated privately at home. Her father acceded to the throne on the abdication of his broth...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 226.112612709885)]",1.0,0.09200291335582733,1.0,1.0,1.0 +What was the attack on the British weakness?,Battle of Fort Bull,sea communications,"[Document(id=2982b60b277d226d31c0a9fe90582fab11b0c6ff0f2e5a54d4e342474fbc3795, content: ' +In 1941, the Luftwaffe shifted strategy again. Erich Raeder—commander-in-chief of the Kriegsmarine—...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 149.98474112003294)]",0.0,-0.1473046839237213,0.0,0.0,0.0 +How much of South Tucson is Native American?,10%,Paleo-Indians,"[Document(id=ca6350a9a8c38782a1850582e37efb990f91441f3354b7b61cf884e4720f16a2, content: 'Tucson (/ˈtuːsɒn/ /tuːˈsɒn/) is a city and the county seat of Pima County, Arizona, United States, a...', meta: {'name': 'Tucson,_Arizona', 'source_id': '1cd8368afbbb7156e4d37c16c9795a1cf7209fbd0036331b6759337ae62d7224', 'page_number': 1}, score: 209.14760880036863)]",1.0,0.060131728649139404,1.0,1.0,1.0 +Whose theory claimed that species were unchanging and designed by God?,John Ray,William Paley,"[Document(id=ff5e838b8a7222ff21358b50a0b3729b6239d2d61bd80a57e62cd9d0ea6d30db, content: ' Lamarck thought there was an inherent progressive tendency driving organisms continuously towards g...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 168.91207509332548)]",1.0,0.2320324182510376,1.0,1.0,1.0 +Who can restore a database that has been corrupted?,database administrator,Database transactions,"[Document(id=5baf5b500fc870a57b1f1a3f45f13e9b554e1d91359189ce3677cff042e60b2d, content: ' Data security prevents unauthorized users from viewing or updating the database. Using passwords, u...', meta: {'name': 'Database', 'source_id': '98b09743ece9be42ad021f0dc6ffab08c322e7eeace6aae0b98b4bbf67224bd3', 'page_number': 1}, score: 81.34420688916279)]",1.0,0.4896238446235657,1.0,1.0,1.0 +What unpopular philosopher's text did Avicenna learn from?,Abu Abdullah Nateli,Abu Abdullah Nateli,"[Document(id=cbebf9a70685ea0e72bcf8c9f1dbbe9b7163ee26279b633446edf34314600fb3, content: ' Janssens demonstrated that Avicenna was a Sunni Hanafi. However, the 14th cenutry Shia faqih Nurull...', meta: {'name': 'Avicenna', 'source_id': '25cbb1011a29473e1dfa8d96ebe95bc8ead7393548de57e871f959624f7238f1', 'page_number': 1}, score: 181.94319586501797)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What was the event called that caused Ayutthaya to reclaim lost territories ?,the Chinese threat,Ayutthaya seized Tenasserim and Lan Na,"[Document(id=44578618b4cabfbc0cb6f3a6c6bcbb6b409d7ee16e69f6d347ef2686cc41aaf9, content: ' Many splendid temples of Mrauk U were built during this period. +Political unification returned in t...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 153.3439765889078)]",1.0,0.2441307157278061,1.0,1.0,1.0 +"Before the introduction of the Latin alphabet, how was Old English written?",runic system,the scholarly and diplomatic lingua franca of Western Europe,"[Document(id=d806131552e88e82e2690be85e2fbf31773e47614f5d7a8efdc4ab729427ca2b, content: 'Old English (Ænglisc, Anglisc, Englisc) or Anglo-Saxon is the earliest historical form of the Englis...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 161.545426839566)]",1.0,-0.028287040069699287,1.0,1.0,1.0 +"When groups that are antagonistic to each other form a coalition for hidden gain, it is called what?",unholy alliance,unholy alliance,"[Document(id=c54ac19342651f97c105c4825d0b82cbf05684cf1d7c4921e0d078a7212b4a21, content: ' A common type of embezzlement is that of personal use of entrusted government resources; for exampl...', meta: {'name': 'Political_corruption', 'source_id': 'e7c84fb03cb06b72c4ab51eb24218e9f2a10b95493e43d575342db0ffa3e9e1d', 'page_number': 1}, score: 113.39244671567084)]",1.0,1.0,1.0,1.0,1.0 +What made a strong economic case for investing in low carbon technologies?,Stern Review,Clean Edge,"[Document(id=639620812442614c676be21cf936d052650009771bc4d08df98e8056e4441e38, content: ' +New government spending, regulation, and policies helped the industry weather the 2009 economic cri...', meta: {'name': 'Renewable_energy_commercialization', 'source_id': 'fa5f7904b8df5272f4f870ed401f610435c916171c87b47deed9a45e6bd594ea', 'page_number': 1}, score: 114.57287042595362)]",1.0,0.03996779024600983,1.0,1.0,1.0 +"Who introduced the Latin version of the word ""insect""?",Pliny the Elder,Pliny the Elder,"[Document(id=de63707a025645c7fcc7a26e6dd37ddd29a27cd60ff187ce487e0d07217708ee, content: ' +The word ""insect"" comes from the Latin word insectum, meaning ""with a notched or divided body"", or ...', meta: {'name': 'Insect', 'source_id': 'e14215fdfb7e1394430dedb24549d773464f110b7f19b86558bc101f84126630', 'page_number': 1}, score: 184.29502006117178)]",1.0,1.0,1.0,1.0,1.0 +What was started by the Roman Catholic Church to dispense of the Cathars in Languedoc?,The Cathar Crusade,Solemn League and Covenant,"[Document(id=294cb7f92ff7b12fcb46517fe4d6a01dde32bbc7da8bef063a2198c6c5e17b5d, content: ' King James VI and I moved the Church of Scotland towards an episcopal form of government, and in 16...', meta: {'name': 'Presbyterianism', 'source_id': '877a7482c970246770aa25a5ae653f4f112d330b960d60a2b0f3cd425e6aafa3', 'page_number': 1}, score: 139.826848514211)]",0.0,0.2895878553390503,0.0,0.0,0.0 +What music service is a huge competitor for Tidal?,Spotify,Spotify,"[Document(id=f0c6a5cd5408a0fc01c220e5910c03a2241fbe85099410f518592c1fb7098441, content: ' ""Creating a partnership with Beyoncé, one of the most hard-working and talented people in the world...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 122.76790646829441)]",0.0,1.0000001192092896,0.0,0.0,0.0 +How much was each copper token worth?,a halfpenny,420 tonnes,"[Document(id=c33b50fb6577eb4a1d34f1a64e9894bfb5e12d5d14ead247ecfbea9a73a8a7a1, content: ' The largest mass of elemental copper discovered weighed 420 tonnes and was found in 1857 on the Kew...', meta: {'name': 'Copper', 'source_id': 'de5c60a665b93f9b358be412d53a37ca43d90005e3ec9a553ac38a38f076fadc', 'page_number': 1}, score: 145.30230866209183)]",0.0,-0.06792526692152023,0.0,0.0,0.0 +What ships were attacked in 1852?,Glencoe and Sea Nymph,"1,436 people were killed and 1,792 seriously injured, which affected morale badly.","[Document(id=dc733dafa156c8cd33c8bfe0b3a7341d74398730f0f97dd80f1c0b07d6a07ef7, content: ' +In the north, substantial efforts were made against Newcastle-upon-Tyne and Sunderland, which were ...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 160.31924929990572)]",0.0,0.06491856276988983,0.0,0.0,0.0 +What was the first fossil to display both clearly traditional reptilian characteristics?,Archaeopteryx,Eomaia,"[Document(id=deb49612ed6c7dff5f1ffa19a9e6cc731756e662202cde46400d79672a69618a, content: ' In the early Triassic, all the medium to large land carnivore niches were taken over by archosaurs ...', meta: {'name': 'Mammal', 'source_id': 'ce12abe98969c8fca9c93088950b36e820ec47c4c63f339bc203185f79f0242d', 'page_number': 1}, score: 169.29409728513733)]",0.0,0.19462870061397552,0.0,0.0,0.0 +What are the theories that integrate molecular genetics with Darwinian evolution called?,the modern evolutionary synthesis,evolutionary ideas,"[Document(id=d15c16a1667444f39bb31041c3f1fed3d72c856e8a47c2abfeec45c7779ab45c, content: 'On the Origin of Species, published on 24 November 1859, is a work of scientific literature by Charl...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 176.87905088243622)]",0.0,0.4085172116756439,0.0,0.0,0.0 +At what time did Tesla get dinner?,8:10 p.m,8:10 p.m,"[Document(id=2ed21a855190ee9369cd4351878edc528d570a2136e3a2612158d6108a59491e, content: ' until 6:00 p.m. or later, with dinner from exactly 8:10 p.m., at Delmonico's restaurant and later t...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 230.03812342004315)]",1.0,1.0000001192092896,1.0,1.0,1.0 +In what year did the French reach Alsace?,1444,1789,"[Document(id=93996e15e538ef99cbc1f14e3a6eca7f547c782d02004cfe4d90863b09e3c9ba, content: ' When hostilities were concluded in 1648 with the Treaty of Westphalia, most of Alsace was recognize...', meta: {'name': 'Alsace', 'source_id': '1105499d303196fb0b7ae4d26209720b0040e63376f114d458fd77f05836cbd4', 'page_number': 1}, score: 192.92581302092802)]",1.0,0.4256799817085266,1.0,1.0,1.0 +Who was responsible for the handling of the Roman census?,two censors,augurs,"[Document(id=a5e5aea2af0395511da534b5312305295571fb6c4dadbbf94b18f8c15af3add7, content: 'The priesthoods of public religion were held by members of the elite classes. There was no principle...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 160.02474930212503)]",0.0,0.12105397880077362,0.0,0.0,0.0 +When was a resolution agreed to about Chinese human rights issues in San Francisco?,"April 1, 2008","April 1, 2008","[Document(id=d16248de2a5e0d9e6bbb7e85b411bf0a4143206e20471ecab73dc7158ce992b1, content: ' The cost to the city for hosting the event was reported to be USD $726,400, nearly half of which ha...', meta: {'name': '2008_Summer_Olympics_torch_relay', 'source_id': 'a45e6ea09a1ff12d8a239a682c8ef3735ae894d5ab6427b50909053bb046e8b5', 'page_number': 1}, score: 123.16570921986224)]",1.0,1.0,1.0,1.0,1.0 +Who was the last pope to be crowned?,Paul VI,"Pius XI. He was sent a letter by Cardinal Eugenio Pacelli, and Roncalli later recalled that it was probably the last letter Pacelli sent until his election as Pope Pius XII","[Document(id=f60e9480f17445bd20fd691b4f23689feab6068d6b9b8032e2bc85a59563b9cf, content: ' Unaffected, he wrote to his sisters Ancilla and Maria and told them both that he was fine. +On 30 No...', meta: {'name': 'Pope_John_XXIII', 'source_id': '21b200b333306d54410b53cfd12e123951dabfc1f534196e5309132d00e8184d', 'page_number': 1}, score: 206.54313808498497)]",0.0,0.5919901132583618,0.0,0.0,0.0 +Who handles serious medical emergencies on Norfolk Island?,the Royal Australian Air Force,Royal Australian Air Force,"[Document(id=c3327e32d8658307f8dca684bbdeb22895ca7a8d7ee8ad6ff0eba6200c5e6da2, content: ' All visitors to Norfolk Island, including Australians, are recommended to purchase travel insurance...', meta: {'name': 'Norfolk_Island', 'source_id': '8be872c9c3302a16163599d52ec60841ddff13042f8e8e2e0aa14de2c192e2ee', 'page_number': 1}, score: 140.52734237020536)]",1.0,0.9777635335922241,1.0,1.0,1.0 +Were Roman soldiers expected to purchase their own equipment?,"wealthiest citizens, who were able to purchase the best equipment",the Roman state was forced to arm its soldiers at the expense of the state,"[Document(id=3f0396346c47d3ea53931dd05016eb360d6261c1b5ab0c7a977a5bbf0df1be01, content: ' As a result, the Roman state was forced to arm its soldiers at the expense of the state, which it h...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 155.2885699000689)]",1.0,0.14734730124473572,1.0,1.0,1.0 +Who did Nasser's group try to assassinate?,Hussein Sirri Amer,King Saud,"[Document(id=7ff80fc3e26c40bb2e658e548b73f40381ee50a7b9df04df616525f3c23883eb, content: ' +As political instability grew in Syria, delegations from the country were sent to Nasser demanding ...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 196.1013949009622)]",1.0,0.534000039100647,1.0,1.0,1.0 +What was AJ Ayer an important member of?,Logical empiricism,the Arian Party,"[Document(id=2e20ace02a4527c4c0706b6e9a1dacf4dc045af3a98b5588a0e82992e03d18ef, content: ' Nevertheless, the practice of saying it at the altar grew; eventually Pius V made this practice uni...', meta: {'name': 'Athanasius_of_Alexandria', 'source_id': '2a8e7ec0a44075f1a567744982dcecf4bcea19f0065d4f73c742398a05fa1642', 'page_number': 1}, score: 137.83755011636683)]",0.0,0.04274138808250427,0.0,0.0,0.0 +In which year was a settlement reached regarding GE's pollution of the Housatonic River?,1999,1977,"[Document(id=754901c14e07fd836481f4df6a9e1c790f259897d11a92d8f4449320bf432be1, content: ' Around the same time, General Electric's Canadian counterpart, Canadian General Electric, was forme...', meta: {'name': 'General_Electric', 'source_id': '26d4044eb2d79191635af69e57c262af8041a70871e60e989128c458529f04cc', 'page_number': 1}, score: 123.02371619032975)]",1.0,0.47267118096351624,1.0,1.0,1.0 +What area in Charleston is considered the 'industrial area'?,the city's Neck area,Charleston-North Charleston Urban Area,"[Document(id=8e10e19959a21e7f4445856828c620b95b3a03ed6ee3a2d28eb2161206a9558f, content: ' North Charleston is the second-largest city in the Charleston-North Charleston-Summerville Metropol...', meta: {'name': 'Charleston,_South_Carolina', 'source_id': '6135d76cf2064b0c4f70e685c5c7e15b8093869ebd10f9a74e8acb07e2173763', 'page_number': 1}, score: 162.99016909035507)]",1.0,0.33725616335868835,1.0,1.0,1.0 +In what year did Frédéric obtain citizenship in France?,1835,"1523, followed by the whole Bible in the French language in 1530. William Farel was a student of Lefevre who went on to become a leader of the Swiss Reformation, establishing a Protestant government in Geneva. Jean Cauvin (John Calvin), another student at the University of Paris, also converted to Protestantism. Long after the sect was suppressed by Francis I, the remaining French Waldensians, then mostly in the Luberon region, sought to join William Farel, Calvin and the Reformation, and Olivetan published a French Bible for them. The French Confession of 1559 shows a decidedly Calvinistic influence. Sometime between 1550 and 1580, members of the Reformed church in France came to be commonly known as Huguenots.[citation needed] +In what became known as the St. Bartholomew's Day Massacre of 24 August – 3 October 1572","[Document(id=ef77c93614a70f002eb708dc8d224c0fe860d179e7bc703f697f95e92761e545, content: ' The Gallicans briefly achieved independence for the French church, on the principle that the religi...', meta: {'name': 'Huguenot', 'source_id': '8c6e1ef6132295dfe82bead80d85963191a790434a85ac498376747f1b1241c7', 'page_number': 1}, score: 134.12150801123863)]",0.0,0.2547072470188141,0.0,0.0,0.0 +What was Tesla afraid someone was trying to do with his invention?,steal the invention,give it to the world,"[Document(id=cd1bbe0a0f57ccb9f150760bab01a15e98f2aa46f2da4ccfd8b6ace2c7bcef40, content: '"" He stated that the theory was ""worked out in all details"" and that he hoped to soon give it to the...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 163.47005127547294)]",1.0,0.20373223721981049,1.0,1.0,1.0 +What is the group called that does not agree with government at all?,anarchists,British Union of Fascists,"[Document(id=244034de9b004674160b84dd73a587fddc8ae2c1bcd4dd6d67d8854cb936c8c2, content: ' The Government, up until November 1940, was opposed to the centralised organisation of shelter. Hom...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 111.67806894929063)]",0.0,0.3484962582588196,0.0,0.0,0.0 +What was the name of the bookstore the authors met in?,Old Corner Bookstore,Bibliothèque Bleue,"[Document(id=4900cc4ca5bf67834a3e328c9a83f1d78e6326b38651b0d151b090f279ac39f1, content: '"" Women, as salonnières, were ""the legitimate governors of [the] potentially unruly discourse"" that ...', meta: {'name': 'Age_of_Enlightenment', 'source_id': 'd6935b64f79e30b7c3de4c952b643a55674c548cd0bb703870a094fd3e646d84', 'page_number': 1}, score: 94.53483948843362)]",0.0,0.2553384304046631,0.0,0.0,0.0 +Time and space are both examples of what type of resource?,complexity resources,memory,"[Document(id=e3b88b4f578ec5a002692c5a052496d37d6ee090a0491c5dc17087f489872445, content: ' When negative numbers are required, they are usually stored in two's complement notation. Other arr...', meta: {'name': 'Computer', 'source_id': 'a39560aa5caca299e2e8876ee9b291a51b896b645abcc4d5f2a790a0fa89fcef', 'page_number': 1}, score: 110.41973994552734)]",0.0,0.30840396881103516,0.0,0.0,0.0 +What had happened to Frederick's extra troops from Russia?,"Catherine, who ended Russia's alliance with Prussia and withdrew from the war","Britain had been surprised by the sudden Prussian offensive but now began shipping supplies and ₤670,000 (equivalent to ₤89.9 million in 2015) to its new ally","[Document(id=8cbcb986640b5e50e52d03f45b607146fd6aa0fafcaa20029b538705477e1278, content: ' The attack on neutral Saxony caused outrage across Europe and led to the strengthening of the anti-...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 173.76550381768013)]",1.0,0.2980388402938843,1.0,1.0,1.0 +What percentage of its workforce did Dell terminate in from 2007 to 2008?,10%,10%,"[Document(id=0a1d789e1e3ae04146d38c10ae30c76bb73173ec2df854d0b0695309c7e8629a, content: '” +On April 23, 2008, Dell announced the closure of one of its biggest Canadian call-centers in Kanat...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 154.56627095248245)]",1.0,0.9999999403953552,1.0,1.0,1.0 +What organization was Rome's official caretaker?,the senate,Christian Unity Secretariat,"[Document(id=18ed06b8889609136231a06d99465ac7da3164b444e51e58cace4a1e98ddebcb, content: ' He was the last pope to date to be crowned; his successor Pope John Paul I replaced the Papal Coron...', meta: {'name': 'Pope_Paul_VI', 'source_id': 'fc82a40c990c0b3840b06b32643858d39984b66d78ae950561d809dc84d6edf8', 'page_number': 1}, score: 152.845765627712)]",0.0,0.2545766830444336,0.0,0.0,0.0 +Who chose BYU as the location of the national Middle East Language Resource Center?,United States Department of Education,United States Department of Education,"[Document(id=0b3a599a54fac34c82f29d858c51bbf5ce4f37032b3539a76c2b8a6b918abc6e, content: ' Several of its language programs are the largest of their kind in the nation, the Russian program b...', meta: {'name': 'Brigham_Young_University', 'source_id': 'cb3b81b6f2c540426803106face67bc5fd4db7b1a9ef422ec3601456732b4785', 'page_number': 1}, score: 231.42251702622502)]",1.0,1.0,1.0,1.0,1.0 +Who discovered Hydrogen gas?,Robert Boyle,Robert Boyle,"[Document(id=45ad6989b7801812895d43ea694bad169af9b5dd9024dd96b099fe071a196217, content: ' +In 1671, Robert Boyle discovered and described the reaction between iron filings and dilute acids, ...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 208.54315140237432)]",1.0,0.9999998807907104,1.0,1.0,1.0 +Where was Polybius forced to go as hostage?,Rome,Milan,"[Document(id=f3ddc00ec9151a3a78139029d06124f9dd3a5c7d358c3b467fee97a09cc00605, content: '."" (Pope Gregory I, would have full access to the Vatican Archives). +Constantius, ordered Liberius i...', meta: {'name': 'Athanasius_of_Alexandria', 'source_id': '2a8e7ec0a44075f1a567744982dcecf4bcea19f0065d4f73c742398a05fa1642', 'page_number': 1}, score: 155.37445791726404)]",0.0,-0.021660923957824707,0.0,0.0,0.0 +In what decade was disco popular?,1970s,1980s,"[Document(id=40c02ba6acd7df36a81130944465adc587a46eaf90fc8f3232939968d3e04b8b, content: ' +In the late 1980s, many local Chicago house music artists suddenly found themselves presented with ...', meta: {'name': 'House_music', 'source_id': 'c556f4a459e8a99b6d2065bab8fd6a918d0a00f13291aad2473922492e4abf2e', 'page_number': 1}, score: 154.38767033374768)]",0.0,0.7754138708114624,0.0,0.0,0.0 +What do the rivers form?,lakes,lakes,"[Document(id=c751375299aac2090fe706008704ed47f7358e09ebad2acbc0d54e2c5b0ab5d3, content: ' Cities such as Milan depend on 80 percent of water from Alpine runoff. Water from the rivers is use...', meta: {'name': 'Alps', 'source_id': 'd2eb3ad3af7d6110ae691bf40660ff7a79e4e9dbfd25785b4e55c7700bfcae79', 'page_number': 1}, score: 122.65050850059238)]",1.0,1.0,1.0,1.0,1.0 +What were the first 256 code points of Unicode made identical to? ,ISO-8859-1,"the English alphabet, ASCII encodes 128 specified characters into seven-bit integers as shown by the ASCII chart on the right. The characters encoded are numbers 0 to 9, lowercase letters a to z, uppercase letters A to Z, basic punctuation symbols, control codes that originated with Teletype machines, and a space. For example, lowercase j would become binary 1101010 and decimal 106. ASCII includes definitions for 128 characters: 33 are non-printing control characters (many now obsolete) that affect how text and space are processed and 95 printable characters, including the space (which is considered an invisible graphic:223). +The code itself was patterned so that most control codes were together, and all graphic codes were together, for ease of identification. The first two columns (32 positions) were reserved for control characters.:220, 236 § 8,9) The ""space"" character had to come before graphics to make sorting easier, so it became position 20hex;:237 § 10 for the same reason, many special signs commonly used as separators were placed before digits. The committee decided it was important to support uppercase 64-character alphabets, and chose to pattern ASCII so it could be reduced easily to a usable 64-character set of graphic codes,:228, 237 § 14 as was done in the DEC SIXBIT code. Lowercase letters were therefore not interleaved with uppercase. To keep options available for lowercase letters and other graphics, the special and numeric codes were arranged before the letters, and the letter A was placed in position 41hex to match the draft of the corresponding British standard","[Document(id=8b5d2a51277d0cfa487480affbc5d72594c3901e01195b7283d6b82eab96ddb9, content: 'Originally based on the English alphabet, ASCII encodes 128 specified characters into seven-bit inte...', meta: {'name': 'ASCII', 'source_id': '4bd7e27c39431d5c4712c32c534ed40f7df89f7b80f6eccb22975178fb7b1be2', 'page_number': 1}, score: 163.1721608166594)]",0.0,0.2587267756462097,0.0,0.0,0.0 +"How many countries did her song ""Irreplaceable"" get number one status in?",five,10,"[Document(id=add8c8aff407c78c35e0202c21fe2475241b48198c75789ef9255e23f94b5978, content: ' The album was certified Triple Platinum in the United States alone and became a major success throu...', meta: {'name': 'Adult_contemporary_music', 'source_id': '97671df8695f447ceee464d87be9f360589d67646ccf18f2eb61b6153865369a', 'page_number': 1}, score: 105.724148748294)]",0.0,0.4718608856201172,0.0,0.0,0.0 +How many Examination Boards exist in India?,30,89.,"[Document(id=9172a42a065a9a9def97dc9c6923221ca37a1ddd8c4839b6799dfb8f0dd0495b, content: ' New Delhi has a literacy rate of 89.38% according to 2011 census, which is highest in Delhi. +Hindui...', meta: {'name': 'New_Delhi', 'source_id': '3b716631dedc36f770955e2ea4abca667e0b1db5ad4b63ad7a7e250e2bba1a5f', 'page_number': 1}, score: 106.38217760589907)]",0.0,0.28285151720046997,0.0,0.0,0.0 +What play showed an early depiction of civil disobedience?,Antigone,Antigone,"[Document(id=23b161a9804a8a11c97c4f3f14b6762b49fdf3b1fefb8ff1de76b00cfa00995a, content: 'One of its earliest massive implementations was brought about by Egyptians against the British occup...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 159.1381537658833)]",1.0,1.0000001192092896,1.0,1.0,1.0 +"Once in conversation with the Queen, how is Elizabeth addressed?",Ma'am,United Nations,"[Document(id=36cca080196ef1b35130c0b7bcd6bb4216fa5de9889c7c13bfa049ba8b3661c7, content: ' Criticism was focused on the institution of the monarchy itself and the Queen's wider family rather...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 147.77558783372996)]",1.0,0.1381721794605255,1.0,1.0,1.0 +In what century did Paris become largely into fashion?,19th,19th century,"[Document(id=2ab8eb5978dfe802538939b80ae771d663adc411a3a6da5e18b0edcdba1d8f8f, content: ' Sculptors such as Girardon, Coysevox and Coustou acquired reputations as the finest artists in the ...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 170.6255699703063)]",1.0,0.8267526626586914,1.0,1.0,1.0 +When did Alfred the Great begin his reign?,871,11 December 1936,"[Document(id=d222effdfada6505db2706b9557fa616cf1482882ea3dced11edadc541327c6e, content: ' One of the few stirs arose when the Canadian Prime Minister, R. B. Bennett, considered the Duke for...', meta: {'name': 'George_VI', 'source_id': '5a05abc99d3e2c9f678c9681513c2d8cdfe043b062ba971ef3f9f31e7c0c4622', 'page_number': 1}, score: 154.95061791715284)]",0.0,0.11978332698345184,0.0,0.0,0.0 +What two methods can be used when using Load Testing?,data or a large number of users,defect prevention and detection,"[Document(id=504295554c9b7c44405de1c2a1cdf6d3646442868b73a26ce82dae1bf5116767, content: ' +Unit testing is a software development process that involves synchronized application of a broad sp...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 118.07047957699992)]",1.0,-0.08391132205724716,1.0,1.0,1.0 +what may possibly be a solution to net neutrality concerns? ,municipal broadband,municipal broadband,"[Document(id=bb0e180e009c490f3ae30fb55a9a91cbe512409c2e12ebae0203298a6f3efe78, content: 'S. Federal Communications Commission (FCC) was reported to be considering a new rule that will permi...', meta: {'name': 'Internet_service_provider', 'source_id': 'b6cdeebacab1c0fc99cc348ba10a7ad5f8a09ca4691c74f89374730939ffbbb4', 'page_number': 1}, score: 117.52238522381246)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What are the ground-attack aircraft in the USAF usually deployed in support for? ,U.S. ground forces,U.S. ground forces,"[Document(id=cdfa1e6bea8fd42b00ce80468ba471689216a6195425c2435fb1141b0b8c7e6f, content: ' +The ground-attack aircraft of the USAF are designed to attack targets on the ground and are often d...', meta: {'name': 'United_States_Air_Force', 'source_id': '1bb222de66e5a2c81f1d5b75bf3eced066939aec82934a5cb176a607b4e8cc8f', 'page_number': 1}, score: 219.74746796807392)]",1.0,1.0,1.0,1.0,1.0 + In what year did the invasion of Russia take place?,1812,1917,"[Document(id=5cf39c40640013969626bfa9dcc17e86c891033c6881c53d41dfced842a316bd, content: ' +The Russian Revolution is the series of revolutions in Russia in 1917, which destroyed the Tsarist ...', meta: {'name': 'Modern_history', 'source_id': '59c9b273f6f85af9d336afaf5154a702230bef3726363ecbf5c8378f5d8798c7', 'page_number': 1}, score: 172.22838449876542)]",0.0,0.6207311749458313,0.0,0.0,0.0 +What was the practice of religion to the Romans?,personal choice,"Private and personal worship was an important aspect of religious practices. In a sense, each household was a temple to the gods","[Document(id=d2af24870dc313fd20684bc694305efc093549248524947f6bd762d819334887, content: '[citation needed] +Roman religious beliefs date back to the founding of Rome, around 800 BC. However,...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 239.36564833446252)]",0.0,0.3531951308250427,0.0,0.0,0.0 +What is one example of failure testing?,Software fault injection,Software fault injection,"[Document(id=76ad3fb3d257a3812a169d749a5fb13f2e05383764e8f9dfc53895f08747b375, content: ' Beta versions can be made available to the open public to increase the feedback field to a maximal ...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 133.72966963796148)]",1.0,0.9999998807907104,1.0,1.0,1.0 +"Out of the maximum amount of players allowed, one must be a what?",goalkeeper,25-man squad of players aged over 21,"[Document(id=729a1a7b3183917b1dc01acb07f1b632efc33f4eea6aeffbe484b1530c2d94f0, content: ' By 2009, under 40% of the players in the Premier League were English. +In response to concerns that ...', meta: {'name': 'Premier_League', 'source_id': '369baa9a5a854704f16fadd35144d3a3150cf1e9ae1671d9b50a3448547b814b', 'page_number': 1}, score: 105.43494496687926)]",0.0,0.2028268426656723,0.0,0.0,0.0 +In which year was the new constitution ratified?,1855,1977,"[Document(id=cd7897f96259919a226f490cef36c94f009c4cd789e86e763ab320a9ec5bca5a, content: ' The march originated at the Opera Plaza after speakers, mainly intellectuals, addressed the crowd. +...', meta: {'name': 'Dissolution_of_the_Soviet_Union', 'source_id': 'f13431b95b0890d57a225554d0ef9f1d051f9e4f48b8d8798689c11670d5b2ac', 'page_number': 1}, score: 121.1361988185173)]",0.0,0.46762916445732117,0.0,0.0,0.0 +What vessel was attacked in 1845?,Naiad,"Russian Bomarsund fortress on Åland Islands. In the same month, the Western Allied Baltic Fleet tried to destroy heavily defended Russian dockyards at Sveaborg outside Helsinki. More than 1000 enemy guns tested the strength of the fortress for two days. Despite the shelling, the sailors of the 120-gun ship Rossiya","[Document(id=3c65091ec8b7ef0626d34520c5e99c8f6539d594394209264472f89890ca987c, content: ' These battles are known in Finland as the Åland war. +In August 1855 a Franco-British naval force ca...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 167.4531894262477)]",0.0,0.11404582113027573,0.0,0.0,0.0 +When did Travel + Leisure list Yale as one of the most beautiful campuses?,2011,2011,"[Document(id=92daacd08f6215b2b6c9a23e96940ae9ffa3af8f6a0300108c6bfcf840702db9, content: ' In western New Haven, the university holds 500 acres (2.0 km2) of athletic facilities, including th...', meta: {'name': 'Yale_University', 'source_id': 'f33638c4d6b8d7572ba500f3a8affca1729aae3046c83a94e2a7424dd50f6549', 'page_number': 1}, score: 185.11339063937942)]",1.0,1.0,1.0,1.0,1.0 +What sort of variants did the Human Genome Project indicate that many of the measured transcripts were?,alternative variants,expressed sequence tags,"[Document(id=f61b4e12858ae84a04762a59c6bfaccab2588f02781fe27a55cb8323cef086ed, content: ' This mechanism is a common source of new genes in prokaryotes, sometimes thought to contribute more...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 104.78775277873254)]",1.0,0.13049247860908508,1.0,1.0,1.0 +During what year did the Italians invade Libya?,1911,1973,"[Document(id=8be794b0b9d6c44d95d0cb0782d29f36f0b508ef78ccf7ad10df252fbd1dfa4b, content: ' Gaddafi ordered an Egyptian submarine to target the ship, but Sadat cancelled the order, fearing a ...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 204.97418578989914)]",1.0,0.4185156226158142,1.0,1.0,1.0 +What did Davidson believe the God of Aristotle is synonymous with?,rational thought,Jupiter,"[Document(id=a8cc5557b921c5766b219ced4b8319da3ea79db597efbb98b7cbe6998afe6656, content: ' Ambrose, the influential Bishop of Milan and future saint, wrote urging the rejection of Symmachus'...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 185.2449344711922)]",0.0,0.0053202565759420395,0.0,0.0,0.0 +What percentage of students at Notre Dame are the children of former Notre Dame students?,21–24%,21–24%,"[Document(id=4a2399f100a536a4baf71c5dedf166c45f09b1b237a505e9a84faf1da7fd8a1b, content: ' +In 2014 the Notre Dame student body consisted of 12,179 students, with 8,448 undergraduates, 2,138 ...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 172.25504829596989)]",1.0,0.9999998807907104,1.0,1.0,1.0 +On what day did U.S> planes located Ozawa's fleet?,second,19 February,"[Document(id=3aa48796f054c4788769b2991e2826cffc49e4db56b22dc5cb5d2e279b46d630, content: ' The network of bunkers and pillboxes greatly favored the defender. +Starting in mid-June 1944, Iwo J...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 168.23687425884253)]",1.0,0.24278101325035095,1.0,1.0,1.0 +Greek was crowned the European champions in what event?,Euro 2004,2005 European Championship,"[Document(id=76be05dc0de2dfbcf6d3d16e80abbfa4ee57f55d30696681b941ecd6ff700822, content: 'O.K. Greek basketball teams are the most successful in European basketball the last 25 years, having...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 163.3646309027018)]",1.0,0.5996231436729431,1.0,1.0,1.0 +Which tense indicates that an action hasn't finished?,imperfective,Voiceless aspiration,"[Document(id=84e89d7a46f8edb679e02ffd1057e7edc22a9eeba00c35da796444d51e11482d, content: ' +There are no dedicated IPA symbols for degrees of aspiration and typically only two degrees are mar...', meta: {'name': 'Aspirated_consonant', 'source_id': 'ded60872a72621c0cc2ee33d9a7b0d60f1474075e260da958b752dbe72e00321', 'page_number': 1}, score: 124.20364522142323)]",0.0,0.21327462792396545,0.0,0.0,0.0 +What acoustic element could be distorted by different audio settings in iPods prior to 2007?,Bass,bass instruments,"[Document(id=7308906ca1725346de1f8669d04745bd0719f93959d2b77a3e2eedd2aea218ba, content: ' This would happen for EQ settings like R&B, Rock, Acoustic, and Bass Booster, because the equalizer...', meta: {'name': 'IPod', 'source_id': 'f0324763387a7e11c235569bada6935891b2c5ba40dd065ab5acc1bb5de8dbed', 'page_number': 1}, score: 151.26005977413936)]",1.0,0.4064074754714966,1.0,1.0,1.0 +In what way can Neutral triatomic hydrogen exist?,excited form,protonated molecular hydrogen or the trihydrogen cation,"[Document(id=bde62e3adecc34229459dbb473112814698f8a0ce91d07d2fc909e9fe9b2d5e2, content: ' +To avoid the implication of the naked ""solvated proton"" in solution, acidic aqueous solutions are s...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 129.82964543860288)]",1.0,0.11410942673683167,1.0,1.0,1.0 +How are children used in the conflicts in Burma?,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements.,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements,"[Document(id=ff11a1c6102e2b293943cbcfb8c2935799487df6f465dea412c00844c1c7c655, content: '"" In July 2013, the Assistance Association for Political Prisoners indicated that there were approxi...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 166.35832930030904)]",1.0,0.997592568397522,1.0,1.0,1.0 +In what decades were Arsenal players stereotyped in a negative way?,1970s and 1980s,Os Arsenalistas,"[Document(id=5008c48e9b8d1c751b9388b7772f14176f8c01ef2697fa8a4d0c1fd478bfbfca, content: ' In 1909, Sparta Prague adopted a dark red kit like the one Arsenal wore at the time; in 1938, Hiber...', meta: {'name': 'Arsenal_F.C.', 'source_id': 'b197b14120aabbc3bf2073d4b3e99c1c51d56d5ac5248f8a97dadcfa056a6f7e', 'page_number': 1}, score: 79.88047283584906)]",1.0,-0.08058242499828339,1.0,1.0,1.0 +When did new construction start in Melbourne?,late 1890s,February 2006,"[Document(id=3513fcc6c75aee92ba5e4d20e40d134b9159f33067469bcc48634091ed74a9ed, content: '8 billion annually. Melbourne has been attracting an increasing share of domestic and international ...', meta: {'name': 'Melbourne', 'source_id': 'e9cc00699ea29b72949874ec0e147efa277ecdaa2a727246f376aab1a8d342bd', 'page_number': 1}, score: 185.98741924757314)]",1.0,0.3492073714733124,1.0,1.0,1.0 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:15.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:15.csv new file mode 100644 index 0000000..106996e --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:15.csv @@ -0,0 +1,129 @@ +questions,true_answers,predicted_answers,retrieved_docs,doc_mrr,sas,doc_map,doc_recall_single_hit,doc_recall_multi_hit +What were the earliest priesthoods? ,flamines,flamines,"[Document(id=3c9b5484de418625b08e3d8f4e3c3398f40f2f5af0bbf48528ee46ea38c64e7c, content: ' In household cult, the paterfamilias functioned as priest, and members of his familia as acolytes a...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 176.94739092146085)]",1.0,0.9999998807907104,1.0,1.0,1.0 +When was a third visit to Mnafeld scheduled? ,early 1546,5 July 2005,"[Document(id=2bb0143cbe0013e9a1f2c007273d15715e8d6daa8bd81bb7fd41acf67926990b, content: ' After the success of the British Galleries, opened in 2001, it was decided to embark on a major red...', meta: {'name': 'Victoria_and_Albert_Museum', 'source_id': '202e0997b99bb8f0aaa6dd9c09c05e008cf8809c92549b6c9f073b5d26d1a3dd', 'page_number': 1}, score: 79.8326457180098)]",0.0,0.46504393219947815,0.0,0.0,0.0 +What was the name of the airfield where heavy raids took place?,Farnborough,Beirut,"[Document(id=115b44313903e1b7cef422e7da97789d64c7e257f3d45d92337275e4cb9d8de8, content: ' The most controversial Army commander was William Westmoreland whose strategy involved systematic d...', meta: {'name': 'Military_history_of_the_United_States', 'source_id': 'e80a3db424753f4e406b9cfac948a9c9640a8278fb51e7ba7c06781d3f1813f4', 'page_number': 1}, score: 171.22162582879656)]",0.0,0.17634135484695435,0.0,0.0,0.0 +The Modern English ending -'s is derived from what ending in Old English?,-es,West Saxon,"[Document(id=5cd12aa5c8db2e55b0fda8b25d06d70935857d40964f83d1f62b6c72b55f0215, content: ' +A later literary standard, dating from the later 10th century, arose under the influence of Bishop ...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 114.08671337452475)]",1.0,0.25228339433670044,1.0,1.0,1.0 +In what area is it common for spiritual mentorship to be extremely high?,the West,Hinduism,"[Document(id=b43e607046d3a9f2878f76721d6d75cb6c46d2bdb38f6b3fdc5f1b9a2f23a163, content: ' +In The Church of Jesus Christ of Latter-day Saints (LDS Church), the teacher is an office in the Aa...', meta: {'name': 'Teacher', 'source_id': '8d3b7bd3176a10380300a49fa713b8fffa5e77a181df51ea823d2bdeb46d4c7b', 'page_number': 1}, score: 141.90348317285938)]",1.0,0.10249558091163635,1.0,1.0,1.0 +Who is the only Greek volleyball club to win European titles?,Olympiacos,no other nation has won more than 4 Euroleague championships in this period,"[Document(id=9c5e4e5ebe04f6d60e8e13bc3cfe5cd0c8fa7c41c1148c916c472a26ca7c677b, content: ' Their best ever performance was in the 1896 Summer Olympics, when Greece finished second in the med...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 170.336796659614)]",1.0,0.17913703620433807,1.0,1.0,1.0 +How many judges were on the panel for this season?,four,three,"[Document(id=0639a6470cdb94c436a07cbb24189b437a50f73fdb84ffa10dd2faeb282342a2, content: ' +The show had originally planned on having four judges following the Pop Idol format; however, only ...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 123.15646815816092)]",1.0,0.7108100652694702,1.0,1.0,1.0 +"Along with a chemical process, what type of process is used by night vision devices to convert infrared into visible light?",electrical,colored gels,"[Document(id=11c1b6ee06da3c39222feb05aa6a4abae48126f7c603e6523f0de5925d37a441, content: 'Infrared radiation is used in industrial, scientific, and medical applications. Night-vision devices...', meta: {'name': 'Infrared', 'source_id': 'ba9d9843065facd8aa75e472f0c7d760ae3d78712335a49f7abaf499acb21d1c', 'page_number': 1}, score: 116.47360197118208)]",1.0,-0.0035034893080592155,1.0,1.0,1.0 +The second most prominent religion in New York is what?,Judaism,Judaism,"[Document(id=5dac70957842f214d4c60f76564b27a7e657cd237c677ed236d1ed1ca76e360f, content: ' +Christianity (59%), particularly Catholicism (33%), was the most prevalently practiced religion in ...', meta: {'name': 'New_York_City', 'source_id': '8a1509d057a51ed7a68b42ff87df5d459ac7f15f3d0f4f1685859ba197f9686d', 'page_number': 1}, score: 169.64570832993456)]",1.0,1.0,1.0,1.0,1.0 +The Free Hanseatic City of Bremen includes which cities?,Bremen and Bremerhaven,two urban districts,"[Document(id=d439f7d8555c7aea5751230341480bbf37f3e3f9e6b4e542f7cb145dfa03c950, content: ' In the three free states of Bavaria, Saxony, and Thuringia the government is referred to as the Sta...', meta: {'name': 'States_of_Germany', 'source_id': '60764e89141121f734e956aeeb48fcbe12c7e4fdec683c4f9fa29406a3f0579e', 'page_number': 1}, score: 161.52389140584813)]",1.0,0.12825778126716614,1.0,1.0,1.0 +What seeks to limit the use of polysyllabic characters?,CPC,β-Lactam antibiotics,"[Document(id=3b8de679cf4330422020b3c530f29f7e55171fe9f5a7383faca38ec8723c7d6e, content: ' Bacterial cell walls are different from the cell walls of plants and fungi, which are made of cellu...', meta: {'name': 'Bacteria', 'source_id': 'f8e9d365bfe51780e8f42f3a07cb69dd7a6df46bc7c67030d0853bdb13eeb390', 'page_number': 1}, score: 68.15934476800317)]",0.0,0.03749807924032211,0.0,0.0,0.0 +In what courts can federal habeas corpus suits be brought?,federal,district court,"[Document(id=a54d74acb097c757f1d529fef3759c7167a1133c97bc7020d3ed8e7abf0a8c44, content: 'S. Court of Appeals for the Fifth Circuit got the issue wrong — on the second try as well as on the ...', meta: {'name': 'Affirmative_action_in_the_United_States', 'source_id': 'b7156a1660bb591e3abb4460028a7922c063ba8962ad6dce11678f8b08c5cdcb', 'page_number': 1}, score: 100.49442833929467)]",0.0,0.2981231212615967,0.0,0.0,0.0 +What did Whitehead consider himself as a philosopher? ,rank amateur,God's consequent nature,"[Document(id=47d8785b4e464ebb67e06bd4f4b7a36000d8b8a168c4c50f7e02c02df38feb7e, content: '"" He then observes that the mark of higher forms of life is that they are actively engaged in modify...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 194.04915088812754)]",1.0,0.0033599603921175003,1.0,1.0,1.0 +What condition does a person have to meet to be allowed by law on a coin?,deceased,personal ends and desires,"[Document(id=eba9752f016e83ce3d0b74da7d93673c03d7a87e4199f69c3759adc00d09ff22, content: ' Hayek proposed that under the Rule of Law individuals would be able to make wise investments and fu...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 89.26086848083912)]",0.0,0.14371861517429352,0.0,0.0,0.0 +Which region of the US experiences rain year round?,Pacific Northwest,Southwest,"[Document(id=add6364cc3fcf03efa7ffa889e1092b61692cea7887f8707b197baeb670911c0, content: ' The Southwest has the worst droughts; one is thought to have lasted over 500 years and to have hurt...', meta: {'name': 'Geography_of_the_United_States', 'source_id': '6eef5e1e8e3a1d489b23e0304d3cc9472b84a44871b2fd5ecfc8ab3f12e8ccf0', 'page_number': 1}, score: 169.40613730411468)]",1.0,0.6455832719802856,1.0,1.0,1.0 +What is the ability to recognize and adapt to new specific pathogens called?,Adaptive (or acquired) immunity,Antigen specificity,"[Document(id=7f56f4718d147b0156e077e4853a6ac0250773335364d5e5cac1ff37df572de8, content: ' The adaptive immune response is antigen-specific and requires the recognition of specific ""non-self...', meta: {'name': 'Immune_system', 'source_id': 'fcb8c5092b2596a66d4f96587d653cbabda606874d86632c35ec40ef8f023051', 'page_number': 1}, score: 158.72987153215703)]",1.0,0.2857634127140045,1.0,1.0,1.0 +Who allegedly hit a home run to the Center?,Babe Ruth,Babe Ruth,"[Document(id=9f4c20a2bce44cf9bd3af493be183ab333c4d80eef37f323c78a73218b2cb402, content: ' Because of the tie the Giants and Cubs ended up tied for first place. The Giants lost the ensuing o...', meta: {'name': 'Chicago_Cubs', 'source_id': '10bd5330115a2fe2b9ef60d21abe3942769709161783409af0c4444ccac129c3', 'page_number': 1}, score: 164.16816603528014)]",1.0,1.0,1.0,1.0,1.0 +Which type of graduate from Imperial earned the second highest average starting salary after graduation?,Computing graduates,"2011/12 – £765.2 million) and total expenditure of £754.9 million (2011/12 – £702.0 million). Key sources of income included £329.5 million from research grants and contracts (2011/12 – £313.9 million), £186.3 million from academic fees and support grants (2011/12 – £163.1 million), £168","[Document(id=389229d8f68a9ad229d7de282cdd7950da0a10f9e0421a77c07544e84425f011, content: ' Prince Albert's vision built the Victoria and Albert Museum, Science Museum, Natural History Museum...', meta: {'name': 'Imperial_College_London', 'source_id': '8ff02d910ce681fdd72074b13ed322271625386966ee827dabf1c93ca556fd6e', 'page_number': 1}, score: 134.5070842194833)]",1.0,0.07216578722000122,1.0,1.0,1.0 +In what year did Eisenhower propose his second civil rights act?,1960,1955,"[Document(id=e2beebdfa17be4f6efba32829e2212a1399b680760fcc02660150921b570ef84, content: 'Dwight David ""Ike"" Eisenhower (/ˈaɪzənˌhaʊ.ər/ EYE-zən-HOW-ər; October 14, 1890 – March 28, 1969) wa...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 178.3948185668636)]",1.0,0.6117546558380127,1.0,1.0,1.0 +What is the current height limitation in Paris in central areas?,50 m,"2,300 km2","[Document(id=5d56044357c45f6633a12781f9ccab0c392f05bb9311df2581b4325ada3c133d, content: ' +Paris is located in northern central France. By road it is 450 kilometres (280 mi) south-east of Lo...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 173.17023838877753)]",1.0,0.3935243487358093,1.0,1.0,1.0 +Income level was found to have a signification correlation to which outcome in the study?,nutrition literacy,rates of growth and investment,"[Document(id=2ceee3158fa42ccc05a00626664d0dc2609f0a02800a7d45c6887dfc6805db72, content: '"" +In 1993, Galor and Zeira showed that inequality in the presence of credit market imperfections has...', meta: {'name': 'Economic_inequality', 'source_id': '648ff32b8c146c974b23e115e3a61420b1cdbea9de1f5bf38442ce4c1d9caa42', 'page_number': 1}, score: 100.57414376277009)]",0.0,0.06044796481728554,0.0,0.0,0.0 +How much did Viacom want in damages in their lawsuit against youtube?,$1 billion,$1 billion,"[Document(id=497c11f9fbbfb1f37be3dcd0a29d747c5f8f8967252a25e340b937417a080280, content: ' Three successful complaints for copyright infringement against a user account will result in the ac...', meta: {'name': 'YouTube', 'source_id': '4d18798daeceb997a6dcb04b575dbbdad768671ca9202116370611d944d510be', 'page_number': 1}, score: 175.82713084515086)]",1.0,0.9999998807907104,1.0,1.0,1.0 +Who should be able to challenge administrative orders in court?,individuals,the Supreme Court,"[Document(id=526d25ae3dbd5c2c09e9f06e633483cb073a627b7d6218748e2b42e71b316f0a, content: ' The Superior Electoral Tribunal (Tribunal Superior Eleitoral) is the court of last resort of electo...', meta: {'name': 'Supreme_court', 'source_id': 'af99711cf0b2a1212f0d8426a2b67dff359c1025c5a49019ddbd0e35af38f770', 'page_number': 1}, score: 117.11572983098434)]",0.0,-0.06360477209091187,0.0,0.0,0.0 +The Slavs were under what name in the early 6th century?,the Antes and the Sclaveni,Balaton Principality,"[Document(id=63d1469ba8c52e953e84a5272412efd96f10761e80271a6f8824c2b695835762, content: ' +Around the 6th century, Slavs appeared on Byzantine borders in great numbers.[page needed] The Byza...', meta: {'name': 'Slavs', 'source_id': 'a55731ff3cd03d8fe34d4e646dbfcdc2088f01190e5110c9ff0baf4e4b486847', 'page_number': 1}, score: 205.67525086861957)]",1.0,0.01038026437163353,1.0,1.0,1.0 +How are the results simplified when using the SASO?,"into four summaries that look specifically at responses that correspond to either homosexuality, heterosexuality, bisexuality or asexuality.","with the gain of the amplifier between the sensor and the analog-to-digital converter, the saturation speed can be increased","[Document(id=a3fc752defa31a3ba97e1a76467f1e902ff87984a4d0fb5db5bb756efede29a4, content: ' This is the only technique available under the standard for output formats that are not in the sRGB...', meta: {'name': 'Film_speed', 'source_id': 'cd350045b0fbc5c253411cbf952460337caeec603c6f046e13f60bcc9428df61', 'page_number': 1}, score: 108.3630433763505)]",0.0,0.08613400161266327,0.0,0.0,0.0 +What socioeconomic class used the Florentine Tuscan language in Dante's time?,upper class,urban and literary classes,"[Document(id=fbf2110b67db9729de11d4fe22a0c0ff32b3a45b47041c33f08674359d8df041, content: ' The city of Alghero in Sardinia was repopulated with Catalan speakers in the 14th century. The lang...', meta: {'name': 'Catalan_language', 'source_id': 'e2178d77cee39fac7bfc6ad15bc5cd10fa8a19c21faea32b0ea8a06b3d4a200f', 'page_number': 1}, score: 170.64869262627934)]",0.0,0.5015207529067993,0.0,0.0,0.0 +"In addition to ammo proposals, what other need was articulated?",The need for some form of tracer or smoke trail,"Active defense is ""the employment of limited offensive action and counterattacks to deny a contested area or position to the enemy"" (JP 1-02). It includes both ballistic missile defense and air breathing threat defense, and encompasses point defense, area defense, and high value airborne asset defense","[Document(id=a2da36fd5a8977b20b0785de4f7d0b07e2dd179a63bac36686ecb6a3ad2e5a38, content: ' The Air Force continues to pursue safe, secure and effective nuclear weapons consistent with operat...', meta: {'name': 'United_States_Air_Force', 'source_id': '1bb222de66e5a2c81f1d5b75bf3eced066939aec82934a5cb176a607b4e8cc8f', 'page_number': 1}, score: 81.6000617232985)]",0.0,0.05544251203536987,0.0,0.0,0.0 +What is that power that enables us to love?,Prevenient grace,mercy of God and His care for His creation,"[Document(id=ab06859e83076577bc2a04088a7525ddee1f44dedfa9ffcf46872843c8c86632, content: ' They neither expected nor sought the appearance of the stigmata or any other physical manifestation...', meta: {'name': 'Dominican_Order', 'source_id': 'f55ba4e5b37c737e23256c74229a0bf3a5c697b70e171c6ce20c437d900b023e', 'page_number': 1}, score: 91.85024388900882)]",0.0,0.2979738712310791,0.0,0.0,0.0 +"Who was assassinated on September 22, 189 AD?",He Jin,He Jin,"[Document(id=232041b4afb4ca71e3d1d3697502447b0c4bfa12f1eccde1a6f36b432b833244, content: ' 189 AD), half-brother to Empress He (d. 189 AD), plotted with Yuan Shao (d. 202 AD) to overthrow th...', meta: {'name': 'Han_dynasty', 'source_id': '060d95b91f9c20e5c8cd8b8a032ab44aac38ab88ce62d3c302bea0865f533568', 'page_number': 1}, score: 148.7241546412495)]",1.0,1.0,1.0,1.0,1.0 +How did Gaddafi change the legal system?,he introduced sharia as the basis for the legal system,sharia law,"[Document(id=21cb89e76694844f819e54d7269a6349d3404a6310be592588c5473ad2d8efe2, content: ' Gaddafi proclaimed that the People's Congresses provided for Libya's every political need, renderin...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 209.0268681523744)]",1.0,0.6846116781234741,1.0,1.0,1.0 +What name was it assumed Elizabeth would take upon her marriage to Philip?,"Mountbatten,",House of Mountbatten,"[Document(id=e315f255311d7dc4c756937a51f6602a99f1b8e3b0a145306a3d8c41c258bc57, content: ' She was educated privately at home. Her father acceded to the throne on the abdication of his broth...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 235.82296588222033)]",1.0,0.7660073637962341,1.0,1.0,1.0 +What was the attack on the British weakness?,Battle of Fort Bull,Port Moresby,"[Document(id=b0fed1abc72a4d56e9957121cabc7bf9e24323c310875bcb6c1dbad5036aa408, content: 'S.-UK Combined Chiefs of Staff, which was also in Washington. Allied resistance, at first symbolic, ...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 151.05475774942101)]",0.0,0.09719950705766678,0.0,0.0,0.0 +How much of South Tucson is Native American?,10%,Hohokam,"[Document(id=566e0e1e8fea2b2d4f4780311f659535816e073357090acf35a5060d287a12d1, content: 'Tucson (/ˈtuːsɒn/ /tuːˈsɒn/) is a city and the county seat of Pima County, Arizona, United States, a...', meta: {'name': 'Tucson,_Arizona', 'source_id': '1cd8368afbbb7156e4d37c16c9795a1cf7209fbd0036331b6759337ae62d7224', 'page_number': 1}, score: 204.4425567869381)]",1.0,-0.01937844417989254,1.0,1.0,1.0 +Whose theory claimed that species were unchanging and designed by God?,John Ray,John Ray,"[Document(id=9a450124afc12f193f72ae1f8310b72ada7cbbf8f34bbd71f66eb32b0f47efbc, content: ' With the development of the modern evolutionary synthesis in the 1930s and 1940s, Darwin's concept ...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 189.49249077670783)]",1.0,1.0,1.0,1.0,1.0 +Who can restore a database that has been corrupted?,database administrator,"the Archive would have to delete pages from its system upon request of the creator. The exclusion policies for the Wayback Machine may be found in the FAQ section of the site. The Wayback Machine also retroactively respects robots.txt files, i.e., pages that currently are blocked to robots on the live web temporarily will be made unavailable from the archives as well. +In late 2002, the Internet Archive","[Document(id=0b1c19c2f87338db98035e45f9e5406c75d590764931173b2f73a83e0eea3255, content: ' providing an authoritative statement of the archivist), the United States patent office and the Eur...', meta: {'name': 'Wayback_Machine', 'source_id': '1d3e2979ec8b766777dcd67b2e231e400023f336a0622836e5036408af125544', 'page_number': 1}, score: 84.4611791300581)]",0.0,0.0648309662938118,0.0,0.0,0.0 +What unpopular philosopher's text did Avicenna learn from?,Abu Abdullah Nateli,Abu Abdullah Nateli,"[Document(id=ec70e294a8a1203ecdbb9fe95270466a0129a5d607fe306df146559206aa5b31, content: ' Avicenna first began to learn the Quran and literature in such a way that when he was ten years old...', meta: {'name': 'Avicenna', 'source_id': '25cbb1011a29473e1dfa8d96ebe95bc8ead7393548de57e871f959624f7238f1', 'page_number': 1}, score: 189.51529079780602)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What was the event called that caused Ayutthaya to reclaim lost territories ?,the Chinese threat,Ayutthaya recovered its territories by 1770,"[Document(id=391f8278a6d664b0fda2a91e05a75fcb178e560aca2778cf3383f74470691074, content: ' Its trade and secular administrative reforms built a prosperous economy for more than 80 years. Fro...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 152.93271782643825)]",1.0,0.0805651843547821,1.0,1.0,1.0 +"Before the introduction of the Latin alphabet, how was Old English written?",runic system,runic system,"[Document(id=540a5b537ec7e1fdea905baae428e9a4c76167e7be305ec2dac494ec1ac16d1f, content: 'Old English (Ænglisc, Anglisc, Englisc) or Anglo-Saxon is the earliest historical form of the Englis...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 162.1246341956293)]",1.0,1.0,1.0,1.0,1.0 +"When groups that are antagonistic to each other form a coalition for hidden gain, it is called what?",unholy alliance,unholy alliance,"[Document(id=b281d8f2fa5c72bf9176af2f66fbd72725ba99c1c3e8e2e12044e694c6ee3bbe, content: ' This sum itself may be all or a portion of the difference between the actual (inflated) payment to ...', meta: {'name': 'Political_corruption', 'source_id': 'e7c84fb03cb06b72c4ab51eb24218e9f2a10b95493e43d575342db0ffa3e9e1d', 'page_number': 1}, score: 125.71453753599548)]",1.0,1.0,1.0,1.0,1.0 +What made a strong economic case for investing in low carbon technologies?,Stern Review,Clean Edge,"[Document(id=7726f77f73261125d28fc6db733db2a8ae6e451dfc266962ad1ddc5ccf22d22f, content: ' +New government spending, regulation, and policies helped the industry weather the 2009 economic cri...', meta: {'name': 'Renewable_energy_commercialization', 'source_id': 'fa5f7904b8df5272f4f870ed401f610435c916171c87b47deed9a45e6bd594ea', 'page_number': 1}, score: 108.44000862757655)]",1.0,0.03996780887246132,1.0,1.0,1.0 +"Who introduced the Latin version of the word ""insect""?",Pliny the Elder,Pliny the Elder,"[Document(id=d4ddb3955cf6ff9d6bed687f54220f695fef0951972c64309938d9cf7670f6ba, content: ' +The word ""insect"" comes from the Latin word insectum, meaning ""with a notched or divided body"", or ...', meta: {'name': 'Insect', 'source_id': 'e14215fdfb7e1394430dedb24549d773464f110b7f19b86558bc101f84126630', 'page_number': 1}, score: 184.29502006117178)]",1.0,1.0,1.0,1.0,1.0 +What was started by the Roman Catholic Church to dispense of the Cathars in Languedoc?,The Cathar Crusade,the German Reformation,"[Document(id=2562c4ed0cf84994d606d5866fb6563cf019303140e39c1b2a35e85652c879f9, content: ' In spite of influential supporters among the English aristocracy, such as John of Gaunt, the moveme...', meta: {'name': 'Late_Middle_Ages', 'source_id': 'fb62f16f8c6599f7200add017bece6e2ea916520e544af1dd94713410c254ce9', 'page_number': 1}, score: 133.62001907744167)]",0.0,0.26043757796287537,0.0,0.0,0.0 +What music service is a huge competitor for Tidal?,Spotify,Spotify,"[Document(id=dccc646845ca49e7358315767a80e7f3cbcc38b5a971e1af1ab775ad80542dee, content: ' Contemporary critics have noted the consistent minimalistic aesthetic exhibited throughout DONDA cr...', meta: {'name': 'Kanye_West', 'source_id': '4693bb2ae0f5b7c5ed3788607bba35813f42eada13da446f4c397c0715d2dc91', 'page_number': 1}, score: 139.18466144808437)]",1.0,1.0000001192092896,1.0,1.0,1.0 +How much was each copper token worth?,a halfpenny,half,"[Document(id=952bc6793e5bd05e920359a7d299dcdb76c771c23011997dd25bc323157a4af5, content: ' Experience with copper has assisted the development of other metals; in particular, copper smelting...', meta: {'name': 'Copper', 'source_id': 'de5c60a665b93f9b358be412d53a37ca43d90005e3ec9a553ac38a38f076fadc', 'page_number': 1}, score: 128.07191961798566)]",0.0,0.4059985876083374,0.0,0.0,0.0 +What ships were attacked in 1852?,Glencoe and Sea Nymph,"1,436 people were killed and 1,792 seriously injured, which affected morale badly. Another raid was carried out on 11/12 May 1941. Westminster Abbey and the Law Courts were damaged, while the Chamber of the House of Commons was destroyed. One-third of London's streets were impassable.","[Document(id=1a4ba8fa507fb2fb6dac9df67abbdff76568d8f7a2e9cf8a578677e271a7190c, content: ' +In the north, substantial efforts were made against Newcastle-upon-Tyne and Sunderland, which were ...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 159.2869259349145)]",0.0,-0.0633482113480568,0.0,0.0,0.0 +What was the first fossil to display both clearly traditional reptilian characteristics?,Archaeopteryx,Archaeopteryx,"[Document(id=5b537a30444228b3b5e39f8e28a321a8e774f6051560dcab29bb0ece8b5ae99e, content: ' Unlike Archaeopteryx and the non-avialan feathered dinosaurs, who primarily ate meat, recent studie...', meta: {'name': 'Bird', 'source_id': '76114e3e9c92caec1de2bcfd38b6f22e1b48487f665fa6392f914934a6748ae1', 'page_number': 1}, score: 186.97083867996844)]",1.0,0.9999999403953552,1.0,1.0,1.0 +What are the theories that integrate molecular genetics with Darwinian evolution called?,the modern evolutionary synthesis,Lamarckism,"[Document(id=9a450124afc12f193f72ae1f8310b72ada7cbbf8f34bbd71f66eb32b0f47efbc, content: ' With the development of the modern evolutionary synthesis in the 1930s and 1940s, Darwin's concept ...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 175.4835605613558)]",0.0,0.2597317695617676,0.0,0.0,0.0 +At what time did Tesla get dinner?,8:10 p.m,Milutin Tesla,"[Document(id=9a58b0104cf69e9abff0b56acffacb90ccade8544edf12c170db7baa7e3ca778, content: 'm., no Sundays or holidays excepted. He was ""mortified when [his] father made light of [those] hard ...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 148.651253464165)]",1.0,0.020163975656032562,1.0,1.0,1.0 +In what year did the French reach Alsace?,1444,1789,"[Document(id=5ba09395255feae4ebf751bb80a7a4fa5d79691518f143a0bddc36e8712f8fc6, content: ' As a result, Alsace was transformed into a mosaic of Catholic and Protestant territories. On the ot...', meta: {'name': 'Alsace', 'source_id': '1105499d303196fb0b7ae4d26209720b0040e63376f114d458fd77f05836cbd4', 'page_number': 1}, score: 186.03336468019958)]",1.0,0.42567989230155945,1.0,1.0,1.0 +Who was responsible for the handling of the Roman census?,two censors,augurs,"[Document(id=b80b0d7fa56110987db7fc274c40400c9afab47b60f93bd3980be6d29191c35f, content: 'The priesthoods of public religion were held by members of the elite classes. There was no principle...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 159.85134956306916)]",0.0,0.12105413526296616,0.0,0.0,0.0 +When was a resolution agreed to about Chinese human rights issues in San Francisco?,"April 1, 2008","April 24, 2008","[Document(id=4f40aab6f8383cb6b8f88176e135c4d6714583b1d76b8770a1d38af81eb3fd4c, content: ' Tony Goh, chairman of the Australian Council of Chinese Organisations, has said the ACCO would be t...', meta: {'name': '2008_Summer_Olympics_torch_relay', 'source_id': 'a45e6ea09a1ff12d8a239a682c8ef3735ae894d5ab6427b50909053bb046e8b5', 'page_number': 1}, score: 116.2648803199296)]",1.0,0.9696242213249207,1.0,1.0,1.0 +Who was the last pope to be crowned?,Paul VI,Cardinal Eugenio Pacelli,"[Document(id=0ba8e9751d1501b0f5e0a1ccd2f17ab7018f7b8f1e53f73d800134df09ba5e26, content: ' Unaffected, he wrote to his sisters Ancilla and Maria and told them both that he was fine. +On 30 No...', meta: {'name': 'Pope_John_XXIII', 'source_id': '21b200b333306d54410b53cfd12e123951dabfc1f534196e5309132d00e8184d', 'page_number': 1}, score: 205.87684528714007)]",0.0,0.2810269892215729,0.0,0.0,0.0 +Who handles serious medical emergencies on Norfolk Island?,the Royal Australian Air Force,Royal Australian Air Force,"[Document(id=61583a75fdba243f6a812f3bf73c41e686e4f8374db81e4dd5f1a4d5a4430336, content: ' +Non-Australian citizens who are Australian permanent residents should be aware that during their st...', meta: {'name': 'Norfolk_Island', 'source_id': '8be872c9c3302a16163599d52ec60841ddff13042f8e8e2e0aa14de2c192e2ee', 'page_number': 1}, score: 136.08818508050095)]",1.0,0.9404148459434509,1.0,1.0,1.0 +Were Roman soldiers expected to purchase their own equipment?,"wealthiest citizens, who were able to purchase the best equipment",the Roman state was forced to arm its soldiers at the expense of the state,"[Document(id=1c55594460ba4e27d8751041858090c94678420d1701172511f1bad1a1d70154, content: ' As a result, the Roman state was forced to arm its soldiers at the expense of the state, which it h...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 156.21619178330843)]",1.0,0.14337167143821716,1.0,1.0,1.0 +Who did Nasser's group try to assassinate?,Hussein Sirri Amer,Nuri al-Said,"[Document(id=c355e0f20036a60ada1e4711a62b4d11adebf07dce76a808203fdd588cc1168e, content: '"" Nasser ordered a crackdown against Syrian communists, dismissing many of them from their governmen...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 207.58747886895094)]",1.0,0.6631616353988647,1.0,1.0,1.0 +What was AJ Ayer an important member of?,Logical empiricism,the Arian Party,"[Document(id=2e20ace02a4527c4c0706b6e9a1dacf4dc045af3a98b5588a0e82992e03d18ef, content: ' Nevertheless, the practice of saying it at the altar grew; eventually Pius V made this practice uni...', meta: {'name': 'Athanasius_of_Alexandria', 'source_id': '2a8e7ec0a44075f1a567744982dcecf4bcea19f0065d4f73c742398a05fa1642', 'page_number': 1}, score: 137.83753446354876)]",0.0,0.04274138808250427,0.0,0.0,0.0 +In which year was a settlement reached regarding GE's pollution of the Housatonic River?,1999,1977,"[Document(id=0869972e54922afd3497ba60a5f3e981d9c0ab240629036531512fe359bc4da8, content: ' Around the same time, General Electric's Canadian counterpart, Canadian General Electric, was forme...', meta: {'name': 'General_Electric', 'source_id': '26d4044eb2d79191635af69e57c262af8041a70871e60e989128c458529f04cc', 'page_number': 1}, score: 116.6152783244603)]",1.0,0.4726710319519043,1.0,1.0,1.0 +What area in Charleston is considered the 'industrial area'?,the city's Neck area,Charleston–North Charleston–Summerville Metropolitan Statistical Area,"[Document(id=a00baea2441c87e38109afe86358b94ae591d905c9ff5c35c440986bd364ee66, content: 'Charleston is the oldest and second-largest city in the U.S. state of South Carolina, the county sea...', meta: {'name': 'Charleston,_South_Carolina', 'source_id': '6135d76cf2064b0c4f70e685c5c7e15b8093869ebd10f9a74e8acb07e2173763', 'page_number': 1}, score: 162.87774974359357)]",1.0,0.16861854493618011,1.0,1.0,1.0 +In what year did Frédéric obtain citizenship in France?,1835,1910,"[Document(id=79eeb1c3137235ac7064f222e6a7f84a9b4c281bae3c034a93d372701f8db89b, content: ' In 1910 it numbered seventeen convents or secondary houses. In 1905, it established a large house o...', meta: {'name': 'Dominican_Order', 'source_id': 'f55ba4e5b37c737e23256c74229a0bf3a5c697b70e171c6ce20c437d900b023e', 'page_number': 1}, score: 114.35774784689463)]",0.0,0.6511441469192505,0.0,0.0,0.0 +What was Tesla afraid someone was trying to do with his invention?,steal the invention,give it to the world,"[Document(id=1e4e1d3c7bd6960597e6d57e6d91165b39ccbebb9cfab5098f2e71e26d753bd3, content: '"" He stated that the theory was ""worked out in all details"" and that he hoped to soon give it to the...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 156.21514712424104)]",1.0,0.24044181406497955,1.0,1.0,1.0 +What is the group called that does not agree with government at all?,anarchists,cordon sanitarian,"[Document(id=da081545e8e8020b03087fbbcde823b131a8ffeb227970cc6962ac2b8197ec87, content: ' When a party achieves enough seats in an election to form a majority, the party's frontbench become...', meta: {'name': 'Political_party', 'source_id': '476a9215de81b75a4f17737eed1679570973e55b5ed6175aa0ec1014b09d4632', 'page_number': 1}, score: 101.09700252196419)]",0.0,0.14516454935073853,0.0,0.0,0.0 +What was the name of the bookstore the authors met in?,Old Corner Bookstore,Bibliothèque Bleue,"[Document(id=431a21bc2021ab05bedf5f9c62189ca16d692858fb84414f96b2f293034850da, content: '"" Women, as salonnières, were ""the legitimate governors of [the] potentially unruly discourse"" that ...', meta: {'name': 'Age_of_Enlightenment', 'source_id': 'd6935b64f79e30b7c3de4c952b643a55674c548cd0bb703870a094fd3e646d84', 'page_number': 1}, score: 94.00186696356431)]",0.0,0.2553384304046631,0.0,0.0,0.0 +Time and space are both examples of what type of resource?,complexity resources,energy,"[Document(id=00b574bfc939ab2a3239f2319b448e050abe92bc80f032d893d1104888d204a6, content: ' +Matter should not be confused with mass, as the two are not quite the same in modern physics. For e...', meta: {'name': 'Matter', 'source_id': 'fafedf64e24f54304d3e01e242b20132f162827f8682f638342026f9f4d705a3', 'page_number': 1}, score: 103.44828507534538)]",0.0,0.2168877124786377,0.0,0.0,0.0 +What had happened to Frederick's extra troops from Russia?,"Catherine, who ended Russia's alliance with Prussia and withdrew from the war","reinforce the 19,000-man army under the Duke of Brunswick-Bevern at Kolin in Bohemia","[Document(id=989fe46c10e07b18dbfff4038a4cb7c7cc9b631442009173db0e44321b462009, content: ' The attack on neutral Saxony caused outrage across Europe and led to the strengthening of the anti-...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 175.9372333242284)]",1.0,0.3140609860420227,1.0,1.0,1.0 +What percentage of its workforce did Dell terminate in from 2007 to 2008?,10%,10%,"[Document(id=cea0f1307bbe685005f9642d975856221cf9a5dd6f18a7b87c780156408770d1, content: '” +On April 23, 2008, Dell announced the closure of one of its biggest Canadian call-centers in Kanat...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 156.44545320970798)]",1.0,0.9999999403953552,1.0,1.0,1.0 +What organization was Rome's official caretaker?,the senate,The Vestals,"[Document(id=ec3ca2da10a5b33dfa44ec5cda121def852283ef4e6ecfef559bf7ea1b8ac7e1, content: ' Cult donations were the property of the deity, whose priest must provide cult regardless of shortfa...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 143.92065782312642)]",1.0,0.13071516156196594,1.0,1.0,1.0 +Who chose BYU as the location of the national Middle East Language Resource Center?,United States Department of Education,The Church of Jesus Christ of Latter-day Saints (LDS Church,"[Document(id=78a91c7580a0e664c3ceef9bbd618fc18e8a8810cf6d4ec5149e42ba06515b26, content: 'Brigham Young University (often referred to as BYU or, colloquially, The Y) is a private research un...', meta: {'name': 'Brigham_Young_University', 'source_id': 'cb3b81b6f2c540426803106face67bc5fd4db7b1a9ef422ec3601456732b4785', 'page_number': 1}, score: 213.8764733392019)]",1.0,0.10917423665523529,1.0,1.0,1.0 +Who discovered Hydrogen gas?,Robert Boyle,Robert Boyle,"[Document(id=b562ec53d471dfbb3ba9b12247b16c622039308f7ec5b5b5344127cfeb67e87d, content: ' +In 1671, Robert Boyle discovered and described the reaction between iron filings and dilute acids, ...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 206.25156723372163)]",1.0,0.9999998807907104,1.0,1.0,1.0 +Where was Polybius forced to go as hostage?,Rome,Egypt,"[Document(id=8196e650a52ea97a3680d7b8825d63c0c208b5e8e1b2fbce07e516caaffe5289, content: ' In 53 BC, Crassus launched a Roman invasion of the Parthian Empire (modern Iraq and Iran). After in...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 146.12013707324002)]",0.0,0.20476114749908447,0.0,0.0,0.0 +In what decade was disco popular?,1970s,1980s,"[Document(id=a1ebb6111bad62b247dd1b756728c9569fa6d4b9a446c19b1950e68e79933994, content: ' As of 2015, house music remains extremely popular in both clubs and in the mainstream pop scene whi...', meta: {'name': 'House_music', 'source_id': 'c556f4a459e8a99b6d2065bab8fd6a918d0a00f13291aad2473922492e4abf2e', 'page_number': 1}, score: 174.3308231329952)]",0.0,0.7754139304161072,0.0,0.0,0.0 +What do the rivers form?,lakes,lakes,"[Document(id=dfdb4032d57d026d1e52c68a1cff13d84af492b1c4c23fee3c33111d08105adf, content: ' The Rhine originates in a 30 square kilometre area in Switzerland and represents almost 60 percent ...', meta: {'name': 'Alps', 'source_id': 'd2eb3ad3af7d6110ae691bf40660ff7a79e4e9dbfd25785b4e55c7700bfcae79', 'page_number': 1}, score: 114.16977401000194)]",1.0,1.0,1.0,1.0,1.0 +What were the first 256 code points of Unicode made identical to? ,ISO-8859-1,CJK ideographs,"[Document(id=8c55128e915db91be79f675390a7251853ff83d2c6945bcbba98c2fef9e723ac, content: ' For example, the ""fullwidth forms"" section of code points encompasses a full Latin alphabet that is...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 175.52010913132966)]",1.0,0.10791099071502686,1.0,1.0,1.0 +"How many countries did her song ""Irreplaceable"" get number one status in?",five,"65 million records worldwide, and become the most successful Idol contestant in the U.S., selling over 14 million albums copies in the U.S. and has more Underwood has won seven Grammy Awards, the most Grammys by an ""American Idol"" alumnus. +Season five began on January 17, 2006. It remains the highest-rated season in the show's run so far. Two of the more prominent contestants during the Hollywood round were the Brittenum twins who were later disqualified for identity theft. +Chris Daughtry's performance of Fuel's ""Hemorrhage (In My Hands)"" on the show was widely praised and led to an invitation to join the band as Fuel's new lead singer, an invitation he declined. His performance of Live's version of ""I Walk the Line"" was well received by the judges but later criticized in some quarters for not crediting the arrangement to Live. He was eliminated at the top four","[Document(id=ec3b9b3ab5f2d528e25ddd559ed5ece0f75a24fbbd4154d11d3be76e232109cd, content: ' Both Underwood and Bice released the coronation song ""Inside Your Heaven"". Underwood has since sold...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 109.27218695122491)]",0.0,0.21880652010440826,0.0,0.0,0.0 +How many Examination Boards exist in India?,30,52.,"[Document(id=11a2244744af7f3792b4b6f74654916eec2e83b88635692b25149d8c43e77ee6, content: ' +The era of planning in Himachal Pradesh started 1948 along with the rest of India. The first five-y...', meta: {'name': 'Himachal_Pradesh', 'source_id': 'ddcec2b95c05dd3a27f226df5c7c3597d80c7deaf9af8238acb6fcd9e9d4f233', 'page_number': 1}, score: 111.51305710162845)]",0.0,0.17345157265663147,0.0,0.0,0.0 +What play showed an early depiction of civil disobedience?,Antigone,Antigone,"[Document(id=ad70291d4dec497bcd48d02a2bfbb4780dffd558cf0eee2d7c976f107f401335, content: 'One of its earliest massive implementations was brought about by Egyptians against the British occup...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 159.1381537658833)]",1.0,0.9999999403953552,1.0,1.0,1.0 +"Once in conversation with the Queen, how is Elizabeth addressed?",Ma'am,priggish schoolgirl,"[Document(id=3c1ecbe51e48e61a4d451c9646ffe3dac3fc64b2ee04786738bfa29af14f4309, content: '"" Before her tour through parts of Quebec in 1964, the press reported that extremists within the Que...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 151.64558411532542)]",1.0,0.025507476180791855,1.0,1.0,1.0 +In what century did Paris become largely into fashion?,19th,19th century,"[Document(id=0c04b1a35e4ceeb9462f4aa4eb8de3f2e575631e540b2b5c457542a40eb7ed58, content: ' The Hôtel Ritz on Place Vendôme opened in 1898, followed by the Hôtel Crillon in an 18th-century bu...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 178.0487233065503)]",1.0,0.8267526626586914,1.0,1.0,1.0 +When did Alfred the Great begin his reign?,871,11 December 1936,"[Document(id=06cbf9491f511f16650c4f94483faad3dd6deb11121a7158726cb23ef2b5b8c1, content: ' One of the few stirs arose when the Canadian Prime Minister, R. B. Bennett, considered the Duke for...', meta: {'name': 'George_VI', 'source_id': '5a05abc99d3e2c9f678c9681513c2d8cdfe043b062ba971ef3f9f31e7c0c4622', 'page_number': 1}, score: 137.500661055153)]",0.0,0.11978332698345184,0.0,0.0,0.0 +What two methods can be used when using Load Testing?,data or a large number of users,large quantities of data or a large number of users,"[Document(id=dd704f4bc758ab475db02844a8a24263dd4fdb32afbc7f7dc86b810f8fa62e4f, content: ' +Load testing is primarily concerned with testing that the system can continue to operate under a sp...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 149.3892484981509)]",1.0,0.8659517765045166,1.0,1.0,1.0 +what may possibly be a solution to net neutrality concerns? ,municipal broadband,Title II (common carrier) of the Communications Act of 1934,"[Document(id=7aba2d10978318bc5abba467ce5b46b678a83c63b04c2770eb6bfdb5855d1dce, content: ' On 16 January 2015, Republicans presented legislation, in the form of a U.S. Congress H.R. discussi...', meta: {'name': 'Internet_service_provider', 'source_id': 'b6cdeebacab1c0fc99cc348ba10a7ad5f8a09ca4691c74f89374730939ffbbb4', 'page_number': 1}, score: 100.06290327936973)]",1.0,0.28573131561279297,1.0,1.0,1.0 +What are the ground-attack aircraft in the USAF usually deployed in support for? ,U.S. ground forces,"interception of bombers and other fighters, reconnaissance, and patrol","[Document(id=d2709fd4938aadfc321fd75534f338a1182dec7bcf787b2638ddb43d1a59b926, content: ' Special Operations Command (USSOCOM). It conducts long-range, special operations missions, and is e...', meta: {'name': 'United_States_Air_Force', 'source_id': '1bb222de66e5a2c81f1d5b75bf3eced066939aec82934a5cb176a607b4e8cc8f', 'page_number': 1}, score: 192.83729826021232)]",1.0,0.3352121114730835,1.0,1.0,1.0 + In what year did the invasion of Russia take place?,1812,1854,"[Document(id=af2d0cc576e7737d85b64a99a23cdab42a569764a59025658756f21259782f72, content: ':130–43 +In the spring of 1854 the Russians again advanced, crossing the Danube River into the Turkis...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 171.36151765222343)]",0.0,0.6653627157211304,0.0,0.0,0.0 +What was the practice of religion to the Romans?,personal choice,to absorb the deities and cults of other peoples,"[Document(id=876b4580a64fd77fb8816f9aef13a054a2ac16b7e85433b15298374090048432, content: ' Women, slaves, and children all participated in a range of religious activities. Some public ritual...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 231.74310101590044)]",1.0,-0.03058496303856373,1.0,1.0,1.0 +What is one example of failure testing?,Software fault injection,"software scalability. The related load testing activity of when performed as a non-functional activity is often referred to as endurance testing. Volume testing is a way to test software functions even when certain components (for example a file or database) increase radically in size. Stress testing is a way to test reliability under unexpected or rare workloads. Stability testing (often referred to as load or endurance testing) checks to see if the software can continuously function well in or above an acceptable period. +Development Testing is a software development process that involves synchronized application of a broad spectrum of defect prevention and detection strategies in order to reduce software development risks, time, and costs. It is performed by the software developer or engineer during the construction phase of the software development lifecycle. Rather than replace traditional QA focuses, it augments it. Development Testing aims to eliminate construction errors before code is promoted to QA; this strategy is intended to increase the quality of the resulting software as well as the efficiency of the overall development and QA process. +In contrast, some emerging software disciplines such as extreme programming and the agile software development movement, adhere to a ""test-driven software development"" model. In this process, unit tests are written first, by the software engineers (often with pair programming in the extreme programming methodology). Of course these tests fail initially; as they are expected to. Then as code is written it passes incrementally larger portions of the test suites","[Document(id=dd704f4bc758ab475db02844a8a24263dd4fdb32afbc7f7dc86b810f8fa62e4f, content: ' +Load testing is primarily concerned with testing that the system can continue to operate under a sp...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 127.9253077734964)]",1.0,0.3516249358654022,1.0,1.0,1.0 +"Out of the maximum amount of players allowed, one must be a what?",goalkeeper,home-grown players,"[Document(id=0d02d0727bbf3d538faf4d0e4b493fb1a575377aa15b678ef4c8a1fa1646ad91, content: ' By 2009, under 40% of the players in the Premier League were English. +In response to concerns that ...', meta: {'name': 'Premier_League', 'source_id': '369baa9a5a854704f16fadd35144d3a3150cf1e9ae1671d9b50a3448547b814b', 'page_number': 1}, score: 98.30450237695865)]",0.0,0.23592567443847656,0.0,0.0,0.0 +In which year was the new constitution ratified?,1855,1956,"[Document(id=c5c366f06788f022f78b708df479c782035fa76318f4adb339bcf7725475034a, content: ' Nasser was welcomed by large crowds of people lining the streets of Cairo on his return to Egypt on...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 125.38863131677583)]",0.0,0.7131929993629456,0.0,0.0,0.0 +What vessel was attacked in 1845?,Naiad,Rossiya,"[Document(id=97e16b6bc2e6669fe1654aa87cbb22e40d888b619d1aef47d13bd6236d3f958a, content: ' The outnumbered Russian Baltic Fleet confined its movements to the areas around its fortifications....', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 161.24723935586772)]",0.0,0.0533168725669384,0.0,0.0,0.0 +When did Travel + Leisure list Yale as one of the most beautiful campuses?,2011,2011,"[Document(id=afdbd08089615eb2bbb9dd9a5945a7e094c57e1f63a4d1f083e6183556a9f3e7, content: ' Moreover, Yale has been accused of failing to treat workers with respect. In a 2003 strike, however...', meta: {'name': 'Yale_University', 'source_id': 'f33638c4d6b8d7572ba500f3a8affca1729aae3046c83a94e2a7424dd50f6549', 'page_number': 1}, score: 171.7668856618156)]",1.0,0.9999999403953552,1.0,1.0,1.0 +What sort of variants did the Human Genome Project indicate that many of the measured transcripts were?,alternative variants,alternative variants,"[Document(id=74c523cc7b77506967566fb8161bf4043d30ccf8c68e884af6cf2b9d24106e71, content: ' Conversely, plants can have extremely large genomes, with rice containing >46,000 protein-coding ge...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 109.03169875302669)]",1.0,1.0,1.0,1.0,1.0 +During what year did the Italians invade Libya?,1911,1980,"[Document(id=cdc48a6ad3a2e7247518958e73c2ec683689a78ed8978eafe40f8f9c84ca8545, content: ' Military spending increased, while other administrative budgets were cut back. Libya had long suppo...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 205.52285831252408)]",1.0,0.4845914840698242,1.0,1.0,1.0 +What did Davidson believe the God of Aristotle is synonymous with?,rational thought,Jupiter,"[Document(id=a8cc5557b921c5766b219ced4b8319da3ea79db597efbb98b7cbe6998afe6656, content: ' Ambrose, the influential Bishop of Milan and future saint, wrote urging the rejection of Symmachus'...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 185.2449344711922)]",0.0,0.005320226773619652,0.0,0.0,0.0 +What percentage of students at Notre Dame are the children of former Notre Dame students?,21–24%,"1987–2005), there was a rapid growth in the school's reputation, faculty, and resources. He increased the faculty by more than 500 professors; the academic quality of the student body has improved dramatically, with the average SAT score rising from 1240 to 1360; the number of minority students more than doubled; the endowment grew from $350 million to more than $3 billion; the annual operating budget rose from $177 million to more than $650 million; and annual research funding improved from $15 million to more than $70 million. Notre Dame's most recent[when?] capital campaign raised $1.","[Document(id=f676fd95f068792ff77624bf3b35dae04ac9867081bfd07e539ab85e7d1bd983, content: ', Notre Dame's Vice President of Student Affairs, added that coeducation ""opened up a whole other po...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 132.79254919815736)]",1.0,0.07137862592935562,1.0,1.0,1.0 +On what day did U.S> planes located Ozawa's fleet?,second,4 June,"[Document(id=fe75f8e6c8d71ab8d430dfcb6b295cf49793b5e0f3c6dcd092c231ca1c16a92b, content: ' +A Japanese force was sent north to attack the Aleutian Islands. The next stage of the plan called f...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 167.2999768147891)]",1.0,0.20023301243782043,1.0,1.0,1.0 +Greek was crowned the European champions in what event?,Euro 2004,Euro 2004,"[Document(id=9c5e4e5ebe04f6d60e8e13bc3cfe5cd0c8fa7c41c1148c916c472a26ca7c677b, content: ' Their best ever performance was in the 1896 Summer Olympics, when Greece finished second in the med...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 165.19965128505237)]",1.0,1.0,1.0,1.0,1.0 +Which tense indicates that an action hasn't finished?,imperfective,/p pʰ b/.,"[Document(id=08cfca2838b0ec38cf0ea4ba7b4b0c80a691af71784941039d34a905b951dce2, content: ' Punjabi has lost breathy-voiced consonants, which resulted in a tone system, and therefore has a di...', meta: {'name': 'Aspirated_consonant', 'source_id': 'ded60872a72621c0cc2ee33d9a7b0d60f1474075e260da958b752dbe72e00321', 'page_number': 1}, score: 121.47809687609444)]",0.0,0.0958951860666275,0.0,0.0,0.0 +What acoustic element could be distorted by different audio settings in iPods prior to 2007?,Bass,bass instruments,"[Document(id=25a9235e2187510deb110e2fe84f6e0f7a2dbacbbd0bd491e44e8a83b3be8cbf, content: ' This would happen for EQ settings like R&B, Rock, Acoustic, and Bass Booster, because the equalizer...', meta: {'name': 'IPod', 'source_id': 'f0324763387a7e11c235569bada6935891b2c5ba40dd065ab5acc1bb5de8dbed', 'page_number': 1}, score: 144.93989592071756)]",1.0,0.4064074754714966,1.0,1.0,1.0 +In what way can Neutral triatomic hydrogen exist?,excited form,protonated molecular hydrogen,"[Document(id=6e618286b9705860ffa2586c851128c816120c84dcf91348f6ee11f32b6d590f, content: ' This species is central to discussion of acids. Under the Bronsted-Lowry theory, acids are proton d...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 126.73501886889714)]",1.0,0.1572583019733429,1.0,1.0,1.0 +How are children used in the conflicts in Burma?,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements.,"Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements. The Independent reported in June 2012 that ""Children are being sold as conscripts","[Document(id=e39a424de393c7814379d5771c4abc998c371eadee31f5a4692a573651cdce01, content: ' South Africa also voted against the resolution. +There is consensus that the military regime in Myan...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 131.50547962808298)]",1.0,0.8325707316398621,1.0,1.0,1.0 +In what decades were Arsenal players stereotyped in a negative way?,1970s and 1980s,Os Arsenalistas,"[Document(id=dfaec7f9e3204887acf9b981d6586d85b481c4e77939d8133c7905bd55afb9ff, content: ' In 1909, Sparta Prague adopted a dark red kit like the one Arsenal wore at the time; in 1938, Hiber...', meta: {'name': 'Arsenal_F.C.', 'source_id': 'b197b14120aabbc3bf2073d4b3e99c1c51d56d5ac5248f8a97dadcfa056a6f7e', 'page_number': 1}, score: 78.84011165012167)]",1.0,-0.08058242499828339,1.0,1.0,1.0 +When did new construction start in Melbourne?,late 1890s,"Melbourne Museum, Federation Square, the Melbourne Exhibition and Convention Centre, Crown Casino and the CityLink tollway","[Document(id=8f12a4f2f110f8e6ee345f7bd435b34c8ee9ca95f6be09352773bd3feef59307, content: ' +The discovery of gold in Victoria in mid 1851 led to the Victorian gold rush, and Melbourne, which ...', meta: {'name': 'Melbourne', 'source_id': 'e9cc00699ea29b72949874ec0e147efa277ecdaa2a727246f376aab1a8d342bd', 'page_number': 1}, score: 192.46212824953716)]",1.0,0.04737495630979538,1.0,1.0,1.0 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:5.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:5.csv new file mode 100644 index 0000000..4efb4df --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:1__chunk_size:5.csv @@ -0,0 +1,127 @@ +questions,true_answers,predicted_answers,retrieved_docs,doc_mrr,sas,doc_map,doc_recall_single_hit,doc_recall_multi_hit +What were the earliest priesthoods? ,flamines,priestly caste,"[Document(id=81f6510f9c3161c284ded9e3c69551eff2ab954b99d32b61663123ece4455df3, content: ' Some rites of the domus (household) were held in public places but were legally defined as privata ...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 176.58292358181697)]",1.0,0.04427206516265869,1.0,1.0,1.0 +When was a third visit to Mnafeld scheduled? ,early 1546,"April 29, 2011","[Document(id=005790da2506fa1debe2ca34f643aaa24f11655bf9331396afcc9fc3c636a8f9, content: ' Interestingly, the poll also found that even South Jersey voters expressed opposition to the plan; ...', meta: {'name': 'Atlantic_City,_New_Jersey', 'source_id': '40e9ddfdc81bef98169b0c6d3711f5c9ebc0931bd0a274d9cc6ce967dca6cb85', 'page_number': 1}, score: 87.80952109419226)]",0.0,0.4457192122936249,0.0,0.0,0.0 +What was the name of the airfield where heavy raids took place?,Farnborough,Rotterdam,"[Document(id=3fe9edc56bd80e018f3b5d2c1a425a36eeeae2507d5266fa8dcf4b61ff983f68, content: ' Thereafter bombing operations were against ports and shipping and propaganda leaflet drops. Operati...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 164.93550538086404)]",1.0,0.19386684894561768,1.0,1.0,1.0 +The Modern English ending -'s is derived from what ending in Old English?,-es,"Ænglisc, Anglisc, Englisc","[Document(id=169a5194d5b8e4398d0437961bfa539bb8d8fa94fb38d4cba89afa5e1b31232c, content: 'Old English (Ænglisc, Anglisc, Englisc) or Anglo-Saxon is the earliest historical form of the Englis...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 110.28235177333441)]",1.0,0.1304444968700409,1.0,1.0,1.0 +In what area is it common for spiritual mentorship to be extremely high?,the West,Protestant canon of scripture,"[Document(id=33bae84e0fadac77ab1c114162e6f7e6df9a4d4170cf8108b6c99042e377a34b, content: ' The religion's leadership, while disclaiming divine inspiration and infallibility, is said to provi...', meta: {'name': 'Jehovah%27s_Witnesses', 'source_id': '24c4ceb07a03deed95302a705611ef12a9f83e147ff32a82bd8e3659b73793ec', 'page_number': 1}, score: 134.28319020928075)]",0.0,0.03074711561203003,0.0,0.0,0.0 +Who is the only Greek volleyball club to win European titles?,Olympiacos,Olympiacos,"[Document(id=927ee26533dc7f17be50a4cb7fc74bed43a4bc44825d916333e6dd9c5d18c958, content: ' The Greek league, the A1 Ethniki, is considered one of the top volleyball leagues in Europe and the...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 177.5734971726734)]",1.0,1.0,1.0,1.0,1.0 +How many judges were on the panel for this season?,four,fourteenth and fifteenth,"[Document(id=92c657af0ea397b4fe292701d891bd9f255649c72802b358eda1429072392d21, content: '. Lopez, Urban and Connick, Jr. all returned as judges for the show's fourteenth and fifteenth seaso...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 122.80108301911179)]",1.0,0.5482897162437439,1.0,1.0,1.0 +"Along with a chemical process, what type of process is used by night vision devices to convert infrared into visible light?",electrical,electrical,"[Document(id=05e7c56a6da1ed703309ec6f627bc69c369ec51519a9e5eb8c442673c62365e4, content: ' Infrared can also be used to detect protostars before they begin to emit visible light. Stars emit ...', meta: {'name': 'Infrared', 'source_id': 'ba9d9843065facd8aa75e472f0c7d760ae3d78712335a49f7abaf499acb21d1c', 'page_number': 1}, score: 129.47865242752295)]",1.0,1.0,1.0,1.0,1.0 +The second most prominent religion in New York is what?,Judaism,Judaism,"[Document(id=4437c778daec446c4362623ed3261e5ec4170314f4aa14f8381b8c7ee6eef3fe, content: ' +Christianity (59%), particularly Catholicism (33%), was the most prevalently practiced religion in ...', meta: {'name': 'New_York_City', 'source_id': '8a1509d057a51ed7a68b42ff87df5d459ac7f15f3d0f4f1685859ba197f9686d', 'page_number': 1}, score: 209.32334393129398)]",1.0,1.0,1.0,1.0,1.0 +The Free Hanseatic City of Bremen includes which cities?,Bremen and Bremerhaven,"Köln or Cologne Carnival, as well as Mainz and Düsseldorf are the largest and most famous. Other cities have their own, often less well-known celebrations, parades and parties such as Worms am Rhein, Speyer, Kaiserslautern, Frankfurt, Darmstadt, Mannheim, Ludwigshafen, Stuttgart, Augsburg and München [Munich] Nürnberg","[Document(id=fb95fc203542f93f3545ff98f4c4744c4d94b74ba0b46562cac74ea4c34fcc21, content: ' The Carnival season also includes Fastelavens søndag (with cream buns) and fastelavensris with deco...', meta: {'name': 'Carnival', 'source_id': '6276aebf7ba195a529455cfda1ccf9d9aa6ddf58fac44b707047b161538ef46e', 'page_number': 1}, score: 149.5881317831743)]",0.0,0.24246129393577576,0.0,0.0,0.0 +What seeks to limit the use of polysyllabic characters?,CPC,lack septa,"[Document(id=e17b1a29df1c9f5c2187144a80272f17ecd053e6e3aedeaf8ab5cebd902b391d, content: ' Many polychaetes have a muscular pharynx that can be everted (turned inside out to extend it). In t...', meta: {'name': 'Annelid', 'source_id': '4d1835976d6f8a3a104eed564c45fb20b1a080365dcb082f33afd480371401ed', 'page_number': 1}, score: 67.46384235035063)]",0.0,0.09892848879098892,0.0,0.0,0.0 +In what courts can federal habeas corpus suits be brought?,federal,state courts,"[Document(id=7f051b6956fd0f9827b2a02cbd64b58b99b63b11bc1b8803eb572808e15e4f5d, content: ' The purpose of federal habeas corpus is to ensure that state courts, through the process of direct ...', meta: {'name': 'Capital_punishment_in_the_United_States', 'source_id': '0a4e4dda32f7be6a4f78da5ed9ee21d2cc7659112c0d087b2923a482bdc77b5b', 'page_number': 1}, score: 114.71892461568778)]",1.0,0.3909306526184082,1.0,1.0,1.0 +What did Whitehead consider himself as a philosopher? ,rank amateur,process philosophy,"[Document(id=08668bbf8e8bcec721195bb786b2eb33635f6112b144f005903dec55795b998e, content: '"" +Historically Whitehead's work has been most influential in the field of American progressive theol...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 235.39117807275846)]",1.0,-0.031905245035886765,1.0,1.0,1.0 +What condition does a person have to meet to be allowed by law on a coin?,deceased,worshipping God in the manner and season most agreeable to the dictates of his own conscience,"[Document(id=0e1d4f1d8d4e3533adb041cb5f1b6a2a3c5856cee62abc76e787c6878ad0d6b9, content: ' Similarly, the Constitution of Massachusetts originally provided that ""no subject shall be hurt, mo...', meta: {'name': 'Separation_of_church_and_state_in_the_United_States', 'source_id': '9e181b8958f59a2f5a36329a7f787f416428f691cac0094c165c53403bedaabf', 'page_number': 1}, score: 91.80492411481453)]",0.0,0.10118145495653152,0.0,0.0,0.0 +Which region of the US experiences rain year round?,Pacific Northwest,Southwest,"[Document(id=994f169f938f2ae6c5b72207e8f053085554aee5e18da6a0e4c89bdb0a7fb708, content: ' The Southwest has the worst droughts; one is thought to have lasted over 500 years and to have hurt...', meta: {'name': 'Geography_of_the_United_States', 'source_id': '6eef5e1e8e3a1d489b23e0304d3cc9472b84a44871b2fd5ecfc8ab3f12e8ccf0', 'page_number': 1}, score: 179.13218864040363)]",1.0,0.6455833315849304,1.0,1.0,1.0 +What is the ability to recognize and adapt to new specific pathogens called?,Adaptive (or acquired) immunity,Antigen specificity,"[Document(id=8e364a66a416e7184c815546c99f2af87e1565f3c1094415321a6192f360b407, content: ' The adaptive immune response is antigen-specific and requires the recognition of specific ""non-self...', meta: {'name': 'Immune_system', 'source_id': 'fcb8c5092b2596a66d4f96587d653cbabda606874d86632c35ec40ef8f023051', 'page_number': 1}, score: 157.71412992185162)]",1.0,0.2857634425163269,1.0,1.0,1.0 +Who allegedly hit a home run to the Center?,Babe Ruth,Babe Ruth,"[Document(id=2abfa717cced74cfc35f5836efd511c9dcad606c2013e9cf62940c0910458753, content: ' Because of the tie the Giants and Cubs ended up tied for first place. The Giants lost the ensuing o...', meta: {'name': 'Chicago_Cubs', 'source_id': '10bd5330115a2fe2b9ef60d21abe3942769709161783409af0c4444ccac129c3', 'page_number': 1}, score: 149.42361214417218)]",1.0,1.0,1.0,1.0,1.0 +Which type of graduate from Imperial earned the second highest average starting salary after graduation?,Computing graduates,2011/12 – £765.2 million) and total expenditure of £754.,"[Document(id=38d36ddbfcff6c32f28791ecef866ff54adf64d946d9968ffab17a763c85d252, content: ' +In the financial year ended 31 July 2013, Imperial had a total net income of £822.0 million (2011/1...', meta: {'name': 'Imperial_College_London', 'source_id': '8ff02d910ce681fdd72074b13ed322271625386966ee827dabf1c93ca556fd6e', 'page_number': 1}, score: 166.73957922427968)]",1.0,-0.0481702983379364,1.0,1.0,1.0 +In what year did Eisenhower propose his second civil rights act?,1960,1956,"[Document(id=603e948e2d5861c60c18ba7b1c0f362e16e56a274db386f3ebff62d4701f67a4, content: ' +In November 1956, Eisenhower forced an end to the combined British, French and Israeli invasion of ...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 179.41434942700835)]",1.0,0.651989758014679,1.0,1.0,1.0 +What is the current height limitation in Paris in central areas?,50 m,smaller than the Paris Region,"[Document(id=2c90b8f3aa4ef32856137bcaa02b53d2d47c621a15ef25588a8833e564cba3cd, content: ' The Paris Urban Area, or ""unité urbaine"", is a statistical area created by the French statistical a...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 164.6012210741375)]",1.0,0.004089668393135071,1.0,1.0,1.0 +Income level was found to have a signification correlation to which outcome in the study?,nutrition literacy,Crime rate,"[Document(id=dcd8da4b243b1a8d91858407ae3ae6dac8e86b8331f09a52ba058f6361484420, content: ' Other studies have confirmed this relationship. The UNICEF index of ""child well-being in rich count...', meta: {'name': 'Economic_inequality', 'source_id': '648ff32b8c146c974b23e115e3a61420b1cdbea9de1f5bf38442ce4c1d9caa42', 'page_number': 1}, score: 104.15467669914096)]",0.0,0.12108045816421509,0.0,0.0,0.0 +How much did Viacom want in damages in their lawsuit against youtube?,$1 billion,$1 billion,"[Document(id=720eb3795a623e31335108bcc29a4b774e33272bb625d5968983846dd3b35f68, content: ' Three successful complaints for copyright infringement against a user account will result in the ac...', meta: {'name': 'YouTube', 'source_id': '4d18798daeceb997a6dcb04b575dbbdad768671ca9202116370611d944d510be', 'page_number': 1}, score: 170.60211135792204)]",1.0,1.0,1.0,1.0,1.0 +Who should be able to challenge administrative orders in court?,individuals,individuals,"[Document(id=cf59c0f0619e715b979051e0a84530bf73463713fa11f2961866d233ac2537f5, content: ' Dicey's: “no man is punishable or can be lawfully made to suffer in body or goods except for a dist...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 146.15927986962998)]",1.0,1.0000001192092896,1.0,1.0,1.0 +The Slavs were under what name in the early 6th century?,the Antes and the Sclaveni,Germans,"[Document(id=23a53a873c47e9fe2410e2f688b410f3c194b4abd23b71a9c7e34f1be276e7ef, content: ' +The relationship between the Slavs and a tribe called the Veneti east of the River Vistula in the R...', meta: {'name': 'Slavs', 'source_id': 'a55731ff3cd03d8fe34d4e646dbfcdc2088f01190e5110c9ff0baf4e4b486847', 'page_number': 1}, score: 211.74025773273928)]",1.0,0.16895852982997894,1.0,1.0,1.0 +How are the results simplified when using the SASO?,"into four summaries that look specifically at responses that correspond to either homosexuality, heterosexuality, bisexuality or asexuality.",set to the EI with no exposure compensation,"[Document(id=4848e3d05918ff1f327c2508412043f33a28fe67af2a248a7b13fa45e3717aa1, content: ' This is the only technique available under the standard for output formats that are not in the sRGB...', meta: {'name': 'Film_speed', 'source_id': 'cd350045b0fbc5c253411cbf952460337caeec603c6f046e13f60bcc9428df61', 'page_number': 1}, score: 113.51941140142256)]",0.0,0.042886704206466675,0.0,0.0,0.0 +What socioeconomic class used the Florentine Tuscan language in Dante's time?,upper class,"Italo-Dalmatian language family. This wide category includes: +The Sardinian language is considered to be its own Romance language family, separate not only from standard Italian but also the wider Italo-Dalmatian family, and it includes the Campidanese Sardinian and Logudorese Sardinian variants. However, Gallurese, Sassarese, and Corsican are also spoken in Sardinia","[Document(id=7e4fa174f7215176b7089831728fb3591837413591a1e7ed9f2d22662a832dc5, content: ' A regional Italian dialect is generally standard Italian that has been heavily influenced or mixed ...', meta: {'name': 'Dialect', 'source_id': '2e05b686e48c9512ae5466f1d15202b44fdc903ea32d0a42c5dbc9ef173d051d', 'page_number': 1}, score: 174.88098487849155)]",1.0,0.07159404456615448,1.0,1.0,1.0 +"In addition to ammo proposals, what other need was articulated?",The need for some form of tracer or smoke trail,"anti-aircraft guns, and explosives. Other targets would be considered if the primary ones could not be attacked because of weather conditions. +A further line in the directive stressed the need to inflict the heaviest losses possible, but also to intensify the air war","[Document(id=6d4509eb1003e4fc992f26a598e45ce12e5fb93908416158debcc49779863903, content: ' The indifference displayed by OKL to Directive 23 was perhaps best demonstrated in operational dire...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 88.19963965663412)]",0.0,0.155845507979393,0.0,0.0,0.0 +What is that power that enables us to love?,Prevenient grace,Long-term relationships,"[Document(id=ea267afb4d8dad8c8255ebba8353c34df146b7c6af273678dee26c388f0bebaa, content: ' caregiving, appropriate attachment), although these skills are not strongly developed until late ad...', meta: {'name': 'Adolescence', 'source_id': '4687b887b5783807c17e352eca017193687de7d3428929359d466eaed34ebbe4', 'page_number': 1}, score: 107.35020570859797)]",0.0,0.007295046001672745,0.0,0.0,0.0 +"Who was assassinated on September 22, 189 AD?",He Jin,He Jin,"[Document(id=6b20ebc1df3c4a4239954b87829f542f45ce58307a74b92a5c1a806a2183d971, content: ' When the eunuchs discovered this, however, they had her brother He Miao (何苗) rescind the order. The...', meta: {'name': 'Han_dynasty', 'source_id': '060d95b91f9c20e5c8cd8b8a032ab44aac38ab88ce62d3c302bea0865f533568', 'page_number': 1}, score: 167.52952159485534)]",1.0,1.0000001192092896,1.0,1.0,1.0 +How did Gaddafi change the legal system?,he introduced sharia as the basis for the legal system,sharia law,"[Document(id=1b2e436fd42f2c0f0d0cc510814e434b693f9c004a137710dd97898a0cc1af7f, content: ' Gaddafi proclaimed that the People's Congresses provided for Libya's every political need, renderin...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 208.43309463032864)]",1.0,0.6846115589141846,1.0,1.0,1.0 +What name was it assumed Elizabeth would take upon her marriage to Philip?,"Mountbatten,",King Edward VIII,"[Document(id=5d0ec73e68030cca16b151a58c415ccbb15a4d620c4c2e2e7d1d02aa31e7cfc7, content: ' +Elizabeth and Philip were married on 20 November 1947 at Westminster Abbey. They received 2500 wedd...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 225.84273184415082)]",1.0,-0.006807837635278702,1.0,1.0,1.0 +What was the attack on the British weakness?,Battle of Fort Bull,The British wanted Berlin,"[Document(id=6c0f42d1676a68f1b531e9e5b1359d998c737c45c3b08b7486a9ab7ac5c47827, content: ' He had serious disagreements with Churchill and Montgomery over questions of strategy, but these ra...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 151.3026455113039)]",0.0,0.1772448569536209,0.0,0.0,0.0 +How much of South Tucson is Native American?,10%,520,"[Document(id=33e9dfafdd354d53b8df9e52dedc7d21df6b1664a929a02fd5759ba272c2f879, content: 'Tucson (/ˈtuːsɒn/ /tuːˈsɒn/) is a city and the county seat of Pima County, Arizona, United States, a...', meta: {'name': 'Tucson,_Arizona', 'source_id': '1cd8368afbbb7156e4d37c16c9795a1cf7209fbd0036331b6759337ae62d7224', 'page_number': 1}, score: 204.02207210630115)]",1.0,0.1610102504491806,1.0,1.0,1.0 +Whose theory claimed that species were unchanging and designed by God?,John Ray,Darwin,"[Document(id=8a3d99861a789823d22fe5a70f29b2241456430bbdc6ef3cb4304a4d81769442, content: ' With the development of the modern evolutionary synthesis in the 1930s and 1940s, Darwin's concept ...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 178.86740838544142)]",1.0,0.16302336752414703,1.0,1.0,1.0 +Who can restore a database that has been corrupted?,database administrator,dedicated backup files,"[Document(id=d62f58c8737fe86268de0ebf0cb0ef4eae1f9ba5f33735f7cadb2e83beb9e377, content: 'g., cases when the database is found corrupted due to a software error, or if it has been updated wi...', meta: {'name': 'Database', 'source_id': '98b09743ece9be42ad021f0dc6ffab08c322e7eeace6aae0b98b4bbf67224bd3', 'page_number': 1}, score: 93.70196948277217)]",1.0,0.32296425104141235,1.0,1.0,1.0 +What unpopular philosopher's text did Avicenna learn from?,Abu Abdullah Nateli,Quran,"[Document(id=4929afcbf8a25329cc28344a59a6da0673a0b29592a46e295b09e09e3666b249, content: ' Avicenna's views on Islamic theology (and philosophy) were enormously influential, forming part of ...', meta: {'name': 'Avicenna', 'source_id': '25cbb1011a29473e1dfa8d96ebe95bc8ead7393548de57e871f959624f7238f1', 'page_number': 1}, score: 213.08857882277886)]",1.0,0.29319992661476135,1.0,1.0,1.0 +What was the event called that caused Ayutthaya to reclaim lost territories ?,the Chinese threat,Ayutthaya seized Tenasserim and Lan Na,"[Document(id=ca6db29fb81cf6a69632107c67ace9bdd1fdd8f61142978a0684c6cd6f540122, content: ' Ayutthaya seized Tenasserim and Lan Na, and Portuguese mercenaries established Portuguese rule at T...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 154.99059803938104)]",1.0,0.2441307157278061,1.0,1.0,1.0 +"Before the introduction of the Latin alphabet, how was Old English written?",runic system,"Ænglisc, Anglisc, Englisc","[Document(id=169a5194d5b8e4398d0437961bfa539bb8d8fa94fb38d4cba89afa5e1b31232c, content: 'Old English (Ænglisc, Anglisc, Englisc) or Anglo-Saxon is the earliest historical form of the Englis...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 167.03188545150329)]",1.0,0.10017972439527512,1.0,1.0,1.0 +"When groups that are antagonistic to each other form a coalition for hidden gain, it is called what?",unholy alliance,unholy alliance,"[Document(id=f1fa43fc9c1c5bd46b9f5455701bf99b0e98d4579816d755389bcc2dbba1bcda, content: ' This sum itself may be all or a portion of the difference between the actual (inflated) payment to ...', meta: {'name': 'Political_corruption', 'source_id': 'e7c84fb03cb06b72c4ab51eb24218e9f2a10b95493e43d575342db0ffa3e9e1d', 'page_number': 1}, score: 127.2107380542836)]",1.0,1.0,1.0,1.0,1.0 +What made a strong economic case for investing in low carbon technologies?,Stern Review,The energy from the sun,"[Document(id=6873b580ad3f15a0882d302e702bc9af713e66089c5a7f2aa10feabe24a0ebed, content: ' The energy from the sun could play a key role in de-carbonizing the global economy alongside improv...', meta: {'name': 'Solar_energy', 'source_id': '4b26f9c886129869a3884289e417d88e1bc8a531535e048a63701a7b1784a8c1', 'page_number': 1}, score: 110.16765479693024)]",0.0,0.048814255744218826,0.0,0.0,0.0 +"Who introduced the Latin version of the word ""insect""?",Pliny the Elder,Pliny the Elder,"[Document(id=e8da082f5022b804801ddb2778f16c32226f0ecb59f3ed7f8fcdae17653f162c, content: ' +The word ""insect"" comes from the Latin word insectum, meaning ""with a notched or divided body"", or ...', meta: {'name': 'Insect', 'source_id': 'e14215fdfb7e1394430dedb24549d773464f110b7f19b86558bc101f84126630', 'page_number': 1}, score: 184.832858928507)]",1.0,0.9999999403953552,1.0,1.0,1.0 +What was started by the Roman Catholic Church to dispense of the Cathars in Languedoc?,The Cathar Crusade,The Cathar Crusade,"[Document(id=08215551b881c7cefad5bb5baaaddc4d6f9a5802a709d61e32985651e2115c85, content: ' By the 11th century, more organised groups such as the Patarini, the Dulcinians, the Waldensians an...', meta: {'name': 'Heresy', 'source_id': '8ba473262f8508a9c99699bb09cd022993896ece1fed1463a9cf4bd7eb0c3e68', 'page_number': 1}, score: 159.6433571883927)]",1.0,1.0,1.0,1.0,1.0 +What music service is a huge competitor for Tidal?,Spotify,Aspiro,"[Document(id=ef9c78ac21a913a9aad74111f50f9247dd1e578b598b35e509b5dfb85cdd75a6, content: ' Contemporary critics have noted the consistent minimalistic aesthetic exhibited throughout DONDA cr...', meta: {'name': 'Kanye_West', 'source_id': '4693bb2ae0f5b7c5ed3788607bba35813f42eada13da446f4c397c0715d2dc91', 'page_number': 1}, score: 153.4513427100565)]",1.0,0.00771007314324379,1.0,1.0,1.0 +How much was each copper token worth?,a halfpenny,420 tonnes,"[Document(id=687916890077183f6cd89496d12592eec3d76bd82f46a80bfac0591dba1c0c1c, content: ' The largest mass of elemental copper discovered weighed 420 tonnes and was found in 1857 on the Kew...', meta: {'name': 'Copper', 'source_id': 'de5c60a665b93f9b358be412d53a37ca43d90005e3ec9a553ac38a38f076fadc', 'page_number': 1}, score: 132.34045561810535)]",0.0,-0.06792526692152023,0.0,0.0,0.0 +What ships were attacked in 1852?,Glencoe and Sea Nymph,"Sewer, rail, docklands, and electric installations were damaged.","[Document(id=c2a1dab12b313626459f38712aea7c56f2c359477289ae6301106d90c43faa1b, content: ' +In the north, substantial efforts were made against Newcastle-upon-Tyne and Sunderland, which were ...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 157.01749548291156)]",0.0,0.01816275343298912,0.0,0.0,0.0 +What was the first fossil to display both clearly traditional reptilian characteristics?,Archaeopteryx,Archaeopteryx,"[Document(id=22e5680a1c41aa000971ff89fccd80f808c8f88ae7d0f5c0d81eb763ce026955, content: ' Unlike Archaeopteryx and the non-avialan feathered dinosaurs, who primarily ate meat, recent studie...', meta: {'name': 'Bird', 'source_id': '76114e3e9c92caec1de2bcfd38b6f22e1b48487f665fa6392f914934a6748ae1', 'page_number': 1}, score: 200.4427546556809)]",1.0,0.9999999403953552,1.0,1.0,1.0 +What are the theories that integrate molecular genetics with Darwinian evolution called?,the modern evolutionary synthesis,the modern evolutionary synthesis,"[Document(id=54fb8e44f718e454d5f4aef7097036553aa91b829fc2ea08644a1f9fc17e50f5, content: ' +The theories developed in the 1930s and 1940s to integrate molecular genetics with Darwinian evolut...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 186.4803046658906)]",1.0,1.0,1.0,1.0,1.0 +At what time did Tesla get dinner?,8:10 p.m,eight o'clock,"[Document(id=5dfd5078daafbc2ef3695636e6d70b42db2abc79ac389c20eac0417969714c11, content: ' Tesla would telephone his dinner order to the headwaiter, who also could be the only one to serve h...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 200.28502177347957)]",1.0,0.6441541910171509,1.0,1.0,1.0 +In what year did the French reach Alsace?,1444,1444,"[Document(id=18dbb6f8a4452ee88a36944ed7b5a77c2233677f5c2c84c6d98113a4c1519651, content: ' After the conclusion of the war, France was again free to pursue its desire to reach the Rhine and ...', meta: {'name': 'Alsace', 'source_id': '1105499d303196fb0b7ae4d26209720b0040e63376f114d458fd77f05836cbd4', 'page_number': 1}, score: 180.92305663796213)]",1.0,0.9999999403953552,1.0,1.0,1.0 +Who was responsible for the handling of the Roman census?,two censors,censors,"[Document(id=3749ef6e14ea9af3893cfd84917e3a39acc2212480fc846e5a9231414a5599e6, content: ' Praetors administered civil law and commanded provincial armies. Every five years, two censors were...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 160.88829017583132)]",1.0,0.5763707756996155,1.0,1.0,1.0 +When was a resolution agreed to about Chinese human rights issues in San Francisco?,"April 1, 2008","April 1, 2008","[Document(id=80f5e2e80143434aa6d2d78a16f411f7f6ab4f8ffffbd29914ad0c75e8bf31da, content: ' The cost to the city for hosting the event was reported to be USD $726,400, nearly half of which ha...', meta: {'name': '2008_Summer_Olympics_torch_relay', 'source_id': 'a45e6ea09a1ff12d8a239a682c8ef3735ae894d5ab6427b50909053bb046e8b5', 'page_number': 1}, score: 120.63367823315784)]",1.0,0.9999999403953552,1.0,1.0,1.0 +Who was the last pope to be crowned?,Paul VI,Angelo Giuseppe Roncalli,"[Document(id=d025eebf35da454623a00c5d71d009d27de02dc1bdc601d3e2b18ee0f2c3a436, content: 'Pope Saint John XXIII (Latin: Ioannes XXIII; Italian: Giovanni XXIII) born Angelo Giuseppe Roncalli,...', meta: {'name': 'Pope_John_XXIII', 'source_id': '21b200b333306d54410b53cfd12e123951dabfc1f534196e5309132d00e8184d', 'page_number': 1}, score: 202.56309147609278)]",0.0,0.21420668065547943,0.0,0.0,0.0 +Who handles serious medical emergencies on Norfolk Island?,the Royal Australian Air Force,Royal Australian Air Force,"[Document(id=c983e8f9fccac478cd7d895c53ca7636f4291c3a87693dd68246b03e75a4be42, content: ' All visitors to Norfolk Island, including Australians, are recommended to purchase travel insurance...', meta: {'name': 'Norfolk_Island', 'source_id': '8be872c9c3302a16163599d52ec60841ddff13042f8e8e2e0aa14de2c192e2ee', 'page_number': 1}, score: 151.58417156835065)]",1.0,0.9777635335922241,1.0,1.0,1.0 +Were Roman soldiers expected to purchase their own equipment?,"wealthiest citizens, who were able to purchase the best equipment",The creation of heavily armoured cataphract-type soldiers as cavalry was an important feature of the 5th-century Roman military. The various invading tribes had differing emphasis on types of soldiers,"[Document(id=176bfe2f82167e6c0052939480075e0192ca105862b81cbb918d59de01773b8c, content: ' +During the later Roman Empire, the principal military developments were attempts to create an effec...', meta: {'name': 'Middle_Ages', 'source_id': 'f13b345e20c54b6cc55363080d7492474df62715717172aa9f1029f286010ad9', 'page_number': 1}, score: 155.40354636325165)]",0.0,0.16384783387184143,0.0,0.0,0.0 +Who did Nasser's group try to assassinate?,Hussein Sirri Amer,King Saud,"[Document(id=5d0ec0248e1b31849b4fcf2b6de857e2ffd286b834f600afc95bf4d1111c7ae6, content: '"" Nasser ordered a crackdown against Syrian communists, dismissing many of them from their governmen...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 224.98326107643072)]",1.0,0.5339999794960022,1.0,1.0,1.0 +What was AJ Ayer an important member of?,Logical empiricism,Officer's Club,"[Document(id=62e6ed804eee1f4f43c5f9b715fe2cd7a4d80fc795bab6b82fe1ec962f592d64, content: ' Nasser recalled that her wails ""haunted"" him and firmly dissuaded him from undertaking similar acti...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 134.38736074532318)]",0.0,0.01744076795876026,0.0,0.0,0.0 +In which year was a settlement reached regarding GE's pollution of the Housatonic River?,1999,1977,"[Document(id=f6a12178d43defb2a103deca0f4fb166a5e99e5a8e58be48c76be81bd5d2bd38, content: ' Around the same time, General Electric's Canadian counterpart, Canadian General Electric, was forme...', meta: {'name': 'General_Electric', 'source_id': '26d4044eb2d79191635af69e57c262af8041a70871e60e989128c458529f04cc', 'page_number': 1}, score: 122.22996323290984)]",1.0,0.4726710319519043,1.0,1.0,1.0 +What area in Charleston is considered the 'industrial area'?,the city's Neck area,Moncks Corner,"[Document(id=829bc238e3aa32b3a2d1d1087c36e846f8e99eed9690beff8a876f9cd3c3b0a2, content: ' North Charleston is the second-largest city in the Charleston-North Charleston-Summerville Metropol...', meta: {'name': 'Charleston,_South_Carolina', 'source_id': '6135d76cf2064b0c4f70e685c5c7e15b8093869ebd10f9a74e8acb07e2173763', 'page_number': 1}, score: 160.82842405678284)]",1.0,0.18905964493751526,1.0,1.0,1.0 +In what year did Frédéric obtain citizenship in France?,1835,1523,"[Document(id=af521f4462f7c624e3d32905b63bfb54303d37eecbb7fa381cb828b7ead3a42a, content: ' The Gallicans briefly achieved independence for the French church, on the principle that the religi...', meta: {'name': 'Huguenot', 'source_id': '8c6e1ef6132295dfe82bead80d85963191a790434a85ac498376747f1b1241c7', 'page_number': 1}, score: 136.6167675359813)]",0.0,0.4937823414802551,0.0,0.0,0.0 +What was Tesla afraid someone was trying to do with his invention?,steal the invention,"teleforce"" weapon","[Document(id=60f1f69a282d73eabc8ac6d80df34e28a168bcd0259aa89f6195a0e4b5d2af7e, content: ' In early 1938, Tesla was able to get up. He at once resumed the pigeon-feeding walks on a much more...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 160.49288449971363)]",1.0,0.07724405825138092,1.0,1.0,1.0 +What is the group called that does not agree with government at all?,anarchists,nomocracy,"[Document(id=622d12e3bb680eab92dad10a2eb5ac159bb02ac6bc5c72a67279515f6315943f, content: ' Government based upon the rule of law is called nomocracy. +In the West, the ancient Greeks initiall...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 111.87709802586735)]",0.0,0.2944614589214325,0.0,0.0,0.0 +What was the name of the bookstore the authors met in?,Old Corner Bookstore,The Eagle and Child and the Lamb and Flag,"[Document(id=a3dbac4bc0b16e2ff3bf9f3c4792a63262476d7373f810881dff5acffbedd8f3, content: ' +The Angel, Islington was formerly a coaching inn, the first on the route northwards out of London, ...', meta: {'name': 'Pub', 'source_id': '502d7ebebad9932fd886ae14992fa24a60ee4433f6dc693672b70b3b21ed1232', 'page_number': 1}, score: 97.75501653939075)]",0.0,0.04613007232546806,0.0,0.0,0.0 +Time and space are both examples of what type of resource?,complexity resources,nuclear energy,"[Document(id=66b5aeeb67af8735040a1914ded019de3bfde1eb8bba052201cc26831535797b, content: ' It may also be convenient to distinguish gravitational energy, thermal energy, several types of nuc...', meta: {'name': 'Energy', 'source_id': '2acb86f5610282e79520ecdff8b7d4d9d9c6d7c8d58ab1b6353ccedefcbcae7a', 'page_number': 1}, score: 109.4759290816955)]",0.0,0.03675124794244766,0.0,0.0,0.0 +What had happened to Frederick's extra troops from Russia?,"Catherine, who ended Russia's alliance with Prussia and withdrew from the war","Britain had been surprised by the sudden Prussian offensive but now began shipping supplies and ₤670,000","[Document(id=cddea2faff19cdfc25553256fc0dd044482534aa314d6a2ff382e22824f27497, content: ' The attack on neutral Saxony caused outrage across Europe and led to the strengthening of the anti-...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 187.73039000319807)]",1.0,0.29078197479248047,1.0,1.0,1.0 +What percentage of its workforce did Dell terminate in from 2007 to 2008?,10%,"13 percent of the company's total intake by 1998. Three years later, Dell passed Compaq as the top provider of Intel-based servers, with 31","[Document(id=08a3d22e85cf463253832f09aece7cb6f9124094f57b475f1813e553c0fa4c0f, content: ' Consequently, Dell's enterprise revenues, almost nonexistent in 1994, accounted for 13 percent of t...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 157.4319576783758)]",1.0,0.1973249465227127,1.0,1.0,1.0 +What organization was Rome's official caretaker?,the senate,the Jewish underground,"[Document(id=1d72519a69a78ccb8cb4d26f487ecf66397a11809b83ed36b917f9922d18c76a, content: ' Unaffected, he wrote to his sisters Ancilla and Maria and told them both that he was fine. +On 30 No...', meta: {'name': 'Pope_John_XXIII', 'source_id': '21b200b333306d54410b53cfd12e123951dabfc1f534196e5309132d00e8184d', 'page_number': 1}, score: 151.97694599005956)]",0.0,0.09238065034151077,0.0,0.0,0.0 +Who chose BYU as the location of the national Middle East Language Resource Center?,United States Department of Education,Institute of International Education,"[Document(id=70acc5e644128c0a06947ca2ec4b38512aa385f2039a0aa99910886c2769b361, content: ' In 2001, the Institute of International Education ranked BYU as the number one university in the U....', meta: {'name': 'Brigham_Young_University', 'source_id': 'cb3b81b6f2c540426803106face67bc5fd4db7b1a9ef422ec3601456732b4785', 'page_number': 1}, score: 217.01691352942052)]",1.0,0.4156174957752228,1.0,1.0,1.0 +Who discovered Hydrogen gas?,Robert Boyle,Robert Boyle,"[Document(id=694b7ad0ec30f16563c095bfd8aa73a286298746fa8b5b6b568961e806fe7462, content: ' +In 1671, Robert Boyle discovered and described the reaction between iron filings and dilute acids, ...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 200.7654419813894)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Where was Polybius forced to go as hostage?,Rome,Beroea,"[Document(id=b0db6a298369be019a3d01ef5bfbe30dd534f6a2be471b538c76854bccadccaa, content: '."" (Pope Gregory I, would have full access to the Vatican Archives). +Constantius, ordered Liberius i...', meta: {'name': 'Athanasius_of_Alexandria', 'source_id': '2a8e7ec0a44075f1a567744982dcecf4bcea19f0065d4f73c742398a05fa1642', 'page_number': 1}, score: 154.05413994317473)]",0.0,0.1712186634540558,0.0,0.0,0.0 +In what decade was disco popular?,1970s,90's,"[Document(id=cf86cea5ed69b1cddee9e7bfd4d861b5f270fd7c89048980e1e45da52aa037d0, content: ' Techno and rave was happening in the 90's and had a lot of different forms. There was very experime...', meta: {'name': 'Madonna_(entertainer)', 'source_id': '0868d847bb768c30a7507239169d5e441bb3181b66c8b9353a863daa3d58e425', 'page_number': 1}, score: 179.8445269758427)]",0.0,0.5847175121307373,0.0,0.0,0.0 +What do the rivers form?,lakes,lakes,"[Document(id=d9c66874c7c0a9fe45438a723827763a95eacdbe604f0f1e81a1a311692bb3a4, content: ' The Rhine originates in a 30 square kilometre area in Switzerland and represents almost 60 percent ...', meta: {'name': 'Alps', 'source_id': 'd2eb3ad3af7d6110ae691bf40660ff7a79e4e9dbfd25785b4e55c7700bfcae79', 'page_number': 1}, score: 136.31986270459694)]",1.0,1.0,1.0,1.0,1.0 +What were the first 256 code points of Unicode made identical to? ,ISO-8859-1,the English alphabet,"[Document(id=79f26e42dd7702142ed7d1e92a23812856512d31177acccc13a6b2634280e3f3, content: 'Originally based on the English alphabet, ASCII encodes 128 specified characters into seven-bit inte...', meta: {'name': 'ASCII', 'source_id': '4bd7e27c39431d5c4712c32c534ed40f7df89f7b80f6eccb22975178fb7b1be2', 'page_number': 1}, score: 154.38856966441136)]",0.0,0.02527976967394352,0.0,0.0,0.0 +"How many countries did her song ""Irreplaceable"" get number one status in?",five,eight,"[Document(id=540573be22203cfcd80a8767ca45244594a8a0f7965c6724d1800a112dc82391, content: ' It has become a recognized springboard for launching the career of many artists as bona fide stars....', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 136.98690851760873)]",0.0,0.7335977554321289,0.0,0.0,0.0 +How many Examination Boards exist in India?,30,30,"[Document(id=87a93f72cbb9c00ea6477b2a31d21c284a2ecf07733f93b0fb4ba7b277f5b8f5, content: ' It is within the power of both the union government and the state governments to govern schools sin...', meta: {'name': 'Private_school', 'source_id': 'c62330ba95e9919513bc9fc65c0d21a748b4a518b9b1f905a113de7f37274e44', 'page_number': 1}, score: 129.9025361296588)]",1.0,1.0,1.0,1.0,1.0 +What play showed an early depiction of civil disobedience?,Antigone,Antigone,"[Document(id=99743eb7e7277e11f539b5449857c5f012a842facd618f6a75ff178a89fc1f76, content: 'One of its earliest massive implementations was brought about by Egyptians against the British occup...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 155.07687203117038)]",1.0,1.0,1.0,1.0,1.0 +"Once in conversation with the Queen, how is Elizabeth addressed?",Ma'am,solo public appearance on a visit to the Grenadier Guards,"[Document(id=331c1fa6114188993824feeab60399c92e5d44233f4ea3a0e3a47f363d1ca459, content: ' The book describes Elizabeth's love of horses and dogs, her orderliness, and her attitude of respon...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 143.80750919536004)]",1.0,-0.02931702323257923,1.0,1.0,1.0 +In what century did Paris become largely into fashion?,19th,19th,"[Document(id=da71570909e9fb8c035368ed15e2aa75e525f67f8967bb44ee0441867cd548fe, content: ' The French Revolution and political and social change in France had a profound influence on art in ...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 183.14691808537043)]",1.0,1.0,1.0,1.0,1.0 +When did Alfred the Great begin his reign?,871,1530,"[Document(id=75d10fd326eaa70212e1a7df2aee9fdf7245da6f9056af21f32aa2e4063ce7d3, content: ' Albert established himself as the first duke of the Duchy of Prussia and a vassal of the Polish cro...', meta: {'name': 'East_Prussia', 'source_id': '8739ef146d6d2f6b5a8d73f47b49b6914f68ff7c7dc1364b157b041ba7893321', 'page_number': 1}, score: 155.19674363646152)]",0.0,0.2953075170516968,0.0,0.0,0.0 +What two methods can be used when using Load Testing?,data or a large number of users,large quantities of data or a large number of users,"[Document(id=4753cc282d43999b382172589cceb076cee5e8185383911435f640a568593cbe, content: ' +Load testing is primarily concerned with testing that the system can continue to operate under a sp...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 154.96837900503925)]",1.0,0.8635382056236267,1.0,1.0,1.0 +what may possibly be a solution to net neutrality concerns? ,municipal broadband,municipal broadband,"[Document(id=5f8dc7264853d3b0b88a03420e7878e16fa4c08ef70ab7cc5d722e889b21dda6, content: 'S. Federal Communications Commission (FCC) was reported to be considering a new rule that will permi...', meta: {'name': 'Internet_service_provider', 'source_id': 'b6cdeebacab1c0fc99cc348ba10a7ad5f8a09ca4691c74f89374730939ffbbb4', 'page_number': 1}, score: 119.60475215544247)]",1.0,1.0,1.0,1.0,1.0 +What are the ground-attack aircraft in the USAF usually deployed in support for? ,U.S. ground forces,U.S. ground forces,"[Document(id=87ffdf42e04cd4e5aabc33e35caf9614ba61758e355a1d97a38a524ba0bedf46, content: ' +The ground-attack aircraft of the USAF are designed to attack targets on the ground and are often d...', meta: {'name': 'United_States_Air_Force', 'source_id': '1bb222de66e5a2c81f1d5b75bf3eced066939aec82934a5cb176a607b4e8cc8f', 'page_number': 1}, score: 259.32749430318006)]",1.0,0.9999999403953552,1.0,1.0,1.0 + In what year did the invasion of Russia take place?,1812,1854,"[Document(id=ae931e2fa116a2f50e732849bda9db89ea2500925baddc8026e2033f9f7f334c, content: ':175–176 In July 1854, the Turks under Omar Pasha crossed the Danube into Wallachia and on 7 July 18...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 166.47104419579009)]",0.0,0.6653626561164856,0.0,0.0,0.0 +What was the practice of religion to the Romans?,personal choice,Private and personal worship,"[Document(id=8cb202622013f5234d79ec3a4275501aaa91b4fdefe32dbbccfd335829f7ffa1, content: '[citation needed] +Roman religious beliefs date back to the founding of Rome, around 800 BC. However,...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 224.81247000090372)]",0.0,0.4898977279663086,0.0,0.0,0.0 +What is one example of failure testing?,Software fault injection,lack of backward compatibility,"[Document(id=14252d20e848a8ca81ceff20f7277b85158f25de7f602c12fb90fff97ab2033b, content: ' +A common cause of software failure (real or perceived) is a lack of its compatibility with other ap...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 130.46787137338387)]",1.0,0.1547165811061859,1.0,1.0,1.0 +"Out of the maximum amount of players allowed, one must be a what?",goalkeeper,"any eligible club down to Levels 10 of the English football league system - all 92 professional clubs in the Premier League and Football League (Levels 1 to 4), and several hundred ""non-league"" teams in Steps 1 to 6 of the National League System (Levels 5 to 10). A record 763 clubs competed in 2011–12. The tournament consists of 12 randomly drawn rounds followed by the semi-finals and the final. Entrants are not seeded","[Document(id=19a66851b1eaeb826f76bf58a77bd1f5d1f72ecb286783dd621092b41bfe8bef, content: 'The competition is open to any eligible club down to Levels 10 of the English football league system...', meta: {'name': 'FA_Cup', 'source_id': '0356ce80ab6fe66ba7e3d7bfca04c698120e3113a2cd4f8423f9a7f47f2a50b3', 'page_number': 1}, score: 106.19905139433514)]",0.0,0.1330350786447525,0.0,0.0,0.0 +In which year was the new constitution ratified?,1855,1957,"[Document(id=53c8c2a994988656cf26a8f84b4f33b2c0a40abc2d66236b1557cf06bda2e138, content: ' The NU would select a nominee for the presidential election whose name would be provided for public...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 139.42901542493598)]",0.0,0.6997417211532593,0.0,0.0,0.0 +What vessel was attacked in 1845?,Naiad,"Japanese and Allied forces occupying various parts of the island, over the following six months both sides poured resources into an escalating battle of attrition on land, at sea, and in the sky.","[Document(id=5dc79160b5d260520630c9160b6382e28b30dc8f883ffb4682413bf39ea97f1f, content: ' From July 1942, a few Australian reserve battalions, many of them very young and untrained, fought ...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 170.85364050914495)]",0.0,0.04725157096982002,0.0,0.0,0.0 +When did Travel + Leisure list Yale as one of the most beautiful campuses?,2011,2011,"[Document(id=1eb83a50e062eec7af6abab3de1bf0b5576a4c75ac168cc64f1b77a2e268c579, content: '7 km2) Yale-Myers Forest in Connecticut's Quiet Corner—and nature preserves including Horse Island. +...', meta: {'name': 'Yale_University', 'source_id': 'f33638c4d6b8d7572ba500f3a8affca1729aae3046c83a94e2a7424dd50f6549', 'page_number': 1}, score: 191.53811571133681)]",1.0,1.0,1.0,1.0,1.0 +What sort of variants did the Human Genome Project indicate that many of the measured transcripts were?,alternative variants,expressed sequence tags,"[Document(id=c181af6d9e4133e2db3dd3214c28363e87a8f250668a4fc4d9c06fd0060f35eb, content: ' Conversely, plants can have extremely large genomes, with rice containing >46,000 protein-coding ge...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 114.01989222941418)]",1.0,0.13049259781837463,1.0,1.0,1.0 +During what year did the Italians invade Libya?,1911,1980,"[Document(id=6fa0d2cdcb0d00898debed351e32ab41dc4678320f30c21f07a92e9289207601, content: ' Military spending increased, while other administrative budgets were cut back. Libya had long suppo...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 214.09821488402224)]",1.0,0.4845913052558899,1.0,1.0,1.0 +What did Davidson believe the God of Aristotle is synonymous with?,rational thought,"Stoicism and theory of justice of the Roman world, as well emphasis on the role of the state in applying mercy as a moral example. Augustine also preached that one was not a member of his or her city, but was either a citizen of the City of God (Civitas Dei) or the City of Man","[Document(id=046bb2211da01072ac955e6087ba2d6dcb1192ef23b7ba62df9e4298d857f576, content: ' +The early Christian philosophy of Augustine of Hippo was heavily influenced by Plato. A key change ...', meta: {'name': 'Political_philosophy', 'source_id': 'b6ae8bbff967363b12ca505926b5bff22989ed948eaa878bbf82df20c179d98b', 'page_number': 1}, score: 196.8183596417783)]",0.0,0.21538060903549194,0.0,0.0,0.0 +What percentage of students at Notre Dame are the children of former Notre Dame students?,21–24%,"8,448 undergraduates, 2,138 graduate and professional and 1,593","[Document(id=1fa5bbb9afa8ebfbb9575bd6b27204acdf8f5bcb61e4e7771f62bb0b32428d62, content: ' +In 2014 the Notre Dame student body consisted of 12,179 students, with 8,448 undergraduates, 2,138 ...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 187.92811511577906)]",1.0,0.28410908579826355,1.0,1.0,1.0 +On what day did U.S> planes located Ozawa's fleet?,second,20 October,"[Document(id=7f6d0a9e9a3e49df55737ef78eeafa784e1bc879c484b5e1795fb994301c9ebc, content: ' Kurita's force had begun the battle with five battleships; when he returned to Japan, only Yamato w...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 174.6041177508166)]",1.0,0.03874627500772476,1.0,1.0,1.0 +Greek was crowned the European champions in what event?,Euro 2004,basketball,"[Document(id=12d9fa7ff9f514e66e336f50a70a372c3162770635ba732cc4335141fd65ee50, content: 'O.K, Maroussi) have won 3 Triple Crowns, 5 Saporta Cups, 2 Korać Cups and 1 FIBA Europe Champions Cu...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 183.37264468656736)]",1.0,0.03532267361879349,1.0,1.0,1.0 +Which tense indicates that an action hasn't finished?,imperfective,unaspiration,"[Document(id=35b7bfb48aac6571131a3b68834170a8fb3ad46829fae2cb23ab240eb4ef67cb, content: ' +Unaspirated or tenuis consonants are occasionally marked with the modifier letter for unaspiration ...', meta: {'name': 'Aspirated_consonant', 'source_id': 'ded60872a72621c0cc2ee33d9a7b0d60f1474075e260da958b752dbe72e00321', 'page_number': 1}, score: 111.41365660634347)]",0.0,0.09681069105863571,0.0,0.0,0.0 +What acoustic element could be distorted by different audio settings in iPods prior to 2007?,Bass,bass,"[Document(id=fdcc0196b4b8a411c852d4317f4371bd9d7338e17b11d3bebb1bcb7848dd8ddf, content: ' The combination of the undersized DC-blocking capacitors and the typical low-impedance of most cons...', meta: {'name': 'IPod', 'source_id': 'f0324763387a7e11c235569bada6935891b2c5ba40dd065ab5acc1bb5de8dbed', 'page_number': 1}, score: 145.05192227613009)]",1.0,0.6248049139976501,1.0,1.0,1.0 +In what way can Neutral triatomic hydrogen exist?,excited form,quantum mechanical,"[Document(id=e248ff75cbbe44aa00589ce96d2d6a12e8311dee527524077686bbeb3cd47e37, content: ' However, the electromagnetic force attracts electrons and protons to one another, while planets and...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 118.46250890702927)]",1.0,0.09526881575584412,1.0,1.0,1.0 +How are children used in the conflicts in Burma?,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements.,Children are being sold as conscripts,"[Document(id=0088f05f01a71535c2cb1e5b41db4930183fdb107aa5d9b1b2402b982ed6e8a4, content: '"" In July 2013, the Assistance Association for Political Prisoners indicated that there were approxi...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 154.09655586407015)]",1.0,0.30359911918640137,1.0,1.0,1.0 +In what decades were Arsenal players stereotyped in a negative way?,1970s and 1980s,1970s and 1980s,"[Document(id=1bdd87144e8c05c0c8820e3bd6186deb63a4be13d5810e39af795993e4721bb7, content: ' The book was twice adapted for the cinema – the 1997 British film focuses on Arsenal's 1988–89 titl...', meta: {'name': 'Arsenal_F.C.', 'source_id': 'b197b14120aabbc3bf2073d4b3e99c1c51d56d5ac5248f8a97dadcfa056a6f7e', 'page_number': 1}, score: 87.2309971301853)]",1.0,1.0,1.0,1.0,1.0 +When did new construction start in Melbourne?,late 1890s,The 1850s and 1860s,"[Document(id=5065b911750db5485f2cb49749f8e95f270f3e0d705842ca5e8f26d61da4d4ce, content: ' The city centre, Docklands, St. Kilda Road and Southbank areas feature high-density forms. +With the...', meta: {'name': 'Melbourne', 'source_id': 'e9cc00699ea29b72949874ec0e147efa277ecdaa2a727246f376aab1a8d342bd', 'page_number': 1}, score: 207.90527358822658)]",1.0,0.6798640489578247,1.0,1.0,1.0 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:10.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:10.csv new file mode 100644 index 0000000..4630bac --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:10.csv @@ -0,0 +1,128 @@ +questions,true_answers,predicted_answers,retrieved_docs,doc_mrr,sas,doc_map,doc_recall_single_hit,doc_recall_multi_hit +What were the earliest priesthoods? ,flamines,flamines,"[Document(id=ac8e9c286327aea24d00bceb002b08ad3fbc3007673c77da9b48b6b6b91aa76f, content: ' Some rites of the domus (household) were held in public places but were legally defined as privata ...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 185.7836198788653)]",1.0,0.9999999403953552,1.0,1.0,1.0 +When was a third visit to Mnafeld scheduled? ,early 1546,January 2010,"[Document(id=5987439923479496ef29d3e07778d456397d370fee0aa96398bfcb29d803683b, content: ' On January 8, 2009, Dell announced that it would move all Dell manufacturing in Limerick to Dell's ...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 92.3397989607895)]",0.0,0.3453691005706787,0.0,0.0,0.0 +What was the name of the airfield where heavy raids took place?,Farnborough,Tora Bora,"[Document(id=edf1d880f9644a86ef60cf78769e6b01b8387a648936eb886b2a89ea73f13424, content: ' On 7 October 2001, the official invasion began with British and U.S. forces conducting airstrike ca...', meta: {'name': 'War_on_Terror', 'source_id': 'b9ac47a39d382362992253f0ccfa0072ea6216fe1daa748c4899980b467292f4', 'page_number': 1}, score: 170.61233506154028)]",0.0,0.154091477394104,0.0,0.0,0.0 +The Modern English ending -'s is derived from what ending in Old English?,-es,"Ænglisc, Anglisc, Englisc","[Document(id=d806131552e88e82e2690be85e2fbf31773e47614f5d7a8efdc4ab729427ca2b, content: 'Old English (Ænglisc, Anglisc, Englisc) or Anglo-Saxon is the earliest historical form of the Englis...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 109.78878002154552)]",1.0,0.1304444968700409,1.0,1.0,1.0 +In what area is it common for spiritual mentorship to be extremely high?,the West,"low-church"" traditions of Protestantism","[Document(id=70acdc5f12009efe2ebd2dd786809c97eacf0dc441dc5d0ed86857bd7db13270, content: ' Also, with the advent of the internet, many teachers are now selling their lesson plans to other te...', meta: {'name': 'Teacher', 'source_id': '8d3b7bd3176a10380300a49fa713b8fffa5e77a181df51ea823d2bdeb46d4c7b', 'page_number': 1}, score: 111.66204991140583)]",1.0,0.12859657406806946,1.0,1.0,1.0 +Who is the only Greek volleyball club to win European titles?,Olympiacos,"Panathinaikos, Olympiacos, Aris Thessaloniki, AEK Athens, P.A.O.K, Maroussi","[Document(id=76be05dc0de2dfbcf6d3d16e80abbfa4ee57f55d30696681b941ecd6ff700822, content: 'O.K. Greek basketball teams are the most successful in European basketball the last 25 years, having...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 174.66555941263255)]",1.0,0.3749234676361084,1.0,1.0,1.0 +How many judges were on the panel for this season?,four,three,"[Document(id=17489faf6fb39343de67cc2bfe448e959559775109bb646aa0e4d02b716ddf77, content: ' Jennifer Lopez and Steven Tyler joined the judging panel in season ten, but both left after two sea...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 107.71739626930493)]",1.0,0.7108100056648254,1.0,1.0,1.0 +"Along with a chemical process, what type of process is used by night vision devices to convert infrared into visible light?",electrical,electrical,"[Document(id=1c8a379dc092fa8d374927640e6d16f46286416581451c66a1da80fef1412acc, content: ' Infrared can also be used to detect protostars before they begin to emit visible light. Stars emit ...', meta: {'name': 'Infrared', 'source_id': 'ba9d9843065facd8aa75e472f0c7d760ae3d78712335a49f7abaf499acb21d1c', 'page_number': 1}, score: 129.0274963398839)]",1.0,1.0,1.0,1.0,1.0 +The second most prominent religion in New York is what?,Judaism,Judaism,"[Document(id=1eed45d4c946ec058384afdd313db9b847d2b384629f1872466e03c7bc13a2e5, content: ' +Christianity (59%), particularly Catholicism (33%), was the most prevalently practiced religion in ...', meta: {'name': 'New_York_City', 'source_id': '8a1509d057a51ed7a68b42ff87df5d459ac7f15f3d0f4f1685859ba197f9686d', 'page_number': 1}, score: 173.3412040228327)]",1.0,1.0,1.0,1.0,1.0 +The Free Hanseatic City of Bremen includes which cities?,Bremen and Bremerhaven,two urban districts,"[Document(id=b99c33e3f1152b05cb402034099d36d6b8fc3942c6deb97d0de9d7f47d56c738, content: ' The equivalent of the Minister-President is the Senatspräsident (President of the Senate) in Bremen...', meta: {'name': 'States_of_Germany', 'source_id': '60764e89141121f734e956aeeb48fcbe12c7e4fdec683c4f9fa29406a3f0579e', 'page_number': 1}, score: 166.68434734067063)]",1.0,0.12825782597064972,1.0,1.0,1.0 +What seeks to limit the use of polysyllabic characters?,CPC,"Lipopolysaccharides, also called endotoxins, are composed of polysaccharides and lipid A that is responsible for much of the toxicity of gram-negative bacteria. Most bacteria have the gram-negative cell wall, and only the Firmicutes and Actinobacteria have the alternative gram-positive arrangement. These two groups were previously known as the low G+C and high G+C Gram-positive bacteria, respectively. These differences in structure can produce differences in antibiotic susceptibility; for instance, vancomycin can kill only gram-positive bacteria and is ineffective against gram-negative pathogens, such as Haemophilus influenzae or Pseudomonas aeruginosa. If the bacterial cell wall is entirely removed, it is called a protoplast, whereas if it is partially removed, it is called a spheroplast. β-Lactam antibiotics, such as penicillin, inhibit the formation of peptidoglycan cross-links in the bacterial cell wall. The enzyme lysozyme, found in human tears, also digests the cell wall of bacteria and is the body's main defense against eye infections. +Acid-fast bacteria, such as Mycobacteria, are resistant to decolorization by acids during staining procedures","[Document(id=a93c46780eb38a9b8195740a871eddb9a85c482bf7d89e72d9247c60999aed47, content: ' Lipopolysaccharides, also called endotoxins, are composed of polysaccharides and lipid A that is re...', meta: {'name': 'Bacteria', 'source_id': 'f8e9d365bfe51780e8f42f3a07cb69dd7a6df46bc7c67030d0853bdb13eeb390', 'page_number': 1}, score: 67.86823728702234)]",0.0,0.16699571907520294,0.0,0.0,0.0 +In what courts can federal habeas corpus suits be brought?,federal,U.S. Supreme Court,"[Document(id=0873fa8d65c3e4c79ab76cb82a1993eaa21b936e8eec702597e4a3f9bfd65150, content: ' The UNC-Chapel Hill lawsuit alleges discrimination against white and Asian students, while the Harv...', meta: {'name': 'Affirmative_action_in_the_United_States', 'source_id': 'b7156a1660bb591e3abb4460028a7922c063ba8962ad6dce11678f8b08c5cdcb', 'page_number': 1}, score: 101.02615234422868)]",0.0,0.22163008153438568,0.0,0.0,0.0 +What did Whitehead consider himself as a philosopher? ,rank amateur,rank amateur,"[Document(id=a022efa8ed739794e05008dbe44d9b83a87fc4f4b7a609ec096b9d437bc5089e, content: ' For Whitehead, education should be the exact opposite of the multidisciplinary, value-free school m...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 210.30818292133372)]",1.0,1.0,1.0,1.0,1.0 +What condition does a person have to meet to be allowed by law on a coin?,deceased,treason,"[Document(id=2954716cf85dfc3cbc22e1e1bbc015e2ddc7abd27427ced8fbf5d44180b2afe2, content: '.."" The Deuteronomic social vision may have influenced opponents of the divine right of kings, inclu...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 75.94772194895006)]",0.0,0.054149776697158813,0.0,0.0,0.0 +Which region of the US experiences rain year round?,Pacific Northwest,Southwest,"[Document(id=ef3b24124d6963ab10e31070a01eb903bae0b374f75f9a00a0da61890b7d8c80, content: ', tornadoes are more common than anywhere else on Earth and touch down most commonly in the spring a...', meta: {'name': 'Geography_of_the_United_States', 'source_id': '6eef5e1e8e3a1d489b23e0304d3cc9472b84a44871b2fd5ecfc8ab3f12e8ccf0', 'page_number': 1}, score: 182.51279558274814)]",1.0,0.6455833315849304,1.0,1.0,1.0 +What is the ability to recognize and adapt to new specific pathogens called?,Adaptive (or acquired) immunity,Antigen specificity,"[Document(id=a4a5008fab761323a2771c50538a7e75c9cf4ed6426bbef3a6ae08b7edd123e4, content: ' The adaptive immune response is antigen-specific and requires the recognition of specific ""non-self...', meta: {'name': 'Immune_system', 'source_id': 'fcb8c5092b2596a66d4f96587d653cbabda606874d86632c35ec40ef8f023051', 'page_number': 1}, score: 159.91248406200884)]",1.0,0.2857634425163269,1.0,1.0,1.0 +Who allegedly hit a home run to the Center?,Babe Ruth,Babe Ruth,"[Document(id=bde5f6c2b4913151f9e46d6578f4f20345ade116f2e96b5ae76ef509e1e517dd, content: ' Because of the tie the Giants and Cubs ended up tied for first place. The Giants lost the ensuing o...', meta: {'name': 'Chicago_Cubs', 'source_id': '10bd5330115a2fe2b9ef60d21abe3942769709161783409af0c4444ccac129c3', 'page_number': 1}, score: 164.45063393002806)]",1.0,1.0,1.0,1.0,1.0 +Which type of graduate from Imperial earned the second highest average starting salary after graduation?,Computing graduates,3.36,"[Document(id=8190e2f3cfba3c67fc319c1eba30ec986d8174be2decff7ed0bb12a364fb52bb, content: ' +The College's endowment is sub-divided into three distinct portfolios: (i) Unitised Scheme – a unit...', meta: {'name': 'Imperial_College_London', 'source_id': '8ff02d910ce681fdd72074b13ed322271625386966ee827dabf1c93ca556fd6e', 'page_number': 1}, score: 140.20154888024658)]",1.0,-0.009853528812527657,1.0,1.0,1.0 +In what year did Eisenhower propose his second civil rights act?,1960,1955,"[Document(id=0d88cb30320f0b038a71e1d9aa43a891187ded88e8298726341208761ff3f32a, content: ' Congress agreed to his request in 1955 for the Formosa Resolution, which obliged the U.S. to milita...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 185.40736255863564)]",1.0,0.6117545962333679,1.0,1.0,1.0 +What is the current height limitation in Paris in central areas?,50 m,"2,300 km2","[Document(id=b1a257cabbb0177c72b7e21764d67b5fde052b256f5fa1b3a533adabf91c13c0, content: ' Overall, the city is relatively flat, and the lowest point is 35 m (115 ft) above sea level. Paris ...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 183.58814741162405)]",1.0,0.39352455735206604,1.0,1.0,1.0 +Income level was found to have a signification correlation to which outcome in the study?,nutrition literacy,lower level of economic growth,"[Document(id=fde899a9d8a603c5a1a9d80bde11c5e7902d2c542c8d9a1485111d5c6b381bbd, content: ' This process, known to economists as rent-seeking, brings income not from creation of wealth but fr...', meta: {'name': 'Economic_inequality', 'source_id': '648ff32b8c146c974b23e115e3a61420b1cdbea9de1f5bf38442ce4c1d9caa42', 'page_number': 1}, score: 89.0235141824955)]",0.0,0.05020417273044586,0.0,0.0,0.0 +How much did Viacom want in damages in their lawsuit against youtube?,$1 billion,$1 billion,"[Document(id=56ff4bdb4f543c61c1f516ba1e544e83e117ab5512bae58b2264fa6b6d86f26b, content: ' Three successful complaints for copyright infringement against a user account will result in the ac...', meta: {'name': 'YouTube', 'source_id': '4d18798daeceb997a6dcb04b575dbbdad768671ca9202116370611d944d510be', 'page_number': 1}, score: 173.34560648176648)]",1.0,0.9999998807907104,1.0,1.0,1.0 +Who should be able to challenge administrative orders in court?,individuals,individuals,"[Document(id=5c5f6a0591170904a56f4c0417e667089761d8d8f013f55283aff164fdadd75b, content: ' Dicey's: “no man is punishable or can be lawfully made to suffer in body or goods except for a dist...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 116.19394976028495)]",1.0,1.0000001192092896,1.0,1.0,1.0 +The Slavs were under what name in the early 6th century?,the Antes and the Sclaveni,Veneti,"[Document(id=4236cbbf71506fe35f0da033ea9b465edf3cfbfdd8e6bac2b259b18835cc0408, content: ' +The relationship between the Slavs and a tribe called the Veneti east of the River Vistula in the R...', meta: {'name': 'Slavs', 'source_id': 'a55731ff3cd03d8fe34d4e646dbfcdc2088f01190e5110c9ff0baf4e4b486847', 'page_number': 1}, score: 210.39712772144958)]",1.0,0.36141833662986755,1.0,1.0,1.0 +How are the results simplified when using the SASO?,"into four summaries that look specifically at responses that correspond to either homosexuality, heterosexuality, bisexuality or asexuality.",when each pivoting divides the list in half,"[Document(id=e8683d0ee8010f97dee653f8386c791c980c6a9d74b90ebf2539aa0cd57f763c, content: ' If we assume that all possible permutations of the input list are equally likely, the average time ...', meta: {'name': 'Computational_complexity_theory', 'source_id': '3e0d0d92095b10b4cb42da481613cced2291ffa7365347553c276089ba6e764a', 'page_number': 1}, score: 83.67976359207012)]",0.0,0.11146857589483261,0.0,0.0,0.0 +What socioeconomic class used the Florentine Tuscan language in Dante's time?,upper class,literate and upper class,"[Document(id=149c6390292392d4438c9191dca43bbe827c20a86bb7bfc502679813d7fcc108, content: ' Due to Eastern Lombard's status as a Gallo-Italic language, an Eastern Lombard speaker may, in fact...', meta: {'name': 'Dialect', 'source_id': '2e05b686e48c9512ae5466f1d15202b44fdc903ea32d0a42c5dbc9ef173d051d', 'page_number': 1}, score: 177.45530984259983)]",1.0,0.8130882978439331,1.0,1.0,1.0 +"In addition to ammo proposals, what other need was articulated?",The need for some form of tracer or smoke trail,to explode in the air,"[Document(id=4e21aed2e6d5a2193f1991660032d4445fa58d39e44d263ec7754bf0c98869ec, content: ' These batteries, particularly with guns, usually deploy in a small area, although batteries may be ...', meta: {'name': 'Anti-aircraft_warfare', 'source_id': 'c54639424f62ce369356ed8c4e1e9fab8769359dd28029aa89832f5aa994f45d', 'page_number': 1}, score: 81.70938919778587)]",1.0,0.06160792335867882,1.0,1.0,1.0 +What is that power that enables us to love?,Prevenient grace,Long-term relationships allow adolescents to gain the skills necessary for high-quality relationships later in life and develop feelings of self-worth,"[Document(id=9df0e87f01c1b2c8ac7969d77fe5940944b4885a75655d3b107fbb3e960b582b, content: '0 for males and 17.3 for females. The typical duration of relationships increases throughout the tee...', meta: {'name': 'Adolescence', 'source_id': '4687b887b5783807c17e352eca017193687de7d3428929359d466eaed34ebbe4', 'page_number': 1}, score: 99.769775039754)]",0.0,0.00461157038807869,0.0,0.0,0.0 +"Who was assassinated on September 22, 189 AD?",He Jin,He Jin,"[Document(id=1023d346fa09f68f024520eed11ceb8d146eebd929e4d93e19ec61ae32461e57, content: ' 189 AD), half-brother to Empress He (d. 189 AD), plotted with Yuan Shao (d. 202 AD) to overthrow th...', meta: {'name': 'Han_dynasty', 'source_id': '060d95b91f9c20e5c8cd8b8a032ab44aac38ab88ce62d3c302bea0865f533568', 'page_number': 1}, score: 149.4057899599862)]",1.0,1.0,1.0,1.0,1.0 +How did Gaddafi change the legal system?,he introduced sharia as the basis for the legal system,introduced sharia,"[Document(id=e2a95241acaa96c9b3665ffd83614f229b121a721d01ae7e3a778b2ce1d5050d, content: ' Intent on pushing Libya towards ""Islamic socialism"", he introduced sharia as the basis for the lega...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 208.30154430383593)]",1.0,0.6521134972572327,1.0,1.0,1.0 +What name was it assumed Elizabeth would take upon her marriage to Philip?,"Mountbatten,",Peter Townsend,"[Document(id=4b69cd784f30edcd0bcf343e88529300a3f5f46cab6549e05e32a9f6208375df, content: ' +Elizabeth and Philip were married on 20 November 1947 at Westminster Abbey. They received 2500 wedd...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 208.66892736307855)]",1.0,0.19164665043354034,1.0,1.0,1.0 +What was the attack on the British weakness?,Battle of Fort Bull,sea communications,"[Document(id=2982b60b277d226d31c0a9fe90582fab11b0c6ff0f2e5a54d4e342474fbc3795, content: ' +In 1941, the Luftwaffe shifted strategy again. Erich Raeder—commander-in-chief of the Kriegsmarine—...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 149.98474112003294)]",0.0,-0.1473047137260437,0.0,0.0,0.0 +How much of South Tucson is Native American?,10%,10%,"[Document(id=e834bcf1ef63c7b72f9bf36d447e483da5003c1c77f033ef064e514b7082f587, content: ' To the south, N. Highland Avenue leads to the Barraza-Aviation Parkway bicycle path. +South Tucson i...', meta: {'name': 'Tucson,_Arizona', 'source_id': '1cd8368afbbb7156e4d37c16c9795a1cf7209fbd0036331b6759337ae62d7224', 'page_number': 1}, score: 197.57082115721136)]",1.0,0.9999998807907104,1.0,1.0,1.0 +Whose theory claimed that species were unchanging and designed by God?,John Ray,William Paley,"[Document(id=ff5e838b8a7222ff21358b50a0b3729b6239d2d61bd80a57e62cd9d0ea6d30db, content: ' Lamarck thought there was an inherent progressive tendency driving organisms continuously towards g...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 168.91207509332548)]",1.0,0.23203226923942566,1.0,1.0,1.0 +Who can restore a database that has been corrupted?,database administrator,Database transactions,"[Document(id=5baf5b500fc870a57b1f1a3f45f13e9b554e1d91359189ce3677cff042e60b2d, content: ' Data security prevents unauthorized users from viewing or updating the database. Using passwords, u...', meta: {'name': 'Database', 'source_id': '98b09743ece9be42ad021f0dc6ffab08c322e7eeace6aae0b98b4bbf67224bd3', 'page_number': 1}, score: 86.92723770233755)]",1.0,0.4896238446235657,1.0,1.0,1.0 +What unpopular philosopher's text did Avicenna learn from?,Abu Abdullah Nateli,Abu Abdullah Nateli,"[Document(id=cbebf9a70685ea0e72bcf8c9f1dbbe9b7163ee26279b633446edf34314600fb3, content: ' Janssens demonstrated that Avicenna was a Sunni Hanafi. However, the 14th cenutry Shia faqih Nurull...', meta: {'name': 'Avicenna', 'source_id': '25cbb1011a29473e1dfa8d96ebe95bc8ead7393548de57e871f959624f7238f1', 'page_number': 1}, score: 181.94319586501797)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What was the event called that caused Ayutthaya to reclaim lost territories ?,the Chinese threat,Ayutthaya recovered its territories by 1770,"[Document(id=f08f17188f751088b2b76f2c1a69ffc86f340979f05ea84b4b201def163ec6f3, content: ' Its trade and secular administrative reforms built a prosperous economy for more than 80 years. Fro...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 158.17672599242826)]",1.0,0.08274449408054352,1.0,1.0,1.0 +"Before the introduction of the Latin alphabet, how was Old English written?",runic system,the scholarly and diplomatic lingua franca of Western Europe,"[Document(id=d806131552e88e82e2690be85e2fbf31773e47614f5d7a8efdc4ab729427ca2b, content: 'Old English (Ænglisc, Anglisc, Englisc) or Anglo-Saxon is the earliest historical form of the Englis...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 161.545426839566)]",1.0,-0.028287040069699287,1.0,1.0,1.0 +"When groups that are antagonistic to each other form a coalition for hidden gain, it is called what?",unholy alliance,unholy alliance,"[Document(id=c54ac19342651f97c105c4825d0b82cbf05684cf1d7c4921e0d078a7212b4a21, content: ' A common type of embezzlement is that of personal use of entrusted government resources; for exampl...', meta: {'name': 'Political_corruption', 'source_id': 'e7c84fb03cb06b72c4ab51eb24218e9f2a10b95493e43d575342db0ffa3e9e1d', 'page_number': 1}, score: 113.39244671567084)]",1.0,1.0,1.0,1.0,1.0 +What made a strong economic case for investing in low carbon technologies?,Stern Review,Clean Edge,"[Document(id=639620812442614c676be21cf936d052650009771bc4d08df98e8056e4441e38, content: ' +New government spending, regulation, and policies helped the industry weather the 2009 economic cri...', meta: {'name': 'Renewable_energy_commercialization', 'source_id': 'fa5f7904b8df5272f4f870ed401f610435c916171c87b47deed9a45e6bd594ea', 'page_number': 1}, score: 114.57287042595362)]",1.0,0.03996779024600983,1.0,1.0,1.0 +"Who introduced the Latin version of the word ""insect""?",Pliny the Elder,Pliny the Elder,"[Document(id=de63707a025645c7fcc7a26e6dd37ddd29a27cd60ff187ce487e0d07217708ee, content: ' +The word ""insect"" comes from the Latin word insectum, meaning ""with a notched or divided body"", or ...', meta: {'name': 'Insect', 'source_id': 'e14215fdfb7e1394430dedb24549d773464f110b7f19b86558bc101f84126630', 'page_number': 1}, score: 184.29502006117178)]",1.0,0.9999999403953552,1.0,1.0,1.0 +What was started by the Roman Catholic Church to dispense of the Cathars in Languedoc?,The Cathar Crusade,St. Bartholomew's Day Massacre,"[Document(id=ef77c93614a70f002eb708dc8d224c0fe860d179e7bc703f697f95e92761e545, content: ' The Gallicans briefly achieved independence for the French church, on the principle that the religi...', meta: {'name': 'Huguenot', 'source_id': '8c6e1ef6132295dfe82bead80d85963191a790434a85ac498376747f1b1241c7', 'page_number': 1}, score: 138.33324791635357)]",0.0,0.2595777213573456,0.0,0.0,0.0 +What music service is a huge competitor for Tidal?,Spotify,Spotify,"[Document(id=f0c6a5cd5408a0fc01c220e5910c03a2241fbe85099410f518592c1fb7098441, content: ' ""Creating a partnership with Beyoncé, one of the most hard-working and talented people in the world...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 122.76790646829441)]",0.0,1.0000001192092896,0.0,0.0,0.0 +How much was each copper token worth?,a halfpenny,"15,000 t","[Document(id=7350dde4c1db287bcdc1678d351d984bcefbfbb76ea61772d6ec37583745c8f4, content: ' Julius Caesar had his own coins made from brass, while Octavianus Augustus Caesar's coins were made...', meta: {'name': 'Copper', 'source_id': 'de5c60a665b93f9b358be412d53a37ca43d90005e3ec9a553ac38a38f076fadc', 'page_number': 1}, score: 127.28424664644346)]",0.0,0.1019832119345665,0.0,0.0,0.0 +What ships were attacked in 1852?,Glencoe and Sea Nymph,"1,436 people were killed and 1,792 seriously injured, which affected morale badly.","[Document(id=dc733dafa156c8cd33c8bfe0b3a7341d74398730f0f97dd80f1c0b07d6a07ef7, content: ' +In the north, substantial efforts were made against Newcastle-upon-Tyne and Sunderland, which were ...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 160.31924929990572)]",0.0,0.06491856276988983,0.0,0.0,0.0 +What was the first fossil to display both clearly traditional reptilian characteristics?,Archaeopteryx,Eomaia,"[Document(id=deb49612ed6c7dff5f1ffa19a9e6cc731756e662202cde46400d79672a69618a, content: ' In the early Triassic, all the medium to large land carnivore niches were taken over by archosaurs ...', meta: {'name': 'Mammal', 'source_id': 'ce12abe98969c8fca9c93088950b36e820ec47c4c63f339bc203185f79f0242d', 'page_number': 1}, score: 169.29409728513733)]",0.0,0.19462870061397552,0.0,0.0,0.0 +What are the theories that integrate molecular genetics with Darwinian evolution called?,the modern evolutionary synthesis,Natural theology,"[Document(id=84267fbb9a5549e7b04e7c50127070dbac174697c575d4780cb3ee14b8679af7, content: ' It was thought that the rediscovery of Mendelian inheritance invalidated Darwin's views. +While some...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 168.31304934894283)]",0.0,0.3708055913448334,0.0,0.0,0.0 +At what time did Tesla get dinner?,8:10 p.m,8:10 p.m,"[Document(id=2ed21a855190ee9369cd4351878edc528d570a2136e3a2612158d6108a59491e, content: ' until 6:00 p.m. or later, with dinner from exactly 8:10 p.m., at Delmonico's restaurant and later t...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 230.03812342004315)]",1.0,1.0000001192092896,1.0,1.0,1.0 +In what year did the French reach Alsace?,1444,1789,"[Document(id=93996e15e538ef99cbc1f14e3a6eca7f547c782d02004cfe4d90863b09e3c9ba, content: ' When hostilities were concluded in 1648 with the Treaty of Westphalia, most of Alsace was recognize...', meta: {'name': 'Alsace', 'source_id': '1105499d303196fb0b7ae4d26209720b0040e63376f114d458fd77f05836cbd4', 'page_number': 1}, score: 192.92581302092802)]",1.0,0.4256799817085266,1.0,1.0,1.0 +Who was responsible for the handling of the Roman census?,two censors,augurs,"[Document(id=a5e5aea2af0395511da534b5312305295571fb6c4dadbbf94b18f8c15af3add7, content: 'The priesthoods of public religion were held by members of the elite classes. There was no principle...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 160.02474930212503)]",0.0,0.12105397880077362,0.0,0.0,0.0 +When was a resolution agreed to about Chinese human rights issues in San Francisco?,"April 1, 2008","April 1, 2008","[Document(id=d16248de2a5e0d9e6bbb7e85b411bf0a4143206e20471ecab73dc7158ce992b1, content: ' The cost to the city for hosting the event was reported to be USD $726,400, nearly half of which ha...', meta: {'name': '2008_Summer_Olympics_torch_relay', 'source_id': 'a45e6ea09a1ff12d8a239a682c8ef3735ae894d5ab6427b50909053bb046e8b5', 'page_number': 1}, score: 123.16570921986224)]",1.0,0.9999999403953552,1.0,1.0,1.0 +Who was the last pope to be crowned?,Paul VI,"Pius XI. He was sent a letter by Cardinal Eugenio Pacelli, and Roncalli later recalled that it was probably the last letter Pacelli sent until his election as Pope Pius XII","[Document(id=f60e9480f17445bd20fd691b4f23689feab6068d6b9b8032e2bc85a59563b9cf, content: ' Unaffected, he wrote to his sisters Ancilla and Maria and told them both that he was fine. +On 30 No...', meta: {'name': 'Pope_John_XXIII', 'source_id': '21b200b333306d54410b53cfd12e123951dabfc1f534196e5309132d00e8184d', 'page_number': 1}, score: 193.24640581111305)]",0.0,0.5919901132583618,0.0,0.0,0.0 +Who handles serious medical emergencies on Norfolk Island?,the Royal Australian Air Force,Royal Australian Air Force,"[Document(id=c3327e32d8658307f8dca684bbdeb22895ca7a8d7ee8ad6ff0eba6200c5e6da2, content: ' All visitors to Norfolk Island, including Australians, are recommended to purchase travel insurance...', meta: {'name': 'Norfolk_Island', 'source_id': '8be872c9c3302a16163599d52ec60841ddff13042f8e8e2e0aa14de2c192e2ee', 'page_number': 1}, score: 140.52734237020536)]",1.0,0.9777635335922241,1.0,1.0,1.0 +Were Roman soldiers expected to purchase their own equipment?,"wealthiest citizens, who were able to purchase the best equipment",the Roman state was forced to arm its soldiers at the expense of the state,"[Document(id=3f0396346c47d3ea53931dd05016eb360d6261c1b5ab0c7a977a5bbf0df1be01, content: ' As a result, the Roman state was forced to arm its soldiers at the expense of the state, which it h...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 162.14933838277432)]",1.0,0.14734730124473572,1.0,1.0,1.0 +Who did Nasser's group try to assassinate?,Hussein Sirri Amer,Prime Minister Hussein Sirri Pasha,"[Document(id=2742e06ecc0f89cd4249a010992ea21b1098edf0e33e8bcef6fb1a43012b8d68, content: ' It was here that Nasser and his closest comrades, including Sadat and Amer, first discussed their d...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 193.56057646704534)]",1.0,0.6850929260253906,1.0,1.0,1.0 +What was AJ Ayer an important member of?,Logical empiricism,Army Chief of Staff,"[Document(id=6efe91225999db5446906baa4a05c76ae7b2289802a7557fec46193f813b94ba, content: ' He then was posted as chief military aide to General MacArthur, Army Chief of Staff. In 1932, he pa...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 123.8434560431782)]",0.0,-0.007286764681339264,0.0,0.0,0.0 +In which year was a settlement reached regarding GE's pollution of the Housatonic River?,1999,1977,"[Document(id=754901c14e07fd836481f4df6a9e1c790f259897d11a92d8f4449320bf432be1, content: ' Around the same time, General Electric's Canadian counterpart, Canadian General Electric, was forme...', meta: {'name': 'General_Electric', 'source_id': '26d4044eb2d79191635af69e57c262af8041a70871e60e989128c458529f04cc', 'page_number': 1}, score: 123.02371619032975)]",1.0,0.47267118096351624,1.0,1.0,1.0 +What area in Charleston is considered the 'industrial area'?,the city's Neck area,Charleston–North Charleston–Summerville Metropolitan Statistical Area,"[Document(id=620f512c3df1eb1788fcecbd5ec4c0438953e005020fab4303e121d55369ade5, content: 'Charleston is the oldest and second-largest city in the U.S. state of South Carolina, the county sea...', meta: {'name': 'Charleston,_South_Carolina', 'source_id': '6135d76cf2064b0c4f70e685c5c7e15b8093869ebd10f9a74e8acb07e2173763', 'page_number': 1}, score: 162.28901843144436)]",1.0,0.20230621099472046,1.0,1.0,1.0 +In what year did Frédéric obtain citizenship in France?,1835,1850,"[Document(id=1e3e69a84cc78a6aeb6d7a1e0750f22f9b9f8a20e99eb5fb853b1aee5e4a5464, content: ' In the year 2000, there were 5,171 Dominican friars in solemn vows, 917 student brothers, and 237 n...', meta: {'name': 'Dominican_Order', 'source_id': 'f55ba4e5b37c737e23256c74229a0bf3a5c697b70e171c6ce20c437d900b023e', 'page_number': 1}, score: 114.08279937842687)]",0.0,0.7635257244110107,0.0,0.0,0.0 +What was Tesla afraid someone was trying to do with his invention?,steal the invention,give it to the world,"[Document(id=cd1bbe0a0f57ccb9f150760bab01a15e98f2aa46f2da4ccfd8b6ace2c7bcef40, content: '"" He stated that the theory was ""worked out in all details"" and that he hoped to soon give it to the...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 157.6234986896534)]",1.0,0.20373223721981049,1.0,1.0,1.0 +What is the group called that does not agree with government at all?,anarchists,Conservative Party,"[Document(id=2ed7ec47dfa8a6f2657c25409b254c42e400c9863ba44e8ab436132c050b8f18, content: ' As opposed to the instability of the earlier factions, which were often tied to a particular leader...', meta: {'name': 'Political_party', 'source_id': '476a9215de81b75a4f17737eed1679570973e55b5ed6175aa0ec1014b09d4632', 'page_number': 1}, score: 102.83304796718625)]",0.0,0.2992589473724365,0.0,0.0,0.0 +What was the name of the bookstore the authors met in?,Old Corner Bookstore,The Eagle and Child and the Lamb and Flag,"[Document(id=463e7639a75455c71923ca7ab663fc14689389b615c5d0a79e6d3f4f8a3efa67, content: ' +The Angel, Islington was formerly a coaching inn, the first on the route northwards out of London, ...', meta: {'name': 'Pub', 'source_id': '502d7ebebad9932fd886ae14992fa24a60ee4433f6dc693672b70b3b21ed1232', 'page_number': 1}, score: 94.35735602218969)]",0.0,0.04613007232546806,0.0,0.0,0.0 +Time and space are both examples of what type of resource?,complexity resources,memory,"[Document(id=e3b88b4f578ec5a002692c5a052496d37d6ee090a0491c5dc17087f489872445, content: ' When negative numbers are required, they are usually stored in two's complement notation. Other arr...', meta: {'name': 'Computer', 'source_id': 'a39560aa5caca299e2e8876ee9b291a51b896b645abcc4d5f2a790a0fa89fcef', 'page_number': 1}, score: 110.41973994552734)]",0.0,0.30840396881103516,0.0,0.0,0.0 +What had happened to Frederick's extra troops from Russia?,"Catherine, who ended Russia's alliance with Prussia and withdrew from the war","Britain also dispatched 9,000 troops to reinforce Ferdinand's Hanoverian army","[Document(id=60dd852bd4811252974d9b4fd57e9056d66a8aa3d37e5b165e9ecfbabcd5ff4e, content: ' Hadik, however, left the city with his Hussars and safely reached the Austrian lines. Subsequently,...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 169.66570089574043)]",1.0,0.2595359981060028,1.0,1.0,1.0 +What percentage of its workforce did Dell terminate in from 2007 to 2008?,10%,10%,"[Document(id=0a1d789e1e3ae04146d38c10ae30c76bb73173ec2df854d0b0695309c7e8629a, content: '” +On April 23, 2008, Dell announced the closure of one of its biggest Canadian call-centers in Kanat...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 154.56627095248245)]",1.0,0.9999999403953552,1.0,1.0,1.0 +What organization was Rome's official caretaker?,the senate,Roman Curia,"[Document(id=fbef94a0288b5d083c93b4da5c36f178efacfd1ea24699f179b4aa34c77781b7, content: ' Several texts proposed for his approval had to be changed. But all texts were finally agreed upon. ...', meta: {'name': 'Pope_Paul_VI', 'source_id': 'fc82a40c990c0b3840b06b32643858d39984b66d78ae950561d809dc84d6edf8', 'page_number': 1}, score: 143.94454170340043)]",0.0,0.20437279343605042,0.0,0.0,0.0 +Who chose BYU as the location of the national Middle East Language Resource Center?,United States Department of Education,United States Department of Education,"[Document(id=0b3a599a54fac34c82f29d858c51bbf5ce4f37032b3539a76c2b8a6b918abc6e, content: ' Several of its language programs are the largest of their kind in the nation, the Russian program b...', meta: {'name': 'Brigham_Young_University', 'source_id': 'cb3b81b6f2c540426803106face67bc5fd4db7b1a9ef422ec3601456732b4785', 'page_number': 1}, score: 231.42251702622502)]",1.0,1.0,1.0,1.0,1.0 +Who discovered Hydrogen gas?,Robert Boyle,Robert Boyle,"[Document(id=45ad6989b7801812895d43ea694bad169af9b5dd9024dd96b099fe071a196217, content: ' +In 1671, Robert Boyle discovered and described the reaction between iron filings and dilute acids, ...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 208.54315140237432)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Where was Polybius forced to go as hostage?,Rome,Milan,"[Document(id=f3ddc00ec9151a3a78139029d06124f9dd3a5c7d358c3b467fee97a09cc00605, content: '."" (Pope Gregory I, would have full access to the Vatican Archives). +Constantius, ordered Liberius i...', meta: {'name': 'Athanasius_of_Alexandria', 'source_id': '2a8e7ec0a44075f1a567744982dcecf4bcea19f0065d4f73c742398a05fa1642', 'page_number': 1}, score: 155.37445791726404)]",0.0,-0.021660923957824707,0.0,0.0,0.0 +In what decade was disco popular?,1970s,1980s,"[Document(id=40c02ba6acd7df36a81130944465adc587a46eaf90fc8f3232939968d3e04b8b, content: ' +In the late 1980s, many local Chicago house music artists suddenly found themselves presented with ...', meta: {'name': 'House_music', 'source_id': 'c556f4a459e8a99b6d2065bab8fd6a918d0a00f13291aad2473922492e4abf2e', 'page_number': 1}, score: 154.38767033374768)]",0.0,0.7754138708114624,0.0,0.0,0.0 +What do the rivers form?,lakes,lakes,"[Document(id=c751375299aac2090fe706008704ed47f7358e09ebad2acbc0d54e2c5b0ab5d3, content: ' Cities such as Milan depend on 80 percent of water from Alpine runoff. Water from the rivers is use...', meta: {'name': 'Alps', 'source_id': 'd2eb3ad3af7d6110ae691bf40660ff7a79e4e9dbfd25785b4e55c7700bfcae79', 'page_number': 1}, score: 122.65050850059238)]",1.0,1.0,1.0,1.0,1.0 +What were the first 256 code points of Unicode made identical to? ,ISO-8859-1,ISO-8859-1,"[Document(id=1ca3328892ba24004a75d8cfaab0f58ce32bed8ee034be6fa54e33e8b0a62eb5, content: ' UTF-16 extends UCS-2, using one 16-bit unit for the characters that were representable in UCS-2 and...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 160.02748578769464)]",1.0,0.9999998807907104,1.0,1.0,1.0 +"How many countries did her song ""Irreplaceable"" get number one status in?",five,sixth,"[Document(id=5567145967b8e3e9c915e0f901144470e7375757e88c1cee87ef724f63eeeb55, content: ' The album's first single, ""Frozen"", became Madonna's first single to debut at number one in the UK,...', meta: {'name': 'Madonna_(entertainer)', 'source_id': '0868d847bb768c30a7507239169d5e441bb3181b66c8b9353a863daa3d58e425', 'page_number': 1}, score: 104.24713931541928)]",0.0,0.5111092329025269,0.0,0.0,0.0 +How many Examination Boards exist in India?,30,89.,"[Document(id=9172a42a065a9a9def97dc9c6923221ca37a1ddd8c4839b6799dfb8f0dd0495b, content: ' New Delhi has a literacy rate of 89.38% according to 2011 census, which is highest in Delhi. +Hindui...', meta: {'name': 'New_Delhi', 'source_id': '3b716631dedc36f770955e2ea4abca667e0b1db5ad4b63ad7a7e250e2bba1a5f', 'page_number': 1}, score: 106.38217760589907)]",0.0,0.28285151720046997,0.0,0.0,0.0 +What play showed an early depiction of civil disobedience?,Antigone,Antigone,"[Document(id=23b161a9804a8a11c97c4f3f14b6762b49fdf3b1fefb8ff1de76b00cfa00995a, content: 'One of its earliest massive implementations was brought about by Egyptians against the British occup...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 159.1381537658833)]",1.0,1.0000001192092896,1.0,1.0,1.0 +"Once in conversation with the Queen, how is Elizabeth addressed?",Ma'am,United Nations,"[Document(id=36cca080196ef1b35130c0b7bcd6bb4216fa5de9889c7c13bfa049ba8b3661c7, content: ' Criticism was focused on the institution of the monarchy itself and the Queen's wider family rather...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 147.77558783372996)]",1.0,0.1381722390651703,1.0,1.0,1.0 +In what century did Paris become largely into fashion?,19th,19th century,"[Document(id=2ab8eb5978dfe802538939b80ae771d663adc411a3a6da5e18b0edcdba1d8f8f, content: ' Sculptors such as Girardon, Coysevox and Coustou acquired reputations as the finest artists in the ...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 170.6255699703063)]",1.0,0.8267526626586914,1.0,1.0,1.0 +When did Alfred the Great begin his reign?,871,1155,"[Document(id=c7bdd00168040f6dde256d6ef2194d3f83ef5509c789f5d39e9215b8ddef1069, content: ' His successors continued to struggle against the papacy as well as the German nobility. A period of...', meta: {'name': 'Middle_Ages', 'source_id': 'f13b345e20c54b6cc55363080d7492474df62715717172aa9f1029f286010ad9', 'page_number': 1}, score: 135.05448547534107)]",1.0,0.4921835660934448,1.0,1.0,1.0 +What two methods can be used when using Load Testing?,data or a large number of users,load or endurance,"[Document(id=77ce7cf14e807c8808892e96984a50baad4f22bdca6b22034b2fb170f96240a2, content: ' Stability testing (often referred to as load or endurance testing) checks to see if the software ca...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 108.62422873080027)]",1.0,0.14578144252300262,1.0,1.0,1.0 +what may possibly be a solution to net neutrality concerns? ,municipal broadband,municipal broadband,"[Document(id=bb0e180e009c490f3ae30fb55a9a91cbe512409c2e12ebae0203298a6f3efe78, content: 'S. Federal Communications Commission (FCC) was reported to be considering a new rule that will permi...', meta: {'name': 'Internet_service_provider', 'source_id': 'b6cdeebacab1c0fc99cc348ba10a7ad5f8a09ca4691c74f89374730939ffbbb4', 'page_number': 1}, score: 117.52238522381246)]",1.0,1.0,1.0,1.0,1.0 +What are the ground-attack aircraft in the USAF usually deployed in support for? ,U.S. ground forces,U.S. ground forces,"[Document(id=cdfa1e6bea8fd42b00ce80468ba471689216a6195425c2435fb1141b0b8c7e6f, content: ' +The ground-attack aircraft of the USAF are designed to attack targets on the ground and are often d...', meta: {'name': 'United_States_Air_Force', 'source_id': '1bb222de66e5a2c81f1d5b75bf3eced066939aec82934a5cb176a607b4e8cc8f', 'page_number': 1}, score: 206.33038319807193)]",1.0,0.9999999403953552,1.0,1.0,1.0 + In what year did the invasion of Russia take place?,1812,1917,"[Document(id=5cf39c40640013969626bfa9dcc17e86c891033c6881c53d41dfced842a316bd, content: ' +The Russian Revolution is the series of revolutions in Russia in 1917, which destroyed the Tsarist ...', meta: {'name': 'Modern_history', 'source_id': '59c9b273f6f85af9d336afaf5154a702230bef3726363ecbf5c8378f5d8798c7', 'page_number': 1}, score: 172.22838449876542)]",0.0,0.6207312941551208,0.0,0.0,0.0 +What was the practice of religion to the Romans?,personal choice,"Private and personal worship was an important aspect of religious practices. In a sense, each household was a temple to the gods","[Document(id=d2af24870dc313fd20684bc694305efc093549248524947f6bd762d819334887, content: '[citation needed] +Roman religious beliefs date back to the founding of Rome, around 800 BC. However,...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 239.36564833446252)]",0.0,0.3531951308250427,0.0,0.0,0.0 +What is one example of failure testing?,Software fault injection,Software fault injection,"[Document(id=76ad3fb3d257a3812a169d749a5fb13f2e05383764e8f9dfc53895f08747b375, content: ' Beta versions can be made available to the open public to increase the feedback field to a maximal ...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 133.72966963796148)]",1.0,0.9999998807907104,1.0,1.0,1.0 +"Out of the maximum amount of players allowed, one must be a what?",goalkeeper,25-man squad of players aged over 21,"[Document(id=729a1a7b3183917b1dc01acb07f1b632efc33f4eea6aeffbe484b1530c2d94f0, content: ' By 2009, under 40% of the players in the Premier League were English. +In response to concerns that ...', meta: {'name': 'Premier_League', 'source_id': '369baa9a5a854704f16fadd35144d3a3150cf1e9ae1671d9b50a3448547b814b', 'page_number': 1}, score: 105.43494496687926)]",0.0,0.2028268426656723,0.0,0.0,0.0 +In which year was the new constitution ratified?,1855,1977,"[Document(id=cd7897f96259919a226f490cef36c94f009c4cd789e86e763ab320a9ec5bca5a, content: ' The march originated at the Opera Plaza after speakers, mainly intellectuals, addressed the crowd. +...', meta: {'name': 'Dissolution_of_the_Soviet_Union', 'source_id': 'f13431b95b0890d57a225554d0ef9f1d051f9e4f48b8d8798689c11670d5b2ac', 'page_number': 1}, score: 121.1361988185173)]",0.0,0.46762916445732117,0.0,0.0,0.0 +What vessel was attacked in 1845?,Naiad,"Russian Bomarsund fortress on Åland Islands. In the same month, the Western Allied Baltic Fleet tried to destroy heavily defended Russian dockyards at Sveaborg outside Helsinki. More than 1000 enemy guns tested the strength of the fortress for two days. Despite the shelling, the sailors of the 120-gun ship Rossiya","[Document(id=3c65091ec8b7ef0626d34520c5e99c8f6539d594394209264472f89890ca987c, content: ' These battles are known in Finland as the Åland war. +In August 1855 a Franco-British naval force ca...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 167.4531894262477)]",0.0,0.11404582113027573,0.0,0.0,0.0 +When did Travel + Leisure list Yale as one of the most beautiful campuses?,2011,2011,"[Document(id=92daacd08f6215b2b6c9a23e96940ae9ffa3af8f6a0300108c6bfcf840702db9, content: ' In western New Haven, the university holds 500 acres (2.0 km2) of athletic facilities, including th...', meta: {'name': 'Yale_University', 'source_id': 'f33638c4d6b8d7572ba500f3a8affca1729aae3046c83a94e2a7424dd50f6549', 'page_number': 1}, score: 177.64039326449637)]",1.0,1.0,1.0,1.0,1.0 +What sort of variants did the Human Genome Project indicate that many of the measured transcripts were?,alternative variants,expressed sequence tags,"[Document(id=f61b4e12858ae84a04762a59c6bfaccab2588f02781fe27a55cb8323cef086ed, content: ' This mechanism is a common source of new genes in prokaryotes, sometimes thought to contribute more...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 104.78775277873254)]",1.0,0.13049247860908508,1.0,1.0,1.0 +During what year did the Italians invade Libya?,1911,1980,"[Document(id=e97c93db725af1581bd486f05e5e76d0f4731b0215b471c0ebd9a4eb033f7c96, content: ' The mission failed; 400 Libyans were killed and they were forced to retreat. Gaddafi later came to ...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 204.3926580375098)]",1.0,0.48459160327911377,1.0,1.0,1.0 +What did Davidson believe the God of Aristotle is synonymous with?,rational thought,Jupiter,"[Document(id=a8cc5557b921c5766b219ced4b8319da3ea79db597efbb98b7cbe6998afe6656, content: ' Ambrose, the influential Bishop of Milan and future saint, wrote urging the rejection of Symmachus'...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 185.2449344711922)]",0.0,0.0053202565759420395,0.0,0.0,0.0 +What percentage of students at Notre Dame are the children of former Notre Dame students?,21–24%,21–24%,"[Document(id=4a2399f100a536a4baf71c5dedf166c45f09b1b237a505e9a84faf1da7fd8a1b, content: ' +In 2014 the Notre Dame student body consisted of 12,179 students, with 8,448 undergraduates, 2,138 ...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 172.25504829596989)]",1.0,0.9999998807907104,1.0,1.0,1.0 +On what day did U.S> planes located Ozawa's fleet?,second,19 February,"[Document(id=3aa48796f054c4788769b2991e2826cffc49e4db56b22dc5cb5d2e279b46d630, content: ' The network of bunkers and pillboxes greatly favored the defender. +Starting in mid-June 1944, Iwo J...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 168.23687425884253)]",1.0,0.24278084933757782,1.0,1.0,1.0 +Greek was crowned the European champions in what event?,Euro 2004,Euro 2004,"[Document(id=4b9cc1d29ed0f9a4cb783c7febb93802073dee536efd60ee53a2c3e5bec9cccf, content: ' Their best ever performance was in the 1896 Summer Olympics, when Greece finished second in the med...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 154.4942077869361)]",1.0,1.0,1.0,1.0,1.0 +Which tense indicates that an action hasn't finished?,imperfective,/p pʰ b,"[Document(id=f80807120f788ee71503c071a59466d44904fb238af267935e4ae418c2b5a1da, content: ' Punjabi has lost breathy-voiced consonants, which resulted in a tone system, and therefore has a di...', meta: {'name': 'Aspirated_consonant', 'source_id': 'ded60872a72621c0cc2ee33d9a7b0d60f1474075e260da958b752dbe72e00321', 'page_number': 1}, score: 121.52265386631508)]",0.0,0.07327696681022644,0.0,0.0,0.0 +What acoustic element could be distorted by different audio settings in iPods prior to 2007?,Bass,bass instruments,"[Document(id=7308906ca1725346de1f8669d04745bd0719f93959d2b77a3e2eedd2aea218ba, content: ' This would happen for EQ settings like R&B, Rock, Acoustic, and Bass Booster, because the equalizer...', meta: {'name': 'IPod', 'source_id': 'f0324763387a7e11c235569bada6935891b2c5ba40dd065ab5acc1bb5de8dbed', 'page_number': 1}, score: 151.26005977413936)]",1.0,0.40640756487846375,1.0,1.0,1.0 +In what way can Neutral triatomic hydrogen exist?,excited form,in the interstellar medium,"[Document(id=628fa2cca8f8b08b31e468b195f5cd5c6ad4092446b69382591236851e2cc2ff, content: ' +Hydrogen, as atomic H, is the most abundant chemical element in the universe, making up 75% of norm...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 114.30356430255492)]",1.0,0.09371746331453323,1.0,1.0,1.0 +How are children used in the conflicts in Burma?,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements.,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements,"[Document(id=ff11a1c6102e2b293943cbcfb8c2935799487df6f465dea412c00844c1c7c655, content: '"" In July 2013, the Assistance Association for Political Prisoners indicated that there were approxi...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 166.35832930030904)]",1.0,0.997592568397522,1.0,1.0,1.0 +In what decades were Arsenal players stereotyped in a negative way?,1970s and 1980s,Os Arsenalistas,"[Document(id=5008c48e9b8d1c751b9388b7772f14176f8c01ef2697fa8a4d0c1fd478bfbfca, content: ' In 1909, Sparta Prague adopted a dark red kit like the one Arsenal wore at the time; in 1938, Hiber...', meta: {'name': 'Arsenal_F.C.', 'source_id': 'b197b14120aabbc3bf2073d4b3e99c1c51d56d5ac5248f8a97dadcfa056a6f7e', 'page_number': 1}, score: 79.88047283584906)]",1.0,-0.08058245480060577,1.0,1.0,1.0 +When did new construction start in Melbourne?,late 1890s,February 2006,"[Document(id=3513fcc6c75aee92ba5e4d20e40d134b9159f33067469bcc48634091ed74a9ed, content: '8 billion annually. Melbourne has been attracting an increasing share of domestic and international ...', meta: {'name': 'Melbourne', 'source_id': 'e9cc00699ea29b72949874ec0e147efa277ecdaa2a727246f376aab1a8d342bd', 'page_number': 1}, score: 185.98741924757314)]",1.0,0.34920746088027954,1.0,1.0,1.0 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:15.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:15.csv new file mode 100644 index 0000000..d0730e0 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:15.csv @@ -0,0 +1,123 @@ +questions,true_answers,predicted_answers,retrieved_docs,doc_mrr,sas,doc_map,doc_recall_single_hit,doc_recall_multi_hit +What were the earliest priesthoods? ,flamines,flamines,"[Document(id=3c9b5484de418625b08e3d8f4e3c3398f40f2f5af0bbf48528ee46ea38c64e7c, content: ' In household cult, the paterfamilias functioned as priest, and members of his familia as acolytes a...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 176.94739092146085)]",1.0,0.9999998807907104,1.0,1.0,1.0 +When was a third visit to Mnafeld scheduled? ,early 1546,5 July 2005,"[Document(id=2bb0143cbe0013e9a1f2c007273d15715e8d6daa8bd81bb7fd41acf67926990b, content: ' After the success of the British Galleries, opened in 2001, it was decided to embark on a major red...', meta: {'name': 'Victoria_and_Albert_Museum', 'source_id': '202e0997b99bb8f0aaa6dd9c09c05e008cf8809c92549b6c9f073b5d26d1a3dd', 'page_number': 1}, score: 79.8326457180098)]",0.0,0.46504393219947815,0.0,0.0,0.0 +What was the name of the airfield where heavy raids took place?,Farnborough,Tora Bora,"[Document(id=0eb20bd2a77cf38001b407668fb5851a524f20fa206d4f94205383da24400352, content: ' On 7 October 2001, the official invasion began with British and U.S. forces conducting airstrike ca...', meta: {'name': 'War_on_Terror', 'source_id': 'b9ac47a39d382362992253f0ccfa0072ea6216fe1daa748c4899980b467292f4', 'page_number': 1}, score: 170.56443831891409)]",0.0,0.15409140288829803,0.0,0.0,0.0 +The Modern English ending -'s is derived from what ending in Old English?,-es,"Ænglisc, Anglisc, Englisc) or Anglo-Saxon","[Document(id=540a5b537ec7e1fdea905baae428e9a4c76167e7be305ec2dac494ec1ac16d1f, content: 'Old English (Ænglisc, Anglisc, Englisc) or Anglo-Saxon is the earliest historical form of the Englis...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 109.47656677866291)]",1.0,0.19367319345474243,1.0,1.0,1.0 +In what area is it common for spiritual mentorship to be extremely high?,the West,Hinduism,"[Document(id=b43e607046d3a9f2878f76721d6d75cb6c46d2bdb38f6b3fdc5f1b9a2f23a163, content: ' +In The Church of Jesus Christ of Latter-day Saints (LDS Church), the teacher is an office in the Aa...', meta: {'name': 'Teacher', 'source_id': '8d3b7bd3176a10380300a49fa713b8fffa5e77a181df51ea823d2bdeb46d4c7b', 'page_number': 1}, score: 141.90348317285938)]",1.0,0.10249558091163635,1.0,1.0,1.0 +Who is the only Greek volleyball club to win European titles?,Olympiacos,Olympiacos,"[Document(id=4af194e6a549f3cca324fac69732006a675ddeb8e841c964604dff19122e4f16, content: 'O.K, Maroussi) have won 3 Triple Crowns, 5 Saporta Cups, 2 Korać Cups and 1 FIBA Europe Champions Cu...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 155.4234665974737)]",1.0,0.9999999403953552,1.0,1.0,1.0 +How many judges were on the panel for this season?,four,three,"[Document(id=0639a6470cdb94c436a07cbb24189b437a50f73fdb84ffa10dd2faeb282342a2, content: ' +The show had originally planned on having four judges following the Pop Idol format; however, only ...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 123.15646815816092)]",1.0,0.7108100652694702,1.0,1.0,1.0 +"Along with a chemical process, what type of process is used by night vision devices to convert infrared into visible light?",electrical,electrical,"[Document(id=2296ed852027297f8181dc1f64af4fcbb66e36bb282c6197423888ce9d011df5, content: '9–14 μm) and produce images of that radiation. Since infrared radiation is emitted by all objects ba...', meta: {'name': 'Infrared', 'source_id': 'ba9d9843065facd8aa75e472f0c7d760ae3d78712335a49f7abaf499acb21d1c', 'page_number': 1}, score: 114.32649598257161)]",1.0,1.0,1.0,1.0,1.0 +The second most prominent religion in New York is what?,Judaism,Judaism,"[Document(id=5dac70957842f214d4c60f76564b27a7e657cd237c677ed236d1ed1ca76e360f, content: ' +Christianity (59%), particularly Catholicism (33%), was the most prevalently practiced religion in ...', meta: {'name': 'New_York_City', 'source_id': '8a1509d057a51ed7a68b42ff87df5d459ac7f15f3d0f4f1685859ba197f9686d', 'page_number': 1}, score: 155.5405676121626)]",1.0,1.0,1.0,1.0,1.0 +The Free Hanseatic City of Bremen includes which cities?,Bremen and Bremerhaven,two urban districts,"[Document(id=d439f7d8555c7aea5751230341480bbf37f3e3f9e6b4e542f7cb145dfa03c950, content: ' In the three free states of Bavaria, Saxony, and Thuringia the government is referred to as the Sta...', meta: {'name': 'States_of_Germany', 'source_id': '60764e89141121f734e956aeeb48fcbe12c7e4fdec683c4f9fa29406a3f0579e', 'page_number': 1}, score: 161.52389140584813)]",1.0,0.12825778126716614,1.0,1.0,1.0 +What seeks to limit the use of polysyllabic characters?,CPC,β-Lactam antibiotics,"[Document(id=3b8de679cf4330422020b3c530f29f7e55171fe9f5a7383faca38ec8723c7d6e, content: ' Bacterial cell walls are different from the cell walls of plants and fungi, which are made of cellu...', meta: {'name': 'Bacteria', 'source_id': 'f8e9d365bfe51780e8f42f3a07cb69dd7a6df46bc7c67030d0853bdb13eeb390', 'page_number': 1}, score: 68.15934476800317)]",0.0,0.03749799355864525,0.0,0.0,0.0 +In what courts can federal habeas corpus suits be brought?,federal,district court,"[Document(id=a54d74acb097c757f1d529fef3759c7167a1133c97bc7020d3ed8e7abf0a8c44, content: 'S. Court of Appeals for the Fifth Circuit got the issue wrong — on the second try as well as on the ...', meta: {'name': 'Affirmative_action_in_the_United_States', 'source_id': 'b7156a1660bb591e3abb4460028a7922c063ba8962ad6dce11678f8b08c5cdcb', 'page_number': 1}, score: 100.49442833929467)]",0.0,0.2981231212615967,0.0,0.0,0.0 +What did Whitehead consider himself as a philosopher? ,rank amateur,God's consequent nature,"[Document(id=47d8785b4e464ebb67e06bd4f4b7a36000d8b8a168c4c50f7e02c02df38feb7e, content: '"" He then observes that the mark of higher forms of life is that they are actively engaged in modify...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 194.04915088812754)]",1.0,-0.012107331305742264,1.0,1.0,1.0 +What condition does a person have to meet to be allowed by law on a coin?,deceased,slave,"[Document(id=74be6b32b41b12abb2990476f778fb5335428e0a6b6f01cf7a1235ad434899b9, content: 'The rule of law is the legal principle that law should govern a nation, as opposed to being governed...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 85.0298293465967)]",0.0,0.055851925164461136,0.0,0.0,0.0 +Which region of the US experiences rain year round?,Pacific Northwest,Hawaii,"[Document(id=5cc5c90d3b7502d6e064b808c140d69c74657b0e5d9b60b7a5c3a8b76125d80b, content: 'S., with Quinault Rainforest in Washington having an average of 137 inches (348 cm). Hawaii receives...', meta: {'name': 'Geography_of_the_United_States', 'source_id': '6eef5e1e8e3a1d489b23e0304d3cc9472b84a44871b2fd5ecfc8ab3f12e8ccf0', 'page_number': 1}, score: 162.08782552673898)]",1.0,0.26181256771087646,1.0,1.0,1.0 +What is the ability to recognize and adapt to new specific pathogens called?,Adaptive (or acquired) immunity,Antigen specificity,"[Document(id=7f56f4718d147b0156e077e4853a6ac0250773335364d5e5cac1ff37df572de8, content: ' The adaptive immune response is antigen-specific and requires the recognition of specific ""non-self...', meta: {'name': 'Immune_system', 'source_id': 'fcb8c5092b2596a66d4f96587d653cbabda606874d86632c35ec40ef8f023051', 'page_number': 1}, score: 158.72987153215703)]",1.0,0.2857634127140045,1.0,1.0,1.0 +Who allegedly hit a home run to the Center?,Babe Ruth,Babe Ruth,"[Document(id=9f4c20a2bce44cf9bd3af493be183ab333c4d80eef37f323c78a73218b2cb402, content: ' Because of the tie the Giants and Cubs ended up tied for first place. The Giants lost the ensuing o...', meta: {'name': 'Chicago_Cubs', 'source_id': '10bd5330115a2fe2b9ef60d21abe3942769709161783409af0c4444ccac129c3', 'page_number': 1}, score: 164.16816603528014)]",1.0,1.0,1.0,1.0,1.0 +Which type of graduate from Imperial earned the second highest average starting salary after graduation?,Computing graduates,"3*). The 2008 Research Assessment Exercise also showed five subjects – Pure Mathematics, Epidemiology and Public Health, Chemical Engineering, Civil Engineering, and Mechanical, Aeronautical and Manufacturing Engineering – were assessed to be the best[clarification needed] in terms of the proportion of internationally recognised research quality. +Imperial College Healthcare NHS Trust was formed on 1 October 2007 by the merger of Hammersmith Hospitals NHS Trust (Charing Cross Hospital, Hammersmith Hospital and Queen Charlotte's and Chelsea Hospital) and St Mary's NHS Trust (St. Mary's Hospital and Western Eye Hospital) with Imperial College London Faculty of Medicine","[Document(id=2eda08ca6f7b84cfcba1c93efa2266542d3f72df60ef8898ce7a2337e02a91a8, content: '9 million from Funding Council grants (2011/12 – £172.4 million) and £12.5 million from endowment an...', meta: {'name': 'Imperial_College_London', 'source_id': '8ff02d910ce681fdd72074b13ed322271625386966ee827dabf1c93ca556fd6e', 'page_number': 1}, score: 128.41401665791395)]",1.0,0.30592256784439087,1.0,1.0,1.0 +In what year did Eisenhower propose his second civil rights act?,1960,1955,"[Document(id=e2beebdfa17be4f6efba32829e2212a1399b680760fcc02660150921b570ef84, content: 'Dwight David ""Ike"" Eisenhower (/ˈaɪzənˌhaʊ.ər/ EYE-zən-HOW-ər; October 14, 1890 – March 28, 1969) wa...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 178.3948185668636)]",1.0,0.6117546558380127,1.0,1.0,1.0 +What is the current height limitation in Paris in central areas?,50 m,"2,300 km2","[Document(id=5d56044357c45f6633a12781f9ccab0c392f05bb9311df2581b4325ada3c133d, content: ' +Paris is located in northern central France. By road it is 450 kilometres (280 mi) south-east of Lo...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 173.17023838877753)]",1.0,0.3935243487358093,1.0,1.0,1.0 +Income level was found to have a signification correlation to which outcome in the study?,nutrition literacy,"education increases measurable intelligence. Studies have shown that while educational attainment is important in predicting intelligence in later life, intelligence at 53 is more closely correlated to intelligence at 8 years old","[Document(id=85ad820474faf97ab67cb2e88fa7a8918ba053dd42f925d5b131e99fbf09a755, content: ' Educational psychology both draws from and contributes to cognitive science and the learning scienc...', meta: {'name': 'Education', 'source_id': '80079a0cc7970848bb327b41c4c044a89b33b363968cebeee5d10bac1c584862', 'page_number': 1}, score: 92.56192788229203)]",0.0,0.3706241250038147,0.0,0.0,0.0 +How much did Viacom want in damages in their lawsuit against youtube?,$1 billion,$1 billion,"[Document(id=497c11f9fbbfb1f37be3dcd0a29d747c5f8f8967252a25e340b937417a080280, content: ' Three successful complaints for copyright infringement against a user account will result in the ac...', meta: {'name': 'YouTube', 'source_id': '4d18798daeceb997a6dcb04b575dbbdad768671ca9202116370611d944d510be', 'page_number': 1}, score: 175.82713084515086)]",1.0,0.9999998807907104,1.0,1.0,1.0 +Who should be able to challenge administrative orders in court?,individuals,individuals,"[Document(id=8a0f220e6df4e50504002c6ad6312d289ff774c9c753db65e80fb1022f521be2, content: ' Dicey's: “no man is punishable or can be lawfully made to suffer in body or goods except for a dist...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 115.93977230752789)]",1.0,1.0,1.0,1.0,1.0 +The Slavs were under what name in the early 6th century?,the Antes and the Sclaveni,Indo-European,"[Document(id=6dfceb86a92f3071080d87731516cf9681746b00fd5c00deac18699cdc548a64, content: ' A notable political union of the 20th century that covered most South Slavs was Yugoslavia, but it ...', meta: {'name': 'Slavs', 'source_id': 'a55731ff3cd03d8fe34d4e646dbfcdc2088f01190e5110c9ff0baf4e4b486847', 'page_number': 1}, score: 200.64880411744542)]",1.0,0.1052892878651619,1.0,1.0,1.0 +How are the results simplified when using the SASO?,"into four summaries that look specifically at responses that correspond to either homosexuality, heterosexuality, bisexuality or asexuality.","with the gain of the amplifier between the sensor and the analog-to-digital converter, the saturation speed can be increased","[Document(id=a3fc752defa31a3ba97e1a76467f1e902ff87984a4d0fb5db5bb756efede29a4, content: ' This is the only technique available under the standard for output formats that are not in the sRGB...', meta: {'name': 'Film_speed', 'source_id': 'cd350045b0fbc5c253411cbf952460337caeec603c6f046e13f60bcc9428df61', 'page_number': 1}, score: 108.3630433763505)]",0.0,0.08265610039234161,0.0,0.0,0.0 +What socioeconomic class used the Florentine Tuscan language in Dante's time?,upper class,urban and literary classes,"[Document(id=fbf2110b67db9729de11d4fe22a0c0ff32b3a45b47041c33f08674359d8df041, content: ' The city of Alghero in Sardinia was repopulated with Catalan speakers in the 14th century. The lang...', meta: {'name': 'Catalan_language', 'source_id': 'e2178d77cee39fac7bfc6ad15bc5cd10fa8a19c21faea32b0ea8a06b3d4a200f', 'page_number': 1}, score: 170.64869262627934)]",0.0,0.4680020213127136,0.0,0.0,0.0 +"In addition to ammo proposals, what other need was articulated?",The need for some form of tracer or smoke trail,Nuclear arms reduction,"[Document(id=0f23a3d664b2d9d293629ea9d692c835b9a8e93ffb30bc45de463cd9b9d93424, content: ' Therefore, Khrushchev would not take part in the summit. Up until this event, Eisenhower felt he ha...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 81.54270754807897)]",0.0,0.12531504034996033,0.0,0.0,0.0 +What is that power that enables us to love?,Prevenient grace,mercy of God and His care for His creation,"[Document(id=ab06859e83076577bc2a04088a7525ddee1f44dedfa9ffcf46872843c8c86632, content: ' They neither expected nor sought the appearance of the stigmata or any other physical manifestation...', meta: {'name': 'Dominican_Order', 'source_id': 'f55ba4e5b37c737e23256c74229a0bf3a5c697b70e171c6ce20c437d900b023e', 'page_number': 1}, score: 91.85024388900882)]",0.0,0.3296220898628235,0.0,0.0,0.0 +"Who was assassinated on September 22, 189 AD?",He Jin,He Jin,"[Document(id=232041b4afb4ca71e3d1d3697502447b0c4bfa12f1eccde1a6f36b432b833244, content: ' 189 AD), half-brother to Empress He (d. 189 AD), plotted with Yuan Shao (d. 202 AD) to overthrow th...', meta: {'name': 'Han_dynasty', 'source_id': '060d95b91f9c20e5c8cd8b8a032ab44aac38ab88ce62d3c302bea0865f533568', 'page_number': 1}, score: 148.7241546412495)]",1.0,1.0,1.0,1.0,1.0 +How did Gaddafi change the legal system?,he introduced sharia as the basis for the legal system,sharia law,"[Document(id=21cb89e76694844f819e54d7269a6349d3404a6310be592588c5473ad2d8efe2, content: ' Gaddafi proclaimed that the People's Congresses provided for Libya's every political need, renderin...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 209.0268681523744)]",1.0,0.6846116781234741,1.0,1.0,1.0 +What name was it assumed Elizabeth would take upon her marriage to Philip?,"Mountbatten,",House of Mountbatten,"[Document(id=e315f255311d7dc4c756937a51f6602a99f1b8e3b0a145306a3d8c41c258bc57, content: ' She was educated privately at home. Her father acceded to the throne on the abdication of his broth...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 235.82296588222033)]",1.0,0.7922807931900024,1.0,1.0,1.0 +What was the attack on the British weakness?,Battle of Fort Bull,Port Moresby,"[Document(id=b0fed1abc72a4d56e9957121cabc7bf9e24323c310875bcb6c1dbad5036aa408, content: 'S.-UK Combined Chiefs of Staff, which was also in Washington. Allied resistance, at first symbolic, ...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 151.05475774942101)]",0.0,0.09719950705766678,0.0,0.0,0.0 +How much of South Tucson is Native American?,10%,Hohokam,"[Document(id=566e0e1e8fea2b2d4f4780311f659535816e073357090acf35a5060d287a12d1, content: 'Tucson (/ˈtuːsɒn/ /tuːˈsɒn/) is a city and the county seat of Pima County, Arizona, United States, a...', meta: {'name': 'Tucson,_Arizona', 'source_id': '1cd8368afbbb7156e4d37c16c9795a1cf7209fbd0036331b6759337ae62d7224', 'page_number': 1}, score: 204.4425567869381)]",1.0,-0.01937844417989254,1.0,1.0,1.0 +Whose theory claimed that species were unchanging and designed by God?,John Ray,John Ray,"[Document(id=9a450124afc12f193f72ae1f8310b72ada7cbbf8f34bbd71f66eb32b0f47efbc, content: ' With the development of the modern evolutionary synthesis in the 1930s and 1940s, Darwin's concept ...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 189.49249077670783)]",1.0,1.0,1.0,1.0,1.0 +Who can restore a database that has been corrupted?,database administrator,"the Archive would have to delete pages from its system upon request of the creator. The exclusion policies for the Wayback Machine may be found in the FAQ section of the site. The Wayback Machine also retroactively respects robots.txt files, i.e., pages that currently are blocked to robots on the live web temporarily will be made unavailable from the archives as well. +In late 2002, the Internet Archive","[Document(id=0b1c19c2f87338db98035e45f9e5406c75d590764931173b2f73a83e0eea3255, content: ' providing an authoritative statement of the archivist), the United States patent office and the Eur...', meta: {'name': 'Wayback_Machine', 'source_id': '1d3e2979ec8b766777dcd67b2e231e400023f336a0622836e5036408af125544', 'page_number': 1}, score: 84.4611791300581)]",0.0,0.10763765126466751,0.0,0.0,0.0 +What unpopular philosopher's text did Avicenna learn from?,Abu Abdullah Nateli,Abu Abdullah Nateli,"[Document(id=ec70e294a8a1203ecdbb9fe95270466a0129a5d607fe306df146559206aa5b31, content: ' Avicenna first began to learn the Quran and literature in such a way that when he was ten years old...', meta: {'name': 'Avicenna', 'source_id': '25cbb1011a29473e1dfa8d96ebe95bc8ead7393548de57e871f959624f7238f1', 'page_number': 1}, score: 189.51529079780602)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What was the event called that caused Ayutthaya to reclaim lost territories ?,the Chinese threat,Ayutthaya recovered its territories by 1770,"[Document(id=391f8278a6d664b0fda2a91e05a75fcb178e560aca2778cf3383f74470691074, content: ' Its trade and secular administrative reforms built a prosperous economy for more than 80 years. Fro...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 152.93271782643825)]",1.0,0.08274449408054352,1.0,1.0,1.0 +"Before the introduction of the Latin alphabet, how was Old English written?",runic system,runic system,"[Document(id=540a5b537ec7e1fdea905baae428e9a4c76167e7be305ec2dac494ec1ac16d1f, content: 'Old English (Ænglisc, Anglisc, Englisc) or Anglo-Saxon is the earliest historical form of the Englis...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 162.1246341956293)]",1.0,1.0,1.0,1.0,1.0 +"When groups that are antagonistic to each other form a coalition for hidden gain, it is called what?",unholy alliance,unholy alliance,"[Document(id=b281d8f2fa5c72bf9176af2f66fbd72725ba99c1c3e8e2e12044e694c6ee3bbe, content: ' This sum itself may be all or a portion of the difference between the actual (inflated) payment to ...', meta: {'name': 'Political_corruption', 'source_id': 'e7c84fb03cb06b72c4ab51eb24218e9f2a10b95493e43d575342db0ffa3e9e1d', 'page_number': 1}, score: 125.71453753599548)]",1.0,1.0,1.0,1.0,1.0 +What made a strong economic case for investing in low carbon technologies?,Stern Review,Clean Edge,"[Document(id=7726f77f73261125d28fc6db733db2a8ae6e451dfc266962ad1ddc5ccf22d22f, content: ' +New government spending, regulation, and policies helped the industry weather the 2009 economic cri...', meta: {'name': 'Renewable_energy_commercialization', 'source_id': 'fa5f7904b8df5272f4f870ed401f610435c916171c87b47deed9a45e6bd594ea', 'page_number': 1}, score: 108.44000862757655)]",1.0,0.03996780887246132,1.0,1.0,1.0 +"Who introduced the Latin version of the word ""insect""?",Pliny the Elder,Pliny the Elder,"[Document(id=d4ddb3955cf6ff9d6bed687f54220f695fef0951972c64309938d9cf7670f6ba, content: ' +The word ""insect"" comes from the Latin word insectum, meaning ""with a notched or divided body"", or ...', meta: {'name': 'Insect', 'source_id': 'e14215fdfb7e1394430dedb24549d773464f110b7f19b86558bc101f84126630', 'page_number': 1}, score: 184.29502006117178)]",1.0,1.0,1.0,1.0,1.0 +What was started by the Roman Catholic Church to dispense of the Cathars in Languedoc?,The Cathar Crusade,the German Reformation,"[Document(id=2562c4ed0cf84994d606d5866fb6563cf019303140e39c1b2a35e85652c879f9, content: ' In spite of influential supporters among the English aristocracy, such as John of Gaunt, the moveme...', meta: {'name': 'Late_Middle_Ages', 'source_id': 'fb62f16f8c6599f7200add017bece6e2ea916520e544af1dd94713410c254ce9', 'page_number': 1}, score: 133.62001907744167)]",0.0,0.24226750433444977,0.0,0.0,0.0 +What music service is a huge competitor for Tidal?,Spotify,Spotify,"[Document(id=ea56a7c43f1347c76ec07affd8ad20729d040a8d58a36f56583b537f903e8142, content: ' +The release of a video-game Starpower: Beyoncé was cancelled after Beyoncé pulled out of a $100 mil...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 112.66482791724965)]",0.0,1.0000001192092896,0.0,0.0,0.0 +How much was each copper token worth?,a halfpenny,"15,000 t","[Document(id=98b439fa147a97e5eae30c05b00e8ced8af2688f70d603a4a19bd0acee1fb227, content: ' Electroplating commonly uses copper as a base for other metals such as nickel. +Copper has been in u...', meta: {'name': 'Copper', 'source_id': 'de5c60a665b93f9b358be412d53a37ca43d90005e3ec9a553ac38a38f076fadc', 'page_number': 1}, score: 122.23139081007312)]",0.0,0.10198316723108292,0.0,0.0,0.0 +What ships were attacked in 1852?,Glencoe and Sea Nymph,"1,436 people were killed and 1,792 seriously injured, which affected morale badly. Another raid was carried out on 11/12 May 1941. Westminster Abbey and the Law Courts were damaged, while the Chamber of the House of Commons was destroyed. One-third of London's streets were impassable.","[Document(id=1a4ba8fa507fb2fb6dac9df67abbdff76568d8f7a2e9cf8a578677e271a7190c, content: ' +In the north, substantial efforts were made against Newcastle-upon-Tyne and Sunderland, which were ...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 159.2869259349145)]",0.0,-0.05634152516722679,0.0,0.0,0.0 +What was the first fossil to display both clearly traditional reptilian characteristics?,Archaeopteryx,Archaeopteryx,"[Document(id=5b537a30444228b3b5e39f8e28a321a8e774f6051560dcab29bb0ece8b5ae99e, content: ' Unlike Archaeopteryx and the non-avialan feathered dinosaurs, who primarily ate meat, recent studie...', meta: {'name': 'Bird', 'source_id': '76114e3e9c92caec1de2bcfd38b6f22e1b48487f665fa6392f914934a6748ae1', 'page_number': 1}, score: 186.97083867996844)]",1.0,0.9999999403953552,1.0,1.0,1.0 +What are the theories that integrate molecular genetics with Darwinian evolution called?,the modern evolutionary synthesis,Lamarckism,"[Document(id=9a450124afc12f193f72ae1f8310b72ada7cbbf8f34bbd71f66eb32b0f47efbc, content: ' With the development of the modern evolutionary synthesis in the 1930s and 1940s, Darwin's concept ...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 175.4835605613558)]",0.0,0.2597317695617676,0.0,0.0,0.0 +At what time did Tesla get dinner?,8:10 p.m,3 a.m. to 11 p,"[Document(id=7ab4bce8d6c4986fced2482d7df23cc8143be198d80d729cef5c53dff273d0bc, content: ':32 The classes were held in German, as it was a school within the Austro-Hungarian Military Frontie...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 137.4402934899115)]",1.0,0.8442951440811157,1.0,1.0,1.0 +In what year did the French reach Alsace?,1444,2007,"[Document(id=9110e2aded9c3f4623e8b1d9a2fc3eefed2672371c89283d34e3a11304a9222c, content: ' Although Germany never formally annexed Alsace-Lorraine, it was incorporated into the Greater Germa...', meta: {'name': 'Alsace', 'source_id': '1105499d303196fb0b7ae4d26209720b0040e63376f114d458fd77f05836cbd4', 'page_number': 1}, score: 177.11536613714622)]",1.0,0.1668260097503662,1.0,1.0,1.0 +Who was responsible for the handling of the Roman census?,two censors,augurs,"[Document(id=b80b0d7fa56110987db7fc274c40400c9afab47b60f93bd3980be6d29191c35f, content: 'The priesthoods of public religion were held by members of the elite classes. There was no principle...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 159.85134956306916)]",0.0,0.12105413526296616,0.0,0.0,0.0 +When was a resolution agreed to about Chinese human rights issues in San Francisco?,"April 1, 2008",April 9,"[Document(id=bcb8a4d40e4e94182477bd61b84fbd041d74b5d411214204882581a9a4a0d862, content: ' Among them was Jane Birkin, who spoke to the media about the ""lack of freedom of speech"" in China. ...', meta: {'name': '2008_Summer_Olympics_torch_relay', 'source_id': 'a45e6ea09a1ff12d8a239a682c8ef3735ae894d5ab6427b50909053bb046e8b5', 'page_number': 1}, score: 112.90831116674465)]",1.0,0.7683700323104858,1.0,1.0,1.0 +Who was the last pope to be crowned?,Paul VI,Cardinal Eugenio Pacelli,"[Document(id=0ba8e9751d1501b0f5e0a1ccd2f17ab7018f7b8f1e53f73d800134df09ba5e26, content: ' Unaffected, he wrote to his sisters Ancilla and Maria and told them both that he was fine. +On 30 No...', meta: {'name': 'Pope_John_XXIII', 'source_id': '21b200b333306d54410b53cfd12e123951dabfc1f534196e5309132d00e8184d', 'page_number': 1}, score: 205.87684528714007)]",0.0,0.34143221378326416,0.0,0.0,0.0 +Who handles serious medical emergencies on Norfolk Island?,the Royal Australian Air Force,Royal Australian Air Force,"[Document(id=61583a75fdba243f6a812f3bf73c41e686e4f8374db81e4dd5f1a4d5a4430336, content: ' +Non-Australian citizens who are Australian permanent residents should be aware that during their st...', meta: {'name': 'Norfolk_Island', 'source_id': '8be872c9c3302a16163599d52ec60841ddff13042f8e8e2e0aa14de2c192e2ee', 'page_number': 1}, score: 136.08818508050095)]",1.0,0.9777635335922241,1.0,1.0,1.0 +Were Roman soldiers expected to purchase their own equipment?,"wealthiest citizens, who were able to purchase the best equipment",the Roman state was forced to arm its soldiers at the expense of the state,"[Document(id=1c55594460ba4e27d8751041858090c94678420d1701172511f1bad1a1d70154, content: ' As a result, the Roman state was forced to arm its soldiers at the expense of the state, which it h...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 156.21619178330843)]",1.0,0.14734730124473572,1.0,1.0,1.0 +Who did Nasser's group try to assassinate?,Hussein Sirri Amer,Nuri al-Said,"[Document(id=c355e0f20036a60ada1e4711a62b4d11adebf07dce76a808203fdd588cc1168e, content: '"" Nasser ordered a crackdown against Syrian communists, dismissing many of them from their governmen...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 207.58747886895094)]",1.0,0.6631616353988647,1.0,1.0,1.0 +What was AJ Ayer an important member of?,Logical empiricism,the Arian Party,"[Document(id=2e20ace02a4527c4c0706b6e9a1dacf4dc045af3a98b5588a0e82992e03d18ef, content: ' Nevertheless, the practice of saying it at the altar grew; eventually Pius V made this practice uni...', meta: {'name': 'Athanasius_of_Alexandria', 'source_id': '2a8e7ec0a44075f1a567744982dcecf4bcea19f0065d4f73c742398a05fa1642', 'page_number': 1}, score: 137.83753446354876)]",0.0,0.04274138808250427,0.0,0.0,0.0 +In which year was a settlement reached regarding GE's pollution of the Housatonic River?,1999,1977,"[Document(id=0869972e54922afd3497ba60a5f3e981d9c0ab240629036531512fe359bc4da8, content: ' Around the same time, General Electric's Canadian counterpart, Canadian General Electric, was forme...', meta: {'name': 'General_Electric', 'source_id': '26d4044eb2d79191635af69e57c262af8041a70871e60e989128c458529f04cc', 'page_number': 1}, score: 116.6152783244603)]",1.0,0.4726710319519043,1.0,1.0,1.0 +What area in Charleston is considered the 'industrial area'?,the city's Neck area,downtown medical district,"[Document(id=f2ed49ccc8f85dd664fcdca57c3499ff0b7c7b7afb1177548f6b0038c9e3b49f, content: ' The downtown medical district is experiencing rapid growth of biotechnology and medical research in...', meta: {'name': 'Charleston,_South_Carolina', 'source_id': '6135d76cf2064b0c4f70e685c5c7e15b8093869ebd10f9a74e8acb07e2173763', 'page_number': 1}, score: 144.2052302061412)]",1.0,0.3277379870414734,1.0,1.0,1.0 +In what year did Frédéric obtain citizenship in France?,1835,mulatto and octoroon,"[Document(id=92de78073423bb03eb54a64d0f332ce68f1771bd6a477f077368c5e13d629e9a, content: ' However, in its Recommendation ECRI uses this term in order to ensure that those persons who are ge...', meta: {'name': 'Race_(human_categorization)', 'source_id': '1232f7fe18d6e2adb87627ce2042b468cfa3fb9ba0b6afb2165c345f440b5339', 'page_number': 1}, score: 108.95556425556728)]",0.0,0.04100791737437248,0.0,0.0,0.0 +What was Tesla afraid someone was trying to do with his invention?,steal the invention,sell his idea to the U.S. military,"[Document(id=727f582f5aae2334de1fd6ecfae66ae1de524ec5b00a77311c574a592fabdcb3, content: ' Louis, Missouri, the Franklin Institute in Philadelphia, Pennsylvania, and the National Electric Li...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 156.11323355057357)]",1.0,0.2841935157775879,1.0,1.0,1.0 +What is the group called that does not agree with government at all?,anarchists,cordon sanitarian,"[Document(id=da081545e8e8020b03087fbbcde823b131a8ffeb227970cc6962ac2b8197ec87, content: ' When a party achieves enough seats in an election to form a majority, the party's frontbench become...', meta: {'name': 'Political_party', 'source_id': '476a9215de81b75a4f17737eed1679570973e55b5ed6175aa0ec1014b09d4632', 'page_number': 1}, score: 101.09700252196419)]",0.0,0.14516454935073853,0.0,0.0,0.0 +What was the name of the bookstore the authors met in?,Old Corner Bookstore,Bibliothèque Bleue,"[Document(id=431a21bc2021ab05bedf5f9c62189ca16d692858fb84414f96b2f293034850da, content: '"" Women, as salonnières, were ""the legitimate governors of [the] potentially unruly discourse"" that ...', meta: {'name': 'Age_of_Enlightenment', 'source_id': 'd6935b64f79e30b7c3de4c952b643a55674c548cd0bb703870a094fd3e646d84', 'page_number': 1}, score: 94.00186696356431)]",0.0,0.2553384304046631,0.0,0.0,0.0 +Time and space are both examples of what type of resource?,complexity resources,memory,"[Document(id=fcc8eaffb3c19e9488a36ba4b6b749da52a91f8712d5e0620ae7488f9ed8fade, content: ' Graphics processors and computers with SIMD and MIMD features often contain ALUs that can perform a...', meta: {'name': 'Computer', 'source_id': 'a39560aa5caca299e2e8876ee9b291a51b896b645abcc4d5f2a790a0fa89fcef', 'page_number': 1}, score: 96.26642504147465)]",0.0,0.30840399861335754,0.0,0.0,0.0 +What had happened to Frederick's extra troops from Russia?,"Catherine, who ended Russia's alliance with Prussia and withdrew from the war",Frederick moved the bulk of his eastern forces to Pomerania,"[Document(id=2d6f9a27ae03d73ce7ee871f068b5e485decdd52e7f7cb2530780627731891ae, content: ' First, Frederick devastated Prince Soubise's French force at the Battle of Rossbach on 5 November 1...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 168.14260150153137)]",1.0,0.3530529737472534,1.0,1.0,1.0 +What percentage of its workforce did Dell terminate in from 2007 to 2008?,10%,10%,"[Document(id=cea0f1307bbe685005f9642d975856221cf9a5dd6f18a7b87c780156408770d1, content: '” +On April 23, 2008, Dell announced the closure of one of its biggest Canadian call-centers in Kanat...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 156.44545320970798)]",1.0,0.9999999403953552,1.0,1.0,1.0 +What organization was Rome's official caretaker?,the senate,The Vestals,"[Document(id=ec3ca2da10a5b33dfa44ec5cda121def852283ef4e6ecfef559bf7ea1b8ac7e1, content: ' Cult donations were the property of the deity, whose priest must provide cult regardless of shortfa...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 143.92065782312642)]",1.0,0.13071516156196594,1.0,1.0,1.0 +Who chose BYU as the location of the national Middle East Language Resource Center?,United States Department of Education,United States Department of Education,"[Document(id=73c809a12535348ffed165eff42c189f3aa95c6ed455c42b5e7932cd44167a8d, content: ' Several of its language programs are the largest of their kind in the nation, the Russian program b...', meta: {'name': 'Brigham_Young_University', 'source_id': 'cb3b81b6f2c540426803106face67bc5fd4db7b1a9ef422ec3601456732b4785', 'page_number': 1}, score: 206.8994736921715)]",1.0,1.0,1.0,1.0,1.0 +Who discovered Hydrogen gas?,Robert Boyle,Robert Boyle,"[Document(id=b562ec53d471dfbb3ba9b12247b16c622039308f7ec5b5b5344127cfeb67e87d, content: ' +In 1671, Robert Boyle discovered and described the reaction between iron filings and dilute acids, ...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 206.25156723372163)]",1.0,0.9999998807907104,1.0,1.0,1.0 +Where was Polybius forced to go as hostage?,Rome,Milan,"[Document(id=423e38d8a6d6af214d9084d6c9c45752923b4fc73735c2d71dfaf17e2be72ac8, content: ' He sent him, indeed, five hundred pieces of gold ""to bear his charges"" but Liberius refused them, s...', meta: {'name': 'Athanasius_of_Alexandria', 'source_id': '2a8e7ec0a44075f1a567744982dcecf4bcea19f0065d4f73c742398a05fa1642', 'page_number': 1}, score: 143.824958900112)]",0.0,-0.021660974249243736,0.0,0.0,0.0 +In what decade was disco popular?,1970s,1980s,"[Document(id=a1ebb6111bad62b247dd1b756728c9569fa6d4b9a446c19b1950e68e79933994, content: ' As of 2015, house music remains extremely popular in both clubs and in the mainstream pop scene whi...', meta: {'name': 'House_music', 'source_id': 'c556f4a459e8a99b6d2065bab8fd6a918d0a00f13291aad2473922492e4abf2e', 'page_number': 1}, score: 174.3308231329952)]",0.0,0.7754139304161072,0.0,0.0,0.0 +What do the rivers form?,lakes,lakes,"[Document(id=dfdb4032d57d026d1e52c68a1cff13d84af492b1c4c23fee3c33111d08105adf, content: ' The Rhine originates in a 30 square kilometre area in Switzerland and represents almost 60 percent ...', meta: {'name': 'Alps', 'source_id': 'd2eb3ad3af7d6110ae691bf40660ff7a79e4e9dbfd25785b4e55c7700bfcae79', 'page_number': 1}, score: 114.16977401000194)]",1.0,1.0,1.0,1.0,1.0 +What were the first 256 code points of Unicode made identical to? ,ISO-8859-1,an ASCII file,"[Document(id=39d421bf1b8a6f42626cef116937dc028d1e423b078f59d7429a3dc5b9a4264c, content: 'Originally based on the English alphabet, ASCII encodes 128 specified characters into seven-bit inte...', meta: {'name': 'ASCII', 'source_id': '4bd7e27c39431d5c4712c32c534ed40f7df89f7b80f6eccb22975178fb7b1be2', 'page_number': 1}, score: 163.1721608166594)]",0.0,0.18398842215538025,0.0,0.0,0.0 +"How many countries did her song ""Irreplaceable"" get number one status in?",five,"65 million records worldwide, and become the most successful Idol contestant in the U.S., selling over 14 million albums copies in the U.S. and has more Underwood has won seven Grammy Awards, the most Grammys by an ""American Idol"" alumnus. +Season five began on January 17, 2006. It remains the highest-rated season in the show's run so far. Two of the more prominent contestants during the Hollywood round were the Brittenum twins who were later disqualified for identity theft. +Chris Daughtry's performance of Fuel's ""Hemorrhage (In My Hands)"" on the show was widely praised and led to an invitation to join the band as Fuel's new lead singer, an invitation he declined. His performance of Live's version of ""I Walk the Line"" was well received by the judges but later criticized in some quarters for not crediting the arrangement to Live. He was eliminated at the top four","[Document(id=ec3b9b3ab5f2d528e25ddd559ed5ece0f75a24fbbd4154d11d3be76e232109cd, content: ' Both Underwood and Bice released the coronation song ""Inside Your Heaven"". Underwood has since sold...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 109.27218695122491)]",0.0,0.20669300854206085,0.0,0.0,0.0 +How many Examination Boards exist in India?,30,16,"[Document(id=dc6d2af99abb08909b2ed77db3e665639a7bf81662aa16867b624d401177775f, content: ' The Hong Kong University of Science and Technology, a university with a focus in applied science, e...', meta: {'name': 'Institute_of_technology', 'source_id': 'bd3c1ce78fe4328bf8eb2cbd8027759a2a927528ce7e496ce3ebcf9675bc060e', 'page_number': 1}, score: 107.311590077452)]",0.0,0.3298308253288269,0.0,0.0,0.0 +What play showed an early depiction of civil disobedience?,Antigone,Antigone,"[Document(id=ad70291d4dec497bcd48d02a2bfbb4780dffd558cf0eee2d7c976f107f401335, content: 'One of its earliest massive implementations was brought about by Egyptians against the British occup...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 159.1381537658833)]",1.0,0.9999999403953552,1.0,1.0,1.0 +"Once in conversation with the Queen, how is Elizabeth addressed?",Ma'am,"a touch of humour, gentleness and understanding"".","[Document(id=0a24a2ae13af1d4669b8a2f74791d8b6c9fcf28ba3103552f6b7f5c81e2ec8b3, content: ' Patriation removed the role of the British parliament from the Canadian constitution, but the monar...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 137.47701637339432)]",1.0,-0.08331970870494843,1.0,1.0,1.0 +In what century did Paris become largely into fashion?,19th,19th,"[Document(id=faa03729332a5c07c1c9d6d87747d15af8b3043f87a9cff7389d9a1c23d97c7c, content: ' +Paris has been an international capital of high fashion since the 19th century, particularly in the...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 166.16265214381096)]",1.0,0.9999998807907104,1.0,1.0,1.0 +When did Alfred the Great begin his reign?,871,11 December 1936,"[Document(id=06cbf9491f511f16650c4f94483faad3dd6deb11121a7158726cb23ef2b5b8c1, content: ' One of the few stirs arose when the Canadian Prime Minister, R. B. Bennett, considered the Duke for...', meta: {'name': 'George_VI', 'source_id': '5a05abc99d3e2c9f678c9681513c2d8cdfe043b062ba971ef3f9f31e7c0c4622', 'page_number': 1}, score: 137.500661055153)]",0.0,0.11978332698345184,0.0,0.0,0.0 +What two methods can be used when using Load Testing?,data or a large number of users,large quantities of data or a large number of users,"[Document(id=dd704f4bc758ab475db02844a8a24263dd4fdb32afbc7f7dc86b810f8fa62e4f, content: ' +Load testing is primarily concerned with testing that the system can continue to operate under a sp...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 149.3892484981509)]",1.0,0.8635382056236267,1.0,1.0,1.0 +what may possibly be a solution to net neutrality concerns? ,municipal broadband,Title II (common carrier) of the Communications Act of 1934,"[Document(id=7aba2d10978318bc5abba467ce5b46b678a83c63b04c2770eb6bfdb5855d1dce, content: ' On 16 January 2015, Republicans presented legislation, in the form of a U.S. Congress H.R. discussi...', meta: {'name': 'Internet_service_provider', 'source_id': 'b6cdeebacab1c0fc99cc348ba10a7ad5f8a09ca4691c74f89374730939ffbbb4', 'page_number': 1}, score: 100.06290327936973)]",1.0,0.29370126128196716,1.0,1.0,1.0 +What are the ground-attack aircraft in the USAF usually deployed in support for? ,U.S. ground forces,"interception of bombers and other fighters, reconnaissance, and patrol","[Document(id=d2709fd4938aadfc321fd75534f338a1182dec7bcf787b2638ddb43d1a59b926, content: ' Special Operations Command (USSOCOM). It conducts long-range, special operations missions, and is e...', meta: {'name': 'United_States_Air_Force', 'source_id': '1bb222de66e5a2c81f1d5b75bf3eced066939aec82934a5cb176a607b4e8cc8f', 'page_number': 1}, score: 192.83729826021232)]",1.0,0.331512987613678,1.0,1.0,1.0 + In what year did the invasion of Russia take place?,1812,1854,"[Document(id=af2d0cc576e7737d85b64a99a23cdab42a569764a59025658756f21259782f72, content: ':130–43 +In the spring of 1854 the Russians again advanced, crossing the Danube River into the Turkis...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 171.36151765222343)]",0.0,0.6653627157211304,0.0,0.0,0.0 +What was the practice of religion to the Romans?,personal choice,practical and contractual,"[Document(id=b80b0d7fa56110987db7fc274c40400c9afab47b60f93bd3980be6d29191c35f, content: 'The priesthoods of public religion were held by members of the elite classes. There was no principle...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 193.7799070146575)]",1.0,0.13173553347587585,1.0,1.0,1.0 +What is one example of failure testing?,Software fault injection,"defects in dead code will never result in failures. A defect can turn into a failure when the environment is changed. Examples of these changes in environment include the software being run on a new computer hardware platform, alterations in source data, or interacting with different software","[Document(id=f10f419092a1f6acf685021bef11a7a6ebef797bdd44e3997bd6b4a635aa2a89, content: ' Not all defects will necessarily result in failures. For example, defects in dead code will never r...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 106.64395109119121)]",1.0,0.34705621004104614,1.0,1.0,1.0 +"Out of the maximum amount of players allowed, one must be a what?",goalkeeper,home-grown players,"[Document(id=0d02d0727bbf3d538faf4d0e4b493fb1a575377aa15b678ef4c8a1fa1646ad91, content: ' By 2009, under 40% of the players in the Premier League were English. +In response to concerns that ...', meta: {'name': 'Premier_League', 'source_id': '369baa9a5a854704f16fadd35144d3a3150cf1e9ae1671d9b50a3448547b814b', 'page_number': 1}, score: 98.30450237695865)]",0.0,0.23592567443847656,0.0,0.0,0.0 +In which year was the new constitution ratified?,1855,1956,"[Document(id=c5c366f06788f022f78b708df479c782035fa76318f4adb339bcf7725475034a, content: ' Nasser was welcomed by large crowds of people lining the streets of Cairo on his return to Egypt on...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 125.38863131677583)]",0.0,0.7131929993629456,0.0,0.0,0.0 +What vessel was attacked in 1845?,Naiad,Rossiya,"[Document(id=97e16b6bc2e6669fe1654aa87cbb22e40d888b619d1aef47d13bd6236d3f958a, content: ' The outnumbered Russian Baltic Fleet confined its movements to the areas around its fortifications....', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 161.24723935586772)]",0.0,0.0533168725669384,0.0,0.0,0.0 +When did Travel + Leisure list Yale as one of the most beautiful campuses?,2011,2011,"[Document(id=afdbd08089615eb2bbb9dd9a5945a7e094c57e1f63a4d1f083e6183556a9f3e7, content: ' Moreover, Yale has been accused of failing to treat workers with respect. In a 2003 strike, however...', meta: {'name': 'Yale_University', 'source_id': 'f33638c4d6b8d7572ba500f3a8affca1729aae3046c83a94e2a7424dd50f6549', 'page_number': 1}, score: 171.7668856618156)]",1.0,0.9999999403953552,1.0,1.0,1.0 +What sort of variants did the Human Genome Project indicate that many of the measured transcripts were?,alternative variants,alternative variants,"[Document(id=74c523cc7b77506967566fb8161bf4043d30ccf8c68e884af6cf2b9d24106e71, content: ' Conversely, plants can have extremely large genomes, with rice containing >46,000 protein-coding ge...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 109.03169875302669)]",1.0,1.0,1.0,1.0,1.0 +During what year did the Italians invade Libya?,1911,1980,"[Document(id=cdc48a6ad3a2e7247518958e73c2ec683689a78ed8978eafe40f8f9c84ca8545, content: ' Military spending increased, while other administrative budgets were cut back. Libya had long suppo...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 205.52285831252408)]",1.0,0.4845914840698242,1.0,1.0,1.0 +What did Davidson believe the God of Aristotle is synonymous with?,rational thought,Jupiter,"[Document(id=a8cc5557b921c5766b219ced4b8319da3ea79db597efbb98b7cbe6998afe6656, content: ' Ambrose, the influential Bishop of Milan and future saint, wrote urging the rejection of Symmachus'...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 185.2449344711922)]",0.0,0.005320226773619652,0.0,0.0,0.0 +What percentage of students at Notre Dame are the children of former Notre Dame students?,21–24%,39.1%),"[Document(id=fc88c82bbe8a5d66ca954ba928957f174fc7fd55fc1c67f366e483ee9b7f3159, content: ' Theodore M. Hesburgh CSC, President Emeritus of the University of Notre Dame. The institute has con...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 123.73139229110417)]",1.0,0.8029515743255615,1.0,1.0,1.0 +On what day did U.S> planes located Ozawa's fleet?,second,7 December,"[Document(id=4a23364cdf0a802b2462cb8e0ef1b7713db72f80a53ee5886cb9b68f25920854, content: ' Kurita's force had begun the battle with five battleships; when he returned to Japan, only Yamato w...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 165.93645639842947)]",1.0,0.04009123891592026,1.0,1.0,1.0 +Greek was crowned the European champions in what event?,Euro 2004,Euro 2004,"[Document(id=9c5e4e5ebe04f6d60e8e13bc3cfe5cd0c8fa7c41c1148c916c472a26ca7c677b, content: ' Their best ever performance was in the 1896 Summer Olympics, when Greece finished second in the med...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 165.19965128505237)]",1.0,1.0,1.0,1.0,1.0 +Which tense indicates that an action hasn't finished?,imperfective,/p pʰ b/.,"[Document(id=08cfca2838b0ec38cf0ea4ba7b4b0c80a691af71784941039d34a905b951dce2, content: ' Punjabi has lost breathy-voiced consonants, which resulted in a tone system, and therefore has a di...', meta: {'name': 'Aspirated_consonant', 'source_id': 'ded60872a72621c0cc2ee33d9a7b0d60f1474075e260da958b752dbe72e00321', 'page_number': 1}, score: 121.47809687609444)]",0.0,0.0958951860666275,0.0,0.0,0.0 +What acoustic element could be distorted by different audio settings in iPods prior to 2007?,Bass,bass instruments,"[Document(id=25a9235e2187510deb110e2fe84f6e0f7a2dbacbbd0bd491e44e8a83b3be8cbf, content: ' This would happen for EQ settings like R&B, Rock, Acoustic, and Bass Booster, because the equalizer...', meta: {'name': 'IPod', 'source_id': 'f0324763387a7e11c235569bada6935891b2c5ba40dd065ab5acc1bb5de8dbed', 'page_number': 1}, score: 144.93989592071756)]",1.0,0.4064074754714966,1.0,1.0,1.0 +In what way can Neutral triatomic hydrogen exist?,excited form,protonated molecular hydrogen,"[Document(id=6e618286b9705860ffa2586c851128c816120c84dcf91348f6ee11f32b6d590f, content: ' This species is central to discussion of acids. Under the Bronsted-Lowry theory, acids are proton d...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 126.73501886889714)]",1.0,0.16139346361160278,1.0,1.0,1.0 +How are children used in the conflicts in Burma?,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements.,"Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements. The Independent reported in June 2012 that ""Children are being sold as conscripts","[Document(id=e39a424de393c7814379d5771c4abc998c371eadee31f5a4692a573651cdce01, content: ' South Africa also voted against the resolution. +There is consensus that the military regime in Myan...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 131.50547962808298)]",1.0,0.9422255158424377,1.0,1.0,1.0 +In what decades were Arsenal players stereotyped in a negative way?,1970s and 1980s,1966–67,"[Document(id=ba20188e907b6851f746fd539dbc0cab1e88e0349d17ed43168a54a40a2d1344, content: ' The laurel leaves also represent strength. To complete the crest, 1886 and 2011 are shown on either...', meta: {'name': 'Arsenal_F.C.', 'source_id': 'b197b14120aabbc3bf2073d4b3e99c1c51d56d5ac5248f8a97dadcfa056a6f7e', 'page_number': 1}, score: 75.3577185564337)]",1.0,0.4470002055168152,1.0,1.0,1.0 +When did new construction start in Melbourne?,late 1890s,February 2006,"[Document(id=b7f872340a2353028ee8ea0f0e2ba802acb0824c43169f4135ee2fec5fa00b32, content: '8 billion and export revenues of $615 million. In addition, tourism also plays an important role in ...', meta: {'name': 'Melbourne', 'source_id': 'e9cc00699ea29b72949874ec0e147efa277ecdaa2a727246f376aab1a8d342bd', 'page_number': 1}, score: 184.96961800246208)]",1.0,0.3492073714733124,1.0,1.0,1.0 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:5.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:5.csv new file mode 100644 index 0000000..490e735 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:2__chunk_size:5.csv @@ -0,0 +1,127 @@ +questions,true_answers,predicted_answers,retrieved_docs,doc_mrr,sas,doc_map,doc_recall_single_hit,doc_recall_multi_hit +What were the earliest priesthoods? ,flamines,flamines,"[Document(id=99729d5c09ec68a18e6cb9e326f59027b5c69bc74e05e8c8c0d825360370bf5e, content: ' In household cult, the paterfamilias functioned as priest, and members of his familia as acolytes a...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 173.02697070797495)]",1.0,0.9999998807907104,1.0,1.0,1.0 +When was a third visit to Mnafeld scheduled? ,early 1546,March,"[Document(id=5081c97cf3c1bba15e1c4e67633c829431bdafa2c6e4103be63d8bb2cf12a442, content: ' The park's amphetheatre was renovated in 2010, and has hosted performances by acts such as Deftones...', meta: {'name': 'Fresno,_California', 'source_id': 'a46a1dee5f6d188d5f5a510a3c694e88541550757193e312801d8b786a38b1fa', 'page_number': 1}, score: 85.07322132565646)]",0.0,0.046456485986709595,0.0,0.0,0.0 +What was the name of the airfield where heavy raids took place?,Farnborough,Ormoc Bay,"[Document(id=54cf290f562ea7c2ffff22fe517ae92e59e00fbdbc47dd6d72ae885b0455fd69, content: ' Fifth Air Force was able to devastate the Japanese attempts to resupply. In torrential rains and ov...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 151.79338514889298)]",0.0,0.09204164147377014,0.0,0.0,0.0 +The Modern English ending -'s is derived from what ending in Old English?,-es,"Ænglisc, Anglisc, Englisc","[Document(id=169a5194d5b8e4398d0437961bfa539bb8d8fa94fb38d4cba89afa5e1b31232c, content: 'Old English (Ænglisc, Anglisc, Englisc) or Anglo-Saxon is the earliest historical form of the Englis...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 110.28235177333441)]",1.0,0.1304444968700409,1.0,1.0,1.0 +In what area is it common for spiritual mentorship to be extremely high?,the West,"low-church"" traditions of Protestantism","[Document(id=bbad1043bc2ff983eeca01fd03f84a8577a718a20a14adc37febb6fddb52936a, content: ' +In The Church of Jesus Christ of Latter-day Saints (LDS Church), the teacher is an office in the Aa...', meta: {'name': 'Teacher', 'source_id': '8d3b7bd3176a10380300a49fa713b8fffa5e77a181df51ea823d2bdeb46d4c7b', 'page_number': 1}, score: 133.68718996186385)]",1.0,0.12859657406806946,1.0,1.0,1.0 +Who is the only Greek volleyball club to win European titles?,Olympiacos,Olympiacos,"[Document(id=927ee26533dc7f17be50a4cb7fc74bed43a4bc44825d916333e6dd9c5d18c958, content: ' The Greek league, the A1 Ethniki, is considered one of the top volleyball leagues in Europe and the...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 177.5734971726734)]",1.0,0.9999998807907104,1.0,1.0,1.0 +How many judges were on the panel for this season?,four,three,"[Document(id=5a76d0c122f92ef189448d61efc0333c0e7eb4caa25ac15e516eca3fccbdd7aa, content: ' +The show had originally planned on having four judges following the Pop Idol format; however, only ...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 119.61464119422864)]",1.0,0.7108101844787598,1.0,1.0,1.0 +"Along with a chemical process, what type of process is used by night vision devices to convert infrared into visible light?",electrical,electrical,"[Document(id=05e7c56a6da1ed703309ec6f627bc69c369ec51519a9e5eb8c442673c62365e4, content: ' Infrared can also be used to detect protostars before they begin to emit visible light. Stars emit ...', meta: {'name': 'Infrared', 'source_id': 'ba9d9843065facd8aa75e472f0c7d760ae3d78712335a49f7abaf499acb21d1c', 'page_number': 1}, score: 129.47865242752295)]",1.0,1.0,1.0,1.0,1.0 +The second most prominent religion in New York is what?,Judaism,Judaism,"[Document(id=4437c778daec446c4362623ed3261e5ec4170314f4aa14f8381b8c7ee6eef3fe, content: ' +Christianity (59%), particularly Catholicism (33%), was the most prevalently practiced religion in ...', meta: {'name': 'New_York_City', 'source_id': '8a1509d057a51ed7a68b42ff87df5d459ac7f15f3d0f4f1685859ba197f9686d', 'page_number': 1}, score: 209.32334393129398)]",1.0,1.0,1.0,1.0,1.0 +The Free Hanseatic City of Bremen includes which cities?,Bremen and Bremerhaven,two urban districts,"[Document(id=a102b81b2963aaefdf1a83d1a1d96870be10e96bb78da447e6f4a0220bbf260f, content: ' The equivalent of the Minister-President is the Senatspräsident (President of the Senate) in Bremen...', meta: {'name': 'States_of_Germany', 'source_id': '60764e89141121f734e956aeeb48fcbe12c7e4fdec683c4f9fa29406a3f0579e', 'page_number': 1}, score: 144.68905502955556)]",1.0,0.12825781106948853,1.0,1.0,1.0 +What seeks to limit the use of polysyllabic characters?,CPC,lack septa,"[Document(id=e17b1a29df1c9f5c2187144a80272f17ecd053e6e3aedeaf8ab5cebd902b391d, content: ' Many polychaetes have a muscular pharynx that can be everted (turned inside out to extend it). In t...', meta: {'name': 'Annelid', 'source_id': '4d1835976d6f8a3a104eed564c45fb20b1a080365dcb082f33afd480371401ed', 'page_number': 1}, score: 67.46384235035063)]",0.0,0.09892848879098892,0.0,0.0,0.0 +In what courts can federal habeas corpus suits be brought?,federal,state courts,"[Document(id=7f051b6956fd0f9827b2a02cbd64b58b99b63b11bc1b8803eb572808e15e4f5d, content: ' The purpose of federal habeas corpus is to ensure that state courts, through the process of direct ...', meta: {'name': 'Capital_punishment_in_the_United_States', 'source_id': '0a4e4dda32f7be6a4f78da5ed9ee21d2cc7659112c0d087b2923a482bdc77b5b', 'page_number': 1}, score: 114.71892461568778)]",1.0,0.3909306526184082,1.0,1.0,1.0 +What did Whitehead consider himself as a philosopher? ,rank amateur,process philosophy,"[Document(id=08668bbf8e8bcec721195bb786b2eb33635f6112b144f005903dec55795b998e, content: '"" +Historically Whitehead's work has been most influential in the field of American progressive theol...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 235.39117807275846)]",1.0,-0.031905245035886765,1.0,1.0,1.0 +What condition does a person have to meet to be allowed by law on a coin?,deceased,worshipping God in the manner and season most agreeable to the dictates of his own conscience,"[Document(id=0e1d4f1d8d4e3533adb041cb5f1b6a2a3c5856cee62abc76e787c6878ad0d6b9, content: ' Similarly, the Constitution of Massachusetts originally provided that ""no subject shall be hurt, mo...', meta: {'name': 'Separation_of_church_and_state_in_the_United_States', 'source_id': '9e181b8958f59a2f5a36329a7f787f416428f691cac0094c165c53403bedaabf', 'page_number': 1}, score: 91.80492411481453)]",0.0,0.10118145495653152,0.0,0.0,0.0 +Which region of the US experiences rain year round?,Pacific Northwest,Southwest,"[Document(id=994f169f938f2ae6c5b72207e8f053085554aee5e18da6a0e4c89bdb0a7fb708, content: ' The Southwest has the worst droughts; one is thought to have lasted over 500 years and to have hurt...', meta: {'name': 'Geography_of_the_United_States', 'source_id': '6eef5e1e8e3a1d489b23e0304d3cc9472b84a44871b2fd5ecfc8ab3f12e8ccf0', 'page_number': 1}, score: 179.13218864040363)]",1.0,0.6455833315849304,1.0,1.0,1.0 +What is the ability to recognize and adapt to new specific pathogens called?,Adaptive (or acquired) immunity,Antigen specificity,"[Document(id=8e364a66a416e7184c815546c99f2af87e1565f3c1094415321a6192f360b407, content: ' The adaptive immune response is antigen-specific and requires the recognition of specific ""non-self...', meta: {'name': 'Immune_system', 'source_id': 'fcb8c5092b2596a66d4f96587d653cbabda606874d86632c35ec40ef8f023051', 'page_number': 1}, score: 157.71412992185162)]",1.0,0.2857634127140045,1.0,1.0,1.0 +Who allegedly hit a home run to the Center?,Babe Ruth,Babe Ruth,"[Document(id=2abfa717cced74cfc35f5836efd511c9dcad606c2013e9cf62940c0910458753, content: ' Because of the tie the Giants and Cubs ended up tied for first place. The Giants lost the ensuing o...', meta: {'name': 'Chicago_Cubs', 'source_id': '10bd5330115a2fe2b9ef60d21abe3942769709161783409af0c4444ccac129c3', 'page_number': 1}, score: 149.42361214417218)]",1.0,1.0,1.0,1.0,1.0 +Which type of graduate from Imperial earned the second highest average starting salary after graduation?,Computing graduates,undergraduate,"[Document(id=8c2436b77a6c1726e2314124b2ca86e8eecc9e0f4e28a3c3eeeb8bb9925556f1, content: ' According to payscale.com, undergraduate alumni of University of Notre Dame have a mid-career media...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 163.89933007475037)]",0.0,0.2480124831199646,0.0,0.0,0.0 +In what year did Eisenhower propose his second civil rights act?,1960,1942,"[Document(id=dac723a43b39bdbec9d480c31d16576f349280e2468cd1ace681a22042059770, content: ' Marshall, who spotted talent and promoted accordingly. +At the end of May 1942, Eisenhower accompani...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 170.7410237583216)]",1.0,0.5387994050979614,1.0,1.0,1.0 +What is the current height limitation in Paris in central areas?,50 m,35 m (115 ft) above sea level,"[Document(id=89c05129f31a044eaed6fac18639fcc56e299083dfdb66dc03a3e31ba5b7574b, content: ' Overall, the city is relatively flat, and the lowest point is 35 m (115 ft) above sea level. Paris ...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 161.4995300214229)]",1.0,0.3301456570625305,1.0,1.0,1.0 +Income level was found to have a signification correlation to which outcome in the study?,nutrition literacy,population-wide satisfaction and happiness,"[Document(id=a792069b9115c5d2ca90d3eda29dbeede49903489c6a9a0415eba57fd1b1d853, content: ' Thus, the marginal utility of wealth per person (""the additional dollar"") decreases as a person bec...', meta: {'name': 'Economic_inequality', 'source_id': '648ff32b8c146c974b23e115e3a61420b1cdbea9de1f5bf38442ce4c1d9caa42', 'page_number': 1}, score: 101.62234477060485)]",0.0,0.14678871631622314,0.0,0.0,0.0 +How much did Viacom want in damages in their lawsuit against youtube?,$1 billion,$1 billion,"[Document(id=720eb3795a623e31335108bcc29a4b774e33272bb625d5968983846dd3b35f68, content: ' Three successful complaints for copyright infringement against a user account will result in the ac...', meta: {'name': 'YouTube', 'source_id': '4d18798daeceb997a6dcb04b575dbbdad768671ca9202116370611d944d510be', 'page_number': 1}, score: 170.60211135792204)]",1.0,1.0,1.0,1.0,1.0 +Who should be able to challenge administrative orders in court?,individuals,individuals,"[Document(id=cf59c0f0619e715b979051e0a84530bf73463713fa11f2961866d233ac2537f5, content: ' Dicey's: “no man is punishable or can be lawfully made to suffer in body or goods except for a dist...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 146.15927986962998)]",1.0,1.0000001192092896,1.0,1.0,1.0 +The Slavs were under what name in the early 6th century?,the Antes and the Sclaveni,Yugoslavia,"[Document(id=6eead1c25fd5324d0ed222674d7ab9d1fec494ce0eaa13aff2223c2eab0a9430, content: ' A notable political union of the 20th century that covered most South Slavs was Yugoslavia, but it ...', meta: {'name': 'Slavs', 'source_id': 'a55731ff3cd03d8fe34d4e646dbfcdc2088f01190e5110c9ff0baf4e4b486847', 'page_number': 1}, score: 200.3425462595638)]",1.0,0.0391981303691864,1.0,1.0,1.0 +How are the results simplified when using the SASO?,"into four summaries that look specifically at responses that correspond to either homosexuality, heterosexuality, bisexuality or asexuality.",set to the EI with no exposure compensation,"[Document(id=4848e3d05918ff1f327c2508412043f33a28fe67af2a248a7b13fa45e3717aa1, content: ' This is the only technique available under the standard for output formats that are not in the sRGB...', meta: {'name': 'Film_speed', 'source_id': 'cd350045b0fbc5c253411cbf952460337caeec603c6f046e13f60bcc9428df61', 'page_number': 1}, score: 113.51941140142256)]",0.0,0.042886704206466675,0.0,0.0,0.0 +What socioeconomic class used the Florentine Tuscan language in Dante's time?,upper class,quadrivium of the liberal arts,"[Document(id=f362bdcfde3970c1965d3564d5709917d4463c295e291066153a18e0ddc2456d, content: ' Together the three poets established the Tuscan dialect as the norm for the modern Italian language...', meta: {'name': 'Late_Middle_Ages', 'source_id': 'fb62f16f8c6599f7200add017bece6e2ea916520e544af1dd94713410c254ce9', 'page_number': 1}, score: 172.48469574466233)]",0.0,0.20520564913749695,0.0,0.0,0.0 +"In addition to ammo proposals, what other need was articulated?",The need for some form of tracer or smoke trail,"anti-aircraft guns, and explosives. Other targets would be considered if the primary ones could not be attacked because of weather conditions. +A further line in the directive stressed the need to inflict the heaviest losses possible, but also to intensify the air war","[Document(id=6d4509eb1003e4fc992f26a598e45ce12e5fb93908416158debcc49779863903, content: ' The indifference displayed by OKL to Directive 23 was perhaps best demonstrated in operational dire...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 88.19963965663412)]",0.0,0.1558454930782318,0.0,0.0,0.0 +What is that power that enables us to love?,Prevenient grace,Emotions,"[Document(id=5a260e08d6f4027a44c44e45bb3870f94a28beee3a72e7855a8dfe7efe426909, content: ' The theory proposed by Nico Frijda where appraisal leads to action tendencies is another example. +E...', meta: {'name': 'Emotion', 'source_id': 'd8cc93a545b4114ba331de59898315369d2946cc279a22b3d9fd171f89c8221b', 'page_number': 1}, score: 106.10330318103026)]",0.0,0.11000385880470276,0.0,0.0,0.0 +"Who was assassinated on September 22, 189 AD?",He Jin,He Jin,"[Document(id=6b20ebc1df3c4a4239954b87829f542f45ce58307a74b92a5c1a806a2183d971, content: ' When the eunuchs discovered this, however, they had her brother He Miao (何苗) rescind the order. The...', meta: {'name': 'Han_dynasty', 'source_id': '060d95b91f9c20e5c8cd8b8a032ab44aac38ab88ce62d3c302bea0865f533568', 'page_number': 1}, score: 167.52952159485534)]",1.0,1.0000001192092896,1.0,1.0,1.0 +How did Gaddafi change the legal system?,he introduced sharia as the basis for the legal system,sharia,"[Document(id=f0f866d3b3b357cebd62bef6998e3e11e90a372e00343c3d189939c7eb20be2e, content: ' +In May 1970, the Revolutionary Intellectuals Seminar was held to bring intellectuals in line with t...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 201.4704913998913)]",1.0,0.5289127230644226,1.0,1.0,1.0 +What name was it assumed Elizabeth would take upon her marriage to Philip?,"Mountbatten,",heir presumptive,"[Document(id=d84cede505487aab2b7f8ca81ca6dc2ec7e647562d9fcef338a4a7bc8b2c2e2b, content: ' She was educated privately at home. Her father acceded to the throne on the abdication of his broth...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 217.49870495818075)]",1.0,0.22167840600013733,1.0,1.0,1.0 +What was the attack on the British weakness?,Battle of Fort Bull,The British wanted Berlin,"[Document(id=6c0f42d1676a68f1b531e9e5b1359d998c737c45c3b08b7486a9ab7ac5c47827, content: ' He had serious disagreements with Churchill and Montgomery over questions of strategy, but these ra...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 151.3026455113039)]",0.0,0.1772448569536209,0.0,0.0,0.0 +How much of South Tucson is Native American?,10%,one million,"[Document(id=0b99b20c14580dd706b0df37d7b27d75bc4008452345635ef6e68d407270d471, content: ' In 2006 the population of Pima County, in which Tucson is located, passed one million while the Cit...', meta: {'name': 'Tucson,_Arizona', 'source_id': '1cd8368afbbb7156e4d37c16c9795a1cf7209fbd0036331b6759337ae62d7224', 'page_number': 1}, score: 188.11852215734172)]",1.0,0.13645018637180328,1.0,1.0,1.0 +Whose theory claimed that species were unchanging and designed by God?,John Ray,Ernst Haeckel,"[Document(id=b48d392d4d5b02ed79e9f7b623195a59922d320516725422d9b0cdf7598971a2, content: ' +Evolutionary ideas, although not natural selection, were accepted by German biologists accustomed t...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 169.7821801153408)]",1.0,-0.026901554316282272,1.0,1.0,1.0 +Who can restore a database that has been corrupted?,database administrator,dedicated backup files,"[Document(id=d62f58c8737fe86268de0ebf0cb0ef4eae1f9ba5f33735f7cadb2e83beb9e377, content: 'g., cases when the database is found corrupted due to a software error, or if it has been updated wi...', meta: {'name': 'Database', 'source_id': '98b09743ece9be42ad021f0dc6ffab08c322e7eeace6aae0b98b4bbf67224bd3', 'page_number': 1}, score: 93.70196948277217)]",1.0,0.32296431064605713,1.0,1.0,1.0 +What unpopular philosopher's text did Avicenna learn from?,Abu Abdullah Nateli,Thomas Aquinas,"[Document(id=06f9c29c0b848cdedfee3a4ee6e26a39bfdc0f7b653b8f5f97c0340774f37b8c, content: ' +His Book of Healing became available in Europe in partial Latin translation some fifty years after ...', meta: {'name': 'Avicenna', 'source_id': '25cbb1011a29473e1dfa8d96ebe95bc8ead7393548de57e871f959624f7238f1', 'page_number': 1}, score: 186.97194805206948)]",1.0,0.15194137394428253,1.0,1.0,1.0 +What was the event called that caused Ayutthaya to reclaim lost territories ?,the Chinese threat,Ayutthaya seized Tenasserim and Lan Na,"[Document(id=ca6db29fb81cf6a69632107c67ace9bdd1fdd8f61142978a0684c6cd6f540122, content: ' Ayutthaya seized Tenasserim and Lan Na, and Portuguese mercenaries established Portuguese rule at T...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 154.99059803938104)]",1.0,0.2441307157278061,1.0,1.0,1.0 +"Before the introduction of the Latin alphabet, how was Old English written?",runic system,Ingvaeonic (also known as North Sea Germanic) dialects,"[Document(id=9a27238fee0d389c0990690f0201315187259a5da5733ce27ff271c3f814b67a, content: ' +Old English is a West Germanic language, developing out of Ingvaeonic (also known as North Sea Germ...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 144.57941959616778)]",1.0,0.18400278687477112,1.0,1.0,1.0 +"When groups that are antagonistic to each other form a coalition for hidden gain, it is called what?",unholy alliance,unholy alliance,"[Document(id=f1fa43fc9c1c5bd46b9f5455701bf99b0e98d4579816d755389bcc2dbba1bcda, content: ' This sum itself may be all or a portion of the difference between the actual (inflated) payment to ...', meta: {'name': 'Political_corruption', 'source_id': 'e7c84fb03cb06b72c4ab51eb24218e9f2a10b95493e43d575342db0ffa3e9e1d', 'page_number': 1}, score: 127.2107380542836)]",1.0,1.0,1.0,1.0,1.0 +What made a strong economic case for investing in low carbon technologies?,Stern Review,The chance to participate in the economic success of technological inventions,"[Document(id=8ca670761ac712ffb787aa7e4d7d937782d05faf36b89536ba8fc4b7b1dbea7d, content: ' In particular, Calvin rejected luxury. Therefore, craftsmen, industrialists, and other businessmen ...', meta: {'name': 'Protestantism', 'source_id': '4e459ea6668bbb8e35a3b3731021de782c29c314b694a30df606a1413f2e1325', 'page_number': 1}, score: 108.05624968004788)]",0.0,-0.07336610555648804,0.0,0.0,0.0 +"Who introduced the Latin version of the word ""insect""?",Pliny the Elder,Pliny the Elder,"[Document(id=e8da082f5022b804801ddb2778f16c32226f0ecb59f3ed7f8fcdae17653f162c, content: ' +The word ""insect"" comes from the Latin word insectum, meaning ""with a notched or divided body"", or ...', meta: {'name': 'Insect', 'source_id': 'e14215fdfb7e1394430dedb24549d773464f110b7f19b86558bc101f84126630', 'page_number': 1}, score: 184.832858928507)]",1.0,0.9999999403953552,1.0,1.0,1.0 +What was started by the Roman Catholic Church to dispense of the Cathars in Languedoc?,The Cathar Crusade,The Cathar Crusade,"[Document(id=08215551b881c7cefad5bb5baaaddc4d6f9a5802a709d61e32985651e2115c85, content: ' By the 11th century, more organised groups such as the Patarini, the Dulcinians, the Waldensians an...', meta: {'name': 'Heresy', 'source_id': '8ba473262f8508a9c99699bb09cd022993896ece1fed1463a9cf4bd7eb0c3e68', 'page_number': 1}, score: 159.6433571883927)]",1.0,1.0,1.0,1.0,1.0 +What music service is a huge competitor for Tidal?,Spotify,Spotify,"[Document(id=2fca0ca848c224a8494e3eead28c715beb0ffe86531f3f21ea833479b2f3f978, content: ' Including Beyoncé and Jay-Z, sixteen artist stakeholders (such as Kanye West, Rihanna, Madonna, Chr...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 117.78614306193649)]",0.0,1.0,0.0,0.0,0.0 +How much was each copper token worth?,a halfpenny,420 tonnes,"[Document(id=687916890077183f6cd89496d12592eec3d76bd82f46a80bfac0591dba1c0c1c, content: ' The largest mass of elemental copper discovered weighed 420 tonnes and was found in 1857 on the Kew...', meta: {'name': 'Copper', 'source_id': 'de5c60a665b93f9b358be412d53a37ca43d90005e3ec9a553ac38a38f076fadc', 'page_number': 1}, score: 132.34045561810535)]",0.0,-0.06792526692152023,0.0,0.0,0.0 +What ships were attacked in 1852?,Glencoe and Sea Nymph,"Sewer, rail, docklands, and electric installations were damaged.","[Document(id=c2a1dab12b313626459f38712aea7c56f2c359477289ae6301106d90c43faa1b, content: ' +In the north, substantial efforts were made against Newcastle-upon-Tyne and Sunderland, which were ...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 157.01749548291156)]",0.0,0.01816275343298912,0.0,0.0,0.0 +What was the first fossil to display both clearly traditional reptilian characteristics?,Archaeopteryx,Archaeopteryx,"[Document(id=22e5680a1c41aa000971ff89fccd80f808c8f88ae7d0f5c0d81eb763ce026955, content: ' Unlike Archaeopteryx and the non-avialan feathered dinosaurs, who primarily ate meat, recent studie...', meta: {'name': 'Bird', 'source_id': '76114e3e9c92caec1de2bcfd38b6f22e1b48487f665fa6392f914934a6748ae1', 'page_number': 1}, score: 200.4427546556809)]",1.0,0.9999999403953552,1.0,1.0,1.0 +What are the theories that integrate molecular genetics with Darwinian evolution called?,the modern evolutionary synthesis,the modern evolutionary synthesis,"[Document(id=54fb8e44f718e454d5f4aef7097036553aa91b829fc2ea08644a1f9fc17e50f5, content: ' +The theories developed in the 1930s and 1940s to integrate molecular genetics with Darwinian evolut...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 186.4803046658906)]",1.0,1.0,1.0,1.0,1.0 +At what time did Tesla get dinner?,8:10 p.m,eight o'clock,"[Document(id=5dfd5078daafbc2ef3695636e6d70b42db2abc79ac389c20eac0417969714c11, content: ' Tesla would telephone his dinner order to the headwaiter, who also could be the only one to serve h...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 200.28502177347957)]",1.0,0.6441541910171509,1.0,1.0,1.0 +In what year did the French reach Alsace?,1444,1444,"[Document(id=18dbb6f8a4452ee88a36944ed7b5a77c2233677f5c2c84c6d98113a4c1519651, content: ' After the conclusion of the war, France was again free to pursue its desire to reach the Rhine and ...', meta: {'name': 'Alsace', 'source_id': '1105499d303196fb0b7ae4d26209720b0040e63376f114d458fd77f05836cbd4', 'page_number': 1}, score: 180.92305663796213)]",1.0,1.0,1.0,1.0,1.0 +Who was responsible for the handling of the Roman census?,two censors,censors,"[Document(id=3749ef6e14ea9af3893cfd84917e3a39acc2212480fc846e5a9231414a5599e6, content: ' Praetors administered civil law and commanded provincial armies. Every five years, two censors were...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 160.88829017583132)]",1.0,0.5763707756996155,1.0,1.0,1.0 +When was a resolution agreed to about Chinese human rights issues in San Francisco?,"April 1, 2008","April 1, 2008","[Document(id=80f5e2e80143434aa6d2d78a16f411f7f6ab4f8ffffbd29914ad0c75e8bf31da, content: ' The cost to the city for hosting the event was reported to be USD $726,400, nearly half of which ha...', meta: {'name': '2008_Summer_Olympics_torch_relay', 'source_id': 'a45e6ea09a1ff12d8a239a682c8ef3735ae894d5ab6427b50909053bb046e8b5', 'page_number': 1}, score: 120.63367823315784)]",1.0,0.9999999403953552,1.0,1.0,1.0 +Who was the last pope to be crowned?,Paul VI,Pope John XXIII,"[Document(id=2210efe78b716d2e44c94aae5208e52012507f25a18877960555a3c61a18e96f, content: ' Peter's Square: ""Dear children, returning home, you will find children: give your children a hug an...', meta: {'name': 'Pope_John_XXIII', 'source_id': '21b200b333306d54410b53cfd12e123951dabfc1f534196e5309132d00e8184d', 'page_number': 1}, score: 198.73923583731397)]",0.0,0.6684889197349548,0.0,0.0,0.0 +Who handles serious medical emergencies on Norfolk Island?,the Royal Australian Air Force,Royal Australian Air Force,"[Document(id=c983e8f9fccac478cd7d895c53ca7636f4291c3a87693dd68246b03e75a4be42, content: ' All visitors to Norfolk Island, including Australians, are recommended to purchase travel insurance...', meta: {'name': 'Norfolk_Island', 'source_id': '8be872c9c3302a16163599d52ec60841ddff13042f8e8e2e0aa14de2c192e2ee', 'page_number': 1}, score: 151.58417156835065)]",1.0,0.9777635335922241,1.0,1.0,1.0 +Were Roman soldiers expected to purchase their own equipment?,"wealthiest citizens, who were able to purchase the best equipment",the Roman state was forced to arm its soldiers at the expense of the state,"[Document(id=f0adc94ea827dd57e6a4ed0151d80a56b2b699e6457dfd794787f7d9f9121082, content: ' As a result, the Roman state was forced to arm its soldiers at the expense of the state, which it h...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 148.5285249465643)]",1.0,0.14734730124473572,1.0,1.0,1.0 +Who did Nasser's group try to assassinate?,Hussein Sirri Amer,Prime Minister Hussein Sirri Pasha,"[Document(id=25d98a87bc4019e4527fe3dbf903b4491eb5dbfb5600c6d91214b870a2dbf0f9, content: ' It was here that Nasser and his closest comrades, including Sadat and Amer, first discussed their d...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 193.0604166327542)]",1.0,0.6850929260253906,1.0,1.0,1.0 +What was AJ Ayer an important member of?,Logical empiricism,Officer's Club,"[Document(id=62e6ed804eee1f4f43c5f9b715fe2cd7a4d80fc795bab6b82fe1ec962f592d64, content: ' Nasser recalled that her wails ""haunted"" him and firmly dissuaded him from undertaking similar acti...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 134.38736074532318)]",0.0,0.01744076795876026,0.0,0.0,0.0 +In which year was a settlement reached regarding GE's pollution of the Housatonic River?,1999,1977,"[Document(id=f6a12178d43defb2a103deca0f4fb166a5e99e5a8e58be48c76be81bd5d2bd38, content: ' Around the same time, General Electric's Canadian counterpart, Canadian General Electric, was forme...', meta: {'name': 'General_Electric', 'source_id': '26d4044eb2d79191635af69e57c262af8041a70871e60e989128c458529f04cc', 'page_number': 1}, score: 122.22996323290984)]",1.0,0.4726710319519043,1.0,1.0,1.0 +What area in Charleston is considered the 'industrial area'?,the city's Neck area,Moncks Corner,"[Document(id=829bc238e3aa32b3a2d1d1087c36e846f8e99eed9690beff8a876f9cd3c3b0a2, content: ' North Charleston is the second-largest city in the Charleston-North Charleston-Summerville Metropol...', meta: {'name': 'Charleston,_South_Carolina', 'source_id': '6135d76cf2064b0c4f70e685c5c7e15b8093869ebd10f9a74e8acb07e2173763', 'page_number': 1}, score: 160.82842405678284)]",1.0,0.18905964493751526,1.0,1.0,1.0 +In what year did Frédéric obtain citizenship in France?,1835,1523,"[Document(id=af521f4462f7c624e3d32905b63bfb54303d37eecbb7fa381cb828b7ead3a42a, content: ' The Gallicans briefly achieved independence for the French church, on the principle that the religi...', meta: {'name': 'Huguenot', 'source_id': '8c6e1ef6132295dfe82bead80d85963191a790434a85ac498376747f1b1241c7', 'page_number': 1}, score: 136.6167675359813)]",0.0,0.4937823414802551,0.0,0.0,0.0 +What was Tesla afraid someone was trying to do with his invention?,steal the invention,"teleforce"" weapon","[Document(id=60f1f69a282d73eabc8ac6d80df34e28a168bcd0259aa89f6195a0e4b5d2af7e, content: ' In early 1938, Tesla was able to get up. He at once resumed the pigeon-feeding walks on a much more...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 160.49288449971363)]",1.0,0.07724405825138092,1.0,1.0,1.0 +What is the group called that does not agree with government at all?,anarchists,Anti-Federalists,"[Document(id=28633a57f551f7befc042c5ec0a1465570662253672db0458a7f7f56f6428917, content: ' Constitution was being drafted, the Federalist Party supported a stronger central government, while...', meta: {'name': 'Federalism', 'source_id': '05eb4479f6579b86eef28e7954d58c2044997504c63cad297935c95307861772', 'page_number': 1}, score: 109.20383352308949)]",0.0,0.4092292785644531,0.0,0.0,0.0 +What was the name of the bookstore the authors met in?,Old Corner Bookstore,The Eagle and Child and the Lamb and Flag,"[Document(id=a3dbac4bc0b16e2ff3bf9f3c4792a63262476d7373f810881dff5acffbedd8f3, content: ' +The Angel, Islington was formerly a coaching inn, the first on the route northwards out of London, ...', meta: {'name': 'Pub', 'source_id': '502d7ebebad9932fd886ae14992fa24a60ee4433f6dc693672b70b3b21ed1232', 'page_number': 1}, score: 97.75501653939075)]",0.0,0.04613007232546806,0.0,0.0,0.0 +Time and space are both examples of what type of resource?,complexity resources,technology,"[Document(id=02932955f4b1491e6e0753a1f4c1655d6a498d453ea6bf1b48fe583af02a48db, content: ' The question concerns how time is allocated across a number of activities (such as time spent at ho...', meta: {'name': 'Time', 'source_id': 'f3ee42c6d38306d65c6207a15a1b9c65ff08a69728d5d0197f9593548177b229', 'page_number': 1}, score: 102.38581866089173)]",0.0,0.2870693504810333,0.0,0.0,0.0 +What had happened to Frederick's extra troops from Russia?,"Catherine, who ended Russia's alliance with Prussia and withdrew from the war",Frederick moved the bulk of his eastern forces to Pomerania,"[Document(id=2e241d6a2d549d7fb7ef44a92718ad0b9c17de781fc1ddf4c4c4311b6193515f, content: ' Frederick sent urgent requests to Britain for more substantial assistance, as he was now without an...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 184.09512006010272)]",1.0,0.3530529737472534,1.0,1.0,1.0 +What percentage of its workforce did Dell terminate in from 2007 to 2008?,10%,"13 percent of the company's total intake by 1998. Three years later, Dell passed Compaq as the top provider of Intel-based servers, with 31","[Document(id=08a3d22e85cf463253832f09aece7cb6f9124094f57b475f1813e553c0fa4c0f, content: ' Consequently, Dell's enterprise revenues, almost nonexistent in 1994, accounted for 13 percent of t...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 157.4319576783758)]",1.0,0.19732491672039032,1.0,1.0,1.0 +What organization was Rome's official caretaker?,the senate,domus,"[Document(id=81f6510f9c3161c284ded9e3c69551eff2ab954b99d32b61663123ece4455df3, content: ' Some rites of the domus (household) were held in public places but were legally defined as privata ...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 150.9932333320122)]",1.0,0.1496075987815857,1.0,1.0,1.0 +Who chose BYU as the location of the national Middle East Language Resource Center?,United States Department of Education,United States Department of Education,"[Document(id=10406721e1bacc4b94568efb8105444ce33c23b507417efe2e032c93615c0677, content: ' Several of its language programs are the largest of their kind in the nation, the Russian program b...', meta: {'name': 'Brigham_Young_University', 'source_id': 'cb3b81b6f2c540426803106face67bc5fd4db7b1a9ef422ec3601456732b4785', 'page_number': 1}, score: 205.01521643478702)]",1.0,1.0,1.0,1.0,1.0 +Who discovered Hydrogen gas?,Robert Boyle,Robert Boyle,"[Document(id=694b7ad0ec30f16563c095bfd8aa73a286298746fa8b5b6b568961e806fe7462, content: ' +In 1671, Robert Boyle discovered and described the reaction between iron filings and dilute acids, ...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 200.7654419813894)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Where was Polybius forced to go as hostage?,Rome,Beroea,"[Document(id=b0db6a298369be019a3d01ef5bfbe30dd534f6a2be471b538c76854bccadccaa, content: '."" (Pope Gregory I, would have full access to the Vatican Archives). +Constantius, ordered Liberius i...', meta: {'name': 'Athanasius_of_Alexandria', 'source_id': '2a8e7ec0a44075f1a567744982dcecf4bcea19f0065d4f73c742398a05fa1642', 'page_number': 1}, score: 154.05413994317473)]",0.0,0.1712186634540558,0.0,0.0,0.0 +In what decade was disco popular?,1970s,90's,"[Document(id=cf86cea5ed69b1cddee9e7bfd4d861b5f270fd7c89048980e1e45da52aa037d0, content: ' Techno and rave was happening in the 90's and had a lot of different forms. There was very experime...', meta: {'name': 'Madonna_(entertainer)', 'source_id': '0868d847bb768c30a7507239169d5e441bb3181b66c8b9353a863daa3d58e425', 'page_number': 1}, score: 179.8445269758427)]",0.0,0.5847175121307373,0.0,0.0,0.0 +What do the rivers form?,lakes,lakes,"[Document(id=d9c66874c7c0a9fe45438a723827763a95eacdbe604f0f1e81a1a311692bb3a4, content: ' The Rhine originates in a 30 square kilometre area in Switzerland and represents almost 60 percent ...', meta: {'name': 'Alps', 'source_id': 'd2eb3ad3af7d6110ae691bf40660ff7a79e4e9dbfd25785b4e55c7700bfcae79', 'page_number': 1}, score: 136.31986270459694)]",1.0,1.0,1.0,1.0,1.0 +What were the first 256 code points of Unicode made identical to? ,ISO-8859-1,the English alphabet,"[Document(id=79f26e42dd7702142ed7d1e92a23812856512d31177acccc13a6b2634280e3f3, content: 'Originally based on the English alphabet, ASCII encodes 128 specified characters into seven-bit inte...', meta: {'name': 'ASCII', 'source_id': '4bd7e27c39431d5c4712c32c534ed40f7df89f7b80f6eccb22975178fb7b1be2', 'page_number': 1}, score: 154.38856966441136)]",0.0,0.025279667228460312,0.0,0.0,0.0 +"How many countries did her song ""Irreplaceable"" get number one status in?",five,44,"[Document(id=b8e4c4f73e595a4bf934f4743c25ffdd012fa0b8bdf2c0ce7ffd0ad37403f5dd, content: ' +Madonna holds the record for the most number-ones on all combined Billboard charts, including twelv...', meta: {'name': 'Madonna_(entertainer)', 'source_id': '0868d847bb768c30a7507239169d5e441bb3181b66c8b9353a863daa3d58e425', 'page_number': 1}, score: 116.87147868544233)]",0.0,0.19012457132339478,0.0,0.0,0.0 +How many Examination Boards exist in India?,30,30,"[Document(id=87a93f72cbb9c00ea6477b2a31d21c284a2ecf07733f93b0fb4ba7b277f5b8f5, content: ' It is within the power of both the union government and the state governments to govern schools sin...', meta: {'name': 'Private_school', 'source_id': 'c62330ba95e9919513bc9fc65c0d21a748b4a518b9b1f905a113de7f37274e44', 'page_number': 1}, score: 129.9025361296588)]",1.0,1.0,1.0,1.0,1.0 +What play showed an early depiction of civil disobedience?,Antigone,Antigone,"[Document(id=99743eb7e7277e11f539b5449857c5f012a842facd618f6a75ff178a89fc1f76, content: 'One of its earliest massive implementations was brought about by Egyptians against the British occup...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 155.07687203117038)]",1.0,1.0,1.0,1.0,1.0 +"Once in conversation with the Queen, how is Elizabeth addressed?",Ma'am,solo public appearance on a visit to the Grenadier Guards,"[Document(id=331c1fa6114188993824feeab60399c92e5d44233f4ea3a0e3a47f363d1ca459, content: ' The book describes Elizabeth's love of horses and dogs, her orderliness, and her attitude of respon...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 143.80750919536004)]",1.0,-0.02931702323257923,1.0,1.0,1.0 +In what century did Paris become largely into fashion?,19th,19th,"[Document(id=ff2ec1a3232f7d74c84b198defa77dd9879cdb65474a7dab2bf47da19675285e, content: ' +Paris has been an international capital of high fashion since the 19th century, particularly in the...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 177.00800205562217)]",1.0,0.9999999403953552,1.0,1.0,1.0 +When did Alfred the Great begin his reign?,871,1740,"[Document(id=51a7cda8329feb5be4b828db4c3949d0f3972d8afc87046ed9763336b16b4114, content: ' +Frederick the Great, the king of Prussia from 1740 to 1786, saw himself as a leader of the Enlighte...', meta: {'name': 'Age_of_Enlightenment', 'source_id': 'd6935b64f79e30b7c3de4c952b643a55674c548cd0bb703870a094fd3e646d84', 'page_number': 1}, score: 145.02006011302487)]",0.0,0.25414443016052246,0.0,0.0,0.0 +What two methods can be used when using Load Testing?,data or a large number of users,large quantities of data or a large number of users,"[Document(id=4753cc282d43999b382172589cceb076cee5e8185383911435f640a568593cbe, content: ' +Load testing is primarily concerned with testing that the system can continue to operate under a sp...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 154.96837900503925)]",1.0,0.8635382056236267,1.0,1.0,1.0 +what may possibly be a solution to net neutrality concerns? ,municipal broadband,municipal broadband,"[Document(id=5f8dc7264853d3b0b88a03420e7878e16fa4c08ef70ab7cc5d722e889b21dda6, content: 'S. Federal Communications Commission (FCC) was reported to be considering a new rule that will permi...', meta: {'name': 'Internet_service_provider', 'source_id': 'b6cdeebacab1c0fc99cc348ba10a7ad5f8a09ca4691c74f89374730939ffbbb4', 'page_number': 1}, score: 119.60475215544247)]",1.0,1.0,1.0,1.0,1.0 +What are the ground-attack aircraft in the USAF usually deployed in support for? ,U.S. ground forces,U.S. ground forces,"[Document(id=87ffdf42e04cd4e5aabc33e35caf9614ba61758e355a1d97a38a524ba0bedf46, content: ' +The ground-attack aircraft of the USAF are designed to attack targets on the ground and are often d...', meta: {'name': 'United_States_Air_Force', 'source_id': '1bb222de66e5a2c81f1d5b75bf3eced066939aec82934a5cb176a607b4e8cc8f', 'page_number': 1}, score: 259.32749430318006)]",1.0,0.9999999403953552,1.0,1.0,1.0 + In what year did the invasion of Russia take place?,1812,1854,"[Document(id=ae931e2fa116a2f50e732849bda9db89ea2500925baddc8026e2033f9f7f334c, content: ':175–176 In July 1854, the Turks under Omar Pasha crossed the Danube into Wallachia and on 7 July 18...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 166.47104419579009)]",0.0,0.6653626561164856,0.0,0.0,0.0 +What was the practice of religion to the Romans?,personal choice,Private and personal worship,"[Document(id=8cb202622013f5234d79ec3a4275501aaa91b4fdefe32dbbccfd335829f7ffa1, content: '[citation needed] +Roman religious beliefs date back to the founding of Rome, around 800 BC. However,...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 224.81247000090372)]",0.0,0.4898977279663086,0.0,0.0,0.0 +What is one example of failure testing?,Software fault injection,defects in dead code,"[Document(id=7475e2af3065cbe79f0e067059ae3ca86f6548819c59b2c8ff21a438fc2f6346, content: ' Not all defects will necessarily result in failures. For example, defects in dead code will never r...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 112.420145159293)]",1.0,0.20567262172698975,1.0,1.0,1.0 +"Out of the maximum amount of players allowed, one must be a what?",goalkeeper,Goalkeepers,"[Document(id=dde1f3d5248e35f7ba793940bcbe48c24b20edc95ec4f306ad8006ccc3bb1c73, content: ' Competition rules may state a minimum number of players required to constitute a team, which is usu...', meta: {'name': 'Association_football', 'source_id': 'f2b7305b09ad8a43b8389a5eb2d84b8ea4b4deaa7968d21fe9da9da9ecda2c6b', 'page_number': 1}, score: 105.73158780608007)]",1.0,0.8274490833282471,1.0,1.0,1.0 +In which year was the new constitution ratified?,1855,1789,"[Document(id=622a6215e7b7fd56283c9c82ebb139d7b9ea3b341dd076ca022738fd1e235092, content: ' Following this ""Pyrrhic victory"", Cornwallis chose to move to the Virginia coastline to get reinfor...', meta: {'name': 'North_Carolina', 'source_id': '3282e3dedcf0f1923e66ebf5b79cf9b9deec27c8ca1df8908e03770b02b38126', 'page_number': 1}, score: 126.51363074019773)]",0.0,0.5541975498199463,0.0,0.0,0.0 +What vessel was attacked in 1845?,Naiad,General Belgrano,"[Document(id=d722ec11335f8302273d59fd37f3b36beb8750fe1d615e23090ff456c9626c6f, content: ' +One of the paper's best known front pages, published on 4 May 1982, commemorated the torpedoing of ...', meta: {'name': 'The_Sun_(United_Kingdom)', 'source_id': 'f3e43af95385a83926c5f55e334fdf85194f862cc4b324d6cfc50b85c0244bbd', 'page_number': 1}, score: 163.48297421082395)]",0.0,0.002335941419005394,0.0,0.0,0.0 +When did Travel + Leisure list Yale as one of the most beautiful campuses?,2011,2011,"[Document(id=1eb83a50e062eec7af6abab3de1bf0b5576a4c75ac168cc64f1b77a2e268c579, content: '7 km2) Yale-Myers Forest in Connecticut's Quiet Corner—and nature preserves including Horse Island. +...', meta: {'name': 'Yale_University', 'source_id': 'f33638c4d6b8d7572ba500f3a8affca1729aae3046c83a94e2a7424dd50f6549', 'page_number': 1}, score: 191.53811571133681)]",1.0,1.0,1.0,1.0,1.0 +What sort of variants did the Human Genome Project indicate that many of the measured transcripts were?,alternative variants,expressed sequence tags,"[Document(id=c181af6d9e4133e2db3dd3214c28363e87a8f250668a4fc4d9c06fd0060f35eb, content: ' Conversely, plants can have extremely large genomes, with rice containing >46,000 protein-coding ge...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 114.01989222941418)]",1.0,0.13049247860908508,1.0,1.0,1.0 +During what year did the Italians invade Libya?,1911,1980,"[Document(id=6fa0d2cdcb0d00898debed351e32ab41dc4678320f30c21f07a92e9289207601, content: ' Military spending increased, while other administrative budgets were cut back. Libya had long suppo...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 214.09821488402224)]",1.0,0.48459139466285706,1.0,1.0,1.0 +What did Davidson believe the God of Aristotle is synonymous with?,rational thought,monasticism,"[Document(id=0dd55f7c8a45d79ff127432f3d18e4c842a70b92bbe1cf8864e7cfbdffccbbca, content: ' +Nonetheless, within a few years of his death, Gregory of Nazianzus called him the ""Pillar of the Ch...', meta: {'name': 'Athanasius_of_Alexandria', 'source_id': '2a8e7ec0a44075f1a567744982dcecf4bcea19f0065d4f73c742398a05fa1642', 'page_number': 1}, score: 189.85400139648488)]",0.0,0.2760372757911682,0.0,0.0,0.0 +What percentage of students at Notre Dame are the children of former Notre Dame students?,21–24%,39.1%),"[Document(id=88c1169eb8a5c301a3e8bb36466a4a18159a705a0296cfed8548baf03d852546, content: ' The academic profile of the enrolled class continues to rate among the top 10 to 15 in the nation f...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 129.95337496597662)]",1.0,0.8029516339302063,1.0,1.0,1.0 +On what day did U.S> planes located Ozawa's fleet?,second,20 October,"[Document(id=7f6d0a9e9a3e49df55737ef78eeafa784e1bc879c484b5e1795fb994301c9ebc, content: ' Kurita's force had begun the battle with five battleships; when he returned to Japan, only Yamato w...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 174.6041177508166)]",1.0,0.03874627500772476,1.0,1.0,1.0 +Greek was crowned the European champions in what event?,Euro 2004,basketball,"[Document(id=12d9fa7ff9f514e66e336f50a70a372c3162770635ba732cc4335141fd65ee50, content: 'O.K, Maroussi) have won 3 Triple Crowns, 5 Saporta Cups, 2 Korać Cups and 1 FIBA Europe Champions Cu...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 183.37264468656736)]",1.0,0.03532267361879349,1.0,1.0,1.0 +Which tense indicates that an action hasn't finished?,imperfective,"(""Can you your pen not find because it far too dark","[Document(id=d10bc2aa429e55969934f976b711e2ed0e408629173680b12b85b5711713dc02, content: ' in het jaar des Heren (Anno Domini), where ""-en"" is actually the genitive ending of the weak noun. ...', meta: {'name': 'Dutch_language', 'source_id': '38c4da472a27f770c9c829b498d0e1e30e81320ca29e7d0546aebb638c81be6a', 'page_number': 1}, score: 109.78396328462887)]",0.0,-0.03480945900082588,0.0,0.0,0.0 +What acoustic element could be distorted by different audio settings in iPods prior to 2007?,Bass,bass instruments,"[Document(id=158a6b8443733e42d24dec31180770da4724f3bd14ed49e47a937624f7aeed47, content: ' This would happen for EQ settings like R&B, Rock, Acoustic, and Bass Booster, because the equalizer...', meta: {'name': 'IPod', 'source_id': 'f0324763387a7e11c235569bada6935891b2c5ba40dd065ab5acc1bb5de8dbed', 'page_number': 1}, score: 143.5323404197054)]",1.0,0.40640756487846375,1.0,1.0,1.0 +In what way can Neutral triatomic hydrogen exist?,excited form,quantum mechanical,"[Document(id=e248ff75cbbe44aa00589ce96d2d6a12e8311dee527524077686bbeb3cd47e37, content: ' However, the electromagnetic force attracts electrons and protons to one another, while planets and...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 118.46250890702927)]",1.0,0.09526881575584412,1.0,1.0,1.0 +How are children used in the conflicts in Burma?,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements.,Children are being sold as conscripts,"[Document(id=0088f05f01a71535c2cb1e5b41db4930183fdb107aa5d9b1b2402b982ed6e8a4, content: '"" In July 2013, the Assistance Association for Political Prisoners indicated that there were approxi...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 154.09655586407015)]",1.0,0.30359911918640137,1.0,1.0,1.0 +In what decades were Arsenal players stereotyped in a negative way?,1970s and 1980s,1970s and 1980s,"[Document(id=1bdd87144e8c05c0c8820e3bd6186deb63a4be13d5810e39af795993e4721bb7, content: ' The book was twice adapted for the cinema – the 1997 British film focuses on Arsenal's 1988–89 titl...', meta: {'name': 'Arsenal_F.C.', 'source_id': 'b197b14120aabbc3bf2073d4b3e99c1c51d56d5ac5248f8a97dadcfa056a6f7e', 'page_number': 1}, score: 87.2309971301853)]",1.0,1.0,1.0,1.0,1.0 +When did new construction start in Melbourne?,late 1890s,February 2006,"[Document(id=7da3a14e7c1a2593c36359e18d0afbc482e5aba0c1ad5c3a8e024a2d8e6820ca, content: '8 billion annually. Melbourne has been attracting an increasing share of domestic and international ...', meta: {'name': 'Melbourne', 'source_id': 'e9cc00699ea29b72949874ec0e147efa277ecdaa2a727246f376aab1a8d342bd', 'page_number': 1}, score: 190.52435017827867)]",1.0,0.34920746088027954,1.0,1.0,1.0 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:10.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:10.csv new file mode 100644 index 0000000..5969b76 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:10.csv @@ -0,0 +1,125 @@ +questions,true_answers,predicted_answers,retrieved_docs,doc_mrr,sas,doc_map,doc_recall_single_hit,doc_recall_multi_hit +What were the earliest priesthoods? ,flamines,flamines,"[Document(id=ac8e9c286327aea24d00bceb002b08ad3fbc3007673c77da9b48b6b6b91aa76f, content: ' Some rites of the domus (household) were held in public places but were legally defined as privata ...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 172.20880681428673)]",1.0,0.9999999403953552,1.0,1.0,1.0 +When was a third visit to Mnafeld scheduled? ,early 1546,March,"[Document(id=f57debdba01d2f54638ad2ea5d7f83ec82701e84c455e0dbe992aa67a497d5ed, content: ' The park has a multi-use amphitheatre that seats up to 2,500 people, authentic Japanese Garden, fen...', meta: {'name': 'Fresno,_California', 'source_id': 'a46a1dee5f6d188d5f5a510a3c694e88541550757193e312801d8b786a38b1fa', 'page_number': 1}, score: 76.33771101797709)]",0.0,0.04645652696490288,0.0,0.0,0.0 +What was the name of the airfield where heavy raids took place?,Farnborough,Henderson Field,"[Document(id=ed37a013fee4250f08c6e4ac742a925bddf32a783e6e7ad6a4ce05ecb911000d, content: ' From July 1942, a few Australian reserve battalions, many of them very young and untrained, fought ...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 160.96193124977387)]",0.0,0.11904579401016235,0.0,0.0,0.0 +The Modern English ending -'s is derived from what ending in Old English?,-es,Mercian,"[Document(id=87ce0c567b1b01caef21127feb46f37d5c70db487d739c7177f21781764a7cab, content: ' Norse was also widely spoken in the parts of England which fell under Danish law. +Some of the most ...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 109.5507591657911)]",1.0,0.0658683329820633,1.0,1.0,1.0 +In what area is it common for spiritual mentorship to be extremely high?,the West,"low-church"" traditions of Protestantism","[Document(id=70acdc5f12009efe2ebd2dd786809c97eacf0dc441dc5d0ed86857bd7db13270, content: ' Also, with the advent of the internet, many teachers are now selling their lesson plans to other te...', meta: {'name': 'Teacher', 'source_id': '8d3b7bd3176a10380300a49fa713b8fffa5e77a181df51ea823d2bdeb46d4c7b', 'page_number': 1}, score: 111.66204991140583)]",1.0,0.12859657406806946,1.0,1.0,1.0 +Who is the only Greek volleyball club to win European titles?,Olympiacos,"Panathinaikos, Olympiacos, Aris Thessaloniki, AEK Athens, P.A.O.K, Maroussi","[Document(id=76be05dc0de2dfbcf6d3d16e80abbfa4ee57f55d30696681b941ecd6ff700822, content: 'O.K. Greek basketball teams are the most successful in European basketball the last 25 years, having...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 152.09265079516422)]",1.0,0.3749234676361084,1.0,1.0,1.0 +How many judges were on the panel for this season?,four,13,"[Document(id=63e9ac4931bb015305e1fa406d4e54b92a6935adf471aeb03ebad59ce92e5aeb, content: ' The Hollywood round was moved to the Kodak Theatre for 2009 and was also extended to two weeks. Ido...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 89.69065984438612)]",1.0,0.3285146951675415,1.0,1.0,1.0 +"Along with a chemical process, what type of process is used by night vision devices to convert infrared into visible light?",electrical,electrical,"[Document(id=1c8a379dc092fa8d374927640e6d16f46286416581451c66a1da80fef1412acc, content: ' Infrared can also be used to detect protostars before they begin to emit visible light. Stars emit ...', meta: {'name': 'Infrared', 'source_id': 'ba9d9843065facd8aa75e472f0c7d760ae3d78712335a49f7abaf499acb21d1c', 'page_number': 1}, score: 129.0274963398839)]",1.0,1.0,1.0,1.0,1.0 +The second most prominent religion in New York is what?,Judaism,Judaism,"[Document(id=1eed45d4c946ec058384afdd313db9b847d2b384629f1872466e03c7bc13a2e5, content: ' +Christianity (59%), particularly Catholicism (33%), was the most prevalently practiced religion in ...', meta: {'name': 'New_York_City', 'source_id': '8a1509d057a51ed7a68b42ff87df5d459ac7f15f3d0f4f1685859ba197f9686d', 'page_number': 1}, score: 173.3412040228327)]",1.0,1.0,1.0,1.0,1.0 +The Free Hanseatic City of Bremen includes which cities?,Bremen and Bremerhaven,two urban districts,"[Document(id=b99c33e3f1152b05cb402034099d36d6b8fc3942c6deb97d0de9d7f47d56c738, content: ' The equivalent of the Minister-President is the Senatspräsident (President of the Senate) in Bremen...', meta: {'name': 'States_of_Germany', 'source_id': '60764e89141121f734e956aeeb48fcbe12c7e4fdec683c4f9fa29406a3f0579e', 'page_number': 1}, score: 166.68434734067063)]",1.0,0.12825782597064972,1.0,1.0,1.0 +What seeks to limit the use of polysyllabic characters?,CPC,"Lipopolysaccharides, also called endotoxins, are composed of polysaccharides and lipid A that is responsible for much of the toxicity of gram-negative bacteria. Most bacteria have the gram-negative cell wall, and only the Firmicutes and Actinobacteria have the alternative gram-positive arrangement. These two groups were previously known as the low G+C and high G+C Gram-positive bacteria, respectively. These differences in structure can produce differences in antibiotic susceptibility; for instance, vancomycin can kill only gram-positive bacteria and is ineffective against gram-negative pathogens, such as Haemophilus influenzae or Pseudomonas aeruginosa. If the bacterial cell wall is entirely removed, it is called a protoplast, whereas if it is partially removed, it is called a spheroplast. β-Lactam antibiotics, such as penicillin, inhibit the formation of peptidoglycan cross-links in the bacterial cell wall. The enzyme lysozyme, found in human tears, also digests the cell wall of bacteria and is the body's main defense against eye infections. +Acid-fast bacteria, such as Mycobacteria, are resistant to decolorization by acids during staining procedures","[Document(id=a93c46780eb38a9b8195740a871eddb9a85c482bf7d89e72d9247c60999aed47, content: ' Lipopolysaccharides, also called endotoxins, are composed of polysaccharides and lipid A that is re...', meta: {'name': 'Bacteria', 'source_id': 'f8e9d365bfe51780e8f42f3a07cb69dd7a6df46bc7c67030d0853bdb13eeb390', 'page_number': 1}, score: 67.86823728702234)]",0.0,0.16699571907520294,0.0,0.0,0.0 +In what courts can federal habeas corpus suits be brought?,federal,state courts,"[Document(id=147b37810aaaff7417c371f4082c3ac45e8f46c0560ff29ec5e3f8d90656c5bb, content: ' The purpose of federal habeas corpus is to ensure that state courts, through the process of direct ...', meta: {'name': 'Capital_punishment_in_the_United_States', 'source_id': '0a4e4dda32f7be6a4f78da5ed9ee21d2cc7659112c0d087b2923a482bdc77b5b', 'page_number': 1}, score: 96.73063096569572)]",1.0,0.3909306526184082,1.0,1.0,1.0 +What did Whitehead consider himself as a philosopher? ,rank amateur,rank amateur,"[Document(id=a022efa8ed739794e05008dbe44d9b83a87fc4f4b7a609ec096b9d437bc5089e, content: ' For Whitehead, education should be the exact opposite of the multidisciplinary, value-free school m...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 210.30818292133372)]",1.0,1.0,1.0,1.0,1.0 +What condition does a person have to meet to be allowed by law on a coin?,deceased,treason,"[Document(id=2954716cf85dfc3cbc22e1e1bbc015e2ddc7abd27427ced8fbf5d44180b2afe2, content: '.."" The Deuteronomic social vision may have influenced opponents of the divine right of kings, inclu...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 75.94772194895006)]",0.0,0.054149776697158813,0.0,0.0,0.0 +Which region of the US experiences rain year round?,Pacific Northwest,Southwest,"[Document(id=ef3b24124d6963ab10e31070a01eb903bae0b374f75f9a00a0da61890b7d8c80, content: ', tornadoes are more common than anywhere else on Earth and touch down most commonly in the spring a...', meta: {'name': 'Geography_of_the_United_States', 'source_id': '6eef5e1e8e3a1d489b23e0304d3cc9472b84a44871b2fd5ecfc8ab3f12e8ccf0', 'page_number': 1}, score: 182.51279558274814)]",1.0,0.6455833315849304,1.0,1.0,1.0 +What is the ability to recognize and adapt to new specific pathogens called?,Adaptive (or acquired) immunity,Antigen specificity,"[Document(id=a4a5008fab761323a2771c50538a7e75c9cf4ed6426bbef3a6ae08b7edd123e4, content: ' The adaptive immune response is antigen-specific and requires the recognition of specific ""non-self...', meta: {'name': 'Immune_system', 'source_id': 'fcb8c5092b2596a66d4f96587d653cbabda606874d86632c35ec40ef8f023051', 'page_number': 1}, score: 159.91248406200884)]",1.0,0.2857634425163269,1.0,1.0,1.0 +Who allegedly hit a home run to the Center?,Babe Ruth,Babe Ruth,"[Document(id=bde5f6c2b4913151f9e46d6578f4f20345ade116f2e96b5ae76ef509e1e517dd, content: ' Because of the tie the Giants and Cubs ended up tied for first place. The Giants lost the ensuing o...', meta: {'name': 'Chicago_Cubs', 'source_id': '10bd5330115a2fe2b9ef60d21abe3942769709161783409af0c4444ccac129c3', 'page_number': 1}, score: 164.45063393002806)]",1.0,1.0,1.0,1.0,1.0 +Which type of graduate from Imperial earned the second highest average starting salary after graduation?,Computing graduates,3.36,"[Document(id=8190e2f3cfba3c67fc319c1eba30ec986d8174be2decff7ed0bb12a364fb52bb, content: ' +The College's endowment is sub-divided into three distinct portfolios: (i) Unitised Scheme – a unit...', meta: {'name': 'Imperial_College_London', 'source_id': '8ff02d910ce681fdd72074b13ed322271625386966ee827dabf1c93ca556fd6e', 'page_number': 1}, score: 140.20154888024658)]",1.0,-0.009853528812527657,1.0,1.0,1.0 +In what year did Eisenhower propose his second civil rights act?,1960,1955,"[Document(id=0d88cb30320f0b038a71e1d9aa43a891187ded88e8298726341208761ff3f32a, content: ' Congress agreed to his request in 1955 for the Formosa Resolution, which obliged the U.S. to milita...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 185.40736255863564)]",1.0,0.6117545962333679,1.0,1.0,1.0 +What is the current height limitation in Paris in central areas?,50 m,50 m (160 ft),"[Document(id=891cab83df456a1bb11d8e7c301e45b02e564d224d3be1f7bdb82d02edc8f221, content: ' Modern Paris owes much to its late 19th century Second Empire remodelling by the Baron Haussmann: m...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 156.9284298092429)]",1.0,0.7246970534324646,1.0,1.0,1.0 +Income level was found to have a signification correlation to which outcome in the study?,nutrition literacy,higher GDP growth,"[Document(id=d808adb7f33c5872d1362d8ebeedf89fc5b01f9c770c93ea4364c373e0fed521, content: ' In contrast, an increase in the income share of the bottom 20 percent (the poor) is associated with...', meta: {'name': 'Economic_inequality', 'source_id': '648ff32b8c146c974b23e115e3a61420b1cdbea9de1f5bf38442ce4c1d9caa42', 'page_number': 1}, score: 87.37153986450713)]",0.0,0.09903770685195923,0.0,0.0,0.0 +How much did Viacom want in damages in their lawsuit against youtube?,$1 billion,$1 billion,"[Document(id=56ff4bdb4f543c61c1f516ba1e544e83e117ab5512bae58b2264fa6b6d86f26b, content: ' Three successful complaints for copyright infringement against a user account will result in the ac...', meta: {'name': 'YouTube', 'source_id': '4d18798daeceb997a6dcb04b575dbbdad768671ca9202116370611d944d510be', 'page_number': 1}, score: 173.34560648176648)]",1.0,0.9999998807907104,1.0,1.0,1.0 +Who should be able to challenge administrative orders in court?,individuals,individuals,"[Document(id=5c5f6a0591170904a56f4c0417e667089761d8d8f013f55283aff164fdadd75b, content: ' Dicey's: “no man is punishable or can be lawfully made to suffer in body or goods except for a dist...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 116.19394976028495)]",1.0,1.0000001192092896,1.0,1.0,1.0 +The Slavs were under what name in the early 6th century?,the Antes and the Sclaveni,Veneti,"[Document(id=4236cbbf71506fe35f0da033ea9b465edf3cfbfdd8e6bac2b259b18835cc0408, content: ' +The relationship between the Slavs and a tribe called the Veneti east of the River Vistula in the R...', meta: {'name': 'Slavs', 'source_id': 'a55731ff3cd03d8fe34d4e646dbfcdc2088f01190e5110c9ff0baf4e4b486847', 'page_number': 1}, score: 210.39712772144958)]",1.0,0.36141833662986755,1.0,1.0,1.0 +How are the results simplified when using the SASO?,"into four summaries that look specifically at responses that correspond to either homosexuality, heterosexuality, bisexuality or asexuality.","measures sexual orientation on a continuum, considers various dimensions of sexual orientation, and considers homosexuality and heterosexuality separately","[Document(id=04d7cfe710801483b2e5c7c67b89cf9966fc92203783ea424fccf90e1b51520c, content: ' The classification scheme works such that individuals in the same category show the same balance be...', meta: {'name': 'Sexual_orientation', 'source_id': 'f94279650936a403aa3d2b43ab1ad42b1fd14d05455aa86fbcda6cda978ffe5c', 'page_number': 1}, score: 79.03556725755448)]",1.0,0.4782727062702179,1.0,1.0,1.0 +What socioeconomic class used the Florentine Tuscan language in Dante's time?,upper class,literate and upper class,"[Document(id=149c6390292392d4438c9191dca43bbe827c20a86bb7bfc502679813d7fcc108, content: ' Due to Eastern Lombard's status as a Gallo-Italic language, an Eastern Lombard speaker may, in fact...', meta: {'name': 'Dialect', 'source_id': '2e05b686e48c9512ae5466f1d15202b44fdc903ea32d0a42c5dbc9ef173d051d', 'page_number': 1}, score: 177.45530984259983)]",1.0,0.8130882978439331,1.0,1.0,1.0 +"In addition to ammo proposals, what other need was articulated?",The need for some form of tracer or smoke trail,to explode in the air,"[Document(id=4e21aed2e6d5a2193f1991660032d4445fa58d39e44d263ec7754bf0c98869ec, content: ' These batteries, particularly with guns, usually deploy in a small area, although batteries may be ...', meta: {'name': 'Anti-aircraft_warfare', 'source_id': 'c54639424f62ce369356ed8c4e1e9fab8769359dd28029aa89832f5aa994f45d', 'page_number': 1}, score: 81.70938919778587)]",1.0,0.06160792335867882,1.0,1.0,1.0 +What is that power that enables us to love?,Prevenient grace,Long-term relationships allow adolescents to gain the skills necessary for high-quality relationships later in life and develop feelings of self-worth,"[Document(id=9df0e87f01c1b2c8ac7969d77fe5940944b4885a75655d3b107fbb3e960b582b, content: '0 for males and 17.3 for females. The typical duration of relationships increases throughout the tee...', meta: {'name': 'Adolescence', 'source_id': '4687b887b5783807c17e352eca017193687de7d3428929359d466eaed34ebbe4', 'page_number': 1}, score: 99.769775039754)]",0.0,0.00461157038807869,0.0,0.0,0.0 +"Who was assassinated on September 22, 189 AD?",He Jin,He Jin,"[Document(id=1023d346fa09f68f024520eed11ceb8d146eebd929e4d93e19ec61ae32461e57, content: ' 189 AD), half-brother to Empress He (d. 189 AD), plotted with Yuan Shao (d. 202 AD) to overthrow th...', meta: {'name': 'Han_dynasty', 'source_id': '060d95b91f9c20e5c8cd8b8a032ab44aac38ab88ce62d3c302bea0865f533568', 'page_number': 1}, score: 149.4057899599862)]",1.0,1.0,1.0,1.0,1.0 +How did Gaddafi change the legal system?,he introduced sharia as the basis for the legal system,introduced sharia,"[Document(id=e2a95241acaa96c9b3665ffd83614f229b121a721d01ae7e3a778b2ce1d5050d, content: ' Intent on pushing Libya towards ""Islamic socialism"", he introduced sharia as the basis for the lega...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 208.30154430383593)]",1.0,0.6521134972572327,1.0,1.0,1.0 +What name was it assumed Elizabeth would take upon her marriage to Philip?,"Mountbatten,",Peter Townsend,"[Document(id=4b69cd784f30edcd0bcf343e88529300a3f5f46cab6549e05e32a9f6208375df, content: ' +Elizabeth and Philip were married on 20 November 1947 at Westminster Abbey. They received 2500 wedd...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 210.18174733409978)]",1.0,0.19164665043354034,1.0,1.0,1.0 +What was the attack on the British weakness?,Battle of Fort Bull,sea communications,"[Document(id=2982b60b277d226d31c0a9fe90582fab11b0c6ff0f2e5a54d4e342474fbc3795, content: ' +In 1941, the Luftwaffe shifted strategy again. Erich Raeder—commander-in-chief of the Kriegsmarine—...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 149.98474112003294)]",0.0,-0.1473047137260437,0.0,0.0,0.0 +How much of South Tucson is Native American?,10%,10%,"[Document(id=e834bcf1ef63c7b72f9bf36d447e483da5003c1c77f033ef064e514b7082f587, content: ' To the south, N. Highland Avenue leads to the Barraza-Aviation Parkway bicycle path. +South Tucson i...', meta: {'name': 'Tucson,_Arizona', 'source_id': '1cd8368afbbb7156e4d37c16c9795a1cf7209fbd0036331b6759337ae62d7224', 'page_number': 1}, score: 197.57082115721136)]",1.0,0.9999998807907104,1.0,1.0,1.0 +Whose theory claimed that species were unchanging and designed by God?,John Ray,John Ray,"[Document(id=67c87173e747d72d79bc92866e553902d56583aae14965ecaa3b2da507256976, content: ' After the turmoil of the English Civil War, the Royal Society wanted to show that science did not t...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 161.88306078991866)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Who can restore a database that has been corrupted?,database administrator,Database transactions,"[Document(id=5baf5b500fc870a57b1f1a3f45f13e9b554e1d91359189ce3677cff042e60b2d, content: ' Data security prevents unauthorized users from viewing or updating the database. Using passwords, u...', meta: {'name': 'Database', 'source_id': '98b09743ece9be42ad021f0dc6ffab08c322e7eeace6aae0b98b4bbf67224bd3', 'page_number': 1}, score: 81.34420688916279)]",1.0,0.4896238446235657,1.0,1.0,1.0 +What unpopular philosopher's text did Avicenna learn from?,Abu Abdullah Nateli,Abu Abdullah Nateli,"[Document(id=cbebf9a70685ea0e72bcf8c9f1dbbe9b7163ee26279b633446edf34314600fb3, content: ' Janssens demonstrated that Avicenna was a Sunni Hanafi. However, the 14th cenutry Shia faqih Nurull...', meta: {'name': 'Avicenna', 'source_id': '25cbb1011a29473e1dfa8d96ebe95bc8ead7393548de57e871f959624f7238f1', 'page_number': 1}, score: 181.94319586501797)]",1.0,1.0,1.0,1.0,1.0 +What was the event called that caused Ayutthaya to reclaim lost territories ?,the Chinese threat,Ayutthaya recovered its territories by 1770,"[Document(id=f08f17188f751088b2b76f2c1a69ffc86f340979f05ea84b4b201def163ec6f3, content: ' Its trade and secular administrative reforms built a prosperous economy for more than 80 years. Fro...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 145.42691498267624)]",1.0,0.08274449408054352,1.0,1.0,1.0 +"Before the introduction of the Latin alphabet, how was Old English written?",runic system,the scholarly and diplomatic lingua franca of Western Europe,"[Document(id=d806131552e88e82e2690be85e2fbf31773e47614f5d7a8efdc4ab729427ca2b, content: 'Old English (Ænglisc, Anglisc, Englisc) or Anglo-Saxon is the earliest historical form of the Englis...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 161.545426839566)]",1.0,-0.028287040069699287,1.0,1.0,1.0 +"When groups that are antagonistic to each other form a coalition for hidden gain, it is called what?",unholy alliance,unholy alliance,"[Document(id=c54ac19342651f97c105c4825d0b82cbf05684cf1d7c4921e0d078a7212b4a21, content: ' A common type of embezzlement is that of personal use of entrusted government resources; for exampl...', meta: {'name': 'Political_corruption', 'source_id': 'e7c84fb03cb06b72c4ab51eb24218e9f2a10b95493e43d575342db0ffa3e9e1d', 'page_number': 1}, score: 113.39244671567084)]",1.0,1.0,1.0,1.0,1.0 +What made a strong economic case for investing in low carbon technologies?,Stern Review,Clean Edge,"[Document(id=639620812442614c676be21cf936d052650009771bc4d08df98e8056e4441e38, content: ' +New government spending, regulation, and policies helped the industry weather the 2009 economic cri...', meta: {'name': 'Renewable_energy_commercialization', 'source_id': 'fa5f7904b8df5272f4f870ed401f610435c916171c87b47deed9a45e6bd594ea', 'page_number': 1}, score: 114.57287042595362)]",1.0,0.03996779024600983,1.0,1.0,1.0 +"Who introduced the Latin version of the word ""insect""?",Pliny the Elder,Pliny the Elder,"[Document(id=de63707a025645c7fcc7a26e6dd37ddd29a27cd60ff187ce487e0d07217708ee, content: ' +The word ""insect"" comes from the Latin word insectum, meaning ""with a notched or divided body"", or ...', meta: {'name': 'Insect', 'source_id': 'e14215fdfb7e1394430dedb24549d773464f110b7f19b86558bc101f84126630', 'page_number': 1}, score: 184.29502006117178)]",1.0,0.9999999403953552,1.0,1.0,1.0 +What was started by the Roman Catholic Church to dispense of the Cathars in Languedoc?,The Cathar Crusade,the German Reformation,"[Document(id=97cae4559ff35001a636e6d751c2be9dad6330f78280d65b945d9ea8427e813b, content: ' In spite of influential supporters among the English aristocracy, such as John of Gaunt, the moveme...', meta: {'name': 'Late_Middle_Ages', 'source_id': 'fb62f16f8c6599f7200add017bece6e2ea916520e544af1dd94713410c254ce9', 'page_number': 1}, score: 133.83751571547543)]",0.0,0.24226750433444977,0.0,0.0,0.0 +What music service is a huge competitor for Tidal?,Spotify,Spotify,"[Document(id=f0c6a5cd5408a0fc01c220e5910c03a2241fbe85099410f518592c1fb7098441, content: ' ""Creating a partnership with Beyoncé, one of the most hard-working and talented people in the world...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 122.76790646829441)]",0.0,1.0000001192092896,0.0,0.0,0.0 +How much was each copper token worth?,a halfpenny,"15,000 t","[Document(id=7350dde4c1db287bcdc1678d351d984bcefbfbb76ea61772d6ec37583745c8f4, content: ' Julius Caesar had his own coins made from brass, while Octavianus Augustus Caesar's coins were made...', meta: {'name': 'Copper', 'source_id': 'de5c60a665b93f9b358be412d53a37ca43d90005e3ec9a553ac38a38f076fadc', 'page_number': 1}, score: 127.28424664644346)]",0.0,0.1019832119345665,0.0,0.0,0.0 +What ships were attacked in 1852?,Glencoe and Sea Nymph,"1,436 people were killed and 1,792 seriously injured, which affected morale badly.","[Document(id=dc733dafa156c8cd33c8bfe0b3a7341d74398730f0f97dd80f1c0b07d6a07ef7, content: ' +In the north, substantial efforts were made against Newcastle-upon-Tyne and Sunderland, which were ...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 160.31924929990572)]",0.0,0.06491856276988983,0.0,0.0,0.0 +What was the first fossil to display both clearly traditional reptilian characteristics?,Archaeopteryx,Archaeopteryx,"[Document(id=0815d5b0f30156ea39d24a8a15062d5e21822317a105f659f2000892f8d657c8, content: ' +The consensus view in contemporary paleontology is that the flying theropods, or avialans, are the ...', meta: {'name': 'Bird', 'source_id': '76114e3e9c92caec1de2bcfd38b6f22e1b48487f665fa6392f914934a6748ae1', 'page_number': 1}, score: 161.75850646724686)]",1.0,0.9999999403953552,1.0,1.0,1.0 +What are the theories that integrate molecular genetics with Darwinian evolution called?,the modern evolutionary synthesis,modern evolutionary synthesis,"[Document(id=83ca9ab1c162586b4348657558983681b0a67dd6b03723f61266a7970c6bcbfd, content: ' The debate over the book contributed to the campaign by T. H. Huxley and his fellow members of the ...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 167.0453688273251)]",0.0,0.9619842767715454,0.0,0.0,0.0 +At what time did Tesla get dinner?,8:10 p.m,8:10 p.m,"[Document(id=2ed21a855190ee9369cd4351878edc528d570a2136e3a2612158d6108a59491e, content: ' until 6:00 p.m. or later, with dinner from exactly 8:10 p.m., at Delmonico's restaurant and later t...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 230.03812342004315)]",1.0,1.0000001192092896,1.0,1.0,1.0 +In what year did the French reach Alsace?,1444,1444,"[Document(id=9dbf1201bfba9c8190d82357ee4f2f5a482d6d6711313439dac94a993edf3757, content: ' +Holy Roman Empire central power had begun to decline following years of imperial adventures in Ital...', meta: {'name': 'Alsace', 'source_id': '1105499d303196fb0b7ae4d26209720b0040e63376f114d458fd77f05836cbd4', 'page_number': 1}, score: 161.7635145905844)]",1.0,0.9999999403953552,1.0,1.0,1.0 +Who was responsible for the handling of the Roman census?,two censors,augurs,"[Document(id=a5e5aea2af0395511da534b5312305295571fb6c4dadbbf94b18f8c15af3add7, content: 'The priesthoods of public religion were held by members of the elite classes. There was no principle...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 160.02474930212503)]",0.0,0.12105397880077362,0.0,0.0,0.0 +When was a resolution agreed to about Chinese human rights issues in San Francisco?,"April 1, 2008","April 1, 2008","[Document(id=d16248de2a5e0d9e6bbb7e85b411bf0a4143206e20471ecab73dc7158ce992b1, content: ' The cost to the city for hosting the event was reported to be USD $726,400, nearly half of which ha...', meta: {'name': '2008_Summer_Olympics_torch_relay', 'source_id': 'a45e6ea09a1ff12d8a239a682c8ef3735ae894d5ab6427b50909053bb046e8b5', 'page_number': 1}, score: 123.16570921986224)]",1.0,0.9999999403953552,1.0,1.0,1.0 +Who was the last pope to be crowned?,Paul VI,"Pius XI. He was sent a letter by Cardinal Eugenio Pacelli, and Roncalli later recalled that it was probably the last letter Pacelli sent until his election as Pope Pius XII","[Document(id=f60e9480f17445bd20fd691b4f23689feab6068d6b9b8032e2bc85a59563b9cf, content: ' Unaffected, he wrote to his sisters Ancilla and Maria and told them both that he was fine. +On 30 No...', meta: {'name': 'Pope_John_XXIII', 'source_id': '21b200b333306d54410b53cfd12e123951dabfc1f534196e5309132d00e8184d', 'page_number': 1}, score: 206.54313808498497)]",0.0,0.5919901132583618,0.0,0.0,0.0 +Who handles serious medical emergencies on Norfolk Island?,the Royal Australian Air Force,Royal Australian Air Force,"[Document(id=c3327e32d8658307f8dca684bbdeb22895ca7a8d7ee8ad6ff0eba6200c5e6da2, content: ' All visitors to Norfolk Island, including Australians, are recommended to purchase travel insurance...', meta: {'name': 'Norfolk_Island', 'source_id': '8be872c9c3302a16163599d52ec60841ddff13042f8e8e2e0aa14de2c192e2ee', 'page_number': 1}, score: 140.52734237020536)]",1.0,0.9777635335922241,1.0,1.0,1.0 +Were Roman soldiers expected to purchase their own equipment?,"wealthiest citizens, who were able to purchase the best equipment",the Roman state was forced to arm its soldiers at the expense of the state,"[Document(id=3f0396346c47d3ea53931dd05016eb360d6261c1b5ab0c7a977a5bbf0df1be01, content: ' As a result, the Roman state was forced to arm its soldiers at the expense of the state, which it h...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 155.2885699000689)]",1.0,0.14734730124473572,1.0,1.0,1.0 +Who did Nasser's group try to assassinate?,Hussein Sirri Amer,Prime Minister Hussein Sirri Pasha,"[Document(id=2742e06ecc0f89cd4249a010992ea21b1098edf0e33e8bcef6fb1a43012b8d68, content: ' It was here that Nasser and his closest comrades, including Sadat and Amer, first discussed their d...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 193.56057646704534)]",1.0,0.6850929260253906,1.0,1.0,1.0 +What was AJ Ayer an important member of?,Logical empiricism,Army Chief of Staff,"[Document(id=6efe91225999db5446906baa4a05c76ae7b2289802a7557fec46193f813b94ba, content: ' He then was posted as chief military aide to General MacArthur, Army Chief of Staff. In 1932, he pa...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 123.8434560431782)]",0.0,-0.0072867972776293755,0.0,0.0,0.0 +In which year was a settlement reached regarding GE's pollution of the Housatonic River?,1999,1977,"[Document(id=754901c14e07fd836481f4df6a9e1c790f259897d11a92d8f4449320bf432be1, content: ' Around the same time, General Electric's Canadian counterpart, Canadian General Electric, was forme...', meta: {'name': 'General_Electric', 'source_id': '26d4044eb2d79191635af69e57c262af8041a70871e60e989128c458529f04cc', 'page_number': 1}, score: 123.02371619032975)]",1.0,0.47267118096351624,1.0,1.0,1.0 +What area in Charleston is considered the 'industrial area'?,the city's Neck area,Charleston–North Charleston–Summerville Metropolitan Statistical Area,"[Document(id=620f512c3df1eb1788fcecbd5ec4c0438953e005020fab4303e121d55369ade5, content: 'Charleston is the oldest and second-largest city in the U.S. state of South Carolina, the county sea...', meta: {'name': 'Charleston,_South_Carolina', 'source_id': '6135d76cf2064b0c4f70e685c5c7e15b8093869ebd10f9a74e8acb07e2173763', 'page_number': 1}, score: 162.28901843144436)]",1.0,0.20230621099472046,1.0,1.0,1.0 +In what year did Frédéric obtain citizenship in France?,1835,1850,"[Document(id=1e3e69a84cc78a6aeb6d7a1e0750f22f9b9f8a20e99eb5fb853b1aee5e4a5464, content: ' In the year 2000, there were 5,171 Dominican friars in solemn vows, 917 student brothers, and 237 n...', meta: {'name': 'Dominican_Order', 'source_id': 'f55ba4e5b37c737e23256c74229a0bf3a5c697b70e171c6ce20c437d900b023e', 'page_number': 1}, score: 114.08279937842687)]",0.0,0.7635257244110107,0.0,0.0,0.0 +What was Tesla afraid someone was trying to do with his invention?,steal the invention,sell his idea to the U.S. military,"[Document(id=1ee394bf7283ad8cd4a5e372c57f28d714e37b587517d82c10d82981009b9740, content: ' Tesla tried to sell his idea to the U.S. military as a type of radio-controlled torpedo, but they s...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 155.00802413090742)]",1.0,0.2841935157775879,1.0,1.0,1.0 +What is the group called that does not agree with government at all?,anarchists,British Union of Fascists,"[Document(id=244034de9b004674160b84dd73a587fddc8ae2c1bcd4dd6d67d8854cb936c8c2, content: ' The Government, up until November 1940, was opposed to the centralised organisation of shelter. Hom...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 111.67806894929063)]",0.0,0.3484962582588196,0.0,0.0,0.0 +What was the name of the bookstore the authors met in?,Old Corner Bookstore,The Eagle and Child and the Lamb and Flag,"[Document(id=463e7639a75455c71923ca7ab663fc14689389b615c5d0a79e6d3f4f8a3efa67, content: ' +The Angel, Islington was formerly a coaching inn, the first on the route northwards out of London, ...', meta: {'name': 'Pub', 'source_id': '502d7ebebad9932fd886ae14992fa24a60ee4433f6dc693672b70b3b21ed1232', 'page_number': 1}, score: 94.35735602218969)]",0.0,0.04613007232546806,0.0,0.0,0.0 +Time and space are both examples of what type of resource?,complexity resources,memory,"[Document(id=e3b88b4f578ec5a002692c5a052496d37d6ee090a0491c5dc17087f489872445, content: ' When negative numbers are required, they are usually stored in two's complement notation. Other arr...', meta: {'name': 'Computer', 'source_id': 'a39560aa5caca299e2e8876ee9b291a51b896b645abcc4d5f2a790a0fa89fcef', 'page_number': 1}, score: 110.41973994552734)]",0.0,0.30840396881103516,0.0,0.0,0.0 +What had happened to Frederick's extra troops from Russia?,"Catherine, who ended Russia's alliance with Prussia and withdrew from the war","Britain also dispatched 9,000 troops to reinforce Ferdinand's Hanoverian army","[Document(id=60dd852bd4811252974d9b4fd57e9056d66a8aa3d37e5b165e9ecfbabcd5ff4e, content: ' Hadik, however, left the city with his Hussars and safely reached the Austrian lines. Subsequently,...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 169.66570089574043)]",1.0,0.2595359981060028,1.0,1.0,1.0 +What percentage of its workforce did Dell terminate in from 2007 to 2008?,10%,10%,"[Document(id=0a1d789e1e3ae04146d38c10ae30c76bb73173ec2df854d0b0695309c7e8629a, content: '” +On April 23, 2008, Dell announced the closure of one of its biggest Canadian call-centers in Kanat...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 154.56627095248245)]",1.0,0.9999999403953552,1.0,1.0,1.0 +What organization was Rome's official caretaker?,the senate,Christian Unity Secretariat,"[Document(id=18ed06b8889609136231a06d99465ac7da3164b444e51e58cace4a1e98ddebcb, content: ' He was the last pope to date to be crowned; his successor Pope John Paul I replaced the Papal Coron...', meta: {'name': 'Pope_Paul_VI', 'source_id': 'fc82a40c990c0b3840b06b32643858d39984b66d78ae950561d809dc84d6edf8', 'page_number': 1}, score: 152.845765627712)]",0.0,0.2545766830444336,0.0,0.0,0.0 +Who chose BYU as the location of the national Middle East Language Resource Center?,United States Department of Education,United States Department of Education,"[Document(id=0b3a599a54fac34c82f29d858c51bbf5ce4f37032b3539a76c2b8a6b918abc6e, content: ' Several of its language programs are the largest of their kind in the nation, the Russian program b...', meta: {'name': 'Brigham_Young_University', 'source_id': 'cb3b81b6f2c540426803106face67bc5fd4db7b1a9ef422ec3601456732b4785', 'page_number': 1}, score: 231.42251702622502)]",1.0,1.0,1.0,1.0,1.0 +Who discovered Hydrogen gas?,Robert Boyle,Robert Boyle,"[Document(id=45ad6989b7801812895d43ea694bad169af9b5dd9024dd96b099fe071a196217, content: ' +In 1671, Robert Boyle discovered and described the reaction between iron filings and dilute acids, ...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 208.54315140237432)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Where was Polybius forced to go as hostage?,Rome,Numidia,"[Document(id=5bece70140dc93b2d211014e46f2a8467378fb5b3661178e320b6fd9f30f0fd8, content: ' Eventually, Jugurtha renewed his offensive, leading to a long and inconclusive war with Rome. He al...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 140.69038882852715)]",0.0,-0.05100712925195694,0.0,0.0,0.0 +In what decade was disco popular?,1970s,1980s,"[Document(id=40c02ba6acd7df36a81130944465adc587a46eaf90fc8f3232939968d3e04b8b, content: ' +In the late 1980s, many local Chicago house music artists suddenly found themselves presented with ...', meta: {'name': 'House_music', 'source_id': 'c556f4a459e8a99b6d2065bab8fd6a918d0a00f13291aad2473922492e4abf2e', 'page_number': 1}, score: 154.38767033374768)]",0.0,0.7754138708114624,0.0,0.0,0.0 +What do the rivers form?,lakes,lakes,"[Document(id=c751375299aac2090fe706008704ed47f7358e09ebad2acbc0d54e2c5b0ab5d3, content: ' Cities such as Milan depend on 80 percent of water from Alpine runoff. Water from the rivers is use...', meta: {'name': 'Alps', 'source_id': 'd2eb3ad3af7d6110ae691bf40660ff7a79e4e9dbfd25785b4e55c7700bfcae79', 'page_number': 1}, score: 122.65050850059238)]",1.0,1.0,1.0,1.0,1.0 +What were the first 256 code points of Unicode made identical to? ,ISO-8859-1,ISO-8859-1,"[Document(id=1ca3328892ba24004a75d8cfaab0f58ce32bed8ee034be6fa54e33e8b0a62eb5, content: ' UTF-16 extends UCS-2, using one 16-bit unit for the characters that were representable in UCS-2 and...', meta: {'name': 'Unicode', 'source_id': '5466bae2d66ecdb546af3cc34795de9f594465bd49e997a8c32cca0dc3b5d1e2', 'page_number': 1}, score: 160.02748578769464)]",1.0,0.9999998807907104,1.0,1.0,1.0 +"How many countries did her song ""Irreplaceable"" get number one status in?",five,sixth,"[Document(id=5567145967b8e3e9c915e0f901144470e7375757e88c1cee87ef724f63eeeb55, content: ' The album's first single, ""Frozen"", became Madonna's first single to debut at number one in the UK,...', meta: {'name': 'Madonna_(entertainer)', 'source_id': '0868d847bb768c30a7507239169d5e441bb3181b66c8b9353a863daa3d58e425', 'page_number': 1}, score: 104.24713931541928)]",0.0,0.5111092329025269,0.0,0.0,0.0 +How many Examination Boards exist in India?,30,"41%), Sirmaur district 530,164 (7.","[Document(id=2b2a0a32a1973d54640e0e315e5a99e9e5578f50f63cd9128d7e9c1d682765f5, content: '41%), Sirmaur district 530,164 (7.73%), Una district 521,057 (7.60%), Chamba district 518,844 (7.57%...', meta: {'name': 'Himachal_Pradesh', 'source_id': 'ddcec2b95c05dd3a27f226df5c7c3597d80c7deaf9af8238acb6fcd9e9d4f233', 'page_number': 1}, score: 103.45917920437779)]",0.0,0.18524779379367828,0.0,0.0,0.0 +What play showed an early depiction of civil disobedience?,Antigone,Antigone,"[Document(id=23b161a9804a8a11c97c4f3f14b6762b49fdf3b1fefb8ff1de76b00cfa00995a, content: 'One of its earliest massive implementations was brought about by Egyptians against the British occup...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 159.1381537658833)]",1.0,1.0000001192092896,1.0,1.0,1.0 +"Once in conversation with the Queen, how is Elizabeth addressed?",Ma'am,"a touch of humour, gentleness and understanding"".","[Document(id=c7239e48dc02a900f2897ae3d8f229d324938f75dbe1dc6902493605c7a27f72, content: ' Patriation removed the role of the British parliament from the Canadian constitution, but the monar...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 134.60844833490034)]",1.0,-0.08331970870494843,1.0,1.0,1.0 +In what century did Paris become largely into fashion?,19th,19th,"[Document(id=c37151361f1ebbd218e8e53e243abfe86d24fabddd8be6f3401cf56e9e98633b, content: ' Its name is said to have come in 1814 from the Russian soldiers who occupied the city; ""bistro"" mea...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 154.93299659418653)]",1.0,0.9999998807907104,1.0,1.0,1.0 +When did Alfred the Great begin his reign?,871,1155,"[Document(id=c7bdd00168040f6dde256d6ef2194d3f83ef5509c789f5d39e9215b8ddef1069, content: ' His successors continued to struggle against the papacy as well as the German nobility. A period of...', meta: {'name': 'Middle_Ages', 'source_id': 'f13b345e20c54b6cc55363080d7492474df62715717172aa9f1029f286010ad9', 'page_number': 1}, score: 135.05448547534107)]",1.0,0.4921835660934448,1.0,1.0,1.0 +What two methods can be used when using Load Testing?,data or a large number of users,load or endurance,"[Document(id=77ce7cf14e807c8808892e96984a50baad4f22bdca6b22034b2fb170f96240a2, content: ' Stability testing (often referred to as load or endurance testing) checks to see if the software ca...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 108.62422873080027)]",1.0,0.14578144252300262,1.0,1.0,1.0 +what may possibly be a solution to net neutrality concerns? ,municipal broadband,municipal broadband,"[Document(id=bb0e180e009c490f3ae30fb55a9a91cbe512409c2e12ebae0203298a6f3efe78, content: 'S. Federal Communications Commission (FCC) was reported to be considering a new rule that will permi...', meta: {'name': 'Internet_service_provider', 'source_id': 'b6cdeebacab1c0fc99cc348ba10a7ad5f8a09ca4691c74f89374730939ffbbb4', 'page_number': 1}, score: 117.52238522381246)]",1.0,1.0,1.0,1.0,1.0 +What are the ground-attack aircraft in the USAF usually deployed in support for? ,U.S. ground forces,U.S. ground forces,"[Document(id=cdfa1e6bea8fd42b00ce80468ba471689216a6195425c2435fb1141b0b8c7e6f, content: ' +The ground-attack aircraft of the USAF are designed to attack targets on the ground and are often d...', meta: {'name': 'United_States_Air_Force', 'source_id': '1bb222de66e5a2c81f1d5b75bf3eced066939aec82934a5cb176a607b4e8cc8f', 'page_number': 1}, score: 219.74746796807392)]",1.0,0.9999999403953552,1.0,1.0,1.0 + In what year did the invasion of Russia take place?,1812,1854,"[Document(id=dce7d3533b06f69f181c3de13124bc37d0935b0fc7460eda6680d9f25382da40, content: ':130–43 +In the spring of 1854 the Russians again advanced, crossing the Danube River into the Turkis...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 170.65248009278505)]",0.0,0.6653627157211304,0.0,0.0,0.0 +What was the practice of religion to the Romans?,personal choice,a part of daily life,"[Document(id=1a8c1e09e777c4e183dceed861053866bfd85f790fb0c2cd9b9aad59ad7dc5b2, content: ' Even the most skeptical among Rome's intellectual elite such as Cicero, who was an augur, saw relig...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 216.20605156380242)]",1.0,0.2284637987613678,1.0,1.0,1.0 +What is one example of failure testing?,Software fault injection,Software fault injection,"[Document(id=76ad3fb3d257a3812a169d749a5fb13f2e05383764e8f9dfc53895f08747b375, content: ' Beta versions can be made available to the open public to increase the feedback field to a maximal ...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 133.72966963796148)]",1.0,0.9999998807907104,1.0,1.0,1.0 +"Out of the maximum amount of players allowed, one must be a what?",goalkeeper,25-man squad of players aged over 21,"[Document(id=729a1a7b3183917b1dc01acb07f1b632efc33f4eea6aeffbe484b1530c2d94f0, content: ' By 2009, under 40% of the players in the Premier League were English. +In response to concerns that ...', meta: {'name': 'Premier_League', 'source_id': '369baa9a5a854704f16fadd35144d3a3150cf1e9ae1671d9b50a3448547b814b', 'page_number': 1}, score: 105.43494496687926)]",0.0,0.2028268426656723,0.0,0.0,0.0 +In which year was the new constitution ratified?,1855,1582,"[Document(id=a638f2f35fdc3348abd72c12123d6405e680f89ab2ba20a06a276d5bec5d7185, content: ' In 1577, a Compendium was sent to expert mathematicians outside the reform commission for comments....', meta: {'name': 'Gregorian_calendar', 'source_id': 'ca744d28337ec72fbc3c2a58e32b2af45254132ca9704f144c8014d187288e9a', 'page_number': 1}, score: 119.17205306382988)]",0.0,0.5848165154457092,0.0,0.0,0.0 +What vessel was attacked in 1845?,Naiad,"Russian Bomarsund fortress on Åland Islands. In the same month, the Western Allied Baltic Fleet tried to destroy heavily defended Russian dockyards at Sveaborg outside Helsinki. More than 1000 enemy guns tested the strength of the fortress for two days. Despite the shelling, the sailors of the 120-gun ship Rossiya","[Document(id=3c65091ec8b7ef0626d34520c5e99c8f6539d594394209264472f89890ca987c, content: ' These battles are known in Finland as the Åland war. +In August 1855 a Franco-British naval force ca...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 167.4531894262477)]",0.0,0.11404582113027573,0.0,0.0,0.0 +When did Travel + Leisure list Yale as one of the most beautiful campuses?,2011,2011,"[Document(id=92daacd08f6215b2b6c9a23e96940ae9ffa3af8f6a0300108c6bfcf840702db9, content: ' In western New Haven, the university holds 500 acres (2.0 km2) of athletic facilities, including th...', meta: {'name': 'Yale_University', 'source_id': 'f33638c4d6b8d7572ba500f3a8affca1729aae3046c83a94e2a7424dd50f6549', 'page_number': 1}, score: 185.11339063937942)]",1.0,1.0,1.0,1.0,1.0 +What sort of variants did the Human Genome Project indicate that many of the measured transcripts were?,alternative variants,expressed sequence tags,"[Document(id=f61b4e12858ae84a04762a59c6bfaccab2588f02781fe27a55cb8323cef086ed, content: ' This mechanism is a common source of new genes in prokaryotes, sometimes thought to contribute more...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 104.78775277873254)]",1.0,0.13049247860908508,1.0,1.0,1.0 +During what year did the Italians invade Libya?,1911,1989,"[Document(id=097cc790917875463dd3428a59035e34aab1b101e458283bb98ccd2fef6bfe59, content: ' The period also saw a growth in domestic Islamist opposition, formulated into groups like the Musli...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 199.0090476175897)]",1.0,0.5311593413352966,1.0,1.0,1.0 +What did Davidson believe the God of Aristotle is synonymous with?,rational thought,Jupiter,"[Document(id=a8cc5557b921c5766b219ced4b8319da3ea79db597efbb98b7cbe6998afe6656, content: ' Ambrose, the influential Bishop of Milan and future saint, wrote urging the rejection of Symmachus'...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 185.2449344711922)]",0.0,0.0053202565759420395,0.0,0.0,0.0 +What percentage of students at Notre Dame are the children of former Notre Dame students?,21–24%,21–24%,"[Document(id=4a2399f100a536a4baf71c5dedf166c45f09b1b237a505e9a84faf1da7fd8a1b, content: ' +In 2014 the Notre Dame student body consisted of 12,179 students, with 8,448 undergraduates, 2,138 ...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 172.25504829596989)]",1.0,0.9999998807907104,1.0,1.0,1.0 +On what day did U.S> planes located Ozawa's fleet?,second,25 October,"[Document(id=f338e2189fa194b23ca50a67ebd0647e84f119f03f303e8a80c0ee355a155d72, content: ' commanders ignored reports that Kurita had turned back towards San Bernardino Strait. They had take...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 160.27172691772705)]",1.0,0.02242216095328331,1.0,1.0,1.0 +Greek was crowned the European champions in what event?,Euro 2004,Euro 2004,"[Document(id=4b9cc1d29ed0f9a4cb783c7febb93802073dee536efd60ee53a2c3e5bec9cccf, content: ' Their best ever performance was in the 1896 Summer Olympics, when Greece finished second in the med...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 154.4942077869361)]",1.0,1.0,1.0,1.0,1.0 +Which tense indicates that an action hasn't finished?,imperfective,/p pʰ b,"[Document(id=f80807120f788ee71503c071a59466d44904fb238af267935e4ae418c2b5a1da, content: ' Punjabi has lost breathy-voiced consonants, which resulted in a tone system, and therefore has a di...', meta: {'name': 'Aspirated_consonant', 'source_id': 'ded60872a72621c0cc2ee33d9a7b0d60f1474075e260da958b752dbe72e00321', 'page_number': 1}, score: 121.52265386631508)]",0.0,0.07327696681022644,0.0,0.0,0.0 +What acoustic element could be distorted by different audio settings in iPods prior to 2007?,Bass,bass instruments,"[Document(id=7308906ca1725346de1f8669d04745bd0719f93959d2b77a3e2eedd2aea218ba, content: ' This would happen for EQ settings like R&B, Rock, Acoustic, and Bass Booster, because the equalizer...', meta: {'name': 'IPod', 'source_id': 'f0324763387a7e11c235569bada6935891b2c5ba40dd065ab5acc1bb5de8dbed', 'page_number': 1}, score: 151.26005977413936)]",1.0,0.40640756487846375,1.0,1.0,1.0 +In what way can Neutral triatomic hydrogen exist?,excited form,in the interstellar medium,"[Document(id=628fa2cca8f8b08b31e468b195f5cd5c6ad4092446b69382591236851e2cc2ff, content: ' +Hydrogen, as atomic H, is the most abundant chemical element in the universe, making up 75% of norm...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 114.30356430255492)]",1.0,0.09371746331453323,1.0,1.0,1.0 +How are children used in the conflicts in Burma?,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements.,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements,"[Document(id=ff11a1c6102e2b293943cbcfb8c2935799487df6f465dea412c00844c1c7c655, content: '"" In July 2013, the Assistance Association for Political Prisoners indicated that there were approxi...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 166.35832930030904)]",1.0,0.997592568397522,1.0,1.0,1.0 +In what decades were Arsenal players stereotyped in a negative way?,1970s and 1980s,1970s and 1980s,"[Document(id=ac63b8357f80e413cededa7c5a37849ede32b9c8d7d3181d6e287572836f5f86, content: ' They formed the backdrop to one of the earliest football-related films, The Arsenal Stadium Mystery...', meta: {'name': 'Arsenal_F.C.', 'source_id': 'b197b14120aabbc3bf2073d4b3e99c1c51d56d5ac5248f8a97dadcfa056a6f7e', 'page_number': 1}, score: 72.26640254792218)]",1.0,1.0,1.0,1.0,1.0 +When did new construction start in Melbourne?,late 1890s,February 2006,"[Document(id=3513fcc6c75aee92ba5e4d20e40d134b9159f33067469bcc48634091ed74a9ed, content: '8 billion annually. Melbourne has been attracting an increasing share of domestic and international ...', meta: {'name': 'Melbourne', 'source_id': 'e9cc00699ea29b72949874ec0e147efa277ecdaa2a727246f376aab1a8d342bd', 'page_number': 1}, score: 185.98741924757314)]",1.0,0.34920746088027954,1.0,1.0,1.0 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:15.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:15.csv new file mode 100644 index 0000000..0fffa2d --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:15.csv @@ -0,0 +1,122 @@ +questions,true_answers,predicted_answers,retrieved_docs,doc_mrr,sas,doc_map,doc_recall_single_hit,doc_recall_multi_hit +What were the earliest priesthoods? ,flamines,flamines,"[Document(id=3c9b5484de418625b08e3d8f4e3c3398f40f2f5af0bbf48528ee46ea38c64e7c, content: ' In household cult, the paterfamilias functioned as priest, and members of his familia as acolytes a...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 176.94739092146085)]",1.0,0.9999998807907104,1.0,1.0,1.0 +When was a third visit to Mnafeld scheduled? ,early 1546,5 July 2005,"[Document(id=2bb0143cbe0013e9a1f2c007273d15715e8d6daa8bd81bb7fd41acf67926990b, content: ' After the success of the British Galleries, opened in 2001, it was decided to embark on a major red...', meta: {'name': 'Victoria_and_Albert_Museum', 'source_id': '202e0997b99bb8f0aaa6dd9c09c05e008cf8809c92549b6c9f073b5d26d1a3dd', 'page_number': 1}, score: 79.8326457180098)]",0.0,0.46504393219947815,0.0,0.0,0.0 +What was the name of the airfield where heavy raids took place?,Farnborough,Coventry,"[Document(id=93c78551f27ece2b380d8a30b1c8852063e2afe373bf66ac0ff8fc705d37f96c, content: ' On 19 November 1940 the famous RAF night fighter ace John Cunningham shot down a Ju 88 bomber using...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 170.3618732501316)]",1.0,0.1549026221036911,1.0,1.0,1.0 +The Modern English ending -'s is derived from what ending in Old English?,-es,-de,"[Document(id=1d129059a6cc053af03a870ad5fe1671146b32a0cf0373364427a2d022b5d3f0, content: ' Old English also sometimes uses compound constructions to express other verbal aspects, the future ...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 99.03555301837389)]",1.0,0.4963504672050476,1.0,1.0,1.0 +In what area is it common for spiritual mentorship to be extremely high?,the West,Hinduism,"[Document(id=b43e607046d3a9f2878f76721d6d75cb6c46d2bdb38f6b3fdc5f1b9a2f23a163, content: ' +In The Church of Jesus Christ of Latter-day Saints (LDS Church), the teacher is an office in the Aa...', meta: {'name': 'Teacher', 'source_id': '8d3b7bd3176a10380300a49fa713b8fffa5e77a181df51ea823d2bdeb46d4c7b', 'page_number': 1}, score: 141.90348317285938)]",1.0,0.10249563306570053,1.0,1.0,1.0 +Who is the only Greek volleyball club to win European titles?,Olympiacos,Olympiacos,"[Document(id=4af194e6a549f3cca324fac69732006a675ddeb8e841c964604dff19122e4f16, content: 'O.K, Maroussi) have won 3 Triple Crowns, 5 Saporta Cups, 2 Korać Cups and 1 FIBA Europe Champions Cu...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 155.4234665974737)]",1.0,1.0,1.0,1.0,1.0 +How many judges were on the panel for this season?,four,four,"[Document(id=64448bd26ee757122a322e3c87d03eaf23b7b9bfaa5b939f12b70c7a629dd6d7, content: ' This season's judging panel consisted of Randy Jackson, along with Mariah Carey, Keith Urban and Ni...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 99.72177642799429)]",1.0,1.0,1.0,1.0,1.0 +"Along with a chemical process, what type of process is used by night vision devices to convert infrared into visible light?",electrical,electrical,"[Document(id=2296ed852027297f8181dc1f64af4fcbb66e36bb282c6197423888ce9d011df5, content: '9–14 μm) and produce images of that radiation. Since infrared radiation is emitted by all objects ba...', meta: {'name': 'Infrared', 'source_id': 'ba9d9843065facd8aa75e472f0c7d760ae3d78712335a49f7abaf499acb21d1c', 'page_number': 1}, score: 114.32649598257161)]",1.0,1.0,1.0,1.0,1.0 +The second most prominent religion in New York is what?,Judaism,Judaism,"[Document(id=5dac70957842f214d4c60f76564b27a7e657cd237c677ed236d1ed1ca76e360f, content: ' +Christianity (59%), particularly Catholicism (33%), was the most prevalently practiced religion in ...', meta: {'name': 'New_York_City', 'source_id': '8a1509d057a51ed7a68b42ff87df5d459ac7f15f3d0f4f1685859ba197f9686d', 'page_number': 1}, score: 169.64570832993456)]",1.0,1.0,1.0,1.0,1.0 +The Free Hanseatic City of Bremen includes which cities?,Bremen and Bremerhaven,two urban districts,"[Document(id=d439f7d8555c7aea5751230341480bbf37f3e3f9e6b4e542f7cb145dfa03c950, content: ' In the three free states of Bavaria, Saxony, and Thuringia the government is referred to as the Sta...', meta: {'name': 'States_of_Germany', 'source_id': '60764e89141121f734e956aeeb48fcbe12c7e4fdec683c4f9fa29406a3f0579e', 'page_number': 1}, score: 161.52389140584813)]",1.0,0.12825778126716614,1.0,1.0,1.0 +What seeks to limit the use of polysyllabic characters?,CPC,β-Lactam antibiotics,"[Document(id=3b8de679cf4330422020b3c530f29f7e55171fe9f5a7383faca38ec8723c7d6e, content: ' Bacterial cell walls are different from the cell walls of plants and fungi, which are made of cellu...', meta: {'name': 'Bacteria', 'source_id': 'f8e9d365bfe51780e8f42f3a07cb69dd7a6df46bc7c67030d0853bdb13eeb390', 'page_number': 1}, score: 68.15934476800317)]",0.0,0.03749799355864525,0.0,0.0,0.0 +In what courts can federal habeas corpus suits be brought?,federal,district court,"[Document(id=a54d74acb097c757f1d529fef3759c7167a1133c97bc7020d3ed8e7abf0a8c44, content: 'S. Court of Appeals for the Fifth Circuit got the issue wrong — on the second try as well as on the ...', meta: {'name': 'Affirmative_action_in_the_United_States', 'source_id': 'b7156a1660bb591e3abb4460028a7922c063ba8962ad6dce11678f8b08c5cdcb', 'page_number': 1}, score: 100.49442833929467)]",0.0,0.2981231212615967,0.0,0.0,0.0 +What did Whitehead consider himself as a philosopher? ,rank amateur,William James and John Dewey,"[Document(id=af86ec9bae4b13ce6d1249dc216f158ad2b5969e7d1ca52c1e64a93e2419a98a, content: ' However, it was not until the 1970s and 1980s that Whitehead's thought drew much attention outside ...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 187.2676303169056)]",1.0,-0.03977593034505844,1.0,1.0,1.0 +What condition does a person have to meet to be allowed by law on a coin?,deceased,slave,"[Document(id=74be6b32b41b12abb2990476f778fb5335428e0a6b6f01cf7a1235ad434899b9, content: 'The rule of law is the legal principle that law should govern a nation, as opposed to being governed...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 85.0298293465967)]",0.0,0.05585194379091263,0.0,0.0,0.0 +Which region of the US experiences rain year round?,Pacific Northwest,Hawaii,"[Document(id=5cc5c90d3b7502d6e064b808c140d69c74657b0e5d9b60b7a5c3a8b76125d80b, content: 'S., with Quinault Rainforest in Washington having an average of 137 inches (348 cm). Hawaii receives...', meta: {'name': 'Geography_of_the_United_States', 'source_id': '6eef5e1e8e3a1d489b23e0304d3cc9472b84a44871b2fd5ecfc8ab3f12e8ccf0', 'page_number': 1}, score: 162.08782552673898)]",1.0,0.26181262731552124,1.0,1.0,1.0 +What is the ability to recognize and adapt to new specific pathogens called?,Adaptive (or acquired) immunity,Antigen specificity,"[Document(id=7f56f4718d147b0156e077e4853a6ac0250773335364d5e5cac1ff37df572de8, content: ' The adaptive immune response is antigen-specific and requires the recognition of specific ""non-self...', meta: {'name': 'Immune_system', 'source_id': 'fcb8c5092b2596a66d4f96587d653cbabda606874d86632c35ec40ef8f023051', 'page_number': 1}, score: 158.72987153215703)]",1.0,0.2857634127140045,1.0,1.0,1.0 +Who allegedly hit a home run to the Center?,Babe Ruth,Babe Ruth,"[Document(id=9f4c20a2bce44cf9bd3af493be183ab333c4d80eef37f323c78a73218b2cb402, content: ' Because of the tie the Giants and Cubs ended up tied for first place. The Giants lost the ensuing o...', meta: {'name': 'Chicago_Cubs', 'source_id': '10bd5330115a2fe2b9ef60d21abe3942769709161783409af0c4444ccac129c3', 'page_number': 1}, score: 164.16816603528014)]",1.0,1.0,1.0,1.0,1.0 +Which type of graduate from Imperial earned the second highest average starting salary after graduation?,Computing graduates,"3*). The 2008 Research Assessment Exercise also showed five subjects – Pure Mathematics, Epidemiology and Public Health, Chemical Engineering, Civil Engineering, and Mechanical, Aeronautical and Manufacturing Engineering – were assessed to be the best[clarification needed] in terms of the proportion of internationally recognised research quality. +Imperial College Healthcare NHS Trust was formed on 1 October 2007 by the merger of Hammersmith Hospitals NHS Trust (Charing Cross Hospital, Hammersmith Hospital and Queen Charlotte's and Chelsea Hospital) and St Mary's NHS Trust (St. Mary's Hospital and Western Eye Hospital) with Imperial College London Faculty of Medicine","[Document(id=2eda08ca6f7b84cfcba1c93efa2266542d3f72df60ef8898ce7a2337e02a91a8, content: '9 million from Funding Council grants (2011/12 – £172.4 million) and £12.5 million from endowment an...', meta: {'name': 'Imperial_College_London', 'source_id': '8ff02d910ce681fdd72074b13ed322271625386966ee827dabf1c93ca556fd6e', 'page_number': 1}, score: 128.41401665791395)]",1.0,0.30592256784439087,1.0,1.0,1.0 +In what year did Eisenhower propose his second civil rights act?,1960,1955,"[Document(id=e2beebdfa17be4f6efba32829e2212a1399b680760fcc02660150921b570ef84, content: 'Dwight David ""Ike"" Eisenhower (/ˈaɪzənˌhaʊ.ər/ EYE-zən-HOW-ər; October 14, 1890 – March 28, 1969) wa...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 178.3948185668636)]",1.0,0.6117545366287231,1.0,1.0,1.0 +What is the current height limitation in Paris in central areas?,50 m,50 m (160 ft),"[Document(id=44b4daa4d3c4378951ca880516b3342520595caa477d61e2c5fc88de3627c8f0, content: ' Modern Paris owes much to its late 19th century Second Empire remodelling by the Baron Haussmann: m...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 156.9284298092429)]",1.0,0.7246969938278198,1.0,1.0,1.0 +Income level was found to have a signification correlation to which outcome in the study?,nutrition literacy,"education increases measurable intelligence. Studies have shown that while educational attainment is important in predicting intelligence in later life, intelligence at 53 is more closely correlated to intelligence at 8 years old","[Document(id=85ad820474faf97ab67cb2e88fa7a8918ba053dd42f925d5b131e99fbf09a755, content: ' Educational psychology both draws from and contributes to cognitive science and the learning scienc...', meta: {'name': 'Education', 'source_id': '80079a0cc7970848bb327b41c4c044a89b33b363968cebeee5d10bac1c584862', 'page_number': 1}, score: 92.56192788229203)]",0.0,0.3706241250038147,0.0,0.0,0.0 +How much did Viacom want in damages in their lawsuit against youtube?,$1 billion,$1 billion,"[Document(id=497c11f9fbbfb1f37be3dcd0a29d747c5f8f8967252a25e340b937417a080280, content: ' Three successful complaints for copyright infringement against a user account will result in the ac...', meta: {'name': 'YouTube', 'source_id': '4d18798daeceb997a6dcb04b575dbbdad768671ca9202116370611d944d510be', 'page_number': 1}, score: 175.82713084515086)]",1.0,0.9999998807907104,1.0,1.0,1.0 +Who should be able to challenge administrative orders in court?,individuals,individuals,"[Document(id=8a0f220e6df4e50504002c6ad6312d289ff774c9c753db65e80fb1022f521be2, content: ' Dicey's: “no man is punishable or can be lawfully made to suffer in body or goods except for a dist...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 115.93977230752789)]",1.0,1.0,1.0,1.0,1.0 +The Slavs were under what name in the early 6th century?,the Antes and the Sclaveni,Indo-European,"[Document(id=6dfceb86a92f3071080d87731516cf9681746b00fd5c00deac18699cdc548a64, content: ' A notable political union of the 20th century that covered most South Slavs was Yugoslavia, but it ...', meta: {'name': 'Slavs', 'source_id': 'a55731ff3cd03d8fe34d4e646dbfcdc2088f01190e5110c9ff0baf4e4b486847', 'page_number': 1}, score: 200.64880411744542)]",1.0,0.1052892878651619,1.0,1.0,1.0 +How are the results simplified when using the SASO?,"into four summaries that look specifically at responses that correspond to either homosexuality, heterosexuality, bisexuality or asexuality.","measures sexual orientation on a continuum, considers various dimensions of sexual orientation, and considers homosexuality and heterosexuality separately","[Document(id=3638fb16add213ce4e749139ce1852d845838bce357ac5b46f8f2163ed664217, content: ' The classification scheme works such that individuals in the same category show the same balance be...', meta: {'name': 'Sexual_orientation', 'source_id': 'f94279650936a403aa3d2b43ab1ad42b1fd14d05455aa86fbcda6cda978ffe5c', 'page_number': 1}, score: 79.03556725755448)]",1.0,0.4782727062702179,1.0,1.0,1.0 +What socioeconomic class used the Florentine Tuscan language in Dante's time?,upper class,urban and literary classes,"[Document(id=fbf2110b67db9729de11d4fe22a0c0ff32b3a45b47041c33f08674359d8df041, content: ' The city of Alghero in Sardinia was repopulated with Catalan speakers in the 14th century. The lang...', meta: {'name': 'Catalan_language', 'source_id': 'e2178d77cee39fac7bfc6ad15bc5cd10fa8a19c21faea32b0ea8a06b3d4a200f', 'page_number': 1}, score: 170.64869262627934)]",0.0,0.4680020213127136,0.0,0.0,0.0 +"In addition to ammo proposals, what other need was articulated?",The need for some form of tracer or smoke trail,Nuclear arms reduction,"[Document(id=0f23a3d664b2d9d293629ea9d692c835b9a8e93ffb30bc45de463cd9b9d93424, content: ' Therefore, Khrushchev would not take part in the summit. Up until this event, Eisenhower felt he ha...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 81.54270754807897)]",0.0,0.12531504034996033,0.0,0.0,0.0 +What is that power that enables us to love?,Prevenient grace,mercy of God and His care for His creation,"[Document(id=ab06859e83076577bc2a04088a7525ddee1f44dedfa9ffcf46872843c8c86632, content: ' They neither expected nor sought the appearance of the stigmata or any other physical manifestation...', meta: {'name': 'Dominican_Order', 'source_id': 'f55ba4e5b37c737e23256c74229a0bf3a5c697b70e171c6ce20c437d900b023e', 'page_number': 1}, score: 91.85024388900882)]",0.0,0.3296220898628235,0.0,0.0,0.0 +"Who was assassinated on September 22, 189 AD?",He Jin,He Jin,"[Document(id=232041b4afb4ca71e3d1d3697502447b0c4bfa12f1eccde1a6f36b432b833244, content: ' 189 AD), half-brother to Empress He (d. 189 AD), plotted with Yuan Shao (d. 202 AD) to overthrow th...', meta: {'name': 'Han_dynasty', 'source_id': '060d95b91f9c20e5c8cd8b8a032ab44aac38ab88ce62d3c302bea0865f533568', 'page_number': 1}, score: 148.7241546412495)]",1.0,1.0000001192092896,1.0,1.0,1.0 +How did Gaddafi change the legal system?,he introduced sharia as the basis for the legal system,sharia law,"[Document(id=21cb89e76694844f819e54d7269a6349d3404a6310be592588c5473ad2d8efe2, content: ' Gaddafi proclaimed that the People's Congresses provided for Libya's every political need, renderin...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 209.0268681523744)]",1.0,0.6846116781234741,1.0,1.0,1.0 +What name was it assumed Elizabeth would take upon her marriage to Philip?,"Mountbatten,",House of Mountbatten,"[Document(id=e315f255311d7dc4c756937a51f6602a99f1b8e3b0a145306a3d8c41c258bc57, content: ' She was educated privately at home. Her father acceded to the throne on the abdication of his broth...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 235.82296588222033)]",1.0,0.7922807931900024,1.0,1.0,1.0 +What was the attack on the British weakness?,Battle of Fort Bull,Port Moresby,"[Document(id=b0fed1abc72a4d56e9957121cabc7bf9e24323c310875bcb6c1dbad5036aa408, content: 'S.-UK Combined Chiefs of Staff, which was also in Washington. Allied resistance, at first symbolic, ...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 151.05475774942101)]",0.0,0.09719950705766678,0.0,0.0,0.0 +How much of South Tucson is Native American?,10%,10%,"[Document(id=3f9e73fac96f476882c4b1cad262df3b9bf4863376ed88062ea62a6de747d305, content: ' To the south, N. Highland Avenue leads to the Barraza-Aviation Parkway bicycle path. +South Tucson i...', meta: {'name': 'Tucson,_Arizona', 'source_id': '1cd8368afbbb7156e4d37c16c9795a1cf7209fbd0036331b6759337ae62d7224', 'page_number': 1}, score: 191.0487031374489)]",1.0,0.9999998807907104,1.0,1.0,1.0 +Whose theory claimed that species were unchanging and designed by God?,John Ray,John Ray,"[Document(id=9a450124afc12f193f72ae1f8310b72ada7cbbf8f34bbd71f66eb32b0f47efbc, content: ' With the development of the modern evolutionary synthesis in the 1930s and 1940s, Darwin's concept ...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 189.49249077670783)]",1.0,1.0,1.0,1.0,1.0 +Who can restore a database that has been corrupted?,database administrator,Database transactions,"[Document(id=64793ba16b3dcf1ad2ea402349708087ba8524851dc937aa0c8517e629e95067, content: ' Data security prevents unauthorized users from viewing or updating the database. Using passwords, u...', meta: {'name': 'Database', 'source_id': '98b09743ece9be42ad021f0dc6ffab08c322e7eeace6aae0b98b4bbf67224bd3', 'page_number': 1}, score: 79.02602062480052)]",1.0,0.4896238446235657,1.0,1.0,1.0 +What unpopular philosopher's text did Avicenna learn from?,Abu Abdullah Nateli,Abu Abdullah Nateli,"[Document(id=ec70e294a8a1203ecdbb9fe95270466a0129a5d607fe306df146559206aa5b31, content: ' Avicenna first began to learn the Quran and literature in such a way that when he was ten years old...', meta: {'name': 'Avicenna', 'source_id': '25cbb1011a29473e1dfa8d96ebe95bc8ead7393548de57e871f959624f7238f1', 'page_number': 1}, score: 189.51529079780602)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What was the event called that caused Ayutthaya to reclaim lost territories ?,the Chinese threat,Ayutthaya recovered its territories by 1770,"[Document(id=391f8278a6d664b0fda2a91e05a75fcb178e560aca2778cf3383f74470691074, content: ' Its trade and secular administrative reforms built a prosperous economy for more than 80 years. Fro...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 152.93271782643825)]",1.0,0.08274449408054352,1.0,1.0,1.0 +"Before the introduction of the Latin alphabet, how was Old English written?",runic system,runic system,"[Document(id=540a5b537ec7e1fdea905baae428e9a4c76167e7be305ec2dac494ec1ac16d1f, content: 'Old English (Ænglisc, Anglisc, Englisc) or Anglo-Saxon is the earliest historical form of the Englis...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 162.1246341956293)]",1.0,1.0,1.0,1.0,1.0 +"When groups that are antagonistic to each other form a coalition for hidden gain, it is called what?",unholy alliance,unholy alliance,"[Document(id=b281d8f2fa5c72bf9176af2f66fbd72725ba99c1c3e8e2e12044e694c6ee3bbe, content: ' This sum itself may be all or a portion of the difference between the actual (inflated) payment to ...', meta: {'name': 'Political_corruption', 'source_id': 'e7c84fb03cb06b72c4ab51eb24218e9f2a10b95493e43d575342db0ffa3e9e1d', 'page_number': 1}, score: 125.71453753599548)]",1.0,1.0,1.0,1.0,1.0 +What made a strong economic case for investing in low carbon technologies?,Stern Review,Clean Edge,"[Document(id=7726f77f73261125d28fc6db733db2a8ae6e451dfc266962ad1ddc5ccf22d22f, content: ' +New government spending, regulation, and policies helped the industry weather the 2009 economic cri...', meta: {'name': 'Renewable_energy_commercialization', 'source_id': 'fa5f7904b8df5272f4f870ed401f610435c916171c87b47deed9a45e6bd594ea', 'page_number': 1}, score: 108.44000862757655)]",1.0,0.03996780887246132,1.0,1.0,1.0 +"Who introduced the Latin version of the word ""insect""?",Pliny the Elder,Pliny the Elder,"[Document(id=d4ddb3955cf6ff9d6bed687f54220f695fef0951972c64309938d9cf7670f6ba, content: ' +The word ""insect"" comes from the Latin word insectum, meaning ""with a notched or divided body"", or ...', meta: {'name': 'Insect', 'source_id': 'e14215fdfb7e1394430dedb24549d773464f110b7f19b86558bc101f84126630', 'page_number': 1}, score: 184.29502006117178)]",1.0,1.0,1.0,1.0,1.0 +What was started by the Roman Catholic Church to dispense of the Cathars in Languedoc?,The Cathar Crusade,the German Reformation,"[Document(id=2562c4ed0cf84994d606d5866fb6563cf019303140e39c1b2a35e85652c879f9, content: ' In spite of influential supporters among the English aristocracy, such as John of Gaunt, the moveme...', meta: {'name': 'Late_Middle_Ages', 'source_id': 'fb62f16f8c6599f7200add017bece6e2ea916520e544af1dd94713410c254ce9', 'page_number': 1}, score: 133.62001907744167)]",0.0,0.24226750433444977,0.0,0.0,0.0 +What music service is a huge competitor for Tidal?,Spotify,Spotify,"[Document(id=ea56a7c43f1347c76ec07affd8ad20729d040a8d58a36f56583b537f903e8142, content: ' +The release of a video-game Starpower: Beyoncé was cancelled after Beyoncé pulled out of a $100 mil...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 112.66482791724965)]",0.0,1.0,0.0,0.0,0.0 +How much was each copper token worth?,a halfpenny,"15,000 t","[Document(id=98b439fa147a97e5eae30c05b00e8ced8af2688f70d603a4a19bd0acee1fb227, content: ' Electroplating commonly uses copper as a base for other metals such as nickel. +Copper has been in u...', meta: {'name': 'Copper', 'source_id': 'de5c60a665b93f9b358be412d53a37ca43d90005e3ec9a553ac38a38f076fadc', 'page_number': 1}, score: 122.23139081007312)]",0.0,0.10198323428630829,0.0,0.0,0.0 +What ships were attacked in 1852?,Glencoe and Sea Nymph,"Zuikaku, Zuihō, and Chiyoda","[Document(id=af18f20df66b4ad801b8fc50e843ce8b9b13855af1e2d1d0723bc9a36346b583, content: ' fighter patrols. By 08:00 U.S. fighters had destroyed the screen of Japanese fighters and were hitt...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 143.23018949541458)]",0.0,0.007430467754602432,0.0,0.0,0.0 +What was the first fossil to display both clearly traditional reptilian characteristics?,Archaeopteryx,Archaeopteryx,"[Document(id=5b537a30444228b3b5e39f8e28a321a8e774f6051560dcab29bb0ece8b5ae99e, content: ' Unlike Archaeopteryx and the non-avialan feathered dinosaurs, who primarily ate meat, recent studie...', meta: {'name': 'Bird', 'source_id': '76114e3e9c92caec1de2bcfd38b6f22e1b48487f665fa6392f914934a6748ae1', 'page_number': 1}, score: 186.97083867996844)]",1.0,0.9999999403953552,1.0,1.0,1.0 +What are the theories that integrate molecular genetics with Darwinian evolution called?,the modern evolutionary synthesis,Lamarckism,"[Document(id=9a450124afc12f193f72ae1f8310b72ada7cbbf8f34bbd71f66eb32b0f47efbc, content: ' With the development of the modern evolutionary synthesis in the 1930s and 1940s, Darwin's concept ...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 175.4835605613558)]",0.0,0.2597317695617676,0.0,0.0,0.0 +At what time did Tesla get dinner?,8:10 p.m,3 a.m. to 11 p,"[Document(id=7ab4bce8d6c4986fced2482d7df23cc8143be198d80d729cef5c53dff273d0bc, content: ':32 The classes were held in German, as it was a school within the Austro-Hungarian Military Frontie...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 137.4402934899115)]",1.0,0.8442951440811157,1.0,1.0,1.0 +In what year did the French reach Alsace?,1444,2007,"[Document(id=9110e2aded9c3f4623e8b1d9a2fc3eefed2672371c89283d34e3a11304a9222c, content: ' Although Germany never formally annexed Alsace-Lorraine, it was incorporated into the Greater Germa...', meta: {'name': 'Alsace', 'source_id': '1105499d303196fb0b7ae4d26209720b0040e63376f114d458fd77f05836cbd4', 'page_number': 1}, score: 177.11536613714622)]",1.0,0.166826069355011,1.0,1.0,1.0 +Who was responsible for the handling of the Roman census?,two censors,augurs,"[Document(id=b80b0d7fa56110987db7fc274c40400c9afab47b60f93bd3980be6d29191c35f, content: 'The priesthoods of public religion were held by members of the elite classes. There was no principle...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 159.85134956306916)]",0.0,0.12105412036180496,0.0,0.0,0.0 +When was a resolution agreed to about Chinese human rights issues in San Francisco?,"April 1, 2008",April 9,"[Document(id=bcb8a4d40e4e94182477bd61b84fbd041d74b5d411214204882581a9a4a0d862, content: ' Among them was Jane Birkin, who spoke to the media about the ""lack of freedom of speech"" in China. ...', meta: {'name': '2008_Summer_Olympics_torch_relay', 'source_id': 'a45e6ea09a1ff12d8a239a682c8ef3735ae894d5ab6427b50909053bb046e8b5', 'page_number': 1}, score: 112.90831116674465)]",1.0,0.7683700323104858,1.0,1.0,1.0 +Who was the last pope to be crowned?,Paul VI,John XXIII,"[Document(id=560cf5b1213e86f2e17ba4e867bceae328ab716c2b83a14df6bebf2151df26a1, content: ' +In December 2013, Vatican officials approved a supposed miracle that was attributed to the interces...', meta: {'name': 'Pope_Paul_VI', 'source_id': 'fc82a40c990c0b3840b06b32643858d39984b66d78ae950561d809dc84d6edf8', 'page_number': 1}, score: 191.34888558747295)]",1.0,0.5264829397201538,1.0,1.0,1.0 +Who handles serious medical emergencies on Norfolk Island?,the Royal Australian Air Force,Royal Australian Air Force,"[Document(id=61583a75fdba243f6a812f3bf73c41e686e4f8374db81e4dd5f1a4d5a4430336, content: ' +Non-Australian citizens who are Australian permanent residents should be aware that during their st...', meta: {'name': 'Norfolk_Island', 'source_id': '8be872c9c3302a16163599d52ec60841ddff13042f8e8e2e0aa14de2c192e2ee', 'page_number': 1}, score: 136.08818508050095)]",1.0,0.9777635335922241,1.0,1.0,1.0 +Were Roman soldiers expected to purchase their own equipment?,"wealthiest citizens, who were able to purchase the best equipment",the Roman state was forced to arm its soldiers at the expense of the state,"[Document(id=1c55594460ba4e27d8751041858090c94678420d1701172511f1bad1a1d70154, content: ' As a result, the Roman state was forced to arm its soldiers at the expense of the state, which it h...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 156.21619178330843)]",1.0,0.14734730124473572,1.0,1.0,1.0 +Who did Nasser's group try to assassinate?,Hussein Sirri Amer,Nuri al-Said,"[Document(id=c355e0f20036a60ada1e4711a62b4d11adebf07dce76a808203fdd588cc1168e, content: '"" Nasser ordered a crackdown against Syrian communists, dismissing many of them from their governmen...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 207.58747886895094)]",1.0,0.6631616353988647,1.0,1.0,1.0 +What was AJ Ayer an important member of?,Logical empiricism,Egyptian Armed Force,"[Document(id=657e834d8c1daaec1071022c219bee449d7af1fb73732bdb2f3ff7d79c8646ec, content: ' +Convinced that he needed a wasta, or an influential intermediary to promote his application above t...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 121.25070163706141)]",0.0,0.0009296378120779991,0.0,0.0,0.0 +In which year was a settlement reached regarding GE's pollution of the Housatonic River?,1999,1977,"[Document(id=0869972e54922afd3497ba60a5f3e981d9c0ab240629036531512fe359bc4da8, content: ' Around the same time, General Electric's Canadian counterpart, Canadian General Electric, was forme...', meta: {'name': 'General_Electric', 'source_id': '26d4044eb2d79191635af69e57c262af8041a70871e60e989128c458529f04cc', 'page_number': 1}, score: 116.6152783244603)]",1.0,0.4726710319519043,1.0,1.0,1.0 +What area in Charleston is considered the 'industrial area'?,the city's Neck area,downtown medical district,"[Document(id=f2ed49ccc8f85dd664fcdca57c3499ff0b7c7b7afb1177548f6b0038c9e3b49f, content: ' The downtown medical district is experiencing rapid growth of biotechnology and medical research in...', meta: {'name': 'Charleston,_South_Carolina', 'source_id': '6135d76cf2064b0c4f70e685c5c7e15b8093869ebd10f9a74e8acb07e2173763', 'page_number': 1}, score: 144.2052302061412)]",1.0,0.3277379870414734,1.0,1.0,1.0 +In what year did Frédéric obtain citizenship in France?,1835,mulatto and octoroon,"[Document(id=92de78073423bb03eb54a64d0f332ce68f1771bd6a477f077368c5e13d629e9a, content: ' However, in its Recommendation ECRI uses this term in order to ensure that those persons who are ge...', meta: {'name': 'Race_(human_categorization)', 'source_id': '1232f7fe18d6e2adb87627ce2042b468cfa3fb9ba0b6afb2165c345f440b5339', 'page_number': 1}, score: 108.95556425556728)]",0.0,0.04100785404443741,0.0,0.0,0.0 +What was Tesla afraid someone was trying to do with his invention?,steal the invention,sell his idea to the U.S. military,"[Document(id=727f582f5aae2334de1fd6ecfae66ae1de524ec5b00a77311c574a592fabdcb3, content: ' Louis, Missouri, the Franklin Institute in Philadelphia, Pennsylvania, and the National Electric Li...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 156.11323355057357)]",1.0,0.2841935157775879,1.0,1.0,1.0 +What is the group called that does not agree with government at all?,anarchists,cordon sanitarian,"[Document(id=da081545e8e8020b03087fbbcde823b131a8ffeb227970cc6962ac2b8197ec87, content: ' When a party achieves enough seats in an election to form a majority, the party's frontbench become...', meta: {'name': 'Political_party', 'source_id': '476a9215de81b75a4f17737eed1679570973e55b5ed6175aa0ec1014b09d4632', 'page_number': 1}, score: 101.09700252196419)]",0.0,0.14516454935073853,0.0,0.0,0.0 +What was the name of the bookstore the authors met in?,Old Corner Bookstore,Bibliothèque Bleue,"[Document(id=431a21bc2021ab05bedf5f9c62189ca16d692858fb84414f96b2f293034850da, content: '"" Women, as salonnières, were ""the legitimate governors of [the] potentially unruly discourse"" that ...', meta: {'name': 'Age_of_Enlightenment', 'source_id': 'd6935b64f79e30b7c3de4c952b643a55674c548cd0bb703870a094fd3e646d84', 'page_number': 1}, score: 94.00186696356431)]",0.0,0.2553384304046631,0.0,0.0,0.0 +Time and space are both examples of what type of resource?,complexity resources,analytic philosophy,"[Document(id=6b4d3470d108ae94e907ed5cb5c688df345ffb225a282f26f0f753a1ab50e7cc, content: 'Philosophy of space and time is the branch of philosophy concerned with the issues surrounding the o...', meta: {'name': 'Philosophy_of_space_and_time', 'source_id': '5578d6ef93e9e6f246ec225cb481a7decb69e22faa45aa7889ab826e1e2d4028', 'page_number': 1}, score: 95.98418646424173)]",0.0,0.09864171594381332,0.0,0.0,0.0 +What had happened to Frederick's extra troops from Russia?,"Catherine, who ended Russia's alliance with Prussia and withdrew from the war",Frederick moved the bulk of his eastern forces to Pomerania,"[Document(id=2d6f9a27ae03d73ce7ee871f068b5e485decdd52e7f7cb2530780627731891ae, content: ' First, Frederick devastated Prince Soubise's French force at the Battle of Rossbach on 5 November 1...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 168.14260150153137)]",1.0,0.3530529737472534,1.0,1.0,1.0 +What percentage of its workforce did Dell terminate in from 2007 to 2008?,10%,10%,"[Document(id=cea0f1307bbe685005f9642d975856221cf9a5dd6f18a7b87c780156408770d1, content: '” +On April 23, 2008, Dell announced the closure of one of its biggest Canadian call-centers in Kanat...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 156.44545320970798)]",1.0,0.9999999403953552,1.0,1.0,1.0 +What organization was Rome's official caretaker?,the senate,Roman Curia,"[Document(id=849449c1450a95f93162f06fa31dc5068c49e9309fa9a5571acb08b712097028, content: ' Several texts proposed for his approval had to be changed. But all texts were finally agreed upon. ...', meta: {'name': 'Pope_Paul_VI', 'source_id': 'fc82a40c990c0b3840b06b32643858d39984b66d78ae950561d809dc84d6edf8', 'page_number': 1}, score: 140.04009463160082)]",0.0,0.2043728232383728,0.0,0.0,0.0 +Who chose BYU as the location of the national Middle East Language Resource Center?,United States Department of Education,United States Department of Education,"[Document(id=73c809a12535348ffed165eff42c189f3aa95c6ed455c42b5e7932cd44167a8d, content: ' Several of its language programs are the largest of their kind in the nation, the Russian program b...', meta: {'name': 'Brigham_Young_University', 'source_id': 'cb3b81b6f2c540426803106face67bc5fd4db7b1a9ef422ec3601456732b4785', 'page_number': 1}, score: 206.8994736921715)]",1.0,1.0,1.0,1.0,1.0 +Who discovered Hydrogen gas?,Robert Boyle,Robert Boyle,"[Document(id=b562ec53d471dfbb3ba9b12247b16c622039308f7ec5b5b5344127cfeb67e87d, content: ' +In 1671, Robert Boyle discovered and described the reaction between iron filings and dilute acids, ...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 206.25156723372163)]",1.0,0.9999998807907104,1.0,1.0,1.0 +Where was Polybius forced to go as hostage?,Rome,Misrata,"[Document(id=bb7db77c511c635fae48109d91d7a0cc78aee8d26387a3050dcaba82488972aa, content: ' On 25 August, the Arab League recognised the NTC to be ""the legitimate representative of the Libyan...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 141.60938024167194)]",0.0,0.3027350902557373,0.0,0.0,0.0 +In what decade was disco popular?,1970s,1980s,"[Document(id=a1ebb6111bad62b247dd1b756728c9569fa6d4b9a446c19b1950e68e79933994, content: ' As of 2015, house music remains extremely popular in both clubs and in the mainstream pop scene whi...', meta: {'name': 'House_music', 'source_id': 'c556f4a459e8a99b6d2065bab8fd6a918d0a00f13291aad2473922492e4abf2e', 'page_number': 1}, score: 174.3308231329952)]",0.0,0.775413990020752,0.0,0.0,0.0 +What do the rivers form?,lakes,lakes,"[Document(id=dfdb4032d57d026d1e52c68a1cff13d84af492b1c4c23fee3c33111d08105adf, content: ' The Rhine originates in a 30 square kilometre area in Switzerland and represents almost 60 percent ...', meta: {'name': 'Alps', 'source_id': 'd2eb3ad3af7d6110ae691bf40660ff7a79e4e9dbfd25785b4e55c7700bfcae79', 'page_number': 1}, score: 114.16977401000194)]",1.0,1.0,1.0,1.0,1.0 +What were the first 256 code points of Unicode made identical to? ,ISO-8859-1,an ASCII file,"[Document(id=39d421bf1b8a6f42626cef116937dc028d1e423b078f59d7429a3dc5b9a4264c, content: 'Originally based on the English alphabet, ASCII encodes 128 specified characters into seven-bit inte...', meta: {'name': 'ASCII', 'source_id': '4bd7e27c39431d5c4712c32c534ed40f7df89f7b80f6eccb22975178fb7b1be2', 'page_number': 1}, score: 163.1721608166594)]",0.0,0.18398842215538025,0.0,0.0,0.0 +"How many countries did her song ""Irreplaceable"" get number one status in?",five,"65 million records worldwide, and become the most successful Idol contestant in the U.S., selling over 14 million albums copies in the U.S. and has more Underwood has won seven Grammy Awards, the most Grammys by an ""American Idol"" alumnus. +Season five began on January 17, 2006. It remains the highest-rated season in the show's run so far. Two of the more prominent contestants during the Hollywood round were the Brittenum twins who were later disqualified for identity theft. +Chris Daughtry's performance of Fuel's ""Hemorrhage (In My Hands)"" on the show was widely praised and led to an invitation to join the band as Fuel's new lead singer, an invitation he declined. His performance of Live's version of ""I Walk the Line"" was well received by the judges but later criticized in some quarters for not crediting the arrangement to Live. He was eliminated at the top four","[Document(id=ec3b9b3ab5f2d528e25ddd559ed5ece0f75a24fbbd4154d11d3be76e232109cd, content: ' Both Underwood and Bice released the coronation song ""Inside Your Heaven"". Underwood has since sold...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 109.27218695122491)]",0.0,0.20669300854206085,0.0,0.0,0.0 +How many Examination Boards exist in India?,30,16,"[Document(id=dc6d2af99abb08909b2ed77db3e665639a7bf81662aa16867b624d401177775f, content: ' The Hong Kong University of Science and Technology, a university with a focus in applied science, e...', meta: {'name': 'Institute_of_technology', 'source_id': 'bd3c1ce78fe4328bf8eb2cbd8027759a2a927528ce7e496ce3ebcf9675bc060e', 'page_number': 1}, score: 107.311590077452)]",0.0,0.3298308253288269,0.0,0.0,0.0 +What play showed an early depiction of civil disobedience?,Antigone,Antigone,"[Document(id=ad70291d4dec497bcd48d02a2bfbb4780dffd558cf0eee2d7c976f107f401335, content: 'One of its earliest massive implementations was brought about by Egyptians against the British occup...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 159.1381537658833)]",1.0,1.0,1.0,1.0,1.0 +"Once in conversation with the Queen, how is Elizabeth addressed?",Ma'am,"a touch of humour, gentleness and understanding"".","[Document(id=0a24a2ae13af1d4669b8a2f74791d8b6c9fcf28ba3103552f6b7f5c81e2ec8b3, content: ' Patriation removed the role of the British parliament from the Canadian constitution, but the monar...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 137.47701637339432)]",1.0,-0.08331970870494843,1.0,1.0,1.0 +In what century did Paris become largely into fashion?,19th,19th,"[Document(id=faa03729332a5c07c1c9d6d87747d15af8b3043f87a9cff7389d9a1c23d97c7c, content: ' +Paris has been an international capital of high fashion since the 19th century, particularly in the...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 166.16265214381096)]",1.0,1.0,1.0,1.0,1.0 +When did Alfred the Great begin his reign?,871,11 December 1936,"[Document(id=06cbf9491f511f16650c4f94483faad3dd6deb11121a7158726cb23ef2b5b8c1, content: ' One of the few stirs arose when the Canadian Prime Minister, R. B. Bennett, considered the Duke for...', meta: {'name': 'George_VI', 'source_id': '5a05abc99d3e2c9f678c9681513c2d8cdfe043b062ba971ef3f9f31e7c0c4622', 'page_number': 1}, score: 137.500661055153)]",0.0,0.11978332698345184,0.0,0.0,0.0 +What two methods can be used when using Load Testing?,data or a large number of users,large quantities of data or a large number of users,"[Document(id=dd704f4bc758ab475db02844a8a24263dd4fdb32afbc7f7dc86b810f8fa62e4f, content: ' +Load testing is primarily concerned with testing that the system can continue to operate under a sp...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 149.3892484981509)]",1.0,0.8635382056236267,1.0,1.0,1.0 +what may possibly be a solution to net neutrality concerns? ,municipal broadband,Title II (common carrier) of the Communications Act of 1934,"[Document(id=7aba2d10978318bc5abba467ce5b46b678a83c63b04c2770eb6bfdb5855d1dce, content: ' On 16 January 2015, Republicans presented legislation, in the form of a U.S. Congress H.R. discussi...', meta: {'name': 'Internet_service_provider', 'source_id': 'b6cdeebacab1c0fc99cc348ba10a7ad5f8a09ca4691c74f89374730939ffbbb4', 'page_number': 1}, score: 100.06290327936973)]",1.0,0.29370126128196716,1.0,1.0,1.0 +What are the ground-attack aircraft in the USAF usually deployed in support for? ,U.S. ground forces,"interception of bombers and other fighters, reconnaissance, and patrol","[Document(id=d2709fd4938aadfc321fd75534f338a1182dec7bcf787b2638ddb43d1a59b926, content: ' Special Operations Command (USSOCOM). It conducts long-range, special operations missions, and is e...', meta: {'name': 'United_States_Air_Force', 'source_id': '1bb222de66e5a2c81f1d5b75bf3eced066939aec82934a5cb176a607b4e8cc8f', 'page_number': 1}, score: 192.83729826021232)]",1.0,0.331512987613678,1.0,1.0,1.0 + In what year did the invasion of Russia take place?,1812,1758,"[Document(id=fea192934abe21b4cb96f1e186336d5d38f81f716b1c5edb0a30df3d55249b58, content: ' +By this point Frederick was increasingly concerned by the Russian advance from the east and marched...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 156.32804278378717)]",0.0,0.5242623090744019,0.0,0.0,0.0 +What was the practice of religion to the Romans?,personal choice,practical and contractual,"[Document(id=b80b0d7fa56110987db7fc274c40400c9afab47b60f93bd3980be6d29191c35f, content: 'The priesthoods of public religion were held by members of the elite classes. There was no principle...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 193.7799070146575)]",1.0,0.13173553347587585,1.0,1.0,1.0 +What is one example of failure testing?,Software fault injection,"defects in dead code will never result in failures. A defect can turn into a failure when the environment is changed. Examples of these changes in environment include the software being run on a new computer hardware platform, alterations in source data, or interacting with different software","[Document(id=f10f419092a1f6acf685021bef11a7a6ebef797bdd44e3997bd6b4a635aa2a89, content: ' Not all defects will necessarily result in failures. For example, defects in dead code will never r...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 106.64395109119121)]",1.0,0.34705621004104614,1.0,1.0,1.0 +"Out of the maximum amount of players allowed, one must be a what?",goalkeeper,home-grown players,"[Document(id=0d02d0727bbf3d538faf4d0e4b493fb1a575377aa15b678ef4c8a1fa1646ad91, content: ' By 2009, under 40% of the players in the Premier League were English. +In response to concerns that ...', meta: {'name': 'Premier_League', 'source_id': '369baa9a5a854704f16fadd35144d3a3150cf1e9ae1671d9b50a3448547b814b', 'page_number': 1}, score: 98.30450237695865)]",0.0,0.23592567443847656,0.0,0.0,0.0 +In which year was the new constitution ratified?,1855,1956,"[Document(id=c5c366f06788f022f78b708df479c782035fa76318f4adb339bcf7725475034a, content: ' Nasser was welcomed by large crowds of people lining the streets of Cairo on his return to Egypt on...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 125.38863131677583)]",0.0,0.7131929993629456,0.0,0.0,0.0 +What vessel was attacked in 1845?,Naiad,Naiad,"[Document(id=c450c10c9cc7ec2362a5a8c23142549f03d77146379f9bb96ab788f8c91887e3, content: ' One year later, the American schooner Dolphin arrived and picked up two boys, the last survivors of...', meta: {'name': 'Marshall_Islands', 'source_id': '762a4f6095de127ded5441c7a17d1c9c38f731ac9cb1ffe8de30af6573be7edf', 'page_number': 1}, score: 156.38281126749234)]",1.0,0.9999998807907104,1.0,1.0,1.0 +When did Travel + Leisure list Yale as one of the most beautiful campuses?,2011,2011,"[Document(id=afdbd08089615eb2bbb9dd9a5945a7e094c57e1f63a4d1f083e6183556a9f3e7, content: ' Moreover, Yale has been accused of failing to treat workers with respect. In a 2003 strike, however...', meta: {'name': 'Yale_University', 'source_id': 'f33638c4d6b8d7572ba500f3a8affca1729aae3046c83a94e2a7424dd50f6549', 'page_number': 1}, score: 171.7668856618156)]",1.0,1.0,1.0,1.0,1.0 +What sort of variants did the Human Genome Project indicate that many of the measured transcripts were?,alternative variants,alternative variants,"[Document(id=74c523cc7b77506967566fb8161bf4043d30ccf8c68e884af6cf2b9d24106e71, content: ' Conversely, plants can have extremely large genomes, with rice containing >46,000 protein-coding ge...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 109.03169875302669)]",1.0,1.0,1.0,1.0,1.0 +During what year did the Italians invade Libya?,1911,1980,"[Document(id=cdc48a6ad3a2e7247518958e73c2ec683689a78ed8978eafe40f8f9c84ca8545, content: ' Military spending increased, while other administrative budgets were cut back. Libya had long suppo...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 205.52285831252408)]",1.0,0.48459139466285706,1.0,1.0,1.0 +What did Davidson believe the God of Aristotle is synonymous with?,rational thought,Jupiter,"[Document(id=a8cc5557b921c5766b219ced4b8319da3ea79db597efbb98b7cbe6998afe6656, content: ' Ambrose, the influential Bishop of Milan and future saint, wrote urging the rejection of Symmachus'...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 185.2449344711922)]",0.0,0.0053202565759420395,0.0,0.0,0.0 +What percentage of students at Notre Dame are the children of former Notre Dame students?,21–24%,"12,179 students, with 8,448 undergraduates, 2,138 graduate and professional and 1,593","[Document(id=e38425e521e859fd4eefb246346c370cf8ce1992d1d14b16d364e12c8fd02f64, content: ' Intellectual leaders included Gurian, Jacques Maritain, Frank O'Malley, Leo Richard Ward, F. A. Her...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 123.23823309943518)]",1.0,0.2471694052219391,1.0,1.0,1.0 +On what day did U.S> planes located Ozawa's fleet?,second,4 June,"[Document(id=fe75f8e6c8d71ab8d430dfcb6b295cf49793b5e0f3c6dcd092c231ca1c16a92b, content: ' +A Japanese force was sent north to attack the Aleutian Islands. The next stage of the plan called f...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 167.2999768147891)]",1.0,0.20023301243782043,1.0,1.0,1.0 +Greek was crowned the European champions in what event?,Euro 2004,Euro 2004,"[Document(id=9c5e4e5ebe04f6d60e8e13bc3cfe5cd0c8fa7c41c1148c916c472a26ca7c677b, content: ' Their best ever performance was in the 1896 Summer Olympics, when Greece finished second in the med...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 165.19965128505237)]",1.0,0.9999999403953552,1.0,1.0,1.0 +Which tense indicates that an action hasn't finished?,imperfective,"(""Can you your pen not find because it far too dark","[Document(id=564ffd4c0b64a9c7e64cbc5966dacc957eb845cf2b27caaba762309fffcfc345, content: ' in het jaar des Heren (Anno Domini), where ""-en"" is actually the genitive ending of the weak noun. ...', meta: {'name': 'Dutch_language', 'source_id': '38c4da472a27f770c9c829b498d0e1e30e81320ca29e7d0546aebb638c81be6a', 'page_number': 1}, score: 109.78396328462887)]",0.0,-0.03480945900082588,0.0,0.0,0.0 +What acoustic element could be distorted by different audio settings in iPods prior to 2007?,Bass,bass instruments,"[Document(id=25a9235e2187510deb110e2fe84f6e0f7a2dbacbbd0bd491e44e8a83b3be8cbf, content: ' This would happen for EQ settings like R&B, Rock, Acoustic, and Bass Booster, because the equalizer...', meta: {'name': 'IPod', 'source_id': 'f0324763387a7e11c235569bada6935891b2c5ba40dd065ab5acc1bb5de8dbed', 'page_number': 1}, score: 144.93989592071756)]",1.0,0.4064074754714966,1.0,1.0,1.0 +In what way can Neutral triatomic hydrogen exist?,excited form,protonated molecular hydrogen,"[Document(id=6e618286b9705860ffa2586c851128c816120c84dcf91348f6ee11f32b6d590f, content: ' This species is central to discussion of acids. Under the Bronsted-Lowry theory, acids are proton d...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 126.73501886889714)]",1.0,0.16139346361160278,1.0,1.0,1.0 +How are children used in the conflicts in Burma?,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements.,"Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements. The Independent reported in June 2012 that ""Children are being sold as conscripts","[Document(id=e39a424de393c7814379d5771c4abc998c371eadee31f5a4692a573651cdce01, content: ' South Africa also voted against the resolution. +There is consensus that the military regime in Myan...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 131.50547962808298)]",1.0,0.9422255158424377,1.0,1.0,1.0 +In what decades were Arsenal players stereotyped in a negative way?,1970s and 1980s,1966–67,"[Document(id=ba20188e907b6851f746fd539dbc0cab1e88e0349d17ed43168a54a40a2d1344, content: ' The laurel leaves also represent strength. To complete the crest, 1886 and 2011 are shown on either...', meta: {'name': 'Arsenal_F.C.', 'source_id': 'b197b14120aabbc3bf2073d4b3e99c1c51d56d5ac5248f8a97dadcfa056a6f7e', 'page_number': 1}, score: 75.3577185564337)]",1.0,0.44700032472610474,1.0,1.0,1.0 +When did new construction start in Melbourne?,late 1890s,February 2006,"[Document(id=b7f872340a2353028ee8ea0f0e2ba802acb0824c43169f4135ee2fec5fa00b32, content: '8 billion and export revenues of $615 million. In addition, tourism also plays an important role in ...', meta: {'name': 'Melbourne', 'source_id': 'e9cc00699ea29b72949874ec0e147efa277ecdaa2a727246f376aab1a8d342bd', 'page_number': 1}, score: 184.96961800246208)]",1.0,0.3492073714733124,1.0,1.0,1.0 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:5.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:5.csv new file mode 100644 index 0000000..76d263c --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/detailed_msmarco-distilroberta-base-v2__top_k:3__chunk_size:5.csv @@ -0,0 +1,124 @@ +questions,true_answers,predicted_answers,retrieved_docs,doc_mrr,sas,doc_map,doc_recall_single_hit,doc_recall_multi_hit +What were the earliest priesthoods? ,flamines,flamines,"[Document(id=99729d5c09ec68a18e6cb9e326f59027b5c69bc74e05e8c8c0d825360370bf5e, content: ' In household cult, the paterfamilias functioned as priest, and members of his familia as acolytes a...', meta: {'name': 'Religion_in_ancient_Rome', 'source_id': '5539950aa68a34aaa32ce17890e29473510376f98e0ff668c55f32354d881f90', 'page_number': 1}, score: 173.02697070797495)]",1.0,0.9999998807907104,1.0,1.0,1.0 +When was a third visit to Mnafeld scheduled? ,early 1546,March,"[Document(id=5081c97cf3c1bba15e1c4e67633c829431bdafa2c6e4103be63d8bb2cf12a442, content: ' The park's amphetheatre was renovated in 2010, and has hosted performances by acts such as Deftones...', meta: {'name': 'Fresno,_California', 'source_id': 'a46a1dee5f6d188d5f5a510a3c694e88541550757193e312801d8b786a38b1fa', 'page_number': 1}, score: 85.07322132565646)]",0.0,0.046456485986709595,0.0,0.0,0.0 +What was the name of the airfield where heavy raids took place?,Farnborough,Ormoc Bay,"[Document(id=54cf290f562ea7c2ffff22fe517ae92e59e00fbdbc47dd6d72ae885b0455fd69, content: ' Fifth Air Force was able to devastate the Japanese attempts to resupply. In torrential rains and ov...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 151.79338514889298)]",0.0,0.09204164147377014,0.0,0.0,0.0 +The Modern English ending -'s is derived from what ending in Old English?,-es,"Ænglisc, Anglisc, Englisc","[Document(id=169a5194d5b8e4398d0437961bfa539bb8d8fa94fb38d4cba89afa5e1b31232c, content: 'Old English (Ænglisc, Anglisc, Englisc) or Anglo-Saxon is the earliest historical form of the Englis...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 110.28235177333441)]",1.0,0.1304444968700409,1.0,1.0,1.0 +In what area is it common for spiritual mentorship to be extremely high?,the West,"low-church"" traditions of Protestantism","[Document(id=bbad1043bc2ff983eeca01fd03f84a8577a718a20a14adc37febb6fddb52936a, content: ' +In The Church of Jesus Christ of Latter-day Saints (LDS Church), the teacher is an office in the Aa...', meta: {'name': 'Teacher', 'source_id': '8d3b7bd3176a10380300a49fa713b8fffa5e77a181df51ea823d2bdeb46d4c7b', 'page_number': 1}, score: 133.68718996186385)]",1.0,0.12859657406806946,1.0,1.0,1.0 +Who is the only Greek volleyball club to win European titles?,Olympiacos,Olympiacos,"[Document(id=927ee26533dc7f17be50a4cb7fc74bed43a4bc44825d916333e6dd9c5d18c958, content: ' The Greek league, the A1 Ethniki, is considered one of the top volleyball leagues in Europe and the...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 177.5734971726734)]",1.0,0.9999998807907104,1.0,1.0,1.0 +How many judges were on the panel for this season?,four,three,"[Document(id=dddbddd251efc34f4b3d3c9022140571b04cbc8835dace1cf407b1ef1ea9dd5d, content: ' Jennifer Lopez and Steven Tyler joined the judging panel in season ten, but both left after two sea...', meta: {'name': 'American_Idol', 'source_id': '0ab0bf3d7b34caba79d26524c23f7e26f93bc7c5b29888839e9a63b4e42fa52b', 'page_number': 1}, score: 109.75365806507682)]",1.0,0.7108101844787598,1.0,1.0,1.0 +"Along with a chemical process, what type of process is used by night vision devices to convert infrared into visible light?",electrical,electrical,"[Document(id=05e7c56a6da1ed703309ec6f627bc69c369ec51519a9e5eb8c442673c62365e4, content: ' Infrared can also be used to detect protostars before they begin to emit visible light. Stars emit ...', meta: {'name': 'Infrared', 'source_id': 'ba9d9843065facd8aa75e472f0c7d760ae3d78712335a49f7abaf499acb21d1c', 'page_number': 1}, score: 129.47865242752295)]",1.0,1.0,1.0,1.0,1.0 +The second most prominent religion in New York is what?,Judaism,Judaism,"[Document(id=4437c778daec446c4362623ed3261e5ec4170314f4aa14f8381b8c7ee6eef3fe, content: ' +Christianity (59%), particularly Catholicism (33%), was the most prevalently practiced religion in ...', meta: {'name': 'New_York_City', 'source_id': '8a1509d057a51ed7a68b42ff87df5d459ac7f15f3d0f4f1685859ba197f9686d', 'page_number': 1}, score: 209.32334393129398)]",1.0,1.0,1.0,1.0,1.0 +The Free Hanseatic City of Bremen includes which cities?,Bremen and Bremerhaven,two urban districts,"[Document(id=a102b81b2963aaefdf1a83d1a1d96870be10e96bb78da447e6f4a0220bbf260f, content: ' The equivalent of the Minister-President is the Senatspräsident (President of the Senate) in Bremen...', meta: {'name': 'States_of_Germany', 'source_id': '60764e89141121f734e956aeeb48fcbe12c7e4fdec683c4f9fa29406a3f0579e', 'page_number': 1}, score: 144.68905502955556)]",1.0,0.12825781106948853,1.0,1.0,1.0 +What seeks to limit the use of polysyllabic characters?,CPC,lack septa,"[Document(id=e17b1a29df1c9f5c2187144a80272f17ecd053e6e3aedeaf8ab5cebd902b391d, content: ' Many polychaetes have a muscular pharynx that can be everted (turned inside out to extend it). In t...', meta: {'name': 'Annelid', 'source_id': '4d1835976d6f8a3a104eed564c45fb20b1a080365dcb082f33afd480371401ed', 'page_number': 1}, score: 67.46384235035063)]",0.0,0.09892848879098892,0.0,0.0,0.0 +In what courts can federal habeas corpus suits be brought?,federal,State and Federal Circuit,"[Document(id=0df047c0c9a537856c65dd1b8a83167b8bf321c913462f93d87acc192d8b2dc9, content: ' It is both the constitutional court and the court of last resort in Brazilian law. It only reviews ...', meta: {'name': 'Supreme_court', 'source_id': 'af99711cf0b2a1212f0d8426a2b67dff359c1025c5a49019ddbd0e35af38f770', 'page_number': 1}, score: 103.06015374852305)]",0.0,0.6240383386611938,0.0,0.0,0.0 +What did Whitehead consider himself as a philosopher? ,rank amateur,the greatest philosopher of the 20th century,"[Document(id=1d3a969acd3216f34aa4ec29de6ec4a5e5e388d98de9d5586f2e04f27773f302, content: ' In English-speaking countries, his primary works are little-studied outside of Claremont and a sele...', meta: {'name': 'Alfred_North_Whitehead', 'source_id': '8117b81ebe7588c941510f73f8227e3b220b56755171cb0418fdd752bb385bda', 'page_number': 1}, score: 206.62669174733202)]",1.0,0.07779727876186371,1.0,1.0,1.0 +What condition does a person have to meet to be allowed by law on a coin?,deceased,worshipping God in the manner and season most agreeable to the dictates of his own conscience,"[Document(id=0e1d4f1d8d4e3533adb041cb5f1b6a2a3c5856cee62abc76e787c6878ad0d6b9, content: ' Similarly, the Constitution of Massachusetts originally provided that ""no subject shall be hurt, mo...', meta: {'name': 'Separation_of_church_and_state_in_the_United_States', 'source_id': '9e181b8958f59a2f5a36329a7f787f416428f691cac0094c165c53403bedaabf', 'page_number': 1}, score: 91.80492411481453)]",0.0,0.10118141770362854,0.0,0.0,0.0 +Which region of the US experiences rain year round?,Pacific Northwest,Southwest,"[Document(id=994f169f938f2ae6c5b72207e8f053085554aee5e18da6a0e4c89bdb0a7fb708, content: ' The Southwest has the worst droughts; one is thought to have lasted over 500 years and to have hurt...', meta: {'name': 'Geography_of_the_United_States', 'source_id': '6eef5e1e8e3a1d489b23e0304d3cc9472b84a44871b2fd5ecfc8ab3f12e8ccf0', 'page_number': 1}, score: 179.13218864040363)]",1.0,0.6455833315849304,1.0,1.0,1.0 +What is the ability to recognize and adapt to new specific pathogens called?,Adaptive (or acquired) immunity,Antigen specificity,"[Document(id=8e364a66a416e7184c815546c99f2af87e1565f3c1094415321a6192f360b407, content: ' The adaptive immune response is antigen-specific and requires the recognition of specific ""non-self...', meta: {'name': 'Immune_system', 'source_id': 'fcb8c5092b2596a66d4f96587d653cbabda606874d86632c35ec40ef8f023051', 'page_number': 1}, score: 157.71412992185162)]",1.0,0.2857634425163269,1.0,1.0,1.0 +Who allegedly hit a home run to the Center?,Babe Ruth,Babe Ruth,"[Document(id=2abfa717cced74cfc35f5836efd511c9dcad606c2013e9cf62940c0910458753, content: ' Because of the tie the Giants and Cubs ended up tied for first place. The Giants lost the ensuing o...', meta: {'name': 'Chicago_Cubs', 'source_id': '10bd5330115a2fe2b9ef60d21abe3942769709161783409af0c4444ccac129c3', 'page_number': 1}, score: 149.42361214417218)]",1.0,1.0,1.0,1.0,1.0 +Which type of graduate from Imperial earned the second highest average starting salary after graduation?,Computing graduates,undergraduate,"[Document(id=8c2436b77a6c1726e2314124b2ca86e8eecc9e0f4e28a3c3eeeb8bb9925556f1, content: ' According to payscale.com, undergraduate alumni of University of Notre Dame have a mid-career media...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 163.89933007475037)]",0.0,0.2480124831199646,0.0,0.0,0.0 +In what year did Eisenhower propose his second civil rights act?,1960,1942,"[Document(id=dac723a43b39bdbec9d480c31d16576f349280e2468cd1ace681a22042059770, content: ' Marshall, who spotted talent and promoted accordingly. +At the end of May 1942, Eisenhower accompani...', meta: {'name': 'Dwight_D._Eisenhower', 'source_id': '859016dbddc09f1f2a8921e109d7fed820d1b8f064a73fa80275cba10849cb55', 'page_number': 1}, score: 170.7410237583216)]",1.0,0.5387994050979614,1.0,1.0,1.0 +What is the current height limitation in Paris in central areas?,50 m,50 m (160 ft),"[Document(id=d89cdeedbc8441f4e2c52a3575e8c530fcce225561404b37c7cdd108d1cc549f, content: ' Modern Paris owes much to its late 19th century Second Empire remodelling by the Baron Haussmann: m...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 160.35950445198006)]",1.0,0.7246970534324646,1.0,1.0,1.0 +Income level was found to have a signification correlation to which outcome in the study?,nutrition literacy,lower levels of growth,"[Document(id=e62fe349ee4e73a3572e9f63b19d92af4d05f2e4ba5ba37814049ed91dfeb15b, content: '"" +In 1993, Galor and Zeira showed that inequality in the presence of credit market imperfections has...', meta: {'name': 'Economic_inequality', 'source_id': '648ff32b8c146c974b23e115e3a61420b1cdbea9de1f5bf38442ce4c1d9caa42', 'page_number': 1}, score: 91.58925425640356)]",0.0,0.08601036667823792,0.0,0.0,0.0 +How much did Viacom want in damages in their lawsuit against youtube?,$1 billion,$1 billion,"[Document(id=720eb3795a623e31335108bcc29a4b774e33272bb625d5968983846dd3b35f68, content: ' Three successful complaints for copyright infringement against a user account will result in the ac...', meta: {'name': 'YouTube', 'source_id': '4d18798daeceb997a6dcb04b575dbbdad768671ca9202116370611d944d510be', 'page_number': 1}, score: 170.60211135792204)]",1.0,0.9999998807907104,1.0,1.0,1.0 +Who should be able to challenge administrative orders in court?,individuals,individuals,"[Document(id=cf59c0f0619e715b979051e0a84530bf73463713fa11f2961866d233ac2537f5, content: ' Dicey's: “no man is punishable or can be lawfully made to suffer in body or goods except for a dist...', meta: {'name': 'Rule_of_law', 'source_id': '58b9477525cf8e214cd9a236fe22cc9bfe5e78fda6e2b41433bdb3e30f99d3d2', 'page_number': 1}, score: 146.15927986962998)]",1.0,1.0000001192092896,1.0,1.0,1.0 +The Slavs were under what name in the early 6th century?,the Antes and the Sclaveni,the Antes and the Sclaveni,"[Document(id=47d265d7c6c10ca0d6f93b7f8d3f5cc58da5eeb0104c993e0a00be57db28de17, content: ' The Old East Slavic Slavuta for the Dnieper River was argued by Henrich Bartek (1907–1986) to be de...', meta: {'name': 'Slavs', 'source_id': 'a55731ff3cd03d8fe34d4e646dbfcdc2088f01190e5110c9ff0baf4e4b486847', 'page_number': 1}, score: 196.2818777933153)]",1.0,1.0000001192092896,1.0,1.0,1.0 +How are the results simplified when using the SASO?,"into four summaries that look specifically at responses that correspond to either homosexuality, heterosexuality, bisexuality or asexuality.","measures sexual orientation on a continuum, considers various dimensions of sexual orientation, and considers homosexuality and heterosexuality separately","[Document(id=0c139e3611be6bea0c99d8a10050e9e4d2d795b04b887593d7d757b669491756, content: ' The classification scheme works such that individuals in the same category show the same balance be...', meta: {'name': 'Sexual_orientation', 'source_id': 'f94279650936a403aa3d2b43ab1ad42b1fd14d05455aa86fbcda6cda978ffe5c', 'page_number': 1}, score: 91.46228830594487)]",1.0,0.4782726764678955,1.0,1.0,1.0 +What socioeconomic class used the Florentine Tuscan language in Dante's time?,upper class,quadrivium of the liberal arts,"[Document(id=f362bdcfde3970c1965d3564d5709917d4463c295e291066153a18e0ddc2456d, content: ' Together the three poets established the Tuscan dialect as the norm for the modern Italian language...', meta: {'name': 'Late_Middle_Ages', 'source_id': 'fb62f16f8c6599f7200add017bece6e2ea916520e544af1dd94713410c254ce9', 'page_number': 1}, score: 172.48469574466233)]",0.0,0.20520564913749695,0.0,0.0,0.0 +"In addition to ammo proposals, what other need was articulated?",The need for some form of tracer or smoke trail,explode in the air,"[Document(id=13415c0e7fbf3c69960bcf9d57e74c7913c152de1b86f5d5e8fbe6b19f466b41, content: ' These batteries, particularly with guns, usually deploy in a small area, although batteries may be ...', meta: {'name': 'Anti-aircraft_warfare', 'source_id': 'c54639424f62ce369356ed8c4e1e9fab8769359dd28029aa89832f5aa994f45d', 'page_number': 1}, score: 80.56317196877986)]",1.0,-0.06186457350850105,1.0,1.0,1.0 +What is that power that enables us to love?,Prevenient grace,Emotions,"[Document(id=5a260e08d6f4027a44c44e45bb3870f94a28beee3a72e7855a8dfe7efe426909, content: ' The theory proposed by Nico Frijda where appraisal leads to action tendencies is another example. +E...', meta: {'name': 'Emotion', 'source_id': 'd8cc93a545b4114ba331de59898315369d2946cc279a22b3d9fd171f89c8221b', 'page_number': 1}, score: 106.10330318103026)]",0.0,0.11000385880470276,0.0,0.0,0.0 +"Who was assassinated on September 22, 189 AD?",He Jin,He Jin,"[Document(id=6b20ebc1df3c4a4239954b87829f542f45ce58307a74b92a5c1a806a2183d971, content: ' When the eunuchs discovered this, however, they had her brother He Miao (何苗) rescind the order. The...', meta: {'name': 'Han_dynasty', 'source_id': '060d95b91f9c20e5c8cd8b8a032ab44aac38ab88ce62d3c302bea0865f533568', 'page_number': 1}, score: 167.52952159485534)]",1.0,1.0000001192092896,1.0,1.0,1.0 +How did Gaddafi change the legal system?,he introduced sharia as the basis for the legal system,sharia,"[Document(id=f0f866d3b3b357cebd62bef6998e3e11e90a372e00343c3d189939c7eb20be2e, content: ' +In May 1970, the Revolutionary Intellectuals Seminar was held to bring intellectuals in line with t...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 201.4704913998913)]",1.0,0.5289127230644226,1.0,1.0,1.0 +What name was it assumed Elizabeth would take upon her marriage to Philip?,"Mountbatten,",heir presumptive,"[Document(id=d84cede505487aab2b7f8ca81ca6dc2ec7e647562d9fcef338a4a7bc8b2c2e2b, content: ' She was educated privately at home. Her father acceded to the throne on the abdication of his broth...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 217.49870495818075)]",1.0,0.22167840600013733,1.0,1.0,1.0 +What was the attack on the British weakness?,Battle of Fort Bull,the Suez Crisis,"[Document(id=c73fb457c593e6b61bbba02396af010402dc804eeec63f77c635431e6550ce06, content: ' Margaret Thatcher later described the mindset she believed had befallen the British political estab...', meta: {'name': 'British_Empire', 'source_id': '8f673e96ef153333a8fc1d3855be60f5be1829316ed1fcb58dd8590fdb083946', 'page_number': 1}, score: 147.34963896569508)]",0.0,0.08610106259584427,0.0,0.0,0.0 +How much of South Tucson is Native American?,10%,10%,"[Document(id=19e07536fc4d6a422f717376ebb5c165dc66d2b84c2b4069db4dbb4757a892d6, content: ' It was first incorporated in 1936, and later reincorporated in 1940. The population consists of abo...', meta: {'name': 'Tucson,_Arizona', 'source_id': '1cd8368afbbb7156e4d37c16c9795a1cf7209fbd0036331b6759337ae62d7224', 'page_number': 1}, score: 186.13951045499653)]",1.0,0.9999998807907104,1.0,1.0,1.0 +Whose theory claimed that species were unchanging and designed by God?,John Ray,William Paley,"[Document(id=1de1b94fbb49d9d6bbc2cdcf71ef4f81cd3ec13b95fe79865c57979200ac9095, content: ' Lamarck thought there was an inherent progressive tendency driving organisms continuously towards g...', meta: {'name': 'On_the_Origin_of_Species', 'source_id': 'adac4711b7d224abae00474e53083755e6a0b31beb5aef768574d738fabcf0e6', 'page_number': 1}, score: 168.5436908396954)]",1.0,0.23203226923942566,1.0,1.0,1.0 +Who can restore a database that has been corrupted?,database administrator,dedicated backup files,"[Document(id=d62f58c8737fe86268de0ebf0cb0ef4eae1f9ba5f33735f7cadb2e83beb9e377, content: 'g., cases when the database is found corrupted due to a software error, or if it has been updated wi...', meta: {'name': 'Database', 'source_id': '98b09743ece9be42ad021f0dc6ffab08c322e7eeace6aae0b98b4bbf67224bd3', 'page_number': 1}, score: 93.70196948277217)]",1.0,0.32296431064605713,1.0,1.0,1.0 +What unpopular philosopher's text did Avicenna learn from?,Abu Abdullah Nateli,Abu Abdullah Nateli,"[Document(id=a3a017724e8f025ada3a4a92e98993ec44b17e0941134ce7e52db3191cd3ea08, content: ' He learned Indian arithmetic from an Indian greengrocer,ءMahmoud Massahi and he began to learn more...', meta: {'name': 'Avicenna', 'source_id': '25cbb1011a29473e1dfa8d96ebe95bc8ead7393548de57e871f959624f7238f1', 'page_number': 1}, score: 186.57760345482487)]",1.0,1.0000001192092896,1.0,1.0,1.0 +What was the event called that caused Ayutthaya to reclaim lost territories ?,the Chinese threat,Ayutthaya seized Tenasserim and Lan Na,"[Document(id=ca6db29fb81cf6a69632107c67ace9bdd1fdd8f61142978a0684c6cd6f540122, content: ' Ayutthaya seized Tenasserim and Lan Na, and Portuguese mercenaries established Portuguese rule at T...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 154.99059803938104)]",1.0,0.2441307157278061,1.0,1.0,1.0 +"Before the introduction of the Latin alphabet, how was Old English written?",runic system,Ingvaeonic (also known as North Sea Germanic) dialects,"[Document(id=9a27238fee0d389c0990690f0201315187259a5da5733ce27ff271c3f814b67a, content: ' +Old English is a West Germanic language, developing out of Ingvaeonic (also known as North Sea Germ...', meta: {'name': 'Old_English', 'source_id': '499102fb98655495aa1290f40a7501a80d34498ace9fe691454bd3cbff83f7cd', 'page_number': 1}, score: 144.57941959616778)]",1.0,0.18400278687477112,1.0,1.0,1.0 +"When groups that are antagonistic to each other form a coalition for hidden gain, it is called what?",unholy alliance,nepotism,"[Document(id=0322cf28631ab8410c95bc8f80e0b42570297570bd2142703fd186cc1e1e7f6a, content: ' A similar problem can also be seen in Eastern Europe, for example in Romania, where the government ...', meta: {'name': 'Political_corruption', 'source_id': 'e7c84fb03cb06b72c4ab51eb24218e9f2a10b95493e43d575342db0ffa3e9e1d', 'page_number': 1}, score: 115.05848969499738)]",1.0,0.09873715788125992,1.0,1.0,1.0 +What made a strong economic case for investing in low carbon technologies?,Stern Review,The chance to participate in the economic success of technological inventions,"[Document(id=8ca670761ac712ffb787aa7e4d7d937782d05faf36b89536ba8fc4b7b1dbea7d, content: ' In particular, Calvin rejected luxury. Therefore, craftsmen, industrialists, and other businessmen ...', meta: {'name': 'Protestantism', 'source_id': '4e459ea6668bbb8e35a3b3731021de782c29c314b694a30df606a1413f2e1325', 'page_number': 1}, score: 108.05624968004788)]",0.0,-0.07336610555648804,0.0,0.0,0.0 +"Who introduced the Latin version of the word ""insect""?",Pliny the Elder,Pliny the Elder,"[Document(id=e8da082f5022b804801ddb2778f16c32226f0ecb59f3ed7f8fcdae17653f162c, content: ' +The word ""insect"" comes from the Latin word insectum, meaning ""with a notched or divided body"", or ...', meta: {'name': 'Insect', 'source_id': 'e14215fdfb7e1394430dedb24549d773464f110b7f19b86558bc101f84126630', 'page_number': 1}, score: 184.832858928507)]",1.0,0.9999999403953552,1.0,1.0,1.0 +What was started by the Roman Catholic Church to dispense of the Cathars in Languedoc?,The Cathar Crusade,The Cathar Crusade,"[Document(id=08215551b881c7cefad5bb5baaaddc4d6f9a5802a709d61e32985651e2115c85, content: ' By the 11th century, more organised groups such as the Patarini, the Dulcinians, the Waldensians an...', meta: {'name': 'Heresy', 'source_id': '8ba473262f8508a9c99699bb09cd022993896ece1fed1463a9cf4bd7eb0c3e68', 'page_number': 1}, score: 159.6433571883927)]",1.0,1.0,1.0,1.0,1.0 +What music service is a huge competitor for Tidal?,Spotify,Spotify,"[Document(id=2fca0ca848c224a8494e3eead28c715beb0ffe86531f3f21ea833479b2f3f978, content: ' Including Beyoncé and Jay-Z, sixteen artist stakeholders (such as Kanye West, Rihanna, Madonna, Chr...', meta: {'name': 'Beyoncé', 'source_id': 'df5e2ca169dc6cf68bbfa81e78478ec035f276c65d7f24df15ba60a18401232b', 'page_number': 1}, score: 117.78614306193649)]",0.0,1.0,0.0,0.0,0.0 +How much was each copper token worth?,a halfpenny,420 tonnes,"[Document(id=687916890077183f6cd89496d12592eec3d76bd82f46a80bfac0591dba1c0c1c, content: ' The largest mass of elemental copper discovered weighed 420 tonnes and was found in 1857 on the Kew...', meta: {'name': 'Copper', 'source_id': 'de5c60a665b93f9b358be412d53a37ca43d90005e3ec9a553ac38a38f076fadc', 'page_number': 1}, score: 132.34045561810535)]",0.0,-0.06792522221803665,0.0,0.0,0.0 +What ships were attacked in 1852?,Glencoe and Sea Nymph,"Sewer, rail, docklands, and electric installations were damaged.","[Document(id=c2a1dab12b313626459f38712aea7c56f2c359477289ae6301106d90c43faa1b, content: ' +In the north, substantial efforts were made against Newcastle-upon-Tyne and Sunderland, which were ...', meta: {'name': 'The_Blitz', 'source_id': 'fea4ab1f340714cf83574f4d286ac91905b5e48ca1b66018f16a22a73ba1bcbf', 'page_number': 1}, score: 157.01749548291156)]",0.0,0.018162749707698822,0.0,0.0,0.0 +What was the first fossil to display both clearly traditional reptilian characteristics?,Archaeopteryx,Archaeopteryx,"[Document(id=22e5680a1c41aa000971ff89fccd80f808c8f88ae7d0f5c0d81eb763ce026955, content: ' Unlike Archaeopteryx and the non-avialan feathered dinosaurs, who primarily ate meat, recent studie...', meta: {'name': 'Bird', 'source_id': '76114e3e9c92caec1de2bcfd38b6f22e1b48487f665fa6392f914934a6748ae1', 'page_number': 1}, score: 200.4427546556809)]",1.0,0.9999999403953552,1.0,1.0,1.0 +What are the theories that integrate molecular genetics with Darwinian evolution called?,the modern evolutionary synthesis,the modern evolutionary synthesis,"[Document(id=54fb8e44f718e454d5f4aef7097036553aa91b829fc2ea08644a1f9fc17e50f5, content: ' +The theories developed in the 1930s and 1940s to integrate molecular genetics with Darwinian evolut...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 186.4803046658906)]",1.0,1.0,1.0,1.0,1.0 +At what time did Tesla get dinner?,8:10 p.m,eight o'clock,"[Document(id=5dfd5078daafbc2ef3695636e6d70b42db2abc79ac389c20eac0417969714c11, content: ' Tesla would telephone his dinner order to the headwaiter, who also could be the only one to serve h...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 200.28502177347957)]",1.0,0.6441541910171509,1.0,1.0,1.0 +In what year did the French reach Alsace?,1444,1444,"[Document(id=18dbb6f8a4452ee88a36944ed7b5a77c2233677f5c2c84c6d98113a4c1519651, content: ' After the conclusion of the war, France was again free to pursue its desire to reach the Rhine and ...', meta: {'name': 'Alsace', 'source_id': '1105499d303196fb0b7ae4d26209720b0040e63376f114d458fd77f05836cbd4', 'page_number': 1}, score: 180.92305663796213)]",1.0,1.0,1.0,1.0,1.0 +Who was responsible for the handling of the Roman census?,two censors,censors,"[Document(id=3749ef6e14ea9af3893cfd84917e3a39acc2212480fc846e5a9231414a5599e6, content: ' Praetors administered civil law and commanded provincial armies. Every five years, two censors were...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 160.88829017583132)]",1.0,0.5763707756996155,1.0,1.0,1.0 +When was a resolution agreed to about Chinese human rights issues in San Francisco?,"April 1, 2008","April 1, 2008","[Document(id=80f5e2e80143434aa6d2d78a16f411f7f6ab4f8ffffbd29914ad0c75e8bf31da, content: ' The cost to the city for hosting the event was reported to be USD $726,400, nearly half of which ha...', meta: {'name': '2008_Summer_Olympics_torch_relay', 'source_id': 'a45e6ea09a1ff12d8a239a682c8ef3735ae894d5ab6427b50909053bb046e8b5', 'page_number': 1}, score: 120.63367823315784)]",1.0,0.9999999403953552,1.0,1.0,1.0 +Who was the last pope to be crowned?,Paul VI,Pope John XXIII,"[Document(id=2210efe78b716d2e44c94aae5208e52012507f25a18877960555a3c61a18e96f, content: ' Peter's Square: ""Dear children, returning home, you will find children: give your children a hug an...', meta: {'name': 'Pope_John_XXIII', 'source_id': '21b200b333306d54410b53cfd12e123951dabfc1f534196e5309132d00e8184d', 'page_number': 1}, score: 198.73923583731397)]",0.0,0.6684889197349548,0.0,0.0,0.0 +Who handles serious medical emergencies on Norfolk Island?,the Royal Australian Air Force,Royal Australian Air Force,"[Document(id=c983e8f9fccac478cd7d895c53ca7636f4291c3a87693dd68246b03e75a4be42, content: ' All visitors to Norfolk Island, including Australians, are recommended to purchase travel insurance...', meta: {'name': 'Norfolk_Island', 'source_id': '8be872c9c3302a16163599d52ec60841ddff13042f8e8e2e0aa14de2c192e2ee', 'page_number': 1}, score: 151.58417156835065)]",1.0,0.9777635335922241,1.0,1.0,1.0 +Were Roman soldiers expected to purchase their own equipment?,"wealthiest citizens, who were able to purchase the best equipment",the Roman state was forced to arm its soldiers at the expense of the state,"[Document(id=f0adc94ea827dd57e6a4ed0151d80a56b2b699e6457dfd794787f7d9f9121082, content: ' As a result, the Roman state was forced to arm its soldiers at the expense of the state, which it h...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 148.5285249465643)]",1.0,0.1473473310470581,1.0,1.0,1.0 +Who did Nasser's group try to assassinate?,Hussein Sirri Amer,Prime Minister Hussein Sirri Pasha,"[Document(id=25d98a87bc4019e4527fe3dbf903b4491eb5dbfb5600c6d91214b870a2dbf0f9, content: ' It was here that Nasser and his closest comrades, including Sadat and Amer, first discussed their d...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 193.0604166327542)]",1.0,0.6850929260253906,1.0,1.0,1.0 +What was AJ Ayer an important member of?,Logical empiricism,Officer's Club,"[Document(id=62e6ed804eee1f4f43c5f9b715fe2cd7a4d80fc795bab6b82fe1ec962f592d64, content: ' Nasser recalled that her wails ""haunted"" him and firmly dissuaded him from undertaking similar acti...', meta: {'name': 'Gamal_Abdel_Nasser', 'source_id': 'b85466a1645c50ee3517e05cdea8d4daa48f1870f244255ab389ead0fa0e44ed', 'page_number': 1}, score: 134.38736074532318)]",0.0,0.01744076795876026,0.0,0.0,0.0 +In which year was a settlement reached regarding GE's pollution of the Housatonic River?,1999,1977,"[Document(id=f6a12178d43defb2a103deca0f4fb166a5e99e5a8e58be48c76be81bd5d2bd38, content: ' Around the same time, General Electric's Canadian counterpart, Canadian General Electric, was forme...', meta: {'name': 'General_Electric', 'source_id': '26d4044eb2d79191635af69e57c262af8041a70871e60e989128c458529f04cc', 'page_number': 1}, score: 122.22996323290984)]",1.0,0.4726710319519043,1.0,1.0,1.0 +What area in Charleston is considered the 'industrial area'?,the city's Neck area,downtown medical district,"[Document(id=0b7915915ef0141c882fe46817cee7fb758759c0f34482f57922cd19e5f5e512, content: ' The downtown medical district is experiencing rapid growth of biotechnology and medical research in...', meta: {'name': 'Charleston,_South_Carolina', 'source_id': '6135d76cf2064b0c4f70e685c5c7e15b8093869ebd10f9a74e8acb07e2173763', 'page_number': 1}, score: 131.85885427937427)]",1.0,0.3277379870414734,1.0,1.0,1.0 +In what year did Frédéric obtain citizenship in France?,1835,987,"[Document(id=3077d778b76a4be9c89493fd0024e1efac75ffc05c5234cb0f7221f8eb64e25d, content: ' His burial place became an important religious shrine; the Basilica of Saint-Denis was built there ...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 119.30819864596624)]",0.0,0.09933178126811981,0.0,0.0,0.0 +What was Tesla afraid someone was trying to do with his invention?,steal the invention,"teleforce"" weapon","[Document(id=60f1f69a282d73eabc8ac6d80df34e28a168bcd0259aa89f6195a0e4b5d2af7e, content: ' In early 1938, Tesla was able to get up. He at once resumed the pigeon-feeding walks on a much more...', meta: {'name': 'Nikola_Tesla', 'source_id': 'ceea6a1645d15ba7fe20b687be69cd66b3781b3889ef8a0693edc64967068a28', 'page_number': 1}, score: 160.49288449971363)]",1.0,0.07724405825138092,1.0,1.0,1.0 +What is the group called that does not agree with government at all?,anarchists,Anti-Federalists,"[Document(id=28633a57f551f7befc042c5ec0a1465570662253672db0458a7f7f56f6428917, content: ' Constitution was being drafted, the Federalist Party supported a stronger central government, while...', meta: {'name': 'Federalism', 'source_id': '05eb4479f6579b86eef28e7954d58c2044997504c63cad297935c95307861772', 'page_number': 1}, score: 109.20383352308949)]",0.0,0.4092292785644531,0.0,0.0,0.0 +What was the name of the bookstore the authors met in?,Old Corner Bookstore," +Borders Books","[Document(id=dc39a0d280bd80482bd1f85abb4ea6b5faa34f6852b88a443a1213340981baeb, content: ' +Borders Books, started in Ann Arbor, was opened by brothers Tom and Louis Borders in 1971 with a st...', meta: {'name': 'Ann_Arbor,_Michigan', 'source_id': '55a6d0ee3d94f5d503c08fe4f77804c12af611ae1760064d896f02c1d99c66cc', 'page_number': 1}, score: 93.23013565822075)]",0.0,0.4030146896839142,0.0,0.0,0.0 +Time and space are both examples of what type of resource?,complexity resources,technology,"[Document(id=02932955f4b1491e6e0753a1f4c1655d6a498d453ea6bf1b48fe583af02a48db, content: ' The question concerns how time is allocated across a number of activities (such as time spent at ho...', meta: {'name': 'Time', 'source_id': 'f3ee42c6d38306d65c6207a15a1b9c65ff08a69728d5d0197f9593548177b229', 'page_number': 1}, score: 102.38581866089173)]",0.0,0.28706932067871094,0.0,0.0,0.0 +What had happened to Frederick's extra troops from Russia?,"Catherine, who ended Russia's alliance with Prussia and withdrew from the war",the Russians withdrew,"[Document(id=eca5c9f3c39c66eacec8a656a3f8f9321556069ec0115ec391afa4709f5324eb, content: ' +By this point Frederick was increasingly concerned by the Russian advance from the east and marched...', meta: {'name': 'Seven_Years%27_War', 'source_id': '3ce1ed9d37c2147a385da7d5e5b943fd3d7b17c5d89dca3dd9b367b2b1f095e3', 'page_number': 1}, score: 176.916268511699)]",1.0,0.5133723020553589,1.0,1.0,1.0 +What percentage of its workforce did Dell terminate in from 2007 to 2008?,10%,"1,900","[Document(id=0a58a9bec3bafa763c6bd2394f7973d45a99b44ce82a60edde77fc80d33bce92, content: ' plants was transferred to contract manufacturers in Asia and Mexico, or some of Dell's own factorie...', meta: {'name': 'Dell', 'source_id': '2a4fe0f2b4ecfc25a3563020204c8a55c9674b77ca6df3c224557299cb0a5581', 'page_number': 1}, score: 147.43065296511634)]",1.0,0.3221844434738159,1.0,1.0,1.0 +What organization was Rome's official caretaker?,the senate,Christian Unity Secretariat,"[Document(id=dd784bbd39741346a23d0cf3feeacc82ec660daed03b91ed8b39035d65072e43, content: ' He was the last pope to date to be crowned; his successor Pope John Paul I replaced the Papal Coron...', meta: {'name': 'Pope_Paul_VI', 'source_id': 'fc82a40c990c0b3840b06b32643858d39984b66d78ae950561d809dc84d6edf8', 'page_number': 1}, score: 156.08060962392145)]",0.0,0.2545766830444336,0.0,0.0,0.0 +Who chose BYU as the location of the national Middle East Language Resource Center?,United States Department of Education,United States Department of Education,"[Document(id=10406721e1bacc4b94568efb8105444ce33c23b507417efe2e032c93615c0677, content: ' Several of its language programs are the largest of their kind in the nation, the Russian program b...', meta: {'name': 'Brigham_Young_University', 'source_id': 'cb3b81b6f2c540426803106face67bc5fd4db7b1a9ef422ec3601456732b4785', 'page_number': 1}, score: 205.01521643478702)]",1.0,1.0,1.0,1.0,1.0 +Who discovered Hydrogen gas?,Robert Boyle,Robert Boyle,"[Document(id=694b7ad0ec30f16563c095bfd8aa73a286298746fa8b5b6b568961e806fe7462, content: ' +In 1671, Robert Boyle discovered and described the reaction between iron filings and dilute acids, ...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 200.7654419813894)]",1.0,1.0000001192092896,1.0,1.0,1.0 +Where was Polybius forced to go as hostage?,Rome,Beroea,"[Document(id=b0db6a298369be019a3d01ef5bfbe30dd534f6a2be471b538c76854bccadccaa, content: '."" (Pope Gregory I, would have full access to the Vatican Archives). +Constantius, ordered Liberius i...', meta: {'name': 'Athanasius_of_Alexandria', 'source_id': '2a8e7ec0a44075f1a567744982dcecf4bcea19f0065d4f73c742398a05fa1642', 'page_number': 1}, score: 154.05413994317473)]",0.0,0.1712186634540558,0.0,0.0,0.0 +In what decade was disco popular?,1970s,90's,"[Document(id=cf86cea5ed69b1cddee9e7bfd4d861b5f270fd7c89048980e1e45da52aa037d0, content: ' Techno and rave was happening in the 90's and had a lot of different forms. There was very experime...', meta: {'name': 'Madonna_(entertainer)', 'source_id': '0868d847bb768c30a7507239169d5e441bb3181b66c8b9353a863daa3d58e425', 'page_number': 1}, score: 179.8445269758427)]",0.0,0.5847175121307373,0.0,0.0,0.0 +What do the rivers form?,lakes,lakes,"[Document(id=d9c66874c7c0a9fe45438a723827763a95eacdbe604f0f1e81a1a311692bb3a4, content: ' The Rhine originates in a 30 square kilometre area in Switzerland and represents almost 60 percent ...', meta: {'name': 'Alps', 'source_id': 'd2eb3ad3af7d6110ae691bf40660ff7a79e4e9dbfd25785b4e55c7700bfcae79', 'page_number': 1}, score: 136.31986270459694)]",1.0,1.0,1.0,1.0,1.0 +What were the first 256 code points of Unicode made identical to? ,ISO-8859-1,the English alphabet,"[Document(id=79f26e42dd7702142ed7d1e92a23812856512d31177acccc13a6b2634280e3f3, content: 'Originally based on the English alphabet, ASCII encodes 128 specified characters into seven-bit inte...', meta: {'name': 'ASCII', 'source_id': '4bd7e27c39431d5c4712c32c534ed40f7df89f7b80f6eccb22975178fb7b1be2', 'page_number': 1}, score: 154.38856966441136)]",0.0,0.025279667228460312,0.0,0.0,0.0 +"How many countries did her song ""Irreplaceable"" get number one status in?",five,sixth,"[Document(id=1ab6320b9edd68ea2165ed4c6ee68d6651f440bfd61bfdb952d62356bfc5ffe0, content: ' The album's first single, ""Frozen"", became Madonna's first single to debut at number one in the UK,...', meta: {'name': 'Madonna_(entertainer)', 'source_id': '0868d847bb768c30a7507239169d5e441bb3181b66c8b9353a863daa3d58e425', 'page_number': 1}, score: 115.12531859825845)]",0.0,0.5111092329025269,0.0,0.0,0.0 +How many Examination Boards exist in India?,30,30,"[Document(id=87a93f72cbb9c00ea6477b2a31d21c284a2ecf07733f93b0fb4ba7b277f5b8f5, content: ' It is within the power of both the union government and the state governments to govern schools sin...', meta: {'name': 'Private_school', 'source_id': 'c62330ba95e9919513bc9fc65c0d21a748b4a518b9b1f905a113de7f37274e44', 'page_number': 1}, score: 129.9025361296588)]",1.0,1.0,1.0,1.0,1.0 +What play showed an early depiction of civil disobedience?,Antigone,Antigone,"[Document(id=99743eb7e7277e11f539b5449857c5f012a842facd618f6a75ff178a89fc1f76, content: 'One of its earliest massive implementations was brought about by Egyptians against the British occup...', meta: {'name': 'Civil_disobedience', 'source_id': '505ac0ceafea50bbbfc74281c3c932b97c3074217d7ffe74512a2d0eb97fe491', 'page_number': 1}, score: 155.67890239334605)]",1.0,1.0,1.0,1.0,1.0 +"Once in conversation with the Queen, how is Elizabeth addressed?",Ma'am,solo public appearance on a visit to the Grenadier Guards,"[Document(id=331c1fa6114188993824feeab60399c92e5d44233f4ea3a0e3a47f363d1ca459, content: ' The book describes Elizabeth's love of horses and dogs, her orderliness, and her attitude of respon...', meta: {'name': 'Elizabeth_II', 'source_id': '701ad172dfcc8791d990bf596fe61055a5548b2e1847cd32a5d9c011ed8a1ed8', 'page_number': 1}, score: 143.80750919536004)]",1.0,-0.02931702323257923,1.0,1.0,1.0 +In what century did Paris become largely into fashion?,19th,19th,"[Document(id=ff2ec1a3232f7d74c84b198defa77dd9879cdb65474a7dab2bf47da19675285e, content: ' +Paris has been an international capital of high fashion since the 19th century, particularly in the...', meta: {'name': 'Paris', 'source_id': '52b8f3ebbfb9baf9226768b89151b597c215bb08f28021147d2d798f707976c9', 'page_number': 1}, score: 177.00800205562217)]",1.0,1.0,1.0,1.0,1.0 +When did Alfred the Great begin his reign?,871,1740,"[Document(id=51a7cda8329feb5be4b828db4c3949d0f3972d8afc87046ed9763336b16b4114, content: ' +Frederick the Great, the king of Prussia from 1740 to 1786, saw himself as a leader of the Enlighte...', meta: {'name': 'Age_of_Enlightenment', 'source_id': 'd6935b64f79e30b7c3de4c952b643a55674c548cd0bb703870a094fd3e646d84', 'page_number': 1}, score: 145.02006011302487)]",0.0,0.25414443016052246,0.0,0.0,0.0 +What two methods can be used when using Load Testing?,data or a large number of users,large quantities of data or a large number of users,"[Document(id=4753cc282d43999b382172589cceb076cee5e8185383911435f640a568593cbe, content: ' +Load testing is primarily concerned with testing that the system can continue to operate under a sp...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 154.96837900503925)]",1.0,0.8635382056236267,1.0,1.0,1.0 +what may possibly be a solution to net neutrality concerns? ,municipal broadband,municipal broadband,"[Document(id=5f8dc7264853d3b0b88a03420e7878e16fa4c08ef70ab7cc5d722e889b21dda6, content: 'S. Federal Communications Commission (FCC) was reported to be considering a new rule that will permi...', meta: {'name': 'Internet_service_provider', 'source_id': 'b6cdeebacab1c0fc99cc348ba10a7ad5f8a09ca4691c74f89374730939ffbbb4', 'page_number': 1}, score: 119.60475215544247)]",1.0,1.0,1.0,1.0,1.0 +What are the ground-attack aircraft in the USAF usually deployed in support for? ,U.S. ground forces,U.S. ground forces,"[Document(id=87ffdf42e04cd4e5aabc33e35caf9614ba61758e355a1d97a38a524ba0bedf46, content: ' +The ground-attack aircraft of the USAF are designed to attack targets on the ground and are often d...', meta: {'name': 'United_States_Air_Force', 'source_id': '1bb222de66e5a2c81f1d5b75bf3eced066939aec82934a5cb176a607b4e8cc8f', 'page_number': 1}, score: 259.32749430318006)]",1.0,0.9999999403953552,1.0,1.0,1.0 + In what year did the invasion of Russia take place?,1812,1854,"[Document(id=ae931e2fa116a2f50e732849bda9db89ea2500925baddc8026e2033f9f7f334c, content: ':175–176 In July 1854, the Turks under Omar Pasha crossed the Danube into Wallachia and on 7 July 18...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 166.47104419579009)]",0.0,0.6653626561164856,0.0,0.0,0.0 +What was the practice of religion to the Romans?,personal choice,Private and personal worship,"[Document(id=8cb202622013f5234d79ec3a4275501aaa91b4fdefe32dbbccfd335829f7ffa1, content: '[citation needed] +Roman religious beliefs date back to the founding of Rome, around 800 BC. However,...', meta: {'name': 'Roman_Republic', 'source_id': '3f4556109d2f785759587a7f8d52a5aa1b1abda3015c73990aa173f05a0d62c1', 'page_number': 1}, score: 224.81247000090372)]",0.0,0.4898977279663086,0.0,0.0,0.0 +What is one example of failure testing?,Software fault injection,Software fault injection,"[Document(id=827178e2fa13b1045cb5486ccca30dd5c2a87aa9e9f903cbdd2808ec1b93a098, content: ' Beta versions can be made available to the open public to increase the feedback field to a maximal ...', meta: {'name': 'Software_testing', 'source_id': '141fdc660b7505b00b8550733c65d48914cde6833d577a5494a41a6cb01a3827', 'page_number': 1}, score: 111.26517223272702)]",1.0,0.9999998807907104,1.0,1.0,1.0 +"Out of the maximum amount of players allowed, one must be a what?",goalkeeper,over 21,"[Document(id=099e1639d5c40822a089a702137bc67222d29e4cb46a5fcb2c1651014d1f0cf1, content: ' The two transfer windows run from the last day of the season to 31 August and from 31 December to 3...', meta: {'name': 'Premier_League', 'source_id': '369baa9a5a854704f16fadd35144d3a3150cf1e9ae1671d9b50a3448547b814b', 'page_number': 1}, score: 96.30151656116497)]",0.0,0.07544993609189987,0.0,0.0,0.0 +In which year was the new constitution ratified?,1855,1789,"[Document(id=622a6215e7b7fd56283c9c82ebb139d7b9ea3b341dd076ca022738fd1e235092, content: ' Following this ""Pyrrhic victory"", Cornwallis chose to move to the Virginia coastline to get reinfor...', meta: {'name': 'North_Carolina', 'source_id': '3282e3dedcf0f1923e66ebf5b79cf9b9deec27c8ca1df8908e03770b02b38126', 'page_number': 1}, score: 126.51363074019773)]",0.0,0.5541975498199463,0.0,0.0,0.0 +What vessel was attacked in 1845?,Naiad,Rossiya,"[Document(id=f2b1a574489e03f9b830e06bacdbab3d40738c21ff9c614a55ed401b75720143, content: ' These battles are known in Finland as the Åland war. +In August 1855 a Franco-British naval force ca...', meta: {'name': 'Crimean_War', 'source_id': '66f44772af3b4c2aebf16f2866ee8ea9e71fc3471480ed94ea0b2638c0f5b570', 'page_number': 1}, score: 158.70825348764376)]",0.0,0.05331704020500183,0.0,0.0,0.0 +When did Travel + Leisure list Yale as one of the most beautiful campuses?,2011,2011,"[Document(id=1eb83a50e062eec7af6abab3de1bf0b5576a4c75ac168cc64f1b77a2e268c579, content: '7 km2) Yale-Myers Forest in Connecticut's Quiet Corner—and nature preserves including Horse Island. +...', meta: {'name': 'Yale_University', 'source_id': 'f33638c4d6b8d7572ba500f3a8affca1729aae3046c83a94e2a7424dd50f6549', 'page_number': 1}, score: 191.53811571133681)]",1.0,1.0,1.0,1.0,1.0 +What sort of variants did the Human Genome Project indicate that many of the measured transcripts were?,alternative variants,alternative variants of the same genes,"[Document(id=616efe41146a20d96fcfa37d16fa50ce52cca8acf43b362e86a15a714f86ddff, content: ' Subsequently, the sequencing in the Human Genome Project indicated that many of these transcripts w...', meta: {'name': 'Gene', 'source_id': 'aedb09488dc3b766bb8c9b41d942aeb0d62f6f6c6c92506004b72557882725cc', 'page_number': 1}, score: 96.17689529097493)]",1.0,0.7289966940879822,1.0,1.0,1.0 +During what year did the Italians invade Libya?,1911,1980,"[Document(id=6fa0d2cdcb0d00898debed351e32ab41dc4678320f30c21f07a92e9289207601, content: ' Military spending increased, while other administrative budgets were cut back. Libya had long suppo...', meta: {'name': 'Muammar_Gaddafi', 'source_id': '8152b4f6f236e7f6841b777d688aa105c66dbcee3fe926720311ce23eed0a380', 'page_number': 1}, score: 214.09821488402224)]",1.0,0.48459139466285706,1.0,1.0,1.0 +What did Davidson believe the God of Aristotle is synonymous with?,rational thought,monasticism,"[Document(id=0dd55f7c8a45d79ff127432f3d18e4c842a70b92bbe1cf8864e7cfbdffccbbca, content: ' +Nonetheless, within a few years of his death, Gregory of Nazianzus called him the ""Pillar of the Ch...', meta: {'name': 'Athanasius_of_Alexandria', 'source_id': '2a8e7ec0a44075f1a567744982dcecf4bcea19f0065d4f73c742398a05fa1642', 'page_number': 1}, score: 189.85400139648488)]",0.0,0.2760372757911682,0.0,0.0,0.0 +What percentage of students at Notre Dame are the children of former Notre Dame students?,21–24%,39.1%),"[Document(id=88c1169eb8a5c301a3e8bb36466a4a18159a705a0296cfed8548baf03d852546, content: ' The academic profile of the enrolled class continues to rate among the top 10 to 15 in the nation f...', meta: {'name': 'University_of_Notre_Dame', 'source_id': '4d72b1adc03b693b389a144097b6ebc1668cfbaef6fa42bbc023428ed14a60d9', 'page_number': 1}, score: 129.95337496597662)]",1.0,0.8029516339302063,1.0,1.0,1.0 +On what day did U.S> planes located Ozawa's fleet?,second,20 October,"[Document(id=7f6d0a9e9a3e49df55737ef78eeafa784e1bc879c484b5e1795fb994301c9ebc, content: ' Kurita's force had begun the battle with five battleships; when he returned to Japan, only Yamato w...', meta: {'name': 'Pacific_War', 'source_id': 'bbc89e90f0fa3a97026e63250a28c1a17f157ab8c6d81bc70d6d55b8da059c57', 'page_number': 1}, score: 174.6041177508166)]",1.0,0.03874627500772476,1.0,1.0,1.0 +Greek was crowned the European champions in what event?,Euro 2004,basketball,"[Document(id=12d9fa7ff9f514e66e336f50a70a372c3162770635ba732cc4335141fd65ee50, content: 'O.K, Maroussi) have won 3 Triple Crowns, 5 Saporta Cups, 2 Korać Cups and 1 FIBA Europe Champions Cu...', meta: {'name': 'Greece', 'source_id': '13d8bb8decd2812a2721cd8f6ecffbf13e7a3bc07776ab008e28497e15162217', 'page_number': 1}, score: 183.37264468656736)]",1.0,0.03532267361879349,1.0,1.0,1.0 +Which tense indicates that an action hasn't finished?,imperfective,"(""Can you your pen not find because it far too dark","[Document(id=d10bc2aa429e55969934f976b711e2ed0e408629173680b12b85b5711713dc02, content: ' in het jaar des Heren (Anno Domini), where ""-en"" is actually the genitive ending of the weak noun. ...', meta: {'name': 'Dutch_language', 'source_id': '38c4da472a27f770c9c829b498d0e1e30e81320ca29e7d0546aebb638c81be6a', 'page_number': 1}, score: 109.78396328462887)]",0.0,-0.03480945900082588,0.0,0.0,0.0 +What acoustic element could be distorted by different audio settings in iPods prior to 2007?,Bass,bass instruments,"[Document(id=158a6b8443733e42d24dec31180770da4724f3bd14ed49e47a937624f7aeed47, content: ' This would happen for EQ settings like R&B, Rock, Acoustic, and Bass Booster, because the equalizer...', meta: {'name': 'IPod', 'source_id': 'f0324763387a7e11c235569bada6935891b2c5ba40dd065ab5acc1bb5de8dbed', 'page_number': 1}, score: 143.5323404197054)]",1.0,0.40640756487846375,1.0,1.0,1.0 +In what way can Neutral triatomic hydrogen exist?,excited form,molecular forms,"[Document(id=20580d781b7cb1b0f929dd861809d47639d869e422d7e9f42898f3544f7840a9, content: ' The most common isotope of hydrogen, termed protium (name rarely used, symbol 1H), has one proton a...', meta: {'name': 'Hydrogen', 'source_id': '68980750bab4ae17fc394871df89fbbb807d0b180a08bf2698ebc1ec0613d8a2', 'page_number': 1}, score: 109.88123072416457)]",1.0,0.041921474039554596,1.0,1.0,1.0 +How are children used in the conflicts in Burma?,Child soldiers have and continue to play a major part in the Burmese Army as well as Burmese rebel movements.,Children are being sold as conscripts,"[Document(id=0088f05f01a71535c2cb1e5b41db4930183fdb107aa5d9b1b2402b982ed6e8a4, content: '"" In July 2013, the Assistance Association for Political Prisoners indicated that there were approxi...', meta: {'name': 'Myanmar', 'source_id': '3d5d8c2f0b407573851f7793034e3f28664c9739d422faaf23da089d3bb275b3', 'page_number': 1}, score: 154.09655586407015)]",1.0,0.30359911918640137,1.0,1.0,1.0 +In what decades were Arsenal players stereotyped in a negative way?,1970s and 1980s,1970s and 1980s,"[Document(id=1bdd87144e8c05c0c8820e3bd6186deb63a4be13d5810e39af795993e4721bb7, content: ' The book was twice adapted for the cinema – the 1997 British film focuses on Arsenal's 1988–89 titl...', meta: {'name': 'Arsenal_F.C.', 'source_id': 'b197b14120aabbc3bf2073d4b3e99c1c51d56d5ac5248f8a97dadcfa056a6f7e', 'page_number': 1}, score: 87.2309971301853)]",1.0,1.0,1.0,1.0,1.0 +When did new construction start in Melbourne?,late 1890s,February 2006,"[Document(id=7da3a14e7c1a2593c36359e18d0afbc482e5aba0c1ad5c3a8e024a2d8e6820ca, content: '8 billion annually. Melbourne has been attracting an increasing share of domestic and international ...', meta: {'name': 'Melbourne', 'source_id': 'e9cc00699ea29b72949874ec0e147efa277ecdaa2a727246f376aab1a8d342bd', 'page_number': 1}, score: 190.52435017827867)]",1.0,0.34920746088027954,1.0,1.0,1.0 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:10.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:10.csv new file mode 100644 index 0000000..b719343 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:10.csv @@ -0,0 +1,6 @@ +metrics,score +doc_mrr,0.73 +sas,0.580519768241793 +doc_map,0.73 +doc_recall_single_hit,0.73 +doc_recall_multi_hit,0.73 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:15.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:15.csv new file mode 100644 index 0000000..5f9cf86 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:15.csv @@ -0,0 +1,6 @@ +metrics,score +doc_mrr,0.73 +sas,0.5086992412433028 +doc_map,0.73 +doc_recall_single_hit,0.73 +doc_recall_multi_hit,0.73 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:5.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:5.csv new file mode 100644 index 0000000..b4a7158 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:1__chunk_size:5.csv @@ -0,0 +1,6 @@ +metrics,score +doc_mrr,0.75 +sas,0.5439403309300542 +doc_map,0.75 +doc_recall_single_hit,0.75 +doc_recall_multi_hit,0.75 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:10.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:10.csv new file mode 100644 index 0000000..dedcf81 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:10.csv @@ -0,0 +1,6 @@ +metrics,score +doc_mrr,0.76 +sas,0.7005659690685571 +doc_map,0.76 +doc_recall_single_hit,0.76 +doc_recall_multi_hit,0.76 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:15.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:15.csv new file mode 100644 index 0000000..1dd7dfb --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:15.csv @@ -0,0 +1,6 @@ +metrics,score +doc_mrr,0.74 +sas,0.5680542129278183 +doc_map,0.74 +doc_recall_single_hit,0.74 +doc_recall_multi_hit,0.74 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:5.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:5.csv new file mode 100644 index 0000000..b9fd055 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:2__chunk_size:5.csv @@ -0,0 +1,6 @@ +metrics,score +doc_mrr,0.8 +sas,0.6338555014133453 +doc_map,0.8 +doc_recall_single_hit,0.8 +doc_recall_multi_hit,0.8 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:10.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:10.csv new file mode 100644 index 0000000..c08a76c --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:10.csv @@ -0,0 +1,6 @@ +metrics,score +doc_mrr,0.78 +sas,0.7054367866925895 +doc_map,0.78 +doc_recall_single_hit,0.78 +doc_recall_multi_hit,0.78 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:15.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:15.csv new file mode 100644 index 0000000..03b1567 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:15.csv @@ -0,0 +1,6 @@ +metrics,score +doc_mrr,0.75 +sas,0.6176330048777163 +doc_map,0.75 +doc_recall_single_hit,0.75 +doc_recall_multi_hit,0.75 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:5.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:5.csv new file mode 100644 index 0000000..842092a --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-MiniLM-L6-v2__top_k:3__chunk_size:5.csv @@ -0,0 +1,6 @@ +metrics,score +doc_mrr,0.81 +sas,0.6950865586102009 +doc_map,0.81 +doc_recall_single_hit,0.81 +doc_recall_multi_hit,0.81 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:1__chunk_size:10.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:1__chunk_size:10.csv new file mode 100644 index 0000000..dd2be57 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:1__chunk_size:10.csv @@ -0,0 +1,6 @@ +metrics,score +doc_mrr,0.79 +sas,0.617127796690911 +doc_map,0.79 +doc_recall_single_hit,0.79 +doc_recall_multi_hit,0.79 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:1__chunk_size:15.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:1__chunk_size:15.csv new file mode 100644 index 0000000..86b58ef --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:1__chunk_size:15.csv @@ -0,0 +1,6 @@ +metrics,score +doc_mrr,0.77 +sas,0.599645134229213 +doc_map,0.77 +doc_recall_single_hit,0.77 +doc_recall_multi_hit,0.77 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:1__chunk_size:5.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:1__chunk_size:5.csv new file mode 100644 index 0000000..54a7ef9 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:1__chunk_size:5.csv @@ -0,0 +1,6 @@ +metrics,score +doc_mrr,0.78 +sas,0.6250351478345693 +doc_map,0.78 +doc_recall_single_hit,0.78 +doc_recall_multi_hit,0.78 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:2__chunk_size:10.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:2__chunk_size:10.csv new file mode 100644 index 0000000..ac34d1b --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:2__chunk_size:10.csv @@ -0,0 +1,6 @@ +metrics,score +doc_mrr,0.8 +sas,0.6936491883173584 +doc_map,0.8 +doc_recall_single_hit,0.8 +doc_recall_multi_hit,0.8 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:2__chunk_size:15.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:2__chunk_size:15.csv new file mode 100644 index 0000000..2fdb401 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:2__chunk_size:15.csv @@ -0,0 +1,6 @@ +metrics,score +doc_mrr,0.81 +sas,0.678553427439183 +doc_map,0.81 +doc_recall_single_hit,0.81 +doc_recall_multi_hit,0.81 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:2__chunk_size:5.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:2__chunk_size:5.csv new file mode 100644 index 0000000..363cd3d --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:2__chunk_size:5.csv @@ -0,0 +1,6 @@ +metrics,score +doc_mrr,0.77 +sas,0.6755654202587903 +doc_map,0.77 +doc_recall_single_hit,0.77 +doc_recall_multi_hit,0.77 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:3__chunk_size:10.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:3__chunk_size:10.csv new file mode 100644 index 0000000..62763d4 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:3__chunk_size:10.csv @@ -0,0 +1,6 @@ +metrics,score +doc_mrr,0.82 +sas,0.7250674534961581 +doc_map,0.82 +doc_recall_single_hit,0.82 +doc_recall_multi_hit,0.82 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:3__chunk_size:15.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:3__chunk_size:15.csv new file mode 100644 index 0000000..6344f9c --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:3__chunk_size:15.csv @@ -0,0 +1,6 @@ +metrics,score +doc_mrr,0.8 +sas,0.7145748696476221 +doc_map,0.8 +doc_recall_single_hit,0.8 +doc_recall_multi_hit,0.8 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:3__chunk_size:5.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:3__chunk_size:5.csv new file mode 100644 index 0000000..0cdbfd6 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_all-mpnet-base-v2__top_k:3__chunk_size:5.csv @@ -0,0 +1,6 @@ +metrics,score +doc_mrr,0.8 +sas,0.6931215447746217 +doc_map,0.8 +doc_recall_single_hit,0.8 +doc_recall_multi_hit,0.8 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:10.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:10.csv new file mode 100644 index 0000000..609600f --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:10.csv @@ -0,0 +1,6 @@ +metrics,score +doc_mrr,0.63 +sas,0.45827048864215614 +doc_map,0.63 +doc_recall_single_hit,0.63 +doc_recall_multi_hit,0.63 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:15.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:15.csv new file mode 100644 index 0000000..fea3ba2 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:15.csv @@ -0,0 +1,6 @@ +metrics,score +doc_mrr,0.65 +sas,0.41487298944033685 +doc_map,0.65 +doc_recall_single_hit,0.65 +doc_recall_multi_hit,0.65 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:5.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:5.csv new file mode 100644 index 0000000..836647b --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:1__chunk_size:5.csv @@ -0,0 +1,6 @@ +metrics,score +doc_mrr,0.67 +sas,0.43005497539415954 +doc_map,0.67 +doc_recall_single_hit,0.67 +doc_recall_multi_hit,0.67 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:10.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:10.csv new file mode 100644 index 0000000..3ed5154 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:10.csv @@ -0,0 +1,6 @@ +metrics,score +doc_mrr,0.66 +sas,0.48562305023893715 +doc_map,0.66 +doc_recall_single_hit,0.66 +doc_recall_multi_hit,0.66 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:15.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:15.csv new file mode 100644 index 0000000..bcb1757 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:15.csv @@ -0,0 +1,6 @@ +metrics,score +doc_mrr,0.64 +sas,0.4707915974222124 +doc_map,0.64 +doc_recall_single_hit,0.64 +doc_recall_multi_hit,0.64 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:5.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:5.csv new file mode 100644 index 0000000..3823911 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:2__chunk_size:5.csv @@ -0,0 +1,6 @@ +metrics,score +doc_mrr,0.68 +sas,0.46445536917075514 +doc_map,0.68 +doc_recall_single_hit,0.68 +doc_recall_multi_hit,0.68 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:10.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:10.csv new file mode 100644 index 0000000..006a3aa --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:10.csv @@ -0,0 +1,6 @@ +metrics,score +doc_mrr,0.7 +sas,0.523677001548931 +doc_map,0.7 +doc_recall_single_hit,0.7 +doc_recall_multi_hit,0.7 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:15.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:15.csv new file mode 100644 index 0000000..c1c1d4c --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:15.csv @@ -0,0 +1,6 @@ +metrics,score +doc_mrr,0.68 +sas,0.5044686373043805 +doc_map,0.68 +doc_recall_single_hit,0.68 +doc_recall_multi_hit,0.68 diff --git a/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:5.csv b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:5.csv new file mode 100644 index 0000000..5187a27 --- /dev/null +++ b/evaluations/results/squad_parameter_search_extractive_qa_2024_06_20/score_report_msmarco-distilroberta-base-v2__top_k:3__chunk_size:5.csv @@ -0,0 +1,6 @@ +metrics,score +doc_mrr,0.67 +sas,0.5072944475337863 +doc_map,0.67 +doc_recall_single_hit,0.67 +doc_recall_multi_hit,0.67