EleutherAI · haileyschoelkopf · May 24, 2024 · Apr 8, 2024 · Apr 8, 2024 · Apr 8, 2024
@@ -1,5 +1,6 @@
 import os
 
+import datasets
 import yaml
 
 
@@ -173,6 +174,11 @@
  "word_unscrambling",
 ]
 
+skip_tasks = [
+ "simple_arithmetic_json_multiple_choice",
+ "simple_arithmetic_multiple_targets_json",
+]
+
 
 def main() -> None:
  for path, task_type in zip(
@@ -183,11 +189,29 @@ def main() -> None:
  for task in all_subtasks:
  file_name = f"{task}.yaml"
  try:
+ template_file = task_type
+ if path == "multiple_choice":
+ print(f"Checking {task} for multiple choices")
+ if task in skip_tasks:
+ continue
+ data = datasets.load_dataset("hails/bigbench", task + "_zero_shot")
+ multiple_choice_targets = data["default"][0][
+ "multiple_choice_targets"
+ ]
+ if len(multiple_choice_targets) == 0:
+ continue
+ else:
+ template_file = "multiple_choice_template_b_yaml"
+ if set(data["default"][0]["targets"]) < set(
+ multiple_choice_targets
+ ):
+ template_file = "multiple_choice_template_a_yaml"
+
  with open(f"{path}/{file_name}", "w", encoding="utf-8") as f:
  f.write("# Generated by utils.py\n")
  yaml.dump(
  {
- "include": f"../{task_type}",
+ "include": f"../{template_file}",
  "task": "bigbench_"
  + task
  + "_{}".format(task_type.split("_template_yaml")[0]),

@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: abstract_narrative_understanding_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_abstract_narrative_understanding_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: anachronisms_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_anachronisms_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: analogical_similarity_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_analogical_similarity_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: analytic_entailment_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_analytic_entailment_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: arithmetic_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_arithmetic_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: authorship_verification_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_authorship_verification_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: bbq_lite_json_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_bbq_lite_json_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: causal_judgment_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_causal_judgment_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: cause_and_effect_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_cause_and_effect_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: checkmate_in_one_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_checkmate_in_one_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: cifar10_classification_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_cifar10_classification_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: code_line_description_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_code_line_description_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: color_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_color_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: common_morpheme_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_common_morpheme_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: conceptual_combinations_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_conceptual_combinations_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: contextual_parametric_knowledge_conflicts_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_contextual_parametric_knowledge_conflicts_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: crash_blossom_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_crash_blossom_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: crass_ai_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_crass_ai_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: cryobiology_spanish_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_cryobiology_spanish_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: cs_algorithms_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_cs_algorithms_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: dark_humor_detection_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_dark_humor_detection_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: date_understanding_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_date_understanding_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: disambiguation_qa_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_disambiguation_qa_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: discourse_marker_prediction_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_discourse_marker_prediction_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: dyck_languages_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_dyck_languages_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: elementary_math_qa_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_elementary_math_qa_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: emoji_movie_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_emoji_movie_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: emojis_emotion_prediction_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_emojis_emotion_prediction_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: empirical_judgments_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_empirical_judgments_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: english_proverbs_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_english_proverbs_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: english_russian_proverbs_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_english_russian_proverbs_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: entailed_polarity_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_entailed_polarity_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: entailed_polarity_hindi_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_entailed_polarity_hindi_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: epistemic_reasoning_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_epistemic_reasoning_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: evaluating_information_essentiality_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_evaluating_information_essentiality_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: fact_checker_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_fact_checker_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: fantasy_reasoning_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_fantasy_reasoning_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: figure_of_speech_detection_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_figure_of_speech_detection_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: formal_fallacies_syllogisms_negation_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_formal_fallacies_syllogisms_negation_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: general_knowledge_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_general_knowledge_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: geometric_shapes_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_geometric_shapes_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: goal_step_wikihow_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_goal_step_wikihow_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: gre_reading_comprehension_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_gre_reading_comprehension_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: hhh_alignment_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_hhh_alignment_multiple_choice
@@ -1,4 +1,4 @@
 # Generated by utils.py
 dataset_name: hindu_knowledge_zero_shot
-include: ../multiple_choice_template_yaml
+include: ../multiple_choice_template_a_yaml
 task: bigbench_hindu_knowledge_multiple_choice