Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bigbench fix #1686

Merged
merged 7 commits into from
May 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion lm_eval/tasks/bigbench/generate_tasks.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os

import datasets
import yaml


Expand Down Expand Up @@ -173,6 +174,11 @@
"word_unscrambling",
]

# Subtasks that main() passes over while handling the "multiple_choice" path:
# the membership check runs before the dataset for the task is loaded.
skip_tasks = [
    "simple_arithmetic_json_multiple_choice",
    "simple_arithmetic_multiple_targets_json",
]


def main() -> None:
for path, task_type in zip(
Expand All @@ -183,11 +189,29 @@ def main() -> None:
for task in all_subtasks:
file_name = f"{task}.yaml"
try:
template_file = task_type
if path == "multiple_choice":
print(f"Checking {task} for multiple choices")
if task in skip_tasks:
continue
data = datasets.load_dataset("hails/bigbench", task + "_zero_shot")
multiple_choice_targets = data["default"][0][
"multiple_choice_targets"
]
if len(multiple_choice_targets) == 0:
continue
else:
template_file = "multiple_choice_template_b_yaml"
if set(data["default"][0]["targets"]) < set(
multiple_choice_targets
):
template_file = "multiple_choice_template_a_yaml"

with open(f"{path}/{file_name}", "w", encoding="utf-8") as f:
f.write("# Generated by utils.py\n")
yaml.dump(
{
"include": f"../{task_type}",
"include": f"../{template_file}",
"task": "bigbench_"
+ task
+ "_{}".format(task_type.split("_template_yaml")[0]),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: abstract_narrative_understanding_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_abstract_narrative_understanding_multiple_choice
2 changes: 1 addition & 1 deletion lm_eval/tasks/bigbench/multiple_choice/anachronisms.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: anachronisms_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_anachronisms_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: analogical_similarity_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_analogical_similarity_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: analytic_entailment_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_analytic_entailment_multiple_choice
2 changes: 1 addition & 1 deletion lm_eval/tasks/bigbench/multiple_choice/arithmetic.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: arithmetic_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_arithmetic_multiple_choice

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: authorship_verification_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_authorship_verification_multiple_choice

This file was deleted.

4 changes: 0 additions & 4 deletions lm_eval/tasks/bigbench/multiple_choice/auto_debugging.yaml

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: bbq_lite_json_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_bbq_lite_json_multiple_choice

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: causal_judgment_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_causal_judgment_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: cause_and_effect_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_cause_and_effect_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: checkmate_in_one_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_checkmate_in_one_multiple_choice

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: cifar10_classification_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_cifar10_classification_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: code_line_description_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_code_line_description_multiple_choice
4 changes: 0 additions & 4 deletions lm_eval/tasks/bigbench/multiple_choice/codenames.yaml

This file was deleted.

2 changes: 1 addition & 1 deletion lm_eval/tasks/bigbench/multiple_choice/color.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: color_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_color_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: common_morpheme_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_common_morpheme_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: conceptual_combinations_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_conceptual_combinations_multiple_choice

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: contextual_parametric_knowledge_conflicts_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_contextual_parametric_knowledge_conflicts_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: crash_blossom_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_crash_blossom_multiple_choice
2 changes: 1 addition & 1 deletion lm_eval/tasks/bigbench/multiple_choice/crass_ai.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: crass_ai_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_crass_ai_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: cryobiology_spanish_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_cryobiology_spanish_multiple_choice
4 changes: 0 additions & 4 deletions lm_eval/tasks/bigbench/multiple_choice/cryptonite.yaml

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: cs_algorithms_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_cs_algorithms_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: dark_humor_detection_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_dark_humor_detection_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: date_understanding_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_date_understanding_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: disambiguation_qa_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_disambiguation_qa_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: discourse_marker_prediction_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_discourse_marker_prediction_multiple_choice
4 changes: 0 additions & 4 deletions lm_eval/tasks/bigbench/multiple_choice/disfl_qa.yaml

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: dyck_languages_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_dyck_languages_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: elementary_math_qa_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_elementary_math_qa_multiple_choice
2 changes: 1 addition & 1 deletion lm_eval/tasks/bigbench/multiple_choice/emoji_movie.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: emoji_movie_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_emoji_movie_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: emojis_emotion_prediction_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_emojis_emotion_prediction_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: empirical_judgments_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_empirical_judgments_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: english_proverbs_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_english_proverbs_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: english_russian_proverbs_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_english_russian_proverbs_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: entailed_polarity_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_entailed_polarity_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: entailed_polarity_hindi_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_entailed_polarity_hindi_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: epistemic_reasoning_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_epistemic_reasoning_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: evaluating_information_essentiality_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_evaluating_information_essentiality_multiple_choice
2 changes: 1 addition & 1 deletion lm_eval/tasks/bigbench/multiple_choice/fact_checker.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: fact_checker_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_fact_checker_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: fantasy_reasoning_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_fantasy_reasoning_multiple_choice
4 changes: 0 additions & 4 deletions lm_eval/tasks/bigbench/multiple_choice/few_shot_nlg.yaml

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: figure_of_speech_detection_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_figure_of_speech_detection_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: formal_fallacies_syllogisms_negation_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_formal_fallacies_syllogisms_negation_multiple_choice
4 changes: 0 additions & 4 deletions lm_eval/tasks/bigbench/multiple_choice/gem.yaml

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: general_knowledge_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_general_knowledge_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: geometric_shapes_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_geometric_shapes_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: goal_step_wikihow_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_goal_step_wikihow_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: gre_reading_comprehension_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_gre_reading_comprehension_multiple_choice
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: hhh_alignment_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_hhh_alignment_multiple_choice

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by utils.py
dataset_name: hindu_knowledge_zero_shot
include: ../multiple_choice_template_yaml
include: ../multiple_choice_template_a_yaml
task: bigbench_hindu_knowledge_multiple_choice
Loading
Loading