EleutherAI/truthful_qa_mc

EleutherAI · norabelrose · Apr 22, 2023
commit ef12130b5ab1e0e859626c7ba382256656e7571d
diff --git a/elk/extraction/prompt_loading.py b/elk/extraction/prompt_loading.py
@@ -108,7 +108,6 @@ def load_prompts(
  yield _convert_to_prompts(
  example,
  binarize=binarize,
- choices_column=prompter.choices_column,
  label_column=label_column,
  label_choices=label_choices, # type: ignore[arg-type]
  num_variants=num_variants,
@@ -122,7 +121,6 @@ def _convert_to_prompts(
  example: dict[str, Any],
  prompter: DatasetTemplates,
  binarize: bool,
- choices_column: str | None,
  label_column: str,
  label_choices: list[bool | int | str],
  num_variants: int,
@@ -143,10 +141,6 @@ def qa_cat(q: str, a: str) -> str:
  # For sanity checking that prompts are unique
  prompt_counter = Counter()
  label = example[label_column]
- if choices_column:
- label_choices = example[choices_column]
- if isinstance(label, int):
- label_choices = list(range(len(label_choices)))
 
  if binarize:
  # Replace the full list of possibilities with a randomly sampled false label

diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py
@@ -72,7 +72,9 @@ def evaluate_preds(
  if c == 2:
  auroc = roc_auc_ci(y_true, y_logits[..., 1] - y_logits[..., 0])
  else:
- auroc = roc_auc_ci(to_one_hot(y_true, c).long(), y_logits)
+ auroc = roc_auc_ci(
+ to_one_hot(y_true, c).long().flatten(1), y_logits.flatten(1)
+ )
  else:
  raise ValueError(f"Unknown mode: {ensembling}")
 

diff --git a/elk/promptsource/templates.py b/elk/promptsource/templates.py
@@ -247,7 +247,6 @@ class DatasetTemplates:
  helper functions necessary to read/write to the yaml file
  """
 
- choices_column: str | None
  label_column: str | None
  label_choices: list[int | str]
  templates: dict[str, Template]
@@ -262,7 +261,6 @@ def __init__(self, dataset_name: str, subset_name: str | None = None):
  # Required field; contains all the templates keyed by ID
  self.templates = yaml_dict["templates"]
 
- self.choices_column = yaml_dict.get("choices_column")
  self.label_column = yaml_dict.get("label_column")
  self.label_choices = yaml_dict.get("label_choices", [])
 

diff --git a/...es/norabelrose/truthful_qa/templates.yaml b/.../EleutherAI/truthful_qa_mc/templates.yaml
@@ -1,9 +1,8 @@
 # Adapted from the ARC-Easy templates
-dataset: norabelrose/truthful_qa_mc
-choices_column: choices
+dataset: EleutherAI/truthful_qa_mc
 templates:
  a1: !Template
- answer_choices: 'A|||B|||C|||D|||E|||F|||G|||H|||I|||J|||K|||L|||M|||N'
+ answer_choices: 'A|||B|||C|||D'
  id: a1
  jinja: 'Pick the most correct option to answer the following question.
 
@@ -14,10 +13,11 @@ templates:
  Options:
 
  {% for letter, t in zip(answer_choices, choices) %}
-
  - {{letter}}: {{t}}
 
- {% endfor %} |||{{answer_choices[label]}}'
+ {% endfor %}
+
+ |||{{answer_choices[label]}}'
  metadata: !TemplateMetadata
  choices_in_prompt: true
  languages:
@@ -28,20 +28,19 @@ templates:
  name: pick_the_most_correct_option
  reference: ''
  a2: !Template
- answer_choices: 'A|||B|||C|||D|||E|||F|||G|||H|||I|||J|||K|||L|||M|||N'
+ answer_choices: 'A|||B|||C|||D'
  id: a2
  jinja: '{{question}}
 
 
  Options:
 
  {% for letter, t in zip(answer_choices, choices) %}
-
  - {{letter}}: {{t}}
 
  {% endfor %}
 
- A:|||{{answer_choices[label]}}'
+ Answer:|||{{answer_choices[label]}}'
  metadata: !TemplateMetadata
  choices_in_prompt: true
  languages:
@@ -52,22 +51,23 @@ templates:
  name: qa_options
  reference: ''
  a3: !Template
- answer_choices: 'A|||B|||C|||D|||E|||F|||G|||H|||I|||J|||K|||L|||M|||N'
+ answer_choices: 'A|||B|||C|||D'
  id: a3
  jinja: 'I am hesitating between 4 options to answer the following question, which
  option should I choose?
 
+
  Question: {{question}}
 
+
  Possibilities:
 
  {% for letter, t in zip(answer_choices, choices) %}
-
  - {{letter}}: {{t}}
 
  {% endfor %}
 
- A:|||{{answer_choices[label]}}'
+ Answer:|||{{answer_choices[label]}}'
  metadata: !TemplateMetadata
  choices_in_prompt: true
  languages:
@@ -78,20 +78,19 @@ templates:
  name: i_am_hesitating
  reference: ''
  a4: !Template
- answer_choices: 'A|||B|||C|||D|||E|||F|||G|||H|||I|||J|||K|||L|||M|||N'
+ answer_choices: 'A|||B|||C|||D'
  id: a4
  jinja: 'I gave my students this multiple choice question: {{question}}
 
 
  Only one answer is correct among these 4 choices:
 
  {% for letter, t in zip(answer_choices, choices) %}
-
  - {{letter}}: {{t}}
 
  {% endfor %}
 
- A:|||{{answer_choices[label]}}'
+ Answer:|||{{answer_choices[label]}}'
  metadata: !TemplateMetadata
  choices_in_prompt: true
  languages:
@@ -101,12 +100,20 @@ templates:
  original_task: true
  name: multiple_choice
  reference: ''
- a6: !Template
- answer_choices: 'A|||B|||C|||D|||E|||F|||G|||H|||I|||J|||K|||L|||M|||N'
+ a5: !Template
+ answer_choices: 'A|||B|||C|||D'
  id: a6
- jinja: "Here's a problem to solve: {{question}}\n\nAmong the 4 following options,\
- \ which is the correct answer?\n{% for letter, t in zip(answer_choices, choices)\
- \ %}\n- {{letter}}: {{t}}\n {% endfor %}A:|||{{answer_choices[label]}}"
+ jinja: "Here's a problem to solve: {{question}}
+
+
+ Among the 4 following options, which is the correct answer?
+
+ {% for letter, t in zip(answer_choices, choices) %}
+ {{letter}}: {{t}}
+
+ {% endfor %}
+
+ Answer:|||{{answer_choices[label]}}"
  metadata: !TemplateMetadata
  choices_in_prompt: true
  languages:

diff --git a/elk/run.py b/elk/run.py
@@ -125,8 +125,8 @@ def prepare_data(
 
  for ds_name, ds in self.datasets:
  key = select_split(ds, split_type)
-
  split = ds[key].with_format("torch", device=device, dtype=torch.int16)
+
  labels = assert_type(Tensor, split["label"])
  val_h = int16_to_float32(assert_type(Tensor, split[f"hidden_{layer}"]))
 

diff --git a/elk/training/sweep.py b/elk/training/sweep.py
@@ -89,6 +89,7 @@ def execute(self):
  if eval_dataset in train_datasets:
  continue
 
+ assert run.out_dir is not None
  eval = Eval(
  data=replace(run.data, model=model, datasets=(eval_dataset,)),
  source=run.out_dir,

diff --git a/elk/utils/data_utils.py b/elk/utils/data_utils.py
@@ -123,9 +123,7 @@ def infer_num_classes(label_feature: Any) -> int:
  """Return the number of classes in a `Dataset`.
 
  Returns:
- The number of classes.
- Raises:
- ValueError: If the label column is not a `ClassLabel` or `Value('bool')`.
+ The number of classes, or -1 if it's unclear.
  """
  if isinstance(label_feature, ClassLabel):
  # We piggyback on the ClassLabel feature type to get the number of classes