Binarized meta-templates; some extraction refactoring #218

Status: Closed (wanted to merge 24 commits)

Commits (24); the diff below shows changes from 1 commit, c9e62ea:
bbee489  Initial support for FEVER (norabelrose, Apr 22, 2023)
5ba1ddd  Start saving and fitting a reporter to the input embeddings (norabelrose, Apr 22, 2023)
3b1f74d  Merge branch 'input-embeddings' into template-filtering (norabelrose, Apr 22, 2023)
51ba54f  Rename layer 0 to 'input' to make it more clear (norabelrose, Apr 22, 2023)
544b485  Actually rename layer 0 correctly (norabelrose, Apr 22, 2023)
43da44e  Handle layer_stride correctly (norabelrose, Apr 22, 2023)
9056e00  Merge branch 'input-embeddings' into template-filtering (norabelrose, Apr 22, 2023)
756fa53  label_choices (norabelrose, Apr 22, 2023)
93b7ae0  Clean up train and eval commands; do transfer in sweep (norabelrose, Apr 22, 2023)
57d0b8b  Support INLP and split eval output into multiple CSVs (norabelrose, Apr 22, 2023)
228a6a0  Merge branch 'inlp' into template-filtering (norabelrose, Apr 22, 2023)
b086f0b  Merge branch 'inlp' into template-filtering (norabelrose, Apr 25, 2023)
934cd54  Log ensembled metrics (norabelrose, Apr 26, 2023)
dff69bf  Fixing pyright version (norabelrose, Apr 26, 2023)
b181d3e  Merge remote-tracking branch 'origin/main' into ensembling (norabelrose, Apr 26, 2023)
15254bf  Merge main (norabelrose, Apr 26, 2023)
69c2d55  Tons of stuff, preparing for sciq_binary experiment (norabelrose, Apr 27, 2023)
960ff01  Support --binarize again (norabelrose, Apr 27, 2023)
c9e62ea  Partial support for truthful_qa (norabelrose, Apr 27, 2023)
eb71a6c  Merge branch 'main' into template-filtering (norabelrose, Apr 29, 2023)
88bb15e  Merge remote-tracking branch 'origin/main' into template-filtering (norabelrose, Apr 29, 2023)
c648ff0  Remove crap (norabelrose, Apr 29, 2023)
ef12130  EleutherAI/truthful_qa_mc (norabelrose, Apr 29, 2023)
5d60ebd  Update templates (norabelrose, Apr 30, 2023)
Partial support for truthful_qa
norabelrose committed Apr 27, 2023
commit c9e62ea22138b2e30f0cf31f249258fbcca830b5
13 changes: 4 additions & 9 deletions elk/evaluation/evaluate.py
@@ -10,25 +10,21 @@
 from ..metrics import evaluate_preds
 from ..run import Run
 from ..training import Reporter
+from ..utils import Color


-@dataclass
+@dataclass(kw_only=True)
 class Eval(Run):
     """Full specification of a reporter evaluation run."""

-    # Using None as a default here is a hack; we actually raise an error if it's not
-    # specified in __post_init__. TODO: Maybe this is an indication we should be using
-    # composition and not inheritance here?
-    source: Path | None = field(default=None, positional=True)
+    source: Path = field(positional=True)
     skip_supervised: bool = False

     def __post_init__(self):
-        assert self.source, "Must specify a source experiment."
-
         if not self.out_dir:
             self.out_dir = self.source / "transfer" / "+".join(self.data.datasets)

-    def execute(self, highlight_color: str = "cyan"):
+    def execute(self, highlight_color: Color = "cyan"):
         return super().execute(highlight_color, split_type="val")

     @torch.inference_mode()
@@ -39,7 +35,6 @@ def apply_to_layer(
         device = self.get_device(devices, world_size)
         val_output = self.prepare_data(device, layer, "val")

-        assert self.source, "Must specify a source experiment."
        experiment_dir = elk_reporter_dir() / self.source

        reporter_path = experiment_dir / "reporters" / f"layer_{layer}.pt"
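For context on the @dataclass(kw_only=True) change above: keyword-only fields may follow the base class's defaulted fields, which is what makes the old default-None hack and the __post_init__ assert unnecessary. A minimal stdlib-only sketch of that behaviour (the real code uses simple-parsing's field(positional=True) for CLI parsing; the class and path names below are illustrative):

from dataclasses import dataclass
from pathlib import Path


@dataclass
class RunSketch:
    # Stand-in for the real Run base class, which already defines defaulted fields.
    out_dir: Path | None = None


@dataclass(kw_only=True)
class EvalSketch(RunSketch):
    # With kw_only=True this required field is allowed to follow the base
    # class's defaulted fields, so no None default or assert is needed.
    source: Path
    skip_supervised: bool = False


print(EvalSketch(source=Path("sweep-1/sciq")))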
2 changes: 1 addition & 1 deletion elk/extraction/extraction.py
@@ -253,7 +253,7 @@ def extract_hiddens(
         input_ids = input_ids[..., -min(cur_len, max_len) :]

         # Make sure we only pass the arguments that the model expects
-        inputs = dict(input_ids=input_ids)
+        inputs = dict(input_ids=input_ids.long())
         if is_enc_dec:
             inputs["labels"] = answer
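The .long() cast above guards against token ids arriving in a narrower integer dtype than the model's embedding layer accepts (only int32/int64 indices are valid). A minimal, hypothetical illustration, not the repo's code:

import torch

embed = torch.nn.Embedding(num_embeddings=100, embedding_dim=8)
input_ids = torch.tensor([[1, 2, 3]], dtype=torch.int16)
hidden = embed(input_ids.long())  # embed(input_ids) raises: int16 indices are rejected
print(hidden.shape)  # torch.Size([1, 3, 8])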
10 changes: 7 additions & 3 deletions elk/extraction/prompt_loading.py
@@ -104,13 +104,11 @@ def load_prompts(
             print("No label column found, not balancing")
         ds = ds.to_iterable_dataset()

-    if rank == 0:
-        print(f"Label choices: {label_choices}")
-
     for example in ds:
         yield _convert_to_prompts(
             example,
             binarize=binarize,
+            choices_column=prompter.choices_column,
             label_column=label_column,
             label_choices=label_choices,  # type: ignore[arg-type]
             num_variants=num_variants,
@@ -124,6 +122,7 @@ def _convert_to_prompts(
     example: dict[str, Any],
     prompter: DatasetTemplates,
     binarize: bool,
+    choices_column: str | None,
     label_column: str,
     label_choices: list[bool | int | str],
     num_variants: int,
@@ -144,6 +143,11 @@ def qa_cat(q: str, a: str) -> str:
     # For sanity checking that prompts are unique
     prompt_counter = Counter()
     label = example[label_column]
+    if choices_column:
+        label_choices = example[choices_column]
+    if isinstance(label, int):
+        label_choices = list(range(len(label_choices)))
+
     if binarize:
         # Replace the full list of possibilities with a randomly sampled false label
         # and the correct label, as done in the DLK paper. Note that this does add some
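The new choices_column path lets a dataset such as truthful_qa_mc supply its answer options per example rather than through template-level label_choices; when the label is an integer index, the choices collapse to their positions. A self-contained sketch of that resolution step (the function name and example values are made up for illustration):

from typing import Any


def resolve_label_choices(
    example: dict[str, Any],
    label_column: str,
    choices_column: str | None,
    template_choices: list,
) -> list:
    # Per-example choices override the template-level list; an integer label
    # means the "choices" are simply the indices 0..n-1.
    label = example[label_column]
    label_choices = example[choices_column] if choices_column else template_choices
    if isinstance(label, int):
        label_choices = list(range(len(label_choices)))
    return label_choices


example = {"label": 2, "choices": ["Paris", "London", "Berlin"]}
print(resolve_label_choices(example, "label", "choices", []))  # [0, 1, 2]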
18 changes: 6 additions & 12 deletions elk/promptsource/templates.py
@@ -270,13 +270,7 @@ class DatasetTemplates:
     helper functions necessary to read/write to the yaml file
     """

-    TEMPLATES_KEY = "templates"
-    DATASET_KEY = "dataset"
-    SUBSET_KEY = "subset"
-    LABEL_COLUMN_KEY = "label_column"
-    LABEL_CHOICES_KEY = "label_choices"
-    TEMPLATE_FILENAME = "templates.yaml"
-
+    choices_column: str | None
     label_column: str | None
     label_choices: list[int | str]
     templates: dict[str, Template]
@@ -289,11 +283,11 @@ def __init__(self, dataset_name: str, subset_name: str | None = None):
            yaml_dict = yaml.load(f, Loader=yaml.FullLoader)

        # Required field; contains all the templates keyed by ID
-       self.templates = yaml_dict[self.TEMPLATES_KEY]
+       self.templates = yaml_dict["templates"]

        # Optional fields; may be None
-       self.label_column = yaml_dict.get(self.LABEL_COLUMN_KEY)
-       self.label_choices = yaml_dict.get(self.LABEL_CHOICES_KEY, [])
+       self.choices_column = yaml_dict.get("choices_column")
+       self.label_column = yaml_dict.get("label_column")
+       self.label_choices = yaml_dict.get("label_choices", [])

    def drop_non_mc_templates(self) -> int:
        """Drop all templates that aren't multiple choice, return the number dropped"""
@@ -326,7 +320,7 @@ def folder_path(self) -> str:

    @property
    def yaml_path(self) -> str:
-       path = os.path.join(self.folder_path, self.TEMPLATE_FILENAME)
+       path = os.path.join(self.folder_path, "templates.yaml")
        if not os.path.exists(path):
            raise ValueError(f"Expected prompt templates to exist at {path}")
118 changes: 118 additions & 0 deletions elk/promptsource/templates/norabelrose/truthful_qa/templates.yaml
@@ -0,0 +1,118 @@
# Adapted from the ARC-Easy templates
dataset: norabelrose/truthful_qa_mc
choices_column: choices
templates:
  a1: !Template
    answer_choices: 'A|||B|||C|||D|||E|||F|||G|||H|||I|||J|||K|||L|||M|||N'
    id: a1
    jinja: 'Pick the most correct option to answer the following question.


      {{question}}


      Options:

      {% for letter, t in zip(answer_choices, choices) %}

      - {{letter}}: {{t}}

      {% endfor %} |||{{answer_choices[label]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      languages:
      - en
      metrics:
      - Accuracy
      original_task: true
    name: pick_the_most_correct_option
    reference: ''
  a2: !Template
    answer_choices: 'A|||B|||C|||D|||E|||F|||G|||H|||I|||J|||K|||L|||M|||N'
    id: a2
    jinja: '{{question}}


      Options:

      {% for letter, t in zip(answer_choices, choices) %}

      - {{letter}}: {{t}}

      {% endfor %}

      A:|||{{answer_choices[label]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      languages:
      - en
      metrics:
      - Accuracy
      original_task: true
    name: qa_options
    reference: ''
  a3: !Template
    answer_choices: 'A|||B|||C|||D|||E|||F|||G|||H|||I|||J|||K|||L|||M|||N'
    id: a3
    jinja: 'I am hesitating between 4 options to answer the following question, which
      option should I choose?

      Question: {{question}}

      Possibilities:

      {% for letter, t in zip(answer_choices, choices) %}

      - {{letter}}: {{t}}

      {% endfor %}

      A:|||{{answer_choices[label]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      languages:
      - en
      metrics:
      - Accuracy
      original_task: true
    name: i_am_hesitating
    reference: ''
  a4: !Template
    answer_choices: 'A|||B|||C|||D|||E|||F|||G|||H|||I|||J|||K|||L|||M|||N'
    id: a4
    jinja: 'I gave my students this multiple choice question: {{question}}


      Only one answer is correct among these 4 choices:

      {% for letter, t in zip(answer_choices, choices) %}

      - {{letter}}: {{t}}

      {% endfor %}

      A:|||{{answer_choices[label]}}'
    metadata: !TemplateMetadata
      choices_in_prompt: true
      languages:
      - en
      metrics:
      - Accuracy
      original_task: true
    name: multiple_choice
    reference: ''
  a6: !Template
    answer_choices: 'A|||B|||C|||D|||E|||F|||G|||H|||I|||J|||K|||L|||M|||N'
    id: a6
    jinja: "Here's a problem to solve: {{question}}\n\nAmong the 4 following options,\
      \ which is the correct answer?\n{% for letter, t in zip(answer_choices, choices)\
      \ %}\n- {{letter}}: {{t}}\n {% endfor %}A:|||{{answer_choices[label]}}"
    metadata: !TemplateMetadata
      choices_in_prompt: true
      languages:
      - en
      metrics:
      - Accuracy
      original_task: true
    name: heres_a_problem
    reference: ''
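Each template above pairs the letter-style answer_choices with the dataset's per-example choices column via zip(), and everything after the ||| separator is the target completion. A hedged rendering sketch: it assumes zip is exposed to the Jinja environment (these promptsource-style templates rely on that) and that answer_choices arrives as a list after splitting on '|||'; the question and choices are made-up example values:

from jinja2 import Environment

env = Environment()
env.globals["zip"] = zip  # the templates call zip(), which is not a Jinja builtin

tmpl = env.from_string(
    "{{question}}\n\nOptions:\n"
    "{% for letter, t in zip(answer_choices, choices) %}- {{letter}}: {{t}}\n{% endfor %}"
    "\nA:|||{{answer_choices[label]}}"
)
print(tmpl.render(
    question="What is the capital of France?",
    answer_choices=["A", "B", "C"],
    choices=["Paris", "London", "Berlin"],
    label=0,
))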
4 changes: 3 additions & 1 deletion elk/run.py
@@ -21,6 +21,7 @@
 from .extraction.dataset_name import DatasetDictWithName
 from .files import elk_reporter_dir, memorably_named_dir
 from .utils import (
+    Color,
     assert_type,
     get_layer_indices,
     int16_to_float32,
@@ -48,7 +49,7 @@ class Run(ABC, Serializable):

     def execute(
         self,
-        highlight_color: str = "cyan",
+        highlight_color: Color = "cyan",
         split_type: Literal["train", "val", None] = None,
     ):
         self.datasets = [
@@ -127,6 +128,7 @@ def prepare_data(

         split = ds[key].with_format("torch", device=device, dtype=torch.int16)
         labels = assert_type(Tensor, split["label"])
+        breakpoint()
         val_h = int16_to_float32(assert_type(Tensor, split[f"hidden_{layer}"]))

         with split.formatted_as("torch", device=device):
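The highlight_color parameter is now typed as Color rather than a bare str. Purely as an illustration of the pattern; the real alias lives in elk/utils and its exact members are an assumption here:

from typing import Literal

Color = Literal["red", "green", "yellow", "blue", "magenta", "cyan", "white"]  # hypothetical members


def execute(highlight_color: Color = "cyan") -> None:
    print(f"highlighting output in {highlight_color}")


execute("magenta")  # fine; a typo such as "cian" would be caught by pyright, not at runtime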
5 changes: 1 addition & 4 deletions elk/utils/data_utils.py
@@ -118,10 +118,7 @@ def infer_num_classes(label_feature: Any) -> int:
     elif isinstance(label_feature, Value) and label_feature.dtype == "bool":
         return 2
     else:
-        raise ValueError(
-            f"Can't infer number of classes from label column of type {label_feature}. "
-            f"Please update the num_classes field in the prompt template yaml file."
-        )
+        return -1


 def get_layer_indices(ds: DatasetDict) -> list[int]:
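infer_num_classes now degrades gracefully: a label feature it cannot interpret yields the sentinel -1 instead of raising, so callers can fall back to per-example choices such as a choices_column. A sketch of the resulting behaviour; the ClassLabel branch is assumed from context since the hunk starts below it:

from typing import Any

from datasets import ClassLabel, Value


def infer_num_classes(label_feature: Any) -> int:
    if isinstance(label_feature, ClassLabel):
        return label_feature.num_classes  # assumed branch, not shown in the hunk
    elif isinstance(label_feature, Value) and label_feature.dtype == "bool":
        return 2
    else:
        return -1  # sentinel: "unknown", handled by the caller


print(infer_num_classes(ClassLabel(names=["no", "yes"])))  # 2
print(infer_num_classes(Value("string")))                  # -1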
8 changes: 4 additions & 4 deletions elk/utils/typing.py
@@ -14,14 +14,14 @@ def assert_type(typ: Type[T], obj: Any) -> T:


 def float32_to_int16(x: torch.Tensor) -> torch.Tensor:
-    """Converts float32 to float16, then reinterprets as int16."""
-    downcast = x.type(torch.float16)
+    """Converts float32 to bfloat16, then reinterprets as int16."""
+    downcast = x.type(torch.bfloat16)
     if not downcast.isfinite().all():
         raise ValueError("Cannot convert to 16 bit: values are not finite")

     return downcast.view(torch.int16)


 def int16_to_float32(x: torch.Tensor) -> torch.Tensor:
-    """Converts int16 to float16, then reinterprets as float32."""
-    return x.view(torch.float16).type(torch.float32)
+    """Converts int16 to bfloat16, then reinterprets as float32."""
+    return x.view(torch.bfloat16).type(torch.float32)
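Switching the 16-bit carrier from float16 to bfloat16 keeps float32's exponent range, so finite float32 activations stay finite after the downcast and no longer trip the isfinite check, at the cost of a coarser mantissa. A standalone usage sketch of the round trip (helper bodies condensed from the diff above, finiteness check omitted):

import torch


def float32_to_int16(x: torch.Tensor) -> torch.Tensor:
    return x.type(torch.bfloat16).view(torch.int16)


def int16_to_float32(x: torch.Tensor) -> torch.Tensor:
    return x.view(torch.bfloat16).type(torch.float32)


x = torch.randn(2, 3) * 1e5          # magnitudes like this overflow float16 (max ~65504)
packed = float32_to_int16(x)         # compact int16 bit patterns for storage
restored = int16_to_float32(packed)  # lossy (roughly 0.4% relative error) but finite
print(packed.dtype, restored.dtype, (x - restored).abs().max())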