Changing a ton of stuff

EleutherAI · norabelrose · Apr 16, 2023 · Apr 4, 2023 · Apr 4, 2023 · Apr 4, 2023
commit 0c35bc773b0cddf4cd3b8bc339f1da840d3e3c9c
diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py
@@ -60,7 +60,7 @@ def evaluate_reporter(
  """Evaluate a single reporter on a single layer."""
  device = self.get_device(devices, world_size)
 
- _, _, test_x0, test_x1, _, test_labels, _ = self.prepare_data(
+ _, test_h, _, test_labels, _ = self.prepare_data(
  device,
  layer,
  )
@@ -71,12 +71,7 @@ def evaluate_reporter(
  reporter: Reporter = torch.load(reporter_path, map_location=device)
  reporter.eval()
 
- test_result = reporter.score(
- test_labels,
- test_x0,
- test_x1,
- )
-
+ test_result = reporter.score(test_labels, test_h)
  stats_row = pd.Series(
  {
  "layer": layer,
@@ -89,7 +84,7 @@ def evaluate_reporter(
  lr_model = load_baseline(lr_dir, layer)
  lr_model.eval()
  lr_auroc, lr_acc = evaluate_baseline(
- lr_model.cuda(), test_x0.cuda(), test_x1.cuda(), test_labels
+ lr_model.cuda(), test_h.cuda(), test_labels
  )
 
  stats_row["lr_auroc"] = lr_auroc

diff --git a/elk/extraction/balanced_sampler.py b/elk/extraction/balanced_sampler.py
@@ -1,4 +1,5 @@
 from collections import deque
+from dataclasses import dataclass, field
 from itertools import cycle
 from random import Random
 from typing import Iterable, Iterator, Optional
@@ -11,39 +12,42 @@
 from ..utils.typing import assert_type
 
 
+@dataclass
 class BalancedSampler(TorchIterableDataset):
  """
- Approximately balances a binary classification dataset in a streaming fashion.
-
- Args:
- dataset (IterableDataset): The HuggingFace IterableDataset to balance.
- label_col (Optional[str], optional): The name of the column containing the
- binary label. If not provided, the label column will be inferred from
- the dataset features. Defaults to None.
- buffer_size (int, optional): The total buffer size to use for balancing the
- dataset. This value should be divisible by 2, as it will be equally
- divided between the two binary label values (0 and 1). Defaults to 1000.
+ A sampler that approximately balances a multi-class classification dataset in a
+ streaming fashion.
+
+ Samples are held in one buffer per class. Whenever every class buffer is
+ non-empty, one sample per class is yielded, in class-label order. Samples that
+ are still buffered when `data` is exhausted are never yielded.
+
+ Attributes:
+ data: The input dataset to balance. Each sample must have a "label" key whose
+ value is in `range(num_classes)`; any other label raises a `KeyError`.
+ num_classes: The total number of classes expected in the data.
+ buffer_size: The maximum number of samples buffered per class. Each class has
+ its own buffer of this size; once a buffer is full, appending a new sample
+ discards the oldest one.
+ buffers: Mapping from class label to that class's buffer. Built in
+ `__post_init__`; not accepted as a constructor argument.
  """
 
- def __init__(self, data: Iterable[dict], buffer_size: int = 1000):
- self.data = data
+ data: Iterable[dict]
+ num_classes: int
+ buffer_size: int = 1000
+ buffers: dict[int, deque[dict]] = field(default_factory=dict, init=False)
 
- self.neg_buffer = deque(maxlen=buffer_size)
- self.pos_buffer = deque(maxlen=buffer_size)
+ def __post_init__(self):
+ # Create one bounded FIFO buffer for every label in range(num_classes)
+ self.buffers = {
+ label: deque(maxlen=self.buffer_size) for label in range(self.num_classes)
+ }
 
  def __iter__(self):
  for sample in self.data:
  label = sample["label"]
 
- # Add the sample to the appropriate buffer
- if label == 0:
- self.neg_buffer.append(sample)
- else:
- self.pos_buffer.append(sample)
+ # Add the sample to the buffer for its class label; a label that is not a
+ # key of `self.buffers` raises KeyError here
+ self.buffers[label].append(sample)
 
- while self.neg_buffer and self.pos_buffer:
- yield self.neg_buffer.popleft()
- yield self.pos_buffer.popleft()
+ # Only emit while every class has at least one buffered sample, so each
+ # emitted group contains exactly one sample per class
+ while all(len(buffer) > 0 for buffer in self.buffers.values()):
+ # Yield the oldest sample from each buffer, in class-label order
+ for buf in self.buffers.values():
+ yield buf.popleft()
 
 
 class FewShotSampler:

diff --git a/elk/extraction/extraction.py b/elk/extraction/extraction.py
@@ -7,8 +7,8 @@
 
 import torch
 from datasets import (
+ Array2D,
  Array3D,
- ClassLabel,
  DatasetDict,
  Features,
  Sequence,
@@ -22,11 +22,12 @@
 from transformers import AutoConfig, AutoTokenizer
 from transformers.modeling_outputs import Seq2SeqLMOutput
 
-# import torch.nn.functional as F
 from ..utils import (
  assert_type,
  convert_span,
  float32_to_int16,
+ infer_label_column,
+ infer_num_classes,
  instantiate_model,
  is_autoregressive,
  select_train_val_splits,
@@ -103,7 +104,7 @@ def extract_hiddens(
  stream=cfg.prompts.stream,
  rank=rank,
  world_size=world_size,
- ) # this dataset is already sharded, but hasn't been truncated to max_examples
+ ) # this dataset is already sharded, but hasn't been truncated to max_examples
 
  model = instantiate_model(
  cfg.model, torch_dtype="auto" if device != "cpu" else torch.float32
@@ -125,12 +126,14 @@ def extract_hiddens(
  if rank == world_size - 1:
  max_examples += global_max_examples % world_size
 
- for example in islice(BalancedSampler(prompt_ds), max_examples):
+ for example in islice(BalancedSampler(prompt_ds, 3), max_examples):
  num_variants = len(example["prompts"])
+ num_choices = len(example["prompts"][0])
+
  hidden_dict = {
  f"hidden_{layer_idx}": torch.empty(
  num_variants,
- 2, # contrast pair
+ num_choices,
  model.config.hidden_size,
  device=device,
  dtype=torch.int16,
@@ -139,7 +142,7 @@ def extract_hiddens(
  }
  lm_preds = torch.empty(
  num_variants,
- 2, # contrast pair
+ num_choices,
  device=device,
  dtype=torch.float32,
  )
@@ -232,8 +235,7 @@ def extract_hiddens(
  **hidden_dict,
  )
  if has_lm_preds:
- # We only need the probability of the positive example since this is binary
- out_record["model_preds"] = lm_preds.softmax(dim=-1)[..., 1]
+ out_record["model_preds"] = lm_preds.softmax(dim=-1)
 
  yield out_record
 
@@ -271,10 +273,14 @@ def get_splits() -> SplitDict:
  ds_name, _, config_name = cfg.prompts.datasets[0].partition(" ")
  info = get_dataset_config_info(ds_name, config_name or None)
 
+ ds_features = assert_type(Features, info.features)
+ label_col = infer_label_column(ds_features)
+ num_classes = infer_num_classes(ds_features[label_col])
+
  layer_cols = {
  f"hidden_{layer}": Array3D(
  dtype="int16",
- shape=(num_variants, 2, model_cfg.hidden_size),
+ shape=(num_variants, num_classes, model_cfg.hidden_size),
  )
  for layer in cfg.layers or range(model_cfg.num_hidden_layers)
  }
@@ -283,21 +289,20 @@ def get_splits() -> SplitDict:
  Value(dtype="string"),
  length=num_variants,
  ),
- "label": ClassLabel(names=["neg", "pos"]),
+ "label": Value(dtype="int64"),
  "text_inputs": Sequence(
  Sequence(
  Value(dtype="string"),
- length=2,
  ),
  length=num_variants,
  ),
  }
 
  # Only add model_preds if the model is an autoregressive model
  if is_autoregressive(model_cfg):
- other_cols["model_preds"] = Sequence(
- Value(dtype="float32"),
- length=num_variants,
+ other_cols["model_preds"] = Array2D(
+ shape=(num_variants, num_classes),
+ dtype="float32",
  )
 
  devices = select_usable_devices(num_gpus, min_memory=cfg.min_gpu_mem)
@@ -318,7 +323,6 @@ def get_splits() -> SplitDict:
  )
  for (split_name, split_info) in get_splits().items()
  }
-
  import multiprocess as mp
 
  mp.set_start_method("spawn") # type: ignore[attr-defined]

diff --git a/elk/extraction/generator.py b/elk/extraction/generator.py
@@ -1,5 +1,5 @@
 from dataclasses import dataclass, field
-from typing import Any, Callable, Dict, Optional
+from typing import Any, Callable, Optional
 
 import datasets
 from datasets.splits import NamedSplit
@@ -20,7 +20,7 @@ class _SplitGenerator:
 
  name: str
  split_info: datasets.SplitInfo
- gen_kwargs: Dict = field(default_factory=dict)
+ gen_kwargs: dict = field(default_factory=dict)
 
  def __post_init__(self):
  self.name = str(self.name) # Make sure we convert NamedSplits in strings

diff --git a/elk/extraction/prompt_loading.py b/elk/extraction/prompt_loading.py
@@ -14,7 +14,6 @@
 from ..promptsource import DatasetTemplates
 from ..utils import (
  assert_type,
- binarize,
  infer_label_column,
  infer_num_classes,
  select_train_val_splits,
@@ -220,10 +219,7 @@ def qa_cat(q: str, a: str) -> str:
  for template in templates:
  choices = []
 
- if num_classes > 2:
- template, label = binarize(template, label, rng)
-
- for answer_idx in range(2):
+ for answer_idx in range(num_classes):
  fake_example = example.copy()
  fake_example[label_column] = answer_idx
 

diff --git a/elk/metrics.py b/elk/metrics.py
@@ -0,0 +1,79 @@
+from functools import partial
+from typing import Literal
+
+import torch
+from sklearn.metrics import average_precision_score, roc_auc_score
+from torch import Tensor
+
+
+def to_one_hot(labels: Tensor, n_classes: int) -> Tensor:
+    """
+    Convert a tensor of class labels to a one-hot representation.
+
+    Args:
+        labels (Tensor): A tensor of integer class labels of shape (N,), with
+            values in `[0, n_classes)`.
+        n_classes (int): The total number of unique classes.
+
+    Returns:
+        Tensor: A float32 one-hot tensor of shape (N, n_classes), allocated on
+            the same device as `labels`.
+    """
+    # Allocate on the labels' device: `scatter_` requires the destination and
+    # the index tensor to live on the same device.
+    one_hot_labels = torch.zeros(
+        labels.size(0), n_classes, dtype=torch.float32, device=labels.device
+    )
+    return one_hot_labels.scatter_(1, labels.unsqueeze(1).long(), 1)
+
+
+def accuracy(y_true: Tensor, y_pred: Tensor) -> float:
+    """
+    Compute the accuracy of a classification model.
+
+    Args:
+        y_true: Ground truth tensor of shape (N,).
+        y_pred: Predicted probabilities of shape (N,) or (N, 1) for binary
+            classification, or per-class scores of shape (N, n_classes).
+
+    Returns:
+        float: Fraction of predictions that match `y_true`.
+    """
+    # A single score column is a binary problem (the same convention `mean_auc`
+    # uses); taking argmax over it would always predict class 0.
+    is_binary = y_pred.ndim == 1 or (y_pred.ndim == 2 and y_pred.shape[1] == 1)
+    if is_binary:
+        hard_preds = y_pred.reshape(-1) > 0.5
+    else:
+        hard_preds = y_pred.argmax(-1)
+
+    return hard_preds.eq(y_true).float().mean().item()
+
+
+def mean_auc(y_true: Tensor, y_scores: Tensor, curve: Literal["roc", "pr"]) -> float:
+    """
+    Compute the mean area under the receiver operating curve (AUROC) or
+    precision-recall curve (average precision or mAP) for binary or multi-class
+    classification problems.
+
+    Args:
+        y_true: Ground truth tensor of class labels of shape (N,).
+        y_scores: Predicted probability tensor of shape (N,) or (N, 1) for
+            binary, or (N, n_classes) for multi-class.
+        curve: Type of curve to compute the mean AUC. Either 'pr' for
+            precision-recall curve or 'roc' for receiver operating
+            characteristic curve. This argument is required.
+
+    Returns:
+        float: Either mean AUROC or mean average precision (mAP).
+
+    Raises:
+        ValueError: If `curve` is neither 'pr' nor 'roc'.
+    """
+    if curve == "pr":
+        score_fn = average_precision_score
+    elif curve == "roc":
+        score_fn = partial(roc_auc_score, multi_class="ovo")
+    else:
+        raise ValueError("Invalid curve type. Supported values are 'pr' and 'roc'.")
+
+    # Binary case. `reshape(-1)` flattens an (N, 1) column and leaves an (N,)
+    # vector untouched, whereas `squeeze(1)` raises on 1-D input.
+    if y_scores.ndim == 1 or y_scores.shape[1] == 1:
+        return float(score_fn(y_true, y_scores.reshape(-1)))
+
+    # Multi-class case: score against one-hot targets.
+    n_classes = y_scores.shape[1]
+    y_true_one_hot = to_one_hot(y_true, n_classes)
+    return float(score_fn(y_true_one_hot, y_scores))