GLUE MNLI works on DeBERTa

EleutherAI · norabelrose · Apr 16, 2023 · Apr 4, 2023 · Apr 4, 2023 · Apr 4, 2023
commit 3e6c39c3bffb670f81fcef4546f4f007cbe94d54
diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py
@@ -40,6 +40,7 @@ class Eval(Serializable):
  num_gpus: int = -1
  min_gpu_mem: int | None = None
  skip_baseline: bool = False
+ concatenated_layer_offset: int = 0
 
  def execute(self):
  datasets = self.data.prompts.datasets

diff --git a/elk/metrics.py b/elk/metrics.py
@@ -1,7 +1,3 @@
-from functools import partial
-from typing import Literal
-
-from sklearn.metrics import average_precision_score, roc_auc_score
 from torch import Tensor
 
 
@@ -37,42 +33,4 @@ def accuracy(y_true: Tensor, y_pred: Tensor) -> float:
  else:
  hard_preds = y_pred.argmax(-1)
 
- return hard_preds.eq(y_true).float().mean().item()
-
-
-def mean_auc(y_true: Tensor, y_scores: Tensor, curve: Literal["roc", "pr"]) -> float:
- """
- Compute the mean area under the receiver operating curve (AUROC) or
- precision-recall curve (average precision or mAP) for binary or multi-class
- classification problems.
-
- Args:
- y_true: Ground truth tensor of shape (N,) or (N, n_classes).
- y_scores: Predicted probability tensor of shape (N,) for binary
- or (N, n_classes) for multi-class.
- curve: Type of curve to compute the mean AUC. Either 'pr' for
- precision-recall curve or 'roc' for receiver operating
- characteristic curve. Defaults to 'pr'.
-
- Returns:
- float: Either mean AUROC or mean average precision (mAP).
- """
- score_fn = {
- "pr": average_precision_score,
- "roc": partial(roc_auc_score, multi_class="ovo"),
- }.get(curve, None)
-
- if score_fn is None:
- raise ValueError("Invalid curve type. Supported values are 'pr' and 'roc'.")
-
- if len(y_scores.shape) == 1 or y_scores.shape[1] == 1:
- return float(score_fn(y_true, y_scores.squeeze(1)))
- else:
- n_classes = y_scores.shape[1]
- y_true_one_hot = to_one_hot(y_true, n_classes)
-
- return score_fn(y_true_one_hot, y_scores)
- # return np.array([
- # score_fn(y_true_one_hot[:, i], y_scores[:, i])
- # for i in range(n_classes)
- # ]).mean()
+ return hard_preds.cpu().eq(y_true.cpu()).float().mean().item()
diff --git a/elk/training/eigen_reporter.py b/elk/training/eigen_reporter.py
@@ -227,28 +227,34 @@ def fit(
  loss = self.fit_streaming()
 
  if labels is not None:
+ (_, v, k, _) = hiddens.shape
+ hiddens = rearrange(hiddens, "n v k d -> (n v k) d")
+ labels = to_one_hot(repeat(labels, "n -> (n v)", v=v), k).flatten()
+
  self.platt_scale(labels, hiddens)
 
  return loss
 
  def platt_scale(self, labels: Tensor, hiddens: Tensor, max_iter: int = 100):
- """Fit the scale and bias terms to data with LBFGS."""
+ """Fit the scale and bias terms to data with LBFGS.
 
+ Args:
+ labels: Binary labels of shape [batch].
+ hiddens: Hidden states of shape [batch, dim].
+ max_iter: Maximum number of iterations for LBFGS.
+ """
  opt = optim.LBFGS(
  [self.bias, self.scale],
  line_search_fn="strong_wolfe",
  max_iter=max_iter,
  tolerance_change=torch.finfo(hiddens.dtype).eps,
  tolerance_grad=torch.finfo(hiddens.dtype).eps,
  )
- (_, v, k, _) = hiddens.shape
- labels = to_one_hot(repeat(labels, "n -> (n v)", v=v), k)
 
  def closure():
  opt.zero_grad()
- logits = rearrange(self(hiddens), "n v k -> (n v) k")
  loss = nn.functional.binary_cross_entropy_with_logits(
- logits, labels.float()
+ self(hiddens), labels.float()
  )
 
  loss.backward()

diff --git a/elk/training/reporter.py b/elk/training/reporter.py
@@ -13,7 +13,7 @@
 from torch import Tensor
 
 from ..calibration import CalibrationError
-from ..metrics import to_one_hot
+from ..metrics import accuracy, to_one_hot
 from .classifier import Classifier
 
 
@@ -165,13 +165,13 @@ def score(self, labels: Tensor, hiddens: Tensor) -> EvalResult:
  cal_err = 0.0
 
  raw_preds = to_one_hot(logits.argmax(dim=-1), c).long()
- auroc = roc_auc_score(
- to_one_hot(Y, c).long().flatten().cpu(), logits.cpu().flatten()
- )
- raw_acc = raw_preds.flatten().eq(Y).float().mean()
+ Y = to_one_hot(Y, c).long().flatten()
+
+ auroc = roc_auc_score(Y.cpu(), logits.cpu().flatten())
+ raw_acc = accuracy(Y, raw_preds.flatten())
 
  return EvalResult(
- acc=raw_acc.item(),
+ acc=float(raw_acc),
  cal_acc=cal_acc,
  auroc=float(auroc),
  ece=cal_err,

diff --git a/elk/training/supervised.py b/elk/training/supervised.py
@@ -1,8 +1,9 @@
 import torch
 from einops import rearrange, repeat
-from sklearn.metrics import accuracy_score, roc_auc_score
+from sklearn.metrics import roc_auc_score
 from torch import Tensor
 
+from ..metrics import accuracy, to_one_hot
 from ..utils import assert_type
 from .classifier import Classifier
 
@@ -11,32 +12,31 @@ def evaluate_supervised(
  lr_model: Classifier, val_h: Tensor, val_labels: Tensor
 ) -> tuple[float, float]:
  (n, v, k, d) = val_h.shape
- X_val = val_h.view(-1, d)
+
  with torch.no_grad():
- lr_preds = lr_model(X_val).sigmoid().cpu()
+ logits = rearrange(lr_model(val_h).cpu().squeeze(), "n v k -> (n v) k")
+ raw_preds = to_one_hot(logits.argmax(dim=-1), k).long()
 
- val_labels_aug = (
- torch.cat([val_labels, 1 - val_labels]).repeat_interleave(v)
- ).cpu()
+ labels = repeat(val_labels, "n -> (n v)", v=v)
+ labels = to_one_hot(labels, k).flatten()
 
- lr_acc = accuracy_score(val_labels_aug, lr_preds > 0.5)
- lr_auroc = roc_auc_score(val_labels_aug, lr_preds)
+ lr_acc = accuracy(labels, raw_preds.flatten())
+ lr_auroc = roc_auc_score(labels.cpu(), logits.cpu().flatten())
 
  return assert_type(float, lr_auroc), assert_type(float, lr_acc)
 
 
 def train_supervised(data: dict[str, tuple], device: str) -> Classifier:
  Xs, train_labels = [], []
 
- for x0, x1, labels, _ in data.values():
- (_, v, _) = x0.shape
- x0 = rearrange(x0, "n v d -> (n v) d")
- x1 = rearrange(x1, "n v d -> (n v) d")
+ for train_h, labels, _ in data.values():
+ (_, v, k, _) = train_h.shape
+ train_h = rearrange(train_h, "n v k d -> (n v k) d")
 
  labels = repeat(labels, "n -> (n v)", v=v)
- labels = torch.cat([labels, 1 - labels])
+ labels = to_one_hot(labels, k).flatten()
 
- Xs.append(torch.cat([x0, x1]).squeeze())
+ Xs.append(train_h)
  train_labels.append(labels)
 
  X, train_labels = torch.cat(Xs), torch.cat(train_labels)

diff --git a/elk/training/train.py b/elk/training/train.py
@@ -86,26 +86,37 @@ def train_reporter(
  train_dict = self.prepare_data(device, layer, "train")
  val_dict = self.prepare_data(device, layer, "val")
 
- # Can't figure out a way to make this line less ugly
- hidden_size = next(iter(train_dict.values()))[0].shape[-1]
- reporter_dir, lr_dir = self.create_models_dir(assert_type(Path, self.out_dir))
- pseudo_clf = self.get_pseudo_classifier(train_dict, device)
+ (train_h, train_labels, _), *rest = train_dict.values()
+ (n, v, k, d) = train_h.shape
+
+ if not all(other_h.shape[2] == k for other_h, _, _ in rest):
+ raise ValueError("All datasets must have the same number of classes")
 
+ reporter_dir, lr_dir = self.create_models_dir(assert_type(Path, self.out_dir))
  if isinstance(self.cfg.net, CcsReporterConfig):
  assert len(train_dict) == 1, "CCS only supports single-task training"
 
- reporter = CcsReporter(self.cfg.net, hidden_size, device=device)
- (train_h, labels, _) = next(iter(train_dict.values()))
- train_loss = reporter.fit(train_h, labels)
+ reporter = CcsReporter(self.cfg.net, d, device=device)
+ train_loss = reporter.fit(train_h, train_labels)
 
  elif isinstance(self.cfg.net, EigenReporterConfig):
  # To enable training on multiple tasks with different numbers of variants,
  # we update the statistics in a streaming fashion and then fit
- reporter = EigenReporter(self.cfg.net, hidden_size, device=device)
- for ds_name, (val_h, labels, _) in train_dict.items():
- reporter.update(val_h)
+ reporter = EigenReporter(self.cfg.net, d, k, device=device)
+
+ hidden_list, label_list = [], []
+ for ds_name, (train_h, train_labels, _) in train_dict.items():
+ hidden_list.append(train_h)
+ label_list.append(train_labels)
+ reporter.update(train_h)
 
  train_loss = reporter.fit_streaming()
+ reporter.platt_scale(
+ to_one_hot(
+ repeat(torch.cat(label_list), "n -> (n v)", v=v), k
+ ).flatten(),
+ rearrange(torch.cat(hidden_list), "n v k d -> (n v k) d"),
+ )
  else:
  raise ValueError(f"Unknown reporter config type: {type(self.cfg.net)}")
 
@@ -122,22 +133,25 @@ def train_reporter(
  for ds_name, (val_h, val_gt, val_lm_preds) in val_dict.items():
  val_result = reporter.score(val_gt, val_h)
  with torch.no_grad():
- (n, v, k, d) = val_h.shape
-
- pseudo_preds = pseudo_clf(
- # n v k d -> (n v k) d
- rearrange(val_h, "n v k d -> (n v k) d")
- )
- pseudo_labels = torch.cat(
- [
- val_h.new_zeros(n),
- val_h.new_ones(n),
- ]
- )
- pseudo_labels = repeat(pseudo_labels, "n -> (n v)", v=v)
- pseudo_auroc = float(
- roc_auc_score(pseudo_labels.cpu(), pseudo_preds.cpu())
- )
+ if k == 2:
+ pseudo_clf = self.get_pseudo_classifier(train_dict, device)
+ pseudo_preds = pseudo_clf(
+ # n v k d -> (n v k) d
+ rearrange(val_h, "n v k d -> (n v k) d")
+ )
+ pseudo_labels = torch.cat(
+ [
+ val_h.new_zeros(n),
+ val_h.new_ones(n),
+ ]
+ )
+ pseudo_labels = repeat(pseudo_labels, "n -> (n v)", v=v)
+ pseudo_auroc = float(
+ roc_auc_score(pseudo_labels.cpu(), pseudo_preds.cpu())
+ )
+ else:
+ # We don't bother with computing the pseudo-AUROC for multi-class
+ pseudo_auroc = None
 
  if val_lm_preds is not None:
  val_gt_cpu = repeat(val_gt, "n -> (n v)", v=v).cpu()
@@ -174,21 +188,18 @@ def train_reporter(
  def get_pseudo_classifier(self, data: dict[str, tuple], device: str) -> Classifier:
  """Check the separability of the pseudo-labels at a given layer."""
 
- x0s, x1s = [], []
- for x0, x1, _, _ in data.values():
- x0s.append(rearrange(x0, "n v d -> (n v) d"))
- x1s.append(rearrange(x1, "n v d -> (n v) d"))
+ X = torch.cat(
+ [rearrange(h, "n v k d -> (n v) k d") for h, _, _ in data.values()]
+ )
+ (N, k, d) = X.shape
+ assert k == 2, "Pseudo-labels should be binary"
 
  # Simple de-meaning normalization
- X0 = torch.cat(x0s)
- X1 = torch.cat(x1s)
- X0 -= X0.mean(dim=0)
- X1 -= X1.mean(dim=0)
-
- X = torch.cat([X0, X1])
- Y = torch.cat([X0.new_zeros(X0.shape[0]), X0.new_ones(X1.shape[0])])
+ X -= X.mean(dim=0)
+ X = rearrange(X, "N k d -> (N k) d")
+ Y = torch.cat([X.new_zeros(N), X.new_ones(N)])
 
- pseudo_clf = Classifier(X.shape[-1], device=device)
+ pseudo_clf = Classifier(d, device=device)
  pseudo_clf.fit(X, Y)
  return pseudo_clf