acc now computes top-1 acc

EleutherAI · norabelrose · Apr 17, 2023 · Apr 16, 2023 · Apr 16, 2023 · Apr 16, 2023
commit 941c3968b141013f1bb118e9441e6f7ea81cf726
diff --git a/.gitignore b/.gitignore
@@ -5,6 +5,12 @@ elk/trained/*
 nohup.out
 .idea
 *.pkl
+.vscode/launch.json
+elk/experiment_results.ipynb
+elk/training/loss_api.py
+prototyping.ipynb
+run.sh
+dummy/
 
 # scripts for experiments in progress
 my_*.sh

diff --git a/elk/training/reporter.py b/elk/training/reporter.py
@@ -94,7 +94,13 @@ def score(self, labels: Tensor, hiddens: Tensor) -> EvalResult:
 
  Returns:
  an instance of EvalResult containing the loss, accuracy, calibrated
- accuracy, and AUROC of the probe on `hiddens`.
+ accuracy, and AUROC of the probe on `contrast_set`.
+ Accuracy: top-1 accuracy averaged over questions and variants.
+ Calibrated accuracy: top-1 accuracy averaged over questions and
+ variants, calibrated so that x% of the predictions are `True`,
+ where x is the proportion of examples with ground truth label `True`.
+ AUROC: averaged over the n * v * c binary questions
+ ECE: Expected Calibration Error
  """
  logits = self(hiddens)
  (_, v, c) = logits.shape
@@ -116,9 +122,10 @@ def score(self, labels: Tensor, hiddens: Tensor) -> EvalResult:
  cal_acc = 0.0
  cal_err = 0.0
 
- raw_preds = to_one_hot(logits.argmax(dim=-1), c).long()
- Y = to_one_hot(Y, c).long().flatten()
+ Y_one_hot = to_one_hot(Y, c).long().flatten()
+ roc_auc_ci(Y_one_hot.cpu(), logits.cpu().flatten())
 
+ raw_preds = logits.argmax(dim=-1).long()
  raw_acc = accuracy(Y, raw_preds.flatten())
  auroc_result = roc_auc_ci(Y, logits.flatten())
  return EvalResult(