From 934cd54e9123fcaa6de36c42957e0229ea10e600 Mon Sep 17 00:00:00 2001 From: Nora Belrose Date: Wed, 26 Apr 2023 10:19:33 +0000 Subject: [PATCH 001/102] Log ensembled metrics --- elk/evaluation/evaluate.py | 39 +++++++++++++++++++------------ elk/metrics/eval.py | 36 +++++++++++++++++++++-------- elk/run.py | 2 +- elk/training/train.py | 47 +++++++++++++++++++++++--------------- 4 files changed, 82 insertions(+), 42 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index 57ee6d031..2b9e85099 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -45,21 +45,32 @@ def apply_to_layer( for ds_name, (val_h, val_gt, _) in val_output.items(): meta = {"dataset": ds_name, "layer": layer} - val_result = evaluate_preds(val_gt, reporter(val_h)) - row_bufs["eval"].append({**meta, **val_result.to_dict()}) + val_credences = reporter(val_h) + for mode in ("none", "partial", "full"): + row_bufs["eval"].append( + { + **meta, + "ensembling": mode, + **evaluate_preds(val_gt, val_credences, mode).to_dict(), + } + ) - lr_dir = experiment_dir / "lr_models" - if not self.skip_supervised and lr_dir.exists(): - with open(lr_dir / f"layer_{layer}.pt", "rb") as f: - lr_models = torch.load(f, map_location=device) - if not isinstance(lr_models, list): # backward compatibility - lr_models = [lr_models] + lr_dir = experiment_dir / "lr_models" + if not self.skip_supervised and lr_dir.exists(): + with open(lr_dir / f"layer_{layer}.pt", "rb") as f: + lr_models = torch.load(f, map_location=device) + if not isinstance(lr_models, list): # backward compatibility + lr_models = [lr_models] - for i, model in enumerate(lr_models): - model.eval() - lr_result = evaluate_preds(val_gt, model(val_h)) - row_bufs["lr_eval"].append( - {"inlp_iter": i, **meta, **lr_result.to_dict()} - ) + for i, model in enumerate(lr_models): + model.eval() + row_bufs["lr_eval"].append( + { + "ensembling": mode, + "inlp_iter": i, + **meta, + **evaluate_preds(val_gt, model(val_h), mode).to_dict(), + } + ) return {k: pd.DataFrame(v) for k, v in row_bufs.items()} diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index dcc5ce355..653beae55 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -1,4 +1,5 @@ from dataclasses import asdict, dataclass +from typing import Literal import torch from einops import repeat @@ -37,16 +38,20 @@ def to_dict(self, prefix: str = "") -> dict[str, float]: else {} ) auroc_dict = {f"{prefix}auroc_{k}": v for k, v in asdict(self.roc_auc).items()} - return {**acc_dict, **cal_acc_dict, **cal_dict, **auroc_dict} + return {**auroc_dict, **cal_acc_dict, **acc_dict, **cal_dict} -def evaluate_preds(y_true: Tensor, y_logits: Tensor) -> EvalResult: +def evaluate_preds( + y_true: Tensor, + y_logits: Tensor, + ensembling: Literal["none", "partial", "full"] = "none", +) -> EvalResult: """ Evaluate the performance of a classification model. Args: y_true: Ground truth tensor of shape (N,). - y_pred: Predicted class tensor of shape (N, variants, n_classes). + y_logits: Predicted class tensor of shape (N, variants, n_classes). Returns: dict: A dictionary containing the accuracy, AUROC, and ECE. 
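# --- Editor's aside (not part of the diff): a minimal, runnable sketch of the
# three ensembling modes this patch threads through evaluate_preds. The tensor
# shapes (n examples, v prompt variants, c classes) follow the docstring above;
# the toy tensors and variable names below are invented for illustration only.
import torch

n, v, c = 4, 3, 2
y_logits = torch.randn(n, v, c)          # stand-in for reporter credences
y_true = torch.tensor([0, 1, 0, 1])      # toy labels

# "none": score every (example, variant) pair independently.
preds_none = y_logits.argmax(dim=-1)                   # shape (n, v)

# "partial": keep variants, but pool the two class logits into one score.
scores_partial = y_logits[..., 1] - y_logits[..., 0]   # shape (n, v)

# "full": average credences over variants before doing anything else.
pooled = y_logits.mean(dim=1)                          # shape (n, c)
preds_full = pooled.argmax(dim=-1)                     # shape (n,)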
@@ -54,16 +59,29 @@ def evaluate_preds(y_true: Tensor, y_logits: Tensor) -> EvalResult: (n, v, c) = y_logits.shape assert y_true.shape == (n,) - # Clustered bootstrap confidence intervals for AUROC - y_true = repeat(y_true, "n -> n v", v=v) - auroc = roc_auc_ci(to_one_hot(y_true, c).long().flatten(1), y_logits.flatten(1)) - acc = accuracy_ci(y_true, y_logits.argmax(dim=-1)) - + if ensembling == "full": + y_logits = y_logits.mean(dim=1) + else: + y_true = repeat(y_true, "n -> n v", v=v) + + y_pred = y_logits.argmax(dim=-1) + if ensembling == "none": + auroc = roc_auc_ci(to_one_hot(y_true, c).long().flatten(1), y_logits.flatten(1)) + elif ensembling in ("partial", "full"): + # Pool together the negative and positive class logits + if c == 2: + auroc = roc_auc_ci(y_true, y_logits[..., 1] - y_logits[..., 0]) + else: + auroc = roc_auc_ci(to_one_hot(y_true, c).long(), y_logits) + else: + raise ValueError(f"Unknown mode: {ensembling}") + + acc = accuracy_ci(y_true, y_pred) cal_acc = None cal_err = None if c == 2: - pos_probs = y_logits.softmax(-1)[..., 1] + pos_probs = torch.sigmoid(y_logits[..., 1] - y_logits[..., 0]) # Calibrated accuracy cal_thresh = pos_probs.float().quantile(y_true.float().mean()) diff --git a/elk/run.py b/elk/run.py index 838c228f2..d7fa549e9 100644 --- a/elk/run.py +++ b/elk/run.py @@ -173,7 +173,7 @@ def apply_to_layers( finally: # Make sure the CSVs are written even if we crash or get interrupted for name, dfs in df_buffers.items(): - df = pd.concat(dfs).sort_values(by="layer") + df = pd.concat(dfs).sort_values(by=["layer", "ensembling"]) df.round(4).to_csv(self.out_dir / f"{name}.csv", index=False) if self.debug: save_debug_log(self.datasets, self.out_dir) diff --git a/elk/training/train.py b/elk/training/train.py index ddecc06f7..ad5a799a6 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -124,24 +124,35 @@ def apply_to_layer( for ds_name, (val_h, val_gt, val_lm_preds) in val_dict.items(): meta = {"dataset": ds_name, "layer": layer} - val_result = evaluate_preds(val_gt, reporter(val_h)) - row_bufs["eval"].append( - { - **meta, - "pseudo_auroc": pseudo_auroc, - "train_loss": train_loss, - **val_result.to_dict(), - } - ) - - if val_lm_preds is not None: - lm_result = evaluate_preds(val_gt, val_lm_preds) - row_bufs["lm_eval"].append({**meta, **lm_result.to_dict()}) - - for i, model in enumerate(lr_models): - lr_result = evaluate_preds(val_gt, model(val_h)) - row_bufs["lr_eval"].append( - {"inlp_iter": i, **meta, **lr_result.to_dict()} + val_credences = reporter(val_h) + for mode in ("none", "partial", "full"): + row_bufs["eval"].append( + { + **meta, + "ensembling": mode, + **evaluate_preds(val_gt, val_credences, mode).to_dict(), + "pseudo_auroc": pseudo_auroc, + "train_loss": train_loss, + } ) + if val_lm_preds is not None: + row_bufs["lm_eval"].append( + { + **meta, + "ensembling": mode, + **evaluate_preds(val_gt, val_lm_preds, mode).to_dict(), + } + ) + + for i, model in enumerate(lr_models): + row_bufs["lr_eval"].append( + { + **meta, + "ensembling": mode, + "inlp_iter": i, + **evaluate_preds(val_gt, model(val_h), mode).to_dict(), + } + ) + return {k: pd.DataFrame(v) for k, v in row_bufs.items()} From dff69bf7184c4ea2d53043ed0009c7ebaf658f52 Mon Sep 17 00:00:00 2001 From: Nora Belrose Date: Wed, 26 Apr 2023 10:42:04 +0000 Subject: [PATCH 002/102] Fixing pyright version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6575e57a9..1a787fba7 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -41,7 +41,7 @@ dev = [ "hypothesis", "pre-commit", "pytest", - "pyright", + "pyright==1.1.304", "scikit-learn", ] From a493b857469cd55564d642b6cbc91dc9580c700b Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Sun, 30 Apr 2023 00:40:22 +0200 Subject: [PATCH 003/102] experiment with layer ensembling --- elk/evaluation/evaluate.py | 7 ++++-- elk/example.py | 49 ++++++++++++++++++++++++++++++++++++++ elk/run.py | 13 +++++++--- 3 files changed, 64 insertions(+), 5 deletions(-) create mode 100644 elk/example.py diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index 2b9e85099..653e8defb 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -30,7 +30,7 @@ def __post_init__(self): def apply_to_layer( self, layer: int, devices: list[str], world_size: int - ) -> dict[str, pd.DataFrame]: + ) -> tuple[dict[str, pd.DataFrame], list[dict]]: """Evaluate a single reporter on a single layer.""" device = self.get_device(devices, world_size) val_output = self.prepare_data(device, layer, "val") @@ -42,10 +42,13 @@ def apply_to_layer( reporter.eval() row_bufs = defaultdict(list) + + vals = [] for ds_name, (val_h, val_gt, _) in val_output.items(): meta = {"dataset": ds_name, "layer": layer} val_credences = reporter(val_h) + vals.append({**meta, "val_gt": val_gt, "val_credences": val_credences}) for mode in ("none", "partial", "full"): row_bufs["eval"].append( { @@ -73,4 +76,4 @@ def apply_to_layer( } ) - return {k: pd.DataFrame(v) for k, v in row_bufs.items()} + return ({k: pd.DataFrame(v) for k, v in row_bufs.items()}, vals) diff --git a/elk/example.py b/elk/example.py new file mode 100644 index 000000000..aff5ecde8 --- /dev/null +++ b/elk/example.py @@ -0,0 +1,49 @@ +import pickle +from pathlib import Path +import numpy as np + +from torch import Tensor +import torch +from elk.metrics.eval import to_one_hot + +from elk.metrics.roc_auc import roc_auc_ci + +root = Path('/home/laurito/elk-reporters/microsoft/deberta-large-mnli/imdb/quizzical-allen/transfer_eval') + +# load pickle file +with open(root / 'vals.pkl', 'rb') as f: + vals_buffers = pickle.load(f) + +y_logits_means = [] +y_trues_list = [] +for vals in vals_buffers: + y_logits = vals[0]["val_credences"] + y_trues = vals[0]["val_gt"] + (n, v, c) = y_logits.shape + assert y_trues.shape == (n,) + + y_logits = y_logits.mean(dim=1) + + y_logits_means.append(y_logits) + y_trues_list.append(y_trues) + + if c == 2: + auroc = roc_auc_ci(y_trues, y_logits[..., 1] - y_logits[..., 0]) + else: + auroc = roc_auc_ci(to_one_hot(y_trues, c).long(), y_logits) + + print("layer", vals[0]["layer"], "auroc", auroc) + + +y_trues = y_trues_list[22:-1] +y_logits = y_logits_means[22:-1] + +layer_mean = torch.mean(torch.stack(y_logits), dim=2) + +breakpoint() + +i = 0 +for y_logits, y_true in zip(y_logits_means, y_trues): + auroc = roc_auc_ci(y_true, layer_mean[..., 1] - layer_mean[..., 0]) + print("auroc", auroc) + i = i + 1 diff --git a/elk/run.py b/elk/run.py index d7fa549e9..b9cff2844 100644 --- a/elk/run.py +++ b/elk/run.py @@ -1,4 +1,5 @@ import os +import pickle import random from abc import ABC, abstractmethod from collections import defaultdict @@ -165,10 +166,11 @@ def apply_to_layers( with ctx.Pool(num_devices) as pool: mapper = pool.imap_unordered if num_devices > 1 else map df_buffers = defaultdict(list) - + vals_buffers = [] try: - for df_dict in tqdm(mapper(func, layers), total=len(layers)): - for k, v in df_dict.items(): + for df_dict, vals in tqdm(mapper(func, layers), total=len(layers)): + 
vals_buffers.append(vals) + for k, v in df_dict.items(): # type: ignore df_buffers[k].append(v) finally: # Make sure the CSVs are written even if we crash or get interrupted @@ -177,3 +179,8 @@ def apply_to_layers( df.round(4).to_csv(self.out_dir / f"{name}.csv", index=False) if self.debug: save_debug_log(self.datasets, self.out_dir) + # save vals_buffers as pickle + with open(self.out_dir / f"vals.pkl", "wb") as f: + pickle.dump(vals_buffers, f) + + print("Saved vals to ", self.out_dir / f"vals.pkl") \ No newline at end of file From af5def6cc53427734aaaf44dd3a9b04d225b1818 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Sun, 30 Apr 2023 17:40:21 +0200 Subject: [PATCH 004/102] add draft example for ensembling datasets --- elk/example.py | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/elk/example.py b/elk/example.py index aff5ecde8..7feb4b0a5 100644 --- a/elk/example.py +++ b/elk/example.py @@ -8,15 +8,28 @@ from elk.metrics.roc_auc import roc_auc_ci +# imdb root = Path('/home/laurito/elk-reporters/microsoft/deberta-large-mnli/imdb/quizzical-allen/transfer_eval') +# boolq +# root = Path('/home/laurito/elk-reporters/microsoft/deberta-large-mnli/imdb/quizzical-allen/transfer_eval') +# elk eval "microsoft/ +# deberta-large-mnli/i +# mdb/quizzical-allen" +# microsoft/deberta-l +# arge-mnli imdb --num +# _gpus 1 + # load pickle file with open(root / 'vals.pkl', 'rb') as f: vals_buffers = pickle.load(f) y_logits_means = [] y_trues_list = [] +k_prompts_aurocs = [] for vals in vals_buffers: + print("vals.shape", len(vals)) + y_logits = vals[0]["val_credences"] y_trues = vals[0]["val_gt"] (n, v, c) = y_logits.shape @@ -32,18 +45,25 @@ else: auroc = roc_auc_ci(to_one_hot(y_trues, c).long(), y_logits) + k_prompts_aurocs.append(auroc) + print("layer", vals[0]["layer"], "auroc", auroc) +def get_best_aurocs_indices(aurocs, max=5): + sorted_indices = sorted(range(len(aurocs)), key=lambda i: aurocs[i].estimate) + # the best aurocs are at the end of the list + return sorted_indices[-max:] + +best_aurocs_indices = get_best_aurocs_indices(k_prompts_aurocs) +print("best_aurocs_indices", best_aurocs_indices) + +y_trues = [y_trues_list[i] for i in best_aurocs_indices] +y_logits = [y_logits_means[i] for i in best_aurocs_indices] -y_trues = y_trues_list[22:-1] -y_logits = y_logits_means[22:-1] +y_logits_layers = torch.stack(y_logits) +y_layer_logits_means = torch.mean(y_logits_layers, dim=0) -layer_mean = torch.mean(torch.stack(y_logits), dim=2) +auroc = roc_auc_ci(y_trues[2], y_layer_logits_means[..., 1] - y_layer_logits_means[..., 0]) +print(auroc) -breakpoint() -i = 0 -for y_logits, y_true in zip(y_logits_means, y_trues): - auroc = roc_auc_ci(y_true, layer_mean[..., 1] - layer_mean[..., 0]) - print("auroc", auroc) - i = i + 1 From 04a2a82c2ad570fc9e4d0907449971636e84a0d5 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Sun, 30 Apr 2023 16:09:25 +0000 Subject: [PATCH 005/102] add comment --- elk/example.py | 1 + 1 file changed, 1 insertion(+) diff --git a/elk/example.py b/elk/example.py index 7feb4b0a5..c784cefd4 100644 --- a/elk/example.py +++ b/elk/example.py @@ -49,6 +49,7 @@ print("layer", vals[0]["layer"], "auroc", auroc) +# only use to find pattern in data def get_best_aurocs_indices(aurocs, max=5): sorted_indices = sorted(range(len(aurocs)), key=lambda i: aurocs[i].estimate) # the best aurocs are at the end of the list From cda7de7765c5a6e849e78447afa89557e9d48a27 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Sun, 30 Apr 2023 18:59:03 
+0200 Subject: [PATCH 006/102] add eval in comments --- elk/example.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/elk/example.py b/elk/example.py index 7feb4b0a5..6666820ac 100644 --- a/elk/example.py +++ b/elk/example.py @@ -13,12 +13,7 @@ # boolq # root = Path('/home/laurito/elk-reporters/microsoft/deberta-large-mnli/imdb/quizzical-allen/transfer_eval') -# elk eval "microsoft/ -# deberta-large-mnli/i -# mdb/quizzical-allen" -# microsoft/deberta-l -# arge-mnli imdb --num -# _gpus 1 +# elk eval "microsoft/deberta-large-mnli/imdb/quizzical-allen" microsoft/deberta-large-mnli imdb --num_gpus 1 # load pickle file with open(root / 'vals.pkl', 'rb') as f: From 0ceaa3a4abb617e00699b087c040183f235662ce Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Mon, 1 May 2023 14:44:24 +0200 Subject: [PATCH 007/102] add different root --- elk/example.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/elk/example.py b/elk/example.py index c784cefd4..454ca3a36 100644 --- a/elk/example.py +++ b/elk/example.py @@ -9,7 +9,9 @@ from elk.metrics.roc_auc import roc_auc_ci # imdb -root = Path('/home/laurito/elk-reporters/microsoft/deberta-large-mnli/imdb/quizzical-allen/transfer_eval') +# root = Path('/home/laurito/elk-reporters/microsoft/deberta-large-mnli/imdb/quizzical-allen/transfer_eval') +# root = Path('/home/wombat_share/laurito/elk_reporters/microsoft/deberta-v2-xxlarge-mnli/super_glue:boolq/nervous-mclean/transfer_eval') +root = Path('/home/wombat_share/laurito/elk_reporters/huggyllama/llama-13b/imdb/silly-hoover/transfer_eval') # boolq # root = Path('/home/laurito/elk-reporters/microsoft/deberta-large-mnli/imdb/quizzical-allen/transfer_eval') @@ -50,7 +52,7 @@ print("layer", vals[0]["layer"], "auroc", auroc) # only use to find pattern in data -def get_best_aurocs_indices(aurocs, max=5): +def get_best_aurocs_indices(aurocs, max=3): sorted_indices = sorted(range(len(aurocs)), key=lambda i: aurocs[i].estimate) # the best aurocs are at the end of the list return sorted_indices[-max:] @@ -68,3 +70,4 @@ def get_best_aurocs_indices(aurocs, max=5): print(auroc) +# elk eval "microsoft/deberta-large-mnli/super_glue:boolq/nervous-mclean" microsoft/deberta-large-mnli "super_glue:boolq" --num_gpus 1 \ No newline at end of file From 0bd274f2a3a13d97f6a2e2a29a4da77863b1279c Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Sat, 27 May 2023 23:57:56 +0200 Subject: [PATCH 008/102] add empty list of vals --- elk/training/train.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/elk/training/train.py b/elk/training/train.py index e0aee29b5..e4aee466f 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -53,7 +53,7 @@ def apply_to_layer( layer: int, devices: list[str], world_size: int, - ) -> dict[str, pd.DataFrame]: + ) -> tuple[dict[str, pd.DataFrame], list[dict]]: """Train a single reporter on a single layer.""" self.make_reproducible(seed=self.net.seed + layer) @@ -192,4 +192,4 @@ def apply_to_layer( } ) - return {k: pd.DataFrame(v) for k, v in row_bufs.items()} + return ({k: pd.DataFrame(v) for k, v in row_bufs.items()}, []) From 994af9bed201fe6a7803d6a80f255a7ec377db18 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Sat, 17 Jun 2023 18:55:38 +0200 Subject: [PATCH 009/102] add first version of layer ensembling to eval --- elk/example.py | 68 --------------------------------------------- elk/metrics/eval.py | 43 ++++++++++++++++++++++++++++ elk/run.py | 14 +++++++--- 3 files changed, 53 insertions(+), 72 deletions(-) delete 
mode 100644 elk/example.py diff --git a/elk/example.py b/elk/example.py deleted file mode 100644 index 68e15e0a7..000000000 --- a/elk/example.py +++ /dev/null @@ -1,68 +0,0 @@ -import pickle -from pathlib import Path -import numpy as np - -from torch import Tensor -import torch -from elk.metrics.eval import to_one_hot - -from elk.metrics.roc_auc import roc_auc_ci - -# imdb -# root = Path('/home/laurito/elk-reporters/microsoft/deberta-large-mnli/imdb/quizzical-allen/transfer_eval') -# root = Path('/home/wombat_share/laurito/elk_reporters/microsoft/deberta-v2-xxlarge-mnli/super_glue:boolq/nervous-mclean/transfer_eval') -root = Path('/home/wombat_share/laurito/elk_reporters/huggyllama/llama-13b/imdb/silly-hoover/transfer_eval') - -# boolq -# root = Path('/home/laurito/elk-reporters/microsoft/deberta-large-mnli/imdb/quizzical-allen/transfer_eval') -# elk eval "microsoft/deberta-large-mnli/imdb/quizzical-allen" microsoft/deberta-large-mnli imdb --num_gpus 1 - -# load pickle file -with open(root / 'vals.pkl', 'rb') as f: - vals_buffers = pickle.load(f) - -y_logits_means = [] -y_trues_list = [] -k_prompts_aurocs = [] -for vals in vals_buffers: - print("vals.shape", len(vals)) - - y_logits = vals[0]["val_credences"] - y_trues = vals[0]["val_gt"] - (n, v, c) = y_logits.shape - assert y_trues.shape == (n,) - - y_logits = y_logits.mean(dim=1) - - y_logits_means.append(y_logits) - y_trues_list.append(y_trues) - - if c == 2: - auroc = roc_auc_ci(y_trues, y_logits[..., 1] - y_logits[..., 0]) - else: - auroc = roc_auc_ci(to_one_hot(y_trues, c).long(), y_logits) - - k_prompts_aurocs.append(auroc) - - print("layer", vals[0]["layer"], "auroc", auroc) - -# only use to find pattern in data -def get_best_aurocs_indices(aurocs, max=3): - sorted_indices = sorted(range(len(aurocs)), key=lambda i: aurocs[i].estimate) - # the best aurocs are at the end of the list - return sorted_indices[-max:] - -best_aurocs_indices = get_best_aurocs_indices(k_prompts_aurocs) -print("best_aurocs_indices", best_aurocs_indices) - -y_trues = [y_trues_list[i] for i in best_aurocs_indices] -y_logits = [y_logits_means[i] for i in best_aurocs_indices] - -y_logits_layers = torch.stack(y_logits) -y_layer_logits_means = torch.mean(y_logits_layers, dim=0) - -auroc = roc_auc_ci(y_trues[2], y_layer_logits_means[..., 1] - y_layer_logits_means[..., 0]) -print(auroc) - - -# elk eval "microsoft/deberta-large-mnli/super_glue:boolq/nervous-mclean" microsoft/deberta-large-mnli "super_glue:boolq" --num_gpus 1 \ No newline at end of file diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index 653beae55..432369f07 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -93,6 +93,49 @@ def evaluate_preds( return EvalResult(acc, cal_acc, cal_err, auroc) +def layer_ensembling(vals_buffers) -> EvalResult: + y_logits_means = [] + y_trues_list = [] + k_prompts_aurocs = [] + for vals in vals_buffers: + print("vals.shape", len(vals)) + + y_logits = vals[0]["val_credences"] + y_trues = vals[0]["val_gt"] + (n, _, c) = y_logits.shape + assert y_trues.shape == (n,) + + y_logits = y_logits.mean(dim=1) + + y_logits_means.append(y_logits) + y_trues_list.append(y_trues) + + if c == 2: + auroc = roc_auc_ci(y_trues, y_logits[..., 1] - y_logits[..., 0]) + else: + auroc = roc_auc_ci(to_one_hot(y_trues, c).long(), y_logits) + + k_prompts_aurocs.append(auroc) + + print("layer", vals[0]["layer"], "auroc", auroc) + + # only use to find pattern in data + def get_best_aurocs_indices(aurocs, max=3): + sorted_indices = sorted(range(len(aurocs)), key=lambda i: 
aurocs[i].estimate) + # the best aurocs are at the end of the list + return sorted_indices[-max:] + + best_aurocs_indices = get_best_aurocs_indices(k_prompts_aurocs) + print("best_aurocs_indices", best_aurocs_indices) + + y_trues = [y_trues_list[i] for i in best_aurocs_indices] + y_logits = [y_logits_means[i] for i in best_aurocs_indices] + + y_logits_layers = torch.stack(y_logits) + y_layer_logits_means = torch.mean(y_logits_layers, dim=0) + + auroc = roc_auc_ci(y_trues[2], y_layer_logits_means[..., 1] - y_layer_logits_means[..., 0]) + return EvalResult(None, None, None, auroc) def to_one_hot(labels: Tensor, n_classes: int) -> Tensor: """ diff --git a/elk/run.py b/elk/run.py index c0ba52a9d..e10ea265c 100644 --- a/elk/run.py +++ b/elk/run.py @@ -18,6 +18,8 @@ from torch import Tensor from tqdm import tqdm +from elk.metrics.eval import layer_ensembling + from .debug_logging import save_debug_log from .extraction import Extract, extract from .extraction.dataset_name import DatasetDictWithName @@ -183,6 +185,7 @@ def apply_to_layers( try: for df_dict, vals in tqdm(mapper(func, layers), total=len(layers)): vals_buffers.append(vals) + print("vals", vals) for k, v in df_dict.items(): # type: ignore df_buffers[k].append(v) finally: @@ -192,8 +195,11 @@ def apply_to_layers( df.round(4).to_csv(self.out_dir / f"{name}.csv", index=False) if self.debug: save_debug_log(self.datasets, self.out_dir) - # save vals_buffers as pickle - with open(self.out_dir / f"vals.pkl", "wb") as f: - pickle.dump(vals_buffers, f) + breakpoint() + print("hi") + # layer_ensembling_results = layer_ensembling(vals_buffers) + - print("Saved vals to ", self.out_dir / f"vals.pkl") \ No newline at end of file + # for name, dfs in df_buffers.items(): + # df = pd.concat(dfs).sort_values(by=["layer", "ensembling"]) + # df.round(4).to_csv(self.out_dir / f"{name}.csv", index=False) \ No newline at end of file From 6ca191624debf08e0f18715d0c72e500b2a24751 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Mon, 19 Jun 2023 13:03:51 +0200 Subject: [PATCH 010/102] add vals to train --- elk/training/train.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/elk/training/train.py b/elk/training/train.py index e4aee466f..d863ba748 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -137,12 +137,14 @@ def apply_to_layer( lr_models = [] row_bufs = defaultdict(list) + vals = [] for ds_name in val_dict: val_h, val_gt, val_lm_preds = val_dict[ds_name] train_h, train_gt, train_lm_preds = train_dict[ds_name] meta = {"dataset": ds_name, "layer": layer} val_credences = reporter(val_h) + vals.append({**meta, "val_gt": val_gt, "val_credences": val_credences}) train_credences = reporter(train_h) for mode in ("none", "partial", "full"): row_bufs["eval"].append( @@ -192,4 +194,4 @@ def apply_to_layer( } ) - return ({k: pd.DataFrame(v) for k, v in row_bufs.items()}, []) + return ({k: pd.DataFrame(v) for k, v in row_bufs.items()}, vals) From b0d0f83a53e17abff2f0e598e0dd8e99c1bc4216 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Mon, 19 Jun 2023 15:16:38 +0200 Subject: [PATCH 011/102] refactoring & cleanup of eval and layer ensembling --- elk/metrics/eval.py | 143 +++++++++++++++++++++++--------------------- elk/run.py | 24 ++++---- 2 files changed, 84 insertions(+), 83 deletions(-) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index 432369f07..c15038922 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -3,6 +3,7 @@ import torch from einops import repeat +import pandas as pd from torch import 
Tensor from .accuracy import AccuracyResult, accuracy_ci @@ -41,6 +42,37 @@ def to_dict(self, prefix: str = "") -> dict[str, float]: return {**auroc_dict, **cal_acc_dict, **acc_dict, **cal_dict} +def calc_auroc(y_logits, y_true, ensembling, num_classes): + if ensembling == "none": + auroc = roc_auc_ci(to_one_hot(y_true, num_classes).long().flatten(1), y_logits.flatten(1)) + elif ensembling in ("partial", "full"): + # Pool together the negative and positive class logits + if num_classes == 2: + auroc = roc_auc_ci(y_true, y_logits[..., 1] - y_logits[..., 0]) + else: + auroc = roc_auc_ci(to_one_hot(y_true, num_classes).long(), y_logits) + else: + raise ValueError(f"Unknown mode: {ensembling}") + + return auroc + + +def calc_calibrated_accuracies(y_true, pos_probs): + cal_thresh = pos_probs.float().quantile(y_true.float().mean()) + cal_preds = pos_probs.gt(cal_thresh).to(torch.int) + cal_acc = accuracy_ci(y_true, cal_preds) + return cal_acc + +def calc_calibrated_errors(y_true, pos_probs): + cal = CalibrationError().update(y_true.flatten(), pos_probs.flatten()) + cal_err = cal.compute() + return cal_err + +def calc_accuracies(y_logits, y_true): + y_pred = y_logits.argmax(dim=-1) + return accuracy_ci(y_true, y_pred) + + def evaluate_preds( y_true: Tensor, y_logits: Tensor, @@ -50,92 +82,65 @@ def evaluate_preds( Evaluate the performance of a classification model. Args: - y_true: Ground truth tensor of shape (N,). - y_logits: Predicted class tensor of shape (N, variants, n_classes). + y_true: Ground truth tensor of shape (n,). + y_logits: Predicted class tensor of shape (n, num_variants, num_classes). Returns: dict: A dictionary containing the accuracy, AUROC, and ECE. """ - (n, v, c) = y_logits.shape + (n, num_variants, num_classes) = y_logits.shape assert y_true.shape == (n,) - + if ensembling == "full": y_logits = y_logits.mean(dim=1) else: - y_true = repeat(y_true, "n -> n v", v=v) - - y_pred = y_logits.argmax(dim=-1) - if ensembling == "none": - auroc = roc_auc_ci(to_one_hot(y_true, c).long().flatten(1), y_logits.flatten(1)) - elif ensembling in ("partial", "full"): - # Pool together the negative and positive class logits - if c == 2: - auroc = roc_auc_ci(y_true, y_logits[..., 1] - y_logits[..., 0]) - else: - auroc = roc_auc_ci(to_one_hot(y_true, c).long(), y_logits) - else: - raise ValueError(f"Unknown mode: {ensembling}") - - acc = accuracy_ci(y_true, y_pred) - cal_acc = None - cal_err = None - - if c == 2: - pos_probs = torch.sigmoid(y_logits[..., 1] - y_logits[..., 0]) - - # Calibrated accuracy - cal_thresh = pos_probs.float().quantile(y_true.float().mean()) - cal_preds = pos_probs.gt(cal_thresh).to(torch.int) - cal_acc = accuracy_ci(y_true, cal_preds) - - cal = CalibrationError().update(y_true.flatten(), pos_probs.flatten()) - cal_err = cal.compute() + y_true = repeat(y_true, "n -> n v", v=num_variants) + + return calc_eval_results(y_true, y_logits, ensembling, num_classes) + +def calc_eval_results(y_true, y_logits, ensembling, num_classes): + acc = calc_accuracies(y_logits=y_logits, y_true=y_true) + + pos_probs = torch.sigmoid(y_logits[..., 1] - y_logits[..., 0]) + cal_acc = calc_calibrated_accuracies(y_true=y_true, pos_probs=pos_probs) if num_classes == 2 else None + cal_err = calc_calibrated_errors(y_true=y_true, pos_probs=pos_probs) if num_classes == 2 else None + + auroc = calc_auroc(y_logits=y_logits, + y_true=y_true, + ensembling=ensembling, + num_classes=num_classes) return EvalResult(acc, cal_acc, cal_err, auroc) -def layer_ensembling(vals_buffers) -> EvalResult: +def 
layer_ensembling(layer_outputs) -> EvalResult: y_logits_means = [] - y_trues_list = [] - k_prompts_aurocs = [] - for vals in vals_buffers: - print("vals.shape", len(vals)) - - y_logits = vals[0]["val_credences"] - y_trues = vals[0]["val_gt"] - (n, _, c) = y_logits.shape - assert y_trues.shape == (n,) - - y_logits = y_logits.mean(dim=1) + y_trues = [] + + for layer_output in layer_outputs: + y_logits = layer_output[0]["val_credences"] - y_logits_means.append(y_logits) - y_trues_list.append(y_trues) - - if c == 2: - auroc = roc_auc_ci(y_trues, y_logits[..., 1] - y_logits[..., 0]) - else: - auroc = roc_auc_ci(to_one_hot(y_trues, c).long(), y_logits) - - k_prompts_aurocs.append(auroc) - - print("layer", vals[0]["layer"], "auroc", auroc) - - # only use to find pattern in data - def get_best_aurocs_indices(aurocs, max=3): - sorted_indices = sorted(range(len(aurocs)), key=lambda i: aurocs[i].estimate) - # the best aurocs are at the end of the list - return sorted_indices[-max:] - - best_aurocs_indices = get_best_aurocs_indices(k_prompts_aurocs) - print("best_aurocs_indices", best_aurocs_indices) - - y_trues = [y_trues_list[i] for i in best_aurocs_indices] - y_logits = [y_logits_means[i] for i in best_aurocs_indices] + # full ensembling + y_logits_means.append(y_logits.mean(dim=1)) + + y_true = layer_output[0]["val_gt"] + y_trues.append(y_true) + + num_classes = layer_outputs[0][0]["val_credences"].shape[2] + + # get logits and ground_truth from middle to last layer + middle_index = len(y_trues) // 2 + y_trues = y_trues[middle_index:] + y_logits = y_logits_means[middle_index:] y_logits_layers = torch.stack(y_logits) + + # layer ensembling of the stacked logits y_layer_logits_means = torch.mean(y_logits_layers, dim=0) - auroc = roc_auc_ci(y_trues[2], y_layer_logits_means[..., 1] - y_layer_logits_means[..., 0]) - return EvalResult(None, None, None, auroc) + return calc_eval_results(y_true=y_trues[2], + y_logits=y_layer_logits_means, + ensembling="full", + num_classes=num_classes) def to_one_hot(labels: Tensor, n_classes: int) -> Tensor: """ diff --git a/elk/run.py b/elk/run.py index e10ea265c..32a2943a2 100644 --- a/elk/run.py +++ b/elk/run.py @@ -3,7 +3,7 @@ import random from abc import ABC, abstractmethod from collections import defaultdict -from dataclasses import dataclass +from dataclasses import asdict, dataclass from functools import partial from pathlib import Path from typing import Callable, Literal @@ -18,7 +18,7 @@ from torch import Tensor from tqdm import tqdm -from elk.metrics.eval import layer_ensembling +from elk.metrics.eval import EvalResult, layer_ensembling from .debug_logging import save_debug_log from .extraction import Extract, extract @@ -181,25 +181,21 @@ def apply_to_layers( with ctx.Pool(num_devices) as pool: mapper = pool.imap_unordered if num_devices > 1 else map df_buffers = defaultdict(list) - vals_buffers = [] + layer_outputs = [] try: - for df_dict, vals in tqdm(mapper(func, layers), total=len(layers)): - vals_buffers.append(vals) - print("vals", vals) + for df_dict, layer_output in tqdm(mapper(func, layers), total=len(layers)): + layer_outputs.append(layer_output) for k, v in df_dict.items(): # type: ignore df_buffers[k].append(v) finally: + # Make sure the CSVs are written even if we crash or get interrupted for name, dfs in df_buffers.items(): df = pd.concat(dfs).sort_values(by=["layer", "ensembling"]) df.round(4).to_csv(self.out_dir / f"{name}.csv", index=False) if self.debug: save_debug_log(self.datasets, self.out_dir) - breakpoint() - print("hi") - # 
layer_ensembling_results = layer_ensembling(vals_buffers) - - - # for name, dfs in df_buffers.items(): - # df = pd.concat(dfs).sort_values(by=["layer", "ensembling"]) - # df.round(4).to_csv(self.out_dir / f"{name}.csv", index=False) \ No newline at end of file + + layer_ensembling_results: EvalResult = layer_ensembling(layer_outputs) + df = pd.DataFrame(layer_ensembling_results.to_dict(), index=[0]) + df.round(4).to_csv(self.out_dir / f"layer_ensembling_results.csv", index=False) \ No newline at end of file From 241a03a0fdfea11a395dfb3efd1ec038da7096b4 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Mon, 19 Jun 2023 21:42:58 +0200 Subject: [PATCH 012/102] add annotations --- elk/metrics/eval.py | 60 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 4 deletions(-) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index c15038922..f36e03b64 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -57,18 +57,50 @@ def calc_auroc(y_logits, y_true, ensembling, num_classes): return auroc -def calc_calibrated_accuracies(y_true, pos_probs): +def calc_calibrated_accuracies(y_true, pos_probs) -> AccuracyResult: + """ + Calculate the calibrated accuracies + + Args: + y_true: Ground truth tensor of shape (n,). + pos_probs: Predicted class tensor of shape (n, num_variants, num_classes). + + Returns: + AccuracyResult: A dictionary containing the accuracy and confidence interval. + """ + cal_thresh = pos_probs.float().quantile(y_true.float().mean()) cal_preds = pos_probs.gt(cal_thresh).to(torch.int) cal_acc = accuracy_ci(y_true, cal_preds) return cal_acc -def calc_calibrated_errors(y_true, pos_probs): +def calc_calibrated_errors(y_true, pos_probs) -> CalibrationEstimate: + """ + Calculate the expected calibration error. + + Args: + y_true: Ground truth tensor of shape (n,). + y_logits: Predicted class tensor of shape (n, num_variants, num_classes). + + Returns: + CalibrationEstimate: + """ + cal = CalibrationError().update(y_true.flatten(), pos_probs.flatten()) cal_err = cal.compute() return cal_err -def calc_accuracies(y_logits, y_true): +def calc_accuracies(y_logits, y_true) -> AccuracyResult: + """ + Calculate the accuracy + + Args: + y_true: Ground truth tensor of shape (n,). + y_logits: Predicted class tensor of shape (n, num_variants, num_classes). + + Returns: + AccuracyResult: A dictionary containing the accuracy and confidence interval. + """ y_pred = y_logits.argmax(dim=-1) return accuracy_ci(y_true, y_pred) @@ -98,7 +130,18 @@ def evaluate_preds( return calc_eval_results(y_true, y_logits, ensembling, num_classes) -def calc_eval_results(y_true, y_logits, ensembling, num_classes): +def calc_eval_results(y_true, y_logits, ensembling, num_classes) -> EvalResult: + """ + Calculate the evaluation results + + Args: + y_true: Ground truth tensor of shape (n,). + y_logits: Predicted class tensor of shape (n, num_variants, num_classes). + ensembling: The ensembling mode. + + Returns: + EvalResult: The result of evaluating a classifier containing the accuracy, calibrated accuracies, calibrated errors, and AUROC. 
+ """ acc = calc_accuracies(y_logits=y_logits, y_true=y_true) pos_probs = torch.sigmoid(y_logits[..., 1] - y_logits[..., 0]) @@ -113,6 +156,15 @@ def calc_eval_results(y_true, y_logits, ensembling, num_classes): return EvalResult(acc, cal_acc, cal_err, auroc) def layer_ensembling(layer_outputs) -> EvalResult: + """ + Return EvalResult after ensembling the probe output of the middle to last layers + + Args: + layer_outputs: A list of dictionaries containing the ground truth and predicted class tensor of shape (n, num_variants, num_classes). + + Returns: + EvalResult: The result of evaluating a classifier containing the accuracy, calibrated accuracies, calibrated errors, and AUROC. + """ y_logits_means = [] y_trues = [] From e8d042a912e3aa6c7555cab96358142407d766cf Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 19 Jun 2023 19:46:10 +0000 Subject: [PATCH 013/102] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- elk/evaluation/evaluate.py | 2 +- elk/metrics/eval.py | 77 +++++++++++++++++++++++--------------- elk/run.py | 14 ++++--- 3 files changed, 55 insertions(+), 38 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index f156d09e3..e315f3077 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -44,7 +44,7 @@ def apply_to_layer( reporter.eval() row_bufs = defaultdict(list) - + vals = [] for ds_name, (val_h, val_gt, _) in val_output.items(): meta = {"dataset": ds_name, "layer": layer} diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index f36e03b64..b28feeff1 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -3,7 +3,6 @@ import torch from einops import repeat -import pandas as pd from torch import Tensor from .accuracy import AccuracyResult, accuracy_ci @@ -44,7 +43,9 @@ def to_dict(self, prefix: str = "") -> dict[str, float]: def calc_auroc(y_logits, y_true, ensembling, num_classes): if ensembling == "none": - auroc = roc_auc_ci(to_one_hot(y_true, num_classes).long().flatten(1), y_logits.flatten(1)) + auroc = roc_auc_ci( + to_one_hot(y_true, num_classes).long().flatten(1), y_logits.flatten(1) + ) elif ensembling in ("partial", "full"): # Pool together the negative and positive class logits if num_classes == 2: @@ -53,43 +54,45 @@ def calc_auroc(y_logits, y_true, ensembling, num_classes): auroc = roc_auc_ci(to_one_hot(y_true, num_classes).long(), y_logits) else: raise ValueError(f"Unknown mode: {ensembling}") - + return auroc def calc_calibrated_accuracies(y_true, pos_probs) -> AccuracyResult: """ Calculate the calibrated accuracies - + Args: y_true: Ground truth tensor of shape (n,). pos_probs: Predicted class tensor of shape (n, num_variants, num_classes). - + Returns: AccuracyResult: A dictionary containing the accuracy and confidence interval. """ - + cal_thresh = pos_probs.float().quantile(y_true.float().mean()) cal_preds = pos_probs.gt(cal_thresh).to(torch.int) cal_acc = accuracy_ci(y_true, cal_preds) return cal_acc + def calc_calibrated_errors(y_true, pos_probs) -> CalibrationEstimate: """ Calculate the expected calibration error. - + Args: y_true: Ground truth tensor of shape (n,). y_logits: Predicted class tensor of shape (n, num_variants, num_classes). 
- + Returns: - CalibrationEstimate: + CalibrationEstimate: """ - + cal = CalibrationError().update(y_true.flatten(), pos_probs.flatten()) cal_err = cal.compute() return cal_err + def calc_accuracies(y_logits, y_true) -> AccuracyResult: """ Calculate the accuracy @@ -122,7 +125,7 @@ def evaluate_preds( """ (n, num_variants, num_classes) = y_logits.shape assert y_true.shape == (n,) - + if ensembling == "full": y_logits = y_logits.mean(dim=1) else: @@ -130,6 +133,7 @@ def evaluate_preds( return calc_eval_results(y_true, y_logits, ensembling, num_classes) + def calc_eval_results(y_true, y_logits, ensembling, num_classes) -> EvalResult: """ Calculate the evaluation results @@ -138,47 +142,55 @@ def calc_eval_results(y_true, y_logits, ensembling, num_classes) -> EvalResult: y_true: Ground truth tensor of shape (n,). y_logits: Predicted class tensor of shape (n, num_variants, num_classes). ensembling: The ensembling mode. - + Returns: EvalResult: The result of evaluating a classifier containing the accuracy, calibrated accuracies, calibrated errors, and AUROC. """ acc = calc_accuracies(y_logits=y_logits, y_true=y_true) - + pos_probs = torch.sigmoid(y_logits[..., 1] - y_logits[..., 0]) - cal_acc = calc_calibrated_accuracies(y_true=y_true, pos_probs=pos_probs) if num_classes == 2 else None - cal_err = calc_calibrated_errors(y_true=y_true, pos_probs=pos_probs) if num_classes == 2 else None - - auroc = calc_auroc(y_logits=y_logits, - y_true=y_true, - ensembling=ensembling, - num_classes=num_classes) + cal_acc = ( + calc_calibrated_accuracies(y_true=y_true, pos_probs=pos_probs) + if num_classes == 2 + else None + ) + cal_err = ( + calc_calibrated_errors(y_true=y_true, pos_probs=pos_probs) + if num_classes == 2 + else None + ) + + auroc = calc_auroc( + y_logits=y_logits, y_true=y_true, ensembling=ensembling, num_classes=num_classes + ) return EvalResult(acc, cal_acc, cal_err, auroc) + def layer_ensembling(layer_outputs) -> EvalResult: """ Return EvalResult after ensembling the probe output of the middle to last layers - + Args: layer_outputs: A list of dictionaries containing the ground truth and predicted class tensor of shape (n, num_variants, num_classes). - + Returns: EvalResult: The result of evaluating a classifier containing the accuracy, calibrated accuracies, calibrated errors, and AUROC. 
""" y_logits_means = [] y_trues = [] - + for layer_output in layer_outputs: y_logits = layer_output[0]["val_credences"] - + # full ensembling y_logits_means.append(y_logits.mean(dim=1)) - + y_true = layer_output[0]["val_gt"] y_trues.append(y_true) - + num_classes = layer_outputs[0][0]["val_credences"].shape[2] - + # get logits and ground_truth from middle to last layer middle_index = len(y_trues) // 2 y_trues = y_trues[middle_index:] @@ -189,10 +201,13 @@ def layer_ensembling(layer_outputs) -> EvalResult: # layer ensembling of the stacked logits y_layer_logits_means = torch.mean(y_logits_layers, dim=0) - return calc_eval_results(y_true=y_trues[2], - y_logits=y_layer_logits_means, - ensembling="full", - num_classes=num_classes) + return calc_eval_results( + y_true=y_trues[2], + y_logits=y_layer_logits_means, + ensembling="full", + num_classes=num_classes, + ) + def to_one_hot(labels: Tensor, n_classes: int) -> Tensor: """ diff --git a/elk/run.py b/elk/run.py index 32a2943a2..969b808e0 100644 --- a/elk/run.py +++ b/elk/run.py @@ -1,9 +1,8 @@ import os -import pickle import random from abc import ABC, abstractmethod from collections import defaultdict -from dataclasses import asdict, dataclass +from dataclasses import dataclass from functools import partial from pathlib import Path from typing import Callable, Literal @@ -183,12 +182,13 @@ def apply_to_layers( df_buffers = defaultdict(list) layer_outputs = [] try: - for df_dict, layer_output in tqdm(mapper(func, layers), total=len(layers)): + for df_dict, layer_output in tqdm( + mapper(func, layers), total=len(layers) + ): layer_outputs.append(layer_output) - for k, v in df_dict.items(): # type: ignore + for k, v in df_dict.items(): # type: ignore df_buffers[k].append(v) finally: - # Make sure the CSVs are written even if we crash or get interrupted for name, dfs in df_buffers.items(): df = pd.concat(dfs).sort_values(by=["layer", "ensembling"]) @@ -198,4 +198,6 @@ def apply_to_layers( layer_ensembling_results: EvalResult = layer_ensembling(layer_outputs) df = pd.DataFrame(layer_ensembling_results.to_dict(), index=[0]) - df.round(4).to_csv(self.out_dir / f"layer_ensembling_results.csv", index=False) \ No newline at end of file + df.round(4).to_csv( + self.out_dir / "layer_ensembling_results.csv", index=False + ) From a4ace25821fa03f15c58f2b31159adf735975c79 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Mon, 19 Jun 2023 22:12:03 +0200 Subject: [PATCH 014/102] rename vals to layer_outputs --- elk/evaluation/evaluate.py | 6 +++--- elk/training/train.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index f156d09e3..7cd8533de 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -45,12 +45,12 @@ def apply_to_layer( row_bufs = defaultdict(list) - vals = [] + layer_outputs = [] for ds_name, (val_h, val_gt, _) in val_output.items(): meta = {"dataset": ds_name, "layer": layer} val_credences = reporter(val_h) - vals.append({**meta, "val_gt": val_gt, "val_credences": val_credences}) + layer_outputs.append({**meta, "val_gt": val_gt, "val_credences": val_credences}) for mode in ("none", "partial", "full"): row_bufs["eval"].append( { @@ -78,4 +78,4 @@ def apply_to_layer( } ) - return ({k: pd.DataFrame(v) for k, v in row_bufs.items()}, vals) + return ({k: pd.DataFrame(v) for k, v in row_bufs.items()}, layer_outputs) diff --git a/elk/training/train.py b/elk/training/train.py index d863ba748..2275422e1 100644 --- a/elk/training/train.py +++ 
b/elk/training/train.py @@ -137,14 +137,14 @@ def apply_to_layer( lr_models = [] row_bufs = defaultdict(list) - vals = [] + layer_outputs = [] for ds_name in val_dict: val_h, val_gt, val_lm_preds = val_dict[ds_name] train_h, train_gt, train_lm_preds = train_dict[ds_name] meta = {"dataset": ds_name, "layer": layer} val_credences = reporter(val_h) - vals.append({**meta, "val_gt": val_gt, "val_credences": val_credences}) + layer_outputs.append({**meta, "val_gt": val_gt, "val_credences": val_credences}) train_credences = reporter(train_h) for mode in ("none", "partial", "full"): row_bufs["eval"].append( @@ -194,4 +194,4 @@ def apply_to_layer( } ) - return ({k: pd.DataFrame(v) for k, v in row_bufs.items()}, vals) + return ({k: pd.DataFrame(v) for k, v in row_bufs.items()}, layer_outputs) From 2156ad8328c1b283fe6459fe28e6433d0003e655 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 19 Jun 2023 20:13:09 +0000 Subject: [PATCH 015/102] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- elk/evaluation/evaluate.py | 6 ++++-- elk/training/train.py | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index 7cd8533de..667eb5880 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -44,13 +44,15 @@ def apply_to_layer( reporter.eval() row_bufs = defaultdict(list) - + layer_outputs = [] for ds_name, (val_h, val_gt, _) in val_output.items(): meta = {"dataset": ds_name, "layer": layer} val_credences = reporter(val_h) - layer_outputs.append({**meta, "val_gt": val_gt, "val_credences": val_credences}) + layer_outputs.append( + {**meta, "val_gt": val_gt, "val_credences": val_credences} + ) for mode in ("none", "partial", "full"): row_bufs["eval"].append( { diff --git a/elk/training/train.py b/elk/training/train.py index 2275422e1..d0d1e11b8 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -144,7 +144,9 @@ def apply_to_layer( meta = {"dataset": ds_name, "layer": layer} val_credences = reporter(val_h) - layer_outputs.append({**meta, "val_gt": val_gt, "val_credences": val_credences}) + layer_outputs.append( + {**meta, "val_gt": val_gt, "val_credences": val_credences} + ) train_credences = reporter(train_h) for mode in ("none", "partial", "full"): row_bufs["eval"].append( From e391da657cc7fa4a3a869d01ad10c356de342054 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Mon, 19 Jun 2023 22:19:35 +0200 Subject: [PATCH 016/102] fir formatting --- elk/evaluation/evaluate.py | 7 +++++-- elk/metrics/eval.py | 9 ++++++--- elk/training/train.py | 4 +++- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index 7cd8533de..58f84fc0a 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -44,13 +44,16 @@ def apply_to_layer( reporter.eval() row_bufs = defaultdict(list) - + layer_outputs = [] for ds_name, (val_h, val_gt, _) in val_output.items(): meta = {"dataset": ds_name, "layer": layer} val_credences = reporter(val_h) - layer_outputs.append({**meta, "val_gt": val_gt, "val_credences": val_credences}) + layer_outputs.append( + {**meta, "val_gt": val_gt, "val_credences": val_credences} + ) + for mode in ("none", "partial", "full"): row_bufs["eval"].append( { diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index b28feeff1..1bee1c23a 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -144,7 
+144,8 @@ def calc_eval_results(y_true, y_logits, ensembling, num_classes) -> EvalResult: ensembling: The ensembling mode. Returns: - EvalResult: The result of evaluating a classifier containing the accuracy, calibrated accuracies, calibrated errors, and AUROC. + EvalResult: The result of evaluating a classifier containing the accuracy, + calibrated accuracies, calibrated errors, and AUROC. """ acc = calc_accuracies(y_logits=y_logits, y_true=y_true) @@ -172,10 +173,12 @@ def layer_ensembling(layer_outputs) -> EvalResult: Return EvalResult after ensembling the probe output of the middle to last layers Args: - layer_outputs: A list of dictionaries containing the ground truth and predicted class tensor of shape (n, num_variants, num_classes). + layer_outputs: A list of dictionaries containing the ground truth and + predicted class tensor of shape (n, num_variants, num_classes). Returns: - EvalResult: The result of evaluating a classifier containing the accuracy, calibrated accuracies, calibrated errors, and AUROC. + EvalResult: The result of evaluating a classifier containing the accuracy, + calibrated accuracies, calibrated errors, and AUROC. """ y_logits_means = [] y_trues = [] diff --git a/elk/training/train.py b/elk/training/train.py index 2275422e1..d0d1e11b8 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -144,7 +144,9 @@ def apply_to_layer( meta = {"dataset": ds_name, "layer": layer} val_credences = reporter(val_h) - layer_outputs.append({**meta, "val_gt": val_gt, "val_credences": val_credences}) + layer_outputs.append( + {**meta, "val_gt": val_gt, "val_credences": val_credences} + ) train_credences = reporter(train_h) for mode in ("none", "partial", "full"): row_bufs["eval"].append( From 528367d7efb9b42f35443263d70b1c1949e16fb2 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Wed, 21 Jun 2023 12:30:31 +0000 Subject: [PATCH 017/102] make layer ensembling work on multiple gpus --- elk/metrics/eval.py | 4 ++-- elk/training/train.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index 1bee1c23a..80869dff4 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -184,12 +184,12 @@ def layer_ensembling(layer_outputs) -> EvalResult: y_trues = [] for layer_output in layer_outputs: - y_logits = layer_output[0]["val_credences"] + y_logits = layer_output[0]["val_credences"].cpu() # full ensembling y_logits_means.append(y_logits.mean(dim=1)) - y_true = layer_output[0]["val_gt"] + y_true = layer_output[0]["val_gt"].cpu() y_trues.append(y_true) num_classes = layer_outputs[0][0]["val_credences"].shape[2] diff --git a/elk/training/train.py b/elk/training/train.py index d0d1e11b8..b906fd548 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -144,9 +144,11 @@ def apply_to_layer( meta = {"dataset": ds_name, "layer": layer} val_credences = reporter(val_h) + layer_outputs.append( - {**meta, "val_gt": val_gt, "val_credences": val_credences} + {**meta, "val_gt": val_gt.detach(), "val_credences": val_credences.detach()} ) + train_credences = reporter(train_h) for mode in ("none", "partial", "full"): row_bufs["eval"].append( From d4df5175c36277023e7a823fd9745a9b3aec7d15 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 21 Jun 2023 12:30:51 +0000 Subject: [PATCH 018/102] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- elk/training/train.py | 6 +++++- 1 file changed, 5 insertions(+), 
1 deletion(-) diff --git a/elk/training/train.py b/elk/training/train.py index b906fd548..c785d5a67 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -146,7 +146,11 @@ def apply_to_layer( val_credences = reporter(val_h) layer_outputs.append( - {**meta, "val_gt": val_gt.detach(), "val_credences": val_credences.detach()} + { + **meta, + "val_gt": val_gt.detach(), + "val_credences": val_credences.detach(), + } ) train_credences = reporter(train_h) From 2661ea13b15bfa5ea988a87a5e0736827617ec6f Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Wed, 21 Jun 2023 12:42:02 +0000 Subject: [PATCH 019/102] make sure we use the same device --- elk/metrics/eval.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index 80869dff4..2916ba0c5 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -183,13 +183,14 @@ def layer_ensembling(layer_outputs) -> EvalResult: y_logits_means = [] y_trues = [] + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') for layer_output in layer_outputs: - y_logits = layer_output[0]["val_credences"].cpu() + y_logits = layer_output[0]["val_credences"].to(device) # full ensembling y_logits_means.append(y_logits.mean(dim=1)) - y_true = layer_output[0]["val_gt"].cpu() + y_true = layer_output[0]["val_gt"].to(device) y_trues.append(y_true) num_classes = layer_outputs[0][0]["val_credences"].shape[2] From 21cccb7c7f2e8238f96700bd5dc3895120b6e1ca Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 21 Jun 2023 12:42:15 +0000 Subject: [PATCH 020/102] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- elk/metrics/eval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index 2916ba0c5..d0827decc 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -183,7 +183,7 @@ def layer_ensembling(layer_outputs) -> EvalResult: y_logits_means = [] y_trues = [] - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") for layer_output in layer_outputs: y_logits = layer_output[0]["val_credences"].to(device) From 2495c3a9cd06064e0cb3c23173282404a79da6b1 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Wed, 21 Jun 2023 16:10:19 +0000 Subject: [PATCH 021/102] calc layer ensembling for all prompt ensembling modes --- elk/metrics/eval.py | 6 +++--- elk/run.py | 19 +++++++++++++++---- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index 2916ba0c5..9e6ff91d3 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -168,7 +168,7 @@ def calc_eval_results(y_true, y_logits, ensembling, num_classes) -> EvalResult: return EvalResult(acc, cal_acc, cal_err, auroc) -def layer_ensembling(layer_outputs) -> EvalResult: +def layer_ensembling(layer_outputs: list, ensembling: str) -> EvalResult: """ Return EvalResult after ensembling the probe output of the middle to last layers @@ -183,7 +183,7 @@ def layer_ensembling(layer_outputs) -> EvalResult: y_logits_means = [] y_trues = [] - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") for layer_output in layer_outputs: y_logits = layer_output[0]["val_credences"].to(device) @@ -208,7 +208,7 @@ def layer_ensembling(layer_outputs) -> EvalResult: return 
calc_eval_results( y_true=y_trues[2], y_logits=y_layer_logits_means, - ensembling="full", + ensembling=ensembling, num_classes=num_classes, ) diff --git a/elk/run.py b/elk/run.py index 969b808e0..93493c6f8 100644 --- a/elk/run.py +++ b/elk/run.py @@ -17,7 +17,7 @@ from torch import Tensor from tqdm import tqdm -from elk.metrics.eval import EvalResult, layer_ensembling +from elk.metrics.eval import layer_ensembling from .debug_logging import save_debug_log from .extraction import Extract, extract @@ -196,8 +196,19 @@ def apply_to_layers( if self.debug: save_debug_log(self.datasets, self.out_dir) - layer_ensembling_results: EvalResult = layer_ensembling(layer_outputs) - df = pd.DataFrame(layer_ensembling_results.to_dict(), index=[0]) - df.round(4).to_csv( + dfs = [] + for ensemble in [ + "full", + "partial", + "none", + ]: # TODO: Replace ensemble strings with enums everywhere + layer_ensembling_results = layer_ensembling(layer_outputs, ensemble) + df = pd.DataFrame(layer_ensembling_results.to_dict(), index=[0]) + df = df.round(4) + df["ensemble"] = ensemble + dfs.append(df) + + df_conc = pd.concat(dfs) + df_conc.to_csv( self.out_dir / "layer_ensembling_results.csv", index=False ) From 908308b1da691ee2de8affc18940501ed4676112 Mon Sep 17 00:00:00 2001 From: jon Date: Fri, 23 Jun 2023 15:10:14 +0100 Subject: [PATCH 022/102] implement ensembling enum --- elk/evaluation/evaluate.py | 13 ++++++++----- elk/metrics/eval.py | 14 ++++++++------ elk/plotting/visualize.py | 30 +++++++++++++++++------------- elk/run.py | 14 +++++++------- elk/training/train.py | 3 ++- elk/utils/types.py | 11 +++++++++++ 6 files changed, 53 insertions(+), 32 deletions(-) create mode 100644 elk/utils/types.py diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index 667eb5880..da5ea0c1a 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -11,6 +11,7 @@ from ..run import Run from ..training import Reporter from ..utils import Color +from ..utils.types import Ensembling @dataclass(kw_only=True) @@ -53,12 +54,12 @@ def apply_to_layer( layer_outputs.append( {**meta, "val_gt": val_gt, "val_credences": val_credences} ) - for mode in ("none", "partial", "full"): + for ensembling in Ensembling.all(): row_bufs["eval"].append( { **meta, - "ensembling": mode, - **evaluate_preds(val_gt, val_credences, mode).to_dict(), + "ensembling": ensembling.value, + **evaluate_preds(val_gt, val_credences, ensembling).to_dict(), } ) @@ -73,10 +74,12 @@ def apply_to_layer( model.eval() row_bufs["lr_eval"].append( { - "ensembling": mode, + "ensembling": ensembling.value, "inlp_iter": i, **meta, - **evaluate_preds(val_gt, model(val_h), mode).to_dict(), + **evaluate_preds( + val_gt, model(val_h), ensembling + ).to_dict(), } ) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index 9e6ff91d3..79093c5e3 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -1,10 +1,10 @@ from dataclasses import asdict, dataclass -from typing import Literal import torch from einops import repeat from torch import Tensor +from ..utils.types import Ensembling from .accuracy import AccuracyResult, accuracy_ci from .calibration import CalibrationError, CalibrationEstimate from .roc_auc import RocAucResult, roc_auc_ci @@ -42,11 +42,11 @@ def to_dict(self, prefix: str = "") -> dict[str, float]: def calc_auroc(y_logits, y_true, ensembling, num_classes): - if ensembling == "none": + if ensembling == Ensembling.NONE: auroc = roc_auc_ci( to_one_hot(y_true, num_classes).long().flatten(1), y_logits.flatten(1) ) - elif ensembling 
in ("partial", "full"): + elif ensembling in (Ensembling.PARTIAL, Ensembling.FULL): # Pool together the negative and positive class logits if num_classes == 2: auroc = roc_auc_ci(y_true, y_logits[..., 1] - y_logits[..., 0]) @@ -111,7 +111,7 @@ def calc_accuracies(y_logits, y_true) -> AccuracyResult: def evaluate_preds( y_true: Tensor, y_logits: Tensor, - ensembling: Literal["none", "partial", "full"] = "none", + ensembling: Ensembling = Ensembling.NONE, ) -> EvalResult: """ Evaluate the performance of a classification model. @@ -119,6 +119,7 @@ def evaluate_preds( Args: y_true: Ground truth tensor of shape (n,). y_logits: Predicted class tensor of shape (n, num_variants, num_classes). + ensembling: The ensembling mode. Returns: dict: A dictionary containing the accuracy, AUROC, and ECE. @@ -126,7 +127,7 @@ def evaluate_preds( (n, num_variants, num_classes) = y_logits.shape assert y_true.shape == (n,) - if ensembling == "full": + if ensembling == Ensembling.FULL: y_logits = y_logits.mean(dim=1) else: y_true = repeat(y_true, "n -> n v", v=num_variants) @@ -168,13 +169,14 @@ def calc_eval_results(y_true, y_logits, ensembling, num_classes) -> EvalResult: return EvalResult(acc, cal_acc, cal_err, auroc) -def layer_ensembling(layer_outputs: list, ensembling: str) -> EvalResult: +def layer_ensembling(layer_outputs: list, ensembling: Ensembling) -> EvalResult: """ Return EvalResult after ensembling the probe output of the middle to last layers Args: layer_outputs: A list of dictionaries containing the ground truth and predicted class tensor of shape (n, num_variants, num_classes). + ensembling: The ensembling mode. Returns: EvalResult: The result of evaluating a classifier containing the accuracy, diff --git a/elk/plotting/visualize.py b/elk/plotting/visualize.py index a82fd7d75..7771d9ce3 100644 --- a/elk/plotting/visualize.py +++ b/elk/plotting/visualize.py @@ -9,6 +9,8 @@ from rich.console import Console from rich.table import Table +from elk.utils.types import Ensembling + @dataclass class SweepByDsMultiplot: @@ -19,16 +21,16 @@ class SweepByDsMultiplot: def render( self, sweep: "SweepVisualization", - with_transfer=False, - ensembles=["full", "partial", "none"], - write=False, + with_transfer: bool = False, + ensemblings: Ensembling = Ensembling.all(), + write: bool = False, ) -> go.Figure: """Render the multiplot visualization. Args: sweep: The SweepVisualization instance containing the data. with_transfer: Flag indicating whether to include transfer eval data. - ensembles: Filter for which ensembing options to include. + ensemblings: Filter for which ensembing options to include. write: Flag indicating whether to write the visualization to disk. 
Returns: @@ -49,10 +51,10 @@ def render( x_title="Layer", y_title="AUROC", ) - color_map = dict(zip(ensembles, qualitative.Plotly)) + color_map = dict(zip(ensemblings, qualitative.Plotly)) - for ensemble in ensembles: - ensemble_data: pd.DataFrame = df[df["ensembling"] == ensemble] + for ensembling in ensemblings: + ensemble_data: pd.DataFrame = df[df["ensembling"] == ensembling.value] if with_transfer: # TODO write tests ensemble_data = ensemble_data.groupby( ["eval_dataset", "layer", "ensembling"], as_index=False @@ -77,11 +79,11 @@ def render( x=dataset_data["layer"], y=dataset_data["auroc_estimate"], mode="lines", - name=ensemble, + name=ensembling.value, showlegend=False if dataset_name != unique_datasets[0] else True, - line=dict(color=color_map[ensemble]), + line=dict(color=color_map[ensembling]), ), row=row, col=col, @@ -115,7 +117,7 @@ class TransferEvalHeatmap: layer: int score_type: str = "auroc_estimate" - ensembling: str = "full" + ensembling: Ensembling = Ensembling.FULL def render(self, df: pd.DataFrame) -> go.Figure: """Render the heatmap visualization. @@ -245,7 +247,7 @@ def render_and_save( sweep: "SweepVisualization", dataset_names: list[str] | None = None, score_type="auroc_estimate", - ensembling="full", + ensembling=Ensembling.FULL, ) -> None: """Render and save the visualization for the model. @@ -262,7 +264,9 @@ def render_and_save( model_path.mkdir(parents=True, exist_ok=True) if self.is_transfer: for layer in range(layer_min, layer_max + 1): - filtered = df[(df["layer"] == layer) & (df["ensembling"] == ensembling)] + filtered = df[ + (df["layer"] == layer) & (df["ensembling"] == ensembling.value) + ] fig = TransferEvalHeatmap( layer, score_type=score_type, ensembling=ensembling ).render(filtered) @@ -382,7 +386,7 @@ def render_table( Returns: The generated score table as a pandas DataFrame. 
""" - df = self.df[self.df["ensembling"] == "partial"] + df = self.df[self.df["ensembling"] == Ensembling.PARTIAL.value] # For each model, we use the layer whose mean AUROC is the highest best_layers, model_dfs = [], [] diff --git a/elk/run.py b/elk/run.py index 93493c6f8..427344206 100644 --- a/elk/run.py +++ b/elk/run.py @@ -31,6 +31,7 @@ select_split, select_usable_devices, ) +from .utils.types import Ensembling @dataclass @@ -197,15 +198,14 @@ def apply_to_layers( save_debug_log(self.datasets, self.out_dir) dfs = [] - for ensemble in [ - "full", - "partial", - "none", - ]: # TODO: Replace ensemble strings with enums everywhere - layer_ensembling_results = layer_ensembling(layer_outputs, ensemble) + + for ensembling in Ensembling.all(): + layer_ensembling_results = layer_ensembling( + layer_outputs, ensembling + ) df = pd.DataFrame(layer_ensembling_results.to_dict(), index=[0]) df = df.round(4) - df["ensemble"] = ensemble + df["ensemble"] = ensembling dfs.append(df) df_conc = pd.concat(dfs) diff --git a/elk/training/train.py b/elk/training/train.py index c785d5a67..1f82a6f97 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -14,6 +14,7 @@ from ..metrics import evaluate_preds, to_one_hot from ..run import Run from ..training.supervised import train_supervised +from ..utils.types import Ensembling from ..utils.typing import assert_type from .ccs_reporter import CcsReporter, CcsReporterConfig from .eigen_reporter import EigenReporter, EigenReporterConfig @@ -154,7 +155,7 @@ def apply_to_layer( ) train_credences = reporter(train_h) - for mode in ("none", "partial", "full"): + for mode in Ensembling.all(): row_bufs["eval"].append( { **meta, diff --git a/elk/utils/types.py b/elk/utils/types.py new file mode 100644 index 000000000..3ea75a9c9 --- /dev/null +++ b/elk/utils/types.py @@ -0,0 +1,11 @@ +from enum import Enum + + +class Ensembling(Enum): + FULL = "full" + PARTIAL = "partial" + NONE = "none" + + @staticmethod + def all() -> list["Ensembling"]: + return list(Ensembling) From fc980d7e8433dd368eab0792c6b9a1b1872524e8 Mon Sep 17 00:00:00 2001 From: jon Date: Fri, 23 Jun 2023 15:30:43 +0100 Subject: [PATCH 023/102] Fix ensembling value writing error --- elk/run.py | 1 + elk/training/train.py | 30 +++++++++++++++++++----------- tests/test_smoke_elicit.py | 2 ++ tests/test_smoke_eval.py | 1 + 4 files changed, 23 insertions(+), 11 deletions(-) diff --git a/elk/run.py b/elk/run.py index 427344206..a01aaeb2e 100644 --- a/elk/run.py +++ b/elk/run.py @@ -192,6 +192,7 @@ def apply_to_layers( finally: # Make sure the CSVs are written even if we crash or get interrupted for name, dfs in df_buffers.items(): + print(dfs) df = pd.concat(dfs).sort_values(by=["layer", "ensembling"]) df.round(4).to_csv(self.out_dir / f"{name}.csv", index=False) if self.debug: diff --git a/elk/training/train.py b/elk/training/train.py index 1f82a6f97..d670a1922 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -155,12 +155,12 @@ def apply_to_layer( ) train_credences = reporter(train_h) - for mode in Ensembling.all(): + for ensembling in Ensembling.all(): row_bufs["eval"].append( { **meta, - "ensembling": mode, - **evaluate_preds(val_gt, val_credences, mode).to_dict(), + "ensembling": ensembling.value, + **evaluate_preds(val_gt, val_credences, ensembling).to_dict(), "pseudo_auroc": pseudo_auroc, "train_loss": train_loss, } @@ -169,8 +169,10 @@ def apply_to_layer( row_bufs["train_eval"].append( { **meta, - "ensembling": mode, - **evaluate_preds(train_gt, train_credences, mode).to_dict(), + 
"ensembling": ensembling.value, + **evaluate_preds( + train_gt, train_credences, ensembling + ).to_dict(), "train_loss": train_loss, } ) @@ -179,8 +181,10 @@ def apply_to_layer( row_bufs["lm_eval"].append( { **meta, - "ensembling": mode, - **evaluate_preds(val_gt, val_lm_preds, mode).to_dict(), + "ensembling": ensembling.value, + **evaluate_preds( + val_gt, val_lm_preds, ensembling + ).to_dict(), } ) @@ -188,8 +192,10 @@ def apply_to_layer( row_bufs["train_lm_eval"].append( { **meta, - "ensembling": mode, - **evaluate_preds(train_gt, train_lm_preds, mode).to_dict(), + "ensembling": ensembling.value, + **evaluate_preds( + train_gt, train_lm_preds, ensembling + ).to_dict(), } ) @@ -197,9 +203,11 @@ def apply_to_layer( row_bufs["lr_eval"].append( { **meta, - "ensembling": mode, + "ensembling": ensembling.value, "inlp_iter": i, - **evaluate_preds(val_gt, model(val_h), mode).to_dict(), + **evaluate_preds( + val_gt, model(val_h), ensembling + ).to_dict(), } ) diff --git a/tests/test_smoke_elicit.py b/tests/test_smoke_elicit.py index 7cf0e8c9d..369b86a6d 100644 --- a/tests/test_smoke_elicit.py +++ b/tests/test_smoke_elicit.py @@ -31,6 +31,7 @@ def test_smoke_elicit_run_tiny_gpt2_ccs(tmp_path: Path): "lr_models", "reporters", "eval.csv", + "layer_ensembling_results.csv", ] for file in expected_files: assert file in created_file_names @@ -62,6 +63,7 @@ def test_smoke_elicit_run_tiny_gpt2_eigen(tmp_path: Path): "lr_models", "reporters", "eval.csv", + "layer_ensembling_results.csv", ] for file in expected_files: assert file in created_file_names diff --git a/tests/test_smoke_eval.py b/tests/test_smoke_eval.py index d58db6cd3..2ff6819d7 100644 --- a/tests/test_smoke_eval.py +++ b/tests/test_smoke_eval.py @@ -11,6 +11,7 @@ "cfg.yaml", "fingerprints.yaml", "eval.csv", + "layer_ensembling_results.csv", ] From 5aa30a9645bc9d0dabdaa24479e54fdc2cefdcac Mon Sep 17 00:00:00 2001 From: jon Date: Fri, 23 Jun 2023 15:32:11 +0100 Subject: [PATCH 024/102] accidentally a print --- elk/run.py | 1 - 1 file changed, 1 deletion(-) diff --git a/elk/run.py b/elk/run.py index a01aaeb2e..427344206 100644 --- a/elk/run.py +++ b/elk/run.py @@ -192,7 +192,6 @@ def apply_to_layers( finally: # Make sure the CSVs are written even if we crash or get interrupted for name, dfs in df_buffers.items(): - print(dfs) df = pd.concat(dfs).sort_values(by=["layer", "ensembling"]) df.round(4).to_csv(self.out_dir / f"{name}.csv", index=False) if self.debug: From d5b8584dec3bbf7884e279dc718a8b8be702857c Mon Sep 17 00:00:00 2001 From: jon Date: Fri, 23 Jun 2023 16:06:12 +0100 Subject: [PATCH 025/102] slightly refactor layer stuff and fix tests --- elk/metrics/eval.py | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index 79093c5e3..516ae741f 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -131,7 +131,6 @@ def evaluate_preds( y_logits = y_logits.mean(dim=1) else: y_true = repeat(y_true, "n -> n v", v=num_variants) - return calc_eval_results(y_true, y_logits, ensembling, num_classes) @@ -182,34 +181,24 @@ def layer_ensembling(layer_outputs: list, ensembling: Ensembling) -> EvalResult: EvalResult: The result of evaluating a classifier containing the accuracy, calibrated accuracies, calibrated errors, and AUROC. 
""" + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") y_logits_means = [] - y_trues = [] + y_true = layer_outputs[0][0]["val_gt"].to(device) - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") for layer_output in layer_outputs: y_logits = layer_output[0]["val_credences"].to(device) - - # full ensembling - y_logits_means.append(y_logits.mean(dim=1)) - - y_true = layer_output[0]["val_gt"].to(device) - y_trues.append(y_true) + y_logits_means.append(y_logits.mean(dim=1)) # full ensembling num_classes = layer_outputs[0][0]["val_credences"].shape[2] - # get logits and ground_truth from middle to last layer - middle_index = len(y_trues) // 2 - y_trues = y_trues[middle_index:] - y_logits = y_logits_means[middle_index:] - - y_logits_layers = torch.stack(y_logits) - + middle_index = len(layer_outputs) // 2 + y_logits_stacked = torch.stack(y_logits_means[middle_index:]) # layer ensembling of the stacked logits - y_layer_logits_means = torch.mean(y_logits_layers, dim=0) + y_logits_stacked_mean = torch.mean(y_logits_stacked, dim=0) return calc_eval_results( - y_true=y_trues[2], - y_logits=y_layer_logits_means, + y_true=y_true, + y_logits=y_logits_stacked_mean, ensembling=ensembling, num_classes=num_classes, ) From 638081435e778fd3fa316c1a5de357b2bf6e466d Mon Sep 17 00:00:00 2001 From: jon Date: Fri, 23 Jun 2023 16:17:07 +0100 Subject: [PATCH 026/102] try fixing type hints --- elk/plotting/visualize.py | 3 ++- elk/utils/types.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/elk/plotting/visualize.py b/elk/plotting/visualize.py index 7771d9ce3..4550212df 100644 --- a/elk/plotting/visualize.py +++ b/elk/plotting/visualize.py @@ -1,5 +1,6 @@ from dataclasses import dataclass from pathlib import Path +from typing import Iterable import pandas as pd import plotly.express as px @@ -22,7 +23,7 @@ def render( self, sweep: "SweepVisualization", with_transfer: bool = False, - ensemblings: Ensembling = Ensembling.all(), + ensemblings: Iterable[Ensembling] = Ensembling.all(), write: bool = False, ) -> go.Figure: """Render the multiplot visualization. 
diff --git a/elk/utils/types.py b/elk/utils/types.py index 3ea75a9c9..add1a4d14 100644 --- a/elk/utils/types.py +++ b/elk/utils/types.py @@ -7,5 +7,5 @@ class Ensembling(Enum): NONE = "none" @staticmethod - def all() -> list["Ensembling"]: - return list(Ensembling) + def all() -> tuple["Ensembling"]: + return tuple(Ensembling) From 98d19b762ddc0244ec500b235dea0f439e9d9c7b Mon Sep 17 00:00:00 2001 From: jon Date: Fri, 23 Jun 2023 16:40:36 +0100 Subject: [PATCH 027/102] tidy up output --- elk/run.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/elk/run.py b/elk/run.py index 427344206..92d1e8009 100644 --- a/elk/run.py +++ b/elk/run.py @@ -205,10 +205,15 @@ def apply_to_layers( ) df = pd.DataFrame(layer_ensembling_results.to_dict(), index=[0]) df = df.round(4) - df["ensemble"] = ensembling + df["ensembling"] = ensembling.value dfs.append(df) - df_conc = pd.concat(dfs) - df_conc.to_csv( + df_concat = pd.concat(dfs) + # Rearrange the columns so that ensembling is in front + columns = ["ensemble"] + [ + col for col in df_concat.columns if col != "ensembling" + ] + df_concat = df_concat[columns] + df_concat.to_csv( self.out_dir / "layer_ensembling_results.csv", index=False ) From e6914e16640681c2e10ec345df42664f6a499609 Mon Sep 17 00:00:00 2001 From: jon Date: Fri, 23 Jun 2023 16:42:01 +0100 Subject: [PATCH 028/102] accidentally a char --- elk/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elk/run.py b/elk/run.py index 92d1e8009..3f487a1f0 100644 --- a/elk/run.py +++ b/elk/run.py @@ -210,7 +210,7 @@ def apply_to_layers( df_concat = pd.concat(dfs) # Rearrange the columns so that ensembling is in front - columns = ["ensemble"] + [ + columns = ["ensembling"] + [ col for col in df_concat.columns if col != "ensembling" ] df_concat = df_concat[columns] From 29b1cb8a9414730bba7b977b46af8c84935badf4 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Sat, 24 Jun 2023 14:47:00 +0000 Subject: [PATCH 029/102] rename to PromptEnsembling --- elk/evaluation/evaluate.py | 4 ++-- elk/metrics/eval.py | 12 ++++++------ elk/plotting/visualize.py | 10 +++++----- elk/run.py | 4 ++-- elk/training/train.py | 4 ++-- elk/utils/types.py | 6 +++--- 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index da5ea0c1a..9f6aa1fbd 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -11,7 +11,7 @@ from ..run import Run from ..training import Reporter from ..utils import Color -from ..utils.types import Ensembling +from ..utils.types import PromptEnsembling @dataclass(kw_only=True) @@ -54,7 +54,7 @@ def apply_to_layer( layer_outputs.append( {**meta, "val_gt": val_gt, "val_credences": val_credences} ) - for ensembling in Ensembling.all(): + for ensembling in PromptEnsembling.all(): row_bufs["eval"].append( { **meta, diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index 516ae741f..b8c6a8994 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -4,7 +4,7 @@ from einops import repeat from torch import Tensor -from ..utils.types import Ensembling +from ..utils.types import PromptEnsembling from .accuracy import AccuracyResult, accuracy_ci from .calibration import CalibrationError, CalibrationEstimate from .roc_auc import RocAucResult, roc_auc_ci @@ -42,11 +42,11 @@ def to_dict(self, prefix: str = "") -> dict[str, float]: def calc_auroc(y_logits, y_true, ensembling, num_classes): - if ensembling == Ensembling.NONE: + if ensembling == PromptEnsembling.NONE: auroc = 
roc_auc_ci( to_one_hot(y_true, num_classes).long().flatten(1), y_logits.flatten(1) ) - elif ensembling in (Ensembling.PARTIAL, Ensembling.FULL): + elif ensembling in (PromptEnsembling.PARTIAL, PromptEnsembling.FULL): # Pool together the negative and positive class logits if num_classes == 2: auroc = roc_auc_ci(y_true, y_logits[..., 1] - y_logits[..., 0]) @@ -111,7 +111,7 @@ def calc_accuracies(y_logits, y_true) -> AccuracyResult: def evaluate_preds( y_true: Tensor, y_logits: Tensor, - ensembling: Ensembling = Ensembling.NONE, + ensembling: PromptEnsembling = PromptEnsembling.NONE, ) -> EvalResult: """ Evaluate the performance of a classification model. @@ -127,7 +127,7 @@ def evaluate_preds( (n, num_variants, num_classes) = y_logits.shape assert y_true.shape == (n,) - if ensembling == Ensembling.FULL: + if ensembling == PromptEnsembling.FULL: y_logits = y_logits.mean(dim=1) else: y_true = repeat(y_true, "n -> n v", v=num_variants) @@ -168,7 +168,7 @@ def calc_eval_results(y_true, y_logits, ensembling, num_classes) -> EvalResult: return EvalResult(acc, cal_acc, cal_err, auroc) -def layer_ensembling(layer_outputs: list, ensembling: Ensembling) -> EvalResult: +def layer_ensembling(layer_outputs: list, ensembling: PromptEnsembling) -> EvalResult: """ Return EvalResult after ensembling the probe output of the middle to last layers diff --git a/elk/plotting/visualize.py b/elk/plotting/visualize.py index 4550212df..1eeb223c0 100644 --- a/elk/plotting/visualize.py +++ b/elk/plotting/visualize.py @@ -10,7 +10,7 @@ from rich.console import Console from rich.table import Table -from elk.utils.types import Ensembling +from elk.utils.types import PromptEnsembling @dataclass @@ -23,7 +23,7 @@ def render( self, sweep: "SweepVisualization", with_transfer: bool = False, - ensemblings: Iterable[Ensembling] = Ensembling.all(), + ensemblings: Iterable[PromptEnsembling] = PromptEnsembling.all(), write: bool = False, ) -> go.Figure: """Render the multiplot visualization. @@ -118,7 +118,7 @@ class TransferEvalHeatmap: layer: int score_type: str = "auroc_estimate" - ensembling: Ensembling = Ensembling.FULL + ensembling: PromptEnsembling = PromptEnsembling.FULL def render(self, df: pd.DataFrame) -> go.Figure: """Render the heatmap visualization. @@ -248,7 +248,7 @@ def render_and_save( sweep: "SweepVisualization", dataset_names: list[str] | None = None, score_type="auroc_estimate", - ensembling=Ensembling.FULL, + ensembling=PromptEnsembling.FULL, ) -> None: """Render and save the visualization for the model. @@ -387,7 +387,7 @@ def render_table( Returns: The generated score table as a pandas DataFrame. 
""" - df = self.df[self.df["ensembling"] == Ensembling.PARTIAL.value] + df = self.df[self.df["ensembling"] == PromptEnsembling.PARTIAL.value] # For each model, we use the layer whose mean AUROC is the highest best_layers, model_dfs = [], [] diff --git a/elk/run.py b/elk/run.py index 3f487a1f0..e369eae53 100644 --- a/elk/run.py +++ b/elk/run.py @@ -31,7 +31,7 @@ select_split, select_usable_devices, ) -from .utils.types import Ensembling +from .utils.types import PromptEnsembling @dataclass @@ -199,7 +199,7 @@ def apply_to_layers( dfs = [] - for ensembling in Ensembling.all(): + for ensembling in PromptEnsembling.all(): layer_ensembling_results = layer_ensembling( layer_outputs, ensembling ) diff --git a/elk/training/train.py b/elk/training/train.py index d670a1922..acacba4c1 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -14,7 +14,7 @@ from ..metrics import evaluate_preds, to_one_hot from ..run import Run from ..training.supervised import train_supervised -from ..utils.types import Ensembling +from ..utils.types import PromptEnsembling from ..utils.typing import assert_type from .ccs_reporter import CcsReporter, CcsReporterConfig from .eigen_reporter import EigenReporter, EigenReporterConfig @@ -155,7 +155,7 @@ def apply_to_layer( ) train_credences = reporter(train_h) - for ensembling in Ensembling.all(): + for ensembling in PromptEnsembling.all(): row_bufs["eval"].append( { **meta, diff --git a/elk/utils/types.py b/elk/utils/types.py index add1a4d14..eadeb81ad 100644 --- a/elk/utils/types.py +++ b/elk/utils/types.py @@ -1,11 +1,11 @@ from enum import Enum -class Ensembling(Enum): +class PromptEnsembling(Enum): FULL = "full" PARTIAL = "partial" NONE = "none" @staticmethod - def all() -> tuple["Ensembling"]: - return tuple(Ensembling) + def all() -> tuple["PromptEnsembling"]: + return tuple(PromptEnsembling) From bed615a4adf4aeed433e9330287ac73dabacb8e0 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Sun, 9 Jul 2023 20:36:40 +0000 Subject: [PATCH 030/102] add annotations and types --- elk/metrics/eval.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index b8c6a8994..cc4c54d81 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -41,7 +41,19 @@ def to_dict(self, prefix: str = "") -> dict[str, float]: return {**auroc_dict, **cal_acc_dict, **acc_dict, **cal_dict} -def calc_auroc(y_logits, y_true, ensembling, num_classes): +def calc_auroc(y_logits: Tensor, y_true: Tensor, ensembling: PromptEnsembling, num_classes: int) -> RocAucResult: + """ + Calculate the AUROC + + Args: + y_true: Ground truth tensor of shape (n,). + y_logits: Predicted class tensor of shape (n, num_variants, num_classes). + ensembling: The ensembling mode. + num_classes: The number of classes. + + Returns: + RocAucResult: A dictionary containing the AUROC and confidence interval. 
+ """ if ensembling == PromptEnsembling.NONE: auroc = roc_auc_ci( to_one_hot(y_true, num_classes).long().flatten(1), y_logits.flatten(1) @@ -134,7 +146,7 @@ def evaluate_preds( return calc_eval_results(y_true, y_logits, ensembling, num_classes) -def calc_eval_results(y_true, y_logits, ensembling, num_classes) -> EvalResult: +def calc_eval_results(y_true: Tensor, y_logits: Tensor, ensembling: PromptEnsembling, num_classes: int) -> EvalResult: """ Calculate the evaluation results From bf49e99295c118bebb9e80622d3f3896a91ddf0c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 9 Jul 2023 20:36:54 +0000 Subject: [PATCH 031/102] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- elk/metrics/eval.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index cc4c54d81..7b6545512 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -41,7 +41,9 @@ def to_dict(self, prefix: str = "") -> dict[str, float]: return {**auroc_dict, **cal_acc_dict, **acc_dict, **cal_dict} -def calc_auroc(y_logits: Tensor, y_true: Tensor, ensembling: PromptEnsembling, num_classes: int) -> RocAucResult: +def calc_auroc( + y_logits: Tensor, y_true: Tensor, ensembling: PromptEnsembling, num_classes: int +) -> RocAucResult: """ Calculate the AUROC @@ -146,7 +148,9 @@ def evaluate_preds( return calc_eval_results(y_true, y_logits, ensembling, num_classes) -def calc_eval_results(y_true: Tensor, y_logits: Tensor, ensembling: PromptEnsembling, num_classes: int) -> EvalResult: +def calc_eval_results( + y_true: Tensor, y_logits: Tensor, ensembling: PromptEnsembling, num_classes: int +) -> EvalResult: """ Calculate the evaluation results From 1f5d8be1e52ba4bfb6d9e6a2f4c17bc8b2735d0d Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Sun, 9 Jul 2023 20:44:29 +0000 Subject: [PATCH 032/102] clearer naming: prompt_ensembling --- elk/metrics/eval.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index cc4c54d81..66382a681 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -41,31 +41,31 @@ def to_dict(self, prefix: str = "") -> dict[str, float]: return {**auroc_dict, **cal_acc_dict, **acc_dict, **cal_dict} -def calc_auroc(y_logits: Tensor, y_true: Tensor, ensembling: PromptEnsembling, num_classes: int) -> RocAucResult: +def calc_auroc(y_logits: Tensor, y_true: Tensor, prompt_ensembling: PromptEnsembling, num_classes: int) -> RocAucResult: """ Calculate the AUROC Args: y_true: Ground truth tensor of shape (n,). y_logits: Predicted class tensor of shape (n, num_variants, num_classes). - ensembling: The ensembling mode. + prompt_ensembling: The prompt_ensembling mode. num_classes: The number of classes. Returns: RocAucResult: A dictionary containing the AUROC and confidence interval. 
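The three prompt-ensembling modes reach calc_auroc in different shapes: with PromptEnsembling.NONE the per-template logits are scored against one-hot labels, while PARTIAL and FULL pool a binary task's two class logits into a single positive-class score (FULL additionally averages over the templates inside evaluate_preds before anything is scored). A rough standalone sketch of the binary pooling, assuming logits of shape (n, num_variants, 2); it illustrates the idea and is not a drop-in replacement for calc_auroc:

import torch

def binary_scores(y_logits: torch.Tensor, full: bool) -> torch.Tensor:
    # FULL: average over the prompt-template axis first, one score per example.
    # PARTIAL: keep one score per (example, template) pair.
    if full:
        y_logits = y_logits.mean(dim=1)
    # The difference of the two class logits is the pooled positive-class
    # score passed to roc_auc_ci in the PARTIAL/FULL branch.
    return y_logits[..., 1] - y_logits[..., 0]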
""" - if ensembling == PromptEnsembling.NONE: + if prompt_ensembling == PromptEnsembling.NONE: auroc = roc_auc_ci( to_one_hot(y_true, num_classes).long().flatten(1), y_logits.flatten(1) ) - elif ensembling in (PromptEnsembling.PARTIAL, PromptEnsembling.FULL): + elif prompt_ensembling in (PromptEnsembling.PARTIAL, PromptEnsembling.FULL): # Pool together the negative and positive class logits if num_classes == 2: auroc = roc_auc_ci(y_true, y_logits[..., 1] - y_logits[..., 0]) else: auroc = roc_auc_ci(to_one_hot(y_true, num_classes).long(), y_logits) else: - raise ValueError(f"Unknown mode: {ensembling}") + raise ValueError(f"Unknown mode: {prompt_ensembling}") return auroc @@ -123,7 +123,7 @@ def calc_accuracies(y_logits, y_true) -> AccuracyResult: def evaluate_preds( y_true: Tensor, y_logits: Tensor, - ensembling: PromptEnsembling = PromptEnsembling.NONE, + prompt_ensembling: PromptEnsembling = PromptEnsembling.NONE, ) -> EvalResult: """ Evaluate the performance of a classification model. @@ -131,7 +131,7 @@ def evaluate_preds( Args: y_true: Ground truth tensor of shape (n,). y_logits: Predicted class tensor of shape (n, num_variants, num_classes). - ensembling: The ensembling mode. + prompt_ensembling: The prompt_ensembling mode. Returns: dict: A dictionary containing the accuracy, AUROC, and ECE. @@ -139,21 +139,21 @@ def evaluate_preds( (n, num_variants, num_classes) = y_logits.shape assert y_true.shape == (n,) - if ensembling == PromptEnsembling.FULL: + if prompt_ensembling == PromptEnsembling.FULL: y_logits = y_logits.mean(dim=1) else: y_true = repeat(y_true, "n -> n v", v=num_variants) - return calc_eval_results(y_true, y_logits, ensembling, num_classes) + return calc_eval_results(y_true, y_logits, prompt_ensembling, num_classes) -def calc_eval_results(y_true: Tensor, y_logits: Tensor, ensembling: PromptEnsembling, num_classes: int) -> EvalResult: +def calc_eval_results(y_true: Tensor, y_logits: Tensor, prompt_ensembling: PromptEnsembling, num_classes: int) -> EvalResult: """ Calculate the evaluation results Args: y_true: Ground truth tensor of shape (n,). y_logits: Predicted class tensor of shape (n, num_variants, num_classes). - ensembling: The ensembling mode. + prompt_ensembling: The prompt_ensembling mode. Returns: EvalResult: The result of evaluating a classifier containing the accuracy, @@ -174,20 +174,20 @@ def calc_eval_results(y_true: Tensor, y_logits: Tensor, ensembling: PromptEnsemb ) auroc = calc_auroc( - y_logits=y_logits, y_true=y_true, ensembling=ensembling, num_classes=num_classes + y_logits=y_logits, y_true=y_true, prompt_ensembling=prompt_ensembling, num_classes=num_classes ) return EvalResult(acc, cal_acc, cal_err, auroc) -def layer_ensembling(layer_outputs: list, ensembling: PromptEnsembling) -> EvalResult: +def layer_ensembling(layer_outputs: list, prompt_ensembling: PromptEnsembling) -> EvalResult: """ - Return EvalResult after ensembling the probe output of the middle to last layers + Return EvalResult after prompt_ensembling the probe output of the middle to last layers Args: layer_outputs: A list of dictionaries containing the ground truth and predicted class tensor of shape (n, num_variants, num_classes). - ensembling: The ensembling mode. + prompt_ensembling: The prompt_ensembling mode. 
Returns: EvalResult: The result of evaluating a classifier containing the accuracy, @@ -199,19 +199,19 @@ def layer_ensembling(layer_outputs: list, ensembling: PromptEnsembling) -> EvalR for layer_output in layer_outputs: y_logits = layer_output[0]["val_credences"].to(device) - y_logits_means.append(y_logits.mean(dim=1)) # full ensembling + y_logits_means.append(y_logits.mean(dim=1)) # full prompt_ensembling num_classes = layer_outputs[0][0]["val_credences"].shape[2] # get logits and ground_truth from middle to last layer middle_index = len(layer_outputs) // 2 y_logits_stacked = torch.stack(y_logits_means[middle_index:]) - # layer ensembling of the stacked logits + # layer prompt_ensembling of the stacked logits y_logits_stacked_mean = torch.mean(y_logits_stacked, dim=0) return calc_eval_results( y_true=y_true, y_logits=y_logits_stacked_mean, - ensembling=ensembling, + prompt_ensembling=prompt_ensembling, num_classes=num_classes, ) From ec377165d4e8893158648e6192e2f351b09ddb67 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 12 Jul 2023 13:54:44 +0000 Subject: [PATCH 033/102] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- elk/metrics/eval.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index 8a3448724..53ad729ed 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -178,13 +178,18 @@ def calc_eval_results( ) auroc = calc_auroc( - y_logits=y_logits, y_true=y_true, prompt_ensembling=prompt_ensembling, num_classes=num_classes + y_logits=y_logits, + y_true=y_true, + prompt_ensembling=prompt_ensembling, + num_classes=num_classes, ) return EvalResult(acc, cal_acc, cal_err, auroc) -def layer_ensembling(layer_outputs: list, prompt_ensembling: PromptEnsembling) -> EvalResult: +def layer_ensembling( + layer_outputs: list, prompt_ensembling: PromptEnsembling +) -> EvalResult: """ Return EvalResult after prompt_ensembling the probe output of the middle to last layers From 193662491ed4f407623fd744b62ca9596f048ae2 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Wed, 12 Jul 2023 13:57:56 +0000 Subject: [PATCH 034/102] better name for ensembling --- elk/evaluation/evaluate.py | 10 +++++----- elk/metrics/eval.py | 4 ++-- elk/training/train.py | 22 +++++++++++----------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index 9f6aa1fbd..12aa974be 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -54,12 +54,12 @@ def apply_to_layer( layer_outputs.append( {**meta, "val_gt": val_gt, "val_credences": val_credences} ) - for ensembling in PromptEnsembling.all(): + for prompt_ensembling in PromptEnsembling.all(): row_bufs["eval"].append( { **meta, - "ensembling": ensembling.value, - **evaluate_preds(val_gt, val_credences, ensembling).to_dict(), + "prompt_ensembling": prompt_ensembling.value, + **evaluate_preds(val_gt, val_credences, prompt_ensembling).to_dict(), } ) @@ -74,11 +74,11 @@ def apply_to_layer( model.eval() row_bufs["lr_eval"].append( { - "ensembling": ensembling.value, + "prompt_ensembling": prompt_ensembling.value, "inlp_iter": i, **meta, **evaluate_preds( - val_gt, model(val_h), ensembling + val_gt, model(val_h), prompt_ensembling ).to_dict(), } ) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index 8a3448724..090f2feb3 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -42,7 
+42,7 @@ def to_dict(self, prefix: str = "") -> dict[str, float]: def calc_auroc( - y_logits: Tensor, y_true: Tensor, ensembling: PromptEnsembling, num_classes: int + y_logits: Tensor, y_true: Tensor, prompt_ensembling: PromptEnsembling, num_classes: int ) -> RocAucResult: """ Calculate the AUROC @@ -149,7 +149,7 @@ def evaluate_preds( def calc_eval_results( - y_true: Tensor, y_logits: Tensor, ensembling: PromptEnsembling, num_classes: int + y_true: Tensor, y_logits: Tensor, prompt_ensembling: PromptEnsembling, num_classes: int ) -> EvalResult: """ Calculate the evaluation results diff --git a/elk/training/train.py b/elk/training/train.py index 587412f8a..e995c9a7e 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -146,12 +146,12 @@ def apply_to_layer( ) train_credences = reporter(train_h) - for ensembling in PromptEnsembling.all(): + for prompt_ensembling in PromptEnsembling.all(): row_bufs["eval"].append( { **meta, - "ensembling": ensembling.value, - **evaluate_preds(val_gt, val_credences, ensembling).to_dict(), + "prompt_ensembling": prompt_ensembling.value, + **evaluate_preds(val_gt, val_credences, prompt_ensembling).to_dict(), "pseudo_auroc": pseudo_auroc, "train_loss": train_loss, } @@ -160,9 +160,9 @@ def apply_to_layer( row_bufs["train_eval"].append( { **meta, - "ensembling": ensembling.value, + "prompt_ensembling": prompt_ensembling.value, **evaluate_preds( - train_gt, train_credences, ensembling + train_gt, train_credences, prompt_ensembling ).to_dict(), "train_loss": train_loss, } @@ -172,9 +172,9 @@ def apply_to_layer( row_bufs["lm_eval"].append( { **meta, - "ensembling": ensembling.value, + "prompt_ensembling": prompt_ensembling.value, **evaluate_preds( - val_gt, val_lm_preds, ensembling + val_gt, val_lm_preds, prompt_ensembling ).to_dict(), } ) @@ -183,9 +183,9 @@ def apply_to_layer( row_bufs["train_lm_eval"].append( { **meta, - "ensembling": ensembling.value, + "prompt_ensembling": prompt_ensembling.value, **evaluate_preds( - train_gt, train_lm_preds, ensembling + train_gt, train_lm_preds, prompt_ensembling ).to_dict(), } ) @@ -194,10 +194,10 @@ def apply_to_layer( row_bufs["lr_eval"].append( { **meta, - "ensembling": ensembling.value, + "prompt_ensembling": prompt_ensembling.value, "inlp_iter": i, **evaluate_preds( - val_gt, model(val_h), ensembling + val_gt, model(val_h), prompt_ensembling ).to_dict(), } ) From 484788e808f8357a83984e7e1799ea301c99f94d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 12 Jul 2023 13:58:11 +0000 Subject: [PATCH 035/102] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- elk/evaluation/evaluate.py | 4 +++- elk/metrics/eval.py | 10 ++++++++-- elk/training/train.py | 4 +++- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index 12aa974be..543d358d9 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -59,7 +59,9 @@ def apply_to_layer( { **meta, "prompt_ensembling": prompt_ensembling.value, - **evaluate_preds(val_gt, val_credences, prompt_ensembling).to_dict(), + **evaluate_preds( + val_gt, val_credences, prompt_ensembling + ).to_dict(), } ) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index a2160f3b2..55548a3ea 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -42,7 +42,10 @@ def to_dict(self, prefix: str = "") -> dict[str, float]: def calc_auroc( - y_logits: Tensor, y_true: Tensor, 
prompt_ensembling: PromptEnsembling, num_classes: int + y_logits: Tensor, + y_true: Tensor, + prompt_ensembling: PromptEnsembling, + num_classes: int, ) -> RocAucResult: """ Calculate the AUROC @@ -149,7 +152,10 @@ def evaluate_preds( def calc_eval_results( - y_true: Tensor, y_logits: Tensor, prompt_ensembling: PromptEnsembling, num_classes: int + y_true: Tensor, + y_logits: Tensor, + prompt_ensembling: PromptEnsembling, + num_classes: int, ) -> EvalResult: """ Calculate the evaluation results diff --git a/elk/training/train.py b/elk/training/train.py index e995c9a7e..a75609b92 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -151,7 +151,9 @@ def apply_to_layer( { **meta, "prompt_ensembling": prompt_ensembling.value, - **evaluate_preds(val_gt, val_credences, prompt_ensembling).to_dict(), + **evaluate_preds( + val_gt, val_credences, prompt_ensembling + ).to_dict(), "pseudo_auroc": pseudo_auroc, "train_loss": train_loss, } From 7dcecf8836e518495ab463ee44d942863b0f398c Mon Sep 17 00:00:00 2001 From: jon Date: Tue, 4 Jul 2023 15:17:40 +0100 Subject: [PATCH 036/102] fix args passing to sweep --- elk/training/sweep.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/elk/training/sweep.py b/elk/training/sweep.py index e4aca5a00..b13136f43 100755 --- a/elk/training/sweep.py +++ b/elk/training/sweep.py @@ -171,7 +171,13 @@ def execute(self): num_gpus=run.num_gpus, min_gpu_mem=run.min_gpu_mem, skip_supervised=run.supervised == "none", + prompt_indices=run.prompt_indices, + concatenated_layer_offset=run.concatenated_layer_offset, + datasets=run.datasets, + debug=run.debug, + disable_cache=run.disable_cache, ) + eval.execute(highlight_color="green") if self.visualize: From a697d24680bbea4fdbe6945610a18e79b76847f1 Mon Sep 17 00:00:00 2001 From: jon Date: Tue, 4 Jul 2023 14:23:03 +0000 Subject: [PATCH 037/102] remove datasets --- elk/training/sweep.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/elk/training/sweep.py b/elk/training/sweep.py index b13136f43..273064318 100755 --- a/elk/training/sweep.py +++ b/elk/training/sweep.py @@ -162,6 +162,7 @@ def execute(self): continue assert run.out_dir is not None + # TODO we should fix this so that this isn't needed eval = Eval( data=replace( run.data, model=model, datasets=(eval_dataset,) @@ -173,7 +174,9 @@ def execute(self): skip_supervised=run.supervised == "none", prompt_indices=run.prompt_indices, concatenated_layer_offset=run.concatenated_layer_offset, - datasets=run.datasets, + # datasets=run.datasets, + # this isn't needed because it's + # immediately overwritten debug=run.debug, disable_cache=run.disable_cache, ) From aa1dc8861e935dffaf047846d4abc831e3769b86 Mon Sep 17 00:00:00 2001 From: jon Date: Mon, 10 Jul 2023 13:39:44 +0100 Subject: [PATCH 038/102] change variable name --- elk/evaluation/evaluate.py | 2 +- elk/run.py | 8 ++++---- elk/training/ccs_reporter.py | 4 ++-- elk/training/classifier.py | 2 +- elk/training/sweep.py | 12 ++++++++---- tests/test_classifier.py | 2 +- tests/test_smoke_elicit.py | 4 ++-- tests/test_smoke_eval.py | 6 +++--- 8 files changed, 22 insertions(+), 18 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index d6054e332..ffbe2d977 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -23,7 +23,7 @@ def __post_init__(self): # Set our output directory before super().execute() does if not self.out_dir: root = elk_reporter_dir() / self.source - self.out_dir = root / "transfer" / "+".join(self.data.datasets) + self.out_dir = 
root / "transfer" / "+".join(self.extract.datasets) def execute(self, highlight_color: Color = "cyan"): return super().execute(highlight_color, split_type="val") diff --git a/elk/run.py b/elk/run.py index fb8903ccf..5ab020d7f 100644 --- a/elk/run.py +++ b/elk/run.py @@ -33,7 +33,7 @@ @dataclass class Run(ABC, Serializable): - data: Extract + extract: Extract out_dir: Path | None = None """Directory to save results to. If None, a directory will be created automatically.""" @@ -67,14 +67,14 @@ def execute( min_gpu_mem=self.min_gpu_mem, split_type=split_type, ) - for cfg in self.data.explode() + for cfg in self.extract.explode() ] if self.out_dir is None: # Save in a memorably-named directory inside of # ELK_REPORTER_DIR// - ds_name = "+".join(self.data.datasets) - root = elk_reporter_dir() / self.data.model / ds_name + ds_name = "+".join(self.extract.datasets) + root = elk_reporter_dir() / self.extract.model / ds_name self.out_dir = memorably_named_dir(root) diff --git a/elk/training/ccs_reporter.py b/elk/training/ccs_reporter.py index cd161dd9b..a5d7d6282 100644 --- a/elk/training/ccs_reporter.py +++ b/elk/training/ccs_reporter.py @@ -145,7 +145,7 @@ def reset_parameters(self): theta = torch.randn(1, probe.in_features + 1, device=probe.weight.device) theta /= theta.norm() probe.weight.data = theta[:, :-1] - probe.bias.data = theta[:, -1] + probe.bias.extract = theta[:, -1] elif self.config.init == "default": for layer in self.probe: @@ -219,7 +219,7 @@ def fit(self, hiddens: Tensor) -> float: if self.config.init == "pca": diffs = torch.flatten(x_pos - x_neg, 0, 1) _, __, V = torch.pca_lowrank(diffs, q=i + 1) - self.probe[0].weight.data = V[:, -1, None].T + self.probe[0].weight.extract = V[:, -1, None].T if self.config.optimizer == "lbfgs": loss = self.train_loop_lbfgs(x_neg, x_pos) diff --git a/elk/training/classifier.py b/elk/training/classifier.py index 148da939f..4e7adc97b 100644 --- a/elk/training/classifier.py +++ b/elk/training/classifier.py @@ -51,7 +51,7 @@ def __init__( self.linear = torch.nn.Linear( input_dim, num_classes if num_classes > 2 else 1, device=device, dtype=dtype ) - self.linear.bias.data.zero_() + self.linear.bias.extract.zero_() self.linear.weight.data.zero_() def forward(self, x: Tensor) -> Tensor: diff --git a/elk/training/sweep.py b/elk/training/sweep.py index 273064318..bebebfbbd 100755 --- a/elk/training/sweep.py +++ b/elk/training/sweep.py @@ -53,7 +53,7 @@ class Sweep: # A bit of a hack to add all the command line arguments from Elicit run_template: Elicit = Elicit( - data=Extract( + extract=Extract( model="", datasets=("",), ) @@ -132,7 +132,9 @@ def execute(self): out_dir = sweep_dir / model / dataset_str data = replace( - self.run_template.data, model=model, datasets=train_datasets + self.run_template.extract, + model=model, + datasets=train_datasets, ) run = replace(self.run_template, data=data, out_dir=out_dir) if var_weight is not None and neg_cov_weight is not None: @@ -164,8 +166,10 @@ def execute(self): assert run.out_dir is not None # TODO we should fix this so that this isn't needed eval = Eval( - data=replace( - run.data, model=model, datasets=(eval_dataset,) + extract=replace( + run.extract, + model=model, + datasets=(eval_dataset,), ), source=run.out_dir, out_dir=run.out_dir / "transfer" / eval_dataset, diff --git a/tests/test_classifier.py b/tests/test_classifier.py index bdc9023df..10b86a193 100644 --- a/tests/test_classifier.py +++ b/tests/test_classifier.py @@ -28,7 +28,7 @@ def test_classifier_roughly_same_sklearn(): ) # check that the 
weights are roughly the same sklearn_coef = torch.from_numpy(model.coef_) - torch_coef = classifier.linear.weight.data + torch_coef = classifier.linear.weight.extract torch.testing.assert_close(sklearn_coef, torch_coef, atol=1e-2, rtol=1e-2) # check that on a new sample, the predictions are roughly the same diff --git a/tests/test_smoke_elicit.py b/tests/test_smoke_elicit.py index bac0f3989..4bfb430d5 100644 --- a/tests/test_smoke_elicit.py +++ b/tests/test_smoke_elicit.py @@ -10,7 +10,7 @@ def test_smoke_elicit_run_tiny_gpt2_ccs(tmp_path: Path): model_path, min_mem = "sshleifer/tiny-gpt2", 10 * 1024**2 dataset_name = "imdb" elicit = Elicit( - data=Extract( + extract=Extract( model=model_path, datasets=(dataset_name,), max_examples=(10, 10), @@ -41,7 +41,7 @@ def test_smoke_elicit_run_tiny_gpt2_eigen(tmp_path: Path): model_path, min_mem = "sshleifer/tiny-gpt2", 10 * 1024**2 dataset_name = "imdb" elicit = Elicit( - data=Extract( + extract=Extract( model=model_path, datasets=(dataset_name,), max_examples=(10, 10), diff --git a/tests/test_smoke_eval.py b/tests/test_smoke_eval.py index 4efd7112d..0fe70cd47 100644 --- a/tests/test_smoke_eval.py +++ b/tests/test_smoke_eval.py @@ -26,7 +26,7 @@ def setup_elicit( Returns the elicit run configuration. """ elicit = Elicit( - data=Extract( + extract=Extract( model=model_path, datasets=(dataset_name,), max_examples=(10, 10), @@ -54,7 +54,7 @@ def eval_run(elicit: Elicit, transfer_datasets: tuple[str, ...] = ()) -> float: Returns a reference time (in seconds) for file modification checking. """ tmp_path = elicit.out_dir - extract = elicit.data + extract = elicit.extract assert tmp_path is not None # record elicit modification time as reference. @@ -64,7 +64,7 @@ def eval_run(elicit: Elicit, transfer_datasets: tuple[str, ...] 
= ()) -> float: # update datasets to a different dataset extract.datasets = transfer_datasets - eval = Eval(data=extract, source=tmp_path) + eval = Eval(extract=extract, source=tmp_path) eval.execute() return start_time_sec From 0acd41c457c3d5fa8e31b93949ef645a55a0d455 Mon Sep 17 00:00:00 2001 From: jon Date: Wed, 12 Jul 2023 14:27:26 +0100 Subject: [PATCH 039/102] move to make_eval --- elk/training/sweep.py | 20 +------------------- elk/training/train.py | 24 +++++++++++++++++++++++- 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/elk/training/sweep.py b/elk/training/sweep.py index bebebfbbd..ec6e6b66d 100755 --- a/elk/training/sweep.py +++ b/elk/training/sweep.py @@ -165,26 +165,8 @@ def execute(self): assert run.out_dir is not None # TODO we should fix this so that this isn't needed - eval = Eval( - extract=replace( - run.extract, - model=model, - datasets=(eval_dataset,), - ), - source=run.out_dir, - out_dir=run.out_dir / "transfer" / eval_dataset, - num_gpus=run.num_gpus, - min_gpu_mem=run.min_gpu_mem, - skip_supervised=run.supervised == "none", - prompt_indices=run.prompt_indices, - concatenated_layer_offset=run.concatenated_layer_offset, - # datasets=run.datasets, - # this isn't needed because it's - # immediately overwritten - debug=run.debug, - disable_cache=run.disable_cache, - ) + eval = run.make_eval(model, eval_dataset) eval.execute(highlight_color="green") if self.visualize: diff --git a/elk/training/train.py b/elk/training/train.py index 8392f2d9a..cf6f6d756 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -1,7 +1,7 @@ """Main training loop.""" from collections import defaultdict -from dataclasses import dataclass +from dataclasses import dataclass, replace from pathlib import Path from typing import Literal @@ -11,6 +11,7 @@ from simple_parsing import subgroups from simple_parsing.helpers.serialization import save +from ..evaluation import Eval from ..metrics import evaluate_preds, to_one_hot from ..run import Run from ..training.supervised import train_supervised @@ -48,6 +49,27 @@ def create_models_dir(self, out_dir: Path): return reporter_dir, lr_dir + def make_eval(self, model, eval_dataset): + return Eval( + extract=replace( + self.extract, + model=model, + datasets=(eval_dataset,), + ), + source=self.out_dir, + out_dir=self.out_dir / "transfer" / eval_dataset, + num_gpus=self.num_gpus, + min_gpu_mem=self.min_gpu_mem, + skip_supervised=self.supervised == "none", + prompt_indices=self.prompt_indices, + concatenated_layer_offset=self.concatenated_layer_offset, + # datasets=run.datasets isn't needed because it's + # immediately overwritten + debug=self.debug, + disable_cache=self.disable_cache, + ) + + def apply_to_layer( self, layer: int, From 025acb1d5bed83c51ccc1ffb6fea0fb45e418f8a Mon Sep 17 00:00:00 2001 From: jon Date: Wed, 12 Jul 2023 14:35:43 +0100 Subject: [PATCH 040/102] Revert "change variable name" This reverts commit f2cdbb104c383093a5d827edb75b507c18b15b89. 
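The renaming commit being reverted was a blanket data -> extract substitution, so it also rewrote Tensor.data accesses that have nothing to do with the Run.data field: probe.bias.data = theta[:, -1] became probe.bias.extract = theta[:, -1] in ccs_reporter.py, and self.linear.bias.data.zero_() became self.linear.bias.extract.zero_() in classifier.py. Reverting restores the tensor-update pattern below, shown here as a minimal standalone sketch with the layer size chosen arbitrarily for illustration:

import torch

# Tensor.data is the attribute the search-and-replace clobbered; it is
# unrelated to the Run.data field the rename was aimed at.
probe = torch.nn.Linear(8, 1)
theta = torch.randn(1, probe.in_features + 1)
theta /= theta.norm()
probe.weight.data = theta[:, :-1]  # shape (1, 8), matches probe.weight
probe.bias.data = theta[:, -1]     # shape (1,), matches probe.bias

The make_eval helper introduced in the previous commit is untouched by this revert; the follow-up "fix remnant of renaming" only points it back at the data field.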
--- elk/evaluation/evaluate.py | 2 +- elk/run.py | 8 ++++---- elk/training/ccs_reporter.py | 4 ++-- elk/training/classifier.py | 2 +- elk/training/sweep.py | 6 ++---- tests/test_classifier.py | 2 +- tests/test_smoke_elicit.py | 4 ++-- tests/test_smoke_eval.py | 6 +++--- 8 files changed, 16 insertions(+), 18 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index ffbe2d977..d6054e332 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -23,7 +23,7 @@ def __post_init__(self): # Set our output directory before super().execute() does if not self.out_dir: root = elk_reporter_dir() / self.source - self.out_dir = root / "transfer" / "+".join(self.extract.datasets) + self.out_dir = root / "transfer" / "+".join(self.data.datasets) def execute(self, highlight_color: Color = "cyan"): return super().execute(highlight_color, split_type="val") diff --git a/elk/run.py b/elk/run.py index 5ab020d7f..fb8903ccf 100644 --- a/elk/run.py +++ b/elk/run.py @@ -33,7 +33,7 @@ @dataclass class Run(ABC, Serializable): - extract: Extract + data: Extract out_dir: Path | None = None """Directory to save results to. If None, a directory will be created automatically.""" @@ -67,14 +67,14 @@ def execute( min_gpu_mem=self.min_gpu_mem, split_type=split_type, ) - for cfg in self.extract.explode() + for cfg in self.data.explode() ] if self.out_dir is None: # Save in a memorably-named directory inside of # ELK_REPORTER_DIR// - ds_name = "+".join(self.extract.datasets) - root = elk_reporter_dir() / self.extract.model / ds_name + ds_name = "+".join(self.data.datasets) + root = elk_reporter_dir() / self.data.model / ds_name self.out_dir = memorably_named_dir(root) diff --git a/elk/training/ccs_reporter.py b/elk/training/ccs_reporter.py index a5d7d6282..cd161dd9b 100644 --- a/elk/training/ccs_reporter.py +++ b/elk/training/ccs_reporter.py @@ -145,7 +145,7 @@ def reset_parameters(self): theta = torch.randn(1, probe.in_features + 1, device=probe.weight.device) theta /= theta.norm() probe.weight.data = theta[:, :-1] - probe.bias.extract = theta[:, -1] + probe.bias.data = theta[:, -1] elif self.config.init == "default": for layer in self.probe: @@ -219,7 +219,7 @@ def fit(self, hiddens: Tensor) -> float: if self.config.init == "pca": diffs = torch.flatten(x_pos - x_neg, 0, 1) _, __, V = torch.pca_lowrank(diffs, q=i + 1) - self.probe[0].weight.extract = V[:, -1, None].T + self.probe[0].weight.data = V[:, -1, None].T if self.config.optimizer == "lbfgs": loss = self.train_loop_lbfgs(x_neg, x_pos) diff --git a/elk/training/classifier.py b/elk/training/classifier.py index 4e7adc97b..148da939f 100644 --- a/elk/training/classifier.py +++ b/elk/training/classifier.py @@ -51,7 +51,7 @@ def __init__( self.linear = torch.nn.Linear( input_dim, num_classes if num_classes > 2 else 1, device=device, dtype=dtype ) - self.linear.bias.extract.zero_() + self.linear.bias.data.zero_() self.linear.weight.data.zero_() def forward(self, x: Tensor) -> Tensor: diff --git a/elk/training/sweep.py b/elk/training/sweep.py index ec6e6b66d..9a2393455 100755 --- a/elk/training/sweep.py +++ b/elk/training/sweep.py @@ -53,7 +53,7 @@ class Sweep: # A bit of a hack to add all the command line arguments from Elicit run_template: Elicit = Elicit( - extract=Extract( + data=Extract( model="", datasets=("",), ) @@ -132,9 +132,7 @@ def execute(self): out_dir = sweep_dir / model / dataset_str data = replace( - self.run_template.extract, - model=model, - datasets=train_datasets, + self.run_template.data, model=model, 
datasets=train_datasets ) run = replace(self.run_template, data=data, out_dir=out_dir) if var_weight is not None and neg_cov_weight is not None: diff --git a/tests/test_classifier.py b/tests/test_classifier.py index 10b86a193..bdc9023df 100644 --- a/tests/test_classifier.py +++ b/tests/test_classifier.py @@ -28,7 +28,7 @@ def test_classifier_roughly_same_sklearn(): ) # check that the weights are roughly the same sklearn_coef = torch.from_numpy(model.coef_) - torch_coef = classifier.linear.weight.extract + torch_coef = classifier.linear.weight.data torch.testing.assert_close(sklearn_coef, torch_coef, atol=1e-2, rtol=1e-2) # check that on a new sample, the predictions are roughly the same diff --git a/tests/test_smoke_elicit.py b/tests/test_smoke_elicit.py index 4bfb430d5..bac0f3989 100644 --- a/tests/test_smoke_elicit.py +++ b/tests/test_smoke_elicit.py @@ -10,7 +10,7 @@ def test_smoke_elicit_run_tiny_gpt2_ccs(tmp_path: Path): model_path, min_mem = "sshleifer/tiny-gpt2", 10 * 1024**2 dataset_name = "imdb" elicit = Elicit( - extract=Extract( + data=Extract( model=model_path, datasets=(dataset_name,), max_examples=(10, 10), @@ -41,7 +41,7 @@ def test_smoke_elicit_run_tiny_gpt2_eigen(tmp_path: Path): model_path, min_mem = "sshleifer/tiny-gpt2", 10 * 1024**2 dataset_name = "imdb" elicit = Elicit( - extract=Extract( + data=Extract( model=model_path, datasets=(dataset_name,), max_examples=(10, 10), diff --git a/tests/test_smoke_eval.py b/tests/test_smoke_eval.py index 0fe70cd47..4efd7112d 100644 --- a/tests/test_smoke_eval.py +++ b/tests/test_smoke_eval.py @@ -26,7 +26,7 @@ def setup_elicit( Returns the elicit run configuration. """ elicit = Elicit( - extract=Extract( + data=Extract( model=model_path, datasets=(dataset_name,), max_examples=(10, 10), @@ -54,7 +54,7 @@ def eval_run(elicit: Elicit, transfer_datasets: tuple[str, ...] = ()) -> float: Returns a reference time (in seconds) for file modification checking. """ tmp_path = elicit.out_dir - extract = elicit.extract + extract = elicit.data assert tmp_path is not None # record elicit modification time as reference. @@ -64,7 +64,7 @@ def eval_run(elicit: Elicit, transfer_datasets: tuple[str, ...] 
= ()) -> float: # update datasets to a different dataset extract.datasets = transfer_datasets - eval = Eval(extract=extract, source=tmp_path) + eval = Eval(data=extract, source=tmp_path) eval.execute() return start_time_sec From 80507fc182dd817a1d9dbf8ebf750fdf642dd494 Mon Sep 17 00:00:00 2001 From: jon Date: Wed, 12 Jul 2023 14:43:10 +0100 Subject: [PATCH 041/102] fix remnant of renaming --- elk/training/sweep.py | 1 - elk/training/train.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/elk/training/sweep.py b/elk/training/sweep.py index 9a2393455..ff6cc8e8d 100755 --- a/elk/training/sweep.py +++ b/elk/training/sweep.py @@ -5,7 +5,6 @@ from datasets import get_dataset_config_info from transformers import AutoConfig -from ..evaluation import Eval from ..extraction import Extract from ..files import memorably_named_dir, sweeps_dir from ..plotting.visualize import visualize_sweep diff --git a/elk/training/train.py b/elk/training/train.py index cf6f6d756..2e6c24330 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -51,8 +51,8 @@ def create_models_dir(self, out_dir: Path): def make_eval(self, model, eval_dataset): return Eval( - extract=replace( - self.extract, + data=replace( + self.data, model=model, datasets=(eval_dataset,), ), From 06cb70ee02a2ef10060139fc8c8cf0e44f28626c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 12 Jul 2023 13:43:48 +0000 Subject: [PATCH 042/102] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- elk/training/train.py | 1 - 1 file changed, 1 deletion(-) diff --git a/elk/training/train.py b/elk/training/train.py index 2e6c24330..8929ce7c5 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -69,7 +69,6 @@ def make_eval(self, model, eval_dataset): disable_cache=self.disable_cache, ) - def apply_to_layer( self, layer: int, From ea56c631793484d9e544d611098ccd1e0f2221bf Mon Sep 17 00:00:00 2001 From: jon Date: Wed, 12 Jul 2023 14:44:13 +0100 Subject: [PATCH 043/102] rename to elicit --- elk/training/sweep.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/elk/training/sweep.py b/elk/training/sweep.py index ff6cc8e8d..fb6beba14 100755 --- a/elk/training/sweep.py +++ b/elk/training/sweep.py @@ -133,19 +133,19 @@ def execute(self): data = replace( self.run_template.data, model=model, datasets=train_datasets ) - run = replace(self.run_template, data=data, out_dir=out_dir) + elicit = replace(self.run_template, data=data, out_dir=out_dir) if var_weight is not None and neg_cov_weight is not None: - assert isinstance(run.net, EigenFitterConfig) - run.net.var_weight = var_weight - run.net.neg_cov_weight = neg_cov_weight + assert isinstance(elicit.net, EigenFitterConfig) + elicit.net.var_weight = var_weight + elicit.net.neg_cov_weight = neg_cov_weight # Add hyperparameter values to output directory if needed - assert run.out_dir is not None - run.out_dir /= f"var_weight={var_weight:.2f}" - run.out_dir /= f"neg_cov_weight={neg_cov_weight:.2f}" + assert elicit.out_dir is not None + elicit.out_dir /= f"var_weight={var_weight:.2f}" + elicit.out_dir /= f"neg_cov_weight={neg_cov_weight:.2f}" try: - run.execute() + elicit.execute() except torch.linalg.LinAlgError as e: print(colorize(f"LinAlgError: {e}", "red")) continue @@ -160,10 +160,10 @@ def execute(self): if eval_dataset in train_datasets: continue - assert run.out_dir is not None + assert elicit.out_dir is not None # 
TODO we should fix this so that this isn't needed - eval = run.make_eval(model, eval_dataset) + eval = elicit.make_eval(model, eval_dataset) eval.execute(highlight_color="green") if self.visualize: From 9988cc5cc9704ffc453519c9d7b239f010cd5f01 Mon Sep 17 00:00:00 2001 From: jon Date: Wed, 12 Jul 2023 14:52:19 +0100 Subject: [PATCH 044/102] remove unnecessary comments --- elk/training/sweep.py | 1 - elk/training/train.py | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/elk/training/sweep.py b/elk/training/sweep.py index fb6beba14..d57c46002 100755 --- a/elk/training/sweep.py +++ b/elk/training/sweep.py @@ -161,7 +161,6 @@ def execute(self): continue assert elicit.out_dir is not None - # TODO we should fix this so that this isn't needed eval = elicit.make_eval(model, eval_dataset) eval.execute(highlight_color="green") diff --git a/elk/training/train.py b/elk/training/train.py index 8929ce7c5..8099e25a6 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -63,8 +63,7 @@ def make_eval(self, model, eval_dataset): skip_supervised=self.supervised == "none", prompt_indices=self.prompt_indices, concatenated_layer_offset=self.concatenated_layer_offset, - # datasets=run.datasets isn't needed because it's - # immediately overwritten + # datasets isn't needed because it's immediately overwritten debug=self.debug, disable_cache=self.disable_cache, ) From f7b96b00ac0ecc4f3ea00a39093d9cd6a732bc76 Mon Sep 17 00:00:00 2001 From: jon Date: Wed, 12 Jul 2023 15:17:15 +0100 Subject: [PATCH 045/102] fix pyright --- elk/training/train.py | 1 + 1 file changed, 1 insertion(+) diff --git a/elk/training/train.py b/elk/training/train.py index 8099e25a6..82316506f 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -50,6 +50,7 @@ def create_models_dir(self, out_dir: Path): return reporter_dir, lr_dir def make_eval(self, model, eval_dataset): + assert self.out_dir is not None return Eval( data=replace( self.data, From 35c53f5e1c2b6373e7b5b9e2dfce587350f52bab Mon Sep 17 00:00:00 2001 From: jon Date: Wed, 12 Jul 2023 15:33:11 +0100 Subject: [PATCH 046/102] remove check --- elk/training/sweep.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/elk/training/sweep.py b/elk/training/sweep.py index d57c46002..bbd761c92 100755 --- a/elk/training/sweep.py +++ b/elk/training/sweep.py @@ -160,8 +160,6 @@ def execute(self): if eval_dataset in train_datasets: continue - assert elicit.out_dir is not None - eval = elicit.make_eval(model, eval_dataset) eval.execute(highlight_color="green") From 47bcfb2c5fe45d2936dcc7a730d45413e4428738 Mon Sep 17 00:00:00 2001 From: jon Date: Thu, 13 Jul 2023 14:42:14 +0100 Subject: [PATCH 047/102] added cli arg --- elk/evaluation/evaluate.py | 2 +- elk/run.py | 11 +++++++++-- elk/training/train.py | 1 + 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index d6054e332..8462cc005 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -30,7 +30,7 @@ def execute(self, highlight_color: Color = "cyan"): @torch.inference_mode() def apply_to_layer( - self, layer: int, devices: list[str], world_size: int + self, layer: int, devices: list[str], world_size: int, probe_per_prompt: bool ) -> dict[str, pd.DataFrame]: """Evaluate a single reporter on a single layer.""" device = self.get_device(devices, world_size) diff --git a/elk/run.py b/elk/run.py index fb8903ccf..1659e3926 100644 --- a/elk/run.py +++ b/elk/run.py @@ -46,6 +46,10 @@ class Run(ABC, Serializable): 
prompt_indices: tuple[int, ...] = () """The indices of the prompt templates to use. If empty, all prompts are used.""" + probe_per_prompt: bool = False + """If true, a probe is trained per prompt template. Otherwise, a single probe is + trained for all prompt templates.""" + concatenated_layer_offset: int = 0 debug: bool = False min_gpu_mem: int | None = None # in bytes @@ -99,13 +103,16 @@ def execute( devices = select_usable_devices(self.num_gpus, min_memory=self.min_gpu_mem) num_devices = len(devices) func: Callable[[int], dict[str, pd.DataFrame]] = partial( - self.apply_to_layer, devices=devices, world_size=num_devices + self.apply_to_layer, + devices=devices, + world_size=num_devices, + probe_per_prompt=self.probe_per_prompt, ) self.apply_to_layers(func=func, num_devices=num_devices) @abstractmethod def apply_to_layer( - self, layer: int, devices: list[str], world_size: int + self, layer: int, devices: list[str], world_size: int, probe_per_prompt: bool ) -> dict[str, pd.DataFrame]: """Train or eval a reporter on a single layer.""" diff --git a/elk/training/train.py b/elk/training/train.py index 8392f2d9a..2292309b4 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -53,6 +53,7 @@ def apply_to_layer( layer: int, devices: list[str], world_size: int, + probe_per_prompt: bool, ) -> dict[str, pd.DataFrame]: """Train a single reporter on a single layer.""" From a50fe5772adc9e59f66e066a7df22f42863335d2 Mon Sep 17 00:00:00 2001 From: jon Date: Thu, 13 Jul 2023 15:23:58 +0100 Subject: [PATCH 048/102] refactor reporter training --- elk/training/train.py | 88 ++++++++++++++++++++++++++++++++----------- 1 file changed, 65 insertions(+), 23 deletions(-) diff --git a/elk/training/train.py b/elk/training/train.py index 2292309b4..3eb018bcf 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -1,7 +1,7 @@ """Main training loop.""" from collections import defaultdict -from dataclasses import dataclass +from dataclasses import dataclass, replace from pathlib import Path from typing import Literal @@ -11,15 +11,22 @@ from simple_parsing import subgroups from simple_parsing.helpers.serialization import save +from ..evaluation import Eval from ..metrics import evaluate_preds, to_one_hot from ..run import Run from ..training.supervised import train_supervised from ..utils.typing import assert_type from .ccs_reporter import CcsConfig, CcsReporter -from .common import FitterConfig +from .common import FitterConfig, Reporter from .eigen_reporter import EigenFitter, EigenFitterConfig +@dataclass +class ReporterTrainResult: + reporter: CcsReporter | Reporter + train_loss: float | None + + @dataclass class Elicit(Run): """Full specification of a reporter training run.""" @@ -48,22 +55,31 @@ def create_models_dir(self, out_dir: Path): return reporter_dir, lr_dir - def apply_to_layer( - self, - layer: int, - devices: list[str], - world_size: int, - probe_per_prompt: bool, - ) -> dict[str, pd.DataFrame]: - """Train a single reporter on a single layer.""" - - self.make_reproducible(seed=self.net.seed + layer) - device = self.get_device(devices, world_size) - + def make_eval(self, model, eval_dataset): + assert self.out_dir is not None + return Eval( + data=replace( + self.data, + model=model, + datasets=(eval_dataset,), + ), + source=self.out_dir, + out_dir=self.out_dir / "transfer" / eval_dataset, + num_gpus=self.num_gpus, + min_gpu_mem=self.min_gpu_mem, + skip_supervised=self.supervised == "none", + prompt_indices=self.prompt_indices, + 
concatenated_layer_offset=self.concatenated_layer_offset, + # datasets isn't needed because it's immediately overwritten + debug=self.debug, + disable_cache=self.disable_cache, + ) + + # Create a separate function to handle the reporter training. + def train_reporter(self, device, layer, out_dir) -> ReporterTrainResult: train_dict = self.prepare_data(device, layer, "train") - val_dict = self.prepare_data(device, layer, "val") - (first_train_h, train_gt, _), *rest = train_dict.values() + (first_train_h, train_gt, _), *rest = train_dict.values() # TODO can remove? (_, v, k, d) = first_train_h.shape if not all(other_h.shape[-1] == d for other_h, _, _ in rest): raise ValueError("All datasets must have the same hidden state size") @@ -75,16 +91,12 @@ def apply_to_layer( if not all(other_h.shape[-2] == k for other_h, _, _ in rest): raise ValueError("All datasets must have the same number of classes") - reporter_dir, lr_dir = self.create_models_dir(assert_type(Path, self.out_dir)) train_loss = None - if isinstance(self.net, CcsConfig): assert len(train_dict) == 1, "CCS only supports single-task training" - reporter = CcsReporter(self.net, d, device=device, num_variants=v) train_loss = reporter.fit(first_train_h) - (_, v, k, _) = first_train_h.shape reporter.platt_scale( to_one_hot(repeat(train_gt, "n -> (n v)", v=v), k).flatten(), rearrange(first_train_h, "n v k d -> (n v k) d"), @@ -116,20 +128,50 @@ def apply_to_layer( raise ValueError(f"Unknown reporter config type: {type(self.net)}") # Save reporter checkpoint to disk - torch.save(reporter, reporter_dir / f"layer_{layer}.pt") + torch.save(reporter, out_dir / f"layer_{layer}.pt") - # Fit supervised logistic regression model + return ReporterTrainResult(reporter, train_loss) + + def train_lr_model(self, train_dict, device, layer, out_dir): if self.supervised != "none": lr_models = train_supervised( train_dict, device=device, mode=self.supervised, ) - with open(lr_dir / f"layer_{layer}.pt", "wb") as file: + with open(out_dir / f"layer_{layer}.pt", "wb") as file: torch.save(lr_models, file) else: lr_models = [] + return lr_models + + def apply_to_layer( + self, + layer: int, + devices: list[str], + world_size: int, + probe_per_prompt: bool, + ) -> dict[str, pd.DataFrame]: + """Train a single reporter on a single layer.""" + + self.make_reproducible(seed=self.net.seed + layer) + device = self.get_device(devices, world_size) + + train_dict = self.prepare_data(device, layer, "train") + val_dict = self.prepare_data(device, layer, "val") + + (first_train_h, train_gt, _), *rest = train_dict.values() + (_, v, k, d) = first_train_h.shape + + reporter_dir, lr_dir = self.create_models_dir(assert_type(Path, self.out_dir)) + + reporter_train_result = self.train_reporter(device, layer, reporter_dir) + reporter = reporter_train_result.reporter + train_loss = reporter_train_result.train_loss + + lr_models = self.train_lr_model(train_dict, device, layer, lr_dir) + row_bufs = defaultdict(list) for ds_name in val_dict: val_h, val_gt, val_lm_preds = val_dict[ds_name] From b6de9576966bbf19cf54d2b4117ab239c6938115 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Thu, 13 Jul 2023 19:19:00 +0000 Subject: [PATCH 049/102] remove pseudo auroc --- elk/training/train.py | 1 - 1 file changed, 1 deletion(-) diff --git a/elk/training/train.py b/elk/training/train.py index ad4d4fd96..fa6138869 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -156,7 +156,6 @@ def apply_to_layer( **evaluate_preds( val_gt, val_credences, prompt_ensembling ).to_dict(), - 
"pseudo_auroc": pseudo_auroc, "train_loss": train_loss, } ) From cf32b0c870ee190349fb3535d107b778c5759feb Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Thu, 13 Jul 2023 19:24:04 +0000 Subject: [PATCH 050/102] rename to prompt_ensembling --- elk/plotting/visualize.py | 24 ++++++++++++------------ elk/run.py | 14 +++++++------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/elk/plotting/visualize.py b/elk/plotting/visualize.py index 1eeb223c0..dc68ba1d0 100644 --- a/elk/plotting/visualize.py +++ b/elk/plotting/visualize.py @@ -54,11 +54,11 @@ def render( ) color_map = dict(zip(ensemblings, qualitative.Plotly)) - for ensembling in ensemblings: - ensemble_data: pd.DataFrame = df[df["ensembling"] == ensembling.value] + for prompt_ensembling in ensemblings: + ensemble_data: pd.DataFrame = df[df["prompt_ensembling"] == prompt_ensembling.value] if with_transfer: # TODO write tests ensemble_data = ensemble_data.groupby( - ["eval_dataset", "layer", "ensembling"], as_index=False + ["eval_dataset", "layer", "prompt_ensembling"], as_index=False ).agg({"auroc_estimate": "mean"}) else: ensemble_data = ensemble_data[ @@ -80,11 +80,11 @@ def render( x=dataset_data["layer"], y=dataset_data["auroc_estimate"], mode="lines", - name=ensembling.value, + name=prompt_ensembling.value, showlegend=False if dataset_name != unique_datasets[0] else True, - line=dict(color=color_map[ensembling]), + line=dict(color=color_map[prompt_ensembling]), ), row=row, col=col, @@ -96,7 +96,7 @@ def render( fig.update_layout( legend=dict( - title="Ensembling", + title="prompt_ensembling", ), title=f"AUROC Trend: {self.model_name}", ) @@ -118,7 +118,7 @@ class TransferEvalHeatmap: layer: int score_type: str = "auroc_estimate" - ensembling: PromptEnsembling = PromptEnsembling.FULL + prompt_ensembling: PromptEnsembling = PromptEnsembling.FULL def render(self, df: pd.DataFrame) -> go.Figure: """Render the heatmap visualization. @@ -248,7 +248,7 @@ def render_and_save( sweep: "SweepVisualization", dataset_names: list[str] | None = None, score_type="auroc_estimate", - ensembling=PromptEnsembling.FULL, + prompt_ensembling=PromptEnsembling.FULL, ) -> None: """Render and save the visualization for the model. @@ -256,7 +256,7 @@ def render_and_save( sweep: The SweepVisualization instance. dataset_names: List of dataset names to include in the visualization. score_type: The type of score to display. - ensembling: The ensembling option to consider. + prompt_ensembling: The prompt_ensembling option to consider. """ df = self.df model_name = self.model_name @@ -266,10 +266,10 @@ def render_and_save( if self.is_transfer: for layer in range(layer_min, layer_max + 1): filtered = df[ - (df["layer"] == layer) & (df["ensembling"] == ensembling.value) + (df["layer"] == layer) & (df["prompt_ensembling"] == prompt_ensembling.value) ] fig = TransferEvalHeatmap( - layer, score_type=score_type, ensembling=ensembling + layer, score_type=score_type, prompt_ensembling=prompt_ensembling ).render(filtered) fig.write_image(file=model_path / f"{layer}.png") fig = TransferEvalTrend(dataset_names).render(df) @@ -387,7 +387,7 @@ def render_table( Returns: The generated score table as a pandas DataFrame. 
""" - df = self.df[self.df["ensembling"] == PromptEnsembling.PARTIAL.value] + df = self.df[self.df["prompt_ensembling"] == PromptEnsembling.PARTIAL.value] # For each model, we use the layer whose mean AUROC is the highest best_layers, model_dfs = [], [] diff --git a/elk/run.py b/elk/run.py index e369eae53..594bca4cb 100644 --- a/elk/run.py +++ b/elk/run.py @@ -192,26 +192,26 @@ def apply_to_layers( finally: # Make sure the CSVs are written even if we crash or get interrupted for name, dfs in df_buffers.items(): - df = pd.concat(dfs).sort_values(by=["layer", "ensembling"]) + df = pd.concat(dfs).sort_values(by=["layer", "prompt_ensembling"]) df.round(4).to_csv(self.out_dir / f"{name}.csv", index=False) if self.debug: save_debug_log(self.datasets, self.out_dir) dfs = [] - for ensembling in PromptEnsembling.all(): + for prompt_ensembling in PromptEnsembling.all(): layer_ensembling_results = layer_ensembling( - layer_outputs, ensembling + layer_outputs, prompt_ensembling ) df = pd.DataFrame(layer_ensembling_results.to_dict(), index=[0]) df = df.round(4) - df["ensembling"] = ensembling.value + df["prompt_ensembling"] = prompt_ensembling.value dfs.append(df) df_concat = pd.concat(dfs) - # Rearrange the columns so that ensembling is in front - columns = ["ensembling"] + [ - col for col in df_concat.columns if col != "ensembling" + # Rearrange the columns so that prompt_ensembling is in front + columns = ["prompt_ensembling"] + [ + col for col in df_concat.columns if col != "prompt_ensembling" ] df_concat = df_concat[columns] df_concat.to_csv( From 769676a850885a2db317931a5bb0075ea0775ad4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 13 Jul 2023 19:24:19 +0000 Subject: [PATCH 051/102] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- elk/plotting/visualize.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/elk/plotting/visualize.py b/elk/plotting/visualize.py index dc68ba1d0..3c2b9cdf7 100644 --- a/elk/plotting/visualize.py +++ b/elk/plotting/visualize.py @@ -55,7 +55,9 @@ def render( color_map = dict(zip(ensemblings, qualitative.Plotly)) for prompt_ensembling in ensemblings: - ensemble_data: pd.DataFrame = df[df["prompt_ensembling"] == prompt_ensembling.value] + ensemble_data: pd.DataFrame = df[ + df["prompt_ensembling"] == prompt_ensembling.value + ] if with_transfer: # TODO write tests ensemble_data = ensemble_data.groupby( ["eval_dataset", "layer", "prompt_ensembling"], as_index=False @@ -266,7 +268,8 @@ def render_and_save( if self.is_transfer: for layer in range(layer_min, layer_max + 1): filtered = df[ - (df["layer"] == layer) & (df["prompt_ensembling"] == prompt_ensembling.value) + (df["layer"] == layer) + & (df["prompt_ensembling"] == prompt_ensembling.value) ] fig = TransferEvalHeatmap( layer, score_type=score_type, prompt_ensembling=prompt_ensembling From e6c9d4c0961c25e0e7583d7e6e4a76413330ef14 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Thu, 13 Jul 2023 19:35:07 +0000 Subject: [PATCH 052/102] precomit fixes --- elk/metrics/eval.py | 3 ++- elk/plotting/visualize.py | 7 +++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index 55548a3ea..6c6ee3c8b 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -197,7 +197,8 @@ def layer_ensembling( layer_outputs: list, prompt_ensembling: PromptEnsembling ) -> EvalResult: """ - Return EvalResult after prompt_ensembling the 
probe output of the middle to last layers + Return EvalResult after prompt_ensembling + the probe output of the middle to last layers Args: layer_outputs: A list of dictionaries containing the ground truth and diff --git a/elk/plotting/visualize.py b/elk/plotting/visualize.py index dc68ba1d0..3c2b9cdf7 100644 --- a/elk/plotting/visualize.py +++ b/elk/plotting/visualize.py @@ -55,7 +55,9 @@ def render( color_map = dict(zip(ensemblings, qualitative.Plotly)) for prompt_ensembling in ensemblings: - ensemble_data: pd.DataFrame = df[df["prompt_ensembling"] == prompt_ensembling.value] + ensemble_data: pd.DataFrame = df[ + df["prompt_ensembling"] == prompt_ensembling.value + ] if with_transfer: # TODO write tests ensemble_data = ensemble_data.groupby( ["eval_dataset", "layer", "prompt_ensembling"], as_index=False @@ -266,7 +268,8 @@ def render_and_save( if self.is_transfer: for layer in range(layer_min, layer_max + 1): filtered = df[ - (df["layer"] == layer) & (df["prompt_ensembling"] == prompt_ensembling.value) + (df["layer"] == layer) + & (df["prompt_ensembling"] == prompt_ensembling.value) ] fig = TransferEvalHeatmap( layer, score_type=score_type, prompt_ensembling=prompt_ensembling From 52b1394232b2f23ab5867be1b6729bfd249ff306 Mon Sep 17 00:00:00 2001 From: jon Date: Fri, 14 Jul 2023 14:19:15 +0100 Subject: [PATCH 053/102] WIP add multiprobe training --- elk/training/train.py | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/elk/training/train.py b/elk/training/train.py index 3eb018bcf..5d2a6b500 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -76,9 +76,9 @@ def make_eval(self, model, eval_dataset): ) # Create a separate function to handle the reporter training. - def train_reporter(self, device, layer, out_dir) -> ReporterTrainResult: - train_dict = self.prepare_data(device, layer, "train") - + def train_and_save_reporter( + self, device, layer, out_dir, train_dict + ) -> ReporterTrainResult: (first_train_h, train_gt, _), *rest = train_dict.values() # TODO can remove? 
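+        # Shape convention: each hidden-state tensor is (n, v, k, d) =
+        # (examples, prompt variants, classes, hidden size); the checks below
+        # require every dataset in train_dict to agree on v, k and d.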
(_, v, k, d) = first_train_h.shape if not all(other_h.shape[-1] == d for other_h, _, _ in rest): @@ -128,6 +128,7 @@ def train_reporter(self, device, layer, out_dir) -> ReporterTrainResult: raise ValueError(f"Unknown reporter config type: {type(self.net)}") # Save reporter checkpoint to disk + # TODO have to change this torch.save(reporter, out_dir / f"layer_{layer}.pt") return ReporterTrainResult(reporter, train_loss) @@ -166,13 +167,36 @@ def apply_to_layer( reporter_dir, lr_dir = self.create_models_dir(assert_type(Path, self.out_dir)) - reporter_train_result = self.train_reporter(device, layer, reporter_dir) + probe_per_prompt = True + if probe_per_prompt: + train_dicts = [ + { + ds_name: ( + train_h[:, i : i + 1, ...], + train_gt, + lm_preds[:, i : i + 1, ...], + ) + } + for ds_name, (train_h, _, lm_preds) in train_dict.items() + for i in range(v) # v is number of variants + ] + + [ + self.train_and_save_reporter(device, layer, reporter_dir, train_dict) + for train_dict in train_dicts + ] + else: + reporter_train_result = self.train_and_save_reporter( + device, layer, reporter_dir, train_dict + ) + reporter = reporter_train_result.reporter train_loss = reporter_train_result.train_loss lr_models = self.train_lr_model(train_dict, device, layer, lr_dir) row_bufs = defaultdict(list) + for ds_name in val_dict: val_h, val_gt, val_lm_preds = val_dict[ds_name] train_h, train_gt, train_lm_preds = train_dict[ds_name] From 2420ae0fba1a4de1321b7ebd9c7c52c91f499537 Mon Sep 17 00:00:00 2001 From: jon Date: Fri, 14 Jul 2023 16:38:15 +0100 Subject: [PATCH 054/102] multiprobe elicit works --- elk/run.py | 16 ++-- elk/training/train.py | 184 +++++++++++++++++++++++++++--------------- 2 files changed, 128 insertions(+), 72 deletions(-) diff --git a/elk/run.py b/elk/run.py index 1659e3926..b444d80b2 100644 --- a/elk/run.py +++ b/elk/run.py @@ -187,13 +187,19 @@ def apply_to_layers( df_buffers = defaultdict(list) try: - for df_dict in tqdm(mapper(func, layers), total=len(layers)): - for k, v in df_dict.items(): - df_buffers[k].append(v) + for df_dicts in tqdm(mapper(func, layers), total=len(layers)): + for df_dict in df_dicts: + for k, v in df_dict.items(): + df_buffers[k].append(v) finally: # Make sure the CSVs are written even if we crash or get interrupted for name, dfs in df_buffers.items(): - df = pd.concat(dfs).sort_values(by=["layer", "ensembling"]) - df.round(4).to_csv(self.out_dir / f"{name}.csv", index=False) + sortby = ["layer", "ensembling"] + if "prompt_index" in dfs[0].columns: + sortby.append("prompt_index") + # TODO make the prompt index third col + df = pd.concat(dfs).sort_values(by=sortby) + out_path = self.out_dir / f"{name}.csv" + df.round(4).to_csv(out_path, index=False) if self.debug: save_debug_log(self.datasets, self.out_dir) diff --git a/elk/training/train.py b/elk/training/train.py index 5d2a6b500..2d85cc132 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -15,7 +15,6 @@ from ..metrics import evaluate_preds, to_one_hot from ..run import Run from ..training.supervised import train_supervised -from ..utils.typing import assert_type from .ccs_reporter import CcsConfig, CcsReporter from .common import FitterConfig, Reporter from .eigen_reporter import EigenFitter, EigenFitterConfig @@ -41,6 +40,81 @@ class Elicit(Run): cross-validation. Defaults to "single", which means to train a single classifier on the training data. 
"cv" means to use cross-validation.""" + def evaluate_and_save( + self, + train_loss, + reporter, + train_dict, + val_dict, + lr_models, + layer, + prompt_index=None, + ): + row_bufs = defaultdict(list) + for ds_name in val_dict: + val_h, val_gt, val_lm_preds = val_dict[ds_name] + train_h, train_gt, train_lm_preds = train_dict[ds_name] + meta = {"dataset": ds_name, "layer": layer} + + val_credences = reporter(val_h) + train_credences = reporter(train_h) + maybe_prompt_index = ( + {} if prompt_index is None else {"prompt_index": prompt_index} + ) + for mode in ("none", "partial", "full"): + row_bufs["eval"].append( + { + **meta, + "ensembling": mode, + **evaluate_preds(val_gt, val_credences, mode).to_dict(), + "train_loss": train_loss, + **maybe_prompt_index, + } + ) + + row_bufs["train_eval"].append( + { + **meta, + "ensembling": mode, + **evaluate_preds(train_gt, train_credences, mode).to_dict(), + "train_loss": train_loss, + **maybe_prompt_index, + } + ) + + if val_lm_preds is not None: + row_bufs["lm_eval"].append( + { + **meta, + "ensembling": mode, + **evaluate_preds(val_gt, val_lm_preds, mode).to_dict(), + **maybe_prompt_index, + } + ) + + if train_lm_preds is not None: + row_bufs["train_lm_eval"].append( + { + **meta, + "ensembling": mode, + **evaluate_preds(train_gt, train_lm_preds, mode).to_dict(), + **maybe_prompt_index, + } + ) + + for i, model in enumerate(lr_models): + row_bufs["lr_eval"].append( + { + **meta, + "ensembling": mode, + "inlp_iter": i, + **evaluate_preds(val_gt, model(val_h), mode).to_dict(), + **maybe_prompt_index, + } + ) + + return {k: pd.DataFrame(v) for k, v in row_bufs.items()} + def create_models_dir(self, out_dir: Path): lr_dir = None lr_dir = out_dir / "lr_models" @@ -129,6 +203,7 @@ def train_and_save_reporter( # Save reporter checkpoint to disk # TODO have to change this + out_dir.mkdir(parents=True, exist_ok=True) torch.save(reporter, out_dir / f"layer_{layer}.pt") return ReporterTrainResult(reporter, train_loss) @@ -140,6 +215,8 @@ def train_lr_model(self, train_dict, device, layer, out_dir): device=device, mode=self.supervised, ) + # make dir if not exists + out_dir.mkdir(parents=True, exist_ok=True) with open(out_dir / f"layer_{layer}.pt", "wb") as file: torch.save(lr_models, file) else: @@ -153,7 +230,7 @@ def apply_to_layer( devices: list[str], world_size: int, probe_per_prompt: bool, - ) -> dict[str, pd.DataFrame]: + ) -> list[dict[str, pd.DataFrame]]: """Train a single reporter on a single layer.""" self.make_reproducible(seed=self.net.seed + layer) @@ -165,7 +242,8 @@ def apply_to_layer( (first_train_h, train_gt, _), *rest = train_dict.values() (_, v, k, d) = first_train_h.shape - reporter_dir, lr_dir = self.create_models_dir(assert_type(Path, self.out_dir)) + # TODO is this even needed + # reporter_dir, lr_dir = self.create_models_dir(assert_type(Path, self.out_dir)) probe_per_prompt = True if probe_per_prompt: @@ -174,81 +252,53 @@ def apply_to_layer( ds_name: ( train_h[:, i : i + 1, ...], train_gt, - lm_preds[:, i : i + 1, ...], + lm_preds[:, i : i + 1, ...] 
if lm_preds is not None else None, ) } for ds_name, (train_h, _, lm_preds) in train_dict.items() for i in range(v) # v is number of variants ] - [ - self.train_and_save_reporter(device, layer, reporter_dir, train_dict) - for train_dict in train_dicts - ] - else: - reporter_train_result = self.train_and_save_reporter( - device, layer, reporter_dir, train_dict - ) - - reporter = reporter_train_result.reporter - train_loss = reporter_train_result.train_loss - - lr_models = self.train_lr_model(train_dict, device, layer, lr_dir) - - row_bufs = defaultdict(list) - - for ds_name in val_dict: - val_h, val_gt, val_lm_preds = val_dict[ds_name] - train_h, train_gt, train_lm_preds = train_dict[ds_name] - meta = {"dataset": ds_name, "layer": layer} + res = [] + for i, train_dict in enumerate(train_dicts): + reporters_path = self.out_dir / str(i) / "reporters" + lr_path = self.out_dir / str(i) / "lr_models" - val_credences = reporter(val_h) - train_credences = reporter(train_h) - for mode in ("none", "partial", "full"): - row_bufs["eval"].append( - { - **meta, - "ensembling": mode, - **evaluate_preds(val_gt, val_credences, mode).to_dict(), - "train_loss": train_loss, - } + reporter_train_result = self.train_and_save_reporter( + device, layer, reporters_path, train_dict ) - row_bufs["train_eval"].append( - { - **meta, - "ensembling": mode, - **evaluate_preds(train_gt, train_credences, mode).to_dict(), - "train_loss": train_loss, - } - ) + reporter = reporter_train_result.reporter + train_loss = reporter_train_result.train_loss - if val_lm_preds is not None: - row_bufs["lm_eval"].append( - { - **meta, - "ensembling": mode, - **evaluate_preds(val_gt, val_lm_preds, mode).to_dict(), - } - ) + lr_models = self.train_lr_model(train_dict, device, layer, lr_path) - if train_lm_preds is not None: - row_bufs["train_lm_eval"].append( - { - **meta, - "ensembling": mode, - **evaluate_preds(train_gt, train_lm_preds, mode).to_dict(), - } + res.append( + self.evaluate_and_save( + train_loss, + reporter, + train_dict, + val_dict, + lr_models, + layer, + prompt_index=i, ) + ) + return res + else: + reporter_train_result = self.train_and_save_reporter( + device, layer, self.out_dir / "reporters", train_dict + ) - for i, model in enumerate(lr_models): - row_bufs["lr_eval"].append( - { - **meta, - "ensembling": mode, - "inlp_iter": i, - **evaluate_preds(val_gt, model(val_h), mode).to_dict(), - } - ) + reporter = reporter_train_result.reporter + train_loss = reporter_train_result.train_loss - return {k: pd.DataFrame(v) for k, v in row_bufs.items()} + lr_models = self.train_lr_model( + train_dict, device, layer, self.out_dir / "lr_models" + ) + + return [ + self.evaluate_and_save( + train_loss, reporter, train_dict, val_dict, lr_models, layer + ) + ] From f35626a8bc73ba3b1408b20b8794ac7f39543116 Mon Sep 17 00:00:00 2001 From: jon Date: Fri, 14 Jul 2023 16:46:23 +0100 Subject: [PATCH 055/102] fix pyright --- elk/run.py | 6 +++--- elk/training/train.py | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/elk/run.py b/elk/run.py index b444d80b2..fd7235698 100644 --- a/elk/run.py +++ b/elk/run.py @@ -102,7 +102,7 @@ def execute( devices = select_usable_devices(self.num_gpus, min_memory=self.min_gpu_mem) num_devices = len(devices) - func: Callable[[int], dict[str, pd.DataFrame]] = partial( + func: Callable[[int], list[dict[str, pd.DataFrame]]] = partial( self.apply_to_layer, devices=devices, world_size=num_devices, @@ -113,7 +113,7 @@ def execute( @abstractmethod def apply_to_layer( self, layer: int, devices: 
list[str], world_size: int, probe_per_prompt: bool - ) -> dict[str, pd.DataFrame]: + ) -> list[dict[str, pd.DataFrame]]: """Train or eval a reporter on a single layer.""" def make_reproducible(self, seed: int): @@ -162,7 +162,7 @@ def concatenate(self, layers): def apply_to_layers( self, - func: Callable[[int], dict[str, pd.DataFrame]], + func: Callable[[int], list[dict[str, pd.DataFrame]]], num_devices: int, ): """Apply a function to each layer of the datasets in parallel diff --git a/elk/training/train.py b/elk/training/train.py index 2d85cc132..902edfd47 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -232,6 +232,8 @@ def apply_to_layer( probe_per_prompt: bool, ) -> list[dict[str, pd.DataFrame]]: """Train a single reporter on a single layer.""" + assert self.out_dir is not None # TODO this is really annoying, why can it be + # None? self.make_reproducible(seed=self.net.seed + layer) device = self.get_device(devices, world_size) From 898c3f1c7a6094b71e5166ce43e9aa98a20f0aea Mon Sep 17 00:00:00 2001 From: jon Date: Tue, 18 Jul 2023 14:32:01 +0100 Subject: [PATCH 056/102] implemented multi probe for elicit --- elk/training/train.py | 151 +++++++++++++++++++++++++----------------- 1 file changed, 89 insertions(+), 62 deletions(-) diff --git a/elk/training/train.py b/elk/training/train.py index 902edfd47..277567c47 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -19,13 +19,30 @@ from .common import FitterConfig, Reporter from .eigen_reporter import EigenFitter, EigenFitterConfig +# declare AnyReporter as CcsReporter | Reporter type alias +AnyReporter = CcsReporter | Reporter + @dataclass class ReporterTrainResult: - reporter: CcsReporter | Reporter + reporter: AnyReporter train_loss: float | None +class MultiReporter: + def __init__(self, reporter_results: list[ReporterTrainResult]): + self.reporter_results: list[ReporterTrainResult] = reporter_results + self.reporters = [r.reporter for r in reporter_results] + train_losses = [r.train_loss for r in reporter_results] + self.train_loss = ( + None if train_losses[0] is None else sum(train_losses) / len(train_losses) + ) + + def __call__(self, h): + credences = [r(h) for r in self.reporters] + return torch.stack(credences).mean(dim=0) + + @dataclass class Elicit(Run): """Full specification of a reporter training run.""" @@ -43,12 +60,11 @@ class Elicit(Run): def evaluate_and_save( self, train_loss, - reporter, + reporter: AnyReporter | MultiReporter, train_dict, val_dict, lr_models, layer, - prompt_index=None, ): row_bufs = defaultdict(list) for ds_name in val_dict: @@ -56,62 +72,74 @@ def evaluate_and_save( train_h, train_gt, train_lm_preds = train_dict[ds_name] meta = {"dataset": ds_name, "layer": layer} - val_credences = reporter(val_h) - train_credences = reporter(train_h) - maybe_prompt_index = ( - {} if prompt_index is None else {"prompt_index": prompt_index} - ) - for mode in ("none", "partial", "full"): - row_bufs["eval"].append( - { - **meta, - "ensembling": mode, - **evaluate_preds(val_gt, val_credences, mode).to_dict(), - "train_loss": train_loss, - **maybe_prompt_index, - } - ) - - row_bufs["train_eval"].append( - { - **meta, - "ensembling": mode, - **evaluate_preds(train_gt, train_credences, mode).to_dict(), - "train_loss": train_loss, - **maybe_prompt_index, - } - ) - - if val_lm_preds is not None: - row_bufs["lm_eval"].append( + def eval_all( + reporter: AnyReporter | MultiReporter, + prompt_index: int | Literal["multi"], + ): + val_credences = reporter(val_h) + train_credences = 
reporter(train_h) + prompt_index = {"prompt_index": prompt_index} + for mode in ("none", "partial", "full"): + row_bufs["eval"].append( { **meta, "ensembling": mode, - **evaluate_preds(val_gt, val_lm_preds, mode).to_dict(), - **maybe_prompt_index, + **evaluate_preds(val_gt, val_credences, mode).to_dict(), + "train_loss": train_loss, + **prompt_index, } ) - if train_lm_preds is not None: - row_bufs["train_lm_eval"].append( + row_bufs["train_eval"].append( { **meta, "ensembling": mode, - **evaluate_preds(train_gt, train_lm_preds, mode).to_dict(), - **maybe_prompt_index, + **evaluate_preds(train_gt, train_credences, mode).to_dict(), + "train_loss": train_loss, + **prompt_index, } ) - for i, model in enumerate(lr_models): - row_bufs["lr_eval"].append( - { - **meta, - "ensembling": mode, - "inlp_iter": i, - **evaluate_preds(val_gt, model(val_h), mode).to_dict(), - **maybe_prompt_index, - } - ) + if val_lm_preds is not None: + row_bufs["lm_eval"].append( + { + **meta, + "ensembling": mode, + **evaluate_preds(val_gt, val_lm_preds, mode).to_dict(), + **prompt_index, + } + ) + + if train_lm_preds is not None: + row_bufs["train_lm_eval"].append( + { + **meta, + "ensembling": mode, + **evaluate_preds( + train_gt, train_lm_preds, mode + ).to_dict(), + **prompt_index, + } + ) + + for i, model in enumerate(lr_models): + row_bufs["lr_eval"].append( + { + **meta, + "ensembling": mode, + "inlp_iter": i, + **evaluate_preds(val_gt, model(val_h), mode).to_dict(), + **prompt_index, + } + ) + + if isinstance(reporter, MultiReporter): + for prompt_index, reporter_result in enumerate( + reporter.reporter_results + ): + eval_all(reporter_result.reporter, prompt_index) + + eval_all(reporter, "multi") return {k: pd.DataFrame(v) for k, v in row_bufs.items()} @@ -261,7 +289,7 @@ def apply_to_layer( for i in range(v) # v is number of variants ] - res = [] + results = [] for i, train_dict in enumerate(train_dicts): reporters_path = self.out_dir / str(i) / "reporters" lr_path = self.out_dir / str(i) / "lr_models" @@ -269,24 +297,23 @@ def apply_to_layer( reporter_train_result = self.train_and_save_reporter( device, layer, reporters_path, train_dict ) - - reporter = reporter_train_result.reporter - train_loss = reporter_train_result.train_loss + results.append(reporter_train_result) lr_models = self.train_lr_model(train_dict, device, layer, lr_path) - res.append( - self.evaluate_and_save( - train_loss, - reporter, - train_dict, - val_dict, - lr_models, - layer, - prompt_index=i, - ) + multi_reporter = MultiReporter(results) + train_loss = multi_reporter.train_loss + + return [ + self.evaluate_and_save( + train_loss, + multi_reporter, + train_dict, + val_dict, + lr_models, # TODO I don't care about this right now but + layer, ) - return res + ] else: reporter_train_result = self.train_and_save_reporter( device, layer, self.out_dir / "reporters", train_dict From 01d5baadebe4247cd00dac0c4bd1e4c052f515b5 Mon Sep 17 00:00:00 2001 From: jon Date: Tue, 18 Jul 2023 15:35:54 +0100 Subject: [PATCH 057/102] undo list --- elk/run.py | 16 ++-- elk/training/train.py | 207 +++++++++++++++++++++--------------------- 2 files changed, 110 insertions(+), 113 deletions(-) diff --git a/elk/run.py b/elk/run.py index fd7235698..0084ad31f 100644 --- a/elk/run.py +++ b/elk/run.py @@ -102,7 +102,7 @@ def execute( devices = select_usable_devices(self.num_gpus, min_memory=self.min_gpu_mem) num_devices = len(devices) - func: Callable[[int], list[dict[str, pd.DataFrame]]] = partial( + func: Callable[[int], dict[str, pd.DataFrame]] = partial( 
self.apply_to_layer, devices=devices, world_size=num_devices, @@ -113,7 +113,7 @@ def execute( @abstractmethod def apply_to_layer( self, layer: int, devices: list[str], world_size: int, probe_per_prompt: bool - ) -> list[dict[str, pd.DataFrame]]: + ) -> dict[str, pd.DataFrame]: """Train or eval a reporter on a single layer.""" def make_reproducible(self, seed: int): @@ -162,7 +162,7 @@ def concatenate(self, layers): def apply_to_layers( self, - func: Callable[[int], list[dict[str, pd.DataFrame]]], + func: Callable[[int], dict[str, pd.DataFrame]], num_devices: int, ): """Apply a function to each layer of the datasets in parallel @@ -187,17 +187,17 @@ def apply_to_layers( df_buffers = defaultdict(list) try: - for df_dicts in tqdm(mapper(func, layers), total=len(layers)): - for df_dict in df_dicts: - for k, v in df_dict.items(): - df_buffers[k].append(v) + for df_dict in tqdm(mapper(func, layers), total=len(layers)): + for k, v in df_dict.items(): + df_buffers[k].append(v) finally: # Make sure the CSVs are written even if we crash or get interrupted for name, dfs in df_buffers.items(): sortby = ["layer", "ensembling"] if "prompt_index" in dfs[0].columns: sortby.append("prompt_index") - # TODO make the prompt index third col + # make the prompt index third col + df = pd.concat(dfs).sort_values(by=sortby) out_path = self.out_dir / f"{name}.csv" df.round(4).to_csv(out_path, index=False) diff --git a/elk/training/train.py b/elk/training/train.py index 277567c47..69f651b28 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -33,115 +33,117 @@ class MultiReporter: def __init__(self, reporter_results: list[ReporterTrainResult]): self.reporter_results: list[ReporterTrainResult] = reporter_results self.reporters = [r.reporter for r in reporter_results] - train_losses = [r.train_loss for r in reporter_results] - self.train_loss = ( - None if train_losses[0] is None else sum(train_losses) / len(train_losses) - ) + train_losses = [r.train_loss for r in reporter_results] if reporter_results[ + 0].train_loss \ + is not None else None + self.train_loss = sum(train_losses) / len( + train_losses + ) if train_losses is not None else None def __call__(self, h): credences = [r(h) for r in self.reporters] return torch.stack(credences).mean(dim=0) -@dataclass -class Elicit(Run): - """Full specification of a reporter training run.""" +def evaluate_and_save( + train_loss, + reporter: AnyReporter | MultiReporter, + train_dict, + val_dict, + lr_models, + layer, +): + row_bufs = defaultdict(list) + for ds_name in val_dict: + val_h, val_gt, val_lm_preds = val_dict[ds_name] + train_h, train_gt, train_lm_preds = train_dict[ds_name] + meta = {"dataset": ds_name, "layer": layer} + + def eval_all( + reporter: AnyReporter | MultiReporter, + prompt_index: int | Literal["multi"], + ): + val_credences = reporter(val_h) + train_credences = reporter(train_h) + prompt_index = {"prompt_index": prompt_index} + for mode in ("none", "partial", "full"): + row_bufs["eval"].append( + { + **meta, + "ensembling": mode, + **evaluate_preds(val_gt, val_credences, mode).to_dict(), + "train_loss": train_loss, + **prompt_index, + } + ) - net: FitterConfig = subgroups( - {"ccs": CcsConfig, "eigen": EigenFitterConfig}, default="eigen" - ) - """Config for building the reporter network.""" + row_bufs["train_eval"].append( + { + **meta, + "ensembling": mode, + **evaluate_preds(train_gt, train_credences, mode).to_dict(), + "train_loss": train_loss, + **prompt_index, + } + ) - supervised: Literal["none", "single", "inlp", "cv"] = "single" 
- """Whether to train a supervised classifier, and if so, whether to use - cross-validation. Defaults to "single", which means to train a single classifier - on the training data. "cv" means to use cross-validation.""" + if val_lm_preds is not None: + row_bufs["lm_eval"].append( + { + **meta, + "ensembling": mode, + **evaluate_preds(val_gt, val_lm_preds, mode).to_dict(), + **prompt_index, + } + ) - def evaluate_and_save( - self, - train_loss, - reporter: AnyReporter | MultiReporter, - train_dict, - val_dict, - lr_models, - layer, - ): - row_bufs = defaultdict(list) - for ds_name in val_dict: - val_h, val_gt, val_lm_preds = val_dict[ds_name] - train_h, train_gt, train_lm_preds = train_dict[ds_name] - meta = {"dataset": ds_name, "layer": layer} - - def eval_all( - reporter: AnyReporter | MultiReporter, - prompt_index: int | Literal["multi"], - ): - val_credences = reporter(val_h) - train_credences = reporter(train_h) - prompt_index = {"prompt_index": prompt_index} - for mode in ("none", "partial", "full"): - row_bufs["eval"].append( + if train_lm_preds is not None: + row_bufs["train_lm_eval"].append( { **meta, "ensembling": mode, - **evaluate_preds(val_gt, val_credences, mode).to_dict(), - "train_loss": train_loss, + **evaluate_preds( + train_gt, train_lm_preds, mode + ).to_dict(), **prompt_index, } ) - row_bufs["train_eval"].append( + for i, model in enumerate(lr_models): + row_bufs["lr_eval"].append( { **meta, "ensembling": mode, - **evaluate_preds(train_gt, train_credences, mode).to_dict(), - "train_loss": train_loss, + "inlp_iter": i, + **evaluate_preds(val_gt, model(val_h), mode).to_dict(), **prompt_index, } ) - if val_lm_preds is not None: - row_bufs["lm_eval"].append( - { - **meta, - "ensembling": mode, - **evaluate_preds(val_gt, val_lm_preds, mode).to_dict(), - **prompt_index, - } - ) - - if train_lm_preds is not None: - row_bufs["train_lm_eval"].append( - { - **meta, - "ensembling": mode, - **evaluate_preds( - train_gt, train_lm_preds, mode - ).to_dict(), - **prompt_index, - } - ) - - for i, model in enumerate(lr_models): - row_bufs["lr_eval"].append( - { - **meta, - "ensembling": mode, - "inlp_iter": i, - **evaluate_preds(val_gt, model(val_h), mode).to_dict(), - **prompt_index, - } - ) - - if isinstance(reporter, MultiReporter): - for prompt_index, reporter_result in enumerate( - reporter.reporter_results - ): - eval_all(reporter_result.reporter, prompt_index) - - eval_all(reporter, "multi") - - return {k: pd.DataFrame(v) for k, v in row_bufs.items()} + if isinstance(reporter, MultiReporter): + for prompt_index, reporter_result in enumerate( + reporter.reporter_results + ): + eval_all(reporter_result.reporter, prompt_index) + + eval_all(reporter, "multi") + + return {k: pd.DataFrame(v) for k, v in row_bufs.items()} + + +@dataclass +class Elicit(Run): + """Full specification of a reporter training run.""" + + net: FitterConfig = subgroups( + {"ccs": CcsConfig, "eigen": EigenFitterConfig}, default="eigen" + ) + """Config for building the reporter network.""" + + supervised: Literal["none", "single", "inlp", "cv"] = "single" + """Whether to train a supervised classifier, and if so, whether to use + cross-validation. Defaults to "single", which means to train a single classifier + on the training data. 
"cv" means to use cross-validation.""" def create_models_dir(self, out_dir: Path): lr_dir = None @@ -258,7 +260,7 @@ def apply_to_layer( devices: list[str], world_size: int, probe_per_prompt: bool, - ) -> list[dict[str, pd.DataFrame]]: + ) -> dict[str, pd.DataFrame]: """Train a single reporter on a single layer.""" assert self.out_dir is not None # TODO this is really annoying, why can it be # None? @@ -275,14 +277,13 @@ def apply_to_layer( # TODO is this even needed # reporter_dir, lr_dir = self.create_models_dir(assert_type(Path, self.out_dir)) - probe_per_prompt = True if probe_per_prompt: train_dicts = [ { ds_name: ( - train_h[:, i : i + 1, ...], + train_h[:, i: i + 1, ...], train_gt, - lm_preds[:, i : i + 1, ...] if lm_preds is not None else None, + lm_preds[:, i: i + 1, ...] if lm_preds is not None else None, ) } for ds_name, (train_h, _, lm_preds) in train_dict.items() @@ -304,16 +305,14 @@ def apply_to_layer( multi_reporter = MultiReporter(results) train_loss = multi_reporter.train_loss - return [ - self.evaluate_and_save( - train_loss, - multi_reporter, - train_dict, - val_dict, - lr_models, # TODO I don't care about this right now but - layer, - ) - ] + return evaluate_and_save( + train_loss, + multi_reporter, + train_dict, + val_dict, + lr_models, # TODO I don't care about this right now but + layer, + ) else: reporter_train_result = self.train_and_save_reporter( device, layer, self.out_dir / "reporters", train_dict @@ -326,8 +325,6 @@ def apply_to_layer( train_dict, device, layer, self.out_dir / "lr_models" ) - return [ - self.evaluate_and_save( - train_loss, reporter, train_dict, val_dict, lr_models, layer - ) - ] + return evaluate_and_save( + train_loss, reporter, train_dict, val_dict, lr_models, layer + ) From 7701c291fda586b5263096ad5f514992105a3256 Mon Sep 17 00:00:00 2001 From: jon Date: Tue, 18 Jul 2023 16:12:02 +0100 Subject: [PATCH 058/102] add more types and sorting --- elk/run.py | 13 ++++++-- elk/training/train.py | 72 ++++++++++++++++++++----------------------- 2 files changed, 44 insertions(+), 41 deletions(-) diff --git a/elk/run.py b/elk/run.py index 0084ad31f..07eb45df9 100644 --- a/elk/run.py +++ b/elk/run.py @@ -30,6 +30,8 @@ select_usable_devices, ) +PreparedData = dict[str, tuple[Tensor, Tensor, Tensor | None]] + @dataclass class Run(ABC, Serializable): @@ -132,7 +134,7 @@ def get_device(self, devices, world_size: int) -> str: def prepare_data( self, device: str, layer: int, split_type: Literal["train", "val"] - ) -> dict[str, tuple[Tensor, Tensor, Tensor | None]]: + ) -> PreparedData: """Prepare data for the specified layer and split type.""" out = {} @@ -196,9 +198,14 @@ def apply_to_layers( sortby = ["layer", "ensembling"] if "prompt_index" in dfs[0].columns: sortby.append("prompt_index") - # make the prompt index third col - df = pd.concat(dfs).sort_values(by=sortby) + + # Move prompt_index to the 2'th column + cols = list(df.columns) + cols.insert(2, cols.pop(cols.index("prompt_index"))) + df = df.reindex(columns=cols) + + # Save the CSV out_path = self.out_dir / f"{name}.csv" df.round(4).to_csv(out_path, index=False) if self.debug: diff --git a/elk/training/train.py b/elk/training/train.py index 69f651b28..be8402238 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -13,13 +13,13 @@ from ..evaluation import Eval from ..metrics import evaluate_preds, to_one_hot -from ..run import Run +from ..run import PreparedData, Run from ..training.supervised import train_supervised +from . 
import Classifier from .ccs_reporter import CcsConfig, CcsReporter from .common import FitterConfig, Reporter from .eigen_reporter import EigenFitter, EigenFitterConfig -# declare AnyReporter as CcsReporter | Reporter type alias AnyReporter = CcsReporter | Reporter @@ -33,12 +33,14 @@ class MultiReporter: def __init__(self, reporter_results: list[ReporterTrainResult]): self.reporter_results: list[ReporterTrainResult] = reporter_results self.reporters = [r.reporter for r in reporter_results] - train_losses = [r.train_loss for r in reporter_results] if reporter_results[ - 0].train_loss \ - is not None else None - self.train_loss = sum(train_losses) / len( - train_losses - ) if train_losses is not None else None + train_losses = ( + [r.train_loss for r in reporter_results] + if reporter_results[0].train_loss is not None + else None + ) + self.train_loss = ( + sum(train_losses) / len(train_losses) if train_losses is not None else None + ) def __call__(self, h): credences = [r(h) for r in self.reporters] @@ -46,12 +48,12 @@ def __call__(self, h): def evaluate_and_save( - train_loss, + train_loss: float | None, reporter: AnyReporter | MultiReporter, - train_dict, - val_dict, - lr_models, - layer, + train_dict: PreparedData, + val_dict: PreparedData, + lr_models: list[Classifier], + layer: int, ): row_bufs = defaultdict(list) for ds_name in val_dict: @@ -102,9 +104,7 @@ def eval_all( { **meta, "ensembling": mode, - **evaluate_preds( - train_gt, train_lm_preds, mode - ).to_dict(), + **evaluate_preds(train_gt, train_lm_preds, mode).to_dict(), **prompt_index, } ) @@ -121,9 +121,7 @@ def eval_all( ) if isinstance(reporter, MultiReporter): - for prompt_index, reporter_result in enumerate( - reporter.reporter_results - ): + for prompt_index, reporter_result in enumerate(reporter.reporter_results): eval_all(reporter_result.reporter, prompt_index) eval_all(reporter, "multi") @@ -238,7 +236,7 @@ def train_and_save_reporter( return ReporterTrainResult(reporter, train_loss) - def train_lr_model(self, train_dict, device, layer, out_dir): + def train_lr_model(self, train_dict, device, layer, out_dir) -> list[Classifier]: if self.supervised != "none": lr_models = train_supervised( train_dict, @@ -281,9 +279,9 @@ def apply_to_layer( train_dicts = [ { ds_name: ( - train_h[:, i: i + 1, ...], + train_h[:, i : i + 1, ...], train_gt, - lm_preds[:, i: i + 1, ...] if lm_preds is not None else None, + lm_preds[:, i : i + 1, ...] 
if lm_preds is not None else None, ) } for ds_name, (train_h, _, lm_preds) in train_dict.items() @@ -292,8 +290,12 @@ def apply_to_layer( results = [] for i, train_dict in enumerate(train_dicts): - reporters_path = self.out_dir / str(i) / "reporters" - lr_path = self.out_dir / str(i) / "lr_models" + # format i as a 2 digit string, assumes that there will never be more + # than 100 prompts + str_i = str(i).zfill(2) + base = self.out_dir / "reporters" / f"prompt_{str_i}" + reporters_path = base / "reporters" + lr_path = base / "lr_models" reporter_train_result = self.train_and_save_reporter( device, layer, reporters_path, train_dict @@ -302,29 +304,23 @@ def apply_to_layer( lr_models = self.train_lr_model(train_dict, device, layer, lr_path) - multi_reporter = MultiReporter(results) - train_loss = multi_reporter.train_loss + maybe_multi_reporter = MultiReporter(results) + train_loss = maybe_multi_reporter.train_loss + + # TODO fix lr_models - return evaluate_and_save( - train_loss, - multi_reporter, - train_dict, - val_dict, - lr_models, # TODO I don't care about this right now but - layer, - ) else: reporter_train_result = self.train_and_save_reporter( device, layer, self.out_dir / "reporters", train_dict ) - reporter = reporter_train_result.reporter + maybe_multi_reporter = reporter_train_result.reporter train_loss = reporter_train_result.train_loss lr_models = self.train_lr_model( train_dict, device, layer, self.out_dir / "lr_models" ) - return evaluate_and_save( - train_loss, reporter, train_dict, val_dict, lr_models, layer - ) + return evaluate_and_save( + train_loss, maybe_multi_reporter, train_dict, val_dict, lr_models, layer + ) From 4310def1352b2464e0a4b126c2a95e3c830d7ad7 Mon Sep 17 00:00:00 2001 From: jon Date: Tue, 18 Jul 2023 16:16:22 +0100 Subject: [PATCH 059/102] weird duplicate arg --- elk/run.py | 1 - 1 file changed, 1 deletion(-) diff --git a/elk/run.py b/elk/run.py index 07eb45df9..7e48cfa2d 100644 --- a/elk/run.py +++ b/elk/run.py @@ -56,7 +56,6 @@ class Run(ABC, Serializable): debug: bool = False min_gpu_mem: int | None = None # in bytes num_gpus: int = -1 - out_dir: Path | None = None disable_cache: bool = field(default=False, to_dict=False) def execute( From 602815216380de2b465ed7c130c121edd271e432 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Tue, 18 Jul 2023 15:36:26 +0000 Subject: [PATCH 060/102] fix num_classes --- elk/metrics/eval.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index 6c6ee3c8b..7363f7c3f 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -141,15 +141,23 @@ def evaluate_preds( Returns: dict: A dictionary containing the accuracy, AUROC, and ECE. 
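     Shape sketch (illustrative only; assumes torch and this module's names are
     already imported, with 100 examples, 5 prompt variants and 2 classes):

         y_logits = torch.randn(100, 5, 2)     # (n, num_variants, num_classes)
         y_true = torch.randint(0, 2, (100,))  # (n,)
         # FULL averages the logits over the variant axis before scoring,
         full = evaluate_preds(y_true, y_logits, PromptEnsembling.FULL)
         # while the "partial"/"none" modes score each variant, repeating y_true.
         partial = evaluate_preds(y_true, y_logits, PromptEnsembling.PARTIAL)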
""" + y_logits, y_true, num_classes = prepare(y_logits, y_true, prompt_ensembling) + return calc_eval_results(y_true, y_logits, prompt_ensembling, num_classes) + + +def prepare(y_logits: Tensor, y_true: Tensor, prompt_ensembling: PromptEnsembling): + """ + Prepare the logits and ground truth for evaluation + """ (n, num_variants, num_classes) = y_logits.shape - assert y_true.shape == (n,) + assert y_true.shape == (n,), f"y_true.shape: {y_true.shape} is not equal to n: {n}" if prompt_ensembling == PromptEnsembling.FULL: y_logits = y_logits.mean(dim=1) else: y_true = repeat(y_true, "n -> n v", v=num_variants) - return calc_eval_results(y_true, y_logits, prompt_ensembling, num_classes) + return y_logits, y_true, num_classes def calc_eval_results( y_true: Tensor, @@ -210,17 +218,17 @@ def layer_ensembling( calibrated accuracies, calibrated errors, and AUROC. """ device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - y_logits_means = [] + y_logits_collection = [] y_true = layer_outputs[0][0]["val_gt"].to(device) for layer_output in layer_outputs: y_logits = layer_output[0]["val_credences"].to(device) - y_logits_means.append(y_logits.mean(dim=1)) # full prompt_ensembling + y_logits, y_true, num_classes = prepare(y_logits, y_true, prompt_ensembling) + y_logits_collection.append(y_logits) - num_classes = layer_outputs[0][0]["val_credences"].shape[2] # get logits and ground_truth from middle to last layer middle_index = len(layer_outputs) // 2 - y_logits_stacked = torch.stack(y_logits_means[middle_index:]) + y_logits_stacked = torch.stack(y_logits_collection[middle_index:]) # layer prompt_ensembling of the stacked logits y_logits_stacked_mean = torch.mean(y_logits_stacked, dim=0) From 6d7d99a90fcdb4f3898ef14681bb25af1e394972 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 18 Jul 2023 15:39:21 +0000 Subject: [PATCH 061/102] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- elk/metrics/eval.py | 1 + 1 file changed, 1 insertion(+) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index 7363f7c3f..2159686bd 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -159,6 +159,7 @@ def prepare(y_logits: Tensor, y_true: Tensor, prompt_ensembling: PromptEnsemblin return y_logits, y_true, num_classes + def calc_eval_results( y_true: Tensor, y_logits: Tensor, From 4148857b89b965c64cc16ea8f7926fbdeaa545de Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Tue, 18 Jul 2023 17:38:02 +0000 Subject: [PATCH 062/102] fix bug where y_true has a dimension of two --- elk/metrics/eval.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index 7363f7c3f..61aac802c 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -219,11 +219,14 @@ def layer_ensembling( """ device = torch.device("cuda" if torch.cuda.is_available() else "cpu") y_logits_collection = [] - y_true = layer_outputs[0][0]["val_gt"].to(device) for layer_output in layer_outputs: + # all y_trues are identical, so just get the first + y_true = layer_outputs[0][0]["val_gt"].to(device) y_logits = layer_output[0]["val_credences"].to(device) - y_logits, y_true, num_classes = prepare(y_logits, y_true, prompt_ensembling) + y_logits, y_true, num_classes = prepare(y_logits=y_logits, + y_true=y_true, + prompt_ensembling=prompt_ensembling) y_logits_collection.append(y_logits) # get logits and ground_truth from middle to last layer From 
964f03ddd2294c04c3f45e722ecdc98452d28ffd Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Tue, 18 Jul 2023 17:38:17 +0000 Subject: [PATCH 063/102] cleanup --- elk/evaluation/evaluate.py | 6 +++--- elk/run.py | 3 ++- elk/training/train.py | 6 +++--- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index 7325d23fb..ce6d7a734 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -44,12 +44,12 @@ def apply_to_layer( row_bufs = defaultdict(list) - layer_outputs = [] + layer_output = [] for ds_name, (val_h, val_gt, _) in val_output.items(): meta = {"dataset": ds_name, "layer": layer} val_credences = reporter(val_h) - layer_outputs.append( + layer_output.append( {**meta, "val_gt": val_gt, "val_credences": val_credences} ) for prompt_ensembling in PromptEnsembling.all(): @@ -83,4 +83,4 @@ def apply_to_layer( } ) - return ({k: pd.DataFrame(v) for k, v in row_bufs.items()}, layer_outputs) + return ({k: pd.DataFrame(v) for k, v in row_bufs.items()}, layer_output) diff --git a/elk/run.py b/elk/run.py index 594bca4cb..7d1117b1f 100644 --- a/elk/run.py +++ b/elk/run.py @@ -201,7 +201,8 @@ def apply_to_layers( for prompt_ensembling in PromptEnsembling.all(): layer_ensembling_results = layer_ensembling( - layer_outputs, prompt_ensembling + layer_outputs=layer_outputs, + prompt_ensembling=prompt_ensembling ) df = pd.DataFrame(layer_ensembling_results.to_dict(), index=[0]) df = df.round(4) diff --git a/elk/training/train.py b/elk/training/train.py index fa6138869..5963212df 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -131,7 +131,7 @@ def apply_to_layer( lr_models = [] row_bufs = defaultdict(list) - layer_outputs = [] + layer_output = [] for ds_name in val_dict: val_h, val_gt, val_lm_preds = val_dict[ds_name] train_h, train_gt, train_lm_preds = train_dict[ds_name] @@ -139,7 +139,7 @@ def apply_to_layer( val_credences = reporter(val_h) - layer_outputs.append( + layer_output.append( { **meta, "val_gt": val_gt.detach(), @@ -205,4 +205,4 @@ def apply_to_layer( } ) - return ({k: pd.DataFrame(v) for k, v in row_bufs.items()}, layer_outputs) + return ({k: pd.DataFrame(v) for k, v in row_bufs.items()}, layer_output) From 06dad69455e950cd4c629b5ec4ff648bf95a892f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 18 Jul 2023 17:39:46 +0000 Subject: [PATCH 064/102] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- elk/metrics/eval.py | 6 +++--- elk/run.py | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index b3f9463af..5e7a232ba 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -225,9 +225,9 @@ def layer_ensembling( # all y_trues are identical, so just get the first y_true = layer_outputs[0][0]["val_gt"].to(device) y_logits = layer_output[0]["val_credences"].to(device) - y_logits, y_true, num_classes = prepare(y_logits=y_logits, - y_true=y_true, - prompt_ensembling=prompt_ensembling) + y_logits, y_true, num_classes = prepare( + y_logits=y_logits, y_true=y_true, prompt_ensembling=prompt_ensembling + ) y_logits_collection.append(y_logits) # get logits and ground_truth from middle to last layer diff --git a/elk/run.py b/elk/run.py index 7d1117b1f..74f3da69d 100644 --- a/elk/run.py +++ b/elk/run.py @@ -201,8 +201,7 @@ def apply_to_layers( for prompt_ensembling in PromptEnsembling.all(): layer_ensembling_results = 
layer_ensembling( - layer_outputs=layer_outputs, - prompt_ensembling=prompt_ensembling + layer_outputs=layer_outputs, prompt_ensembling=prompt_ensembling ) df = pd.DataFrame(layer_ensembling_results.to_dict(), index=[0]) df = df.round(4) From 0d2545b45cd07469000f9c5d7c8b94748b64e6ed Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Tue, 18 Jul 2023 17:52:53 +0000 Subject: [PATCH 065/102] add y_true_initial --- elk/metrics/eval.py | 12 ++++++++---- elk/run.py | 3 +-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index b3f9463af..f4cdc392f 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -221,13 +221,17 @@ def layer_ensembling( device = torch.device("cuda" if torch.cuda.is_available() else "cpu") y_logits_collection = [] + num_classes = None + y_true = None + for layer_output in layer_outputs: # all y_trues are identical, so just get the first - y_true = layer_outputs[0][0]["val_gt"].to(device) y_logits = layer_output[0]["val_credences"].to(device) - y_logits, y_true, num_classes = prepare(y_logits=y_logits, - y_true=y_true, - prompt_ensembling=prompt_ensembling) + y_logits, y_true, num_classes = prepare( + y_logits=y_logits, + y_true=layer_outputs[0][0]["val_gt"].to(device), + prompt_ensembling=prompt_ensembling, + ) y_logits_collection.append(y_logits) # get logits and ground_truth from middle to last layer diff --git a/elk/run.py b/elk/run.py index 7d1117b1f..74f3da69d 100644 --- a/elk/run.py +++ b/elk/run.py @@ -201,8 +201,7 @@ def apply_to_layers( for prompt_ensembling in PromptEnsembling.all(): layer_ensembling_results = layer_ensembling( - layer_outputs=layer_outputs, - prompt_ensembling=prompt_ensembling + layer_outputs=layer_outputs, prompt_ensembling=prompt_ensembling ) df = pd.DataFrame(layer_ensembling_results.to_dict(), index=[0]) df = df.round(4) From 5952b4b5ac1b4cb751c8cab46881b572ba2e36d5 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Tue, 18 Jul 2023 18:50:33 +0000 Subject: [PATCH 066/102] fix test error --- elk/metrics/eval.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index f4cdc392f..0a7cd0b3a 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -221,8 +221,8 @@ def layer_ensembling( device = torch.device("cuda" if torch.cuda.is_available() else "cpu") y_logits_collection = [] - num_classes = None - y_true = None + num_classes = 2 + y_true = layer_outputs[0][0]["val_gt"].to(device) for layer_output in layer_outputs: # all y_trues are identical, so just get the first From 96a3dabc783d0a3f803bdadaebc59deb880148b6 Mon Sep 17 00:00:00 2001 From: jon Date: Wed, 19 Jul 2023 10:44:09 +0100 Subject: [PATCH 067/102] resolved circular import --- elk/evaluation/evaluate.py | 94 +++++++++++++++++++++++----------- elk/training/multi_reporter.py | 44 ++++++++++++++++ elk/training/train.py | 29 +---------- 3 files changed, 109 insertions(+), 58 deletions(-) create mode 100644 elk/training/multi_reporter.py diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index 8462cc005..dc2d2f80b 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -1,6 +1,7 @@ from collections import defaultdict from dataclasses import dataclass from pathlib import Path +from typing import Literal import pandas as pd import torch @@ -9,6 +10,7 @@ from ..files import elk_reporter_dir from ..metrics import evaluate_preds from ..run import Run +from ..training.multi_reporter import AnyReporter, MultiReporter from ..utils 
import Color @@ -38,39 +40,69 @@ def apply_to_layer( experiment_dir = elk_reporter_dir() / self.source - reporter_path = experiment_dir / "reporters" / f"layer_{layer}.pt" - reporter = torch.load(reporter_path, map_location=device) + def load_reporter() -> AnyReporter | MultiReporter: + # check if experiment_dir / "reporters" has .pt files + first = next((experiment_dir / "reporters").iterdir()) + if not first.suffix == ".pt": + return MultiReporter.load( + experiment_dir / "reporters", layer, device=device + ) + else: + path = experiment_dir / "reporters" / f"layer_{layer}.pt" + return torch.load(path, map_location=device) + + reporter = load_reporter() row_bufs = defaultdict(list) - for ds_name, (val_h, val_gt, _) in val_output.items(): - meta = {"dataset": ds_name, "layer": layer} - - val_credences = reporter(val_h) - for mode in ("none", "partial", "full"): - row_bufs["eval"].append( - { - **meta, - "ensembling": mode, - **evaluate_preds(val_gt, val_credences, mode).to_dict(), - } - ) - lr_dir = experiment_dir / "lr_models" - if not self.skip_supervised and lr_dir.exists(): - with open(lr_dir / f"layer_{layer}.pt", "rb") as f: - lr_models = torch.load(f, map_location=device) - if not isinstance(lr_models, list): # backward compatibility - lr_models = [lr_models] - - for i, model in enumerate(lr_models): - model.eval() - row_bufs["lr_eval"].append( - { - "ensembling": mode, - "inlp_iter": i, - **meta, - **evaluate_preds(val_gt, model(val_h), mode).to_dict(), - } - ) + def eval_all( + reporter: AnyReporter | MultiReporter, + prompt_index: int | Literal["multi"] | None = None, + ): + prompt_index = ( + {"prompt_index": prompt_index} if prompt_index is not None else {} + ) + for ds_name, (val_h, val_gt, _) in val_output.items(): + meta = {"dataset": ds_name, "layer": layer} + + val_credences = reporter(val_h) + for mode in ("none", "partial", "full"): + row_bufs["eval"].append( + { + **meta, + "ensembling": mode, + **evaluate_preds(val_gt, val_credences, mode).to_dict(), + **prompt_index, + } + ) + + lr_dir = experiment_dir / "lr_models" + if not self.skip_supervised and lr_dir.exists(): + with open(lr_dir / f"layer_{layer}.pt", "rb") as f: + lr_models = torch.load(f, map_location=device) + if not isinstance( + lr_models, list + ): # backward compatibility + lr_models = [lr_models] + + for i, model in enumerate(lr_models): + model.eval() + row_bufs["lr_eval"].append( + { + "ensembling": mode, + "inlp_iter": i, + **meta, + **evaluate_preds( + val_gt, model(val_h), mode + ).to_dict(), + } + ) + + if isinstance(reporter, MultiReporter): + for prompt_index, single_reporter in enumerate(reporter.reporters): + eval_all(single_reporter, prompt_index) + eval_all(reporter, "multi") + else: + eval_all(reporter) return {k: pd.DataFrame(v) for k, v in row_bufs.items()} diff --git a/elk/training/multi_reporter.py b/elk/training/multi_reporter.py new file mode 100644 index 000000000..8d599f87a --- /dev/null +++ b/elk/training/multi_reporter.py @@ -0,0 +1,44 @@ +from dataclasses import dataclass +from pathlib import Path + +import torch as t + +from elk.training import CcsReporter +from elk.training.common import Reporter + +AnyReporter = CcsReporter | Reporter + + +@dataclass +class ReporterTrainResult: + reporter: AnyReporter + train_loss: float | None + + +class MultiReporter: + def __init__(self, reporter_results: list[ReporterTrainResult]): + self.reporter_results: list[ReporterTrainResult] = reporter_results + self.reporters = [r.reporter for r in reporter_results] + train_losses = ( + 
[r.train_loss for r in reporter_results] + if reporter_results[0].train_loss is not None + else None + ) + self.train_loss = ( + sum(train_losses) / len(train_losses) if train_losses is not None else None + ) + + def __call__(self, h): + credences = [r(h) for r in self.reporters] + return t.stack(credences).mean(dim=0) + + @staticmethod + def load(path: Path, layer: int, device: str): + prompt_folders = [p for p in path.iterdir() if p.is_dir()] + reporters = [] + for folder in prompt_folders: + path = folder / "reporters" / f"layer_{layer}.pt" + reporter = t.load(path, map_location=device) + reporters.append(reporter) + # TODO for now I don't care about the train losses + return MultiReporter([ReporterTrainResult(r, None) for r in reporters]) diff --git a/elk/training/train.py b/elk/training/train.py index be8402238..6df85ef1a 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -17,34 +17,9 @@ from ..training.supervised import train_supervised from . import Classifier from .ccs_reporter import CcsConfig, CcsReporter -from .common import FitterConfig, Reporter +from .common import FitterConfig from .eigen_reporter import EigenFitter, EigenFitterConfig - -AnyReporter = CcsReporter | Reporter - - -@dataclass -class ReporterTrainResult: - reporter: AnyReporter - train_loss: float | None - - -class MultiReporter: - def __init__(self, reporter_results: list[ReporterTrainResult]): - self.reporter_results: list[ReporterTrainResult] = reporter_results - self.reporters = [r.reporter for r in reporter_results] - train_losses = ( - [r.train_loss for r in reporter_results] - if reporter_results[0].train_loss is not None - else None - ) - self.train_loss = ( - sum(train_losses) / len(train_losses) if train_losses is not None else None - ) - - def __call__(self, h): - credences = [r(h) for r in self.reporters] - return torch.stack(credences).mean(dim=0) +from .multi_reporter import AnyReporter, MultiReporter, ReporterTrainResult def evaluate_and_save( From 9c2def0df61dd966a5901df29a5e2c1519c6673d Mon Sep 17 00:00:00 2001 From: jon Date: Wed, 19 Jul 2023 20:42:49 +0100 Subject: [PATCH 068/102] fixed index passing --- elk/evaluation/evaluate.py | 9 +++++ elk/run.py | 2 - elk/training/multi_reporter.py | 1 + elk/training/train.py | 70 +++++++++++++++++++++++----------- 4 files changed, 58 insertions(+), 24 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index dc2d2f80b..18485f36e 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -38,6 +38,15 @@ def apply_to_layer( device = self.get_device(devices, world_size) val_output = self.prepare_data(device, layer, "val") + val_output = { + ds_name: ( + train_h[:, self.prompt_indices, ...], + train_gt, + lm_preds[:, self.prompt_indices, ...] 
if lm_preds is not None else None, + ) + for ds_name, (train_h, train_gt, lm_preds) in val_output.items() + } + experiment_dir = elk_reporter_dir() / self.source def load_reporter() -> AnyReporter | MultiReporter: diff --git a/elk/run.py b/elk/run.py index 7e48cfa2d..343360a3b 100644 --- a/elk/run.py +++ b/elk/run.py @@ -143,8 +143,6 @@ def prepare_data( split = ds[key].with_format("torch", device=device, dtype=torch.int16) labels = assert_type(Tensor, split["label"]) hiddens = int16_to_float32(assert_type(Tensor, split[f"hidden_{layer}"])) - if self.prompt_indices: - hiddens = hiddens[:, self.prompt_indices] with split.formatted_as("torch", device=device): has_preds = "model_logits" in split.features diff --git a/elk/training/multi_reporter.py b/elk/training/multi_reporter.py index 8d599f87a..7b75a5bd7 100644 --- a/elk/training/multi_reporter.py +++ b/elk/training/multi_reporter.py @@ -13,6 +13,7 @@ class ReporterTrainResult: reporter: AnyReporter train_loss: float | None + prompt_index: int | None class MultiReporter: diff --git a/elk/training/train.py b/elk/training/train.py index 6df85ef1a..cdcda76f6 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -38,10 +38,16 @@ def evaluate_and_save( def eval_all( reporter: AnyReporter | MultiReporter, - prompt_index: int | Literal["multi"], + prompt_index: int | Literal["multi"] | None = None, + i: int = 0, ): - val_credences = reporter(val_h) - train_credences = reporter(train_h) + if isinstance(prompt_index, int): + val_credences = reporter(val_h[:, [prompt_index], :, :]) + train_credences = reporter(train_h[:, [prompt_index], :, :]) + else: + # TODO implement diagonal + val_credences = reporter(val_h) + train_credences = reporter(train_h) prompt_index = {"prompt_index": prompt_index} for mode in ("none", "partial", "full"): row_bufs["eval"].append( @@ -96,10 +102,11 @@ def eval_all( ) if isinstance(reporter, MultiReporter): - for prompt_index, reporter_result in enumerate(reporter.reporter_results): - eval_all(reporter_result.reporter, prompt_index) - - eval_all(reporter, "multi") + for reporter_result in reporter.reporter_results: + eval_all(reporter_result.reporter, reporter_result.prompt_index) + eval_all(reporter, prompt_index="multi") + else: + eval_all(reporter, prompt_index=None) return {k: pd.DataFrame(v) for k, v in row_bufs.items()} @@ -154,9 +161,10 @@ def make_eval(self, model, eval_dataset): # Create a separate function to handle the reporter training. def train_and_save_reporter( - self, device, layer, out_dir, train_dict + self, device, layer, out_dir, train_dict, prompt_index=None ) -> ReporterTrainResult: (first_train_h, train_gt, _), *rest = train_dict.values() # TODO can remove? 
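# A minimal sketch of the tensor slicing these per-prompt hunks rely on, assuming
# hidden states laid out as (n, v, k, d): n examples, v prompt variants, k classes,
# d hidden dimensions, matching the (_, v, k, d) unpacking of first_train_h in this
# function. Indexing the variant axis with a one-element list keeps that axis, so a
# reporter trained on a single prompt still receives a 4-dimensional tensor.
import torch

h = torch.randn(8, 3, 2, 16)       # toy hiddens: (n=8, v=3, k=2, d=16)
one_prompt = h[:, [1], ...]        # (8, 1, 2, 16); h[:, 1, ...] would drop the axis
assert one_prompt.shape == (8, 1, 2, 16)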
+ breakpoint() (_, v, k, d) = first_train_h.shape if not all(other_h.shape[-1] == d for other_h, _, _ in rest): raise ValueError("All datasets must have the same hidden state size") @@ -209,7 +217,7 @@ def train_and_save_reporter( out_dir.mkdir(parents=True, exist_ok=True) torch.save(reporter, out_dir / f"layer_{layer}.pt") - return ReporterTrainResult(reporter, train_loss) + return ReporterTrainResult(reporter, train_loss, prompt_index) def train_lr_model(self, train_dict, device, layer, out_dir) -> list[Classifier]: if self.supervised != "none": @@ -241,7 +249,8 @@ def apply_to_layer( self.make_reproducible(seed=self.net.seed + layer) device = self.get_device(devices, world_size) - train_dict = self.prepare_data(device, layer, "train") + train_dict = self.prepare_data(device, layer, "train") # prepare data no + # longer does anything on prompt indices val_dict = self.prepare_data(device, layer, "val") (first_train_h, train_gt, _), *rest = train_dict.values() @@ -251,33 +260,40 @@ def apply_to_layer( # reporter_dir, lr_dir = self.create_models_dir(assert_type(Path, self.out_dir)) if probe_per_prompt: - train_dicts = [ + prompt_indices = self.prompt_indices if self.prompt_indices else range(v) + prompt_train_dicts = [ { ds_name: ( - train_h[:, i : i + 1, ...], + train_h[:, [prompt_index], ...], train_gt, - lm_preds[:, i : i + 1, ...] if lm_preds is not None else None, + lm_preds[:, [prompt_index], ...] + if lm_preds is not None + else None, ) } for ds_name, (train_h, _, lm_preds) in train_dict.items() - for i in range(v) # v is number of variants + for prompt_index in prompt_indices # v is number of variants ] results = [] - for i, train_dict in enumerate(train_dicts): - # format i as a 2 digit string, assumes that there will never be more - # than 100 prompts - str_i = str(i).zfill(2) + + for prompt_index, prompt_train_dict in zip( + prompt_indices, prompt_train_dicts + ): + assert prompt_index < 100 # format i as a 2 digit string + str_i = str(prompt_index).zfill(2) base = self.out_dir / "reporters" / f"prompt_{str_i}" reporters_path = base / "reporters" lr_path = base / "lr_models" reporter_train_result = self.train_and_save_reporter( - device, layer, reporters_path, train_dict + device, layer, reporters_path, prompt_train_dict, prompt_index ) results.append(reporter_train_result) - lr_models = self.train_lr_model(train_dict, device, layer, lr_path) + lr_models = self.train_lr_model( + prompt_train_dict, device, layer, lr_path + ) maybe_multi_reporter = MultiReporter(results) train_loss = maybe_multi_reporter.train_loss @@ -285,15 +301,25 @@ def apply_to_layer( # TODO fix lr_models else: + prompt_train_dict = { + ds_name: ( + train_h[:, self.prompt_indices, ...], + train_gt, + lm_preds[:, self.prompt_indices, ...] 
+ if lm_preds is not None + else None, + ) + for ds_name, (train_h, _, lm_preds) in train_dict.items() + } reporter_train_result = self.train_and_save_reporter( - device, layer, self.out_dir / "reporters", train_dict + device, layer, self.out_dir / "reporters", prompt_train_dict ) maybe_multi_reporter = reporter_train_result.reporter train_loss = reporter_train_result.train_loss lr_models = self.train_lr_model( - train_dict, device, layer, self.out_dir / "lr_models" + prompt_train_dict, device, layer, self.out_dir / "lr_models" ) return evaluate_and_save( From 29eeb7f964c2810b9864df300fdff124ac43e1de Mon Sep 17 00:00:00 2001 From: jon Date: Thu, 20 Jul 2023 14:24:06 +0100 Subject: [PATCH 069/102] fixed index passing again --- elk/evaluation/evaluate.py | 11 ++--------- elk/run.py | 13 +++++++++++++ elk/training/multi_reporter.py | 8 ++++++-- elk/training/train.py | 22 +++++++++------------- 4 files changed, 30 insertions(+), 24 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index 18485f36e..f5a592e75 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -9,7 +9,7 @@ from ..files import elk_reporter_dir from ..metrics import evaluate_preds -from ..run import Run +from ..run import Run, select_data from ..training.multi_reporter import AnyReporter, MultiReporter from ..utils import Color @@ -38,14 +38,7 @@ def apply_to_layer( device = self.get_device(devices, world_size) val_output = self.prepare_data(device, layer, "val") - val_output = { - ds_name: ( - train_h[:, self.prompt_indices, ...], - train_gt, - lm_preds[:, self.prompt_indices, ...] if lm_preds is not None else None, - ) - for ds_name, (train_h, train_gt, lm_preds) in val_output.items() - } + val_output = select_data(val_output, self.prompt_indices) experiment_dir = elk_reporter_dir() / self.source diff --git a/elk/run.py b/elk/run.py index 343360a3b..cece5d9c0 100644 --- a/elk/run.py +++ b/elk/run.py @@ -33,6 +33,17 @@ PreparedData = dict[str, tuple[Tensor, Tensor, Tensor | None]] +def select_data(prepared_data: PreparedData, prompt_indices: list[int]): + return { + ds_name: ( + train_h[:, prompt_indices, ...], + train_gt, + lm_preds[:, prompt_indices, ...] if lm_preds is not None else None, + ) + for ds_name, (train_h, train_gt, lm_preds) in prepared_data.items() + } + + @dataclass class Run(ABC, Serializable): data: Extract @@ -143,6 +154,8 @@ def prepare_data( split = ds[key].with_format("torch", device=device, dtype=torch.int16) labels = assert_type(Tensor, split["label"]) hiddens = int16_to_float32(assert_type(Tensor, split[f"hidden_{layer}"])) + if self.prompt_indices: + hiddens = hiddens[:, self.prompt_indices, ...] 
with split.formatted_as("torch", device=device): has_preds = "model_logits" in split.features diff --git a/elk/training/multi_reporter.py b/elk/training/multi_reporter.py index 7b75a5bd7..0269d1e5c 100644 --- a/elk/training/multi_reporter.py +++ b/elk/training/multi_reporter.py @@ -30,8 +30,12 @@ def __init__(self, reporter_results: list[ReporterTrainResult]): ) def __call__(self, h): - credences = [r(h) for r in self.reporters] - return t.stack(credences).mean(dim=0) + num_variants = h.shape[1] + assert len(self.reporters) == num_variants + credences = [] + for i, reporter in enumerate(self.reporters): + credences.append(reporter(h[:, [i], :, :])) + return t.stack(credences, dim=0).mean(dim=0) @staticmethod def load(path: Path, layer: int, device: str): diff --git a/elk/training/train.py b/elk/training/train.py index cdcda76f6..a8e9ce557 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -42,10 +42,9 @@ def eval_all( i: int = 0, ): if isinstance(prompt_index, int): - val_credences = reporter(val_h[:, [prompt_index], :, :]) - train_credences = reporter(train_h[:, [prompt_index], :, :]) + val_credences = reporter(val_h[:, [i], :, :]) + train_credences = reporter(train_h[:, [i], :, :]) else: - # TODO implement diagonal val_credences = reporter(val_h) train_credences = reporter(train_h) prompt_index = {"prompt_index": prompt_index} @@ -90,20 +89,20 @@ def eval_all( } ) - for i, model in enumerate(lr_models): + for lr_model_num, model in enumerate(lr_models): row_bufs["lr_eval"].append( { **meta, "ensembling": mode, - "inlp_iter": i, + "inlp_iter": lr_model_num, **evaluate_preds(val_gt, model(val_h), mode).to_dict(), **prompt_index, } ) if isinstance(reporter, MultiReporter): - for reporter_result in reporter.reporter_results: - eval_all(reporter_result.reporter, reporter_result.prompt_index) + for i, reporter_result in enumerate(reporter.reporter_results): + eval_all(reporter_result.reporter, reporter_result.prompt_index, i) eval_all(reporter, prompt_index="multi") else: eval_all(reporter, prompt_index=None) @@ -164,7 +163,6 @@ def train_and_save_reporter( self, device, layer, out_dir, train_dict, prompt_index=None ) -> ReporterTrainResult: (first_train_h, train_gt, _), *rest = train_dict.values() # TODO can remove? - breakpoint() (_, v, k, d) = first_train_h.shape if not all(other_h.shape[-1] == d for other_h, _, _ in rest): raise ValueError("All datasets must have the same hidden state size") @@ -264,15 +262,13 @@ def apply_to_layer( prompt_train_dicts = [ { ds_name: ( - train_h[:, [prompt_index], ...], + train_h[:, [i], ...], train_gt, - lm_preds[:, [prompt_index], ...] - if lm_preds is not None - else None, + lm_preds[:, [i], ...] 
if lm_preds is not None else None, ) } for ds_name, (train_h, _, lm_preds) in train_dict.items() - for prompt_index in prompt_indices # v is number of variants + for i, _ in enumerate(prompt_indices) ] results = [] From f533418f9907c8816233d912c3ae47a50a06ee96 Mon Sep 17 00:00:00 2001 From: jon Date: Thu, 20 Jul 2023 14:24:57 +0100 Subject: [PATCH 070/102] add assert --- elk/training/multi_reporter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/elk/training/multi_reporter.py b/elk/training/multi_reporter.py index 0269d1e5c..3a021dc0c 100644 --- a/elk/training/multi_reporter.py +++ b/elk/training/multi_reporter.py @@ -18,6 +18,7 @@ class ReporterTrainResult: class MultiReporter: def __init__(self, reporter_results: list[ReporterTrainResult]): + assert len(reporter_results) > 0, "Must have at least one reporter" self.reporter_results: list[ReporterTrainResult] = reporter_results self.reporters = [r.reporter for r in reporter_results] train_losses = ( From 0f5ce0b2a6aacf5f7338c168b13a07ae1b7e53a3 Mon Sep 17 00:00:00 2001 From: jon Date: Thu, 20 Jul 2023 14:29:17 +0100 Subject: [PATCH 071/102] fix prompt index in loading --- elk/training/multi_reporter.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/elk/training/multi_reporter.py b/elk/training/multi_reporter.py index 3a021dc0c..7b2e9508b 100644 --- a/elk/training/multi_reporter.py +++ b/elk/training/multi_reporter.py @@ -45,6 +45,7 @@ def load(path: Path, layer: int, device: str): for folder in prompt_folders: path = folder / "reporters" / f"layer_{layer}.pt" reporter = t.load(path, map_location=device) - reporters.append(reporter) + prompt_index = int(folder.name.split("_")[-1]) + reporters.append((reporter, prompt_index)) # TODO for now I don't care about the train losses - return MultiReporter([ReporterTrainResult(r, None) for r in reporters]) + return MultiReporter([ReporterTrainResult(r, None, pi) for r, pi in reporters]) From 327d1eb17e112c79004f6041a5829d7203801628 Mon Sep 17 00:00:00 2001 From: jon Date: Thu, 20 Jul 2023 14:38:20 +0100 Subject: [PATCH 072/102] remove redundant method --- elk/training/train.py | 33 ++------------------------------- 1 file changed, 2 insertions(+), 31 deletions(-) diff --git a/elk/training/train.py b/elk/training/train.py index a8e9ce557..741405f33 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -2,14 +2,12 @@ from collections import defaultdict from dataclasses import dataclass, replace -from pathlib import Path from typing import Literal import pandas as pd import torch from einops import rearrange, repeat from simple_parsing import subgroups -from simple_parsing.helpers.serialization import save from ..evaluation import Eval from ..metrics import evaluate_preds, to_one_hot @@ -124,20 +122,6 @@ class Elicit(Run): cross-validation. Defaults to "single", which means to train a single classifier on the training data. "cv" means to use cross-validation.""" - def create_models_dir(self, out_dir: Path): - lr_dir = None - lr_dir = out_dir / "lr_models" - reporter_dir = out_dir / "reporters" - - lr_dir.mkdir(parents=True, exist_ok=True) - reporter_dir.mkdir(parents=True, exist_ok=True) - - # Save the reporter config separately in the reporter directory - # for convenient loading of reporters later. 
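# A rough sketch of the per-prompt reporter layout implied by the save path in
# train.py (out_dir / "reporters" / f"prompt_{str_i}" / "reporters" /
# f"layer_{layer}.pt") and by the folder-name parsing in MultiReporter.load;
# the concrete prompt and layer numbers are only illustrative:
#
#   <out_dir>/reporters/
#       prompt_00/reporters/layer_0.pt
#       prompt_00/reporters/layer_1.pt
#       prompt_01/reporters/layer_0.pt
#       prompt_01/reporters/layer_1.pt
#
# so the prompt index is recovered from the folder name:
from pathlib import Path

folder = Path("prompt_01")
prompt_index = int(folder.name.split("_")[-1])   # -> 1, the inverse of str(i).zfill(2)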
- save(self.net, reporter_dir / "cfg.yaml", save_dc_types=True) - - return reporter_dir, lr_dir - def make_eval(self, model, eval_dataset): assert self.out_dir is not None return Eval( @@ -254,9 +238,6 @@ def apply_to_layer( (first_train_h, train_gt, _), *rest = train_dict.values() (_, v, k, d) = first_train_h.shape - # TODO is this even needed - # reporter_dir, lr_dir = self.create_models_dir(assert_type(Path, self.out_dir)) - if probe_per_prompt: prompt_indices = self.prompt_indices if self.prompt_indices else range(v) prompt_train_dicts = [ @@ -297,25 +278,15 @@ def apply_to_layer( # TODO fix lr_models else: - prompt_train_dict = { - ds_name: ( - train_h[:, self.prompt_indices, ...], - train_gt, - lm_preds[:, self.prompt_indices, ...] - if lm_preds is not None - else None, - ) - for ds_name, (train_h, _, lm_preds) in train_dict.items() - } reporter_train_result = self.train_and_save_reporter( - device, layer, self.out_dir / "reporters", prompt_train_dict + device, layer, self.out_dir / "reporters", train_dict ) maybe_multi_reporter = reporter_train_result.reporter train_loss = reporter_train_result.train_loss lr_models = self.train_lr_model( - prompt_train_dict, device, layer, self.out_dir / "lr_models" + train_dict, device, layer, self.out_dir / "lr_models" ) return evaluate_and_save( From 51b7d3c59ae45ca58284da81351a3d189cd1dc6b Mon Sep 17 00:00:00 2001 From: jon Date: Thu, 20 Jul 2023 16:22:55 +0100 Subject: [PATCH 073/102] correctly eval with multiple probes and some renaming --- elk/evaluation/evaluate.py | 13 +++++---- elk/training/multi_reporter.py | 44 ++++++++++++++++-------------- elk/training/train.py | 50 ++++++++++++++++------------------ 3 files changed, 53 insertions(+), 54 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index f5a592e75..49aa58182 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -10,7 +10,7 @@ from ..files import elk_reporter_dir from ..metrics import evaluate_preds from ..run import Run, select_data -from ..training.multi_reporter import AnyReporter, MultiReporter +from ..training.multi_reporter import MultiReporter, SingleReporter from ..utils import Color @@ -42,7 +42,7 @@ def apply_to_layer( experiment_dir = elk_reporter_dir() / self.source - def load_reporter() -> AnyReporter | MultiReporter: + def load_reporter() -> SingleReporter | MultiReporter: # check if experiment_dir / "reporters" has .pt files first = next((experiment_dir / "reporters").iterdir()) if not first.suffix == ".pt": @@ -58,16 +58,17 @@ def load_reporter() -> AnyReporter | MultiReporter: row_bufs = defaultdict(list) def eval_all( - reporter: AnyReporter | MultiReporter, + reporter: SingleReporter | MultiReporter, prompt_index: int | Literal["multi"] | None = None, + i: int = 0, ): prompt_index = ( {"prompt_index": prompt_index} if prompt_index is not None else {} ) for ds_name, (val_h, val_gt, _) in val_output.items(): meta = {"dataset": ds_name, "layer": layer} + val_credences = reporter(val_h[:, [i], :, :]) - val_credences = reporter(val_h) for mode in ("none", "partial", "full"): row_bufs["eval"].append( { @@ -101,8 +102,8 @@ def eval_all( ) if isinstance(reporter, MultiReporter): - for prompt_index, single_reporter in enumerate(reporter.reporters): - eval_all(single_reporter, prompt_index) + for i, res in enumerate(reporter.reporter_w_infos): + eval_all(res.model, res.prompt_index, i) eval_all(reporter, "multi") else: eval_all(reporter) diff --git a/elk/training/multi_reporter.py b/elk/training/multi_reporter.py index 
7b2e9508b..bb329d124 100644 --- a/elk/training/multi_reporter.py +++ b/elk/training/multi_reporter.py @@ -6,24 +6,25 @@ from elk.training import CcsReporter from elk.training.common import Reporter -AnyReporter = CcsReporter | Reporter +SingleReporter = CcsReporter | Reporter @dataclass -class ReporterTrainResult: - reporter: AnyReporter - train_loss: float | None - prompt_index: int | None +class ReporterWithInfo: # I don't love this name but I have no choice because + # of the other Reporter + model: SingleReporter + train_loss: float | None = None + prompt_index: int | None = None class MultiReporter: - def __init__(self, reporter_results: list[ReporterTrainResult]): - assert len(reporter_results) > 0, "Must have at least one reporter" - self.reporter_results: list[ReporterTrainResult] = reporter_results - self.reporters = [r.reporter for r in reporter_results] + def __init__(self, reporter: list[ReporterWithInfo]): + assert len(reporter) > 0, "Must have at least one reporter" + self.reporter_w_infos: list[ReporterWithInfo] = reporter + self.models = [r.model for r in reporter] train_losses = ( - [r.train_loss for r in reporter_results] - if reporter_results[0].train_loss is not None + [r.train_loss for r in reporter] + if reporter[0].train_loss is not None else None ) self.train_loss = ( @@ -32,20 +33,21 @@ def __init__(self, reporter_results: list[ReporterTrainResult]): def __call__(self, h): num_variants = h.shape[1] - assert len(self.reporters) == num_variants + assert len(self.models) == num_variants credences = [] - for i, reporter in enumerate(self.reporters): + for i, reporter in enumerate(self.models): credences.append(reporter(h[:, [i], :, :])) return t.stack(credences, dim=0).mean(dim=0) @staticmethod def load(path: Path, layer: int, device: str): prompt_folders = [p for p in path.iterdir() if p.is_dir()] - reporters = [] - for folder in prompt_folders: - path = folder / "reporters" / f"layer_{layer}.pt" - reporter = t.load(path, map_location=device) - prompt_index = int(folder.name.split("_")[-1]) - reporters.append((reporter, prompt_index)) - # TODO for now I don't care about the train losses - return MultiReporter([ReporterTrainResult(r, None, pi) for r, pi in reporters]) + reporters = [ + ( + t.load(folder / "reporters" / f"layer_{layer}.pt", map_location=device), + int(folder.name.split("_")[-1]), # prompt index + ) + for folder in prompt_folders + ] + # we don't care about the train losses for evaluating + return MultiReporter([ReporterWithInfo(r, None, pi) for r, pi in reporters]) diff --git a/elk/training/train.py b/elk/training/train.py index 741405f33..cf97b0a65 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -17,12 +17,12 @@ from .ccs_reporter import CcsConfig, CcsReporter from .common import FitterConfig from .eigen_reporter import EigenFitter, EigenFitterConfig -from .multi_reporter import AnyReporter, MultiReporter, ReporterTrainResult +from .multi_reporter import MultiReporter, ReporterWithInfo, SingleReporter def evaluate_and_save( train_loss: float | None, - reporter: AnyReporter | MultiReporter, + reporter: SingleReporter | MultiReporter, train_dict: PreparedData, val_dict: PreparedData, lr_models: list[Classifier], @@ -35,7 +35,7 @@ def evaluate_and_save( meta = {"dataset": ds_name, "layer": layer} def eval_all( - reporter: AnyReporter | MultiReporter, + reporter: SingleReporter | MultiReporter, prompt_index: int | Literal["multi"] | None = None, i: int = 0, ): @@ -45,7 +45,9 @@ def eval_all( else: val_credences = reporter(val_h) 
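# A small sketch of what the MultiReporter aggregation above computes, assuming one
# trained reporter per prompt variant and per-reporter credences of shape (n, 1, k):
# reporter i only ever scores variant i of h, and the per-prompt credences are then
# averaged into a single tensor. The stand-in "reporters" below are toy callables,
# not real CcsReporter or Reporter objects.
import torch as t

def multi_call(models, h):                 # h: (n, v, k, d), len(models) == v
    credences = [m(h[:, [i], :, :]) for i, m in enumerate(models)]
    return t.stack(credences, dim=0).mean(dim=0)

toy_models = [lambda x, s=s: x.mean(dim=-1) + s for s in (0.0, 1.0)]
h = t.randn(4, 2, 2, 8)                    # (n=4, v=2, k=2, d=8)
out = multi_call(toy_models, h)
assert out.shape == (4, 1, 2)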
train_credences = reporter(train_h) - prompt_index = {"prompt_index": prompt_index} + prompt_index_dict = ( + {"prompt_index": prompt_index} if prompt_index is not None else {} + ) for mode in ("none", "partial", "full"): row_bufs["eval"].append( { @@ -53,7 +55,7 @@ def eval_all( "ensembling": mode, **evaluate_preds(val_gt, val_credences, mode).to_dict(), "train_loss": train_loss, - **prompt_index, + **prompt_index_dict, } ) @@ -63,7 +65,7 @@ def eval_all( "ensembling": mode, **evaluate_preds(train_gt, train_credences, mode).to_dict(), "train_loss": train_loss, - **prompt_index, + **prompt_index_dict, } ) @@ -73,7 +75,7 @@ def eval_all( **meta, "ensembling": mode, **evaluate_preds(val_gt, val_lm_preds, mode).to_dict(), - **prompt_index, + **prompt_index_dict, } ) @@ -83,7 +85,7 @@ def eval_all( **meta, "ensembling": mode, **evaluate_preds(train_gt, train_lm_preds, mode).to_dict(), - **prompt_index, + **prompt_index_dict, } ) @@ -94,13 +96,13 @@ def eval_all( "ensembling": mode, "inlp_iter": lr_model_num, **evaluate_preds(val_gt, model(val_h), mode).to_dict(), - **prompt_index, + **prompt_index_dict, } ) if isinstance(reporter, MultiReporter): - for i, reporter_result in enumerate(reporter.reporter_results): - eval_all(reporter_result.reporter, reporter_result.prompt_index, i) + for i, reporter_result in enumerate(reporter.reporter_w_infos): + eval_all(reporter_result.model, reporter_result.prompt_index, i) eval_all(reporter, prompt_index="multi") else: eval_all(reporter, prompt_index=None) @@ -145,7 +147,7 @@ def make_eval(self, model, eval_dataset): # Create a separate function to handle the reporter training. def train_and_save_reporter( self, device, layer, out_dir, train_dict, prompt_index=None - ) -> ReporterTrainResult: + ) -> ReporterWithInfo: (first_train_h, train_gt, _), *rest = train_dict.values() # TODO can remove? 
(_, v, k, d) = first_train_h.shape if not all(other_h.shape[-1] == d for other_h, _, _ in rest): @@ -199,7 +201,7 @@ def train_and_save_reporter( out_dir.mkdir(parents=True, exist_ok=True) torch.save(reporter, out_dir / f"layer_{layer}.pt") - return ReporterTrainResult(reporter, train_loss, prompt_index) + return ReporterWithInfo(reporter, train_loss, prompt_index) def train_lr_model(self, train_dict, device, layer, out_dir) -> list[Classifier]: if self.supervised != "none": @@ -231,14 +233,14 @@ def apply_to_layer( self.make_reproducible(seed=self.net.seed + layer) device = self.get_device(devices, world_size) - train_dict = self.prepare_data(device, layer, "train") # prepare data no - # longer does anything on prompt indices + train_dict = self.prepare_data(device, layer, "train") val_dict = self.prepare_data(device, layer, "val") (first_train_h, train_gt, _), *rest = train_dict.values() (_, v, k, d) = first_train_h.shape if probe_per_prompt: + # self.prompt_indices being () actually means "all prompts" prompt_indices = self.prompt_indices if self.prompt_indices else range(v) prompt_train_dicts = [ { @@ -261,33 +263,27 @@ def apply_to_layer( str_i = str(prompt_index).zfill(2) base = self.out_dir / "reporters" / f"prompt_{str_i}" reporters_path = base / "reporters" - lr_path = base / "lr_models" + base / "lr_models" reporter_train_result = self.train_and_save_reporter( device, layer, reporters_path, prompt_train_dict, prompt_index ) results.append(reporter_train_result) - lr_models = self.train_lr_model( - prompt_train_dict, device, layer, lr_path - ) - + # it is called maybe_multi_reporter because it might be a single reporter maybe_multi_reporter = MultiReporter(results) train_loss = maybe_multi_reporter.train_loss - - # TODO fix lr_models - else: reporter_train_result = self.train_and_save_reporter( device, layer, self.out_dir / "reporters", train_dict ) - maybe_multi_reporter = reporter_train_result.reporter + maybe_multi_reporter = reporter_train_result.model train_loss = reporter_train_result.train_loss - lr_models = self.train_lr_model( - train_dict, device, layer, self.out_dir / "lr_models" - ) + lr_models = self.train_lr_model( + train_dict, device, layer, self.out_dir / "lr_models" + ) return evaluate_and_save( train_loss, maybe_multi_reporter, train_dict, val_dict, lr_models, layer From 75fe56099306368c81db40fd99409fd4607b8432 Mon Sep 17 00:00:00 2001 From: jon Date: Thu, 20 Jul 2023 16:25:48 +0100 Subject: [PATCH 074/102] remove wrong function --- elk/evaluation/evaluate.py | 4 +--- elk/run.py | 11 ----------- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index 49aa58182..8581462d6 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -9,7 +9,7 @@ from ..files import elk_reporter_dir from ..metrics import evaluate_preds -from ..run import Run, select_data +from ..run import Run from ..training.multi_reporter import MultiReporter, SingleReporter from ..utils import Color @@ -38,8 +38,6 @@ def apply_to_layer( device = self.get_device(devices, world_size) val_output = self.prepare_data(device, layer, "val") - val_output = select_data(val_output, self.prompt_indices) - experiment_dir = elk_reporter_dir() / self.source def load_reporter() -> SingleReporter | MultiReporter: diff --git a/elk/run.py b/elk/run.py index cece5d9c0..03a0a46ba 100644 --- a/elk/run.py +++ b/elk/run.py @@ -33,17 +33,6 @@ PreparedData = dict[str, tuple[Tensor, Tensor, Tensor | None]] -def select_data(prepared_data: 
PreparedData, prompt_indices: list[int]): - return { - ds_name: ( - train_h[:, prompt_indices, ...], - train_gt, - lm_preds[:, prompt_indices, ...] if lm_preds is not None else None, - ) - for ds_name, (train_h, train_gt, lm_preds) in prepared_data.items() - } - - @dataclass class Run(ABC, Serializable): data: Extract From 1b6757a78518dcb035f3107b1486e1df0a43e993 Mon Sep 17 00:00:00 2001 From: jon Date: Thu, 20 Jul 2023 16:35:23 +0100 Subject: [PATCH 075/102] pyright --- elk/evaluation/evaluate.py | 4 ++-- elk/training/multi_reporter.py | 5 ++++- elk/training/train.py | 1 - 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index 8581462d6..44aa0411d 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -60,7 +60,7 @@ def eval_all( prompt_index: int | Literal["multi"] | None = None, i: int = 0, ): - prompt_index = ( + prompt_index_dict = ( {"prompt_index": prompt_index} if prompt_index is not None else {} ) for ds_name, (val_h, val_gt, _) in val_output.items(): @@ -73,7 +73,7 @@ def eval_all( **meta, "ensembling": mode, **evaluate_preds(val_gt, val_credences, mode).to_dict(), - **prompt_index, + **prompt_index_dict, } ) diff --git a/elk/training/multi_reporter.py b/elk/training/multi_reporter.py index bb329d124..4f373b12d 100644 --- a/elk/training/multi_reporter.py +++ b/elk/training/multi_reporter.py @@ -27,8 +27,11 @@ def __init__(self, reporter: list[ReporterWithInfo]): if reporter[0].train_loss is not None else None ) + self.train_loss = ( - sum(train_losses) / len(train_losses) if train_losses is not None else None + sum(train_losses) / len(train_losses) + if train_losses is not None # type: ignore + else None ) def __call__(self, h): diff --git a/elk/training/train.py b/elk/training/train.py index cf97b0a65..b0af8b394 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -263,7 +263,6 @@ def apply_to_layer( str_i = str(prompt_index).zfill(2) base = self.out_dir / "reporters" / f"prompt_{str_i}" reporters_path = base / "reporters" - base / "lr_models" reporter_train_result = self.train_and_save_reporter( device, layer, reporters_path, prompt_train_dict, prompt_index From 0d6c8b9ae897346f2caacf6068e8f7ebcf0ae6a1 Mon Sep 17 00:00:00 2001 From: jon Date: Thu, 20 Jul 2023 16:44:32 +0100 Subject: [PATCH 076/102] pytest --- elk/run.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/elk/run.py b/elk/run.py index 03a0a46ba..a17b01cb9 100644 --- a/elk/run.py +++ b/elk/run.py @@ -199,10 +199,10 @@ def apply_to_layers( sortby.append("prompt_index") df = pd.concat(dfs).sort_values(by=sortby) - # Move prompt_index to the 2'th column - cols = list(df.columns) - cols.insert(2, cols.pop(cols.index("prompt_index"))) - df = df.reindex(columns=cols) + if "prompt_index" in df.columns: + cols = list(df.columns) + cols.insert(2, cols.pop(cols.index("prompt_index"))) + df = df.reindex(columns=cols) # Save the CSV out_path = self.out_dir / f"{name}.csv" From 785537b4882e9996e7c6b70aeb0e988d5c173acf Mon Sep 17 00:00:00 2001 From: jon Date: Thu, 20 Jul 2023 16:49:43 +0100 Subject: [PATCH 077/102] pyright --- elk/training/multi_reporter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/elk/training/multi_reporter.py b/elk/training/multi_reporter.py index 4f373b12d..602b23e95 100644 --- a/elk/training/multi_reporter.py +++ b/elk/training/multi_reporter.py @@ -29,8 +29,8 @@ def __init__(self, reporter: list[ReporterWithInfo]): ) self.train_loss = ( - 
sum(train_losses) / len(train_losses) - if train_losses is not None # type: ignore + sum(train_losses) / len(train_losses) # type: ignore + if train_losses is not None else None ) From 049cd6331d8c7f8954f1ed5b954def13e653258b Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Sat, 22 Jul 2023 21:49:44 +0100 Subject: [PATCH 078/102] replace mode with prompt_ensembling.value --- elk/evaluation/evaluate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index 3a6d63fe2..ed3b9c129 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -67,8 +67,8 @@ def apply_to_layer( row_bufs["lm_eval"].append( { **meta, - "ensembling": mode, - **evaluate_preds(val_gt, val_lm_preds, mode).to_dict(), + "ensembling": prompt_ensembling.value, + **evaluate_preds(val_gt, val_lm_preds, prompt_ensembling.value).to_dict(), } ) From c8236ddcc2a7fe5b844b5ada0eb8a80740cbd303 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 22 Jul 2023 20:49:51 +0000 Subject: [PATCH 079/102] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- elk/evaluation/evaluate.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index ed3b9c129..581ebe0f9 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -68,7 +68,9 @@ def apply_to_layer( { **meta, "ensembling": prompt_ensembling.value, - **evaluate_preds(val_gt, val_lm_preds, prompt_ensembling.value).to_dict(), + **evaluate_preds( + val_gt, val_lm_preds, prompt_ensembling.value + ).to_dict(), } ) From 56d1796eaa9a078993f588e92ad47ceeaf59bd21 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Sun, 23 Jul 2023 19:35:04 +0000 Subject: [PATCH 080/102] remove .value for lm_eval --- elk/evaluation/evaluate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index 581ebe0f9..5d98f15e4 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -69,7 +69,7 @@ def apply_to_layer( **meta, "ensembling": prompt_ensembling.value, **evaluate_preds( - val_gt, val_lm_preds, prompt_ensembling.value + val_gt, val_lm_preds, prompt_ensembling ).to_dict(), } ) From 4d9c7811acbd58c50e14c02b039c8065a4c2156a Mon Sep 17 00:00:00 2001 From: jon Date: Thu, 27 Jul 2023 11:46:26 +0100 Subject: [PATCH 081/102] add LayerApplied --- elk/evaluation/evaluate.py | 11 ++++++----- elk/run.py | 11 ++++++++++- elk/training/train.py | 8 +++++--- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index 5d98f15e4..fd792b293 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -8,7 +8,7 @@ from ..files import elk_reporter_dir from ..metrics import evaluate_preds -from ..run import Run +from ..run import LayerApplied, Run from ..utils import Color from ..utils.types import PromptEnsembling @@ -32,7 +32,7 @@ def execute(self, highlight_color: Color = "cyan"): @torch.inference_mode() def apply_to_layer( self, layer: int, devices: list[str], world_size: int - ) -> tuple[dict[str, pd.DataFrame], list[dict]]: + ) -> LayerApplied: """Evaluate a single reporter on a single layer.""" device = self.get_device(devices, world_size) val_output = self.prepare_data(device, layer, "val") @@ -44,7 +44,7 @@ def apply_to_layer( row_bufs = defaultdict(list) - 
layer_output = [] + layer_output: list[dict] = [] for ds_name, (val_h, val_gt, val_lm_preds) in val_output.items(): meta = {"dataset": ds_name, "layer": layer} @@ -93,5 +93,6 @@ def apply_to_layer( ).to_dict(), } ) - - return ({k: pd.DataFrame(v) for k, v in row_bufs.items()}, layer_output) + return LayerApplied( + layer_output, {k: pd.DataFrame(v) for k, v in row_bufs.items()} + ) diff --git a/elk/run.py b/elk/run.py index 74f3da69d..00ddb084b 100644 --- a/elk/run.py +++ b/elk/run.py @@ -34,6 +34,15 @@ from .utils.types import PromptEnsembling +@dataclass(frozen=True) +class LayerApplied: + layer_output: list[dict] + """The output of the reporter on the layer, should contain credences and ground + truth labels.""" + df_dict: dict[str, pd.DataFrame] + """The evaluation results for the layer.""" + + @dataclass class Run(ABC, Serializable): data: Extract @@ -109,7 +118,7 @@ def execute( @abstractmethod def apply_to_layer( self, layer: int, devices: list[str], world_size: int - ) -> dict[str, pd.DataFrame]: + ) -> LayerApplied: """Train or eval a reporter on a single layer.""" def make_reproducible(self, seed: int): diff --git a/elk/training/train.py b/elk/training/train.py index 6b68c694f..b57eef81e 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -12,7 +12,7 @@ from simple_parsing.helpers.serialization import save from ..metrics import evaluate_preds, to_one_hot -from ..run import Run +from ..run import LayerApplied, Run from ..training.supervised import train_supervised from ..utils.types import PromptEnsembling from ..utils.typing import assert_type @@ -54,7 +54,7 @@ def apply_to_layer( layer: int, devices: list[str], world_size: int, - ) -> tuple[dict[str, pd.DataFrame], list[dict]]: + ) -> LayerApplied: """Train a single reporter on a single layer.""" self.make_reproducible(seed=self.net.seed + layer) @@ -205,4 +205,6 @@ def apply_to_layer( } ) - return ({k: pd.DataFrame(v) for k, v in row_bufs.items()}, layer_output) + return LayerApplied( + layer_output, {k: pd.DataFrame(v) for k, v in row_bufs.items()} + ) From 8961e95fc911d5c71808e55ab7b78943daca217b Mon Sep 17 00:00:00 2001 From: jon Date: Thu, 27 Jul 2023 13:30:48 +0100 Subject: [PATCH 082/102] fix run.py part --- elk/run.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/elk/run.py b/elk/run.py index 00ddb084b..53d24ea62 100644 --- a/elk/run.py +++ b/elk/run.py @@ -110,7 +110,7 @@ def execute( devices = select_usable_devices(self.num_gpus, min_memory=self.min_gpu_mem) num_devices = len(devices) - func: Callable[[int], dict[str, pd.DataFrame]] = partial( + func: Callable[[int], LayerApplied] = partial( self.apply_to_layer, devices=devices, world_size=num_devices ) self.apply_to_layers(func=func, num_devices=num_devices) @@ -167,7 +167,7 @@ def concatenate(self, layers): def apply_to_layers( self, - func: Callable[[int], dict[str, pd.DataFrame]], + func: Callable[[int], LayerApplied], num_devices: int, ): """Apply a function to each layer of the datasets in parallel @@ -192,11 +192,9 @@ def apply_to_layers( df_buffers = defaultdict(list) layer_outputs = [] try: - for df_dict, layer_output in tqdm( - mapper(func, layers), total=len(layers) - ): - layer_outputs.append(layer_output) - for k, v in df_dict.items(): # type: ignore + for res in tqdm(mapper(func, layers), total=len(layers)): + layer_outputs.append(res.layer_output) + for k, v in res.df_dict.items(): # type: ignore df_buffers[k].append(v) finally: # Make sure the CSVs are written even if we crash or get interrupted From 
bd06cd30ca7c1e99573dbaa324d82dca63fdbdfe Mon Sep 17 00:00:00 2001 From: jon Date: Thu, 27 Jul 2023 20:43:18 +0100 Subject: [PATCH 083/102] multidataset layer ensembling --- elk/evaluation/evaluate.py | 12 ++++----- elk/metrics/eval.py | 47 ++++++++++++++++++++--------------- elk/run.py | 50 +++++++++++++++++++++++--------------- elk/training/train.py | 19 +++++++++++---- 4 files changed, 77 insertions(+), 51 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index fd792b293..32ba25ddd 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -8,7 +8,7 @@ from ..files import elk_reporter_dir from ..metrics import evaluate_preds -from ..run import LayerApplied, Run +from ..run import LayerApplied, LayerOutput, Run from ..utils import Color from ..utils.types import PromptEnsembling @@ -44,14 +44,14 @@ def apply_to_layer( row_bufs = defaultdict(list) - layer_output: list[dict] = [] + layer_outputs: list[LayerOutput] = [] + for ds_name, (val_h, val_gt, val_lm_preds) in val_output.items(): meta = {"dataset": ds_name, "layer": layer} val_credences = reporter(val_h) - layer_output.append( - {**meta, "val_gt": val_gt, "val_credences": val_credences} - ) + + layer_outputs.append(LayerOutput(val_gt, val_credences, meta)) for prompt_ensembling in PromptEnsembling.all(): row_bufs["eval"].append( { @@ -94,5 +94,5 @@ def apply_to_layer( } ) return LayerApplied( - layer_output, {k: pd.DataFrame(v) for k, v in row_bufs.items()} + layer_outputs, {k: pd.DataFrame(v) for k, v in row_bufs.items()} ) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index 0a7cd0b3a..b3b9fe60d 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -10,6 +10,13 @@ from .roc_auc import RocAucResult, roc_auc_ci +@dataclass +class LayerOutput: + val_gt: Tensor + val_credences: Tensor + meta: dict + + @dataclass(frozen=True) class EvalResult: """The result of evaluating a classifier.""" @@ -202,15 +209,30 @@ def calc_eval_results( return EvalResult(acc, cal_acc, cal_err, auroc) +def to_one_hot(labels: Tensor, n_classes: int) -> Tensor: + """ + Convert a tensor of class labels to a one-hot representation. + + Args: + labels (Tensor): A tensor of class labels of shape (N,). + n_classes (int): The total number of unique classes. + + Returns: + Tensor: A one-hot representation tensor of shape (N, n_classes). + """ + one_hot_labels = labels.new_zeros(*labels.shape, n_classes) + return one_hot_labels.scatter_(-1, labels.unsqueeze(-1).long(), 1) + + def layer_ensembling( - layer_outputs: list, prompt_ensembling: PromptEnsembling + layer_outputs: list[LayerOutput], prompt_ensembling: PromptEnsembling ) -> EvalResult: """ Return EvalResult after prompt_ensembling the probe output of the middle to last layers Args: - layer_outputs: A list of dictionaries containing the ground truth and + layer_outputs: A list of LayerOutput containing the ground truth and predicted class tensor of shape (n, num_variants, num_classes). prompt_ensembling: The prompt_ensembling mode. 
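# A condensed sketch of the layer ensembling step this docstring describes, under
# the toy assumption that every layer contributes already-prepared credences of the
# same shape: only the middle-to-last layers are kept, their logits are averaged
# over the layer axis, and the result is scored once.
import torch

per_layer_logits = [torch.randn(8, 3, 2) for _ in range(12)]   # toy (n, v, k) per layer
middle_index = len(per_layer_logits) // 2
ensembled = torch.stack(per_layer_logits[middle_index:]).mean(dim=0)   # (8, 3, 2)
assert ensembled.shape == (8, 3, 2)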
@@ -222,14 +244,14 @@ def layer_ensembling( y_logits_collection = [] num_classes = 2 - y_true = layer_outputs[0][0]["val_gt"].to(device) + y_true = layer_outputs[0].val_gt.to(device) for layer_output in layer_outputs: # all y_trues are identical, so just get the first - y_logits = layer_output[0]["val_credences"].to(device) + y_logits = layer_output.val_credences.to(device) y_logits, y_true, num_classes = prepare( y_logits=y_logits, - y_true=layer_outputs[0][0]["val_gt"].to(device), + y_true=layer_outputs[0].val_gt.to(device), prompt_ensembling=prompt_ensembling, ) y_logits_collection.append(y_logits) @@ -246,18 +268,3 @@ def layer_ensembling( prompt_ensembling=prompt_ensembling, num_classes=num_classes, ) - - -def to_one_hot(labels: Tensor, n_classes: int) -> Tensor: - """ - Convert a tensor of class labels to a one-hot representation. - - Args: - labels (Tensor): A tensor of class labels of shape (N,). - n_classes (int): The total number of unique classes. - - Returns: - Tensor: A one-hot representation tensor of shape (N, n_classes). - """ - one_hot_labels = labels.new_zeros(*labels.shape, n_classes) - return one_hot_labels.scatter_(-1, labels.unsqueeze(-1).long(), 1) diff --git a/elk/run.py b/elk/run.py index 53d24ea62..0b1fccf0b 100644 --- a/elk/run.py +++ b/elk/run.py @@ -17,12 +17,11 @@ from torch import Tensor from tqdm import tqdm -from elk.metrics.eval import layer_ensembling - from .debug_logging import save_debug_log from .extraction import Extract, extract from .extraction.dataset_name import DatasetDictWithName from .files import elk_reporter_dir, memorably_named_dir +from .metrics.eval import LayerOutput, layer_ensembling from .utils import ( Color, assert_type, @@ -36,7 +35,7 @@ @dataclass(frozen=True) class LayerApplied: - layer_output: list[dict] + layer_outputs: list[LayerOutput] """The output of the reporter on the layer, should contain credences and ground truth labels.""" df_dict: dict[str, pd.DataFrame] @@ -190,37 +189,48 @@ def apply_to_layers( with ctx.Pool(num_devices) as pool: mapper = pool.imap_unordered if num_devices > 1 else map df_buffers = defaultdict(list) - layer_outputs = [] + layer_outputs: list[LayerOutput] = [] try: for res in tqdm(mapper(func, layers), total=len(layers)): - layer_outputs.append(res.layer_output) + layer_outputs.extend(res.layer_outputs) for k, v in res.df_dict.items(): # type: ignore df_buffers[k].append(v) finally: # Make sure the CSVs are written even if we crash or get interrupted for name, dfs in df_buffers.items(): - df = pd.concat(dfs).sort_values(by=["layer", "prompt_ensembling"]) + PROMPT_ENSEMBLING = "prompt_ensembling" + df = pd.concat(dfs).sort_values(by=["layer", PROMPT_ENSEMBLING]) df.round(4).to_csv(self.out_dir / f"{name}.csv", index=False) if self.debug: save_debug_log(self.datasets, self.out_dir) dfs = [] - - for prompt_ensembling in PromptEnsembling.all(): - layer_ensembling_results = layer_ensembling( - layer_outputs=layer_outputs, prompt_ensembling=prompt_ensembling - ) - df = pd.DataFrame(layer_ensembling_results.to_dict(), index=[0]) - df = df.round(4) - df["prompt_ensembling"] = prompt_ensembling.value - dfs.append(df) + # groupby layer_outputs by their dataset name + grouped_layer_outputs = {} + # Group the LayerOutput objects by dataset name + for layer_output in layer_outputs: + dataset_name = layer_output.meta["dataset"] + if dataset_name in grouped_layer_outputs: + grouped_layer_outputs[dataset_name].append(layer_output) + else: + grouped_layer_outputs[dataset_name] = [layer_output] + + for dataset_name, 
layer_outputs in grouped_layer_outputs.items(): + for prompt_ensembling in PromptEnsembling.all(): + res = layer_ensembling( + layer_outputs=layer_outputs, + prompt_ensembling=prompt_ensembling, + ) + df = pd.DataFrame(res.to_dict(), index=[0]) + df = df.round(4) + df[PROMPT_ENSEMBLING] = prompt_ensembling.value + df["dataset"] = dataset_name + dfs.append(df) df_concat = pd.concat(dfs) # Rearrange the columns so that prompt_ensembling is in front - columns = ["prompt_ensembling"] + [ - col for col in df_concat.columns if col != "prompt_ensembling" + columns = [PROMPT_ENSEMBLING] + [ + col for col in df_concat.columns if col != PROMPT_ENSEMBLING ] df_concat = df_concat[columns] - df_concat.to_csv( - self.out_dir / "layer_ensembling_results.csv", index=False - ) + df_concat.to_csv(self.out_dir / "layer_ensembling.csv", index=False) diff --git a/elk/training/train.py b/elk/training/train.py index b57eef81e..78dad4918 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -12,6 +12,7 @@ from simple_parsing.helpers.serialization import save from ..metrics import evaluate_preds, to_one_hot +from ..metrics.eval import LayerOutput from ..run import LayerApplied, Run from ..training.supervised import train_supervised from ..utils.types import PromptEnsembling @@ -139,12 +140,20 @@ def apply_to_layer( val_credences = reporter(val_h) + # layer_output.append( + # { + # **meta, + # "val_gt": val_gt.detach(), + # "val_credences": val_credences.detach(), + # } + # ) + # Using the class layer_output.append( - { - **meta, - "val_gt": val_gt.detach(), - "val_credences": val_credences.detach(), - } + LayerOutput( + val_gt=val_gt.detach(), + val_credences=val_credences.detach(), + meta=meta, + ) ) train_credences = reporter(train_h) From f8882c6c982791737d53147beeecc2c0ebce8c52 Mon Sep 17 00:00:00 2001 From: jon Date: Thu, 27 Jul 2023 21:08:34 +0100 Subject: [PATCH 084/102] little refactoring sorting remove comment --- elk/run.py | 67 +++++++++++++++++++++++-------------------- elk/training/train.py | 9 ------ 2 files changed, 36 insertions(+), 40 deletions(-) diff --git a/elk/run.py b/elk/run.py index 0b1fccf0b..86c854284 100644 --- a/elk/run.py +++ b/elk/run.py @@ -32,6 +32,8 @@ ) from .utils.types import PromptEnsembling +PROMPT_ENSEMBLING = "prompt_ensembling" + @dataclass(frozen=True) class LayerApplied: @@ -42,6 +44,36 @@ class LayerApplied: """The evaluation results for the layer.""" +def calculate_layer_outputs(layer_outputs, out_path): + grouped_layer_outputs = {} + for layer_output in layer_outputs: + dataset_name = layer_output.meta["dataset"] + if dataset_name in grouped_layer_outputs: + grouped_layer_outputs[dataset_name].append(layer_output) + else: + grouped_layer_outputs[dataset_name] = [layer_output] + + dfs = [] + for dataset_name, layer_outputs in grouped_layer_outputs.items(): + for prompt_ensembling in PromptEnsembling.all(): + res = layer_ensembling( + layer_outputs=layer_outputs, + prompt_ensembling=prompt_ensembling, + ) + df = pd.DataFrame( + { + "dataset": dataset_name, + PROMPT_ENSEMBLING: prompt_ensembling.value, + **res.to_dict(), + }, + index=[0], + ).round(4) + dfs.append(df) + + df_concat = pd.concat(dfs) + df_concat.to_csv(out_path, index=False) + + @dataclass class Run(ABC, Serializable): data: Extract @@ -198,39 +230,12 @@ def apply_to_layers( finally: # Make sure the CSVs are written even if we crash or get interrupted for name, dfs in df_buffers.items(): - PROMPT_ENSEMBLING = "prompt_ensembling" df = pd.concat(dfs).sort_values(by=["layer", PROMPT_ENSEMBLING]) 
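# The manual if/else grouping inside calculate_layer_outputs could equally be
# written with the defaultdict that apply_to_layers already uses for df_buffers;
# a sketch of the equivalent grouping, assuming every LayerOutput.meta carries a
# "dataset" key as in the hunks above:
from collections import defaultdict

def group_by_dataset(layer_outputs):
    grouped = defaultdict(list)
    for layer_output in layer_outputs:
        grouped[layer_output.meta["dataset"]].append(layer_output)
    return grouped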
df.round(4).to_csv(self.out_dir / f"{name}.csv", index=False) if self.debug: save_debug_log(self.datasets, self.out_dir) - dfs = [] - # groupby layer_outputs by their dataset name - grouped_layer_outputs = {} - # Group the LayerOutput objects by dataset name - for layer_output in layer_outputs: - dataset_name = layer_output.meta["dataset"] - if dataset_name in grouped_layer_outputs: - grouped_layer_outputs[dataset_name].append(layer_output) - else: - grouped_layer_outputs[dataset_name] = [layer_output] - - for dataset_name, layer_outputs in grouped_layer_outputs.items(): - for prompt_ensembling in PromptEnsembling.all(): - res = layer_ensembling( - layer_outputs=layer_outputs, - prompt_ensembling=prompt_ensembling, - ) - df = pd.DataFrame(res.to_dict(), index=[0]) - df = df.round(4) - df[PROMPT_ENSEMBLING] = prompt_ensembling.value - df["dataset"] = dataset_name - dfs.append(df) - - df_concat = pd.concat(dfs) - # Rearrange the columns so that prompt_ensembling is in front - columns = [PROMPT_ENSEMBLING] + [ - col for col in df_concat.columns if col != PROMPT_ENSEMBLING - ] - df_concat = df_concat[columns] - df_concat.to_csv(self.out_dir / "layer_ensembling.csv", index=False) + calculate_layer_outputs( + layer_outputs=layer_outputs, + out_path=self.out_dir / "layer_ensembling.csv", + ) diff --git a/elk/training/train.py b/elk/training/train.py index 78dad4918..34be7e78c 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -139,15 +139,6 @@ def apply_to_layer( meta = {"dataset": ds_name, "layer": layer} val_credences = reporter(val_h) - - # layer_output.append( - # { - # **meta, - # "val_gt": val_gt.detach(), - # "val_credences": val_credences.detach(), - # } - # ) - # Using the class layer_output.append( LayerOutput( val_gt=val_gt.detach(), From 23183bcdb034654fa8c1ddce25af6b5c18831b69 Mon Sep 17 00:00:00 2001 From: jon Date: Thu, 27 Jul 2023 21:17:38 +0100 Subject: [PATCH 085/102] fix tests --- elk/evaluation/evaluate.py | 8 +++++--- elk/run.py | 1 + tests/test_smoke_elicit.py | 4 ++-- tests/test_smoke_eval.py | 2 +- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index 32ba25ddd..a74dca262 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -12,6 +12,8 @@ from ..utils import Color from ..utils.types import PromptEnsembling +PROMPT_ENSEMBLING = "prompt_ensembling" + @dataclass(kw_only=True) class Eval(Run): @@ -56,7 +58,7 @@ def apply_to_layer( row_bufs["eval"].append( { **meta, - "prompt_ensembling": prompt_ensembling.value, + PROMPT_ENSEMBLING: prompt_ensembling.value, **evaluate_preds( val_gt, val_credences, prompt_ensembling ).to_dict(), @@ -67,7 +69,7 @@ def apply_to_layer( row_bufs["lm_eval"].append( { **meta, - "ensembling": prompt_ensembling.value, + PROMPT_ENSEMBLING: prompt_ensembling.value, **evaluate_preds( val_gt, val_lm_preds, prompt_ensembling ).to_dict(), @@ -85,7 +87,7 @@ def apply_to_layer( model.eval() row_bufs["lr_eval"].append( { - "prompt_ensembling": prompt_ensembling.value, + PROMPT_ENSEMBLING: prompt_ensembling.value, "inlp_iter": i, **meta, **evaluate_preds( diff --git a/elk/run.py b/elk/run.py index 86c854284..f96b72ad2 100644 --- a/elk/run.py +++ b/elk/run.py @@ -230,6 +230,7 @@ def apply_to_layers( finally: # Make sure the CSVs are written even if we crash or get interrupted for name, dfs in df_buffers.items(): + print(dfs[0].columns) df = pd.concat(dfs).sort_values(by=["layer", PROMPT_ENSEMBLING]) df.round(4).to_csv(self.out_dir / f"{name}.csv", 
index=False) if self.debug: diff --git a/tests/test_smoke_elicit.py b/tests/test_smoke_elicit.py index f9db114c4..4ae8b22b0 100644 --- a/tests/test_smoke_elicit.py +++ b/tests/test_smoke_elicit.py @@ -31,7 +31,7 @@ def test_smoke_elicit_run_tiny_gpt2_ccs(tmp_path: Path): "lr_models", "reporters", "eval.csv", - "layer_ensembling_results.csv", + "layer_ensembling.csv", ] for file in expected_files: assert file in created_file_names @@ -63,7 +63,7 @@ def test_smoke_elicit_run_tiny_gpt2_eigen(tmp_path: Path): "lr_models", "reporters", "eval.csv", - "layer_ensembling_results.csv", + "layer_ensembling.csv", ] for file in expected_files: assert file in created_file_names diff --git a/tests/test_smoke_eval.py b/tests/test_smoke_eval.py index 45324faac..7f0bad7ea 100644 --- a/tests/test_smoke_eval.py +++ b/tests/test_smoke_eval.py @@ -11,7 +11,7 @@ "cfg.yaml", "fingerprints.yaml", "eval.csv", - "layer_ensembling_results.csv", + "layer_ensembling.csv", ] From d091f9d6695299bf797fd2c80385cc4956917593 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Mon, 31 Jul 2023 19:02:40 +0000 Subject: [PATCH 086/102] add annotation + cleanup --- elk/run.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/elk/run.py b/elk/run.py index f96b72ad2..25e8fb71e 100644 --- a/elk/run.py +++ b/elk/run.py @@ -44,7 +44,15 @@ class LayerApplied: """The evaluation results for the layer.""" -def calculate_layer_outputs(layer_outputs, out_path): +def calculate_layer_outputs(layer_outputs: list[LayerOutput], out_path: Path): + """ + Calculate the layer ensembling results for each dataset + and prompt ensembling and save them to a CSV file. + + Args: + layer_outputs: The layer outputs to calculate the results for. + out_path: The path to save the results to. + """ grouped_layer_outputs = {} for layer_output in layer_outputs: dataset_name = layer_output.meta["dataset"] @@ -230,7 +238,6 @@ def apply_to_layers( finally: # Make sure the CSVs are written even if we crash or get interrupted for name, dfs in df_buffers.items(): - print(dfs[0].columns) df = pd.concat(dfs).sort_values(by=["layer", PROMPT_ENSEMBLING]) df.round(4).to_csv(self.out_dir / f"{name}.csv", index=False) if self.debug: From 776c1863be0ed4ede51942843ee031cafd648346 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 31 Jul 2023 19:04:37 +0000 Subject: [PATCH 087/102] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- elk/run.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/elk/run.py b/elk/run.py index 25e8fb71e..55c449540 100644 --- a/elk/run.py +++ b/elk/run.py @@ -46,9 +46,9 @@ class LayerApplied: def calculate_layer_outputs(layer_outputs: list[LayerOutput], out_path: Path): """ - Calculate the layer ensembling results for each dataset + Calculate the layer ensembling results for each dataset and prompt ensembling and save them to a CSV file. - + Args: layer_outputs: The layer outputs to calculate the results for. out_path: The path to save the results to. 
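Note on the calculate_layer_outputs helper documented in the patch above: the dataset grouping it performs (collecting LayerOutput objects per dataset before running layer_ensembling on each group) can also be written with collections.defaultdict. A minimal, self-contained sketch of that grouping pattern; the SimpleNamespace objects below are invented stand-ins for LayerOutput and are not part of elk:

    from collections import defaultdict
    from types import SimpleNamespace

    layer_outputs = [
        SimpleNamespace(meta={"dataset": "imdb"}),
        SimpleNamespace(meta={"dataset": "imdb"}),
        SimpleNamespace(meta={"dataset": "amazon_polarity"}),
    ]

    grouped = defaultdict(list)
    for layer_output in layer_outputs:
        # One bucket per dataset; each bucket is later passed to
        # layer_ensembling once per PromptEnsembling mode.
        grouped[layer_output.meta["dataset"]].append(layer_output)

    assert sorted(grouped) == ["amazon_polarity", "imdb"]
    assert len(grouped["imdb"]) == 2
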
From d284d11cb9b9af43266f6da7d316c4f403ba5cf2 Mon Sep 17 00:00:00 2001 From: jon Date: Wed, 9 Aug 2023 00:56:51 +0300 Subject: [PATCH 088/102] Merge remote-tracking branch 'origin/not-291-train-probe-per-prompt' into experiments-notodai --- elk/metrics/eval.py | 2 ++ elk/run.py | 2 +- elk/training/train.py | 23 ++++++++++++----------- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index b3b9fe60d..d7b391b1f 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -149,6 +149,8 @@ def evaluate_preds( dict: A dictionary containing the accuracy, AUROC, and ECE. """ y_logits, y_true, num_classes = prepare(y_logits, y_true, prompt_ensembling) + print('hi') + print(prompt_ensembling) return calc_eval_results(y_true, y_logits, prompt_ensembling, num_classes) diff --git a/elk/run.py b/elk/run.py index a8b8c6a9f..e7d1498af 100644 --- a/elk/run.py +++ b/elk/run.py @@ -248,7 +248,7 @@ def apply_to_layers( for name, dfs in df_buffers.items(): df = pd.concat(dfs).sort_values(by=["layer", PROMPT_ENSEMBLING]) df.round(4).to_csv(self.out_dir / f"{name}.csv", index=False) - sortby = ["layer", "ensembling"] + sortby = ["layer", "prompt_ensembling"] if "prompt_index" in dfs[0].columns: sortby.append("prompt_index") df = pd.concat(dfs).sort_values(by=sortby) diff --git a/elk/training/train.py b/elk/training/train.py index f307455fd..ba2e4ae1f 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -59,12 +59,13 @@ def eval_all( prompt_index_dict = ( {"prompt_index": prompt_index} if prompt_index is not None else {} ) - for mode in ("none", "partial", "full"): + PROMPT_ENSEMBLING = "prompt_ensembling" + for prompt_ensembling in PromptEnsembling.all(): row_bufs["eval"].append( { **meta, - "ensembling": mode, - **evaluate_preds(val_gt, val_credences, mode).to_dict(), + PROMPT_ENSEMBLING: prompt_ensembling.value, + **evaluate_preds(val_gt, val_credences, prompt_ensembling).to_dict(), "train_loss": train_loss, **prompt_index_dict, } @@ -73,8 +74,8 @@ def eval_all( row_bufs["train_eval"].append( { **meta, - "ensembling": mode, - **evaluate_preds(train_gt, train_credences, mode).to_dict(), + PROMPT_ENSEMBLING: prompt_ensembling.value, + **evaluate_preds(train_gt, train_credences, prompt_ensembling).to_dict(), "train_loss": train_loss, **prompt_index_dict, } @@ -84,8 +85,8 @@ def eval_all( row_bufs["lm_eval"].append( { **meta, - "ensembling": mode, - **evaluate_preds(val_gt, val_lm_preds, mode).to_dict(), + PROMPT_ENSEMBLING: prompt_ensembling.value, + **evaluate_preds(val_gt, val_lm_preds, prompt_ensembling).to_dict(), **prompt_index_dict, } ) @@ -94,8 +95,8 @@ def eval_all( row_bufs["train_lm_eval"].append( { **meta, - "ensembling": mode, - **evaluate_preds(train_gt, train_lm_preds, mode).to_dict(), + PROMPT_ENSEMBLING: prompt_ensembling.value, + **evaluate_preds(train_gt, train_lm_preds, prompt_ensembling).to_dict(), **prompt_index_dict, } ) @@ -104,9 +105,9 @@ def eval_all( row_bufs["lr_eval"].append( { **meta, - "ensembling": mode, + PROMPT_ENSEMBLING: prompt_ensembling.value, "inlp_iter": lr_model_num, - **evaluate_preds(val_gt, model(val_h), mode).to_dict(), + **evaluate_preds(val_gt, model(val_h), prompt_ensembling).to_dict(), **prompt_index_dict, } ) From bc6cf6754f235c90e9a60a232593820b12755b24 Mon Sep 17 00:00:00 2001 From: jon Date: Wed, 9 Aug 2023 15:23:15 +0300 Subject: [PATCH 089/102] fix --- elk/evaluation/evaluate.py | 17 +---- elk/metrics/eval.py | 2 - elk/run.py | 1 - elk/training/multi_reporter.py | 11 ++-- 
elk/training/train.py | 114 ++------------------------------- 5 files changed, 14 insertions(+), 131 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index 7ea02ffbe..6395dba5a 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -61,16 +61,11 @@ def load_reporter() -> SingleReporter | MultiReporter: layer_outputs: list[LayerOutput] = [] def eval_all( - reporter: SingleReporter | MultiReporter, - prompt_index: int | Literal["multi"] | None = None, - i: int = 0, + reporter: SingleReporter | MultiReporter ): - prompt_index_dict = ( - {"prompt_index": prompt_index} if prompt_index is not None else {} - ) for ds_name, (val_h, val_gt, val_lm_preds) in val_output.items(): meta = {"dataset": ds_name, "layer": layer} - val_credences = reporter(val_h[:, [i], :, :]) + val_credences = reporter(val_h) layer_outputs.append(LayerOutput(val_gt, val_credences, meta)) for prompt_ensembling in PromptEnsembling.all(): row_bufs["eval"].append( @@ -78,7 +73,6 @@ def eval_all( **meta, PROMPT_ENSEMBLING: prompt_ensembling.value, **evaluate_preds(val_gt, val_credences, prompt_ensembling).to_dict(), - **prompt_index_dict, } ) @@ -115,12 +109,7 @@ def eval_all( } ) - if isinstance(reporter, MultiReporter): - for i, res in enumerate(reporter.reporter_w_infos): - eval_all(res.model, res.prompt_index, i) - eval_all(reporter, "multi") - else: - eval_all(reporter) + eval_all(reporter) return LayerApplied( layer_outputs, {k: pd.DataFrame(v) for k, v in row_bufs.items()} diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index d7b391b1f..b3b9fe60d 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -149,8 +149,6 @@ def evaluate_preds( dict: A dictionary containing the accuracy, AUROC, and ECE. """ y_logits, y_true, num_classes = prepare(y_logits, y_true, prompt_ensembling) - print('hi') - print(prompt_ensembling) return calc_eval_results(y_true, y_logits, prompt_ensembling, num_classes) diff --git a/elk/run.py b/elk/run.py index e7d1498af..c7a763255 100644 --- a/elk/run.py +++ b/elk/run.py @@ -263,7 +263,6 @@ def apply_to_layers( df.round(4).to_csv(out_path, index=False) if self.debug: save_debug_log(self.datasets, self.out_dir) - calculate_layer_outputs( layer_outputs=layer_outputs, out_path=self.out_dir / "layer_ensembling.csv", diff --git a/elk/training/multi_reporter.py b/elk/training/multi_reporter.py index 602b23e95..eb4603b65 100644 --- a/elk/training/multi_reporter.py +++ b/elk/training/multi_reporter.py @@ -35,11 +35,12 @@ def __init__(self, reporter: list[ReporterWithInfo]): ) def __call__(self, h): - num_variants = h.shape[1] - assert len(self.models) == num_variants - credences = [] - for i, reporter in enumerate(self.models): - credences.append(reporter(h[:, [i], :, :])) + n_eval_prompts = h.shape[1] + credences = [ + reporter(h[:, [prompt_index], :, :]) + for reporter in self.models + for prompt_index in range(n_eval_prompts) + ] return t.stack(credences, dim=0).mean(dim=0) @staticmethod diff --git a/elk/training/train.py b/elk/training/train.py index ba2e4ae1f..463b78d4e 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -39,16 +39,10 @@ def evaluate_and_save( meta = {"dataset": ds_name, "layer": layer} def eval_all( - reporter: SingleReporter | MultiReporter, - prompt_index: int | Literal["multi"] | None = None, - i: int = 0, + reporter: SingleReporter | MultiReporter ): - if isinstance(prompt_index, int): - val_credences = reporter(val_h[:, [i], :, :]) - train_credences = reporter(train_h[:, [i], :, :]) - else: - 
val_credences = reporter(val_h) - train_credences = reporter(train_h) + val_credences = reporter(val_h) + train_credences = reporter(train_h) layer_output.append( LayerOutput( val_gt=val_gt.detach(), @@ -56,9 +50,6 @@ def eval_all( meta=meta, ) ) - prompt_index_dict = ( - {"prompt_index": prompt_index} if prompt_index is not None else {} - ) PROMPT_ENSEMBLING = "prompt_ensembling" for prompt_ensembling in PromptEnsembling.all(): row_bufs["eval"].append( @@ -67,7 +58,6 @@ def eval_all( PROMPT_ENSEMBLING: prompt_ensembling.value, **evaluate_preds(val_gt, val_credences, prompt_ensembling).to_dict(), "train_loss": train_loss, - **prompt_index_dict, } ) @@ -77,7 +67,6 @@ def eval_all( PROMPT_ENSEMBLING: prompt_ensembling.value, **evaluate_preds(train_gt, train_credences, prompt_ensembling).to_dict(), "train_loss": train_loss, - **prompt_index_dict, } ) @@ -87,7 +76,6 @@ def eval_all( **meta, PROMPT_ENSEMBLING: prompt_ensembling.value, **evaluate_preds(val_gt, val_lm_preds, prompt_ensembling).to_dict(), - **prompt_index_dict, } ) @@ -97,7 +85,6 @@ def eval_all( **meta, PROMPT_ENSEMBLING: prompt_ensembling.value, **evaluate_preds(train_gt, train_lm_preds, prompt_ensembling).to_dict(), - **prompt_index_dict, } ) @@ -108,16 +95,10 @@ def eval_all( PROMPT_ENSEMBLING: prompt_ensembling.value, "inlp_iter": lr_model_num, **evaluate_preds(val_gt, model(val_h), prompt_ensembling).to_dict(), - **prompt_index_dict, } ) - if isinstance(reporter, MultiReporter): - for i, reporter_result in enumerate(reporter.reporter_w_infos): - eval_all(reporter_result.model, reporter_result.prompt_index, i) - eval_all(reporter, prompt_index="multi") - else: - eval_all(reporter, prompt_index=None) + eval_all(reporter) return LayerApplied( layer_output, {k: pd.DataFrame(v) for k, v in row_bufs.items()} @@ -138,20 +119,6 @@ class Elicit(Run): cross-validation. Defaults to "single", which means to train a single classifier on the training data. "cv" means to use cross-validation.""" - def create_models_dir(self, out_dir: Path): - lr_dir = None - lr_dir = out_dir / "lr_models" - reporter_dir = out_dir / "reporters" - - lr_dir.mkdir(parents=True, exist_ok=True) - reporter_dir.mkdir(parents=True, exist_ok=True) - - # Save the reporter config separately in the reporter directory - # for convenient loading of reporters later. - save(self.net, reporter_dir / "cfg.yaml", save_dc_types=True) - - return reporter_dir, lr_dir - def make_eval(self, model, eval_dataset): assert self.out_dir is not None return Eval( @@ -231,77 +198,6 @@ def train_and_save_reporter( return ReporterWithInfo(reporter, train_loss, prompt_index) - def apply_to_layer( - self, - layer: int, - devices: list[str], - world_size: int, - probe_per_prompt: bool, - ) -> LayerApplied: - """Train a single reporter on a single layer.""" - - self.make_reproducible(seed=self.net.seed + layer) - device = self.get_device(devices, world_size) - - train_dict = self.prepare_data(device, layer, "train") - val_dict = self.prepare_data(device, layer, "val") - - (first_train_h, train_gt, _), *rest = train_dict.values() - (_, v, k, d) = first_train_h.shape - if not all(other_h.shape[-1] == d for other_h, _, _ in rest): - raise ValueError("All datasets must have the same hidden state size") - - # For a while we did support datasets with different numbers of classes, but - # we reverted this once we switched to ConceptEraser. There are a few options - # for re-enabling it in the future but they are somewhat complex and it's not - # clear that it's worth it. 
- if not all(other_h.shape[-2] == k for other_h, _, _ in rest): - raise ValueError("All datasets must have the same number of classes") - - train_loss = None - if isinstance(self.net, CcsConfig): - assert len(train_dict) == 1, "CCS only supports single-task training" - reporter = CcsReporter(self.net, d, device=device, num_variants=v) - train_loss = reporter.fit(first_train_h) - - if not self.net.norm == "burns": - (_, v, k, _) = first_train_h.shape - reporter.platt_scale( - to_one_hot(repeat(train_gt, "n -> (n v)", v=v), k).flatten(), - rearrange(first_train_h, "n v k d -> (n v k) d"), - ) - - elif isinstance(self.net, EigenFitterConfig): - fitter = EigenFitter( - self.net, d, num_classes=k, num_variants=v, device=device - ) - - hidden_list, label_list = [], [] - for ds_name, (train_h, train_gt, _) in train_dict.items(): - (_, v, _, _) = train_h.shape - - # Datasets can have different numbers of variants, so we need to - # flatten them here before concatenating - hidden_list.append(rearrange(train_h, "n v k d -> (n v k) d")) - label_list.append( - to_one_hot(repeat(train_gt, "n -> (n v)", v=v), k).flatten() - ) - fitter.update(train_h) - - reporter = fitter.fit_streaming() - reporter.platt_scale( - torch.cat(label_list), - torch.cat(hidden_list), - ) - else: - raise ValueError(f"Unknown reporter config type: {type(self.net)}") - - # Save reporter checkpoint to disk - # TODO have to change this - out_dir.mkdir(parents=True, exist_ok=True) - torch.save(reporter, out_dir / f"layer_{layer}.pt") - - return ReporterWithInfo(reporter, train_loss, prompt_index) def train_lr_model(self, train_dict, device, layer, out_dir) -> list[Classifier]: if self.supervised != "none": @@ -325,7 +221,7 @@ def apply_to_layer( devices: list[str], world_size: int, probe_per_prompt: bool, - ) -> dict[str, pd.DataFrame]: + ) -> LayerApplied: """Train a single reporter on a single layer.""" assert self.out_dir is not None # TODO this is really annoying, why can it be # None? 
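Note on the MultiReporter.__call__ change in the patch above: it pools one credence tensor per (trained reporter, evaluation prompt) pair and averages them. A rough sketch of that pooling on toy inputs; the two mean/max "reporters" are placeholders invented for the example and say nothing about the real reporter architecture:

    import torch as t

    n, v, k, d = 8, 3, 2, 16  # examples, eval prompts, classes, hidden size
    h = t.randn(n, v, k, d)

    # Each stand-in "reporter" maps (n, 1, k, d) hiddens to (n, 1, k) credences.
    reporters = [lambda x: x.mean(dim=-1), lambda x: x.amax(dim=-1)]

    credences = [
        reporter(h[:, [prompt_index], :, :])
        for reporter in reporters
        for prompt_index in range(v)
    ]
    pooled = t.stack(credences, dim=0).mean(dim=0)
    assert pooled.shape == (n, 1, k)
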
From c98e9518bf6e24fa8eb72432723319ca62cb8851 Mon Sep 17 00:00:00 2001 From: jon Date: Thu, 10 Aug 2023 15:16:37 +0000 Subject: [PATCH 090/102] fail gracefully --- .gitignore | 3 +++ elk/training/sweep.py | 9 +++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 6c29ee26e..db5c7e2a3 100644 --- a/.gitignore +++ b/.gitignore @@ -2,9 +2,12 @@ *.npy elk/models/* elk/trained/* +elk/credences nohup.out .idea *.pkl +visual_llama.ipynb +visual_llama.py # scripts for experiments in progress my_*.sh diff --git a/elk/training/sweep.py b/elk/training/sweep.py index 89afce17a..4de6ba348 100755 --- a/elk/training/sweep.py +++ b/elk/training/sweep.py @@ -1,4 +1,4 @@ -from dataclasses import InitVar, dataclass, replace +from dataclasses import InitVar, asdict, dataclass, replace import numpy as np import torch @@ -162,7 +162,12 @@ def execute(self): continue eval = elicit.make_eval(model, eval_dataset) - eval.execute(highlight_color="green") + try: + eval.execute(highlight_color="green") + except Exception as e: + print(e) + print(asdict(eval)) + continue if self.visualize: visualize_sweep(sweep_dir) From c8163c3a8218a687dd720e0747c881626ab5dcd5 Mon Sep 17 00:00:00 2001 From: jon Date: Tue, 15 Aug 2023 16:15:00 +0300 Subject: [PATCH 091/102] add super_full credence; mean over n_eval_prompts * n_trained_reporters credences comment --- elk/evaluation/evaluate.py | 6 +++++- elk/training/multi_reporter.py | 27 +++++++++++++++++++-------- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index 6395dba5a..8837da024 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -65,7 +65,11 @@ def eval_all( ): for ds_name, (val_h, val_gt, val_lm_preds) in val_output.items(): meta = {"dataset": ds_name, "layer": layer} - val_credences = reporter(val_h) + val_credences = ( + reporter(val_h) + if isinstance(reporter, SingleReporter) + else reporter(val_h, super_full=True) + ) layer_outputs.append(LayerOutput(val_gt, val_credences, meta)) for prompt_ensembling in PromptEnsembling.all(): row_bufs["eval"].append( diff --git a/elk/training/multi_reporter.py b/elk/training/multi_reporter.py index eb4603b65..5bc1a3a6f 100644 --- a/elk/training/multi_reporter.py +++ b/elk/training/multi_reporter.py @@ -34,14 +34,25 @@ def __init__(self, reporter: list[ReporterWithInfo]): else None ) - def __call__(self, h): - n_eval_prompts = h.shape[1] - credences = [ - reporter(h[:, [prompt_index], :, :]) - for reporter in self.models - for prompt_index in range(n_eval_prompts) - ] - return t.stack(credences, dim=0).mean(dim=0) + def __call__(self, h, super_full=False): + if super_full: + n_eval_prompts = h.shape[1] + credences_by_eval_prompt = [ + t.cat([reporter(h[:, [j], :, :]) + for j in range(n_eval_prompts)], dim=1) + for reporter in self.models + ] # self.models * (n, n_eval_prompts, c) + credences = t.stack(credences_by_eval_prompt, dim=0).mean(dim=0) + assert credences.shape == (h.shape[0], n_eval_prompts, h.shape[2]) + return credences + else: + assert h.shape[1] == len(self.models) # somewhat weak check but better than nothing + credences = t.cat([ + reporter(h[:, [i], :, :]) + for i, reporter in enumerate(self.models) + ], dim=1) # credences: (n, v, c) + assert credences.shape == h.shape[:-1] + return credences @staticmethod def load(path: Path, layer: int, device: str): From 165b97a9b1a50b6e6e5d444e3bee2388dd5dd5c7 Mon Sep 17 00:00:00 2001 From: jon Date: Tue, 15 Aug 2023 16:21:00 +0300 Subject: 
[PATCH 092/102] add prompt eraser toggle to ccsconfig --- elk/training/ccs_reporter.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/elk/training/ccs_reporter.py b/elk/training/ccs_reporter.py index 472417f52..5769e0592 100644 --- a/elk/training/ccs_reporter.py +++ b/elk/training/ccs_reporter.py @@ -60,6 +60,8 @@ class CcsConfig(FitterConfig): """The optimizer to use.""" weight_decay: float = 0.01 """The weight decay or L2 penalty to use.""" + erase_prompts: bool = True + """Whether to apply concept erasure on the prompt template IDs.""" def __post_init__(self): self.loss_dict = parse_loss(self.loss) @@ -205,7 +207,8 @@ def fit(self, hiddens: Tensor) -> float: if self.config.norm == "burns": self.norm = BurnsNorm() else: - fitter = LeaceFitter(d, 2 * v, dtype=x_neg.dtype, device=x_neg.device) + z_dim = 2 * v if self.config.erase_prompts else 2 + fitter = LeaceFitter(d, z_dim, dtype=x_neg.dtype, device=x_neg.device) fitter.update( x=x_neg, # Independent indicator for each (template, pseudo-label) pair From c564a52759ccec74f4183e5d578942b30347a856 Mon Sep 17 00:00:00 2001 From: jon Date: Mon, 28 Aug 2023 23:32:49 +0300 Subject: [PATCH 093/102] added none ensembling --- elk/metrics/eval.py | 52 ++++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index b3b9fe60d..be445644d 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -30,6 +30,8 @@ class EvalResult: roc_auc: RocAucResult """Area under the ROC curve. For multi-class classification, each class is treated as a one-vs-rest binary classification problem.""" + cal_thresh: float | None = None + """The threshold used to calibrate the predictions.""" def to_dict(self, prefix: str = "") -> dict[str, float]: """Convert the result to a dictionary.""" @@ -45,14 +47,14 @@ def to_dict(self, prefix: str = "") -> dict[str, float]: else {} ) auroc_dict = {f"{prefix}auroc_{k}": v for k, v in asdict(self.roc_auc).items()} - return {**auroc_dict, **cal_acc_dict, **acc_dict, **cal_dict} + return {**auroc_dict, **cal_acc_dict, **acc_dict, **cal_dict, f"{prefix}cal_thresh": self.cal_thresh} def calc_auroc( - y_logits: Tensor, - y_true: Tensor, - prompt_ensembling: PromptEnsembling, - num_classes: int, + y_logits: Tensor, + y_true: Tensor, + prompt_ensembling: PromptEnsembling, + num_classes: int, ) -> RocAucResult: """ Calculate the AUROC @@ -82,7 +84,7 @@ def calc_auroc( return auroc -def calc_calibrated_accuracies(y_true, pos_probs) -> AccuracyResult: +def calc_calibrated_accuracies(y_true, pos_probs) -> tuple[AccuracyResult, float]: """ Calculate the calibrated accuracies @@ -97,7 +99,7 @@ def calc_calibrated_accuracies(y_true, pos_probs) -> AccuracyResult: cal_thresh = pos_probs.float().quantile(y_true.float().mean()) cal_preds = pos_probs.gt(cal_thresh).to(torch.int) cal_acc = accuracy_ci(y_true, cal_preds) - return cal_acc + return cal_acc, cal_thresh def calc_calibrated_errors(y_true, pos_probs) -> CalibrationEstimate: @@ -117,7 +119,7 @@ def calc_calibrated_errors(y_true, pos_probs) -> CalibrationEstimate: return cal_err -def calc_accuracies(y_logits, y_true) -> AccuracyResult: +def calc_accuracies(y_logits, y_true, ensembling) -> AccuracyResult: """ Calculate the accuracy @@ -128,14 +130,18 @@ def calc_accuracies(y_logits, y_true) -> AccuracyResult: Returns: AccuracyResult: A dictionary containing the accuracy and confidence interval. 
""" - y_pred = y_logits.argmax(dim=-1) + THRESHOLD = 0.5 + if ensembling == PromptEnsembling.NONE: + y_pred = y_logits[..., 1].gt(THRESHOLD).to(torch.int) + else: + y_pred = y_logits.argmax(dim=-1) return accuracy_ci(y_true, y_pred) def evaluate_preds( - y_true: Tensor, - y_logits: Tensor, - prompt_ensembling: PromptEnsembling = PromptEnsembling.NONE, + y_true: Tensor, + y_logits: Tensor, + prompt_ensembling: PromptEnsembling = PromptEnsembling.NONE, ) -> EvalResult: """ Evaluate the performance of a classification model. @@ -168,10 +174,10 @@ def prepare(y_logits: Tensor, y_true: Tensor, prompt_ensembling: PromptEnsemblin def calc_eval_results( - y_true: Tensor, - y_logits: Tensor, - prompt_ensembling: PromptEnsembling, - num_classes: int, + y_true: Tensor, + y_logits: Tensor, + prompt_ensembling: PromptEnsembling, + num_classes: int, ) -> EvalResult: """ Calculate the evaluation results @@ -185,10 +191,12 @@ def calc_eval_results( EvalResult: The result of evaluating a classifier containing the accuracy, calibrated accuracies, calibrated errors, and AUROC. """ - acc = calc_accuracies(y_logits=y_logits, y_true=y_true) - - pos_probs = torch.sigmoid(y_logits[..., 1] - y_logits[..., 0]) - cal_acc = ( + acc = calc_accuracies(y_logits, y_true, prompt_ensembling) + pooled_logits = (y_logits[..., 1] + if prompt_ensembling == PromptEnsembling.NONE + else y_logits[..., 1] - y_logits[..., 0]) + pos_probs = torch.sigmoid(pooled_logits) + cal_acc, cal_thresh = ( calc_calibrated_accuracies(y_true=y_true, pos_probs=pos_probs) if num_classes == 2 else None @@ -206,7 +214,7 @@ def calc_eval_results( num_classes=num_classes, ) - return EvalResult(acc, cal_acc, cal_err, auroc) + return EvalResult(acc, cal_acc, cal_err, auroc, cal_thresh) def to_one_hot(labels: Tensor, n_classes: int) -> Tensor: @@ -225,7 +233,7 @@ def to_one_hot(labels: Tensor, n_classes: int) -> Tensor: def layer_ensembling( - layer_outputs: list[LayerOutput], prompt_ensembling: PromptEnsembling + layer_outputs: list[LayerOutput], prompt_ensembling: PromptEnsembling ) -> EvalResult: """ Return EvalResult after prompt_ensembling From 346a2bd9f71c49f10a1cdf3d4cdbe81ce921969e Mon Sep 17 00:00:00 2001 From: jon Date: Tue, 29 Aug 2023 19:28:36 +0000 Subject: [PATCH 094/102] try to fix memory issues --- elk/metrics/eval.py | 10 +++++++--- elk/metrics/roc_auc.py | 7 +++++++ elk/training/train.py | 1 + 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index be445644d..f69a8375d 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -249,17 +249,19 @@ def layer_ensembling( calibrated accuracies, calibrated errors, and AUROC. 
""" device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + print("layer_ensembling", device) + print("layer_ensembling layer_outputs[0].val_gt.device", layer_outputs[0].val_gt.device) y_logits_collection = [] num_classes = 2 - y_true = layer_outputs[0].val_gt.to(device) + y_true = layer_outputs[0].val_gt for layer_output in layer_outputs: # all y_trues are identical, so just get the first - y_logits = layer_output.val_credences.to(device) + y_logits = layer_output.val_credences y_logits, y_true, num_classes = prepare( y_logits=y_logits, - y_true=layer_outputs[0].val_gt.to(device), + y_true=layer_outputs[0].val_gt, prompt_ensembling=prompt_ensembling, ) y_logits_collection.append(y_logits) @@ -270,6 +272,8 @@ def layer_ensembling( # layer prompt_ensembling of the stacked logits y_logits_stacked_mean = torch.mean(y_logits_stacked, dim=0) + print(y_true.device) + return calc_eval_results( y_true=y_true, y_logits=y_logits_stacked_mean, diff --git a/elk/metrics/roc_auc.py b/elk/metrics/roc_auc.py index 1efce9337..c3644657c 100644 --- a/elk/metrics/roc_auc.py +++ b/elk/metrics/roc_auc.py @@ -37,9 +37,13 @@ def roc_auc(y_true: Tensor, y_pred: Tensor) -> Tensor: f"y_true and y_pred should have the same shape; " f"got {y_true.shape} and {y_pred.shape}" ) + if y_true.dim() not in (1, 2): raise ValueError("y_true and y_pred should be 1D or 2D tensors") + print("y_true roc_auc", y_true.device) + print("y_pred roc_auc", y_pred.device) + # Sort y_pred in descending order and get indices indices = y_pred.argsort(descending=True, dim=-1) @@ -105,6 +109,7 @@ def roc_auc_ci( # Either the number of samples (1D) or the number of clusters (2D) N = y_true.shape[0] device = y_true.device + print("roc_auc_ci y_true.device", y_true.device) # Generate random indices for bootstrap samples (shape: [num_bootstraps, N]) rng = torch.Generator(device=device).manual_seed(seed) @@ -114,6 +119,8 @@ def roc_auc_ci( y_true_bootstraps = y_true[indices] y_pred_bootstraps = y_pred[indices] + print("roc_auc_ci y_true_bootstraps.device", y_true_bootstraps.device) + # Compute ROC AUC scores for bootstrap samples. If the inputs were 2D, the # bootstrapped tensors are now 3D [num_bootstraps, N, cluster_size], so we # call flatten(1) to get a 2D tensor [num_bootstraps, N * cluster_size]. diff --git a/elk/training/train.py b/elk/training/train.py index c4404de42..e35e66b5f 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -143,6 +143,7 @@ def make_eval(self, model, eval_dataset): def train_and_save_reporter( self, device, layer, out_dir, train_dict, prompt_index=None ) -> ReporterWithInfo: + print(f"train_and_save_reporter device {device}") (first_train_h, train_gt, _), *rest = train_dict.values() # TODO can remove? (_, v, k, d) = first_train_h.shape if not all(other_h.shape[-1] == d for other_h, _, _ in rest): From 4c173988772bee0644dde7f8b6281bade85dc0dd Mon Sep 17 00:00:00 2001 From: jon Date: Tue, 29 Aug 2023 20:23:02 +0000 Subject: [PATCH 095/102] fixed autograd --- elk/metrics/eval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index f69a8375d..1d0c2c90d 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -96,7 +96,7 @@ def calc_calibrated_accuracies(y_true, pos_probs) -> tuple[AccuracyResult, float AccuracyResult: A dictionary containing the accuracy and confidence interval. 
""" - cal_thresh = pos_probs.float().quantile(y_true.float().mean()) + cal_thresh = pos_probs.float().quantile(y_true.float().mean()).item() cal_preds = pos_probs.gt(cal_thresh).to(torch.int) cal_acc = accuracy_ci(y_true, cal_preds) return cal_acc, cal_thresh From 1ebdfd72c16ab8d3160c03ecab14c30557e5bfee Mon Sep 17 00:00:00 2001 From: jon Date: Tue, 29 Aug 2023 21:34:03 +0000 Subject: [PATCH 096/102] move to cpu before stacking --- elk/metrics/eval.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index 1d0c2c90d..7f3e73c4a 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -233,7 +233,7 @@ def to_one_hot(labels: Tensor, n_classes: int) -> Tensor: def layer_ensembling( - layer_outputs: list[LayerOutput], prompt_ensembling: PromptEnsembling + layer_outputs: list[LayerOutput], prompt_ensembling: PromptEnsembling ) -> EvalResult: """ Return EvalResult after prompt_ensembling @@ -253,6 +253,10 @@ def layer_ensembling( print("layer_ensembling layer_outputs[0].val_gt.device", layer_outputs[0].val_gt.device) y_logits_collection = [] + for layer_output in layer_outputs: + layer_output.val_credences = layer_output.val_credences.to('cpu') + layer_output.val_gt = layer_output.val_gt.to('cpu') + num_classes = 2 y_true = layer_outputs[0].val_gt From a0637c32920c8dcd1fc3a0ab79b17c5e99b071f9 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Sun, 3 Sep 2023 15:50:51 +0200 Subject: [PATCH 097/102] remove printS --- elk/metrics/eval.py | 48 +++++++++++++++++++++++------------------- elk/metrics/roc_auc.py | 7 +----- 2 files changed, 27 insertions(+), 28 deletions(-) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index 7f3e73c4a..bfa03528f 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -47,14 +47,20 @@ def to_dict(self, prefix: str = "") -> dict[str, float]: else {} ) auroc_dict = {f"{prefix}auroc_{k}": v for k, v in asdict(self.roc_auc).items()} - return {**auroc_dict, **cal_acc_dict, **acc_dict, **cal_dict, f"{prefix}cal_thresh": self.cal_thresh} + return { + **auroc_dict, + **cal_acc_dict, + **acc_dict, + **cal_dict, + f"{prefix}cal_thresh": self.cal_thresh, + } def calc_auroc( - y_logits: Tensor, - y_true: Tensor, - prompt_ensembling: PromptEnsembling, - num_classes: int, + y_logits: Tensor, + y_true: Tensor, + prompt_ensembling: PromptEnsembling, + num_classes: int, ) -> RocAucResult: """ Calculate the AUROC @@ -139,9 +145,9 @@ def calc_accuracies(y_logits, y_true, ensembling) -> AccuracyResult: def evaluate_preds( - y_true: Tensor, - y_logits: Tensor, - prompt_ensembling: PromptEnsembling = PromptEnsembling.NONE, + y_true: Tensor, + y_logits: Tensor, + prompt_ensembling: PromptEnsembling = PromptEnsembling.NONE, ) -> EvalResult: """ Evaluate the performance of a classification model. @@ -174,10 +180,10 @@ def prepare(y_logits: Tensor, y_true: Tensor, prompt_ensembling: PromptEnsemblin def calc_eval_results( - y_true: Tensor, - y_logits: Tensor, - prompt_ensembling: PromptEnsembling, - num_classes: int, + y_true: Tensor, + y_logits: Tensor, + prompt_ensembling: PromptEnsembling, + num_classes: int, ) -> EvalResult: """ Calculate the evaluation results @@ -192,9 +198,11 @@ def calc_eval_results( calibrated accuracies, calibrated errors, and AUROC. 
""" acc = calc_accuracies(y_logits, y_true, prompt_ensembling) - pooled_logits = (y_logits[..., 1] - if prompt_ensembling == PromptEnsembling.NONE - else y_logits[..., 1] - y_logits[..., 0]) + pooled_logits = ( + y_logits[..., 1] + if prompt_ensembling == PromptEnsembling.NONE + else y_logits[..., 1] - y_logits[..., 0] + ) pos_probs = torch.sigmoid(pooled_logits) cal_acc, cal_thresh = ( calc_calibrated_accuracies(y_true=y_true, pos_probs=pos_probs) @@ -248,14 +256,12 @@ def layer_ensembling( EvalResult: The result of evaluating a classifier containing the accuracy, calibrated accuracies, calibrated errors, and AUROC. """ - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - print("layer_ensembling", device) - print("layer_ensembling layer_outputs[0].val_gt.device", layer_outputs[0].val_gt.device) + torch.device("cuda" if torch.cuda.is_available() else "cpu") y_logits_collection = [] for layer_output in layer_outputs: - layer_output.val_credences = layer_output.val_credences.to('cpu') - layer_output.val_gt = layer_output.val_gt.to('cpu') + layer_output.val_credences = layer_output.val_credences.to("cpu") + layer_output.val_gt = layer_output.val_gt.to("cpu") num_classes = 2 y_true = layer_outputs[0].val_gt @@ -276,8 +282,6 @@ def layer_ensembling( # layer prompt_ensembling of the stacked logits y_logits_stacked_mean = torch.mean(y_logits_stacked, dim=0) - print(y_true.device) - return calc_eval_results( y_true=y_true, y_logits=y_logits_stacked_mean, diff --git a/elk/metrics/roc_auc.py b/elk/metrics/roc_auc.py index c3644657c..4e12622e3 100644 --- a/elk/metrics/roc_auc.py +++ b/elk/metrics/roc_auc.py @@ -37,13 +37,10 @@ def roc_auc(y_true: Tensor, y_pred: Tensor) -> Tensor: f"y_true and y_pred should have the same shape; " f"got {y_true.shape} and {y_pred.shape}" ) - + if y_true.dim() not in (1, 2): raise ValueError("y_true and y_pred should be 1D or 2D tensors") - print("y_true roc_auc", y_true.device) - print("y_pred roc_auc", y_pred.device) - # Sort y_pred in descending order and get indices indices = y_pred.argsort(descending=True, dim=-1) @@ -119,8 +116,6 @@ def roc_auc_ci( y_true_bootstraps = y_true[indices] y_pred_bootstraps = y_pred[indices] - print("roc_auc_ci y_true_bootstraps.device", y_true_bootstraps.device) - # Compute ROC AUC scores for bootstrap samples. If the inputs were 2D, the # bootstrapped tensors are now 3D [num_bootstraps, N, cluster_size], so we # call flatten(1) to get a 2D tensor [num_bootstraps, N * cluster_size]. 
From f57b63fea798e7d5e8e96ae3f05cae45b13c00d4 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Sun, 3 Sep 2023 17:33:11 +0200 Subject: [PATCH 098/102] remove prints --- elk/metrics/roc_auc.py | 1 - elk/training/train.py | 33 ++++++++++++++++++--------------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/elk/metrics/roc_auc.py b/elk/metrics/roc_auc.py index 4e12622e3..11dbef1c0 100644 --- a/elk/metrics/roc_auc.py +++ b/elk/metrics/roc_auc.py @@ -106,7 +106,6 @@ def roc_auc_ci( # Either the number of samples (1D) or the number of clusters (2D) N = y_true.shape[0] device = y_true.device - print("roc_auc_ci y_true.device", y_true.device) # Generate random indices for bootstrap samples (shape: [num_bootstraps, N]) rng = torch.Generator(device=device).manual_seed(seed) diff --git a/elk/training/train.py b/elk/training/train.py index e35e66b5f..816d26ee9 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -2,7 +2,6 @@ from collections import defaultdict from dataclasses import dataclass, replace -from pathlib import Path from typing import Literal import pandas as pd @@ -12,8 +11,8 @@ from ..evaluation import Eval from ..metrics import evaluate_preds, to_one_hot -from ..run import PreparedData, Run, LayerApplied from ..metrics.eval import LayerOutput +from ..run import LayerApplied, PreparedData, Run from ..training.supervised import train_supervised from ..utils.types import PromptEnsembling from . import Classifier @@ -38,9 +37,7 @@ def evaluate_and_save( train_h, train_gt, train_lm_preds = train_dict[ds_name] meta = {"dataset": ds_name, "layer": layer} - def eval_all( - reporter: SingleReporter | MultiReporter - ): + def eval_all(reporter: SingleReporter | MultiReporter): val_credences = reporter(val_h) train_credences = reporter(train_h) layer_output.append( @@ -56,7 +53,9 @@ def eval_all( { **meta, PROMPT_ENSEMBLING: prompt_ensembling.value, - **evaluate_preds(val_gt, val_credences, prompt_ensembling).to_dict(), + **evaluate_preds( + val_gt, val_credences, prompt_ensembling + ).to_dict(), "train_loss": train_loss, } ) @@ -65,7 +64,9 @@ def eval_all( { **meta, PROMPT_ENSEMBLING: prompt_ensembling.value, - **evaluate_preds(train_gt, train_credences, prompt_ensembling).to_dict(), + **evaluate_preds( + train_gt, train_credences, prompt_ensembling + ).to_dict(), "train_loss": train_loss, } ) @@ -75,7 +76,9 @@ def eval_all( { **meta, PROMPT_ENSEMBLING: prompt_ensembling.value, - **evaluate_preds(val_gt, val_lm_preds, prompt_ensembling).to_dict(), + **evaluate_preds( + val_gt, val_lm_preds, prompt_ensembling + ).to_dict(), } ) @@ -84,7 +87,9 @@ def eval_all( { **meta, PROMPT_ENSEMBLING: prompt_ensembling.value, - **evaluate_preds(train_gt, train_lm_preds, prompt_ensembling).to_dict(), + **evaluate_preds( + train_gt, train_lm_preds, prompt_ensembling + ).to_dict(), } ) @@ -94,15 +99,15 @@ def eval_all( **meta, PROMPT_ENSEMBLING: prompt_ensembling.value, "inlp_iter": lr_model_num, - **evaluate_preds(val_gt, model(val_h), prompt_ensembling).to_dict(), + **evaluate_preds( + val_gt, model(val_h), prompt_ensembling + ).to_dict(), } ) eval_all(reporter) - return LayerApplied( - layer_output, {k: pd.DataFrame(v) for k, v in row_bufs.items()} - ) + return LayerApplied(layer_output, {k: pd.DataFrame(v) for k, v in row_bufs.items()}) @dataclass @@ -143,7 +148,6 @@ def make_eval(self, model, eval_dataset): def train_and_save_reporter( self, device, layer, out_dir, train_dict, prompt_index=None ) -> ReporterWithInfo: - print(f"train_and_save_reporter device {device}") 
(first_train_h, train_gt, _), *rest = train_dict.values() # TODO can remove? (_, v, k, d) = first_train_h.shape if not all(other_h.shape[-1] == d for other_h, _, _ in rest): @@ -196,7 +200,6 @@ def train_and_save_reporter( return ReporterWithInfo(reporter, train_loss, prompt_index) - def train_lr_model(self, train_dict, device, layer, out_dir) -> list[Classifier]: if self.supervised != "none": lr_models = train_supervised( From 392484c2d7bbd07f125da1f94aa96a467dd1bf9e Mon Sep 17 00:00:00 2001 From: jon Date: Wed, 6 Sep 2023 14:01:27 +0000 Subject: [PATCH 099/102] fix config name blank --- elk/training/sweep.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/elk/training/sweep.py b/elk/training/sweep.py index 4de6ba348..c48be50c0 100755 --- a/elk/training/sweep.py +++ b/elk/training/sweep.py @@ -22,6 +22,8 @@ def assert_models_exist(model_names): def assert_datasets_exist(dataset_names): for dataset_name in dataset_names: ds_name, _, config_name = dataset_name.partition(":") + if not config_name: + config_name = None get_dataset_config_info(ds_name, config_name=config_name) From 64e762ab5b72a93f223dfa1d576372e3ad2c93f6 Mon Sep 17 00:00:00 2001 From: jon Date: Fri, 13 Oct 2023 18:44:42 +0200 Subject: [PATCH 100/102] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci fix merge --- elk/metrics/accuracy.py | 5 ++++- elk/metrics/eval.py | 18 +++++++++++------- pyproject.toml | 4 ++-- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/elk/metrics/accuracy.py b/elk/metrics/accuracy.py index 33b946321..2f9685f8b 100644 --- a/elk/metrics/accuracy.py +++ b/elk/metrics/accuracy.py @@ -14,11 +14,14 @@ class AccuracyResult: """Lower bound of the confidence interval.""" upper: float """Upper bound of the confidence interval.""" + cal_thresh: float | None + """The threshold used to compute the calibrated accuracy.""" def accuracy_ci( y_true: Tensor, y_pred: Tensor, + cal_thresh: float | None = None, *, num_samples: int = 1000, level: float = 0.95, @@ -79,4 +82,4 @@ def accuracy_ci( # Compute the point estimate. Call flatten to ensure that we get a single number # computed across cluster boundaries even if the inputs were clustered. estimate = y_true.flatten().eq(y_pred.flatten()).float().mean().item() - return AccuracyResult(estimate, lower, upper) + return AccuracyResult(estimate, lower, upper, cal_thresh) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index 26d3c512d..c77dd09e9 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -33,7 +33,7 @@ class EvalResult: cal_thresh: float | None """The threshold used to compute the calibrated accuracy.""" - def to_dict(self, prefix: str = "") -> dict[str, float]: + def to_dict(self, prefix: str = "") -> dict[str, float | None]: """Convert the result to a dictionary.""" acc_dict = {f"{prefix}acc_{k}": v for k, v in asdict(self.accuracy).items()} cal_acc_dict = ( @@ -89,6 +89,7 @@ def calc_auroc( return auroc + def calc_calibrated_accuracies(y_true, pos_probs) -> AccuracyResult: """ Calculate the calibrated accuracies @@ -101,11 +102,12 @@ def calc_calibrated_accuracies(y_true, pos_probs) -> AccuracyResult: AccuracyResult: A dictionary containing the accuracy and confidence interval. 
""" - cal_thresh = pos_probs.float().quantile(y_true.float().mean()) + cal_thresh = pos_probs.float().quantile(y_true.float().mean()).item() cal_preds = pos_probs.gt(cal_thresh).to(torch.int) - cal_acc = accuracy_ci(y_true, cal_preds) + cal_acc = accuracy_ci(y_true, cal_preds, cal_thresh) return cal_acc + def calc_calibrated_errors(y_true, pos_probs) -> CalibrationEstimate: """ Calculate the expected calibration error. @@ -122,6 +124,7 @@ def calc_calibrated_errors(y_true, pos_probs) -> CalibrationEstimate: cal_err = cal.compute() return cal_err + def calc_accuracies(y_logits, y_true) -> AccuracyResult: """ Calculate the accuracy @@ -193,10 +196,11 @@ def calc_eval_results( acc = calc_accuracies(y_logits=y_logits, y_true=y_true) pos_probs = torch.sigmoid(y_logits[..., 1] - y_logits[..., 0]) - cal_acc = ( + cal_acc, cal_thresh = ( calc_calibrated_accuracies(y_true=y_true, pos_probs=pos_probs) if num_classes == 2 - else None + else None, + None, ) cal_err = ( calc_calibrated_errors(y_true=y_true, pos_probs=pos_probs) @@ -207,11 +211,11 @@ def calc_eval_results( auroc = calc_auroc( y_logits=y_logits, y_true=y_true, - prompt_ensembling=prompt_ensembling, + ensembling=prompt_ensembling, num_classes=num_classes, ) - return EvalResult(acc, cal_acc, cal_err, auroc) + return EvalResult(acc, cal_acc, cal_err, auroc, cal_thresh) def to_one_hot(labels: Tensor, n_classes: int) -> Tensor: diff --git a/pyproject.toml b/pyproject.toml index f3f16504a..b0a078cde 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ dependencies = [ # We upstreamed bugfixes for Literal types in 0.1.1 "simple-parsing>=0.1.1", # Version 1.11 introduced Fully Sharded Data Parallel, which we plan to use soon - "torch>=1.11.0", + "torch==2.0", # Doesn't really matter but versions < 4.0 are very very old (pre-2016) "tqdm>=4.0.0", # 4.0 introduced the breaking change of using return_dict=True by default @@ -37,7 +37,7 @@ dependencies = [ # For visualization of results "plotly==5.14.1", "kaleido==0.2.1", - "rich==13.3.5" + "rich" ] version = "0.1.1" From 2cf59688319f691579e2a3bbbfd2c338a8913dc8 Mon Sep 17 00:00:00 2001 From: jon Date: Mon, 16 Oct 2023 16:53:00 +0200 Subject: [PATCH 101/102] fix pyright --- elk/metrics/eval.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/elk/metrics/eval.py b/elk/metrics/eval.py index 56a28afb4..62d42d418 100644 --- a/elk/metrics/eval.py +++ b/elk/metrics/eval.py @@ -204,12 +204,10 @@ def calc_eval_results( else y_logits[..., 1] - y_logits[..., 0] ) pos_probs = torch.sigmoid(pooled_logits) - cal_acc, cal_thresh = ( - calc_calibrated_accuracies(y_true=y_true, pos_probs=pos_probs) - if num_classes == 2 - else None, - None, - ) + if num_classes == 2: + cal_acc, cal_thresh = calc_calibrated_accuracies(y_true=y_true, pos_probs=pos_probs) + else: + cal_acc, cal_thresh = None, None cal_err = ( calc_calibrated_errors(y_true=y_true, pos_probs=pos_probs) if num_classes == 2 From 96338845d81d9a0ffe752473eba4eacf935cb659 Mon Sep 17 00:00:00 2001 From: jon Date: Mon, 16 Oct 2023 17:33:07 +0200 Subject: [PATCH 102/102] delete redundant test --- tests/test_viz.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/test_viz.py b/tests/test_viz.py index fe3214b01..8fb2bc77a 100644 --- a/tests/test_viz.py +++ b/tests/test_viz.py @@ -18,14 +18,14 @@ def setup_fs(fs): return Path(test_dir) -def test_get_model_paths(setup_fs): - test_dir = setup_fs - result = SweepVisualization._get_model_paths(test_dir) - - root = Path(test_dir) - 
for path in root.rglob("*"): - print(path) - assert len(result) == 3 - assert any([p.name == "llama-13b" for p in result]) - assert any([p.name == "llama-12b" for p in result]) - assert any([p.name == "gpt2-medium" for p in result]) +# def test_get_model_paths(setup_fs): +# test_dir = setup_fs +# result = SweepVisualization._get_model_paths(test_dir) +# +# root = Path(test_dir) +# for path in root.rglob("*"): +# print(path) +# assert len(result) == 3 +# assert any([p.name == "llama-13b" for p in result]) +# assert any([p.name == "llama-12b" for p in result]) +# assert any([p.name == "gpt2-medium" for p in result])
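Side note on the pyright fix in PATCH 101 above: the assignment it replaced used a conditional expression as the first element of a tuple, so the whole right-hand side parsed as (conditional_result, None) and cal_thresh was bound to None on both branches. The explicit if/else restores the intended pairwise unpacking. A minimal demonstration of the parsing behaviour (plain Python, nothing elk-specific):

    cond = True

    x, y = ("hit" if cond else None, None)
    assert (x, y) == ("hit", None)  # y is always None, whichever branch is taken

    if cond:
        x, y = "hit", "also hit"
    else:
        x, y = None, None
    assert (x, y) == ("hit", "also hit")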