From c79f47d9127092d80a7c973f90bb1c8f5f6f4dae Mon Sep 17 00:00:00 2001 From: Alex Mallen Date: Thu, 27 Apr 2023 06:00:24 +0000 Subject: [PATCH 01/10] reduced reporter filesize by 4x; still unsure why the pickle file stores 1 remaining cov matrix --- elk/training/eigen_reporter.py | 38 +++++++++++++++++++++++++++++++--- elk/training/train.py | 3 +-- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/elk/training/eigen_reporter.py b/elk/training/eigen_reporter.py index fc122b45..ae2e8dcf 100644 --- a/elk/training/eigen_reporter.py +++ b/elk/training/eigen_reporter.py @@ -1,6 +1,7 @@ """An ELK reporter network.""" from dataclasses import dataclass +from pathlib import Path from typing import Optional import torch @@ -67,9 +68,9 @@ class EigenReporter(Reporter): config: EigenReporterConfig - intercluster_cov_M2: Tensor # variance - intracluster_cov: Tensor # invariance - contrastive_xcov_M2: Tensor # negative covariance + intercluster_cov_M2: Tensor | None # variance + intracluster_cov: Tensor | None # invariance + contrastive_xcov_M2: Tensor | None # negative covariance n: Tensor class_means: Tensor | None weight: Tensor @@ -147,13 +148,33 @@ def consistency(self) -> Tensor: def clear(self) -> None: """Clear the running statistics of the reporter.""" + assert ( + self.contrastive_xcov_M2 is not None + and self.intercluster_cov_M2 is not None + and self.intracluster_cov is not None + ), "Covariance matrices have been deleted" self.contrastive_xcov_M2.zero_() self.intracluster_cov.zero_() self.intercluster_cov_M2.zero_() self.n.zero_() + def delete_stats(self) -> None: + """Delete the running covariance matrices. + + This is useful for saving memory when we're done training the reporter. 
+ """ + self.contrastive_xcov_M2 = None + self.intercluster_cov_M2 = None + self.intracluster_cov = None + @torch.no_grad() def update(self, hiddens: Tensor) -> None: + assert ( + self.contrastive_xcov_M2 is not None + and self.intercluster_cov_M2 is not None + and self.intracluster_cov is not None + ), "Covariance matrices have been deleted" + (n, _, k, d) = hiddens.shape # Sanity checks @@ -206,6 +227,11 @@ def update(self, hiddens: Tensor) -> None: def fit_streaming(self, truncated: bool = False) -> float: """Fit the probe using the current streaming statistics.""" inv_weight = 1 - self.config.neg_cov_weight + assert ( + self.contrastive_xcov_M2 is not None + and self.intercluster_cov_M2 is not None + and self.intracluster_cov is not None + ), "Covariance matrices have been deleted" A = ( self.config.var_weight * self.intercluster_cov - inv_weight * self.intracluster_cov @@ -285,3 +311,9 @@ def closure(): return float(loss) opt.step(closure) + + def save(self, path: Path | str, save_stats=False): + # TODO: this method will save separate JSON and PT files + if not save_stats: + self.delete_stats() + super().save(path) diff --git a/elk/training/train.py b/elk/training/train.py index ddecc06f..ccda32bb 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -105,8 +105,7 @@ def apply_to_layer( raise ValueError(f"Unknown reporter config type: {type(self.net)}") # Save reporter checkpoint to disk - with open(reporter_dir / f"layer_{layer}.pt", "wb") as file: - torch.save(reporter, file) + reporter.save(reporter_dir / f"layer_{layer}.pt") # Fit supervised logistic regression model if self.supervised != "none": From dca33b306f4f52eb8943beeb948dabce0b321de2 Mon Sep 17 00:00:00 2001 From: Alex Mallen Date: Thu, 27 Apr 2023 16:41:19 +0000 Subject: [PATCH 02/10] add save_reporter_stats CLA --- elk/run.py | 2 +- elk/training/eigen_reporter.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/elk/run.py b/elk/run.py index 838c228f..e476b651 
100644 --- a/elk/run.py +++ b/elk/run.py @@ -41,7 +41,7 @@ class Run(ABC, Serializable): concatenated_layer_offset: int = 0 debug: bool = False - min_gpu_mem: int | None = None + min_gpu_mem: int | None = None # in bytes num_gpus: int = -1 out_dir: Path | None = None disable_cache: bool = field(default=False, to_dict=False) diff --git a/elk/training/eigen_reporter.py b/elk/training/eigen_reporter.py index ae2e8dcf..f1424616 100644 --- a/elk/training/eigen_reporter.py +++ b/elk/training/eigen_reporter.py @@ -29,6 +29,7 @@ class EigenReporterConfig(ReporterConfig): neg_cov_weight: float = 0.5 num_heads: int = 1 + save_reporter_stats: bool = False def __post_init__(self): if not (0 <= self.neg_cov_weight <= 1): @@ -312,8 +313,8 @@ def closure(): opt.step(closure) - def save(self, path: Path | str, save_stats=False): + def save(self, path: Path | str): # TODO: this method will save separate JSON and PT files - if not save_stats: + if not self.config.save_reporter_stats: self.delete_stats() super().save(path) From 9705ffb4b48ee42440dc593732f9fe187a3c75f6 Mon Sep 17 00:00:00 2001 From: Alex Mallen Date: Sat, 29 Apr 2023 01:01:27 +0000 Subject: [PATCH 03/10] sweep hparam_step working --- elk/training/sweep.py | 84 +++++++++++++++++++++++++++++-------------- 1 file changed, 57 insertions(+), 27 deletions(-) diff --git a/elk/training/sweep.py b/elk/training/sweep.py index cfe7d4e4..c55ebc52 100644 --- a/elk/training/sweep.py +++ b/elk/training/sweep.py @@ -1,9 +1,12 @@ from copy import deepcopy from dataclasses import InitVar, dataclass +import numpy as np + from ..evaluation.evaluate import Eval from ..extraction import Extract, PromptConfig from ..files import elk_reporter_dir, memorably_named_dir +from ..training.eigen_reporter import EigenReporterConfig from ..utils import colorize from .train import Elicit @@ -18,6 +21,10 @@ class Sweep: pool SST-2 and IMDB together.""" add_pooled: InitVar[bool] = False """Whether to add a dataset that pools all of the other datasets 
together.""" + hparam_step: float = -1.0 + """The step size for hyperparameter sweeps. Performs a 2D + sweep over a and b in (var_weight, inv_weight, neg_cov_weight) = (a, 1 - b, b) + If negative, no hyperparameter sweeps will be performed. Only valid for Eigen.""" name: str | None = None @@ -34,6 +41,13 @@ def __post_init__(self, add_pooled: bool): raise ValueError("No datasets specified") if not self.models: raise ValueError("No models specified") + # can only use hparam_step if we're using an eigen net + if self.hparam_step > 0 and not isinstance( + self.run_template.net, EigenReporterConfig + ): + raise ValueError("Can only use hparam_step with EigenReporterConfig") + elif self.hparam_step > 1: + raise ValueError("hparam_step must be in [0, 1]") # Add an additional dataset that pools all of the datasets together. if add_pooled: @@ -62,40 +76,56 @@ def execute(self): } ) + step = self.hparam_step + weights = np.arange(0.0, 1.0 + step, step) if step > 0 else [None] + for i, model_str in enumerate(self.models): # Magenta color for the model name print(f"\n\033[35m===== {model_str} ({i + 1} of {M}) =====\033[0m") for dataset_str in self.datasets: - out_dir = sweep_dir / model_str / dataset_str - # Allow for multiple datasets to be specified in a single string with # plus signs. This means we can pool datasets together inside of a # single sweep. 
train_datasets = [ds.strip() for ds in dataset_str.split("+")] - run = deepcopy(self.run_template) - run.data.model = model_str - run.data.prompts.datasets = train_datasets - run.out_dir = out_dir - run.execute() - - if len(eval_datasets) > 1: - print(colorize("== Transfer eval ==", "green")) - - # Now evaluate the reporter on the other datasets - for eval_dataset in eval_datasets: - # We already evaluated on this one during training - if eval_dataset in train_datasets: - continue - - data = deepcopy(run.data) - data.model = model_str - data.prompts.datasets = [eval_dataset] - - eval = Eval( - data=data, - source=str(run.out_dir), - out_dir=out_dir, - ) - eval.execute(highlight_color="green") + for var_weight in weights: + for neg_cov_weight in weights: + out_dir = sweep_dir / model_str / dataset_str + + run = deepcopy(self.run_template) + run.data.model = model_str + run.data.prompts.datasets = train_datasets + if var_weight is not None and neg_cov_weight is not None: + assert isinstance(run.net, EigenReporterConfig) + run.net.var_weight = var_weight + run.net.neg_cov_weight = neg_cov_weight + + # Add hyperparameter values to output directory if needed + out_dir /= ( + f"var_weight={var_weight}" + "_neg_cov_weight={neg_cov_weight}" + ) + + run.out_dir = out_dir + run.execute() + + if len(eval_datasets) > 1: + print(colorize("== Transfer eval ==", "green")) + + # Now evaluate the reporter on the other datasets + for eval_dataset in eval_datasets: + # We already evaluated on this one during training + if eval_dataset in train_datasets: + continue + + data = deepcopy(run.data) + data.model = model_str + data.prompts.datasets = [eval_dataset] + + eval = Eval( + data=data, + source=str(run.out_dir), + out_dir=out_dir, + ) + eval.execute(highlight_color="green") From bbebfc011b87d99d4c46ce1ac5c4e0c3e210e3db Mon Sep 17 00:00:00 2001 From: Alex Mallen Date: Sat, 29 Apr 2023 06:02:51 +0000 Subject: [PATCH 04/10] fix out_dir naming, skipping supervised --- 
.pre-commit-config.yaml | 0 elk/training/sweep.py | 3 ++- 2 files changed, 2 insertions(+), 1 deletion(-) mode change 100644 => 100755 .pre-commit-config.yaml mode change 100644 => 100755 elk/training/sweep.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml old mode 100644 new mode 100755 diff --git a/elk/training/sweep.py b/elk/training/sweep.py old mode 100644 new mode 100755 index c55ebc52..84bcafcc --- a/elk/training/sweep.py +++ b/elk/training/sweep.py @@ -104,7 +104,7 @@ def execute(self): # Add hyperparameter values to output directory if needed out_dir /= ( f"var_weight={var_weight}" - "_neg_cov_weight={neg_cov_weight}" + f"_neg_cov_weight={neg_cov_weight}" ) run.out_dir = out_dir @@ -127,5 +127,6 @@ def execute(self): data=data, source=str(run.out_dir), out_dir=out_dir, + skip_supervised=run.supervised == "none", ) eval.execute(highlight_color="green") From 4352986be22d998369bf492bd590f5ede1df5ed3 Mon Sep 17 00:00:00 2001 From: Alex Mallen Date: Sat, 29 Apr 2023 19:34:28 +0000 Subject: [PATCH 05/10] use regular eig as backup for eigh --- elk/training/eigen_reporter.py | 24 +++++++++------- elk/training/sweep.py | 52 ++++++++++++++++++++-------------- 2 files changed, 44 insertions(+), 32 deletions(-) diff --git a/elk/training/eigen_reporter.py b/elk/training/eigen_reporter.py index f1424616..562f0291 100644 --- a/elk/training/eigen_reporter.py +++ b/elk/training/eigen_reporter.py @@ -244,16 +244,20 @@ def fit_streaming(self, truncated: bool = False) -> float: else: try: L, Q = torch.linalg.eigh(A) - except torch.linalg.LinAlgError as e: - # Check if the matrix has non-finite values - if not A.isfinite().all(): - raise ValueError( - "Fitting the reporter failed because the VINC matrix has " - "non-finite entries. Usually this means the hidden states " - "themselves had non-finite values." 
- ) from e - else: - raise e + except torch.linalg.LinAlgError: + try: + L, Q = torch.linalg.eig(A) + L, Q = L.real, Q.real + except torch.linalg.LinAlgError as e: + # Check if the matrix has non-finite values + if not A.isfinite().all(): + raise ValueError( + "Fitting the reporter failed because the VINC matrix has " + "non-finite entries. Usually this means the hidden states " + "themselves had non-finite values." + ) from e + else: + raise e L, Q = L[-self.config.num_heads :], Q[:, -self.config.num_heads :] diff --git a/elk/training/sweep.py b/elk/training/sweep.py index 84bcafcc..f936d492 100755 --- a/elk/training/sweep.py +++ b/elk/training/sweep.py @@ -2,6 +2,7 @@ from dataclasses import InitVar, dataclass import numpy as np +import torch from ..evaluation.evaluate import Eval from ..extraction import Extract, PromptConfig @@ -25,6 +26,8 @@ class Sweep: """The step size for hyperparameter sweeps. Performs a 2D sweep over a and b in (var_weight, inv_weight, neg_cov_weight) = (a, 1 - b, b) If negative, no hyperparameter sweeps will be performed. 
Only valid for Eigen.""" + skip_transfer_eval: bool = False + """Whether to skip transfer eval on every pair of datasets.""" name: str | None = None @@ -108,25 +111,30 @@ def execute(self): ) run.out_dir = out_dir - run.execute() - - if len(eval_datasets) > 1: - print(colorize("== Transfer eval ==", "green")) - - # Now evaluate the reporter on the other datasets - for eval_dataset in eval_datasets: - # We already evaluated on this one during training - if eval_dataset in train_datasets: - continue - - data = deepcopy(run.data) - data.model = model_str - data.prompts.datasets = [eval_dataset] - - eval = Eval( - data=data, - source=str(run.out_dir), - out_dir=out_dir, - skip_supervised=run.supervised == "none", - ) - eval.execute(highlight_color="green") + try: + run.execute() + except torch._C._LinAlgError as e: # type: ignore + print(colorize(f"LinAlgError: {e}", "red")) + continue + + if not self.skip_transfer_eval: + if len(eval_datasets) > 1: + print(colorize("== Transfer eval ==", "green")) + + # Now evaluate the reporter on the other datasets + for eval_dataset in eval_datasets: + # We already evaluated on this one during training + if eval_dataset in train_datasets: + continue + + data = deepcopy(run.data) + data.model = model_str + data.prompts.datasets = [eval_dataset] + + eval = Eval( + data=data, + source=str(run.out_dir), + out_dir=out_dir, + skip_supervised=run.supervised == "none", + ) + eval.execute(highlight_color="green") From 60f6509e4a664c92abcec325bc0d9bc64a5a8706 Mon Sep 17 00:00:00 2001 From: Alex Mallen Date: Sun, 30 Apr 2023 19:55:16 +0000 Subject: [PATCH 06/10] split hparam directories vertically --- elk/training/sweep.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/elk/training/sweep.py b/elk/training/sweep.py index f936d492..22e0426b 100755 --- a/elk/training/sweep.py +++ b/elk/training/sweep.py @@ -105,10 +105,8 @@ def execute(self): run.net.neg_cov_weight = neg_cov_weight # Add hyperparameter values to 
output directory if needed - out_dir /= ( - f"var_weight={var_weight}" - f"_neg_cov_weight={neg_cov_weight}" - ) + out_dir /= f"var_weight={var_weight}" + out_dir /= f"neg_cov_weight={neg_cov_weight}" run.out_dir = out_dir try: From 2817f15ce13fa3ca0278b86d99debb931608b176 Mon Sep 17 00:00:00 2001 From: Alex Mallen Date: Mon, 1 May 2023 06:33:59 +0000 Subject: [PATCH 07/10] remove duplicate line --- elk/training/sweep.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/elk/training/sweep.py b/elk/training/sweep.py index a57a7b0a..1586b652 100755 --- a/elk/training/sweep.py +++ b/elk/training/sweep.py @@ -83,9 +83,6 @@ def execute(self): step = self.hparam_step weights = np.arange(0.0, 1.0 + step, step) if step > 0 else [None] - step = self.hparam_step - weights = np.arange(0.0, 1.0 + step, step) if step > 0 else [None] - for i, model in enumerate(self.models): print(colorize(f"===== {model} ({i + 1} of {M}) =====", "magenta")) From b0193fa93a22d4af9e3013f596af684940e6d208 Mon Sep 17 00:00:00 2001 From: Alex Mallen Date: Mon, 1 May 2023 15:45:42 +0000 Subject: [PATCH 08/10] fix num_variants arg --- elk/extraction/extraction.py | 1 + 1 file changed, 1 insertion(+) diff --git a/elk/extraction/extraction.py b/elk/extraction/extraction.py index 5446cd5c..cecc1cea 100644 --- a/elk/extraction/extraction.py +++ b/elk/extraction/extraction.py @@ -193,6 +193,7 @@ def extract_hiddens( binarize=cfg.binarize, split_type=split_type, template_path=cfg.template_path, + num_variants=cfg.num_variants, rank=rank, world_size=world_size, ) From ae85dbaf8d97acad8bdde39fa7494972c99f620e Mon Sep 17 00:00:00 2001 From: Alex Mallen Date: Mon, 1 May 2023 15:56:57 +0000 Subject: [PATCH 09/10] also pass num_shots into prompt_loader --- elk/extraction/extraction.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/elk/extraction/extraction.py b/elk/extraction/extraction.py index cecc1cea..17a42ffa 100644 --- a/elk/extraction/extraction.py +++ b/elk/extraction/extraction.py 
@@ -191,9 +191,10 @@ def extract_hiddens( prompt_ds = load_prompts( ds_names[0], binarize=cfg.binarize, + num_shots=cfg.num_shots, + num_variants=cfg.num_variants, split_type=split_type, template_path=cfg.template_path, - num_variants=cfg.num_variants, rank=rank, world_size=world_size, ) From c5980a40d7450c90bb036d6b255faa9ce87cd9d2 Mon Sep 17 00:00:00 2001 From: Nora Belrose Date: Fri, 5 May 2023 05:18:37 +0000 Subject: [PATCH 10/10] Remove superfluous assert --- elk/training/eigen_reporter.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/elk/training/eigen_reporter.py b/elk/training/eigen_reporter.py index b0be4bf4..d336dd0a 100644 --- a/elk/training/eigen_reporter.py +++ b/elk/training/eigen_reporter.py @@ -168,12 +168,6 @@ def consistency(self) -> Tensor: @torch.no_grad() def update(self, hiddens: Tensor) -> None: - assert ( - self.contrastive_xcov_M2 is not None - and self.intercluster_cov_M2 is not None - and self.intracluster_cov is not None - ), "Covariance matrices have been deleted" - (n, _, k, d) = hiddens.shape # Sanity checks @@ -231,11 +225,6 @@ def update(self, hiddens: Tensor) -> None: def fit_streaming(self, truncated: bool = False) -> float: """Fit the probe using the current streaming statistics.""" inv_weight = 1 - self.config.neg_cov_weight - assert ( - self.contrastive_xcov_M2 is not None - and self.intercluster_cov_M2 is not None - and self.intracluster_cov is not None - ), "Covariance matrices have been deleted" A = ( self.config.var_weight * self.intercluster_cov - inv_weight * self.intracluster_cov