Refactor & rename lanczos_eigsh for convergence, correctness, & speed (EleutherAI#164)

* Use a different default for ncv; throw an error when not converged
* truncated_eigh now works
* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
michaelbyun · Apr 7, 2023 · 51fab16 · 51fab16
1 parent be4980c
commit 51fab16
Show file tree

Hide file tree

Showing 6 changed files with 310 additions and 272 deletions.
diff --git a/elk/__init__.py b/elk/__init__.py
@@ -1,3 +1,11 @@
 from .extraction import Extract, extract_hiddens
+from .training import EigenReporter, EigenReporterConfig
+from .truncated_eigh import truncated_eigh
 
-__all__ = ["extract_hiddens", "Extract"]
+__all__ = [
+ "EigenReporter",
+ "EigenReporterConfig",
+ "extract_hiddens",
+ "Extract",
+ "truncated_eigh",
+]
diff --git a/elk/eigsh.py b/elk/eigsh.py
diff --git a/elk/training/eigen_reporter.py b/elk/training/eigen_reporter.py
@@ -2,12 +2,13 @@
 
 from dataclasses import dataclass
 from typing import Optional
+from warnings import warn
 
 import torch
 from torch import Tensor, nn, optim
 
-from ..eigsh import lanczos_eigsh
 from ..math_util import cov_mean_fused
+from ..truncated_eigh import ConvergenceError, truncated_eigh
 from .reporter import Reporter, ReporterConfig
 
 
@@ -169,20 +170,26 @@ def update(self, x_pos: Tensor, x_neg: Tensor) -> None:
  self.contrastive_xcov_M2.addmm_(neg_delta.mT, pos_delta2)
  self.contrastive_xcov_M2.addmm_(pos_delta.mT, neg_delta2)
 
- def fit_streaming(self, warm_start: bool = False) -> float:
+ def fit_streaming(self) -> float:
  """Fit the probe using the current streaming statistics."""
  A = (
  self.config.var_weight * self.intercluster_cov
  - self.config.inv_weight * self.intracluster_cov
  - self.config.neg_cov_weight * self.contrastive_xcov
  )
- v0 = self.weight.T.squeeze() if warm_start else None
 
- # We use "LA" (largest algebraic) instead of "LM" (largest magnitude) to
- # ensure that the eigenvalue is positive and not a large negative one
- L, Q = lanczos_eigsh(A, k=self.config.num_heads, v0=v0, which="LA")
- self.weight.data = Q.T
+ try:
+ L, Q = truncated_eigh(A, k=self.config.num_heads)
+ except ConvergenceError:
+ warn(
+ "Truncated eigendecomposition failed to converge. Falling back on "
+ "PyTorch's dense eigensolver."
+ )
+
+ L, Q = torch.linalg.eigh(A)
+ L, Q = L[-self.config.num_heads :], Q[:, -self.config.num_heads :]
 
+ self.weight.data = Q.T
  return -float(L[-1])
 
  def fit(