
Commit

setting up sae_lens package and auto-deploy with semantic-release
chanind committed Apr 5, 2024
1 parent 773bc02 commit ba41f32
Showing 51 changed files with 2,407 additions and 2,294 deletions.
2 changes: 1 addition & 1 deletion .flake8
@@ -5,4 +5,4 @@ max-complexity = 25
extend-select = E9, F63, F7, F82
show-source = true
statistics = true
-exclude = ./sae_training/geom_median/, ./wandb/*, ./research/wandb/*
+exclude = ./wandb/*, ./research/wandb/*
36 changes: 35 additions & 1 deletion .github/workflows/tests.yml → .github/workflows/build.yml
@@ -54,9 +54,43 @@ jobs:
run: poetry run pyright
- name: Run Unit Tests
# Would use make, but want cov report in xml format
-        run: poetry run pytest -v --cov=sae_training/ --cov-report=term-missing --cov-branch tests/unit --cov-report=xml
+        run: poetry run pytest -v --cov=sae_lens/training/ --cov-report=term-missing --cov-branch tests/unit --cov-report=xml
- name: Upload coverage reports to Codecov
uses: codecov/[email protected]
with:
token: ${{ secrets.CODECOV_TOKEN }}
slug: jbloomAus/mats_sae_training

+  release:
+
+    needs: build
+    permissions:
+      contents: write
+      id-token: write
+    # https://github.community/t/how-do-i-specify-job-dependency-running-in-another-workflow/16482
+    if: github.event_name == 'push' && github.ref == 'refs/heads/main' && !contains(github.event.head_commit.message, 'chore(release):')
+    runs-on: ubuntu-latest
+    concurrency: release
+    environment:
+      name: pypi
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Semantic Release
+        id: release
+        uses: python-semantic-release/[email protected]
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+      - name: Publish package distributions to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        if: steps.release.outputs.released == 'true'
+      - name: Publish package distributions to GitHub Releases
+        uses: python-semantic-release/upload-to-gh-release@main
+        if: steps.release.outputs.released == 'true'
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
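The `if:` guard restricts releases to pushes on `main` and skips semantic-release's own `chore(release):` commits, so publishing cannot trigger itself. python-semantic-release then derives the next version from Conventional Commit prefixes in the history. A minimal sketch of that default bump logic, as I understand it (illustrative only, not the library's code):

```python
# Rough sketch of semantic-release's default bump rules under Conventional
# Commits; the real library parses commit messages far more carefully.
def next_version(current: str, commit_messages: list[str]) -> str | None:
    major, minor, patch = (int(x) for x in current.split("."))
    bump = None
    for msg in commit_messages:
        header = msg.splitlines()[0]
        if "BREAKING CHANGE" in msg or header.split(":")[0].endswith("!"):
            return f"{major + 1}.0.0"  # a breaking change forces a major bump
        if header.startswith("feat"):
            bump = "minor"
        elif header.startswith("fix") and bump is None:
            bump = "patch"
    if bump == "minor":
        return f"{major}.{minor + 1}.0"
    if bump == "patch":
        return f"{major}.{minor}.{patch + 1}"
    return None  # nothing releasable; the publish steps above are skipped


print(next_version("0.1.0", ["feat: add SAEGroup", "fix: typo in README"]))  # 0.2.0
```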
16 changes: 8 additions & 8 deletions README.md
@@ -1,12 +1,12 @@
<img width="1308" alt="Screenshot 2024-03-21 at 3 08 28 pm" src="https://github.com/jbloomAus/mats_sae_training/assets/69127271/209012ec-a779-4036-b4be-7b7739ea87f6">

-# MATS SAE Training
+# SAELens Training
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![build](https://github.com/jbloomAus/mats_sae_training/actions/workflows/tests.yml/badge.svg)](https://github.com/jbloomAus/mats_sae_training/actions/workflows/tests.yml)
[![Deploy Docs](https://github.com/jbloomAus/mats_sae_training/actions/workflows/deploy_docs.yml/badge.svg)](https://github.com/jbloomAus/mats_sae_training/actions/workflows/deploy_docs.yml)
[![codecov](https://codecov.io/gh/jbloomAus/mats_sae_training/graph/badge.svg?token=N83NGH8CGE)](https://codecov.io/gh/jbloomAus/mats_sae_training)

-The MATS SAE training codebase (we'll rename it soon) exists to help researchers:
+The SAELens training codebase (we'll rename it soon) exists to help researchers:
- Train sparse autoencoders.
- Analyse sparse autoencoders and neural network internals.
- Generate insights which make it easier to create safe and aligned AI systems.
@@ -27,7 +27,7 @@ poetry install

```python
import torch
-from sae_training.utils import LMSparseAutoencoderSessionloader
+from sae_lens import LMSparseAutoencoderSessionloader
from huggingface_hub import hf_hub_download

layer = 8 # pick a layer you want.
@@ -88,8 +88,8 @@ Making the code accessible: This involves tasks like turning the code base into

The codebase contains 2 folders worth caring about:

-- sae_training: The main body of the code is here. Everything required for training SAEs.
-- sae_analysis: This code is mainly house the feature visualizer code we use to generate dashboards. It was written by Callum McDougal but I've ported it here with permission and edited it to work with a few different activation types.
+- training: The main body of the code is here. Everything required for training SAEs.
+- analysis: This code is mainly house the feature visualizer code we use to generate dashboards. It was written by Callum McDougal but I've ported it here with permission and edited it to work with a few different activation types.

Some other folders:

@@ -123,8 +123,8 @@ import sys
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["WANDB__SERVICE_WAIT"] = "300"

-from sae_training.config import LanguageModelSAERunnerConfig
-from sae_training.lm_runner import language_model_sae_runner
+from sae_lens.training.config import LanguageModelSAERunnerConfig
+from sae_lens.training.lm_runner import language_model_sae_runner

cfg = LanguageModelSAERunnerConfig(

@@ -186,7 +186,7 @@ Once your SAE is trained, the final SAE weights will be saved to wandb and are l
- An activations loader: from which you can get randomly sampled activations or batches of tokens from the dataset you used to train the SAE. (more on this in the tutorial)

```python
-from sae_training.utils import LMSparseAutoencoderSessionloader
+from sae_lens import LMSparseAutoencoderSessionloader

path ="path/to/sparse_autoencoder.pt"
model, sparse_autoencoder, activations_loader = LMSparseAutoencoderSessionloader.load_session_from_pretrained(
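The snippet above is cut off by the diff viewer mid-call. A hedged reconstruction of how it presumably continues, based only on the names visible above (the argument list is an assumption):

```python
# Hedged reconstruction of the truncated README snippet above; the exact
# signature of load_session_from_pretrained is not visible in this diff.
from sae_lens import LMSparseAutoencoderSessionloader

path = "path/to/sparse_autoencoder.pt"
model, sparse_autoencoder, activations_loader = (
    LMSparseAutoencoderSessionloader.load_session_from_pretrained(path)
)
```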
2 changes: 1 addition & 1 deletion docs/about/citation.md
@@ -2,7 +2,7 @@

```
@misc{bloom2024saetrainingcodebase,
-    title = {MATS SAE Training
+    title = {SAELens Training
author = {Joseph Bloom},
year = {2024},
howpublished = {\url{}},
18 changes: 8 additions & 10 deletions docs/index.md
@@ -1,12 +1,12 @@
<img width="1308" alt="Screenshot 2024-03-21 at 3 08 28 pm" src="https://github.com/jbloomAus/mats_sae_training/assets/69127271/209012ec-a779-4036-b4be-7b7739ea87f6">

-# MATS SAE Training
+# SAELens
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![build](https://github.com/jbloomAus/mats_sae_training/actions/workflows/tests.yml/badge.svg)](https://github.com/jbloomAus/mats_sae_training/actions/workflows/tests.yml)
[![Deploy Docs](https://github.com/jbloomAus/mats_sae_training/actions/workflows/deploy_docs.yml/badge.svg)](https://github.com/jbloomAus/mats_sae_training/actions/workflows/deploy_docs.yml)
[![codecov](https://codecov.io/gh/jbloomAus/mats_sae_training/graph/badge.svg?token=N83NGH8CGE)](https://codecov.io/gh/jbloomAus/mats_sae_training)

-The MATS SAE training codebase (we'll rename it soon) exists to help researchers:
+The SAELens training codebase exists to help researchers:

- Train sparse autoencoders.
- Analyse sparse autoencoders and neural network internals.
@@ -16,12 +16,10 @@ The MATS SAE training codebase (we'll rename it soon) exists to help researchers

## Quick Start

-### Set Up
-
-This project uses [Poetry](https://python-poetry.org/) for dependency management. Ensure Poetry is installed, then to install the dependencies, run:
+### Installation

```
-poetry install
+pip install sae-lens
```

### Loading Sparse Autoencoders from Huggingface
@@ -30,7 +28,7 @@ poetry install

```python
import torch
-from sae_training.utils import LMSparseAutoencoderSessionloader
+from sae_lens import LMSparseAutoencoderSessionloader
from huggingface_hub import hf_hub_download

layer = 8 # pick a layer you want.
@@ -61,8 +59,8 @@ We highly recommend this [tutorial](https://www.lesswrong.com/posts/LnHowHgmrMbW

The codebase contains 2 folders worth caring about:

-- sae_training: The main body of the code is here. Everything required for training SAEs.
-- sae_analysis: This code is mainly house the feature visualizer code we use to generate dashboards. It was written by Callum McDougal but I've ported it here with permission and edited it to work with a few different activation types.
+- training: The main body of the code is here. Everything required for training SAEs.
+- analysis: This code is mainly house the feature visualizer code we use to generate dashboards. It was written by Callum McDougal but I've ported it here with permission and edited it to work with a few different activation types.

Some other folders:

@@ -78,7 +76,7 @@ Once your SAE is trained, the final SAE weights will be saved to wandb and are l
- An activations loader: from which you can get randomly sampled activations or batches of tokens from the dataset you used to train the SAE. (more on this in the tutorial)

```python
-from sae_training.utils import LMSparseAutoencoderSessionloader
+from sae_lens import LMSparseAutoencoderSessionloader

path ="path/to/sparse_autoencoder.pt"
model, sparse_autoencoder, activations_loader = LMSparseAutoencoderSessionloader.load_session_from_pretrained(
4 changes: 2 additions & 2 deletions docs/installation.md
@@ -1,7 +1,7 @@
# Installation

-This project uses [Poetry](https://python-poetry.org/) for dependency management. Ensure Poetry is installed, then to install the dependencies, run:
+This package is available on PyPI. You can install it via pip:

```
-poetry install
+pip install sae-lens
```
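A quick post-install smoke test, offered as a suggestion rather than part of these docs (`__version__` is defined in `sae_lens/__init__.py` later in this commit):

```python
# Sanity check that the renamed package resolves after `pip install sae-lens`.
import sae_lens

print(sae_lens.__version__)  # "0.1.0" as of this commit
```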
8 changes: 4 additions & 4 deletions docs/reference/language_models.md
@@ -1,9 +1,9 @@
# Language Models

-::: sae_training.lm_runner
+::: sae_lens.training.lm_runner

-::: sae_training.train_sae_on_language_model
+::: sae_lens.training.train_sae_on_language_model

-::: sae_training.sparse_autoencoder
+::: sae_lens.training.sparse_autoencoder

-::: sae_training.activations_store
+::: sae_lens.training.activations_store
6 changes: 3 additions & 3 deletions docs/reference/misc.md
@@ -1,7 +1,7 @@
# Misc

-::: sae_training.config
+::: sae_lens.training.config

-::: sae_training.utils
+::: sae_lens.training.session_loader

-::: sae_training.optim
+::: sae_lens.training.optim
2 changes: 1 addition & 1 deletion docs/reference/runners.md
@@ -1,3 +1,3 @@
# Runners

-::: sae_training.lm_runner
+::: sae_lens.training.lm_runner
6 changes: 3 additions & 3 deletions docs/reference/toy_models.md
@@ -1,6 +1,6 @@

-::: sae_training.train_sae_on_toy_model
+::: sae_lens.training.train_sae_on_toy_model

-::: sae_training.toy_model_runner
+::: sae_lens.training.toy_model_runner

-::: sae_training.toy_models
+::: sae_lens.training.toy_models
3 changes: 1 addition & 2 deletions docs/training_saes.md
@@ -23,8 +23,7 @@ import sys
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["WANDB__SERVICE_WAIT"] = "300"

-from sae_training.config import LanguageModelSAERunnerConfig
-from sae_training.lm_runner import language_model_sae_runner
+from sae_lens import LanguageModelSAERunnerConfig, language_model_sae_runner

cfg = LanguageModelSAERunnerConfig(

4 changes: 2 additions & 2 deletions makefile
@@ -15,10 +15,10 @@ test:
make acceptance-test

unit-test:
-	poetry run pytest -v --cov=sae_training/ --cov-report=term-missing --cov-branch tests/unit
+	poetry run pytest -v --cov=sae_lens/training/ --cov-report=term-missing --cov-branch tests/unit

acceptance-test:
-	poetry run pytest -v --cov=sae_training/ --cov-report=term-missing --cov-branch tests/acceptance
+	poetry run pytest -v --cov=sae_lens/training/ --cov-report=term-missing --cov-branch tests/acceptance

check-ci:
make check-format
5 changes: 2 additions & 3 deletions mkdocs.yml
@@ -1,4 +1,4 @@
-site_name: MATS SAE Training
+site_name: SAELens Training
site_description: Docs for Sparse Autoencoder Training Library
site_author: Joseph Bloom
repo_url: http:https://github.com/jbloomAus/mats_sae_training/
@@ -63,8 +63,7 @@ plugins:
- mkdocstrings:
custom_templates: null
watch:
-      - sae_training/ # Replace with the path to your Python code
-      - sae_analysis/ # Replace with the path to your Python code
+      - sae_lens/ # Replace with the path to your Python code


markdown_extensions:
17 changes: 12 additions & 5 deletions pyproject.toml
@@ -1,10 +1,10 @@
[tool.poetry]
name = "mats_sae_training"
name = "sae-lens"
version = "0.1.0"
description = "Training Sparse Autoencoders (SAEs)"
description = "Training and Analyzing Sparse Autoencoders (SAEs)"
authors = ["Joseph Bloom"]
readme = "README.md"
-packages = [{include = "sae_analysis"}, {include = "sae_training"}]
+packages = [{include = "sae_lens"}]

[tool.poetry.dependencies]
python = "^3.10"
@@ -42,8 +42,6 @@ pyright = "^1.1.351"
profile = "black"

[tool.pyright]
exclude = ["./sae_training/geom_median/"]

typeCheckingMode = "strict"
reportMissingTypeStubs = "none"
reportUnknownMemberType = "none"
@@ -59,3 +57,12 @@ reportPrivateUsage = "none"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"


+[tool.semantic_release]
+version_variables = [
+    "sae_lens/__init__.py:__version__",
+    "pyproject.toml:version",
+]
+branch = "main"
+build_command = "pip install poetry && poetry build"
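`version_variables` tells semantic-release which files carry the version string, so a release bumps them together. A rough sketch of the substitution it performs, assuming simple pattern replacement (my illustration, not the library's implementation):

```python
# Approximates semantic-release's version_variables rewriting; the real
# implementation handles quoting and file formats more carefully.
import re
from pathlib import Path


def bump_version_variable(path: str, variable: str, new_version: str) -> None:
    """Rewrite `variable = "x.y.z"`-style assignments in the given file."""
    file = Path(path)
    pattern = re.compile(rf'({re.escape(variable)}\s*=\s*["\']).*?(["\'])')
    file.write_text(pattern.sub(rf"\g<1>{new_version}\g<2>", file.read_text()))


# e.g. bump_version_variable("sae_lens/__init__.py", "__version__", "0.2.0")
```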
24 changes: 24 additions & 0 deletions sae_lens/__init__.py
@@ -0,0 +1,24 @@
__version__ = "0.1.0"

from .training.activations_store import ActivationsStore
from .training.cache_activations_runner import cache_activations_runner
from .training.config import CacheActivationsRunnerConfig, LanguageModelSAERunnerConfig
from .training.evals import run_evals
from .training.lm_runner import language_model_sae_runner
from .training.sae_group import SAEGroup
from .training.session_loader import LMSparseAutoencoderSessionloader
from .training.sparse_autoencoder import SparseAutoencoder
from .training.train_sae_on_language_model import train_sae_group_on_language_model

__all__ = [
"LanguageModelSAERunnerConfig",
"CacheActivationsRunnerConfig",
"LMSparseAutoencoderSessionloader",
"SparseAutoencoder",
"SAEGroup",
"run_evals",
"language_model_sae_runner",
"cache_activations_runner",
"ActivationsStore",
"train_sae_group_on_language_model",
]
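These re-exports flatten the public API, so downstream code can import from the package root instead of deep submodule paths. A short usage sketch assuming only the names exported above (the return value name is a guess):

```python
from sae_lens import LanguageModelSAERunnerConfig, language_model_sae_runner

# Replaces the pre-rename deep imports:
#   from sae_training.config import LanguageModelSAERunnerConfig
#   from sae_training.lm_runner import language_model_sae_runner

cfg = LanguageModelSAERunnerConfig(
    # model, dataset, and SAE hyperparameters as in the README example
)
trained_saes = language_model_sae_runner(cfg)  # return value name assumed
```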
File renamed without changes.
@@ -17,7 +17,7 @@
from tqdm import tqdm

import wandb
-from sae_training.utils import LMSparseAutoencoderSessionloader
+from sae_lens.training.session_loader import LMSparseAutoencoderSessionloader


class DashboardRunner:
@@ -3,7 +3,7 @@
from tqdm import tqdm
from transformer_lens import HookedTransformer

-from sae_training.sparse_autoencoder import SparseAutoencoder
+from sae_lens.training.sparse_autoencoder import SparseAutoencoder


@torch.no_grad()
@@ -13,7 +13,7 @@
from sae_vis.data_storing_fns import FeatureVisParams
from tqdm import tqdm

-from sae_training.utils import LMSparseAutoencoderSessionloader
+from sae_lens.training.session_loader import LMSparseAutoencoderSessionloader

OUT_OF_RANGE_TOKEN = "<|outofrange|>"

2 changes: 1 addition & 1 deletion sae_analysis/toolkit.py → sae_lens/analysis/toolkit.py
@@ -3,7 +3,7 @@
import torch
from huggingface_hub import hf_hub_download

-from sae_training.sparse_autoencoder import SparseAutoencoder
+from sae_lens.training.sparse_autoencoder import SparseAutoencoder


def get_all_gpt2_small_saes() -> (
File renamed without changes.
File renamed without changes.
File renamed without changes.
@@ -5,9 +5,9 @@
from tqdm import tqdm
from transformer_lens import HookedTransformer

-from sae_training.activations_store import ActivationsStore
-from sae_training.config import CacheActivationsRunnerConfig
-from sae_training.utils import shuffle_activations_pairwise
+from sae_lens.training.activations_store import ActivationsStore
+from sae_lens.training.config import CacheActivationsRunnerConfig
+from sae_lens.training.utils import shuffle_activations_pairwise


def cache_activations_runner(cfg: CacheActivationsRunnerConfig):
File renamed without changes.
4 changes: 2 additions & 2 deletions sae_training/evals.py → sae_lens/training/evals.py
@@ -8,8 +8,8 @@
from transformer_lens.utils import get_act_name

import wandb
-from sae_training.activations_store import ActivationsStore
-from sae_training.sparse_autoencoder import SparseAutoencoder
+from sae_lens.training.activations_store import ActivationsStore
+from sae_lens.training.sparse_autoencoder import SparseAutoencoder


@torch.no_grad()
File renamed without changes.