Use a matrix of python versions for the pipeline (EleutherAI#129)

* Use a matrix of python versions
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci
* Add Python 3.11
* Fix typing issue on Python 3.11; prune deps
* Fix dataclass bug on 3.11

---------

Co-authored-by: James Chua <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Nora Belrose <[email protected]>
michaelbyun · Mar 16, 2023 · 58e3630
1 parent 026af7a
commit 58e3630
Show file tree

Hide file tree

Showing 5 changed files with 18 additions and 39 deletions.
diff --git a/.github/workflows/cpu_ci.yml b/.github/workflows/cpu_ci.yml
@@ -3,35 +3,17 @@ name: "Run CPU Tests"
 on: "push"
 
 jobs:
- run-tests-python3_9:
- runs-on: ubuntu-latest
+ run-tests:
+ strategy:
+ matrix:
+ python-versions: [ 3.9, "3.10", "3.11" ]
+ os: [ ubuntu-latest, macos-latest ]
+ runs-on: ${{ matrix.os }}
  steps:
  - uses: actions/checkout@v3
- - name: Install Python
- uses: actions/setup-python@v4
+ - uses: actions/setup-python@v4
  with:
- python-version: "3.9"
-
- - name: Upgrade Pip
- run: python -m pip install --upgrade pip
-
- - name: Install Dependencies
- run: pip install -e .[dev]
-
- - name: Type Checking
- uses: jakebailey/pyright-action@v1
-
- - name: Run normal tests, excluding GPU tests
- run: pytest -m "not gpu"
-
- run-tests-python3_10:
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v3
- - name: Install Python
- uses: actions/setup-python@v4
- with:
- python-version: "3.10"
+ python-version: ${{ matrix.python-versions }}
 
  - name: Upgrade Pip
  run: python -m pip install --upgrade pip

diff --git a/elk/extraction/prompt_dataset.py b/elk/extraction/prompt_dataset.py
@@ -180,7 +180,7 @@ def __getitem__(self, index: int) -> list[Prompt]:
  """Get a list of prompts for a given predicate"""
  # get self.num_variants unique prompts from the template pool
  template_names = self.rng.sample(
- self.prompter.templates.keys(), self.num_variants
+ list(self.prompter.templates), self.num_variants
  )
 
  example = self.active_split[index]

diff --git a/elk/training/reporter.py b/elk/training/reporter.py
@@ -3,7 +3,6 @@
 from .classifier import Classifier
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from einops import rearrange
 from pathlib import Path
 from simple_parsing.helpers import Serializable
 from sklearn.metrics import roc_auc_score
@@ -121,11 +120,14 @@ def check_separability(
  ).repeat_interleave(val_x0.shape[1])
 
  pseudo_clf.fit(
- rearrange(torch.cat([x0, x1]), "b v d -> (b v) d"), pseudo_train_labels
+ # b v d -> (b v) d
+ torch.cat([x0, x1]).flatten(0, 1),
+ pseudo_train_labels,
  )
  with torch.no_grad():
  pseudo_preds = pseudo_clf(
- rearrange(torch.cat([val_x0, val_x1]), "b v d -> (b v) d")
+ # b v d -> (b v) d
+ torch.cat([val_x0, val_x1]).flatten(0, 1)
  )
  return float(roc_auc_score(pseudo_val_labels.cpu(), pseudo_preds.cpu()))
 

diff --git a/elk/training/train.py b/elk/training/train.py
@@ -12,7 +12,7 @@
 from datasets import DatasetDict
 from functools import partial
 from pathlib import Path
-from simple_parsing import subgroups, Serializable
+from simple_parsing import field, subgroups, Serializable
 from sklearn.metrics import accuracy_score, roc_auc_score
 from torch import Tensor
 from tqdm.auto import tqdm
@@ -41,7 +41,7 @@ class RunConfig(Serializable):
  net: ReporterConfig = subgroups(
  {"ccs": CcsReporterConfig, "eigen": EigenReporterConfig}, default="eigen"
  )
- optim: OptimConfig = OptimConfig()
+ optim: OptimConfig = field(default_factory=OptimConfig)
 
  label_frac: float = 0.0
  max_gpus: int = -1

diff --git a/pyproject.toml b/pyproject.toml
@@ -12,12 +12,8 @@ license = {text = "MIT License"}
 dependencies = [
  # Added Dataset.from_generator() method
  "datasets>=2.5.0",
- # TODO: consider removing this dependency since we only use it once
- "einops",
  # Introduced numpy.typing module
  "numpy>=1.20.0",
- # Introduced type annotations
- "prettytable>=3.5.0",
  # This version is old, but it's needed for certain HF tokenizers to work.
  "protobuf==3.20.*",
  # Basically any version should work as long as it supports the user's CUDA version
@@ -26,8 +22,8 @@ dependencies = [
  "scikit-learn>=1.0.0",
  # Needed for certain HF tokenizers
  "sentencepiece==0.1.97",
- # Support for Literal types was added in 0.0.21
- "simple-parsing>=0.0.21",
+ # We upstreamed bugfixes for Literal types in 0.1.1
+ "simple-parsing>=0.1.1",
  # Version 1.11 introduced Fully Sharded Data Parallel, which we plan to use soon
  "torch>=1.11.0",
  # Doesn't really matter but versions < 4.0 are very very old (pre-2016)
@@ -55,7 +51,6 @@ include = ["elk*"]
 reportPrivateImportUsage = false
 
 [tool.pytest.ini_options]
-markers = ["cpu: Marker for tests that do not depend on GPUs"]
 testpaths = ["tests"]
 
 [tool.setuptools.packages.find]