Skip to content

Commit

Permalink
Use a matrix of Python versions for the pipeline (EleutherAI#129)
Browse files: browse the repository at this point in the history
* Use a matrix of Python versions

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Add Python 3.11

* Fix typing issue on Python 3.11; prune deps

* Fix dataclass bug on 3.11

---------

Co-authored-by: James Chua <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Nora Belrose <[email protected]>
  • Loading branch information
4 people committed Mar 16, 2023
1 parent 026af7a commit 58e3630
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 39 deletions.
34 changes: 8 additions & 26 deletions .github/workflows/cpu_ci.yml
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,35 +3,17 @@ name: "Run CPU Tests"
on: "push"

jobs:
run-tests-python3_9:
runs-on: ubuntu-latest
run-tests:
strategy:
matrix:
python-versions: [ 3.9, "3.10", "3.11" ]
os: [ ubuntu-latest, macos-latest ]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v3
- name: Install Python
uses: actions/setup-python@v4
- uses: actions/setup-python@v4
with:
python-version: "3.9"

- name: Upgrade Pip
run: python -m pip install --upgrade pip

- name: Install Dependencies
run: pip install -e .[dev]

- name: Type Checking
uses: jakebailey/pyright-action@v1

- name: Run normal tests, excluding GPU tests
run: pytest -m "not gpu"

run-tests-python3_10:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Install Python
uses: actions/setup-python@v4
with:
python-version: "3.10"
python-version: ${{ matrix.python-versions }}

- name: Upgrade Pip
run: python -m pip install --upgrade pip
Expand Down
2 changes: 1 addition & 1 deletion elk/extraction/prompt_dataset.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -180,7 +180,7 @@ def __getitem__(self, index: int) -> list[Prompt]:
"""Get a list of prompts for a given predicate"""
# get self.num_variants unique prompts from the template pool
template_names = self.rng.sample(
self.prompter.templates.keys(), self.num_variants
list(self.prompter.templates), self.num_variants
)

example = self.active_split[index]
Expand Down
8 changes: 5 additions & 3 deletions elk/training/reporter.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,7 +3,6 @@
from .classifier import Classifier
from abc import ABC, abstractmethod
from dataclasses import dataclass
from einops import rearrange
from pathlib import Path
from simple_parsing.helpers import Serializable
from sklearn.metrics import roc_auc_score
Expand Down Expand Up @@ -121,11 +120,14 @@ def check_separability(
).repeat_interleave(val_x0.shape[1])

pseudo_clf.fit(
rearrange(torch.cat([x0, x1]), "b v d -> (b v) d"), pseudo_train_labels
# b v d -> (b v) d
torch.cat([x0, x1]).flatten(0, 1),
pseudo_train_labels,
)
with torch.no_grad():
pseudo_preds = pseudo_clf(
rearrange(torch.cat([val_x0, val_x1]), "b v d -> (b v) d")
# b v d -> (b v) d
torch.cat([val_x0, val_x1]).flatten(0, 1)
)
return float(roc_auc_score(pseudo_val_labels.cpu(), pseudo_preds.cpu()))

Expand Down
4 changes: 2 additions & 2 deletions elk/training/train.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,7 +12,7 @@
from datasets import DatasetDict
from functools import partial
from pathlib import Path
from simple_parsing import subgroups, Serializable
from simple_parsing import field, subgroups, Serializable
from sklearn.metrics import accuracy_score, roc_auc_score
from torch import Tensor
from tqdm.auto import tqdm
Expand Down Expand Up @@ -41,7 +41,7 @@ class RunConfig(Serializable):
net: ReporterConfig = subgroups(
{"ccs": CcsReporterConfig, "eigen": EigenReporterConfig}, default="eigen"
)
optim: OptimConfig = OptimConfig()
optim: OptimConfig = field(default_factory=OptimConfig)

label_frac: float = 0.0
max_gpus: int = -1
Expand Down
9 changes: 2 additions & 7 deletions pyproject.toml
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,12 +12,8 @@ license = {text = "MIT License"}
dependencies = [
# Added Dataset.from_generator() method
"datasets>=2.5.0",
# TODO: consider removing this dependency since we only use it once
"einops",
# Introduced numpy.typing module
"numpy>=1.20.0",
# Introduced type annotations
"prettytable>=3.5.0",
# This version is old, but it's needed for certain HF tokenizers to work.
"protobuf==3.20.*",
# Basically any version should work as long as it supports the user's CUDA version
Expand All @@ -26,8 +22,8 @@ dependencies = [
"scikit-learn>=1.0.0",
# Needed for certain HF tokenizers
"sentencepiece==0.1.97",
# Support for Literal types was added in 0.0.21
"simple-parsing>=0.0.21",
# We upstreamed bugfixes for Literal types in 0.1.1
"simple-parsing>=0.1.1",
# Version 1.11 introduced Fully Sharded Data Parallel, which we plan to use soon
"torch>=1.11.0",
# Doesn't really matter but versions < 4.0 are very very old (pre-2016)
Expand Down Expand Up @@ -55,7 +51,6 @@ include = ["elk*"]
reportPrivateImportUsage = false

[tool.pytest.ini_options]
markers = ["cpu: Marker for tests that do not depend on GPUs"]
testpaths = ["tests"]

[tool.setuptools.packages.find]
Expand Down

0 comments on commit 58e3630

Please sign in to comment.