Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sweep MVP #191

Merged
merged 7 commits into from
Apr 16, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Sweep MVP
  • Loading branch information
norabelrose committed Apr 16, 2023
commit 3e835e48fd5139ccb777f6638d4e570d3d25052a
5 changes: 3 additions & 2 deletions elk/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,22 @@
from simple_parsing import ArgumentParser

from elk.evaluation.evaluate import Eval
from elk.training.sweep import Sweep
from elk.training.train import Elicit


@dataclass
class Command:
"""Top-level CLI command: holds one sub-command config and delegates to it."""

# NOTE(review): this text is a rendered diff, so two versions of the field appear.
# The first annotation looks like the pre-change line and the second (which adds
# `Sweep`) like the post-change line — confirm against the actual file.
command: Elicit | Eval
command: Elicit | Eval | Sweep

# Forward to the wrapped sub-command's own `execute()` and return whatever it returns.
def execute(self):
return self.command.execute()


def run():
parser = ArgumentParser(add_help=False, add_config_path_arg=True)
parser = ArgumentParser(add_help=False)
parser.add_arguments(Command, dest="run")
args = parser.parse_args()
run: Command = args.run
Expand Down
42 changes: 42 additions & 0 deletions elk/training/sweep.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from dataclasses import dataclass

from ..extraction import Extract, PromptConfig
from ..files import elk_reporter_dir, memorably_named_dir
from .train import Elicit


@dataclass
class Sweep:
    """Run `Elicit` once for every (model, dataset) combination.

    Each run writes to ``sweep_dir / model / dataset``, where ``sweep_dir`` is
    the directory returned by ``memorably_named_dir`` for
    ``elk_reporter_dir() / "sweeps"``.
    """

    # Model identifiers; each one is passed to `Extract(model=...)`.
    models: list[str]
    # Dataset identifiers; each one is run on its own as a single-element
    # `PromptConfig(datasets=[...])`.
    datasets: list[str]

    def __post_init__(self) -> None:
        """Reject sweeps that would perform zero runs.

        Raises:
            ValueError: If `models` or `datasets` is empty.
        """
        if not self.models:
            raise ValueError("No models specified")
        if not self.datasets:
            raise ValueError("No datasets specified")

    def execute(self) -> None:
        """Run every (model, dataset) pair sequentially, models in the outer loop."""
        M, D = len(self.models), len(self.datasets)
        # Fixed: the original message ended with a stray extra ")".
        print(f"Starting sweep over {M} models and {D} datasets ({M * D} runs)")

        root_dir = elk_reporter_dir() / "sweeps"
        sweep_dir = memorably_named_dir(root_dir)
        print(f"Saving sweep results to \033[1m{sweep_dir}\033[0m")  # bold

        for i, model_str in enumerate(self.models):
            # Magenta color for the model name
            print(f"\n\033[35m===== {model_str} ({i + 1} of {M}) =====\033[0m")

            for dataset_str in self.datasets:
                # One output directory per (model, dataset) pair.
                out_dir = sweep_dir / model_str / dataset_str

                Elicit(
                    data=Extract(
                        model=model_str,
                        prompts=PromptConfig(
                            datasets=[dataset_str],
                        ),
                    ),
                    out_dir=out_dir,
                ).execute()