Skip to content

Commit

Permalink
use model name and dataset to organize reporters (EleutherAI#175)
Browse files Browse the repository at this point in the history
  • Loading branch information
norabelrose committed Apr 7, 2023
1 parent 51fab16 commit d83c5cb
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 17 deletions.
17 changes: 2 additions & 15 deletions elk/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import os
import random
from pathlib import Path
from typing import Optional

import yaml
from simple_parsing import Serializable
Expand All @@ -31,7 +30,9 @@ def memorably_named_dir(parent: Path):
with open(resource_dir / "names.json", "r") as f:
names = json.load(f)

parent.mkdir(parents=True, exist_ok=True)
sub_dir = "."

while parent.joinpath(sub_dir).exists():
adj = random.choice(adjectives)
name = random.choice(names)
Expand All @@ -42,20 +43,6 @@ def memorably_named_dir(parent: Path):
return out_dir


def create_output_directory(
    out_dir: Optional[Path] = None, default_root_dir: Optional[Path] = None
) -> Path:
    """Create the output directory for a run and return its path.

    Args:
        out_dir: Directory to use. When `None`, a fresh directory is chosen
            by `memorably_named_dir` underneath `default_root_dir`.
        default_root_dir: Parent for the auto-named directory. When `None`,
            it is resolved with `elk_reporter_dir()` at call time. It is only
            consulted when `out_dir` is `None`.

    Returns:
        The path of the output directory, which exists on return.
    """
    if out_dir is None:
        # Resolve the default root lazily. A call expression in the signature
        # would run once at import time, freezing its result (and triggering
        # any side effects of `elk_reporter_dir`) before anyone asked for it.
        if default_root_dir is None:
            default_root_dir = elk_reporter_dir()
        out_dir = memorably_named_dir(default_root_dir)

    # Create the directory even when the caller supplied the path explicitly.
    out_dir.mkdir(parents=True, exist_ok=True)

    # Print the output directory in bold with escape codes
    print(f"Output directory at \033[1m{out_dir}\033[0m")

    return out_dir


def save_config(cfg: Serializable, out_dir: Path):
"""Save the config to a file"""

Expand Down
15 changes: 13 additions & 2 deletions elk/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from tqdm import tqdm

from elk.extraction.extraction import extract
from elk.files import create_output_directory, save_config, save_meta
from elk.files import elk_reporter_dir, memorably_named_dir, save_config, save_meta
from elk.training.preprocessing import normalize
from elk.utils.csv import Log, write_iterator_to_file
from elk.utils.data_utils import get_layers, select_train_val_splits
Expand All @@ -40,7 +40,18 @@ def __post_init__(self):
# Extract the hidden states first if necessary
self.dataset = extract(self.cfg.data, num_gpus=self.cfg.num_gpus)

self.out_dir = create_output_directory(self.out_dir)
if self.out_dir is None:
# Save in a memorably-named directory inside of
# ELK_REPORTER_DIR/<model_name>/<dataset_name>
ds_name = ", ".join(self.cfg.data.prompts.datasets)
root = elk_reporter_dir() / self.cfg.data.model / ds_name

self.out_dir = memorably_named_dir(root)

# Print the output directory in bold with escape codes
print(f"Output directory at \033[1m{self.out_dir}\033[0m")
self.out_dir.mkdir(parents=True, exist_ok=True)

save_config(self.cfg, self.out_dir)
save_meta(self.dataset, self.out_dir)

Expand Down

0 comments on commit d83c5cb

Please sign in to comment.