train probe per prompt #271

Open · wants to merge 20 commits into base: main
Changes from 1 commit
correctly eval with multiple probes and some renaming
derpyplops committed Jul 20, 2023
commit 51b7d3c59ae45ca58284da81351a3d189cd1dc6b
13 changes: 7 additions & 6 deletions elk/evaluation/evaluate.py
@@ -10,7 +10,7 @@
from ..files import elk_reporter_dir
from ..metrics import evaluate_preds
from ..run import Run, select_data
from ..training.multi_reporter import AnyReporter, MultiReporter
from ..training.multi_reporter import MultiReporter, SingleReporter
from ..utils import Color


@@ -38,11 +38,11 @@
device = self.get_device(devices, world_size)
val_output = self.prepare_data(device, layer, "val")

val_output = select_data(val_output, self.prompt_indices)

Check failure on line 41 in elk/evaluation/evaluate.py (GitHub Actions / run-tests, 3.10 and 3.11, macos-latest):
Argument of type "tuple[int, ...]" cannot be assigned to parameter "prompt_indices" of type "list[int]" in function "select_data"; "tuple[int, ...]" is incompatible with "list[int]" (reportGeneralTypeIssues)

experiment_dir = elk_reporter_dir() / self.source

def load_reporter() -> AnyReporter | MultiReporter:
def load_reporter() -> SingleReporter | MultiReporter:
# check if experiment_dir / "reporters" has .pt files
first = next((experiment_dir / "reporters").iterdir())
if not first.suffix == ".pt":
@@ -58,23 +58,24 @@
row_bufs = defaultdict(list)

def eval_all(
reporter: AnyReporter | MultiReporter,
reporter: SingleReporter | MultiReporter,
prompt_index: int | Literal["multi"] | None = None,
i: int = 0,
):
prompt_index = (

Check failure on line 65 in elk/evaluation/evaluate.py (GitHub Actions / run-tests, 3.10 and 3.11, macos-latest):
Expression of type "dict[str, int | str]" cannot be assigned to declared type "int | Literal['multi'] | None"; "dict[str, int | str]" is incompatible with "int", "None", and "Literal['multi']" (reportGeneralTypeIssues)
{"prompt_index": prompt_index} if prompt_index is not None else {}
)
for ds_name, (val_h, val_gt, _) in val_output.items():
meta = {"dataset": ds_name, "layer": layer}
val_credences = reporter(val_h[:, [i], :, :])

val_credences = reporter(val_h)
for mode in ("none", "partial", "full"):
row_bufs["eval"].append(
{
**meta,
"ensembling": mode,
**evaluate_preds(val_gt, val_credences, mode).to_dict(),
**prompt_index,

Check failure on line 78 in elk/evaluation/evaluate.py (GitHub Actions / run-tests, 3.10 and 3.11, macos-latest):
Expected mapping for dictionary unpack operator (reportGeneralTypeIssues)
}
)

@@ -101,8 +102,8 @@
)

if isinstance(reporter, MultiReporter):
for prompt_index, single_reporter in enumerate(reporter.reporters):
eval_all(single_reporter, prompt_index)
for i, res in enumerate(reporter.reporter_w_infos):
eval_all(res.model, res.prompt_index, i)
eval_all(reporter, "multi")
else:
eval_all(reporter)
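Note: the dispatch above evaluates each per-prompt reporter on its own variant slice (passing its prompt_index and position i) and then the MultiReporter ensemble under the "multi" label. Below is a minimal sketch of that calling pattern using the (n, v, k, d) hidden-state layout from the diff; the dummy classes and fake data are illustrative stand-ins, not the repo's API:

import torch

class DummyReporter:
    """Stand-in for a trained per-prompt probe: maps hidden states to credences."""
    def __call__(self, h: torch.Tensor) -> torch.Tensor:
        # h has shape (n, v, k, d); reduce the hidden dim to get (n, v, k) credences
        return h.mean(dim=-1)

class DummyMultiReporter:
    """Averages its per-prompt reporters' credences, as MultiReporter.__call__ does."""
    def __init__(self, models):
        self.models = models
    def __call__(self, h: torch.Tensor) -> torch.Tensor:
        credences = [m(h[:, [i], :, :]) for i, m in enumerate(self.models)]
        return torch.stack(credences, dim=0).mean(dim=0)

n, v, k, d = 8, 3, 2, 16              # examples, prompt variants, choices, hidden dim
val_h = torch.randn(n, v, k, d)
multi = DummyMultiReporter([DummyReporter() for _ in range(v)])

# Per-prompt evaluation: each reporter sees only its own variant slice.
for i, single in enumerate(multi.models):
    print("prompt", i, single(val_h[:, [i], :, :]).shape)   # (n, 1, k)

# Ensemble evaluation, reported under prompt_index="multi": all variants at once.
print("multi", multi(val_h).shape)                          # (n, 1, k)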
44 changes: 23 additions & 21 deletions elk/training/multi_reporter.py
@@ -6,46 +6,48 @@
from elk.training import CcsReporter
from elk.training.common import Reporter

AnyReporter = CcsReporter | Reporter
SingleReporter = CcsReporter | Reporter


@dataclass
class ReporterTrainResult:
reporter: AnyReporter
train_loss: float | None
prompt_index: int | None
class ReporterWithInfo: # I don't love this name but I have no choice because
# of the other Reporter
model: SingleReporter
train_loss: float | None = None
prompt_index: int | None = None


class MultiReporter:
def __init__(self, reporter_results: list[ReporterTrainResult]):
assert len(reporter_results) > 0, "Must have at least one reporter"
self.reporter_results: list[ReporterTrainResult] = reporter_results
self.reporters = [r.reporter for r in reporter_results]
def __init__(self, reporter: list[ReporterWithInfo]):
assert len(reporter) > 0, "Must have at least one reporter"
self.reporter_w_infos: list[ReporterWithInfo] = reporter
self.models = [r.model for r in reporter]
train_losses = (
[r.train_loss for r in reporter_results]
if reporter_results[0].train_loss is not None
[r.train_loss for r in reporter]
if reporter[0].train_loss is not None
else None
)
self.train_loss = (
sum(train_losses) / len(train_losses) if train_losses is not None else None

Check failure on line 31 in elk/training/multi_reporter.py (GitHub Actions / run-tests, 3.10 and 3.11, macos-latest):
Argument of type "list[float | None]" cannot be assigned to parameter "__iterable" of type "Iterable[_SupportsSumNoDefaultT@sum]" in function "sum"; "float | None" does not satisfy the protocol sum requires because "__add__" and "__radd__" may be absent (reportGeneralTypeIssues)
)

def __call__(self, h):
num_variants = h.shape[1]
assert len(self.reporters) == num_variants
assert len(self.models) == num_variants
credences = []
for i, reporter in enumerate(self.reporters):
for i, reporter in enumerate(self.models):
credences.append(reporter(h[:, [i], :, :]))
return t.stack(credences, dim=0).mean(dim=0)

@staticmethod
def load(path: Path, layer: int, device: str):
prompt_folders = [p for p in path.iterdir() if p.is_dir()]
reporters = []
for folder in prompt_folders:
path = folder / "reporters" / f"layer_{layer}.pt"
reporter = t.load(path, map_location=device)
prompt_index = int(folder.name.split("_")[-1])
reporters.append((reporter, prompt_index))
# TODO for now I don't care about the train losses
return MultiReporter([ReporterTrainResult(r, None, pi) for r, pi in reporters])
reporters = [
(
t.load(folder / "reporters" / f"layer_{layer}.pt", map_location=device),
int(folder.name.split("_")[-1]), # prompt index
)
for folder in prompt_folders
]
# we don't care about the train losses for evaluating
return MultiReporter([ReporterWithInfo(r, None, pi) for r, pi in reporters])
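The pyright failure flagged above on line 31 comes from summing a list[float | None]. One possible way to narrow the type before averaging, shown here only as a hedged illustration rather than the PR's actual fix:

from typing import Optional

def mean_train_loss(losses: list[Optional[float]]) -> Optional[float]:
    """Average per-prompt train losses, or return None when they were not recorded."""
    if any(loss is None for loss in losses):
        return None
    concrete: list[float] = [loss for loss in losses if loss is not None]  # narrowed type
    return sum(concrete) / len(concrete)

print(mean_train_loss([0.5, 0.25, 0.75]))  # 0.5
print(mean_train_loss([None, None]))       # None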
50 changes: 23 additions & 27 deletions elk/training/train.py
@@ -17,12 +17,12 @@
from .ccs_reporter import CcsConfig, CcsReporter
from .common import FitterConfig
from .eigen_reporter import EigenFitter, EigenFitterConfig
from .multi_reporter import AnyReporter, MultiReporter, ReporterTrainResult
from .multi_reporter import MultiReporter, ReporterWithInfo, SingleReporter


def evaluate_and_save(
train_loss: float | None,
reporter: AnyReporter | MultiReporter,
reporter: SingleReporter | MultiReporter,
train_dict: PreparedData,
val_dict: PreparedData,
lr_models: list[Classifier],
@@ -35,7 +35,7 @@
meta = {"dataset": ds_name, "layer": layer}

def eval_all(
reporter: AnyReporter | MultiReporter,
reporter: SingleReporter | MultiReporter,
prompt_index: int | Literal["multi"] | None = None,
i: int = 0,
):
@@ -45,15 +45,17 @@
else:
val_credences = reporter(val_h)
train_credences = reporter(train_h)
prompt_index = {"prompt_index": prompt_index}
prompt_index_dict = (
{"prompt_index": prompt_index} if prompt_index is not None else {}
)
for mode in ("none", "partial", "full"):
row_bufs["eval"].append(
{
**meta,
"ensembling": mode,
**evaluate_preds(val_gt, val_credences, mode).to_dict(),
"train_loss": train_loss,
**prompt_index,
**prompt_index_dict,
}
)

@@ -63,7 +65,7 @@
"ensembling": mode,
**evaluate_preds(train_gt, train_credences, mode).to_dict(),
"train_loss": train_loss,
**prompt_index,
**prompt_index_dict,
}
)

@@ -73,7 +75,7 @@
**meta,
"ensembling": mode,
**evaluate_preds(val_gt, val_lm_preds, mode).to_dict(),
**prompt_index,
**prompt_index_dict,
}
)

@@ -83,7 +85,7 @@
**meta,
"ensembling": mode,
**evaluate_preds(train_gt, train_lm_preds, mode).to_dict(),
**prompt_index,
**prompt_index_dict,
}
)

@@ -94,13 +96,13 @@
"ensembling": mode,
"inlp_iter": lr_model_num,
**evaluate_preds(val_gt, model(val_h), mode).to_dict(),
**prompt_index,
**prompt_index_dict,
}
)

if isinstance(reporter, MultiReporter):
for i, reporter_result in enumerate(reporter.reporter_results):
eval_all(reporter_result.reporter, reporter_result.prompt_index, i)
for i, reporter_result in enumerate(reporter.reporter_w_infos):
eval_all(reporter_result.model, reporter_result.prompt_index, i)
eval_all(reporter, prompt_index="multi")
else:
eval_all(reporter, prompt_index=None)
@@ -145,7 +147,7 @@
# Create a separate function to handle the reporter training.
def train_and_save_reporter(
self, device, layer, out_dir, train_dict, prompt_index=None
) -> ReporterTrainResult:
) -> ReporterWithInfo:
(first_train_h, train_gt, _), *rest = train_dict.values() # TODO can remove?
(_, v, k, d) = first_train_h.shape
if not all(other_h.shape[-1] == d for other_h, _, _ in rest):
@@ -199,7 +201,7 @@
out_dir.mkdir(parents=True, exist_ok=True)
torch.save(reporter, out_dir / f"layer_{layer}.pt")

return ReporterTrainResult(reporter, train_loss, prompt_index)
return ReporterWithInfo(reporter, train_loss, prompt_index)

def train_lr_model(self, train_dict, device, layer, out_dir) -> list[Classifier]:
if self.supervised != "none":
@@ -231,14 +233,14 @@
self.make_reproducible(seed=self.net.seed + layer)
device = self.get_device(devices, world_size)

train_dict = self.prepare_data(device, layer, "train") # prepare data no
# longer does anything on prompt indices
train_dict = self.prepare_data(device, layer, "train")
val_dict = self.prepare_data(device, layer, "val")

(first_train_h, train_gt, _), *rest = train_dict.values()
(_, v, k, d) = first_train_h.shape

if probe_per_prompt:
# self.prompt_indices being () actually means "all prompts"
prompt_indices = self.prompt_indices if self.prompt_indices else range(v)
prompt_train_dicts = [
{
@@ -261,33 +263,27 @@
str_i = str(prompt_index).zfill(2)
base = self.out_dir / "reporters" / f"prompt_{str_i}"
reporters_path = base / "reporters"
lr_path = base / "lr_models"
base / "lr_models"

Check warning on line 266 in elk/training/train.py (GitHub Actions / run-tests, 3.10 and 3.11, macos-latest):
Expression value is unused (reportUnusedExpression)

reporter_train_result = self.train_and_save_reporter(
device, layer, reporters_path, prompt_train_dict, prompt_index
)
results.append(reporter_train_result)

lr_models = self.train_lr_model(
prompt_train_dict, device, layer, lr_path
)

# it is called maybe_multi_reporter because it might be a single reporter
maybe_multi_reporter = MultiReporter(results)
train_loss = maybe_multi_reporter.train_loss

# TODO fix lr_models

else:
reporter_train_result = self.train_and_save_reporter(
device, layer, self.out_dir / "reporters", train_dict
)

maybe_multi_reporter = reporter_train_result.reporter
maybe_multi_reporter = reporter_train_result.model
train_loss = reporter_train_result.train_loss

lr_models = self.train_lr_model(
train_dict, device, layer, self.out_dir / "lr_models"
)
lr_models = self.train_lr_model(
train_dict, device, layer, self.out_dir / "lr_models"
)

return evaluate_and_save(
train_loss, maybe_multi_reporter, train_dict, val_dict, lr_models, layer
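Taken together, the probe_per_prompt branch above slices the training hiddens by prompt variant, trains one reporter per slice, and wraps the results in a MultiReporter whose train_loss is the per-prompt average. A rough sketch of that loop under the same (n, v, k, d) layout; train_single_probe, FittedProbe, and the dict records are hypothetical stand-ins, not the repo's train_and_save_reporter or ReporterWithInfo:

import torch

class FittedProbe:
    """Hypothetical stand-in for a fitted CcsReporter/Reporter."""
    def __call__(self, h: torch.Tensor) -> torch.Tensor:
        return h.mean(dim=-1)

def train_single_probe(h: torch.Tensor) -> tuple[FittedProbe, float]:
    """Pretend to fit a probe on one prompt variant's hiddens and report a loss."""
    return FittedProbe(), float(h.var())    # placeholder "training loss"

n, v, k, d = 32, 4, 2, 64
train_h = torch.randn(n, v, k, d)

results = []                                 # mirrors the list of per-prompt results
for prompt_index in range(v):                # "() means all prompts" per the diff comment
    h_i = train_h[:, [prompt_index], :, :]   # this prompt variant only
    model, loss = train_single_probe(h_i)
    results.append({"model": model, "train_loss": loss, "prompt_index": prompt_index})

# A MultiReporter-style wrapper then ensembles the models and averages the losses.
avg_loss = sum(r["train_loss"] for r in results) / len(results)
print(f"trained {len(results)} per-prompt probes, mean train loss {avg_loss:.3f}")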