Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add chat template #1873

Merged
merged 30 commits into from
Jun 3, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
62df55d
initial chat template
KonradSzafer May 8, 2024
f4902e0
tokenizer attribute check
KonradSzafer May 8, 2024
4b790fa
variable rename
KonradSzafer May 8, 2024
cd9e454
interface update
KonradSzafer May 8, 2024
9dfb58a
system instruction
KonradSzafer May 12, 2024
3369f88
system inst default update
KonradSzafer May 14, 2024
921c4d6
fewshot as multiturn
KonradSzafer May 14, 2024
a4bc484
typing update
KonradSzafer May 14, 2024
d01032d
indent update
KonradSzafer May 14, 2024
8a0ce59
added comments
KonradSzafer May 14, 2024
9bd948d
Merge branch 'main' into chat_template
KonradSzafer May 22, 2024
691e0c0
Adding a fewshot in a more readable way
KonradSzafer May 22, 2024
1162e34
linting
KonradSzafer May 22, 2024
c370665
Moved apply chat template to LM
KonradSzafer May 29, 2024
899a544
multiturn alternation fix
KonradSzafer May 30, 2024
f8771d2
cache key update
KonradSzafer May 30, 2024
52df595
apply chat template method fix
KonradSzafer May 30, 2024
615352c
add system prompt hash to cache_key
KonradSzafer May 30, 2024
d7b8fd9
tokenizer name property for cache_key
KonradSzafer May 30, 2024
6f76522
property name fix
KonradSzafer May 30, 2024
4b0c49a
linting backward compatibility fix
KonradSzafer May 31, 2024
dca730a
docs and errors update
KonradSzafer May 31, 2024
a6d3c05
add documentation on adding chat template compatibility to model_guide
haileyschoelkopf May 31, 2024
16715f2
fewshot as multiturn check fix
KonradSzafer May 31, 2024
0ee30f1
Merge pull request #9 from EleutherAI/chat_template
KonradSzafer May 31, 2024
8ed9d77
saving system inst and chat template in results
KonradSzafer Jun 3, 2024
222dae3
eval tracker update
KonradSzafer Jun 3, 2024
2db5209
docs update
KonradSzafer Jun 3, 2024
54ef077
merge main
KonradSzafer Jun 3, 2024
4bcd0ae
Apply suggestions from code review
clefourrier Jun 3, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions lm_eval/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,10 +276,6 @@ def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None:
args.hf_hub_log_args = f"output_path={args.output_path},token={os.environ.get('HF_TOKEN')},{args.hf_hub_log_args}"
evaluation_tracker_args = simple_parse_args_string(args.hf_hub_log_args)
evaluation_tracker = EvaluationTracker(**evaluation_tracker_args)
evaluation_tracker.general_config_tracker.log_experiment_args(
model_source=args.model,
model_args=args.model_args,
)

if args.predict_only:
args.log_samples = True
Expand Down Expand Up @@ -387,6 +383,7 @@ def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None:
check_integrity=args.check_integrity,
write_out=args.write_out,
log_samples=args.log_samples,
evaluation_tracker=evaluation_tracker,
system_instruction=args.system_instruction,
apply_chat_template=args.apply_chat_template,
fewshot_as_multiturn=args.fewshot_as_multiturn,
Expand Down
10 changes: 10 additions & 0 deletions lm_eval/api/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,16 @@ def tokenizer_name(self) -> str:
"To use this model with chat templates, please implement the 'tokenizer_name' property."
)

@property
def chat_template(self) -> str:
    """Must be defined for LM subclasses that implement Chat Templating.

    Should return the structure of the chat template applied to
    user/assistant messages. This is used only to save in the experiment
    results for reproducibility.

    Raises:
        NotImplementedError: always, in this base class; subclasses that
            support chat templates must override this property.
    """
    # NOTE: scrape artifact ("marked this conversation as resolved" UI text)
    # removed from inside the raise statement to restore valid Python.
    raise NotImplementedError(
        "To use this model with chat templates, please implement the 'chat_template' property."
    )

def set_cache_hook(self, cache_hook) -> None:
self.cache_hook = cache_hook

Expand Down
14 changes: 14 additions & 0 deletions lm_eval/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
print_writeout,
run_task_tests,
)
from lm_eval.logging import EvaluationTracker
from lm_eval.logging.utils import add_env_info, get_git_commit_hash
from lm_eval.tasks import TaskManager, get_task_dict
from lm_eval.utils import (
Expand Down Expand Up @@ -55,6 +56,7 @@ def simple_evaluate(
check_integrity: bool = False,
write_out: bool = False,
log_samples: bool = True,
evaluation_tracker: Optional[EvaluationTracker] = None,
system_instruction: Optional[str] = None,
apply_chat_template: bool = False,
fewshot_as_multiturn: bool = False,
Expand Down Expand Up @@ -156,6 +158,11 @@ def simple_evaluate(
"No tasks specified, or no tasks found. Please verify the task names."
)

if evaluation_tracker is None:
raise ValueError(
"EvaluationTracker not provided. Please provide an instance of EvaluationTracker."
)

KonradSzafer marked this conversation as resolved.
Show resolved Hide resolved
if gen_kwargs is not None:
gen_kwargs = simple_parse_args_string(gen_kwargs)
eval_logger.warning(
Expand Down Expand Up @@ -262,6 +269,13 @@ def simple_evaluate(
if check_integrity:
run_task_tests(task_list=tasks)

evaluation_tracker.general_config_tracker.log_experiment_args(
model_source=model,
model_args=model_args,
system_instruction=system_instruction,
chat_template=lm.chat_template,
)

KonradSzafer marked this conversation as resolved.
Show resolved Hide resolved
results = evaluate(
lm=lm,
task_dict=task_dict,
Expand Down
10 changes: 10 additions & 0 deletions lm_eval/logging/evaluation_tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ class GeneralConfigTracker:
model_source: str = None
model_name: str = None
model_name_sanitized: str = None
system_instruction: str = None
system_instruction_sha: str = None
chat_template: str = None
chat_template_sha: str = None
start_time: float = None
end_time: float = None
total_evaluation_time_seconds: str = None
Expand Down Expand Up @@ -59,13 +63,19 @@ def log_experiment_args(
self,
model_source: str,
model_args: str,
system_instruction: str,
chat_template: str,
) -> None:
"""Logs model parameters and job ID."""
self.model_source = model_source
self.model_name = GeneralConfigTracker._get_model_name(model_args)
self.model_name_sanitized = re.sub(
r"[\"<>:/\|\\?\*\[\]]+", "__", self.model_name
)
self.system_instruction = system_instruction
self.system_instruction_sha = hash_string(system_instruction)
self.chat_template = chat_template
self.chat_template_sha = hash_string(chat_template)

def log_end_time(self) -> None:
"""Logs the end time of the evaluation and calculates the total evaluation time."""
Expand Down
6 changes: 6 additions & 0 deletions lm_eval/models/huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,12 @@ def world_size(self):
def tokenizer_name(self) -> str:
return self.tokenizer.name_or_path.replace("/", "__")

@property
def chat_template(self) -> str:
    """Return the chat template to record in the experiment results.

    Prefers the template explicitly configured on the tokenizer; when the
    tokenizer has no explicit template (attribute is ``None``), falls back
    to the tokenizer's built-in default chat template.
    """
    configured = self.tokenizer.chat_template
    if configured is None:
        return self.tokenizer.default_chat_template
    return configured

def _get_backend(
self,
config: Union[transformers.PretrainedConfig, transformers.AutoConfig],
Expand Down