Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add chat template #1873

Merged
merged 30 commits into from
Jun 3, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
62df55d
initial chat template
KonradSzafer May 8, 2024
f4902e0
tokenizer attribute check
KonradSzafer May 8, 2024
4b790fa
variable rename
KonradSzafer May 8, 2024
cd9e454
interface update
KonradSzafer May 8, 2024
9dfb58a
system instruction
KonradSzafer May 12, 2024
3369f88
system inst default update
KonradSzafer May 14, 2024
921c4d6
fewshot as multiturn
KonradSzafer May 14, 2024
a4bc484
typing update
KonradSzafer May 14, 2024
d01032d
indent update
KonradSzafer May 14, 2024
8a0ce59
added comments
KonradSzafer May 14, 2024
9bd948d
Merge branch 'main' into chat_template
KonradSzafer May 22, 2024
691e0c0
Adding a fewshot in a more readable way
KonradSzafer May 22, 2024
1162e34
linting
KonradSzafer May 22, 2024
c370665
Moved apply chat template to LM
KonradSzafer May 29, 2024
899a544
multiturn alternation fix
KonradSzafer May 30, 2024
f8771d2
cache key update
KonradSzafer May 30, 2024
52df595
apply chat template method fix
KonradSzafer May 30, 2024
615352c
add system prompt hash to cache_key
KonradSzafer May 30, 2024
d7b8fd9
tokenizer name property for cache_key
KonradSzafer May 30, 2024
6f76522
property name fix
KonradSzafer May 30, 2024
4b0c49a
linting backward compatibility fix
KonradSzafer May 31, 2024
dca730a
docs and errors update
KonradSzafer May 31, 2024
a6d3c05
add documentation on adding chat template compatibility to model_guide
haileyschoelkopf May 31, 2024
16715f2
fewshot as multiturn check fix
KonradSzafer May 31, 2024
0ee30f1
Merge pull request #9 from EleutherAI/chat_template
KonradSzafer May 31, 2024
8ed9d77
saving system inst and chat template in results
KonradSzafer Jun 3, 2024
222dae3
eval tracker update
KonradSzafer Jun 3, 2024
2db5209
docs update
KonradSzafer Jun 3, 2024
54ef077
merge main
KonradSzafer Jun 3, 2024
4bcd0ae
Apply suggestions from code review
clefourrier Jun 3, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions lm_eval/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,10 +276,6 @@ def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None:
args.hf_hub_log_args = f"output_path={args.output_path},token={os.environ.get('HF_TOKEN')},{args.hf_hub_log_args}"
evaluation_tracker_args = simple_parse_args_string(args.hf_hub_log_args)
evaluation_tracker = EvaluationTracker(**evaluation_tracker_args)
evaluation_tracker.general_config_tracker.log_experiment_args(
model_source=args.model,
model_args=args.model_args,
)

if args.predict_only:
args.log_samples = True
Expand Down Expand Up @@ -387,6 +383,7 @@ def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None:
check_integrity=args.check_integrity,
write_out=args.write_out,
log_samples=args.log_samples,
evaluation_tracker=evaluation_tracker,
system_instruction=args.system_instruction,
apply_chat_template=args.apply_chat_template,
fewshot_as_multiturn=args.fewshot_as_multiturn,
Expand Down
10 changes: 10 additions & 0 deletions lm_eval/api/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,16 @@ def tokenizer_name(self) -> str:
"To use this model with chat templates, please implement the 'tokenizer_name' property."
)

@property
def chat_template(self) -> str:
    """Must be defined for LM subclasses that implement Chat Templating.

    Should return the structure of the chat template applied to
    user/assistant messages. This is used only to save in the experiment
    results for reproducibility.

    Raises:
        NotImplementedError: always, in this base class; subclasses that
            support chat templates must override this property.
    """
    # NOTE: scrape artifact ("marked this conversation as resolved" UI text)
    # removed from inside the raise statement to restore valid Python.
    raise NotImplementedError(
        "To use this model with chat templates, please implement the 'chat_template' property."
    )

def set_cache_hook(self, cache_hook) -> None:
self.cache_hook = cache_hook

Expand Down
14 changes: 14 additions & 0 deletions lm_eval/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
print_writeout,
run_task_tests,
)
from lm_eval.logging import EvaluationTracker
from lm_eval.logging.utils import add_env_info, get_git_commit_hash
from lm_eval.tasks import TaskManager, get_task_dict
from lm_eval.utils import (
Expand Down Expand Up @@ -55,6 +56,7 @@ def simple_evaluate(
check_integrity: bool = False,
write_out: bool = False,
log_samples: bool = True,
evaluation_tracker: Optional[EvaluationTracker] = None,
system_instruction: Optional[str] = None,
apply_chat_template: bool = False,
fewshot_as_multiturn: bool = False,
Expand Down Expand Up @@ -156,6 +158,11 @@ def simple_evaluate(
"No tasks specified, or no tasks found. Please verify the task names."
)

if evaluation_tracker is None:
raise ValueError(
"EvaluationTracker not provided. Please provide an instance of EvaluationTracker."
)

KonradSzafer marked this conversation as resolved.
Show resolved Hide resolved
if gen_kwargs is not None:
gen_kwargs = simple_parse_args_string(gen_kwargs)
eval_logger.warning(
Expand Down Expand Up @@ -262,6 +269,13 @@ def simple_evaluate(
if check_integrity:
run_task_tests(task_list=tasks)

evaluation_tracker.general_config_tracker.log_experiment_args(
model_source=model,
model_args=model_args,
system_instruction=system_instruction,
chat_template=lm.chat_template,
)

KonradSzafer marked this conversation as resolved.
Show resolved Hide resolved
results = evaluate(
lm=lm,
task_dict=task_dict,
Expand Down
10 changes: 10 additions & 0 deletions lm_eval/logging/evaluation_tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ class GeneralConfigTracker:
model_source: str = None
model_name: str = None
model_name_sanitized: str = None
system_instruction: str = None
system_instruction_sha: str = None
chat_template: str = None
chat_template_sha: str = None
start_time: float = None
end_time: float = None
total_evaluation_time_seconds: str = None
Expand Down Expand Up @@ -59,13 +63,19 @@ def log_experiment_args(
self,
model_source: str,
model_args: str,
system_instruction: str,
chat_template: str,
) -> None:
"""Logs model parameters and job ID."""
self.model_source = model_source
self.model_name = GeneralConfigTracker._get_model_name(model_args)
self.model_name_sanitized = re.sub(
r"[\"<>:/\|\\?\*\[\]]+", "__", self.model_name
)
self.system_instruction = system_instruction
self.system_instruction_sha = hash_string(system_instruction)
self.chat_template = chat_template
self.chat_template_sha = hash_string(chat_template)

def log_end_time(self) -> None:
"""Logs the end time of the evaluation and calculates the total evaluation time."""
Expand Down
6 changes: 6 additions & 0 deletions lm_eval/models/huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,12 @@ def world_size(self):
def tokenizer_name(self) -> str:
return self.tokenizer.name_or_path.replace("/", "__")

@property
def chat_template(self) -> str:
    """Return the chat template to record in the experiment results.

    Prefers the template explicitly configured on the tokenizer; when the
    tokenizer has no explicit template (attribute is ``None``), falls back
    to the tokenizer's built-in default chat template.
    """
    configured = self.tokenizer.chat_template
    if configured is None:
        return self.tokenizer.default_chat_template
    return configured

def _get_backend(
self,
config: Union[transformers.PretrainedConfig, transformers.AutoConfig],
Expand Down