
[WIP] Add chat templating for HF models #1287

Closed
wants to merge 31 commits into from
Changes from 1 commit
31 commits
3824828
first stab at wrap_chat_template
daniel-furman Jan 7, 2024
a784417
first stab at wrap_chat_template, strip error fix
daniel-furman Jan 7, 2024
53c68db
first stab at wrap_chat_template, rfind continuation fix
daniel-furman Jan 7, 2024
3e27f9d
first stab at wrap_chat_template, formatting in function
daniel-furman Jan 7, 2024
87dff8b
first stab at wrap_chat_template, print statements in loglikelihood f…
daniel-furman Jan 7, 2024
5c4d9c7
first stab at wrap_chat_template, remove system for now
daniel-furman Jan 7, 2024
e689727
first stab at wrap_chat_template, remove special chars from continuation
daniel-furman Jan 10, 2024
337c084
first stab at wrap_chat_template, remove special chars tab indenting …
daniel-furman Jan 10, 2024
6c68fd1
Merge branch 'EleutherAI:main' into main
daniel-furman Jan 10, 2024
34b32f7
first stab at wrap_chat_template, various
daniel-furman Jan 10, 2024
59e3b17
first stab at wrap_chat_template, various
daniel-furman Jan 10, 2024
7191904
first stab at wrap_chat_template, arc conversation test
daniel-furman Jan 10, 2024
9949e4f
first stab at wrap_chat_template, arc conversation test
daniel-furman Jan 10, 2024
2d3c835
first stab at wrap_chat_template, remove arc experiment
daniel-furman Jan 10, 2024
49f43f9
first stab at wrap_chat_template, various
daniel-furman Jan 10, 2024
021232b
llama test
daniel-furman Jan 11, 2024
b6c75ed
llama test
daniel-furman Jan 11, 2024
047dde8
llama test
daniel-furman Jan 11, 2024
c38b9d2
llama test
daniel-furman Jan 11, 2024
1ea8470
llama test
daniel-furman Jan 11, 2024
2e27053
llama test
daniel-furman Jan 11, 2024
43dee06
llama test
daniel-furman Jan 13, 2024
39a11d0
llama test
daniel-furman Jan 13, 2024
bbcdffb
remove system
daniel-furman Jan 13, 2024
2b40017
Merge branch 'main' into add-chat-templating
haileyschoelkopf Jan 15, 2024
c47de8b
update Instance.args setter
haileyschoelkopf Jan 15, 2024
6ca8ab1
clean up wrap_chat_template + add TODOs
haileyschoelkopf Jan 15, 2024
b8bda47
Merge branch 'main' into add-chat-templating
haileyschoelkopf Jan 15, 2024
68c30aa
push most recent code
haileyschoelkopf Jan 16, 2024
37db34c
Update lm_eval/models/huggingface.py
haileyschoelkopf Feb 27, 2024
495d50b
Update lm_eval/models/huggingface.py
haileyschoelkopf Feb 27, 2024
clean up wrap_chat_template + add TODOs
haileyschoelkopf committed Jan 15, 2024
commit 6ca8ab156bc01e68658412aefda518bd3349a8f2
70 changes: 21 additions & 49 deletions lm_eval/models/huggingface.py
@@ -662,60 +662,35 @@ def tok_decode(self, tokens):
return self.tokenizer.decode(tokens)
elif self.AUTO_MODEL_CLASS == transformers.AutoModelForSeq2SeqLM:
return self.tokenizer.decode(tokens, skip_special_tokens=True)

def tok_wrap_chat_template(self, requests: List[Instance]) -> List[Instance]:

def wrap_chat_template(
self, requests: List[Instance], generate=False
) -> List[Instance]:
"""
Utility for adding chat templates via the apply_chat_template() method
"""
new_reqs = []
for req in requests:
context, continuation = req.args[0].strip(), req.args[1]
chat = [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": context},
]
context = self.tokenizer.apply_chat_template(
chat,
tokenize=False,
add_generation_prompt=True,
)
req.args = (context, continuation)
new_reqs.append(req)
return new_reqs

def tok_wrap_chat_template(self, requests: List[Instance]) -> List[Instance]:
"""
Utility for adding chat templates via the apply_chat_template() method
"""
# TODO: handle repeats > 1 case?
# TODO: raise an error if system prompt not compatible with template
new_reqs = []
for req in requests:
context = req.args[0].strip()
#system_prompt = "You are a helpful assistant."

# arc experiment with few-shot formatting
import re
elements = re.split('Answer:|Question:', context.replace('\n', ' '))
new_elements = []
for element in elements[1:-1]:
new_elements.append(element.strip())
new_elements
#chat = [{"role": "system", "content": system_prompt}]
context, continuation = req.args[0].strip(), req.args[1]
chat = []
for i in range(len(new_elements)):
if i % 2 == 0:
chat.append({"role": "user", "content": f"Question: {new_elements[i]} Answer:"})
else:
chat.append({"role": "assistant", "content": f"{new_elements[i]}"})
if self.system_prompt is not None:
chat += {"role": "system", "content": "You are a helpful assistant."}

chat += ({"role": "user", "content": context},)
# TODO: expose settings for chat formatting:
# - whether some "trigger" / start of assistant response might be placed in assistant's generation for it
# - if few-shot, should the fewshots be placed in separate convo turns? provided in user's single turn?...
context = self.tokenizer.apply_chat_template(
chat,
chat,
tokenize=False,
add_generation_prompt=True,
)
req.args = (context, req.args[1].strip())
req.args = (context, continuation)
new_reqs.append(req)
return new_reqs
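
For reference, a minimal standalone sketch of the templating step this function performs, assuming a tokenizer that ships a chat template (the checkpoint name below is only a placeholder, not something this PR pins):

# Minimal sketch, not part of the diff: shows what apply_chat_template() does
# with a single user turn. The checkpoint is a placeholder; any tokenizer whose
# config defines a chat_template behaves analogously.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")

context = "Question: What is the capital of France?\nAnswer:"
chat = [{"role": "user", "content": context}]

# add_generation_prompt=True appends the template's assistant header, so that
# whatever the model generates (or whatever continuation is scored) next is
# read as the assistant's reply rather than more user text.
prompt = tokenizer.apply_chat_template(
    chat,
    tokenize=False,
    add_generation_prompt=True,
)
print(prompt)
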


def _model_call(self, inps, attn_mask=None, labels=None):
"""
:param inps: torch.Tensor
@@ -796,10 +771,8 @@ def _encode_pair(
return context_enc, continuation_enc

def loglikelihood(self, requests: List[Instance]) -> List[Tuple[float, bool]]:

print("Loglikelihood invoked")
print(f"First element before prompt formatting...\n{requests[0].args}")
requests = self.tok_wrap_chat_template(requests)
requests = self.wrap_chat_template(requests)
print(f"First element after prompt formatting...\n{requests[0].args}")

new_reqs = []
@@ -820,6 +793,8 @@ def loglikelihood(self, requests: List[Instance]) -> List[Tuple[float, bool]]:
def loglikelihood_rolling(self, requests: List[Instance]) -> List[float]:
loglikelihoods = []

# TODO: add a warning that chat templates are ignored for ppl evals

adaptive_batch_size = None
if self.batch_size == "auto":
# using rolling window with maximum context
@@ -896,7 +871,6 @@ def _loglikelihood_tokens(
disable_tqdm: bool = False,
override_bs: int = None,
) -> List[Tuple[float, bool]]:
# TODO: implement some kind of efficient-request-middleware that lumps together requests with the same context
res = []

def _collate(x):
@@ -1075,12 +1049,10 @@ def _collate(x):
return re_ord.get_original(res)

def generate_until(self, requests: List[Instance]) -> List[str]:

print("Generate_until invoked")
print(f"First element before prompt formatting...\n{requests[0].args}")
requests = self.tok_wrap_chat_template(requests)
requests = self.tok_chat_template(requests)
print(f"First element after prompt formatting...\n{requests[0].args}")

res = []

def _collate(x):
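
As a concrete, hypothetical illustration of what this wrapping does to a request's args, assuming a Llama-2-style chat template (the exact markup depends entirely on the model's template):

# Hypothetical before/after for a loglikelihood request; the rendered markup
# below assumes a Llama-2-style template and will differ for other models.
args_before = ("Question: What is the capital of France?\nAnswer:", " Paris")

# After wrap_chat_template(): the context is rendered through the chat
# template (with the generation prompt appended), while the continuation is
# left untouched and is still scored as the tokens following the new context.
args_after = (
    "<s>[INST] Question: What is the capital of France?\nAnswer: [/INST]",
    " Paris",
)
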