diff --git a/lm_eval/models/openai_completions.py b/lm_eval/models/openai_completions.py
index 87e4bd2bbe..c8f27ff75d 100644
--- a/lm_eval/models/openai_completions.py
+++ b/lm_eval/models/openai_completions.py
@@ -14,13 +14,11 @@
 from lm_eval.utils import eval_logger
 
 
-def get_result(response, ctxlen: int) -> Tuple[float, bool]:
+def get_result(response) -> Tuple[float, bool]:
     """Process results from OpenAI API response.
 
     :param response: dict
         OpenAI API Response
-    :param ctxlen: int
-        Length of context (so we can slice them away and only keep the predictions)
     :return:
         continuation_logprobs: np.array
             Log probabilities of continuation tokens
@@ -29,9 +27,9 @@ def get_result(response, ctxlen: int) -> Tuple[float, bool]:
     """
     is_greedy = True
     logprobs = response.logprobs.token_logprobs
-    continuation_logprobs = sum(logprobs[ctxlen:])
+    continuation_logprobs = sum(logprobs)
 
-    for i in range(ctxlen, len(response.logprobs.token_logprobs)):
+    for i in range(len(response.logprobs.token_logprobs)):
         token = response.logprobs.token_logprobs[i]
         top_tokens = response.logprobs.top_logprobs[i]
         top_token = max(top_tokens.keys(), key=lambda x: top_tokens[x])
@@ -212,7 +210,6 @@ def _collate(x):
                 client=self.client,
                 model=self.model,
                 prompt=inps,
-                echo=True,
                 max_tokens=0,
                 temperature=0.0,
                 logprobs=10,
@@ -222,7 +219,7 @@ def _collate(x):
             for resp, ctxlen, (cache_key, context_enc, continuation_enc) in zip(
                 response.choices, ctxlens, chunk
            ):
-                answer = get_result(resp, ctxlen)
+                answer = get_result(resp)
                 res.append(answer)
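
For reviewers, a minimal sketch (not part of the diff) of how the updated `get_result` scores a completion choice once the `ctxlen` slicing is gone: every logprob the API returns is summed as the continuation score, and the greedy check runs over all positions. It assumes the remainder of the loop keeps the existing argmax comparison and that the function still returns the `(logprob sum, is_greedy)` pair described in its docstring; the mock object only imitates the fields `get_result` reads (`logprobs.token_logprobs` and `logprobs.top_logprobs`), and the values are made up for illustration.

```python
from types import SimpleNamespace

from lm_eval.models.openai_completions import get_result

# Fake completion choice shaped like the fields get_result reads.
# Two continuation tokens; top_logprobs holds the per-position alternatives.
choice = SimpleNamespace(
    logprobs=SimpleNamespace(
        token_logprobs=[-0.5, -1.2],
        top_logprobs=[
            {" yes": -0.5, " no": -2.0},  # position 0: returned token is the argmax
            {"!": -1.2, ".": -1.4},       # position 1: returned token is the argmax
        ],
    )
)

# With the ctxlen parameter removed, all returned logprobs count as continuation.
logprob_sum, is_greedy = get_result(choice)
print(logprob_sum, is_greedy)  # -1.7 True (sum of all logprobs; argmax matched everywhere)
```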