Skip to content

Commit

Permalink
Change CI to use a dummy model call
Browse files · Browse the repository at this point in the history
  • Loading branch information
andrew-openai committed Mar 18, 2023
1 parent 1838a5f commit feed6c7
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 8 deletions.
7 changes: 2 additions & 5 deletions .github/workflows/test_eval.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,18 +40,15 @@ jobs:
echo "new_files=$(cat new_files)" >> $GITHUB_ENV
- name: Run oaieval command for each new YAML file
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
files="${{ env.new_files }}"
if [ -n "$files" ]; then
for file in $files; do
echo "Processing $file"
first_key=$(python .github/workflows/parse_yaml.py $file)
echo "Eval Name: $first_key"
# Replace the following line with the actual oaieval command if needed
echo "Running: oaieval gpt-4 $first_key"
oaieval gpt-4 $first_key --max_samples 10
oaieval dummy-chat $first_key --max_samples 10
oaieval dummy-completion $first_key --max_samples 10
done
else
echo "No new YAML files found in evals/registry/evals"
Expand Down
11 changes: 11 additions & 0 deletions evals/cli/oaieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def n_ctx_from_model_name(model_name: str) -> Optional[int]:
"""Returns n_ctx for a given API model name. Model list last updated 2023-03-14."""
# note that for most models, the max tokens is n_ctx + 1
DICT_OF_N_CTX_BY_MODEL_NAME_PREFIX: dict[str, int] = {
"dummy-": 2048,
"gpt-3.5-turbo-": 4096,
"gpt-4-": 8192,
"gpt-4-32k-": 32768,
Expand Down Expand Up @@ -92,9 +93,19 @@ class ModelResolver:
"gpt-4-0314",
"gpt-4-32k",
"gpt-4-32k-0314",
"dummy-chat",
}

DUMMY_MODELS = {
"dummy-chat",
"dummy-completion",
}

def resolve(self, name: str) -> ModelSpec:
if name in self.DUMMY_MODELS:
result = ModelSpec(name=name, model=name, is_chat=(name in self.CHAT_MODELS))
return result

if name in self.api_model_ids:
result = ModelSpec(
name=name,
Expand Down
11 changes: 8 additions & 3 deletions evals/elsuite/modelgraded/classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,6 @@
""".strip(),
}

EVAL_MODELSPEC = ModelSpec(name="gpt-3.5-turbo", model="gpt-3.5-turbo", is_chat=True)


def choice_to_str(choice_strings: Iterable[str]) -> str:
"""Return a string of choices, e.g. '"Yes" or "No" or "Maybe"'."""
Expand Down Expand Up @@ -119,6 +117,13 @@ def __init__(
self.multicomp_temperature = multicomp_temperature
self.samples_renamings = samples_renamings or {}

if self.model_spec.name == "dummy-completion" or self.model_spec.name == "dummy-chat":
self.eval_modelspec = self.model_spec
else:
self.eval_modelspec = ModelSpec(
name="gpt-3.5-turbo", model="gpt-3.5-turbo", is_chat=True
)

"""import prompt and set attributes"""
modelgraded_specs = load_modelgraded_specs(modelgraded_spec_file)

Expand Down Expand Up @@ -254,7 +259,7 @@ def eval_sample(self, test_sample: dict, rng: Random) -> None:
metrics = {}
evaluate = PromptFn(
self.prompt,
model_spec=EVAL_MODELSPEC,
model_spec=self.eval_modelspec,
max_tokens=self.max_tokens,
)
eval_kwargs = dict(**completions, **test_sample)
Expand Down
41 changes: 41 additions & 0 deletions evals/utils/api_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,41 @@
import openai


def generate_dummy_chat_completion():
    """Return a canned payload shaped like an ``openai.ChatCompletion.create`` result.

    Used in place of a real API call for the ``dummy-chat`` model so evals
    can run end-to-end (e.g. in CI) without network access or credentials.
    """
    dummy_choice = {
        "message": {"role": "assistant", "content": "This is a dummy response."},
        "finish_reason": "stop",
        "index": 0,
    }
    return {
        "id": "dummy-id",
        "object": "chat.completion",
        "created": 12345,
        "model": "dummy-chat",
        "usage": {"prompt_tokens": 56, "completion_tokens": 6, "total_tokens": 62},
        "choices": [dummy_choice],
    }


def generate_dummy_completion():
    """Return a canned payload shaped like an ``openai.Completion.create`` result.

    Counterpart of ``generate_dummy_chat_completion`` for the
    ``dummy-completion`` (non-chat) model; lets evals run without hitting
    the OpenAI API.
    """
    dummy_choice = {
        "text": "This is a dummy response.",
        "index": 0,
        "logprobs": None,
        "finish_reason": "stop",
    }
    return {
        "id": "dummy-id",
        "object": "text_completion",
        "created": 12345,
        "model": "dummy-completion",
        "usage": {"prompt_tokens": 5, "completion_tokens": 6, "total_tokens": 11},
        "choices": [dummy_choice],
    }


@backoff.on_exception(
wait_gen=backoff.expo,
exception=(
Expand All @@ -24,6 +59,9 @@ def openai_completion_create_retrying(*args, **kwargs):
Helper function for creating a completion.
`args` and `kwargs` match what is accepted by `openai.Completion.create`.
"""
if kwargs["model"] == "dummy-completion":
return generate_dummy_completion()

result = openai.Completion.create(*args, **kwargs)
if "error" in result:
logging.warning(result)
Expand All @@ -48,6 +86,9 @@ def openai_chat_completion_create_retrying(*args, **kwargs):
Helper function for creating a chat completion.
`args` and `kwargs` match what is accepted by `openai.ChatCompletion.create`.
"""
if kwargs["model"] == "dummy-chat":
return generate_dummy_chat_completion()

result = openai.ChatCompletion.create(*args, **kwargs)
if "error" in result:
logging.warning(result)
Expand Down

0 comments on commit feed6c7

Please sign in to comment.