forked from openai/evals
-
Notifications
You must be signed in to change notification settings - Fork 0
/
base.py
152 lines (123 loc) · 3.89 KB
/
base.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
"""
This file defines the base specifications for models, evals, and runs. Running
evals and most development work should not require familiarity with this file.
"""
import base64
import datetime
import os
from typing import TYPE_CHECKING, Any, Dict, Mapping, Optional, Sequence
if TYPE_CHECKING:
from dataclasses import dataclass
else:
from pydantic.dataclasses import dataclass
@dataclass
class ModelSpec:
    """
    Specification for a model.

    `model` is required and validated in `__post_init__`; `extra_options` and
    `headers` are normalized to empty dicts so downstream code can assume
    mappings are always present.
    """

    name: str  # human-readable identifier for this spec
    model: Optional[str] = None  # underlying model name; required (validated in __post_init__)
    is_chat: bool = False  # True if the model uses the chat interface
    encoding: Optional[str] = None
    organization: Optional[str] = None
    api_key: Optional[str] = None
    extra_options: Optional[Mapping[str, Any]] = None  # normalized to {} after init
    headers: Optional[Mapping[str, Any]] = None  # normalized to {} after init
    strip_completion: bool = True
    n_ctx: Optional[int] = None  # context window size, if known
    format: Optional[str] = None
    key: Optional[str] = None
    group: Optional[str] = None

    def __post_init__(self):
        # Normalize optional mappings so callers never see None.
        if self.extra_options is None:
            self.extra_options = {}
        if self.headers is None:
            self.headers = {}
        if self.model is None:
            # Fixed: was an f-string with no placeholders.
            raise ValueError("Must specify a model")
@dataclass
class BaseEvalSpec:
    """
    Specification for a base eval.
    """

    id: Optional[str] = None
    metrics: Optional[Sequence[str]] = None
    description: Optional[str] = None
    disclaimer: Optional[str] = None
    # True if higher values are better, False if lower values are better.
    # This should really be part of a metric, but it's easier to put it here.
    # (Fixed: this text was a floating string literal — a no-op statement
    # placed before the field it describes — now a real comment.)
    higher_is_better: bool = True
    key: Optional[str] = None
    group: Optional[str] = None
@dataclass
class EvalSpec:
    """
    Specification for an eval.
    """

    cls: str  # presumably an import path / registry name for the eval class — confirm with the loader
    args: Optional[Dict[str, Any]] = None  # keyword arguments forwarded when instantiating the eval
    key: Optional[str] = None
    group: Optional[str] = None
@dataclass
class EvalSetSpec:
    """
    Specification for an eval set.
    """

    evals: Sequence[str]  # names of the evals belonging to this set
    key: Optional[str] = None
    group: Optional[str] = None
@dataclass
class ModelSpecs:
    """
    The collection of models used by a run: completion model(s) plus optional
    embedding and ranking models.

    Fields carry a trailing underscore so the unsuffixed property accessors
    can validate presence before returning.
    """

    completions_: Optional[Sequence[ModelSpec]] = None
    embedding_: Optional[ModelSpec] = None
    ranking_: Optional[ModelSpec] = None

    @property
    def embedding(self) -> ModelSpec:
        """Return the embedding model, raising ValueError if unset."""
        if self.embedding_ is None:
            raise ValueError("Embedding model was not specified")
        return self.embedding_

    @property
    def ranking(self) -> ModelSpec:
        """Return the ranking model, raising ValueError if unset."""
        if self.ranking_ is None:
            raise ValueError("Ranking model was not specified")
        return self.ranking_

    @property
    def completion(self) -> ModelSpec:
        """Return the single completion model; raises unless exactly one is set."""
        if self.completions_ is None:
            raise ValueError("Completion model was not specified")
        if len(self.completions_) != 1:
            raise ValueError("ModelSpecs.completion only works with a single completion model")
        return self.completions_[0]

    @property
    def completions(self) -> Sequence[ModelSpec]:
        """Return all completion models, raising ValueError if unset."""
        if self.completions_ is None:
            raise ValueError("Completion model was not specified")
        return self.completions_

    @property
    def names(self) -> Dict[str, Sequence[str]]:
        """Map each configured role ("completions"/"embedding"/"ranking") to its model names."""
        # Fixed: the local variable previously shadowed the builtin `dict`;
        # annotation also changed from `dict[...]` to `Dict[...]` for
        # consistency with the rest of the file.
        names: Dict[str, Sequence[str]] = {}
        if self.completions_ is not None:
            names["completions"] = [model.name for model in self.completions_]
        if self.embedding_ is not None:
            names["embedding"] = [self.embedding_.name]
        if self.ranking_ is not None:
            names["ranking"] = [self.ranking_.name]
        return names
@dataclass
class RunSpec:
    """
    Specification for a single eval run.

    Note: `run_id` and `created_at` are always regenerated in `__post_init__`;
    any values passed by the caller are overwritten.
    """

    model_name: str
    model_names: Dict[str, Sequence[str]]  # role -> model names, as produced by ModelSpecs.names
    eval_name: str
    base_eval: str
    split: str
    run_config: Dict[str, Any]
    created_by: str
    # Fixed annotations: these default to None and are filled in below,
    # so they must be Optional[str], not str.
    run_id: Optional[str] = None
    created_at: Optional[str] = None

    def __post_init__(self):
        # Timestamp prefix keeps ids roughly sortable; 5 random bytes
        # (8 base32 chars) make them unique.
        # NOTE(review): datetime.utcnow() is deprecated in newer Pythons; kept
        # here to preserve the exact naive (no-timezone) created_at format —
        # confirm before migrating to datetime.now(timezone.utc).
        now = datetime.datetime.utcnow()
        rand_suffix = base64.b32encode(os.urandom(5)).decode("ascii")
        self.run_id = now.strftime("%y%m%d%H%M%S") + rand_suffix
        self.created_at = str(now)