# Reconstructed from a three-commit patch series by Andrew Kondrich
# (2023-04-14) that adds evals/elsuite/basic/json_validator.py:
#   1edc9b4a  "json validation eval"  - initial version of the file
#   bdffa4c2  "json validation eval"  - assert message: "Match" -> "JsonValidator"
#   45682089  "rm extra import"       - drops `import evals.metrics`
# The module below is the file as it stands after all three commits.
"""Eval that measures how often a model's completion parses as JSON."""

import json
from typing import Any

import numpy as np

import evals
import evals.record
from evals.api import CompletionFn


def is_valid_json(s: Any) -> bool:
    """Return True if ``s`` can be parsed by ``json.loads``, else False.

    Args:
        s: The candidate document. ``json.loads`` accepts ``str``, ``bytes``
            or ``bytearray``; anything else is reported as invalid.

    Returns:
        ``True`` when parsing succeeds, ``False`` otherwise.

    NOTE: Python's parser is slightly more lenient than the JSON spec -- by
    default it also accepts the bare constants ``NaN``, ``Infinity`` and
    ``-Infinity``.
    """
    try:
        json.loads(s)
    except (ValueError, TypeError):
        # ValueError covers json.JSONDecodeError (and undecodable bytes).
        # TypeError is raised for non-string input such as None; treat that
        # as "not valid JSON" rather than letting it abort the sample.
        return False
    return True


class JsonValidator(evals.Eval):
    """Eval that scores each sample by whether the completion is valid JSON.

    Sample loading, per-sample dispatch and ``self.completion_fn`` come from
    the ``evals.Eval`` base class, which is not shown in this file.
    """

    def __init__(
        self,
        completion_fns: list[CompletionFn],
        samples_jsonl: str,
        *args,
        max_tokens: int = 500,
        **kwargs,
    ):
        """Store the eval configuration.

        Args:
            completion_fns: Completion functions to evaluate; exactly one is
                supported.
            samples_jsonl: Stored on the instance as ``self.samples_jsonl``;
                presumably the JSONL samples file read by the base class's
                ``get_samples()`` -- confirm against ``evals.Eval``.
            *args: Forwarded unchanged to ``evals.Eval.__init__``.
            max_tokens: Stored on the instance as ``self.max_tokens``.
            **kwargs: Forwarded unchanged to ``evals.Eval.__init__``.
        """
        super().__init__(completion_fns, *args, **kwargs)
        assert len(completion_fns) == 1, "JsonValidator only supports one completion fn"
        # NOTE(review): max_tokens is stored but never passed to the
        # completion function in eval_sample -- confirm whether that is
        # intentional before wiring it through.
        self.max_tokens = max_tokens
        self.samples_jsonl = samples_jsonl

    def eval_sample(self, sample: Any, *_):
        """Query the model with one sample and record whether it returned JSON.

        Args:
            sample: A mapping with an ``"input"`` entry that is used as the
                prompt.
            *_: Extra positional arguments supplied by the caller; ignored.

        Returns:
            Whatever ``evals.record.record_metrics`` returns for the
            ``is_valid_json`` metric.
        """
        prompt = sample["input"]
        result = self.completion_fn(
            prompt=prompt,
            temperature=0.0,
        )
        # Only the first completion is scored.
        sampled = result.get_completions()[0]
        return evals.record.record_metrics(
            is_valid_json=is_valid_json(sampled),
        )

    def run(self, recorder):
        """Evaluate every sample and aggregate the results.

        Args:
            recorder: Object passed to ``eval_all_samples`` and then queried
                with ``get_metrics()`` for the per-sample metric records.

        Returns:
            A dict with the single key ``"num_valid_json"``. Despite the
            name, the value is the *mean* of the per-sample ``is_valid_json``
            flags (i.e. the fraction that were valid), not a count.
        """
        samples = self.get_samples()
        self.eval_all_samples(recorder, samples)
        metrics = recorder.get_metrics()
        return {"num_valid_json": np.mean([m["is_valid_json"] for m in metrics])}