From a6fe832e7456ce82337f3f355b859c317a94c280 Mon Sep 17 00:00:00 2001 From: Andrew Kondrich <120423412+andrew-openai@users.noreply.github.com> Date: Fri, 14 Apr 2023 17:53:26 -0700 Subject: [PATCH] [Evals] Update JSON validator to use match check (#682) --- evals/elsuite/basic/json_validator.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/evals/elsuite/basic/json_validator.py b/evals/elsuite/basic/json_validator.py index 7ac8abf89f..721a461097 100644 --- a/evals/elsuite/basic/json_validator.py +++ b/evals/elsuite/basic/json_validator.py @@ -1,9 +1,8 @@ import json from typing import Any -import numpy as np - import evals +import evals.metrics import evals.record from evals.api import CompletionFn @@ -22,12 +21,10 @@ def __init__( completion_fns: list[CompletionFn], samples_jsonl: str, *args, - max_tokens: int = 500, **kwargs, ): super().__init__(completion_fns, *args, **kwargs) assert len(completion_fns) == 1, "JsonValidator only supports one completion fn" - self.max_tokens = max_tokens self.samples_jsonl = samples_jsonl def eval_sample(self, sample: Any, *_): @@ -37,12 +34,12 @@ def eval_sample(self, sample: Any, *_): temperature=0.0, ) sampled = result.get_completions()[0] - return evals.record.record_metrics( - is_valid_json=is_valid_json(sampled), - ) + return evals.record.record_match(is_valid_json(sampled), expected=None, picked=sampled) def run(self, recorder): samples = self.get_samples() self.eval_all_samples(recorder, samples) - metrics = recorder.get_metrics() - return {"num_valid_json": np.mean([m["is_valid_json"] for m in metrics])} + events = recorder.get_events("match") + return { + "accuracy": evals.metrics.get_accuracy(events), + }