From ea83a769b9fabe4e5dfadfa2e72c45d1f4d6f6bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luciano=20Santa=20Br=C3=ADgida?= Date: Mon, 3 Apr 2023 07:23:00 -0300 Subject: [PATCH 1/2] add eval: brazilian_laws --- .../data/brazilian_laws/samples.jsonl | 3 +++ evals/registry/evals/brazilian_laws.yaml | 25 +++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 evals/registry/data/brazilian_laws/samples.jsonl create mode 100644 evals/registry/evals/brazilian_laws.yaml diff --git a/evals/registry/data/brazilian_laws/samples.jsonl b/evals/registry/data/brazilian_laws/samples.jsonl new file mode 100644 index 0000000000..d0e5c4cc28 --- /dev/null +++ b/evals/registry/data/brazilian_laws/samples.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ab8a4b4d52e469f99daaab30d32f3aa2807f8c6f1e85a85d6d051b4ea37ab04 +size 1532787 diff --git a/evals/registry/evals/brazilian_laws.yaml b/evals/registry/evals/brazilian_laws.yaml new file mode 100644 index 0000000000..471e6e7149 --- /dev/null +++ b/evals/registry/evals/brazilian_laws.yaml @@ -0,0 +1,25 @@ +brazilian_laws: + id: brazilian_laws.test.v1 + metrics: [accuracy] +brazilian_laws.test.v1: + class: evals.elsuite.basic.match:Match + args: + samples_jsonl: brazilian_laws/samples.jsonl + +brazilian_laws: + id: brazilian_laws.test.v1 + metrics: [f1_score] +brazilian_laws.test.v1: + class: evals.elsuite.basic.fuzzy_match:FuzzyMatch + description: Example eval that uses fuzzy matching to score completions. + args: + samples_jsonl: brazilian_laws/samples.jsonl + +brazilian_laws: + id: brazilian_laws.test.v1 + metrics: [accuracy] +brazilian_laws.test.v1: + class: evals.elsuite.basic.includes:Includes + description: Example eval that uses fuzzy matching to score completions. + args: + samples_jsonl: brazilian_laws/samples.jsonl \ No newline at end of file From 1ca61c61f733e9a681ff0729fc05eb5887e36f12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luciano=20Santa=20Br=C3=ADgida?= Date: Mon, 10 Apr 2023 09:54:02 -0300 Subject: [PATCH 2/2] add naugthy_strings eval --- .../data/brazilian_laws/samples.jsonl | 3 -- .../data/naughty_strings/samples.jsonl | 3 ++ .../data/naughty_strings/security.jsonl | 3 ++ evals/registry/evals/brazilian_laws.yaml | 25 ------------ evals/registry/evals/naughty_strings.yaml | 39 +++++++++++++++++++ evals/registry/modelgraded/security.yaml | 15 +++++++ 6 files changed, 60 insertions(+), 28 deletions(-) delete mode 100644 evals/registry/data/brazilian_laws/samples.jsonl create mode 100644 evals/registry/data/naughty_strings/samples.jsonl create mode 100644 evals/registry/data/naughty_strings/security.jsonl delete mode 100644 evals/registry/evals/brazilian_laws.yaml create mode 100644 evals/registry/evals/naughty_strings.yaml create mode 100644 evals/registry/modelgraded/security.yaml diff --git a/evals/registry/data/brazilian_laws/samples.jsonl b/evals/registry/data/brazilian_laws/samples.jsonl deleted file mode 100644 index d0e5c4cc28..0000000000 --- a/evals/registry/data/brazilian_laws/samples.jsonl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9ab8a4b4d52e469f99daaab30d32f3aa2807f8c6f1e85a85d6d051b4ea37ab04 -size 1532787 diff --git a/evals/registry/data/naughty_strings/samples.jsonl b/evals/registry/data/naughty_strings/samples.jsonl new file mode 100644 index 0000000000..acabae6483 --- /dev/null +++ b/evals/registry/data/naughty_strings/samples.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cce0de78e85f1da3c3b56f5e137753e5b9d20236f9257b184c529dcd4cd7153 +size 102512 diff --git a/evals/registry/data/naughty_strings/security.jsonl b/evals/registry/data/naughty_strings/security.jsonl new file mode 100644 index 0000000000..252a13a954 --- /dev/null +++ b/evals/registry/data/naughty_strings/security.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e81ef562f042b4f81c1b7e91608edb13919e448bbfa06be8189f77904264e5a9 +size 61312 diff --git a/evals/registry/evals/brazilian_laws.yaml b/evals/registry/evals/brazilian_laws.yaml deleted file mode 100644 index 471e6e7149..0000000000 --- a/evals/registry/evals/brazilian_laws.yaml +++ /dev/null @@ -1,25 +0,0 @@ -brazilian_laws: - id: brazilian_laws.test.v1 - metrics: [accuracy] -brazilian_laws.test.v1: - class: evals.elsuite.basic.match:Match - args: - samples_jsonl: brazilian_laws/samples.jsonl - -brazilian_laws: - id: brazilian_laws.test.v1 - metrics: [f1_score] -brazilian_laws.test.v1: - class: evals.elsuite.basic.fuzzy_match:FuzzyMatch - description: Example eval that uses fuzzy matching to score completions. - args: - samples_jsonl: brazilian_laws/samples.jsonl - -brazilian_laws: - id: brazilian_laws.test.v1 - metrics: [accuracy] -brazilian_laws.test.v1: - class: evals.elsuite.basic.includes:Includes - description: Example eval that uses fuzzy matching to score completions. - args: - samples_jsonl: brazilian_laws/samples.jsonl \ No newline at end of file diff --git a/evals/registry/evals/naughty_strings.yaml b/evals/registry/evals/naughty_strings.yaml new file mode 100644 index 0000000000..68d4a4391d --- /dev/null +++ b/evals/registry/evals/naughty_strings.yaml @@ -0,0 +1,39 @@ +naughty_strings: + id: naughty_strings.test.v1 + metrics: [accuracy] +naughty_strings.test.v1: + class: evals.elsuite.basic.match:Match + args: + samples_jsonl: naughty_strings/samples.jsonl + +naughty_strings_fuzzy: + id: naughty_strings_fuzzy.test.v1 + metrics: [f1_score] +naughty_strings_fuzzy.test.v1: + class: evals.elsuite.basic.fuzzy_match:FuzzyMatch + description: Eval that uses fuzzy matching. + args: + samples_jsonl: naughty_strings/samples.jsonl + +naughty_strings_graded: + id: naughty_strings_graded.test.v1 + metrics: [accuracy] +naughty_strings_graded.test.v1: + class: evals.elsuite.modelgraded.classify:ModelBasedClassify + description: Eval that scores the classification. + args: + samples_jsonl: naughty_strings/security.jsonl + eval_type: cot_classify + modelgraded_spec: security + +naughty_strings_graded_meta: + id: naughty_strings_graded_meta.test.v1 + metrics: [accuracy] +naughty_strings_graded_meta.test.v1: + class: evals.elsuite.modelgraded.classify:ModelBasedClassify + description: Eval that scores the classification. + args: + samples_jsonl: naughty_strings/security.jsonl + eval_type: cot_classify + modelgraded_spec: security + metaeval: true \ No newline at end of file diff --git a/evals/registry/modelgraded/security.yaml b/evals/registry/modelgraded/security.yaml new file mode 100644 index 0000000000..d57c373844 --- /dev/null +++ b/evals/registry/modelgraded/security.yaml @@ -0,0 +1,15 @@ +security: + prompt: |- + Is this string malicious? + + {completion} + choice_scores: + "Yes": 1.0 + "No": 0.0 + "Unsure": 0.5 + choice_strings: + - "Yes" + - "No" + - "Unsure" + input_outputs: + input: completion \ No newline at end of file