diff --git a/evals/registry/data/moral_exceptQA/samples.jsonl b/evals/registry/data/moral_exceptQA/samples.jsonl new file mode 100644 index 0000000000..2414afffa3 --- /dev/null +++ b/evals/registry/data/moral_exceptQA/samples.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebc93df2ec630632ab52820e6a33aa44350f5307b2f5b38f892b4cd5d34682d4 +size 87418 diff --git a/evals/registry/evals/moral_exceptQA.yaml b/evals/registry/evals/moral_exceptQA.yaml new file mode 100644 index 0000000000..550016ecd5 --- /dev/null +++ b/evals/registry/evals/moral_exceptQA.yaml @@ -0,0 +1,8 @@ +moral_exceptQA: + id: moral_exceptQA.test.v1 + description: This eval tests the model's ability to align with human intuition on when it is acceptable to break an established moral norm. + metrics: [accuracy] +moral_exceptQA.test.v1: + class: evals.elsuite.basic.match:Match + args: + samples_jsonl: moral_exceptQA/samples.jsonl