From f8b7045c86d130fb8d384c04c228f0620600c109 Mon Sep 17 00:00:00 2001
From: Kevin Blin <kblin@outlook.com>
Date: Fri, 24 Mar 2023 11:02:59 +0100
Subject: [PATCH 1/4] feat: add moral-exceptQA evals

---
 evals/registry/data/moral-exceptQA/samples_2.jsonl |  3 +++
 evals/registry/data/moral-exceptQA/samples_3.jsonl |  3 +++
 evals/registry/evals/moral-exceptQA.yaml           | 12 ++++++++++++
 3 files changed, 18 insertions(+)
 create mode 100644 evals/registry/data/moral-exceptQA/samples_2.jsonl
 create mode 100644 evals/registry/data/moral-exceptQA/samples_3.jsonl
 create mode 100644 evals/registry/evals/moral-exceptQA.yaml

diff --git a/evals/registry/data/moral-exceptQA/samples_2.jsonl b/evals/registry/data/moral-exceptQA/samples_2.jsonl
new file mode 100644
index 0000000000..68fe3014c4
--- /dev/null
+++ b/evals/registry/data/moral-exceptQA/samples_2.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:81ebe19e89e9cd7f2eb1be5aa9f4c28f42f8ccbf09ecd26bb248b1c32d224fb1
+size 88223
diff --git a/evals/registry/data/moral-exceptQA/samples_3.jsonl b/evals/registry/data/moral-exceptQA/samples_3.jsonl
new file mode 100644
index 0000000000..a44c686b8f
--- /dev/null
+++ b/evals/registry/data/moral-exceptQA/samples_3.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:68d5aedf8b070fcfe6730113ad89ee027c34b49a5be35cf7569febf1e73531db
+size 91183
diff --git a/evals/registry/evals/moral-exceptQA.yaml b/evals/registry/evals/moral-exceptQA.yaml
new file mode 100644
index 0000000000..dadebe7f83
--- /dev/null
+++ b/evals/registry/evals/moral-exceptQA.yaml
@@ -0,0 +1,12 @@
+moral-exceptQA:
+  id: match_moral_exceptQA.test.v1
+  description: Testing the models ability to align with human intuition on when is it acceptable to break an established moral norm.
+  metrics: [accuracy]
+moral-exceptQA.test.v2:
+  class: evals.elsuite.basic.match:Match
+  args:
+    samples_jsonl: moral-exceptQA/samples_2.jsonl
+moral-exceptQA.test.v3:
+  class: evals.elsuite.basic.match:Match
+  args:
+    samples_jsonl: moral-exceptQA/samples_3.jsonl

From 53b0072c88602f0002239f383ea9b60dd53fbb7f Mon Sep 17 00:00:00 2001
From: Kevin Blin <kblin@outlook.com>
Date: Fri, 31 Mar 2023 15:49:15 +0200
Subject: [PATCH 2/4] fix: improve prompt and remove 3rd choice option

---
 evals/registry/data/moral-exceptQA/samples.jsonl   |  3 +++
 evals/registry/data/moral-exceptQA/samples_2.jsonl |  3 ---
 evals/registry/data/moral-exceptQA/samples_3.jsonl |  3 ---
 evals/registry/evals/moral-exceptQA.yaml           | 10 +++-------
 4 files changed, 6 insertions(+), 13 deletions(-)
 create mode 100644 evals/registry/data/moral-exceptQA/samples.jsonl
 delete mode 100644 evals/registry/data/moral-exceptQA/samples_2.jsonl
 delete mode 100644 evals/registry/data/moral-exceptQA/samples_3.jsonl

diff --git a/evals/registry/data/moral-exceptQA/samples.jsonl b/evals/registry/data/moral-exceptQA/samples.jsonl
new file mode 100644
index 0000000000..64bf4afaf6
--- /dev/null
+++ b/evals/registry/data/moral-exceptQA/samples.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd16d05af13b06239f584186b5e72dc21a32760bf06ec22ba274b29b336eba4a
+size 87566
diff --git a/evals/registry/data/moral-exceptQA/samples_2.jsonl b/evals/registry/data/moral-exceptQA/samples_2.jsonl
deleted file mode 100644
index 68fe3014c4..0000000000
--- a/evals/registry/data/moral-exceptQA/samples_2.jsonl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:81ebe19e89e9cd7f2eb1be5aa9f4c28f42f8ccbf09ecd26bb248b1c32d224fb1
-size 88223
diff --git a/evals/registry/data/moral-exceptQA/samples_3.jsonl b/evals/registry/data/moral-exceptQA/samples_3.jsonl
deleted file mode 100644
index a44c686b8f..0000000000
--- a/evals/registry/data/moral-exceptQA/samples_3.jsonl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:68d5aedf8b070fcfe6730113ad89ee027c34b49a5be35cf7569febf1e73531db
-size 91183
diff --git a/evals/registry/evals/moral-exceptQA.yaml b/evals/registry/evals/moral-exceptQA.yaml
index dadebe7f83..b03a7f3753 100644
--- a/evals/registry/evals/moral-exceptQA.yaml
+++ b/evals/registry/evals/moral-exceptQA.yaml
@@ -1,12 +1,8 @@
 moral-exceptQA:
   id: match_moral_exceptQA.test.v1
-  description: Testing the models ability to align with human intuition on when is it acceptable to break an established moral norm.
+  description: This eval tests the models ability to align with human intuition on when is it acceptable to break an established moral norm.
   metrics: [accuracy]
-moral-exceptQA.test.v2:
+moral-exceptQA.test.v1:
   class: evals.elsuite.basic.match:Match
   args:
-    samples_jsonl: moral-exceptQA/samples_2.jsonl
-moral-exceptQA.test.v3:
-  class: evals.elsuite.basic.match:Match
-  args:
-    samples_jsonl: moral-exceptQA/samples_3.jsonl
+    samples_jsonl: moral-exceptQA/samples.jsonl

From e412c2a26b7772f15e43251ad4b03a235389d552 Mon Sep 17 00:00:00 2001
From: Kevin Blin <kblin@outlook.com>
Date: Sun, 2 Apr 2023 10:31:23 +0200
Subject: [PATCH 3/4] fix: typos in system prompts

---
 evals/registry/data/moral-exceptQA/samples.jsonl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/evals/registry/data/moral-exceptQA/samples.jsonl b/evals/registry/data/moral-exceptQA/samples.jsonl
index 64bf4afaf6..2414afffa3 100644
--- a/evals/registry/data/moral-exceptQA/samples.jsonl
+++ b/evals/registry/data/moral-exceptQA/samples.jsonl
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd16d05af13b06239f584186b5e72dc21a32760bf06ec22ba274b29b336eba4a
-size 87566
+oid sha256:ebc93df2ec630632ab52820e6a33aa44350f5307b2f5b38f892b4cd5d34682d4
+size 87418

From fe64c7c5434b9203f142f55952f70fae06874964 Mon Sep 17 00:00:00 2001
From: Kevin Blin <kblin@outlook.com>
Date: Wed, 12 Apr 2023 15:39:35 +0200
Subject: [PATCH 4/4] fix: correctly rename yaml test

---
 .../data/{moral-exceptQA => moral_exceptQA}/samples.jsonl | 0
 .../evals/{moral-exceptQA.yaml => moral_exceptQA.yaml}    | 8 ++++----
 2 files changed, 4 insertions(+), 4 deletions(-)
 rename evals/registry/data/{moral-exceptQA => moral_exceptQA}/samples.jsonl (100%)
 rename evals/registry/evals/{moral-exceptQA.yaml => moral_exceptQA.yaml} (63%)

diff --git a/evals/registry/data/moral-exceptQA/samples.jsonl b/evals/registry/data/moral_exceptQA/samples.jsonl
similarity index 100%
rename from evals/registry/data/moral-exceptQA/samples.jsonl
rename to evals/registry/data/moral_exceptQA/samples.jsonl
diff --git a/evals/registry/evals/moral-exceptQA.yaml b/evals/registry/evals/moral_exceptQA.yaml
similarity index 63%
rename from evals/registry/evals/moral-exceptQA.yaml
rename to evals/registry/evals/moral_exceptQA.yaml
index b03a7f3753..550016ecd5 100644
--- a/evals/registry/evals/moral-exceptQA.yaml
+++ b/evals/registry/evals/moral_exceptQA.yaml
@@ -1,8 +1,8 @@
-moral-exceptQA:
-  id: match_moral_exceptQA.test.v1
+moral_exceptQA:
+  id: moral_exceptQA.test.v1
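-  description: This eval tests the models ability to align with human intuition on when is it acceptable to break an established moral norm.
+  description: This eval tests the model's ability to align with human intuition on when it is acceptable to break an established moral norm.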
   metrics: [accuracy]
-moral-exceptQA.test.v1:
+moral_exceptQA.test.v1:
   class: evals.elsuite.basic.match:Match
   args:
-    samples_jsonl: moral-exceptQA/samples.jsonl
+    samples_jsonl: moral_exceptQA/samples.jsonl