From 7b68f5f6734d7ad9903112d10fcc381e7f85fd54 Mon Sep 17 00:00:00 2001 From: Christy Koh Date: Thu, 9 Mar 2023 08:21:39 +0000 Subject: [PATCH 01/36] add boolq_pt template and christykoh as included user --- elk/promptsource/templates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elk/promptsource/templates.py b/elk/promptsource/templates.py index 5f8a4f34..f34a1933 100644 --- a/elk/promptsource/templates.py +++ b/elk/promptsource/templates.py @@ -24,7 +24,7 @@ # These are users whose datasets should be included in the results returned by # filter_english_datasets (regardless of their metadata) -INCLUDED_USERS = {"Zaid", "craffel", "lauritowal"} +INCLUDED_USERS = {"Zaid", "craffel", "lauritowal", "christykoh"} def highlight(input): From edfdd6d5f44e55533c5d67767a11f45ecea52d79 Mon Sep 17 00:00:00 2001 From: Christy Koh Date: Tue, 14 Mar 2023 05:17:05 +0000 Subject: [PATCH 02/36] add templates.yaml for boolqpt --- .../christykoh/boolq_pt/templates.yaml | 189 ++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100644 elk/promptsource/templates/christykoh/boolq_pt/templates.yaml diff --git a/elk/promptsource/templates/christykoh/boolq_pt/templates.yaml b/elk/promptsource/templates/christykoh/boolq_pt/templates.yaml new file mode 100644 index 00000000..fbf4b5b0 --- /dev/null +++ b/elk/promptsource/templates/christykoh/boolq_pt/templates.yaml @@ -0,0 +1,189 @@ +dataset: boolq_pt +templates: + 3e386463-1715-4578-9cba-07d11a0d3b61: !Template + answer_choices: False ||| True + id: 3e386463-1715-4578-9cba-07d11a0d3b61 + jinja: 'Passagem: {{passage}} + + + Depois de ler esta passagem, tenho uma pergunta: {{question}}? Verdadeiro ou falso? 
+ ||| + + {% if label != -1 %} + + {{answer_choices[label]}} + + {% endif %}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: + - Accuracy + original_task: true + name: after_reading + reference: '' + 492f0f88-4370-46cd-839b-1de37a55aeda: !Template + answer_choices: No ||| Yes + id: 492f0f88-4370-46cd-839b-1de37a55aeda + jinja: "{{ passage }} \nPergunta: {{ question }}\nResposta: ||| \n{% if label !=\ + \ -1 %}\n{{ answer_choices[label] }}\n{% endif %}" + metadata: !TemplateMetadata + choices_in_prompt: false + languages: + - pt + metrics: + - Accuracy + original_task: true + name: GPT-3 Style + reference: Same as Figure G29, p. 58 of the GPT-3 paper + 6cb6a026-c070-470a-b75d-bb8fdf424e35: !Template + answer_choices: No ||| Yes + id: 6cb6a026-c070-470a-b75d-bb8fdf424e35 + jinja: "{{ passage }}\n\nDepois de ler isso, eu me pergunto {{ question }}? |||\n{% if\ + \ label != -1 %}\n{{ answer_choices[label] }} \n{% endif %}" + metadata: !TemplateMetadata + choices_in_prompt: false + languages: + - pt + metrics: + - Accuracy + original_task: true + name: "I wonder\u2026" + reference: '' + 7cf7acdf-e3a2-459f-a3e8-2e2d27dd6aa5: !Template + answer_choices: No ||| Yes + id: 7cf7acdf-e3a2-459f-a3e8-2e2d27dd6aa5 + jinja: 'Texto: {{passage}} + + + Responda sim/não à seguinte pergunta: {{question}}? Sim ou não? ||| + + {% if label != -1 %} + + {{answer_choices[label]}} + + {% endif %}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: + - Accuracy + original_task: true + name: yes_no_question + reference: '' + 7d21d974-0624-4d4f-9e8c-644e2d009cb5: !Template + answer_choices: No ||| Yes + id: 7d21d974-0624-4d4f-9e8c-644e2d009cb5 + jinja: "{{ passage }}\n\nDepois de ler isso, você poderia me dizer {{ question }}? 
\ + \ ||| {% if label != -1 %}{{ answer_choices[label] }}\n{% endif %}" + metadata: !TemplateMetadata + choices_in_prompt: false + languages: + - pt + metrics: + - Accuracy + original_task: true + name: "could you tell me\u2026" + reference: '' + 922d3e87-ac58-4731-84d1-f0a40e47afb5: !Template + answer_choices: No ||| Yes + id: 922d3e87-ac58-4731-84d1-f0a40e47afb5 + jinja: "EXAME\n1. Responda sim ou não.\nDocumento: {{passage}}\nPergunta: {{question}}? \ + \ ||| \n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}" + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: + - Accuracy + original_task: true + name: exam + reference: '' + 9a1bf459-8047-437c-9def-f21e960429cc: !Template + answer_choices: No ||| Yes + id: 9a1bf459-8047-437c-9def-f21e960429cc + jinja: 'Com base na seguinte passagem, {{ question }}? {{ passage }} + + + ||| + + {% if label != -1 %} + + {{ answer_choices[label] }} + + {% endif %}' + metadata: !TemplateMetadata + choices_in_prompt: false + languages: + - pt + metrics: + - Accuracy + original_task: true + name: based on the following passage + reference: "Adapted from Perez et al. 2021 and Schick & Sch\xFCtz 2021." + 9f4c6b0a-437b-40c0-b467-db4b7218d38d: !Template + answer_choices: False ||| True + id: 9f4c6b0a-437b-40c0-b467-db4b7218d38d + jinja: 'Exercício: leia o texto e responda à questão com Verdadeiro ou Falso. + + + Texto: {{passage}} + + Pergunta: {{question}}? ||| + + {% if label != -1 %} + + {{answer_choices[label]}} + + {% endif %}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: + - Accuracy + original_task: true + name: exercise + reference: '' + b2b3cb60-d6e3-491c-a09a-8201e13e417e: !Template + answer_choices: No ||| Yes + id: b2b3cb60-d6e3-491c-a09a-8201e13e417e + jinja: '{{ passage }} + + Com base na passagem anterior, {{ question }}? 
||| {% if label != -1 %}{{ answer_choices[label] + }} + + {% endif %}' + metadata: !TemplateMetadata + choices_in_prompt: false + languages: + - pt + metrics: + - Accuracy + original_task: true + name: based on the previous passage + reference: "Adapted from Perez et al. 2021 and Schick & Sch\xFCtz 2021." + eb78772c-e81e-4b8a-a77b-b75efd1c212a: !Template + answer_choices: False ||| True + id: eb78772c-e81e-4b8a-a77b-b75efd1c212a + jinja: '{{passage}} + + + P: {{question}}? Verdadeiro ou falso? ||| + + {% if label != -1 %} + + {{answer_choices[label]}} + + {% endif %}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: + - Accuracy + original_task: true + name: valid_binary + reference: '' From d2857985e596a567dc5af6244da46f7a245e938b Mon Sep 17 00:00:00 2001 From: Reagan Lee Date: Sun, 26 Mar 2023 07:25:38 +0000 Subject: [PATCH 03/36] pt yaml --- .../templates/boolq_pt/templates.yaml | 189 ++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100644 elk/promptsource/templates/boolq_pt/templates.yaml diff --git a/elk/promptsource/templates/boolq_pt/templates.yaml b/elk/promptsource/templates/boolq_pt/templates.yaml new file mode 100644 index 00000000..93528f32 --- /dev/null +++ b/elk/promptsource/templates/boolq_pt/templates.yaml @@ -0,0 +1,189 @@ +dataset: boolq_pt +templates: + 3e386463-1715-4578-9cba-07d11a0d3b61: !Template + answer_choices: False ||| True + id: 3e386463-1715-4578-9cba-07d11a0d3b61 + jinja: 'Passagem: {{passage}} + + + Depois de ler esta passagem, tenho uma pergunta: {{question}}? Verdadeiro ou falso? 
+ ||| + + {% if label != -1 %} + + {{answer_choices[label]}} + + {% endif %}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: + - Accuracy + original_task: true + name: after_reading + reference: '' + 492f0f88-4370-46cd-839b-1de37a55aeda: !Template + answer_choices: No ||| Yes + id: 492f0f88-4370-46cd-839b-1de37a55aeda + jinja: "{{ passage }} \nPergunta: {{ question }}\nResposta: ||| \n{% if label !=\ + \ -1 %}\n{{ answer_choices[label] }}\n{% endif %}" + metadata: !TemplateMetadata + choices_in_prompt: false + languages: + - pt + metrics: + - Accuracy + original_task: true + name: GPT-3 Style + reference: Same as Figure G29, p. 58 of the GPT-3 paper + 6cb6a026-c070-470a-b75d-bb8fdf424e35: !Template + answer_choices: No ||| Yes + id: 6cb6a026-c070-470a-b75d-bb8fdf424e35 + jinja: "{{ passage }}\n\nDepois de ler isso, eu me pergunto {{ question }}? |||\n{% if\ + \ label != -1 %}\n{{ answer_choices[label] }} \n{% endif %}" + metadata: !TemplateMetadata + choices_in_prompt: false + languages: + - pt + metrics: + - Accuracy + original_task: true + name: "I wonder\u2026" + reference: '' + 7cf7acdf-e3a2-459f-a3e8-2e2d27dd6aa5: !Template + answer_choices: No ||| Yes + id: 7cf7acdf-e3a2-459f-a3e8-2e2d27dd6aa5 + jinja: 'Texto: {{passage}} + + + Responda sim/não à seguinte pergunta: {{question}}? Sim ou não? ||| + + {% if label != -1 %} + + {{answer_choices[label]}} + + {% endif %}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: + - Accuracy + original_task: true + name: yes_no_question + reference: '' + 7d21d974-0624-4d4f-9e8c-644e2d009cb5: !Template + answer_choices: No ||| Yes + id: 7d21d974-0624-4d4f-9e8c-644e2d009cb5 + jinja: "{{ passage }}\n\nDepois de ler isso, você poderia me dizer {{ question }}? 
\ + \ ||| {% if label != -1 %}{{ answer_choices[label] }}\n{% endif %}" + metadata: !TemplateMetadata + choices_in_prompt: false + languages: + - pt + metrics: + - Accuracy + original_task: true + name: "could you tell me\u2026" + reference: '' + 922d3e87-ac58-4731-84d1-f0a40e47afb5: !Template + answer_choices: No ||| Yes + id: 922d3e87-ac58-4731-84d1-f0a40e47afb5 + jinja: "EXAME\n1. Responda sim ou não.\nDocumento: {{passage}}\nPergunta: {{question}}? \ + \ ||| \n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}" + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: + - Accuracy + original_task: true + name: exam + reference: '' + 9a1bf459-8047-437c-9def-f21e960429cc: !Template + answer_choices: No ||| Yes + id: 9a1bf459-8047-437c-9def-f21e960429cc + jinja: 'Com base na seguinte passagem, {{ question }}? {{ passage }} + + + ||| + + {% if label != -1 %} + + {{ answer_choices[label] }} + + {% endif %}' + metadata: !TemplateMetadata + choices_in_prompt: false + languages: + - pt + metrics: + - Accuracy + original_task: true + name: based on the following passage + reference: "Adapted from Perez et al. 2021 and Schick & Sch\xFCtz 2021." + 9f4c6b0a-437b-40c0-b467-db4b7218d38d: !Template + answer_choices: False ||| True + id: 9f4c6b0a-437b-40c0-b467-db4b7218d38d + jinja: 'Exercício: leia o texto e responda à questão com Verdadeiro ou Falso. + + + Texto: {{passage}} + + Pergunta: {{question}}? ||| + + {% if label != -1 %} + + {{answer_choices[label]}} + + {% endif %}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: + - Accuracy + original_task: true + name: exercise + reference: '' + b2b3cb60-d6e3-491c-a09a-8201e13e417e: !Template + answer_choices: No ||| Yes + id: b2b3cb60-d6e3-491c-a09a-8201e13e417e + jinja: '{{ passage }} + + Com base na passagem anterior, {{ question }}? 
||| {% if label != -1 %}{{ answer_choices[label] + }} + + {% endif %}' + metadata: !TemplateMetadata + choices_in_prompt: false + languages: + - pt + metrics: + - Accuracy + original_task: true + name: based on the previous passage + reference: "Adapted from Perez et al. 2021 and Schick & Sch\xFCtz 2021." + eb78772c-e81e-4b8a-a77b-b75efd1c212a: !Template + answer_choices: False ||| True + id: eb78772c-e81e-4b8a-a77b-b75efd1c212a + jinja: '{{passage}} + + + P: {{question}}? Verdadeiro ou falso? ||| + + {% if label != -1 %} + + {{answer_choices[label]}} + + {% endif %}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: + - Accuracy + original_task: true + name: valid_binary + reference: '' From 0ff1609b4d7fd7dbd50b825e1e3039eeeaec1465 Mon Sep 17 00:00:00 2001 From: Christy Koh Date: Wed, 5 Apr 2023 08:08:17 +0000 Subject: [PATCH 04/36] add ag_news template, translated to pt --- elk/promptsource/templates.py | 2 +- .../christykoh/ag_news_pt/templates.yaml | 215 ++++++++++++++++++ 2 files changed, 216 insertions(+), 1 deletion(-) create mode 100644 elk/promptsource/templates/christykoh/ag_news_pt/templates.yaml diff --git a/elk/promptsource/templates.py b/elk/promptsource/templates.py index a19526e1..36480b1e 100644 --- a/elk/promptsource/templates.py +++ b/elk/promptsource/templates.py @@ -24,7 +24,7 @@ # These are users whose datasets should be included in the results returned by # filter_english_datasets (regardless of their metadata) -INCLUDED_USERS = {"Zaid", "craffel", "lauritowal"} +INCLUDED_USERS = {"Zaid", "craffel", "lauritowal", "christykoh"} def highlight(input): diff --git a/elk/promptsource/templates/christykoh/ag_news_pt/templates.yaml b/elk/promptsource/templates/christykoh/ag_news_pt/templates.yaml new file mode 100644 index 00000000..2dc94749 --- /dev/null +++ b/elk/promptsource/templates/christykoh/ag_news_pt/templates.yaml @@ -0,0 +1,215 @@ +dataset: ag_news_pt +templates: + 24e44a81-a18a-42dd-a71c-5b31b2d2cb39: 
!Template + answer_choices: Política mundial ||| Esportes ||| Negócios ||| Ciência e Tecnologia + id: 24e44a81-a18a-42dd-a71c-5b31b2d2cb39 + jinja: "Qual rótulo melhor descreve este artigo de notícias?\n{{text}} ||| \n{{answer_choices[label]\ + \ }}" + metadata: !TemplateMetadata + choices_in_prompt: false + languages: + - pt + metrics: + - Accuracy + original_task: true + name: classify_question_first + reference: '' + 8fdc1056-1029-41a1-9c67-354fc2b8ceaf: !Template + answer_choices: Política mundial ||| Esportes ||| Negócios ||| Ciência e Tecnologia + id: 8fdc1056-1029-41a1-9c67-354fc2b8ceaf + jinja: "Isso é uma notícia sobre {{\"política mundial, esportes, negócios,\ + \ ou ciência e tecnologia\"}}?\n{{text}} \n||| \n{{answer_choices[label] }}" + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: + - Accuracy + original_task: true + name: classify_with_choices_question_first + reference: '' + 918267e0-af68-4117-892d-2dbe66a58ce9: !Template + answer_choices: Político ||| Atleta ||| Executivo de negócios ||| Cientista + id: 918267e0-af68-4117-892d-2dbe66a58ce9 + jinja: 'Você recomendaria o seguinte artigo para um {{"político"}}, um {{"atleta"}}, + um {{"executivo de negócios"}} ou um {{"cientista"}}? + + + {{ text }} + + ||| + + {{answer_choices[label]}}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: + - Accuracy + original_task: true + name: recommend + reference: '' + 9345df33-4f23-4944-a33c-eef94e626862: !Template + answer_choices: Notícias do mundo ||| Esportes ||| Negócios ||| Ciência e Tecnologia + id: 9345df33-4f23-4944-a33c-eef94e626862 + jinja: "{{text}} \n\nQual das seguintes seções de um jornal seria esta\ + \ artigo provavelmente aparece em? {{\"Notícias do mundo\"}}, {{\"Esportes\"}}, {{\"Negócios\"\ + }} ou {{\"Ciência e Tecnologia\"}}? 
||| \n{{answer_choices[label] }}" + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: + - Accuracy + original_task: true + name: which_section_choices + reference: '' + 98534347-fff7-4c39-a795-4e69a44791f7: !Template + answer_choices: Notícias do mundo ||| Esportes ||| Negócios ||| Ciência e Tecnologia + id: 98534347-fff7-4c39-a795-4e69a44791f7 + jinja: "{{text}} \n\nEm qual seção de um jornal este artigo provavelmente apareceria\ + \ em? ||| \n{{answer_choices[label] }}" + metadata: !TemplateMetadata + choices_in_prompt: false + languages: + - pt + metrics: + - Accuracy + original_task: true + name: which_section + reference: '' + b1d49782-dfd9-41a5-87a6-8fceab9c2198: !Template + answer_choices: Política mundial ||| Esportes ||| Negócios ||| Ciência e Tecnologia + id: b1d49782-dfd9-41a5-87a6-8fceab9c2198 + jinja: 'Considere o seguinte exemplo: '''''' {{text}} '''''' + Opção 1: {{answer_choices[label]}}. + Opção 2: {{answer_choices[1 - label]}}. Entre a opção 1 e a opção 2, o tópico deste exemplo é ||| {{answer_choices[label]}}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: [] + original_task: true + name: burns_1 + reference: Burns et al + b2d49782-dfd9-41a5-87a6-8fceab9c2198: !Template + answer_choices: Política mundial ||| Esportes ||| Negócios ||| Ciência e Tecnologia + id: b2d49782-dfd9-41a5-87a6-8fceab9c2198 + jinja: 'Consider the following example: '''''' {{text}} '''''' + Opção 1: {{answer_choices[label]}}. + Opção 2: {{answer_choices[1 - label]}}. Entre a opção 1 e a opção 2, qual é o tópico deste exemplo? 
||| {{answer_choices[label]}}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: [] + original_task: true + name: burns_2 + reference: Burns et al + b3d49782-dfd9-41a5-87a6-8fceab9c2198: !Template + answer_choices: Política mundial ||| Esportes ||| Negócios ||| Ciência e Tecnologia + id: b3d49782-dfd9-41a5-87a6-8fceab9c2198 + jinja: 'Considere o seguinte exemplo: '''''' {{text}} '''''' + Qual é o tópico deste exemplo, opção 1: {{answer_choices[label]}}, + ou opção 2: {{answer_choices[1 - label]}}? ||| {{answer_choices[label]}}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: [] + original_task: true + name: burns_3 + reference: Burns et al + b4d49782-dfd9-41a5-87a6-8fceab9c2198: !Template + answer_choices: Política mundial ||| Esportes ||| Negócios ||| Ciência e Tecnologia + id: b4d49782-dfd9-41a5-87a6-8fceab9c2198 + jinja: '{{text}} + Opção 1: {{answer_choices[label]}}. Opção 2: {{answer_choices[1 - label]}}. Entre a opção 1 e a opção 2, o tópico deste exemplo é ||| {{answer_choices[label]}}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: [] + original_task: true + name: burns_4 + reference: Burns et al + b5d49782-dfd9-41a5-87a6-8fceab9c2198: !Template + answer_choices: Política mundial ||| Esportes ||| Negócios ||| Ciência e Tecnologia + id: b5d49782-dfd9-41a5-87a6-8fceab9c2198 + jinja: '{{text}} + Opção 1: {{answer_choices[label]}}. Opção 2: {{answer_choices[1 - label]}}. Entre a opção 1 e a opção 2, qual é o tema deste exemplo? 
||| {{answer_choices[label]}}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: [] + original_task: true + name: burns_5 + reference: Burns et al + b6d49782-dfd9-41a5-87a6-8fceab9c2198: !Template + answer_choices: Política mundial ||| Esportes ||| Negócios ||| Ciência e Tecnologia + id: b6d49782-dfd9-41a5-87a6-8fceab9c2198 + jinja: '{{text}} + Qual é o tópico deste exemplo, escolha 1: {{answer_choices[label]}}, ou escolha 2: {{answer_choices[1 - label]}}? ||| {{answer_choices[label]}}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: [] + original_task: true + name: burns_6 + reference: Burns et al + b7d49782-dfd9-41a5-87a6-8fceab9c2198: !Template + answer_choices: Política mundial ||| Esportes ||| Negócios ||| Ciência e Tecnologia + id: b7d49782-dfd9-41a5-87a6-8fceab9c2198 + jinja: '{{text}} + Qual rótulo melhor descreve este artigo de notícias, escolha 1: {{answer_choices[label]}}, ou escolha 2: {{answer_choices[1 - label]}}? ||| {{answer_choices[label]}}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: [] + original_task: true + name: burns_7 + reference: Burns et al + b8d49782-dfd9-41a5-87a6-8fceab9c2198: !Template + answer_choices: Política mundial ||| Esportes ||| Negócios ||| Ciência e Tecnologia + id: b8d49782-dfd9-41a5-87a6-8fceab9c2198 + jinja: '{{text}} + Em qual seção de um jornal esse artigo provavelmente apareceria, escolha 1: {{answer_choices[label]}}, ou escolha 2: {{answer_choices[1 - label]}}? 
||| {{answer_choices[label]}}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: [] + original_task: true + name: burns_8 + reference: Burns et al + b401b0ee-6ffe-4a91-8e15-77ee073cd858: !Template + answer_choices: Política mundial ||| Esportes ||| Negócios ||| Ciência e Tecnologia + id: b401b0ee-6ffe-4a91-8e15-77ee073cd858 + jinja: "{{text}} \nIsto é uma notícia sobre {{\"política mundial, esportes,\ + \ negócios ou ciência e tecnologia\"}}? ||| \n{{answer_choices[label] }}" + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: + - Accuracy + original_task: true + name: classify_with_choices + reference: '' + cb355f33-7e8c-4455-a72b-48d315bd4f60: !Template + answer_choices: Política mundial ||| Esportes ||| Negócios ||| Ciência e Tecnologia + id: cb355f33-7e8c-4455-a72b-48d315bd4f60 + jinja: "{{text}} \nQual rótulo melhor descreve esta notícia? ||| \n{{answer_choices[label]\ + \ }}" + metadata: !TemplateMetadata + choices_in_prompt: false + languages: + - pt + metrics: + - Accuracy + original_task: true + name: classify + reference: '' From 7180a643a9cd1acf9b02b9154cf517cf792cc752 Mon Sep 17 00:00:00 2001 From: Christy Koh Date: Wed, 5 Apr 2023 08:08:17 +0000 Subject: [PATCH 05/36] add ag_news template, translated to pt --- .../christykoh/ag_news_pt/templates.yaml | 215 ++++++++++++++++++ 1 file changed, 215 insertions(+) create mode 100644 elk/promptsource/templates/christykoh/ag_news_pt/templates.yaml diff --git a/elk/promptsource/templates/christykoh/ag_news_pt/templates.yaml b/elk/promptsource/templates/christykoh/ag_news_pt/templates.yaml new file mode 100644 index 00000000..2dc94749 --- /dev/null +++ b/elk/promptsource/templates/christykoh/ag_news_pt/templates.yaml @@ -0,0 +1,215 @@ +dataset: ag_news_pt +templates: + 24e44a81-a18a-42dd-a71c-5b31b2d2cb39: !Template + answer_choices: Política mundial ||| Esportes ||| Negócios ||| Ciência e Tecnologia + id: 
24e44a81-a18a-42dd-a71c-5b31b2d2cb39 + jinja: "Qual rótulo melhor descreve este artigo de notícias?\n{{text}} ||| \n{{answer_choices[label]\ + \ }}" + metadata: !TemplateMetadata + choices_in_prompt: false + languages: + - pt + metrics: + - Accuracy + original_task: true + name: classify_question_first + reference: '' + 8fdc1056-1029-41a1-9c67-354fc2b8ceaf: !Template + answer_choices: Política mundial ||| Esportes ||| Negócios ||| Ciência e Tecnologia + id: 8fdc1056-1029-41a1-9c67-354fc2b8ceaf + jinja: "Isso é uma notícia sobre {{\"política mundial, esportes, negócios,\ + \ ou ciência e tecnologia\"}}?\n{{text}} \n||| \n{{answer_choices[label] }}" + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: + - Accuracy + original_task: true + name: classify_with_choices_question_first + reference: '' + 918267e0-af68-4117-892d-2dbe66a58ce9: !Template + answer_choices: Político ||| Atleta ||| Executivo de negócios ||| Cientista + id: 918267e0-af68-4117-892d-2dbe66a58ce9 + jinja: 'Você recomendaria o seguinte artigo para um {{"político"}}, um {{"atleta"}}, + um {{"executivo de negócios"}} ou um {{"cientista"}}? + + + {{ text }} + + ||| + + {{answer_choices[label]}}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: + - Accuracy + original_task: true + name: recommend + reference: '' + 9345df33-4f23-4944-a33c-eef94e626862: !Template + answer_choices: Notícias do mundo ||| Esportes ||| Negócios ||| Ciência e Tecnologia + id: 9345df33-4f23-4944-a33c-eef94e626862 + jinja: "{{text}} \n\nQual das seguintes seções de um jornal seria esta\ + \ artigo provavelmente aparece em? {{\"Notícias do mundo\"}}, {{\"Esportes\"}}, {{\"Negócios\"\ + }} ou {{\"Ciência e Tecnologia\"}}? 
||| \n{{answer_choices[label] }}" + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: + - Accuracy + original_task: true + name: which_section_choices + reference: '' + 98534347-fff7-4c39-a795-4e69a44791f7: !Template + answer_choices: Notícias do mundo ||| Esportes ||| Negócios ||| Ciência e Tecnologia + id: 98534347-fff7-4c39-a795-4e69a44791f7 + jinja: "{{text}} \n\nEm qual seção de um jornal este artigo provavelmente apareceria\ + \ em? ||| \n{{answer_choices[label] }}" + metadata: !TemplateMetadata + choices_in_prompt: false + languages: + - pt + metrics: + - Accuracy + original_task: true + name: which_section + reference: '' + b1d49782-dfd9-41a5-87a6-8fceab9c2198: !Template + answer_choices: Política mundial ||| Esportes ||| Negócios ||| Ciência e Tecnologia + id: b1d49782-dfd9-41a5-87a6-8fceab9c2198 + jinja: 'Considere o seguinte exemplo: '''''' {{text}} '''''' + Opção 1: {{answer_choices[label]}}. + Opção 2: {{answer_choices[1 - label]}}. Entre a opção 1 e a opção 2, o tópico deste exemplo é ||| {{answer_choices[label]}}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: [] + original_task: true + name: burns_1 + reference: Burns et al + b2d49782-dfd9-41a5-87a6-8fceab9c2198: !Template + answer_choices: Política mundial ||| Esportes ||| Negócios ||| Ciência e Tecnologia + id: b2d49782-dfd9-41a5-87a6-8fceab9c2198 + jinja: 'Consider the following example: '''''' {{text}} '''''' + Opção 1: {{answer_choices[label]}}. + Opção 2: {{answer_choices[1 - label]}}. Entre a opção 1 e a opção 2, qual é o tópico deste exemplo? 
||| {{answer_choices[label]}}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: [] + original_task: true + name: burns_2 + reference: Burns et al + b3d49782-dfd9-41a5-87a6-8fceab9c2198: !Template + answer_choices: Política mundial ||| Esportes ||| Negócios ||| Ciência e Tecnologia + id: b3d49782-dfd9-41a5-87a6-8fceab9c2198 + jinja: 'Considere o seguinte exemplo: '''''' {{text}} '''''' + Qual é o tópico deste exemplo, opção 1: {{answer_choices[label]}}, + ou opção 2: {{answer_choices[1 - label]}}? ||| {{answer_choices[label]}}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: [] + original_task: true + name: burns_3 + reference: Burns et al + b4d49782-dfd9-41a5-87a6-8fceab9c2198: !Template + answer_choices: Política mundial ||| Esportes ||| Negócios ||| Ciência e Tecnologia + id: b4d49782-dfd9-41a5-87a6-8fceab9c2198 + jinja: '{{text}} + Opção 1: {{answer_choices[label]}}. Opção 2: {{answer_choices[1 - label]}}. Entre a opção 1 e a opção 2, o tópico deste exemplo é ||| {{answer_choices[label]}}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: [] + original_task: true + name: burns_4 + reference: Burns et al + b5d49782-dfd9-41a5-87a6-8fceab9c2198: !Template + answer_choices: Política mundial ||| Esportes ||| Negócios ||| Ciência e Tecnologia + id: b5d49782-dfd9-41a5-87a6-8fceab9c2198 + jinja: '{{text}} + Opção 1: {{answer_choices[label]}}. Opção 2: {{answer_choices[1 - label]}}. Entre a opção 1 e a opção 2, qual é o tema deste exemplo? 
||| {{answer_choices[label]}}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: [] + original_task: true + name: burns_5 + reference: Burns et al + b6d49782-dfd9-41a5-87a6-8fceab9c2198: !Template + answer_choices: Política mundial ||| Esportes ||| Negócios ||| Ciência e Tecnologia + id: b6d49782-dfd9-41a5-87a6-8fceab9c2198 + jinja: '{{text}} + Qual é o tópico deste exemplo, escolha 1: {{answer_choices[label]}}, ou escolha 2: {{answer_choices[1 - label]}}? ||| {{answer_choices[label]}}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: [] + original_task: true + name: burns_6 + reference: Burns et al + b7d49782-dfd9-41a5-87a6-8fceab9c2198: !Template + answer_choices: Política mundial ||| Esportes ||| Negócios ||| Ciência e Tecnologia + id: b7d49782-dfd9-41a5-87a6-8fceab9c2198 + jinja: '{{text}} + Qual rótulo melhor descreve este artigo de notícias, escolha 1: {{answer_choices[label]}}, ou escolha 2: {{answer_choices[1 - label]}}? ||| {{answer_choices[label]}}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: [] + original_task: true + name: burns_7 + reference: Burns et al + b8d49782-dfd9-41a5-87a6-8fceab9c2198: !Template + answer_choices: Política mundial ||| Esportes ||| Negócios ||| Ciência e Tecnologia + id: b8d49782-dfd9-41a5-87a6-8fceab9c2198 + jinja: '{{text}} + Em qual seção de um jornal esse artigo provavelmente apareceria, escolha 1: {{answer_choices[label]}}, ou escolha 2: {{answer_choices[1 - label]}}? 
||| {{answer_choices[label]}}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: [] + original_task: true + name: burns_8 + reference: Burns et al + b401b0ee-6ffe-4a91-8e15-77ee073cd858: !Template + answer_choices: Política mundial ||| Esportes ||| Negócios ||| Ciência e Tecnologia + id: b401b0ee-6ffe-4a91-8e15-77ee073cd858 + jinja: "{{text}} \nIsto é uma notícia sobre {{\"política mundial, esportes,\ + \ negócios ou ciência e tecnologia\"}}? ||| \n{{answer_choices[label] }}" + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: + - Accuracy + original_task: true + name: classify_with_choices + reference: '' + cb355f33-7e8c-4455-a72b-48d315bd4f60: !Template + answer_choices: Política mundial ||| Esportes ||| Negócios ||| Ciência e Tecnologia + id: cb355f33-7e8c-4455-a72b-48d315bd4f60 + jinja: "{{text}} \nQual rótulo melhor descreve esta notícia? ||| \n{{answer_choices[label]\ + \ }}" + metadata: !TemplateMetadata + choices_in_prompt: false + languages: + - pt + metrics: + - Accuracy + original_task: true + name: classify + reference: '' From b8f5e8b58f70524e27fc6faa8fe76dbb174682c1 Mon Sep 17 00:00:00 2001 From: Christy Koh Date: Fri, 7 Apr 2023 07:38:44 +0000 Subject: [PATCH 06/36] save eval runs to separate subfolders by target dataset --- elk/evaluation/evaluate.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index 17aa073c..5f2fd1d2 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -39,13 +39,16 @@ class Eval(Serializable): normalization: Literal["legacy", "none", "elementwise", "meanonly"] = "meanonly" debug: bool = False - out_dir: Optional[Path] = None + out_dir_suffix: Optional[Path] = None # custom name for subdir in transfer_eval folder num_gpus: int = -1 concatenated_layer_offset: int = 0 def execute(self): - transfer_eval = elk_reporter_dir() / self.source / "transfer_eval" + 
if self.out_dir_suffix == None: + self.out_dir_suffix = '-'.join(self.data.prompts.datasets).replace(' ', '_') + + transfer_eval = elk_reporter_dir() / self.source / "transfer_eval" / self.out_dir_suffix run = Evaluate(cfg=self, out_dir=transfer_eval) run.evaluate() From 51fed5d5046b30cbae22f0e4425315547b2741a0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 7 Apr 2023 07:41:08 +0000 Subject: [PATCH 07/36] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- elk/evaluation/evaluate.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index 5f2fd1d2..6daa7bd8 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -39,16 +39,20 @@ class Eval(Serializable): normalization: Literal["legacy", "none", "elementwise", "meanonly"] = "meanonly" debug: bool = False - out_dir_suffix: Optional[Path] = None # custom name for subdir in transfer_eval folder + out_dir_suffix: Optional[ + Path + ] = None # custom name for subdir in transfer_eval folder num_gpus: int = -1 concatenated_layer_offset: int = 0 def execute(self): - if self.out_dir_suffix == None: - self.out_dir_suffix = '-'.join(self.data.prompts.datasets).replace(' ', '_') + if self.out_dir_suffix is None: + self.out_dir_suffix = "-".join(self.data.prompts.datasets).replace(" ", "_") - transfer_eval = elk_reporter_dir() / self.source / "transfer_eval" / self.out_dir_suffix + transfer_eval = ( + elk_reporter_dir() / self.source / "transfer_eval" / self.out_dir_suffix + ) run = Evaluate(cfg=self, out_dir=transfer_eval) run.evaluate() From 266084463343ac11b8e956ad0910685ae0455a40 Mon Sep 17 00:00:00 2001 From: Christy Koh Date: Fri, 7 Apr 2023 09:14:00 +0000 Subject: [PATCH 08/36] eval multiple datasets --- elk/evaluation/evaluate.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) 
diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index 5f2fd1d2..51d6b838 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -39,19 +39,19 @@ class Eval(Serializable): normalization: Literal["legacy", "none", "elementwise", "meanonly"] = "meanonly" debug: bool = False - out_dir_suffix: Optional[Path] = None # custom name for subdir in transfer_eval folder + out_dir: Optional[Path] = None num_gpus: int = -1 concatenated_layer_offset: int = 0 def execute(self): - if self.out_dir_suffix == None: - self.out_dir_suffix = '-'.join(self.data.prompts.datasets).replace(' ', '_') + datasets = self.data.prompts.datasets - transfer_eval = elk_reporter_dir() / self.source / "transfer_eval" / self.out_dir_suffix + transfer_dir = elk_reporter_dir() / self.source / "transfer_eval" - run = Evaluate(cfg=self, out_dir=transfer_eval) - run.evaluate() + for dataset in datasets: + run = Evaluate(cfg=self, out_dir=transfer_dir / dataset) + run.evaluate() @dataclass From b2302099165c8b82a1239b2b02da6e2d6c48b42a Mon Sep 17 00:00:00 2001 From: Christy Koh Date: Fri, 7 Apr 2023 17:40:19 +0000 Subject: [PATCH 09/36] change prompt answer choices to Portuguese --- .../templates/christykoh/boolq_pt/templates.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/elk/promptsource/templates/christykoh/boolq_pt/templates.yaml b/elk/promptsource/templates/christykoh/boolq_pt/templates.yaml index fbf4b5b0..1120d3cd 100644 --- a/elk/promptsource/templates/christykoh/boolq_pt/templates.yaml +++ b/elk/promptsource/templates/christykoh/boolq_pt/templates.yaml @@ -1,7 +1,7 @@ dataset: boolq_pt templates: 3e386463-1715-4578-9cba-07d11a0d3b61: !Template - answer_choices: False ||| True + answer_choices: Falso ||| Verdadeiro id: 3e386463-1715-4578-9cba-07d11a0d3b61 jinja: 'Passagem: {{passage}} @@ -24,7 +24,7 @@ templates: name: after_reading reference: '' 492f0f88-4370-46cd-839b-1de37a55aeda: !Template - answer_choices: No ||| Yes 
+ answer_choices: Não ||| Sim id: 492f0f88-4370-46cd-839b-1de37a55aeda jinja: "{{ passage }} \nPergunta: {{ question }}\nResposta: ||| \n{% if label !=\ \ -1 %}\n{{ answer_choices[label] }}\n{% endif %}" @@ -38,7 +38,7 @@ templates: name: GPT-3 Style reference: Same as Figure G29, p. 58 of the GPT-3 paper 6cb6a026-c070-470a-b75d-bb8fdf424e35: !Template - answer_choices: No ||| Yes + answer_choices: Não ||| Sim id: 6cb6a026-c070-470a-b75d-bb8fdf424e35 jinja: "{{ passage }}\n\nDepois de ler isso, eu me pergunto {{ question }}? |||\n{% if\ \ label != -1 %}\n{{ answer_choices[label] }} \n{% endif %}" @@ -52,7 +52,7 @@ templates: name: "I wonder\u2026" reference: '' 7cf7acdf-e3a2-459f-a3e8-2e2d27dd6aa5: !Template - answer_choices: No ||| Yes + answer_choices: Não ||| Sim id: 7cf7acdf-e3a2-459f-a3e8-2e2d27dd6aa5 jinja: 'Texto: {{passage}} @@ -74,7 +74,7 @@ templates: name: yes_no_question reference: '' 7d21d974-0624-4d4f-9e8c-644e2d009cb5: !Template - answer_choices: No ||| Yes + answer_choices: Não ||| Sim id: 7d21d974-0624-4d4f-9e8c-644e2d009cb5 jinja: "{{ passage }}\n\nDepois de ler isso, você poderia me dizer {{ question }}? \ \ ||| {% if label != -1 %}{{ answer_choices[label] }}\n{% endif %}" @@ -88,7 +88,7 @@ templates: name: "could you tell me\u2026" reference: '' 922d3e87-ac58-4731-84d1-f0a40e47afb5: !Template - answer_choices: No ||| Yes + answer_choices: Não ||| Sim id: 922d3e87-ac58-4731-84d1-f0a40e47afb5 jinja: "EXAME\n1. Responda sim ou não.\nDocumento: {{passage}}\nPergunta: {{question}}? \ \ ||| \n{% if label != -1 %}\n{{answer_choices[label]}}\n{% endif %}" @@ -102,7 +102,7 @@ templates: name: exam reference: '' 9a1bf459-8047-437c-9def-f21e960429cc: !Template - answer_choices: No ||| Yes + answer_choices: Não ||| Sim id: 9a1bf459-8047-437c-9def-f21e960429cc jinja: 'Com base na seguinte passagem, {{ question }}? 
{{ passage }} @@ -148,7 +148,7 @@ templates: name: exercise reference: '' b2b3cb60-d6e3-491c-a09a-8201e13e417e: !Template - answer_choices: No ||| Yes + answer_choices: Não ||| Sim id: b2b3cb60-d6e3-491c-a09a-8201e13e417e jinja: '{{ passage }} From 74c9915bb31c4e353941b71011062168552baabe Mon Sep 17 00:00:00 2001 From: Christy Koh Date: Tue, 11 Apr 2023 21:31:55 +0000 Subject: [PATCH 10/36] add imdb_pt template --- .../christykoh/imdb_pt/templates.yaml | 219 ++++++++++++++++++ 1 file changed, 219 insertions(+) create mode 100644 elk/promptsource/templates/christykoh/imdb_pt/templates.yaml diff --git a/elk/promptsource/templates/christykoh/imdb_pt/templates.yaml b/elk/promptsource/templates/christykoh/imdb_pt/templates.yaml new file mode 100644 index 00000000..8fba1f39 --- /dev/null +++ b/elk/promptsource/templates/christykoh/imdb_pt/templates.yaml @@ -0,0 +1,219 @@ +dataset: imdb_pt +templates: + 02ff2949-0f45-4d97-941e-6fa4c0afbc2d: !Template + answer_choices: negativo ||| positivo + id: 02ff2949-0f45-4d97-941e-6fa4c0afbc2d + jinja: 'A crítica de filme a seguir expressa que sentimento? {{text}} + + + ||| {{ answer_choices + [label] }}' + metadata: !TemplateMetadata + choices_in_prompt: false + languages: + - pt + metrics: + - Accuracy + original_task: true + name: Movie Expressed Sentiment 2 + reference: '' + 2351d12a-e630-4d19-8b41-e199266e38f7: !Template + answer_choices: ruim ||| bom + id: 2351d12a-e630-4d19-8b41-e199266e38f7 + jinja: '{{text}} + + + O crítico achou este filme {{"bom ou mal"}}? ||| {{ answer_choices + [label] }}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: + - Accuracy + original_task: true + name: Reviewer Opinion bad good choices + reference: '' + 5f372fb1-795a-47b6-8ddf-c4fd1579e76a: !Template + answer_choices: negativo ||| positivo + id: 5f372fb1-795a-47b6-8ddf-c4fd1579e76a + jinja: "{{text}} \nEsta avaliação é {{\"positivo ou negativo\"}}? 
||| \n{{answer_choices[label]}}" + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: + - Accuracy + original_task: true + name: 'Sentiment with choices ' + reference: '' + 866474a5-1498-46b7-bfee-ac0c5160707f: !Template + answer_choices: negativo ||| positivo + id: 866474a5-1498-46b7-bfee-ac0c5160707f + jinja: '{{text}} + + + Como o espectador se sente sobre o filme? + + + ||| {{ answer_choices + [label] }}' + metadata: !TemplateMetadata + choices_in_prompt: false + languages: + - pt + metrics: + - Accuracy + original_task: true + name: Reviewer Sentiment Feeling + reference: '' + 96538f30-f2c1-430e-8fc6-936a16966d9c: !Template + answer_choices: negativo ||| positivo + id: 96538f30-f2c1-430e-8fc6-936a16966d9c + jinja: '{{text}} Que sentimento o escritor expressa pelo filme? ||| {{ + answer_choices [label] }}' + metadata: !TemplateMetadata + choices_in_prompt: false + languages: + - pt + metrics: + - Accuracy + original_task: true + name: Writer Expressed Sentiment + reference: '' + af51297c-38a3-4d6c-a8b5-04b1243d7443: !Template + answer_choices: negativo ||| positivo + id: af51297c-38a3-4d6c-a8b5-04b1243d7443 + jinja: '{{text}} + + O sentimento expresso pelo filme é ||| {{ answer_choices + [label] }}' + metadata: !TemplateMetadata + choices_in_prompt: false + languages: + - pt + metrics: + - Accuracy + original_task: true + name: Movie Expressed Sentiment + reference: '' + b93b74ac-fe95-40b4-9610-318b46ab820f: !Template + answer_choices: negativo ||| positivo + id: b93b74ac-fe95-40b4-9610-318b46ab820f + jinja: '{{text}} + + + Qual é o sentimento expresso neste texto? 
+ + + ||| {{ answer_choices + [label] }}' + metadata: !TemplateMetadata + choices_in_prompt: false + languages: + - pt + metrics: + - Accuracy + original_task: true + name: Text Expressed Sentiment + reference: '' + b9b5d79d-f0b3-4bec-a724-f585db3e93ff: !Template + answer_choices: negativo ||| positivo + id: b9b5d79d-f0b3-4bec-a724-f585db3e93ff + jinja: '{{text}} + + + Isso definitivamente não é um ||| {{ answer_choices [1-label]}} avaliação.' + metadata: !TemplateMetadata + choices_in_prompt: false + languages: + - pt + metrics: + - Accuracy + original_task: false + name: Negation template for positive and negative + reference: '' + bd82ba0f-01d4-4fa1-bf8d-07e392c00cd9: !Template + answer_choices: Não ||| Sim + id: bd82ba0f-01d4-4fa1-bf8d-07e392c00cd9 + jinja: '{{text}} + + O crítico gostou do filme? ||| {{ answer_choices [label]}}' + metadata: !TemplateMetadata + choices_in_prompt: false + languages: + - pt + metrics: + - Accuracy + original_task: true + name: Reviewer Enjoyment Yes No + reference: '' + c70d1687-2421-49a2-9553-91b8bac4cfbe: !Template + answer_choices: negativo ||| positivo + id: c70d1687-2421-49a2-9553-91b8bac4cfbe + jinja: '{{text}} + + Qual é o sentimento expresso pelo crítico para o filme? + + ||| {{ answer_choices [label] }}' + metadata: !TemplateMetadata + choices_in_prompt: false + languages: + - pt + metrics: + - Accuracy + original_task: true + name: Reviewer Expressed Sentiment + reference: '' + dacb5f03-dc80-428c-b707-8574436675c9: !Template + answer_choices: 0 ||| 1 + id: dacb5f03-dc80-428c-b707-8574436675c9 + jinja: 'Considere o seguinte exemplo: '''''' {{text}} '''''' + + + Entre {{ answer_choices[0] }} e {{answer_choices[1] }}, que é o sentimento + deste exemplo? 
+ + ||| {{ answer_choices[label] }}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: + - Accuracy + original_task: true + name: burns_2 + reference: '' + e01970ab-42c0-4e6e-a08f-4940d889ef37: !Template + answer_choices: Eles não gostaram! ||| Eles adoraram + id: e01970ab-42c0-4e6e-a08f-4940d889ef37 + jinja: '{{text}} + + Como o crítico se sente sobre o filme? ||| + + {{ answer_choices[label] }}' + metadata: !TemplateMetadata + choices_in_prompt: false + languages: + - pt + metrics: + - Accuracy + original_task: true + name: Reviewer Enjoyment + reference: '' + eb791ab2-d2b4-4be6-a569-64086983abee: !Template + answer_choices: 0 ||| 1 + id: eb791ab2-d2b4-4be6-a569-64086983abee + jinja: 'Considere o seguinte exemplo: '''''' {{text}} '''''' + + Entre {{ answer_choices[0] }} e {{answer_choices[1] }}, o sentimento de + este exemplo é ||| {{ answer_choices[label] }}' + metadata: !TemplateMetadata + choices_in_prompt: true + languages: + - pt + metrics: + - Accuracy + original_task: true + name: burns_1 + reference: '' From 85fd9e40b56ff6ba4b103d48c6962c0b24b12b6b Mon Sep 17 00:00:00 2001 From: Christy Koh Date: Tue, 11 Apr 2023 21:36:00 +0000 Subject: [PATCH 11/36] implement prompt sharing, generate combined templates.yaml --- elk/extraction/extraction.py | 2 ++ elk/extraction/prompt_loading.py | 31 +++++++++++++++++++++++++++++-- elk/promptsource/templates.py | 10 ++++++++++ 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/elk/extraction/extraction.py b/elk/extraction/extraction.py index 187428fc..3285a491 100644 --- a/elk/extraction/extraction.py +++ b/elk/extraction/extraction.py @@ -54,6 +54,7 @@ class Extract(Serializable): token_loc: Literal["first", "last", "mean"] = "last" min_gpu_mem: Optional[int] = None num_gpus: int = -1 + combined_prompter_path: Optional[str] = None # if template file does not exist, combine from datasets and save to this path def __post_init__(self, layer_stride: int): if self.layers and 
layer_stride > 1: @@ -98,6 +99,7 @@ def extract_hiddens( stream=cfg.prompts.stream, rank=rank, world_size=world_size, + combined_prompter_path=cfg.combined_prompter_path ) # this dataset is already sharded, but hasn't been truncated to max_examples # AutoModel should do the right thing here in nearly all cases. We don't actually diff --git a/elk/extraction/prompt_loading.py b/elk/extraction/prompt_loading.py index a494be5a..e1322f73 100644 --- a/elk/extraction/prompt_loading.py +++ b/elk/extraction/prompt_loading.py @@ -1,3 +1,4 @@ +from os.path import exists from dataclasses import dataclass from random import Random from typing import Any, Iterator, Literal, Optional @@ -40,7 +41,7 @@ class PromptConfig(Serializable): num_shots: The number of examples to use in few-shot prompts. If zero, prompts are zero-shot. Defaults to 0. num_variants: The number of prompt templates to apply to each predicate upon - call to __getitem__. Use -1 to apply all available templates. Defaults to 1. + call to __getitem__. Use -1 to apply all available templates. Defaults to -1. seed: The seed to use for prompt randomization. Defaults to 42. stream: Whether to stream the dataset from the Internet. Defaults to False. """ @@ -78,6 +79,7 @@ def load_prompts( stream: bool = False, rank: int = 0, world_size: int = 1, + combined_prompter_path: str = "" ) -> Iterator[dict]: """Load a dataset full of prompts generated from the specified datasets. 
@@ -100,10 +102,24 @@ def load_prompts( train_datasets = [] rng = Random(seed) + # If combined template is not empty and does not exist as a file yet, need to aggregate + # Init/create a new file for combining templates + combined_prompter = None + if combined_prompter_path: + print("Combining templates into shared prompter.") + combined_prompter = DatasetTemplates("combined_templates", combined_prompter_path) + # should_aggregate_templates = (combined_prompter and not exists(combined_prompter.yaml_path)) + # print("should aggregate: ", should_aggregate_templates) + # First load the datasets and prompters. We need to know the minimum number of # templates for any dataset in order to make sure we don't run out of prompts. for ds_string in dataset_strings: ds_name, _, config_name = ds_string.partition(" ") + prompter = DatasetTemplates(ds_name, config_name) + # Populate combined prompter with templates from different datasets + # if should_aggregate_templates: + combined_prompter.templates.update(prompter.get_templates_with_new_uuids()) + print("len of prompter templates is ", len(combined_prompter.templates)) prompters.append(DatasetTemplates(ds_name, config_name)) ds_dict = assert_type( @@ -136,11 +152,22 @@ def load_prompts( train_datasets.append(train_ds) min_num_templates = min(len(prompter.templates) for prompter in prompters) + # if should_aggregate_templates: + + if combined_prompter: + # save combined templates to yaml file + print("saving aggregate templates") + combined_prompter.sync_mapping() + combined_prompter.write_to_file() + min_num_templates = len(combined_prompter.templates) + print("length of combined_prompter templates is ", min_num_templates) + num_variants = ( min_num_templates if num_variants == -1 else min(num_variants, min_num_templates) ) + print() assert num_variants > 0 if rank == 0: print(f"Using {num_variants} variants of each prompt") @@ -179,7 +206,7 @@ def load_prompts( label_column=label_column, num_classes=num_classes, 
num_variants=num_variants, - prompter=prompter, + prompter=prompter if not combined_prompter else combined_prompter, rng=rng, fewshot_iter=fewshot_iter, ) diff --git a/elk/promptsource/templates.py b/elk/promptsource/templates.py index ea4e9196..68855cf6 100644 --- a/elk/promptsource/templates.py +++ b/elk/promptsource/templates.py @@ -543,6 +543,16 @@ def delete_folder(self) -> None: if len(os.listdir(base_folder)) == 0: rmtree(base_folder) + def get_templates_with_new_uuids(self) -> dict: + """ + Generate new uuids for templates, used when merging template datasets. + """ + new_templates = {} + for template in self.templates.values(): + template.id = str(uuid.uuid4()) + new_templates[template.id] = template + return new_templates + def __getitem__(self, template_key: str) -> "Template": return self.templates[self.name_to_id_mapping[template_key]] From 8383f26d922c83be45ffa54f39faea49b933726b Mon Sep 17 00:00:00 2001 From: Christy Koh Date: Wed, 12 Apr 2023 06:25:57 +0000 Subject: [PATCH 12/36] fix num templates logic --- elk/extraction/prompt_loading.py | 15 ++++++++------- .../templates/christykoh/imdb_pt/templates.yaml | 2 +- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/elk/extraction/prompt_loading.py b/elk/extraction/prompt_loading.py index e1322f73..8af7cf67 100644 --- a/elk/extraction/prompt_loading.py +++ b/elk/extraction/prompt_loading.py @@ -118,9 +118,10 @@ def load_prompts( prompter = DatasetTemplates(ds_name, config_name) # Populate combined prompter with templates from different datasets # if should_aggregate_templates: - combined_prompter.templates.update(prompter.get_templates_with_new_uuids()) - print("len of prompter templates is ", len(combined_prompter.templates)) - prompters.append(DatasetTemplates(ds_name, config_name)) + if combined_prompter: + combined_prompter.templates.update(prompter.get_templates_with_new_uuids()) + print("len of prompter templates is ", len(combined_prompter.templates)) + 
prompters.append(DatasetTemplates(ds_name, config_name)) ds_dict = assert_type( dict, load_dataset(ds_name, config_name or None, streaming=stream) @@ -150,17 +151,17 @@ def load_prompts( raw_datasets.append(split) train_datasets.append(train_ds) - - min_num_templates = min(len(prompter.templates) for prompter in prompters) - # if should_aggregate_templates: - if combined_prompter: + min_num_templates = -1 + if combined_prompter != None: # save combined templates to yaml file print("saving aggregate templates") combined_prompter.sync_mapping() combined_prompter.write_to_file() min_num_templates = len(combined_prompter.templates) print("length of combined_prompter templates is ", min_num_templates) + else: + min_num_templates = min(len(prompter.templates) for prompter in prompters) num_variants = ( min_num_templates diff --git a/elk/promptsource/templates/christykoh/imdb_pt/templates.yaml b/elk/promptsource/templates/christykoh/imdb_pt/templates.yaml index 8fba1f39..bc18de77 100644 --- a/elk/promptsource/templates/christykoh/imdb_pt/templates.yaml +++ b/elk/promptsource/templates/christykoh/imdb_pt/templates.yaml @@ -18,7 +18,7 @@ templates: name: Movie Expressed Sentiment 2 reference: '' 2351d12a-e630-4d19-8b41-e199266e38f7: !Template - answer_choices: ruim ||| bom + answer_choices: mal ||| bom id: 2351d12a-e630-4d19-8b41-e199266e38f7 jinja: '{{text}} From df41ab49b4b4499504261cd7b6c7ada916e12c2b Mon Sep 17 00:00:00 2001 From: Christy Koh Date: Wed, 12 Apr 2023 06:33:00 +0000 Subject: [PATCH 13/36] fix pt answer choice --- elk/promptsource/templates/christykoh/imdb_pt/templates.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elk/promptsource/templates/christykoh/imdb_pt/templates.yaml b/elk/promptsource/templates/christykoh/imdb_pt/templates.yaml index 8fba1f39..bc18de77 100644 --- a/elk/promptsource/templates/christykoh/imdb_pt/templates.yaml +++ b/elk/promptsource/templates/christykoh/imdb_pt/templates.yaml @@ -18,7 +18,7 @@ templates: name: 
Movie Expressed Sentiment 2 reference: '' 2351d12a-e630-4d19-8b41-e199266e38f7: !Template - answer_choices: ruim ||| bom + answer_choices: mal ||| bom id: 2351d12a-e630-4d19-8b41-e199266e38f7 jinja: '{{text}} From a132f4011b095f219bc50700540b3064658ea786 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 12 Apr 2023 06:43:27 +0000 Subject: [PATCH 14/36] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- elk/promptsource/templates/christykoh/boolq_pt/templates.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/elk/promptsource/templates/christykoh/boolq_pt/templates.yaml b/elk/promptsource/templates/christykoh/boolq_pt/templates.yaml index 1120d3cd..a83c4d95 100644 --- a/elk/promptsource/templates/christykoh/boolq_pt/templates.yaml +++ b/elk/promptsource/templates/christykoh/boolq_pt/templates.yaml @@ -12,7 +12,7 @@ templates: {% if label != -1 %} {{answer_choices[label]}} - + {% endif %}' metadata: !TemplateMetadata choices_in_prompt: true @@ -112,7 +112,7 @@ templates: {% if label != -1 %} {{ answer_choices[label] }} - + {% endif %}' metadata: !TemplateMetadata choices_in_prompt: false From c71bf1ce6862ca217f5e63c6f7d1685e8b6c974a Mon Sep 17 00:00:00 2001 From: Christy Koh Date: Wed, 12 Apr 2023 06:47:15 +0000 Subject: [PATCH 15/36] remove empty prompt_dataset file --- elk/extraction/prompt_dataset.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 elk/extraction/prompt_dataset.py diff --git a/elk/extraction/prompt_dataset.py b/elk/extraction/prompt_dataset.py deleted file mode 100644 index e69de29b..00000000 From 1ec5787ab12a6b68b83d909c6385ce7b466b86cb Mon Sep 17 00:00:00 2001 From: Christy Koh Date: Wed, 12 Apr 2023 07:01:16 +0000 Subject: [PATCH 16/36] fix empty prompters bug --- elk/extraction/prompt_loading.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/elk/extraction/prompt_loading.py b/elk/extraction/prompt_loading.py index 77be12b1..bfbf905b 100644 --- a/elk/extraction/prompt_loading.py +++ b/elk/extraction/prompt_loading.py @@ -118,7 +118,7 @@ def load_prompts( if combined_prompter: combined_prompter.templates.update(prompter.get_templates_with_new_uuids()) print("len of prompter templates is ", len(combined_prompter.templates)) - prompters.append(DatasetTemplates(ds_name, config_name)) + prompters.append(DatasetTemplates(ds_name, config_name)) ds_dict = assert_type( dict, load_dataset(ds_name, config_name or None, streaming=stream) @@ -150,7 +150,7 @@ def load_prompts( train_datasets.append(train_ds) min_num_templates = -1 - if combined_prompter != None: + if combined_prompter: # save combined templates to yaml file print("saving aggregate templates") combined_prompter.sync_mapping() From 66c7a6b82ca8fd7c15a59d78fc1aa75b3eaade34 Mon Sep 17 00:00:00 2001 From: Christy Koh Date: Wed, 12 Apr 2023 10:00:31 +0000 Subject: [PATCH 17/36] fix multiclass label bug --- elk/extraction/prompt_loading.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elk/extraction/prompt_loading.py b/elk/extraction/prompt_loading.py index c46d3d7b..3d0d6587 100644 --- a/elk/extraction/prompt_loading.py +++ b/elk/extraction/prompt_loading.py @@ -259,7 +259,7 @@ def qa_cat(q: str, a: str) -> str: raise ValueError(f'Prompt duplicated {dup_count} times! 
"{maybe_dup}"') return dict( - label=label, + label=new_label, prompts=prompts, template_names=prompter.all_template_names, ) From d91acac50a6eff252a9caede0d282a60f92b85af Mon Sep 17 00:00:00 2001 From: Christy Koh Date: Wed, 12 Apr 2023 10:33:33 +0000 Subject: [PATCH 18/36] move prompt combination to PromptConfig post_init logic --- elk/extraction/extraction.py | 4 +++ elk/extraction/prompt_loading.py | 50 +++++++++++++++----------------- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/elk/extraction/extraction.py b/elk/extraction/extraction.py index 4b6cd3a9..c17d080f 100644 --- a/elk/extraction/extraction.py +++ b/elk/extraction/extraction.py @@ -270,8 +270,12 @@ def get_splits() -> SplitDict: model_cfg = AutoConfig.from_pretrained(cfg.model) num_variants = cfg.prompts.num_variants + # if combined prompter flag is set, combine prompt templates + + # extraneous, remove ? ds_name, _, config_name = cfg.prompts.datasets[0].partition(" ") info = get_dataset_config_info(ds_name, config_name or None) + # ? end layer_cols = { f"hidden_{layer}": Array3D( diff --git a/elk/extraction/prompt_loading.py b/elk/extraction/prompt_loading.py index bfbf905b..332d5e7d 100644 --- a/elk/extraction/prompt_loading.py +++ b/elk/extraction/prompt_loading.py @@ -26,8 +26,8 @@ class PromptConfig(Serializable): """ Args: - dataset: Space-delimited name of the HuggingFace dataset to use, e.g. - `"super_glue boolq"` or `"imdb"`. + datasets: List of space-delimited names of the HuggingFace datasets to use, e.g. + [`"super_glue boolq", "imdb"]`. balance: Whether to force class balance in the dataset using undersampling. data_dir: The directory to use for caching the dataset. Defaults to `~/.cache/huggingface/datasets`. @@ -44,6 +44,9 @@ class PromptConfig(Serializable): call to __getitem__. Use -1 to apply all available templates. Defaults to -1. seed: The seed to use for prompt randomization. Defaults to 42. stream: Whether to stream the dataset from the Internet. 
Defaults to False. + combined_prompter_path: Path to save a combined template file to, when testing + prompt invariance across multiple datasets, and will be interpreted as a subpath + of `combined_paths` in the promptsource templates dir. Defaults to empty string. """ datasets: list[str] = field(positional=True) @@ -55,6 +58,7 @@ class PromptConfig(Serializable): num_variants: int = -1 seed: int = 42 stream: bool = False + combined_prompter_path: str = "" def __post_init__(self): if len(self.max_examples) > 2: @@ -69,6 +73,21 @@ def __post_init__(self): if len(self.max_examples) == 1: self.max_examples *= 2 + # Combining prompts + if self.combined_prompter_path: + print("Copying templates across datasets to combined_templates/ " + + f"{self.combined_prompter_path}/templates.yaml") + combined_prompter = DatasetTemplates("combined_templates", self.combined_prompter_path) + for ds_string in self.datasets: + ds_name, _, config_name = ds_string.partition(" ") + prompter = DatasetTemplates(ds_name, config_name) + combined_prompter.templates.update(prompter.get_templates_with_new_uuids()) + print("len of prompter templates is ", len(combined_prompter.templates)) + combined_prompter.write_to_file() + + # Update datasets reference to use combined prompter + self.datasets = [f"combined_templates {self.combined_prompter_path}"] * len(self.datasets) + def load_prompts( *dataset_strings: str, @@ -78,8 +97,7 @@ def load_prompts( split_type: Literal["train", "val"] = "train", stream: bool = False, rank: int = 0, - world_size: int = 1, - combined_prompter_path: str = "" + world_size: int = 1 ) -> Iterator[dict]: """Load a dataset full of prompts generated from the specified datasets. 
@@ -102,22 +120,11 @@ def load_prompts( train_datasets = [] rng = Random(seed) - # If flag is set, init/create a new file for combining templates - combined_prompter = None - if combined_prompter_path: - print("Combining templates into shared prompter.") - combined_prompter = DatasetTemplates("combined_templates", combined_prompter_path) - # First load the datasets and prompters. We need to know the minimum number of # templates for any dataset in order to make sure we don't run out of prompts. for ds_string in dataset_strings: ds_name, _, config_name = ds_string.partition(" ") prompter = DatasetTemplates(ds_name, config_name) - # Populate combined prompter with templates from different datasets - # if should_aggregate_templates: - if combined_prompter: - combined_prompter.templates.update(prompter.get_templates_with_new_uuids()) - print("len of prompter templates is ", len(combined_prompter.templates)) prompters.append(DatasetTemplates(ds_name, config_name)) ds_dict = assert_type( @@ -149,16 +156,7 @@ def load_prompts( raw_datasets.append(split) train_datasets.append(train_ds) - min_num_templates = -1 - if combined_prompter: - # save combined templates to yaml file - print("saving aggregate templates") - combined_prompter.sync_mapping() - combined_prompter.write_to_file() - min_num_templates = len(combined_prompter.templates) - print("length of combined_prompter templates is ", min_num_templates) - else: - min_num_templates = min(len(prompter.templates) for prompter in prompters) + min_num_templates = min(len(prompter.templates) for prompter in prompters) num_variants = ( min_num_templates @@ -283,7 +281,7 @@ def qa_cat(q: str, a: str) -> str: raise ValueError(f'Prompt duplicated {dup_count} times! 
"{maybe_dup}"') return dict( - label=label, + label=new_label, prompts=prompts, template_names=prompter.all_template_names, ) From 0c2f5c4d6ab6bc6eafccfba2bf954f8d009b01eb Mon Sep 17 00:00:00 2001 From: Christy Koh Date: Wed, 12 Apr 2023 10:52:10 +0000 Subject: [PATCH 19/36] fix refactor bugs, runnable state --- elk/extraction/extraction.py | 6 +----- elk/extraction/prompt_loading.py | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/elk/extraction/extraction.py b/elk/extraction/extraction.py index c17d080f..88e39328 100644 --- a/elk/extraction/extraction.py +++ b/elk/extraction/extraction.py @@ -59,7 +59,6 @@ class Extract(Serializable): token_loc: Literal["first", "last", "mean"] = "last" min_gpu_mem: Optional[int] = None num_gpus: int = -1 - combined_prompter_path: Optional[str] = None # if template file does not exist, combine from datasets and save to this path def __post_init__(self, layer_stride: int): if self.layers and layer_stride > 1: @@ -103,8 +102,7 @@ def extract_hiddens( split_type=split_type, stream=cfg.prompts.stream, rank=rank, - world_size=world_size, - combined_prompter_path=cfg.combined_prompter_path + world_size=world_size ) # this dataset is already sharded, but hasn't been truncated to max_examples model = instantiate_model( @@ -270,8 +268,6 @@ def get_splits() -> SplitDict: model_cfg = AutoConfig.from_pretrained(cfg.model) num_variants = cfg.prompts.num_variants - # if combined prompter flag is set, combine prompt templates - # extraneous, remove ? 
ds_name, _, config_name = cfg.prompts.datasets[0].partition(" ") info = get_dataset_config_info(ds_name, config_name or None) diff --git a/elk/extraction/prompt_loading.py b/elk/extraction/prompt_loading.py index d80da4c4..9821ddcf 100644 --- a/elk/extraction/prompt_loading.py +++ b/elk/extraction/prompt_loading.py @@ -202,7 +202,7 @@ def load_prompts( label_column=label_column, num_classes=num_classes, num_variants=num_variants, - prompter=prompter if not combined_prompter else combined_prompter, + prompter=prompter, rng=rng, fewshot_iter=fewshot_iter, ) From 066cd447f6e5abe4fe64c0af0d3f18284cad588b Mon Sep 17 00:00:00 2001 From: Christy Koh Date: Wed, 12 Apr 2023 12:09:05 +0000 Subject: [PATCH 20/36] rewrite template merging, regenerate prompter every run --- elk/extraction/extraction.py | 3 ++- elk/extraction/prompt_loading.py | 34 +++++++++++++++++++------------- elk/promptsource/templates.py | 13 ++++++------ 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/elk/extraction/extraction.py b/elk/extraction/extraction.py index 88e39328..fe70603a 100644 --- a/elk/extraction/extraction.py +++ b/elk/extraction/extraction.py @@ -102,7 +102,8 @@ def extract_hiddens( split_type=split_type, stream=cfg.prompts.stream, rank=rank, - world_size=world_size + world_size=world_size, + combined_template_path=cfg.prompts.combined_template_path ) # this dataset is already sharded, but hasn't been truncated to max_examples model = instantiate_model( diff --git a/elk/extraction/prompt_loading.py b/elk/extraction/prompt_loading.py index 9821ddcf..c840f212 100644 --- a/elk/extraction/prompt_loading.py +++ b/elk/extraction/prompt_loading.py @@ -44,7 +44,7 @@ class PromptConfig(Serializable): call to __getitem__. Use -1 to apply all available templates. Defaults to -1. seed: The seed to use for prompt randomization. Defaults to 42. stream: Whether to stream the dataset from the Internet. Defaults to False. 
- combined_prompter_path: Path to save a combined template file to, when testing + combined_template_path: Path to save a combined template file to, when testing prompt invariance across multiple datasets, and will be interpreted as a subpath of `combined_paths` in the promptsource templates dir. Defaults to empty string. """ @@ -58,7 +58,7 @@ class PromptConfig(Serializable): num_variants: int = -1 seed: int = 42 stream: bool = False - combined_prompter_path: str = "" + combined_template_path: str = "" def __post_init__(self): if len(self.max_examples) > 2: @@ -74,21 +74,20 @@ def __post_init__(self): self.max_examples *= 2 # Combining prompts - if self.combined_prompter_path: + if self.combined_template_path: print("Copying templates across datasets to combined_templates/ " + - f"{self.combined_prompter_path}/templates.yaml") - combined_prompter = DatasetTemplates("combined_templates", self.combined_prompter_path) + f"{self.combined_template_path}/templates.yaml") + combined_prompter = DatasetTemplates("combined_templates", self.combined_template_path) + combined_prompter.templates = {} for ds_string in self.datasets: ds_name, _, config_name = ds_string.partition(" ") prompter = DatasetTemplates(ds_name, config_name) - combined_prompter.templates.update(prompter.get_templates_with_new_uuids()) - print("len of prompter templates is ", len(combined_prompter.templates)) + # TODO: Verify that cols are same; if not, warn that templates could not be combined. 
+ combined_prompter.merge_templates_from(prompter) + # combined_prompter.templates.update(prompter.get_templates_with_new_uuids()) + print("Total number of templates gathered: ", len(combined_prompter.templates)) combined_prompter.write_to_file() - # Update datasets reference to use combined prompter - self.datasets = [f"combined_templates {self.combined_prompter_path}"] * len(self.datasets) - - def load_prompts( *dataset_strings: str, num_shots: int = 0, @@ -97,7 +96,8 @@ def load_prompts( split_type: Literal["train", "val"] = "train", stream: bool = False, rank: int = 0, - world_size: int = 1 + world_size: int = 1, + combined_template_path: str = "" ) -> Iterator[dict]: """Load a dataset full of prompts generated from the specified datasets. @@ -124,8 +124,10 @@ def load_prompts( # templates for any dataset in order to make sure we don't run out of prompts. for ds_string in dataset_strings: ds_name, _, config_name = ds_string.partition(" ") - prompter = DatasetTemplates(ds_name, config_name) - prompters.append(DatasetTemplates(ds_name, config_name)) + + if combined_template_path == "": + prompter = DatasetTemplates(ds_name, config_name) + prompters.append(DatasetTemplates(ds_name, config_name)) ds_dict = assert_type( dict, load_dataset(ds_name, config_name or None, streaming=stream) @@ -156,6 +158,10 @@ def load_prompts( raw_datasets.append(split) train_datasets.append(train_ds) + if combined_template_path: + combined_prompter = DatasetTemplates("combined_templates", combined_template_path) + prompters = [combined_prompter] * len(dataset_strings) + min_num_templates = min(len(prompter.templates) for prompter in prompters) num_variants = ( diff --git a/elk/promptsource/templates.py b/elk/promptsource/templates.py index 68855cf6..38d3f87f 100644 --- a/elk/promptsource/templates.py +++ b/elk/promptsource/templates.py @@ -543,15 +543,14 @@ def delete_folder(self) -> None: if len(os.listdir(base_folder)) == 0: rmtree(base_folder) - def 
get_templates_with_new_uuids(self) -> dict: + def merge_templates_from(self, src: "DatasetTemplates"): """ - Generate new uuids for templates, used when merging template datasets. + Merge templates from src. """ - new_templates = {} - for template in self.templates.values(): - template.id = str(uuid.uuid4()) - new_templates[template.id] = template - return new_templates + for template in src.templates.values(): + template_id = str(uuid.uuid4()) + self.templates[template_id] = template + self.sync_mapping() def __getitem__(self, template_key: str) -> "Template": return self.templates[self.name_to_id_mapping[template_key]] From 846b78c0f664a48c556231f41a7717cf399b348b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 12 Apr 2023 12:16:15 +0000 Subject: [PATCH 21/36] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- elk/extraction/extraction.py | 2 +- elk/extraction/prompt_loading.py | 23 ++++++++++++++++------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/elk/extraction/extraction.py b/elk/extraction/extraction.py index fe70603a..037aa828 100644 --- a/elk/extraction/extraction.py +++ b/elk/extraction/extraction.py @@ -103,7 +103,7 @@ def extract_hiddens( stream=cfg.prompts.stream, rank=rank, world_size=world_size, - combined_template_path=cfg.prompts.combined_template_path + combined_template_path=cfg.prompts.combined_template_path, ) # this dataset is already sharded, but hasn't been truncated to max_examples model = instantiate_model( diff --git a/elk/extraction/prompt_loading.py b/elk/extraction/prompt_loading.py index c840f212..7f8b5b36 100644 --- a/elk/extraction/prompt_loading.py +++ b/elk/extraction/prompt_loading.py @@ -75,9 +75,13 @@ def __post_init__(self): # Combining prompts if self.combined_template_path: - print("Copying templates across datasets to combined_templates/ " + - 
f"{self.combined_template_path}/templates.yaml") - combined_prompter = DatasetTemplates("combined_templates", self.combined_template_path) + print( + "Copying templates across datasets to combined_templates/ " + + f"{self.combined_template_path}/templates.yaml" + ) + combined_prompter = DatasetTemplates( + "combined_templates", self.combined_template_path + ) combined_prompter.templates = {} for ds_string in self.datasets: ds_name, _, config_name = ds_string.partition(" ") @@ -85,9 +89,12 @@ def __post_init__(self): # TODO: Verify that cols are same; if not, warn that templates could not be combined. combined_prompter.merge_templates_from(prompter) # combined_prompter.templates.update(prompter.get_templates_with_new_uuids()) - print("Total number of templates gathered: ", len(combined_prompter.templates)) + print( + "Total number of templates gathered: ", len(combined_prompter.templates) + ) combined_prompter.write_to_file() + def load_prompts( *dataset_strings: str, num_shots: int = 0, @@ -97,7 +104,7 @@ def load_prompts( stream: bool = False, rank: int = 0, world_size: int = 1, - combined_template_path: str = "" + combined_template_path: str = "", ) -> Iterator[dict]: """Load a dataset full of prompts generated from the specified datasets. 
@@ -157,9 +164,11 @@ def load_prompts( raw_datasets.append(split) train_datasets.append(train_ds) - + if combined_template_path: - combined_prompter = DatasetTemplates("combined_templates", combined_template_path) + combined_prompter = DatasetTemplates( + "combined_templates", combined_template_path + ) prompters = [combined_prompter] * len(dataset_strings) min_num_templates = min(len(prompter.templates) for prompter in prompters) From 1aecd7ecd18a738bba07340b28d5e79277d62e4e Mon Sep 17 00:00:00 2001 From: Christy Koh Date: Wed, 12 Apr 2023 12:22:58 +0000 Subject: [PATCH 22/36] line len fixes --- elk/extraction/prompt_loading.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/elk/extraction/prompt_loading.py b/elk/extraction/prompt_loading.py index c840f212..1ccba7ae 100644 --- a/elk/extraction/prompt_loading.py +++ b/elk/extraction/prompt_loading.py @@ -41,12 +41,13 @@ class PromptConfig(Serializable): num_shots: The number of examples to use in few-shot prompts. If zero, prompts are zero-shot. Defaults to 0. num_variants: The number of prompt templates to apply to each predicate upon - call to __getitem__. Use -1 to apply all available templates. Defaults to -1. + call to __getitem__. Use -1 to apply all available templates. Defaults to + -1. seed: The seed to use for prompt randomization. Defaults to 42. stream: Whether to stream the dataset from the Internet. Defaults to False. combined_template_path: Path to save a combined template file to, when testing - prompt invariance across multiple datasets, and will be interpreted as a subpath - of `combined_paths` in the promptsource templates dir. Defaults to empty string. + prompt invariance across multiple datasets, and will be interpreted as a + subpath of `combined_paths` in the templates dir. Defaults to empty string. 
""" datasets: list[str] = field(positional=True) @@ -77,15 +78,16 @@ def __post_init__(self): if self.combined_template_path: print("Copying templates across datasets to combined_templates/ " + f"{self.combined_template_path}/templates.yaml") - combined_prompter = DatasetTemplates("combined_templates", self.combined_template_path) + combined_prompter = DatasetTemplates("combined_templates", + self.combined_template_path) combined_prompter.templates = {} for ds_string in self.datasets: ds_name, _, config_name = ds_string.partition(" ") prompter = DatasetTemplates(ds_name, config_name) - # TODO: Verify that cols are same; if not, warn that templates could not be combined. + # TODO: Verify that cols are same; if not, warn that templates + # could not be combined. combined_prompter.merge_templates_from(prompter) - # combined_prompter.templates.update(prompter.get_templates_with_new_uuids()) - print("Total number of templates gathered: ", len(combined_prompter.templates)) + print("Total number of templates: ", len(combined_prompter.templates)) combined_prompter.write_to_file() def load_prompts( From b0c0f6331221c15ffc71248ee835514a097ff68a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 12 Apr 2023 12:26:30 +0000 Subject: [PATCH 23/36] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- elk/extraction/prompt_loading.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/elk/extraction/prompt_loading.py b/elk/extraction/prompt_loading.py index c6715f6b..6e5edc54 100644 --- a/elk/extraction/prompt_loading.py +++ b/elk/extraction/prompt_loading.py @@ -46,7 +46,7 @@ class PromptConfig(Serializable): seed: The seed to use for prompt randomization. Defaults to 42. stream: Whether to stream the dataset from the Internet. Defaults to False. 
combined_template_path: Path to save a combined template file to, when testing - prompt invariance across multiple datasets, and will be interpreted as a + prompt invariance across multiple datasets, and will be interpreted as a subpath of `combined_paths` in the templates dir. Defaults to empty string. """ @@ -87,7 +87,7 @@ def __post_init__(self): for ds_string in self.datasets: ds_name, _, config_name = ds_string.partition(" ") prompter = DatasetTemplates(ds_name, config_name) - # TODO: Verify that cols are same; if not, warn that templates + # TODO: Verify that cols are same; if not, warn that templates # could not be combined. combined_prompter.merge_templates_from(prompter) print("Total number of templates: ", len(combined_prompter.templates)) From 2da069adb7fad011e17417f5fa3e5c4f6f61e63a Mon Sep 17 00:00:00 2001 From: Christy Koh Date: Wed, 12 Apr 2023 17:25:14 +0000 Subject: [PATCH 24/36] update README with prompt invariance argument --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 96d51ee1..5c7fbd3c 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,11 @@ The following will generate a CCS (Contrast Consistent Search) reporter instead elk elicit microsoft/deberta-v2-xxlarge-mnli imdb --net ccs ``` +To test prompt invariance across multiple datasets, use the `--combined_template_path` command line argument, which will create a new `templates.yaml` file with templates from all the datasets. +```bash +elk elicit bigscience/bloomz-560m christykoh/ag_news_pt ag_news --combined_template_path=spar_w/ag_news +``` + The following command will evaluate the probe from the run naughty-northcutt on the hidden states extracted from the model deberta-v2-xxlarge-mnli for the imdb dataset. It will result in an `eval.csv` and `cfg.yaml` file, which are stored under a subfolder in `elk-reporters/naughty-northcutt/transfer_eval`. 
```bash From 4c6d344b0bc004ab9b20461c7278df4a4fc44b66 Mon Sep 17 00:00:00 2001 From: Christy Koh Date: Wed, 12 Apr 2023 20:01:27 +0000 Subject: [PATCH 25/36] fix bugs, add dataset col checks --- elk/extraction/extraction.py | 3 +-- elk/extraction/prompt_loading.py | 33 +++++++++++++++++++++++++++++--- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/elk/extraction/extraction.py b/elk/extraction/extraction.py index 51210a11..81cab1cc 100644 --- a/elk/extraction/extraction.py +++ b/elk/extraction/extraction.py @@ -266,10 +266,9 @@ def get_splits() -> SplitDict: model_cfg = AutoConfig.from_pretrained(cfg.model) num_variants = cfg.prompts.num_variants - # extraneous, remove ? + # Retrieve info, used to get splits ds_name, _, config_name = cfg.prompts.datasets[0].partition(" ") info = get_dataset_config_info(ds_name, config_name or None) - # ? end layer_cols = { f"hidden_{layer}": Array3D( diff --git a/elk/extraction/prompt_loading.py b/elk/extraction/prompt_loading.py index 6e5edc54..6cfcb18f 100644 --- a/elk/extraction/prompt_loading.py +++ b/elk/extraction/prompt_loading.py @@ -7,6 +7,7 @@ Dataset, Features, load_dataset, + load_dataset_builder, ) from datasets.distributed import split_dataset_by_node from simple_parsing.helpers import Serializable, field @@ -84,14 +85,41 @@ def __post_init__(self): "combined_templates", self.combined_template_path ) combined_prompter.templates = {} + prev_num_features = 0 + prev_num_label_classes = 0 for ds_string in self.datasets: ds_name, _, config_name = ds_string.partition(" ") prompter = DatasetTemplates(ds_name, config_name) - # TODO: Verify that cols are same; if not, warn that templates - # could not be combined. + + # Verify that number of features and number of classes for ClassLabel + # are the same across datasets. 
+ ds_builder = load_dataset_builder(ds_name, config_name or None) + num_features = len(ds_builder.info.features) + if prev_num_features > 0 and num_features != prev_num_features: + print("WARNING: Datasets do not have the same number of features;", + f"{ds_name} has {num_features} features while prev has", + f"{prev_num_features}. Prompting datasets separately.") + self.combined_template_path = "" + break + prev_num_features = num_features + num_classes = ds_builder.info.features['label'].num_classes + if prev_num_label_classes > 0 and num_classes != prev_num_label_classes: + print("WARNING: Datasets do not have the same number of ClassLabel", + f"classes; {ds_name} has {num_classes} classes while prev has", + f"{prev_num_label_classes}. Prompting datasets separately.") + self.combined_template_path = "" + break + prev_num_label_classes = num_classes + + # Once verified, merge templates. combined_prompter.merge_templates_from(prompter) + + # Write to file if successfully merged all prompts. 
+ if self.combined_template_path: print("Total number of templates: ", len(combined_prompter.templates)) combined_prompter.write_to_file() + print("Saved to promptsource/templates/combined_templates/" + + f"{self.combined_template_path}.yaml") def load_prompts( @@ -177,7 +205,6 @@ def load_prompts( if num_variants == -1 else min(num_variants, min_num_templates) ) - print() assert num_variants > 0 if rank == 0: print(f"Using {num_variants} variants of each prompt") From 53d186b7ecfdb7e1b7d449bc1cb8d743e52369db Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 12 Apr 2023 20:01:54 +0000 Subject: [PATCH 26/36] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- elk/extraction/prompt_loading.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/elk/extraction/prompt_loading.py b/elk/extraction/prompt_loading.py index 6cfcb18f..57c3d64f 100644 --- a/elk/extraction/prompt_loading.py +++ b/elk/extraction/prompt_loading.py @@ -96,30 +96,36 @@ def __post_init__(self): ds_builder = load_dataset_builder(ds_name, config_name or None) num_features = len(ds_builder.info.features) if prev_num_features > 0 and num_features != prev_num_features: - print("WARNING: Datasets do not have the same number of features;", - f"{ds_name} has {num_features} features while prev has", - f"{prev_num_features}. Prompting datasets separately.") + print( + "WARNING: Datasets do not have the same number of features;", + f"{ds_name} has {num_features} features while prev has", + f"{prev_num_features}. 
Prompting datasets separately.", + ) self.combined_template_path = "" break prev_num_features = num_features - num_classes = ds_builder.info.features['label'].num_classes + num_classes = ds_builder.info.features["label"].num_classes if prev_num_label_classes > 0 and num_classes != prev_num_label_classes: - print("WARNING: Datasets do not have the same number of ClassLabel", - f"classes; {ds_name} has {num_classes} classes while prev has", - f"{prev_num_label_classes}. Prompting datasets separately.") + print( + "WARNING: Datasets do not have the same number of ClassLabel", + f"classes; {ds_name} has {num_classes} classes while prev has", + f"{prev_num_label_classes}. Prompting datasets separately.", + ) self.combined_template_path = "" break prev_num_label_classes = num_classes # Once verified, merge templates. combined_prompter.merge_templates_from(prompter) - + # Write to file if successfully merged all prompts. if self.combined_template_path: print("Total number of templates: ", len(combined_prompter.templates)) combined_prompter.write_to_file() - print("Saved to promptsource/templates/combined_templates/" + - f"{self.combined_template_path}.yaml") + print( + "Saved to promptsource/templates/combined_templates/" + + f"{self.combined_template_path}.yaml" + ) def load_prompts( From b78355f9ce5bb1c1770022652de3a50d71c04881 Mon Sep 17 00:00:00 2001 From: Christy Koh Date: Wed, 12 Apr 2023 20:13:54 +0000 Subject: [PATCH 27/36] fix prompter init typing --- elk/extraction/prompt_loading.py | 1 + 1 file changed, 1 insertion(+) diff --git a/elk/extraction/prompt_loading.py b/elk/extraction/prompt_loading.py index 6cfcb18f..478c540c 100644 --- a/elk/extraction/prompt_loading.py +++ b/elk/extraction/prompt_loading.py @@ -76,6 +76,7 @@ def __post_init__(self): self.max_examples *= 2 # Combining prompts + combined_prompter: "DatasetTemplates" = None if self.combined_template_path: print( "Copying templates across datasets to combined_templates/ " From 
1975410f9d26ca0076283b9d45076cfcb1d9e849 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Wed, 12 Apr 2023 13:21:22 -0700 Subject: [PATCH 28/36] Update README.md --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 5c7fbd3c..21828f7b 100644 --- a/README.md +++ b/README.md @@ -26,17 +26,17 @@ The following will generate a CCS (Contrast Consistent Search) reporter instead elk elicit microsoft/deberta-v2-xxlarge-mnli imdb --net ccs ``` -To test prompt invariance across multiple datasets, use the `--combined_template_path` command line argument, which will create a new `templates.yaml` file with templates from all the datasets. -```bash -elk elicit bigscience/bloomz-560m christykoh/ag_news_pt ag_news --combined_template_path=spar_w/ag_news -``` - The following command will evaluate the probe from the run naughty-northcutt on the hidden states extracted from the model deberta-v2-xxlarge-mnli for the imdb dataset. It will result in an `eval.csv` and `cfg.yaml` file, which are stored under a subfolder in `elk-reporters/naughty-northcutt/transfer_eval`. ```bash elk eval naughty-northcutt microsoft/deberta-v2-xxlarge-mnli imdb ``` +For prompt invariance across multiple datasets, use the `--combined_template_path` command line argument, which will create a new `templates.yaml` file with templates from all the datasets. +```bash +elk elicit bigscience/bloomz-560m christykoh/ag_news_pt ag_news --combined_template_path=spar_w/ag_news +``` + ## Caching The hidden states resulting from `elk elicit` are cached as a HuggingFace dataset to avoid having to recompute them every time we want to train a probe. The cache is stored in the same place as all other HuggingFace datasets, which is usually `~/.cache/huggingface/datasets`. 
From 8a5fb0d819b7a7ed9a1b7c4e4cea2a1403aecea2 Mon Sep 17 00:00:00 2001 From: Christy Koh Date: Wed, 12 Apr 2023 20:23:37 +0000 Subject: [PATCH 29/36] try to fix typing again --- elk/extraction/prompt_loading.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elk/extraction/prompt_loading.py b/elk/extraction/prompt_loading.py index c62209a1..9b2f8e78 100644 --- a/elk/extraction/prompt_loading.py +++ b/elk/extraction/prompt_loading.py @@ -76,7 +76,7 @@ def __post_init__(self): self.max_examples *= 2 # Combining prompts - combined_prompter: "DatasetTemplates" = None + combined_prompter = None if self.combined_template_path: print( "Copying templates across datasets to combined_templates/ " From a7f5a8b8ec2ada9f476368f6fd1df15bfc2a6f42 Mon Sep 17 00:00:00 2001 From: Christy Koh Date: Wed, 12 Apr 2023 20:30:01 +0000 Subject: [PATCH 30/36] assert datasettemplates type --- elk/extraction/prompt_loading.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/elk/extraction/prompt_loading.py b/elk/extraction/prompt_loading.py index 9b2f8e78..9a80fe86 100644 --- a/elk/extraction/prompt_loading.py +++ b/elk/extraction/prompt_loading.py @@ -121,8 +121,9 @@ def __post_init__(self): # Write to file if successfully merged all prompts. 
if self.combined_template_path: - print("Total number of templates: ", len(combined_prompter.templates)) - combined_prompter.write_to_file() + prompter = assert_type(DatasetTemplates, combined_prompter) + print("Total number of templates: ", len(prompter.templates)) + prompter.write_to_file() print( "Saved to promptsource/templates/combined_templates/" + f"{self.combined_template_path}.yaml" From 7af1a1bba97b6ba193deb6488e07e3363870564b Mon Sep 17 00:00:00 2001 From: Christy Koh Date: Thu, 13 Apr 2023 09:24:59 +0000 Subject: [PATCH 31/36] bugfix to run eval separately on each dataset --- elk/evaluation/evaluate.py | 1 + 1 file changed, 1 insertion(+) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index 6aca58f5..b954e661 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -48,6 +48,7 @@ def execute(self): transfer_dir = elk_reporter_dir() / self.source / "transfer_eval" for dataset in datasets: + self.data.prompts.datasets = [dataset] run = Evaluate(cfg=self, out_dir=transfer_dir / dataset) run.evaluate() From 74551fa1a6a60399e41cbcf109de578ba0bc725b Mon Sep 17 00:00:00 2001 From: Christy Koh Date: Thu, 13 Apr 2023 09:34:36 +0000 Subject: [PATCH 32/36] add combine_evals flag to differentiate a multi dataset eval from a batch of single-dataset evals --- elk/evaluation/evaluate.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index b954e661..9cfdd6eb 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -41,16 +41,22 @@ class Eval(Serializable): num_gpus: int = -1 skip_baseline: bool = False concatenated_layer_offset: int = 0 + combine_evals: bool = False def execute(self): datasets = self.data.prompts.datasets transfer_dir = elk_reporter_dir() / self.source / "transfer_eval" - for dataset in datasets: - self.data.prompts.datasets = [dataset] - run = Evaluate(cfg=self, out_dir=transfer_dir / dataset) + if 
self.combine_evals: + run = Evaluate(cfg=self, out_dir=transfer_dir / ", ".join(datasets)) run.evaluate() + else: + # eval on each dataset separately + for dataset in datasets: + self.data.prompts.datasets = [dataset] + run = Evaluate(cfg=self, out_dir=transfer_dir / dataset) + run.evaluate() @dataclass From f7a4713f3601bd82d5659a53479d0ed61060cbdf Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 19 Apr 2023 15:44:40 +0000 Subject: [PATCH 33/36] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- elk/extraction/prompt_loading.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/elk/extraction/prompt_loading.py b/elk/extraction/prompt_loading.py index e6432500..f0ca68e6 100644 --- a/elk/extraction/prompt_loading.py +++ b/elk/extraction/prompt_loading.py @@ -1,8 +1,8 @@ -from os.path import exists from collections import Counter from copy import deepcopy from dataclasses import dataclass from itertools import zip_longest +from os.path import exists from random import Random from typing import Any, Iterator, Literal, Optional @@ -48,7 +48,7 @@ class PromptConfig(Serializable): -1. seed: The seed to use for prompt randomization. Defaults to 42. stream: Whether to stream the dataset from the Internet. Defaults to False. - combined_template_output_path: Path to save a combined template file to, when + combined_template_output_path: Path to save a combined template file to, when applying prompt invariance across multiple datasets. Interpreted as a subpath of `combined_paths` in the templates dir. Defaults to empty string. 
""" @@ -127,11 +127,11 @@ def combine_templates(self): "Saved to promptsource/templates/combined_templates/" + f"{self.combined_template_output_path}.yaml" ) - + def verify_cols(self, ds_builder, ref_ds_builder) -> bool: - '''Verify that number of features and number of classes for ClassLabel - match the expected values. - ''' + """Verify that number of features and number of classes for ClassLabel + match the expected values. + """ expected_features = len(ref_ds_builder.info.features) expected_classes = ref_ds_builder.info.features["label"].num_classes num_features = len(ds_builder.info.features) @@ -198,15 +198,13 @@ def load_prompts( An iterable of prompt dictionaries. """ ds_name, _, config_name = ds_string.partition(" ") - + prompter = None if combined_template_output_path and exists(combined_template_output_path): - prompter = DatasetTemplates( - "combined_templates", combined_template_output_path - ) + prompter = DatasetTemplates("combined_templates", combined_template_output_path) else: prompter = DatasetTemplates(ds_name, config_name) - + ds_dict = assert_type( dict, load_dataset(ds_name, config_name or None, streaming=stream) ) From 6e2f54c0a8fa14c140056c8f72263bf69e7398aa Mon Sep 17 00:00:00 2001 From: Christy Koh Date: Wed, 19 Apr 2023 15:51:00 +0000 Subject: [PATCH 34/36] define ds_name --- elk/extraction/prompt_loading.py | 1 + 1 file changed, 1 insertion(+) diff --git a/elk/extraction/prompt_loading.py b/elk/extraction/prompt_loading.py index e6432500..733027dc 100644 --- a/elk/extraction/prompt_loading.py +++ b/elk/extraction/prompt_loading.py @@ -136,6 +136,7 @@ def verify_cols(self, ds_builder, ref_ds_builder) -> bool: expected_classes = ref_ds_builder.info.features["label"].num_classes num_features = len(ds_builder.info.features) num_classes = ds_builder.info.features["label"].num_classes + ds_name = ds_builder.builder_name if expected_features > 0 and num_features != expected_features: print( "WARNING: Datasets do not have the same number of 
features;", From 5f0f32aed9a4547bc6720286135a995e188a2673 Mon Sep 17 00:00:00 2001 From: Christy Koh Date: Thu, 20 Apr 2023 07:38:39 +0000 Subject: [PATCH 35/36] fix ds_name bug --- elk/extraction/prompt_loading.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/elk/extraction/prompt_loading.py b/elk/extraction/prompt_loading.py index a4102554..88687e0c 100644 --- a/elk/extraction/prompt_loading.py +++ b/elk/extraction/prompt_loading.py @@ -115,7 +115,7 @@ def combine_templates(self): if i == 0: # Set first dataset as reference ref_ds_builder = ds_builder - elif not self.verify_cols(ds_builder, ref_ds_builder): + elif not self.verify_cols(ref_ds_builder, ds_builder, ds_name): return # Once verified, merge templates. @@ -128,7 +128,7 @@ def combine_templates(self): + f"{self.combined_template_output_path}.yaml" ) - def verify_cols(self, ds_builder, ref_ds_builder) -> bool: + def verify_cols(self, ref_ds_builder, ds_builder, ds_name) -> bool: """Verify that number of features and number of classes for ClassLabel match the expected values. 
""" @@ -136,7 +136,6 @@ def verify_cols(self, ds_builder, ref_ds_builder) -> bool: expected_classes = ref_ds_builder.info.features["label"].num_classes num_features = len(ds_builder.info.features) num_classes = ds_builder.info.features["label"].num_classes - ds_name = ds_builder.builder_name if expected_features > 0 and num_features != expected_features: print( "WARNING: Datasets do not have the same number of features;", From 8fa07b4672dee1e9c6ac386f246c8d38d564fbb1 Mon Sep 17 00:00:00 2001 From: Walter Laurito Date: Thu, 20 Apr 2023 04:06:35 -0700 Subject: [PATCH 36/36] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4c0c35dd..d3f55e5b 100644 --- a/README.md +++ b/README.md @@ -32,9 +32,9 @@ The following command will evaluate the probe from the run naughty-northcutt on elk eval naughty-northcutt microsoft/deberta-v2-xxlarge-mnli imdb ``` -For prompt invariance across multiple datasets, use the `--combined_template_path` command line argument, which will create a new `templates.yaml` file with templates from all the datasets. +For prompt invariance across multiple datasets, use the `--combined_template_output_path` command line argument, which will create a new `templates.yaml` file with templates from all the datasets. ```bash -elk elicit bigscience/bloomz-560m christykoh/ag_news_pt ag_news --combined_template_path=spar_w/ag_news +elk elicit bigscience/bloomz-560m christykoh/ag_news_pt ag_news --combined_template_output_path=spar_w/ag_news ``` The following runs `elicit` on the Cartesian product of the listed models and datasets, storing it in a special folder ELK_DIR/sweeps/. Moreover, `--add_pooled` adds an additional dataset that pools all of the datasets together.