-
Notifications
You must be signed in to change notification settings - Fork 55
/
tasks_table.jsonl
1235 lines (1235 loc) · 539 KB
/
tasks_table.jsonl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
{"name":"abstract_narrative_understanding","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"abstract_narrative_understanding","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"agieval:aqua-rat","suite":["lighteval"],"prompt_function":"agieval","hf_repo":"dmayhem93/agieval-aqua-rat","hf_subset":"default","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":"random_sampling","generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":null,"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"agieval:gaokao-biology","suite":["lighteval"],"prompt_function":"agieval","hf_repo":"dmayhem93/agieval-gaokao-biology","hf_subset":"default","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":"random_sampling","generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":null,"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"agieval:gaokao-chemistry","suite":["lighteval"],"prompt_function":"agieval","hf_repo":"dmayhem93/agieval-gaokao-chemistry","hf_subset":"default","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":"random_sampling","generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":null,"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"agieval:gaokao-chinese","suite":["lighteval"],"prompt_function":"agieval","hf_repo":"dmayhem93/agieval-gaokao-chinese","hf_subset":"default","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":"random_sampling","generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":null,"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"agieval:gaokao-english","suite":["lighteval"],"prompt_function":"agieval","hf_repo":"dmayhem93/agieval-gaokao-english","hf_subset":"default","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":"random_sampling","generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":null,"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"agieval:gaokao-geography","suite":["lighteval"],"prompt_function":"agieval","hf_repo":"dmayhem93/agieval-gaokao-geography","hf_subset":"default","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":"random_sampling","generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":null,"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"agieval:gaokao-history","suite":["lighteval"],"prompt_function":"agieval","hf_repo":"dmayhem93/agieval-gaokao-history","hf_subset":"default","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":"random_sampling","generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":null,"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"agieval:gaokao-mathqa","suite":["lighteval"],"prompt_function":"agieval","hf_repo":"dmayhem93/agieval-gaokao-mathqa","hf_subset":"default","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":"random_sampling","generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":null,"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"agieval:gaokao-physics","suite":["lighteval"],"prompt_function":"agieval","hf_repo":"dmayhem93/agieval-gaokao-physics","hf_subset":"default","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":"random_sampling","generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":null,"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"agieval:logiqa-en","suite":["lighteval"],"prompt_function":"agieval","hf_repo":"dmayhem93/agieval-logiqa-en","hf_subset":"default","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":"random_sampling","generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":null,"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"agieval:logiqa-zh","suite":["lighteval"],"prompt_function":"agieval","hf_repo":"dmayhem93/agieval-logiqa-zh","hf_subset":"default","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":"random_sampling","generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":null,"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"agieval:lsat-ar","suite":["lighteval"],"prompt_function":"agieval","hf_repo":"dmayhem93/agieval-lsat-ar","hf_subset":"default","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":"random_sampling","generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":null,"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"agieval:lsat-lr","suite":["lighteval"],"prompt_function":"agieval","hf_repo":"dmayhem93/agieval-lsat-lr","hf_subset":"default","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":"random_sampling","generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":null,"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"agieval:lsat-rc","suite":["lighteval"],"prompt_function":"agieval","hf_repo":"dmayhem93/agieval-lsat-rc","hf_subset":"default","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":"random_sampling","generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":null,"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"agieval:sat-en","suite":["lighteval"],"prompt_function":"agieval","hf_repo":"dmayhem93/agieval-sat-en","hf_subset":"default","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":"random_sampling","generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":null,"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"agieval:sat-en-without-passage","suite":["lighteval"],"prompt_function":"agieval","hf_repo":"dmayhem93/agieval-sat-en-without-passage","hf_subset":"default","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":"random_sampling","generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":null,"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"agieval:sat-math","suite":["lighteval"],"prompt_function":"agieval","hf_repo":"dmayhem93/agieval-sat-math","hf_subset":"default","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":"random_sampling","generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":null,"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"anachronisms","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"anachronisms","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"analogical_similarity","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"analogical_similarity","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"analytic_entailment","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"analytic_entailment","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"anli","suite":["lighteval","anli"],"prompt_function":"anli","hf_repo":"anli","hf_subset":"plain_text","hf_avail_splits":["train_r1","dev_r1","train_r2","dev_r2","train_r3","dev_r3","test_r1","test_r2","test_r3"],"evaluation_splits":["test_r1","test_r2","test_r3"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"anli:r1","suite":["lighteval","anli"],"prompt_function":"anli","hf_repo":"anli","hf_subset":"plain_text","hf_avail_splits":["train_r1","dev_r1","test_r1"],"evaluation_splits":["test_r1"],"few_shots_split":"train_r1","few_shots_select":"random_sampling_from_train","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"anli:r2","suite":["lighteval","anli"],"prompt_function":"anli","hf_repo":"anli","hf_subset":"plain_text","hf_avail_splits":["train_r2","dev_r2","test_r2"],"evaluation_splits":["test_r2"],"few_shots_split":"train_r2","few_shots_select":"random_sampling_from_train","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"anli:r3","suite":["lighteval","anli"],"prompt_function":"anli","hf_repo":"anli","hf_subset":"plain_text","hf_avail_splits":["train_r3","dev_r3","test_r3"],"evaluation_splits":["test_r3"],"few_shots_split":"train_r3","few_shots_select":"random_sampling_from_train","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"arc:c:letters","suite":["original","arc"],"prompt_function":"arc_with_options_letters_predict","hf_repo":"ai2_arc","hf_subset":"ARC-Challenge","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"arc:c:options","suite":["original","arc"],"prompt_function":"arc_with_options","hf_repo":"ai2_arc","hf_subset":"ARC-Challenge","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"arc:c:simple","suite":["original","arc"],"prompt_function":"arc","hf_repo":"ai2_arc","hf_subset":"ARC-Challenge","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"arc:challenge","suite":["leaderboard","arc"],"prompt_function":"arc","hf_repo":"ai2_arc","hf_subset":"ARC-Challenge","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":"random_sampling_from_train","generation_size":1,"metric":["loglikelihood_acc", "loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"arc:easy","suite":["lighteval","arc"],"prompt_function":"arc","hf_repo":"ai2_arc","hf_subset":"ARC-Easy","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":"random_sampling_from_train","generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"arithmetic:1dc","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_1dc","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"arithmetic:2da","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_2da","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"arithmetic:2dm","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_2dm","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"arithmetic:2ds","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_2ds","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"arithmetic:3da","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_3da","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"arithmetic:3ds","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_3ds","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"arithmetic:4da","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_4da","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"arithmetic:4ds","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_4ds","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"arithmetic:5da","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_5da","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"arithmetic:5ds","suite":["lighteval","arithmetic"],"prompt_function":"arithmetic","hf_repo":"EleutherAI\/arithmetic","hf_subset":"arithmetic_5ds","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"arithmetic_bb","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"arithmetic","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"ascii_word_recognition","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"ascii_word_recognition","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"asdiv","suite":["lighteval"],"prompt_function":"asdiv","hf_repo":"EleutherAI\/asdiv","hf_subset":"asdiv","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"authorship_verification","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"authorship_verification","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"auto_categorization","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"auto_categorization","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"auto_debugging","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_and_after_query","hf_repo":"bigbench","hf_subset":"auto_debugging","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["perfect_exact_match"],"stop_sequence":null,"output_regex":"[^\\.\\?\\!\\;\\n]+", "trust_dataset": true,"version":0}
{"name":"babi_qa","suite":["helm"],"prompt_function":"babi_qa","hf_repo":"facebook\/babi_qa","hf_subset":"en-valid-qa1","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:causal_judgment","suite":["lighteval"],"prompt_function":"bbh_lighteval","hf_repo":"lighteval/bbh","hf_subset":"causal_judgement","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bigbench:date_understanding","suite":["lighteval"],"prompt_function":"bbh_lighteval","hf_repo":"lighteval/bbh","hf_subset":"date_understanding","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bigbench:disambiguation_qa","suite":["lighteval"],"prompt_function":"bbh_lighteval","hf_repo":"lighteval/bbh","hf_subset":"disambiguation_qa","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bigbench:geometric_shapes","suite":["lighteval"],"prompt_function":"bbh_lighteval","hf_repo":"lighteval/bbh","hf_subset":"geometric_shapes","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bigbench:logical_deduction_five_objects","suite":["lighteval"],"prompt_function":"bbh_lighteval","hf_repo":"lighteval/bbh","hf_subset":"logical_deduction_five_objects","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bigbench:logical_deduction_seven_objects","suite":["lighteval"],"prompt_function":"bbh_lighteval","hf_repo":"lighteval/bbh","hf_subset":"logical_deduction_seven_objects","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bigbench:logical_deduction_three_objects","suite":["lighteval"],"prompt_function":"bbh_lighteval","hf_repo":"lighteval/bbh","hf_subset":"logical_deduction_three_objects","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bigbench:movie_recommendation","suite":["lighteval"],"prompt_function":"bbh_lighteval","hf_repo":"lighteval/bbh","hf_subset":"movie_recommendation","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bigbench:navigate","suite":["lighteval"],"prompt_function":"bbh_lighteval","hf_repo":"lighteval/bbh","hf_subset":"navigate","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bigbench:reasoning_about_colored_objects","suite":["lighteval"],"prompt_function":"bbh_lighteval","hf_repo":"lighteval/bbh","hf_subset":"reasoning_about_colored_objects","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bigbench:ruin_names","suite":["lighteval"],"prompt_function":"bbh_lighteval","hf_repo":"lighteval/bbh","hf_subset":"ruin_names","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bigbench:salient_translation_error_detection","suite":["lighteval"],"prompt_function":"bbh_lighteval","hf_repo":"lighteval/bbh","hf_subset":"salient_translation_error_detection","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bigbench:snarks","suite":["lighteval"],"prompt_function":"bbh_lighteval","hf_repo":"lighteval/bbh","hf_subset":"snarks","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bigbench:sports_understanding","suite":["lighteval"],"prompt_function":"bbh_lighteval","hf_repo":"lighteval/bbh","hf_subset":"sports_understanding","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bigbench:temporal_sequences","suite":["lighteval"],"prompt_function":"bbh_lighteval","hf_repo":"lighteval/bbh","hf_subset":"temporal_sequences","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bigbench:tracking_shuffled_objects_five_objects","suite":["lighteval"],"prompt_function":"bbh_lighteval","hf_repo":"lighteval/bbh","hf_subset":"tracking_shuffled_objects_five_objects","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bigbench:tracking_shuffled_objects_seven_objects","suite":["lighteval"],"prompt_function":"bbh_lighteval","hf_repo":"lighteval/bbh","hf_subset":"tracking_shuffled_objects_seven_objects","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bigbench:tracking_shuffled_objects_three_objects","suite":["lighteval"],"prompt_function":"bbh_lighteval","hf_repo":"lighteval/bbh","hf_subset":"tracking_shuffled_objects_three_objects","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bigbench:causal_judgment","suite":["harness"],"prompt_function":"bbh_harness","hf_repo":"lighteval/bbh","hf_subset":"causal_judgement","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "must_remove_duplicate_docs": true, "trust_dataset":true,"version":0}
{"name":"bigbench:date_understanding","suite":["harness"],"prompt_function":"bbh_harness","hf_repo":"lighteval/bbh","hf_subset":"date_understanding","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "must_remove_duplicate_docs": true, "trust_dataset":true,"version":0}
{"name":"bigbench:disambiguation_qa","suite":["harness"],"prompt_function":"bbh_harness","hf_repo":"lighteval/bbh","hf_subset":"disambiguation_qa","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "must_remove_duplicate_docs": true, "trust_dataset":true,"version":0}
{"name":"bigbench:geometric_shapes","suite":["harness"],"prompt_function":"bbh_harness","hf_repo":"lighteval/bbh","hf_subset":"geometric_shapes","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "must_remove_duplicate_docs": true, "trust_dataset":true,"version":0}
{"name":"bigbench:logical_deduction_five_objects","suite":["harness"],"prompt_function":"bbh_harness","hf_repo":"lighteval/bbh","hf_subset":"logical_deduction_five_objects","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "must_remove_duplicate_docs": true, "trust_dataset":true,"version":0}
{"name":"bigbench:logical_deduction_seven_objects","suite":["harness"],"prompt_function":"bbh_harness","hf_repo":"lighteval/bbh","hf_subset":"logical_deduction_seven_objects","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "must_remove_duplicate_docs": true, "trust_dataset":true,"version":0}
{"name":"bigbench:logical_deduction_three_objects","suite":["harness"],"prompt_function":"bbh_harness","hf_repo":"lighteval/bbh","hf_subset":"logical_deduction_three_objects","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "must_remove_duplicate_docs": true, "trust_dataset":true,"version":0}
{"name":"bigbench:movie_recommendation","suite":["harness"],"prompt_function":"bbh_harness","hf_repo":"lighteval/bbh","hf_subset":"movie_recommendation","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "must_remove_duplicate_docs": true, "trust_dataset":true,"version":0}
{"name":"bigbench:navigate","suite":["harness"],"prompt_function":"bbh_harness","hf_repo":"lighteval/bbh","hf_subset":"navigate","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "must_remove_duplicate_docs": true, "trust_dataset":true,"version":0}
{"name":"bigbench:reasoning_about_colored_objects","suite":["harness"],"prompt_function":"bbh_harness","hf_repo":"lighteval/bbh","hf_subset":"reasoning_about_colored_objects","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "must_remove_duplicate_docs": true, "trust_dataset":true,"version":0}
{"name":"bigbench:ruin_names","suite":["harness"],"prompt_function":"bbh_harness","hf_repo":"lighteval/bbh","hf_subset":"ruin_names","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "must_remove_duplicate_docs": true, "trust_dataset":true,"version":0}
{"name":"bigbench:salient_translation_error_detection","suite":["harness"],"prompt_function":"bbh_harness","hf_repo":"lighteval/bbh","hf_subset":"salient_translation_error_detection","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "must_remove_duplicate_docs": true, "trust_dataset":true,"version":0}
{"name":"bigbench:snarks","suite":["harness"],"prompt_function":"bbh_harness","hf_repo":"lighteval/bbh","hf_subset":"snarks","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "must_remove_duplicate_docs": true, "trust_dataset":true,"version":0}
{"name":"bigbench:sports_understanding","suite":["harness"],"prompt_function":"bbh_harness","hf_repo":"lighteval/bbh","hf_subset":"sports_understanding","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "must_remove_duplicate_docs": true, "trust_dataset":true,"version":0}
{"name":"bigbench:temporal_sequences","suite":["harness"],"prompt_function":"bbh_harness","hf_repo":"lighteval/bbh","hf_subset":"temporal_sequences","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "must_remove_duplicate_docs": true, "trust_dataset":true,"version":0}
{"name":"bigbench:tracking_shuffled_objects_five_objects","suite":["harness"],"prompt_function":"bbh_harness","hf_repo":"lighteval/bbh","hf_subset":"tracking_shuffled_objects_five_objects","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "must_remove_duplicate_docs": true, "trust_dataset":true,"version":0}
{"name":"bigbench:tracking_shuffled_objects_seven_objects","suite":["harness"],"prompt_function":"bbh_harness","hf_repo":"lighteval/bbh","hf_subset":"tracking_shuffled_objects_seven_objects","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "must_remove_duplicate_docs": true, "trust_dataset":true,"version":0}
{"name":"bigbench:tracking_shuffled_objects_three_objects","suite":["harness"],"prompt_function":"bbh_harness","hf_repo":"lighteval/bbh","hf_subset":"tracking_shuffled_objects_three_objects","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "must_remove_duplicate_docs": true, "trust_dataset":true,"version":0}
{"name":"bbh:boolean_expressions","suite":["harness"],"prompt_function":"bbh_boolean_expressions","hf_repo":"lukaemon/bbh","hf_subset":"boolean_expressions","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bbh:causal_judgment","suite":["harness"],"prompt_function":"bbh_causal_judgment","hf_repo":"lukaemon/bbh","hf_subset":"causal_judgement","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bbh:date_understanding","suite":["harness"],"prompt_function":"bbh_date_understanding","hf_repo":"lukaemon/bbh","hf_subset":"date_understanding","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bbh:disambiguation_qa","suite":["harness"],"prompt_function":"bbh_disambiguation_qa","hf_repo":"lukaemon/bbh","hf_subset":"disambiguation_qa","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bbh:dyck_languages","suite":["harness"],"prompt_function":"bbh_dyck_languages","hf_repo":"lukaemon/bbh","hf_subset":"dyck_languages","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bbh:formal_fallacies","suite":["harness"],"prompt_function":"bbh_formal_fallacies","hf_repo":"lukaemon/bbh","hf_subset":"formal_fallacies","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bbh:geometric_shapes","suite":["harness"],"prompt_function":"bbh_geometric_shapes","hf_repo":"lukaemon/bbh","hf_subset":"geometric_shapes","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bbh:hyperbaton","suite":["harness"],"prompt_function":"bbh_hyperbaton","hf_repo":"lukaemon/bbh","hf_subset":"hyperbaton","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bbh:logical_deduction_five_objects","suite":["harness"],"prompt_function":"bbh_logical_deduction_five_objects","hf_repo":"lukaemon/bbh","hf_subset":"logical_deduction_five_objects","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bbh:logical_deduction_seven_objects","suite":["harness"],"prompt_function":"bbh_logical_deduction_seven_objects","hf_repo":"lukaemon/bbh","hf_subset":"logical_deduction_seven_objects","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bbh:logical_deduction_three_objects","suite":["harness"],"prompt_function":"bbh_logical_deduction_three_objects","hf_repo":"lukaemon/bbh","hf_subset":"logical_deduction_three_objects","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bbh:movie_recommendation","suite":["harness"],"prompt_function":"bbh_movie_recommendation","hf_repo":"lukaemon/bbh","hf_subset":"movie_recommendation","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bbh:multistep_arithmetic_two","suite":["harness"],"prompt_function":"bbh_multistep_arithmetic_two","hf_repo":"lukaemon/bbh","hf_subset":"multistep_arithmetic_two","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bbh:navigate","suite":["harness"],"prompt_function":"bbh_navigate","hf_repo":"lukaemon/bbh","hf_subset":"navigate","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bbh:object_counting","suite":["harness"],"prompt_function":"bbh_object_counting","hf_repo":"lukaemon/bbh","hf_subset":"object_counting","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bbh:penguins_in_a_table","suite":["harness"],"prompt_function":"bbh_penguins_in_a_table","hf_repo":"lukaemon/bbh","hf_subset":"penguins_in_a_table","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bbh:reasoning_about_colored_objects","suite":["harness"],"prompt_function":"bbh_reasoning_about_colored_objects","hf_repo":"lukaemon/bbh","hf_subset":"reasoning_about_colored_objects","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bbh:ruin_names","suite":["harness"],"prompt_function":"bbh_ruin_names","hf_repo":"lukaemon/bbh","hf_subset":"ruin_names","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bbh:salient_translation_error_detection","suite":["harness"],"prompt_function":"bbh_salient_translation_error_detection","hf_repo":"lukaemon/bbh","hf_subset":"salient_translation_error_detection","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bbh:snarks","suite":["harness"],"prompt_function":"bbh_snarks","hf_repo":"lukaemon/bbh","hf_subset":"snarks","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bbh:sports_understanding","suite":["harness"],"prompt_function":"bbh_sports_understanding","hf_repo":"lukaemon/bbh","hf_subset":"sports_understanding","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bbh:temporal_sequences","suite":["harness"],"prompt_function":"bbh_temporal_sequences","hf_repo":"lukaemon/bbh","hf_subset":"temporal_sequences","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bbh:tracking_shuffled_objects_five_objects","suite":["harness"],"prompt_function":"bbh_tracking_shuffled_objects_five_objects","hf_repo":"lukaemon/bbh","hf_subset":"tracking_shuffled_objects_five_objects","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bbh:tracking_shuffled_objects_seven_objects","suite":["harness"],"prompt_function":"bbh_tracking_shuffled_objects_seven_objects","hf_repo":"lukaemon/bbh","hf_subset":"tracking_shuffled_objects_seven_objects","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bbh:tracking_shuffled_objects_three_objects","suite":["harness"],"prompt_function":"bbh_tracking_shuffled_objects_three_objects","hf_repo":"lukaemon/bbh","hf_subset":"tracking_shuffled_objects_three_objects","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bbh:web_of_lies","suite":["harness"],"prompt_function":"bbh_web_of_lies","hf_repo":"lukaemon/bbh","hf_subset":"web_of_lies","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bbh:word_sorting","suite":["harness"],"prompt_function":"bbh_word_sorting","hf_repo":"lukaemon/bbh","hf_subset":"word_sorting","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["</s>", "Q:", "\n\n"],"output_regex":null,"frozen":false, "trust_dataset":true,"version":0}
{"name":"bbq","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"all","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bbq:Age","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Age","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bbq:Disability_status","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Disability_status","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bbq:Gender_identity","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Gender_identity","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bbq:Nationality","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Nationality","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bbq:Physical_appearance","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Physical_appearance","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bbq:Race_ethnicity","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Race_ethnicity","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bbq:Race_x_SES","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Race_x_SES","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bbq:Race_x_gender","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Race_x_gender","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bbq:Religion","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Religion","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bbq:SES","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"SES","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bbq:Sexual_orientation","suite":["helm"],"prompt_function":"bbq","hf_repo":"lighteval\/bbq_helm","hf_subset":"Sexual_orientation","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bbq_lite_json","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"bbq_lite_json","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:auto_debugging","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"auto_debugging","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:bbq_lite_json:age_ambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-age_ambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:bbq_lite_json:age_disambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-age_disambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:bbq_lite_json:disability_status_ambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-disability_status_ambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:bbq_lite_json:disability_status_disambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-disability_status_disambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:bbq_lite_json:gender_identity_ambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-gender_identity_ambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:bbq_lite_json:gender_identity_disambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-gender_identity_disambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:bbq_lite_json:nationality_ambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-nationality_ambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:bbq_lite_json:nationality_disambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-nationality_disambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:bbq_lite_json:physical_appearance_ambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-physical_appearance_ambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:bbq_lite_json:physical_appearance_disambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-physical_appearance_disambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:bbq_lite_json:race_ethnicity_ambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-race_ethnicity_ambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:bbq_lite_json:race_ethnicity_disambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-race_ethnicity_disambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:bbq_lite_json:religion_ambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-religion_ambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:bbq_lite_json:religion_disambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-religion_disambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:bbq_lite_json:ses_ambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-ses_ambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:bbq_lite_json:ses_disambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-ses_disambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:bbq_lite_json:sexual_orientation_ambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-sexual_orientation_ambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:bbq_lite_json:sexual_orientation_disambig","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"bbq_lite_json-sexual_orientation_disambig","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:code_line_description","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"code_line_description","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:conceptual_combinations:contradictions","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conceptual_combinations-contradictions","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:conceptual_combinations:emergent_properties","suite":["helm"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conceptual_combinations-emergent_properties","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:conceptual_combinations:fanciful_fictional_combinations","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conceptual_combinations-fanciful_fictional_combinations","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:conceptual_combinations:homonyms","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conceptual_combinations-homonyms","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:conceptual_combinations:invented_words","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conceptual_combinations-invented_words","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:conlang_translation:adna_from","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-adna_from","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:conlang_translation:adna_to","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-adna_to","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:conlang_translation:atikampe_from","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-atikampe_from","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:conlang_translation:atikampe_to","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-atikampe_to","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:conlang_translation:gornam_from","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-gornam_from","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:conlang_translation:gornam_to","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-gornam_to","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:conlang_translation:holuan_from","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-holuan_from","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:conlang_translation:holuan_to","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-holuan_to","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:conlang_translation:mkafala_from","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-mkafala_from","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:conlang_translation:mkafala_to","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-mkafala_to","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:conlang_translation:postpositive_english_from","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-postpositive_english_from","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:conlang_translation:postpositive_english_to","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-postpositive_english_to","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:conlang_translation:unapuri_from","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-unapuri_from","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:conlang_translation:unapuri_to","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-unapuri_to","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:conlang_translation:vaomi_from","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-vaomi_from","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:conlang_translation:vaomi_to","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"conlang_translation-vaomi_to","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge1","rouge2","rougeL"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:emoji_movie","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"emoji_movie","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:formal_fallacies_syllogisms_negation","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"formal_fallacies_syllogisms_negation","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:hindu_knowledge","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"hindu_knowledge","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:known_unknowns","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"known_unknowns","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:language_identification","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"language_identification","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:linguistics_puzzles","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"linguistics_puzzles","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:logic_grid_puzzle","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"logic_grid_puzzle","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:logical_deduction-five_objects","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"logical_deduction-five_objects","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:logical_deduction-seven_objects","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"logical_deduction-seven_objects","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:logical_deduction-three_objects","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"logical_deduction-three_objects","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:misconceptions_russian","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"misconceptions_russian","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:novel_concepts","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"novel_concepts","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:operators","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"operators","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:parsinlu_reading_comprehension","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"parsinlu_reading_comprehension","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:play_dialog_same_or_different","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"play_dialog_same_or_different","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:repeat_copy_logic","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"repeat_copy_logic","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:strange_stories-boolean","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"strange_stories-boolean","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:strange_stories-multiple_choice","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"strange_stories-multiple_choice","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:strategyqa","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"strategyqa","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:symbol_interpretation-adversarial","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"symbol_interpretation-adversarial","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:symbol_interpretation-emoji_agnostic","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"symbol_interpretation-emoji_agnostic","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:symbol_interpretation-name_agnostic","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"symbol_interpretation-name_agnostic","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:symbol_interpretation-plain","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"symbol_interpretation-plain","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:symbol_interpretation-tricky","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"symbol_interpretation-tricky","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:vitaminc_fact_verification","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"vitaminc_fact_verification","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bigbench:winowhy","suite":["helm","bigbench_scenario"],"prompt_function":"bigbench_helm","hf_repo":"lighteval\/bigbench_helm","hf_subset":"winowhy","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:adjunct_island","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"adjunct_island","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:adjunct_island","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"adjunct_island","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:anaphor_gender_agreement","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"anaphor_gender_agreement","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:anaphor_gender_agreement","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"anaphor_gender_agreement","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:anaphor_number_agreement","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"anaphor_number_agreement","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:anaphor_number_agreement","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"anaphor_number_agreement","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:animate_subject_passive","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"animate_subject_passive","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:animate_subject_passive","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"animate_subject_passive","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:animate_subject_trans","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"animate_subject_trans","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:animate_subject_trans","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"animate_subject_trans","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:causative","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"causative","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:causative","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"causative","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:complex_NP_island","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"complex_NP_island","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:complex_NP_island","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"complex_NP_island","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:coordinate_structure_constraint_complex_left_branch","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"coordinate_structure_constraint_complex_left_branch","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:coordinate_structure_constraint_complex_left_branch","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"coordinate_structure_constraint_complex_left_branch","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:coordinate_structure_constraint_object_extraction","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"coordinate_structure_constraint_object_extraction","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:coordinate_structure_constraint_object_extraction","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"coordinate_structure_constraint_object_extraction","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:determiner_noun_agreement_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:determiner_noun_agreement_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:determiner_noun_agreement_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:determiner_noun_agreement_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:determiner_noun_agreement_irregular_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_irregular_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:determiner_noun_agreement_irregular_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_irregular_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:determiner_noun_agreement_irregular_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_irregular_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:determiner_noun_agreement_irregular_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_irregular_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:determiner_noun_agreement_with_adj_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_with_adj_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:determiner_noun_agreement_with_adj_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_with_adj_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:determiner_noun_agreement_with_adj_irregular_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_with_adj_irregular_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:determiner_noun_agreement_with_adj_irregular_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_with_adj_irregular_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:determiner_noun_agreement_with_adj_irregular_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_with_adj_irregular_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:determiner_noun_agreement_with_adj_irregular_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_with_adj_irregular_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:determiner_noun_agreement_with_adjective_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_with_adjective_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:determiner_noun_agreement_with_adjective_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"determiner_noun_agreement_with_adjective_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:distractor_agreement_relational_noun","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"distractor_agreement_relational_noun","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:distractor_agreement_relational_noun","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"distractor_agreement_relational_noun","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:distractor_agreement_relative_clause","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"distractor_agreement_relative_clause","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:distractor_agreement_relative_clause","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"distractor_agreement_relative_clause","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:drop_argument","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"drop_argument","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:drop_argument","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"drop_argument","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:ellipsis_n_bar_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"ellipsis_n_bar_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:ellipsis_n_bar_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"ellipsis_n_bar_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:ellipsis_n_bar_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"ellipsis_n_bar_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:ellipsis_n_bar_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"ellipsis_n_bar_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:existential_there_object_raising","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"existential_there_object_raising","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:existential_there_object_raising","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"existential_there_object_raising","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:existential_there_quantifiers_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"existential_there_quantifiers_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:existential_there_quantifiers_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"existential_there_quantifiers_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:existential_there_quantifiers_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"existential_there_quantifiers_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:existential_there_quantifiers_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"existential_there_quantifiers_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:existential_there_subject_raising","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"existential_there_subject_raising","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:existential_there_subject_raising","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"existential_there_subject_raising","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:expletive_it_object_raising","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"expletive_it_object_raising","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:expletive_it_object_raising","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"expletive_it_object_raising","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:inchoative","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"inchoative","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:inchoative","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"inchoative","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:intransitive","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"intransitive","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:intransitive","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"intransitive","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:irregular_past_participle_adjectives","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"irregular_past_participle_adjectives","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:irregular_past_participle_adjectives","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"irregular_past_participle_adjectives","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:irregular_past_participle_verbs","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"irregular_past_participle_verbs","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:irregular_past_participle_verbs","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"irregular_past_participle_verbs","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:irregular_plural_subject_verb_agreement_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"irregular_plural_subject_verb_agreement_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:irregular_plural_subject_verb_agreement_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"irregular_plural_subject_verb_agreement_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:irregular_plural_subject_verb_agreement_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"irregular_plural_subject_verb_agreement_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:irregular_plural_subject_verb_agreement_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"irregular_plural_subject_verb_agreement_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:left_branch_island_echo_question","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"left_branch_island_echo_question","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:left_branch_island_echo_question","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"left_branch_island_echo_question","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:left_branch_island_simple_question","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"left_branch_island_simple_question","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:left_branch_island_simple_question","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"left_branch_island_simple_question","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:matrix_question_npi_licensor_present","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"matrix_question_npi_licensor_present","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:matrix_question_npi_licensor_present","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"matrix_question_npi_licensor_present","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:npi_present_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"npi_present_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:npi_present_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"npi_present_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:npi_present_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"npi_present_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:npi_present_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"npi_present_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:only_npi_licensor_present","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"only_npi_licensor_present","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:only_npi_licensor_present","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"only_npi_licensor_present","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:only_npi_scope","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"only_npi_scope","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:only_npi_scope","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"only_npi_scope","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:passive_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"passive_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:passive_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"passive_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:passive_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"passive_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:passive_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"passive_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:principle_A_c_command","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"principle_A_c_command","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:principle_A_c_command","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"principle_A_c_command","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:principle_A_case_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"principle_A_case_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:principle_A_case_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"principle_A_case_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:principle_A_case_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"principle_A_case_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:principle_A_case_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"principle_A_case_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:principle_A_domain_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"principle_A_domain_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:principle_A_domain_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"principle_A_domain_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:principle_A_domain_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"principle_A_domain_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:principle_A_domain_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"principle_A_domain_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:principle_A_domain_3","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"principle_A_domain_3","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:principle_A_domain_3","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"principle_A_domain_3","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:principle_A_reconstruction","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"principle_A_reconstruction","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:principle_A_reconstruction","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"principle_A_reconstruction","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:regular_plural_subject_verb_agreement_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"regular_plural_subject_verb_agreement_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:regular_plural_subject_verb_agreement_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"regular_plural_subject_verb_agreement_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:regular_plural_subject_verb_agreement_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"regular_plural_subject_verb_agreement_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:regular_plural_subject_verb_agreement_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"regular_plural_subject_verb_agreement_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:sentential_negation_npi_licensor_present","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"sentential_negation_npi_licensor_present","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:sentential_negation_npi_licensor_present","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"sentential_negation_npi_licensor_present","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:sentential_negation_npi_scope","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"sentential_negation_npi_scope","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:sentential_negation_npi_scope","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"sentential_negation_npi_scope","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:sentential_subject_island","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"sentential_subject_island","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:sentential_subject_island","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"sentential_subject_island","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:superlative_quantifiers_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"superlative_quantifiers_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:superlative_quantifiers_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"superlative_quantifiers_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:superlative_quantifiers_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"superlative_quantifiers_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:superlative_quantifiers_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"superlative_quantifiers_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:tough_vs_raising_1","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"tough_vs_raising_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:tough_vs_raising_1","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"tough_vs_raising_1","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:tough_vs_raising_2","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"tough_vs_raising_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:tough_vs_raising_2","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"tough_vs_raising_2","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:transitive","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"transitive","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:transitive","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"transitive","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:wh_island","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"wh_island","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:wh_island","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"wh_island","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:wh_questions_object_gap","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"wh_questions_object_gap","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:wh_questions_object_gap","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"wh_questions_object_gap","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:wh_questions_subject_gap","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"wh_questions_subject_gap","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:wh_questions_subject_gap","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"wh_questions_subject_gap","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:wh_questions_subject_gap_long_distance","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"wh_questions_subject_gap_long_distance","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:wh_questions_subject_gap_long_distance","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"wh_questions_subject_gap_long_distance","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:wh_vs_that_no_gap","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"wh_vs_that_no_gap","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:wh_vs_that_no_gap","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"wh_vs_that_no_gap","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:wh_vs_that_no_gap_long_distance","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"wh_vs_that_no_gap_long_distance","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:wh_vs_that_no_gap_long_distance","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"wh_vs_that_no_gap_long_distance","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:wh_vs_that_with_gap","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"wh_vs_that_with_gap","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:wh_vs_that_with_gap","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"wh_vs_that_with_gap","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:wh_vs_that_with_gap_long_distance","suite":["lighteval","blimp"],"prompt_function":"blimp","hf_repo":"blimp","hf_subset":"wh_vs_that_with_gap_long_distance","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"blimp:wh_vs_that_with_gap_long_distance","suite":["helm","blimp"],"prompt_function":"blimp_helm","hf_repo":"blimp","hf_subset":"wh_vs_that_with_gap_long_distance","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bold","suite":["helm"],"prompt_function":"bold","hf_repo":"lighteval\/bold_helm","hf_subset":"all","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["prediction_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bold:gender","suite":["helm"],"prompt_function":"bold","hf_repo":"lighteval\/bold_helm","hf_subset":"gender","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["prediction_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bold:political_ideology","suite":["helm"],"prompt_function":"bold","hf_repo":"lighteval\/bold_helm","hf_subset":"political_ideology","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["prediction_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bold:profession","suite":["helm"],"prompt_function":"bold","hf_repo":"lighteval\/bold_helm","hf_subset":"profession","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["prediction_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bold:race","suite":["helm"],"prompt_function":"bold","hf_repo":"lighteval\/bold_helm","hf_subset":"race","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["prediction_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bold:religious_ideology","suite":["helm"],"prompt_function":"bold","hf_repo":"lighteval\/bold_helm","hf_subset":"religious_ideology","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["prediction_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"boolq","suite":["helm","helm_general"],"prompt_function":"boolq_helm","hf_repo":"lighteval\/boolq_helm","hf_subset":"default","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"boolq:contrastset","suite":["helm"],"prompt_function":"boolq_helm_contrastset","hf_repo":"lighteval\/boolq_helm","hf_subset":"default","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"bridging_anaphora_resolution_barqa","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"bridging_anaphora_resolution_barqa","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"causal_judgment","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"causal_judgment","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"cause_and_effect","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"cause_and_effect","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"checkmate_in_one","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"checkmate_in_one","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"chess_state_tracking","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"chess_state_tracking","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"chinese_remainder_theorem","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"chinese_remainder_theorem","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"cifar10_classification","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"cifar10_classification","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"civil_comments","suite":["helm","helm_general"],"prompt_function":"civil_comments","hf_repo":"lighteval\/civil_comments_helm","hf_subset":"all","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"civil_comments:LGBTQ","suite":["helm"],"prompt_function":"civil_comments","hf_repo":"lighteval\/civil_comments_helm","hf_subset":"LGBTQ","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"civil_comments:black","suite":["helm"],"prompt_function":"civil_comments","hf_repo":"lighteval\/civil_comments_helm","hf_subset":"black","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"civil_comments:christian","suite":["helm"],"prompt_function":"civil_comments","hf_repo":"lighteval\/civil_comments_helm","hf_subset":"christian","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"civil_comments:female","suite":["helm"],"prompt_function":"civil_comments","hf_repo":"lighteval\/civil_comments_helm","hf_subset":"female","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"civil_comments:male","suite":["helm"],"prompt_function":"civil_comments","hf_repo":"lighteval\/civil_comments_helm","hf_subset":"male","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"civil_comments:muslim","suite":["helm"],"prompt_function":"civil_comments","hf_repo":"lighteval\/civil_comments_helm","hf_subset":"muslim","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"civil_comments:other_religions","suite":["helm"],"prompt_function":"civil_comments","hf_repo":"lighteval\/civil_comments_helm","hf_subset":"other_religions","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"civil_comments:white","suite":["helm"],"prompt_function":"civil_comments","hf_repo":"lighteval\/civil_comments_helm","hf_subset":"white","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"code_line_description","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_and_after_query","hf_repo":"bigbench","hf_subset":"code_line_description","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"codenames","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"codenames","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["rouge_t5","bleu"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"color","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"color","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["rouge_t5","bleu","loglikelihood_acc","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"common_morpheme","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"common_morpheme","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"commonsenseqa","suite":["helm","commonsense_scenario"],"prompt_function":"commonsense_qa","hf_repo":"commonsense_qa","hf_subset":"default","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"conceptual_combinations","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"conceptual_combinations","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"conlang_translation","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_whitespace_after_query","hf_repo":"bigbench","hf_subset":"conlang_translation","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["rouge_t5","bleu","perfect_exact_match"],"stop_sequence":[".",";","!","?"],"output_regex":"[^\\.\\?\\!\\;\\n]+","frozen":false, "trust_dataset": true,"version":0}
{"name":"contextual_parametric_knowledge_conflicts","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"contextual_parametric_knowledge_conflicts","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["rouge_t5","loglikelihood_acc","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"copyright:n_books_1000-extractions_per_book_1-prefix_length_125","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"n_books_1000-extractions_per_book_1-prefix_length_125","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"copyright:n_books_1000-extractions_per_book_1-prefix_length_25","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"n_books_1000-extractions_per_book_1-prefix_length_25","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"copyright:n_books_1000-extractions_per_book_1-prefix_length_5","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"n_books_1000-extractions_per_book_1-prefix_length_5","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"copyright:n_books_1000-extractions_per_book_3-prefix_length_125","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"n_books_1000-extractions_per_book_3-prefix_length_125","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"copyright:n_books_1000-extractions_per_book_3-prefix_length_25","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"n_books_1000-extractions_per_book_3-prefix_length_25","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"copyright:n_books_1000-extractions_per_book_3-prefix_length_5","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"n_books_1000-extractions_per_book_3-prefix_length_5","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"copyright:oh_the_places","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"oh_the_places","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"copyright:pilot","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"pilot","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"copyright:popular_books-prefix_length_10","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"popular_books-prefix_length_10","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"copyright:popular_books-prefix_length_125","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"popular_books-prefix_length_125","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"copyright:popular_books-prefix_length_25","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"popular_books-prefix_length_25","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"copyright:popular_books-prefix_length_250","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"popular_books-prefix_length_250","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"copyright:popular_books-prefix_length_5","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"popular_books-prefix_length_5","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"copyright:popular_books-prefix_length_50","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"popular_books-prefix_length_50","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"copyright:prompt_num_line_1-min_lines_20","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"prompt_num_line_1-min_lines_20","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"copyright:prompt_num_line_10-min_lines_20","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"prompt_num_line_10-min_lines_20","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"copyright:prompt_num_line_5-min_lines_20","suite":["helm","copyright_scenario"],"prompt_function":"copyright","hf_repo":"lighteval\/copyright_helm","hf_subset":"prompt_num_line_5-min_lines_20","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["copyright"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"coqa","suite":["lighteval"],"prompt_function":"coqa","hf_repo":"coqa","hf_subset":"default","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["perfect_exact_match","f1_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"coqa_bb","suite":["lighteval","bigbench_programmatic","bigbench"],"prompt_function":"coqa","hf_repo":"coqa","hf_subset":"default","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["perfect_exact_match","f1_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"covid_dialogue","suite":["helm"],"prompt_function":"covid_dialogue","hf_repo":"lighteval\/covid_dialogue","hf_subset":"default","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":128,"metric":["exact_match","quasi_exact_match","f1_score","rougeL","bleu_1","bleu_4"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"crash_blossom","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"crash_blossom","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"crass_ai","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"crass_ai","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"cryobiology_spanish","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"cryobiology_spanish","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"cryptonite","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"cryptonite","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"cs_algorithms","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"cs_algorithms","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"dark_humor_detection","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"dark_humor_detection","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"date_understanding","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"date_understanding","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"disambiguation_qa","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"disambiguation_qa","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"discourse_marker_prediction","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"discourse_marker_prediction","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"disfl_qa","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"disfl_qa","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"drop","suite":["lighteval"],"prompt_function":"drop","hf_repo":"lighteval/drop_harness","hf_subset":"default","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":"train","few_shots_select":"random_sampling_from_train","generation_size":null,"metric":["drop"],"stop_sequence":["."],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"dyck_language:2","suite":["helm"],"prompt_function":"dyck_language","hf_repo":"lighteval\/DyckLanguage","hf_subset":"2","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"dyck_language:3","suite":["helm"],"prompt_function":"dyck_language","hf_repo":"lighteval\/DyckLanguage","hf_subset":"3","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"dyck_language:4","suite":["helm"],"prompt_function":"dyck_language","hf_repo":"lighteval\/DyckLanguage","hf_subset":"4","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"dyck_languages","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"dyck_languages","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"elementary_math_qa","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"elementary_math_qa","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"emoji_movie","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"emoji_movie","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["rouge_t5","bleu","loglikelihood_acc","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"emojis_emotion_prediction","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"emojis_emotion_prediction","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"empirical_judgments","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"empirical_judgments","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"english_proverbs","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"english_proverbs","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"english_russian_proverbs","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"english_russian_proverbs","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"entailed_polarity","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"entailed_polarity","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"entailed_polarity_hindi","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"entailed_polarity_hindi","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"entity_data_imputation:Buy","suite":["helm"],"prompt_function":"entity_data_imputation","hf_repo":"lighteval\/Buy","hf_subset":"default","hf_avail_splits":["train","test","valid"],"evaluation_splits":["valid","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"entity_data_imputation:Restaurant","suite":["helm"],"prompt_function":"entity_data_imputation","hf_repo":"lighteval\/Restaurant","hf_subset":"default","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"entity_matching:Abt_Buy","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Abt_Buy","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"entity_matching:Amazon_Google","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Amazon_Google","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"entity_matching:Beer","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Beer","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"entity_matching:Company","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Company","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"entity_matching:DBLP_ACM","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"DBLP_ACM","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"entity_matching:DBLP_GoogleScholar","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"DBLP_GoogleScholar","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"entity_matching:Dirty_DBLP_ACM","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Dirty_DBLP_ACM","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"entity_matching:Dirty_DBLP_GoogleScholar","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Dirty_DBLP_GoogleScholar","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"entity_matching:Dirty_Walmart_Amazon","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Dirty_Walmart_Amazon","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"entity_matching:Dirty_iTunes_Amazon","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Dirty_iTunes_Amazon","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"entity_matching:Fodors_Zagats","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Fodors_Zagats","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"entity_matching:Walmart_Amazon","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"Walmart_Amazon","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"entity_matching:iTunes_Amazon","suite":["helm"],"prompt_function":"entity_matching","hf_repo":"lighteval\/EntityMatching","hf_subset":"iTunes_Amazon","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"epistemic_reasoning","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"epistemic_reasoning","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"ethics:commonsense","suite":["lighteval","ethics"],"prompt_function":"ethics_commonsense","hf_repo":"lighteval\/hendrycks_ethics","hf_subset":"commonsense","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"ethics:deontology","suite":["lighteval","ethics"],"prompt_function":"ethics_deontology","hf_repo":"lighteval\/hendrycks_ethics","hf_subset":"deontology","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"ethics:justice","suite":["lighteval","ethics"],"prompt_function":"ethics_justice","hf_repo":"lighteval\/hendrycks_ethics","hf_subset":"justice","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"ethics:utilitarianism","suite":["lighteval","ethics"],"prompt_function":"ethics_utilitarianism","hf_repo":"lighteval\/hendrycks_ethics","hf_subset":"utilitarianism","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"ethics:virtue","suite":["lighteval","ethics"],"prompt_function":"ethics_virtue","hf_repo":"lighteval\/hendrycks_ethics","hf_subset":"virtue","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"evaluating_information_essentiality","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"evaluating_information_essentiality","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"fact_checker","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"fact_checker","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"fantasy_reasoning","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"fantasy_reasoning","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"few_shot_nlg","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"few_shot_nlg","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","bleurt"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"figure_of_speech_detection","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"figure_of_speech_detection","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"formal_fallacies_syllogisms_negation","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"formal_fallacies_syllogisms_negation","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"gem","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"gem","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"gender_inclusive_sentences_german","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"gender_inclusive_sentences_german","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"general_knowledge","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"general_knowledge","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"geometric_shapes","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"geometric_shapes","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["rouge_t5","bleu","loglikelihood_acc","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"glue:cola","suite":["lighteval","glue"],"prompt_function":"cola","hf_repo":"glue","hf_subset":"cola","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token", "mcc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"glue:mnli","suite":["lighteval","glue"],"prompt_function":"mnli","hf_repo":"glue","hf_subset":"mnli_matched","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"glue:mnli_mismatched","suite":["lighteval","glue"],"prompt_function":"mnli","hf_repo":"glue","hf_subset":"mnli_mismatched","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"glue:mrpc","suite":["lighteval","glue"],"prompt_function":"mrpc","hf_repo":"glue","hf_subset":"mrpc","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc", "loglikelihood_f1"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"glue:qnli","suite":["lighteval","glue"],"prompt_function":"qnli","hf_repo":"glue","hf_subset":"qnli","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"glue:qqp","suite":["lighteval","glue"],"prompt_function":"qqp","hf_repo":"glue","hf_subset":"qqp","hf_avail_splits":["train","validation","test"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc", "loglikelihood_f1"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"glue:rte","suite":["lighteval","glue"],"prompt_function":"rte","hf_repo":"glue","hf_subset":"rte","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"glue:sst2","suite":["lighteval","glue"],"prompt_function":"sst","hf_repo":"glue","hf_subset":"sst2","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"glue:stsb","suite":["lighteval","glue"],"prompt_function":"stsb","hf_repo":"glue","hf_subset":"stsb","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"glue:wnli","suite":["lighteval","glue"],"prompt_function":"wnli","hf_repo":"glue","hf_subset":"wnli","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"goal_step_wikihow","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"goal_step_wikihow","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"gpqa","suite":["lighteval"],"prompt_function":"gpqa","hf_repo":"Idavidrein/gpqa","hf_subset":"gpqa_main","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":"random_sampling","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"gre_reading_comprehension","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"gre_reading_comprehension","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"gsm8k","suite":["leaderboard"],"prompt_function":"gsm8k","hf_repo":"gsm8k","hf_subset":"main","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":"random_sampling_from_train","generation_size":256,"metric":["quasi_exact_match_gsm8k"],"stop_sequence":["Question:","Question",":"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"gsm8k","suite":["lighteval"],"prompt_function":"gsm8k","hf_repo":"gsm8k","hf_subset":"main","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":"random_sampling_from_train","generation_size":256,"metric":["quasi_exact_match_gsm8k","maj_at_8_gsm8k"],"stop_sequence":["Question:"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"headqa:en","suite":["lighteval","headqa"],"prompt_function":"headqa","hf_repo":"lighteval/headqa_harness","hf_subset":"en","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"headqa:es","suite":["lighteval","headqa"],"prompt_function":"headqa","hf_repo":"lighteval/headqa_harness","hf_subset":"es","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"hellaswag","suite":["leaderboard"],"prompt_function":"hellaswag_harness","hf_repo":"hellaswag","hf_subset":"default","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":"random_sampling_from_train","generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"hellaswag","suite":["helm","helm_general"],"prompt_function":"hellaswag_helm","hf_repo":"hellaswag","hf_subset":"default","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"hhh_alignment","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"hhh_alignment","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"hindi_question_answering","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"hindi_question_answering","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"hindu_knowledge","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"hindu_knowledge","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"hinglish_toxicity","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"hinglish_toxicity","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"human_organs_senses","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"human_organs_senses","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"humaneval","suite":["helm","code_scenario"],"prompt_function":"humaneval","hf_repo":"openai_humaneval","hf_subset":"openai_humaneval","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":600,"metric":["code_humaneval"],"stop_sequence":["\nclass","\ndef","\nif","\nprint"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"hyperbaton","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"hyperbaton","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"identify_math_theorems","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"identify_math_theorems","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"identify_odd_metaphor","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"identify_odd_metaphor","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"imdb","suite":["helm","helm_general"],"prompt_function":"imdb","hf_repo":"lighteval\/IMDB_helm","hf_subset":"default","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"imdb:contrastset","suite":["helm"],"prompt_function":"imdb_contrastset","hf_repo":"lighteval\/IMDB_helm","hf_subset":"default","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"implicatures","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"implicatures","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"implicit_relations","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"implicit_relations","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"intent_recognition","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"intent_recognition","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"interactive_qa_mmlu:abstract_algebra","suite":["helm","interactive_qa_mmlu_scenario"],"prompt_function":"mmlu_qa_abstract_algebra","hf_repo":"lighteval\/mmlu","hf_subset":"abstract_algebra","hf_avail_splits":["dev","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"interactive_qa_mmlu:college_chemistry","suite":["helm","interactive_qa_mmlu_scenario"],"prompt_function":"mmlu_qa_college_chemistry","hf_repo":"lighteval\/mmlu","hf_subset":"college_chemistry","hf_avail_splits":["dev","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"interactive_qa_mmlu:global_facts","suite":["helm","interactive_qa_mmlu_scenario"],"prompt_function":"mmlu_qa_global_facts","hf_repo":"lighteval\/mmlu","hf_subset":"global_facts","hf_avail_splits":["dev","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"interactive_qa_mmlu:miscellaneous","suite":["helm","interactive_qa_mmlu_scenario"],"prompt_function":"mmlu_qa_miscellaneous","hf_repo":"lighteval\/mmlu","hf_subset":"miscellaneous","hf_avail_splits":["dev","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"interactive_qa_mmlu:nutrition","suite":["helm","interactive_qa_mmlu_scenario"],"prompt_function":"mmlu_qa_nutrition","hf_repo":"lighteval\/mmlu","hf_subset":"nutrition","hf_avail_splits":["dev","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"interactive_qa_mmlu:us_foreign_policy","suite":["helm","interactive_qa_mmlu_scenario"],"prompt_function":"mmlu_qa_us_foreign_policy","hf_repo":"lighteval\/mmlu","hf_subset":"us_foreign_policy","hf_avail_splits":["dev","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"international_phonetic_alphabet_nli","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"international_phonetic_alphabet_nli","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"international_phonetic_alphabet_transliterate","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"international_phonetic_alphabet_transliterate","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"intersect_geometry","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"intersect_geometry","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"irony_identification","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"irony_identification","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"iwslt17:ar-en","suite":["lighteval","harness_selection"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_ar-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"iwslt17:de-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_de-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"iwslt17:en-ar","suite":["lighteval","harness_selection"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_ar-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"iwslt17:en-de","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_en-de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"iwslt17:en-fr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_en-fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"iwslt17:en-ja","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_en-ja","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"iwslt17:en-ko","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_en-ko","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"iwslt17:en-zh","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_en-zh","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"iwslt17:fr-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_fr-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"iwslt17:ja-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_ja-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"iwslt17:ko-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_ko-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"iwslt17:zh-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"iwslt17_zh-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"kanji_ascii","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"kanji_ascii","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"kannada","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"kannada","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"key_value_maps","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"key_value_maps","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"known_unknowns","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"known_unknowns","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lambada:standard","suite":["lighteval","lambada"],"prompt_function":"lambada","hf_repo":"lambada","hf_subset":"plain_text","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["target_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lambada:standard_cloze","suite":["lighteval","lambada"],"prompt_function":"lambada_cloze","hf_repo":"lambada","hf_subset":"plain_text","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["target_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lambada:openai","suite":["lighteval","lambada"],"prompt_function":"lambada","hf_repo":"EleutherAI\/lambada_openai","hf_subset":"default","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["target_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lambada:openai:de","suite":["lighteval","lambada"],"prompt_function":"lambada","hf_repo":"EleutherAI\/lambada_openai","hf_subset":"de","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["target_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lambada:openai:en","suite":["lighteval","lambada"],"prompt_function":"lambada","hf_repo":"EleutherAI\/lambada_openai","hf_subset":"en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["target_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lambada:openai:es","suite":["lighteval","lambada"],"prompt_function":"lambada","hf_repo":"EleutherAI\/lambada_openai","hf_subset":"es","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["target_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lambada:openai:fr","suite":["lighteval","lambada"],"prompt_function":"lambada","hf_repo":"EleutherAI\/lambada_openai","hf_subset":"fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["target_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lambada:openai:it","suite":["lighteval","lambada"],"prompt_function":"lambada","hf_repo":"EleutherAI\/lambada_openai","hf_subset":"it","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["target_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lambada:openai_cloze","suite":["lighteval","lambada"],"prompt_function":"lambada_cloze","hf_repo":"EleutherAI\/lambada_openai","hf_subset":"en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["target_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"language_games","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"language_games","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"language_identification","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"language_identification","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"legal_summarization:billsum","suite":["helm"],"prompt_function":"legal_summarization","hf_repo":"lighteval\/legal_summarization","hf_subset":"BillSum","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":1024,"metric":["rouge1","rouge2","rougeL","faithfulness","extractiveness","bert_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"legal_summarization:eurlexsum","suite":["helm"],"prompt_function":"legal_summarization","hf_repo":"lighteval\/legal_summarization","hf_subset":"EurLexSum","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["rouge1","rouge2","rougeL","faithfulness","extractiveness","bert_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"legal_summarization:multilexsum","suite":["helm"],"prompt_function":"multilexsum","hf_repo":"lighteval\/legal_summarization","hf_subset":"MultiLexSum","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":256,"metric":["rouge1","rouge2","rougeL","faithfulness","extractiveness","bert_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"legalsupport","suite":["helm"],"prompt_function":"legal_support","hf_repo":"lighteval\/LegalSupport","hf_subset":"default","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["loglikelihood_acc","exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lexglue:case_hold","suite":["helm","lex_glue_scenario"],"prompt_function":"lex_glue_case_hold","hf_repo":"lighteval\/lexglue","hf_subset":"case_hold","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lexglue:ecthr_a","suite":["helm","lex_glue_scenario"],"prompt_function":"lex_glue_ecthr_a","hf_repo":"lighteval\/lexglue","hf_subset":"ecthr_a","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lexglue:ecthr_b","suite":["helm","lex_glue_scenario"],"prompt_function":"lex_glue_ecthr_b","hf_repo":"lighteval\/lexglue","hf_subset":"ecthr_b","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lexglue:eurlex","suite":["helm","lex_glue_scenario"],"prompt_function":"lex_glue_eurlex","hf_repo":"lighteval\/lexglue","hf_subset":"eurlex","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lexglue:ledgar","suite":["helm","lex_glue_scenario"],"prompt_function":"lex_glue_ledgar","hf_repo":"lighteval\/lexglue","hf_subset":"ledgar","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lexglue:scotus","suite":["helm","lex_glue_scenario"],"prompt_function":"lex_glue_scotus","hf_repo":"lighteval\/lexglue","hf_subset":"scotus","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lexglue:unfair_tos","suite":["helm","lex_glue_scenario"],"prompt_function":"lex_glue_unfair_tos","hf_repo":"lighteval\/lexglue","hf_subset":"unfair_tos","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lextreme:brazilian_court_decisions_judgment","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_brazilian_court_decisions_judgment","hf_repo":"lighteval\/lextreme","hf_subset":"brazilian_court_decisions_judgment","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lextreme:brazilian_court_decisions_unanimity","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_brazilian_court_decisions_unanimity","hf_repo":"lighteval\/lextreme","hf_subset":"brazilian_court_decisions_unanimity","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lextreme:covid19_emergency_event","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_covid19_emergency_event","hf_repo":"lighteval\/lextreme","hf_subset":"covid19_emergency_event","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lextreme:german_argument_mining","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_german_argument_mining","hf_repo":"lighteval\/lextreme","hf_subset":"german_argument_mining","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lextreme:greek_legal_code_chapter","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_greek_legal_code_chapter","hf_repo":"lighteval\/lextreme","hf_subset":"greek_legal_code_chapter","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lextreme:greek_legal_code_subject","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_greek_legal_code_subject","hf_repo":"lighteval\/lextreme","hf_subset":"greek_legal_code_subject","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lextreme:greek_legal_code_volume","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_greek_legal_code_volume","hf_repo":"lighteval\/lextreme","hf_subset":"greek_legal_code_volume","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match","f1_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lextreme:greek_legal_ner","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_greek_legal_ner","hf_repo":"lighteval\/lextreme","hf_subset":"greek_legal_ner","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":430,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lextreme:legalnero","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_legalnero","hf_repo":"lighteval\/lextreme","hf_subset":"legalnero","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":788,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lextreme:lener_br","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_lener_br","hf_repo":"lighteval\/lextreme","hf_subset":"lener_br","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":338,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lextreme:mapa_coarse","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_mapa_coarse","hf_repo":"lighteval\/lextreme","hf_subset":"mapa_coarse","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":274,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lextreme:mapa_fine","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_mapa_fine","hf_repo":"lighteval\/lextreme","hf_subset":"mapa_fine","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":274,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lextreme:multi_eurlex_level_1","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_multi_eurlex_level_1","hf_repo":"lighteval\/lextreme","hf_subset":"multi_eurlex_level_1","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lextreme:multi_eurlex_level_2","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_multi_eurlex_level_2","hf_repo":"lighteval\/lextreme","hf_subset":"multi_eurlex_level_2","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lextreme:multi_eurlex_level_3","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_multi_eurlex_level_3","hf_repo":"lighteval\/lextreme","hf_subset":"multi_eurlex_level_3","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lextreme:online_terms_of_service_clause_topics","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_online_terms_of_service_clause_topics","hf_repo":"lighteval\/lextreme","hf_subset":"online_terms_of_service_clause_topics","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lextreme:online_terms_of_service_unfairness_levels","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_online_terms_of_service_unfairness_levels","hf_repo":"lighteval\/lextreme","hf_subset":"online_terms_of_service_unfairness_levels","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":10,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lextreme:swiss_judgment_prediction","suite":["helm","lextreme_scenario"],"prompt_function":"lextreme_swiss_judgment_prediction","hf_repo":"lighteval\/lextreme","hf_subset":"swiss_judgment_prediction","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","f1_score","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"linguistic_mappings","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"linguistic_mappings","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"linguistics_puzzles","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_whitespace_after_query","hf_repo":"bigbench","hf_subset":"linguistics_puzzles","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["bleu","rouge_t5","perfect_exact_match"],"stop_sequence":null,"output_regex":"[^\\.\\?\\!\\;\\n]+", "trust_dataset": true,"version":0}
{"name":"logic_grid_puzzle","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"logic_grid_puzzle","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"logical_args","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"logical_args","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"logical_deduction","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_whitespace_after_query","hf_repo":"bigbench","hf_subset":"logical_deduction","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"logical_fallacy_detection","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"logical_fallacy_detection","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"logical_sequence","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"logical_sequence","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"logiqa","suite":["lighteval"],"prompt_function":"logiqa","hf_repo":"lighteval/logiqa_harness","hf_subset":"logiqa","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lsat_qa","suite":["helm","lsat_qa_scenario"],"prompt_function":"lsat_qa","hf_repo":"lighteval\/lsat_qa","hf_subset":"all","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lsat_qa:assignment","suite":["helm","lsat_qa_scenario"],"prompt_function":"lsat_qa","hf_repo":"lighteval\/lsat_qa","hf_subset":"assignment","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lsat_qa:grouping","suite":["helm","lsat_qa_scenario"],"prompt_function":"lsat_qa","hf_repo":"lighteval\/lsat_qa","hf_subset":"grouping","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lsat_qa:miscellaneous","suite":["helm","lsat_qa_scenario"],"prompt_function":"lsat_qa","hf_repo":"lighteval\/lsat_qa","hf_subset":"miscellaneous","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"lsat_qa:ordering","suite":["helm","lsat_qa_scenario"],"prompt_function":"lsat_qa","hf_repo":"lighteval\/lsat_qa","hf_subset":"ordering","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"math:algebra","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"algebra","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["quasi_exact_match_math","maj_at_4_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":1}
{"name":"math:counting_and_probability","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"counting_and_probability","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["quasi_exact_match_math","maj_at_4_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":1}
{"name":"math:geometry","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"geometry","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["quasi_exact_match_math","maj_at_4_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":1}
{"name":"math:intermediate_algebra","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"intermediate_algebra","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["quasi_exact_match_math","maj_at_4_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":1}
{"name":"math:number_theory","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"number_theory","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["quasi_exact_match_math","maj_at_4_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":1}
{"name":"math:prealgebra","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"prealgebra","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["quasi_exact_match_math","maj_at_4_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":1}
{"name":"math:precalculus","suite":["lighteval","math"],"prompt_function":"math","hf_repo":"lighteval\/MATH","hf_subset":"precalculus","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["quasi_exact_match_math","maj_at_4_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":1}
{"name":"math_cot:algebra","suite":["lighteval","math"],"prompt_function":"math_cot","hf_repo":"lighteval\/MATH","hf_subset":"algebra","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["quasi_exact_match_math","maj_at_4_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"math_cot:counting_and_probability","suite":["lighteval","math"],"prompt_function":"math_cot","hf_repo":"lighteval\/MATH","hf_subset":"counting_and_probability","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["quasi_exact_match_math","maj_at_4_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"math_cot:geometry","suite":["lighteval","math"],"prompt_function":"math_cot","hf_repo":"lighteval\/MATH","hf_subset":"geometry","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["quasi_exact_match_math","maj_at_4_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"math_cot:intermediate_algebra","suite":["lighteval","math"],"prompt_function":"math_cot","hf_repo":"lighteval\/MATH","hf_subset":"intermediate_algebra","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["quasi_exact_match_math","maj_at_4_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"math_cot:number_theory","suite":["lighteval","math"],"prompt_function":"math_cot","hf_repo":"lighteval\/MATH","hf_subset":"number_theory","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["quasi_exact_match_math","maj_at_4_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"math_cot:prealgebra","suite":["lighteval","math"],"prompt_function":"math_cot","hf_repo":"lighteval\/MATH","hf_subset":"prealgebra","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["quasi_exact_match_math","maj_at_4_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"math_cot:precalculus","suite":["lighteval","math"],"prompt_function":"math_cot","hf_repo":"lighteval\/MATH","hf_subset":"precalculus","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":2048,"metric":["quasi_exact_match_math","maj_at_4_math"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mathematical_induction","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"mathematical_induction","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mathqa","suite":["lighteval"],"prompt_function":"mathqa","hf_repo":"math_qa","hf_subset":"default","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"matrixshapes","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"matrixshapes","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"me_q_sum","suite":["helm"],"prompt_function":"me_q_sum","hf_repo":"lighteval\/me_q_sum","hf_subset":"default","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":128,"metric":["exact_match","quasi_exact_match","f1_score","rougeL","bleu_1","bleu_4"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"med_dialog:healthcaremagic","suite":["helm"],"prompt_function":"med_dialog","hf_repo":"lighteval\/med_dialog","hf_subset":"healthcaremagic","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":128,"metric":["exact_match","quasi_exact_match","f1_score","rougeL","bleu_1","bleu_4"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"med_dialog:icliniq","suite":["helm"],"prompt_function":"med_dialog","hf_repo":"lighteval\/med_dialog","hf_subset":"icliniq","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":128,"metric":["exact_match","quasi_exact_match","f1_score","rougeL","bleu_1","bleu_4"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"med_mcqa","suite":["helm"],"prompt_function":"med_mcqa","hf_repo":"lighteval\/med_mcqa","hf_subset":"default","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["loglikelihood_acc","exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"med_paragraph_simplification","suite":["helm"],"prompt_function":"med_paragraph_simplification","hf_repo":"lighteval\/med_paragraph_simplification","hf_subset":"default","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":512,"metric":["exact_match","quasi_exact_match","f1_score","rougeL","bleu_1","bleu_4"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"med_qa","suite":["helm"],"prompt_function":"med_qa","hf_repo":"bigbio\/med_qa","hf_subset":"med_qa_en_source","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["loglikelihood_acc","exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"metaphor_boolean","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"metaphor_boolean","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"metaphor_understanding","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"metaphor_understanding","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mgsm:en","suite":["lighteval"],"prompt_function":"mgsm_en","hf_repo":"juletxara/mgsm","hf_subset":"en","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "Question:"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mgsm:es","suite":["lighteval"],"prompt_function":"mgsm_es","hf_repo":"juletxara/mgsm","hf_subset":"es","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "Pregunta:"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mgsm:fr","suite":["lighteval"],"prompt_function":"mgsm_fr","hf_repo":"juletxara/mgsm","hf_subset":"fr","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "Question:"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mgsm:de","suite":["lighteval"],"prompt_function":"mgsm_de","hf_repo":"juletxara/mgsm","hf_subset":"de","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "Frage:"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mgsm:ru","suite":["lighteval"],"prompt_function":"mgsm_ru","hf_repo":"juletxara/mgsm","hf_subset":"ru","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "\u0417\u0430\u0434\u0430\u0447\u0430:"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mgsm:zh","suite":["lighteval"],"prompt_function":"mgsm_zh","hf_repo":"juletxara/mgsm","hf_subset":"zh","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "\u95ee\u9898:"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mgsm:ja","suite":["lighteval"],"prompt_function":"mgsm_ja","hf_repo":"juletxara/mgsm","hf_subset":"ja","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "\u554f\u984c:"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mgsm:th","suite":["lighteval"],"prompt_function":"mgsm_th","hf_repo":"juletxara/mgsm","hf_subset":"th","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "\u0e42\u0e08\u0e17\u0e22\u0e4c:"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mgsm:sw","suite":["lighteval"],"prompt_function":"mgsm_sw","hf_repo":"juletxara/mgsm","hf_subset":"sw","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "Swali:"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mgsm:bn","suite":["lighteval"],"prompt_function":"mgsm_bn","hf_repo":"juletxara/mgsm","hf_subset":"bn","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "\u09aa\u09cd\u09b0\u09b6\u09cd\u09a8:"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mgsm:te","suite":["lighteval"],"prompt_function":"mgsm_te","hf_repo":"juletxara/mgsm","hf_subset":"te","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":null,"metric":["exact_match", "quasi_exact_match"],"stop_sequence":["\n", ":", "\u0c2a\u0c4d\u0c30\u0c36\u0c4d\u0c28:"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"minute_mysteries_qa","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"minute_mysteries_qa","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","rouge_t5"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"misconceptions","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"misconceptions","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"misconceptions_russian","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"misconceptions_russian","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"all","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu","suite":["original"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"all","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":5,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:abstract_algebra","suite":["original","mmlu"],"prompt_function":"mmlu_abstract_algebra","hf_repo":"cais\/mmlu","hf_subset":"abstract_algebra","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:abstract_algebra","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"abstract_algebra","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:abstract_algebra","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"abstract_algebra","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:anatomy","suite":["original","mmlu"],"prompt_function":"mmlu_anatomy","hf_repo":"cais\/mmlu","hf_subset":"anatomy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:anatomy","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"anatomy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:anatomy","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"anatomy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:astronomy","suite":["original","mmlu"],"prompt_function":"mmlu_astronomy","hf_repo":"cais\/mmlu","hf_subset":"astronomy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:astronomy","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"astronomy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:astronomy","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"astronomy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:business_ethics","suite":["original","mmlu"],"prompt_function":"mmlu_business_ethics","hf_repo":"cais\/mmlu","hf_subset":"business_ethics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:business_ethics","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"business_ethics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:business_ethics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"business_ethics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:clinical_knowledge","suite":["original","mmlu"],"prompt_function":"mmlu_clinical_knowledge","hf_repo":"cais\/mmlu","hf_subset":"clinical_knowledge","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:clinical_knowledge","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"clinical_knowledge","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:clinical_knowledge","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"clinical_knowledge","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:college_biology","suite":["original","mmlu"],"prompt_function":"mmlu_college_biology","hf_repo":"cais\/mmlu","hf_subset":"college_biology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:college_biology","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"college_biology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:college_biology","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"college_biology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:college_chemistry","suite":["original","mmlu"],"prompt_function":"mmlu_college_chemistry","hf_repo":"cais\/mmlu","hf_subset":"college_chemistry","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:college_chemistry","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"college_chemistry","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:college_chemistry","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"college_chemistry","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:college_computer_science","suite":["original","mmlu"],"prompt_function":"mmlu_college_computer_science","hf_repo":"cais\/mmlu","hf_subset":"college_computer_science","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:college_computer_science","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"college_computer_science","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:college_computer_science","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"college_computer_science","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:college_mathematics","suite":["original","mmlu"],"prompt_function":"mmlu_college_mathematics","hf_repo":"cais\/mmlu","hf_subset":"college_mathematics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:college_mathematics","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"college_mathematics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:college_mathematics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"college_mathematics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:college_medicine","suite":["original","mmlu"],"prompt_function":"mmlu_college_medicine","hf_repo":"cais\/mmlu","hf_subset":"college_medicine","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:college_medicine","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"college_medicine","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:college_medicine","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"college_medicine","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:college_physics","suite":["original","mmlu"],"prompt_function":"mmlu_college_physics","hf_repo":"cais\/mmlu","hf_subset":"college_physics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:college_physics","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"college_physics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:college_physics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"college_physics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:computer_security","suite":["original","mmlu"],"prompt_function":"mmlu_computer_security","hf_repo":"cais\/mmlu","hf_subset":"computer_security","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:computer_security","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"computer_security","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:computer_security","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"computer_security","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:conceptual_physics","suite":["original","mmlu"],"prompt_function":"mmlu_conceptual_physics","hf_repo":"cais\/mmlu","hf_subset":"conceptual_physics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:conceptual_physics","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"conceptual_physics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:conceptual_physics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"conceptual_physics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:econometrics","suite":["original","mmlu"],"prompt_function":"mmlu_econometrics","hf_repo":"cais\/mmlu","hf_subset":"econometrics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:econometrics","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"econometrics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:econometrics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"econometrics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:electrical_engineering","suite":["original","mmlu"],"prompt_function":"mmlu_electrical_engineering","hf_repo":"cais\/mmlu","hf_subset":"electrical_engineering","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:electrical_engineering","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"electrical_engineering","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:electrical_engineering","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"electrical_engineering","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:elementary_mathematics","suite":["original","mmlu"],"prompt_function":"mmlu_elementary_mathematics","hf_repo":"cais\/mmlu","hf_subset":"elementary_mathematics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:elementary_mathematics","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"elementary_mathematics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:elementary_mathematics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"elementary_mathematics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:formal_logic","suite":["original","mmlu"],"prompt_function":"mmlu_formal_logic","hf_repo":"cais\/mmlu","hf_subset":"formal_logic","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:formal_logic","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"formal_logic","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:formal_logic","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"formal_logic","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:global_facts","suite":["original","mmlu"],"prompt_function":"mmlu_global_facts","hf_repo":"cais\/mmlu","hf_subset":"global_facts","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:global_facts","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"global_facts","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:global_facts","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"global_facts","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_biology","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_biology","hf_repo":"cais\/mmlu","hf_subset":"high_school_biology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_biology","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_biology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_biology","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_biology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_chemistry","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_chemistry","hf_repo":"cais\/mmlu","hf_subset":"high_school_chemistry","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_chemistry","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_chemistry","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_chemistry","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_chemistry","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_computer_science","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_computer_science","hf_repo":"cais\/mmlu","hf_subset":"high_school_computer_science","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_computer_science","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_computer_science","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_computer_science","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_computer_science","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_european_history","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_european_history","hf_repo":"cais\/mmlu","hf_subset":"high_school_european_history","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_european_history","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_european_history","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_european_history","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_european_history","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_geography","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_geography","hf_repo":"cais\/mmlu","hf_subset":"high_school_geography","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_geography","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_geography","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_geography","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_geography","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_government_and_politics","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_government_and_politics","hf_repo":"cais\/mmlu","hf_subset":"high_school_government_and_politics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_government_and_politics","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_government_and_politics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_government_and_politics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_government_and_politics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_macroeconomics","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_macroeconomics","hf_repo":"cais\/mmlu","hf_subset":"high_school_macroeconomics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_macroeconomics","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_macroeconomics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_macroeconomics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_macroeconomics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_mathematics","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_mathematics","hf_repo":"cais\/mmlu","hf_subset":"high_school_mathematics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_mathematics","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_mathematics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_mathematics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_mathematics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_microeconomics","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_microeconomics","hf_repo":"cais\/mmlu","hf_subset":"high_school_microeconomics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_microeconomics","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_microeconomics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_microeconomics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_microeconomics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_physics","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_physics","hf_repo":"cais\/mmlu","hf_subset":"high_school_physics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_physics","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_physics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_physics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_physics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_psychology","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_psychology","hf_repo":"cais\/mmlu","hf_subset":"high_school_psychology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_psychology","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_psychology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_psychology","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_psychology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_statistics","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_statistics","hf_repo":"cais\/mmlu","hf_subset":"high_school_statistics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_statistics","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_statistics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_statistics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_statistics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_us_history","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_us_history","hf_repo":"cais\/mmlu","hf_subset":"high_school_us_history","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_us_history","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_us_history","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_us_history","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_us_history","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_world_history","suite":["original","mmlu"],"prompt_function":"mmlu_high_school_world_history","hf_repo":"cais\/mmlu","hf_subset":"high_school_world_history","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_world_history","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_world_history","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:high_school_world_history","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"high_school_world_history","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:human_aging","suite":["original","mmlu"],"prompt_function":"mmlu_human_aging","hf_repo":"cais\/mmlu","hf_subset":"human_aging","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:human_aging","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"human_aging","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:human_aging","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"human_aging","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:human_sexuality","suite":["original","mmlu"],"prompt_function":"mmlu_human_sexuality","hf_repo":"cais\/mmlu","hf_subset":"human_sexuality","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:human_sexuality","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"human_sexuality","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:human_sexuality","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"human_sexuality","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:international_law","suite":["original","mmlu"],"prompt_function":"mmlu_international_law","hf_repo":"cais\/mmlu","hf_subset":"international_law","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:international_law","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"international_law","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:international_law","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"international_law","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:jurisprudence","suite":["original","mmlu"],"prompt_function":"mmlu_jurisprudence","hf_repo":"cais\/mmlu","hf_subset":"jurisprudence","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:jurisprudence","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"jurisprudence","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:jurisprudence","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"jurisprudence","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:logical_fallacies","suite":["original","mmlu"],"prompt_function":"mmlu_logical_fallacies","hf_repo":"cais\/mmlu","hf_subset":"logical_fallacies","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:logical_fallacies","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"logical_fallacies","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:logical_fallacies","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"logical_fallacies","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:machine_learning","suite":["original","mmlu"],"prompt_function":"mmlu_machine_learning","hf_repo":"cais\/mmlu","hf_subset":"machine_learning","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:machine_learning","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"machine_learning","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:machine_learning","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"machine_learning","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:management","suite":["original","mmlu"],"prompt_function":"mmlu_management","hf_repo":"cais\/mmlu","hf_subset":"management","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:management","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"management","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:management","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"management","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:marketing","suite":["original","mmlu"],"prompt_function":"mmlu_marketing","hf_repo":"cais\/mmlu","hf_subset":"marketing","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:marketing","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"marketing","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:marketing","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"marketing","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:medical_genetics","suite":["original","mmlu"],"prompt_function":"mmlu_medical_genetics","hf_repo":"cais\/mmlu","hf_subset":"medical_genetics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:medical_genetics","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"medical_genetics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:medical_genetics","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"medical_genetics","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:miscellaneous","suite":["original","mmlu"],"prompt_function":"mmlu_miscellaneous","hf_repo":"cais\/mmlu","hf_subset":"miscellaneous","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:miscellaneous","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"miscellaneous","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:miscellaneous","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"miscellaneous","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:moral_disputes","suite":["original","mmlu"],"prompt_function":"mmlu_moral_disputes","hf_repo":"cais\/mmlu","hf_subset":"moral_disputes","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:moral_disputes","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"moral_disputes","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:moral_disputes","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"moral_disputes","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:moral_scenarios","suite":["original","mmlu"],"prompt_function":"mmlu_moral_scenarios","hf_repo":"cais\/mmlu","hf_subset":"moral_scenarios","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:moral_scenarios","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"moral_scenarios","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:moral_scenarios","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"moral_scenarios","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:nutrition","suite":["original","mmlu"],"prompt_function":"mmlu_nutrition","hf_repo":"cais\/mmlu","hf_subset":"nutrition","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:nutrition","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"nutrition","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:nutrition","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"nutrition","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:philosophy","suite":["original","mmlu"],"prompt_function":"mmlu_philosophy","hf_repo":"cais\/mmlu","hf_subset":"philosophy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:philosophy","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"philosophy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:philosophy","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"philosophy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:prehistory","suite":["original","mmlu"],"prompt_function":"mmlu_prehistory","hf_repo":"cais\/mmlu","hf_subset":"prehistory","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:prehistory","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"prehistory","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:prehistory","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"prehistory","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:professional_accounting","suite":["original","mmlu"],"prompt_function":"mmlu_professional_accounting","hf_repo":"cais\/mmlu","hf_subset":"professional_accounting","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:professional_accounting","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"professional_accounting","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:professional_accounting","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"professional_accounting","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:professional_law","suite":["original","mmlu"],"prompt_function":"mmlu_professional_law","hf_repo":"cais\/mmlu","hf_subset":"professional_law","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:professional_law","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"professional_law","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:professional_law","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"professional_law","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:professional_medicine","suite":["original","mmlu"],"prompt_function":"mmlu_professional_medicine","hf_repo":"cais\/mmlu","hf_subset":"professional_medicine","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:professional_medicine","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"professional_medicine","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:professional_medicine","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"professional_medicine","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:professional_psychology","suite":["original","mmlu"],"prompt_function":"mmlu_professional_psychology","hf_repo":"cais\/mmlu","hf_subset":"professional_psychology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:professional_psychology","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"professional_psychology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:professional_psychology","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"professional_psychology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:public_relations","suite":["original","mmlu"],"prompt_function":"mmlu_public_relations","hf_repo":"cais\/mmlu","hf_subset":"public_relations","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:public_relations","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"public_relations","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:public_relations","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"public_relations","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:security_studies","suite":["original","mmlu"],"prompt_function":"mmlu_security_studies","hf_repo":"cais\/mmlu","hf_subset":"security_studies","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:security_studies","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"security_studies","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:security_studies","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"security_studies","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:sociology","suite":["original","mmlu"],"prompt_function":"mmlu_sociology","hf_repo":"cais\/mmlu","hf_subset":"sociology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:sociology","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"sociology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:sociology","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"sociology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:us_foreign_policy","suite":["original","mmlu"],"prompt_function":"mmlu_us_foreign_policy","hf_repo":"cais\/mmlu","hf_subset":"us_foreign_policy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:us_foreign_policy","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"us_foreign_policy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:us_foreign_policy","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"us_foreign_policy","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:virology","suite":["original","mmlu"],"prompt_function":"mmlu_virology","hf_repo":"cais\/mmlu","hf_subset":"virology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:virology","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"virology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:virology","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"virology","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:world_religions","suite":["original","mmlu"],"prompt_function":"mmlu_world_religions","hf_repo":"cais\/mmlu","hf_subset":"world_religions","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:world_religions","suite":["leaderboard","mmlu"],"prompt_function":"mmlu_harness","hf_repo":"lighteval\/mmlu","hf_subset":"world_religions","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":"sequential","generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mmlu:world_religions","suite":["helm","helm_general"],"prompt_function":"mmlu_helm","hf_repo":"lighteval\/mmlu","hf_subset":"world_religions","hf_avail_splits":["auxiliary_train","test","validation","dev"],"evaluation_splits":["test"],"few_shots_split":"dev","few_shots_select":null,"generation_size":5,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mnist_ascii","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"mnist_ascii","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"modified_arithmetic","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"modified_arithmetic","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"moral_permissibility","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"moral_permissibility","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"movie_dialog_same_or_different","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"movie_dialog_same_or_different","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"movie_recommendation","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"movie_recommendation","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mtnt2019:en-fr","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"mtnt2019_en-fr","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":200,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mtnt2019:en-ja","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"mtnt2019_en-ja","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":200,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mtnt2019:fr-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"mtnt2019_fr-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":200,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mtnt2019:ja-en","suite":["lighteval","sacrebleu"],"prompt_function":"wmt_reverse_alphabetical","hf_repo":"lighteval\/sacrebleu_manual","hf_subset":"mtnt2019_ja-en","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":200,"metric":["bleu","chrf","ter"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mult_data_wrangling","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"mult_data_wrangling","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"multiemo","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"multiemo","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mutual","suite":["lighteval"],"prompt_function":"mutual","hf_repo":"lighteval\/mutual_harness","hf_subset":"mutual","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["recall_at_1","recall_at_2","mrr"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"mutual_plus","suite":["lighteval"],"prompt_function":"mutual","hf_repo":"lighteval\/mutual_harness","hf_subset":"mutual_plus","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["recall_at_1","recall_at_2","mrr"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"narrativeqa","suite":["helm","helm_general"],"prompt_function":"narrativeqa","hf_repo":"lighteval/narrative_qa_helm","hf_subset":"default","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match","quasi_exact_match","f1_score","rougeL","bleu_1","bleu_4"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"natural_instructions","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"natural_instructions","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"navigate","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"navigate","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"nonsense_words_grammar","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"nonsense_words_grammar","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"novel_concepts","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"novel_concepts","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"numeracy:linear_example","suite":["helm"],"prompt_function":"numeracy","hf_repo":"lighteval\/numeracy","hf_subset":"linear_example","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"numeracy:linear_standard","suite":["helm"],"prompt_function":"numeracy","hf_repo":"lighteval\/numeracy","hf_subset":"linear_standard","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"numeracy:parabola_example","suite":["helm"],"prompt_function":"numeracy","hf_repo":"lighteval\/numeracy","hf_subset":"parabola_example","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"numeracy:parabola_standard","suite":["helm"],"prompt_function":"numeracy","hf_repo":"lighteval\/numeracy","hf_subset":"parabola_standard","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"numeracy:paraboloid_example","suite":["helm"],"prompt_function":"numeracy","hf_repo":"lighteval\/numeracy","hf_subset":"paraboloid_example","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"numeracy:paraboloid_standard","suite":["helm"],"prompt_function":"numeracy","hf_repo":"lighteval\/numeracy","hf_subset":"paraboloid_standard","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"numeracy:plane_example","suite":["helm"],"prompt_function":"numeracy","hf_repo":"lighteval\/numeracy","hf_subset":"plane_example","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"numeracy:plane_standard","suite":["helm"],"prompt_function":"numeracy","hf_repo":"lighteval\/numeracy","hf_subset":"plane_standard","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"object_counting","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"object_counting","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"odd_one_out","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"odd_one_out","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"openbookqa","suite":["helm","commonsense_scenario","helm_general"],"prompt_function":"openbookqa_helm","hf_repo":"openbookqa","hf_subset":"main","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"openbookqa","suite":["lighteval"],"prompt_function":"openbookqa","hf_repo":"openbookqa","hf_subset":"main","hf_avail_splits":["train","test","validation"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"operators","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_whitespace_after_query","hf_repo":"bigbench","hf_subset":"operators","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":"([-+]?\\d+)[.]{0,1}$", "trust_dataset": true,"version":0}
{"name":"paragraph_segmentation","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"paragraph_segmentation","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"parsinlu_qa","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"parsinlu_qa","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"parsinlu_reading_comprehension","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"parsinlu_reading_comprehension","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["perfect_exact_match"],"stop_sequence":null,"output_regex":"[^\\.\\?\\!\\;\\n]+", "trust_dataset": true,"version":0}
{"name":"penguins_in_a_table","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"penguins_in_a_table","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"periodic_elements","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"periodic_elements","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"persian_idioms","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"persian_idioms","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"phrase_relatedness","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"phrase_relatedness","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"physical_intuition","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"physical_intuition","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"physics","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"physics","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"physics_questions","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"physics_questions","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"piqa","suite":["lighteval"],"prompt_function":"piqa_harness","hf_repo":"piqa","hf_subset":"plain_text","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"piqa","suite":["helm","commonsense_scenario"],"prompt_function":"piqa_helm","hf_repo":"piqa","hf_subset":"plain_text","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"play_dialog_same_or_different","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"play_dialog_same_or_different","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"polish_sequence_labeling","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"polish_sequence_labeling","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["f1_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"presuppositions_as_nli","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"presuppositions_as_nli","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"prost","suite":["lighteval"],"prompt_function":"prost","hf_repo":"corypaik\/prost","hf_subset":"default","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"pubmedqa","suite":["lighteval"],"prompt_function":"pubmed_qa","hf_repo":"pubmed_qa","hf_subset":"pqa_labeled","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"pubmedqa","suite":["helm"],"prompt_function":"pubmed_qa_helm","hf_repo":"pubmed_qa","hf_subset":"pqa_labeled","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"qa4mre:2011","suite":["lighteval"],"prompt_function":"qa4mre","hf_repo":"qa4mre","hf_subset":"2011.main.EN","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"qa4mre:2012","suite":["lighteval"],"prompt_function":"qa4mre","hf_repo":"qa4mre","hf_subset":"2012.main.EN","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"qa4mre:2013","suite":["lighteval"],"prompt_function":"qa4mre","hf_repo":"qa4mre","hf_subset":"2013.main.EN","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"qa_wikidata","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"qa_wikidata","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleurt","bleu","rouge_t5","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"qasper","suite":["lighteval"],"prompt_function":"qasper","hf_repo":"qasper","hf_subset":"qasper","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["f1_score_quasi"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"qasper_ll","suite":["lighteval"],"prompt_function":"qasper_ll","hf_repo":"qasper","hf_subset":"qasper","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"quac","suite":["helm"],"prompt_function":"quac","hf_repo":"lighteval/quac_helm","hf_subset":"default","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["exact_match", "quasi_exact_match", "f1_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"question_selection","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"question_selection","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"race:high","suite":["lighteval","race"],"prompt_function":"race","hf_repo":"EleutherAI/race","hf_subset":"high","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"raft:ade_corpus_v2","suite":["helm","helm_general"],"prompt_function":"raft_ade_corpus_v2","hf_repo":"ought\/raft","hf_subset":"ade_corpus_v2","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"raft:banking_77","suite":["helm","helm_general"],"prompt_function":"raft_banking_77","hf_repo":"ought\/raft","hf_subset":"banking_77","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"raft:neurips_impact_statement_risks","suite":["helm","helm_general"],"prompt_function":"raft_neurips_impact_statement_risks","hf_repo":"ought\/raft","hf_subset":"neurips_impact_statement_risks","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"raft:one_stop_english","suite":["helm","helm_general"],"prompt_function":"raft_one_stop_english","hf_repo":"ought\/raft","hf_subset":"one_stop_english","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"raft:overruling","suite":["helm","helm_general"],"prompt_function":"raft_overruling","hf_repo":"ought\/raft","hf_subset":"overruling","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"raft:semiconductor_org_types","suite":["helm","helm_general"],"prompt_function":"raft_semiconductor_org_types","hf_repo":"ought\/raft","hf_subset":"semiconductor_org_types","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"raft:systematic_review_inclusion","suite":["helm","helm_general"],"prompt_function":"raft_systematic_review_inclusion","hf_repo":"ought\/raft","hf_subset":"systematic_review_inclusion","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"raft:tai_safety_research","suite":["helm","helm_general"],"prompt_function":"raft_tai_safety_research","hf_repo":"ought\/raft","hf_subset":"tai_safety_research","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"raft:terms_of_service","suite":["helm","helm_general"],"prompt_function":"raft_terms_of_service","hf_repo":"ought\/raft","hf_subset":"terms_of_service","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"raft:tweet_eval_hate","suite":["helm","helm_general"],"prompt_function":"raft_tweet_eval_hate","hf_repo":"ought\/raft","hf_subset":"tweet_eval_hate","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"raft:twitter_complaints","suite":["helm","helm_general"],"prompt_function":"raft_twitter_complaints","hf_repo":"ought\/raft","hf_subset":"twitter_complaints","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":30,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match","f1_score_macro","f1_score_micro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"real_or_fake_text","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"real_or_fake_text","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"real_toxicity_prompts","suite":["helm"],"prompt_function":"real_toxicity_prompts","hf_repo":"allenai\/real-toxicity-prompts","hf_subset":"default","hf_avail_splits":["train"],"evaluation_splits":["train"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["prediction_perplexity"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"reasoning_about_colored_objects","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"reasoning_about_colored_objects","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"repeat_copy_logic","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_whitespace_after_query","hf_repo":"bigbench","hf_subset":"repeat_copy_logic","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":100,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"rephrase","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"rephrase","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["rouge_t5","bleu","loglikelihood_acc","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"rhyming","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"rhyming","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"riddle_sense","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"riddle_sense","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"ruin_names","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"ruin_names","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"salient_translation_error_detection","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"salient_translation_error_detection","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"scientific_press_release","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"scientific_press_release","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"sciq","suite":["lighteval"],"prompt_function":"sciq","hf_repo":"sciq","hf_subset":"default","hf_avail_splits":["train","validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"semantic_parsing_in_context_sparc","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"semantic_parsing_in_context_sparc","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"semantic_parsing_spider","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"semantic_parsing_spider","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"sentence_ambiguity","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"sentence_ambiguity","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"similarities_abstraction","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"similarities_abstraction","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5","loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"simp_turing_concept","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"simp_turing_concept","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"simple_arithmetic_json","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"simple_arithmetic_json","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"simple_arithmetic_json_multiple_choice","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"simple_arithmetic_json_multiple_choice","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"simple_arithmetic_json_subtasks","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"simple_arithmetic_json_subtasks","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"simple_arithmetic_multiple_targets_json","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"simple_arithmetic_multiple_targets_json","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"simple_ethical_questions","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"simple_ethical_questions","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"simple_text_editing","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"simple_text_editing","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"siqa","suite":["helm","commonsense_scenario"],"prompt_function":"siqa","hf_repo":"social_i_qa","hf_subset":"default","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"snarks","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"snarks","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"social_iqa","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"social_iqa","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"social_support","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"social_support","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["f1_score_macro"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"sports_understanding","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"sports_understanding","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"storycloze:2016","suite":["lighteval","storycloze"],"prompt_function":"storycloze","hf_repo":"story_cloze","hf_subset":"2016","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"storycloze:2018","suite":["lighteval","storycloze"],"prompt_function":"storycloze","hf_repo":"story_cloze","hf_subset":"2018","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"strange_stories","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_whitespace_after_query","hf_repo":"bigbench","hf_subset":"strange_stories","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"strategyqa","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"strategyqa","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5","loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"sufficient_information","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"sufficient_information","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"suicide_risk","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"suicide_risk","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"summarization:cnn-dm","suite":["helm","helm_general"],"prompt_function":"cnn_dm","hf_repo":"lighteval\/summarization","hf_subset":"cnn-dm","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":128,"metric":["rouge1","rouge2","rougeL","faithfulness","extractiveness","bert_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"summarization:xsum","suite":["helm","helm_general"],"prompt_function":"xsum","hf_repo":"lighteval\/summarization","hf_subset":"xsum","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":64,"metric":["rouge1","rouge2","rougeL","faithfulness","extractiveness","bert_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"summarization:xsum-sampled","suite":["helm"],"prompt_function":"xsum","hf_repo":"lighteval\/summarization","hf_subset":"xsum-sampled","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":64,"metric":["rouge1","rouge2","rougeL","faithfulness","extractiveness","bert_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"super_glue:boolq","suite":["lighteval","superglue"],"prompt_function":"boolq_harness","hf_repo":"super_glue","hf_subset":"boolq","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"super_glue:cb","suite":["lighteval","superglue"],"prompt_function":"cb","hf_repo":"super_glue","hf_subset":"cb","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc_single_token", "multi_f1_numeric"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"super_glue:copa","suite":["lighteval","superglue"],"prompt_function":"copa","hf_repo":"super_glue","hf_subset":"copa","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"super_glue:rte","suite":["lighteval","superglue"],"prompt_function":"rte","hf_repo":"super_glue","hf_subset":"rte","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"super_glue:multirc","suite":["lighteval","superglue"],"prompt_function":"multirc","hf_repo":"super_glue","hf_subset":"multirc","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"super_glue:wic","suite":["lighteval","superglue"],"prompt_function":"wic","hf_repo":"super_glue","hf_subset":"wic","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"super_glue:wsc","suite":["lighteval","superglue"],"prompt_function":"wsc","hf_repo":"super_glue","hf_subset":"wsc","hf_avail_splits":["test","train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc_single_token"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"swahili_english_proverbs","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"swahili_english_proverbs","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"swag","suite":["lighteval"],"prompt_function":"swag","hf_repo":"swag","hf_subset":"regular","hf_avail_splits":["train","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm_nospace"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"swedish_to_german_proverbs","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"swedish_to_german_proverbs","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"symbol_interpretation","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_linefeed_before_whitespace_after_query","hf_repo":"bigbench","hf_subset":"symbol_interpretation","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"synthetic_reasoning:induction","suite":["helm"],"prompt_function":"synthetic_reasoning","hf_repo":"lighteval\/synthetic_reasoning","hf_subset":"induction","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":50,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"synthetic_reasoning:natural_easy","suite":["helm"],"prompt_function":"synthetic_reasoning_natural","hf_repo":"lighteval\/synthetic_reasoning_natural","hf_subset":"easy","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","f1_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"synthetic_reasoning:natural_hard","suite":["helm"],"prompt_function":"synthetic_reasoning_natural","hf_repo":"lighteval\/synthetic_reasoning_natural","hf_subset":"hard","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["exact_match","f1_score"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"synthetic_reasoning:pattern_match","suite":["helm"],"prompt_function":"synthetic_reasoning","hf_repo":"lighteval\/synthetic_reasoning","hf_subset":"pattern_match","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":50,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"synthetic_reasoning:variable_substitution","suite":["helm"],"prompt_function":"synthetic_reasoning","hf_repo":"lighteval\/synthetic_reasoning","hf_subset":"variable_substitution","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation","test"],"few_shots_split":null,"few_shots_select":null,"generation_size":50,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"tellmewhy","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"tellmewhy","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"temporal_sequences","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"temporal_sequences","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"tense","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"tense","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:arxiv","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_arxiv","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:arxiv","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"arxiv","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:bibliotik","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"bibliotik","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:bookcorpus2","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_bookcorpus2","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:books3","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_books3","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:commoncrawl","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"commoncrawl","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:dm-mathematics","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_dm-mathematics","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:dm-mathematics","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"dm-mathematics","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:enron","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_enron","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:enron","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"enron","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:europarl","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_europarl","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:europarl","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"europarl","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:freelaw","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_freelaw","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:freelaw","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"freelaw","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:github","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_github","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:github","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"github","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:gutenberg","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_gutenberg","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:gutenberg","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"gutenberg","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:hackernews","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_hackernews","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:hackernews","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"hackernews","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:nih-exporter","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_nih-exporter","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:nih-exporter","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"nih-exporter","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:opensubtitles","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_opensubtitles","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:opensubtitles","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"opensubtitles","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:openwebtext2","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_openwebtext2","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:openwebtext2","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"openwebtext2","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:philpapers","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_philpapers","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:pile-cc","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_pile-cc","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:pubmed-abstracts","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_pubmed-abstracts","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:pubmed-abstracts","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"pubmed-abstracts","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:pubmed-central","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_pubmed-central","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:pubmed-central","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"pubmed-central","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:stackexchange","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_stackexchange","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:stackexchange","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"stackexchange","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:ubuntu-irc","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_ubuntu-irc","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:uspto","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_upsto","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:uspto","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"uspto","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:wikipedia","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_wikipedia","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:wikipedia","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"wikipedia","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:youtubesubtitles","suite":["lighteval","pile"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile","hf_subset":"pile_youtubesubtitles","hf_avail_splits":["validation","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"the_pile:youtubesubtitles","suite":["helm"],"prompt_function":"the_pile","hf_repo":"lighteval\/pile_helm","hf_subset":"youtubesubtitles","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"timedial","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"timedial","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"toxigen","suite":["lighteval"],"prompt_function":"toxigen","hf_repo":"skg/toxigen-data","hf_subset":"annotated","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["loglikelihood_acc","loglikelihood_acc_norm"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"topical_chat","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"topical_chat","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["bleu","rouge_t5","loglikelihood_acc","bleurt"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"tracking_shuffled_objects","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"tracking_shuffled_objects","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"triviaqa","suite":["lighteval"],"prompt_function":"triviaqa","hf_repo":"trivia_qa","hf_subset":"rc.nocontext","hf_avail_splits":["train","test","validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":20,"metric":["quasi_exact_match_triviaqa"],"stop_sequence":["\n", ".", ","],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"truthfulqa:gen","suite":["lighteval"],"prompt_function":"truthful_qa_generative","hf_repo":"truthful_qa","hf_subset":"generation","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":200,"metric":["bleu","rouge_t5"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"truthfulqa:mc","suite":["leaderboard"],"prompt_function":"truthful_qa_multiple_choice","hf_repo":"truthful_qa","hf_subset":"multiple_choice","hf_avail_splits":["validation"],"evaluation_splits":["validation"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["truthfulqa_mc_metrics"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"truthfulqa","suite":["helm","helm_general"],"prompt_function":"truthful_qa_helm","hf_repo":"lighteval\/truthfulqa_helm","hf_subset":"default","hf_avail_splits":["train","valid"],"evaluation_splits":["valid"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["loglikelihood_acc","exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"twitterAAE:aa","suite":["helm"],"prompt_function":"twitter_aae","hf_repo":"lighteval\/twitterAAE","hf_subset":"aa","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"twitterAAE:white","suite":["helm"],"prompt_function":"twitter_aae","hf_repo":"lighteval\/twitterAAE","hf_subset":"white","hf_avail_splits":["test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["word_perplexity","byte_perplexity","bits_per_byte"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"understanding_fables","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"understanding_fables","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"undo_permutation","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"undo_permutation","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"unit_conversion","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"unit_conversion","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"unit_interpretation","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"unit_interpretation","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"unnatural_in_context_learning","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"unnatural_in_context_learning","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"unscramble:anagrams1","suite":["lighteval","unscramble"],"prompt_function":"unscramble","hf_repo":"lighteval\/GPT3_unscramble","hf_subset":"default","hf_avail_splits":["mid_word_1_anagrams"],"evaluation_splits":["mid_word_1_anagrams"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"unscramble:anagrams2","suite":["lighteval","unscramble"],"prompt_function":"unscramble","hf_repo":"lighteval\/GPT3_unscramble","hf_subset":"default","hf_avail_splits":["mid_word_2_anagrams"],"evaluation_splits":["mid_word_2_anagrams"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"unscramble:cycle_letters","suite":["lighteval","unscramble"],"prompt_function":"unscramble","hf_repo":"lighteval\/GPT3_unscramble","hf_subset":"default","hf_avail_splits":["cycle_letters_in_word"],"evaluation_splits":["cycle_letters_in_word"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"unscramble:random_insertion","suite":["lighteval","unscramble"],"prompt_function":"unscramble","hf_repo":"lighteval\/GPT3_unscramble","hf_subset":"default","hf_avail_splits":["random_insertion_in_word"],"evaluation_splits":["random_insertion_in_word"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"unscramble:reversed_words","suite":["lighteval","unscramble"],"prompt_function":"unscramble","hf_repo":"lighteval\/GPT3_unscramble","hf_subset":"default","hf_avail_splits":["reversed_words"],"evaluation_splits":["reversed_words"],"few_shots_split":null,"few_shots_select":null,"generation_size":5,"metric":["perfect_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"vitaminc_fact_verification","suite":["bigbench_lite","bigbench","bigbench_json"],"prompt_function":"bigbench_whitespace_after_query","hf_repo":"bigbench","hf_subset":"vitaminc_fact_verification","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"webqs","suite":["lighteval"],"prompt_function":"webqs","hf_repo":"web_questions","hf_subset":"default","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":-1,"metric":["acc_golds_likelihood"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"what_is_the_tao","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"what_is_the_tao","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"which_wiki_edit","suite":["bigbench","bigbench_json"],"prompt_function":"bigbench","hf_repo":"bigbench","hf_subset":"which_wiki_edit","hf_avail_splits":["default","train","validation"],"evaluation_splits":["default"],"few_shots_split":null,"few_shots_select":null,"generation_size":1,"metric":["loglikelihood_acc"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:applies_to_jurisdiction","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"applies_to_jurisdiction","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:atomic_number","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"atomic_number","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:author","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"author","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:award_received","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"award_received","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:basic_form_of_government","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"basic_form_of_government","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:capital","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"capital","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:capital_of","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"capital_of","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:central_bank","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"central_bank","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:composer","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"composer","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:continent","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"continent","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:country","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"country","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:country_of_citizenship","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"country_of_citizenship","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:country_of_origin","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"country_of_origin","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:creator","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"creator","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:currency","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"currency","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:defendant","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"defendant","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:developer","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"developer","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:diplomatic_relation","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"diplomatic_relation","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:director","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"director","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:discoverer_or_inventor","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"discoverer_or_inventor","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:drug_or_therapy_used_for_treatment","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"drug_or_therapy_used_for_treatment","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:educated_at","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"educated_at","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:electron_configuration","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"electron_configuration","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:employer","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"employer","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:field_of_work","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"field_of_work","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:file_extension","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"file_extension","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:genetic_association","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"genetic_association","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:genre","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"genre","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:has_part","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"has_part","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:head_of_government","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"head_of_government","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:head_of_state","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"head_of_state","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:headquarters_location","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"headquarters_location","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:industry","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"industry","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:influenced_by","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"influenced_by","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:instance_of","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"instance_of","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:instrument","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"instrument","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:language_of_work_or_name","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"language_of_work_or_name","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:languages_spoken_written_or_signed","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"languages_spoken_written_or_signed","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:laws_applied","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"laws_applied","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:located_in_the_administrative_territorial_entity","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"located_in_the_administrative_territorial_entity","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:location","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"location","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:location_of_discovery","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"location_of_discovery","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:location_of_formation","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"location_of_formation","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:majority_opinion_by","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"majority_opinion_by","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:manufacturer","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"manufacturer","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:measured_physical_quantity","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"measured_physical_quantity","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:medical_condition_treated","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"medical_condition_treated","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:member_of","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"member_of","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:member_of_political_party","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"member_of_political_party","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:member_of_sports_team","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"member_of_sports_team","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:movement","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"movement","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:named_after","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"named_after","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:native_language","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"native_language","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:number_of_processor_cores","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"number_of_processor_cores","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:occupation","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"occupation","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:office_held_by_head_of_government","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"office_held_by_head_of_government","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}
{"name":"wikifact:office_held_by_head_of_state","suite":["helm"],"prompt_function":"wikifact","hf_repo":"lighteval\/wikifact","hf_subset":"office_held_by_head_of_state","hf_avail_splits":["train","test"],"evaluation_splits":["test"],"few_shots_split":null,"few_shots_select":null,"generation_size":8,"metric":["exact_match","quasi_exact_match","prefix_exact_match","prefix_quasi_exact_match"],"stop_sequence":["\n"],"output_regex":null,"frozen":false, "trust_dataset": true,"version":0}