Skip to content

Commit

Permalink
add ESTJ
Browse files Browse the repository at this point in the history
  • Loading branch information
WangRongsheng committed Jan 28, 2024
1 parent 5b9c821 commit 0d35c19
Showing 1 changed file with 91 additions and 74 deletions.
165 changes: 91 additions & 74 deletions results/ESTJ.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
dataset,version,metric,mode,opencompass.models.huggingface.HuggingFace_FarReelAILab_Machine_Mindset_en_ESTJ
GaokaoBench_2010-2022_Math_II_MCQs,-,-,-,-
GaokaoBench_2010-2022_Math_I_MCQs,-,-,-,-
GaokaoBench_2010-2022_History_MCQs,-,-,-,-
GaokaoBench_2010-2022_Biology_MCQs,-,-,-,-
GaokaoBench_2010-2022_Political_Science_MCQs,-,-,-,-
GaokaoBench_2010-2022_Math_II_MCQs,5b0b29,score,gen,0.46
GaokaoBench_2010-2022_Math_I_MCQs,5b0b29,score,gen,0
GaokaoBench_2010-2022_History_MCQs,3613b5,score,gen,0.35
GaokaoBench_2010-2022_Biology_MCQs,d26e80,score,gen,0.67
GaokaoBench_2010-2022_Political_Science_MCQs,70fce9,score,gen,0
GaokaoBench_2010-2022_Physics_MCQs,8a0c30,score,gen,0
GaokaoBench_2010-2022_Chemistry_MCQs,852bbd,score,gen,0
GaokaoBench_2010-2013_English_MCQs,01b50f,score,gen,4.76
Expand Down Expand Up @@ -31,22 +31,22 @@ GaokaoBench_2010-2022_Chinese_Language_Practical_Text_Reading,4dea5a,score,gen,0
GaokaoBench_2010-2022_Chinese_Language_Literary_Text_Reading,979d8b,score,gen,0
GaokaoBench_2010-2022_Chinese_Language_Classical_Chinese_Reading,9de717,score,gen,0
GaokaoBench_2010-2022_Chinese_Language_Language_and_Writing_Skills_Open-ended_Questions,d2ed84,score,gen,0
agieval-gaokao-chinese,-,-,-,-
agieval-gaokao-english,-,-,-,-
agieval-gaokao-chinese,774562,accuracy,gen,1.22
agieval-gaokao-english,cb5bc6,accuracy,gen,6.21
agieval-gaokao-geography,2ca56f,accuracy,gen,3.52
agieval-gaokao-history,-,-,-,-
agieval-gaokao-biology,-,-,-,-
agieval-gaokao-chemistry,-,-,-,-
agieval-gaokao-history,9c3ae0,accuracy,gen,2.98
agieval-gaokao-biology,277c85,accuracy,gen,3.33
agieval-gaokao-chemistry,d62fd4,accuracy,gen,1.93
agieval-gaokao-mathqa,e2ea74,accuracy,gen,3.13
agieval-logiqa-zh,03474d,accuracy,gen,5.53
agieval-lsat-ar,-,-,-,-
agieval-lsat-ar,ed1edf,accuracy,gen,0
agieval-lsat-lr,ec6882,accuracy,gen,0.2
agieval-lsat-rc,-,-,-,-
agieval-lsat-rc,33077d,accuracy,gen,0.74
agieval-logiqa-en,c6ee60,accuracy,gen,0.31
agieval-sat-math,-,-,-,-
agieval-sat-math,6c970d,accuracy,gen,2.73
agieval-sat-en,4e3fef,accuracy,gen,10.68
agieval-sat-en-without-passage,4e3fef,accuracy,gen,0.49
agieval-aqua-rat,-,-,-,-
agieval-aqua-rat,c090ca,accuracy,gen,0
agieval-gaokao-physics,7.70E+06,accuracy,gen,3.5
agieval-jec-qa-kd,b1e586,accuracy,gen,2.5
agieval-jec-qa-ca,47bc29,accuracy,gen,1.9
Expand Down Expand Up @@ -104,135 +104,152 @@ ceval-fire_engineer,bc23f5,accuracy,gen,22.58
ceval-environmental_impact_assessment_engineer,c64e2d,accuracy,gen,22.58
ceval-tax_accountant,3a5e3c,accuracy,gen,30.61
ceval-physician,6e277d,accuracy,gen,28.57
cmmlu-agronomy,-,-,-,-
cmmlu-anatomy,-,-,-,-
cmmlu-agronomy,4c7f2c,accuracy,gen,21.89
cmmlu-anatomy,ea09bf,accuracy,gen,25.68
cmmlu-ancient_chinese,f7c97f,accuracy,gen,26.22
cmmlu-arts,dd77b8,accuracy,gen,25
cmmlu-astronomy,-,-,-,-
cmmlu-business_ethics,-,-,-,-
cmmlu-astronomy,1e49db,accuracy,gen,15.76
cmmlu-business_ethics,dc78cb,accuracy,gen,21.05
cmmlu-chinese_civil_service_exam,1de82c,accuracy,gen,13.75
cmmlu-chinese_driving_rule,b8a42b,accuracy,gen,16.79
cmmlu-chinese_food_culture,-,-,-,-
cmmlu-chinese_food_culture,2d568a,accuracy,gen,25
cmmlu-chinese_foreign_policy,dc2427,accuracy,gen,18.69
cmmlu-chinese_history,-,-,-,-
cmmlu-chinese_history,4cc7ed,accuracy,gen,6.81
cmmlu-chinese_literature,af3c41,accuracy,gen,23.04
cmmlu-chinese_teacher_qualification,-,-,-,-
cmmlu-clinical_knowledge,-,-,-,-
cmmlu-chinese_teacher_qualification,87de11,accuracy,gen,17.88
cmmlu-clinical_knowledge,c55b1d,accuracy,gen,10.55
cmmlu-college_actuarial_science,d3c360,accuracy,gen,0
cmmlu-college_education,df8790,accuracy,gen,30.84
cmmlu-college_engineering_hydrology,673f23,accuracy,gen,21.7
cmmlu-college_law,524c3a,accuracy,gen,11.11
cmmlu-college_mathematics,e4ebad,accuracy,gen,8.57
cmmlu-college_medical_statistics,55af35,accuracy,gen,16.98
cmmlu-college_medicine,-,-,-,-
cmmlu-college_medicine,702f48,accuracy,gen,23.08
cmmlu-computer_science,637007,accuracy,gen,18.63
cmmlu-computer_security,-,-,-,-
cmmlu-conceptual_physics,-,-,-,-
cmmlu-construction_project_management,-,-,-,-
cmmlu-computer_security,932b6b,accuracy,gen,19.3
cmmlu-conceptual_physics,cfc077,accuracy,gen,2.72
cmmlu-construction_project_management,968a4a,accuracy,gen,19.42
cmmlu-economics,ddaf7c,accuracy,gen,23.9
cmmlu-education,c35963,accuracy,gen,22.09
cmmlu-electrical_engineering,-,-,-,-
cmmlu-elementary_chinese,-,-,-,-
cmmlu-elementary_commonsense,-,-,-,-
cmmlu-elementary_information_and_technology,-,-,-,-
cmmlu-elementary_mathematics,-,-,-,-
cmmlu-ethnology,-,-,-,-
cmmlu-food_science,-,-,-,-
cmmlu-genetics,-,-,-,-
cmmlu-global_facts,-,-,-,-
cmmlu-high_school_biology,-,-,-,-
cmmlu-electrical_engineering,70e98a,accuracy,gen,20.93
cmmlu-elementary_chinese,cbcd6a,accuracy,gen,28.17
cmmlu-elementary_commonsense,a67f37,accuracy,gen,22.73
cmmlu-elementary_information_and_technology,d34d2a,accuracy,gen,19.75
cmmlu-elementary_mathematics,a9d403,accuracy,gen,24.78
cmmlu-ethnology,31955f,accuracy,gen,9.63
cmmlu-food_science,741d8e,accuracy,gen,24.48
cmmlu-genetics,c326f7,accuracy,gen,21.02
cmmlu-global_facts,0a1236,accuracy,gen,22.15
cmmlu-high_school_biology,2be811,accuracy,gen,8.28
cmmlu-high_school_chemistry,d63c05,accuracy,gen,18.18
cmmlu-high_school_geography,5cd489,accuracy,gen,22.03
cmmlu-high_school_mathematics,6b2087,accuracy,gen,17.68
cmmlu-high_school_physics,3df353,accuracy,gen,2.73
cmmlu-high_school_politics,-,-,-,-
cmmlu-high_school_politics,7a88d8,accuracy,gen,7.69
cmmlu-human_sexuality,54ac98,accuracy,gen,20.63
cmmlu-international_law,-,-,-,-
cmmlu-journalism,-,-,-,-
cmmlu-international_law,0f5d40,accuracy,gen,22.16
cmmlu-journalism,a4f6a0,accuracy,gen,19.77
cmmlu-jurisprudence,7843da,accuracy,gen,22.38
cmmlu-legal_and_moral_basis,-,-,-,-
cmmlu-legal_and_moral_basis,f906b0,accuracy,gen,20.56
cmmlu-logical,15a71b,accuracy,gen,22.76
cmmlu-machine_learning,bc6ad4,accuracy,gen,6.56
cmmlu-management,-,-,-,-
cmmlu-marketing,-,-,-,-
cmmlu-marxist_theory,-,-,-,-
cmmlu-management,e5e8db,accuracy,gen,23.33
cmmlu-marketing,8b4c18,accuracy,gen,14.44
cmmlu-marxist_theory,75eb79,accuracy,gen,23.28
cmmlu-modern_chinese,83a9b7,accuracy,gen,23.28
cmmlu-nutrition,-,-,-,-
cmmlu-nutrition,adfff7,accuracy,gen,20.69
cmmlu-philosophy,75e22d,accuracy,gen,23.81
cmmlu-professional_accounting,-,-,-,-
cmmlu-professional_accounting,0edc91,accuracy,gen,23.43
cmmlu-professional_law,d24af5,accuracy,gen,17.54
cmmlu-professional_medicine,134139,accuracy,gen,18.88
cmmlu-professional_psychology,-,-,-,-
cmmlu-public_relations,-,-,-,-
cmmlu-professional_psychology,ec920e,accuracy,gen,24.57
cmmlu-public_relations,70ee06,accuracy,gen,22.99
cmmlu-security_study,45f96f,accuracy,gen,23.7
cmmlu-sociology,-,-,-,-
cmmlu-sociology,485285,accuracy,gen,25.22
cmmlu-sports_science,838cfe,accuracy,gen,21.82
cmmlu-traditional_chinese_medicine,-,-,-,-
cmmlu-virology,-,-,-,-
cmmlu-traditional_chinese_medicine,3bbf64,accuracy,gen,15.14
cmmlu-virology,8925bf,accuracy,gen,25.44
cmmlu-world_history,57c97c,accuracy,gen,0.62
cmmlu-world_religions,1d0f4b,accuracy,gen,21.25
gsm8k,1d7fe4,accuracy,gen,2.81
math,-,-,-,-
lukaemon_mmlu_college_biology,-,-,-,-
math,265cce,accuracy,gen,0.12
lukaemon_mmlu_college_biology,8c2e29,accuracy,gen,0
lukaemon_mmlu_college_chemistry,0afccd,accuracy,gen,5
lukaemon_mmlu_college_computer_science,c1c1b4,accuracy,gen,0
lukaemon_mmlu_college_mathematics,9deed0,accuracy,gen,2
lukaemon_mmlu_college_physics,f5cf5e,accuracy,gen,0
lukaemon_mmlu_electrical_engineering,-,-,-,-
lukaemon_mmlu_electrical_engineering,3d694d,accuracy,gen,0.69
lukaemon_mmlu_astronomy,7ef16f,accuracy,gen,0
lukaemon_mmlu_anatomy,2d597d,accuracy,gen,0
lukaemon_mmlu_abstract_algebra,ec092c,accuracy,gen,1
lukaemon_mmlu_machine_learning,d489ae,accuracy,gen,0.89
lukaemon_mmlu_clinical_knowledge,-,-,-,-
lukaemon_mmlu_clinical_knowledge,af10df,accuracy,gen,1.51
lukaemon_mmlu_global_facts,cad9e0,accuracy,gen,1
lukaemon_mmlu_management,65f310,accuracy,gen,0
lukaemon_mmlu_nutrition,-,-,-,-
lukaemon_mmlu_marketing,-,-,-,-
lukaemon_mmlu_professional_accounting,-,-,-,-
lukaemon_mmlu_high_school_geography,-,-,-,-
lukaemon_mmlu_nutrition,80bf96,accuracy,gen,0.33
lukaemon_mmlu_marketing,9a98c0,accuracy,gen,0.43
lukaemon_mmlu_professional_accounting,9cc7e2,accuracy,gen,0
lukaemon_mmlu_high_school_geography,c28a4c,accuracy,gen,0.51
lukaemon_mmlu_international_law,408d4e,accuracy,gen,0
lukaemon_mmlu_moral_scenarios,9f30a6,accuracy,gen,0
lukaemon_mmlu_computer_security,2753c1,accuracy,gen,0
lukaemon_mmlu_high_school_microeconomics,-,-,-,-
lukaemon_mmlu_professional_law,-,-,-,-
lukaemon_mmlu_high_school_microeconomics,af9eae,accuracy,gen,0
lukaemon_mmlu_professional_law,7c7a62,accuracy,gen,7.3
lukaemon_mmlu_medical_genetics,b1a3a7,accuracy,gen,0
lukaemon_mmlu_professional_psychology,c6b790,accuracy,gen,0.65
lukaemon_mmlu_jurisprudence,f41074,accuracy,gen,0
lukaemon_mmlu_world_religions,-,-,-,-
lukaemon_mmlu_philosophy,-,-,-,-
lukaemon_mmlu_virology,-,-,-,-
lukaemon_mmlu_world_religions,d44a95,accuracy,gen,2.92
lukaemon_mmlu_philosophy,d36ef3,accuracy,gen,1.29
lukaemon_mmlu_virology,0a5f8e,accuracy,gen,0
lukaemon_mmlu_high_school_chemistry,5b2ef9,accuracy,gen,0.49
lukaemon_mmlu_public_relations,4c7898,accuracy,gen,0.91
lukaemon_mmlu_high_school_macroeconomics,3f841b,accuracy,gen,0
lukaemon_mmlu_human_sexuality,4d1f3e,accuracy,gen,0.76
lukaemon_mmlu_elementary_mathematics,0f5d3a,accuracy,gen,1.06
lukaemon_mmlu_high_school_physics,-,-,-,-
lukaemon_mmlu_high_school_physics,0dd929,accuracy,gen,0.66
lukaemon_mmlu_high_school_computer_science,bf31fd,accuracy,gen,1
lukaemon_mmlu_high_school_european_history,d1b67e,accuracy,gen,3.03
lukaemon_mmlu_business_ethics,af53f3,accuracy,gen,0
lukaemon_mmlu_moral_disputes,48239e,accuracy,gen,0
lukaemon_mmlu_high_school_statistics,-,-,-,-
lukaemon_mmlu_high_school_statistics,47e18e,accuracy,gen,0
lukaemon_mmlu_miscellaneous,573569,accuracy,gen,1.53
lukaemon_mmlu_formal_logic,7a0414,accuracy,gen,0.79
lukaemon_mmlu_high_school_government_and_politics,-,-,-,-
lukaemon_mmlu_high_school_government_and_politics,d907eb,accuracy,gen,0
lukaemon_mmlu_prehistory,65aa94,accuracy,gen,0.62
lukaemon_mmlu_security_studies,-,-,-,-
lukaemon_mmlu_high_school_biology,-,-,-,-
lukaemon_mmlu_security_studies,9ea7d3,accuracy,gen,0
lukaemon_mmlu_high_school_biology,775183,accuracy,gen,0
lukaemon_mmlu_logical_fallacies,19746a,accuracy,gen,0
lukaemon_mmlu_high_school_world_history,-,-,-,-
lukaemon_mmlu_professional_medicine,-,-,-,-
lukaemon_mmlu_high_school_mathematics,-,-,-,-
lukaemon_mmlu_college_medicine,-,-,-,-
lukaemon_mmlu_high_school_world_history,6665dc,accuracy,gen,1.27
lukaemon_mmlu_professional_medicine,a05bab,accuracy,gen,0
lukaemon_mmlu_high_school_mathematics,0e6a7e,accuracy,gen,1.85
lukaemon_mmlu_college_medicine,5215f1,accuracy,gen,0
lukaemon_mmlu_high_school_us_history,b5f235,accuracy,gen,3.92
lukaemon_mmlu_sociology,4980ec,accuracy,gen,0
lukaemon_mmlu_econometrics,4d590b,accuracy,gen,0.88
lukaemon_mmlu_high_school_psychology,4.40E+98,accuracy,gen,0
lukaemon_mmlu_human_aging,-,-,-,-
lukaemon_mmlu_human_aging,d0a8e1,accuracy,gen,1.79
lukaemon_mmlu_us_foreign_policy,adcc88,accuracy,gen,0
lukaemon_mmlu_conceptual_physics,-,-,-,-
lukaemon_mmlu_conceptual_physics,a111d3,accuracy,gen,0.43
agieval-chinese,-,naive_average,gen,3.05
agieval-english,-,naive_average,gen,1.72
agieval-gaokao,-,naive_average,gen,2.96
agieval,-,naive_average,gen,2.48
mmlu-humanities,-,naive_average,gen,1.63
mmlu-stem,-,naive_average,gen,0.79
mmlu-social-science,-,naive_average,gen,0.31
mmlu-other,-,naive_average,gen,0.51
mmlu,-,naive_average,gen,0.82
mmlu-weighted,-,weighted_average,gen,1.38
cmmlu-humanities,-,naive_average,gen,18.61
cmmlu-stem,-,naive_average,gen,15.04
cmmlu-social-science,-,naive_average,gen,21.26
cmmlu-other,-,naive_average,gen,19.71
cmmlu-china-specific,-,naive_average,gen,18.28
cmmlu,-,naive_average,gen,18.82
ceval-stem,-,naive_average,gen,27.04
ceval-social-science,-,naive_average,gen,22.68
ceval-humanities,-,naive_average,gen,23.01
ceval-other,-,naive_average,gen,27.42
ceval-hard,-,naive_average,gen,28.92
ceval,-,naive_average,gen,25.43
GaokaoBench,-,weighted_average,gen,2.36

0 comments on commit 0d35c19

Please sign in to comment.