| task |
dataset |
metrics |
| type |
name |
| text-generation |
Single Choice Question |
|
| type |
name |
| lianghsun/tw-legal-benchmark-v1 |
tw-legal-benchmark-v1 |
|
| name |
type |
value |
| single choice |
accuracy |
31.1 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| text-generation |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| lianghsun/Formosa-bench |
(Society) Formosa Taiwan Knowledge Bench |
society |
test |
v2024.11.27 |
|
| name |
type |
value |
| single choice |
accuracy |
60.42 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| text-generation |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| lianghsun/Formosa-bench |
(Government) Formosa Taiwan Knowledge Bench |
governmnt |
test |
v2024.11.27 |
|
| name |
type |
value |
| single choice |
accuracy |
44.25 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| text-generation |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| lianghsun/Formosa-bench |
(Geography) Formosa Taiwan Knowledge Bench |
geography |
test |
v2024.11.27 |
|
| name |
type |
value |
| single choice |
accuracy |
47.54 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| text-generation |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| lianghsun/Formosa-bench |
(History) Formosa Taiwan Knowledge Bench |
history |
test |
v2024.11.27 |
|
| name |
type |
value |
| single choice |
accuracy |
60 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(geography_of_taiwan) tmmlu++ |
geography_of_taiwan |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
36.2 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(dentistry) tmmlu++ |
dentistry |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
33.83 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(technical) tmmlu++ |
technical |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
35.07 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(statistics_and_machine_learning) tmmlu++ |
statistics_and_machine_learning |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
28.57 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(clinical_psychology) tmmlu++ |
clinical_psychology |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
29.6 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(tve_design) tmmlu++ |
tve_design |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
38.54 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(three_principles_of_people) tmmlu++ |
three_principles_of_people |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
48.2 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(introduction_to_law) tmmlu++ |
introduction_to_law |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
29.96 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(linear_algebra) tmmlu++ |
linear_algebra |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
21.43 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(agriculture) tmmlu++ |
agriculture |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
24.5 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(jce_humanities) tmmlu++ |
jce_humanities |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
38.89 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(music) tmmlu++ |
music |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
25.9 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(secondary_physics) tmmlu++ |
secondary_physics |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
33.04 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(physics) tmmlu++ |
physics |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
27.84 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(advance_chemistry) tmmlu++ |
advance_chemistry |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
27.64 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(junior_science_exam) tmmlu++ |
junior_science_exam |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
30.05 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(veterinary_pathology) tmmlu++ |
veterinary_pathology |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
25.09 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(financial_analysis) tmmlu++ |
financial_analysis |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
25.13 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(national_protection) tmmlu++ |
national_protection |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
42.65 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(macroeconomics) tmmlu++ |
macroeconomics |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
26.76 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(politic_science) tmmlu++ |
politic_science |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
27.44 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(ttqav2) tmmlu++ |
ttqav2 |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
61.06 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(junior_chinese_exam) tmmlu++ |
junior_chinese_exam |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
30.86 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(traditional_chinese_medicine_clinical_medicine) tmmlu++ |
traditional_chinese_medicine_clinical_medicine |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
25.9 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(junior_math_exam) tmmlu++ |
junior_math_exam |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
21.71 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(auditing) tmmlu++ |
auditing |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
21.82 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(anti_money_laundering) tmmlu++ |
anti_money_laundering |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
37.31 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(pharmacology) tmmlu++ |
pharmacology |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
30.68 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(trust_practice) tmmlu++ |
trust_practice |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
28.18 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(tve_mathematics) tmmlu++ |
tve_mathematics |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
18.67 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(human_behavior) tmmlu++ |
human_behavior |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
32.04 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(pharmacy) tmmlu++ |
pharmacy |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
22.76 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(tve_chinese_language) tmmlu++ |
tve_chinese_language |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
36.65 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(optometry) tmmlu++ |
optometry |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
25.11 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(physical_education) tmmlu++ |
physical_education |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
30.73 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(organic_chemistry) tmmlu++ |
organic_chemistry |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
35.78 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(tve_natural_sciences) tmmlu++ |
tve_natural_sciences |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
33.73 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(education) tmmlu++ |
education |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
37.9 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(mechanical) tmmlu++ |
mechanical |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
42.37 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(taiwanese_hokkien) tmmlu++ |
taiwanese_hokkien |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
14.73 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(nautical_science) tmmlu++ |
nautical_science |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
30.49 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(business_management) tmmlu++ |
business_management |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
39.57 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(logic_reasoning) tmmlu++ |
logic_reasoning |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
27.34 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(marketing_management) tmmlu++ |
marketing_management |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
39.78 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(economics) tmmlu++ |
economics |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
25.95 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(basic_medical_science) tmmlu++ |
basic_medical_science |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
28.41 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(occupational_therapy_for_psychological_disorders) tmmlu++ |
occupational_therapy_for_psychological_disorders |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
35.73 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(general_principles_of_law) tmmlu++ |
general_principles_of_law |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
31.13 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(junior_chemistry) tmmlu++ |
junior_chemistry |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
24.88 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(veterinary_pharmacology) tmmlu++ |
veterinary_pharmacology |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
36.3 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(educational_psychology) tmmlu++ |
educational_psychology |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
33.52 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(finance_banking) tmmlu++ |
finance_banking |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
32.59 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(official_document_management) tmmlu++ |
official_document_management |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
32.43 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(fire_science) tmmlu++ |
fire_science |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
30.65 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(junior_social_studies) tmmlu++ |
junior_social_studies |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
47.62 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(accounting) tmmlu++ |
accounting |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
20.94 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(engineering_math) tmmlu++ |
engineering_math |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
27.18 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(education_(profession_level)) tmmlu++ |
education_(profession_level) |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
24.07 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(chinese_language_and_literature) tmmlu++ |
chinese_language_and_literature |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
27.64 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(management_accounting) tmmlu++ |
management_accounting |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
24.19 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(culinary_skills) tmmlu++ |
culinary_skills |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
39.38 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(administrative_law) tmmlu++ |
administrative_law |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
25.71 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(insurance_studies) tmmlu++ |
insurance_studies |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
33.42 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(real_estate) tmmlu++ |
real_estate |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
22.83 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(computer_science) tmmlu++ |
computer_science |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
31.61 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(taxation) tmmlu++ |
taxation |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
27.47 |
|
|
|
| task |
dataset |
metrics |
| type |
name |
| question-answering |
Single Choice Question |
|
| type |
name |
config |
split |
revision |
| ikala/tmmluplus |
(trade) tmmlu++ |
trade |
test |
c0e8ae955997300d5dbf0e382bf0ba5115f85e8c |
|
| name |
type |
value |
| single choice |
accuracy |
20.32 |
|
|
|