初始化项目,由ModelHub XC社区提供模型
Model: veyra-ai/veyra2-15m-base-1b-tokens Source: Original Platform
This commit is contained in:
481
blimp_eval_results.json
Normal file
481
blimp_eval_results.json
Normal file
@@ -0,0 +1,481 @@
|
||||
{
|
||||
"model_dir": "/content/drive/MyDrive/veyra_runs/veyra2_15m_base_pretrain_1b/checkpoints/final_hf",
|
||||
"dataset": "nyu-mll/blimp",
|
||||
"num_subsets": 67,
|
||||
"total_examples": 67000,
|
||||
"total_correct": 38750,
|
||||
"overall_accuracy": 0.5783582089552238,
|
||||
"elapsed_seconds": 110.03305959701538,
|
||||
"scoring": "mean token log-likelihood, add_special_tokens=False",
|
||||
"results": {
|
||||
"adjunct_island": {
|
||||
"accuracy": 0.706,
|
||||
"correct": 706,
|
||||
"total": 1000,
|
||||
"field": "syntax",
|
||||
"linguistics_term": "island_effects"
|
||||
},
|
||||
"anaphor_gender_agreement": {
|
||||
"accuracy": 0.731,
|
||||
"correct": 731,
|
||||
"total": 1000,
|
||||
"field": "morphology",
|
||||
"linguistics_term": "anaphor_agreement"
|
||||
},
|
||||
"anaphor_number_agreement": {
|
||||
"accuracy": 0.708,
|
||||
"correct": 708,
|
||||
"total": 1000,
|
||||
"field": "morphology",
|
||||
"linguistics_term": "anaphor_agreement"
|
||||
},
|
||||
"animate_subject_passive": {
|
||||
"accuracy": 0.519,
|
||||
"correct": 519,
|
||||
"total": 1000,
|
||||
"field": "syntax",
|
||||
"linguistics_term": "s-selection"
|
||||
},
|
||||
"animate_subject_trans": {
|
||||
"accuracy": 0.424,
|
||||
"correct": 424,
|
||||
"total": 1000,
|
||||
"field": "syntax",
|
||||
"linguistics_term": "s-selection"
|
||||
},
|
||||
"causative": {
|
||||
"accuracy": 0.626,
|
||||
"correct": 626,
|
||||
"total": 1000,
|
||||
"field": "syntax",
|
||||
"linguistics_term": "argument_structure"
|
||||
},
|
||||
"complex_NP_island": {
|
||||
"accuracy": 0.502,
|
||||
"correct": 502,
|
||||
"total": 1000,
|
||||
"field": "syntax",
|
||||
"linguistics_term": "island_effects"
|
||||
},
|
||||
"coordinate_structure_constraint_complex_left_branch": {
|
||||
"accuracy": 0.315,
|
||||
"correct": 315,
|
||||
"total": 1000,
|
||||
"field": "syntax",
|
||||
"linguistics_term": "island_effects"
|
||||
},
|
||||
"coordinate_structure_constraint_object_extraction": {
|
||||
"accuracy": 0.635,
|
||||
"correct": 635,
|
||||
"total": 1000,
|
||||
"field": "syntax",
|
||||
"linguistics_term": "island_effects"
|
||||
},
|
||||
"determiner_noun_agreement_1": {
|
||||
"accuracy": 0.737,
|
||||
"correct": 737,
|
||||
"total": 1000,
|
||||
"field": "morphology",
|
||||
"linguistics_term": "determiner_noun_agreement"
|
||||
},
|
||||
"determiner_noun_agreement_2": {
|
||||
"accuracy": 0.812,
|
||||
"correct": 812,
|
||||
"total": 1000,
|
||||
"field": "morphology",
|
||||
"linguistics_term": "determiner_noun_agreement"
|
||||
},
|
||||
"determiner_noun_agreement_irregular_1": {
|
||||
"accuracy": 0.646,
|
||||
"correct": 646,
|
||||
"total": 1000,
|
||||
"field": "morphology",
|
||||
"linguistics_term": "determiner_noun_agreement"
|
||||
},
|
||||
"determiner_noun_agreement_irregular_2": {
|
||||
"accuracy": 0.735,
|
||||
"correct": 735,
|
||||
"total": 1000,
|
||||
"field": "morphology",
|
||||
"linguistics_term": "determiner_noun_agreement"
|
||||
},
|
||||
"determiner_noun_agreement_with_adj_2": {
|
||||
"accuracy": 0.779,
|
||||
"correct": 779,
|
||||
"total": 1000,
|
||||
"field": "morphology",
|
||||
"linguistics_term": "determiner_noun_agreement"
|
||||
},
|
||||
"determiner_noun_agreement_with_adj_irregular_1": {
|
||||
"accuracy": 0.637,
|
||||
"correct": 637,
|
||||
"total": 1000,
|
||||
"field": "morphology",
|
||||
"linguistics_term": "determiner_noun_agreement"
|
||||
},
|
||||
"determiner_noun_agreement_with_adj_irregular_2": {
|
||||
"accuracy": 0.687,
|
||||
"correct": 687,
|
||||
"total": 1000,
|
||||
"field": "morphology",
|
||||
"linguistics_term": "determiner_noun_agreement"
|
||||
},
|
||||
"determiner_noun_agreement_with_adjective_1": {
|
||||
"accuracy": 0.704,
|
||||
"correct": 704,
|
||||
"total": 1000,
|
||||
"field": "morphology",
|
||||
"linguistics_term": "determiner_noun_agreement"
|
||||
},
|
||||
"distractor_agreement_relational_noun": {
|
||||
"accuracy": 0.291,
|
||||
"correct": 291,
|
||||
"total": 1000,
|
||||
"field": "morphology",
|
||||
"linguistics_term": "subject_verb_agreement"
|
||||
},
|
||||
"distractor_agreement_relative_clause": {
|
||||
"accuracy": 0.345,
|
||||
"correct": 345,
|
||||
"total": 1000,
|
||||
"field": "morphology",
|
||||
"linguistics_term": "subject_verb_agreement"
|
||||
},
|
||||
"drop_argument": {
|
||||
"accuracy": 0.44,
|
||||
"correct": 440,
|
||||
"total": 1000,
|
||||
"field": "syntax",
|
||||
"linguistics_term": "argument_structure"
|
||||
},
|
||||
"ellipsis_n_bar_1": {
|
||||
"accuracy": 0.609,
|
||||
"correct": 609,
|
||||
"total": 1000,
|
||||
"field": "syntax",
|
||||
"linguistics_term": "ellipsis"
|
||||
},
|
||||
"ellipsis_n_bar_2": {
|
||||
"accuracy": 0.867,
|
||||
"correct": 867,
|
||||
"total": 1000,
|
||||
"field": "syntax",
|
||||
"linguistics_term": "ellipsis"
|
||||
},
|
||||
"existential_there_object_raising": {
|
||||
"accuracy": 0.633,
|
||||
"correct": 633,
|
||||
"total": 1000,
|
||||
"field": "syntax_semantics",
|
||||
"linguistics_term": "control_raising"
|
||||
},
|
||||
"existential_there_quantifiers_1": {
|
||||
"accuracy": 0.882,
|
||||
"correct": 882,
|
||||
"total": 1000,
|
||||
"field": "semantics",
|
||||
"linguistics_term": "quantifiers"
|
||||
},
|
||||
"existential_there_quantifiers_2": {
|
||||
"accuracy": 0.119,
|
||||
"correct": 119,
|
||||
"total": 1000,
|
||||
"field": "semantics",
|
||||
"linguistics_term": "quantifiers"
|
||||
},
|
||||
"existential_there_subject_raising": {
|
||||
"accuracy": 0.599,
|
||||
"correct": 599,
|
||||
"total": 1000,
|
||||
"field": "syntax_semantics",
|
||||
"linguistics_term": "control_raising"
|
||||
},
|
||||
"expletive_it_object_raising": {
|
||||
"accuracy": 0.583,
|
||||
"correct": 583,
|
||||
"total": 1000,
|
||||
"field": "syntax_semantics",
|
||||
"linguistics_term": "control_raising"
|
||||
},
|
||||
"inchoative": {
|
||||
"accuracy": 0.567,
|
||||
"correct": 567,
|
||||
"total": 1000,
|
||||
"field": "syntax",
|
||||
"linguistics_term": "argument_structure"
|
||||
},
|
||||
"intransitive": {
|
||||
"accuracy": 0.49,
|
||||
"correct": 490,
|
||||
"total": 1000,
|
||||
"field": "syntax",
|
||||
"linguistics_term": "argument_structure"
|
||||
},
|
||||
"irregular_past_participle_adjectives": {
|
||||
"accuracy": 0.56,
|
||||
"correct": 560,
|
||||
"total": 1000,
|
||||
"field": "morphology",
|
||||
"linguistics_term": "irregular_forms"
|
||||
},
|
||||
"irregular_past_participle_verbs": {
|
||||
"accuracy": 0.81,
|
||||
"correct": 810,
|
||||
"total": 1000,
|
||||
"field": "morphology",
|
||||
"linguistics_term": "irregular_forms"
|
||||
},
|
||||
"irregular_plural_subject_verb_agreement_1": {
|
||||
"accuracy": 0.69,
|
||||
"correct": 690,
|
||||
"total": 1000,
|
||||
"field": "morphology",
|
||||
"linguistics_term": "subject_verb_agreement"
|
||||
},
|
||||
"irregular_plural_subject_verb_agreement_2": {
|
||||
"accuracy": 0.596,
|
||||
"correct": 596,
|
||||
"total": 1000,
|
||||
"field": "morphology",
|
||||
"linguistics_term": "subject_verb_agreement"
|
||||
},
|
||||
"left_branch_island_echo_question": {
|
||||
"accuracy": 0.278,
|
||||
"correct": 278,
|
||||
"total": 1000,
|
||||
"field": "syntax",
|
||||
"linguistics_term": "island_effects"
|
||||
},
|
||||
"left_branch_island_simple_question": {
|
||||
"accuracy": 0.377,
|
||||
"correct": 377,
|
||||
"total": 1000,
|
||||
"field": "syntax",
|
||||
"linguistics_term": "island_effects"
|
||||
},
|
||||
"matrix_question_npi_licensor_present": {
|
||||
"accuracy": 0.227,
|
||||
"correct": 227,
|
||||
"total": 1000,
|
||||
"field": "semantics",
|
||||
"linguistics_term": "npi_licensing"
|
||||
},
|
||||
"npi_present_1": {
|
||||
"accuracy": 0.4,
|
||||
"correct": 400,
|
||||
"total": 1000,
|
||||
"field": "semantics",
|
||||
"linguistics_term": "npi_licensing"
|
||||
},
|
||||
"npi_present_2": {
|
||||
"accuracy": 0.432,
|
||||
"correct": 432,
|
||||
"total": 1000,
|
||||
"field": "semantics",
|
||||
"linguistics_term": "npi_licensing"
|
||||
},
|
||||
"only_npi_licensor_present": {
|
||||
"accuracy": 0.315,
|
||||
"correct": 315,
|
||||
"total": 1000,
|
||||
"field": "semantics",
|
||||
"linguistics_term": "npi_licensing"
|
||||
},
|
||||
"only_npi_scope": {
|
||||
"accuracy": 0.553,
|
||||
"correct": 553,
|
||||
"total": 1000,
|
||||
"field": "syntax_semantics",
|
||||
"linguistics_term": "npi_licensing"
|
||||
},
|
||||
"passive_1": {
|
||||
"accuracy": 0.657,
|
||||
"correct": 657,
|
||||
"total": 1000,
|
||||
"field": "syntax",
|
||||
"linguistics_term": "argument_structure"
|
||||
},
|
||||
"passive_2": {
|
||||
"accuracy": 0.645,
|
||||
"correct": 645,
|
||||
"total": 1000,
|
||||
"field": "syntax",
|
||||
"linguistics_term": "argument_structure"
|
||||
},
|
||||
"principle_A_c_command": {
|
||||
"accuracy": 0.374,
|
||||
"correct": 374,
|
||||
"total": 1000,
|
||||
"field": "syntax_semantics",
|
||||
"linguistics_term": "binding"
|
||||
},
|
||||
"principle_A_case_1": {
|
||||
"accuracy": 0.859,
|
||||
"correct": 859,
|
||||
"total": 1000,
|
||||
"field": "syntax_semantics",
|
||||
"linguistics_term": "binding"
|
||||
},
|
||||
"principle_A_case_2": {
|
||||
"accuracy": 0.818,
|
||||
"correct": 818,
|
||||
"total": 1000,
|
||||
"field": "syntax_semantics",
|
||||
"linguistics_term": "binding"
|
||||
},
|
||||
"principle_A_domain_1": {
|
||||
"accuracy": 0.513,
|
||||
"correct": 513,
|
||||
"total": 1000,
|
||||
"field": "syntax_semantics",
|
||||
"linguistics_term": "binding"
|
||||
},
|
||||
"principle_A_domain_2": {
|
||||
"accuracy": 0.483,
|
||||
"correct": 483,
|
||||
"total": 1000,
|
||||
"field": "syntax_semantics",
|
||||
"linguistics_term": "binding"
|
||||
},
|
||||
"principle_A_domain_3": {
|
||||
"accuracy": 0.514,
|
||||
"correct": 514,
|
||||
"total": 1000,
|
||||
"field": "syntax_semantics",
|
||||
"linguistics_term": "binding"
|
||||
},
|
||||
"principle_A_reconstruction": {
|
||||
"accuracy": 0.373,
|
||||
"correct": 373,
|
||||
"total": 1000,
|
||||
"field": "syntax_semantics",
|
||||
"linguistics_term": "binding"
|
||||
},
|
||||
"regular_plural_subject_verb_agreement_1": {
|
||||
"accuracy": 0.676,
|
||||
"correct": 676,
|
||||
"total": 1000,
|
||||
"field": "morphology",
|
||||
"linguistics_term": "subject_verb_agreement"
|
||||
},
|
||||
"regular_plural_subject_verb_agreement_2": {
|
||||
"accuracy": 0.699,
|
||||
"correct": 699,
|
||||
"total": 1000,
|
||||
"field": "morphology",
|
||||
"linguistics_term": "subject_verb_agreement"
|
||||
},
|
||||
"sentential_negation_npi_licensor_present": {
|
||||
"accuracy": 0.989,
|
||||
"correct": 989,
|
||||
"total": 1000,
|
||||
"field": "semantics",
|
||||
"linguistics_term": "npi_licensing"
|
||||
},
|
||||
"sentential_negation_npi_scope": {
|
||||
"accuracy": 0.302,
|
||||
"correct": 302,
|
||||
"total": 1000,
|
||||
"field": "syntax_semantics",
|
||||
"linguistics_term": "npi_licensing"
|
||||
},
|
||||
"sentential_subject_island": {
|
||||
"accuracy": 0.363,
|
||||
"correct": 363,
|
||||
"total": 1000,
|
||||
"field": "syntax",
|
||||
"linguistics_term": "island_effects"
|
||||
},
|
||||
"superlative_quantifiers_1": {
|
||||
"accuracy": 0.797,
|
||||
"correct": 797,
|
||||
"total": 1000,
|
||||
"field": "semantics",
|
||||
"linguistics_term": "quantifiers"
|
||||
},
|
||||
"superlative_quantifiers_2": {
|
||||
"accuracy": 0.578,
|
||||
"correct": 578,
|
||||
"total": 1000,
|
||||
"field": "semantics",
|
||||
"linguistics_term": "quantifiers"
|
||||
},
|
||||
"tough_vs_raising_1": {
|
||||
"accuracy": 0.694,
|
||||
"correct": 694,
|
||||
"total": 1000,
|
||||
"field": "syntax_semantics",
|
||||
"linguistics_term": "control_raising"
|
||||
},
|
||||
"tough_vs_raising_2": {
|
||||
"accuracy": 0.396,
|
||||
"correct": 396,
|
||||
"total": 1000,
|
||||
"field": "syntax_semantics",
|
||||
"linguistics_term": "control_raising"
|
||||
},
|
||||
"transitive": {
|
||||
"accuracy": 0.61,
|
||||
"correct": 610,
|
||||
"total": 1000,
|
||||
"field": "syntax",
|
||||
"linguistics_term": "argument_structure"
|
||||
},
|
||||
"wh_island": {
|
||||
"accuracy": 0.566,
|
||||
"correct": 566,
|
||||
"total": 1000,
|
||||
"field": "syntax",
|
||||
"linguistics_term": "island_effects"
|
||||
},
|
||||
"wh_questions_object_gap": {
|
||||
"accuracy": 0.45,
|
||||
"correct": 450,
|
||||
"total": 1000,
|
||||
"field": "syntax",
|
||||
"linguistics_term": "filler_gap_dependency"
|
||||
},
|
||||
"wh_questions_subject_gap": {
|
||||
"accuracy": 0.792,
|
||||
"correct": 792,
|
||||
"total": 1000,
|
||||
"field": "syntax",
|
||||
"linguistics_term": "filler_gap_dependency"
|
||||
},
|
||||
"wh_questions_subject_gap_long_distance": {
|
||||
"accuracy": 0.904,
|
||||
"correct": 904,
|
||||
"total": 1000,
|
||||
"field": "syntax",
|
||||
"linguistics_term": "filler_gap_dependency"
|
||||
},
|
||||
"wh_vs_that_no_gap": {
|
||||
"accuracy": 0.943,
|
||||
"correct": 943,
|
||||
"total": 1000,
|
||||
"field": "syntax",
|
||||
"linguistics_term": "filler_gap_dependency"
|
||||
},
|
||||
"wh_vs_that_no_gap_long_distance": {
|
||||
"accuracy": 0.959,
|
||||
"correct": 959,
|
||||
"total": 1000,
|
||||
"field": "syntax",
|
||||
"linguistics_term": "filler_gap_dependency"
|
||||
},
|
||||
"wh_vs_that_with_gap": {
|
||||
"accuracy": 0.152,
|
||||
"correct": 152,
|
||||
"total": 1000,
|
||||
"field": "syntax",
|
||||
"linguistics_term": "filler_gap_dependency"
|
||||
},
|
||||
"wh_vs_that_with_gap_long_distance": {
|
||||
"accuracy": 0.078,
|
||||
"correct": 78,
|
||||
"total": 1000,
|
||||
"field": "syntax",
|
||||
"linguistics_term": "filler_gap_dependency"
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user