481 lines
12 KiB
JSON
481 lines
12 KiB
JSON
|
|
{
|
||
|
|
"model_dir": "/content/drive/MyDrive/veyra_runs/veyra2_30m_base_pretrain_2b/checkpoints/final_hf",
|
||
|
|
"dataset": "nyu-mll/blimp",
|
||
|
|
"num_subsets": 67,
|
||
|
|
"total_examples": 67000,
|
||
|
|
"total_correct": 42809,
|
||
|
|
"overall_accuracy": 0.6389402985074627,
|
||
|
|
"elapsed_seconds": 120.80895161628723,
|
||
|
|
"scoring": "mean token log-likelihood, add_special_tokens=False",
|
||
|
|
"results": {
|
||
|
|
"adjunct_island": {
|
||
|
|
"accuracy": 0.605,
|
||
|
|
"correct": 605,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax",
|
||
|
|
"linguistics_term": "island_effects"
|
||
|
|
},
|
||
|
|
"anaphor_gender_agreement": {
|
||
|
|
"accuracy": 0.78,
|
||
|
|
"correct": 780,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "morphology",
|
||
|
|
"linguistics_term": "anaphor_agreement"
|
||
|
|
},
|
||
|
|
"anaphor_number_agreement": {
|
||
|
|
"accuracy": 0.801,
|
||
|
|
"correct": 801,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "morphology",
|
||
|
|
"linguistics_term": "anaphor_agreement"
|
||
|
|
},
|
||
|
|
"animate_subject_passive": {
|
||
|
|
"accuracy": 0.572,
|
||
|
|
"correct": 572,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax",
|
||
|
|
"linguistics_term": "s-selection"
|
||
|
|
},
|
||
|
|
"animate_subject_trans": {
|
||
|
|
"accuracy": 0.603,
|
||
|
|
"correct": 603,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax",
|
||
|
|
"linguistics_term": "s-selection"
|
||
|
|
},
|
||
|
|
"causative": {
|
||
|
|
"accuracy": 0.633,
|
||
|
|
"correct": 633,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax",
|
||
|
|
"linguistics_term": "argument_structure"
|
||
|
|
},
|
||
|
|
"complex_NP_island": {
|
||
|
|
"accuracy": 0.557,
|
||
|
|
"correct": 557,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax",
|
||
|
|
"linguistics_term": "island_effects"
|
||
|
|
},
|
||
|
|
"coordinate_structure_constraint_complex_left_branch": {
|
||
|
|
"accuracy": 0.37,
|
||
|
|
"correct": 370,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax",
|
||
|
|
"linguistics_term": "island_effects"
|
||
|
|
},
|
||
|
|
"coordinate_structure_constraint_object_extraction": {
|
||
|
|
"accuracy": 0.674,
|
||
|
|
"correct": 674,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax",
|
||
|
|
"linguistics_term": "island_effects"
|
||
|
|
},
|
||
|
|
"determiner_noun_agreement_1": {
|
||
|
|
"accuracy": 0.797,
|
||
|
|
"correct": 797,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "morphology",
|
||
|
|
"linguistics_term": "determiner_noun_agreement"
|
||
|
|
},
|
||
|
|
"determiner_noun_agreement_2": {
|
||
|
|
"accuracy": 0.894,
|
||
|
|
"correct": 894,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "morphology",
|
||
|
|
"linguistics_term": "determiner_noun_agreement"
|
||
|
|
},
|
||
|
|
"determiner_noun_agreement_irregular_1": {
|
||
|
|
"accuracy": 0.692,
|
||
|
|
"correct": 692,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "morphology",
|
||
|
|
"linguistics_term": "determiner_noun_agreement"
|
||
|
|
},
|
||
|
|
"determiner_noun_agreement_irregular_2": {
|
||
|
|
"accuracy": 0.825,
|
||
|
|
"correct": 825,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "morphology",
|
||
|
|
"linguistics_term": "determiner_noun_agreement"
|
||
|
|
},
|
||
|
|
"determiner_noun_agreement_with_adj_2": {
|
||
|
|
"accuracy": 0.875,
|
||
|
|
"correct": 875,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "morphology",
|
||
|
|
"linguistics_term": "determiner_noun_agreement"
|
||
|
|
},
|
||
|
|
"determiner_noun_agreement_with_adj_irregular_1": {
|
||
|
|
"accuracy": 0.701,
|
||
|
|
"correct": 701,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "morphology",
|
||
|
|
"linguistics_term": "determiner_noun_agreement"
|
||
|
|
},
|
||
|
|
"determiner_noun_agreement_with_adj_irregular_2": {
|
||
|
|
"accuracy": 0.791,
|
||
|
|
"correct": 791,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "morphology",
|
||
|
|
"linguistics_term": "determiner_noun_agreement"
|
||
|
|
},
|
||
|
|
"determiner_noun_agreement_with_adjective_1": {
|
||
|
|
"accuracy": 0.801,
|
||
|
|
"correct": 801,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "morphology",
|
||
|
|
"linguistics_term": "determiner_noun_agreement"
|
||
|
|
},
|
||
|
|
"distractor_agreement_relational_noun": {
|
||
|
|
"accuracy": 0.499,
|
||
|
|
"correct": 499,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "morphology",
|
||
|
|
"linguistics_term": "subject_verb_agreement"
|
||
|
|
},
|
||
|
|
"distractor_agreement_relative_clause": {
|
||
|
|
"accuracy": 0.422,
|
||
|
|
"correct": 422,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "morphology",
|
||
|
|
"linguistics_term": "subject_verb_agreement"
|
||
|
|
},
|
||
|
|
"drop_argument": {
|
||
|
|
"accuracy": 0.428,
|
||
|
|
"correct": 428,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax",
|
||
|
|
"linguistics_term": "argument_structure"
|
||
|
|
},
|
||
|
|
"ellipsis_n_bar_1": {
|
||
|
|
"accuracy": 0.632,
|
||
|
|
"correct": 632,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax",
|
||
|
|
"linguistics_term": "ellipsis"
|
||
|
|
},
|
||
|
|
"ellipsis_n_bar_2": {
|
||
|
|
"accuracy": 0.863,
|
||
|
|
"correct": 863,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax",
|
||
|
|
"linguistics_term": "ellipsis"
|
||
|
|
},
|
||
|
|
"existential_there_object_raising": {
|
||
|
|
"accuracy": 0.622,
|
||
|
|
"correct": 622,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax_semantics",
|
||
|
|
"linguistics_term": "control_raising"
|
||
|
|
},
|
||
|
|
"existential_there_quantifiers_1": {
|
||
|
|
"accuracy": 0.891,
|
||
|
|
"correct": 891,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "semantics",
|
||
|
|
"linguistics_term": "quantifiers"
|
||
|
|
},
|
||
|
|
"existential_there_quantifiers_2": {
|
||
|
|
"accuracy": 0.342,
|
||
|
|
"correct": 342,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "semantics",
|
||
|
|
"linguistics_term": "quantifiers"
|
||
|
|
},
|
||
|
|
"existential_there_subject_raising": {
|
||
|
|
"accuracy": 0.57,
|
||
|
|
"correct": 570,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax_semantics",
|
||
|
|
"linguistics_term": "control_raising"
|
||
|
|
},
|
||
|
|
"expletive_it_object_raising": {
|
||
|
|
"accuracy": 0.651,
|
||
|
|
"correct": 651,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax_semantics",
|
||
|
|
"linguistics_term": "control_raising"
|
||
|
|
},
|
||
|
|
"inchoative": {
|
||
|
|
"accuracy": 0.625,
|
||
|
|
"correct": 625,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax",
|
||
|
|
"linguistics_term": "argument_structure"
|
||
|
|
},
|
||
|
|
"intransitive": {
|
||
|
|
"accuracy": 0.549,
|
||
|
|
"correct": 549,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax",
|
||
|
|
"linguistics_term": "argument_structure"
|
||
|
|
},
|
||
|
|
"irregular_past_participle_adjectives": {
|
||
|
|
"accuracy": 0.644,
|
||
|
|
"correct": 644,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "morphology",
|
||
|
|
"linguistics_term": "irregular_forms"
|
||
|
|
},
|
||
|
|
"irregular_past_participle_verbs": {
|
||
|
|
"accuracy": 0.823,
|
||
|
|
"correct": 823,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "morphology",
|
||
|
|
"linguistics_term": "irregular_forms"
|
||
|
|
},
|
||
|
|
"irregular_plural_subject_verb_agreement_1": {
|
||
|
|
"accuracy": 0.764,
|
||
|
|
"correct": 764,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "morphology",
|
||
|
|
"linguistics_term": "subject_verb_agreement"
|
||
|
|
},
|
||
|
|
"irregular_plural_subject_verb_agreement_2": {
|
||
|
|
"accuracy": 0.762,
|
||
|
|
"correct": 762,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "morphology",
|
||
|
|
"linguistics_term": "subject_verb_agreement"
|
||
|
|
},
|
||
|
|
"left_branch_island_echo_question": {
|
||
|
|
"accuracy": 0.233,
|
||
|
|
"correct": 233,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax",
|
||
|
|
"linguistics_term": "island_effects"
|
||
|
|
},
|
||
|
|
"left_branch_island_simple_question": {
|
||
|
|
"accuracy": 0.431,
|
||
|
|
"correct": 431,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax",
|
||
|
|
"linguistics_term": "island_effects"
|
||
|
|
},
|
||
|
|
"matrix_question_npi_licensor_present": {
|
||
|
|
"accuracy": 0.367,
|
||
|
|
"correct": 367,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "semantics",
|
||
|
|
"linguistics_term": "npi_licensing"
|
||
|
|
},
|
||
|
|
"npi_present_1": {
|
||
|
|
"accuracy": 0.431,
|
||
|
|
"correct": 431,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "semantics",
|
||
|
|
"linguistics_term": "npi_licensing"
|
||
|
|
},
|
||
|
|
"npi_present_2": {
|
||
|
|
"accuracy": 0.445,
|
||
|
|
"correct": 445,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "semantics",
|
||
|
|
"linguistics_term": "npi_licensing"
|
||
|
|
},
|
||
|
|
"only_npi_licensor_present": {
|
||
|
|
"accuracy": 0.833,
|
||
|
|
"correct": 833,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "semantics",
|
||
|
|
"linguistics_term": "npi_licensing"
|
||
|
|
},
|
||
|
|
"only_npi_scope": {
|
||
|
|
"accuracy": 0.772,
|
||
|
|
"correct": 772,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax_semantics",
|
||
|
|
"linguistics_term": "npi_licensing"
|
||
|
|
},
|
||
|
|
"passive_1": {
|
||
|
|
"accuracy": 0.711,
|
||
|
|
"correct": 711,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax",
|
||
|
|
"linguistics_term": "argument_structure"
|
||
|
|
},
|
||
|
|
"passive_2": {
|
||
|
|
"accuracy": 0.672,
|
||
|
|
"correct": 672,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax",
|
||
|
|
"linguistics_term": "argument_structure"
|
||
|
|
},
|
||
|
|
"principle_A_c_command": {
|
||
|
|
"accuracy": 0.421,
|
||
|
|
"correct": 421,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax_semantics",
|
||
|
|
"linguistics_term": "binding"
|
||
|
|
},
|
||
|
|
"principle_A_case_1": {
|
||
|
|
"accuracy": 0.871,
|
||
|
|
"correct": 871,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax_semantics",
|
||
|
|
"linguistics_term": "binding"
|
||
|
|
},
|
||
|
|
"principle_A_case_2": {
|
||
|
|
"accuracy": 0.847,
|
||
|
|
"correct": 847,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax_semantics",
|
||
|
|
"linguistics_term": "binding"
|
||
|
|
},
|
||
|
|
"principle_A_domain_1": {
|
||
|
|
"accuracy": 0.56,
|
||
|
|
"correct": 560,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax_semantics",
|
||
|
|
"linguistics_term": "binding"
|
||
|
|
},
|
||
|
|
"principle_A_domain_2": {
|
||
|
|
"accuracy": 0.484,
|
||
|
|
"correct": 484,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax_semantics",
|
||
|
|
"linguistics_term": "binding"
|
||
|
|
},
|
||
|
|
"principle_A_domain_3": {
|
||
|
|
"accuracy": 0.505,
|
||
|
|
"correct": 505,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax_semantics",
|
||
|
|
"linguistics_term": "binding"
|
||
|
|
},
|
||
|
|
"principle_A_reconstruction": {
|
||
|
|
"accuracy": 0.526,
|
||
|
|
"correct": 526,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax_semantics",
|
||
|
|
"linguistics_term": "binding"
|
||
|
|
},
|
||
|
|
"regular_plural_subject_verb_agreement_1": {
|
||
|
|
"accuracy": 0.743,
|
||
|
|
"correct": 743,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "morphology",
|
||
|
|
"linguistics_term": "subject_verb_agreement"
|
||
|
|
},
|
||
|
|
"regular_plural_subject_verb_agreement_2": {
|
||
|
|
"accuracy": 0.755,
|
||
|
|
"correct": 755,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "morphology",
|
||
|
|
"linguistics_term": "subject_verb_agreement"
|
||
|
|
},
|
||
|
|
"sentential_negation_npi_licensor_present": {
|
||
|
|
"accuracy": 0.899,
|
||
|
|
"correct": 899,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "semantics",
|
||
|
|
"linguistics_term": "npi_licensing"
|
||
|
|
},
|
||
|
|
"sentential_negation_npi_scope": {
|
||
|
|
"accuracy": 0.46,
|
||
|
|
"correct": 460,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax_semantics",
|
||
|
|
"linguistics_term": "npi_licensing"
|
||
|
|
},
|
||
|
|
"sentential_subject_island": {
|
||
|
|
"accuracy": 0.326,
|
||
|
|
"correct": 326,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax",
|
||
|
|
"linguistics_term": "island_effects"
|
||
|
|
},
|
||
|
|
"superlative_quantifiers_1": {
|
||
|
|
"accuracy": 0.762,
|
||
|
|
"correct": 762,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "semantics",
|
||
|
|
"linguistics_term": "quantifiers"
|
||
|
|
},
|
||
|
|
"superlative_quantifiers_2": {
|
||
|
|
"accuracy": 0.726,
|
||
|
|
"correct": 726,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "semantics",
|
||
|
|
"linguistics_term": "quantifiers"
|
||
|
|
},
|
||
|
|
"tough_vs_raising_1": {
|
||
|
|
"accuracy": 0.717,
|
||
|
|
"correct": 717,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax_semantics",
|
||
|
|
"linguistics_term": "control_raising"
|
||
|
|
},
|
||
|
|
"tough_vs_raising_2": {
|
||
|
|
"accuracy": 0.449,
|
||
|
|
"correct": 449,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax_semantics",
|
||
|
|
"linguistics_term": "control_raising"
|
||
|
|
},
|
||
|
|
"transitive": {
|
||
|
|
"accuracy": 0.637,
|
||
|
|
"correct": 637,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax",
|
||
|
|
"linguistics_term": "argument_structure"
|
||
|
|
},
|
||
|
|
"wh_island": {
|
||
|
|
"accuracy": 0.712,
|
||
|
|
"correct": 712,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax",
|
||
|
|
"linguistics_term": "island_effects"
|
||
|
|
},
|
||
|
|
"wh_questions_object_gap": {
|
||
|
|
"accuracy": 0.515,
|
||
|
|
"correct": 515,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax",
|
||
|
|
"linguistics_term": "filler_gap_dependency"
|
||
|
|
},
|
||
|
|
"wh_questions_subject_gap": {
|
||
|
|
"accuracy": 0.732,
|
||
|
|
"correct": 732,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax",
|
||
|
|
"linguistics_term": "filler_gap_dependency"
|
||
|
|
},
|
||
|
|
"wh_questions_subject_gap_long_distance": {
|
||
|
|
"accuracy": 0.878,
|
||
|
|
"correct": 878,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax",
|
||
|
|
"linguistics_term": "filler_gap_dependency"
|
||
|
|
},
|
||
|
|
"wh_vs_that_no_gap": {
|
||
|
|
"accuracy": 0.913,
|
||
|
|
"correct": 913,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax",
|
||
|
|
"linguistics_term": "filler_gap_dependency"
|
||
|
|
},
|
||
|
|
"wh_vs_that_no_gap_long_distance": {
|
||
|
|
"accuracy": 0.946,
|
||
|
|
"correct": 946,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax",
|
||
|
|
"linguistics_term": "filler_gap_dependency"
|
||
|
|
},
|
||
|
|
"wh_vs_that_with_gap": {
|
||
|
|
"accuracy": 0.324,
|
||
|
|
"correct": 324,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax",
|
||
|
|
"linguistics_term": "filler_gap_dependency"
|
||
|
|
},
|
||
|
|
"wh_vs_that_with_gap_long_distance": {
|
||
|
|
"accuracy": 0.148,
|
||
|
|
"correct": 148,
|
||
|
|
"total": 1000,
|
||
|
|
"field": "syntax",
|
||
|
|
"linguistics_term": "filler_gap_dependency"
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|