{ "model_dir": "/content/drive/MyDrive/veyra_runs/veyra2_15m_base_pretrain_1b/checkpoints/final_hf", "dataset": "nyu-mll/blimp", "num_subsets": 67, "total_examples": 67000, "total_correct": 38750, "overall_accuracy": 0.5783582089552238, "elapsed_seconds": 110.03305959701538, "scoring": "mean token log-likelihood, add_special_tokens=False", "results": { "adjunct_island": { "accuracy": 0.706, "correct": 706, "total": 1000, "field": "syntax", "linguistics_term": "island_effects" }, "anaphor_gender_agreement": { "accuracy": 0.731, "correct": 731, "total": 1000, "field": "morphology", "linguistics_term": "anaphor_agreement" }, "anaphor_number_agreement": { "accuracy": 0.708, "correct": 708, "total": 1000, "field": "morphology", "linguistics_term": "anaphor_agreement" }, "animate_subject_passive": { "accuracy": 0.519, "correct": 519, "total": 1000, "field": "syntax", "linguistics_term": "s-selection" }, "animate_subject_trans": { "accuracy": 0.424, "correct": 424, "total": 1000, "field": "syntax", "linguistics_term": "s-selection" }, "causative": { "accuracy": 0.626, "correct": 626, "total": 1000, "field": "syntax", "linguistics_term": "argument_structure" }, "complex_NP_island": { "accuracy": 0.502, "correct": 502, "total": 1000, "field": "syntax", "linguistics_term": "island_effects" }, "coordinate_structure_constraint_complex_left_branch": { "accuracy": 0.315, "correct": 315, "total": 1000, "field": "syntax", "linguistics_term": "island_effects" }, "coordinate_structure_constraint_object_extraction": { "accuracy": 0.635, "correct": 635, "total": 1000, "field": "syntax", "linguistics_term": "island_effects" }, "determiner_noun_agreement_1": { "accuracy": 0.737, "correct": 737, "total": 1000, "field": "morphology", "linguistics_term": "determiner_noun_agreement" }, "determiner_noun_agreement_2": { "accuracy": 0.812, "correct": 812, "total": 1000, "field": "morphology", "linguistics_term": "determiner_noun_agreement" }, "determiner_noun_agreement_irregular_1": { 
"accuracy": 0.646, "correct": 646, "total": 1000, "field": "morphology", "linguistics_term": "determiner_noun_agreement" }, "determiner_noun_agreement_irregular_2": { "accuracy": 0.735, "correct": 735, "total": 1000, "field": "morphology", "linguistics_term": "determiner_noun_agreement" }, "determiner_noun_agreement_with_adj_2": { "accuracy": 0.779, "correct": 779, "total": 1000, "field": "morphology", "linguistics_term": "determiner_noun_agreement" }, "determiner_noun_agreement_with_adj_irregular_1": { "accuracy": 0.637, "correct": 637, "total": 1000, "field": "morphology", "linguistics_term": "determiner_noun_agreement" }, "determiner_noun_agreement_with_adj_irregular_2": { "accuracy": 0.687, "correct": 687, "total": 1000, "field": "morphology", "linguistics_term": "determiner_noun_agreement" }, "determiner_noun_agreement_with_adjective_1": { "accuracy": 0.704, "correct": 704, "total": 1000, "field": "morphology", "linguistics_term": "determiner_noun_agreement" }, "distractor_agreement_relational_noun": { "accuracy": 0.291, "correct": 291, "total": 1000, "field": "morphology", "linguistics_term": "subject_verb_agreement" }, "distractor_agreement_relative_clause": { "accuracy": 0.345, "correct": 345, "total": 1000, "field": "morphology", "linguistics_term": "subject_verb_agreement" }, "drop_argument": { "accuracy": 0.44, "correct": 440, "total": 1000, "field": "syntax", "linguistics_term": "argument_structure" }, "ellipsis_n_bar_1": { "accuracy": 0.609, "correct": 609, "total": 1000, "field": "syntax", "linguistics_term": "ellipsis" }, "ellipsis_n_bar_2": { "accuracy": 0.867, "correct": 867, "total": 1000, "field": "syntax", "linguistics_term": "ellipsis" }, "existential_there_object_raising": { "accuracy": 0.633, "correct": 633, "total": 1000, "field": "syntax_semantics", "linguistics_term": "control_raising" }, "existential_there_quantifiers_1": { "accuracy": 0.882, "correct": 882, "total": 1000, "field": "semantics", "linguistics_term": "quantifiers" }, 
"existential_there_quantifiers_2": { "accuracy": 0.119, "correct": 119, "total": 1000, "field": "semantics", "linguistics_term": "quantifiers" }, "existential_there_subject_raising": { "accuracy": 0.599, "correct": 599, "total": 1000, "field": "syntax_semantics", "linguistics_term": "control_raising" }, "expletive_it_object_raising": { "accuracy": 0.583, "correct": 583, "total": 1000, "field": "syntax_semantics", "linguistics_term": "control_raising" }, "inchoative": { "accuracy": 0.567, "correct": 567, "total": 1000, "field": "syntax", "linguistics_term": "argument_structure" }, "intransitive": { "accuracy": 0.49, "correct": 490, "total": 1000, "field": "syntax", "linguistics_term": "argument_structure" }, "irregular_past_participle_adjectives": { "accuracy": 0.56, "correct": 560, "total": 1000, "field": "morphology", "linguistics_term": "irregular_forms" }, "irregular_past_participle_verbs": { "accuracy": 0.81, "correct": 810, "total": 1000, "field": "morphology", "linguistics_term": "irregular_forms" }, "irregular_plural_subject_verb_agreement_1": { "accuracy": 0.69, "correct": 690, "total": 1000, "field": "morphology", "linguistics_term": "subject_verb_agreement" }, "irregular_plural_subject_verb_agreement_2": { "accuracy": 0.596, "correct": 596, "total": 1000, "field": "morphology", "linguistics_term": "subject_verb_agreement" }, "left_branch_island_echo_question": { "accuracy": 0.278, "correct": 278, "total": 1000, "field": "syntax", "linguistics_term": "island_effects" }, "left_branch_island_simple_question": { "accuracy": 0.377, "correct": 377, "total": 1000, "field": "syntax", "linguistics_term": "island_effects" }, "matrix_question_npi_licensor_present": { "accuracy": 0.227, "correct": 227, "total": 1000, "field": "semantics", "linguistics_term": "npi_licensing" }, "npi_present_1": { "accuracy": 0.4, "correct": 400, "total": 1000, "field": "semantics", "linguistics_term": "npi_licensing" }, "npi_present_2": { "accuracy": 0.432, "correct": 432, "total": 
1000, "field": "semantics", "linguistics_term": "npi_licensing" }, "only_npi_licensor_present": { "accuracy": 0.315, "correct": 315, "total": 1000, "field": "semantics", "linguistics_term": "npi_licensing" }, "only_npi_scope": { "accuracy": 0.553, "correct": 553, "total": 1000, "field": "syntax_semantics", "linguistics_term": "npi_licensing" }, "passive_1": { "accuracy": 0.657, "correct": 657, "total": 1000, "field": "syntax", "linguistics_term": "argument_structure" }, "passive_2": { "accuracy": 0.645, "correct": 645, "total": 1000, "field": "syntax", "linguistics_term": "argument_structure" }, "principle_A_c_command": { "accuracy": 0.374, "correct": 374, "total": 1000, "field": "syntax_semantics", "linguistics_term": "binding" }, "principle_A_case_1": { "accuracy": 0.859, "correct": 859, "total": 1000, "field": "syntax_semantics", "linguistics_term": "binding" }, "principle_A_case_2": { "accuracy": 0.818, "correct": 818, "total": 1000, "field": "syntax_semantics", "linguistics_term": "binding" }, "principle_A_domain_1": { "accuracy": 0.513, "correct": 513, "total": 1000, "field": "syntax_semantics", "linguistics_term": "binding" }, "principle_A_domain_2": { "accuracy": 0.483, "correct": 483, "total": 1000, "field": "syntax_semantics", "linguistics_term": "binding" }, "principle_A_domain_3": { "accuracy": 0.514, "correct": 514, "total": 1000, "field": "syntax_semantics", "linguistics_term": "binding" }, "principle_A_reconstruction": { "accuracy": 0.373, "correct": 373, "total": 1000, "field": "syntax_semantics", "linguistics_term": "binding" }, "regular_plural_subject_verb_agreement_1": { "accuracy": 0.676, "correct": 676, "total": 1000, "field": "morphology", "linguistics_term": "subject_verb_agreement" }, "regular_plural_subject_verb_agreement_2": { "accuracy": 0.699, "correct": 699, "total": 1000, "field": "morphology", "linguistics_term": "subject_verb_agreement" }, "sentential_negation_npi_licensor_present": { "accuracy": 0.989, "correct": 989, "total": 1000, 
"field": "semantics", "linguistics_term": "npi_licensing" }, "sentential_negation_npi_scope": { "accuracy": 0.302, "correct": 302, "total": 1000, "field": "syntax_semantics", "linguistics_term": "npi_licensing" }, "sentential_subject_island": { "accuracy": 0.363, "correct": 363, "total": 1000, "field": "syntax", "linguistics_term": "island_effects" }, "superlative_quantifiers_1": { "accuracy": 0.797, "correct": 797, "total": 1000, "field": "semantics", "linguistics_term": "quantifiers" }, "superlative_quantifiers_2": { "accuracy": 0.578, "correct": 578, "total": 1000, "field": "semantics", "linguistics_term": "quantifiers" }, "tough_vs_raising_1": { "accuracy": 0.694, "correct": 694, "total": 1000, "field": "syntax_semantics", "linguistics_term": "control_raising" }, "tough_vs_raising_2": { "accuracy": 0.396, "correct": 396, "total": 1000, "field": "syntax_semantics", "linguistics_term": "control_raising" }, "transitive": { "accuracy": 0.61, "correct": 610, "total": 1000, "field": "syntax", "linguistics_term": "argument_structure" }, "wh_island": { "accuracy": 0.566, "correct": 566, "total": 1000, "field": "syntax", "linguistics_term": "island_effects" }, "wh_questions_object_gap": { "accuracy": 0.45, "correct": 450, "total": 1000, "field": "syntax", "linguistics_term": "filler_gap_dependency" }, "wh_questions_subject_gap": { "accuracy": 0.792, "correct": 792, "total": 1000, "field": "syntax", "linguistics_term": "filler_gap_dependency" }, "wh_questions_subject_gap_long_distance": { "accuracy": 0.904, "correct": 904, "total": 1000, "field": "syntax", "linguistics_term": "filler_gap_dependency" }, "wh_vs_that_no_gap": { "accuracy": 0.943, "correct": 943, "total": 1000, "field": "syntax", "linguistics_term": "filler_gap_dependency" }, "wh_vs_that_no_gap_long_distance": { "accuracy": 0.959, "correct": 959, "total": 1000, "field": "syntax", "linguistics_term": "filler_gap_dependency" }, "wh_vs_that_with_gap": { "accuracy": 0.152, "correct": 152, "total": 1000, "field": 
"syntax", "linguistics_term": "filler_gap_dependency" }, "wh_vs_that_with_gap_long_distance": { "accuracy": 0.078, "correct": 78, "total": 1000, "field": "syntax", "linguistics_term": "filler_gap_dependency" } } }