{ "model_dir": "/content/drive/MyDrive/veyra_runs/veyra2_30m_base_pretrain_2b/checkpoints/final_hf", "dataset": "nyu-mll/blimp", "num_subsets": 67, "total_examples": 67000, "total_correct": 42809, "overall_accuracy": 0.6389402985074627, "elapsed_seconds": 120.80895161628723, "scoring": "mean token log-likelihood, add_special_tokens=False", "results": { "adjunct_island": { "accuracy": 0.605, "correct": 605, "total": 1000, "field": "syntax", "linguistics_term": "island_effects" }, "anaphor_gender_agreement": { "accuracy": 0.78, "correct": 780, "total": 1000, "field": "morphology", "linguistics_term": "anaphor_agreement" }, "anaphor_number_agreement": { "accuracy": 0.801, "correct": 801, "total": 1000, "field": "morphology", "linguistics_term": "anaphor_agreement" }, "animate_subject_passive": { "accuracy": 0.572, "correct": 572, "total": 1000, "field": "syntax", "linguistics_term": "s-selection" }, "animate_subject_trans": { "accuracy": 0.603, "correct": 603, "total": 1000, "field": "syntax", "linguistics_term": "s-selection" }, "causative": { "accuracy": 0.633, "correct": 633, "total": 1000, "field": "syntax", "linguistics_term": "argument_structure" }, "complex_NP_island": { "accuracy": 0.557, "correct": 557, "total": 1000, "field": "syntax", "linguistics_term": "island_effects" }, "coordinate_structure_constraint_complex_left_branch": { "accuracy": 0.37, "correct": 370, "total": 1000, "field": "syntax", "linguistics_term": "island_effects" }, "coordinate_structure_constraint_object_extraction": { "accuracy": 0.674, "correct": 674, "total": 1000, "field": "syntax", "linguistics_term": "island_effects" }, "determiner_noun_agreement_1": { "accuracy": 0.797, "correct": 797, "total": 1000, "field": "morphology", "linguistics_term": "determiner_noun_agreement" }, "determiner_noun_agreement_2": { "accuracy": 0.894, "correct": 894, "total": 1000, "field": "morphology", "linguistics_term": "determiner_noun_agreement" }, "determiner_noun_agreement_irregular_1": { "accuracy": 0.692, "correct": 692, "total": 1000, "field": "morphology", "linguistics_term": "determiner_noun_agreement" }, "determiner_noun_agreement_irregular_2": { "accuracy": 0.825, "correct": 825, "total": 1000, "field": "morphology", "linguistics_term": "determiner_noun_agreement" }, "determiner_noun_agreement_with_adj_2": { "accuracy": 0.875, "correct": 875, "total": 1000, "field": "morphology", "linguistics_term": "determiner_noun_agreement" }, "determiner_noun_agreement_with_adj_irregular_1": { "accuracy": 0.701, "correct": 701, "total": 1000, "field": "morphology", "linguistics_term": "determiner_noun_agreement" }, "determiner_noun_agreement_with_adj_irregular_2": { "accuracy": 0.791, "correct": 791, "total": 1000, "field": "morphology", "linguistics_term": "determiner_noun_agreement" }, "determiner_noun_agreement_with_adjective_1": { "accuracy": 0.801, "correct": 801, "total": 1000, "field": "morphology", "linguistics_term": "determiner_noun_agreement" }, "distractor_agreement_relational_noun": { "accuracy": 0.499, "correct": 499, "total": 1000, "field": "morphology", "linguistics_term": "subject_verb_agreement" }, "distractor_agreement_relative_clause": { "accuracy": 0.422, "correct": 422, "total": 1000, "field": "morphology", "linguistics_term": "subject_verb_agreement" }, "drop_argument": { "accuracy": 0.428, "correct": 428, "total": 1000, "field": "syntax", "linguistics_term": "argument_structure" }, "ellipsis_n_bar_1": { "accuracy": 0.632, "correct": 632, "total": 1000, "field": "syntax", "linguistics_term": "ellipsis" }, "ellipsis_n_bar_2": { "accuracy": 0.863, "correct": 863, "total": 1000, "field": "syntax", "linguistics_term": "ellipsis" }, "existential_there_object_raising": { "accuracy": 0.622, "correct": 622, "total": 1000, "field": "syntax_semantics", "linguistics_term": "control_raising" }, "existential_there_quantifiers_1": { "accuracy": 0.891, "correct": 891, "total": 1000, "field": "semantics", "linguistics_term": "quantifiers" }, "existential_there_quantifiers_2": { "accuracy": 0.342, "correct": 342, "total": 1000, "field": "semantics", "linguistics_term": "quantifiers" }, "existential_there_subject_raising": { "accuracy": 0.57, "correct": 570, "total": 1000, "field": "syntax_semantics", "linguistics_term": "control_raising" }, "expletive_it_object_raising": { "accuracy": 0.651, "correct": 651, "total": 1000, "field": "syntax_semantics", "linguistics_term": "control_raising" }, "inchoative": { "accuracy": 0.625, "correct": 625, "total": 1000, "field": "syntax", "linguistics_term": "argument_structure" }, "intransitive": { "accuracy": 0.549, "correct": 549, "total": 1000, "field": "syntax", "linguistics_term": "argument_structure" }, "irregular_past_participle_adjectives": { "accuracy": 0.644, "correct": 644, "total": 1000, "field": "morphology", "linguistics_term": "irregular_forms" }, "irregular_past_participle_verbs": { "accuracy": 0.823, "correct": 823, "total": 1000, "field": "morphology", "linguistics_term": "irregular_forms" }, "irregular_plural_subject_verb_agreement_1": { "accuracy": 0.764, "correct": 764, "total": 1000, "field": "morphology", "linguistics_term": "subject_verb_agreement" }, "irregular_plural_subject_verb_agreement_2": { "accuracy": 0.762, "correct": 762, "total": 1000, "field": "morphology", "linguistics_term": "subject_verb_agreement" }, "left_branch_island_echo_question": { "accuracy": 0.233, "correct": 233, "total": 1000, "field": "syntax", "linguistics_term": "island_effects" }, "left_branch_island_simple_question": { "accuracy": 0.431, "correct": 431, "total": 1000, "field": "syntax", "linguistics_term": "island_effects" }, "matrix_question_npi_licensor_present": { "accuracy": 0.367, "correct": 367, "total": 1000, "field": "semantics", "linguistics_term": "npi_licensing" }, "npi_present_1": { "accuracy": 0.431, "correct": 431, "total": 1000, "field": "semantics", "linguistics_term": "npi_licensing" }, "npi_present_2": { "accuracy": 0.445, "correct": 445, "total": 1000, "field": "semantics", "linguistics_term": "npi_licensing" }, "only_npi_licensor_present": { "accuracy": 0.833, "correct": 833, "total": 1000, "field": "semantics", "linguistics_term": "npi_licensing" }, "only_npi_scope": { "accuracy": 0.772, "correct": 772, "total": 1000, "field": "syntax_semantics", "linguistics_term": "npi_licensing" }, "passive_1": { "accuracy": 0.711, "correct": 711, "total": 1000, "field": "syntax", "linguistics_term": "argument_structure" }, "passive_2": { "accuracy": 0.672, "correct": 672, "total": 1000, "field": "syntax", "linguistics_term": "argument_structure" }, "principle_A_c_command": { "accuracy": 0.421, "correct": 421, "total": 1000, "field": "syntax_semantics", "linguistics_term": "binding" }, "principle_A_case_1": { "accuracy": 0.871, "correct": 871, "total": 1000, "field": "syntax_semantics", "linguistics_term": "binding" }, "principle_A_case_2": { "accuracy": 0.847, "correct": 847, "total": 1000, "field": "syntax/semantics", "linguistics_term": "binding" }, "principle_A_domain_1": { "accuracy": 0.56, "correct": 560, "total": 1000, "field": "syntax_semantics", "linguistics_term": "binding" }, "principle_A_domain_2": { "accuracy": 0.484, "correct": 484, "total": 1000, "field": "syntax_semantics", "linguistics_term": "binding" }, "principle_A_domain_3": { "accuracy": 0.505, "correct": 505, "total": 1000, "field": "syntax_semantics", "linguistics_term": "binding" }, "principle_A_reconstruction": { "accuracy": 0.526, "correct": 526, "total": 1000, "field": "syntax_semantics", "linguistics_term": "binding" }, "regular_plural_subject_verb_agreement_1": { "accuracy": 0.743, "correct": 743, "total": 1000, "field": "morphology", "linguistics_term": "subject_verb_agreement" }, "regular_plural_subject_verb_agreement_2": { "accuracy": 0.755, "correct": 755, "total": 1000, "field": "morphology", "linguistics_term": "subject_verb_agreement" }, "sentential_negation_npi_licensor_present": { "accuracy": 0.899, "correct": 899, "total": 1000, "field": "semantics", "linguistics_term": "npi_licensing" }, "sentential_negation_npi_scope": { "accuracy": 0.46, "correct": 460, "total": 1000, "field": "syntax_semantics", "linguistics_term": "npi_licensing" }, "sentential_subject_island": { "accuracy": 0.326, "correct": 326, "total": 1000, "field": "syntax", "linguistics_term": "island_effects" }, "superlative_quantifiers_1": { "accuracy": 0.762, "correct": 762, "total": 1000, "field": "semantics", "linguistics_term": "quantifiers" }, "superlative_quantifiers_2": { "accuracy": 0.726, "correct": 726, "total": 1000, "field": "semantics", "linguistics_term": "quantifiers" }, "tough_vs_raising_1": { "accuracy": 0.717, "correct": 717, "total": 1000, "field": "syntax_semantics", "linguistics_term": "control_raising" }, "tough_vs_raising_2": { "accuracy": 0.449, "correct": 449, "total": 1000, "field": "syntax_semantics", "linguistics_term": "control_raising" }, "transitive": { "accuracy": 0.637, "correct": 637, "total": 1000, "field": "syntax", "linguistics_term": "argument_structure" }, "wh_island": { "accuracy": 0.712, "correct": 712, "total": 1000, "field": "syntax", "linguistics_term": "island_effects" }, "wh_questions_object_gap": { "accuracy": 0.515, "correct": 515, "total": 1000, "field": "syntax", "linguistics_term": "filler_gap_dependency" }, "wh_questions_subject_gap": { "accuracy": 0.732, "correct": 732, "total": 1000, "field": "syntax", "linguistics_term": "filler_gap_dependency" }, "wh_questions_subject_gap_long_distance": { "accuracy": 0.878, "correct": 878, "total": 1000, "field": "syntax", "linguistics_term": "filler_gap_dependency" }, "wh_vs_that_no_gap": { "accuracy": 0.913, "correct": 913, "total": 1000, "field": "syntax", "linguistics_term": "filler_gap_dependency" }, "wh_vs_that_no_gap_long_distance": { "accuracy": 0.946, "correct": 946, "total": 1000, "field": "syntax", "linguistics_term": "filler_gap_dependency" }, "wh_vs_that_with_gap": { "accuracy": 0.324, "correct": 324, "total": 1000, "field": "syntax", "linguistics_term": "filler_gap_dependency" }, "wh_vs_that_with_gap_long_distance": { "accuracy": 0.148, "correct": 148, "total": 1000, "field": "syntax", "linguistics_term": "filler_gap_dependency" } } }