tim-lawson's picture
Upload folder using huggingface_hub
6d44e1e verified
{"arc_easy": {"alias": "arc_easy", "acc,none": 0.31776094276094274, "acc_stderr,none": 0.009554033064443069, "acc_norm,none": 0.3042929292929293, "acc_norm_stderr,none": 0.009441202922359051}, "blimp": {"acc,none": 0.6106268656716418, "acc_stderr,none": 0.0017406485035674183, "alias": "blimp"}, "blimp_adjunct_island": {"alias": " - blimp_adjunct_island", "acc,none": 0.878, "acc_stderr,none": 0.010354864712936776}, "blimp_anaphor_gender_agreement": {"alias": " - blimp_anaphor_gender_agreement", "acc,none": 0.788, "acc_stderr,none": 0.01293148186493811}, "blimp_anaphor_number_agreement": {"alias": " - blimp_anaphor_number_agreement", "acc,none": 0.77, "acc_stderr,none": 0.01331455133593608}, "blimp_animate_subject_passive": {"alias": " - blimp_animate_subject_passive", "acc,none": 0.565, "acc_stderr,none": 0.015685057252717346}, "blimp_animate_subject_trans": {"alias": " - blimp_animate_subject_trans", "acc,none": 0.543, "acc_stderr,none": 0.0157606915901365}, "blimp_causative": {"alias": " - blimp_causative", "acc,none": 0.652, "acc_stderr,none": 0.015070604603768328}, "blimp_complex_NP_island": {"alias": " - blimp_complex_NP_island", "acc,none": 0.539, "acc_stderr,none": 0.015771104201283047}, "blimp_coordinate_structure_constraint_complex_left_branch": {"alias": " - blimp_coordinate_structure_constraint_complex_left_branch", "acc,none": 0.597, "acc_stderr,none": 0.015518757419066512}, "blimp_coordinate_structure_constraint_object_extraction": {"alias": " - blimp_coordinate_structure_constraint_object_extraction", "acc,none": 0.525, "acc_stderr,none": 0.015799513429996023}, "blimp_determiner_noun_agreement_1": {"alias": " - blimp_determiner_noun_agreement_1", "acc,none": 0.86, "acc_stderr,none": 0.010978183844357888}, "blimp_determiner_noun_agreement_2": {"alias": " - blimp_determiner_noun_agreement_2", "acc,none": 0.723, "acc_stderr,none": 0.014158794845306273}, "blimp_determiner_noun_agreement_irregular_1": {"alias": " - blimp_determiner_noun_agreement_irregular_1", "acc,none": 0.801, "acc_stderr,none": 0.01263164908309911}, "blimp_determiner_noun_agreement_irregular_2": {"alias": " - blimp_determiner_noun_agreement_irregular_2", "acc,none": 0.773, "acc_stderr,none": 0.013253174964763977}, "blimp_determiner_noun_agreement_with_adj_2": {"alias": " - blimp_determiner_noun_agreement_with_adj_2", "acc,none": 0.615, "acc_stderr,none": 0.015395194445410945}, "blimp_determiner_noun_agreement_with_adj_irregular_1": {"alias": " - blimp_determiner_noun_agreement_with_adj_irregular_1", "acc,none": 0.64, "acc_stderr,none": 0.015186527932039985}, "blimp_determiner_noun_agreement_with_adj_irregular_2": {"alias": " - blimp_determiner_noun_agreement_with_adj_irregular_2", "acc,none": 0.584, "acc_stderr,none": 0.015594460144140522}, "blimp_determiner_noun_agreement_with_adjective_1": {"alias": " - blimp_determiner_noun_agreement_with_adjective_1", "acc,none": 0.726, "acc_stderr,none": 0.01411109928825969}, "blimp_distractor_agreement_relational_noun": {"alias": " - blimp_distractor_agreement_relational_noun", "acc,none": 0.649, "acc_stderr,none": 0.015100563798316508}, "blimp_distractor_agreement_relative_clause": {"alias": " - blimp_distractor_agreement_relative_clause", "acc,none": 0.575, "acc_stderr,none": 0.01564032031704017}, "blimp_drop_argument": {"alias": " - blimp_drop_argument", "acc,none": 0.765, "acc_stderr,none": 0.013414729030247124}, "blimp_ellipsis_n_bar_1": {"alias": " - blimp_ellipsis_n_bar_1", "acc,none": 0.716, "acc_stderr,none": 0.014267009061031241}, "blimp_ellipsis_n_bar_2": {"alias": " - blimp_ellipsis_n_bar_2", "acc,none": 0.383, "acc_stderr,none": 0.0153801023256527}, "blimp_existential_there_object_raising": {"alias": " - blimp_existential_there_object_raising", "acc,none": 0.618, "acc_stderr,none": 0.015372453034968502}, "blimp_existential_there_quantifiers_1": {"alias": " - blimp_existential_there_quantifiers_1", "acc,none": 0.823, "acc_stderr,none": 0.012075463420375087}, "blimp_existential_there_quantifiers_2": {"alias": " - blimp_existential_there_quantifiers_2", "acc,none": 0.325, "acc_stderr,none": 0.01481872445909548}, "blimp_existential_there_subject_raising": {"alias": " - blimp_existential_there_subject_raising", "acc,none": 0.488, "acc_stderr,none": 0.015814743314581686}, "blimp_expletive_it_object_raising": {"alias": " - blimp_expletive_it_object_raising", "acc,none": 0.556, "acc_stderr,none": 0.015719768163402127}, "blimp_inchoative": {"alias": " - blimp_inchoative", "acc,none": 0.746, "acc_stderr,none": 0.013772206565168622}, "blimp_intransitive": {"alias": " - blimp_intransitive", "acc,none": 0.872, "acc_stderr,none": 0.010570133761108595}, "blimp_irregular_past_participle_adjectives": {"alias": " - blimp_irregular_past_participle_adjectives", "acc,none": 0.809, "acc_stderr,none": 0.012436787112179482}, "blimp_irregular_past_participle_verbs": {"alias": " - blimp_irregular_past_participle_verbs", "acc,none": 0.571, "acc_stderr,none": 0.015658997547870177}, "blimp_irregular_plural_subject_verb_agreement_1": {"alias": " - blimp_irregular_plural_subject_verb_agreement_1", "acc,none": 0.729, "acc_stderr,none": 0.014062601350986125}, "blimp_irregular_plural_subject_verb_agreement_2": {"alias": " - blimp_irregular_plural_subject_verb_agreement_2", "acc,none": 0.733, "acc_stderr,none": 0.013996674851796299}, "blimp_left_branch_island_echo_question": {"alias": " - blimp_left_branch_island_echo_question", "acc,none": 0.045, "acc_stderr,none": 0.006558812241406063}, "blimp_left_branch_island_simple_question": {"alias": " - blimp_left_branch_island_simple_question", "acc,none": 0.571, "acc_stderr,none": 0.015658997547870177}, "blimp_matrix_question_npi_licensor_present": {"alias": " - blimp_matrix_question_npi_licensor_present", "acc,none": 0.784, "acc_stderr,none": 0.013019735539307761}, "blimp_npi_present_1": {"alias": " - blimp_npi_present_1", "acc,none": 0.477, "acc_stderr,none": 0.015802554246726087}, "blimp_npi_present_2": {"alias": " - blimp_npi_present_2", "acc,none": 0.255, "acc_stderr,none": 0.013790038620872863}, "blimp_only_npi_licensor_present": {"alias": " - blimp_only_npi_licensor_present", "acc,none": 0.832, "acc_stderr,none": 0.011828605831454352}, "blimp_only_npi_scope": {"alias": " - blimp_only_npi_scope", "acc,none": 0.819, "acc_stderr,none": 0.012181436179178027}, "blimp_passive_1": {"alias": " - blimp_passive_1", "acc,none": 0.543, "acc_stderr,none": 0.0157606915901365}, "blimp_passive_2": {"alias": " - blimp_passive_2", "acc,none": 0.716, "acc_stderr,none": 0.014267009061031241}, "blimp_principle_A_c_command": {"alias": " - blimp_principle_A_c_command", "acc,none": 0.504, "acc_stderr,none": 0.01581879370351084}, "blimp_principle_A_case_1": {"alias": " - blimp_principle_A_case_1", "acc,none": 0.98, "acc_stderr,none": 0.004429403980178329}, "blimp_principle_A_case_2": {"alias": " - blimp_principle_A_case_2", "acc,none": 0.472, "acc_stderr,none": 0.015794475789511517}, "blimp_principle_A_domain_1": {"alias": " - blimp_principle_A_domain_1", "acc,none": 0.292, "acc_stderr,none": 0.014385511563477432}, "blimp_principle_A_domain_2": {"alias": " - blimp_principle_A_domain_2", "acc,none": 0.674, "acc_stderr,none": 0.014830507204541049}, "blimp_principle_A_domain_3": {"alias": " - blimp_principle_A_domain_3", "acc,none": 0.532, "acc_stderr,none": 0.015786868759359002}, "blimp_principle_A_reconstruction": {"alias": " - blimp_principle_A_reconstruction", "acc,none": 0.528, "acc_stderr,none": 0.015794475789511517}, "blimp_regular_plural_subject_verb_agreement_1": {"alias": " - blimp_regular_plural_subject_verb_agreement_1", "acc,none": 0.522, "acc_stderr,none": 0.01580397942816194}, "blimp_regular_plural_subject_verb_agreement_2": {"alias": " - blimp_regular_plural_subject_verb_agreement_2", "acc,none": 0.695, "acc_stderr,none": 0.014566646394664325}, "blimp_sentential_negation_npi_licensor_present": {"alias": " - blimp_sentential_negation_npi_licensor_present", "acc,none": 0.618, "acc_stderr,none": 0.015372453034968502}, "blimp_sentential_negation_npi_scope": {"alias": " - blimp_sentential_negation_npi_scope", "acc,none": 0.208, "acc_stderr,none": 0.01284137457209706}, "blimp_sentential_subject_island": {"alias": " - blimp_sentential_subject_island", "acc,none": 0.526, "acc_stderr,none": 0.015797897758042797}, "blimp_superlative_quantifiers_1": {"alias": " - blimp_superlative_quantifiers_1", "acc,none": 0.595, "acc_stderr,none": 0.01553113699045296}, "blimp_superlative_quantifiers_2": {"alias": " - blimp_superlative_quantifiers_2", "acc,none": 0.195, "acc_stderr,none": 0.012535235623319256}, "blimp_tough_vs_raising_1": {"alias": " - blimp_tough_vs_raising_1", "acc,none": 0.915, "acc_stderr,none": 0.008823426366942316}, "blimp_tough_vs_raising_2": {"alias": " - blimp_tough_vs_raising_2", "acc,none": 0.289, "acc_stderr,none": 0.014341711358296287}, "blimp_transitive": {"alias": " - blimp_transitive", "acc,none": 0.517, "acc_stderr,none": 0.015810153729833274}, "blimp_wh_island": {"alias": " - blimp_wh_island", "acc,none": 0.728, "acc_stderr,none": 0.014078856992462642}, "blimp_wh_questions_object_gap": {"alias": " - blimp_wh_questions_object_gap", "acc,none": 0.528, "acc_stderr,none": 0.015794475789511517}, "blimp_wh_questions_subject_gap": {"alias": " - blimp_wh_questions_subject_gap", "acc,none": 0.667, "acc_stderr,none": 0.014910846164230029}, "blimp_wh_questions_subject_gap_long_distance": {"alias": " - blimp_wh_questions_subject_gap_long_distance", "acc,none": 0.485, "acc_stderr,none": 0.01581217964181488}, "blimp_wh_vs_that_no_gap": {"alias": " - blimp_wh_vs_that_no_gap", "acc,none": 0.395, "acc_stderr,none": 0.015466551464829328}, "blimp_wh_vs_that_no_gap_long_distance": {"alias": " - blimp_wh_vs_that_no_gap_long_distance", "acc,none": 0.486, "acc_stderr,none": 0.01581309754773093}, "blimp_wh_vs_that_with_gap": {"alias": " - blimp_wh_vs_that_with_gap", "acc,none": 0.917, "acc_stderr,none": 0.008728527206074756}, "blimp_wh_vs_that_with_gap_long_distance": {"alias": " - blimp_wh_vs_that_with_gap_long_distance", "acc,none": 0.665, "acc_stderr,none": 0.014933117490932629}, "c4": {"alias": "c4", "word_perplexity,none": 636.7122239909488, "word_perplexity_stderr,none": "N/A", "byte_perplexity,none": 2.9407928004510886, "byte_perplexity_stderr,none": "N/A", "bits_per_byte,none": 1.5562051397869785, "bits_per_byte_stderr,none": "N/A"}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.2621987651862179, "acc_stderr,none": 0.004389312748012449, "acc_norm,none": 0.26379207329217286, "acc_norm_stderr,none": 0.004397872471854581}, "piqa": {"alias": "piqa", "acc,none": 0.5750816104461371, "acc_stderr,none": 0.011533547946654874, "acc_norm,none": 0.5756256800870512, "acc_norm_stderr,none": 0.011531612758871207}, "wikitext": {"alias": "wikitext", "word_perplexity,none": 525.0611032812346, "word_perplexity_stderr,none": "N/A", "byte_perplexity,none": 3.226211529882843, "byte_perplexity_stderr,none": "N/A", "bits_per_byte,none": 1.6898410333814244, "bits_per_byte_stderr,none": "N/A"}}