初始化项目,由ModelHub XC社区提供模型
Model: Polygl0t/Tucano2-qwen-3.7B-Base Source: Original Platform
This commit is contained in:
190
evals.yaml
Normal file
190
evals.yaml
Normal file
@@ -0,0 +1,190 @@
|
||||
evaluations:
|
||||
arc_challenge_poly_pt_acc: 0.5230769230769231
|
||||
arc_challenge_poly_pt_acc_norm: 0.5777777777777777
|
||||
arc_challenge_poly_pt_acc_norm_stderr: 0.014445870094078068
|
||||
arc_challenge_poly_pt_acc_stderr: 0.014608300475750825
|
||||
arc_challenge_poly_pt_alias: arc_challenge_poly_pt
|
||||
assin2_rte_acc,all: 0.9252450980392157
|
||||
assin2_rte_acc_stderr,all: 0.0037560275279046665
|
||||
assin2_rte_alias: assin2_rte
|
||||
assin2_rte_f1_macro,all: 0.9251590635527494
|
||||
assin2_rte_f1_macro_stderr,all: 0.0037610728425282497
|
||||
assin2_sts_alias: assin2_sts
|
||||
assin2_sts_mse,all: 0.5572916666666665
|
||||
assin2_sts_mse_stderr,all: N/A
|
||||
assin2_sts_pearson,all: 0.7701197353926412
|
||||
assin2_sts_pearson_stderr,all: 0.006649667590414615
|
||||
assin_entailment_acc: 0.704
|
||||
assin_entailment_acc_stderr: 0.00721865827261647
|
||||
assin_entailment_alias: assin_entailment
|
||||
assin_paraphrase_acc: 0.694
|
||||
assin_paraphrase_acc_stderr: 0.007287268079947193
|
||||
assin_paraphrase_alias: assin_paraphrase
|
||||
belebele_por_Latn_acc: 0.8366666666666667
|
||||
belebele_por_Latn_acc_norm: 0.8366666666666667
|
||||
belebele_por_Latn_acc_norm_stderr: 0.012329168844652528
|
||||
belebele_por_Latn_acc_stderr: 0.012329168844652528
|
||||
belebele_por_Latn_alias: belebele_por_Latn
|
||||
bluex_acc,all: 0.6620305980528511
|
||||
bluex_acc,exam_id__UNICAMP_2018: 0.6481481481481481
|
||||
bluex_acc,exam_id__UNICAMP_2019: 0.64
|
||||
bluex_acc,exam_id__UNICAMP_2020: 0.6909090909090909
|
||||
bluex_acc,exam_id__UNICAMP_2021_1: 0.6521739130434783
|
||||
bluex_acc,exam_id__UNICAMP_2021_2: 0.5882352941176471
|
||||
bluex_acc,exam_id__UNICAMP_2022: 0.6666666666666666
|
||||
bluex_acc,exam_id__UNICAMP_2023: 0.7209302325581395
|
||||
bluex_acc,exam_id__UNICAMP_2024: 0.6444444444444445
|
||||
bluex_acc,exam_id__USP_2018: 0.5925925925925926
|
||||
bluex_acc,exam_id__USP_2019: 0.7
|
||||
bluex_acc,exam_id__USP_2020: 0.6607142857142857
|
||||
bluex_acc,exam_id__USP_2021: 0.75
|
||||
bluex_acc,exam_id__USP_2022: 0.5918367346938775
|
||||
bluex_acc,exam_id__USP_2023: 0.7045454545454546
|
||||
bluex_acc,exam_id__USP_2024: 0.7073170731707317
|
||||
bluex_acc_stderr,all: 0.010157757528559894
|
||||
bluex_acc_stderr,exam_id__UNICAMP_2018: 0.037594875406546435
|
||||
bluex_acc_stderr,exam_id__UNICAMP_2019: 0.03953375278949041
|
||||
bluex_acc_stderr,exam_id__UNICAMP_2020: 0.03605420458368598
|
||||
bluex_acc_stderr,exam_id__UNICAMP_2021_1: 0.0405315180666698
|
||||
bluex_acc_stderr,exam_id__UNICAMP_2021_2: 0.039752636457935614
|
||||
bluex_acc_stderr,exam_id__UNICAMP_2022: 0.04360776726045774
|
||||
bluex_acc_stderr,exam_id__UNICAMP_2023: 0.039432470230869696
|
||||
bluex_acc_stderr,exam_id__UNICAMP_2024: 0.041213076472343936
|
||||
bluex_acc_stderr,exam_id__USP_2018: 0.038650432775192395
|
||||
bluex_acc_stderr,exam_id__USP_2019: 0.04175277819931915
|
||||
bluex_acc_stderr,exam_id__USP_2020: 0.03644397183647981
|
||||
bluex_acc_stderr,exam_id__USP_2021: 0.034773619645811646
|
||||
bluex_acc_stderr,exam_id__USP_2022: 0.040532053004604704
|
||||
bluex_acc_stderr,exam_id__USP_2023: 0.039742872820681924
|
||||
bluex_acc_stderr,exam_id__USP_2024: 0.040951553558739306
|
||||
bluex_alias: bluex
|
||||
calame_pt_acc: 0.6107899807321773
|
||||
calame_pt_acc_stderr: 0.0107035762556229
|
||||
calame_pt_alias: calame_pt
|
||||
calame_pt_perplexity: 5.713055201421455
|
||||
calame_pt_perplexity_stderr: 0.29495381614560345
|
||||
enem_challenge_acc,all: 0.7753673897830651
|
||||
enem_challenge_acc,exam_id__2009: 0.7478260869565218
|
||||
enem_challenge_acc,exam_id__2010: 0.811965811965812
|
||||
enem_challenge_acc,exam_id__2011: 0.8461538461538461
|
||||
enem_challenge_acc,exam_id__2012: 0.8448275862068966
|
||||
enem_challenge_acc,exam_id__2013: 0.7777777777777778
|
||||
enem_challenge_acc,exam_id__2014: 0.8073394495412844
|
||||
enem_challenge_acc,exam_id__2015: 0.8067226890756303
|
||||
enem_challenge_acc,exam_id__2016: 0.743801652892562
|
||||
enem_challenge_acc,exam_id__2016_2: 0.7154471544715447
|
||||
enem_challenge_acc,exam_id__2017: 0.75
|
||||
enem_challenge_acc,exam_id__2022: 0.6842105263157895
|
||||
enem_challenge_acc,exam_id__2023: 0.7851851851851852
|
||||
enem_challenge_acc_stderr,all: 0.006377145135723042
|
||||
enem_challenge_acc_stderr,exam_id__2009: 0.023447252641875988
|
||||
enem_challenge_acc_stderr,exam_id__2010: 0.02087704326839612
|
||||
enem_challenge_acc_stderr,exam_id__2011: 0.01921565112452091
|
||||
enem_challenge_acc_stderr,exam_id__2012: 0.01944793905815595
|
||||
enem_challenge_acc_stderr,exam_id__2013: 0.023084030560191867
|
||||
enem_challenge_acc_stderr,exam_id__2014: 0.021892563584984096
|
||||
enem_challenge_acc_stderr,exam_id__2015: 0.020893018955083217
|
||||
enem_challenge_acc_stderr,exam_id__2016: 0.022776450345788787
|
||||
enem_challenge_acc_stderr,exam_id__2016_2: 0.023503035027562222
|
||||
enem_challenge_acc_stderr,exam_id__2017: 0.023138027075607918
|
||||
enem_challenge_acc_stderr,exam_id__2022: 0.02320588990454305
|
||||
enem_challenge_acc_stderr,exam_id__2023: 0.020404682391600704
|
||||
enem_challenge_alias: enem
|
||||
faquad_nli_acc,all: 0.7876923076923077
|
||||
faquad_nli_acc_stderr,all: 0.01133278097111669
|
||||
faquad_nli_alias: faquad_nli
|
||||
faquad_nli_f1_macro,all: 0.45449901481427424
|
||||
faquad_nli_f1_macro_stderr,all: 0.008069363645658589
|
||||
global_piqa_completions_por_latn_braz_acc: 0.84
|
||||
global_piqa_completions_por_latn_braz_acc_bytes: 0.83
|
||||
global_piqa_completions_por_latn_braz_acc_bytes_stderr: 0.03775251680686369
|
||||
global_piqa_completions_por_latn_braz_acc_norm: 0.83
|
||||
global_piqa_completions_por_latn_braz_acc_norm_stderr: 0.03775251680686369
|
||||
global_piqa_completions_por_latn_braz_acc_stderr: 0.03684529491774706
|
||||
global_piqa_completions_por_latn_braz_alias: global_piqa_completions_por_latn_braz
|
||||
hatebr_offensive_acc,all: 0.665
|
||||
hatebr_offensive_acc_stderr,all: 0.00890653495166499
|
||||
hatebr_offensive_alias: hatebr_offensive_binary
|
||||
hatebr_offensive_f1_macro,all: 0.6234605955470173
|
||||
hatebr_offensive_f1_macro_stderr,all: 0.00951858317428499
|
||||
hellaswag_poly_pt_acc: 0.4838010618701918
|
||||
hellaswag_poly_pt_acc_norm: 0.6531585220500596
|
||||
hellaswag_poly_pt_acc_norm_stderr: 0.004954741713215018
|
||||
hellaswag_poly_pt_acc_stderr: 0.00520221346811777
|
||||
hellaswag_poly_pt_alias: hellaswag_poly_pt
|
||||
lambada_poly_pt_acc: 0.6252668348534834
|
||||
lambada_poly_pt_acc_stderr: 0.006743817908692071
|
||||
lambada_poly_pt_alias: lambada_poly_pt
|
||||
lambada_poly_pt_perplexity: 6.574656712295472
|
||||
lambada_poly_pt_perplexity_stderr: 0.18832300331707774
|
||||
mmlu_poly_pt_acc: 0.6540078054638246
|
||||
mmlu_poly_pt_acc_stderr: 0.004121199159002156
|
||||
mmlu_poly_pt_alias: mmlu_poly_pt
|
||||
oab_exams_acc,all: 0.584510250569476
|
||||
oab_exams_acc,exam_id__2010-01: 0.3764705882352941
|
||||
oab_exams_acc,exam_id__2010-02: 0.59
|
||||
oab_exams_acc,exam_id__2011-03: 0.5656565656565656
|
||||
oab_exams_acc,exam_id__2011-04: 0.5125
|
||||
oab_exams_acc,exam_id__2011-05: 0.6625
|
||||
oab_exams_acc,exam_id__2012-06: 0.625
|
||||
oab_exams_acc,exam_id__2012-06a: 0.7375
|
||||
oab_exams_acc,exam_id__2012-07: 0.6125
|
||||
oab_exams_acc,exam_id__2012-08: 0.55
|
||||
oab_exams_acc,exam_id__2012-09: 0.4805194805194805
|
||||
oab_exams_acc,exam_id__2013-10: 0.65
|
||||
oab_exams_acc,exam_id__2013-11: 0.625
|
||||
oab_exams_acc,exam_id__2013-12: 0.65
|
||||
oab_exams_acc,exam_id__2014-13: 0.525
|
||||
oab_exams_acc,exam_id__2014-14: 0.625
|
||||
oab_exams_acc,exam_id__2014-15: 0.6538461538461539
|
||||
oab_exams_acc,exam_id__2015-16: 0.5875
|
||||
oab_exams_acc,exam_id__2015-17: 0.5897435897435898
|
||||
oab_exams_acc,exam_id__2015-18: 0.575
|
||||
oab_exams_acc,exam_id__2016-19: 0.5897435897435898
|
||||
oab_exams_acc,exam_id__2016-20: 0.6125
|
||||
oab_exams_acc,exam_id__2016-20a: 0.55
|
||||
oab_exams_acc,exam_id__2016-21: 0.4875
|
||||
oab_exams_acc,exam_id__2017-22: 0.6375
|
||||
oab_exams_acc,exam_id__2017-23: 0.525
|
||||
oab_exams_acc,exam_id__2017-24: 0.6375
|
||||
oab_exams_acc,exam_id__2018-25: 0.5625
|
||||
oab_exams_acc_stderr,all: 0.006071412237214423
|
||||
oab_exams_acc_stderr,exam_id__2010-01: 0.030361740131894334
|
||||
oab_exams_acc_stderr,exam_id__2010-02: 0.028445183897774385
|
||||
oab_exams_acc_stderr,exam_id__2011-03: 0.02862790167127372
|
||||
oab_exams_acc_stderr,exam_id__2011-04: 0.03224493616787287
|
||||
oab_exams_acc_stderr,exam_id__2011-05: 0.030467009008680036
|
||||
oab_exams_acc_stderr,exam_id__2012-06: 0.031220571629946996
|
||||
oab_exams_acc_stderr,exam_id__2012-06a: 0.02843559794708646
|
||||
oab_exams_acc_stderr,exam_id__2012-07: 0.03139589988285276
|
||||
oab_exams_acc_stderr,exam_id__2012-08: 0.03226132851591818
|
||||
oab_exams_acc_stderr,exam_id__2012-09: 0.032830805301195386
|
||||
oab_exams_acc_stderr,exam_id__2013-10: 0.030611536360793473
|
||||
oab_exams_acc_stderr,exam_id__2013-11: 0.031310092407276585
|
||||
oab_exams_acc_stderr,exam_id__2013-12: 0.030692719997990617
|
||||
oab_exams_acc_stderr,exam_id__2014-13: 0.032358129209763435
|
||||
oab_exams_acc_stderr,exam_id__2014-14: 0.031382714558388446
|
||||
oab_exams_acc_stderr,exam_id__2014-15: 0.03109637957099322
|
||||
oab_exams_acc_stderr,exam_id__2015-16: 0.031957806650269406
|
||||
oab_exams_acc_stderr,exam_id__2015-17: 0.03208206142728883
|
||||
oab_exams_acc_stderr,exam_id__2015-18: 0.03182114971496286
|
||||
oab_exams_acc_stderr,exam_id__2016-19: 0.03228511767428725
|
||||
oab_exams_acc_stderr,exam_id__2016-20: 0.031372223696958024
|
||||
oab_exams_acc_stderr,exam_id__2016-20a: 0.0321935167686262
|
||||
oab_exams_acc_stderr,exam_id__2016-21: 0.03209267502993051
|
||||
oab_exams_acc_stderr,exam_id__2017-22: 0.03105400471909683
|
||||
oab_exams_acc_stderr,exam_id__2017-23: 0.03235792164586319
|
||||
oab_exams_acc_stderr,exam_id__2017-24: 0.031098329350728315
|
||||
oab_exams_acc_stderr,exam_id__2018-25: 0.03209246971016282
|
||||
oab_exams_alias: oab_exams
|
||||
portuguese_hate_speech_acc,all: 0.6145710928319624
|
||||
portuguese_hate_speech_acc_stderr,all: 0.011835075822813054
|
||||
portuguese_hate_speech_alias: portuguese_hate_speech_binary
|
||||
portuguese_hate_speech_f1_macro,all: 0.6103088177807561
|
||||
portuguese_hate_speech_f1_macro_stderr,all: 0.011900091760317547
|
||||
tweetsentbr_acc,all: 0.7298507462686568
|
||||
tweetsentbr_acc_stderr,all: 0.006986496038388035
|
||||
tweetsentbr_alias: tweetsentbr
|
||||
tweetsentbr_f1_macro,all: 0.7027752533485003
|
||||
tweetsentbr_f1_macro_stderr,all: 0.007392699151541939
|
||||
step: 50000
|
||||
Reference in New Issue
Block a user