初始化项目,由ModelHub XC社区提供模型

Model: Polygl0t/Tucano2-qwen-1.5B-Base
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-06-07 02:18:18 +08:00
commit 9f2b6e29ef
33 changed files with 465309 additions and 0 deletions

190
evals.yaml Normal file
View File

@@ -0,0 +1,190 @@
evaluations:
arc_challenge_poly_pt_acc: 0.44529914529914527
arc_challenge_poly_pt_acc_norm: 0.48205128205128206
arc_challenge_poly_pt_acc_norm_stderr: 0.014614459118720773
arc_challenge_poly_pt_acc_stderr: 0.014536106383401307
arc_challenge_poly_pt_alias: arc_challenge_poly_pt
assin2_rte_acc,all: 0.8766339869281046
assin2_rte_acc_stderr,all: 0.004699176594010998
assin2_rte_alias: assin2_rte
assin2_rte_f1_macro,all: 0.8755544782450612
assin2_rte_f1_macro_stderr,all: 0.004739218474976754
assin2_sts_alias: assin2_sts
assin2_sts_mse,all: 1.0735661764705884
assin2_sts_mse_stderr,all: N/A
assin2_sts_pearson,all: 0.6290850483582386
assin2_sts_pearson_stderr,all: 0.009612669804680212
assin_entailment_acc: 0.708
assin_entailment_acc_stderr: 0.007190057317647597
assin_entailment_alias: assin_entailment
assin_paraphrase_acc: 0.72475
assin_paraphrase_acc_stderr: 0.007062884004258771
assin_paraphrase_alias: assin_paraphrase
belebele_por_Latn_acc: 0.74
belebele_por_Latn_acc_norm: 0.74
belebele_por_Latn_acc_norm_stderr: 0.014629271097998421
belebele_por_Latn_acc_stderr: 0.014629271097998421
belebele_por_Latn_alias: belebele_por_Latn
bluex_acc,all: 0.5591098748261474
bluex_acc,exam_id__UNICAMP_2018: 0.5370370370370371
bluex_acc,exam_id__UNICAMP_2019: 0.6
bluex_acc,exam_id__UNICAMP_2020: 0.509090909090909
bluex_acc,exam_id__UNICAMP_2021_1: 0.6304347826086957
bluex_acc,exam_id__UNICAMP_2021_2: 0.47058823529411764
bluex_acc,exam_id__UNICAMP_2022: 0.6923076923076923
bluex_acc,exam_id__UNICAMP_2023: 0.6511627906976745
bluex_acc,exam_id__UNICAMP_2024: 0.5555555555555556
bluex_acc,exam_id__USP_2018: 0.42592592592592593
bluex_acc,exam_id__USP_2019: 0.4
bluex_acc,exam_id__USP_2020: 0.5535714285714286
bluex_acc,exam_id__USP_2021: 0.6346153846153846
bluex_acc,exam_id__USP_2022: 0.4897959183673469
bluex_acc,exam_id__USP_2023: 0.6136363636363636
bluex_acc,exam_id__USP_2024: 0.6829268292682927
bluex_acc_stderr,all: 0.01069785624296974
bluex_acc_stderr,exam_id__UNICAMP_2018: 0.039296745462938605
bluex_acc_stderr,exam_id__UNICAMP_2019: 0.04014798243504816
bluex_acc_stderr,exam_id__UNICAMP_2020: 0.03888891915912078
bluex_acc_stderr,exam_id__UNICAMP_2021_1: 0.0411282805992433
bluex_acc_stderr,exam_id__UNICAMP_2021_2: 0.04024244267609041
bluex_acc_stderr,exam_id__UNICAMP_2022: 0.04269098796102326
bluex_acc_stderr,exam_id__UNICAMP_2023: 0.041929332285094205
bluex_acc_stderr,exam_id__UNICAMP_2024: 0.04271556020713639
bluex_acc_stderr,exam_id__USP_2018: 0.038960456443585575
bluex_acc_stderr,exam_id__USP_2019: 0.04470992542423865
bluex_acc_stderr,exam_id__USP_2020: 0.03835558472845869
bluex_acc_stderr,exam_id__USP_2021: 0.03851223021094464
bluex_acc_stderr,exam_id__USP_2022: 0.0410194387799713
bluex_acc_stderr,exam_id__USP_2023: 0.04234932088737962
bluex_acc_stderr,exam_id__USP_2024: 0.0418113153523233
bluex_alias: bluex
calame_pt_acc: 0.5905587668593449
calame_pt_acc_stderr: 0.010794891914388602
calame_pt_alias: calame_pt
calame_pt_perplexity: 7.008747913313241
calame_pt_perplexity_stderr: 0.40940358093832135
enem_challenge_acc,all: 0.6871938418474458
enem_challenge_acc,exam_id__2009: 0.6782608695652174
enem_challenge_acc,exam_id__2010: 0.717948717948718
enem_challenge_acc,exam_id__2011: 0.7521367521367521
enem_challenge_acc,exam_id__2012: 0.7068965517241379
enem_challenge_acc,exam_id__2013: 0.6666666666666666
enem_challenge_acc,exam_id__2014: 0.6972477064220184
enem_challenge_acc,exam_id__2015: 0.7058823529411765
enem_challenge_acc,exam_id__2016: 0.6611570247933884
enem_challenge_acc,exam_id__2016_2: 0.6422764227642277
enem_challenge_acc,exam_id__2017: 0.6896551724137931
enem_challenge_acc,exam_id__2022: 0.631578947368421
enem_challenge_acc,exam_id__2023: 0.7037037037037037
enem_challenge_acc_stderr,all: 0.0070891143834158395
enem_challenge_acc_stderr,exam_id__2009: 0.0251403029631727
enem_challenge_acc_stderr,exam_id__2010: 0.02405435432253117
enem_challenge_acc_stderr,exam_id__2011: 0.023038334357693698
enem_challenge_acc_stderr,exam_id__2012: 0.02443296265724745
enem_challenge_acc_stderr,exam_id__2013: 0.02625818903872996
enem_challenge_acc_stderr,exam_id__2014: 0.02543475203567573
enem_challenge_acc_stderr,exam_id__2015: 0.0241011316238719
enem_challenge_acc_stderr,exam_id__2016: 0.02479881231135445
enem_challenge_acc_stderr,exam_id__2016_2: 0.024985945100694615
enem_challenge_acc_stderr,exam_id__2017: 0.024741511708920926
enem_challenge_acc_stderr,exam_id__2022: 0.024112138519174948
enem_challenge_acc_stderr,exam_id__2023: 0.022685440228473772
enem_challenge_alias: enem
faquad_nli_acc,all: 0.7846153846153846
faquad_nli_acc_stderr,all: 0.011396120309131366
faquad_nli_alias: faquad_nli
faquad_nli_f1_macro,all: 0.4396551724137931
faquad_nli_f1_macro_stderr,all: 0.00357969847290883
global_piqa_completions_por_latn_braz_acc: 0.8
global_piqa_completions_por_latn_braz_acc_bytes: 0.77
global_piqa_completions_por_latn_braz_acc_bytes_stderr: 0.042295258468165065
global_piqa_completions_por_latn_braz_acc_norm: 0.77
global_piqa_completions_por_latn_braz_acc_norm_stderr: 0.042295258468165065
global_piqa_completions_por_latn_braz_acc_stderr: 0.04020151261036849
global_piqa_completions_por_latn_braz_alias: global_piqa_completions_por_latn_braz
hatebr_offensive_acc,all: 0.8064285714285714
hatebr_offensive_acc_stderr,all: 0.0074826455677965455
hatebr_offensive_alias: hatebr_offensive_binary
hatebr_offensive_f1_macro,all: 0.801107069296415
hatebr_offensive_f1_macro_stderr,all: 0.007665138669900729
hellaswag_poly_pt_acc: 0.42539820132192
hellaswag_poly_pt_acc_norm: 0.5624661393433742
hellaswag_poly_pt_acc_norm_stderr: 0.005164166461307016
hellaswag_poly_pt_acc_stderr: 0.005146684217488626
hellaswag_poly_pt_alias: hellaswag_poly_pt
lambada_poly_pt_acc: 0.5420143605666602
lambada_poly_pt_acc_stderr: 0.006941341313928234
lambada_poly_pt_alias: lambada_poly_pt
lambada_poly_pt_perplexity: 9.820716308685725
lambada_poly_pt_perplexity_stderr: 0.3120846033602529
mmlu_poly_pt_acc: 0.5403782647853498
mmlu_poly_pt_acc_stderr: 0.004317657624183865
mmlu_poly_pt_alias: mmlu_poly_pt
oab_exams_acc,all: 0.48291571753986334
oab_exams_acc,exam_id__2010-01: 0.4588235294117647
oab_exams_acc,exam_id__2010-02: 0.51
oab_exams_acc,exam_id__2011-03: 0.46464646464646464
oab_exams_acc,exam_id__2011-04: 0.45
oab_exams_acc,exam_id__2011-05: 0.5
oab_exams_acc,exam_id__2012-06: 0.4625
oab_exams_acc,exam_id__2012-06a: 0.525
oab_exams_acc,exam_id__2012-07: 0.5
oab_exams_acc,exam_id__2012-08: 0.4625
oab_exams_acc,exam_id__2012-09: 0.33766233766233766
oab_exams_acc,exam_id__2013-10: 0.525
oab_exams_acc,exam_id__2013-11: 0.525
oab_exams_acc,exam_id__2013-12: 0.525
oab_exams_acc,exam_id__2014-13: 0.475
oab_exams_acc,exam_id__2014-14: 0.5375
oab_exams_acc,exam_id__2014-15: 0.5641025641025641
oab_exams_acc,exam_id__2015-16: 0.5375
oab_exams_acc,exam_id__2015-17: 0.5384615384615384
oab_exams_acc,exam_id__2015-18: 0.4625
oab_exams_acc,exam_id__2016-19: 0.48717948717948717
oab_exams_acc,exam_id__2016-20: 0.45
oab_exams_acc,exam_id__2016-20a: 0.425
oab_exams_acc,exam_id__2016-21: 0.4625
oab_exams_acc,exam_id__2017-22: 0.45
oab_exams_acc,exam_id__2017-23: 0.45
oab_exams_acc,exam_id__2017-24: 0.5
oab_exams_acc,exam_id__2018-25: 0.45
oab_exams_acc_stderr,all: 0.006164493571290463
oab_exams_acc_stderr,exam_id__2010-01: 0.03120711424338333
oab_exams_acc_stderr,exam_id__2010-02: 0.028912621193308535
oab_exams_acc_stderr,exam_id__2011-03: 0.028826912523627856
oab_exams_acc_stderr,exam_id__2011-04: 0.03204801747078995
oab_exams_acc_stderr,exam_id__2011-05: 0.03224202969176272
oab_exams_acc_stderr,exam_id__2012-06: 0.03222923233485234
oab_exams_acc_stderr,exam_id__2012-06a: 0.03229751885191722
oab_exams_acc_stderr,exam_id__2012-07: 0.03239443199904663
oab_exams_acc_stderr,exam_id__2012-08: 0.032144839789965185
oab_exams_acc_stderr,exam_id__2012-09: 0.03103244684042299
oab_exams_acc_stderr,exam_id__2013-10: 0.032222242709920586
oab_exams_acc_stderr,exam_id__2013-11: 0.032249698626176736
oab_exams_acc_stderr,exam_id__2013-12: 0.03225675063294939
oab_exams_acc_stderr,exam_id__2014-13: 0.03217856982922958
oab_exams_acc_stderr,exam_id__2014-14: 0.032246622088818386
oab_exams_acc_stderr,exam_id__2014-15: 0.032435167155658584
oab_exams_acc_stderr,exam_id__2015-16: 0.03223354880595777
oab_exams_acc_stderr,exam_id__2015-17: 0.032573794785528166
oab_exams_acc_stderr,exam_id__2015-18: 0.032123574402475284
oab_exams_acc_stderr,exam_id__2016-19: 0.03271170717682627
oab_exams_acc_stderr,exam_id__2016-20: 0.03203769414642788
oab_exams_acc_stderr,exam_id__2016-20a: 0.031951776527517205
oab_exams_acc_stderr,exam_id__2016-21: 0.03217984644292296
oab_exams_acc_stderr,exam_id__2017-22: 0.03205629372165545
oab_exams_acc_stderr,exam_id__2017-23: 0.03221345216992268
oab_exams_acc_stderr,exam_id__2017-24: 0.03232207361521986
oab_exams_acc_stderr,exam_id__2018-25: 0.03198727711742204
oab_exams_alias: oab_exams
portuguese_hate_speech_acc,all: 0.7297297297297297
portuguese_hate_speech_acc_stderr,all: 0.010749375621571917
portuguese_hate_speech_alias: portuguese_hate_speech_binary
portuguese_hate_speech_f1_macro,all: 0.679463244638342
portuguese_hate_speech_f1_macro_stderr,all: 0.01220967447481398
tweetsentbr_acc,all: 0.7014925373134329
tweetsentbr_acc_stderr,all: 0.007246042251471291
tweetsentbr_alias: tweetsentbr
tweetsentbr_f1_macro,all: 0.6540958473356445
tweetsentbr_f1_macro_stderr,all: 0.007812938746547184
step: 100000