Files
Qwen2.5-7B-Open-R1-GRPO-mat…/trainer_state.json
ModelHub XC 9f04346d79 初始化项目,由ModelHub XC社区提供模型
Model: Lansechen/Qwen2.5-7B-Open-R1-GRPO-math-lighteval-1epochstop-withformat
Source: Original Platform
2026-06-07 22:56:24 +08:00

967 lines
30 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9850746268656716,
"eval_steps": 100,
"global_step": 66,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio": 0.0,
"completion_length": 468.4821586608887,
"epoch": 0.014925373134328358,
"grad_norm": 0.6393710374832153,
"learning_rate": 1.4285714285714285e-07,
"loss": 0.0029,
"num_tokens": 546936.0,
"reward": 0.27120537124574184,
"reward_std": 0.39265505224466324,
"rewards/accuracy_reward": 0.20647320989519358,
"rewards/format_reward": 0.06473214365541935,
"step": 1
},
{
"clip_ratio": 0.0,
"completion_length": 471.2355079650879,
"epoch": 0.029850746268656716,
"grad_norm": 0.4749118387699127,
"learning_rate": 2.857142857142857e-07,
"loss": 0.0029,
"num_tokens": 1100635.0,
"reward": 0.29352679662406445,
"reward_std": 0.3840556889772415,
"rewards/accuracy_reward": 0.22767856996506453,
"rewards/format_reward": 0.06584821548312902,
"step": 2
},
{
"clip_ratio": 0.0,
"completion_length": 492.3962287902832,
"epoch": 0.04477611940298507,
"grad_norm": 0.5371558666229248,
"learning_rate": 4.285714285714285e-07,
"loss": -0.002,
"num_tokens": 1688726.0,
"reward": 0.2566964402794838,
"reward_std": 0.37864362075924873,
"rewards/accuracy_reward": 0.19866071455180645,
"rewards/format_reward": 0.05803571594879031,
"step": 3
},
{
"clip_ratio": 0.0,
"completion_length": 545.9107437133789,
"epoch": 0.05970149253731343,
"grad_norm": 0.3668884336948395,
"learning_rate": 5.714285714285714e-07,
"loss": 0.0224,
"num_tokens": 2307374.0,
"reward": 0.2321428693830967,
"reward_std": 0.31682609394192696,
"rewards/accuracy_reward": 0.16964286006987095,
"rewards/format_reward": 0.06250000069849193,
"step": 4
},
{
"clip_ratio": 0.0,
"completion_length": 497.13953399658203,
"epoch": 0.07462686567164178,
"grad_norm": 0.47015145421028137,
"learning_rate": 7.142857142857143e-07,
"loss": 0.0286,
"num_tokens": 2890315.0,
"reward": 0.2924107275903225,
"reward_std": 0.3944641724228859,
"rewards/accuracy_reward": 0.20424106856808066,
"rewards/format_reward": 0.08816964318975806,
"step": 5
},
{
"clip_ratio": 0.0,
"completion_length": 456.66073989868164,
"epoch": 0.08955223880597014,
"grad_norm": 0.6775171160697937,
"learning_rate": 8.57142857142857e-07,
"loss": 0.0111,
"num_tokens": 3423971.0,
"reward": 0.3270089440047741,
"reward_std": 0.4205157272517681,
"rewards/accuracy_reward": 0.22544642724096775,
"rewards/format_reward": 0.10156249976716936,
"step": 6
},
{
"clip_ratio": 0.0,
"completion_length": 481.45315170288086,
"epoch": 0.1044776119402985,
"grad_norm": 0.4945193827152252,
"learning_rate": 1e-06,
"loss": 0.0147,
"num_tokens": 3984401.0,
"reward": 0.3616071566939354,
"reward_std": 0.4703046642243862,
"rewards/accuracy_reward": 0.1919642873108387,
"rewards/format_reward": 0.1696428582072258,
"step": 7
},
{
"clip_ratio": 0.0,
"completion_length": 510.45984268188477,
"epoch": 0.11940298507462686,
"grad_norm": 5.969944477081299,
"learning_rate": 9.99314767377287e-07,
"loss": 0.0298,
"num_tokens": 4559421.0,
"reward": 0.4609375223517418,
"reward_std": 0.5105233080685139,
"rewards/accuracy_reward": 0.253348208963871,
"rewards/format_reward": 0.2075892835855484,
"step": 8
},
{
"clip_ratio": 0.0,
"completion_length": 481.9788246154785,
"epoch": 0.13432835820895522,
"grad_norm": 25.794984817504883,
"learning_rate": 9.972609476841365e-07,
"loss": 0.0144,
"num_tokens": 5116610.0,
"reward": 0.5703125260770321,
"reward_std": 0.5514599978923798,
"rewards/accuracy_reward": 0.23772321827709675,
"rewards/format_reward": 0.3325892873108387,
"step": 9
},
{
"clip_ratio": 0.0,
"completion_length": 429.76899337768555,
"epoch": 0.14925373134328357,
"grad_norm": 0.5547317862510681,
"learning_rate": 9.938441702975689e-07,
"loss": 0.0116,
"num_tokens": 5626323.0,
"reward": 0.7957589626312256,
"reward_std": 0.5967141911387444,
"rewards/accuracy_reward": 0.2444196417927742,
"rewards/format_reward": 0.551339291036129,
"step": 10
},
{
"clip_ratio": 0.0,
"completion_length": 492.5379638671875,
"epoch": 0.16417910447761194,
"grad_norm": 0.47283390164375305,
"learning_rate": 9.890738003669027e-07,
"loss": 0.0151,
"num_tokens": 6197693.0,
"reward": 0.848214328289032,
"reward_std": 0.5331848785281181,
"rewards/accuracy_reward": 0.2321428544819355,
"rewards/format_reward": 0.6160714328289032,
"step": 11
},
{
"clip_ratio": 0.0,
"completion_length": 482.45203018188477,
"epoch": 0.1791044776119403,
"grad_norm": 0.5417584180831909,
"learning_rate": 9.82962913144534e-07,
"loss": 0.0703,
"num_tokens": 6760050.0,
"reward": 0.9642857611179352,
"reward_std": 0.5401003882288933,
"rewards/accuracy_reward": 0.24665178172290325,
"rewards/format_reward": 0.7176339253783226,
"step": 12
},
{
"clip_ratio": 0.0,
"completion_length": 491.64622497558594,
"epoch": 0.19402985074626866,
"grad_norm": 1.8367348909378052,
"learning_rate": 9.755282581475767e-07,
"loss": 0.0145,
"num_tokens": 7323749.0,
"reward": 1.0502232685685158,
"reward_std": 0.5132231153547764,
"rewards/accuracy_reward": 0.2834821417927742,
"rewards/format_reward": 0.7667410746216774,
"step": 13
},
{
"clip_ratio": 0.0,
"completion_length": 450.22099685668945,
"epoch": 0.208955223880597,
"grad_norm": 0.8013429045677185,
"learning_rate": 9.667902132486008e-07,
"loss": 0.0108,
"num_tokens": 7851019.0,
"reward": 1.1729911118745804,
"reward_std": 0.48798326775431633,
"rewards/accuracy_reward": 0.3158482164144516,
"rewards/format_reward": 0.8571428582072258,
"step": 14
},
{
"clip_ratio": 0.0,
"completion_length": 441.9620704650879,
"epoch": 0.22388059701492538,
"grad_norm": 0.37359559535980225,
"learning_rate": 9.567727288213004e-07,
"loss": 0.0302,
"num_tokens": 8361521.0,
"reward": 1.2600446939468384,
"reward_std": 0.44664183259010315,
"rewards/accuracy_reward": 0.3526785708963871,
"rewards/format_reward": 0.9073660746216774,
"step": 15
},
{
"clip_ratio": 0.0,
"completion_length": 511.1897659301758,
"epoch": 0.23880597014925373,
"grad_norm": 0.5735632181167603,
"learning_rate": 9.455032620941839e-07,
"loss": 0.0096,
"num_tokens": 8942915.0,
"reward": 1.3303572088479996,
"reward_std": 0.47631101682782173,
"rewards/accuracy_reward": 0.425223208963871,
"rewards/format_reward": 0.9051339253783226,
"step": 16
},
{
"clip_ratio": 0.0,
"completion_length": 490.0714530944824,
"epoch": 0.2537313432835821,
"grad_norm": 0.5138530731201172,
"learning_rate": 9.330127018922193e-07,
"loss": 0.0355,
"num_tokens": 9531755.0,
"reward": 1.4218750447034836,
"reward_std": 0.4510103240609169,
"rewards/accuracy_reward": 0.4899553582072258,
"rewards/format_reward": 0.9319196492433548,
"step": 17
},
{
"clip_ratio": 0.0,
"completion_length": 508.3884162902832,
"epoch": 0.26865671641791045,
"grad_norm": 3.1237614154815674,
"learning_rate": 9.19335283972712e-07,
"loss": 0.0378,
"num_tokens": 10123455.0,
"reward": 1.428571492433548,
"reward_std": 0.4577597416937351,
"rewards/accuracy_reward": 0.4933035671710968,
"rewards/format_reward": 0.9352678507566452,
"step": 18
},
{
"clip_ratio": 0.0,
"completion_length": 551.1908760070801,
"epoch": 0.2835820895522388,
"grad_norm": 1.3872253894805908,
"learning_rate": 9.045084971874737e-07,
"loss": 0.0408,
"num_tokens": 10742114.0,
"reward": 1.5089286267757416,
"reward_std": 0.452670868486166,
"rewards/accuracy_reward": 0.5837053507566452,
"rewards/format_reward": 0.9252232164144516,
"step": 19
},
{
"clip_ratio": 0.0,
"completion_length": 533.873908996582,
"epoch": 0.29850746268656714,
"grad_norm": 2.3338828086853027,
"learning_rate": 8.885729807284854e-07,
"loss": 0.0508,
"num_tokens": 11347745.0,
"reward": 1.578125074505806,
"reward_std": 0.40557559579610825,
"rewards/accuracy_reward": 0.640625,
"rewards/format_reward": 0.9375000074505806,
"step": 20
},
{
"clip_ratio": 0.0,
"completion_length": 578.6529312133789,
"epoch": 0.31343283582089554,
"grad_norm": 0.34423601627349854,
"learning_rate": 8.71572412738697e-07,
"loss": 0.0341,
"num_tokens": 11990690.0,
"reward": 1.5825893580913544,
"reward_std": 0.37782258354127407,
"rewards/accuracy_reward": 0.6372767984867096,
"rewards/format_reward": 0.9453125,
"step": 21
},
{
"clip_ratio": 0.0,
"completion_length": 512.1685447692871,
"epoch": 0.3283582089552239,
"grad_norm": 0.3256766200065613,
"learning_rate": 8.535533905932737e-07,
"loss": 0.0125,
"num_tokens": 12579057.0,
"reward": 1.6216518580913544,
"reward_std": 0.37416161969304085,
"rewards/accuracy_reward": 0.6540178582072258,
"rewards/format_reward": 0.9676339402794838,
"step": 22
},
{
"clip_ratio": 0.0,
"completion_length": 506.10828018188477,
"epoch": 0.34328358208955223,
"grad_norm": 0.27199289202690125,
"learning_rate": 8.34565303179429e-07,
"loss": 0.054,
"num_tokens": 13163106.0,
"reward": 1.6227679401636124,
"reward_std": 0.27844817750155926,
"rewards/accuracy_reward": 0.671875,
"rewards/format_reward": 0.9508928656578064,
"step": 23
},
{
"clip_ratio": 0.0,
"completion_length": 604.025707244873,
"epoch": 0.3582089552238806,
"grad_norm": 1.3046621084213257,
"learning_rate": 8.146601955249187e-07,
"loss": 0.0376,
"num_tokens": 13840433.0,
"reward": 1.6439733058214188,
"reward_std": 0.28095651790499687,
"rewards/accuracy_reward": 0.6886160597205162,
"rewards/format_reward": 0.9553571566939354,
"step": 24
},
{
"clip_ratio": 0.0,
"completion_length": 550.097110748291,
"epoch": 0.373134328358209,
"grad_norm": 0.45065104961395264,
"learning_rate": 7.938926261462365e-07,
"loss": 0.0341,
"num_tokens": 14448480.0,
"reward": 1.7399554401636124,
"reward_std": 0.24296983890235424,
"rewards/accuracy_reward": 0.7712053507566452,
"rewards/format_reward": 0.9687499925494194,
"step": 25
},
{
"clip_ratio": 0.0,
"completion_length": 614.7064971923828,
"epoch": 0.3880597014925373,
"grad_norm": 2.617610454559326,
"learning_rate": 7.723195175075135e-07,
"loss": 0.05,
"num_tokens": 15136857.0,
"reward": 1.639508992433548,
"reward_std": 0.28644888289272785,
"rewards/accuracy_reward": 0.6819196492433548,
"rewards/format_reward": 0.9575892835855484,
"step": 26
},
{
"clip_ratio": 0.0,
"completion_length": 610.8203430175781,
"epoch": 0.40298507462686567,
"grad_norm": 0.1424814909696579,
"learning_rate": 7.5e-07,
"loss": 0.0141,
"num_tokens": 15810840.0,
"reward": 1.664062574505806,
"reward_std": 0.2185768410563469,
"rewards/accuracy_reward": 0.686383917927742,
"rewards/format_reward": 0.9776785746216774,
"step": 27
},
{
"clip_ratio": 0.0,
"completion_length": 574.0815010070801,
"epoch": 0.417910447761194,
"grad_norm": 0.23577629029750824,
"learning_rate": 7.269952498697734e-07,
"loss": 0.0452,
"num_tokens": 16444609.0,
"reward": 1.6852679401636124,
"reward_std": 0.25254260189831257,
"rewards/accuracy_reward": 0.7120535634458065,
"rewards/format_reward": 0.9732142835855484,
"step": 28
},
{
"clip_ratio": 0.0,
"completion_length": 652.5870895385742,
"epoch": 0.43283582089552236,
"grad_norm": 0.15183551609516144,
"learning_rate": 7.033683215379002e-07,
"loss": 0.0389,
"num_tokens": 17168567.0,
"reward": 1.6551340073347092,
"reward_std": 0.24212115444242954,
"rewards/accuracy_reward": 0.6729910671710968,
"rewards/format_reward": 0.9821428507566452,
"step": 29
},
{
"clip_ratio": 0.0,
"completion_length": 620.8114051818848,
"epoch": 0.44776119402985076,
"grad_norm": 0.13523727655410767,
"learning_rate": 6.7918397477265e-07,
"loss": 0.0611,
"num_tokens": 17847534.0,
"reward": 1.7377232909202576,
"reward_std": 0.21810074150562286,
"rewards/accuracy_reward": 0.7622767835855484,
"rewards/format_reward": 0.975446417927742,
"step": 30
},
{
"clip_ratio": 0.0,
"completion_length": 609.9408836364746,
"epoch": 0.4626865671641791,
"grad_norm": 0.11713448166847229,
"learning_rate": 6.545084971874736e-07,
"loss": 0.0305,
"num_tokens": 18517833.0,
"reward": 1.6886161416769028,
"reward_std": 0.16379249095916748,
"rewards/accuracy_reward": 0.7020089328289032,
"rewards/format_reward": 0.9866071417927742,
"step": 31
},
{
"clip_ratio": 0.0,
"completion_length": 588.9922142028809,
"epoch": 0.47761194029850745,
"grad_norm": 0.19670455157756805,
"learning_rate": 6.294095225512604e-07,
"loss": 0.0329,
"num_tokens": 19174306.0,
"reward": 1.7399554401636124,
"reward_std": 0.19168315595015883,
"rewards/accuracy_reward": 0.7555803507566452,
"rewards/format_reward": 0.984375,
"step": 32
},
{
"clip_ratio": 0.0,
"completion_length": 651.0279312133789,
"epoch": 0.4925373134328358,
"grad_norm": 0.19632503390312195,
"learning_rate": 6.039558454088795e-07,
"loss": 0.0406,
"num_tokens": 19883139.0,
"reward": 1.6774554550647736,
"reward_std": 0.20999335870146751,
"rewards/accuracy_reward": 0.7008928656578064,
"rewards/format_reward": 0.9765624925494194,
"step": 33
},
{
"clip_ratio": 0.0,
"completion_length": 669.9475708007812,
"epoch": 0.5074626865671642,
"grad_norm": 0.1526719331741333,
"learning_rate": 5.782172325201155e-07,
"loss": 0.0386,
"num_tokens": 20613844.0,
"reward": 1.6763393729925156,
"reward_std": 0.20889410376548767,
"rewards/accuracy_reward": 0.6919642873108387,
"rewards/format_reward": 0.984375,
"step": 34
},
{
"clip_ratio": 0.0,
"completion_length": 623.3448867797852,
"epoch": 0.5223880597014925,
"grad_norm": 0.20824865996837616,
"learning_rate": 5.522642316338268e-07,
"loss": 0.0249,
"num_tokens": 21312353.0,
"reward": 1.7287947088479996,
"reward_std": 0.17593990080058575,
"rewards/accuracy_reward": 0.7388392835855484,
"rewards/format_reward": 0.9899553507566452,
"step": 35
},
{
"clip_ratio": 0.0,
"completion_length": 646.650691986084,
"epoch": 0.5373134328358209,
"grad_norm": 0.09576990455389023,
"learning_rate": 5.26167978121472e-07,
"loss": 0.048,
"num_tokens": 22035288.0,
"reward": 1.6785715073347092,
"reward_std": 0.18887418508529663,
"rewards/accuracy_reward": 0.7031250037252903,
"rewards/format_reward": 0.9754464328289032,
"step": 36
},
{
"clip_ratio": 0.0,
"completion_length": 607.474365234375,
"epoch": 0.5522388059701493,
"grad_norm": 0.23552455008029938,
"learning_rate": 5e-07,
"loss": 0.0183,
"num_tokens": 22707009.0,
"reward": 1.7220982909202576,
"reward_std": 0.16549584455788136,
"rewards/accuracy_reward": 0.731026791036129,
"rewards/format_reward": 0.991071417927742,
"step": 37
},
{
"clip_ratio": 0.0,
"completion_length": 629.219898223877,
"epoch": 0.5671641791044776,
"grad_norm": 0.13172753155231476,
"learning_rate": 4.7383202187852804e-07,
"loss": 0.0201,
"num_tokens": 23416574.0,
"reward": 1.7444197237491608,
"reward_std": 0.15772132016718388,
"rewards/accuracy_reward": 0.7566964253783226,
"rewards/format_reward": 0.987723208963871,
"step": 38
},
{
"clip_ratio": 0.0,
"completion_length": 605.5770378112793,
"epoch": 0.582089552238806,
"grad_norm": 0.10828184336423874,
"learning_rate": 4.477357683661733e-07,
"loss": 0.0169,
"num_tokens": 24085835.0,
"reward": 1.681919738650322,
"reward_std": 0.16571981832385063,
"rewards/accuracy_reward": 0.6975446455180645,
"rewards/format_reward": 0.9843749925494194,
"step": 39
},
{
"clip_ratio": 0.0,
"completion_length": 706.116096496582,
"epoch": 0.5970149253731343,
"grad_norm": 0.14090992510318756,
"learning_rate": 4.2178276747988444e-07,
"loss": 0.0255,
"num_tokens": 24842211.0,
"reward": 1.7020090222358704,
"reward_std": 0.19248592853546143,
"rewards/accuracy_reward": 0.7220982238650322,
"rewards/format_reward": 0.979910708963871,
"step": 40
},
{
"clip_ratio": 0.0,
"completion_length": 628.4498100280762,
"epoch": 0.6119402985074627,
"grad_norm": 0.20408028364181519,
"learning_rate": 3.960441545911204e-07,
"loss": 0.0282,
"num_tokens": 25545910.0,
"reward": 1.6808036416769028,
"reward_std": 0.19085692055523396,
"rewards/accuracy_reward": 0.6941964328289032,
"rewards/format_reward": 0.9866071417927742,
"step": 41
},
{
"clip_ratio": 0.0,
"completion_length": 606.4631958007812,
"epoch": 0.6268656716417911,
"grad_norm": 0.3082476258277893,
"learning_rate": 3.7059047744873955e-07,
"loss": 0.0413,
"num_tokens": 26211781.0,
"reward": 1.8024554252624512,
"reward_std": 0.16285591386258602,
"rewards/accuracy_reward": 0.8169642761349678,
"rewards/format_reward": 0.9854910746216774,
"step": 42
},
{
"clip_ratio": 0.0,
"completion_length": 617.654052734375,
"epoch": 0.6417910447761194,
"grad_norm": 0.3439674377441406,
"learning_rate": 3.454915028125263e-07,
"loss": 0.031,
"num_tokens": 26900815.0,
"reward": 1.7377232909202576,
"reward_std": 0.16336329095065594,
"rewards/accuracy_reward": 0.7455357164144516,
"rewards/format_reward": 0.9921874925494194,
"step": 43
},
{
"clip_ratio": 0.0,
"completion_length": 597.3973541259766,
"epoch": 0.6567164179104478,
"grad_norm": 0.2667296826839447,
"learning_rate": 3.2081602522734985e-07,
"loss": 0.0359,
"num_tokens": 27562995.0,
"reward": 1.7243304550647736,
"reward_std": 0.17545861564576626,
"rewards/accuracy_reward": 0.7421874925494194,
"rewards/format_reward": 0.9821428433060646,
"step": 44
},
{
"clip_ratio": 0.0,
"completion_length": 590.3047065734863,
"epoch": 0.6716417910447762,
"grad_norm": 0.12688565254211426,
"learning_rate": 2.9663167846209996e-07,
"loss": 0.0193,
"num_tokens": 28218116.0,
"reward": 1.7667411416769028,
"reward_std": 0.1432758029550314,
"rewards/accuracy_reward": 0.7700892761349678,
"rewards/format_reward": 0.9966517761349678,
"step": 45
},
{
"clip_ratio": 0.0,
"completion_length": 694.7522659301758,
"epoch": 0.6865671641791045,
"grad_norm": 0.13461622595787048,
"learning_rate": 2.730047501302266e-07,
"loss": 0.0493,
"num_tokens": 28974406.0,
"reward": 1.6484375894069672,
"reward_std": 0.21462283097207546,
"rewards/accuracy_reward": 0.6729910671710968,
"rewards/format_reward": 0.9754464253783226,
"step": 46
},
{
"clip_ratio": 0.0,
"completion_length": 662.5826187133789,
"epoch": 0.7014925373134329,
"grad_norm": 0.10844791680574417,
"learning_rate": 2.500000000000001e-07,
"loss": 0.0217,
"num_tokens": 29684136.0,
"reward": 1.733258992433548,
"reward_std": 0.16403476987034082,
"rewards/accuracy_reward": 0.7511160746216774,
"rewards/format_reward": 0.9821428507566452,
"step": 47
},
{
"clip_ratio": 0.0,
"completion_length": 654.8058242797852,
"epoch": 0.7164179104477612,
"grad_norm": 0.269379585981369,
"learning_rate": 2.2768048249248644e-07,
"loss": 0.0356,
"num_tokens": 30397658.0,
"reward": 1.7533482909202576,
"reward_std": 0.18037123046815395,
"rewards/accuracy_reward": 0.7656250074505806,
"rewards/format_reward": 0.987723208963871,
"step": 48
},
{
"clip_ratio": 0.0,
"completion_length": 662.7210159301758,
"epoch": 0.7313432835820896,
"grad_norm": 0.1654261350631714,
"learning_rate": 2.0610737385376348e-07,
"loss": 0.0374,
"num_tokens": 31127856.0,
"reward": 1.7254465073347092,
"reward_std": 0.1822904385626316,
"rewards/accuracy_reward": 0.7332589328289032,
"rewards/format_reward": 0.9921875,
"step": 49
},
{
"clip_ratio": 0.0,
"completion_length": 608.631721496582,
"epoch": 0.746268656716418,
"grad_norm": 0.1254522204399109,
"learning_rate": 1.8533980447508135e-07,
"loss": 0.0104,
"num_tokens": 31811342.0,
"reward": 1.6908482909202576,
"reward_std": 0.16885741148144007,
"rewards/accuracy_reward": 0.6975446380674839,
"rewards/format_reward": 0.9933035671710968,
"step": 50
},
{
"clip_ratio": 0.0,
"completion_length": 637.1618576049805,
"epoch": 0.7611940298507462,
"grad_norm": 0.13198821246623993,
"learning_rate": 1.6543469682057104e-07,
"loss": 0.0518,
"num_tokens": 32520023.0,
"reward": 1.7220982909202576,
"reward_std": 0.20328251458704472,
"rewards/accuracy_reward": 0.7388392835855484,
"rewards/format_reward": 0.9832589253783226,
"step": 51
},
{
"clip_ratio": 0.0,
"completion_length": 639.1216850280762,
"epoch": 0.7761194029850746,
"grad_norm": 0.14550577104091644,
"learning_rate": 1.4644660940672627e-07,
"loss": 0.0437,
"num_tokens": 33221324.0,
"reward": 1.6729911267757416,
"reward_std": 0.18635992892086506,
"rewards/accuracy_reward": 0.6941964365541935,
"rewards/format_reward": 0.9787946343421936,
"step": 52
},
{
"clip_ratio": 0.0,
"completion_length": 630.694221496582,
"epoch": 0.7910447761194029,
"grad_norm": 0.14397642016410828,
"learning_rate": 1.284275872613028e-07,
"loss": 0.0351,
"num_tokens": 33914098.0,
"reward": 1.6830357909202576,
"reward_std": 0.20892403088510036,
"rewards/accuracy_reward": 0.6886160746216774,
"rewards/format_reward": 0.9944196343421936,
"step": 53
},
{
"clip_ratio": 0.0,
"completion_length": 606.3326225280762,
"epoch": 0.8059701492537313,
"grad_norm": 0.1218072697520256,
"learning_rate": 1.1142701927151454e-07,
"loss": 0.0235,
"num_tokens": 34592204.0,
"reward": 1.7310268729925156,
"reward_std": 0.1702114064246416,
"rewards/accuracy_reward": 0.7343750074505806,
"rewards/format_reward": 0.9966517761349678,
"step": 54
},
{
"clip_ratio": 0.0,
"completion_length": 601.4486846923828,
"epoch": 0.8208955223880597,
"grad_norm": 0.14515279233455658,
"learning_rate": 9.549150281252632e-08,
"loss": 0.0206,
"num_tokens": 35270222.0,
"reward": 1.7477679252624512,
"reward_std": 0.1629035547375679,
"rewards/accuracy_reward": 0.7522321417927742,
"rewards/format_reward": 0.9955357015132904,
"step": 55
},
{
"clip_ratio": 0.0,
"completion_length": 633.4152069091797,
"epoch": 0.835820895522388,
"grad_norm": 0.16478443145751953,
"learning_rate": 8.066471602728803e-08,
"loss": 0.0416,
"num_tokens": 35968818.0,
"reward": 1.6875000596046448,
"reward_std": 0.18482761643826962,
"rewards/accuracy_reward": 0.6986607164144516,
"rewards/format_reward": 0.9888392686843872,
"step": 56
},
{
"clip_ratio": 0.0,
"completion_length": 603.7109565734863,
"epoch": 0.8507462686567164,
"grad_norm": 0.14361166954040527,
"learning_rate": 6.698729810778064e-08,
"loss": 0.0288,
"num_tokens": 36632743.0,
"reward": 1.7745536714792252,
"reward_std": 0.17002357356250286,
"rewards/accuracy_reward": 0.7890625074505806,
"rewards/format_reward": 0.9854910597205162,
"step": 57
},
{
"clip_ratio": 0.0,
"completion_length": 628.2723541259766,
"epoch": 0.8656716417910447,
"grad_norm": 0.10697366297245026,
"learning_rate": 5.44967379058161e-08,
"loss": 0.0286,
"num_tokens": 37321643.0,
"reward": 1.70870541036129,
"reward_std": 0.16991718113422394,
"rewards/accuracy_reward": 0.7198660746216774,
"rewards/format_reward": 0.9888392761349678,
"step": 58
},
{
"clip_ratio": 0.0,
"completion_length": 631.0524940490723,
"epoch": 0.8805970149253731,
"grad_norm": 0.11336029320955276,
"learning_rate": 4.322727117869951e-08,
"loss": 0.0218,
"num_tokens": 38012474.0,
"reward": 1.7656251043081284,
"reward_std": 0.19230836629867554,
"rewards/accuracy_reward": 0.777901791036129,
"rewards/format_reward": 0.9877232164144516,
"step": 59
},
{
"clip_ratio": 0.0,
"completion_length": 600.5245742797852,
"epoch": 0.8955223880597015,
"grad_norm": 0.19460614025592804,
"learning_rate": 3.3209786751399184e-08,
"loss": 0.025,
"num_tokens": 38685576.0,
"reward": 1.73214291036129,
"reward_std": 0.1749271210283041,
"rewards/accuracy_reward": 0.7399553507566452,
"rewards/format_reward": 0.9921874925494194,
"step": 60
},
{
"clip_ratio": 0.0,
"completion_length": 623.6350631713867,
"epoch": 0.9104477611940298,
"grad_norm": 0.2211906909942627,
"learning_rate": 2.4471741852423233e-08,
"loss": 0.0398,
"num_tokens": 39386529.0,
"reward": 1.6808036267757416,
"reward_std": 0.18810790218412876,
"rewards/accuracy_reward": 0.699776791036129,
"rewards/format_reward": 0.981026791036129,
"step": 61
},
{
"clip_ratio": 0.0,
"completion_length": 615.8917694091797,
"epoch": 0.9253731343283582,
"grad_norm": 0.21188978850841522,
"learning_rate": 1.7037086855465898e-08,
"loss": 0.0261,
"num_tokens": 40063784.0,
"reward": 1.7633929401636124,
"reward_std": 0.162169449031353,
"rewards/accuracy_reward": 0.7745535783469677,
"rewards/format_reward": 0.988839291036129,
"step": 62
},
{
"clip_ratio": 0.0,
"completion_length": 665.5602951049805,
"epoch": 0.9402985074626866,
"grad_norm": 0.10184086114168167,
"learning_rate": 1.0926199633097154e-08,
"loss": 0.0423,
"num_tokens": 40789638.0,
"reward": 1.727678656578064,
"reward_std": 0.21804202906787395,
"rewards/accuracy_reward": 0.7488839328289032,
"rewards/format_reward": 0.9787946417927742,
"step": 63
},
{
"clip_ratio": 0.0,
"completion_length": 579.4073905944824,
"epoch": 0.9552238805970149,
"grad_norm": 0.17883718013763428,
"learning_rate": 6.15582970243117e-09,
"loss": 0.0162,
"num_tokens": 41445467.0,
"reward": 1.742187574505806,
"reward_std": 0.1791787538677454,
"rewards/accuracy_reward": 0.7500000074505806,
"rewards/format_reward": 0.9921874925494194,
"step": 64
},
{
"clip_ratio": 0.0,
"completion_length": 600.4989166259766,
"epoch": 0.9701492537313433,
"grad_norm": 0.09666120260953903,
"learning_rate": 2.739052315863355e-09,
"loss": 0.0247,
"num_tokens": 42116706.0,
"reward": 1.7354911714792252,
"reward_std": 0.15130825340747833,
"rewards/accuracy_reward": 0.7421875,
"rewards/format_reward": 0.9933035671710968,
"step": 65
},
{
"clip_ratio": 0.0,
"completion_length": 660.8865814208984,
"epoch": 0.9850746268656716,
"grad_norm": 1.0719019174575806,
"learning_rate": 6.852326227130833e-10,
"loss": 0.0373,
"num_tokens": 42832111.0,
"reward": 1.695312574505806,
"reward_std": 0.21052085421979427,
"rewards/accuracy_reward": 0.7198660597205162,
"rewards/format_reward": 0.9754464253783226,
"step": 66
},
{
"epoch": 0.9850746268656716,
"step": 66,
"total_flos": 0.0,
"train_loss": 0.02949339959234225,
"train_runtime": 14062.8623,
"train_samples_per_second": 0.533,
"train_steps_per_second": 0.005
}
],
"logging_steps": 1,
"max_steps": 67,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}