967 lines
30 KiB
JSON
967 lines
30 KiB
JSON
|
|
{
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 0.9850746268656716,
|
||
|
|
"eval_steps": 100,
|
||
|
|
"global_step": 66,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 468.4821586608887,
|
||
|
|
"epoch": 0.014925373134328358,
|
||
|
|
"grad_norm": 0.6393710374832153,
|
||
|
|
"learning_rate": 1.4285714285714285e-07,
|
||
|
|
"loss": 0.0029,
|
||
|
|
"num_tokens": 546936.0,
|
||
|
|
"reward": 0.27120537124574184,
|
||
|
|
"reward_std": 0.39265505224466324,
|
||
|
|
"rewards/accuracy_reward": 0.20647320989519358,
|
||
|
|
"rewards/format_reward": 0.06473214365541935,
|
||
|
|
"step": 1
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 471.2355079650879,
|
||
|
|
"epoch": 0.029850746268656716,
|
||
|
|
"grad_norm": 0.4749118387699127,
|
||
|
|
"learning_rate": 2.857142857142857e-07,
|
||
|
|
"loss": 0.0029,
|
||
|
|
"num_tokens": 1100635.0,
|
||
|
|
"reward": 0.29352679662406445,
|
||
|
|
"reward_std": 0.3840556889772415,
|
||
|
|
"rewards/accuracy_reward": 0.22767856996506453,
|
||
|
|
"rewards/format_reward": 0.06584821548312902,
|
||
|
|
"step": 2
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 492.3962287902832,
|
||
|
|
"epoch": 0.04477611940298507,
|
||
|
|
"grad_norm": 0.5371558666229248,
|
||
|
|
"learning_rate": 4.285714285714285e-07,
|
||
|
|
"loss": -0.002,
|
||
|
|
"num_tokens": 1688726.0,
|
||
|
|
"reward": 0.2566964402794838,
|
||
|
|
"reward_std": 0.37864362075924873,
|
||
|
|
"rewards/accuracy_reward": 0.19866071455180645,
|
||
|
|
"rewards/format_reward": 0.05803571594879031,
|
||
|
|
"step": 3
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 545.9107437133789,
|
||
|
|
"epoch": 0.05970149253731343,
|
||
|
|
"grad_norm": 0.3668884336948395,
|
||
|
|
"learning_rate": 5.714285714285714e-07,
|
||
|
|
"loss": 0.0224,
|
||
|
|
"num_tokens": 2307374.0,
|
||
|
|
"reward": 0.2321428693830967,
|
||
|
|
"reward_std": 0.31682609394192696,
|
||
|
|
"rewards/accuracy_reward": 0.16964286006987095,
|
||
|
|
"rewards/format_reward": 0.06250000069849193,
|
||
|
|
"step": 4
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 497.13953399658203,
|
||
|
|
"epoch": 0.07462686567164178,
|
||
|
|
"grad_norm": 0.47015145421028137,
|
||
|
|
"learning_rate": 7.142857142857143e-07,
|
||
|
|
"loss": 0.0286,
|
||
|
|
"num_tokens": 2890315.0,
|
||
|
|
"reward": 0.2924107275903225,
|
||
|
|
"reward_std": 0.3944641724228859,
|
||
|
|
"rewards/accuracy_reward": 0.20424106856808066,
|
||
|
|
"rewards/format_reward": 0.08816964318975806,
|
||
|
|
"step": 5
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 456.66073989868164,
|
||
|
|
"epoch": 0.08955223880597014,
|
||
|
|
"grad_norm": 0.6775171160697937,
|
||
|
|
"learning_rate": 8.57142857142857e-07,
|
||
|
|
"loss": 0.0111,
|
||
|
|
"num_tokens": 3423971.0,
|
||
|
|
"reward": 0.3270089440047741,
|
||
|
|
"reward_std": 0.4205157272517681,
|
||
|
|
"rewards/accuracy_reward": 0.22544642724096775,
|
||
|
|
"rewards/format_reward": 0.10156249976716936,
|
||
|
|
"step": 6
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 481.45315170288086,
|
||
|
|
"epoch": 0.1044776119402985,
|
||
|
|
"grad_norm": 0.4945193827152252,
|
||
|
|
"learning_rate": 1e-06,
|
||
|
|
"loss": 0.0147,
|
||
|
|
"num_tokens": 3984401.0,
|
||
|
|
"reward": 0.3616071566939354,
|
||
|
|
"reward_std": 0.4703046642243862,
|
||
|
|
"rewards/accuracy_reward": 0.1919642873108387,
|
||
|
|
"rewards/format_reward": 0.1696428582072258,
|
||
|
|
"step": 7
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 510.45984268188477,
|
||
|
|
"epoch": 0.11940298507462686,
|
||
|
|
"grad_norm": 5.969944477081299,
|
||
|
|
"learning_rate": 9.99314767377287e-07,
|
||
|
|
"loss": 0.0298,
|
||
|
|
"num_tokens": 4559421.0,
|
||
|
|
"reward": 0.4609375223517418,
|
||
|
|
"reward_std": 0.5105233080685139,
|
||
|
|
"rewards/accuracy_reward": 0.253348208963871,
|
||
|
|
"rewards/format_reward": 0.2075892835855484,
|
||
|
|
"step": 8
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 481.9788246154785,
|
||
|
|
"epoch": 0.13432835820895522,
|
||
|
|
"grad_norm": 25.794984817504883,
|
||
|
|
"learning_rate": 9.972609476841365e-07,
|
||
|
|
"loss": 0.0144,
|
||
|
|
"num_tokens": 5116610.0,
|
||
|
|
"reward": 0.5703125260770321,
|
||
|
|
"reward_std": 0.5514599978923798,
|
||
|
|
"rewards/accuracy_reward": 0.23772321827709675,
|
||
|
|
"rewards/format_reward": 0.3325892873108387,
|
||
|
|
"step": 9
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 429.76899337768555,
|
||
|
|
"epoch": 0.14925373134328357,
|
||
|
|
"grad_norm": 0.5547317862510681,
|
||
|
|
"learning_rate": 9.938441702975689e-07,
|
||
|
|
"loss": 0.0116,
|
||
|
|
"num_tokens": 5626323.0,
|
||
|
|
"reward": 0.7957589626312256,
|
||
|
|
"reward_std": 0.5967141911387444,
|
||
|
|
"rewards/accuracy_reward": 0.2444196417927742,
|
||
|
|
"rewards/format_reward": 0.551339291036129,
|
||
|
|
"step": 10
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 492.5379638671875,
|
||
|
|
"epoch": 0.16417910447761194,
|
||
|
|
"grad_norm": 0.47283390164375305,
|
||
|
|
"learning_rate": 9.890738003669027e-07,
|
||
|
|
"loss": 0.0151,
|
||
|
|
"num_tokens": 6197693.0,
|
||
|
|
"reward": 0.848214328289032,
|
||
|
|
"reward_std": 0.5331848785281181,
|
||
|
|
"rewards/accuracy_reward": 0.2321428544819355,
|
||
|
|
"rewards/format_reward": 0.6160714328289032,
|
||
|
|
"step": 11
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 482.45203018188477,
|
||
|
|
"epoch": 0.1791044776119403,
|
||
|
|
"grad_norm": 0.5417584180831909,
|
||
|
|
"learning_rate": 9.82962913144534e-07,
|
||
|
|
"loss": 0.0703,
|
||
|
|
"num_tokens": 6760050.0,
|
||
|
|
"reward": 0.9642857611179352,
|
||
|
|
"reward_std": 0.5401003882288933,
|
||
|
|
"rewards/accuracy_reward": 0.24665178172290325,
|
||
|
|
"rewards/format_reward": 0.7176339253783226,
|
||
|
|
"step": 12
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 491.64622497558594,
|
||
|
|
"epoch": 0.19402985074626866,
|
||
|
|
"grad_norm": 1.8367348909378052,
|
||
|
|
"learning_rate": 9.755282581475767e-07,
|
||
|
|
"loss": 0.0145,
|
||
|
|
"num_tokens": 7323749.0,
|
||
|
|
"reward": 1.0502232685685158,
|
||
|
|
"reward_std": 0.5132231153547764,
|
||
|
|
"rewards/accuracy_reward": 0.2834821417927742,
|
||
|
|
"rewards/format_reward": 0.7667410746216774,
|
||
|
|
"step": 13
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 450.22099685668945,
|
||
|
|
"epoch": 0.208955223880597,
|
||
|
|
"grad_norm": 0.8013429045677185,
|
||
|
|
"learning_rate": 9.667902132486008e-07,
|
||
|
|
"loss": 0.0108,
|
||
|
|
"num_tokens": 7851019.0,
|
||
|
|
"reward": 1.1729911118745804,
|
||
|
|
"reward_std": 0.48798326775431633,
|
||
|
|
"rewards/accuracy_reward": 0.3158482164144516,
|
||
|
|
"rewards/format_reward": 0.8571428582072258,
|
||
|
|
"step": 14
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 441.9620704650879,
|
||
|
|
"epoch": 0.22388059701492538,
|
||
|
|
"grad_norm": 0.37359559535980225,
|
||
|
|
"learning_rate": 9.567727288213004e-07,
|
||
|
|
"loss": 0.0302,
|
||
|
|
"num_tokens": 8361521.0,
|
||
|
|
"reward": 1.2600446939468384,
|
||
|
|
"reward_std": 0.44664183259010315,
|
||
|
|
"rewards/accuracy_reward": 0.3526785708963871,
|
||
|
|
"rewards/format_reward": 0.9073660746216774,
|
||
|
|
"step": 15
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 511.1897659301758,
|
||
|
|
"epoch": 0.23880597014925373,
|
||
|
|
"grad_norm": 0.5735632181167603,
|
||
|
|
"learning_rate": 9.455032620941839e-07,
|
||
|
|
"loss": 0.0096,
|
||
|
|
"num_tokens": 8942915.0,
|
||
|
|
"reward": 1.3303572088479996,
|
||
|
|
"reward_std": 0.47631101682782173,
|
||
|
|
"rewards/accuracy_reward": 0.425223208963871,
|
||
|
|
"rewards/format_reward": 0.9051339253783226,
|
||
|
|
"step": 16
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 490.0714530944824,
|
||
|
|
"epoch": 0.2537313432835821,
|
||
|
|
"grad_norm": 0.5138530731201172,
|
||
|
|
"learning_rate": 9.330127018922193e-07,
|
||
|
|
"loss": 0.0355,
|
||
|
|
"num_tokens": 9531755.0,
|
||
|
|
"reward": 1.4218750447034836,
|
||
|
|
"reward_std": 0.4510103240609169,
|
||
|
|
"rewards/accuracy_reward": 0.4899553582072258,
|
||
|
|
"rewards/format_reward": 0.9319196492433548,
|
||
|
|
"step": 17
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 508.3884162902832,
|
||
|
|
"epoch": 0.26865671641791045,
|
||
|
|
"grad_norm": 3.1237614154815674,
|
||
|
|
"learning_rate": 9.19335283972712e-07,
|
||
|
|
"loss": 0.0378,
|
||
|
|
"num_tokens": 10123455.0,
|
||
|
|
"reward": 1.428571492433548,
|
||
|
|
"reward_std": 0.4577597416937351,
|
||
|
|
"rewards/accuracy_reward": 0.4933035671710968,
|
||
|
|
"rewards/format_reward": 0.9352678507566452,
|
||
|
|
"step": 18
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 551.1908760070801,
|
||
|
|
"epoch": 0.2835820895522388,
|
||
|
|
"grad_norm": 1.3872253894805908,
|
||
|
|
"learning_rate": 9.045084971874737e-07,
|
||
|
|
"loss": 0.0408,
|
||
|
|
"num_tokens": 10742114.0,
|
||
|
|
"reward": 1.5089286267757416,
|
||
|
|
"reward_std": 0.452670868486166,
|
||
|
|
"rewards/accuracy_reward": 0.5837053507566452,
|
||
|
|
"rewards/format_reward": 0.9252232164144516,
|
||
|
|
"step": 19
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 533.873908996582,
|
||
|
|
"epoch": 0.29850746268656714,
|
||
|
|
"grad_norm": 2.3338828086853027,
|
||
|
|
"learning_rate": 8.885729807284854e-07,
|
||
|
|
"loss": 0.0508,
|
||
|
|
"num_tokens": 11347745.0,
|
||
|
|
"reward": 1.578125074505806,
|
||
|
|
"reward_std": 0.40557559579610825,
|
||
|
|
"rewards/accuracy_reward": 0.640625,
|
||
|
|
"rewards/format_reward": 0.9375000074505806,
|
||
|
|
"step": 20
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 578.6529312133789,
|
||
|
|
"epoch": 0.31343283582089554,
|
||
|
|
"grad_norm": 0.34423601627349854,
|
||
|
|
"learning_rate": 8.71572412738697e-07,
|
||
|
|
"loss": 0.0341,
|
||
|
|
"num_tokens": 11990690.0,
|
||
|
|
"reward": 1.5825893580913544,
|
||
|
|
"reward_std": 0.37782258354127407,
|
||
|
|
"rewards/accuracy_reward": 0.6372767984867096,
|
||
|
|
"rewards/format_reward": 0.9453125,
|
||
|
|
"step": 21
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 512.1685447692871,
|
||
|
|
"epoch": 0.3283582089552239,
|
||
|
|
"grad_norm": 0.3256766200065613,
|
||
|
|
"learning_rate": 8.535533905932737e-07,
|
||
|
|
"loss": 0.0125,
|
||
|
|
"num_tokens": 12579057.0,
|
||
|
|
"reward": 1.6216518580913544,
|
||
|
|
"reward_std": 0.37416161969304085,
|
||
|
|
"rewards/accuracy_reward": 0.6540178582072258,
|
||
|
|
"rewards/format_reward": 0.9676339402794838,
|
||
|
|
"step": 22
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 506.10828018188477,
|
||
|
|
"epoch": 0.34328358208955223,
|
||
|
|
"grad_norm": 0.27199289202690125,
|
||
|
|
"learning_rate": 8.34565303179429e-07,
|
||
|
|
"loss": 0.054,
|
||
|
|
"num_tokens": 13163106.0,
|
||
|
|
"reward": 1.6227679401636124,
|
||
|
|
"reward_std": 0.27844817750155926,
|
||
|
|
"rewards/accuracy_reward": 0.671875,
|
||
|
|
"rewards/format_reward": 0.9508928656578064,
|
||
|
|
"step": 23
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 604.025707244873,
|
||
|
|
"epoch": 0.3582089552238806,
|
||
|
|
"grad_norm": 1.3046621084213257,
|
||
|
|
"learning_rate": 8.146601955249187e-07,
|
||
|
|
"loss": 0.0376,
|
||
|
|
"num_tokens": 13840433.0,
|
||
|
|
"reward": 1.6439733058214188,
|
||
|
|
"reward_std": 0.28095651790499687,
|
||
|
|
"rewards/accuracy_reward": 0.6886160597205162,
|
||
|
|
"rewards/format_reward": 0.9553571566939354,
|
||
|
|
"step": 24
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 550.097110748291,
|
||
|
|
"epoch": 0.373134328358209,
|
||
|
|
"grad_norm": 0.45065104961395264,
|
||
|
|
"learning_rate": 7.938926261462365e-07,
|
||
|
|
"loss": 0.0341,
|
||
|
|
"num_tokens": 14448480.0,
|
||
|
|
"reward": 1.7399554401636124,
|
||
|
|
"reward_std": 0.24296983890235424,
|
||
|
|
"rewards/accuracy_reward": 0.7712053507566452,
|
||
|
|
"rewards/format_reward": 0.9687499925494194,
|
||
|
|
"step": 25
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 614.7064971923828,
|
||
|
|
"epoch": 0.3880597014925373,
|
||
|
|
"grad_norm": 2.617610454559326,
|
||
|
|
"learning_rate": 7.723195175075135e-07,
|
||
|
|
"loss": 0.05,
|
||
|
|
"num_tokens": 15136857.0,
|
||
|
|
"reward": 1.639508992433548,
|
||
|
|
"reward_std": 0.28644888289272785,
|
||
|
|
"rewards/accuracy_reward": 0.6819196492433548,
|
||
|
|
"rewards/format_reward": 0.9575892835855484,
|
||
|
|
"step": 26
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 610.8203430175781,
|
||
|
|
"epoch": 0.40298507462686567,
|
||
|
|
"grad_norm": 0.1424814909696579,
|
||
|
|
"learning_rate": 7.5e-07,
|
||
|
|
"loss": 0.0141,
|
||
|
|
"num_tokens": 15810840.0,
|
||
|
|
"reward": 1.664062574505806,
|
||
|
|
"reward_std": 0.2185768410563469,
|
||
|
|
"rewards/accuracy_reward": 0.686383917927742,
|
||
|
|
"rewards/format_reward": 0.9776785746216774,
|
||
|
|
"step": 27
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 574.0815010070801,
|
||
|
|
"epoch": 0.417910447761194,
|
||
|
|
"grad_norm": 0.23577629029750824,
|
||
|
|
"learning_rate": 7.269952498697734e-07,
|
||
|
|
"loss": 0.0452,
|
||
|
|
"num_tokens": 16444609.0,
|
||
|
|
"reward": 1.6852679401636124,
|
||
|
|
"reward_std": 0.25254260189831257,
|
||
|
|
"rewards/accuracy_reward": 0.7120535634458065,
|
||
|
|
"rewards/format_reward": 0.9732142835855484,
|
||
|
|
"step": 28
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 652.5870895385742,
|
||
|
|
"epoch": 0.43283582089552236,
|
||
|
|
"grad_norm": 0.15183551609516144,
|
||
|
|
"learning_rate": 7.033683215379002e-07,
|
||
|
|
"loss": 0.0389,
|
||
|
|
"num_tokens": 17168567.0,
|
||
|
|
"reward": 1.6551340073347092,
|
||
|
|
"reward_std": 0.24212115444242954,
|
||
|
|
"rewards/accuracy_reward": 0.6729910671710968,
|
||
|
|
"rewards/format_reward": 0.9821428507566452,
|
||
|
|
"step": 29
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 620.8114051818848,
|
||
|
|
"epoch": 0.44776119402985076,
|
||
|
|
"grad_norm": 0.13523727655410767,
|
||
|
|
"learning_rate": 6.7918397477265e-07,
|
||
|
|
"loss": 0.0611,
|
||
|
|
"num_tokens": 17847534.0,
|
||
|
|
"reward": 1.7377232909202576,
|
||
|
|
"reward_std": 0.21810074150562286,
|
||
|
|
"rewards/accuracy_reward": 0.7622767835855484,
|
||
|
|
"rewards/format_reward": 0.975446417927742,
|
||
|
|
"step": 30
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 609.9408836364746,
|
||
|
|
"epoch": 0.4626865671641791,
|
||
|
|
"grad_norm": 0.11713448166847229,
|
||
|
|
"learning_rate": 6.545084971874736e-07,
|
||
|
|
"loss": 0.0305,
|
||
|
|
"num_tokens": 18517833.0,
|
||
|
|
"reward": 1.6886161416769028,
|
||
|
|
"reward_std": 0.16379249095916748,
|
||
|
|
"rewards/accuracy_reward": 0.7020089328289032,
|
||
|
|
"rewards/format_reward": 0.9866071417927742,
|
||
|
|
"step": 31
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 588.9922142028809,
|
||
|
|
"epoch": 0.47761194029850745,
|
||
|
|
"grad_norm": 0.19670455157756805,
|
||
|
|
"learning_rate": 6.294095225512604e-07,
|
||
|
|
"loss": 0.0329,
|
||
|
|
"num_tokens": 19174306.0,
|
||
|
|
"reward": 1.7399554401636124,
|
||
|
|
"reward_std": 0.19168315595015883,
|
||
|
|
"rewards/accuracy_reward": 0.7555803507566452,
|
||
|
|
"rewards/format_reward": 0.984375,
|
||
|
|
"step": 32
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 651.0279312133789,
|
||
|
|
"epoch": 0.4925373134328358,
|
||
|
|
"grad_norm": 0.19632503390312195,
|
||
|
|
"learning_rate": 6.039558454088795e-07,
|
||
|
|
"loss": 0.0406,
|
||
|
|
"num_tokens": 19883139.0,
|
||
|
|
"reward": 1.6774554550647736,
|
||
|
|
"reward_std": 0.20999335870146751,
|
||
|
|
"rewards/accuracy_reward": 0.7008928656578064,
|
||
|
|
"rewards/format_reward": 0.9765624925494194,
|
||
|
|
"step": 33
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 669.9475708007812,
|
||
|
|
"epoch": 0.5074626865671642,
|
||
|
|
"grad_norm": 0.1526719331741333,
|
||
|
|
"learning_rate": 5.782172325201155e-07,
|
||
|
|
"loss": 0.0386,
|
||
|
|
"num_tokens": 20613844.0,
|
||
|
|
"reward": 1.6763393729925156,
|
||
|
|
"reward_std": 0.20889410376548767,
|
||
|
|
"rewards/accuracy_reward": 0.6919642873108387,
|
||
|
|
"rewards/format_reward": 0.984375,
|
||
|
|
"step": 34
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 623.3448867797852,
|
||
|
|
"epoch": 0.5223880597014925,
|
||
|
|
"grad_norm": 0.20824865996837616,
|
||
|
|
"learning_rate": 5.522642316338268e-07,
|
||
|
|
"loss": 0.0249,
|
||
|
|
"num_tokens": 21312353.0,
|
||
|
|
"reward": 1.7287947088479996,
|
||
|
|
"reward_std": 0.17593990080058575,
|
||
|
|
"rewards/accuracy_reward": 0.7388392835855484,
|
||
|
|
"rewards/format_reward": 0.9899553507566452,
|
||
|
|
"step": 35
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 646.650691986084,
|
||
|
|
"epoch": 0.5373134328358209,
|
||
|
|
"grad_norm": 0.09576990455389023,
|
||
|
|
"learning_rate": 5.26167978121472e-07,
|
||
|
|
"loss": 0.048,
|
||
|
|
"num_tokens": 22035288.0,
|
||
|
|
"reward": 1.6785715073347092,
|
||
|
|
"reward_std": 0.18887418508529663,
|
||
|
|
"rewards/accuracy_reward": 0.7031250037252903,
|
||
|
|
"rewards/format_reward": 0.9754464328289032,
|
||
|
|
"step": 36
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 607.474365234375,
|
||
|
|
"epoch": 0.5522388059701493,
|
||
|
|
"grad_norm": 0.23552455008029938,
|
||
|
|
"learning_rate": 5e-07,
|
||
|
|
"loss": 0.0183,
|
||
|
|
"num_tokens": 22707009.0,
|
||
|
|
"reward": 1.7220982909202576,
|
||
|
|
"reward_std": 0.16549584455788136,
|
||
|
|
"rewards/accuracy_reward": 0.731026791036129,
|
||
|
|
"rewards/format_reward": 0.991071417927742,
|
||
|
|
"step": 37
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 629.219898223877,
|
||
|
|
"epoch": 0.5671641791044776,
|
||
|
|
"grad_norm": 0.13172753155231476,
|
||
|
|
"learning_rate": 4.7383202187852804e-07,
|
||
|
|
"loss": 0.0201,
|
||
|
|
"num_tokens": 23416574.0,
|
||
|
|
"reward": 1.7444197237491608,
|
||
|
|
"reward_std": 0.15772132016718388,
|
||
|
|
"rewards/accuracy_reward": 0.7566964253783226,
|
||
|
|
"rewards/format_reward": 0.987723208963871,
|
||
|
|
"step": 38
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 605.5770378112793,
|
||
|
|
"epoch": 0.582089552238806,
|
||
|
|
"grad_norm": 0.10828184336423874,
|
||
|
|
"learning_rate": 4.477357683661733e-07,
|
||
|
|
"loss": 0.0169,
|
||
|
|
"num_tokens": 24085835.0,
|
||
|
|
"reward": 1.681919738650322,
|
||
|
|
"reward_std": 0.16571981832385063,
|
||
|
|
"rewards/accuracy_reward": 0.6975446455180645,
|
||
|
|
"rewards/format_reward": 0.9843749925494194,
|
||
|
|
"step": 39
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 706.116096496582,
|
||
|
|
"epoch": 0.5970149253731343,
|
||
|
|
"grad_norm": 0.14090992510318756,
|
||
|
|
"learning_rate": 4.2178276747988444e-07,
|
||
|
|
"loss": 0.0255,
|
||
|
|
"num_tokens": 24842211.0,
|
||
|
|
"reward": 1.7020090222358704,
|
||
|
|
"reward_std": 0.19248592853546143,
|
||
|
|
"rewards/accuracy_reward": 0.7220982238650322,
|
||
|
|
"rewards/format_reward": 0.979910708963871,
|
||
|
|
"step": 40
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 628.4498100280762,
|
||
|
|
"epoch": 0.6119402985074627,
|
||
|
|
"grad_norm": 0.20408028364181519,
|
||
|
|
"learning_rate": 3.960441545911204e-07,
|
||
|
|
"loss": 0.0282,
|
||
|
|
"num_tokens": 25545910.0,
|
||
|
|
"reward": 1.6808036416769028,
|
||
|
|
"reward_std": 0.19085692055523396,
|
||
|
|
"rewards/accuracy_reward": 0.6941964328289032,
|
||
|
|
"rewards/format_reward": 0.9866071417927742,
|
||
|
|
"step": 41
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 606.4631958007812,
|
||
|
|
"epoch": 0.6268656716417911,
|
||
|
|
"grad_norm": 0.3082476258277893,
|
||
|
|
"learning_rate": 3.7059047744873955e-07,
|
||
|
|
"loss": 0.0413,
|
||
|
|
"num_tokens": 26211781.0,
|
||
|
|
"reward": 1.8024554252624512,
|
||
|
|
"reward_std": 0.16285591386258602,
|
||
|
|
"rewards/accuracy_reward": 0.8169642761349678,
|
||
|
|
"rewards/format_reward": 0.9854910746216774,
|
||
|
|
"step": 42
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 617.654052734375,
|
||
|
|
"epoch": 0.6417910447761194,
|
||
|
|
"grad_norm": 0.3439674377441406,
|
||
|
|
"learning_rate": 3.454915028125263e-07,
|
||
|
|
"loss": 0.031,
|
||
|
|
"num_tokens": 26900815.0,
|
||
|
|
"reward": 1.7377232909202576,
|
||
|
|
"reward_std": 0.16336329095065594,
|
||
|
|
"rewards/accuracy_reward": 0.7455357164144516,
|
||
|
|
"rewards/format_reward": 0.9921874925494194,
|
||
|
|
"step": 43
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 597.3973541259766,
|
||
|
|
"epoch": 0.6567164179104478,
|
||
|
|
"grad_norm": 0.2667296826839447,
|
||
|
|
"learning_rate": 3.2081602522734985e-07,
|
||
|
|
"loss": 0.0359,
|
||
|
|
"num_tokens": 27562995.0,
|
||
|
|
"reward": 1.7243304550647736,
|
||
|
|
"reward_std": 0.17545861564576626,
|
||
|
|
"rewards/accuracy_reward": 0.7421874925494194,
|
||
|
|
"rewards/format_reward": 0.9821428433060646,
|
||
|
|
"step": 44
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 590.3047065734863,
|
||
|
|
"epoch": 0.6716417910447762,
|
||
|
|
"grad_norm": 0.12688565254211426,
|
||
|
|
"learning_rate": 2.9663167846209996e-07,
|
||
|
|
"loss": 0.0193,
|
||
|
|
"num_tokens": 28218116.0,
|
||
|
|
"reward": 1.7667411416769028,
|
||
|
|
"reward_std": 0.1432758029550314,
|
||
|
|
"rewards/accuracy_reward": 0.7700892761349678,
|
||
|
|
"rewards/format_reward": 0.9966517761349678,
|
||
|
|
"step": 45
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 694.7522659301758,
|
||
|
|
"epoch": 0.6865671641791045,
|
||
|
|
"grad_norm": 0.13461622595787048,
|
||
|
|
"learning_rate": 2.730047501302266e-07,
|
||
|
|
"loss": 0.0493,
|
||
|
|
"num_tokens": 28974406.0,
|
||
|
|
"reward": 1.6484375894069672,
|
||
|
|
"reward_std": 0.21462283097207546,
|
||
|
|
"rewards/accuracy_reward": 0.6729910671710968,
|
||
|
|
"rewards/format_reward": 0.9754464253783226,
|
||
|
|
"step": 46
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 662.5826187133789,
|
||
|
|
"epoch": 0.7014925373134329,
|
||
|
|
"grad_norm": 0.10844791680574417,
|
||
|
|
"learning_rate": 2.500000000000001e-07,
|
||
|
|
"loss": 0.0217,
|
||
|
|
"num_tokens": 29684136.0,
|
||
|
|
"reward": 1.733258992433548,
|
||
|
|
"reward_std": 0.16403476987034082,
|
||
|
|
"rewards/accuracy_reward": 0.7511160746216774,
|
||
|
|
"rewards/format_reward": 0.9821428507566452,
|
||
|
|
"step": 47
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 654.8058242797852,
|
||
|
|
"epoch": 0.7164179104477612,
|
||
|
|
"grad_norm": 0.269379585981369,
|
||
|
|
"learning_rate": 2.2768048249248644e-07,
|
||
|
|
"loss": 0.0356,
|
||
|
|
"num_tokens": 30397658.0,
|
||
|
|
"reward": 1.7533482909202576,
|
||
|
|
"reward_std": 0.18037123046815395,
|
||
|
|
"rewards/accuracy_reward": 0.7656250074505806,
|
||
|
|
"rewards/format_reward": 0.987723208963871,
|
||
|
|
"step": 48
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 662.7210159301758,
|
||
|
|
"epoch": 0.7313432835820896,
|
||
|
|
"grad_norm": 0.1654261350631714,
|
||
|
|
"learning_rate": 2.0610737385376348e-07,
|
||
|
|
"loss": 0.0374,
|
||
|
|
"num_tokens": 31127856.0,
|
||
|
|
"reward": 1.7254465073347092,
|
||
|
|
"reward_std": 0.1822904385626316,
|
||
|
|
"rewards/accuracy_reward": 0.7332589328289032,
|
||
|
|
"rewards/format_reward": 0.9921875,
|
||
|
|
"step": 49
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 608.631721496582,
|
||
|
|
"epoch": 0.746268656716418,
|
||
|
|
"grad_norm": 0.1254522204399109,
|
||
|
|
"learning_rate": 1.8533980447508135e-07,
|
||
|
|
"loss": 0.0104,
|
||
|
|
"num_tokens": 31811342.0,
|
||
|
|
"reward": 1.6908482909202576,
|
||
|
|
"reward_std": 0.16885741148144007,
|
||
|
|
"rewards/accuracy_reward": 0.6975446380674839,
|
||
|
|
"rewards/format_reward": 0.9933035671710968,
|
||
|
|
"step": 50
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 637.1618576049805,
|
||
|
|
"epoch": 0.7611940298507462,
|
||
|
|
"grad_norm": 0.13198821246623993,
|
||
|
|
"learning_rate": 1.6543469682057104e-07,
|
||
|
|
"loss": 0.0518,
|
||
|
|
"num_tokens": 32520023.0,
|
||
|
|
"reward": 1.7220982909202576,
|
||
|
|
"reward_std": 0.20328251458704472,
|
||
|
|
"rewards/accuracy_reward": 0.7388392835855484,
|
||
|
|
"rewards/format_reward": 0.9832589253783226,
|
||
|
|
"step": 51
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 639.1216850280762,
|
||
|
|
"epoch": 0.7761194029850746,
|
||
|
|
"grad_norm": 0.14550577104091644,
|
||
|
|
"learning_rate": 1.4644660940672627e-07,
|
||
|
|
"loss": 0.0437,
|
||
|
|
"num_tokens": 33221324.0,
|
||
|
|
"reward": 1.6729911267757416,
|
||
|
|
"reward_std": 0.18635992892086506,
|
||
|
|
"rewards/accuracy_reward": 0.6941964365541935,
|
||
|
|
"rewards/format_reward": 0.9787946343421936,
|
||
|
|
"step": 52
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 630.694221496582,
|
||
|
|
"epoch": 0.7910447761194029,
|
||
|
|
"grad_norm": 0.14397642016410828,
|
||
|
|
"learning_rate": 1.284275872613028e-07,
|
||
|
|
"loss": 0.0351,
|
||
|
|
"num_tokens": 33914098.0,
|
||
|
|
"reward": 1.6830357909202576,
|
||
|
|
"reward_std": 0.20892403088510036,
|
||
|
|
"rewards/accuracy_reward": 0.6886160746216774,
|
||
|
|
"rewards/format_reward": 0.9944196343421936,
|
||
|
|
"step": 53
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 606.3326225280762,
|
||
|
|
"epoch": 0.8059701492537313,
|
||
|
|
"grad_norm": 0.1218072697520256,
|
||
|
|
"learning_rate": 1.1142701927151454e-07,
|
||
|
|
"loss": 0.0235,
|
||
|
|
"num_tokens": 34592204.0,
|
||
|
|
"reward": 1.7310268729925156,
|
||
|
|
"reward_std": 0.1702114064246416,
|
||
|
|
"rewards/accuracy_reward": 0.7343750074505806,
|
||
|
|
"rewards/format_reward": 0.9966517761349678,
|
||
|
|
"step": 54
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 601.4486846923828,
|
||
|
|
"epoch": 0.8208955223880597,
|
||
|
|
"grad_norm": 0.14515279233455658,
|
||
|
|
"learning_rate": 9.549150281252632e-08,
|
||
|
|
"loss": 0.0206,
|
||
|
|
"num_tokens": 35270222.0,
|
||
|
|
"reward": 1.7477679252624512,
|
||
|
|
"reward_std": 0.1629035547375679,
|
||
|
|
"rewards/accuracy_reward": 0.7522321417927742,
|
||
|
|
"rewards/format_reward": 0.9955357015132904,
|
||
|
|
"step": 55
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 633.4152069091797,
|
||
|
|
"epoch": 0.835820895522388,
|
||
|
|
"grad_norm": 0.16478443145751953,
|
||
|
|
"learning_rate": 8.066471602728803e-08,
|
||
|
|
"loss": 0.0416,
|
||
|
|
"num_tokens": 35968818.0,
|
||
|
|
"reward": 1.6875000596046448,
|
||
|
|
"reward_std": 0.18482761643826962,
|
||
|
|
"rewards/accuracy_reward": 0.6986607164144516,
|
||
|
|
"rewards/format_reward": 0.9888392686843872,
|
||
|
|
"step": 56
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 603.7109565734863,
|
||
|
|
"epoch": 0.8507462686567164,
|
||
|
|
"grad_norm": 0.14361166954040527,
|
||
|
|
"learning_rate": 6.698729810778064e-08,
|
||
|
|
"loss": 0.0288,
|
||
|
|
"num_tokens": 36632743.0,
|
||
|
|
"reward": 1.7745536714792252,
|
||
|
|
"reward_std": 0.17002357356250286,
|
||
|
|
"rewards/accuracy_reward": 0.7890625074505806,
|
||
|
|
"rewards/format_reward": 0.9854910597205162,
|
||
|
|
"step": 57
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 628.2723541259766,
|
||
|
|
"epoch": 0.8656716417910447,
|
||
|
|
"grad_norm": 0.10697366297245026,
|
||
|
|
"learning_rate": 5.44967379058161e-08,
|
||
|
|
"loss": 0.0286,
|
||
|
|
"num_tokens": 37321643.0,
|
||
|
|
"reward": 1.70870541036129,
|
||
|
|
"reward_std": 0.16991718113422394,
|
||
|
|
"rewards/accuracy_reward": 0.7198660746216774,
|
||
|
|
"rewards/format_reward": 0.9888392761349678,
|
||
|
|
"step": 58
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 631.0524940490723,
|
||
|
|
"epoch": 0.8805970149253731,
|
||
|
|
"grad_norm": 0.11336029320955276,
|
||
|
|
"learning_rate": 4.322727117869951e-08,
|
||
|
|
"loss": 0.0218,
|
||
|
|
"num_tokens": 38012474.0,
|
||
|
|
"reward": 1.7656251043081284,
|
||
|
|
"reward_std": 0.19230836629867554,
|
||
|
|
"rewards/accuracy_reward": 0.777901791036129,
|
||
|
|
"rewards/format_reward": 0.9877232164144516,
|
||
|
|
"step": 59
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 600.5245742797852,
|
||
|
|
"epoch": 0.8955223880597015,
|
||
|
|
"grad_norm": 0.19460614025592804,
|
||
|
|
"learning_rate": 3.3209786751399184e-08,
|
||
|
|
"loss": 0.025,
|
||
|
|
"num_tokens": 38685576.0,
|
||
|
|
"reward": 1.73214291036129,
|
||
|
|
"reward_std": 0.1749271210283041,
|
||
|
|
"rewards/accuracy_reward": 0.7399553507566452,
|
||
|
|
"rewards/format_reward": 0.9921874925494194,
|
||
|
|
"step": 60
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 623.6350631713867,
|
||
|
|
"epoch": 0.9104477611940298,
|
||
|
|
"grad_norm": 0.2211906909942627,
|
||
|
|
"learning_rate": 2.4471741852423233e-08,
|
||
|
|
"loss": 0.0398,
|
||
|
|
"num_tokens": 39386529.0,
|
||
|
|
"reward": 1.6808036267757416,
|
||
|
|
"reward_std": 0.18810790218412876,
|
||
|
|
"rewards/accuracy_reward": 0.699776791036129,
|
||
|
|
"rewards/format_reward": 0.981026791036129,
|
||
|
|
"step": 61
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 615.8917694091797,
|
||
|
|
"epoch": 0.9253731343283582,
|
||
|
|
"grad_norm": 0.21188978850841522,
|
||
|
|
"learning_rate": 1.7037086855465898e-08,
|
||
|
|
"loss": 0.0261,
|
||
|
|
"num_tokens": 40063784.0,
|
||
|
|
"reward": 1.7633929401636124,
|
||
|
|
"reward_std": 0.162169449031353,
|
||
|
|
"rewards/accuracy_reward": 0.7745535783469677,
|
||
|
|
"rewards/format_reward": 0.988839291036129,
|
||
|
|
"step": 62
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 665.5602951049805,
|
||
|
|
"epoch": 0.9402985074626866,
|
||
|
|
"grad_norm": 0.10184086114168167,
|
||
|
|
"learning_rate": 1.0926199633097154e-08,
|
||
|
|
"loss": 0.0423,
|
||
|
|
"num_tokens": 40789638.0,
|
||
|
|
"reward": 1.727678656578064,
|
||
|
|
"reward_std": 0.21804202906787395,
|
||
|
|
"rewards/accuracy_reward": 0.7488839328289032,
|
||
|
|
"rewards/format_reward": 0.9787946417927742,
|
||
|
|
"step": 63
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 579.4073905944824,
|
||
|
|
"epoch": 0.9552238805970149,
|
||
|
|
"grad_norm": 0.17883718013763428,
|
||
|
|
"learning_rate": 6.15582970243117e-09,
|
||
|
|
"loss": 0.0162,
|
||
|
|
"num_tokens": 41445467.0,
|
||
|
|
"reward": 1.742187574505806,
|
||
|
|
"reward_std": 0.1791787538677454,
|
||
|
|
"rewards/accuracy_reward": 0.7500000074505806,
|
||
|
|
"rewards/format_reward": 0.9921874925494194,
|
||
|
|
"step": 64
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 600.4989166259766,
|
||
|
|
"epoch": 0.9701492537313433,
|
||
|
|
"grad_norm": 0.09666120260953903,
|
||
|
|
"learning_rate": 2.739052315863355e-09,
|
||
|
|
"loss": 0.0247,
|
||
|
|
"num_tokens": 42116706.0,
|
||
|
|
"reward": 1.7354911714792252,
|
||
|
|
"reward_std": 0.15130825340747833,
|
||
|
|
"rewards/accuracy_reward": 0.7421875,
|
||
|
|
"rewards/format_reward": 0.9933035671710968,
|
||
|
|
"step": 65
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"clip_ratio": 0.0,
|
||
|
|
"completion_length": 660.8865814208984,
|
||
|
|
"epoch": 0.9850746268656716,
|
||
|
|
"grad_norm": 1.0719019174575806,
|
||
|
|
"learning_rate": 6.852326227130833e-10,
|
||
|
|
"loss": 0.0373,
|
||
|
|
"num_tokens": 42832111.0,
|
||
|
|
"reward": 1.695312574505806,
|
||
|
|
"reward_std": 0.21052085421979427,
|
||
|
|
"rewards/accuracy_reward": 0.7198660597205162,
|
||
|
|
"rewards/format_reward": 0.9754464253783226,
|
||
|
|
"step": 66
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9850746268656716,
|
||
|
|
"step": 66,
|
||
|
|
"total_flos": 0.0,
|
||
|
|
"train_loss": 0.02949339959234225,
|
||
|
|
"train_runtime": 14062.8623,
|
||
|
|
"train_samples_per_second": 0.533,
|
||
|
|
"train_steps_per_second": 0.005
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 1,
|
||
|
|
"max_steps": 67,
|
||
|
|
"num_input_tokens_seen": 0,
|
||
|
|
"num_train_epochs": 1,
|
||
|
|
"save_steps": 500,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": true,
|
||
|
|
"should_training_stop": false
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 0.0,
|
||
|
|
"train_batch_size": 16,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|