1548 lines
49 KiB
JSON
1548 lines
49 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 1.0,
|
|
"eval_steps": 200,
|
|
"global_step": 955,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0010471204188481676,
|
|
"grad_norm": 45.70638656616211,
|
|
"kl": 0.03365863859653473,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": -133350016.0,
|
|
"logits/rejected": -100751848.0,
|
|
"logps/chosen": -199.38916015625,
|
|
"logps/rejected": -248.57103704637098,
|
|
"loss": 1.9996,
|
|
"rewards/chosen": -0.006603976993849783,
|
|
"rewards/margins": 0.002509254980413562,
|
|
"rewards/rejected": -0.009113231974263345,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.010471204188481676,
|
|
"grad_norm": 45.63622283935547,
|
|
"kl": 0.05001102015376091,
|
|
"learning_rate": 4.6875e-08,
|
|
"logits/chosen": -114836760.0,
|
|
"logits/rejected": -115496552.0,
|
|
"logps/chosen": -280.943407960199,
|
|
"logps/rejected": -255.2594489981785,
|
|
"loss": 2.0019,
|
|
"rewards/chosen": -0.001783605436385172,
|
|
"rewards/margins": -0.0038228675079356074,
|
|
"rewards/rejected": 0.0020392620715504353,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.020942408376963352,
|
|
"grad_norm": 49.609336853027344,
|
|
"kl": 0.050704918801784515,
|
|
"learning_rate": 9.895833333333332e-08,
|
|
"logits/chosen": -125602032.0,
|
|
"logits/rejected": -116276880.0,
|
|
"logps/chosen": -279.09286115269464,
|
|
"logps/rejected": -261.840047998366,
|
|
"loss": 1.9992,
|
|
"rewards/chosen": 0.0030849476060467564,
|
|
"rewards/margins": 0.0017726313628812977,
|
|
"rewards/rejected": 0.0013123162431654587,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.031413612565445025,
|
|
"grad_norm": 45.7076301574707,
|
|
"kl": 0.0796850323677063,
|
|
"learning_rate": 1.5104166666666664e-07,
|
|
"logits/chosen": -117311640.0,
|
|
"logits/rejected": -114144304.0,
|
|
"logps/chosen": -294.7149727852853,
|
|
"logps/rejected": -242.79242162052117,
|
|
"loss": 1.9982,
|
|
"rewards/chosen": 0.009787982648557372,
|
|
"rewards/margins": 0.002934079104880057,
|
|
"rewards/rejected": 0.006853903543677315,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.041884816753926704,
|
|
"grad_norm": 53.927669525146484,
|
|
"kl": 0.20030224323272705,
|
|
"learning_rate": 2.03125e-07,
|
|
"logits/chosen": -119643952.0,
|
|
"logits/rejected": -120977776.0,
|
|
"logps/chosen": -306.0683379120879,
|
|
"logps/rejected": -278.4179383748056,
|
|
"loss": 1.9963,
|
|
"rewards/chosen": 0.03329542031280661,
|
|
"rewards/margins": 0.007960547439639757,
|
|
"rewards/rejected": 0.025334872873166856,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.05235602094240838,
|
|
"grad_norm": 51.90446090698242,
|
|
"kl": 0.34236329793930054,
|
|
"learning_rate": 2.552083333333333e-07,
|
|
"logits/chosen": -119124032.0,
|
|
"logits/rejected": -115569560.0,
|
|
"logps/chosen": -311.1760096153846,
|
|
"logps/rejected": -268.8574900793651,
|
|
"loss": 1.9864,
|
|
"rewards/chosen": 0.07745902428260217,
|
|
"rewards/margins": 0.02584011520367111,
|
|
"rewards/rejected": 0.051618909078931054,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.06282722513089005,
|
|
"grad_norm": 51.83867263793945,
|
|
"kl": 0.503300666809082,
|
|
"learning_rate": 3.0729166666666665e-07,
|
|
"logits/chosen": -114966488.0,
|
|
"logits/rejected": -120589360.0,
|
|
"logps/chosen": -299.37085962145113,
|
|
"logps/rejected": -272.62553212074306,
|
|
"loss": 1.9757,
|
|
"rewards/chosen": 0.12960234199788667,
|
|
"rewards/margins": 0.050474802944834835,
|
|
"rewards/rejected": 0.07912753905305184,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.07329842931937172,
|
|
"grad_norm": 55.578922271728516,
|
|
"kl": 0.31499481201171875,
|
|
"learning_rate": 3.59375e-07,
|
|
"logits/chosen": -111468848.0,
|
|
"logits/rejected": -121516712.0,
|
|
"logps/chosen": -281.68463625401927,
|
|
"logps/rejected": -274.1886635638298,
|
|
"loss": 1.9691,
|
|
"rewards/chosen": 0.17197007304985806,
|
|
"rewards/margins": 0.07172654231262758,
|
|
"rewards/rejected": 0.10024353073723048,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.08376963350785341,
|
|
"grad_norm": 53.44794464111328,
|
|
"kl": 0.14020584523677826,
|
|
"learning_rate": 4.114583333333333e-07,
|
|
"logits/chosen": -124242176.0,
|
|
"logits/rejected": -114853336.0,
|
|
"logps/chosen": -309.7781393568147,
|
|
"logps/rejected": -254.96207137161085,
|
|
"loss": 1.919,
|
|
"rewards/chosen": 0.2425672171857054,
|
|
"rewards/margins": 0.16257205424617438,
|
|
"rewards/rejected": 0.079995162939531,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.09424083769633508,
|
|
"grad_norm": 45.9337043762207,
|
|
"kl": 0.010224603116512299,
|
|
"learning_rate": 4.6354166666666664e-07,
|
|
"logits/chosen": -114247248.0,
|
|
"logits/rejected": -121002304.0,
|
|
"logps/chosen": -255.63410433070865,
|
|
"logps/rejected": -255.94968507751938,
|
|
"loss": 1.9103,
|
|
"rewards/chosen": 0.2659489669199065,
|
|
"rewards/margins": 0.1896964983080636,
|
|
"rewards/rejected": 0.0762524686118429,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.10471204188481675,
|
|
"grad_norm": 47.694000244140625,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.999849525959245e-07,
|
|
"logits/chosen": -116696832.0,
|
|
"logits/rejected": -135091520.0,
|
|
"logps/chosen": -299.64565284653463,
|
|
"logps/rejected": -257.15984328635017,
|
|
"loss": 1.8452,
|
|
"rewards/chosen": 0.3131070278658725,
|
|
"rewards/margins": 0.35410608100594115,
|
|
"rewards/rejected": -0.04099905314006862,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.11518324607329843,
|
|
"grad_norm": 45.92950439453125,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.997174935782199e-07,
|
|
"logits/chosen": -114301056.0,
|
|
"logits/rejected": -125180608.0,
|
|
"logps/chosen": -289.91893468118195,
|
|
"logps/rejected": -249.99958300627944,
|
|
"loss": 1.8212,
|
|
"rewards/chosen": 0.0881744610206521,
|
|
"rewards/margins": 0.3989718005393923,
|
|
"rewards/rejected": -0.31079733951874017,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.1256544502617801,
|
|
"grad_norm": 44.388973236083984,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.9911605954668e-07,
|
|
"logits/chosen": -128287936.0,
|
|
"logits/rejected": -123292968.0,
|
|
"logps/chosen": -274.05433947772656,
|
|
"logps/rejected": -291.2209608505564,
|
|
"loss": 1.7655,
|
|
"rewards/chosen": 0.06379634663805983,
|
|
"rewards/margins": 0.5666303574066409,
|
|
"rewards/rejected": -0.502834010768581,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.13612565445026178,
|
|
"grad_norm": 43.125389099121094,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.981814548660135e-07,
|
|
"logits/chosen": -118625360.0,
|
|
"logits/rejected": -142319376.0,
|
|
"logps/chosen": -286.44763163349916,
|
|
"logps/rejected": -260.8665758862629,
|
|
"loss": 1.688,
|
|
"rewards/chosen": 0.16187965810595462,
|
|
"rewards/margins": 0.7612699556618077,
|
|
"rewards/rejected": -0.599390297555853,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.14659685863874344,
|
|
"grad_norm": 43.5549201965332,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.969149294871417e-07,
|
|
"logits/chosen": -134174704.0,
|
|
"logits/rejected": -129014448.0,
|
|
"logps/chosen": -270.30858126996804,
|
|
"logps/rejected": -286.30975248470946,
|
|
"loss": 1.6923,
|
|
"rewards/chosen": -0.13654317642553165,
|
|
"rewards/margins": 0.839808220327622,
|
|
"rewards/rejected": -0.9763513967531536,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.15706806282722513,
|
|
"grad_norm": 38.105342864990234,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.953181772754997e-07,
|
|
"logits/chosen": -140920352.0,
|
|
"logits/rejected": -132664600.0,
|
|
"logps/chosen": -272.86307251908397,
|
|
"logps/rejected": -266.89385,
|
|
"loss": 1.6369,
|
|
"rewards/chosen": -0.0188656086230096,
|
|
"rewards/margins": 1.1427980632519903,
|
|
"rewards/rejected": -1.161663671875,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.16753926701570682,
|
|
"grad_norm": 38.238868713378906,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.93393333745642e-07,
|
|
"logits/chosen": -130763216.0,
|
|
"logits/rejected": -132181120.0,
|
|
"logps/chosen": -267.7964184253247,
|
|
"logps/rejected": -265.75249435240966,
|
|
"loss": 1.6156,
|
|
"rewards/chosen": 0.03252483962418197,
|
|
"rewards/margins": 1.1381634585530223,
|
|
"rewards/rejected": -1.1056386189288403,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.17801047120418848,
|
|
"grad_norm": 149.7418212890625,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.9114297320518e-07,
|
|
"logits/chosen": -148455008.0,
|
|
"logits/rejected": -148213216.0,
|
|
"logps/chosen": -292.17505877742946,
|
|
"logps/rejected": -289.26684190031153,
|
|
"loss": 1.6163,
|
|
"rewards/chosen": -0.49163005195067594,
|
|
"rewards/margins": 1.4294290544185801,
|
|
"rewards/rejected": -1.9210591063692561,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.18848167539267016,
|
|
"grad_norm": 43.059410095214844,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.885701053118751e-07,
|
|
"logits/chosen": -147940400.0,
|
|
"logits/rejected": -144332512.0,
|
|
"logps/chosen": -282.0602057573416,
|
|
"logps/rejected": -282.34577409162716,
|
|
"loss": 1.5876,
|
|
"rewards/chosen": 0.034519776310397446,
|
|
"rewards/margins": 1.3361885912602889,
|
|
"rewards/rejected": -1.3016688149498914,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.19895287958115182,
|
|
"grad_norm": 40.49046325683594,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.856781710484872e-07,
|
|
"logits/chosen": -139495344.0,
|
|
"logits/rejected": -144202096.0,
|
|
"logps/chosen": -280.0632974481659,
|
|
"logps/rejected": -290.6424196018377,
|
|
"loss": 1.5551,
|
|
"rewards/chosen": 0.12813351523172722,
|
|
"rewards/margins": 1.4480502681389824,
|
|
"rewards/rejected": -1.319916752907255,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.2094240837696335,
|
|
"grad_norm": 40.46213912963867,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.824710381207655e-07,
|
|
"logits/chosen": -144665424.0,
|
|
"logits/rejected": -151805984.0,
|
|
"logps/chosen": -291.33322447749197,
|
|
"logps/rejected": -285.26674107142856,
|
|
"loss": 1.5841,
|
|
"rewards/chosen": 0.006562507420874102,
|
|
"rewards/margins": 1.4862394700443544,
|
|
"rewards/rejected": -1.4796769626234803,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.2094240837696335,
|
|
"eval_kl": 0.0,
|
|
"eval_logits/chosen": -151004736.0,
|
|
"eval_logits/rejected": -149476768.0,
|
|
"eval_logps/chosen": -289.55475,
|
|
"eval_logps/rejected": -284.09784375,
|
|
"eval_loss": 0.39710375666618347,
|
|
"eval_rewards/chosen": -0.16988844299316405,
|
|
"eval_rewards/margins": 1.544695541381836,
|
|
"eval_rewards/rejected": -1.714583984375,
|
|
"eval_runtime": 92.5994,
|
|
"eval_samples_per_second": 43.197,
|
|
"eval_steps_per_second": 1.35,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.2198952879581152,
|
|
"grad_norm": 36.308265686035156,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.789529957847353e-07,
|
|
"logits/chosen": -152687040.0,
|
|
"logits/rejected": -144599248.0,
|
|
"logps/chosen": -300.46617366412215,
|
|
"logps/rejected": -276.6361,
|
|
"loss": 1.5542,
|
|
"rewards/chosen": -0.020091176215018935,
|
|
"rewards/margins": 1.684735972222481,
|
|
"rewards/rejected": -1.7048271484375,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.23036649214659685,
|
|
"grad_norm": 42.04066467285156,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.751287491101977e-07,
|
|
"logits/chosen": -153300528.0,
|
|
"logits/rejected": -141240576.0,
|
|
"logps/chosen": -284.2961228649068,
|
|
"logps/rejected": -269.83765723270443,
|
|
"loss": 1.626,
|
|
"rewards/chosen": -0.18326281908876407,
|
|
"rewards/margins": 1.4757443193374349,
|
|
"rewards/rejected": -1.6590071384261988,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.24083769633507854,
|
|
"grad_norm": 36.196807861328125,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.710034126881159e-07,
|
|
"logits/chosen": -159471936.0,
|
|
"logits/rejected": -135821408.0,
|
|
"logps/chosen": -301.3033342430859,
|
|
"logps/rejected": -293.3502055227656,
|
|
"loss": 1.5672,
|
|
"rewards/chosen": 0.1034569663945938,
|
|
"rewards/margins": 1.6865190209305867,
|
|
"rewards/rejected": -1.5830620545359928,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.2513089005235602,
|
|
"grad_norm": 36.31395721435547,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.665825037903035e-07,
|
|
"logits/chosen": -151562448.0,
|
|
"logits/rejected": -142325232.0,
|
|
"logps/chosen": -277.93509244992293,
|
|
"logps/rejected": -272.8287688193344,
|
|
"loss": 1.5525,
|
|
"rewards/chosen": 0.3289636691288882,
|
|
"rewards/margins": 1.6360129685110691,
|
|
"rewards/rejected": -1.307049299382181,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.2617801047120419,
|
|
"grad_norm": 39.01026916503906,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.618719349905619e-07,
|
|
"logits/chosen": -156142944.0,
|
|
"logits/rejected": -141863280.0,
|
|
"logps/chosen": -294.4734971374046,
|
|
"logps/rejected": -273.881775,
|
|
"loss": 1.525,
|
|
"rewards/chosen": 0.3023297957791627,
|
|
"rewards/margins": 1.7884680770291626,
|
|
"rewards/rejected": -1.48613828125,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.27225130890052357,
|
|
"grad_norm": 40.547950744628906,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.568780062571374e-07,
|
|
"logits/chosen": -152771744.0,
|
|
"logits/rejected": -151975136.0,
|
|
"logps/chosen": -278.8975861378205,
|
|
"logps/rejected": -288.9916634908537,
|
|
"loss": 1.5231,
|
|
"rewards/chosen": 0.07649397850036621,
|
|
"rewards/margins": 1.9374570090596268,
|
|
"rewards/rejected": -1.8609630305592606,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.28272251308900526,
|
|
"grad_norm": 38.78754806518555,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.516073965270717e-07,
|
|
"logits/chosen": -147246848.0,
|
|
"logits/rejected": -140734832.0,
|
|
"logps/chosen": -275.6838321596244,
|
|
"logps/rejected": -295.4493467238689,
|
|
"loss": 1.5248,
|
|
"rewards/chosen": 0.03273308743520149,
|
|
"rewards/margins": 1.9670623063070813,
|
|
"rewards/rejected": -1.93432921887188,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.2931937172774869,
|
|
"grad_norm": 38.30440902709961,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.460671547737158e-07,
|
|
"logits/chosen": -139242080.0,
|
|
"logits/rejected": -144891984.0,
|
|
"logps/chosen": -307.5467996382637,
|
|
"logps/rejected": -275.4886018237082,
|
|
"loss": 1.5128,
|
|
"rewards/chosen": -0.2156752313448302,
|
|
"rewards/margins": 1.9168140977062718,
|
|
"rewards/rejected": -2.132489329051102,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.3036649214659686,
|
|
"grad_norm": 52.507747650146484,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.40264690579353e-07,
|
|
"logits/chosen": -153254432.0,
|
|
"logits/rejected": -148096032.0,
|
|
"logps/chosen": -296.4355230564024,
|
|
"logps/rejected": -277.0494791666667,
|
|
"loss": 1.5223,
|
|
"rewards/chosen": -0.014792419061428162,
|
|
"rewards/margins": 2.2441466881976866,
|
|
"rewards/rejected": -2.2589391072591147,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.31413612565445026,
|
|
"grad_norm": 42.4326286315918,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.3420776422553916e-07,
|
|
"logits/chosen": -146678224.0,
|
|
"logits/rejected": -144266464.0,
|
|
"logps/chosen": -289.21781823394497,
|
|
"logps/rejected": -280.0624001597444,
|
|
"loss": 1.5308,
|
|
"rewards/chosen": 0.00835518734899865,
|
|
"rewards/margins": 2.1249945409457442,
|
|
"rewards/rejected": -2.1166393535967454,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.32460732984293195,
|
|
"grad_norm": 48.941036224365234,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.279044763144141e-07,
|
|
"logits/chosen": -139984352.0,
|
|
"logits/rejected": -148744560.0,
|
|
"logps/chosen": -271.40555111821084,
|
|
"logps/rejected": -308.63379204892965,
|
|
"loss": 1.5119,
|
|
"rewards/chosen": -0.12764440176966854,
|
|
"rewards/margins": 2.093687366674521,
|
|
"rewards/rejected": -2.22133176844419,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.33507853403141363,
|
|
"grad_norm": 30.565563201904297,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.213632569348639e-07,
|
|
"logits/chosen": -160167840.0,
|
|
"logits/rejected": -138572496.0,
|
|
"logps/chosen": -290.84521412884334,
|
|
"logps/rejected": -290.7674309045226,
|
|
"loss": 1.5273,
|
|
"rewards/chosen": -0.013026183032012091,
|
|
"rewards/margins": 2.366796105493951,
|
|
"rewards/rejected": -2.379822288525963,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.34554973821989526,
|
|
"grad_norm": 37.133567810058594,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.145928543880249e-07,
|
|
"logits/chosen": -143369248.0,
|
|
"logits/rejected": -145142368.0,
|
|
"logps/chosen": -288.8722847551343,
|
|
"logps/rejected": -284.5384949768161,
|
|
"loss": 1.4706,
|
|
"rewards/chosen": 0.3644241646379468,
|
|
"rewards/margins": 2.3581714100373867,
|
|
"rewards/rejected": -1.9937472453994398,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.35602094240837695,
|
|
"grad_norm": 42.86240005493164,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.076023234872057e-07,
|
|
"logits/chosen": -134226816.0,
|
|
"logits/rejected": -150647472.0,
|
|
"logps/chosen": -290.2114297253635,
|
|
"logps/rejected": -284.6018816187595,
|
|
"loss": 1.4998,
|
|
"rewards/chosen": 0.00031860425514627927,
|
|
"rewards/margins": 2.320413741535309,
|
|
"rewards/rejected": -2.3200951372801626,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.36649214659685864,
|
|
"grad_norm": 43.323272705078125,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.004010134478771e-07,
|
|
"logits/chosen": -153940128.0,
|
|
"logits/rejected": -142368416.0,
|
|
"logps/chosen": -277.28628954475306,
|
|
"logps/rejected": -281.2114319620253,
|
|
"loss": 1.4997,
|
|
"rewards/chosen": 0.20580647315508055,
|
|
"rewards/margins": 2.183485568845844,
|
|
"rewards/rejected": -1.9776790956907635,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.3769633507853403,
|
|
"grad_norm": 46.848731994628906,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.9299855538392534e-07,
|
|
"logits/chosen": -146128720.0,
|
|
"logits/rejected": -144031184.0,
|
|
"logps/chosen": -288.1243632445141,
|
|
"logps/rejected": -282.6408197040498,
|
|
"loss": 1.4687,
|
|
"rewards/chosen": 0.38341842699200385,
|
|
"rewards/margins": 2.226742892164317,
|
|
"rewards/rejected": -1.843324465172313,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.387434554973822,
|
|
"grad_norm": 41.83399963378906,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.8540484942689075e-07,
|
|
"logits/chosen": -145012880.0,
|
|
"logits/rejected": -147854064.0,
|
|
"logps/chosen": -280.8901771496815,
|
|
"logps/rejected": -297.4892398389571,
|
|
"loss": 1.4964,
|
|
"rewards/chosen": 0.17396898937832778,
|
|
"rewards/margins": 2.185319656196771,
|
|
"rewards/rejected": -2.0113506668184433,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.39790575916230364,
|
|
"grad_norm": 41.95068359375,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.77630051485419e-07,
|
|
"logits/chosen": -158335904.0,
|
|
"logits/rejected": -135963312.0,
|
|
"logps/chosen": -299.00375645280235,
|
|
"logps/rejected": -290.30250726744185,
|
|
"loss": 1.5113,
|
|
"rewards/chosen": -0.136355295997102,
|
|
"rewards/margins": 2.5501342781437826,
|
|
"rewards/rejected": -2.6864895741408845,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.4083769633507853,
|
|
"grad_norm": 52.55752944946289,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.696845596626342e-07,
|
|
"logits/chosen": -136675232.0,
|
|
"logits/rejected": -139554592.0,
|
|
"logps/chosen": -267.1172371031746,
|
|
"logps/rejected": -287.2497836538462,
|
|
"loss": 1.4728,
|
|
"rewards/chosen": 0.29143521747891865,
|
|
"rewards/margins": 2.468798123127957,
|
|
"rewards/rejected": -2.1773629056490384,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.418848167539267,
|
|
"grad_norm": 42.524559020996094,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.61579000349597e-07,
|
|
"logits/chosen": -145115344.0,
|
|
"logits/rejected": -138730784.0,
|
|
"logps/chosen": -291.6719227828746,
|
|
"logps/rejected": -286.7621056309904,
|
|
"loss": 1.404,
|
|
"rewards/chosen": 0.3901278061239727,
|
|
"rewards/margins": 2.5747456987457777,
|
|
"rewards/rejected": -2.184617892621805,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.418848167539267,
|
|
"eval_kl": 0.0,
|
|
"eval_logits/chosen": -143785152.0,
|
|
"eval_logits/rejected": -142386976.0,
|
|
"eval_logps/chosen": -288.1983125,
|
|
"eval_logps/rejected": -290.82553125,
|
|
"eval_loss": 0.3773096799850464,
|
|
"eval_rewards/chosen": -0.03424349975585937,
|
|
"eval_rewards/margins": 2.3531070861816406,
|
|
"eval_rewards/rejected": -2.3873505859375,
|
|
"eval_runtime": 92.585,
|
|
"eval_samples_per_second": 43.204,
|
|
"eval_steps_per_second": 1.35,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.4293193717277487,
|
|
"grad_norm": 42.05149459838867,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.5332421401344837e-07,
|
|
"logits/chosen": -123967896.0,
|
|
"logits/rejected": -151420832.0,
|
|
"logps/chosen": -290.8375683922559,
|
|
"logps/rejected": -284.5394497084548,
|
|
"loss": 1.5018,
|
|
"rewards/chosen": -0.1862513866488781,
|
|
"rewards/margins": 2.4929647368820076,
|
|
"rewards/rejected": -2.679216123530886,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.4397905759162304,
|
|
"grad_norm": 45.76744842529297,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.4493124069924635e-07,
|
|
"logits/chosen": -141672128.0,
|
|
"logits/rejected": -143752144.0,
|
|
"logps/chosen": -296.493825,
|
|
"logps/rejected": -276.59630248091605,
|
|
"loss": 1.489,
|
|
"rewards/chosen": -0.059238671875,
|
|
"rewards/margins": 2.682879686009065,
|
|
"rewards/rejected": -2.742118357884065,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.450261780104712,
|
|
"grad_norm": 48.224552154541016,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.3641130526488335e-07,
|
|
"logits/chosen": -128212800.0,
|
|
"logits/rejected": -138041008.0,
|
|
"logps/chosen": -262.0190345368917,
|
|
"logps/rejected": -302.6821539657854,
|
|
"loss": 1.5488,
|
|
"rewards/chosen": 0.1527433649898511,
|
|
"rewards/margins": 2.138388427805753,
|
|
"rewards/rejected": -1.985645062815902,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.4607329842931937,
|
|
"grad_norm": 38.78076171875,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.2777580236883473e-07,
|
|
"logits/chosen": -137593360.0,
|
|
"logits/rejected": -143156048.0,
|
|
"logps/chosen": -262.82073682108626,
|
|
"logps/rejected": -282.15691896024464,
|
|
"loss": 1.4863,
|
|
"rewards/chosen": 0.48399031276520066,
|
|
"rewards/margins": 2.222597083598534,
|
|
"rewards/rejected": -1.7386067708333333,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.4712041884816754,
|
|
"grad_norm": 32.755828857421875,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.1903628123081196e-07,
|
|
"logits/chosen": -145392688.0,
|
|
"logits/rejected": -133878232.0,
|
|
"logps/chosen": -279.9309734083851,
|
|
"logps/rejected": -278.34424135220127,
|
|
"loss": 1.4144,
|
|
"rewards/chosen": 0.4770013560419497,
|
|
"rewards/margins": 2.7351111625106013,
|
|
"rewards/rejected": -2.2581098064686516,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.4816753926701571,
|
|
"grad_norm": 42.82301330566406,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.1020443018570556e-07,
|
|
"logits/chosen": -127823832.0,
|
|
"logits/rejected": -148714048.0,
|
|
"logps/chosen": -277.72029728084414,
|
|
"logps/rejected": -274.04221573795184,
|
|
"loss": 1.4946,
|
|
"rewards/chosen": 0.2742920664997844,
|
|
"rewards/margins": 2.2432816111039826,
|
|
"rewards/rejected": -1.968989544604198,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.49214659685863876,
|
|
"grad_norm": 35.25486755371094,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.0129206105147343e-07,
|
|
"logits/chosen": -128224592.0,
|
|
"logits/rejected": -141323744.0,
|
|
"logps/chosen": -297.7064896003263,
|
|
"logps/rejected": -277.0822245127436,
|
|
"loss": 1.5014,
|
|
"rewards/chosen": -0.1534212246221197,
|
|
"rewards/margins": 2.4885884590126253,
|
|
"rewards/rejected": -2.642009683634745,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.5026178010471204,
|
|
"grad_norm": 49.871315002441406,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.923110933318805e-07,
|
|
"logits/chosen": -138666448.0,
|
|
"logits/rejected": -125876032.0,
|
|
"logps/chosen": -282.11912313432833,
|
|
"logps/rejected": -271.0550204918033,
|
|
"loss": 1.5235,
|
|
"rewards/chosen": -0.26497638759328357,
|
|
"rewards/margins": 2.5066780853831507,
|
|
"rewards/rejected": -2.7716544729764343,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.5130890052356021,
|
|
"grad_norm": 39.03130340576172,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.832735382752194e-07,
|
|
"logits/chosen": -144244752.0,
|
|
"logits/rejected": -139237664.0,
|
|
"logps/chosen": -280.307546898928,
|
|
"logps/rejected": -291.85860247208933,
|
|
"loss": 1.5082,
|
|
"rewards/chosen": -0.07937168746410796,
|
|
"rewards/margins": 2.5424585728902582,
|
|
"rewards/rejected": -2.621830260354366,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.5235602094240838,
|
|
"grad_norm": 31.400175094604492,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.741914828103307e-07,
|
|
"logits/chosen": -134795200.0,
|
|
"logits/rejected": -140993584.0,
|
|
"logps/chosen": -274.8821624803768,
|
|
"logps/rejected": -273.449115474339,
|
|
"loss": 1.4409,
|
|
"rewards/chosen": 0.3371262003900118,
|
|
"rewards/margins": 2.429343961677434,
|
|
"rewards/rejected": -2.092217761287422,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.5340314136125655,
|
|
"grad_norm": 39.398651123046875,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.650770733814065e-07,
|
|
"logits/chosen": -139524336.0,
|
|
"logits/rejected": -137561184.0,
|
|
"logps/chosen": -281.1636513157895,
|
|
"logps/rejected": -272.26749138591117,
|
|
"loss": 1.4701,
|
|
"rewards/chosen": 0.545446806547174,
|
|
"rewards/margins": 2.443065145583353,
|
|
"rewards/rejected": -1.8976183390361792,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.5445026178010471,
|
|
"grad_norm": 40.88848114013672,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.55942499703198e-07,
|
|
"logits/chosen": -147061424.0,
|
|
"logits/rejected": -143406240.0,
|
|
"logps/chosen": -285.4889,
|
|
"logps/rejected": -274.23685591603055,
|
|
"loss": 1.4519,
|
|
"rewards/chosen": 0.4090586181640625,
|
|
"rewards/margins": 2.4761641167327646,
|
|
"rewards/rejected": -2.067105498568702,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.5549738219895288,
|
|
"grad_norm": 41.25908660888672,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.467999784583527e-07,
|
|
"logits/chosen": -131054160.0,
|
|
"logits/rejected": -139775840.0,
|
|
"logps/chosen": -270.7588608226837,
|
|
"logps/rejected": -278.70869170489294,
|
|
"loss": 1.4667,
|
|
"rewards/chosen": 0.1376024556997866,
|
|
"rewards/margins": 2.5865234959926955,
|
|
"rewards/rejected": -2.448921040292909,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.5654450261780105,
|
|
"grad_norm": 58.745155334472656,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.3766173695868388e-07,
|
|
"logits/chosen": -139035088.0,
|
|
"logits/rejected": -133750928.0,
|
|
"logps/chosen": -290.9726024119449,
|
|
"logps/rejected": -290.5950209330144,
|
|
"loss": 1.5503,
|
|
"rewards/chosen": -0.12228842205146918,
|
|
"rewards/margins": 2.3166335900099346,
|
|
"rewards/rejected": -2.4389220120614037,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.5759162303664922,
|
|
"grad_norm": 66.44160461425781,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.285399967922253e-07,
|
|
"logits/chosen": -140837504.0,
|
|
"logits/rejected": -148332576.0,
|
|
"logps/chosen": -269.52528454472844,
|
|
"logps/rejected": -282.80800840978594,
|
|
"loss": 1.4314,
|
|
"rewards/chosen": -0.13683468998430637,
|
|
"rewards/margins": 2.8514840770383425,
|
|
"rewards/rejected": -2.988318767022649,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.5863874345549738,
|
|
"grad_norm": 34.65999221801758,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.194469574779397e-07,
|
|
"logits/chosen": -155893536.0,
|
|
"logits/rejected": -136393088.0,
|
|
"logps/chosen": -289.53893209408193,
|
|
"logps/rejected": -284.4916465378422,
|
|
"loss": 1.4864,
|
|
"rewards/chosen": -0.09103836383732751,
|
|
"rewards/margins": 2.7043548873452408,
|
|
"rewards/rejected": -2.7953932511825683,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.5968586387434555,
|
|
"grad_norm": 50.38192367553711,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.1039478014994441e-07,
|
|
"logits/chosen": -140968768.0,
|
|
"logits/rejected": -132994816.0,
|
|
"logps/chosen": -269.5297433903577,
|
|
"logps/rejected": -288.17197311616957,
|
|
"loss": 1.4443,
|
|
"rewards/chosen": 0.11533069758912082,
|
|
"rewards/margins": 2.7949352204379236,
|
|
"rewards/rejected": -2.679604522848803,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.6073298429319371,
|
|
"grad_norm": 39.10985565185547,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.0139557129307149e-07,
|
|
"logits/chosen": -141384624.0,
|
|
"logits/rejected": -141585264.0,
|
|
"logps/chosen": -298.5900179140127,
|
|
"logps/rejected": -308.7128067484663,
|
|
"loss": 1.4304,
|
|
"rewards/chosen": -0.01027871393094397,
|
|
"rewards/margins": 2.8328863970257276,
|
|
"rewards/rejected": -2.843165110956672,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.6178010471204188,
|
|
"grad_norm": 48.67289733886719,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.9246136655151808e-07,
|
|
"logits/chosen": -145905728.0,
|
|
"logits/rejected": -138221376.0,
|
|
"logps/chosen": -293.7313226744186,
|
|
"logps/rejected": -306.55437992125985,
|
|
"loss": 1.4509,
|
|
"rewards/chosen": -0.13455553868020229,
|
|
"rewards/margins": 2.8649036994472583,
|
|
"rewards/rejected": -2.9994592381274607,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.6282722513089005,
|
|
"grad_norm": 68.55772399902344,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.8360411463223873e-07,
|
|
"logits/chosen": -136852608.0,
|
|
"logits/rejected": -143516144.0,
|
|
"logps/chosen": -284.4403070349762,
|
|
"logps/rejected": -294.88001632104454,
|
|
"loss": 1.4253,
|
|
"rewards/chosen": -0.10330413672806538,
|
|
"rewards/margins": 2.9733640825379677,
|
|
"rewards/rejected": -3.076668219266033,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.6282722513089005,
|
|
"eval_kl": 0.0,
|
|
"eval_logits/chosen": -145117536.0,
|
|
"eval_logits/rejected": -143700400.0,
|
|
"eval_logps/chosen": -291.06696875,
|
|
"eval_logps/rejected": -298.3589375,
|
|
"eval_loss": 0.36837950348854065,
|
|
"eval_rewards/chosen": -0.32111080932617186,
|
|
"eval_rewards/margins": 2.819581085205078,
|
|
"eval_rewards/rejected": -3.14069189453125,
|
|
"eval_runtime": 92.5853,
|
|
"eval_samples_per_second": 43.203,
|
|
"eval_steps_per_second": 1.35,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.6387434554973822,
|
|
"grad_norm": 46.867210388183594,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.7483566132460865e-07,
|
|
"logits/chosen": -136255600.0,
|
|
"logits/rejected": -144252464.0,
|
|
"logps/chosen": -299.9070411392405,
|
|
"logps/rejected": -282.2795138888889,
|
|
"loss": 1.4918,
|
|
"rewards/chosen": -0.4866646392435967,
|
|
"rewards/margins": 2.6991346651063264,
|
|
"rewards/rejected": -3.185799304349923,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.6492146596858639,
|
|
"grad_norm": 66.88858032226562,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.66167733657731e-07,
|
|
"logits/chosen": -140277344.0,
|
|
"logits/rejected": -142182944.0,
|
|
"logps/chosen": -301.9623953349282,
|
|
"logps/rejected": -294.2529192189893,
|
|
"loss": 1.4825,
|
|
"rewards/chosen": -0.35411040140301037,
|
|
"rewards/margins": 2.692060965126469,
|
|
"rewards/rejected": -3.0461713665294794,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.6596858638743456,
|
|
"grad_norm": 62.48853302001953,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.5761192421657456e-07,
|
|
"logits/chosen": -133893392.0,
|
|
"logits/rejected": -143692496.0,
|
|
"logps/chosen": -292.95065395367413,
|
|
"logps/rejected": -299.167311735474,
|
|
"loss": 1.4055,
|
|
"rewards/chosen": 0.11951812159139127,
|
|
"rewards/margins": 2.9790377767782226,
|
|
"rewards/rejected": -2.859519655186831,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.6701570680628273,
|
|
"grad_norm": 38.459293365478516,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.491796756379185e-07,
|
|
"logits/chosen": -148631472.0,
|
|
"logits/rejected": -137124976.0,
|
|
"logps/chosen": -307.80620335820896,
|
|
"logps/rejected": -284.41946721311473,
|
|
"loss": 1.4798,
|
|
"rewards/chosen": 0.12505948650303172,
|
|
"rewards/margins": 2.7576216423047737,
|
|
"rewards/rejected": -2.632562155801742,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.680628272251309,
|
|
"grad_norm": 51.62284469604492,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.4088226530684071e-07,
|
|
"logits/chosen": -145016352.0,
|
|
"logits/rejected": -135913600.0,
|
|
"logps/chosen": -293.0742607526882,
|
|
"logps/rejected": -286.65267289348174,
|
|
"loss": 1.4106,
|
|
"rewards/chosen": 0.37428661059307794,
|
|
"rewards/margins": 2.8034505085213373,
|
|
"rewards/rejected": -2.4291638979282593,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.6910994764397905,
|
|
"grad_norm": 44.217506408691406,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.327307902742142e-07,
|
|
"logits/chosen": -153775056.0,
|
|
"logits/rejected": -142987488.0,
|
|
"logps/chosen": -278.56211538461537,
|
|
"logps/rejected": -295.2840277777778,
|
|
"loss": 1.4113,
|
|
"rewards/chosen": 0.3199942486102764,
|
|
"rewards/margins": 3.0220053085681133,
|
|
"rewards/rejected": -2.702011059957837,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.7015706806282722,
|
|
"grad_norm": 52.56444549560547,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.2473615241538523e-07,
|
|
"logits/chosen": -138428624.0,
|
|
"logits/rejected": -125599760.0,
|
|
"logps/chosen": -270.62024962742174,
|
|
"logps/rejected": -297.9065578817734,
|
|
"loss": 1.5102,
|
|
"rewards/chosen": 0.21654559389844916,
|
|
"rewards/margins": 2.3964325355420244,
|
|
"rewards/rejected": -2.1798869416435753,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.7120418848167539,
|
|
"grad_norm": 38.48976516723633,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.169090438498816e-07,
|
|
"logits/chosen": -140096608.0,
|
|
"logits/rejected": -141314656.0,
|
|
"logps/chosen": -289.16740023474176,
|
|
"logps/rejected": -289.05847796411854,
|
|
"loss": 1.4,
|
|
"rewards/chosen": 0.4871681464110182,
|
|
"rewards/margins": 2.8861619137789,
|
|
"rewards/rejected": -2.398993767367882,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.7225130890052356,
|
|
"grad_norm": 56.998104095458984,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.0925993264165045e-07,
|
|
"logits/chosen": -136509200.0,
|
|
"logits/rejected": -140580992.0,
|
|
"logps/chosen": -284.6041084265176,
|
|
"logps/rejected": -296.2347333715596,
|
|
"loss": 1.4483,
|
|
"rewards/chosen": 0.1843722529304675,
|
|
"rewards/margins": 2.723567089094936,
|
|
"rewards/rejected": -2.5391948361644685,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.7329842931937173,
|
|
"grad_norm": 45.59560012817383,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.0179904879894998e-07,
|
|
"logits/chosen": -139792672.0,
|
|
"logits/rejected": -133128280.0,
|
|
"logps/chosen": -282.0086206896552,
|
|
"logps/rejected": -297.9873685747664,
|
|
"loss": 1.4197,
|
|
"rewards/chosen": 0.06277323516558704,
|
|
"rewards/margins": 3.0981276117475574,
|
|
"rewards/rejected": -3.0353543765819704,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.743455497382199,
|
|
"grad_norm": 48.215816497802734,
|
|
"kl": 0.0,
|
|
"learning_rate": 9.453637059262117e-08,
|
|
"logits/chosen": -127794064.0,
|
|
"logits/rejected": -130284464.0,
|
|
"logps/chosen": -276.90582061068704,
|
|
"logps/rejected": -275.3603,
|
|
"loss": 1.5249,
|
|
"rewards/chosen": -0.14867283478947996,
|
|
"rewards/margins": 2.47092736052302,
|
|
"rewards/rejected": -2.6196001953125,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.7539267015706806,
|
|
"grad_norm": 61.68962097167969,
|
|
"kl": 0.0,
|
|
"learning_rate": 8.748161121103406e-08,
|
|
"logits/chosen": -140951328.0,
|
|
"logits/rejected": -141405104.0,
|
|
"logps/chosen": -288.2717027559055,
|
|
"logps/rejected": -306.3112403100775,
|
|
"loss": 1.3695,
|
|
"rewards/chosen": 0.361008819820374,
|
|
"rewards/margins": 3.0993951070660133,
|
|
"rewards/rejected": -2.7383862872456395,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.7643979057591623,
|
|
"grad_norm": 43.57563400268555,
|
|
"kl": 0.0,
|
|
"learning_rate": 8.064420576955965e-08,
|
|
"logits/chosen": -144350032.0,
|
|
"logits/rejected": -144956128.0,
|
|
"logps/chosen": -289.1480224609375,
|
|
"logps/rejected": -297.3230224609375,
|
|
"loss": 1.4858,
|
|
"rewards/chosen": 0.04629603624343872,
|
|
"rewards/margins": 2.6986050724983217,
|
|
"rewards/rejected": -2.652309036254883,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.774869109947644,
|
|
"grad_norm": 47.147090911865234,
|
|
"kl": 0.0,
|
|
"learning_rate": 7.403329869193922e-08,
|
|
"logits/chosen": -135583312.0,
|
|
"logits/rejected": -131832256.0,
|
|
"logps/chosen": -277.6656105990783,
|
|
"logps/rejected": -275.17182531796504,
|
|
"loss": 1.3656,
|
|
"rewards/chosen": 0.1653434061967466,
|
|
"rewards/margins": 3.2219976161281854,
|
|
"rewards/rejected": -3.056654209931439,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.7853403141361257,
|
|
"grad_norm": 44.07842254638672,
|
|
"kl": 0.0,
|
|
"learning_rate": 6.765773148042858e-08,
|
|
"logits/chosen": -143625632.0,
|
|
"logits/rejected": -132837352.0,
|
|
"logps/chosen": -285.6507056451613,
|
|
"logps/rejected": -281.92257054848966,
|
|
"loss": 1.4614,
|
|
"rewards/chosen": 0.21915514311665946,
|
|
"rewards/margins": 2.62252863091972,
|
|
"rewards/rejected": -2.4033734878030604,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.7958115183246073,
|
|
"grad_norm": 37.42493438720703,
|
|
"kl": 0.0,
|
|
"learning_rate": 6.152603089107139e-08,
|
|
"logits/chosen": -136608224.0,
|
|
"logits/rejected": -131950376.0,
|
|
"logps/chosen": -276.34929128614914,
|
|
"logps/rejected": -277.7825792536116,
|
|
"loss": 1.4942,
|
|
"rewards/chosen": 0.24006759666779634,
|
|
"rewards/margins": 2.4807506731374995,
|
|
"rewards/rejected": -2.240683076469703,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.806282722513089,
|
|
"grad_norm": 68.92852020263672,
|
|
"kl": 0.0,
|
|
"learning_rate": 5.5646397529920175e-08,
|
|
"logits/chosen": -132386256.0,
|
|
"logits/rejected": -138109456.0,
|
|
"logps/chosen": -303.72342011128774,
|
|
"logps/rejected": -284.04025057603684,
|
|
"loss": 1.3831,
|
|
"rewards/chosen": 0.32797061695772056,
|
|
"rewards/margins": 2.99956472398057,
|
|
"rewards/rejected": -2.6715941070228495,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.8167539267015707,
|
|
"grad_norm": 36.25162124633789,
|
|
"kl": 0.0,
|
|
"learning_rate": 5.002669488545111e-08,
|
|
"logits/chosen": -126804304.0,
|
|
"logits/rejected": -149925328.0,
|
|
"logps/chosen": -280.9178725369458,
|
|
"logps/rejected": -298.3454033159463,
|
|
"loss": 1.4707,
|
|
"rewards/chosen": 0.24425755893851345,
|
|
"rewards/margins": 2.512562284278491,
|
|
"rewards/rejected": -2.2683047253399775,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.8272251308900523,
|
|
"grad_norm": 45.3673095703125,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.467443881184646e-08,
|
|
"logits/chosen": -137377824.0,
|
|
"logits/rejected": -140888416.0,
|
|
"logps/chosen": -284.73014937106916,
|
|
"logps/rejected": -271.3363742236025,
|
|
"loss": 1.4641,
|
|
"rewards/chosen": 0.15725456093842127,
|
|
"rewards/margins": 2.4592872211917207,
|
|
"rewards/rejected": -2.3020326602532997,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.837696335078534,
|
|
"grad_norm": 44.92776870727539,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.959678747720488e-08,
|
|
"logits/chosen": -147636928.0,
|
|
"logits/rejected": -129594688.0,
|
|
"logps/chosen": -278.11655092592594,
|
|
"logps/rejected": -284.1502840909091,
|
|
"loss": 1.4432,
|
|
"rewards/chosen": 0.22765783239293982,
|
|
"rewards/margins": 2.823709614417733,
|
|
"rewards/rejected": -2.5960517820247935,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.837696335078534,
|
|
"eval_kl": 0.0,
|
|
"eval_logits/chosen": -140467840.0,
|
|
"eval_logits/rejected": -139209600.0,
|
|
"eval_logps/chosen": -286.2336875,
|
|
"eval_logps/rejected": -292.39625,
|
|
"eval_loss": 0.3657679557800293,
|
|
"eval_rewards/chosen": 0.16221832275390624,
|
|
"eval_rewards/margins": 2.7066421508789062,
|
|
"eval_rewards/rejected": -2.544423828125,
|
|
"eval_runtime": 92.5899,
|
|
"eval_samples_per_second": 43.201,
|
|
"eval_steps_per_second": 1.35,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.8481675392670157,
|
|
"grad_norm": 64.69525909423828,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.480053179012654e-08,
|
|
"logits/chosen": -129839872.0,
|
|
"logits/rejected": -140454848.0,
|
|
"logps/chosen": -266.87487579491255,
|
|
"logps/rejected": -288.485599078341,
|
|
"loss": 1.5392,
|
|
"rewards/chosen": -0.023006766702867273,
|
|
"rewards/margins": 2.3186599749686,
|
|
"rewards/rejected": -2.341666741671467,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.8586387434554974,
|
|
"grad_norm": 63.29912185668945,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.029208631747446e-08,
|
|
"logits/chosen": -138798032.0,
|
|
"logits/rejected": -130695232.0,
|
|
"logps/chosen": -273.63010448619633,
|
|
"logps/rejected": -289.54072949840764,
|
|
"loss": 1.3905,
|
|
"rewards/chosen": 0.2373036811688195,
|
|
"rewards/margins": 3.154649639319695,
|
|
"rewards/rejected": -2.9173459581508756,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.8691099476439791,
|
|
"grad_norm": 35.773826599121094,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.607748070546037e-08,
|
|
"logits/chosen": -138341072.0,
|
|
"logits/rejected": -140245856.0,
|
|
"logps/chosen": -276.33953568611986,
|
|
"logps/rejected": -295.26535893962847,
|
|
"loss": 1.4761,
|
|
"rewards/chosen": 0.13098442968133872,
|
|
"rewards/margins": 2.7483675067300037,
|
|
"rewards/rejected": -2.617383077048665,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.8795811518324608,
|
|
"grad_norm": 36.50845718383789,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.2162351615526544e-08,
|
|
"logits/chosen": -140731280.0,
|
|
"logits/rejected": -148560064.0,
|
|
"logps/chosen": -301.4704117063492,
|
|
"logps/rejected": -290.1577644230769,
|
|
"loss": 1.4465,
|
|
"rewards/chosen": 0.18666185651506698,
|
|
"rewards/margins": 2.7894090019477593,
|
|
"rewards/rejected": -2.6027471454326925,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.8900523560209425,
|
|
"grad_norm": 55.24102020263672,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.8551935185811717e-08,
|
|
"logits/chosen": -132794856.0,
|
|
"logits/rejected": -138046480.0,
|
|
"logps/chosen": -282.9409226190476,
|
|
"logps/rejected": -302.71471153846153,
|
|
"loss": 1.4063,
|
|
"rewards/chosen": 0.10835386003766741,
|
|
"rewards/margins": 3.033290759076129,
|
|
"rewards/rejected": -2.9249368990384617,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.900523560209424,
|
|
"grad_norm": 70.16515350341797,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.5251060028279612e-08,
|
|
"logits/chosen": -143098928.0,
|
|
"logits/rejected": -126897944.0,
|
|
"logps/chosen": -272.46225367078824,
|
|
"logps/rejected": -305.9686759478673,
|
|
"loss": 1.5084,
|
|
"rewards/chosen": 0.03112101370621317,
|
|
"rewards/margins": 2.5436736215754867,
|
|
"rewards/rejected": -2.5125526078692735,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.9109947643979057,
|
|
"grad_norm": 28.95891761779785,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.2264140770878839e-08,
|
|
"logits/chosen": -137280736.0,
|
|
"logits/rejected": -143405088.0,
|
|
"logps/chosen": -299.961469889065,
|
|
"logps/rejected": -295.115875385208,
|
|
"loss": 1.4681,
|
|
"rewards/chosen": -0.01648792260800224,
|
|
"rewards/margins": 2.59923304083335,
|
|
"rewards/rejected": -2.615720963441352,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.9214659685863874,
|
|
"grad_norm": 37.45644760131836,
|
|
"kl": 0.0,
|
|
"learning_rate": 9.59517215336922e-09,
|
|
"logits/chosen": -128477976.0,
|
|
"logits/rejected": -129677584.0,
|
|
"logps/chosen": -280.0816461267606,
|
|
"logps/rejected": -291.42394695787834,
|
|
"loss": 1.4164,
|
|
"rewards/chosen": -0.018490225682982444,
|
|
"rewards/margins": 3.1044779987100557,
|
|
"rewards/rejected": -3.122968224393038,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.9319371727748691,
|
|
"grad_norm": 43.50038528442383,
|
|
"kl": 0.0,
|
|
"learning_rate": 7.247723684711382e-09,
|
|
"logits/chosen": -137378768.0,
|
|
"logits/rejected": -126123072.0,
|
|
"logps/chosen": -271.5022151295732,
|
|
"logps/rejected": -294.2158453525641,
|
|
"loss": 1.4426,
|
|
"rewards/chosen": 0.19363278877444384,
|
|
"rewards/margins": 2.807892557529452,
|
|
"rewards/rejected": -2.614259768755008,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.9424083769633508,
|
|
"grad_norm": 58.68267059326172,
|
|
"kl": 0.0,
|
|
"learning_rate": 5.224934869164976e-09,
|
|
"logits/chosen": -139006912.0,
|
|
"logits/rejected": -141954272.0,
|
|
"logps/chosen": -292.2946211507293,
|
|
"logps/rejected": -301.43368212669685,
|
|
"loss": 1.4844,
|
|
"rewards/chosen": -0.05539148785113515,
|
|
"rewards/margins": 2.60611269089368,
|
|
"rewards/rejected": -2.6615041787448153,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.9528795811518325,
|
|
"grad_norm": 51.242652893066406,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.529511007479946e-09,
|
|
"logits/chosen": -140018880.0,
|
|
"logits/rejected": -134809904.0,
|
|
"logps/chosen": -292.4076660906298,
|
|
"logps/rejected": -281.15220091414943,
|
|
"loss": 1.4564,
|
|
"rewards/chosen": 0.1075638450235815,
|
|
"rewards/margins": 2.6941378960967137,
|
|
"rewards/rejected": -2.586574051073132,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.9633507853403142,
|
|
"grad_norm": 60.67096710205078,
|
|
"kl": 0.0,
|
|
"learning_rate": 2.1637195787966857e-09,
|
|
"logits/chosen": -132927744.0,
|
|
"logits/rejected": -145037952.0,
|
|
"logps/chosen": -294.478180176565,
|
|
"logps/rejected": -286.2923801369863,
|
|
"loss": 1.4298,
|
|
"rewards/chosen": 0.23743079906481993,
|
|
"rewards/margins": 2.895192835903671,
|
|
"rewards/rejected": -2.657762036838851,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.9738219895287958,
|
|
"grad_norm": 44.87836837768555,
|
|
"kl": 0.0,
|
|
"learning_rate": 1.1293872080934963e-09,
|
|
"logits/chosen": -125942888.0,
|
|
"logits/rejected": -145652336.0,
|
|
"logps/chosen": -283.0784801136364,
|
|
"logps/rejected": -296.9692206325301,
|
|
"loss": 1.4259,
|
|
"rewards/chosen": 0.11567323858087714,
|
|
"rewards/margins": 2.9895860038999658,
|
|
"rewards/rejected": -2.873912765319089,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.9842931937172775,
|
|
"grad_norm": 43.4354362487793,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.2789722323760546e-10,
|
|
"logits/chosen": -142621872.0,
|
|
"logits/rejected": -136877328.0,
|
|
"logps/chosen": -288.74076066616766,
|
|
"logps/rejected": -292.02558210784315,
|
|
"loss": 1.4129,
|
|
"rewards/chosen": 0.27186810613392354,
|
|
"rewards/margins": 3.1179713504915623,
|
|
"rewards/rejected": -2.846103244357639,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.9947643979057592,
|
|
"grad_norm": 49.643978118896484,
|
|
"kl": 0.0,
|
|
"learning_rate": 6.018780490690822e-11,
|
|
"logits/chosen": -147837520.0,
|
|
"logits/rejected": -131119704.0,
|
|
"logps/chosen": -285.26368371212124,
|
|
"logps/rejected": -282.18916330645163,
|
|
"loss": 1.4033,
|
|
"rewards/chosen": 0.14758417534105706,
|
|
"rewards/margins": 3.1335240379922666,
|
|
"rewards/rejected": -2.9859398626512097,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"step": 955,
|
|
"total_flos": 0.0,
|
|
"train_loss": 1.5426912418834826,
|
|
"train_runtime": 5367.6535,
|
|
"train_samples_per_second": 22.779,
|
|
"train_steps_per_second": 0.178
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 955,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": false,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 4,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|