2717 lines
92 KiB
JSON
2717 lines
92 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.9998691270776077,
|
|
"eval_steps": 1000,
|
|
"global_step": 1910,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.005234916895694281,
|
|
"grad_norm": 3.790904594664249,
|
|
"learning_rate": 5.2356020942408376e-08,
|
|
"logits/chosen": -1.074317216873169,
|
|
"logits/rejected": -1.2653461694717407,
|
|
"logps/chosen": -0.4452144503593445,
|
|
"logps/rejected": -0.6091843247413635,
|
|
"loss": 1.4151,
|
|
"rewards/accuracies": 0.3499999940395355,
|
|
"rewards/chosen": 4.849554898100905e-05,
|
|
"rewards/rejected": -3.19069076795131e-05,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.010469833791388562,
|
|
"grad_norm": 4.012740047235236,
|
|
"learning_rate": 1.0471204188481675e-07,
|
|
"logits/chosen": -0.9944978952407837,
|
|
"logits/rejected": -1.1899915933609009,
|
|
"logps/chosen": -0.4916958212852478,
|
|
"logps/rejected": -0.6457526683807373,
|
|
"loss": 2.9024,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": 5.545483509195037e-05,
|
|
"rewards/rejected": 4.941503357258625e-05,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.015704750687082842,
|
|
"grad_norm": 7.854381323221543,
|
|
"learning_rate": 1.5706806282722514e-07,
|
|
"logits/chosen": -1.0470011234283447,
|
|
"logits/rejected": -1.308021068572998,
|
|
"logps/chosen": -0.4594300389289856,
|
|
"logps/rejected": -0.6046071648597717,
|
|
"loss": 1.2899,
|
|
"rewards/accuracies": 0.5249999761581421,
|
|
"rewards/chosen": -3.4633787436177954e-05,
|
|
"rewards/rejected": -0.00032249835203401744,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.020939667582777124,
|
|
"grad_norm": 3.6452610558813654,
|
|
"learning_rate": 2.094240837696335e-07,
|
|
"logits/chosen": -1.1655104160308838,
|
|
"logits/rejected": -1.3452240228652954,
|
|
"logps/chosen": -0.39312028884887695,
|
|
"logps/rejected": -0.5556824207305908,
|
|
"loss": 2.2173,
|
|
"rewards/accuracies": 0.5874999761581421,
|
|
"rewards/chosen": -8.240896568167955e-05,
|
|
"rewards/rejected": -0.001136856502853334,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.026174584478471406,
|
|
"grad_norm": 8.500475399649632,
|
|
"learning_rate": 2.6178010471204185e-07,
|
|
"logits/chosen": -1.1437081098556519,
|
|
"logits/rejected": -1.4434831142425537,
|
|
"logps/chosen": -0.43456870317459106,
|
|
"logps/rejected": -0.5780390501022339,
|
|
"loss": 1.2709,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": -0.00010843189375009388,
|
|
"rewards/rejected": -0.001327984849922359,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.031409501374165684,
|
|
"grad_norm": 20.273102817060455,
|
|
"learning_rate": 3.1413612565445027e-07,
|
|
"logits/chosen": -1.0886688232421875,
|
|
"logits/rejected": -1.2838691473007202,
|
|
"logps/chosen": -0.44190168380737305,
|
|
"logps/rejected": -0.6152902841567993,
|
|
"loss": 1.8135,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.0007149001467041671,
|
|
"rewards/rejected": -0.005795066244900227,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.036644418269859966,
|
|
"grad_norm": 43.16254505746086,
|
|
"learning_rate": 3.6649214659685864e-07,
|
|
"logits/chosen": -1.094167709350586,
|
|
"logits/rejected": -1.3319257497787476,
|
|
"logps/chosen": -0.4520147740840912,
|
|
"logps/rejected": -0.6239620447158813,
|
|
"loss": 1.7838,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.000759888265747577,
|
|
"rewards/rejected": -0.0066768391989171505,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.04187933516555425,
|
|
"grad_norm": 11.998623689285427,
|
|
"learning_rate": 4.18848167539267e-07,
|
|
"logits/chosen": -1.2943776845932007,
|
|
"logits/rejected": -1.503999948501587,
|
|
"logps/chosen": -0.3685997724533081,
|
|
"logps/rejected": -0.5420706272125244,
|
|
"loss": 2.3502,
|
|
"rewards/accuracies": 0.8374999761581421,
|
|
"rewards/chosen": -0.0021785215940326452,
|
|
"rewards/rejected": -0.016134750097990036,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.04711425206124853,
|
|
"grad_norm": 24.994033392121455,
|
|
"learning_rate": 4.712041884816754e-07,
|
|
"logits/chosen": -1.2916871309280396,
|
|
"logits/rejected": -1.3448667526245117,
|
|
"logps/chosen": -0.4025228023529053,
|
|
"logps/rejected": -0.5868708491325378,
|
|
"loss": 1.8316,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.007973430678248405,
|
|
"rewards/rejected": -0.02014215663075447,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.05234916895694281,
|
|
"grad_norm": 22.171843701204743,
|
|
"learning_rate": 5.235602094240837e-07,
|
|
"logits/chosen": -1.4637973308563232,
|
|
"logits/rejected": -1.6466014385223389,
|
|
"logps/chosen": -0.32837918400764465,
|
|
"logps/rejected": -0.515870213508606,
|
|
"loss": 1.8409,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.01690755970776081,
|
|
"rewards/rejected": -0.050148021429777145,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.057584085852637086,
|
|
"grad_norm": 41.913524204299165,
|
|
"learning_rate": 5.759162303664922e-07,
|
|
"logits/chosen": -1.4786913394927979,
|
|
"logits/rejected": -1.5899611711502075,
|
|
"logps/chosen": -0.42642760276794434,
|
|
"logps/rejected": -0.6284693479537964,
|
|
"loss": 2.2794,
|
|
"rewards/accuracies": 0.699999988079071,
|
|
"rewards/chosen": -0.009459340013563633,
|
|
"rewards/rejected": -0.022919194772839546,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.06281900274833137,
|
|
"grad_norm": 9.042363746985718,
|
|
"learning_rate": 6.282722513089005e-07,
|
|
"logits/chosen": -1.6049375534057617,
|
|
"logits/rejected": -1.756801962852478,
|
|
"logps/chosen": -0.47461098432540894,
|
|
"logps/rejected": -0.6772693991661072,
|
|
"loss": 0.8005,
|
|
"rewards/accuracies": 0.762499988079071,
|
|
"rewards/chosen": -0.014842224307358265,
|
|
"rewards/rejected": -0.04914752393960953,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.06805391964402566,
|
|
"grad_norm": 7.811632000475067,
|
|
"learning_rate": 6.806282722513089e-07,
|
|
"logits/chosen": -1.5858232975006104,
|
|
"logits/rejected": -1.7477552890777588,
|
|
"logps/chosen": -0.46297797560691833,
|
|
"logps/rejected": -0.6604179739952087,
|
|
"loss": 0.9989,
|
|
"rewards/accuracies": 0.637499988079071,
|
|
"rewards/chosen": -0.0641578882932663,
|
|
"rewards/rejected": -0.09264906495809555,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.07328883653971993,
|
|
"grad_norm": 15.65125219657381,
|
|
"learning_rate": 7.329842931937173e-07,
|
|
"logits/chosen": -1.5584498643875122,
|
|
"logits/rejected": -1.709670066833496,
|
|
"logps/chosen": -0.5157219171524048,
|
|
"logps/rejected": -0.7746927738189697,
|
|
"loss": 0.9063,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": -0.09854600578546524,
|
|
"rewards/rejected": -0.13169622421264648,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.0785237534354142,
|
|
"grad_norm": 44.24701426992428,
|
|
"learning_rate": 7.853403141361256e-07,
|
|
"logits/chosen": -1.7312242984771729,
|
|
"logits/rejected": -1.827368140220642,
|
|
"logps/chosen": -0.4762954115867615,
|
|
"logps/rejected": -0.7440527081489563,
|
|
"loss": 1.3761,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.1354602426290512,
|
|
"rewards/rejected": -0.22626717388629913,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.0837586703311085,
|
|
"grad_norm": 44.94371714749407,
|
|
"learning_rate": 8.37696335078534e-07,
|
|
"logits/chosen": -1.8007497787475586,
|
|
"logits/rejected": -1.8725961446762085,
|
|
"logps/chosen": -0.4830542504787445,
|
|
"logps/rejected": -0.7854688763618469,
|
|
"loss": 1.1893,
|
|
"rewards/accuracies": 0.762499988079071,
|
|
"rewards/chosen": -0.07177692651748657,
|
|
"rewards/rejected": -0.15431641042232513,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.08899358722680277,
|
|
"grad_norm": 31.115198767499926,
|
|
"learning_rate": 8.900523560209424e-07,
|
|
"logits/chosen": -1.9976160526275635,
|
|
"logits/rejected": -2.054600954055786,
|
|
"logps/chosen": -0.539434015750885,
|
|
"logps/rejected": -0.8588225245475769,
|
|
"loss": 1.3249,
|
|
"rewards/accuracies": 0.7250000238418579,
|
|
"rewards/chosen": -0.12421885877847672,
|
|
"rewards/rejected": -0.3051489591598511,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.09422850412249706,
|
|
"grad_norm": 12.347508697538823,
|
|
"learning_rate": 9.424083769633508e-07,
|
|
"logits/chosen": -2.005187749862671,
|
|
"logits/rejected": -1.9666427373886108,
|
|
"logps/chosen": -0.5240647196769714,
|
|
"logps/rejected": -0.9816449880599976,
|
|
"loss": 1.2205,
|
|
"rewards/accuracies": 0.824999988079071,
|
|
"rewards/chosen": -0.0668339729309082,
|
|
"rewards/rejected": -0.3174983561038971,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.09946342101819133,
|
|
"grad_norm": 2.227548071196379,
|
|
"learning_rate": 9.947643979057591e-07,
|
|
"logits/chosen": -2.117922782897949,
|
|
"logits/rejected": -2.1063754558563232,
|
|
"logps/chosen": -0.6270676851272583,
|
|
"logps/rejected": -1.067392110824585,
|
|
"loss": 0.5393,
|
|
"rewards/accuracies": 0.8374999761581421,
|
|
"rewards/chosen": -0.05558066442608833,
|
|
"rewards/rejected": -0.2207319438457489,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.10469833791388562,
|
|
"grad_norm": 7.953337477610359,
|
|
"learning_rate": 9.999323662872996e-07,
|
|
"logits/chosen": -2.4052085876464844,
|
|
"logits/rejected": -2.478701591491699,
|
|
"logps/chosen": -0.6346315145492554,
|
|
"logps/rejected": -1.130063772201538,
|
|
"loss": 0.4773,
|
|
"rewards/accuracies": 0.699999988079071,
|
|
"rewards/chosen": -0.12082117795944214,
|
|
"rewards/rejected": -0.4421643316745758,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.1099332548095799,
|
|
"grad_norm": 21.087545681911077,
|
|
"learning_rate": 9.996985942280678e-07,
|
|
"logits/chosen": -2.4521114826202393,
|
|
"logits/rejected": -2.5669069290161133,
|
|
"logps/chosen": -0.6473835110664368,
|
|
"logps/rejected": -1.1176555156707764,
|
|
"loss": 1.0686,
|
|
"rewards/accuracies": 0.699999988079071,
|
|
"rewards/chosen": -0.11380704492330551,
|
|
"rewards/rejected": -0.4065336287021637,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.11516817170527417,
|
|
"grad_norm": 15.632346870535166,
|
|
"learning_rate": 9.99297926897573e-07,
|
|
"logits/chosen": -2.7503812313079834,
|
|
"logits/rejected": -2.812741994857788,
|
|
"logps/chosen": -0.585160493850708,
|
|
"logps/rejected": -1.0489227771759033,
|
|
"loss": 0.2239,
|
|
"rewards/accuracies": 0.7749999761581421,
|
|
"rewards/chosen": -0.1262103021144867,
|
|
"rewards/rejected": -0.5222524404525757,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.12040308860096846,
|
|
"grad_norm": 12.519053406194372,
|
|
"learning_rate": 9.987304981154493e-07,
|
|
"logits/chosen": -2.837965965270996,
|
|
"logits/rejected": -2.9414877891540527,
|
|
"logps/chosen": -0.722270131111145,
|
|
"logps/rejected": -1.3173713684082031,
|
|
"loss": 0.654,
|
|
"rewards/accuracies": 0.7250000238418579,
|
|
"rewards/chosen": -0.14661520719528198,
|
|
"rewards/rejected": -0.5090414881706238,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.12563800549666274,
|
|
"grad_norm": 289.81551499279595,
|
|
"learning_rate": 9.979964973983e-07,
|
|
"logits/chosen": -2.9277539253234863,
|
|
"logits/rejected": -3.028458833694458,
|
|
"logps/chosen": -0.7722570300102234,
|
|
"logps/rejected": -1.3954808712005615,
|
|
"loss": 0.5797,
|
|
"rewards/accuracies": 0.7250000238418579,
|
|
"rewards/chosen": -0.2518690526485443,
|
|
"rewards/rejected": -0.5512933135032654,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.130872922392357,
|
|
"grad_norm": 7.898569451837497,
|
|
"learning_rate": 9.970961698964024e-07,
|
|
"logits/chosen": -2.903446912765503,
|
|
"logits/rejected": -2.9949727058410645,
|
|
"logps/chosen": -0.6554209589958191,
|
|
"logps/rejected": -1.3086931705474854,
|
|
"loss": 0.2079,
|
|
"rewards/accuracies": 0.762499988079071,
|
|
"rewards/chosen": -0.12546400725841522,
|
|
"rewards/rejected": -0.540800929069519,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.1361078392880513,
|
|
"grad_norm": 35.37908090460439,
|
|
"learning_rate": 9.960298163118284e-07,
|
|
"logits/chosen": -2.862393617630005,
|
|
"logits/rejected": -3.070669174194336,
|
|
"logps/chosen": -0.6949301958084106,
|
|
"logps/rejected": -1.335038185119629,
|
|
"loss": 0.2834,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.1361745148897171,
|
|
"rewards/rejected": -0.5548567771911621,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.1413427561837456,
|
|
"grad_norm": 2.8575991741134716,
|
|
"learning_rate": 9.94797792798013e-07,
|
|
"logits/chosen": -3.01228666305542,
|
|
"logits/rejected": -3.385458469390869,
|
|
"logps/chosen": -0.6274330615997314,
|
|
"logps/rejected": -1.239761471748352,
|
|
"loss": 0.6421,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.14010125398635864,
|
|
"rewards/rejected": -0.600482702255249,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.14657767307943986,
|
|
"grad_norm": 59.20898505824435,
|
|
"learning_rate": 9.934005108408016e-07,
|
|
"logits/chosen": -3.242931842803955,
|
|
"logits/rejected": -3.3713455200195312,
|
|
"logps/chosen": -0.7664941549301147,
|
|
"logps/rejected": -1.5041484832763672,
|
|
"loss": 0.2062,
|
|
"rewards/accuracies": 0.9125000238418579,
|
|
"rewards/chosen": -0.10625378042459488,
|
|
"rewards/rejected": -0.4088156819343567,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.15181258997513414,
|
|
"grad_norm": 67.37145724737809,
|
|
"learning_rate": 9.918384371210175e-07,
|
|
"logits/chosen": -3.297367572784424,
|
|
"logits/rejected": -3.390179395675659,
|
|
"logps/chosen": -0.6818624138832092,
|
|
"logps/rejected": -1.3045790195465088,
|
|
"loss": 0.2825,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": -0.14947417378425598,
|
|
"rewards/rejected": -0.5264440774917603,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.1570475068708284,
|
|
"grad_norm": 6.991708239819734,
|
|
"learning_rate": 9.901120933585937e-07,
|
|
"logits/chosen": -2.914552688598633,
|
|
"logits/rejected": -2.9296183586120605,
|
|
"logps/chosen": -0.7332116961479187,
|
|
"logps/rejected": -1.3207851648330688,
|
|
"loss": 0.2746,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": -0.1801702082157135,
|
|
"rewards/rejected": -0.4402903616428375,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.16228242376652272,
|
|
"grad_norm": 5.467852048052765,
|
|
"learning_rate": 9.882220561383237e-07,
|
|
"logits/chosen": -2.6258440017700195,
|
|
"logits/rejected": -2.7410550117492676,
|
|
"logps/chosen": -0.6673339605331421,
|
|
"logps/rejected": -1.259270429611206,
|
|
"loss": 0.2535,
|
|
"rewards/accuracies": 0.7749999761581421,
|
|
"rewards/chosen": -0.1613113135099411,
|
|
"rewards/rejected": -0.5873299837112427,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.167517340662217,
|
|
"grad_norm": 3.8941661088693595,
|
|
"learning_rate": 9.861689567172849e-07,
|
|
"logits/chosen": -2.7174181938171387,
|
|
"logits/rejected": -2.859903573989868,
|
|
"logps/chosen": -0.7855610251426697,
|
|
"logps/rejected": -1.3645145893096924,
|
|
"loss": 0.571,
|
|
"rewards/accuracies": 0.762499988079071,
|
|
"rewards/chosen": -0.11017797142267227,
|
|
"rewards/rejected": -0.33003589510917664,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.17275225755791127,
|
|
"grad_norm": 16.181666110930603,
|
|
"learning_rate": 9.839534808140065e-07,
|
|
"logits/chosen": -2.7446448802948,
|
|
"logits/rejected": -2.9309000968933105,
|
|
"logps/chosen": -0.6837766170501709,
|
|
"logps/rejected": -1.2414804697036743,
|
|
"loss": 0.4195,
|
|
"rewards/accuracies": 0.7250000238418579,
|
|
"rewards/chosen": -0.17641454935073853,
|
|
"rewards/rejected": -0.6049523949623108,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.17798717445360554,
|
|
"grad_norm": 6.6623417517030274,
|
|
"learning_rate": 9.815763683794431e-07,
|
|
"logits/chosen": -3.110708713531494,
|
|
"logits/rejected": -3.232154130935669,
|
|
"logps/chosen": -0.9199361801147461,
|
|
"logps/rejected": -1.6282669305801392,
|
|
"loss": 0.3528,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.16633550822734833,
|
|
"rewards/rejected": -0.5285095572471619,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.18322209134929984,
|
|
"grad_norm": 42.75001280615641,
|
|
"learning_rate": 9.790384133498377e-07,
|
|
"logits/chosen": -3.181398868560791,
|
|
"logits/rejected": -3.380305528640747,
|
|
"logps/chosen": -0.6931721568107605,
|
|
"logps/rejected": -1.301574468612671,
|
|
"loss": 0.4167,
|
|
"rewards/accuracies": 0.762499988079071,
|
|
"rewards/chosen": -0.238613560795784,
|
|
"rewards/rejected": -0.5942808985710144,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.18845700824499412,
|
|
"grad_norm": 25.407488562124865,
|
|
"learning_rate": 9.763404633815536e-07,
|
|
"logits/chosen": -3.138686418533325,
|
|
"logits/rejected": -3.3759498596191406,
|
|
"logps/chosen": -0.7499098777770996,
|
|
"logps/rejected": -1.2932870388031006,
|
|
"loss": 0.1701,
|
|
"rewards/accuracies": 0.7749999761581421,
|
|
"rewards/chosen": -0.13701362907886505,
|
|
"rewards/rejected": -0.4852725863456726,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.1936919251406884,
|
|
"grad_norm": 1.4892370693809598,
|
|
"learning_rate": 9.73483419567964e-07,
|
|
"logits/chosen": -3.4019501209259033,
|
|
"logits/rejected": -3.549142837524414,
|
|
"logps/chosen": -0.7944781184196472,
|
|
"logps/rejected": -1.424883484840393,
|
|
"loss": 0.1402,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.12177709490060806,
|
|
"rewards/rejected": -0.3804728090763092,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.19892684203638267,
|
|
"grad_norm": 54.39502830839909,
|
|
"learning_rate": 9.70468236138494e-07,
|
|
"logits/chosen": -3.2426352500915527,
|
|
"logits/rejected": -3.4838757514953613,
|
|
"logps/chosen": -0.6787932515144348,
|
|
"logps/rejected": -1.1444861888885498,
|
|
"loss": 0.5084,
|
|
"rewards/accuracies": 0.7250000238418579,
|
|
"rewards/chosen": -0.1906341016292572,
|
|
"rewards/rejected": -0.5342355966567993,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.20416175893207694,
|
|
"grad_norm": 4.102532676377055,
|
|
"learning_rate": 9.672959201399155e-07,
|
|
"logits/chosen": -3.1436872482299805,
|
|
"logits/rejected": -3.3000025749206543,
|
|
"logps/chosen": -0.6593716144561768,
|
|
"logps/rejected": -1.2297086715698242,
|
|
"loss": 0.2254,
|
|
"rewards/accuracies": 0.8374999761581421,
|
|
"rewards/chosen": -0.1245049387216568,
|
|
"rewards/rejected": -0.63857102394104,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.20939667582777124,
|
|
"grad_norm": 26.264208746234342,
|
|
"learning_rate": 9.639675311000027e-07,
|
|
"logits/chosen": -2.8664770126342773,
|
|
"logits/rejected": -3.217289686203003,
|
|
"logps/chosen": -0.5279222726821899,
|
|
"logps/rejected": -0.9587762951850891,
|
|
"loss": 0.2976,
|
|
"rewards/accuracies": 0.7875000238418579,
|
|
"rewards/chosen": -0.08593594282865524,
|
|
"rewards/rejected": -0.5586038827896118,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.21463159272346552,
|
|
"grad_norm": 38.41571021062487,
|
|
"learning_rate": 9.60484180673657e-07,
|
|
"logits/chosen": -3.0221009254455566,
|
|
"logits/rejected": -3.3375930786132812,
|
|
"logps/chosen": -0.6153509020805359,
|
|
"logps/rejected": -1.208428144454956,
|
|
"loss": 0.2007,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": -0.1186663880944252,
|
|
"rewards/rejected": -0.7561261057853699,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.2198665096191598,
|
|
"grad_norm": 26.129325664394095,
|
|
"learning_rate": 9.568470322716246e-07,
|
|
"logits/chosen": -3.197547435760498,
|
|
"logits/rejected": -3.473541736602783,
|
|
"logps/chosen": -0.7100402116775513,
|
|
"logps/rejected": -1.3731144666671753,
|
|
"loss": 0.2129,
|
|
"rewards/accuracies": 0.7875000238418579,
|
|
"rewards/chosen": -0.15885800123214722,
|
|
"rewards/rejected": -0.7041777968406677,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.22510142651485407,
|
|
"grad_norm": 3.3568794858215614,
|
|
"learning_rate": 9.530573006719263e-07,
|
|
"logits/chosen": -3.1833243370056152,
|
|
"logits/rejected": -3.5457568168640137,
|
|
"logps/chosen": -0.7644280195236206,
|
|
"logps/rejected": -1.4675564765930176,
|
|
"loss": 0.3047,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.10174532234668732,
|
|
"rewards/rejected": -0.43558469414711,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.23033634341054834,
|
|
"grad_norm": 6.858790048554929,
|
|
"learning_rate": 9.491162516141307e-07,
|
|
"logits/chosen": -3.0963964462280273,
|
|
"logits/rejected": -3.347712993621826,
|
|
"logps/chosen": -0.7016817927360535,
|
|
"logps/rejected": -1.3338514566421509,
|
|
"loss": 0.3073,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": -0.09215731918811798,
|
|
"rewards/rejected": -0.45697230100631714,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.23557126030624265,
|
|
"grad_norm": 23.530428559730304,
|
|
"learning_rate": 9.450252013766092e-07,
|
|
"logits/chosen": -3.2469124794006348,
|
|
"logits/rejected": -3.4776294231414795,
|
|
"logps/chosen": -0.7150281667709351,
|
|
"logps/rejected": -1.317091703414917,
|
|
"loss": 0.2759,
|
|
"rewards/accuracies": 0.824999988079071,
|
|
"rewards/chosen": -0.11948645114898682,
|
|
"rewards/rejected": -0.5289221405982971,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.24080617720193692,
|
|
"grad_norm": 3.0816780004971567,
|
|
"learning_rate": 9.407855163369078e-07,
|
|
"logits/chosen": -3.1366970539093018,
|
|
"logits/rejected": -3.4390456676483154,
|
|
"logps/chosen": -0.7695866823196411,
|
|
"logps/rejected": -1.28377366065979,
|
|
"loss": 0.1834,
|
|
"rewards/accuracies": 0.7749999761581421,
|
|
"rewards/chosen": -0.10271289199590683,
|
|
"rewards/rejected": -0.4681883454322815,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.2460410940976312,
|
|
"grad_norm": 0.8968075253326218,
|
|
"learning_rate": 9.3639861251539e-07,
|
|
"logits/chosen": -3.204808473587036,
|
|
"logits/rejected": -3.410945415496826,
|
|
"logps/chosen": -0.7953635454177856,
|
|
"logps/rejected": -1.4356180429458618,
|
|
"loss": 0.2817,
|
|
"rewards/accuracies": 0.887499988079071,
|
|
"rewards/chosen": -0.11539553105831146,
|
|
"rewards/rejected": -0.5434930324554443,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.25127601099332547,
|
|
"grad_norm": 15.250575277553056,
|
|
"learning_rate": 9.318659551022955e-07,
|
|
"logits/chosen": -3.5916152000427246,
|
|
"logits/rejected": -3.745251417160034,
|
|
"logps/chosen": -0.7927001118659973,
|
|
"logps/rejected": -1.4079248905181885,
|
|
"loss": 0.3906,
|
|
"rewards/accuracies": 0.762499988079071,
|
|
"rewards/chosen": -0.14077363908290863,
|
|
"rewards/rejected": -0.6647804379463196,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.25651092788901975,
|
|
"grad_norm": 8.302573476788423,
|
|
"learning_rate": 9.271890579683804e-07,
|
|
"logits/chosen": -3.6112866401672363,
|
|
"logits/rejected": -3.7867329120635986,
|
|
"logps/chosen": -0.7936150431632996,
|
|
"logps/rejected": -1.3792264461517334,
|
|
"loss": 0.155,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.1597827672958374,
|
|
"rewards/rejected": -0.7419155240058899,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.261745844784714,
|
|
"grad_norm": 19.15043456191315,
|
|
"learning_rate": 9.223694831592952e-07,
|
|
"logits/chosen": -3.4215950965881348,
|
|
"logits/rejected": -3.5204520225524902,
|
|
"logps/chosen": -0.6265555024147034,
|
|
"logps/rejected": -1.1579577922821045,
|
|
"loss": 0.465,
|
|
"rewards/accuracies": 0.762499988079071,
|
|
"rewards/chosen": -0.20739369094371796,
|
|
"rewards/rejected": -0.7289354205131531,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.2669807616804083,
|
|
"grad_norm": 64.7184486122584,
|
|
"learning_rate": 9.174088403738755e-07,
|
|
"logits/chosen": -3.099766969680786,
|
|
"logits/rejected": -3.3204002380371094,
|
|
"logps/chosen": -0.690481424331665,
|
|
"logps/rejected": -1.2163944244384766,
|
|
"loss": 0.3773,
|
|
"rewards/accuracies": 0.862500011920929,
|
|
"rewards/chosen": -0.13189923763275146,
|
|
"rewards/rejected": -0.520399808883667,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.2722156785761026,
|
|
"grad_norm": 17.501316331807786,
|
|
"learning_rate": 9.123087864265147e-07,
|
|
"logits/chosen": -3.1762442588806152,
|
|
"logits/rejected": -3.2784526348114014,
|
|
"logps/chosen": -0.6847952008247375,
|
|
"logps/rejected": -1.1162774562835693,
|
|
"loss": 0.1267,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.06572765856981277,
|
|
"rewards/rejected": -0.351901113986969,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.2774505954717969,
|
|
"grad_norm": 14.212473293142173,
|
|
"learning_rate": 9.070710246938016e-07,
|
|
"logits/chosen": -3.2481791973114014,
|
|
"logits/rejected": -3.489774227142334,
|
|
"logps/chosen": -0.772381603717804,
|
|
"logps/rejected": -1.4248247146606445,
|
|
"loss": 0.2427,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.09138830751180649,
|
|
"rewards/rejected": -0.4058937132358551,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.2826855123674912,
|
|
"grad_norm": 3.7993918380305227,
|
|
"learning_rate": 9.016973045456073e-07,
|
|
"logits/chosen": -3.5233864784240723,
|
|
"logits/rejected": -3.5879790782928467,
|
|
"logps/chosen": -0.6680731773376465,
|
|
"logps/rejected": -1.2521740198135376,
|
|
"loss": 0.233,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": -0.16955754160881042,
|
|
"rewards/rejected": -0.6247085928916931,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.28792042926318545,
|
|
"grad_norm": 31.268425824083035,
|
|
"learning_rate": 8.961894207608087e-07,
|
|
"logits/chosen": -3.3877577781677246,
|
|
"logits/rejected": -3.6265366077423096,
|
|
"logps/chosen": -0.7721540331840515,
|
|
"logps/rejected": -1.4250587224960327,
|
|
"loss": 0.2769,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.16779252886772156,
|
|
"rewards/rejected": -0.6825228333473206,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.2931553461588797,
|
|
"grad_norm": 7.718273564892154,
|
|
"learning_rate": 8.905492129278477e-07,
|
|
"logits/chosen": -3.29071044921875,
|
|
"logits/rejected": -3.561675548553467,
|
|
"logps/chosen": -0.8162961006164551,
|
|
"logps/rejected": -1.4463467597961426,
|
|
"loss": 0.1915,
|
|
"rewards/accuracies": 0.762499988079071,
|
|
"rewards/chosen": -0.204677551984787,
|
|
"rewards/rejected": -0.47629499435424805,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.298390263054574,
|
|
"grad_norm": 12.025693422800462,
|
|
"learning_rate": 8.847785648303233e-07,
|
|
"logits/chosen": -3.204369306564331,
|
|
"logits/rejected": -3.2629711627960205,
|
|
"logps/chosen": -0.7215951085090637,
|
|
"logps/rejected": -1.2496535778045654,
|
|
"loss": 0.3283,
|
|
"rewards/accuracies": 0.7749999761581421,
|
|
"rewards/chosen": -0.17979201674461365,
|
|
"rewards/rejected": -0.6653040647506714,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.3036251799502683,
|
|
"grad_norm": 16.91677605832855,
|
|
"learning_rate": 8.788794038178232e-07,
|
|
"logits/chosen": -3.354393482208252,
|
|
"logits/rejected": -3.566246747970581,
|
|
"logps/chosen": -0.6297786831855774,
|
|
"logps/rejected": -1.2458020448684692,
|
|
"loss": 0.4011,
|
|
"rewards/accuracies": 0.7875000238418579,
|
|
"rewards/chosen": -0.1064259260892868,
|
|
"rewards/rejected": -0.6175190210342407,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.30886009684596255,
|
|
"grad_norm": 3.0634420462864362,
|
|
"learning_rate": 8.728537001622049e-07,
|
|
"logits/chosen": -3.4152801036834717,
|
|
"logits/rejected": -3.7001967430114746,
|
|
"logps/chosen": -0.6182211637496948,
|
|
"logps/rejected": -1.2031091451644897,
|
|
"loss": 0.3326,
|
|
"rewards/accuracies": 0.7749999761581421,
|
|
"rewards/chosen": -0.12622275948524475,
|
|
"rewards/rejected": -0.6566742062568665,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.3140950137416568,
|
|
"grad_norm": 35.90548539348865,
|
|
"learning_rate": 8.667034663995408e-07,
|
|
"logits/chosen": -3.3982882499694824,
|
|
"logits/rejected": -3.6578991413116455,
|
|
"logps/chosen": -0.6449892520904541,
|
|
"logps/rejected": -1.2093414068222046,
|
|
"loss": 0.3771,
|
|
"rewards/accuracies": 0.7749999761581421,
|
|
"rewards/chosen": -0.0947725772857666,
|
|
"rewards/rejected": -0.46876105666160583,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.31932993063735116,
|
|
"grad_norm": 1.5338911609375667,
|
|
"learning_rate": 8.604307566579472e-07,
|
|
"logits/chosen": -3.5967631340026855,
|
|
"logits/rejected": -3.748669385910034,
|
|
"logps/chosen": -0.759170651435852,
|
|
"logps/rejected": -1.21732497215271,
|
|
"loss": 0.0886,
|
|
"rewards/accuracies": 0.7749999761581421,
|
|
"rewards/chosen": -0.12660792469978333,
|
|
"rewards/rejected": -0.35170167684555054,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.32456484753304543,
|
|
"grad_norm": 3.030605610103173,
|
|
"learning_rate": 8.540376659715225e-07,
|
|
"logits/chosen": -3.6591286659240723,
|
|
"logits/rejected": -3.9070792198181152,
|
|
"logps/chosen": -0.6764650344848633,
|
|
"logps/rejected": -1.1898220777511597,
|
|
"loss": 0.1434,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.1459110528230667,
|
|
"rewards/rejected": -0.43774351477622986,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.3297997644287397,
|
|
"grad_norm": 10.069918354827117,
|
|
"learning_rate": 8.47526329580623e-07,
|
|
"logits/chosen": -3.5482306480407715,
|
|
"logits/rejected": -3.7556564807891846,
|
|
"logps/chosen": -0.6558570265769958,
|
|
"logps/rejected": -1.2123215198516846,
|
|
"loss": 0.516,
|
|
"rewards/accuracies": 0.824999988079071,
|
|
"rewards/chosen": -0.10284946113824844,
|
|
"rewards/rejected": -0.5016738772392273,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.335034681324434,
|
|
"grad_norm": 3.5244932761338124,
|
|
"learning_rate": 8.408989222187096e-07,
|
|
"logits/chosen": -3.4110941886901855,
|
|
"logits/rejected": -3.6678364276885986,
|
|
"logps/chosen": -0.6549906730651855,
|
|
"logps/rejected": -1.246897578239441,
|
|
"loss": 0.7311,
|
|
"rewards/accuracies": 0.824999988079071,
|
|
"rewards/chosen": -0.10875538736581802,
|
|
"rewards/rejected": -0.5850102305412292,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.34026959822012826,
|
|
"grad_norm": 4.0414984437360175,
|
|
"learning_rate": 8.341576573860047e-07,
|
|
"logits/chosen": -3.478461503982544,
|
|
"logits/rejected": -3.702072858810425,
|
|
"logps/chosen": -0.7687514424324036,
|
|
"logps/rejected": -1.43941330909729,
|
|
"loss": 0.2588,
|
|
"rewards/accuracies": 0.762499988079071,
|
|
"rewards/chosen": -0.16434721648693085,
|
|
"rewards/rejected": -0.5769973993301392,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.34550451511582253,
|
|
"grad_norm": 17.089029660346316,
|
|
"learning_rate": 8.27304786610201e-07,
|
|
"logits/chosen": -3.6008193492889404,
|
|
"logits/rejected": -3.9538185596466064,
|
|
"logps/chosen": -0.6982223987579346,
|
|
"logps/rejected": -1.2972527742385864,
|
|
"loss": 0.2549,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.0916595607995987,
|
|
"rewards/rejected": -0.38794606924057007,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.3507394320115168,
|
|
"grad_norm": 23.354286685814746,
|
|
"learning_rate": 8.203425986944696e-07,
|
|
"logits/chosen": -3.7454254627227783,
|
|
"logits/rejected": -3.954153537750244,
|
|
"logps/chosen": -0.6409385800361633,
|
|
"logps/rejected": -1.1634663343429565,
|
|
"loss": 0.1437,
|
|
"rewards/accuracies": 0.8374999761581421,
|
|
"rewards/chosen": -0.09134040772914886,
|
|
"rewards/rejected": -0.4564918577671051,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.3559743489072111,
|
|
"grad_norm": 12.016594060247717,
|
|
"learning_rate": 8.132734189530182e-07,
|
|
"logits/chosen": -3.7062535285949707,
|
|
"logits/rejected": -3.933081865310669,
|
|
"logps/chosen": -0.5595335960388184,
|
|
"logps/rejected": -1.047524333000183,
|
|
"loss": 0.1974,
|
|
"rewards/accuracies": 0.7749999761581421,
|
|
"rewards/chosen": -0.20238538086414337,
|
|
"rewards/rejected": -0.5873401165008545,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.36120926580290535,
|
|
"grad_norm": 13.607141196369907,
|
|
"learning_rate": 8.060996084344553e-07,
|
|
"logits/chosen": -3.6081855297088623,
|
|
"logits/rejected": -3.7106146812438965,
|
|
"logps/chosen": -0.7058667540550232,
|
|
"logps/rejected": -1.333534836769104,
|
|
"loss": 0.2337,
|
|
"rewards/accuracies": 0.7749999761581421,
|
|
"rewards/chosen": -0.20908991992473602,
|
|
"rewards/rejected": -0.5528732538223267,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.3664441826985997,
|
|
"grad_norm": 9.343448893616527,
|
|
"learning_rate": 7.98823563133219e-07,
|
|
"logits/chosen": -3.7106995582580566,
|
|
"logits/rejected": -3.8569560050964355,
|
|
"logps/chosen": -0.5677663087844849,
|
|
"logps/rejected": -1.077682614326477,
|
|
"loss": 0.1728,
|
|
"rewards/accuracies": 0.7250000238418579,
|
|
"rewards/chosen": -0.1268927901983261,
|
|
"rewards/rejected": -0.4453812539577484,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.37167909959429396,
|
|
"grad_norm": 1.674723781078412,
|
|
"learning_rate": 7.914477131893342e-07,
|
|
"logits/chosen": -3.6300597190856934,
|
|
"logits/rejected": -3.8654580116271973,
|
|
"logps/chosen": -0.6926698684692383,
|
|
"logps/rejected": -1.3483922481536865,
|
|
"loss": 0.2708,
|
|
"rewards/accuracies": 0.824999988079071,
|
|
"rewards/chosen": -0.11889272928237915,
|
|
"rewards/rejected": -0.7035388350486755,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.37691401648998824,
|
|
"grad_norm": 26.516721992313247,
|
|
"learning_rate": 7.839745220767661e-07,
|
|
"logits/chosen": -3.356396436691284,
|
|
"logits/rejected": -3.629464626312256,
|
|
"logps/chosen": -0.6213072538375854,
|
|
"logps/rejected": -1.278612494468689,
|
|
"loss": 0.4189,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.17910563945770264,
|
|
"rewards/rejected": -0.6009346842765808,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.3821489333856825,
|
|
"grad_norm": 29.551216070368934,
|
|
"learning_rate": 7.764064857806389e-07,
|
|
"logits/chosen": -3.349151611328125,
|
|
"logits/rejected": -3.5175952911376953,
|
|
"logps/chosen": -0.6614469289779663,
|
|
"logps/rejected": -1.1986842155456543,
|
|
"loss": 0.3052,
|
|
"rewards/accuracies": 0.7250000238418579,
|
|
"rewards/chosen": -0.1393895149230957,
|
|
"rewards/rejected": -0.49458661675453186,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.3873838502813768,
|
|
"grad_norm": 25.357071934733334,
|
|
"learning_rate": 7.68746131963598e-07,
|
|
"logits/chosen": -3.4714951515197754,
|
|
"logits/rejected": -3.677035093307495,
|
|
"logps/chosen": -0.620179295539856,
|
|
"logps/rejected": -1.2890572547912598,
|
|
"loss": 0.228,
|
|
"rewards/accuracies": 0.862500011920929,
|
|
"rewards/chosen": -0.13783490657806396,
|
|
"rewards/rejected": -0.794097363948822,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.39261876717707106,
|
|
"grad_norm": 25.198350240467338,
|
|
"learning_rate": 7.609960191215909e-07,
|
|
"logits/chosen": -3.4828593730926514,
|
|
"logits/rejected": -3.795466899871826,
|
|
"logps/chosen": -0.6967580318450928,
|
|
"logps/rejected": -1.377361536026001,
|
|
"loss": 0.3695,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.19926968216896057,
|
|
"rewards/rejected": -0.9696201086044312,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.39785368407276533,
|
|
"grad_norm": 36.505889073660846,
|
|
"learning_rate": 7.531587357293505e-07,
|
|
"logits/chosen": -3.595017910003662,
|
|
"logits/rejected": -3.8477072715759277,
|
|
"logps/chosen": -0.7997711896896362,
|
|
"logps/rejected": -1.3395113945007324,
|
|
"loss": 0.1863,
|
|
"rewards/accuracies": 0.7250000238418579,
|
|
"rewards/chosen": -0.1399848610162735,
|
|
"rewards/rejected": -0.5361682176589966,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.4030886009684596,
|
|
"grad_norm": 9.852370874435213,
|
|
"learning_rate": 7.452368993758645e-07,
|
|
"logits/chosen": -3.4401755332946777,
|
|
"logits/rejected": -3.7114880084991455,
|
|
"logps/chosen": -0.6346908211708069,
|
|
"logps/rejected": -1.3159992694854736,
|
|
"loss": 0.2026,
|
|
"rewards/accuracies": 0.824999988079071,
|
|
"rewards/chosen": -0.16581778228282928,
|
|
"rewards/rejected": -0.7039278745651245,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.4083235178641539,
|
|
"grad_norm": 39.35986226984939,
|
|
"learning_rate": 7.372331558901237e-07,
|
|
"logits/chosen": -3.411632537841797,
|
|
"logits/rejected": -3.6088695526123047,
|
|
"logps/chosen": -0.6711673140525818,
|
|
"logps/rejected": -1.2047076225280762,
|
|
"loss": 0.1863,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": -0.1048843041062355,
|
|
"rewards/rejected": -0.6397637128829956,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.4135584347598482,
|
|
"grad_norm": 7.036281988177846,
|
|
"learning_rate": 7.291501784574355e-07,
|
|
"logits/chosen": -3.5028297901153564,
|
|
"logits/rejected": -3.7100181579589844,
|
|
"logps/chosen": -0.6011011600494385,
|
|
"logps/rejected": -1.1766241788864136,
|
|
"loss": 0.3423,
|
|
"rewards/accuracies": 0.7875000238418579,
|
|
"rewards/chosen": -0.12209127098321915,
|
|
"rewards/rejected": -0.5233365893363953,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.4187933516555425,
|
|
"grad_norm": 15.877033932538007,
|
|
"learning_rate": 7.209906667266017e-07,
|
|
"logits/chosen": -3.6198973655700684,
|
|
"logits/rejected": -3.881483793258667,
|
|
"logps/chosen": -0.7075928449630737,
|
|
"logps/rejected": -1.1975640058517456,
|
|
"loss": 0.1767,
|
|
"rewards/accuracies": 0.8374999761581421,
|
|
"rewards/chosen": -0.13878247141838074,
|
|
"rewards/rejected": -0.5438691973686218,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.42402826855123676,
|
|
"grad_norm": 8.32857274649763,
|
|
"learning_rate": 7.12757345908258e-07,
|
|
"logits/chosen": -3.478787660598755,
|
|
"logits/rejected": -3.5588173866271973,
|
|
"logps/chosen": -0.5804450511932373,
|
|
"logps/rejected": -1.1082279682159424,
|
|
"loss": 0.1955,
|
|
"rewards/accuracies": 0.925000011920929,
|
|
"rewards/chosen": -0.10243809223175049,
|
|
"rewards/rejected": -0.5699917674064636,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.42926318544693104,
|
|
"grad_norm": 1.971725698476674,
|
|
"learning_rate": 7.044529658646761e-07,
|
|
"logits/chosen": -3.325711488723755,
|
|
"logits/rejected": -3.489297866821289,
|
|
"logps/chosen": -0.6689733266830444,
|
|
"logps/rejected": -1.2249457836151123,
|
|
"loss": 0.1751,
|
|
"rewards/accuracies": 0.824999988079071,
|
|
"rewards/chosen": -0.15426844358444214,
|
|
"rewards/rejected": -0.5962169170379639,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.4344981023426253,
|
|
"grad_norm": 6.500882333020027,
|
|
"learning_rate": 6.960803001913314e-07,
|
|
"logits/chosen": -3.3526389598846436,
|
|
"logits/rejected": -3.616393566131592,
|
|
"logps/chosen": -0.6438730955123901,
|
|
"logps/rejected": -1.241818904876709,
|
|
"loss": 0.241,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.1811794489622116,
|
|
"rewards/rejected": -0.5559738874435425,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.4397330192383196,
|
|
"grad_norm": 1.5485575267591558,
|
|
"learning_rate": 6.876421452905448e-07,
|
|
"logits/chosen": -3.6445419788360596,
|
|
"logits/rejected": -3.844398021697998,
|
|
"logps/chosen": -0.6539190411567688,
|
|
"logps/rejected": -1.1970140933990479,
|
|
"loss": 0.1112,
|
|
"rewards/accuracies": 0.7749999761581421,
|
|
"rewards/chosen": -0.12994791567325592,
|
|
"rewards/rejected": -0.5939737558364868,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.44496793613401386,
|
|
"grad_norm": 8.846891414177467,
|
|
"learning_rate": 6.791413194375076e-07,
|
|
"logits/chosen": -3.665837049484253,
|
|
"logits/rejected": -4.038450717926025,
|
|
"logps/chosen": -0.6240882873535156,
|
|
"logps/rejected": -1.2376606464385986,
|
|
"loss": 0.1749,
|
|
"rewards/accuracies": 0.737500011920929,
|
|
"rewards/chosen": -0.13635075092315674,
|
|
"rewards/rejected": -0.5054816007614136,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.45020285302970814,
|
|
"grad_norm": 50.539531832216845,
|
|
"learning_rate": 6.705806618389997e-07,
|
|
"logits/chosen": -3.5491700172424316,
|
|
"logits/rejected": -3.891484498977661,
|
|
"logps/chosen": -0.6607200503349304,
|
|
"logps/rejected": -1.2415850162506104,
|
|
"loss": 0.2544,
|
|
"rewards/accuracies": 0.824999988079071,
|
|
"rewards/chosen": -0.11792077124118805,
|
|
"rewards/rejected": -0.7187885642051697,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.4554377699254024,
|
|
"grad_norm": 2.281104923342322,
|
|
"learning_rate": 6.619630316851182e-07,
|
|
"logits/chosen": -3.623032331466675,
|
|
"logits/rejected": -3.9570438861846924,
|
|
"logps/chosen": -0.5454970598220825,
|
|
"logps/rejected": -1.008527398109436,
|
|
"loss": 0.4412,
|
|
"rewards/accuracies": 0.7875000238418579,
|
|
"rewards/chosen": -0.11349859088659286,
|
|
"rewards/rejected": -0.49833065271377563,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.4606726868210967,
|
|
"grad_norm": 21.825427920475462,
|
|
"learning_rate": 6.532913071943307e-07,
|
|
"logits/chosen": -3.726950168609619,
|
|
"logits/rejected": -3.893709182739258,
|
|
"logps/chosen": -0.7641295194625854,
|
|
"logps/rejected": -1.277066946029663,
|
|
"loss": 0.0858,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": -0.11578428745269775,
|
|
"rewards/rejected": -0.31977829337120056,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.465907603716791,
|
|
"grad_norm": 10.863674979284927,
|
|
"learning_rate": 6.445683846521738e-07,
|
|
"logits/chosen": -3.648641586303711,
|
|
"logits/rejected": -3.897275447845459,
|
|
"logps/chosen": -0.7207273244857788,
|
|
"logps/rejected": -1.2213026285171509,
|
|
"loss": 0.1486,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": -0.11959713697433472,
|
|
"rewards/rejected": -0.580722987651825,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.4711425206124853,
|
|
"grad_norm": 11.024990455629961,
|
|
"learning_rate": 6.357971774439177e-07,
|
|
"logits/chosen": -3.67216157913208,
|
|
"logits/rejected": -3.8727009296417236,
|
|
"logps/chosen": -0.5953903198242188,
|
|
"logps/rejected": -1.187514066696167,
|
|
"loss": 0.3925,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.162687286734581,
|
|
"rewards/rejected": -0.5742394328117371,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.47637743750817957,
|
|
"grad_norm": 9.020917089934892,
|
|
"learning_rate": 6.269806150815187e-07,
|
|
"logits/chosen": -3.646181583404541,
|
|
"logits/rejected": -3.939856767654419,
|
|
"logps/chosen": -0.6697233319282532,
|
|
"logps/rejected": -1.2958990335464478,
|
|
"loss": 0.14,
|
|
"rewards/accuracies": 0.8374999761581421,
|
|
"rewards/chosen": -0.1477135419845581,
|
|
"rewards/rejected": -0.5787355899810791,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.48161235440387384,
|
|
"grad_norm": 17.07185019161522,
|
|
"learning_rate": 6.181216422251862e-07,
|
|
"logits/chosen": -3.652355909347534,
|
|
"logits/rejected": -3.9332756996154785,
|
|
"logps/chosen": -0.6171292066574097,
|
|
"logps/rejected": -1.1538760662078857,
|
|
"loss": 0.1442,
|
|
"rewards/accuracies": 0.7749999761581421,
|
|
"rewards/chosen": -0.1485549807548523,
|
|
"rewards/rejected": -0.5386639833450317,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.4868472712995681,
|
|
"grad_norm": 11.35798051607949,
|
|
"learning_rate": 6.092232176998897e-07,
|
|
"logits/chosen": -3.3274683952331543,
|
|
"logits/rejected": -3.683140993118286,
|
|
"logps/chosen": -0.6949746012687683,
|
|
"logps/rejected": -1.2979357242584229,
|
|
"loss": 0.2627,
|
|
"rewards/accuracies": 0.887499988079071,
|
|
"rewards/chosen": -0.10100005567073822,
|
|
"rewards/rejected": -0.40933284163475037,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.4920821881952624,
|
|
"grad_norm": 7.48204999503568,
|
|
"learning_rate": 6.002883135071362e-07,
|
|
"logits/chosen": -3.6361114978790283,
|
|
"logits/rejected": -3.845881700515747,
|
|
"logps/chosen": -0.6067990064620972,
|
|
"logps/rejected": -1.1521469354629517,
|
|
"loss": 0.2342,
|
|
"rewards/accuracies": 0.7875000238418579,
|
|
"rewards/chosen": -0.08356883376836777,
|
|
"rewards/rejected": -0.3899988830089569,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.49731710509095667,
|
|
"grad_norm": 9.65932504460624,
|
|
"learning_rate": 5.913199138323448e-07,
|
|
"logits/chosen": -3.4517874717712402,
|
|
"logits/rejected": -3.6863322257995605,
|
|
"logps/chosen": -0.6873298287391663,
|
|
"logps/rejected": -1.2405993938446045,
|
|
"loss": 0.0732,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.13713274896144867,
|
|
"rewards/rejected": -0.5883907079696655,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.5025520219866509,
|
|
"grad_norm": 6.954926765138611,
|
|
"learning_rate": 5.82321014048154e-07,
|
|
"logits/chosen": -3.531513214111328,
|
|
"logits/rejected": -3.8438732624053955,
|
|
"logps/chosen": -0.6925119161605835,
|
|
"logps/rejected": -1.3587197065353394,
|
|
"loss": 0.2015,
|
|
"rewards/accuracies": 0.7875000238418579,
|
|
"rewards/chosen": -0.12155506759881973,
|
|
"rewards/rejected": -0.5510644912719727,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.5077869388823453,
|
|
"grad_norm": 29.092706700807746,
|
|
"learning_rate": 5.732946197139906e-07,
|
|
"logits/chosen": -3.543038845062256,
|
|
"logits/rejected": -3.7100837230682373,
|
|
"logps/chosen": -0.6176570057868958,
|
|
"logps/rejected": -1.1735661029815674,
|
|
"loss": 0.1762,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": -0.11741694062948227,
|
|
"rewards/rejected": -0.5648257732391357,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.5130218557780395,
|
|
"grad_norm": 15.364134485434684,
|
|
"learning_rate": 5.642437455722381e-07,
|
|
"logits/chosen": -3.527390718460083,
|
|
"logits/rejected": -3.6467444896698,
|
|
"logps/chosen": -0.5736885070800781,
|
|
"logps/rejected": -1.1105291843414307,
|
|
"loss": 0.1348,
|
|
"rewards/accuracies": 0.8374999761581421,
|
|
"rewards/chosen": -0.1023680791258812,
|
|
"rewards/rejected": -0.5240501761436462,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.5182567726737338,
|
|
"grad_norm": 10.7420392782178,
|
|
"learning_rate": 5.551714145413368e-07,
|
|
"logits/chosen": -3.633018970489502,
|
|
"logits/rejected": -3.8861050605773926,
|
|
"logps/chosen": -0.6110260486602783,
|
|
"logps/rejected": -1.197975516319275,
|
|
"loss": 0.1833,
|
|
"rewards/accuracies": 0.824999988079071,
|
|
"rewards/chosen": -0.2393743246793747,
|
|
"rewards/rejected": -0.8618858456611633,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 0.523491689569428,
|
|
"grad_norm": 29.652979248675152,
|
|
"learning_rate": 5.460806567061533e-07,
|
|
"logits/chosen": -3.5295844078063965,
|
|
"logits/rejected": -3.756152629852295,
|
|
"logps/chosen": -0.6682409048080444,
|
|
"logps/rejected": -1.2239763736724854,
|
|
"loss": 0.1562,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.061064548790454865,
|
|
"rewards/rejected": -0.29901862144470215,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.5287266064651224,
|
|
"grad_norm": 4.935111757809973,
|
|
"learning_rate": 5.369745083059577e-07,
|
|
"logits/chosen": -3.706066608428955,
|
|
"logits/rejected": -3.871903657913208,
|
|
"logps/chosen": -0.8551700711250305,
|
|
"logps/rejected": -1.495203971862793,
|
|
"loss": 0.09,
|
|
"rewards/accuracies": 0.824999988079071,
|
|
"rewards/chosen": -0.08738512545824051,
|
|
"rewards/rejected": -0.4607165455818176,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 0.5339615233608166,
|
|
"grad_norm": 14.950139526881932,
|
|
"learning_rate": 5.278560107203437e-07,
|
|
"logits/chosen": -3.6445841789245605,
|
|
"logits/rejected": -4.002659320831299,
|
|
"logps/chosen": -0.678175151348114,
|
|
"logps/rejected": -1.2137267589569092,
|
|
"loss": 0.164,
|
|
"rewards/accuracies": 0.7124999761581421,
|
|
"rewards/chosen": -0.16553157567977905,
|
|
"rewards/rejected": -0.7228254079818726,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 0.5391964402565109,
|
|
"grad_norm": 10.883180795778303,
|
|
"learning_rate": 5.18728209453432e-07,
|
|
"logits/chosen": -3.707653760910034,
|
|
"logits/rejected": -3.845078229904175,
|
|
"logps/chosen": -0.6454753875732422,
|
|
"logps/rejected": -1.1513105630874634,
|
|
"loss": 0.1559,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.1302897185087204,
|
|
"rewards/rejected": -0.5494991540908813,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 0.5444313571522053,
|
|
"grad_norm": 33.05212121743528,
|
|
"learning_rate": 5.095941531166982e-07,
|
|
"logits/chosen": -3.78800630569458,
|
|
"logits/rejected": -4.111274242401123,
|
|
"logps/chosen": -0.650190532207489,
|
|
"logps/rejected": -1.2724605798721313,
|
|
"loss": 0.3122,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.16642943024635315,
|
|
"rewards/rejected": -0.7723706960678101,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 0.5496662740478995,
|
|
"grad_norm": 25.0104518604515,
|
|
"learning_rate": 5.004568924107598e-07,
|
|
"logits/chosen": -3.5907185077667236,
|
|
"logits/rejected": -3.9407639503479004,
|
|
"logps/chosen": -0.6616253852844238,
|
|
"logps/rejected": -1.3152577877044678,
|
|
"loss": 0.3186,
|
|
"rewards/accuracies": 0.8374999761581421,
|
|
"rewards/chosen": -0.16016852855682373,
|
|
"rewards/rejected": -0.4862311780452728,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.5549011909435938,
|
|
"grad_norm": 31.981933797923496,
|
|
"learning_rate": 4.913194791064675e-07,
|
|
"logits/chosen": -3.687349796295166,
|
|
"logits/rejected": -3.9528133869171143,
|
|
"logps/chosen": -0.8306125402450562,
|
|
"logps/rejected": -1.4087917804718018,
|
|
"loss": 0.362,
|
|
"rewards/accuracies": 0.7749999761581421,
|
|
"rewards/chosen": -0.1095658391714096,
|
|
"rewards/rejected": -0.4266139566898346,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.560136107839288,
|
|
"grad_norm": 43.09016084350836,
|
|
"learning_rate": 4.82184965025639e-07,
|
|
"logits/chosen": -3.7304611206054688,
|
|
"logits/rejected": -3.975867748260498,
|
|
"logps/chosen": -0.6367403268814087,
|
|
"logps/rejected": -1.249473214149475,
|
|
"loss": 0.2017,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.23390457034111023,
|
|
"rewards/rejected": -0.6657984256744385,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 0.5653710247349824,
|
|
"grad_norm": 14.680677683118907,
|
|
"learning_rate": 4.73056401021775e-07,
|
|
"logits/chosen": -3.6824183464050293,
|
|
"logits/rejected": -3.9073410034179688,
|
|
"logps/chosen": -0.7108127474784851,
|
|
"logps/rejected": -1.4023791551589966,
|
|
"loss": 0.479,
|
|
"rewards/accuracies": 0.824999988079071,
|
|
"rewards/chosen": -0.14911451935768127,
|
|
"rewards/rejected": -0.5604814291000366,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 0.5706059416306766,
|
|
"grad_norm": 15.630100484670548,
|
|
"learning_rate": 4.639368359610982e-07,
|
|
"logits/chosen": -3.694349765777588,
|
|
"logits/rejected": -4.01293420791626,
|
|
"logps/chosen": -0.7198182344436646,
|
|
"logps/rejected": -1.2933294773101807,
|
|
"loss": 0.3266,
|
|
"rewards/accuracies": 0.737500011920929,
|
|
"rewards/chosen": -0.21065545082092285,
|
|
"rewards/rejected": -0.7207514047622681,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 0.5758408585263709,
|
|
"grad_norm": 13.347415996971426,
|
|
"learning_rate": 4.5482931570425803e-07,
|
|
"logits/chosen": -3.7276809215545654,
|
|
"logits/rejected": -4.011933326721191,
|
|
"logps/chosen": -0.6607510447502136,
|
|
"logps/rejected": -1.281141757965088,
|
|
"loss": 0.1704,
|
|
"rewards/accuracies": 0.7749999761581421,
|
|
"rewards/chosen": -0.17264564335346222,
|
|
"rewards/rejected": -0.6156941652297974,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.5810757754220651,
|
|
"grad_norm": 12.382613041230709,
|
|
"learning_rate": 4.4573688208903686e-07,
|
|
"logits/chosen": -3.6291985511779785,
|
|
"logits/rejected": -3.9804370403289795,
|
|
"logps/chosen": -0.6637237071990967,
|
|
"logps/rejected": -1.391247034072876,
|
|
"loss": 0.2221,
|
|
"rewards/accuracies": 0.7749999761581421,
|
|
"rewards/chosen": -0.15261736512184143,
|
|
"rewards/rejected": -0.8548731803894043,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 0.5863106923177595,
|
|
"grad_norm": 7.303791497022784,
|
|
"learning_rate": 4.366625719144016e-07,
|
|
"logits/chosen": -3.506834030151367,
|
|
"logits/rejected": -3.755375623703003,
|
|
"logps/chosen": -0.7788494825363159,
|
|
"logps/rejected": -1.385221004486084,
|
|
"loss": 0.2242,
|
|
"rewards/accuracies": 0.824999988079071,
|
|
"rewards/chosen": -0.10770156234502792,
|
|
"rewards/rejected": -0.43017715215682983,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 0.5915456092134538,
|
|
"grad_norm": 14.381226143020704,
|
|
"learning_rate": 4.276094159262368e-07,
|
|
"logits/chosen": -3.543980360031128,
|
|
"logits/rejected": -3.700058698654175,
|
|
"logps/chosen": -0.7008415460586548,
|
|
"logps/rejected": -1.2009141445159912,
|
|
"loss": 0.1348,
|
|
"rewards/accuracies": 0.8374999761581421,
|
|
"rewards/chosen": -0.08333039283752441,
|
|
"rewards/rejected": -0.4378291666507721,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 0.596780526109148,
|
|
"grad_norm": 8.760901934055333,
|
|
"learning_rate": 4.1858043780510135e-07,
|
|
"logits/chosen": -3.62018084526062,
|
|
"logits/rejected": -3.911879062652588,
|
|
"logps/chosen": -0.5855687260627747,
|
|
"logps/rejected": -1.1562426090240479,
|
|
"loss": 0.205,
|
|
"rewards/accuracies": 0.824999988079071,
|
|
"rewards/chosen": -0.10116372257471085,
|
|
"rewards/rejected": -0.6775528192520142,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 0.6020154430048423,
|
|
"grad_norm": 64.65530781329107,
|
|
"learning_rate": 4.0957865315634204e-07,
|
|
"logits/chosen": -3.5645194053649902,
|
|
"logits/rejected": -3.8217787742614746,
|
|
"logps/chosen": -0.641860842704773,
|
|
"logps/rejected": -1.2221088409423828,
|
|
"loss": 0.2331,
|
|
"rewards/accuracies": 0.824999988079071,
|
|
"rewards/chosen": -0.08705325424671173,
|
|
"rewards/rejected": -0.46784210205078125,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.6072503599005366,
|
|
"grad_norm": 3.8658549598616143,
|
|
"learning_rate": 4.006070685029075e-07,
|
|
"logits/chosen": -3.679039716720581,
|
|
"logits/rejected": -3.946254253387451,
|
|
"logps/chosen": -0.6737911105155945,
|
|
"logps/rejected": -1.2060964107513428,
|
|
"loss": 0.259,
|
|
"rewards/accuracies": 0.7875000238418579,
|
|
"rewards/chosen": -0.1347390115261078,
|
|
"rewards/rejected": -0.5623631477355957,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 0.6124852767962309,
|
|
"grad_norm": 0.4610785350786279,
|
|
"learning_rate": 3.916686802811927e-07,
|
|
"logits/chosen": -3.583909511566162,
|
|
"logits/rejected": -3.8461241722106934,
|
|
"logps/chosen": -0.6507914662361145,
|
|
"logps/rejected": -1.2234233617782593,
|
|
"loss": 0.1494,
|
|
"rewards/accuracies": 0.8374999761581421,
|
|
"rewards/chosen": -0.09081225097179413,
|
|
"rewards/rejected": -0.700011670589447,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 0.6177201936919251,
|
|
"grad_norm": 1.6721740201035047,
|
|
"learning_rate": 3.8276647384025467e-07,
|
|
"logits/chosen": -3.608611583709717,
|
|
"logits/rejected": -3.8778247833251953,
|
|
"logps/chosen": -0.6140819191932678,
|
|
"logps/rejected": -1.1463892459869385,
|
|
"loss": 0.417,
|
|
"rewards/accuracies": 0.7875000238418579,
|
|
"rewards/chosen": -0.09192916750907898,
|
|
"rewards/rejected": -0.569677472114563,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 0.6229551105876194,
|
|
"grad_norm": 34.92800950922805,
|
|
"learning_rate": 3.7390342244472883e-07,
|
|
"logits/chosen": -3.686276912689209,
|
|
"logits/rejected": -3.9486172199249268,
|
|
"logps/chosen": -0.6567327976226807,
|
|
"logps/rejected": -1.2959892749786377,
|
|
"loss": 0.1887,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": -0.13273802399635315,
|
|
"rewards/rejected": -0.6054214239120483,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 0.6281900274833137,
|
|
"grad_norm": 28.837778937392045,
|
|
"learning_rate": 3.6508248628178446e-07,
|
|
"logits/chosen": -3.635249376296997,
|
|
"logits/rejected": -3.995410203933716,
|
|
"logps/chosen": -0.6689791679382324,
|
|
"logps/rejected": -1.2368990182876587,
|
|
"loss": 0.1684,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": -0.11262966692447662,
|
|
"rewards/rejected": -0.5506707429885864,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.633424944379008,
|
|
"grad_norm": 6.4862027458335465,
|
|
"learning_rate": 3.563066114724441e-07,
|
|
"logits/chosen": -3.7043285369873047,
|
|
"logits/rejected": -3.9376754760742188,
|
|
"logps/chosen": -0.6666765213012695,
|
|
"logps/rejected": -1.1883481740951538,
|
|
"loss": 0.1755,
|
|
"rewards/accuracies": 0.7875000238418579,
|
|
"rewards/chosen": -0.24648375809192657,
|
|
"rewards/rejected": -0.5887495279312134,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 0.6386598612747023,
|
|
"grad_norm": 29.832662347559868,
|
|
"learning_rate": 3.475787290876055e-07,
|
|
"logits/chosen": -3.6531460285186768,
|
|
"logits/rejected": -3.931300640106201,
|
|
"logps/chosen": -0.7262079119682312,
|
|
"logps/rejected": -1.4471489191055298,
|
|
"loss": 0.4461,
|
|
"rewards/accuracies": 0.824999988079071,
|
|
"rewards/chosen": -0.13697785139083862,
|
|
"rewards/rejected": -0.5294802188873291,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 0.6438947781703965,
|
|
"grad_norm": 8.313230076052804,
|
|
"learning_rate": 3.389017541690854e-07,
|
|
"logits/chosen": -3.6925830841064453,
|
|
"logits/rejected": -3.8890395164489746,
|
|
"logps/chosen": -0.6669970154762268,
|
|
"logps/rejected": -1.1209336519241333,
|
|
"loss": 0.1875,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.11731680482625961,
|
|
"rewards/rejected": -0.4473400115966797,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 0.6491296950660909,
|
|
"grad_norm": 6.599402039349072,
|
|
"learning_rate": 3.30278584756021e-07,
|
|
"logits/chosen": -3.7490150928497314,
|
|
"logits/rejected": -3.9958243370056152,
|
|
"logps/chosen": -0.6234461069107056,
|
|
"logps/rejected": -1.2914457321166992,
|
|
"loss": 0.2222,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": -0.19926027953624725,
|
|
"rewards/rejected": -0.7430733442306519,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 0.6543646119617851,
|
|
"grad_norm": 26.42550655780738,
|
|
"learning_rate": 3.2171210091694735e-07,
|
|
"logits/chosen": -3.5512046813964844,
|
|
"logits/rejected": -3.8761677742004395,
|
|
"logps/chosen": -0.6285715699195862,
|
|
"logps/rejected": -1.1303393840789795,
|
|
"loss": 0.4599,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.10718154907226562,
|
|
"rewards/rejected": -0.5068109035491943,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.6595995288574794,
|
|
"grad_norm": 43.52112297389642,
|
|
"learning_rate": 3.132051637878789e-07,
|
|
"logits/chosen": -3.754105806350708,
|
|
"logits/rejected": -3.9892616271972656,
|
|
"logps/chosen": -0.604164183139801,
|
|
"logps/rejected": -1.2354532480239868,
|
|
"loss": 0.3654,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.1477893888950348,
|
|
"rewards/rejected": -0.6003723740577698,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 0.6648344457531736,
|
|
"grad_norm": 2.249013697408821,
|
|
"learning_rate": 3.0476061461671155e-07,
|
|
"logits/chosen": -3.6410465240478516,
|
|
"logits/rejected": -3.9180960655212402,
|
|
"logps/chosen": -0.6587497591972351,
|
|
"logps/rejected": -1.34711754322052,
|
|
"loss": 0.258,
|
|
"rewards/accuracies": 0.862500011920929,
|
|
"rewards/chosen": -0.13485677540302277,
|
|
"rewards/rejected": -0.5674414038658142,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 0.670069362648868,
|
|
"grad_norm": 4.4535505774168636,
|
|
"learning_rate": 2.9638127381427127e-07,
|
|
"logits/chosen": -3.6331870555877686,
|
|
"logits/rejected": -3.924232006072998,
|
|
"logps/chosen": -0.6530889272689819,
|
|
"logps/rejected": -1.2471270561218262,
|
|
"loss": 0.1464,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.13776201009750366,
|
|
"rewards/rejected": -0.5627979040145874,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 0.6753042795445622,
|
|
"grad_norm": 13.03175390211842,
|
|
"learning_rate": 2.8806994001231766e-07,
|
|
"logits/chosen": -3.5974411964416504,
|
|
"logits/rejected": -3.770061492919922,
|
|
"logps/chosen": -0.57194584608078,
|
|
"logps/rejected": -1.1356195211410522,
|
|
"loss": 0.2263,
|
|
"rewards/accuracies": 0.824999988079071,
|
|
"rewards/chosen": -0.21213237941265106,
|
|
"rewards/rejected": -0.47332197427749634,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 0.6805391964402565,
|
|
"grad_norm": 9.542565711403059,
|
|
"learning_rate": 2.7982938912882544e-07,
|
|
"logits/chosen": -3.5941874980926514,
|
|
"logits/rejected": -3.9326794147491455,
|
|
"logps/chosen": -0.6405996084213257,
|
|
"logps/rejected": -1.4193586111068726,
|
|
"loss": 0.1943,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": -0.1434379667043686,
|
|
"rewards/rejected": -0.8874284625053406,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.6857741133359508,
|
|
"grad_norm": 20.899845143040352,
|
|
"learning_rate": 2.716623734408488e-07,
|
|
"logits/chosen": -3.7071640491485596,
|
|
"logits/rejected": -3.954035997390747,
|
|
"logps/chosen": -0.7084048390388489,
|
|
"logps/rejected": -1.408484697341919,
|
|
"loss": 0.16,
|
|
"rewards/accuracies": 0.8374999761581421,
|
|
"rewards/chosen": -0.11447383463382721,
|
|
"rewards/rejected": -0.5277458429336548,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 0.6910090302316451,
|
|
"grad_norm": 26.585542302796576,
|
|
"learning_rate": 2.635716206652843e-07,
|
|
"logits/chosen": -3.568807601928711,
|
|
"logits/rejected": -3.851666212081909,
|
|
"logps/chosen": -0.6219191551208496,
|
|
"logps/rejected": -1.185127854347229,
|
|
"loss": 0.2097,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": -0.16521432995796204,
|
|
"rewards/rejected": -0.8127703666687012,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 0.6962439471273394,
|
|
"grad_norm": 0.9910292983788035,
|
|
"learning_rate": 2.5555983304783515e-07,
|
|
"logits/chosen": -3.706960678100586,
|
|
"logits/rejected": -3.9914677143096924,
|
|
"logps/chosen": -0.6214891076087952,
|
|
"logps/rejected": -1.2848238945007324,
|
|
"loss": 0.2059,
|
|
"rewards/accuracies": 0.824999988079071,
|
|
"rewards/chosen": -0.09211207926273346,
|
|
"rewards/rejected": -0.45670217275619507,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 0.7014788640230336,
|
|
"grad_norm": 19.940007489482195,
|
|
"learning_rate": 2.4762968646048356e-07,
|
|
"logits/chosen": -3.6894028186798096,
|
|
"logits/rejected": -4.012866497039795,
|
|
"logps/chosen": -0.6447241902351379,
|
|
"logps/rejected": -1.2848459482192993,
|
|
"loss": 0.3272,
|
|
"rewards/accuracies": 0.824999988079071,
|
|
"rewards/chosen": -0.10874289274215698,
|
|
"rewards/rejected": -0.6314564943313599,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 0.7067137809187279,
|
|
"grad_norm": 1.6579625246627596,
|
|
"learning_rate": 2.397838295077703e-07,
|
|
"logits/chosen": -3.513641357421875,
|
|
"logits/rejected": -3.8426365852355957,
|
|
"logps/chosen": -0.6752243041992188,
|
|
"logps/rejected": -1.2607519626617432,
|
|
"loss": 0.1058,
|
|
"rewards/accuracies": 0.824999988079071,
|
|
"rewards/chosen": -0.1565995216369629,
|
|
"rewards/rejected": -0.6347146034240723,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 0.7119486978144222,
|
|
"grad_norm": 5.659662645549925,
|
|
"learning_rate": 2.3202488264218357e-07,
|
|
"logits/chosen": -3.4555447101593018,
|
|
"logits/rejected": -3.8530869483947754,
|
|
"logps/chosen": -0.697325587272644,
|
|
"logps/rejected": -1.2927258014678955,
|
|
"loss": 0.0785,
|
|
"rewards/accuracies": 0.862500011920929,
|
|
"rewards/chosen": -0.08387357741594315,
|
|
"rewards/rejected": -0.39989030361175537,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 0.7171836147101165,
|
|
"grad_norm": 4.657928400009495,
|
|
"learning_rate": 2.243554372889479e-07,
|
|
"logits/chosen": -3.5664660930633545,
|
|
"logits/rejected": -3.876011371612549,
|
|
"logps/chosen": -0.705878734588623,
|
|
"logps/rejected": -1.364867925643921,
|
|
"loss": 0.1093,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.12067972123622894,
|
|
"rewards/rejected": -0.6570446491241455,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 0.7224185316058107,
|
|
"grad_norm": 1.3444622420831092,
|
|
"learning_rate": 2.1677805498050998e-07,
|
|
"logits/chosen": -3.3227603435516357,
|
|
"logits/rejected": -3.743194580078125,
|
|
"logps/chosen": -0.6672931909561157,
|
|
"logps/rejected": -1.1365772485733032,
|
|
"loss": 0.1243,
|
|
"rewards/accuracies": 0.7875000238418579,
|
|
"rewards/chosen": -0.09715026617050171,
|
|
"rewards/rejected": -0.43300461769104004,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 0.727653448501505,
|
|
"grad_norm": 8.614820635695539,
|
|
"learning_rate": 2.0929526650100716e-07,
|
|
"logits/chosen": -3.5229296684265137,
|
|
"logits/rejected": -3.9142937660217285,
|
|
"logps/chosen": -0.6047448515892029,
|
|
"logps/rejected": -1.2415525913238525,
|
|
"loss": 0.2105,
|
|
"rewards/accuracies": 0.824999988079071,
|
|
"rewards/chosen": -0.23621472716331482,
|
|
"rewards/rejected": -0.8043211698532104,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 0.7328883653971994,
|
|
"grad_norm": 5.179700896632157,
|
|
"learning_rate": 2.0190957104100692e-07,
|
|
"logits/chosen": -3.4930293560028076,
|
|
"logits/rejected": -3.749809741973877,
|
|
"logps/chosen": -0.6624347567558289,
|
|
"logps/rejected": -1.1844425201416016,
|
|
"loss": 0.1966,
|
|
"rewards/accuracies": 0.762499988079071,
|
|
"rewards/chosen": -0.1073043942451477,
|
|
"rewards/rejected": -0.4727447032928467,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.7381232822928936,
|
|
"grad_norm": 21.802519076308094,
|
|
"learning_rate": 1.9462343536279612e-07,
|
|
"logits/chosen": -3.6438193321228027,
|
|
"logits/rejected": -4.09710168838501,
|
|
"logps/chosen": -0.6798110604286194,
|
|
"logps/rejected": -1.2490794658660889,
|
|
"loss": 0.18,
|
|
"rewards/accuracies": 0.762499988079071,
|
|
"rewards/chosen": -0.11491537094116211,
|
|
"rewards/rejected": -0.5919861197471619,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 0.7433581991885879,
|
|
"grad_norm": 8.921153921786187,
|
|
"learning_rate": 1.874392929765044e-07,
|
|
"logits/chosen": -3.7223763465881348,
|
|
"logits/rejected": -4.110812187194824,
|
|
"logps/chosen": -0.6291832327842712,
|
|
"logps/rejected": -1.2136573791503906,
|
|
"loss": 0.1329,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.10531127452850342,
|
|
"rewards/rejected": -0.5478615164756775,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 0.7485931160842821,
|
|
"grad_norm": 69.71187759138483,
|
|
"learning_rate": 1.8035954332732889e-07,
|
|
"logits/chosen": -3.6256279945373535,
|
|
"logits/rejected": -3.9363632202148438,
|
|
"logps/chosen": -0.6302305459976196,
|
|
"logps/rejected": -1.1628683805465698,
|
|
"loss": 0.1542,
|
|
"rewards/accuracies": 0.7875000238418579,
|
|
"rewards/chosen": -0.1714845895767212,
|
|
"rewards/rejected": -0.623928964138031,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 0.7538280329799765,
|
|
"grad_norm": 20.431726665972217,
|
|
"learning_rate": 1.733865509941419e-07,
|
|
"logits/chosen": -3.574036121368408,
|
|
"logits/rejected": -3.994558811187744,
|
|
"logps/chosen": -0.6578459143638611,
|
|
"logps/rejected": -1.2709509134292603,
|
|
"loss": 0.1268,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": -0.12474487721920013,
|
|
"rewards/rejected": -0.477055162191391,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 0.7590629498756707,
|
|
"grad_norm": 9.763384229327023,
|
|
"learning_rate": 1.6652264489973861e-07,
|
|
"logits/chosen": -3.586714506149292,
|
|
"logits/rejected": -3.934051990509033,
|
|
"logps/chosen": -0.672654390335083,
|
|
"logps/rejected": -1.4327255487442017,
|
|
"loss": 0.1755,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.12246622890233994,
|
|
"rewards/rejected": -0.6903547644615173,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 0.764297866771365,
|
|
"grad_norm": 5.3595001978781704,
|
|
"learning_rate": 1.5977011753299724e-07,
|
|
"logits/chosen": -3.651690721511841,
|
|
"logits/rejected": -3.986185073852539,
|
|
"logps/chosen": -0.6952771544456482,
|
|
"logps/rejected": -1.2006165981292725,
|
|
"loss": 0.2141,
|
|
"rewards/accuracies": 0.7124999761581421,
|
|
"rewards/chosen": -0.23796899616718292,
|
|
"rewards/rejected": -0.6073740124702454,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 0.7695327836670592,
|
|
"grad_norm": 1.6432168817434278,
|
|
"learning_rate": 1.5313122418320496e-07,
|
|
"logits/chosen": -3.5539729595184326,
|
|
"logits/rejected": -3.897873640060425,
|
|
"logps/chosen": -0.632127583026886,
|
|
"logps/rejected": -1.2387049198150635,
|
|
"loss": 0.1406,
|
|
"rewards/accuracies": 0.824999988079071,
|
|
"rewards/chosen": -0.10146383196115494,
|
|
"rewards/rejected": -0.5321124196052551,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 0.7747677005627536,
|
|
"grad_norm": 1.3794858766661577,
|
|
"learning_rate": 1.4660818218681125e-07,
|
|
"logits/chosen": -3.5363082885742188,
|
|
"logits/rejected": -3.8142707347869873,
|
|
"logps/chosen": -0.7643290758132935,
|
|
"logps/rejected": -1.4336775541305542,
|
|
"loss": 0.1708,
|
|
"rewards/accuracies": 0.862500011920929,
|
|
"rewards/chosen": -0.08295364677906036,
|
|
"rewards/rejected": -0.3842242360115051,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 0.7800026174584479,
|
|
"grad_norm": 14.698867702350263,
|
|
"learning_rate": 1.4020317018685362e-07,
|
|
"logits/chosen": -3.398146152496338,
|
|
"logits/rejected": -3.777660369873047,
|
|
"logps/chosen": -0.8031834363937378,
|
|
"logps/rejected": -1.415450096130371,
|
|
"loss": 0.1705,
|
|
"rewards/accuracies": 0.7749999761581421,
|
|
"rewards/chosen": -0.08375723659992218,
|
|
"rewards/rejected": -0.3890025019645691,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 0.7852375343541421,
|
|
"grad_norm": 12.210364521217109,
|
|
"learning_rate": 1.3391832740531055e-07,
|
|
"logits/chosen": -3.4719395637512207,
|
|
"logits/rejected": -3.8840765953063965,
|
|
"logps/chosen": -0.67162024974823,
|
|
"logps/rejected": -1.2897964715957642,
|
|
"loss": 0.0969,
|
|
"rewards/accuracies": 0.875,
|
|
"rewards/chosen": -0.15899525582790375,
|
|
"rewards/rejected": -0.5975922346115112,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.7904724512498364,
|
|
"grad_norm": 23.473420770496503,
|
|
"learning_rate": 1.2775575292861707e-07,
|
|
"logits/chosen": -3.528533458709717,
|
|
"logits/rejected": -3.907036304473877,
|
|
"logps/chosen": -0.5467859506607056,
|
|
"logps/rejected": -1.1106278896331787,
|
|
"loss": 0.1681,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": -0.11632678657770157,
|
|
"rewards/rejected": -0.560685396194458,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 0.7957073681455307,
|
|
"grad_norm": 4.542700336855168,
|
|
"learning_rate": 1.21717505006588e-07,
|
|
"logits/chosen": -3.7147388458251953,
|
|
"logits/rejected": -3.999300003051758,
|
|
"logps/chosen": -0.7240277528762817,
|
|
"logps/rejected": -1.3829585313796997,
|
|
"loss": 0.1492,
|
|
"rewards/accuracies": 0.824999988079071,
|
|
"rewards/chosen": -0.16688640415668488,
|
|
"rewards/rejected": -0.7482727766036987,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 0.800942285041225,
|
|
"grad_norm": 2.86910989170756,
|
|
"learning_rate": 1.1580560036497877e-07,
|
|
"logits/chosen": -3.5072569847106934,
|
|
"logits/rejected": -3.800105571746826,
|
|
"logps/chosen": -0.6719281077384949,
|
|
"logps/rejected": -1.196406602859497,
|
|
"loss": 0.2545,
|
|
"rewards/accuracies": 0.7875000238418579,
|
|
"rewards/chosen": -0.10692320019006729,
|
|
"rewards/rejected": -0.5672041177749634,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 0.8061772019369192,
|
|
"grad_norm": 0.7212916751703865,
|
|
"learning_rate": 1.1002201353191521e-07,
|
|
"logits/chosen": -3.5515499114990234,
|
|
"logits/rejected": -3.918053150177002,
|
|
"logps/chosen": -0.5516917109489441,
|
|
"logps/rejected": -1.1447056531906128,
|
|
"loss": 0.2219,
|
|
"rewards/accuracies": 0.824999988079071,
|
|
"rewards/chosen": -0.17747844755649567,
|
|
"rewards/rejected": -0.8485193252563477,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 0.8114121188326135,
|
|
"grad_norm": 3.9789628772583874,
|
|
"learning_rate": 1.0436867617841766e-07,
|
|
"logits/chosen": -3.523468017578125,
|
|
"logits/rejected": -4.002907752990723,
|
|
"logps/chosen": -0.6106997728347778,
|
|
"logps/rejected": -1.2265563011169434,
|
|
"loss": 0.1674,
|
|
"rewards/accuracies": 0.7749999761581421,
|
|
"rewards/chosen": -0.1898810714483261,
|
|
"rewards/rejected": -0.8234789967536926,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 0.8166470357283078,
|
|
"grad_norm": 23.019959616553912,
|
|
"learning_rate": 9.884747647323854e-08,
|
|
"logits/chosen": -3.5710349082946777,
|
|
"logits/rejected": -3.8469862937927246,
|
|
"logps/chosen": -0.6847441792488098,
|
|
"logps/rejected": -1.2800266742706299,
|
|
"loss": 0.1525,
|
|
"rewards/accuracies": 0.887499988079071,
|
|
"rewards/chosen": -0.07254868000745773,
|
|
"rewards/rejected": -0.3605559766292572,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 0.8218819526240021,
|
|
"grad_norm": 1.9159672672104486,
|
|
"learning_rate": 9.346025845222871e-08,
|
|
"logits/chosen": -3.5710854530334473,
|
|
"logits/rejected": -3.9252638816833496,
|
|
"logps/chosen": -0.5996052026748657,
|
|
"logps/rejected": -1.185727834701538,
|
|
"loss": 0.0693,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.09702242910861969,
|
|
"rewards/rejected": -0.43671149015426636,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 0.8271168695196964,
|
|
"grad_norm": 6.0432637545944985,
|
|
"learning_rate": 8.82088214024454e-08,
|
|
"logits/chosen": -3.4795615673065186,
|
|
"logits/rejected": -3.900925397872925,
|
|
"logps/chosen": -0.6539788842201233,
|
|
"logps/rejected": -1.3062019348144531,
|
|
"loss": 0.0777,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.1087212786078453,
|
|
"rewards/rejected": -0.6267635226249695,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 0.8323517864153906,
|
|
"grad_norm": 0.8065986090798118,
|
|
"learning_rate": 8.309491926120393e-08,
|
|
"logits/chosen": -3.445683002471924,
|
|
"logits/rejected": -3.8484814167022705,
|
|
"logps/chosen": -0.6346088647842407,
|
|
"logps/rejected": -1.2594006061553955,
|
|
"loss": 0.1251,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.11368497461080551,
|
|
"rewards/rejected": -0.4606091380119324,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 0.837586703311085,
|
|
"grad_norm": 1.631423358922648,
|
|
"learning_rate": 7.812026003027771e-08,
|
|
"logits/chosen": -3.4795145988464355,
|
|
"logits/rejected": -3.843942165374756,
|
|
"logps/chosen": -0.6449909210205078,
|
|
"logps/rejected": -1.2673732042312622,
|
|
"loss": 0.1368,
|
|
"rewards/accuracies": 0.7875000238418579,
|
|
"rewards/chosen": -0.11242115497589111,
|
|
"rewards/rejected": -0.6334723234176636,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.8428216202067792,
|
|
"grad_norm": 28.643433953668108,
|
|
"learning_rate": 7.328650520543906e-08,
|
|
"logits/chosen": -3.5898594856262207,
|
|
"logits/rejected": -3.8868191242218018,
|
|
"logps/chosen": -0.6432119011878967,
|
|
"logps/rejected": -1.2170101404190063,
|
|
"loss": 0.24,
|
|
"rewards/accuracies": 0.824999988079071,
|
|
"rewards/chosen": -0.14053374528884888,
|
|
"rewards/rejected": -0.6827019453048706,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 0.8480565371024735,
|
|
"grad_norm": 6.576620973204435,
|
|
"learning_rate": 6.859526922153352e-08,
|
|
"logits/chosen": -3.576659679412842,
|
|
"logits/rejected": -3.9092178344726562,
|
|
"logps/chosen": -0.5998526811599731,
|
|
"logps/rejected": -1.1230926513671875,
|
|
"loss": 0.1143,
|
|
"rewards/accuracies": 0.824999988079071,
|
|
"rewards/chosen": -0.0742657333612442,
|
|
"rewards/rejected": -0.3632586598396301,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 0.8532914539981677,
|
|
"grad_norm": 19.103957999815783,
|
|
"learning_rate": 6.40481189132711e-08,
|
|
"logits/chosen": -3.498664379119873,
|
|
"logits/rejected": -3.8451638221740723,
|
|
"logps/chosen": -0.6496328711509705,
|
|
"logps/rejected": -1.1756832599639893,
|
|
"loss": 0.148,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": -0.12481925636529922,
|
|
"rewards/rejected": -0.3642726540565491,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 0.8585263708938621,
|
|
"grad_norm": 2.479610731568999,
|
|
"learning_rate": 5.964657299191711e-08,
|
|
"logits/chosen": -3.6090331077575684,
|
|
"logits/rejected": -3.8709425926208496,
|
|
"logps/chosen": -0.7074568867683411,
|
|
"logps/rejected": -1.3075045347213745,
|
|
"loss": 0.1935,
|
|
"rewards/accuracies": 0.862500011920929,
|
|
"rewards/chosen": -0.0973527580499649,
|
|
"rewards/rejected": -0.4661819338798523,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 0.8637612877895563,
|
|
"grad_norm": 24.31365663092268,
|
|
"learning_rate": 5.53921015380539e-08,
|
|
"logits/chosen": -3.368114948272705,
|
|
"logits/rejected": -3.887836456298828,
|
|
"logps/chosen": -0.6916152238845825,
|
|
"logps/rejected": -1.3344032764434814,
|
|
"loss": 0.2021,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.13118405640125275,
|
|
"rewards/rejected": -0.5621457695960999,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 0.8689962046852506,
|
|
"grad_norm": 18.53418978223593,
|
|
"learning_rate": 5.1286125510586805e-08,
|
|
"logits/chosen": -3.587425708770752,
|
|
"logits/rejected": -3.909331798553467,
|
|
"logps/chosen": -0.6051632165908813,
|
|
"logps/rejected": -1.1239159107208252,
|
|
"loss": 0.2629,
|
|
"rewards/accuracies": 0.887499988079071,
|
|
"rewards/chosen": -0.08930396288633347,
|
|
"rewards/rejected": -0.4709382951259613,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 0.8742311215809448,
|
|
"grad_norm": 4.50359805992076,
|
|
"learning_rate": 4.733001627215466e-08,
|
|
"logits/chosen": -3.5434436798095703,
|
|
"logits/rejected": -3.836855411529541,
|
|
"logps/chosen": -0.6643694043159485,
|
|
"logps/rejected": -1.2192775011062622,
|
|
"loss": 0.3901,
|
|
"rewards/accuracies": 0.862500011920929,
|
|
"rewards/chosen": -0.17054535448551178,
|
|
"rewards/rejected": -0.6519421339035034,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 0.8794660384766392,
|
|
"grad_norm": 34.178405196863906,
|
|
"learning_rate": 4.352509513110658e-08,
|
|
"logits/chosen": -3.487307071685791,
|
|
"logits/rejected": -3.8664581775665283,
|
|
"logps/chosen": -0.6555562615394592,
|
|
"logps/rejected": -1.2320266962051392,
|
|
"loss": 0.2195,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.17077895998954773,
|
|
"rewards/rejected": -0.64589923620224,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 0.8847009553723335,
|
|
"grad_norm": 2.86389693559937,
|
|
"learning_rate": 3.9872632900194936e-08,
|
|
"logits/chosen": -3.507810592651367,
|
|
"logits/rejected": -3.888345241546631,
|
|
"logps/chosen": -0.7281653881072998,
|
|
"logps/rejected": -1.3483150005340576,
|
|
"loss": 0.1462,
|
|
"rewards/accuracies": 0.7875000238418579,
|
|
"rewards/chosen": -0.13594810664653778,
|
|
"rewards/rejected": -0.5338603258132935,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 0.8899358722680277,
|
|
"grad_norm": 10.406439092961616,
|
|
"learning_rate": 3.6373849472134954e-08,
|
|
"logits/chosen": -3.631108045578003,
|
|
"logits/rejected": -3.8256747722625732,
|
|
"logps/chosen": -0.6505134701728821,
|
|
"logps/rejected": -1.196921706199646,
|
|
"loss": 0.0768,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.17471420764923096,
|
|
"rewards/rejected": -0.6126881241798401,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.8951707891637221,
|
|
"grad_norm": 15.532461573765454,
|
|
"learning_rate": 3.302991341216976e-08,
|
|
"logits/chosen": -3.6967296600341797,
|
|
"logits/rejected": -4.029541015625,
|
|
"logps/chosen": -0.6618956327438354,
|
|
"logps/rejected": -1.3089344501495361,
|
|
"loss": 0.1687,
|
|
"rewards/accuracies": 0.7250000238418579,
|
|
"rewards/chosen": -0.14995837211608887,
|
|
"rewards/rejected": -0.5820103883743286,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 0.9004057060594163,
|
|
"grad_norm": 5.162585825584226,
|
|
"learning_rate": 2.9841941567779474e-08,
|
|
"logits/chosen": -3.644044876098633,
|
|
"logits/rejected": -3.9487557411193848,
|
|
"logps/chosen": -0.7292143106460571,
|
|
"logps/rejected": -1.3466829061508179,
|
|
"loss": 0.1542,
|
|
"rewards/accuracies": 0.7749999761581421,
|
|
"rewards/chosen": -0.11079509556293488,
|
|
"rewards/rejected": -0.4718669056892395,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 0.9056406229551106,
|
|
"grad_norm": 6.250911119837395,
|
|
"learning_rate": 2.681099869566328e-08,
|
|
"logits/chosen": -3.541680097579956,
|
|
"logits/rejected": -3.892336368560791,
|
|
"logps/chosen": -0.6768237948417664,
|
|
"logps/rejected": -1.1760004758834839,
|
|
"loss": 0.3289,
|
|
"rewards/accuracies": 0.7749999761581421,
|
|
"rewards/chosen": -0.10029733180999756,
|
|
"rewards/rejected": -0.7108488082885742,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 0.9108755398508048,
|
|
"grad_norm": 12.552976379248005,
|
|
"learning_rate": 2.3938097106119216e-08,
|
|
"logits/chosen": -3.7278189659118652,
|
|
"logits/rejected": -3.972843647003174,
|
|
"logps/chosen": -0.6094867587089539,
|
|
"logps/rejected": -1.1807337999343872,
|
|
"loss": 0.1072,
|
|
"rewards/accuracies": 0.8374999761581421,
|
|
"rewards/chosen": -0.15854167938232422,
|
|
"rewards/rejected": -0.5542012453079224,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 0.9161104567464992,
|
|
"grad_norm": 2.1007604039088514,
|
|
"learning_rate": 2.12241963249406e-08,
|
|
"logits/chosen": -3.621340274810791,
|
|
"logits/rejected": -4.070878505706787,
|
|
"logps/chosen": -0.6373471617698669,
|
|
"logps/rejected": -1.2419856786727905,
|
|
"loss": 0.1475,
|
|
"rewards/accuracies": 0.762499988079071,
|
|
"rewards/chosen": -0.14094902575016022,
|
|
"rewards/rejected": -0.5264729857444763,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 0.9213453736421934,
|
|
"grad_norm": 12.311524530866398,
|
|
"learning_rate": 1.8670202772942568e-08,
|
|
"logits/chosen": -3.631922483444214,
|
|
"logits/rejected": -3.9891021251678467,
|
|
"logps/chosen": -0.6251589059829712,
|
|
"logps/rejected": -1.3054759502410889,
|
|
"loss": 0.2536,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.19476152956485748,
|
|
"rewards/rejected": -0.747911810874939,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 0.9265802905378877,
|
|
"grad_norm": 6.827588288274221,
|
|
"learning_rate": 1.6276969463224545e-08,
|
|
"logits/chosen": -3.5727906227111816,
|
|
"logits/rejected": -3.9334945678710938,
|
|
"logps/chosen": -0.7015948295593262,
|
|
"logps/rejected": -1.320188045501709,
|
|
"loss": 0.1134,
|
|
"rewards/accuracies": 0.824999988079071,
|
|
"rewards/chosen": -0.09665700793266296,
|
|
"rewards/rejected": -0.5031177401542664,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 0.931815207433582,
|
|
"grad_norm": 2.07334457771776,
|
|
"learning_rate": 1.4045295716271e-08,
|
|
"logits/chosen": -3.584230899810791,
|
|
"logits/rejected": -4.013778209686279,
|
|
"logps/chosen": -0.6425756216049194,
|
|
"logps/rejected": -1.4021472930908203,
|
|
"loss": 0.1508,
|
|
"rewards/accuracies": 0.7875000238418579,
|
|
"rewards/chosen": -0.12355978786945343,
|
|
"rewards/rejected": -0.4425305426120758,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 0.9370501243292763,
|
|
"grad_norm": 6.207844735521484,
|
|
"learning_rate": 1.1975926892984766e-08,
|
|
"logits/chosen": -3.419482707977295,
|
|
"logits/rejected": -3.659214496612549,
|
|
"logps/chosen": -0.5653207898139954,
|
|
"logps/rejected": -1.1019750833511353,
|
|
"loss": 0.1604,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.1579764187335968,
|
|
"rewards/rejected": -0.5906132459640503,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 0.9422850412249706,
|
|
"grad_norm": 9.527007176662295,
|
|
"learning_rate": 1.0069554145742787e-08,
|
|
"logits/chosen": -3.579073667526245,
|
|
"logits/rejected": -3.9208247661590576,
|
|
"logps/chosen": -0.6493052840232849,
|
|
"logps/rejected": -1.1913435459136963,
|
|
"loss": 0.2047,
|
|
"rewards/accuracies": 0.737500011920929,
|
|
"rewards/chosen": -0.10687317699193954,
|
|
"rewards/rejected": -0.4368254542350769,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.9475199581206648,
|
|
"grad_norm": 31.675698349395965,
|
|
"learning_rate": 8.326814187556485e-09,
|
|
"logits/chosen": -3.670579433441162,
|
|
"logits/rejected": -4.052734375,
|
|
"logps/chosen": -0.6743156313896179,
|
|
"logps/rejected": -1.2803795337677002,
|
|
"loss": 0.2567,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.12054960429668427,
|
|
"rewards/rejected": -0.5073380470275879,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 0.9527548750163591,
|
|
"grad_norm": 42.855469273806456,
|
|
"learning_rate": 6.7482890794151594e-09,
|
|
"logits/chosen": -3.602036237716675,
|
|
"logits/rejected": -3.961235761642456,
|
|
"logps/chosen": -0.6719382405281067,
|
|
"logps/rejected": -1.306571364402771,
|
|
"loss": 0.1819,
|
|
"rewards/accuracies": 0.824999988079071,
|
|
"rewards/chosen": -0.09778688848018646,
|
|
"rewards/rejected": -0.545978844165802,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 0.9579897919120534,
|
|
"grad_norm": 14.021917338266821,
|
|
"learning_rate": 5.334506035882036e-09,
|
|
"logits/chosen": -3.5885958671569824,
|
|
"logits/rejected": -3.972801685333252,
|
|
"logps/chosen": -0.6078780293464661,
|
|
"logps/rejected": -1.2133899927139282,
|
|
"loss": 0.2865,
|
|
"rewards/accuracies": 0.7749999761581421,
|
|
"rewards/chosen": -0.18109995126724243,
|
|
"rewards/rejected": -0.8068861961364746,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 0.9632247088077477,
|
|
"grad_norm": 6.0298508108639925,
|
|
"learning_rate": 4.0859372490090194e-09,
|
|
"logits/chosen": -3.607355833053589,
|
|
"logits/rejected": -3.94720196723938,
|
|
"logps/chosen": -0.5455012917518616,
|
|
"logps/rejected": -1.1150436401367188,
|
|
"loss": 0.3449,
|
|
"rewards/accuracies": 0.7749999761581421,
|
|
"rewards/chosen": -0.1432439386844635,
|
|
"rewards/rejected": -0.6401658058166504,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 0.9684596257034419,
|
|
"grad_norm": 4.5937644631506105,
|
|
"learning_rate": 3.0029997306283416e-09,
|
|
"logits/chosen": -3.550992250442505,
|
|
"logits/rejected": -3.908057451248169,
|
|
"logps/chosen": -0.5658137798309326,
|
|
"logps/rejected": -1.1313598155975342,
|
|
"loss": 0.1792,
|
|
"rewards/accuracies": 0.862500011920929,
|
|
"rewards/chosen": -0.10125686228275299,
|
|
"rewards/rejected": -0.37562742829322815,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 0.9736945425991362,
|
|
"grad_norm": 0.9472001507121848,
|
|
"learning_rate": 2.0860551730742526e-09,
|
|
"logits/chosen": -3.4225573539733887,
|
|
"logits/rejected": -3.754948377609253,
|
|
"logps/chosen": -0.6793702244758606,
|
|
"logps/rejected": -1.271066427230835,
|
|
"loss": 0.075,
|
|
"rewards/accuracies": 0.862500011920929,
|
|
"rewards/chosen": -0.12655304372310638,
|
|
"rewards/rejected": -0.5372025966644287,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 0.9789294594948306,
|
|
"grad_norm": 28.725611670468133,
|
|
"learning_rate": 1.3354098283802628e-09,
|
|
"logits/chosen": -3.5682125091552734,
|
|
"logits/rejected": -3.978659152984619,
|
|
"logps/chosen": -0.6888954043388367,
|
|
"logps/rejected": -1.3216421604156494,
|
|
"loss": 0.2941,
|
|
"rewards/accuracies": 0.875,
|
|
"rewards/chosen": -0.17312012612819672,
|
|
"rewards/rejected": -0.5689764022827148,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 0.9841643763905248,
|
|
"grad_norm": 6.5557009909155335,
|
|
"learning_rate": 7.513144059937415e-10,
|
|
"logits/chosen": -3.502890110015869,
|
|
"logits/rejected": -3.8684380054473877,
|
|
"logps/chosen": -0.6373413801193237,
|
|
"logps/rejected": -1.253266453742981,
|
|
"loss": 0.1433,
|
|
"rewards/accuracies": 0.737500011920929,
|
|
"rewards/chosen": -0.14787249267101288,
|
|
"rewards/rejected": -0.5477157831192017,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 0.9893992932862191,
|
|
"grad_norm": 7.693007289411357,
|
|
"learning_rate": 3.3396398904106393e-10,
|
|
"logits/chosen": -3.721400737762451,
|
|
"logits/rejected": -4.00911808013916,
|
|
"logps/chosen": -0.5624986886978149,
|
|
"logps/rejected": -1.1344749927520752,
|
|
"loss": 0.3553,
|
|
"rewards/accuracies": 0.7749999761581421,
|
|
"rewards/chosen": -0.2794850170612335,
|
|
"rewards/rejected": -0.8449773788452148,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 0.9946342101819133,
|
|
"grad_norm": 10.230098634327602,
|
|
"learning_rate": 8.349796917112018e-11,
|
|
"logits/chosen": -3.5617072582244873,
|
|
"logits/rejected": -3.961862087249756,
|
|
"logps/chosen": -0.6174992322921753,
|
|
"logps/rejected": -1.282220482826233,
|
|
"loss": 0.2374,
|
|
"rewards/accuracies": 0.7749999761581421,
|
|
"rewards/chosen": -0.14698410034179688,
|
|
"rewards/rejected": -0.745864987373352,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.9998691270776077,
|
|
"grad_norm": 19.58846312067376,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": -3.5836310386657715,
|
|
"logits/rejected": -3.961308717727661,
|
|
"logps/chosen": -0.7217192649841309,
|
|
"logps/rejected": -1.424443006515503,
|
|
"loss": 0.3428,
|
|
"rewards/accuracies": 0.887499988079071,
|
|
"rewards/chosen": -0.1093846932053566,
|
|
"rewards/rejected": -0.5250765681266785,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 0.9998691270776077,
|
|
"step": 1910,
|
|
"total_flos": 167763918716928.0,
|
|
"train_loss": 0.3737170026252407,
|
|
"train_runtime": 21184.4179,
|
|
"train_samples_per_second": 2.886,
|
|
"train_steps_per_second": 0.09
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 1910,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 800,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 167763918716928.0,
|
|
"train_batch_size": 2,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|