Files
kimi-k2-swesmith_with_plain…/trainer_state.json
ModelHub XC 8b62df5c9e 初始化项目,由ModelHub XC社区提供模型
Model: penfever/kimi-k2-swesmith_with_plain_docker-sandboxes-maxeps-32k
Source: Original Platform
2026-04-21 23:41:57 +08:00

1874 lines
52 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.0,
"eval_steps": 500,
"global_step": 833,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04219409282700422,
"grad_norm": 13.737735517308288,
"learning_rate": 1.904761904761905e-06,
"loss": 0.8138,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.42325860261917114,
"step": 5,
"valid_targets_mean": 9300.0,
"valid_targets_min": 7296
},
{
"epoch": 0.08438818565400844,
"grad_norm": 8.598223125161901,
"learning_rate": 4.2857142857142855e-06,
"loss": 0.7793,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.38333451747894287,
"step": 10,
"valid_targets_mean": 9275.5,
"valid_targets_min": 7158
},
{
"epoch": 0.12658227848101267,
"grad_norm": 3.908004842191187,
"learning_rate": 6.666666666666667e-06,
"loss": 0.6819,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.34921586513519287,
"step": 15,
"valid_targets_mean": 8389.4,
"valid_targets_min": 6233
},
{
"epoch": 0.16877637130801687,
"grad_norm": 2.3954669922452503,
"learning_rate": 9.047619047619049e-06,
"loss": 0.6067,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2741207480430603,
"step": 20,
"valid_targets_mean": 8933.9,
"valid_targets_min": 6921
},
{
"epoch": 0.2109704641350211,
"grad_norm": 1.262022302568234,
"learning_rate": 1.1428571428571429e-05,
"loss": 0.5663,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27063384652137756,
"step": 25,
"valid_targets_mean": 8693.9,
"valid_targets_min": 5525
},
{
"epoch": 0.25316455696202533,
"grad_norm": 0.9175876804234998,
"learning_rate": 1.3809523809523811e-05,
"loss": 0.5036,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24980702996253967,
"step": 30,
"valid_targets_mean": 9026.4,
"valid_targets_min": 7000
},
{
"epoch": 0.29535864978902954,
"grad_norm": 0.7282592599899416,
"learning_rate": 1.6190476190476193e-05,
"loss": 0.4921,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22725015878677368,
"step": 35,
"valid_targets_mean": 8819.1,
"valid_targets_min": 7233
},
{
"epoch": 0.33755274261603374,
"grad_norm": 0.5629122844919103,
"learning_rate": 1.8571428571428575e-05,
"loss": 0.4657,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21749253571033478,
"step": 40,
"valid_targets_mean": 9115.0,
"valid_targets_min": 6864
},
{
"epoch": 0.379746835443038,
"grad_norm": 0.5170770216436973,
"learning_rate": 2.0952380952380954e-05,
"loss": 0.4481,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2065398395061493,
"step": 45,
"valid_targets_mean": 8547.1,
"valid_targets_min": 6900
},
{
"epoch": 0.4219409282700422,
"grad_norm": 0.5034342142568142,
"learning_rate": 2.3333333333333336e-05,
"loss": 0.4218,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1998848021030426,
"step": 50,
"valid_targets_mean": 7835.2,
"valid_targets_min": 4698
},
{
"epoch": 0.4641350210970464,
"grad_norm": 0.4545350747053312,
"learning_rate": 2.5714285714285718e-05,
"loss": 0.4183,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21893973648548126,
"step": 55,
"valid_targets_mean": 9180.1,
"valid_targets_min": 7051
},
{
"epoch": 0.5063291139240507,
"grad_norm": 0.44398357660292653,
"learning_rate": 2.8095238095238096e-05,
"loss": 0.407,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19019050896167755,
"step": 60,
"valid_targets_mean": 8196.6,
"valid_targets_min": 1389
},
{
"epoch": 0.5485232067510548,
"grad_norm": 0.43196150110984444,
"learning_rate": 3.047619047619048e-05,
"loss": 0.3993,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18999773263931274,
"step": 65,
"valid_targets_mean": 8886.4,
"valid_targets_min": 5947
},
{
"epoch": 0.5907172995780591,
"grad_norm": 0.4356694664798871,
"learning_rate": 3.285714285714286e-05,
"loss": 0.3694,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18235838413238525,
"step": 70,
"valid_targets_mean": 8577.5,
"valid_targets_min": 5854
},
{
"epoch": 0.6329113924050633,
"grad_norm": 0.4879795309609486,
"learning_rate": 3.523809523809524e-05,
"loss": 0.3914,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2244984209537506,
"step": 75,
"valid_targets_mean": 9445.2,
"valid_targets_min": 7589
},
{
"epoch": 0.6751054852320675,
"grad_norm": 0.4591287171448197,
"learning_rate": 3.761904761904762e-05,
"loss": 0.3652,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19888997077941895,
"step": 80,
"valid_targets_mean": 8648.0,
"valid_targets_min": 6995
},
{
"epoch": 0.7172995780590717,
"grad_norm": 0.43370135355956757,
"learning_rate": 4e-05,
"loss": 0.3363,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15653540194034576,
"step": 85,
"valid_targets_mean": 7976.5,
"valid_targets_min": 5676
},
{
"epoch": 0.759493670886076,
"grad_norm": 0.43614169159655086,
"learning_rate": 3.9995601949609725e-05,
"loss": 0.3515,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16819864511489868,
"step": 90,
"valid_targets_mean": 8489.6,
"valid_targets_min": 5651
},
{
"epoch": 0.8016877637130801,
"grad_norm": 0.47270502658473995,
"learning_rate": 3.998240973272361e-05,
"loss": 0.3544,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20371896028518677,
"step": 95,
"valid_targets_mean": 8980.9,
"valid_targets_min": 5911
},
{
"epoch": 0.8438818565400844,
"grad_norm": 0.44366932292888417,
"learning_rate": 3.996042915134512e-05,
"loss": 0.355,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20148850977420807,
"step": 100,
"valid_targets_mean": 9147.2,
"valid_targets_min": 6069
},
{
"epoch": 0.8860759493670886,
"grad_norm": 0.450738888687614,
"learning_rate": 3.9929669872644716e-05,
"loss": 0.3417,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13800635933876038,
"step": 105,
"valid_targets_mean": 6815.9,
"valid_targets_min": 1124
},
{
"epoch": 0.9282700421940928,
"grad_norm": 0.41039396298425773,
"learning_rate": 3.989014542470815e-05,
"loss": 0.3378,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16703501343727112,
"step": 110,
"valid_targets_mean": 10058.6,
"valid_targets_min": 8736
},
{
"epoch": 0.9704641350210971,
"grad_norm": 0.40658466169041685,
"learning_rate": 3.98418731905868e-05,
"loss": 0.3441,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1470247507095337,
"step": 115,
"valid_targets_mean": 8467.8,
"valid_targets_min": 4203
},
{
"epoch": 1.0084388185654007,
"grad_norm": 0.42706146525288,
"learning_rate": 3.978487440065248e-05,
"loss": 0.3478,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1719146966934204,
"step": 120,
"valid_targets_mean": 9109.1,
"valid_targets_min": 6322
},
{
"epoch": 1.0506329113924051,
"grad_norm": 0.3862263984474135,
"learning_rate": 3.9719174123260214e-05,
"loss": 0.3238,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15366646647453308,
"step": 125,
"valid_targets_mean": 9161.2,
"valid_targets_min": 6619
},
{
"epoch": 1.0928270042194093,
"grad_norm": 0.46771718995267086,
"learning_rate": 3.964480125372307e-05,
"loss": 0.3114,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1375672072172165,
"step": 130,
"valid_targets_mean": 7859.0,
"valid_targets_min": 1079
},
{
"epoch": 1.1350210970464134,
"grad_norm": 0.43449504183040244,
"learning_rate": 3.9561788501603836e-05,
"loss": 0.3161,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12888583540916443,
"step": 135,
"valid_targets_mean": 7176.8,
"valid_targets_min": 1462
},
{
"epoch": 1.1772151898734178,
"grad_norm": 0.43422331056103003,
"learning_rate": 3.94701723763292e-05,
"loss": 0.3225,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15962457656860352,
"step": 140,
"valid_targets_mean": 8854.5,
"valid_targets_min": 6056
},
{
"epoch": 1.219409282700422,
"grad_norm": 0.47515202494067976,
"learning_rate": 3.936999317113271e-05,
"loss": 0.3243,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1411084532737732,
"step": 145,
"valid_targets_mean": 8753.8,
"valid_targets_min": 5301
},
{
"epoch": 1.261603375527426,
"grad_norm": 0.40784537683056815,
"learning_rate": 3.926129494533362e-05,
"loss": 0.3125,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14677849411964417,
"step": 150,
"valid_targets_mean": 8915.6,
"valid_targets_min": 5868
},
{
"epoch": 1.3037974683544304,
"grad_norm": 0.42669566237324436,
"learning_rate": 3.914412550495937e-05,
"loss": 0.3189,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13394805788993835,
"step": 155,
"valid_targets_mean": 7279.6,
"valid_targets_min": 1145
},
{
"epoch": 1.3459915611814346,
"grad_norm": 0.41003883522018736,
"learning_rate": 3.901853638172025e-05,
"loss": 0.3179,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16570201516151428,
"step": 160,
"valid_targets_mean": 9676.4,
"valid_targets_min": 8776
},
{
"epoch": 1.3881856540084387,
"grad_norm": 0.4090145854881866,
"learning_rate": 3.888458281034551e-05,
"loss": 0.2767,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13220128417015076,
"step": 165,
"valid_targets_mean": 8463.6,
"valid_targets_min": 352
},
{
"epoch": 1.4303797468354431,
"grad_norm": 0.4158827283938909,
"learning_rate": 3.874232370429085e-05,
"loss": 0.3099,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1609145700931549,
"step": 170,
"valid_targets_mean": 9337.5,
"valid_targets_min": 8545
},
{
"epoch": 1.4725738396624473,
"grad_norm": 0.4624418763799199,
"learning_rate": 3.8591821629827946e-05,
"loss": 0.3101,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15375180542469025,
"step": 175,
"valid_targets_mean": 8644.0,
"valid_targets_min": 7177
},
{
"epoch": 1.5147679324894514,
"grad_norm": 0.44905907331860323,
"learning_rate": 3.8433142778527536e-05,
"loss": 0.3149,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13599535822868347,
"step": 180,
"valid_targets_mean": 7853.4,
"valid_targets_min": 4716
},
{
"epoch": 1.5569620253164556,
"grad_norm": 0.43202384347831446,
"learning_rate": 3.826635693814801e-05,
"loss": 0.3003,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14607158303260803,
"step": 185,
"valid_targets_mean": 7725.0,
"valid_targets_min": 5193
},
{
"epoch": 1.59915611814346,
"grad_norm": 0.4250126556019548,
"learning_rate": 3.8091537461942395e-05,
"loss": 0.3262,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1532595157623291,
"step": 190,
"valid_targets_mean": 8313.8,
"valid_targets_min": 1141
},
{
"epoch": 1.6413502109704643,
"grad_norm": 0.4401245618708091,
"learning_rate": 3.7908761236397265e-05,
"loss": 0.2855,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15331213176250458,
"step": 195,
"valid_targets_mean": 8088.1,
"valid_targets_min": 4238
},
{
"epoch": 1.6835443037974684,
"grad_norm": 0.466081852025086,
"learning_rate": 3.771810864741761e-05,
"loss": 0.2953,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.156716451048851,
"step": 200,
"valid_targets_mean": 9015.5,
"valid_targets_min": 4203
},
{
"epoch": 1.7257383966244726,
"grad_norm": 0.44420961731157504,
"learning_rate": 3.751966354497278e-05,
"loss": 0.3126,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12232231348752975,
"step": 205,
"valid_targets_mean": 7068.8,
"valid_targets_min": 1389
},
{
"epoch": 1.7679324894514767,
"grad_norm": 0.44094509994269193,
"learning_rate": 3.7313513206218794e-05,
"loss": 0.3115,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15219950675964355,
"step": 210,
"valid_targets_mean": 6983.6,
"valid_targets_min": 1497
},
{
"epoch": 1.810126582278481,
"grad_norm": 0.4363001268331526,
"learning_rate": 3.709974829711344e-05,
"loss": 0.3157,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13169416785240173,
"step": 215,
"valid_targets_mean": 7780.9,
"valid_targets_min": 1318
},
{
"epoch": 1.8523206751054853,
"grad_norm": 0.46903212787997367,
"learning_rate": 3.6878462832540903e-05,
"loss": 0.3129,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1425689458847046,
"step": 220,
"valid_targets_mean": 8625.2,
"valid_targets_min": 6122
},
{
"epoch": 1.8945147679324894,
"grad_norm": 0.41310139602425183,
"learning_rate": 3.6649754134963564e-05,
"loss": 0.3011,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13354924321174622,
"step": 225,
"valid_targets_mean": 8469.6,
"valid_targets_min": 4623
},
{
"epoch": 1.9367088607594938,
"grad_norm": 0.4087947045064101,
"learning_rate": 3.64137227916191e-05,
"loss": 0.3079,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15488475561141968,
"step": 230,
"valid_targets_mean": 8825.4,
"valid_targets_min": 6185
},
{
"epoch": 1.978902953586498,
"grad_norm": 0.4223091429231445,
"learning_rate": 3.6170472610281674e-05,
"loss": 0.2883,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11964912712574005,
"step": 235,
"valid_targets_mean": 7039.6,
"valid_targets_min": 775
},
{
"epoch": 2.0168776371308015,
"grad_norm": 0.42112252103307546,
"learning_rate": 3.592011057360678e-05,
"loss": 0.2784,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1303289830684662,
"step": 240,
"valid_targets_mean": 7897.1,
"valid_targets_min": 782
},
{
"epoch": 2.059071729957806,
"grad_norm": 0.4276205778137949,
"learning_rate": 3.5662746792079726e-05,
"loss": 0.2875,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14901086688041687,
"step": 245,
"valid_targets_mean": 8519.6,
"valid_targets_min": 1031
},
{
"epoch": 2.1012658227848102,
"grad_norm": 0.4089545502307839,
"learning_rate": 3.53984944555885e-05,
"loss": 0.2765,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14145910739898682,
"step": 250,
"valid_targets_mean": 9206.6,
"valid_targets_min": 7423
},
{
"epoch": 2.1434599156118144,
"grad_norm": 0.495227570105931,
"learning_rate": 3.512746978364227e-05,
"loss": 0.2902,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15283076465129852,
"step": 255,
"valid_targets_mean": 8669.4,
"valid_targets_min": 5525
},
{
"epoch": 2.1856540084388185,
"grad_norm": 0.42413672320413554,
"learning_rate": 3.484979197425745e-05,
"loss": 0.287,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11562255024909973,
"step": 260,
"valid_targets_mean": 8506.1,
"valid_targets_min": 5868
},
{
"epoch": 2.2278481012658227,
"grad_norm": 0.4429223995215785,
"learning_rate": 3.456558315153382e-05,
"loss": 0.278,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11651501059532166,
"step": 265,
"valid_targets_mean": 7150.5,
"valid_targets_min": 850
},
{
"epoch": 2.270042194092827,
"grad_norm": 0.3916309614586102,
"learning_rate": 3.427496831194378e-05,
"loss": 0.2854,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11857372522354126,
"step": 270,
"valid_targets_mean": 9030.6,
"valid_targets_min": 7393
},
{
"epoch": 2.3122362869198314,
"grad_norm": 0.4408587328354608,
"learning_rate": 3.3978075269358175e-05,
"loss": 0.2797,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14032971858978271,
"step": 275,
"valid_targets_mean": 8928.5,
"valid_targets_min": 6624
},
{
"epoch": 2.3544303797468356,
"grad_norm": 0.5255984072468615,
"learning_rate": 3.3675034598833196e-05,
"loss": 0.286,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1699741780757904,
"step": 280,
"valid_targets_mean": 9138.8,
"valid_targets_min": 7798
},
{
"epoch": 2.3966244725738397,
"grad_norm": 0.46154900583042857,
"learning_rate": 3.3365979579182774e-05,
"loss": 0.2841,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1370779126882553,
"step": 285,
"valid_targets_mean": 7857.2,
"valid_targets_min": 5341
},
{
"epoch": 2.438818565400844,
"grad_norm": 0.410815728442414,
"learning_rate": 3.3051046134361874e-05,
"loss": 0.2717,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13771235942840576,
"step": 290,
"valid_targets_mean": 8922.4,
"valid_targets_min": 5929
},
{
"epoch": 2.481012658227848,
"grad_norm": 0.42579984902443774,
"learning_rate": 3.27303727736865e-05,
"loss": 0.2703,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.150842547416687,
"step": 295,
"valid_targets_mean": 9481.6,
"valid_targets_min": 8016
},
{
"epoch": 2.523206751054852,
"grad_norm": 0.45157363816682605,
"learning_rate": 3.2404100530916555e-05,
"loss": 0.2754,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12719795107841492,
"step": 300,
"valid_targets_mean": 8190.1,
"valid_targets_min": 6069
},
{
"epoch": 2.5654008438818563,
"grad_norm": 0.429470249992375,
"learning_rate": 3.20723729022285e-05,
"loss": 0.2699,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10490024089813232,
"step": 305,
"valid_targets_mean": 7513.5,
"valid_targets_min": 1012
},
{
"epoch": 2.607594936708861,
"grad_norm": 0.44240780660157275,
"learning_rate": 3.173533578310503e-05,
"loss": 0.2846,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15472307801246643,
"step": 310,
"valid_targets_mean": 9026.9,
"valid_targets_min": 6796
},
{
"epoch": 2.649789029535865,
"grad_norm": 0.46171787256178776,
"learning_rate": 3.139313740416945e-05,
"loss": 0.2734,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14282463490962982,
"step": 315,
"valid_targets_mean": 8840.5,
"valid_targets_min": 5948
},
{
"epoch": 2.691983122362869,
"grad_norm": 0.5596242010714433,
"learning_rate": 3.104592826599319e-05,
"loss": 0.2682,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13433107733726501,
"step": 320,
"valid_targets_mean": 8862.6,
"valid_targets_min": 7327
},
{
"epoch": 2.7341772151898733,
"grad_norm": 0.41372708901928346,
"learning_rate": 3.06938610729048e-05,
"loss": 0.2744,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13239914178848267,
"step": 325,
"valid_targets_mean": 8498.8,
"valid_targets_min": 5562
},
{
"epoch": 2.7763713080168775,
"grad_norm": 1.6545728711184768,
"learning_rate": 3.0337090665829883e-05,
"loss": 0.2861,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1271859109401703,
"step": 330,
"valid_targets_mean": 8136.5,
"valid_targets_min": 5643
},
{
"epoch": 2.818565400843882,
"grad_norm": 0.4317369059905672,
"learning_rate": 2.9975773954191246e-05,
"loss": 0.2847,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16068033874034882,
"step": 335,
"valid_targets_mean": 9313.6,
"valid_targets_min": 8083
},
{
"epoch": 2.8607594936708862,
"grad_norm": 0.4361200075559799,
"learning_rate": 2.9610069846899355e-05,
"loss": 0.2798,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12818226218223572,
"step": 340,
"valid_targets_mean": 8236.2,
"valid_targets_min": 1372
},
{
"epoch": 2.9029535864978904,
"grad_norm": 0.41932505340642173,
"learning_rate": 2.9240139182463384e-05,
"loss": 0.2798,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13603907823562622,
"step": 345,
"valid_targets_mean": 8153.4,
"valid_targets_min": 1722
},
{
"epoch": 2.9451476793248945,
"grad_norm": 0.4285742105739042,
"learning_rate": 2.8866144658253642e-05,
"loss": 0.2826,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13186733424663544,
"step": 350,
"valid_targets_mean": 7989.5,
"valid_targets_min": 4996
},
{
"epoch": 2.9873417721518987,
"grad_norm": 0.46406691637390524,
"learning_rate": 2.8488250758946453e-05,
"loss": 0.2709,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14630965888500214,
"step": 355,
"valid_targets_mean": 8137.5,
"valid_targets_min": 3982
},
{
"epoch": 3.0253164556962027,
"grad_norm": 0.43995204460384685,
"learning_rate": 2.8106623684182944e-05,
"loss": 0.2549,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13689684867858887,
"step": 360,
"valid_targets_mean": 8913.4,
"valid_targets_min": 7312
},
{
"epoch": 3.067510548523207,
"grad_norm": 0.45122017845034534,
"learning_rate": 2.7721431275473634e-05,
"loss": 0.2587,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13735206425189972,
"step": 365,
"valid_targets_mean": 8872.2,
"valid_targets_min": 4723
},
{
"epoch": 3.109704641350211,
"grad_norm": 0.41848489063381833,
"learning_rate": 2.733284294238086e-05,
"loss": 0.2606,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10563844442367554,
"step": 370,
"valid_targets_mean": 8790.6,
"valid_targets_min": 6799
},
{
"epoch": 3.151898734177215,
"grad_norm": 0.48612738303501324,
"learning_rate": 2.694102958801163e-05,
"loss": 0.261,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13847270607948303,
"step": 375,
"valid_targets_mean": 7784.8,
"valid_targets_min": 1899
},
{
"epoch": 3.1940928270042193,
"grad_norm": 0.43439045015611505,
"learning_rate": 2.654616353385354e-05,
"loss": 0.2545,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11931806802749634,
"step": 380,
"valid_targets_mean": 8220.5,
"valid_targets_min": 6466
},
{
"epoch": 3.2362869198312234,
"grad_norm": 0.4672629305905832,
"learning_rate": 2.6148418443986967e-05,
"loss": 0.2573,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12390561401844025,
"step": 385,
"valid_targets_mean": 8790.5,
"valid_targets_min": 7425
},
{
"epoch": 3.278481012658228,
"grad_norm": 0.4814651192611666,
"learning_rate": 2.5747969248706675e-05,
"loss": 0.2647,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15146447718143463,
"step": 390,
"valid_targets_mean": 9032.1,
"valid_targets_min": 5188
},
{
"epoch": 3.320675105485232,
"grad_norm": 0.4374830944824983,
"learning_rate": 2.5344992067586623e-05,
"loss": 0.2647,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13405266404151917,
"step": 395,
"valid_targets_mean": 8223.2,
"valid_targets_min": 3452
},
{
"epoch": 3.3628691983122363,
"grad_norm": 0.4732142324779574,
"learning_rate": 2.4939664132021685e-05,
"loss": 0.2558,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10236911475658417,
"step": 400,
"valid_targets_mean": 7064.0,
"valid_targets_min": 1079
},
{
"epoch": 3.4050632911392404,
"grad_norm": 0.43954374313846106,
"learning_rate": 2.4532163707280372e-05,
"loss": 0.2628,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12227586656808853,
"step": 405,
"valid_targets_mean": 8465.5,
"valid_targets_min": 6739
},
{
"epoch": 3.4472573839662446,
"grad_norm": 0.42292011324295486,
"learning_rate": 2.4122670014102905e-05,
"loss": 0.262,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11511904001235962,
"step": 410,
"valid_targets_mean": 9274.5,
"valid_targets_min": 6734
},
{
"epoch": 3.489451476793249,
"grad_norm": 0.4344835895735963,
"learning_rate": 2.371136314987898e-05,
"loss": 0.2822,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14791841804981232,
"step": 415,
"valid_targets_mean": 8766.1,
"valid_targets_min": 6028
},
{
"epoch": 3.5316455696202533,
"grad_norm": 0.4261397076907197,
"learning_rate": 2.329842400944008e-05,
"loss": 0.2528,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10815391689538956,
"step": 420,
"valid_targets_mean": 8315.0,
"valid_targets_min": 7097
},
{
"epoch": 3.5738396624472575,
"grad_norm": 0.5622442435750628,
"learning_rate": 2.2884034205500977e-05,
"loss": 0.2575,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13450974225997925,
"step": 425,
"valid_targets_mean": 8429.6,
"valid_targets_min": 6223
},
{
"epoch": 3.6160337552742616,
"grad_norm": 0.45056620552671345,
"learning_rate": 2.246837598878557e-05,
"loss": 0.2728,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12603911757469177,
"step": 430,
"valid_targets_mean": 8927.6,
"valid_targets_min": 5603
},
{
"epoch": 3.6582278481012658,
"grad_norm": 0.44591198215906896,
"learning_rate": 2.2051632167872072e-05,
"loss": 0.2645,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16440054774284363,
"step": 435,
"valid_targets_mean": 9716.9,
"valid_targets_min": 8934
},
{
"epoch": 3.70042194092827,
"grad_norm": 0.4244520408699371,
"learning_rate": 2.1633986028792915e-05,
"loss": 0.2468,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12980708479881287,
"step": 440,
"valid_targets_mean": 9191.9,
"valid_targets_min": 7691
},
{
"epoch": 3.742616033755274,
"grad_norm": 0.4400154252728221,
"learning_rate": 2.1215621254424592e-05,
"loss": 0.2621,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14869439601898193,
"step": 445,
"valid_targets_mean": 9304.6,
"valid_targets_min": 8242
},
{
"epoch": 3.7848101265822782,
"grad_norm": 0.4312069617379547,
"learning_rate": 2.0796721843703028e-05,
"loss": 0.2667,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14449357986450195,
"step": 450,
"valid_targets_mean": 8888.6,
"valid_targets_min": 4545
},
{
"epoch": 3.827004219409283,
"grad_norm": 0.6226769603382436,
"learning_rate": 2.0377472030699895e-05,
"loss": 0.2561,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13251638412475586,
"step": 455,
"valid_targets_mean": 9287.4,
"valid_targets_min": 7232
},
{
"epoch": 3.869198312236287,
"grad_norm": 0.41779119755506317,
"learning_rate": 1.995805620359557e-05,
"loss": 0.2535,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11737249791622162,
"step": 460,
"valid_targets_mean": 8372.5,
"valid_targets_min": 6429
},
{
"epoch": 3.911392405063291,
"grad_norm": 0.42919575535179477,
"learning_rate": 1.9538658823584258e-05,
"loss": 0.2492,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12399604916572571,
"step": 465,
"valid_targets_mean": 8499.9,
"valid_targets_min": 7146
},
{
"epoch": 3.9535864978902953,
"grad_norm": 0.4282733272450913,
"learning_rate": 1.9119464343747048e-05,
"loss": 0.2576,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12035520374774933,
"step": 470,
"valid_targets_mean": 9111.9,
"valid_targets_min": 7021
},
{
"epoch": 3.9957805907173,
"grad_norm": 0.4688557446235436,
"learning_rate": 1.8700657127928495e-05,
"loss": 0.2579,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14246785640716553,
"step": 475,
"valid_targets_mean": 8446.9,
"valid_targets_min": 3548
},
{
"epoch": 4.033755274261603,
"grad_norm": 0.4179516240442328,
"learning_rate": 1.8282421369652514e-05,
"loss": 0.2473,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12743636965751648,
"step": 480,
"valid_targets_mean": 8593.9,
"valid_targets_min": 6807
},
{
"epoch": 4.075949367088608,
"grad_norm": 0.4159181042353588,
"learning_rate": 1.786494101111308e-05,
"loss": 0.2435,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12010753154754639,
"step": 485,
"valid_targets_mean": 9153.5,
"valid_targets_min": 7211
},
{
"epoch": 4.118143459915612,
"grad_norm": 0.43857477399241507,
"learning_rate": 1.7448399662275577e-05,
"loss": 0.2358,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11870156228542328,
"step": 490,
"valid_targets_mean": 8510.8,
"valid_targets_min": 5260
},
{
"epoch": 4.160337552742616,
"grad_norm": 0.7799080736665794,
"learning_rate": 1.70329805201242e-05,
"loss": 0.2587,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13719968497753143,
"step": 495,
"valid_targets_mean": 8487.0,
"valid_targets_min": 5677
},
{
"epoch": 4.2025316455696204,
"grad_norm": 0.4292260280318674,
"learning_rate": 1.661886628809096e-05,
"loss": 0.2417,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13319876790046692,
"step": 500,
"valid_targets_mean": 9258.0,
"valid_targets_min": 7850
},
{
"epoch": 4.244725738396625,
"grad_norm": 0.47075531459044107,
"learning_rate": 1.620623909570185e-05,
"loss": 0.2521,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12588399648666382,
"step": 505,
"valid_targets_mean": 7839.9,
"valid_targets_min": 709
},
{
"epoch": 4.286919831223629,
"grad_norm": 0.4443285639795416,
"learning_rate": 1.5795280418475313e-05,
"loss": 0.2501,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12543919682502747,
"step": 510,
"valid_targets_mean": 9025.4,
"valid_targets_min": 6790
},
{
"epoch": 4.329113924050633,
"grad_norm": 0.45446326155202266,
"learning_rate": 1.5386170998108432e-05,
"loss": 0.2508,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12875355780124664,
"step": 515,
"valid_targets_mean": 8992.2,
"valid_targets_min": 7538
},
{
"epoch": 4.371308016877637,
"grad_norm": 0.4188354388664049,
"learning_rate": 1.4979090762985793e-05,
"loss": 0.2393,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1216769590973854,
"step": 520,
"valid_targets_mean": 9567.9,
"valid_targets_min": 8414
},
{
"epoch": 4.413502109704641,
"grad_norm": 0.5144343057216146,
"learning_rate": 1.4574218749046097e-05,
"loss": 0.2479,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10991360992193222,
"step": 525,
"valid_targets_mean": 8216.4,
"valid_targets_min": 5783
},
{
"epoch": 4.455696202531645,
"grad_norm": 0.45077488627425316,
"learning_rate": 1.4171733021041236e-05,
"loss": 0.2522,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12308443337678909,
"step": 530,
"valid_targets_mean": 8366.5,
"valid_targets_min": 5486
},
{
"epoch": 4.4978902953586495,
"grad_norm": 0.4438440275448954,
"learning_rate": 1.3771810594222522e-05,
"loss": 0.2438,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1209210604429245,
"step": 535,
"valid_targets_mean": 8559.5,
"valid_targets_min": 6739
},
{
"epoch": 4.540084388185654,
"grad_norm": 0.4138439523530891,
"learning_rate": 1.3374627356488486e-05,
"loss": 0.2346,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13088755309581757,
"step": 540,
"valid_targets_mean": 9257.6,
"valid_targets_min": 5925
},
{
"epoch": 4.582278481012658,
"grad_norm": 0.4348705477932507,
"learning_rate": 1.2980357991028504e-05,
"loss": 0.2628,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12444625794887543,
"step": 545,
"valid_targets_mean": 9086.5,
"valid_targets_min": 7240
},
{
"epoch": 4.624472573839663,
"grad_norm": 0.4514383858436243,
"learning_rate": 1.2589175899496241e-05,
"loss": 0.252,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14704394340515137,
"step": 550,
"valid_targets_mean": 9210.2,
"valid_targets_min": 7000
},
{
"epoch": 4.666666666666667,
"grad_norm": 0.43305229013411795,
"learning_rate": 1.2201253125746733e-05,
"loss": 0.2471,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1392921805381775,
"step": 555,
"valid_targets_mean": 9039.9,
"valid_targets_min": 7246
},
{
"epoch": 4.708860759493671,
"grad_norm": 0.4135427416829583,
"learning_rate": 1.1816760280170615e-05,
"loss": 0.2474,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10349678993225098,
"step": 560,
"valid_targets_mean": 8758.5,
"valid_targets_min": 6734
},
{
"epoch": 4.751054852320675,
"grad_norm": 0.4134170552023414,
"learning_rate": 1.1435866464658856e-05,
"loss": 0.236,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0989164412021637,
"step": 565,
"valid_targets_mean": 7774.6,
"valid_targets_min": 1033
},
{
"epoch": 4.793248945147679,
"grad_norm": 0.4202770280254775,
"learning_rate": 1.1058739198230856e-05,
"loss": 0.2307,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10661663115024567,
"step": 570,
"valid_targets_mean": 8591.6,
"valid_targets_min": 5296
},
{
"epoch": 4.8354430379746836,
"grad_norm": 0.42451909244520697,
"learning_rate": 1.0685544343358729e-05,
"loss": 0.2498,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12151402235031128,
"step": 575,
"valid_targets_mean": 9392.6,
"valid_targets_min": 6279
},
{
"epoch": 4.877637130801688,
"grad_norm": 0.439789899437483,
"learning_rate": 1.0316446033020198e-05,
"loss": 0.2379,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09513577073812485,
"step": 580,
"valid_targets_mean": 8077.6,
"valid_targets_min": 4893
},
{
"epoch": 4.919831223628692,
"grad_norm": 0.4316282443408433,
"learning_rate": 9.951606598512042e-06,
"loss": 0.258,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13961660861968994,
"step": 585,
"valid_targets_mean": 9551.0,
"valid_targets_min": 8353
},
{
"epoch": 4.962025316455696,
"grad_norm": 0.44460163153797444,
"learning_rate": 9.591186498056004e-06,
"loss": 0.2328,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11063335835933685,
"step": 590,
"valid_targets_mean": 8310.2,
"valid_targets_min": 5642
},
{
"epoch": 5.0,
"grad_norm": 0.6232611185111987,
"learning_rate": 9.235344246228415e-06,
"loss": 0.2463,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21027398109436035,
"step": 595,
"valid_targets_mean": 9007.9,
"valid_targets_min": 7843
},
{
"epoch": 5.042194092827004,
"grad_norm": 0.45373400536723996,
"learning_rate": 8.884236344244734e-06,
"loss": 0.239,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11295656859874725,
"step": 600,
"valid_targets_mean": 8223.9,
"valid_targets_min": 6369
},
{
"epoch": 5.084388185654008,
"grad_norm": 0.44188846780421875,
"learning_rate": 8.538017211129501e-06,
"loss": 0.2267,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09402458369731903,
"step": 605,
"valid_targets_mean": 8217.8,
"valid_targets_min": 5192
},
{
"epoch": 5.1265822784810124,
"grad_norm": 0.4739239566419668,
"learning_rate": 8.196839115802071e-06,
"loss": 0.2416,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1246982291340828,
"step": 610,
"valid_targets_mean": 9335.2,
"valid_targets_min": 7233
},
{
"epoch": 5.168776371308017,
"grad_norm": 0.41388443568870104,
"learning_rate": 7.860852110107952e-06,
"loss": 0.236,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1162976399064064,
"step": 615,
"valid_targets_mean": 8525.2,
"valid_targets_min": 5947
},
{
"epoch": 5.210970464135021,
"grad_norm": 0.4584431908697567,
"learning_rate": 7.530203962825331e-06,
"loss": 0.2438,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11082412302494049,
"step": 620,
"valid_targets_mean": 8312.5,
"valid_targets_min": 3548
},
{
"epoch": 5.253164556962025,
"grad_norm": 0.5075457556667611,
"learning_rate": 7.205040094675502e-06,
"loss": 0.2403,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1264800876379013,
"step": 625,
"valid_targets_mean": 9783.8,
"valid_targets_min": 8094
},
{
"epoch": 5.29535864978903,
"grad_norm": 0.43671359308707003,
"learning_rate": 6.885503514366203e-06,
"loss": 0.239,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1132010817527771,
"step": 630,
"valid_targets_mean": 8917.6,
"valid_targets_min": 6726
},
{
"epoch": 5.337552742616034,
"grad_norm": 0.4375519850374428,
"learning_rate": 6.571734755695584e-06,
"loss": 0.23,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12824538350105286,
"step": 635,
"valid_targets_mean": 8502.2,
"valid_targets_min": 1497
},
{
"epoch": 5.379746835443038,
"grad_norm": 0.4243347246282382,
"learning_rate": 6.2638718157448285e-06,
"loss": 0.2342,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11869437992572784,
"step": 640,
"valid_targets_mean": 9188.5,
"valid_targets_min": 7034
},
{
"epoch": 5.421940928270042,
"grad_norm": 0.46745881796696226,
"learning_rate": 5.962050094186236e-06,
"loss": 0.2402,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11892999708652496,
"step": 645,
"valid_targets_mean": 8592.6,
"valid_targets_min": 6624
},
{
"epoch": 5.4641350210970465,
"grad_norm": 0.469810046983012,
"learning_rate": 5.666402333733843e-06,
"loss": 0.2372,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12667159736156464,
"step": 650,
"valid_targets_mean": 7721.2,
"valid_targets_min": 5301
},
{
"epoch": 5.506329113924051,
"grad_norm": 0.440394400681658,
"learning_rate": 5.377058561762474e-06,
"loss": 0.2344,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09980838745832443,
"step": 655,
"valid_targets_mean": 7530.0,
"valid_targets_min": 4441
},
{
"epoch": 5.548523206751055,
"grad_norm": 0.46129642001131443,
"learning_rate": 5.094146033121057e-06,
"loss": 0.231,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10793110728263855,
"step": 660,
"valid_targets_mean": 8216.8,
"valid_targets_min": 4893
},
{
"epoch": 5.590717299578059,
"grad_norm": 0.48121058406225553,
"learning_rate": 4.817789174165281e-06,
"loss": 0.2572,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11774282157421112,
"step": 665,
"valid_targets_mean": 9377.8,
"valid_targets_min": 7466
},
{
"epoch": 5.632911392405063,
"grad_norm": 0.47185562628992206,
"learning_rate": 4.5481095280342925e-06,
"loss": 0.2359,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10965608060359955,
"step": 670,
"valid_targets_mean": 7637.9,
"valid_targets_min": 1141
},
{
"epoch": 5.675105485232067,
"grad_norm": 0.4605629407236577,
"learning_rate": 4.285225701195383e-06,
"loss": 0.2402,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14336133003234863,
"step": 675,
"valid_targets_mean": 8773.4,
"valid_targets_min": 5065
},
{
"epoch": 5.717299578059071,
"grad_norm": 0.39880864828224805,
"learning_rate": 4.029253311280281e-06,
"loss": 0.2439,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11196043342351913,
"step": 680,
"valid_targets_mean": 9331.1,
"valid_targets_min": 7073
},
{
"epoch": 5.759493670886076,
"grad_norm": 0.435020554643124,
"learning_rate": 3.7803049362359103e-06,
"loss": 0.2333,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11478637158870697,
"step": 685,
"valid_targets_mean": 8947.9,
"valid_targets_min": 8357
},
{
"epoch": 5.80168776371308,
"grad_norm": 0.4269181943885422,
"learning_rate": 3.5384900648120814e-06,
"loss": 0.2424,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1145181730389595,
"step": 690,
"valid_targets_mean": 9141.5,
"valid_targets_min": 6753
},
{
"epoch": 5.843881856540085,
"grad_norm": 0.40001789612061567,
"learning_rate": 3.3039150484077555e-06,
"loss": 0.2305,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10709261149168015,
"step": 695,
"valid_targets_mean": 8723.4,
"valid_targets_min": 5732
},
{
"epoch": 5.886075949367089,
"grad_norm": 0.4272824200506153,
"learning_rate": 3.0766830542971826e-06,
"loss": 0.2316,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08846732974052429,
"step": 700,
"valid_targets_mean": 7904.5,
"valid_targets_min": 4238
},
{
"epoch": 5.928270042194093,
"grad_norm": 0.4240311265649667,
"learning_rate": 2.856894020256389e-06,
"loss": 0.2315,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10147963464260101,
"step": 705,
"valid_targets_mean": 7797.0,
"valid_targets_min": 773
},
{
"epoch": 5.970464135021097,
"grad_norm": 0.44341039833186224,
"learning_rate": 2.6446446106100855e-06,
"loss": 0.2358,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11635808646678925,
"step": 710,
"valid_targets_mean": 8042.9,
"valid_targets_min": 4819
},
{
"epoch": 6.008438818565401,
"grad_norm": 0.43454237089026004,
"learning_rate": 2.4400281737181518e-06,
"loss": 0.2485,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12369026243686676,
"step": 715,
"valid_targets_mean": 7799.2,
"valid_targets_min": 1552
},
{
"epoch": 6.050632911392405,
"grad_norm": 0.4086719126374126,
"learning_rate": 2.2431347009206107e-06,
"loss": 0.2399,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11836208403110504,
"step": 720,
"valid_targets_mean": 8866.1,
"valid_targets_min": 5937
},
{
"epoch": 6.0928270042194095,
"grad_norm": 0.47554127803953666,
"learning_rate": 2.0540507869589365e-06,
"loss": 0.231,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12429390847682953,
"step": 725,
"valid_targets_mean": 7500.9,
"valid_targets_min": 1372
},
{
"epoch": 6.135021097046414,
"grad_norm": 0.4138302974266342,
"learning_rate": 1.872859591891305e-06,
"loss": 0.235,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10978598892688751,
"step": 730,
"valid_targets_mean": 9062.0,
"valid_targets_min": 5651
},
{
"epoch": 6.177215189873418,
"grad_norm": 0.477745903678315,
"learning_rate": 1.6996408045183255e-06,
"loss": 0.231,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0997207909822464,
"step": 735,
"valid_targets_mean": 8074.8,
"valid_targets_min": 4794
},
{
"epoch": 6.219409282700422,
"grad_norm": 0.39025085721052233,
"learning_rate": 1.5344706073355386e-06,
"loss": 0.2225,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1053914874792099,
"step": 740,
"valid_targets_mean": 9646.5,
"valid_targets_min": 8467
},
{
"epoch": 6.261603375527426,
"grad_norm": 0.46907873784680004,
"learning_rate": 1.377421643027963e-06,
"loss": 0.2403,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1020062193274498,
"step": 745,
"valid_targets_mean": 8838.5,
"valid_targets_min": 5929
},
{
"epoch": 6.30379746835443,
"grad_norm": 0.4255302670132069,
"learning_rate": 1.2285629825214817e-06,
"loss": 0.242,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11597343534231186,
"step": 750,
"valid_targets_mean": 8416.2,
"valid_targets_min": 6747
},
{
"epoch": 6.345991561181434,
"grad_norm": 0.4209806375747758,
"learning_rate": 1.0879600946050783e-06,
"loss": 0.2198,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10497762262821198,
"step": 755,
"valid_targets_mean": 8873.2,
"valid_targets_min": 6692
},
{
"epoch": 6.3881856540084385,
"grad_norm": 0.3994620526352628,
"learning_rate": 9.556748171373663e-07,
"loss": 0.2363,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11500047892332077,
"step": 760,
"valid_targets_mean": 9397.5,
"valid_targets_min": 6864
},
{
"epoch": 6.430379746835443,
"grad_norm": 0.48476265690454917,
"learning_rate": 8.317653298499651e-07,
"loss": 0.2378,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11502040922641754,
"step": 765,
"valid_targets_mean": 8142.6,
"valid_targets_min": 1255
},
{
"epoch": 6.472573839662447,
"grad_norm": 0.4559778814783951,
"learning_rate": 7.162861287597666e-07,
"loss": 0.2408,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15468385815620422,
"step": 770,
"valid_targets_mean": 9261.8,
"valid_targets_min": 6256
},
{
"epoch": 6.514767932489452,
"grad_norm": 0.40532634311815335,
"learning_rate": 6.092880022013115e-07,
"loss": 0.2402,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12497968971729279,
"step": 775,
"valid_targets_mean": 8960.5,
"valid_targets_min": 5005
},
{
"epoch": 6.556962025316456,
"grad_norm": 0.43147417534943683,
"learning_rate": 5.108180084898284e-07,
"loss": 0.2321,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10713645815849304,
"step": 780,
"valid_targets_mean": 8562.0,
"valid_targets_min": 5754
},
{
"epoch": 6.59915611814346,
"grad_norm": 0.4178467879199177,
"learning_rate": 4.209194552247442e-07,
"loss": 0.2387,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11786434799432755,
"step": 785,
"valid_targets_mean": 8937.8,
"valid_targets_min": 6908
},
{
"epoch": 6.641350210970464,
"grad_norm": 0.4296586265719399,
"learning_rate": 3.3963188024278783e-07,
"loss": 0.2253,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12450215220451355,
"step": 790,
"valid_targets_mean": 8674.1,
"valid_targets_min": 5603
},
{
"epoch": 6.6835443037974684,
"grad_norm": 0.4648325639162333,
"learning_rate": 2.6699103422904494e-07,
"loss": 0.2329,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13395778834819794,
"step": 795,
"valid_targets_mean": 7350.9,
"valid_targets_min": 1899
},
{
"epoch": 6.725738396624473,
"grad_norm": 0.4397893931280253,
"learning_rate": 2.030288649936285e-07,
"loss": 0.2385,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10574311763048172,
"step": 800,
"valid_targets_mean": 7157.0,
"valid_targets_min": 850
},
{
"epoch": 6.767932489451477,
"grad_norm": 0.43270603582901673,
"learning_rate": 1.477735034208805e-07,
"loss": 0.2305,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12294603884220123,
"step": 805,
"valid_targets_mean": 8442.5,
"valid_targets_min": 3355
},
{
"epoch": 6.810126582278481,
"grad_norm": 0.4125142256044064,
"learning_rate": 1.0124925109725514e-07,
"loss": 0.2394,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14932481944561005,
"step": 810,
"valid_targets_mean": 9925.2,
"valid_targets_min": 8745
},
{
"epoch": 6.852320675105485,
"grad_norm": 0.41761981991702074,
"learning_rate": 6.347656962335524e-08,
"loss": 0.226,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09456837177276611,
"step": 815,
"valid_targets_mean": 7658.6,
"valid_targets_min": 4375
},
{
"epoch": 6.894514767932489,
"grad_norm": 0.5241537287811314,
"learning_rate": 3.447207161483279e-08,
"loss": 0.2253,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10600066184997559,
"step": 820,
"valid_targets_mean": 7051.8,
"valid_targets_min": 878
},
{
"epoch": 6.936708860759493,
"grad_norm": 0.42822827158756854,
"learning_rate": 1.424851339606903e-08,
"loss": 0.2264,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11232862621545792,
"step": 825,
"valid_targets_mean": 9165.9,
"valid_targets_min": 7143
},
{
"epoch": 6.978902953586498,
"grad_norm": 0.4390460209806349,
"learning_rate": 2.8147893898755605e-09,
"loss": 0.2299,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10721021145582199,
"step": 830,
"valid_targets_mean": 9302.9,
"valid_targets_min": 8063
},
{
"epoch": 7.0,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22618243098258972,
"step": 833,
"total_flos": 6.212778952973353e+17,
"train_loss": 0.28838339830790105,
"train_runtime": 12171.8398,
"train_samples_per_second": 1.088,
"train_steps_per_second": 0.068,
"valid_targets_mean": 9237.2,
"valid_targets_min": 6486
}
],
"logging_steps": 5,
"max_steps": 833,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.212778952973353e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}