Files
Kimi-2-5-r2egym_sandboxes-m…/trainer_state.json
ModelHub XC ce36340a35 初始化项目,由ModelHub XC社区提供模型
Model: laion/Kimi-2-5-r2egym_sandboxes-maxeps-32k__Qwen3-8B
Source: Original Platform
2026-06-03 04:25:21 +08:00

4202 lines
116 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.0,
"eval_steps": 500,
"global_step": 1890,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.018518518518518517,
"grad_norm": 27.69713638899012,
"learning_rate": 8.465608465608466e-07,
"loss": 0.8879,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4629935324192047,
"step": 5,
"valid_targets_mean": 5865.4,
"valid_targets_min": 789
},
{
"epoch": 0.037037037037037035,
"grad_norm": 23.290742086079,
"learning_rate": 1.904761904761905e-06,
"loss": 0.87,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4101138412952423,
"step": 10,
"valid_targets_mean": 6581.6,
"valid_targets_min": 4761
},
{
"epoch": 0.05555555555555555,
"grad_norm": 13.68077632229009,
"learning_rate": 2.962962962962963e-06,
"loss": 0.7768,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.370077908039093,
"step": 15,
"valid_targets_mean": 7269.0,
"valid_targets_min": 3491
},
{
"epoch": 0.07407407407407407,
"grad_norm": 4.83254717026584,
"learning_rate": 4.0211640211640215e-06,
"loss": 0.6639,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3309337794780731,
"step": 20,
"valid_targets_mean": 6683.6,
"valid_targets_min": 1240
},
{
"epoch": 0.09259259259259259,
"grad_norm": 2.428166466749575,
"learning_rate": 5.07936507936508e-06,
"loss": 0.6093,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3472573757171631,
"step": 25,
"valid_targets_mean": 5540.1,
"valid_targets_min": 501
},
{
"epoch": 0.1111111111111111,
"grad_norm": 1.8170151536729782,
"learning_rate": 6.137566137566138e-06,
"loss": 0.577,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.288746178150177,
"step": 30,
"valid_targets_mean": 7024.4,
"valid_targets_min": 413
},
{
"epoch": 0.12962962962962962,
"grad_norm": 1.360405445125232,
"learning_rate": 7.195767195767196e-06,
"loss": 0.546,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3221224844455719,
"step": 35,
"valid_targets_mean": 6184.5,
"valid_targets_min": 4369
},
{
"epoch": 0.14814814814814814,
"grad_norm": 0.9196969375594887,
"learning_rate": 8.253968253968254e-06,
"loss": 0.5352,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2751440405845642,
"step": 40,
"valid_targets_mean": 6822.4,
"valid_targets_min": 4627
},
{
"epoch": 0.16666666666666666,
"grad_norm": 0.765133314763819,
"learning_rate": 9.312169312169313e-06,
"loss": 0.5021,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3200388550758362,
"step": 45,
"valid_targets_mean": 7612.6,
"valid_targets_min": 4008
},
{
"epoch": 0.18518518518518517,
"grad_norm": 0.6845104561118748,
"learning_rate": 1.037037037037037e-05,
"loss": 0.4855,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2742794156074524,
"step": 50,
"valid_targets_mean": 7739.4,
"valid_targets_min": 391
},
{
"epoch": 0.2037037037037037,
"grad_norm": 0.627723124248625,
"learning_rate": 1.1428571428571429e-05,
"loss": 0.4741,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24860906600952148,
"step": 55,
"valid_targets_mean": 5759.1,
"valid_targets_min": 479
},
{
"epoch": 0.2222222222222222,
"grad_norm": 0.5488873482745417,
"learning_rate": 1.2486772486772486e-05,
"loss": 0.4688,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23666323721408844,
"step": 60,
"valid_targets_mean": 7891.8,
"valid_targets_min": 6024
},
{
"epoch": 0.24074074074074073,
"grad_norm": 0.5404794526206105,
"learning_rate": 1.3544973544973545e-05,
"loss": 0.4433,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21840626001358032,
"step": 65,
"valid_targets_mean": 7151.1,
"valid_targets_min": 3780
},
{
"epoch": 0.25925925925925924,
"grad_norm": 0.5380782703461664,
"learning_rate": 1.4603174603174603e-05,
"loss": 0.4242,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21182352304458618,
"step": 70,
"valid_targets_mean": 7776.1,
"valid_targets_min": 5679
},
{
"epoch": 0.2777777777777778,
"grad_norm": 0.5450893010749523,
"learning_rate": 1.5661375661375662e-05,
"loss": 0.4293,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20561331510543823,
"step": 75,
"valid_targets_mean": 6248.5,
"valid_targets_min": 445
},
{
"epoch": 0.2962962962962963,
"grad_norm": 0.5317555635167633,
"learning_rate": 1.671957671957672e-05,
"loss": 0.4256,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2224772423505783,
"step": 80,
"valid_targets_mean": 7044.6,
"valid_targets_min": 4290
},
{
"epoch": 0.3148148148148148,
"grad_norm": 0.5450655757884623,
"learning_rate": 1.7777777777777777e-05,
"loss": 0.3943,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23075850307941437,
"step": 85,
"valid_targets_mean": 6590.8,
"valid_targets_min": 2602
},
{
"epoch": 0.3333333333333333,
"grad_norm": 0.4891965175751207,
"learning_rate": 1.8835978835978836e-05,
"loss": 0.3825,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21840187907218933,
"step": 90,
"valid_targets_mean": 7733.4,
"valid_targets_min": 4802
},
{
"epoch": 0.35185185185185186,
"grad_norm": 0.5051228466360034,
"learning_rate": 1.9894179894179895e-05,
"loss": 0.3811,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18121924996376038,
"step": 95,
"valid_targets_mean": 7083.4,
"valid_targets_min": 3807
},
{
"epoch": 0.37037037037037035,
"grad_norm": 0.6045358159055502,
"learning_rate": 2.0952380952380954e-05,
"loss": 0.3904,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2092483937740326,
"step": 100,
"valid_targets_mean": 5927.2,
"valid_targets_min": 3887
},
{
"epoch": 0.3888888888888889,
"grad_norm": 0.5039852241565205,
"learning_rate": 2.2010582010582013e-05,
"loss": 0.3697,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16192597150802612,
"step": 105,
"valid_targets_mean": 6823.8,
"valid_targets_min": 2851
},
{
"epoch": 0.4074074074074074,
"grad_norm": 0.543344814617639,
"learning_rate": 2.3068783068783072e-05,
"loss": 0.366,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21563449501991272,
"step": 110,
"valid_targets_mean": 7248.0,
"valid_targets_min": 4263
},
{
"epoch": 0.42592592592592593,
"grad_norm": 0.51463194661137,
"learning_rate": 2.4126984126984128e-05,
"loss": 0.3673,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20645663142204285,
"step": 115,
"valid_targets_mean": 7274.1,
"valid_targets_min": 5029
},
{
"epoch": 0.4444444444444444,
"grad_norm": 0.5022729085110892,
"learning_rate": 2.5185185185185187e-05,
"loss": 0.3513,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1954728364944458,
"step": 120,
"valid_targets_mean": 8162.1,
"valid_targets_min": 4772
},
{
"epoch": 0.46296296296296297,
"grad_norm": 0.48849343768132747,
"learning_rate": 2.6243386243386246e-05,
"loss": 0.3764,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22054851055145264,
"step": 125,
"valid_targets_mean": 9386.2,
"valid_targets_min": 5811
},
{
"epoch": 0.48148148148148145,
"grad_norm": 0.6066721985482638,
"learning_rate": 2.7301587301587305e-05,
"loss": 0.3552,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17588719725608826,
"step": 130,
"valid_targets_mean": 4571.5,
"valid_targets_min": 1134
},
{
"epoch": 0.5,
"grad_norm": 0.4931053049682215,
"learning_rate": 2.835978835978836e-05,
"loss": 0.3687,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14577624201774597,
"step": 135,
"valid_targets_mean": 6307.6,
"valid_targets_min": 3966
},
{
"epoch": 0.5185185185185185,
"grad_norm": 0.6073682337976894,
"learning_rate": 2.941798941798942e-05,
"loss": 0.3514,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17465010285377502,
"step": 140,
"valid_targets_mean": 6394.0,
"valid_targets_min": 269
},
{
"epoch": 0.5370370370370371,
"grad_norm": 0.5024739310945823,
"learning_rate": 3.047619047619048e-05,
"loss": 0.3496,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1507670283317566,
"step": 145,
"valid_targets_mean": 6376.0,
"valid_targets_min": 466
},
{
"epoch": 0.5555555555555556,
"grad_norm": 0.5277273202526614,
"learning_rate": 3.153439153439154e-05,
"loss": 0.3585,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18677863478660583,
"step": 150,
"valid_targets_mean": 7588.2,
"valid_targets_min": 3447
},
{
"epoch": 0.5740740740740741,
"grad_norm": 0.4927981212250953,
"learning_rate": 3.259259259259259e-05,
"loss": 0.343,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15430906414985657,
"step": 155,
"valid_targets_mean": 6208.2,
"valid_targets_min": 1884
},
{
"epoch": 0.5925925925925926,
"grad_norm": 0.5955616973209993,
"learning_rate": 3.3650793650793656e-05,
"loss": 0.3475,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16549400985240936,
"step": 160,
"valid_targets_mean": 5810.4,
"valid_targets_min": 409
},
{
"epoch": 0.6111111111111112,
"grad_norm": 0.5308592994318894,
"learning_rate": 3.470899470899471e-05,
"loss": 0.3252,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16406959295272827,
"step": 165,
"valid_targets_mean": 7197.9,
"valid_targets_min": 4781
},
{
"epoch": 0.6296296296296297,
"grad_norm": 0.5786264253850772,
"learning_rate": 3.576719576719577e-05,
"loss": 0.334,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14584289491176605,
"step": 170,
"valid_targets_mean": 5856.1,
"valid_targets_min": 3947
},
{
"epoch": 0.6481481481481481,
"grad_norm": 0.5876267301381748,
"learning_rate": 3.682539682539683e-05,
"loss": 0.3364,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19383975863456726,
"step": 175,
"valid_targets_mean": 6151.6,
"valid_targets_min": 298
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.5579841097601295,
"learning_rate": 3.7883597883597885e-05,
"loss": 0.3355,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19771698117256165,
"step": 180,
"valid_targets_mean": 7475.2,
"valid_targets_min": 5181
},
{
"epoch": 0.6851851851851852,
"grad_norm": 0.5575120093361495,
"learning_rate": 3.894179894179894e-05,
"loss": 0.3306,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13605374097824097,
"step": 185,
"valid_targets_mean": 6225.5,
"valid_targets_min": 2414
},
{
"epoch": 0.7037037037037037,
"grad_norm": 0.5695877338034481,
"learning_rate": 4e-05,
"loss": 0.3331,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19986039400100708,
"step": 190,
"valid_targets_mean": 8463.9,
"valid_targets_min": 4419
},
{
"epoch": 0.7222222222222222,
"grad_norm": 0.5009496122576277,
"learning_rate": 3.999914723760517e-05,
"loss": 0.3364,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1776013821363449,
"step": 195,
"valid_targets_mean": 7864.9,
"valid_targets_min": 5879
},
{
"epoch": 0.7407407407407407,
"grad_norm": 0.5293768737385741,
"learning_rate": 3.999658902314104e-05,
"loss": 0.338,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1694238781929016,
"step": 200,
"valid_targets_mean": 8140.8,
"valid_targets_min": 5488
},
{
"epoch": 0.7592592592592593,
"grad_norm": 0.8375558031870537,
"learning_rate": 3.999232557476252e-05,
"loss": 0.3319,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16278810799121857,
"step": 205,
"valid_targets_mean": 6144.5,
"valid_targets_min": 2437
},
{
"epoch": 0.7777777777777778,
"grad_norm": 0.529694505104515,
"learning_rate": 3.9986357256040465e-05,
"loss": 0.324,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14480090141296387,
"step": 210,
"valid_targets_mean": 6128.9,
"valid_targets_min": 353
},
{
"epoch": 0.7962962962962963,
"grad_norm": 0.5687228641593676,
"learning_rate": 3.997868457593064e-05,
"loss": 0.3187,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16306929290294647,
"step": 215,
"valid_targets_mean": 6418.0,
"valid_targets_min": 3908
},
{
"epoch": 0.8148148148148148,
"grad_norm": 0.47438386161926516,
"learning_rate": 3.996930818873035e-05,
"loss": 0.3186,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14409326016902924,
"step": 220,
"valid_targets_mean": 6047.1,
"valid_targets_min": 304
},
{
"epoch": 0.8333333333333334,
"grad_norm": 0.5486113720670046,
"learning_rate": 3.9958228894022645e-05,
"loss": 0.3106,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15029951930046082,
"step": 225,
"valid_targets_mean": 6173.9,
"valid_targets_min": 3754
},
{
"epoch": 0.8518518518518519,
"grad_norm": 0.5170677946882957,
"learning_rate": 3.994544763660811e-05,
"loss": 0.3071,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16101841628551483,
"step": 230,
"valid_targets_mean": 8052.0,
"valid_targets_min": 5204
},
{
"epoch": 0.8703703703703703,
"grad_norm": 0.5874905209063319,
"learning_rate": 3.993096550642431e-05,
"loss": 0.3234,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21433310210704803,
"step": 235,
"valid_targets_mean": 8033.4,
"valid_targets_min": 4617
},
{
"epoch": 0.8888888888888888,
"grad_norm": 0.5772717839196063,
"learning_rate": 3.991478373845286e-05,
"loss": 0.3187,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15627962350845337,
"step": 240,
"valid_targets_mean": 6618.6,
"valid_targets_min": 2600
},
{
"epoch": 0.9074074074074074,
"grad_norm": 0.5510574817704281,
"learning_rate": 3.989690371261406e-05,
"loss": 0.3214,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16640323400497437,
"step": 245,
"valid_targets_mean": 6397.0,
"valid_targets_min": 456
},
{
"epoch": 0.9259259259259259,
"grad_norm": 0.530761854892717,
"learning_rate": 3.987732695364929e-05,
"loss": 0.3105,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14477106928825378,
"step": 250,
"valid_targets_mean": 5501.1,
"valid_targets_min": 2206
},
{
"epoch": 0.9444444444444444,
"grad_norm": 0.5289325090905338,
"learning_rate": 3.985605513099093e-05,
"loss": 0.3163,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13415177166461945,
"step": 255,
"valid_targets_mean": 6082.1,
"valid_targets_min": 5234
},
{
"epoch": 0.9629629629629629,
"grad_norm": 0.569051693937061,
"learning_rate": 3.983309005862002e-05,
"loss": 0.3324,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13313525915145874,
"step": 260,
"valid_targets_mean": 4545.2,
"valid_targets_min": 669
},
{
"epoch": 0.9814814814814815,
"grad_norm": 0.5353604464324123,
"learning_rate": 3.980843369491159e-05,
"loss": 0.3171,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.199254110455513,
"step": 265,
"valid_targets_mean": 7222.6,
"valid_targets_min": 4705
},
{
"epoch": 1.0,
"grad_norm": 0.5103477604549704,
"learning_rate": 3.9782088142467595e-05,
"loss": 0.3107,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1165643036365509,
"step": 270,
"valid_targets_mean": 5523.9,
"valid_targets_min": 396
},
{
"epoch": 1.0185185185185186,
"grad_norm": 0.5065171933118513,
"learning_rate": 3.975405564793768e-05,
"loss": 0.3119,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21201768517494202,
"step": 275,
"valid_targets_mean": 7694.2,
"valid_targets_min": 434
},
{
"epoch": 1.037037037037037,
"grad_norm": 0.5475032237106019,
"learning_rate": 3.972433860182757e-05,
"loss": 0.2949,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13425172865390778,
"step": 280,
"valid_targets_mean": 6122.1,
"valid_targets_min": 377
},
{
"epoch": 1.0555555555555556,
"grad_norm": 0.59475901438283,
"learning_rate": 3.969293953829519e-05,
"loss": 0.2955,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14180830121040344,
"step": 285,
"valid_targets_mean": 7330.9,
"valid_targets_min": 4746
},
{
"epoch": 1.074074074074074,
"grad_norm": 0.4732078699188954,
"learning_rate": 3.965986113493462e-05,
"loss": 0.2973,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14922069013118744,
"step": 290,
"valid_targets_mean": 7192.6,
"valid_targets_min": 3965
},
{
"epoch": 1.0925925925925926,
"grad_norm": 0.5946837128648786,
"learning_rate": 3.9625106212547696e-05,
"loss": 0.2955,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12483422458171844,
"step": 295,
"valid_targets_mean": 4485.5,
"valid_targets_min": 461
},
{
"epoch": 1.1111111111111112,
"grad_norm": 0.48537206737996996,
"learning_rate": 3.9588677734903505e-05,
"loss": 0.289,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14004018902778625,
"step": 300,
"valid_targets_mean": 7126.4,
"valid_targets_min": 2065
},
{
"epoch": 1.1296296296296295,
"grad_norm": 0.5868179766871565,
"learning_rate": 3.955057880848563e-05,
"loss": 0.2944,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1457635760307312,
"step": 305,
"valid_targets_mean": 7584.9,
"valid_targets_min": 3016
},
{
"epoch": 1.1481481481481481,
"grad_norm": 0.5531573803747889,
"learning_rate": 3.9510812682227245e-05,
"loss": 0.3144,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1710018515586853,
"step": 310,
"valid_targets_mean": 6904.5,
"valid_targets_min": 4493
},
{
"epoch": 1.1666666666666667,
"grad_norm": 0.5478913235405003,
"learning_rate": 3.946938274723405e-05,
"loss": 0.2974,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15083035826683044,
"step": 315,
"valid_targets_mean": 5984.2,
"valid_targets_min": 308
},
{
"epoch": 1.1851851851851851,
"grad_norm": 0.47833830640118885,
"learning_rate": 3.9426292536495114e-05,
"loss": 0.3051,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15671434998512268,
"step": 320,
"valid_targets_mean": 8063.6,
"valid_targets_min": 5433
},
{
"epoch": 1.2037037037037037,
"grad_norm": 0.5349890076251889,
"learning_rate": 3.938154572458156e-05,
"loss": 0.2926,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14529670774936676,
"step": 325,
"valid_targets_mean": 6739.8,
"valid_targets_min": 5419
},
{
"epoch": 1.2222222222222223,
"grad_norm": 0.4994576555333995,
"learning_rate": 3.9335146127333245e-05,
"loss": 0.2906,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15106892585754395,
"step": 330,
"valid_targets_mean": 7467.2,
"valid_targets_min": 3877
},
{
"epoch": 1.2407407407407407,
"grad_norm": 0.5935948253238553,
"learning_rate": 3.928709770153332e-05,
"loss": 0.2957,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14414165914058685,
"step": 335,
"valid_targets_mean": 4819.1,
"valid_targets_min": 315
},
{
"epoch": 1.2592592592592593,
"grad_norm": 0.5524891188597137,
"learning_rate": 3.923740454457087e-05,
"loss": 0.2919,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1580236554145813,
"step": 340,
"valid_targets_mean": 6985.0,
"valid_targets_min": 474
},
{
"epoch": 1.2777777777777777,
"grad_norm": 0.5456561849473739,
"learning_rate": 3.9186070894091433e-05,
"loss": 0.3006,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18431127071380615,
"step": 345,
"valid_targets_mean": 6896.9,
"valid_targets_min": 3800
},
{
"epoch": 1.2962962962962963,
"grad_norm": 0.5624233622754709,
"learning_rate": 3.9133101127635684e-05,
"loss": 0.2892,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15472449362277985,
"step": 350,
"valid_targets_mean": 7580.0,
"valid_targets_min": 367
},
{
"epoch": 1.3148148148148149,
"grad_norm": 0.5889370024503714,
"learning_rate": 3.9078499762266124e-05,
"loss": 0.2896,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12132951617240906,
"step": 355,
"valid_targets_mean": 5551.5,
"valid_targets_min": 470
},
{
"epoch": 1.3333333333333333,
"grad_norm": 0.5062166048938903,
"learning_rate": 3.902227145418185e-05,
"loss": 0.2883,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1225079745054245,
"step": 360,
"valid_targets_mean": 5971.0,
"valid_targets_min": 368
},
{
"epoch": 1.3518518518518519,
"grad_norm": 0.47488441470103737,
"learning_rate": 3.896442099832153e-05,
"loss": 0.2855,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12804746627807617,
"step": 365,
"valid_targets_mean": 6091.5,
"valid_targets_min": 1976
},
{
"epoch": 1.3703703703703702,
"grad_norm": 0.5406355069355051,
"learning_rate": 3.89049533279545e-05,
"loss": 0.295,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18195563554763794,
"step": 370,
"valid_targets_mean": 6963.6,
"valid_targets_min": 480
},
{
"epoch": 1.3888888888888888,
"grad_norm": 0.5574169233693884,
"learning_rate": 3.884387351426005e-05,
"loss": 0.2924,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15529048442840576,
"step": 375,
"valid_targets_mean": 6006.2,
"valid_targets_min": 541
},
{
"epoch": 1.4074074074074074,
"grad_norm": 0.535890457895036,
"learning_rate": 3.8781186765895e-05,
"loss": 0.2897,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15748731791973114,
"step": 380,
"valid_targets_mean": 6537.9,
"valid_targets_min": 4917
},
{
"epoch": 1.425925925925926,
"grad_norm": 0.5095040628778826,
"learning_rate": 3.8716898428549526e-05,
"loss": 0.2847,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09297989308834076,
"step": 385,
"valid_targets_mean": 4088.0,
"valid_targets_min": 304
},
{
"epoch": 1.4444444444444444,
"grad_norm": 0.4716182875282023,
"learning_rate": 3.865101398449127e-05,
"loss": 0.2942,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1432163268327713,
"step": 390,
"valid_targets_mean": 7432.4,
"valid_targets_min": 5799
},
{
"epoch": 1.462962962962963,
"grad_norm": 0.5386315145255598,
"learning_rate": 3.858353905209787e-05,
"loss": 0.2921,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17763468623161316,
"step": 395,
"valid_targets_mean": 7208.8,
"valid_targets_min": 4700
},
{
"epoch": 1.4814814814814814,
"grad_norm": 0.5378548282182605,
"learning_rate": 3.8514479385377813e-05,
"loss": 0.2919,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15650640428066254,
"step": 400,
"valid_targets_mean": 6712.2,
"valid_targets_min": 1883
},
{
"epoch": 1.5,
"grad_norm": 0.5142301123444354,
"learning_rate": 3.844384087347978e-05,
"loss": 0.3043,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1322391927242279,
"step": 405,
"valid_targets_mean": 6352.9,
"valid_targets_min": 3290
},
{
"epoch": 1.5185185185185186,
"grad_norm": 0.5505544471616537,
"learning_rate": 3.837162954019042e-05,
"loss": 0.2814,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14132408797740936,
"step": 410,
"valid_targets_mean": 6377.4,
"valid_targets_min": 3935
},
{
"epoch": 1.5370370370370372,
"grad_norm": 0.6229636570648036,
"learning_rate": 3.829785154342069e-05,
"loss": 0.2912,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15506146848201752,
"step": 415,
"valid_targets_mean": 7248.5,
"valid_targets_min": 5048
},
{
"epoch": 1.5555555555555556,
"grad_norm": 0.7699539965412168,
"learning_rate": 3.822251317468073e-05,
"loss": 0.2915,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13349926471710205,
"step": 420,
"valid_targets_mean": 6603.4,
"valid_targets_min": 336
},
{
"epoch": 1.574074074074074,
"grad_norm": 0.4695932327596188,
"learning_rate": 3.814562085854328e-05,
"loss": 0.2963,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14610108733177185,
"step": 425,
"valid_targets_mean": 7011.5,
"valid_targets_min": 457
},
{
"epoch": 1.5925925925925926,
"grad_norm": 0.572965259354651,
"learning_rate": 3.8067181152095935e-05,
"loss": 0.2755,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12474419176578522,
"step": 430,
"valid_targets_mean": 6207.2,
"valid_targets_min": 368
},
{
"epoch": 1.6111111111111112,
"grad_norm": 0.5072200634901262,
"learning_rate": 3.7987200744381866e-05,
"loss": 0.2787,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14492423832416534,
"step": 435,
"valid_targets_mean": 6685.0,
"valid_targets_min": 3491
},
{
"epoch": 1.6296296296296298,
"grad_norm": 0.5315100425545446,
"learning_rate": 3.790568645582949e-05,
"loss": 0.2889,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12990285456180573,
"step": 440,
"valid_targets_mean": 5514.2,
"valid_targets_min": 487
},
{
"epoch": 1.6481481481481481,
"grad_norm": 0.48299403677554154,
"learning_rate": 3.7822645237670786e-05,
"loss": 0.2859,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1585882604122162,
"step": 445,
"valid_targets_mean": 8019.8,
"valid_targets_min": 4406
},
{
"epoch": 1.6666666666666665,
"grad_norm": 0.5281572977294482,
"learning_rate": 3.773808417134857e-05,
"loss": 0.2927,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1543073058128357,
"step": 450,
"valid_targets_mean": 7052.0,
"valid_targets_min": 3687
},
{
"epoch": 1.6851851851851851,
"grad_norm": 0.4845837149564397,
"learning_rate": 3.7652010467912586e-05,
"loss": 0.2949,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15963464975357056,
"step": 455,
"valid_targets_mean": 8213.2,
"valid_targets_min": 6411
},
{
"epoch": 1.7037037037037037,
"grad_norm": 0.5408032989769307,
"learning_rate": 3.756443146740457e-05,
"loss": 0.2984,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13530519604682922,
"step": 460,
"valid_targets_mean": 5813.6,
"valid_targets_min": 3993
},
{
"epoch": 1.7222222222222223,
"grad_norm": 0.5353189538043924,
"learning_rate": 3.7475354638232364e-05,
"loss": 0.2744,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10430891811847687,
"step": 465,
"valid_targets_mean": 4659.6,
"valid_targets_min": 1661
},
{
"epoch": 1.7407407407407407,
"grad_norm": 0.5226421018247178,
"learning_rate": 3.7384787576532955e-05,
"loss": 0.2848,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1390485018491745,
"step": 470,
"valid_targets_mean": 7290.6,
"valid_targets_min": 810
},
{
"epoch": 1.7592592592592593,
"grad_norm": 0.49394601207037214,
"learning_rate": 3.729273800552482e-05,
"loss": 0.2847,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1647380143404007,
"step": 475,
"valid_targets_mean": 7383.2,
"valid_targets_min": 5781
},
{
"epoch": 1.7777777777777777,
"grad_norm": 0.6224532686212161,
"learning_rate": 3.719921377484919e-05,
"loss": 0.2841,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14958155155181885,
"step": 480,
"valid_targets_mean": 6402.1,
"valid_targets_min": 4041
},
{
"epoch": 1.7962962962962963,
"grad_norm": 0.5990070202349348,
"learning_rate": 3.710422285990078e-05,
"loss": 0.2858,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1453811526298523,
"step": 485,
"valid_targets_mean": 5552.4,
"valid_targets_min": 192
},
{
"epoch": 1.8148148148148149,
"grad_norm": 0.47961741304787425,
"learning_rate": 3.700777336114758e-05,
"loss": 0.2809,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13036441802978516,
"step": 490,
"valid_targets_mean": 6451.1,
"valid_targets_min": 307
},
{
"epoch": 1.8333333333333335,
"grad_norm": 0.5499567840797964,
"learning_rate": 3.690987350344017e-05,
"loss": 0.2686,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11820720136165619,
"step": 495,
"valid_targets_mean": 6068.8,
"valid_targets_min": 487
},
{
"epoch": 1.8518518518518519,
"grad_norm": 0.5346461604872333,
"learning_rate": 3.681053163531024e-05,
"loss": 0.2839,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1634947955608368,
"step": 500,
"valid_targets_mean": 7725.8,
"valid_targets_min": 4799
},
{
"epoch": 1.8703703703703702,
"grad_norm": 0.5253112100424155,
"learning_rate": 3.6709756228258735e-05,
"loss": 0.2876,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13562235236167908,
"step": 505,
"valid_targets_mean": 6321.1,
"valid_targets_min": 3914
},
{
"epoch": 1.8888888888888888,
"grad_norm": 0.49379712991928504,
"learning_rate": 3.66075558760334e-05,
"loss": 0.2768,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11750568449497223,
"step": 510,
"valid_targets_mean": 6793.4,
"valid_targets_min": 321
},
{
"epoch": 1.9074074074074074,
"grad_norm": 0.5151893286966898,
"learning_rate": 3.6503939293895945e-05,
"loss": 0.2815,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14757773280143738,
"step": 515,
"valid_targets_mean": 8412.4,
"valid_targets_min": 6081
},
{
"epoch": 1.925925925925926,
"grad_norm": 0.5044055970161373,
"learning_rate": 3.639891531787885e-05,
"loss": 0.2634,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14695027470588684,
"step": 520,
"valid_targets_mean": 6752.4,
"valid_targets_min": 245
},
{
"epoch": 1.9444444444444444,
"grad_norm": 0.5430440383866891,
"learning_rate": 3.6292492904031844e-05,
"loss": 0.2853,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11272375285625458,
"step": 525,
"valid_targets_mean": 4375.9,
"valid_targets_min": 357
},
{
"epoch": 1.9629629629629628,
"grad_norm": 0.535911996260599,
"learning_rate": 3.6184681127658166e-05,
"loss": 0.2824,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1217561811208725,
"step": 530,
"valid_targets_mean": 4825.1,
"valid_targets_min": 403
},
{
"epoch": 1.9814814814814814,
"grad_norm": 0.4652442437519838,
"learning_rate": 3.607548918254068e-05,
"loss": 0.2859,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13177233934402466,
"step": 535,
"valid_targets_mean": 7426.4,
"valid_targets_min": 4939
},
{
"epoch": 2.0,
"grad_norm": 0.526551082703696,
"learning_rate": 3.5964926380157856e-05,
"loss": 0.2848,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16320262849330902,
"step": 540,
"valid_targets_mean": 7920.5,
"valid_targets_min": 4695
},
{
"epoch": 2.0185185185185186,
"grad_norm": 0.5416505181112615,
"learning_rate": 3.585300214888971e-05,
"loss": 0.2666,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14433258771896362,
"step": 545,
"valid_targets_mean": 7458.1,
"valid_targets_min": 3929
},
{
"epoch": 2.037037037037037,
"grad_norm": 0.574840102953103,
"learning_rate": 3.5739726033213785e-05,
"loss": 0.2608,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11546637117862701,
"step": 550,
"valid_targets_mean": 5356.0,
"valid_targets_min": 385
},
{
"epoch": 2.0555555555555554,
"grad_norm": 0.5607643377480926,
"learning_rate": 3.562510769289124e-05,
"loss": 0.2689,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13067400455474854,
"step": 555,
"valid_targets_mean": 6073.8,
"valid_targets_min": 250
},
{
"epoch": 2.074074074074074,
"grad_norm": 0.5218117367647941,
"learning_rate": 3.550915690214313e-05,
"loss": 0.2641,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14398810267448425,
"step": 560,
"valid_targets_mean": 6835.8,
"valid_targets_min": 3422
},
{
"epoch": 2.0925925925925926,
"grad_norm": 0.5934505930381603,
"learning_rate": 3.539188354881685e-05,
"loss": 0.2715,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13066671788692474,
"step": 565,
"valid_targets_mean": 6965.4,
"valid_targets_min": 4297
},
{
"epoch": 2.111111111111111,
"grad_norm": 0.4708347758965413,
"learning_rate": 3.527329763354295e-05,
"loss": 0.26,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12042200565338135,
"step": 570,
"valid_targets_mean": 7412.5,
"valid_targets_min": 3336
},
{
"epoch": 2.1296296296296298,
"grad_norm": 0.5708517268952519,
"learning_rate": 3.515340926888236e-05,
"loss": 0.2575,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1318579465150833,
"step": 575,
"valid_targets_mean": 6028.6,
"valid_targets_min": 4547
},
{
"epoch": 2.148148148148148,
"grad_norm": 0.5178407866313944,
"learning_rate": 3.503222867846397e-05,
"loss": 0.2616,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10885745286941528,
"step": 580,
"valid_targets_mean": 5752.1,
"valid_targets_min": 257
},
{
"epoch": 2.1666666666666665,
"grad_norm": 0.5863919284533499,
"learning_rate": 3.490976619611282e-05,
"loss": 0.2655,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13897745311260223,
"step": 585,
"valid_targets_mean": 6269.6,
"valid_targets_min": 3887
},
{
"epoch": 2.185185185185185,
"grad_norm": 0.5881076768641105,
"learning_rate": 3.47860322649689e-05,
"loss": 0.2611,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08158881962299347,
"step": 590,
"valid_targets_mean": 4183.5,
"valid_targets_min": 353
},
{
"epoch": 2.2037037037037037,
"grad_norm": 0.5449342889554193,
"learning_rate": 3.4661037436596526e-05,
"loss": 0.2686,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12910135090351105,
"step": 595,
"valid_targets_mean": 7711.6,
"valid_targets_min": 5163
},
{
"epoch": 2.2222222222222223,
"grad_norm": 0.5061684984724052,
"learning_rate": 3.453479237008465e-05,
"loss": 0.2632,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15672308206558228,
"step": 600,
"valid_targets_mean": 8003.1,
"valid_targets_min": 5234
},
{
"epoch": 2.240740740740741,
"grad_norm": 0.524095008431537,
"learning_rate": 3.4407307831137775e-05,
"loss": 0.259,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12406113743782043,
"step": 605,
"valid_targets_mean": 6011.2,
"valid_targets_min": 2463
},
{
"epoch": 2.259259259259259,
"grad_norm": 0.4572943901172685,
"learning_rate": 3.4278594691157985e-05,
"loss": 0.2769,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1535755693912506,
"step": 610,
"valid_targets_mean": 8442.5,
"valid_targets_min": 4737
},
{
"epoch": 2.2777777777777777,
"grad_norm": 0.48298994041640353,
"learning_rate": 3.4148663926317826e-05,
"loss": 0.2567,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10671807080507278,
"step": 615,
"valid_targets_mean": 6054.6,
"valid_targets_min": 446
},
{
"epoch": 2.2962962962962963,
"grad_norm": 0.5492340875103793,
"learning_rate": 3.401752661662431e-05,
"loss": 0.2618,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15312159061431885,
"step": 620,
"valid_targets_mean": 6727.8,
"valid_targets_min": 2786
},
{
"epoch": 2.314814814814815,
"grad_norm": 0.5392228802956868,
"learning_rate": 3.388519394497408e-05,
"loss": 0.2635,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13924045860767365,
"step": 625,
"valid_targets_mean": 6856.8,
"valid_targets_min": 5211
},
{
"epoch": 2.3333333333333335,
"grad_norm": 0.5233591164759019,
"learning_rate": 3.375167719619972e-05,
"loss": 0.2628,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14242339134216309,
"step": 630,
"valid_targets_mean": 6251.1,
"valid_targets_min": 405
},
{
"epoch": 2.351851851851852,
"grad_norm": 0.4419020062221808,
"learning_rate": 3.361698775610748e-05,
"loss": 0.2569,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12432446330785751,
"step": 635,
"valid_targets_mean": 6905.8,
"valid_targets_min": 4639
},
{
"epoch": 2.3703703703703702,
"grad_norm": 0.5468809658880694,
"learning_rate": 3.3481137110506305e-05,
"loss": 0.2613,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15010154247283936,
"step": 640,
"valid_targets_mean": 5742.6,
"valid_targets_min": 396
},
{
"epoch": 2.388888888888889,
"grad_norm": 0.4848873250194103,
"learning_rate": 3.334413684422839e-05,
"loss": 0.2691,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14694947004318237,
"step": 645,
"valid_targets_mean": 8914.1,
"valid_targets_min": 6186
},
{
"epoch": 2.4074074074074074,
"grad_norm": 0.4912012424730424,
"learning_rate": 3.3205998640141255e-05,
"loss": 0.2658,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12130933254957199,
"step": 650,
"valid_targets_mean": 6549.1,
"valid_targets_min": 415
},
{
"epoch": 2.425925925925926,
"grad_norm": 0.4437323909959092,
"learning_rate": 3.3066734278151464e-05,
"loss": 0.2639,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11655676364898682,
"step": 655,
"valid_targets_mean": 6480.2,
"valid_targets_min": 117
},
{
"epoch": 2.4444444444444446,
"grad_norm": 0.4719184803629788,
"learning_rate": 3.292635563420009e-05,
"loss": 0.2585,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12040881812572479,
"step": 660,
"valid_targets_mean": 5791.6,
"valid_targets_min": 421
},
{
"epoch": 2.462962962962963,
"grad_norm": 0.5216259861354975,
"learning_rate": 3.2784874679250026e-05,
"loss": 0.2588,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12373167276382446,
"step": 665,
"valid_targets_mean": 6689.1,
"valid_targets_min": 4072
},
{
"epoch": 2.4814814814814814,
"grad_norm": 0.4482229745629832,
"learning_rate": 3.264230347826504e-05,
"loss": 0.2689,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13821570575237274,
"step": 670,
"valid_targets_mean": 7872.4,
"valid_targets_min": 4265
},
{
"epoch": 2.5,
"grad_norm": 0.4688397809024902,
"learning_rate": 3.249865418918102e-05,
"loss": 0.2689,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12180155515670776,
"step": 675,
"valid_targets_mean": 8303.1,
"valid_targets_min": 5882
},
{
"epoch": 2.5185185185185186,
"grad_norm": 0.5067336932293169,
"learning_rate": 3.2353939061869145e-05,
"loss": 0.2624,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17937231063842773,
"step": 680,
"valid_targets_mean": 7677.5,
"valid_targets_min": 385
},
{
"epoch": 2.537037037037037,
"grad_norm": 0.495004907760756,
"learning_rate": 3.2208170437091267e-05,
"loss": 0.2754,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12227485328912735,
"step": 685,
"valid_targets_mean": 6554.2,
"valid_targets_min": 460
},
{
"epoch": 2.5555555555555554,
"grad_norm": 0.46262512362964875,
"learning_rate": 3.206136074544754e-05,
"loss": 0.2632,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1247977614402771,
"step": 690,
"valid_targets_mean": 5935.9,
"valid_targets_min": 413
},
{
"epoch": 2.574074074074074,
"grad_norm": 0.577326499426772,
"learning_rate": 3.1913522506316396e-05,
"loss": 0.2688,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12780506908893585,
"step": 695,
"valid_targets_mean": 6527.5,
"valid_targets_min": 229
},
{
"epoch": 2.5925925925925926,
"grad_norm": 0.5017879104054451,
"learning_rate": 3.17646683267869e-05,
"loss": 0.2683,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14587455987930298,
"step": 700,
"valid_targets_mean": 6552.1,
"valid_targets_min": 1068
},
{
"epoch": 2.611111111111111,
"grad_norm": 0.5078274256321608,
"learning_rate": 3.161481090058374e-05,
"loss": 0.2677,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13253484666347504,
"step": 705,
"valid_targets_mean": 7849.0,
"valid_targets_min": 5687
},
{
"epoch": 2.6296296296296298,
"grad_norm": 0.48831962900628234,
"learning_rate": 3.146396300698467e-05,
"loss": 0.2603,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11534512788057327,
"step": 710,
"valid_targets_mean": 5541.4,
"valid_targets_min": 475
},
{
"epoch": 2.648148148148148,
"grad_norm": 1.7586484361894488,
"learning_rate": 3.1312137509730776e-05,
"loss": 0.2474,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10342179238796234,
"step": 715,
"valid_targets_mean": 4904.9,
"valid_targets_min": 3551
},
{
"epoch": 2.6666666666666665,
"grad_norm": 0.5727289688315167,
"learning_rate": 3.115934735592954e-05,
"loss": 0.2506,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15417921543121338,
"step": 720,
"valid_targets_mean": 5637.8,
"valid_targets_min": 457
},
{
"epoch": 2.685185185185185,
"grad_norm": 0.5013909576024947,
"learning_rate": 3.10056055749507e-05,
"loss": 0.253,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12603460252285004,
"step": 725,
"valid_targets_mean": 7198.4,
"valid_targets_min": 4333
},
{
"epoch": 2.7037037037037037,
"grad_norm": 0.5511286906952175,
"learning_rate": 3.0850925277315193e-05,
"loss": 0.2614,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1324155628681183,
"step": 730,
"valid_targets_mean": 6283.8,
"valid_targets_min": 501
},
{
"epoch": 2.7222222222222223,
"grad_norm": 0.5435986623296872,
"learning_rate": 3.0695319653577116e-05,
"loss": 0.2538,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11377684026956558,
"step": 735,
"valid_targets_mean": 5828.4,
"valid_targets_min": 3486
},
{
"epoch": 2.7407407407407405,
"grad_norm": 0.48998670607021577,
"learning_rate": 3.0538801973198914e-05,
"loss": 0.2583,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11242972314357758,
"step": 740,
"valid_targets_mean": 6409.4,
"valid_targets_min": 3687
},
{
"epoch": 2.7592592592592595,
"grad_norm": 0.5181670834094368,
"learning_rate": 3.0381385583419783e-05,
"loss": 0.2762,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11965565383434296,
"step": 745,
"valid_targets_mean": 6171.4,
"valid_targets_min": 3598
},
{
"epoch": 2.7777777777777777,
"grad_norm": 0.4892760482968971,
"learning_rate": 3.0223083908117466e-05,
"loss": 0.2551,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1174713522195816,
"step": 750,
"valid_targets_mean": 6405.0,
"valid_targets_min": 4515
},
{
"epoch": 2.7962962962962963,
"grad_norm": 0.6775974514440799,
"learning_rate": 3.0063910446663542e-05,
"loss": 0.2613,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12394280731678009,
"step": 755,
"valid_targets_mean": 5689.2,
"valid_targets_min": 472
},
{
"epoch": 2.814814814814815,
"grad_norm": 0.48632532617444213,
"learning_rate": 2.9903878772772227e-05,
"loss": 0.2561,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12731653451919556,
"step": 760,
"valid_targets_mean": 7426.2,
"valid_targets_min": 3712
},
{
"epoch": 2.8333333333333335,
"grad_norm": 0.46253818350114984,
"learning_rate": 2.9743002533342876e-05,
"loss": 0.2602,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11572107672691345,
"step": 765,
"valid_targets_mean": 7458.6,
"valid_targets_min": 3852
},
{
"epoch": 2.851851851851852,
"grad_norm": 0.5203069527342822,
"learning_rate": 2.9581295447296202e-05,
"loss": 0.262,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10665138065814972,
"step": 770,
"valid_targets_mean": 5312.9,
"valid_targets_min": 370
},
{
"epoch": 2.8703703703703702,
"grad_norm": 0.508992244499264,
"learning_rate": 2.9418771304404408e-05,
"loss": 0.2529,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13962328433990479,
"step": 775,
"valid_targets_mean": 6861.0,
"valid_targets_min": 2838
},
{
"epoch": 2.888888888888889,
"grad_norm": 0.48349040691849704,
"learning_rate": 2.9255443964115217e-05,
"loss": 0.266,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1340981125831604,
"step": 780,
"valid_targets_mean": 7161.6,
"valid_targets_min": 3762
},
{
"epoch": 2.9074074074074074,
"grad_norm": 0.47898223750895574,
"learning_rate": 2.9091327354370014e-05,
"loss": 0.2538,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13395154476165771,
"step": 785,
"valid_targets_mean": 6627.9,
"valid_targets_min": 69
},
{
"epoch": 2.925925925925926,
"grad_norm": 0.4541657970273562,
"learning_rate": 2.8926435470416123e-05,
"loss": 0.2639,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1160229966044426,
"step": 790,
"valid_targets_mean": 6603.0,
"valid_targets_min": 572
},
{
"epoch": 2.9444444444444446,
"grad_norm": 0.5540368967272006,
"learning_rate": 2.8760782373613322e-05,
"loss": 0.2497,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13160249590873718,
"step": 795,
"valid_targets_mean": 6338.4,
"valid_targets_min": 411
},
{
"epoch": 2.962962962962963,
"grad_norm": 0.7359958376980468,
"learning_rate": 2.859438219023477e-05,
"loss": 0.2654,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15799115598201752,
"step": 800,
"valid_targets_mean": 5611.8,
"valid_targets_min": 273
},
{
"epoch": 2.9814814814814814,
"grad_norm": 0.4824226714339216,
"learning_rate": 2.8427249110262346e-05,
"loss": 0.2508,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1053648293018341,
"step": 805,
"valid_targets_mean": 5927.5,
"valid_targets_min": 489
},
{
"epoch": 3.0,
"grad_norm": 0.5382552432072585,
"learning_rate": 2.8259397386176616e-05,
"loss": 0.2581,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11860036849975586,
"step": 810,
"valid_targets_mean": 6269.6,
"valid_targets_min": 5439
},
{
"epoch": 3.0185185185185186,
"grad_norm": 1.273808109588435,
"learning_rate": 2.809084133174139e-05,
"loss": 0.2365,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09889354556798935,
"step": 815,
"valid_targets_mean": 6494.2,
"valid_targets_min": 4860
},
{
"epoch": 3.037037037037037,
"grad_norm": 0.5296610389294772,
"learning_rate": 2.792159532078314e-05,
"loss": 0.2369,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1007370874285698,
"step": 820,
"valid_targets_mean": 6074.8,
"valid_targets_min": 3754
},
{
"epoch": 3.0555555555555554,
"grad_norm": 0.5109827190620847,
"learning_rate": 2.775167378596522e-05,
"loss": 0.2452,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1191176027059555,
"step": 825,
"valid_targets_mean": 6418.1,
"valid_targets_min": 965
},
{
"epoch": 3.074074074074074,
"grad_norm": 0.5158436295503185,
"learning_rate": 2.7581091217557134e-05,
"loss": 0.2425,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13595688343048096,
"step": 830,
"valid_targets_mean": 6856.4,
"valid_targets_min": 2920
},
{
"epoch": 3.0925925925925926,
"grad_norm": 0.5078354894585061,
"learning_rate": 2.740986216219884e-05,
"loss": 0.2413,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12502720952033997,
"step": 835,
"valid_targets_mean": 6669.6,
"valid_targets_min": 4233
},
{
"epoch": 3.111111111111111,
"grad_norm": 0.5054778272970494,
"learning_rate": 2.7238001221660257e-05,
"loss": 0.2398,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12242163717746735,
"step": 840,
"valid_targets_mean": 8300.9,
"valid_targets_min": 7173
},
{
"epoch": 3.1296296296296298,
"grad_norm": 0.5126564174818152,
"learning_rate": 2.7065523051596114e-05,
"loss": 0.247,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12792211771011353,
"step": 845,
"valid_targets_mean": 6557.4,
"valid_targets_min": 1560
},
{
"epoch": 3.148148148148148,
"grad_norm": 0.6105982078938521,
"learning_rate": 2.6892442360296152e-05,
"loss": 0.2414,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12742607295513153,
"step": 850,
"valid_targets_mean": 6655.6,
"valid_targets_min": 2065
},
{
"epoch": 3.1666666666666665,
"grad_norm": 0.49317723663403246,
"learning_rate": 2.6718773907430847e-05,
"loss": 0.2364,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11097238957881927,
"step": 855,
"valid_targets_mean": 6589.4,
"valid_targets_min": 250
},
{
"epoch": 3.185185185185185,
"grad_norm": 0.5015576290750604,
"learning_rate": 2.6544532502792778e-05,
"loss": 0.2467,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13182875514030457,
"step": 860,
"valid_targets_mean": 6727.2,
"valid_targets_min": 478
},
{
"epoch": 3.2037037037037037,
"grad_norm": 0.5212344941398274,
"learning_rate": 2.6369733005033693e-05,
"loss": 0.2559,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13697285950183868,
"step": 865,
"valid_targets_mean": 7515.0,
"valid_targets_min": 5976
},
{
"epoch": 3.2222222222222223,
"grad_norm": 0.5508648846761911,
"learning_rate": 2.6194390320397426e-05,
"loss": 0.2352,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09470212459564209,
"step": 870,
"valid_targets_mean": 6168.4,
"valid_targets_min": 306
},
{
"epoch": 3.240740740740741,
"grad_norm": 0.44089806796960995,
"learning_rate": 2.601851940144874e-05,
"loss": 0.2532,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09514069557189941,
"step": 875,
"valid_targets_mean": 7256.9,
"valid_targets_min": 5156
},
{
"epoch": 3.259259259259259,
"grad_norm": 0.9948233767091689,
"learning_rate": 2.5842135245798248e-05,
"loss": 0.2346,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11571880429983139,
"step": 880,
"valid_targets_mean": 7016.8,
"valid_targets_min": 488
},
{
"epoch": 3.2777777777777777,
"grad_norm": 0.5404910292302445,
"learning_rate": 2.5665252894823436e-05,
"loss": 0.244,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1577197015285492,
"step": 885,
"valid_targets_mean": 7343.0,
"valid_targets_min": 5338
},
{
"epoch": 3.2962962962962963,
"grad_norm": 0.47094356670247445,
"learning_rate": 2.5487887432386035e-05,
"loss": 0.2415,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13475333154201508,
"step": 890,
"valid_targets_mean": 7784.5,
"valid_targets_min": 6171
},
{
"epoch": 3.314814814814815,
"grad_norm": 0.5046387190727711,
"learning_rate": 2.531005398354569e-05,
"loss": 0.2566,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11572615802288055,
"step": 895,
"valid_targets_mean": 6777.4,
"valid_targets_min": 3828
},
{
"epoch": 3.3333333333333335,
"grad_norm": 0.5357822466955011,
"learning_rate": 2.5131767713270174e-05,
"loss": 0.2357,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12361173331737518,
"step": 900,
"valid_targets_mean": 6211.9,
"valid_targets_min": 407
},
{
"epoch": 3.351851851851852,
"grad_norm": 0.503314513417158,
"learning_rate": 2.4953043825142164e-05,
"loss": 0.2383,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10359904170036316,
"step": 905,
"valid_targets_mean": 6026.4,
"valid_targets_min": 398
},
{
"epoch": 3.3703703703703702,
"grad_norm": 0.5303301465182496,
"learning_rate": 2.477389756006276e-05,
"loss": 0.2406,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12895119190216064,
"step": 910,
"valid_targets_mean": 6841.1,
"valid_targets_min": 3687
},
{
"epoch": 3.388888888888889,
"grad_norm": 0.47181710270046207,
"learning_rate": 2.4594344194951748e-05,
"loss": 0.238,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13982756435871124,
"step": 915,
"valid_targets_mean": 7695.1,
"valid_targets_min": 6655
},
{
"epoch": 3.4074074074074074,
"grad_norm": 0.6978123307943264,
"learning_rate": 2.4414399041444897e-05,
"loss": 0.2297,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12279509752988815,
"step": 920,
"valid_targets_mean": 5957.0,
"valid_targets_min": 424
},
{
"epoch": 3.425925925925926,
"grad_norm": 0.5103058494828053,
"learning_rate": 2.423407744458822e-05,
"loss": 0.2476,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10531405359506607,
"step": 925,
"valid_targets_mean": 6795.9,
"valid_targets_min": 3746
},
{
"epoch": 3.4444444444444446,
"grad_norm": 0.4961262749151785,
"learning_rate": 2.405339478152938e-05,
"loss": 0.2424,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13874265551567078,
"step": 930,
"valid_targets_mean": 7975.0,
"valid_targets_min": 4268
},
{
"epoch": 3.462962962962963,
"grad_norm": 0.5574805174642966,
"learning_rate": 2.387236646020643e-05,
"loss": 0.2483,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12402337044477463,
"step": 935,
"valid_targets_mean": 8073.1,
"valid_targets_min": 3757
},
{
"epoch": 3.4814814814814814,
"grad_norm": 0.5498492265628724,
"learning_rate": 2.3691007918033858e-05,
"loss": 0.2526,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1583220362663269,
"step": 940,
"valid_targets_mean": 6594.8,
"valid_targets_min": 4154
},
{
"epoch": 3.5,
"grad_norm": 0.5001189338850895,
"learning_rate": 2.3509334620586127e-05,
"loss": 0.2445,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12958279252052307,
"step": 945,
"valid_targets_mean": 7600.1,
"valid_targets_min": 5250
},
{
"epoch": 3.5185185185185186,
"grad_norm": 0.4807963923696486,
"learning_rate": 2.332736206027887e-05,
"loss": 0.2466,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10260862857103348,
"step": 950,
"valid_targets_mean": 6256.6,
"valid_targets_min": 3839
},
{
"epoch": 3.537037037037037,
"grad_norm": 0.5061807303605752,
"learning_rate": 2.314510575504771e-05,
"loss": 0.2406,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13543078303337097,
"step": 955,
"valid_targets_mean": 7313.0,
"valid_targets_min": 4655
},
{
"epoch": 3.5555555555555554,
"grad_norm": 0.4561793894533891,
"learning_rate": 2.2962581247024983e-05,
"loss": 0.2394,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1189025267958641,
"step": 960,
"valid_targets_mean": 7561.6,
"valid_targets_min": 3970
},
{
"epoch": 3.574074074074074,
"grad_norm": 0.4825083581140084,
"learning_rate": 2.277980410121434e-05,
"loss": 0.2396,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10504335165023804,
"step": 965,
"valid_targets_mean": 6335.8,
"valid_targets_min": 2847
},
{
"epoch": 3.5925925925925926,
"grad_norm": 0.4931411405781181,
"learning_rate": 2.2596789904163453e-05,
"loss": 0.2448,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13161906599998474,
"step": 970,
"valid_targets_mean": 7651.8,
"valid_targets_min": 4413
},
{
"epoch": 3.611111111111111,
"grad_norm": 0.7353648756929703,
"learning_rate": 2.2413554262634802e-05,
"loss": 0.24,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1295996904373169,
"step": 975,
"valid_targets_mean": 6932.1,
"valid_targets_min": 314
},
{
"epoch": 3.6296296296296298,
"grad_norm": 0.4968603493634512,
"learning_rate": 2.223011280227485e-05,
"loss": 0.25,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11219480633735657,
"step": 980,
"valid_targets_mean": 6326.2,
"valid_targets_min": 255
},
{
"epoch": 3.648148148148148,
"grad_norm": 0.5295742966751648,
"learning_rate": 2.2046481166281496e-05,
"loss": 0.2451,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1228310838341713,
"step": 985,
"valid_targets_mean": 5468.6,
"valid_targets_min": 316
},
{
"epoch": 3.6666666666666665,
"grad_norm": 0.48970756187215925,
"learning_rate": 2.1862675014070106e-05,
"loss": 0.2404,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12276136130094528,
"step": 990,
"valid_targets_mean": 7585.4,
"valid_targets_min": 3731
},
{
"epoch": 3.685185185185185,
"grad_norm": 0.5126550687549059,
"learning_rate": 2.1678710019938136e-05,
"loss": 0.2496,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11687671393156052,
"step": 995,
"valid_targets_mean": 5928.8,
"valid_targets_min": 2060
},
{
"epoch": 3.7037037037037037,
"grad_norm": 0.5038883486908647,
"learning_rate": 2.149460187172849e-05,
"loss": 0.2418,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0843639075756073,
"step": 1000,
"valid_targets_mean": 4160.1,
"valid_targets_min": 470
},
{
"epoch": 3.7222222222222223,
"grad_norm": 0.437278082428542,
"learning_rate": 2.1310366269491693e-05,
"loss": 0.245,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12892818450927734,
"step": 1005,
"valid_targets_mean": 7916.6,
"valid_targets_min": 3774
},
{
"epoch": 3.7407407407407405,
"grad_norm": 0.7342847594893045,
"learning_rate": 2.1126018924147084e-05,
"loss": 0.2497,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1251305788755417,
"step": 1010,
"valid_targets_mean": 6748.9,
"valid_targets_min": 4396
},
{
"epoch": 3.7592592592592595,
"grad_norm": 0.5411710794385421,
"learning_rate": 2.094157555614304e-05,
"loss": 0.2424,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13948220014572144,
"step": 1015,
"valid_targets_mean": 7209.6,
"valid_targets_min": 245
},
{
"epoch": 3.7777777777777777,
"grad_norm": 0.4659496590666761,
"learning_rate": 2.0757051894116382e-05,
"loss": 0.2322,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12497460097074509,
"step": 1020,
"valid_targets_mean": 7786.8,
"valid_targets_min": 3204
},
{
"epoch": 3.7962962962962963,
"grad_norm": 0.8396535698509019,
"learning_rate": 2.057246367355109e-05,
"loss": 0.2363,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13099229335784912,
"step": 1025,
"valid_targets_mean": 6280.6,
"valid_targets_min": 450
},
{
"epoch": 3.814814814814815,
"grad_norm": 0.49954658133269225,
"learning_rate": 2.038782663543649e-05,
"loss": 0.2489,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14834380149841309,
"step": 1030,
"valid_targets_mean": 7883.1,
"valid_targets_min": 5303
},
{
"epoch": 3.8333333333333335,
"grad_norm": 0.5628070531489748,
"learning_rate": 2.0203156524924847e-05,
"loss": 0.2434,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15435266494750977,
"step": 1035,
"valid_targets_mean": 7055.5,
"valid_targets_min": 4829
},
{
"epoch": 3.851851851851852,
"grad_norm": 0.5299852216946729,
"learning_rate": 2.0018469089988723e-05,
"loss": 0.2526,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17090864479541779,
"step": 1040,
"valid_targets_mean": 7314.8,
"valid_targets_min": 3083
},
{
"epoch": 3.8703703703703702,
"grad_norm": 0.5063447667820846,
"learning_rate": 1.9833780080078063e-05,
"loss": 0.2437,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13567772507667542,
"step": 1045,
"valid_targets_mean": 7075.2,
"valid_targets_min": 3665
},
{
"epoch": 3.888888888888889,
"grad_norm": 0.5295998490010958,
"learning_rate": 1.9649105244777097e-05,
"loss": 0.2391,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12415409088134766,
"step": 1050,
"valid_targets_mean": 6369.9,
"valid_targets_min": 332
},
{
"epoch": 3.9074074074074074,
"grad_norm": 0.5103141821329165,
"learning_rate": 1.946446033246132e-05,
"loss": 0.246,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13250163197517395,
"step": 1055,
"valid_targets_mean": 6484.8,
"valid_targets_min": 4386
},
{
"epoch": 3.925925925925926,
"grad_norm": 0.46646251978801523,
"learning_rate": 1.927986108895448e-05,
"loss": 0.2446,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11562138795852661,
"step": 1060,
"valid_targets_mean": 7588.4,
"valid_targets_min": 5279
},
{
"epoch": 3.9444444444444446,
"grad_norm": 0.48394668574105776,
"learning_rate": 1.9095323256185877e-05,
"loss": 0.2449,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12256023287773132,
"step": 1065,
"valid_targets_mean": 6316.8,
"valid_targets_min": 1402
},
{
"epoch": 3.962962962962963,
"grad_norm": 0.48521748020882943,
"learning_rate": 1.8910862570847936e-05,
"loss": 0.2456,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12952281534671783,
"step": 1070,
"valid_targets_mean": 7025.2,
"valid_targets_min": 4953
},
{
"epoch": 3.9814814814814814,
"grad_norm": 0.49338916425063034,
"learning_rate": 1.872649476305423e-05,
"loss": 0.2298,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11373429000377655,
"step": 1075,
"valid_targets_mean": 5696.2,
"valid_targets_min": 498
},
{
"epoch": 4.0,
"grad_norm": 0.5727656140824072,
"learning_rate": 1.8542235554998097e-05,
"loss": 0.2552,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17409956455230713,
"step": 1080,
"valid_targets_mean": 8495.8,
"valid_targets_min": 598
},
{
"epoch": 4.018518518518518,
"grad_norm": 0.516032233014235,
"learning_rate": 1.835810065961189e-05,
"loss": 0.2343,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11870207637548447,
"step": 1085,
"valid_targets_mean": 7494.0,
"valid_targets_min": 5474
},
{
"epoch": 4.037037037037037,
"grad_norm": 0.5919098251042874,
"learning_rate": 1.8174105779227038e-05,
"loss": 0.222,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10515961796045303,
"step": 1090,
"valid_targets_mean": 5810.2,
"valid_targets_min": 810
},
{
"epoch": 4.055555555555555,
"grad_norm": 0.5018483993286512,
"learning_rate": 1.799026660423503e-05,
"loss": 0.2351,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10188450664281845,
"step": 1095,
"valid_targets_mean": 7080.5,
"valid_targets_min": 4348
},
{
"epoch": 4.074074074074074,
"grad_norm": 0.5299673754548403,
"learning_rate": 1.780659881174937e-05,
"loss": 0.2238,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13417434692382812,
"step": 1100,
"valid_targets_mean": 6985.2,
"valid_targets_min": 3872
},
{
"epoch": 4.092592592592593,
"grad_norm": 0.5130673298943274,
"learning_rate": 1.7623118064268726e-05,
"loss": 0.2307,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09408153593540192,
"step": 1105,
"valid_targets_mean": 6263.2,
"valid_targets_min": 3473
},
{
"epoch": 4.111111111111111,
"grad_norm": 0.5462228358022995,
"learning_rate": 1.743984000834126e-05,
"loss": 0.2286,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09989724308252335,
"step": 1110,
"valid_targets_mean": 5982.1,
"valid_targets_min": 475
},
{
"epoch": 4.12962962962963,
"grad_norm": 0.5988706825678385,
"learning_rate": 1.7256780273230358e-05,
"loss": 0.2255,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10022996366024017,
"step": 1115,
"valid_targets_mean": 5157.0,
"valid_targets_min": 451
},
{
"epoch": 4.148148148148148,
"grad_norm": 0.48816249519586924,
"learning_rate": 1.707395446958183e-05,
"loss": 0.2304,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10353829711675644,
"step": 1120,
"valid_targets_mean": 6314.1,
"valid_targets_min": 423
},
{
"epoch": 4.166666666666667,
"grad_norm": 0.49451693646994704,
"learning_rate": 1.6891378188092694e-05,
"loss": 0.2417,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12381202727556229,
"step": 1125,
"valid_targets_mean": 7282.1,
"valid_targets_min": 3336
},
{
"epoch": 4.185185185185185,
"grad_norm": 0.5087635256864471,
"learning_rate": 1.6709066998181653e-05,
"loss": 0.2264,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12461232393980026,
"step": 1130,
"valid_targets_mean": 6933.9,
"valid_targets_min": 3762
},
{
"epoch": 4.203703703703703,
"grad_norm": 0.5084569033634785,
"learning_rate": 1.6527036446661396e-05,
"loss": 0.239,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15918824076652527,
"step": 1135,
"valid_targets_mean": 7666.1,
"valid_targets_min": 442
},
{
"epoch": 4.222222222222222,
"grad_norm": 0.5017372904791324,
"learning_rate": 1.634530205641283e-05,
"loss": 0.235,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1275438666343689,
"step": 1140,
"valid_targets_mean": 7447.4,
"valid_targets_min": 5245
},
{
"epoch": 4.2407407407407405,
"grad_norm": 0.5206246639358723,
"learning_rate": 1.616387932506135e-05,
"loss": 0.2252,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10627258569002151,
"step": 1145,
"valid_targets_mean": 6473.4,
"valid_targets_min": 102
},
{
"epoch": 4.2592592592592595,
"grad_norm": 0.4977541341325297,
"learning_rate": 1.5982783723655225e-05,
"loss": 0.2333,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08717834949493408,
"step": 1150,
"valid_targets_mean": 6253.9,
"valid_targets_min": 2602
},
{
"epoch": 4.277777777777778,
"grad_norm": 0.5023452631707405,
"learning_rate": 1.580203069534634e-05,
"loss": 0.2325,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12174762040376663,
"step": 1155,
"valid_targets_mean": 6748.9,
"valid_targets_min": 2016
},
{
"epoch": 4.296296296296296,
"grad_norm": 0.5279594814422506,
"learning_rate": 1.5621635654073216e-05,
"loss": 0.2374,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12284161150455475,
"step": 1160,
"valid_targets_mean": 6711.1,
"valid_targets_min": 4902
},
{
"epoch": 4.314814814814815,
"grad_norm": 0.49117787226016985,
"learning_rate": 1.5441613983246606e-05,
"loss": 0.2316,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1301436871290207,
"step": 1165,
"valid_targets_mean": 6814.2,
"valid_targets_min": 5645
},
{
"epoch": 4.333333333333333,
"grad_norm": 0.5240995867758355,
"learning_rate": 1.5261981034437617e-05,
"loss": 0.2275,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10003925859928131,
"step": 1170,
"valid_targets_mean": 5936.5,
"valid_targets_min": 1476
},
{
"epoch": 4.351851851851852,
"grad_norm": 0.4887431417522901,
"learning_rate": 1.508275212606862e-05,
"loss": 0.2295,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.126789391040802,
"step": 1175,
"valid_targets_mean": 7479.0,
"valid_targets_min": 2546
},
{
"epoch": 4.37037037037037,
"grad_norm": 0.5010137089706221,
"learning_rate": 1.490394254210691e-05,
"loss": 0.2247,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11638576537370682,
"step": 1180,
"valid_targets_mean": 7829.9,
"valid_targets_min": 6242
},
{
"epoch": 4.388888888888889,
"grad_norm": 0.5302823871361698,
"learning_rate": 1.4725567530761402e-05,
"loss": 0.2335,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09980429708957672,
"step": 1185,
"valid_targets_mean": 6296.0,
"valid_targets_min": 1823
},
{
"epoch": 4.407407407407407,
"grad_norm": 0.48938232381512503,
"learning_rate": 1.4547642303182282e-05,
"loss": 0.2324,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12234029173851013,
"step": 1190,
"valid_targets_mean": 8091.8,
"valid_targets_min": 5562
},
{
"epoch": 4.425925925925926,
"grad_norm": 0.5031522973767952,
"learning_rate": 1.4370182032163861e-05,
"loss": 0.2177,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07054075598716736,
"step": 1195,
"valid_targets_mean": 4125.9,
"valid_targets_min": 392
},
{
"epoch": 4.444444444444445,
"grad_norm": 0.5018613158088705,
"learning_rate": 1.4193201850850717e-05,
"loss": 0.2363,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10027652978897095,
"step": 1200,
"valid_targets_mean": 6186.6,
"valid_targets_min": 860
},
{
"epoch": 4.462962962962963,
"grad_norm": 0.4856214263792827,
"learning_rate": 1.4016716851447173e-05,
"loss": 0.2303,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11477172374725342,
"step": 1205,
"valid_targets_mean": 7052.6,
"valid_targets_min": 4152
},
{
"epoch": 4.481481481481482,
"grad_norm": 0.4874276605163382,
"learning_rate": 1.3840742083930297e-05,
"loss": 0.2218,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10933384299278259,
"step": 1210,
"valid_targets_mean": 8173.6,
"valid_targets_min": 4973
},
{
"epoch": 4.5,
"grad_norm": 0.531455676384898,
"learning_rate": 1.3665292554766513e-05,
"loss": 0.2285,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11772537231445312,
"step": 1215,
"valid_targets_mean": 5933.6,
"valid_targets_min": 357
},
{
"epoch": 4.518518518518518,
"grad_norm": 0.5372681406699098,
"learning_rate": 1.3490383225631885e-05,
"loss": 0.221,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10665614157915115,
"step": 1220,
"valid_targets_mean": 7539.6,
"valid_targets_min": 4008
},
{
"epoch": 4.537037037037037,
"grad_norm": 0.5211115545139049,
"learning_rate": 1.3316029012136251e-05,
"loss": 0.2344,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12121891230344772,
"step": 1225,
"valid_targets_mean": 7162.0,
"valid_targets_min": 3992
},
{
"epoch": 4.555555555555555,
"grad_norm": 0.46584465301969513,
"learning_rate": 1.314224478255128e-05,
"loss": 0.2276,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11857609450817108,
"step": 1230,
"valid_targets_mean": 6463.0,
"valid_targets_min": 1340
},
{
"epoch": 4.574074074074074,
"grad_norm": 0.4588439125962153,
"learning_rate": 1.2969045356542558e-05,
"loss": 0.2279,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10397493839263916,
"step": 1235,
"valid_targets_mean": 7388.2,
"valid_targets_min": 4077
},
{
"epoch": 4.592592592592593,
"grad_norm": 0.50171375299644,
"learning_rate": 1.2796445503905797e-05,
"loss": 0.2221,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.119759202003479,
"step": 1240,
"valid_targets_mean": 7190.8,
"valid_targets_min": 1976
},
{
"epoch": 4.611111111111111,
"grad_norm": 0.5252807258839242,
"learning_rate": 1.2624459943307378e-05,
"loss": 0.2246,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10422271490097046,
"step": 1245,
"valid_targets_mean": 6359.0,
"valid_targets_min": 4126
},
{
"epoch": 4.62962962962963,
"grad_norm": 0.4591959160558025,
"learning_rate": 1.2453103341029154e-05,
"loss": 0.2314,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13636991381645203,
"step": 1250,
"valid_targets_mean": 7624.4,
"valid_targets_min": 4176
},
{
"epoch": 4.648148148148148,
"grad_norm": 0.5397510993994119,
"learning_rate": 1.2282390309717776e-05,
"loss": 0.2255,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10083657503128052,
"step": 1255,
"valid_targets_mean": 5491.2,
"valid_targets_min": 231
},
{
"epoch": 4.666666666666667,
"grad_norm": 0.47818583241901386,
"learning_rate": 1.2112335407138582e-05,
"loss": 0.2246,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11500853300094604,
"step": 1260,
"valid_targets_mean": 6869.8,
"valid_targets_min": 4280
},
{
"epoch": 4.685185185185185,
"grad_norm": 0.6028087003564473,
"learning_rate": 1.1942953134934185e-05,
"loss": 0.2303,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1319492757320404,
"step": 1265,
"valid_targets_mean": 5306.4,
"valid_targets_min": 3887
},
{
"epoch": 4.703703703703704,
"grad_norm": 0.487471515961342,
"learning_rate": 1.1774257937387774e-05,
"loss": 0.2241,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10458207130432129,
"step": 1270,
"valid_targets_mean": 6610.9,
"valid_targets_min": 3766
},
{
"epoch": 4.722222222222222,
"grad_norm": 0.5162656468076039,
"learning_rate": 1.160626420019142e-05,
"loss": 0.2373,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14040455222129822,
"step": 1275,
"valid_targets_mean": 7369.4,
"valid_targets_min": 1723
},
{
"epoch": 4.7407407407407405,
"grad_norm": 0.4992171347071927,
"learning_rate": 1.1438986249219292e-05,
"loss": 0.2274,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1111883670091629,
"step": 1280,
"valid_targets_mean": 7044.2,
"valid_targets_min": 1730
},
{
"epoch": 4.7592592592592595,
"grad_norm": 0.4980112243313627,
"learning_rate": 1.1272438349305996e-05,
"loss": 0.2216,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1132529079914093,
"step": 1285,
"valid_targets_mean": 6436.4,
"valid_targets_min": 4369
},
{
"epoch": 4.777777777777778,
"grad_norm": 0.4991126681737921,
"learning_rate": 1.1106634703030132e-05,
"loss": 0.227,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13967010378837585,
"step": 1290,
"valid_targets_mean": 8305.5,
"valid_targets_min": 5394
},
{
"epoch": 4.796296296296296,
"grad_norm": 0.5155034087864115,
"learning_rate": 1.0941589449503152e-05,
"loss": 0.2262,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10799385607242584,
"step": 1295,
"valid_targets_mean": 6244.2,
"valid_targets_min": 413
},
{
"epoch": 4.814814814814815,
"grad_norm": 0.4522833911832173,
"learning_rate": 1.0777316663163604e-05,
"loss": 0.2297,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10367443412542343,
"step": 1300,
"valid_targets_mean": 7749.2,
"valid_targets_min": 3820
},
{
"epoch": 4.833333333333333,
"grad_norm": 0.4560347865249111,
"learning_rate": 1.061383035257697e-05,
"loss": 0.2308,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12242156267166138,
"step": 1305,
"valid_targets_mean": 7613.8,
"valid_targets_min": 5903
},
{
"epoch": 4.851851851851852,
"grad_norm": 0.477804618520794,
"learning_rate": 1.0451144459241021e-05,
"loss": 0.2257,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12609726190567017,
"step": 1310,
"valid_targets_mean": 7283.4,
"valid_targets_min": 4187
},
{
"epoch": 4.87037037037037,
"grad_norm": 0.4848059755823838,
"learning_rate": 1.0289272856396954e-05,
"loss": 0.2334,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12737436592578888,
"step": 1315,
"valid_targets_mean": 6414.9,
"valid_targets_min": 3065
},
{
"epoch": 4.888888888888889,
"grad_norm": 0.5169709458077109,
"learning_rate": 1.0128229347846348e-05,
"loss": 0.2326,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09037278592586517,
"step": 1320,
"valid_targets_mean": 5083.1,
"valid_targets_min": 2463
},
{
"epoch": 4.907407407407407,
"grad_norm": 0.4691072483470426,
"learning_rate": 9.968027666774005e-06,
"loss": 0.2232,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11444838345050812,
"step": 1325,
"valid_targets_mean": 6681.6,
"valid_targets_min": 4390
},
{
"epoch": 4.925925925925926,
"grad_norm": 0.5394803424881776,
"learning_rate": 9.80868147457683e-06,
"loss": 0.2365,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14064496755599976,
"step": 1330,
"valid_targets_mean": 7132.9,
"valid_targets_min": 4613
},
{
"epoch": 4.944444444444445,
"grad_norm": 0.519246694995982,
"learning_rate": 9.650204359698884e-06,
"loss": 0.2364,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10239046812057495,
"step": 1335,
"valid_targets_mean": 5749.8,
"valid_targets_min": 407
},
{
"epoch": 4.962962962962963,
"grad_norm": 0.5251939867353143,
"learning_rate": 9.492609836472563e-06,
"loss": 0.2264,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1245264858007431,
"step": 1340,
"valid_targets_mean": 8139.8,
"valid_targets_min": 304
},
{
"epoch": 4.981481481481482,
"grad_norm": 0.4841966754669591,
"learning_rate": 9.33591134396618e-06,
"loss": 0.2369,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09950557351112366,
"step": 1345,
"valid_targets_mean": 5948.6,
"valid_targets_min": 1240
},
{
"epoch": 5.0,
"grad_norm": 0.46635316251229963,
"learning_rate": 9.180122244837893e-06,
"loss": 0.2187,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09984344244003296,
"step": 1350,
"valid_targets_mean": 6878.8,
"valid_targets_min": 4793
},
{
"epoch": 5.018518518518518,
"grad_norm": 0.4917225509060581,
"learning_rate": 9.025255824196234e-06,
"loss": 0.2078,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1116233840584755,
"step": 1355,
"valid_targets_mean": 6953.8,
"valid_targets_min": 3714
},
{
"epoch": 5.037037037037037,
"grad_norm": 0.5096642938537449,
"learning_rate": 8.871325288467188e-06,
"loss": 0.2184,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1091044619679451,
"step": 1360,
"valid_targets_mean": 6872.6,
"valid_targets_min": 3982
},
{
"epoch": 5.055555555555555,
"grad_norm": 0.5130910816904468,
"learning_rate": 8.718343764267967e-06,
"loss": 0.2218,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08576367050409317,
"step": 1365,
"valid_targets_mean": 5957.0,
"valid_targets_min": 321
},
{
"epoch": 5.074074074074074,
"grad_norm": 0.506747302079751,
"learning_rate": 8.566324297287674e-06,
"loss": 0.2212,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14150217175483704,
"step": 1370,
"valid_targets_mean": 7874.0,
"valid_targets_min": 4982
},
{
"epoch": 5.092592592592593,
"grad_norm": 0.5233616721490927,
"learning_rate": 8.41527985117478e-06,
"loss": 0.2303,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09984303265810013,
"step": 1375,
"valid_targets_mean": 5710.6,
"valid_targets_min": 471
},
{
"epoch": 5.111111111111111,
"grad_norm": 0.5312752630451189,
"learning_rate": 8.265223306431644e-06,
"loss": 0.2156,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10283049941062927,
"step": 1380,
"valid_targets_mean": 5761.2,
"valid_targets_min": 427
},
{
"epoch": 5.12962962962963,
"grad_norm": 0.5693784646821431,
"learning_rate": 8.116167459316116e-06,
"loss": 0.2305,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12732897698879242,
"step": 1385,
"valid_targets_mean": 7584.5,
"valid_targets_min": 5131
},
{
"epoch": 5.148148148148148,
"grad_norm": 0.5027220474125025,
"learning_rate": 7.96812502075031e-06,
"loss": 0.2249,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11553220450878143,
"step": 1390,
"valid_targets_mean": 6780.9,
"valid_targets_min": 368
},
{
"epoch": 5.166666666666667,
"grad_norm": 0.6619482906498753,
"learning_rate": 7.821108615236663e-06,
"loss": 0.22,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11507928371429443,
"step": 1395,
"valid_targets_mean": 6894.5,
"valid_targets_min": 4617
},
{
"epoch": 5.185185185185185,
"grad_norm": 0.591868727798189,
"learning_rate": 7.675130779781385e-06,
"loss": 0.2169,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10910572856664658,
"step": 1400,
"valid_targets_mean": 6408.5,
"valid_targets_min": 3807
},
{
"epoch": 5.203703703703703,
"grad_norm": 0.5126683283711245,
"learning_rate": 7.530203962825331e-06,
"loss": 0.2166,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10854263603687286,
"step": 1405,
"valid_targets_mean": 7869.9,
"valid_targets_min": 6327
},
{
"epoch": 5.222222222222222,
"grad_norm": 0.5236210165162991,
"learning_rate": 7.386340523182451e-06,
"loss": 0.2248,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.127744659781456,
"step": 1410,
"valid_targets_mean": 6906.0,
"valid_targets_min": 2789
},
{
"epoch": 5.2407407407407405,
"grad_norm": 0.5593021034033383,
"learning_rate": 7.243552728985879e-06,
"loss": 0.2182,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11911047995090485,
"step": 1415,
"valid_targets_mean": 6434.8,
"valid_targets_min": 3960
},
{
"epoch": 5.2592592592592595,
"grad_norm": 0.49741040737686926,
"learning_rate": 7.1018527566417535e-06,
"loss": 0.2255,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.093578040599823,
"step": 1420,
"valid_targets_mean": 5445.6,
"valid_targets_min": 347
},
{
"epoch": 5.277777777777778,
"grad_norm": 0.50446609238118,
"learning_rate": 6.961252689790836e-06,
"loss": 0.2298,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12511374056339264,
"step": 1425,
"valid_targets_mean": 7610.8,
"valid_targets_min": 3551
},
{
"epoch": 5.296296296296296,
"grad_norm": 0.5170007137165736,
"learning_rate": 6.821764518278109e-06,
"loss": 0.211,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10103592276573181,
"step": 1430,
"valid_targets_mean": 5218.6,
"valid_targets_min": 402
},
{
"epoch": 5.314814814814815,
"grad_norm": 0.49515563805483387,
"learning_rate": 6.6834001371302874e-06,
"loss": 0.2123,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11039568483829498,
"step": 1435,
"valid_targets_mean": 7072.1,
"valid_targets_min": 4250
},
{
"epoch": 5.333333333333333,
"grad_norm": 0.45720233551933753,
"learning_rate": 6.546171345541474e-06,
"loss": 0.2261,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10742579400539398,
"step": 1440,
"valid_targets_mean": 7819.8,
"valid_targets_min": 5542
},
{
"epoch": 5.351851851851852,
"grad_norm": 0.4933421472302054,
"learning_rate": 6.410089845866969e-06,
"loss": 0.2222,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09236937761306763,
"step": 1445,
"valid_targets_mean": 7047.8,
"valid_targets_min": 5584
},
{
"epoch": 5.37037037037037,
"grad_norm": 0.46977639240502755,
"learning_rate": 6.275167242625331e-06,
"loss": 0.2178,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12316617369651794,
"step": 1450,
"valid_targets_mean": 8225.5,
"valid_targets_min": 6143
},
{
"epoch": 5.388888888888889,
"grad_norm": 0.4919487785546802,
"learning_rate": 6.141415041508774e-06,
"loss": 0.2162,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10855259001255035,
"step": 1455,
"valid_targets_mean": 6035.5,
"valid_targets_min": 387
},
{
"epoch": 5.407407407407407,
"grad_norm": 0.48842724784791386,
"learning_rate": 6.008844648402037e-06,
"loss": 0.227,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1209472045302391,
"step": 1460,
"valid_targets_mean": 7274.0,
"valid_targets_min": 335
},
{
"epoch": 5.425925925925926,
"grad_norm": 0.5168122874383904,
"learning_rate": 5.877467368409711e-06,
"loss": 0.2231,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10533533990383148,
"step": 1465,
"valid_targets_mean": 6178.0,
"valid_targets_min": 1402
},
{
"epoch": 5.444444444444445,
"grad_norm": 0.4938494111570672,
"learning_rate": 5.74729440489219e-06,
"loss": 0.2105,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10273820906877518,
"step": 1470,
"valid_targets_mean": 7238.5,
"valid_targets_min": 4875
},
{
"epoch": 5.462962962962963,
"grad_norm": 0.4908203224021245,
"learning_rate": 5.61833685851028e-06,
"loss": 0.2124,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1013522744178772,
"step": 1475,
"valid_targets_mean": 6711.8,
"valid_targets_min": 5280
},
{
"epoch": 5.481481481481482,
"grad_norm": 0.6323153682269085,
"learning_rate": 5.490605726278602e-06,
"loss": 0.2193,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09872453659772873,
"step": 1480,
"valid_targets_mean": 6050.2,
"valid_targets_min": 1873
},
{
"epoch": 5.5,
"grad_norm": 0.5059930160839521,
"learning_rate": 5.364111900627759e-06,
"loss": 0.2129,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10614493489265442,
"step": 1485,
"valid_targets_mean": 6757.8,
"valid_targets_min": 3389
},
{
"epoch": 5.518518518518518,
"grad_norm": 0.7298032265200861,
"learning_rate": 5.238866168475532e-06,
"loss": 0.2229,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09671928733587265,
"step": 1490,
"valid_targets_mean": 6525.5,
"valid_targets_min": 1156
},
{
"epoch": 5.537037037037037,
"grad_norm": 0.46965643801489465,
"learning_rate": 5.114879210306967e-06,
"loss": 0.2184,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09873900562524796,
"step": 1495,
"valid_targets_mean": 6918.2,
"valid_targets_min": 4153
},
{
"epoch": 5.555555555555555,
"grad_norm": 0.5036641297823798,
"learning_rate": 4.9921615992636004e-06,
"loss": 0.2197,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10597766935825348,
"step": 1500,
"valid_targets_mean": 6490.0,
"valid_targets_min": 456
},
{
"epoch": 5.574074074074074,
"grad_norm": 0.5055842380658947,
"learning_rate": 4.870723800241832e-06,
"loss": 0.2106,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08831615746021271,
"step": 1505,
"valid_targets_mean": 5106.0,
"valid_targets_min": 245
},
{
"epoch": 5.592592592592593,
"grad_norm": 0.4574856423862591,
"learning_rate": 4.750576169000476e-06,
"loss": 0.224,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12107911705970764,
"step": 1510,
"valid_targets_mean": 8312.5,
"valid_targets_min": 7302
},
{
"epoch": 5.611111111111111,
"grad_norm": 0.5526024879531509,
"learning_rate": 4.631728951277716e-06,
"loss": 0.217,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1274055689573288,
"step": 1515,
"valid_targets_mean": 6396.0,
"valid_targets_min": 329
},
{
"epoch": 5.62962962962963,
"grad_norm": 0.493838956685955,
"learning_rate": 4.514192281917351e-06,
"loss": 0.2343,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12989237904548645,
"step": 1520,
"valid_targets_mean": 6073.9,
"valid_targets_min": 431
},
{
"epoch": 5.648148148148148,
"grad_norm": 0.4867215016980891,
"learning_rate": 4.397976184004553e-06,
"loss": 0.2122,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0971713662147522,
"step": 1525,
"valid_targets_mean": 6264.5,
"valid_targets_min": 390
},
{
"epoch": 5.666666666666667,
"grad_norm": 0.5159449102090277,
"learning_rate": 4.283090568011106e-06,
"loss": 0.215,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12054109573364258,
"step": 1530,
"valid_targets_mean": 7413.1,
"valid_targets_min": 5734
},
{
"epoch": 5.685185185185185,
"grad_norm": 0.4889188303592639,
"learning_rate": 4.169545230950321e-06,
"loss": 0.2116,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1139330267906189,
"step": 1535,
"valid_targets_mean": 6801.9,
"valid_targets_min": 3473
},
{
"epoch": 5.703703703703704,
"grad_norm": 0.4840565682120755,
"learning_rate": 4.057349855541557e-06,
"loss": 0.2261,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10931402444839478,
"step": 1540,
"valid_targets_mean": 7520.5,
"valid_targets_min": 4113
},
{
"epoch": 5.722222222222222,
"grad_norm": 0.488027844595327,
"learning_rate": 3.9465140093845035e-06,
"loss": 0.2246,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1238413006067276,
"step": 1545,
"valid_targets_mean": 7499.6,
"valid_targets_min": 4714
},
{
"epoch": 5.7407407407407405,
"grad_norm": 0.5478734177317678,
"learning_rate": 3.837047144143331e-06,
"loss": 0.2321,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12330850958824158,
"step": 1550,
"valid_targets_mean": 5695.8,
"valid_targets_min": 2294
},
{
"epoch": 5.7592592592592595,
"grad_norm": 0.4783212632587014,
"learning_rate": 3.7289585947406504e-06,
"loss": 0.2175,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09635508060455322,
"step": 1555,
"valid_targets_mean": 6844.5,
"valid_targets_min": 4746
},
{
"epoch": 5.777777777777778,
"grad_norm": 0.4569367430559496,
"learning_rate": 3.6222575785614898e-06,
"loss": 0.2117,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11338578909635544,
"step": 1560,
"valid_targets_mean": 7313.2,
"valid_targets_min": 5594
},
{
"epoch": 5.796296296296296,
"grad_norm": 0.4652991348453085,
"learning_rate": 3.5169531946672563e-06,
"loss": 0.2301,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12462925910949707,
"step": 1565,
"valid_targets_mean": 8310.9,
"valid_targets_min": 2015
},
{
"epoch": 5.814814814814815,
"grad_norm": 0.45290586655386583,
"learning_rate": 3.413054423019815e-06,
"loss": 0.2188,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1050763726234436,
"step": 1570,
"valid_targets_mean": 7246.6,
"valid_targets_min": 477
},
{
"epoch": 5.833333333333333,
"grad_norm": 0.46941892907078986,
"learning_rate": 3.3105701237156885e-06,
"loss": 0.2247,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09569090604782104,
"step": 1575,
"valid_targets_mean": 6234.6,
"valid_targets_min": 3204
},
{
"epoch": 5.851851851851852,
"grad_norm": 0.48656566073537605,
"learning_rate": 3.2095090362305316e-06,
"loss": 0.2267,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12894827127456665,
"step": 1580,
"valid_targets_mean": 7983.6,
"valid_targets_min": 6576
},
{
"epoch": 5.87037037037037,
"grad_norm": 0.5659066142032889,
"learning_rate": 3.1098797786738433e-06,
"loss": 0.2255,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12359024584293365,
"step": 1585,
"valid_targets_mean": 6504.0,
"valid_targets_min": 3646
},
{
"epoch": 5.888888888888889,
"grad_norm": 0.4995416346883199,
"learning_rate": 3.011690847054054e-06,
"loss": 0.2197,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1010461077094078,
"step": 1590,
"valid_targets_mean": 6379.1,
"valid_targets_min": 2857
},
{
"epoch": 5.907407407407407,
"grad_norm": 0.5306353785153805,
"learning_rate": 2.9149506145540064e-06,
"loss": 0.2209,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09737998247146606,
"step": 1595,
"valid_targets_mean": 6443.9,
"valid_targets_min": 310
},
{
"epoch": 5.925925925925926,
"grad_norm": 0.4781652098007515,
"learning_rate": 2.819667330816942e-06,
"loss": 0.2114,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10525282472372055,
"step": 1600,
"valid_targets_mean": 7038.8,
"valid_targets_min": 2065
},
{
"epoch": 5.944444444444445,
"grad_norm": 0.5099409359460987,
"learning_rate": 2.725849121242976e-06,
"loss": 0.2117,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09639444947242737,
"step": 1605,
"valid_targets_mean": 7068.9,
"valid_targets_min": 2646
},
{
"epoch": 5.962962962962963,
"grad_norm": 0.482201179503476,
"learning_rate": 2.633503986296215e-06,
"loss": 0.2132,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09317293018102646,
"step": 1610,
"valid_targets_mean": 6141.4,
"valid_targets_min": 3965
},
{
"epoch": 5.981481481481482,
"grad_norm": 0.524434990755225,
"learning_rate": 2.5426398008225084e-06,
"loss": 0.214,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07734841853380203,
"step": 1615,
"valid_targets_mean": 5183.4,
"valid_targets_min": 535
},
{
"epoch": 6.0,
"grad_norm": 0.5477430720593653,
"learning_rate": 2.4532643133778922e-06,
"loss": 0.2186,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09451612830162048,
"step": 1620,
"valid_targets_mean": 6252.8,
"valid_targets_min": 409
},
{
"epoch": 6.018518518518518,
"grad_norm": 0.5122116198318503,
"learning_rate": 2.36538514556784e-06,
"loss": 0.2131,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12386941909790039,
"step": 1625,
"valid_targets_mean": 7532.4,
"valid_targets_min": 5944
},
{
"epoch": 6.037037037037037,
"grad_norm": 0.48210109707959664,
"learning_rate": 2.2790097913973154e-06,
"loss": 0.2141,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09711402654647827,
"step": 1630,
"valid_targets_mean": 6310.2,
"valid_targets_min": 3336
},
{
"epoch": 6.055555555555555,
"grad_norm": 0.5222607009794501,
"learning_rate": 2.1941456166316953e-06,
"loss": 0.221,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11883814632892609,
"step": 1635,
"valid_targets_mean": 6612.6,
"valid_targets_min": 460
},
{
"epoch": 6.074074074074074,
"grad_norm": 0.5229353313952704,
"learning_rate": 2.1107998581686793e-06,
"loss": 0.2128,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10010014474391937,
"step": 1640,
"valid_targets_mean": 6095.0,
"valid_targets_min": 446
},
{
"epoch": 6.092592592592593,
"grad_norm": 0.5109976673243143,
"learning_rate": 2.0289796234211235e-06,
"loss": 0.2226,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10803158581256866,
"step": 1645,
"valid_targets_mean": 6716.4,
"valid_targets_min": 400
},
{
"epoch": 6.111111111111111,
"grad_norm": 0.4762813898465876,
"learning_rate": 1.9486918897109607e-06,
"loss": 0.2142,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1186881810426712,
"step": 1650,
"valid_targets_mean": 7162.5,
"valid_targets_min": 321
},
{
"epoch": 6.12962962962963,
"grad_norm": 0.5468221852953015,
"learning_rate": 1.8699435036741987e-06,
"loss": 0.2134,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11599022150039673,
"step": 1655,
"valid_targets_mean": 5360.0,
"valid_targets_min": 437
},
{
"epoch": 6.148148148148148,
"grad_norm": 0.4613166221297042,
"learning_rate": 1.792741180677069e-06,
"loss": 0.2116,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11193803697824478,
"step": 1660,
"valid_targets_mean": 7607.8,
"valid_targets_min": 3046
},
{
"epoch": 6.166666666666667,
"grad_norm": 0.4992299365923156,
"learning_rate": 1.7170915042433468e-06,
"loss": 0.2112,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10833920538425446,
"step": 1665,
"valid_targets_mean": 6474.9,
"valid_targets_min": 2007
},
{
"epoch": 6.185185185185185,
"grad_norm": 0.48734826804572084,
"learning_rate": 1.643000925492959e-06,
"loss": 0.2167,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10757909715175629,
"step": 1670,
"valid_targets_mean": 7296.0,
"valid_targets_min": 4685
},
{
"epoch": 6.203703703703703,
"grad_norm": 0.5038576702929699,
"learning_rate": 1.5704757625918454e-06,
"loss": 0.2198,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0956081971526146,
"step": 1675,
"valid_targets_mean": 6562.6,
"valid_targets_min": 530
},
{
"epoch": 6.222222222222222,
"grad_norm": 0.5014972764981547,
"learning_rate": 1.499522200213166e-06,
"loss": 0.2139,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13388274610042572,
"step": 1680,
"valid_targets_mean": 7633.9,
"valid_targets_min": 2642
},
{
"epoch": 6.2407407407407405,
"grad_norm": 0.5727109885681817,
"learning_rate": 1.4301462890099016e-06,
"loss": 0.2188,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1071832925081253,
"step": 1685,
"valid_targets_mean": 6702.2,
"valid_targets_min": 2613
},
{
"epoch": 6.2592592592592595,
"grad_norm": 0.526681748443717,
"learning_rate": 1.362353945098862e-06,
"loss": 0.2289,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.149953693151474,
"step": 1690,
"valid_targets_mean": 7246.5,
"valid_targets_min": 2595
},
{
"epoch": 6.277777777777778,
"grad_norm": 0.4820760829827286,
"learning_rate": 1.2961509495562074e-06,
"loss": 0.2115,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09220828860998154,
"step": 1695,
"valid_targets_mean": 6987.0,
"valid_targets_min": 4035
},
{
"epoch": 6.296296296296296,
"grad_norm": 0.5236479137171796,
"learning_rate": 1.2315429479244378e-06,
"loss": 0.2169,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11444682627916336,
"step": 1700,
"valid_targets_mean": 6010.9,
"valid_targets_min": 385
},
{
"epoch": 6.314814814814815,
"grad_norm": 0.5245034887848089,
"learning_rate": 1.1685354497309764e-06,
"loss": 0.2141,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12459330260753632,
"step": 1705,
"valid_targets_mean": 6315.4,
"valid_targets_min": 4419
},
{
"epoch": 6.333333333333333,
"grad_norm": 1.9145227209817528,
"learning_rate": 1.107133828018323e-06,
"loss": 0.2228,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09349900484085083,
"step": 1710,
"valid_targets_mean": 6550.8,
"valid_targets_min": 4214
},
{
"epoch": 6.351851851851852,
"grad_norm": 0.5373455723958673,
"learning_rate": 1.0473433188858784e-06,
"loss": 0.2142,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08351831883192062,
"step": 1715,
"valid_targets_mean": 5286.6,
"valid_targets_min": 2838
},
{
"epoch": 6.37037037037037,
"grad_norm": 0.5118701946064793,
"learning_rate": 9.891690210434235e-07,
"loss": 0.226,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08807747066020966,
"step": 1720,
"valid_targets_mean": 5508.8,
"valid_targets_min": 3072
},
{
"epoch": 6.388888888888889,
"grad_norm": 0.48206381384965274,
"learning_rate": 9.326158953763009e-07,
"loss": 0.2017,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09705394506454468,
"step": 1725,
"valid_targets_mean": 6747.9,
"valid_targets_min": 5018
},
{
"epoch": 6.407407407407407,
"grad_norm": 0.5459872813528737,
"learning_rate": 8.776887645224086e-07,
"loss": 0.2075,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1001187264919281,
"step": 1730,
"valid_targets_mean": 7545.6,
"valid_targets_min": 5814
},
{
"epoch": 6.425925925925926,
"grad_norm": 0.5445943239279856,
"learning_rate": 8.243923124609066e-07,
"loss": 0.2161,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09796342998743057,
"step": 1735,
"valid_targets_mean": 7020.4,
"valid_targets_min": 3779
},
{
"epoch": 6.444444444444445,
"grad_norm": 0.4684005826009441,
"learning_rate": 7.727310841128055e-07,
"loss": 0.2325,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14073237776756287,
"step": 1740,
"valid_targets_mean": 8810.2,
"valid_targets_min": 1462
},
{
"epoch": 6.462962962962963,
"grad_norm": 0.4541060098469679,
"learning_rate": 7.227094849533878e-07,
"loss": 0.2124,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09816377609968185,
"step": 1745,
"valid_targets_mean": 7020.2,
"valid_targets_min": 2597
},
{
"epoch": 6.481481481481482,
"grad_norm": 0.4673983795871858,
"learning_rate": 6.743317806365213e-07,
"loss": 0.2198,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10708339512348175,
"step": 1750,
"valid_targets_mean": 7477.1,
"valid_targets_min": 4900
},
{
"epoch": 6.5,
"grad_norm": 0.6105942569643106,
"learning_rate": 6.276020966309059e-07,
"loss": 0.199,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09840669482946396,
"step": 1755,
"valid_targets_mean": 6394.8,
"valid_targets_min": 3962
},
{
"epoch": 6.518518518518518,
"grad_norm": 0.5261917751262789,
"learning_rate": 5.825244178682621e-07,
"loss": 0.2089,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12467174232006073,
"step": 1760,
"valid_targets_mean": 7320.4,
"valid_targets_min": 4238
},
{
"epoch": 6.537037037037037,
"grad_norm": 0.5040103215289163,
"learning_rate": 5.391025884035239e-07,
"loss": 0.2128,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1107766330242157,
"step": 1765,
"valid_targets_mean": 6337.5,
"valid_targets_min": 329
},
{
"epoch": 6.555555555555555,
"grad_norm": 0.4850335062667886,
"learning_rate": 4.973403110870178e-07,
"loss": 0.2167,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0921555906534195,
"step": 1770,
"valid_targets_mean": 6312.9,
"valid_targets_min": 5236
},
{
"epoch": 6.574074074074074,
"grad_norm": 0.5151266321529011,
"learning_rate": 4.5724114724870593e-07,
"loss": 0.2176,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12505650520324707,
"step": 1775,
"valid_targets_mean": 7164.6,
"valid_targets_min": 5352
},
{
"epoch": 6.592592592592593,
"grad_norm": 0.5287100835670476,
"learning_rate": 4.188085163944866e-07,
"loss": 0.2036,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09333769977092743,
"step": 1780,
"valid_targets_mean": 6233.1,
"valid_targets_min": 471
},
{
"epoch": 6.611111111111111,
"grad_norm": 0.5184514712077551,
"learning_rate": 3.820456959145924e-07,
"loss": 0.2138,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10321502387523651,
"step": 1785,
"valid_targets_mean": 6844.4,
"valid_targets_min": 4841
},
{
"epoch": 6.62962962962963,
"grad_norm": 0.7780741153859662,
"learning_rate": 3.4695582080410686e-07,
"loss": 0.2173,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1244448572397232,
"step": 1790,
"valid_targets_mean": 7082.1,
"valid_targets_min": 4348
},
{
"epoch": 6.648148148148148,
"grad_norm": 0.5680521512792298,
"learning_rate": 3.1354188339562277e-07,
"loss": 0.2123,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11791664361953735,
"step": 1795,
"valid_targets_mean": 6692.6,
"valid_targets_min": 4308
},
{
"epoch": 6.666666666666667,
"grad_norm": 0.5333100590033606,
"learning_rate": 2.818067331040708e-07,
"loss": 0.211,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10548874735832214,
"step": 1800,
"valid_targets_mean": 6284.0,
"valid_targets_min": 4426
},
{
"epoch": 6.685185185185185,
"grad_norm": 0.4756549745078081,
"learning_rate": 2.517530761837228e-07,
"loss": 0.212,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10474137216806412,
"step": 1805,
"valid_targets_mean": 7218.1,
"valid_targets_min": 474
},
{
"epoch": 6.703703703703704,
"grad_norm": 0.6122065278621339,
"learning_rate": 2.2338347549742956e-07,
"loss": 0.2125,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12869982421398163,
"step": 1810,
"valid_targets_mean": 5424.0,
"valid_targets_min": 301
},
{
"epoch": 6.722222222222222,
"grad_norm": 0.47686114593760576,
"learning_rate": 1.9670035029804912e-07,
"loss": 0.2253,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1062430888414383,
"step": 1815,
"valid_targets_mean": 7357.9,
"valid_targets_min": 5245
},
{
"epoch": 6.7407407407407405,
"grad_norm": 0.4663069570426537,
"learning_rate": 1.7170597602215622e-07,
"loss": 0.2105,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10924944281578064,
"step": 1820,
"valid_targets_mean": 7345.1,
"valid_targets_min": 417
},
{
"epoch": 6.7592592592592595,
"grad_norm": 0.4863214142184415,
"learning_rate": 1.4840248409599966e-07,
"loss": 0.2166,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11560134589672089,
"step": 1825,
"valid_targets_mean": 6813.0,
"valid_targets_min": 336
},
{
"epoch": 6.777777777777778,
"grad_norm": 0.5144234551395174,
"learning_rate": 1.2679186175373448e-07,
"loss": 0.2205,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10990774631500244,
"step": 1830,
"valid_targets_mean": 7161.0,
"valid_targets_min": 4072
},
{
"epoch": 6.796296296296296,
"grad_norm": 0.5092042732657823,
"learning_rate": 1.0687595186797073e-07,
"loss": 0.2187,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11906181275844574,
"step": 1835,
"valid_targets_mean": 7092.1,
"valid_targets_min": 2798
},
{
"epoch": 6.814814814814815,
"grad_norm": 0.4879235110558119,
"learning_rate": 8.865645279260815e-08,
"loss": 0.2191,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11363916099071503,
"step": 1840,
"valid_targets_mean": 6146.9,
"valid_targets_min": 597
},
{
"epoch": 6.833333333333333,
"grad_norm": 0.47702518244695324,
"learning_rate": 7.213491821800977e-08,
"loss": 0.2081,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10680747032165527,
"step": 1845,
"valid_targets_mean": 7832.5,
"valid_targets_min": 6237
},
{
"epoch": 6.851851851851852,
"grad_norm": 0.5034576503374186,
"learning_rate": 5.731275703851902e-08,
"loss": 0.2148,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08941599726676941,
"step": 1850,
"valid_targets_mean": 6900.6,
"valid_targets_min": 2557
},
{
"epoch": 6.87037037037037,
"grad_norm": 0.5186476251514881,
"learning_rate": 4.4191233232300235e-08,
"loss": 0.206,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1075291633605957,
"step": 1855,
"valid_targets_mean": 6265.1,
"valid_targets_min": 457
},
{
"epoch": 6.888888888888889,
"grad_norm": 0.48818375200273456,
"learning_rate": 3.2771465753560495e-08,
"loss": 0.2164,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11990895122289658,
"step": 1860,
"valid_targets_mean": 6609.4,
"valid_targets_min": 1476
},
{
"epoch": 6.907407407407407,
"grad_norm": 0.4514003867926065,
"learning_rate": 2.3054428437125907e-08,
"loss": 0.2106,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0958041399717331,
"step": 1865,
"valid_targets_mean": 7317.1,
"valid_targets_min": 1873
},
{
"epoch": 6.925925925925926,
"grad_norm": 0.4750915394409351,
"learning_rate": 1.5040949915399173e-08,
"loss": 0.2159,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11690913140773773,
"step": 1870,
"valid_targets_mean": 7307.8,
"valid_targets_min": 4736
},
{
"epoch": 6.944444444444445,
"grad_norm": 0.4406540020628789,
"learning_rate": 8.731713547689424e-09,
"loss": 0.2157,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09516197443008423,
"step": 1875,
"valid_targets_mean": 8098.8,
"valid_targets_min": 5210
},
{
"epoch": 6.962962962962963,
"grad_norm": 0.4595438788046572,
"learning_rate": 4.127257361954406e-09,
"loss": 0.2136,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08866438269615173,
"step": 1880,
"valid_targets_mean": 6785.6,
"valid_targets_min": 274
},
{
"epoch": 6.981481481481482,
"grad_norm": 0.4913637179887596,
"learning_rate": 1.2279740088971814e-09,
"loss": 0.2234,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13494595885276794,
"step": 1885,
"valid_targets_mean": 7588.5,
"valid_targets_min": 5906
},
{
"epoch": 7.0,
"grad_norm": 0.5014822111690687,
"learning_rate": 3.411072850179054e-11,
"loss": 0.22,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10025124996900558,
"step": 1890,
"valid_targets_mean": 5791.9,
"valid_targets_min": 453
},
{
"epoch": 7.0,
"step": 1890,
"total_flos": 1.153060328150401e+18,
"train_loss": 0.0,
"train_runtime": 0.761,
"train_samples_per_second": 39699.155,
"train_steps_per_second": 2483.497
}
],
"logging_steps": 5,
"max_steps": 1890,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 1500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.153060328150401e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}