9728 lines
270 KiB
JSON
9728 lines
270 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4403,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.00794912559618442,
|
|
"grad_norm": 17.84746630996863,
|
|
"learning_rate": 3.6281179138322e-07,
|
|
"loss": 0.6173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6814424991607666,
|
|
"step": 5,
|
|
"valid_targets_mean": 4120.6,
|
|
"valid_targets_min": 293
|
|
},
|
|
{
|
|
"epoch": 0.01589825119236884,
|
|
"grad_norm": 17.156517918811076,
|
|
"learning_rate": 8.163265306122449e-07,
|
|
"loss": 0.6758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6257226467132568,
|
|
"step": 10,
|
|
"valid_targets_mean": 3674.1,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 0.02384737678855326,
|
|
"grad_norm": 18.01419791342929,
|
|
"learning_rate": 1.26984126984127e-06,
|
|
"loss": 0.7098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7265408635139465,
|
|
"step": 15,
|
|
"valid_targets_mean": 3347.9,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 0.03179650238473768,
|
|
"grad_norm": 11.167185739612897,
|
|
"learning_rate": 1.723356009070295e-06,
|
|
"loss": 0.6202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5243589282035828,
|
|
"step": 20,
|
|
"valid_targets_mean": 4327.7,
|
|
"valid_targets_min": 295
|
|
},
|
|
{
|
|
"epoch": 0.0397456279809221,
|
|
"grad_norm": 8.22954319900581,
|
|
"learning_rate": 2.17687074829932e-06,
|
|
"loss": 0.5494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5256298780441284,
|
|
"step": 25,
|
|
"valid_targets_mean": 3764.8,
|
|
"valid_targets_min": 1259
|
|
},
|
|
{
|
|
"epoch": 0.04769475357710652,
|
|
"grad_norm": 5.286623836409655,
|
|
"learning_rate": 2.6303854875283447e-06,
|
|
"loss": 0.5207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5739851593971252,
|
|
"step": 30,
|
|
"valid_targets_mean": 3315.1,
|
|
"valid_targets_min": 936
|
|
},
|
|
{
|
|
"epoch": 0.05564387917329094,
|
|
"grad_norm": 3.091625342947559,
|
|
"learning_rate": 3.08390022675737e-06,
|
|
"loss": 0.4249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46355006098747253,
|
|
"step": 35,
|
|
"valid_targets_mean": 3561.8,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 0.06359300476947535,
|
|
"grad_norm": 1.6120343190128226,
|
|
"learning_rate": 3.537414965986395e-06,
|
|
"loss": 0.4443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.429388165473938,
|
|
"step": 40,
|
|
"valid_targets_mean": 4614.9,
|
|
"valid_targets_min": 834
|
|
},
|
|
{
|
|
"epoch": 0.07154213036565978,
|
|
"grad_norm": 1.1722091804408992,
|
|
"learning_rate": 3.99092970521542e-06,
|
|
"loss": 0.3702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38033077120780945,
|
|
"step": 45,
|
|
"valid_targets_mean": 4552.9,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 0.0794912559618442,
|
|
"grad_norm": 1.2237202548885548,
|
|
"learning_rate": 4.444444444444444e-06,
|
|
"loss": 0.4028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39322787523269653,
|
|
"step": 50,
|
|
"valid_targets_mean": 2935.4,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 0.08744038155802862,
|
|
"grad_norm": 0.9959193308010645,
|
|
"learning_rate": 4.897959183673469e-06,
|
|
"loss": 0.3929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3938031792640686,
|
|
"step": 55,
|
|
"valid_targets_mean": 3294.7,
|
|
"valid_targets_min": 357
|
|
},
|
|
{
|
|
"epoch": 0.09538950715421304,
|
|
"grad_norm": 0.7829705793426464,
|
|
"learning_rate": 5.3514739229024945e-06,
|
|
"loss": 0.3598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37601467967033386,
|
|
"step": 60,
|
|
"valid_targets_mean": 4138.0,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 0.10333863275039745,
|
|
"grad_norm": 0.7891875286042732,
|
|
"learning_rate": 5.80498866213152e-06,
|
|
"loss": 0.3652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31892889738082886,
|
|
"step": 65,
|
|
"valid_targets_mean": 3174.8,
|
|
"valid_targets_min": 359
|
|
},
|
|
{
|
|
"epoch": 0.11128775834658187,
|
|
"grad_norm": 0.5962960338010875,
|
|
"learning_rate": 6.258503401360545e-06,
|
|
"loss": 0.336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2778470516204834,
|
|
"step": 70,
|
|
"valid_targets_mean": 4142.1,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 0.1192368839427663,
|
|
"grad_norm": 0.6600711718977473,
|
|
"learning_rate": 6.71201814058957e-06,
|
|
"loss": 0.3393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32065966725349426,
|
|
"step": 75,
|
|
"valid_targets_mean": 3403.6,
|
|
"valid_targets_min": 836
|
|
},
|
|
{
|
|
"epoch": 0.1271860095389507,
|
|
"grad_norm": 0.5724896809709736,
|
|
"learning_rate": 7.165532879818595e-06,
|
|
"loss": 0.3093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3127829134464264,
|
|
"step": 80,
|
|
"valid_targets_mean": 3913.0,
|
|
"valid_targets_min": 241
|
|
},
|
|
{
|
|
"epoch": 0.13513513513513514,
|
|
"grad_norm": 0.6797026306142546,
|
|
"learning_rate": 7.61904761904762e-06,
|
|
"loss": 0.2941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32160434126853943,
|
|
"step": 85,
|
|
"valid_targets_mean": 3563.1,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 0.14308426073131955,
|
|
"grad_norm": 0.7278883756953868,
|
|
"learning_rate": 8.072562358276645e-06,
|
|
"loss": 0.3035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3462420701980591,
|
|
"step": 90,
|
|
"valid_targets_mean": 2732.4,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 0.151033386327504,
|
|
"grad_norm": 0.5708083904356873,
|
|
"learning_rate": 8.52607709750567e-06,
|
|
"loss": 0.3008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3244127333164215,
|
|
"step": 95,
|
|
"valid_targets_mean": 4137.4,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 0.1589825119236884,
|
|
"grad_norm": 0.6276207848939579,
|
|
"learning_rate": 8.979591836734695e-06,
|
|
"loss": 0.3111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26650288701057434,
|
|
"step": 100,
|
|
"valid_targets_mean": 3918.8,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 0.1669316375198728,
|
|
"grad_norm": 0.5159242585947521,
|
|
"learning_rate": 9.43310657596372e-06,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2642788887023926,
|
|
"step": 105,
|
|
"valid_targets_mean": 4179.4,
|
|
"valid_targets_min": 1342
|
|
},
|
|
{
|
|
"epoch": 0.17488076311605724,
|
|
"grad_norm": 0.507714398672033,
|
|
"learning_rate": 9.886621315192746e-06,
|
|
"loss": 0.2774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23547717928886414,
|
|
"step": 110,
|
|
"valid_targets_mean": 3923.2,
|
|
"valid_targets_min": 469
|
|
},
|
|
{
|
|
"epoch": 0.18282988871224165,
|
|
"grad_norm": 0.5564144664645884,
|
|
"learning_rate": 1.034013605442177e-05,
|
|
"loss": 0.2829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28326016664505005,
|
|
"step": 115,
|
|
"valid_targets_mean": 3575.0,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 0.1907790143084261,
|
|
"grad_norm": 0.5695684791526006,
|
|
"learning_rate": 1.0793650793650794e-05,
|
|
"loss": 0.2816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26167941093444824,
|
|
"step": 120,
|
|
"valid_targets_mean": 3303.6,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 0.1987281399046105,
|
|
"grad_norm": 0.5237201400320065,
|
|
"learning_rate": 1.124716553287982e-05,
|
|
"loss": 0.2548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28204721212387085,
|
|
"step": 125,
|
|
"valid_targets_mean": 4681.8,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 0.2066772655007949,
|
|
"grad_norm": 0.49001217913037826,
|
|
"learning_rate": 1.1700680272108845e-05,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25121694803237915,
|
|
"step": 130,
|
|
"valid_targets_mean": 4638.6,
|
|
"valid_targets_min": 1821
|
|
},
|
|
{
|
|
"epoch": 0.21462639109697934,
|
|
"grad_norm": 0.5448380739456933,
|
|
"learning_rate": 1.215419501133787e-05,
|
|
"loss": 0.2657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24607300758361816,
|
|
"step": 135,
|
|
"valid_targets_mean": 3504.9,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 0.22257551669316375,
|
|
"grad_norm": 0.5687526210815153,
|
|
"learning_rate": 1.2607709750566895e-05,
|
|
"loss": 0.2802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2677372694015503,
|
|
"step": 140,
|
|
"valid_targets_mean": 3854.4,
|
|
"valid_targets_min": 1329
|
|
},
|
|
{
|
|
"epoch": 0.23052464228934816,
|
|
"grad_norm": 0.6758185299669368,
|
|
"learning_rate": 1.3061224489795918e-05,
|
|
"loss": 0.2907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31545472145080566,
|
|
"step": 145,
|
|
"valid_targets_mean": 3157.5,
|
|
"valid_targets_min": 748
|
|
},
|
|
{
|
|
"epoch": 0.2384737678855326,
|
|
"grad_norm": 0.5061882696731755,
|
|
"learning_rate": 1.3514739229024945e-05,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2856583595275879,
|
|
"step": 150,
|
|
"valid_targets_mean": 4388.9,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 0.246422893481717,
|
|
"grad_norm": 0.5014286356533292,
|
|
"learning_rate": 1.3968253968253968e-05,
|
|
"loss": 0.2456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2581082582473755,
|
|
"step": 155,
|
|
"valid_targets_mean": 4534.4,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 0.2543720190779014,
|
|
"grad_norm": 0.45068416242985176,
|
|
"learning_rate": 1.4421768707482994e-05,
|
|
"loss": 0.2487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21287065744400024,
|
|
"step": 160,
|
|
"valid_targets_mean": 4851.8,
|
|
"valid_targets_min": 1754
|
|
},
|
|
{
|
|
"epoch": 0.26232114467408585,
|
|
"grad_norm": 0.47658632064664735,
|
|
"learning_rate": 1.4875283446712018e-05,
|
|
"loss": 0.251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22702300548553467,
|
|
"step": 165,
|
|
"valid_targets_mean": 4325.5,
|
|
"valid_targets_min": 343
|
|
},
|
|
{
|
|
"epoch": 0.2702702702702703,
|
|
"grad_norm": 0.5277022645179128,
|
|
"learning_rate": 1.5328798185941044e-05,
|
|
"loss": 0.2518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2435617595911026,
|
|
"step": 170,
|
|
"valid_targets_mean": 3762.1,
|
|
"valid_targets_min": 581
|
|
},
|
|
{
|
|
"epoch": 0.27821939586645467,
|
|
"grad_norm": 0.5760228675809232,
|
|
"learning_rate": 1.578231292517007e-05,
|
|
"loss": 0.2567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2474684715270996,
|
|
"step": 175,
|
|
"valid_targets_mean": 4084.1,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 0.2861685214626391,
|
|
"grad_norm": 0.6439319078197573,
|
|
"learning_rate": 1.6235827664399097e-05,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3096400797367096,
|
|
"step": 180,
|
|
"valid_targets_mean": 2994.4,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 0.29411764705882354,
|
|
"grad_norm": 0.5928535221147084,
|
|
"learning_rate": 1.668934240362812e-05,
|
|
"loss": 0.2349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24331536889076233,
|
|
"step": 185,
|
|
"valid_targets_mean": 3329.2,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 0.302066772655008,
|
|
"grad_norm": 1.033627870156792,
|
|
"learning_rate": 1.7142857142857142e-05,
|
|
"loss": 0.2524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2561456561088562,
|
|
"step": 190,
|
|
"valid_targets_mean": 3033.4,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 0.31001589825119236,
|
|
"grad_norm": 0.669117146581437,
|
|
"learning_rate": 1.759637188208617e-05,
|
|
"loss": 0.2329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26669400930404663,
|
|
"step": 195,
|
|
"valid_targets_mean": 3196.8,
|
|
"valid_targets_min": 279
|
|
},
|
|
{
|
|
"epoch": 0.3179650238473768,
|
|
"grad_norm": 0.5158072838218852,
|
|
"learning_rate": 1.8049886621315194e-05,
|
|
"loss": 0.2268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20886510610580444,
|
|
"step": 200,
|
|
"valid_targets_mean": 3514.2,
|
|
"valid_targets_min": 230
|
|
},
|
|
{
|
|
"epoch": 0.32591414944356123,
|
|
"grad_norm": 0.4417940976227962,
|
|
"learning_rate": 1.8503401360544218e-05,
|
|
"loss": 0.253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2474132776260376,
|
|
"step": 205,
|
|
"valid_targets_mean": 5143.9,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 0.3338632750397456,
|
|
"grad_norm": 0.5669027262046396,
|
|
"learning_rate": 1.8956916099773243e-05,
|
|
"loss": 0.2495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2375866174697876,
|
|
"step": 210,
|
|
"valid_targets_mean": 4060.5,
|
|
"valid_targets_min": 787
|
|
},
|
|
{
|
|
"epoch": 0.34181240063593005,
|
|
"grad_norm": 0.5058043503905316,
|
|
"learning_rate": 1.941043083900227e-05,
|
|
"loss": 0.2531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21849530935287476,
|
|
"step": 215,
|
|
"valid_targets_mean": 4479.1,
|
|
"valid_targets_min": 871
|
|
},
|
|
{
|
|
"epoch": 0.3497615262321145,
|
|
"grad_norm": 0.7472969855667505,
|
|
"learning_rate": 1.9863945578231295e-05,
|
|
"loss": 0.2491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22537925839424133,
|
|
"step": 220,
|
|
"valid_targets_mean": 3716.9,
|
|
"valid_targets_min": 1008
|
|
},
|
|
{
|
|
"epoch": 0.35771065182829886,
|
|
"grad_norm": 0.4763106253774264,
|
|
"learning_rate": 2.031746031746032e-05,
|
|
"loss": 0.2318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22549283504486084,
|
|
"step": 225,
|
|
"valid_targets_mean": 4696.7,
|
|
"valid_targets_min": 1345
|
|
},
|
|
{
|
|
"epoch": 0.3656597774244833,
|
|
"grad_norm": 0.5870776260460628,
|
|
"learning_rate": 2.0770975056689343e-05,
|
|
"loss": 0.24,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2403266578912735,
|
|
"step": 230,
|
|
"valid_targets_mean": 3434.6,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 0.37360890302066774,
|
|
"grad_norm": 0.5224207873951022,
|
|
"learning_rate": 2.122448979591837e-05,
|
|
"loss": 0.2284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20927098393440247,
|
|
"step": 235,
|
|
"valid_targets_mean": 3761.1,
|
|
"valid_targets_min": 233
|
|
},
|
|
{
|
|
"epoch": 0.3815580286168522,
|
|
"grad_norm": 0.5356608057317997,
|
|
"learning_rate": 2.1678004535147395e-05,
|
|
"loss": 0.2234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22495192289352417,
|
|
"step": 240,
|
|
"valid_targets_mean": 3884.0,
|
|
"valid_targets_min": 976
|
|
},
|
|
{
|
|
"epoch": 0.38950715421303655,
|
|
"grad_norm": 0.5368634114039467,
|
|
"learning_rate": 2.213151927437642e-05,
|
|
"loss": 0.2288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.202118381857872,
|
|
"step": 245,
|
|
"valid_targets_mean": 4109.8,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 0.397456279809221,
|
|
"grad_norm": 0.5223166530343325,
|
|
"learning_rate": 2.2585034013605444e-05,
|
|
"loss": 0.2268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2209833413362503,
|
|
"step": 250,
|
|
"valid_targets_mean": 4501.5,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 0.40540540540540543,
|
|
"grad_norm": 0.6182508332886121,
|
|
"learning_rate": 2.3038548752834472e-05,
|
|
"loss": 0.2349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25408124923706055,
|
|
"step": 255,
|
|
"valid_targets_mean": 3629.1,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 0.4133545310015898,
|
|
"grad_norm": 0.5912718878361719,
|
|
"learning_rate": 2.3492063492063496e-05,
|
|
"loss": 0.24,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24555028975009918,
|
|
"step": 260,
|
|
"valid_targets_mean": 3606.7,
|
|
"valid_targets_min": 327
|
|
},
|
|
{
|
|
"epoch": 0.42130365659777425,
|
|
"grad_norm": 0.5750322104699228,
|
|
"learning_rate": 2.394557823129252e-05,
|
|
"loss": 0.2432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21299511194229126,
|
|
"step": 265,
|
|
"valid_targets_mean": 3439.4,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 0.4292527821939587,
|
|
"grad_norm": 0.706039350107576,
|
|
"learning_rate": 2.439909297052154e-05,
|
|
"loss": 0.2283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22689072787761688,
|
|
"step": 270,
|
|
"valid_targets_mean": 2713.6,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 0.43720190779014306,
|
|
"grad_norm": 0.4645382324491832,
|
|
"learning_rate": 2.4852607709750566e-05,
|
|
"loss": 0.2247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.186517596244812,
|
|
"step": 275,
|
|
"valid_targets_mean": 4288.1,
|
|
"valid_targets_min": 1067
|
|
},
|
|
{
|
|
"epoch": 0.4451510333863275,
|
|
"grad_norm": 0.5461367695964425,
|
|
"learning_rate": 2.5306122448979597e-05,
|
|
"loss": 0.2365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21831746399402618,
|
|
"step": 280,
|
|
"valid_targets_mean": 3579.9,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 0.45310015898251194,
|
|
"grad_norm": 0.4657927124996843,
|
|
"learning_rate": 2.5759637188208618e-05,
|
|
"loss": 0.2041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1893748790025711,
|
|
"step": 285,
|
|
"valid_targets_mean": 4740.9,
|
|
"valid_targets_min": 1740
|
|
},
|
|
{
|
|
"epoch": 0.4610492845786963,
|
|
"grad_norm": 0.6348763363997716,
|
|
"learning_rate": 2.6213151927437642e-05,
|
|
"loss": 0.2334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19953399896621704,
|
|
"step": 290,
|
|
"valid_targets_mean": 2926.6,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 0.46899841017488075,
|
|
"grad_norm": 0.625040649676204,
|
|
"learning_rate": 2.6666666666666667e-05,
|
|
"loss": 0.2139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24280428886413574,
|
|
"step": 295,
|
|
"valid_targets_mean": 3343.4,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 0.4769475357710652,
|
|
"grad_norm": 0.6020091340791197,
|
|
"learning_rate": 2.7120181405895694e-05,
|
|
"loss": 0.2236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2328951060771942,
|
|
"step": 300,
|
|
"valid_targets_mean": 3270.6,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 0.4848966613672496,
|
|
"grad_norm": 0.5774743472055955,
|
|
"learning_rate": 2.757369614512472e-05,
|
|
"loss": 0.2074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18854255974292755,
|
|
"step": 305,
|
|
"valid_targets_mean": 3489.1,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 0.492845786963434,
|
|
"grad_norm": 0.5002918848960409,
|
|
"learning_rate": 2.8027210884353743e-05,
|
|
"loss": 0.1993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20658744871616364,
|
|
"step": 310,
|
|
"valid_targets_mean": 4087.5,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 0.5007949125596184,
|
|
"grad_norm": 0.5258071361084128,
|
|
"learning_rate": 2.8480725623582767e-05,
|
|
"loss": 0.2287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26666340231895447,
|
|
"step": 315,
|
|
"valid_targets_mean": 4920.9,
|
|
"valid_targets_min": 711
|
|
},
|
|
{
|
|
"epoch": 0.5087440381558028,
|
|
"grad_norm": 0.5359657194543509,
|
|
"learning_rate": 2.893424036281179e-05,
|
|
"loss": 0.2231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2450794130563736,
|
|
"step": 320,
|
|
"valid_targets_mean": 4277.5,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 0.5166931637519873,
|
|
"grad_norm": 0.47155499713480653,
|
|
"learning_rate": 2.938775510204082e-05,
|
|
"loss": 0.2315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22731101512908936,
|
|
"step": 325,
|
|
"valid_targets_mean": 4231.8,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 0.5246422893481717,
|
|
"grad_norm": 0.5626140591094698,
|
|
"learning_rate": 2.9841269841269844e-05,
|
|
"loss": 0.2199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24118876457214355,
|
|
"step": 330,
|
|
"valid_targets_mean": 3969.8,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 0.5325914149443561,
|
|
"grad_norm": 0.4432872807332565,
|
|
"learning_rate": 3.0294784580498868e-05,
|
|
"loss": 0.2225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19566431641578674,
|
|
"step": 335,
|
|
"valid_targets_mean": 4741.9,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 0.5405405405405406,
|
|
"grad_norm": 0.938740381890964,
|
|
"learning_rate": 3.074829931972789e-05,
|
|
"loss": 0.2466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24197694659233093,
|
|
"step": 340,
|
|
"valid_targets_mean": 3894.6,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 0.548489666136725,
|
|
"grad_norm": 0.5053259602931793,
|
|
"learning_rate": 3.1201814058956924e-05,
|
|
"loss": 0.2136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2000350058078766,
|
|
"step": 345,
|
|
"valid_targets_mean": 4115.1,
|
|
"valid_targets_min": 639
|
|
},
|
|
{
|
|
"epoch": 0.5564387917329093,
|
|
"grad_norm": 0.5110974452473003,
|
|
"learning_rate": 3.1655328798185945e-05,
|
|
"loss": 0.2078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20815539360046387,
|
|
"step": 350,
|
|
"valid_targets_mean": 4070.4,
|
|
"valid_targets_min": 1042
|
|
},
|
|
{
|
|
"epoch": 0.5643879173290938,
|
|
"grad_norm": 0.5374530089049312,
|
|
"learning_rate": 3.2108843537414965e-05,
|
|
"loss": 0.2073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19174043834209442,
|
|
"step": 355,
|
|
"valid_targets_mean": 3816.1,
|
|
"valid_targets_min": 481
|
|
},
|
|
{
|
|
"epoch": 0.5723370429252782,
|
|
"grad_norm": 0.5278680977737629,
|
|
"learning_rate": 3.256235827664399e-05,
|
|
"loss": 0.216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19281309843063354,
|
|
"step": 360,
|
|
"valid_targets_mean": 4304.8,
|
|
"valid_targets_min": 1126
|
|
},
|
|
{
|
|
"epoch": 0.5802861685214626,
|
|
"grad_norm": 0.5556493209390555,
|
|
"learning_rate": 3.3015873015873014e-05,
|
|
"loss": 0.2157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20964595675468445,
|
|
"step": 365,
|
|
"valid_targets_mean": 3496.3,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 0.5882352941176471,
|
|
"grad_norm": 0.5099107198036154,
|
|
"learning_rate": 3.346938775510204e-05,
|
|
"loss": 0.2132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1972021907567978,
|
|
"step": 370,
|
|
"valid_targets_mean": 4024.8,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 0.5961844197138315,
|
|
"grad_norm": 0.5259960559393442,
|
|
"learning_rate": 3.392290249433107e-05,
|
|
"loss": 0.2207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2222093790769577,
|
|
"step": 375,
|
|
"valid_targets_mean": 3776.1,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 0.604133545310016,
|
|
"grad_norm": 0.5644984822926026,
|
|
"learning_rate": 3.437641723356009e-05,
|
|
"loss": 0.2339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1983184814453125,
|
|
"step": 380,
|
|
"valid_targets_mean": 3157.5,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 0.6120826709062003,
|
|
"grad_norm": 0.526845185443891,
|
|
"learning_rate": 3.482993197278912e-05,
|
|
"loss": 0.2421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21050560474395752,
|
|
"step": 385,
|
|
"valid_targets_mean": 4162.3,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 0.6200317965023847,
|
|
"grad_norm": 0.5111567928477996,
|
|
"learning_rate": 3.5283446712018146e-05,
|
|
"loss": 0.2188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2091737687587738,
|
|
"step": 390,
|
|
"valid_targets_mean": 4373.3,
|
|
"valid_targets_min": 1257
|
|
},
|
|
{
|
|
"epoch": 0.6279809220985691,
|
|
"grad_norm": 0.4792741938563976,
|
|
"learning_rate": 3.573696145124717e-05,
|
|
"loss": 0.2098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21705269813537598,
|
|
"step": 395,
|
|
"valid_targets_mean": 4804.3,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 0.6359300476947536,
|
|
"grad_norm": 0.5918892454757362,
|
|
"learning_rate": 3.6190476190476195e-05,
|
|
"loss": 0.2108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2312270998954773,
|
|
"step": 400,
|
|
"valid_targets_mean": 3560.3,
|
|
"valid_targets_min": 1152
|
|
},
|
|
{
|
|
"epoch": 0.643879173290938,
|
|
"grad_norm": 0.5068901074600425,
|
|
"learning_rate": 3.6643990929705216e-05,
|
|
"loss": 0.2062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1908341348171234,
|
|
"step": 405,
|
|
"valid_targets_mean": 3449.1,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 0.6518282988871225,
|
|
"grad_norm": 0.5793934181213,
|
|
"learning_rate": 3.7097505668934243e-05,
|
|
"loss": 0.2184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1862396001815796,
|
|
"step": 410,
|
|
"valid_targets_mean": 2984.0,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 0.6597774244833068,
|
|
"grad_norm": 0.5690118134642445,
|
|
"learning_rate": 3.755102040816327e-05,
|
|
"loss": 0.2236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23144859075546265,
|
|
"step": 415,
|
|
"valid_targets_mean": 4501.0,
|
|
"valid_targets_min": 1604
|
|
},
|
|
{
|
|
"epoch": 0.6677265500794912,
|
|
"grad_norm": 0.5776584245877077,
|
|
"learning_rate": 3.800453514739229e-05,
|
|
"loss": 0.2179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21334588527679443,
|
|
"step": 420,
|
|
"valid_targets_mean": 3043.1,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 0.6756756756756757,
|
|
"grad_norm": 0.4713962722172494,
|
|
"learning_rate": 3.845804988662132e-05,
|
|
"loss": 0.2073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.193765789270401,
|
|
"step": 425,
|
|
"valid_targets_mean": 4792.8,
|
|
"valid_targets_min": 1292
|
|
},
|
|
{
|
|
"epoch": 0.6836248012718601,
|
|
"grad_norm": 0.43246949767784015,
|
|
"learning_rate": 3.891156462585034e-05,
|
|
"loss": 0.2252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1918296366930008,
|
|
"step": 430,
|
|
"valid_targets_mean": 5232.2,
|
|
"valid_targets_min": 818
|
|
},
|
|
{
|
|
"epoch": 0.6915739268680445,
|
|
"grad_norm": 0.6617748262724793,
|
|
"learning_rate": 3.936507936507937e-05,
|
|
"loss": 0.2175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21801866590976715,
|
|
"step": 435,
|
|
"valid_targets_mean": 2876.1,
|
|
"valid_targets_min": 1104
|
|
},
|
|
{
|
|
"epoch": 0.699523052464229,
|
|
"grad_norm": 0.5970309098523798,
|
|
"learning_rate": 3.9818594104308396e-05,
|
|
"loss": 0.2228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21752095222473145,
|
|
"step": 440,
|
|
"valid_targets_mean": 3932.8,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 0.7074721780604134,
|
|
"grad_norm": 0.5161262404357317,
|
|
"learning_rate": 3.999994341346418e-05,
|
|
"loss": 0.2103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.221147358417511,
|
|
"step": 445,
|
|
"valid_targets_mean": 3660.2,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 0.7154213036565977,
|
|
"grad_norm": 0.5628100325252552,
|
|
"learning_rate": 3.999959760801596e-05,
|
|
"loss": 0.2175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2509196400642395,
|
|
"step": 450,
|
|
"valid_targets_mean": 3745.4,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 0.7233704292527822,
|
|
"grad_norm": 0.4798788268427838,
|
|
"learning_rate": 3.999893743951281e-05,
|
|
"loss": 0.2223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.249254047870636,
|
|
"step": 455,
|
|
"valid_targets_mean": 4542.9,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 0.7313195548489666,
|
|
"grad_norm": 0.5152240231525378,
|
|
"learning_rate": 3.9997962918331554e-05,
|
|
"loss": 0.1942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19418257474899292,
|
|
"step": 460,
|
|
"valid_targets_mean": 3978.0,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 0.739268680445151,
|
|
"grad_norm": 0.5512712084043713,
|
|
"learning_rate": 3.999667405979019e-05,
|
|
"loss": 0.204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20612534880638123,
|
|
"step": 465,
|
|
"valid_targets_mean": 2988.6,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 0.7472178060413355,
|
|
"grad_norm": 0.7851942416001637,
|
|
"learning_rate": 3.9995070884147604e-05,
|
|
"loss": 0.2296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22632327675819397,
|
|
"step": 470,
|
|
"valid_targets_mean": 3359.4,
|
|
"valid_targets_min": 968
|
|
},
|
|
{
|
|
"epoch": 0.7551669316375199,
|
|
"grad_norm": 0.5537518840736152,
|
|
"learning_rate": 3.999315341660325e-05,
|
|
"loss": 0.2036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21473893523216248,
|
|
"step": 475,
|
|
"valid_targets_mean": 3244.5,
|
|
"valid_targets_min": 581
|
|
},
|
|
{
|
|
"epoch": 0.7631160572337043,
|
|
"grad_norm": 0.5088365488324662,
|
|
"learning_rate": 3.9990921687296785e-05,
|
|
"loss": 0.219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21233266592025757,
|
|
"step": 480,
|
|
"valid_targets_mean": 4417.6,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 0.7710651828298887,
|
|
"grad_norm": 0.5591275224242683,
|
|
"learning_rate": 3.998837573130758e-05,
|
|
"loss": 0.2236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23937103152275085,
|
|
"step": 485,
|
|
"valid_targets_mean": 3575.7,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 0.7790143084260731,
|
|
"grad_norm": 0.41500354568791326,
|
|
"learning_rate": 3.9985515588654166e-05,
|
|
"loss": 0.2215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19625945389270782,
|
|
"step": 490,
|
|
"valid_targets_mean": 4809.2,
|
|
"valid_targets_min": 2182
|
|
},
|
|
{
|
|
"epoch": 0.7869634340222575,
|
|
"grad_norm": 0.5197016278046811,
|
|
"learning_rate": 3.99823413042936e-05,
|
|
"loss": 0.1907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16841718554496765,
|
|
"step": 495,
|
|
"valid_targets_mean": 3910.1,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 0.794912559618442,
|
|
"grad_norm": 0.42605527843070085,
|
|
"learning_rate": 3.997885292812078e-05,
|
|
"loss": 0.2098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21589577198028564,
|
|
"step": 500,
|
|
"valid_targets_mean": 5083.4,
|
|
"valid_targets_min": 631
|
|
},
|
|
{
|
|
"epoch": 0.8028616852146264,
|
|
"grad_norm": 0.6563615392059224,
|
|
"learning_rate": 3.997505051496764e-05,
|
|
"loss": 0.2244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.269468754529953,
|
|
"step": 505,
|
|
"valid_targets_mean": 3249.7,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 0.8108108108108109,
|
|
"grad_norm": 0.511370927432411,
|
|
"learning_rate": 3.997093412460229e-05,
|
|
"loss": 0.2204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23530468344688416,
|
|
"step": 510,
|
|
"valid_targets_mean": 4039.5,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 0.8187599364069952,
|
|
"grad_norm": 0.46207600265174703,
|
|
"learning_rate": 3.9966503821728074e-05,
|
|
"loss": 0.1981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18872712552547455,
|
|
"step": 515,
|
|
"valid_targets_mean": 3918.5,
|
|
"valid_targets_min": 911
|
|
},
|
|
{
|
|
"epoch": 0.8267090620031796,
|
|
"grad_norm": 0.518466058860564,
|
|
"learning_rate": 3.996175967598258e-05,
|
|
"loss": 0.2271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.258210152387619,
|
|
"step": 520,
|
|
"valid_targets_mean": 3838.6,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 0.834658187599364,
|
|
"grad_norm": 0.5496770684162091,
|
|
"learning_rate": 3.995670176193651e-05,
|
|
"loss": 0.2042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1955585479736328,
|
|
"step": 525,
|
|
"valid_targets_mean": 3015.4,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 0.8426073131955485,
|
|
"grad_norm": 0.5141119419529661,
|
|
"learning_rate": 3.9951330159092554e-05,
|
|
"loss": 0.2077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1896553933620453,
|
|
"step": 530,
|
|
"valid_targets_mean": 3632.0,
|
|
"valid_targets_min": 235
|
|
},
|
|
{
|
|
"epoch": 0.8505564387917329,
|
|
"grad_norm": 0.6019780534491145,
|
|
"learning_rate": 3.994564495188405e-05,
|
|
"loss": 0.2028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19973322749137878,
|
|
"step": 535,
|
|
"valid_targets_mean": 3989.1,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 0.8585055643879174,
|
|
"grad_norm": 0.43954090925747025,
|
|
"learning_rate": 3.9939646229673775e-05,
|
|
"loss": 0.2075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1840040683746338,
|
|
"step": 540,
|
|
"valid_targets_mean": 4522.1,
|
|
"valid_targets_min": 1633
|
|
},
|
|
{
|
|
"epoch": 0.8664546899841018,
|
|
"grad_norm": 0.5977708417760155,
|
|
"learning_rate": 3.993333408675244e-05,
|
|
"loss": 0.1873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20497910678386688,
|
|
"step": 545,
|
|
"valid_targets_mean": 2984.9,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 0.8744038155802861,
|
|
"grad_norm": 0.4757849711588045,
|
|
"learning_rate": 3.9926708622337285e-05,
|
|
"loss": 0.214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24841903150081635,
|
|
"step": 550,
|
|
"valid_targets_mean": 4137.8,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 0.8823529411764706,
|
|
"grad_norm": 0.4822074195433229,
|
|
"learning_rate": 3.991976994057046e-05,
|
|
"loss": 0.1943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1875840723514557,
|
|
"step": 555,
|
|
"valid_targets_mean": 4260.8,
|
|
"valid_targets_min": 1101
|
|
},
|
|
{
|
|
"epoch": 0.890302066772655,
|
|
"grad_norm": 0.5842684547912087,
|
|
"learning_rate": 3.991251815051741e-05,
|
|
"loss": 0.2006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19977149367332458,
|
|
"step": 560,
|
|
"valid_targets_mean": 2972.9,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 0.8982511923688394,
|
|
"grad_norm": 0.652464883044752,
|
|
"learning_rate": 3.990495336616519e-05,
|
|
"loss": 0.2,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20372867584228516,
|
|
"step": 565,
|
|
"valid_targets_mean": 3338.4,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 0.9062003179650239,
|
|
"grad_norm": 0.6216148839044923,
|
|
"learning_rate": 3.989707570642062e-05,
|
|
"loss": 0.204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19672566652297974,
|
|
"step": 570,
|
|
"valid_targets_mean": 3842.1,
|
|
"valid_targets_min": 1334
|
|
},
|
|
{
|
|
"epoch": 0.9141494435612083,
|
|
"grad_norm": 0.5103176135370198,
|
|
"learning_rate": 3.988888529510844e-05,
|
|
"loss": 0.2139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20896919071674347,
|
|
"step": 575,
|
|
"valid_targets_mean": 3890.2,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 0.9220985691573926,
|
|
"grad_norm": 0.5595134395559079,
|
|
"learning_rate": 3.988038226096939e-05,
|
|
"loss": 0.2112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2230646163225174,
|
|
"step": 580,
|
|
"valid_targets_mean": 3206.8,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 0.9300476947535771,
|
|
"grad_norm": 0.46172488073234735,
|
|
"learning_rate": 3.9871566737658144e-05,
|
|
"loss": 0.1983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17313021421432495,
|
|
"step": 585,
|
|
"valid_targets_mean": 3876.9,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 0.9379968203497615,
|
|
"grad_norm": 0.4500918322083041,
|
|
"learning_rate": 3.986243886374124e-05,
|
|
"loss": 0.2054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1883338987827301,
|
|
"step": 590,
|
|
"valid_targets_mean": 4360.1,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 0.9459459459459459,
|
|
"grad_norm": 0.46535487326329783,
|
|
"learning_rate": 3.985299878269486e-05,
|
|
"loss": 0.21,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.238117516040802,
|
|
"step": 595,
|
|
"valid_targets_mean": 4255.5,
|
|
"valid_targets_min": 1384
|
|
},
|
|
{
|
|
"epoch": 0.9538950715421304,
|
|
"grad_norm": 0.49612437665460657,
|
|
"learning_rate": 3.9843246642902646e-05,
|
|
"loss": 0.1927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22181783616542816,
|
|
"step": 600,
|
|
"valid_targets_mean": 3831.8,
|
|
"valid_targets_min": 305
|
|
},
|
|
{
|
|
"epoch": 0.9618441971383148,
|
|
"grad_norm": 0.48706304600729844,
|
|
"learning_rate": 3.98331825976533e-05,
|
|
"loss": 0.2202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1820496767759323,
|
|
"step": 605,
|
|
"valid_targets_mean": 3962.2,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 0.9697933227344993,
|
|
"grad_norm": 0.4308975740551794,
|
|
"learning_rate": 3.98228068051382e-05,
|
|
"loss": 0.2069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21952344477176666,
|
|
"step": 610,
|
|
"valid_targets_mean": 5032.3,
|
|
"valid_targets_min": 2134
|
|
},
|
|
{
|
|
"epoch": 0.9777424483306836,
|
|
"grad_norm": 0.5474648098644498,
|
|
"learning_rate": 3.9812119428448926e-05,
|
|
"loss": 0.1955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20172721147537231,
|
|
"step": 615,
|
|
"valid_targets_mean": 3203.3,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 0.985691573926868,
|
|
"grad_norm": 0.4453809222709813,
|
|
"learning_rate": 3.9801120635574664e-05,
|
|
"loss": 0.1876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1718732863664627,
|
|
"step": 620,
|
|
"valid_targets_mean": 3914.3,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 0.9936406995230525,
|
|
"grad_norm": 0.5334825618659923,
|
|
"learning_rate": 3.978981059939961e-05,
|
|
"loss": 0.1914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20379891991615295,
|
|
"step": 625,
|
|
"valid_targets_mean": 3863.4,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 1.0015898251192368,
|
|
"grad_norm": 0.5529835485096336,
|
|
"learning_rate": 3.977818949770022e-05,
|
|
"loss": 0.1867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2112482190132141,
|
|
"step": 630,
|
|
"valid_targets_mean": 3267.1,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 1.0095389507154213,
|
|
"grad_norm": 0.5122916288062801,
|
|
"learning_rate": 3.976625751314241e-05,
|
|
"loss": 0.1839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2054670751094818,
|
|
"step": 635,
|
|
"valid_targets_mean": 4767.6,
|
|
"valid_targets_min": 908
|
|
},
|
|
{
|
|
"epoch": 1.0174880763116056,
|
|
"grad_norm": 0.5068393688463881,
|
|
"learning_rate": 3.975401483327871e-05,
|
|
"loss": 0.186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1859699785709381,
|
|
"step": 640,
|
|
"valid_targets_mean": 3215.8,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 1.0254372019077902,
|
|
"grad_norm": 0.4145850767881214,
|
|
"learning_rate": 3.974146165054532e-05,
|
|
"loss": 0.1918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2187093198299408,
|
|
"step": 645,
|
|
"valid_targets_mean": 4994.4,
|
|
"valid_targets_min": 1438
|
|
},
|
|
{
|
|
"epoch": 1.0333863275039745,
|
|
"grad_norm": 0.4582537603183787,
|
|
"learning_rate": 3.972859816225904e-05,
|
|
"loss": 0.193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1593848317861557,
|
|
"step": 650,
|
|
"valid_targets_mean": 3891.4,
|
|
"valid_targets_min": 297
|
|
},
|
|
{
|
|
"epoch": 1.041335453100159,
|
|
"grad_norm": 0.4567275257490204,
|
|
"learning_rate": 3.97154245706142e-05,
|
|
"loss": 0.1871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18935450911521912,
|
|
"step": 655,
|
|
"valid_targets_mean": 4475.3,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 1.0492845786963434,
|
|
"grad_norm": 0.5140859045337463,
|
|
"learning_rate": 3.970194108267952e-05,
|
|
"loss": 0.1901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18552550673484802,
|
|
"step": 660,
|
|
"valid_targets_mean": 3475.7,
|
|
"valid_targets_min": 240
|
|
},
|
|
{
|
|
"epoch": 1.0572337042925277,
|
|
"grad_norm": 0.40778078731763423,
|
|
"learning_rate": 3.968814791039477e-05,
|
|
"loss": 0.189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18175040185451508,
|
|
"step": 665,
|
|
"valid_targets_mean": 4828.3,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 1.0651828298887123,
|
|
"grad_norm": 0.5263082294572751,
|
|
"learning_rate": 3.967404527056751e-05,
|
|
"loss": 0.2161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25833356380462646,
|
|
"step": 670,
|
|
"valid_targets_mean": 4129.8,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 1.0731319554848966,
|
|
"grad_norm": 0.4960768768163948,
|
|
"learning_rate": 3.9659633384869626e-05,
|
|
"loss": 0.189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17353063821792603,
|
|
"step": 675,
|
|
"valid_targets_mean": 4446.4,
|
|
"valid_targets_min": 1213
|
|
},
|
|
{
|
|
"epoch": 1.0810810810810811,
|
|
"grad_norm": 0.48100083595612203,
|
|
"learning_rate": 3.964491247983392e-05,
|
|
"loss": 0.2005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1870320737361908,
|
|
"step": 680,
|
|
"valid_targets_mean": 3895.7,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 1.0890302066772655,
|
|
"grad_norm": 0.61106090311707,
|
|
"learning_rate": 3.962988278685047e-05,
|
|
"loss": 0.1949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19367003440856934,
|
|
"step": 685,
|
|
"valid_targets_mean": 3629.8,
|
|
"valid_targets_min": 369
|
|
},
|
|
{
|
|
"epoch": 1.09697933227345,
|
|
"grad_norm": 0.4365754632817891,
|
|
"learning_rate": 3.961454454216305e-05,
|
|
"loss": 0.197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19389833509922028,
|
|
"step": 690,
|
|
"valid_targets_mean": 4258.8,
|
|
"valid_targets_min": 197
|
|
},
|
|
{
|
|
"epoch": 1.1049284578696343,
|
|
"grad_norm": 0.5262716250941029,
|
|
"learning_rate": 3.9598897986865364e-05,
|
|
"loss": 0.1979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19017469882965088,
|
|
"step": 695,
|
|
"valid_targets_mean": 3231.6,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 1.1128775834658187,
|
|
"grad_norm": 0.49861602576948466,
|
|
"learning_rate": 3.9582943366897316e-05,
|
|
"loss": 0.1909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1815868318080902,
|
|
"step": 700,
|
|
"valid_targets_mean": 3687.8,
|
|
"valid_targets_min": 510
|
|
},
|
|
{
|
|
"epoch": 1.1208267090620032,
|
|
"grad_norm": 0.48175933314623326,
|
|
"learning_rate": 3.956668093304112e-05,
|
|
"loss": 0.1856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1780034601688385,
|
|
"step": 705,
|
|
"valid_targets_mean": 4006.3,
|
|
"valid_targets_min": 824
|
|
},
|
|
{
|
|
"epoch": 1.1287758346581875,
|
|
"grad_norm": 0.5179802897696733,
|
|
"learning_rate": 3.9550110940917313e-05,
|
|
"loss": 0.198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20583273470401764,
|
|
"step": 710,
|
|
"valid_targets_mean": 3334.9,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 1.136724960254372,
|
|
"grad_norm": 0.4267427875463337,
|
|
"learning_rate": 3.953323365098082e-05,
|
|
"loss": 0.188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18678268790245056,
|
|
"step": 715,
|
|
"valid_targets_mean": 4681.6,
|
|
"valid_targets_min": 1042
|
|
},
|
|
{
|
|
"epoch": 1.1446740858505564,
|
|
"grad_norm": 0.4891452481904024,
|
|
"learning_rate": 3.9516049328516795e-05,
|
|
"loss": 0.2026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19785034656524658,
|
|
"step": 720,
|
|
"valid_targets_mean": 4126.4,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 1.1526232114467407,
|
|
"grad_norm": 0.5279349481342677,
|
|
"learning_rate": 3.949855824363647e-05,
|
|
"loss": 0.1818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19800621271133423,
|
|
"step": 725,
|
|
"valid_targets_mean": 3239.9,
|
|
"valid_targets_min": 846
|
|
},
|
|
{
|
|
"epoch": 1.1605723370429253,
|
|
"grad_norm": 0.8095806249907248,
|
|
"learning_rate": 3.948076067127294e-05,
|
|
"loss": 0.19,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17679157853126526,
|
|
"step": 730,
|
|
"valid_targets_mean": 3760.4,
|
|
"valid_targets_min": 352
|
|
},
|
|
{
|
|
"epoch": 1.1685214626391096,
|
|
"grad_norm": 0.7153084766269169,
|
|
"learning_rate": 3.946265689117677e-05,
|
|
"loss": 0.1968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1971210539340973,
|
|
"step": 735,
|
|
"valid_targets_mean": 2638.4,
|
|
"valid_targets_min": 787
|
|
},
|
|
{
|
|
"epoch": 1.1764705882352942,
|
|
"grad_norm": 0.4535582899393148,
|
|
"learning_rate": 3.944424718791169e-05,
|
|
"loss": 0.1822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16410158574581146,
|
|
"step": 740,
|
|
"valid_targets_mean": 3738.6,
|
|
"valid_targets_min": 1269
|
|
},
|
|
{
|
|
"epoch": 1.1844197138314785,
|
|
"grad_norm": 0.47691889948518285,
|
|
"learning_rate": 3.942553185085003e-05,
|
|
"loss": 0.1885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19451865553855896,
|
|
"step": 745,
|
|
"valid_targets_mean": 3723.2,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 1.192368839427663,
|
|
"grad_norm": 0.4783725873079702,
|
|
"learning_rate": 3.940651117416824e-05,
|
|
"loss": 0.1862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17500782012939453,
|
|
"step": 750,
|
|
"valid_targets_mean": 3733.9,
|
|
"valid_targets_min": 375
|
|
},
|
|
{
|
|
"epoch": 1.2003179650238474,
|
|
"grad_norm": 0.45013056800116047,
|
|
"learning_rate": 3.9387185456842247e-05,
|
|
"loss": 0.1957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1873418688774109,
|
|
"step": 755,
|
|
"valid_targets_mean": 4532.6,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 1.2082670906200317,
|
|
"grad_norm": 0.4513636003405965,
|
|
"learning_rate": 3.936755500264274e-05,
|
|
"loss": 0.1972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19119738042354584,
|
|
"step": 760,
|
|
"valid_targets_mean": 3633.2,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 1.2162162162162162,
|
|
"grad_norm": 0.43039183768805145,
|
|
"learning_rate": 3.9347620120130384e-05,
|
|
"loss": 0.1853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19387167692184448,
|
|
"step": 765,
|
|
"valid_targets_mean": 5013.5,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 1.2241653418124006,
|
|
"grad_norm": 0.5545872512419363,
|
|
"learning_rate": 3.932738112265103e-05,
|
|
"loss": 0.2052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1966329962015152,
|
|
"step": 770,
|
|
"valid_targets_mean": 3744.9,
|
|
"valid_targets_min": 295
|
|
},
|
|
{
|
|
"epoch": 1.232114467408585,
|
|
"grad_norm": 0.44088445810305865,
|
|
"learning_rate": 3.930683832833073e-05,
|
|
"loss": 0.189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1997533142566681,
|
|
"step": 775,
|
|
"valid_targets_mean": 4225.8,
|
|
"valid_targets_min": 1304
|
|
},
|
|
{
|
|
"epoch": 1.2400635930047694,
|
|
"grad_norm": 0.5628393929224067,
|
|
"learning_rate": 3.928599206007076e-05,
|
|
"loss": 0.1894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16368547081947327,
|
|
"step": 780,
|
|
"valid_targets_mean": 4202.9,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 1.248012718600954,
|
|
"grad_norm": 0.5044797716972268,
|
|
"learning_rate": 3.926484264554253e-05,
|
|
"loss": 0.1978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2036869078874588,
|
|
"step": 785,
|
|
"valid_targets_mean": 3405.1,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 1.2559618441971383,
|
|
"grad_norm": 0.5129578399604017,
|
|
"learning_rate": 3.924339041718247e-05,
|
|
"loss": 0.2047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19128306210041046,
|
|
"step": 790,
|
|
"valid_targets_mean": 3456.4,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 1.2639109697933226,
|
|
"grad_norm": 0.563159848952125,
|
|
"learning_rate": 3.922163571218676e-05,
|
|
"loss": 0.1901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2012992799282074,
|
|
"step": 795,
|
|
"valid_targets_mean": 3187.4,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 1.2718600953895072,
|
|
"grad_norm": 0.6173273492357961,
|
|
"learning_rate": 3.919957887250606e-05,
|
|
"loss": 0.2051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22333335876464844,
|
|
"step": 800,
|
|
"valid_targets_mean": 2847.9,
|
|
"valid_targets_min": 630
|
|
},
|
|
{
|
|
"epoch": 1.2798092209856915,
|
|
"grad_norm": 0.44827689109710595,
|
|
"learning_rate": 3.917722024484011e-05,
|
|
"loss": 0.1978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18572327494621277,
|
|
"step": 805,
|
|
"valid_targets_mean": 4246.1,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 1.287758346581876,
|
|
"grad_norm": 0.6751673561517912,
|
|
"learning_rate": 3.915456018063232e-05,
|
|
"loss": 0.1854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20056423544883728,
|
|
"step": 810,
|
|
"valid_targets_mean": 2438.5,
|
|
"valid_targets_min": 225
|
|
},
|
|
{
|
|
"epoch": 1.2957074721780604,
|
|
"grad_norm": 0.7720325184700507,
|
|
"learning_rate": 3.9131599036064204e-05,
|
|
"loss": 0.1921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1767432987689972,
|
|
"step": 815,
|
|
"valid_targets_mean": 3982.4,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 1.303656597774245,
|
|
"grad_norm": 0.4366411337881234,
|
|
"learning_rate": 3.9108337172049794e-05,
|
|
"loss": 0.187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21016202867031097,
|
|
"step": 820,
|
|
"valid_targets_mean": 4805.6,
|
|
"valid_targets_min": 1425
|
|
},
|
|
{
|
|
"epoch": 1.3116057233704292,
|
|
"grad_norm": 0.5069327068809399,
|
|
"learning_rate": 3.908477495422998e-05,
|
|
"loss": 0.1969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19924074411392212,
|
|
"step": 825,
|
|
"valid_targets_mean": 3997.4,
|
|
"valid_targets_min": 286
|
|
},
|
|
{
|
|
"epoch": 1.3195548489666136,
|
|
"grad_norm": 0.445873116262987,
|
|
"learning_rate": 3.906091275296676e-05,
|
|
"loss": 0.1923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17496737837791443,
|
|
"step": 830,
|
|
"valid_targets_mean": 4272.1,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 1.3275039745627981,
|
|
"grad_norm": 0.9206148708023212,
|
|
"learning_rate": 3.903675094333739e-05,
|
|
"loss": 0.1951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2181253880262375,
|
|
"step": 835,
|
|
"valid_targets_mean": 3913.7,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 1.3354531001589824,
|
|
"grad_norm": 0.5133359649094603,
|
|
"learning_rate": 3.901228990512854e-05,
|
|
"loss": 0.1987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2114499807357788,
|
|
"step": 840,
|
|
"valid_targets_mean": 3686.8,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 1.343402225755167,
|
|
"grad_norm": 0.4696592099080785,
|
|
"learning_rate": 3.898753002283027e-05,
|
|
"loss": 0.1998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.209947407245636,
|
|
"step": 845,
|
|
"valid_targets_mean": 3526.2,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 1.3513513513513513,
|
|
"grad_norm": 0.5149647286915602,
|
|
"learning_rate": 3.896247168563004e-05,
|
|
"loss": 0.21,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22973836958408356,
|
|
"step": 850,
|
|
"valid_targets_mean": 4359.3,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 1.3593004769475359,
|
|
"grad_norm": 0.45847358766247315,
|
|
"learning_rate": 3.8937115287406524e-05,
|
|
"loss": 0.1761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20062896609306335,
|
|
"step": 855,
|
|
"valid_targets_mean": 4928.5,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 1.3672496025437202,
|
|
"grad_norm": 0.5460254266383082,
|
|
"learning_rate": 3.891146122672349e-05,
|
|
"loss": 0.1994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20617523789405823,
|
|
"step": 860,
|
|
"valid_targets_mean": 3505.1,
|
|
"valid_targets_min": 904
|
|
},
|
|
{
|
|
"epoch": 1.3751987281399045,
|
|
"grad_norm": 0.5461178071898645,
|
|
"learning_rate": 3.8885509906823496e-05,
|
|
"loss": 0.1807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19545643031597137,
|
|
"step": 865,
|
|
"valid_targets_mean": 3159.5,
|
|
"valid_targets_min": 1402
|
|
},
|
|
{
|
|
"epoch": 1.383147853736089,
|
|
"grad_norm": 0.4558363745882089,
|
|
"learning_rate": 3.885926173562157e-05,
|
|
"loss": 0.1785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18399888277053833,
|
|
"step": 870,
|
|
"valid_targets_mean": 3677.1,
|
|
"valid_targets_min": 309
|
|
},
|
|
{
|
|
"epoch": 1.3910969793322734,
|
|
"grad_norm": 0.3899280590888697,
|
|
"learning_rate": 3.883271712569875e-05,
|
|
"loss": 0.1944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20818111300468445,
|
|
"step": 875,
|
|
"valid_targets_mean": 5354.6,
|
|
"valid_targets_min": 1826
|
|
},
|
|
{
|
|
"epoch": 1.399046104928458,
|
|
"grad_norm": 0.4639547086845976,
|
|
"learning_rate": 3.8805876494295694e-05,
|
|
"loss": 0.1802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17379283905029297,
|
|
"step": 880,
|
|
"valid_targets_mean": 4342.3,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 1.4069952305246423,
|
|
"grad_norm": 0.39181296096344126,
|
|
"learning_rate": 3.877874026330602e-05,
|
|
"loss": 0.1913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15908557176589966,
|
|
"step": 885,
|
|
"valid_targets_mean": 4897.9,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 1.4149443561208268,
|
|
"grad_norm": 0.4132604124999311,
|
|
"learning_rate": 3.875130885926973e-05,
|
|
"loss": 0.1866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18337678909301758,
|
|
"step": 890,
|
|
"valid_targets_mean": 4757.7,
|
|
"valid_targets_min": 330
|
|
},
|
|
{
|
|
"epoch": 1.4228934817170111,
|
|
"grad_norm": 0.5105514719522013,
|
|
"learning_rate": 3.872358271336651e-05,
|
|
"loss": 0.1837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19865591824054718,
|
|
"step": 895,
|
|
"valid_targets_mean": 3313.8,
|
|
"valid_targets_min": 487
|
|
},
|
|
{
|
|
"epoch": 1.4308426073131955,
|
|
"grad_norm": 0.5235070348886672,
|
|
"learning_rate": 3.8695562261408915e-05,
|
|
"loss": 0.2151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1919848918914795,
|
|
"step": 900,
|
|
"valid_targets_mean": 2859.2,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 1.43879173290938,
|
|
"grad_norm": 0.47312190085140265,
|
|
"learning_rate": 3.8667247943835555e-05,
|
|
"loss": 0.1944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20830847322940826,
|
|
"step": 905,
|
|
"valid_targets_mean": 4533.8,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 1.4467408585055643,
|
|
"grad_norm": 0.5175775434248544,
|
|
"learning_rate": 3.863864020570414e-05,
|
|
"loss": 0.1871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19121363759040833,
|
|
"step": 910,
|
|
"valid_targets_mean": 3036.8,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 1.4546899841017489,
|
|
"grad_norm": 0.5781223268494494,
|
|
"learning_rate": 3.860973949668454e-05,
|
|
"loss": 0.1924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20266948640346527,
|
|
"step": 915,
|
|
"valid_targets_mean": 2671.8,
|
|
"valid_targets_min": 254
|
|
},
|
|
{
|
|
"epoch": 1.4626391096979332,
|
|
"grad_norm": 0.4567922909167134,
|
|
"learning_rate": 3.8580546271051634e-05,
|
|
"loss": 0.1908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18493634462356567,
|
|
"step": 920,
|
|
"valid_targets_mean": 4128.1,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 1.4705882352941178,
|
|
"grad_norm": 0.47418699448382096,
|
|
"learning_rate": 3.8551060987678236e-05,
|
|
"loss": 0.1856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18517082929611206,
|
|
"step": 925,
|
|
"valid_targets_mean": 3606.5,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 1.478537360890302,
|
|
"grad_norm": 0.4220236091213908,
|
|
"learning_rate": 3.852128411002787e-05,
|
|
"loss": 0.1909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19654536247253418,
|
|
"step": 930,
|
|
"valid_targets_mean": 5133.6,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 1.4864864864864864,
|
|
"grad_norm": 0.4573542025228096,
|
|
"learning_rate": 3.849121610614745e-05,
|
|
"loss": 0.2062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21628251671791077,
|
|
"step": 935,
|
|
"valid_targets_mean": 4146.7,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 1.494435612082671,
|
|
"grad_norm": 0.455624363723731,
|
|
"learning_rate": 3.8460857448659975e-05,
|
|
"loss": 0.1815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16024024784564972,
|
|
"step": 940,
|
|
"valid_targets_mean": 3990.5,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 1.5023847376788553,
|
|
"grad_norm": 0.8397075238280883,
|
|
"learning_rate": 3.8430208614757044e-05,
|
|
"loss": 0.1914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19728557765483856,
|
|
"step": 945,
|
|
"valid_targets_mean": 3193.9,
|
|
"valid_targets_min": 1069
|
|
},
|
|
{
|
|
"epoch": 1.5103338632750396,
|
|
"grad_norm": 0.6455640246915216,
|
|
"learning_rate": 3.8399270086191425e-05,
|
|
"loss": 0.1831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18609043955802917,
|
|
"step": 950,
|
|
"valid_targets_mean": 3671.4,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 1.5182829888712241,
|
|
"grad_norm": 0.4393679011147717,
|
|
"learning_rate": 3.8368042349269405e-05,
|
|
"loss": 0.1931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18841350078582764,
|
|
"step": 955,
|
|
"valid_targets_mean": 4189.6,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 1.5262321144674087,
|
|
"grad_norm": 0.4748654125112523,
|
|
"learning_rate": 3.83365258948432e-05,
|
|
"loss": 0.1856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18544679880142212,
|
|
"step": 960,
|
|
"valid_targets_mean": 4059.8,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 1.534181240063593,
|
|
"grad_norm": 0.44195423356853586,
|
|
"learning_rate": 3.830472121830323e-05,
|
|
"loss": 0.1787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17977693676948547,
|
|
"step": 965,
|
|
"valid_targets_mean": 4036.9,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 1.5421303656597773,
|
|
"grad_norm": 0.4190445460085609,
|
|
"learning_rate": 3.82726288195703e-05,
|
|
"loss": 0.1703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15075984597206116,
|
|
"step": 970,
|
|
"valid_targets_mean": 3788.1,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 1.550079491255962,
|
|
"grad_norm": 0.38824323345082057,
|
|
"learning_rate": 3.824024920308781e-05,
|
|
"loss": 0.1886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16938742995262146,
|
|
"step": 975,
|
|
"valid_targets_mean": 4616.4,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 1.5580286168521462,
|
|
"grad_norm": 0.4803671121375321,
|
|
"learning_rate": 3.820758287781374e-05,
|
|
"loss": 0.191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21928074955940247,
|
|
"step": 980,
|
|
"valid_targets_mean": 4322.5,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 1.5659777424483305,
|
|
"grad_norm": 0.4853782041831188,
|
|
"learning_rate": 3.8174630357212714e-05,
|
|
"loss": 0.1964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17328375577926636,
|
|
"step": 985,
|
|
"valid_targets_mean": 4317.4,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 1.573926868044515,
|
|
"grad_norm": 0.47169665339515204,
|
|
"learning_rate": 3.8141392159247905e-05,
|
|
"loss": 0.1977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21782585978507996,
|
|
"step": 990,
|
|
"valid_targets_mean": 3688.9,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 1.5818759936406996,
|
|
"grad_norm": 0.4690651486120993,
|
|
"learning_rate": 3.81078688063729e-05,
|
|
"loss": 0.1885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19451123476028442,
|
|
"step": 995,
|
|
"valid_targets_mean": 3713.2,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 1.589825119236884,
|
|
"grad_norm": 0.4346450046592069,
|
|
"learning_rate": 3.807406082552348e-05,
|
|
"loss": 0.1835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16455082595348358,
|
|
"step": 1000,
|
|
"valid_targets_mean": 3809.7,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 1.5977742448330683,
|
|
"grad_norm": 0.41932684002795517,
|
|
"learning_rate": 3.803996874810934e-05,
|
|
"loss": 0.1848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17046436667442322,
|
|
"step": 1005,
|
|
"valid_targets_mean": 4024.4,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 1.6057233704292528,
|
|
"grad_norm": 0.3705151557622996,
|
|
"learning_rate": 3.800559311000575e-05,
|
|
"loss": 0.17,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1598929464817047,
|
|
"step": 1010,
|
|
"valid_targets_mean": 5066.7,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 1.6136724960254372,
|
|
"grad_norm": 0.43289861621038134,
|
|
"learning_rate": 3.7970934451545104e-05,
|
|
"loss": 0.1925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1886836290359497,
|
|
"step": 1015,
|
|
"valid_targets_mean": 4769.2,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 1.6216216216216215,
|
|
"grad_norm": 0.45330830989766385,
|
|
"learning_rate": 3.7935993317508455e-05,
|
|
"loss": 0.1924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18560412526130676,
|
|
"step": 1020,
|
|
"valid_targets_mean": 4132.1,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 1.629570747217806,
|
|
"grad_norm": 0.515537997297837,
|
|
"learning_rate": 3.790077025711694e-05,
|
|
"loss": 0.1839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21635811030864716,
|
|
"step": 1025,
|
|
"valid_targets_mean": 3410.8,
|
|
"valid_targets_min": 233
|
|
},
|
|
{
|
|
"epoch": 1.6375198728139906,
|
|
"grad_norm": 0.4946566085078078,
|
|
"learning_rate": 3.786526582402313e-05,
|
|
"loss": 0.1835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17097623646259308,
|
|
"step": 1030,
|
|
"valid_targets_mean": 3390.3,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 1.645468998410175,
|
|
"grad_norm": 0.4669861404848048,
|
|
"learning_rate": 3.782948057630236e-05,
|
|
"loss": 0.1712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19812054932117462,
|
|
"step": 1035,
|
|
"valid_targets_mean": 3441.6,
|
|
"valid_targets_min": 624
|
|
},
|
|
{
|
|
"epoch": 1.6534181240063592,
|
|
"grad_norm": 0.4939545083568682,
|
|
"learning_rate": 3.779341507644394e-05,
|
|
"loss": 0.2019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17919695377349854,
|
|
"step": 1040,
|
|
"valid_targets_mean": 3929.4,
|
|
"valid_targets_min": 302
|
|
},
|
|
{
|
|
"epoch": 1.6613672496025438,
|
|
"grad_norm": 0.540991144768767,
|
|
"learning_rate": 3.775706989134231e-05,
|
|
"loss": 0.1857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20103199779987335,
|
|
"step": 1045,
|
|
"valid_targets_mean": 3738.6,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 1.669316375198728,
|
|
"grad_norm": 0.4963684293040473,
|
|
"learning_rate": 3.772044559228813e-05,
|
|
"loss": 0.1882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19030243158340454,
|
|
"step": 1050,
|
|
"valid_targets_mean": 3166.8,
|
|
"valid_targets_min": 252
|
|
},
|
|
{
|
|
"epoch": 1.6772655007949124,
|
|
"grad_norm": 0.501401895351916,
|
|
"learning_rate": 3.768354275495933e-05,
|
|
"loss": 0.1821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1745380461215973,
|
|
"step": 1055,
|
|
"valid_targets_mean": 3242.2,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 1.685214626391097,
|
|
"grad_norm": 0.4968401136342654,
|
|
"learning_rate": 3.764636195941198e-05,
|
|
"loss": 0.1831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17797568440437317,
|
|
"step": 1060,
|
|
"valid_targets_mean": 3188.0,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 1.6931637519872815,
|
|
"grad_norm": 0.4884093983436614,
|
|
"learning_rate": 3.760890379007129e-05,
|
|
"loss": 0.1837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2129162847995758,
|
|
"step": 1065,
|
|
"valid_targets_mean": 4418.9,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 1.7011128775834659,
|
|
"grad_norm": 0.44076053571984003,
|
|
"learning_rate": 3.757116883572232e-05,
|
|
"loss": 0.1764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1850593388080597,
|
|
"step": 1070,
|
|
"valid_targets_mean": 4093.6,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 1.7090620031796502,
|
|
"grad_norm": 0.542297894741752,
|
|
"learning_rate": 3.753315768950079e-05,
|
|
"loss": 0.1819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21836097538471222,
|
|
"step": 1075,
|
|
"valid_targets_mean": 3133.3,
|
|
"valid_targets_min": 204
|
|
},
|
|
{
|
|
"epoch": 1.7170111287758347,
|
|
"grad_norm": 0.5168670041375298,
|
|
"learning_rate": 3.74948709488837e-05,
|
|
"loss": 0.1778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18428263068199158,
|
|
"step": 1080,
|
|
"valid_targets_mean": 3062.6,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 1.724960254372019,
|
|
"grad_norm": 0.5653952426228848,
|
|
"learning_rate": 3.745630921568004e-05,
|
|
"loss": 0.1837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17465798556804657,
|
|
"step": 1085,
|
|
"valid_targets_mean": 2868.4,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 1.7329093799682034,
|
|
"grad_norm": 0.46856535469042454,
|
|
"learning_rate": 3.741747309602117e-05,
|
|
"loss": 0.2027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20139528810977936,
|
|
"step": 1090,
|
|
"valid_targets_mean": 4477.7,
|
|
"valid_targets_min": 212
|
|
},
|
|
{
|
|
"epoch": 1.740858505564388,
|
|
"grad_norm": 0.41928118910953777,
|
|
"learning_rate": 3.737836320035146e-05,
|
|
"loss": 0.1773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18881118297576904,
|
|
"step": 1095,
|
|
"valid_targets_mean": 4088.9,
|
|
"valid_targets_min": 1217
|
|
},
|
|
{
|
|
"epoch": 1.7488076311605725,
|
|
"grad_norm": 0.4251393076302139,
|
|
"learning_rate": 3.733898014341858e-05,
|
|
"loss": 0.1914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18259574472904205,
|
|
"step": 1100,
|
|
"valid_targets_mean": 4110.7,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 1.7567567567567568,
|
|
"grad_norm": 0.3931735507262585,
|
|
"learning_rate": 3.729932454426391e-05,
|
|
"loss": 0.1948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18538832664489746,
|
|
"step": 1105,
|
|
"valid_targets_mean": 5117.2,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 1.7647058823529411,
|
|
"grad_norm": 0.4529514349775154,
|
|
"learning_rate": 3.725939702621273e-05,
|
|
"loss": 0.1903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18618038296699524,
|
|
"step": 1110,
|
|
"valid_targets_mean": 4018.2,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 1.7726550079491257,
|
|
"grad_norm": 0.4503816594471195,
|
|
"learning_rate": 3.72191982168645e-05,
|
|
"loss": 0.1998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15615426003932953,
|
|
"step": 1115,
|
|
"valid_targets_mean": 3476.6,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 1.78060413354531,
|
|
"grad_norm": 0.5088702362995737,
|
|
"learning_rate": 3.717872874808298e-05,
|
|
"loss": 0.1802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21099695563316345,
|
|
"step": 1120,
|
|
"valid_targets_mean": 3103.6,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 1.7885532591414943,
|
|
"grad_norm": 0.7999181539319212,
|
|
"learning_rate": 3.713798925598623e-05,
|
|
"loss": 0.1929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19236738979816437,
|
|
"step": 1125,
|
|
"valid_targets_mean": 3830.4,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 1.7965023847376789,
|
|
"grad_norm": 0.4307921702238077,
|
|
"learning_rate": 3.709698038093671e-05,
|
|
"loss": 0.1915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16169005632400513,
|
|
"step": 1130,
|
|
"valid_targets_mean": 3835.4,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 1.8044515103338634,
|
|
"grad_norm": 0.485741913256061,
|
|
"learning_rate": 3.705570276753116e-05,
|
|
"loss": 0.1851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18286767601966858,
|
|
"step": 1135,
|
|
"valid_targets_mean": 3094.0,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 1.8124006359300477,
|
|
"grad_norm": 0.5255452896790747,
|
|
"learning_rate": 3.701415706459044e-05,
|
|
"loss": 0.1662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19145852327346802,
|
|
"step": 1140,
|
|
"valid_targets_mean": 3693.2,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 1.820349761526232,
|
|
"grad_norm": 0.5046926635208234,
|
|
"learning_rate": 3.697234392514942e-05,
|
|
"loss": 0.1803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2077309787273407,
|
|
"step": 1145,
|
|
"valid_targets_mean": 3055.6,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 1.8282988871224166,
|
|
"grad_norm": 0.4368221113650411,
|
|
"learning_rate": 3.693026400644662e-05,
|
|
"loss": 0.1795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18555589020252228,
|
|
"step": 1150,
|
|
"valid_targets_mean": 4117.4,
|
|
"valid_targets_min": 279
|
|
},
|
|
{
|
|
"epoch": 1.836248012718601,
|
|
"grad_norm": 0.4836963066254356,
|
|
"learning_rate": 3.6887917969913944e-05,
|
|
"loss": 0.2034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2165643572807312,
|
|
"step": 1155,
|
|
"valid_targets_mean": 3562.9,
|
|
"valid_targets_min": 525
|
|
},
|
|
{
|
|
"epoch": 1.8441971383147853,
|
|
"grad_norm": 0.4084280506680083,
|
|
"learning_rate": 3.684530648116625e-05,
|
|
"loss": 0.1801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17716243863105774,
|
|
"step": 1160,
|
|
"valid_targets_mean": 4411.1,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 1.8521462639109698,
|
|
"grad_norm": 0.44584904009558374,
|
|
"learning_rate": 3.68024302099909e-05,
|
|
"loss": 0.1894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19152897596359253,
|
|
"step": 1165,
|
|
"valid_targets_mean": 3953.6,
|
|
"valid_targets_min": 450
|
|
},
|
|
{
|
|
"epoch": 1.8600953895071544,
|
|
"grad_norm": 0.5649727314954088,
|
|
"learning_rate": 3.6759289830337246e-05,
|
|
"loss": 0.1871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1756046712398529,
|
|
"step": 1170,
|
|
"valid_targets_mean": 4100.8,
|
|
"valid_targets_min": 1298
|
|
},
|
|
{
|
|
"epoch": 1.8680445151033387,
|
|
"grad_norm": 0.49736684064600023,
|
|
"learning_rate": 3.6715886020306e-05,
|
|
"loss": 0.1743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16673019528388977,
|
|
"step": 1175,
|
|
"valid_targets_mean": 3085.4,
|
|
"valid_targets_min": 551
|
|
},
|
|
{
|
|
"epoch": 1.875993640699523,
|
|
"grad_norm": 0.45789796811072,
|
|
"learning_rate": 3.6672219462138604e-05,
|
|
"loss": 0.1767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1682954877614975,
|
|
"step": 1180,
|
|
"valid_targets_mean": 4273.3,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 1.8839427662957076,
|
|
"grad_norm": 0.4895932235353001,
|
|
"learning_rate": 3.6628290842206495e-05,
|
|
"loss": 0.1954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19178809225559235,
|
|
"step": 1185,
|
|
"valid_targets_mean": 3406.9,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 1.8918918918918919,
|
|
"grad_norm": 0.45298370929710946,
|
|
"learning_rate": 3.658410085100034e-05,
|
|
"loss": 0.1898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21535128355026245,
|
|
"step": 1190,
|
|
"valid_targets_mean": 4193.9,
|
|
"valid_targets_min": 1461
|
|
},
|
|
{
|
|
"epoch": 1.8998410174880762,
|
|
"grad_norm": 0.44589909041567644,
|
|
"learning_rate": 3.6539650183119126e-05,
|
|
"loss": 0.181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18020093441009521,
|
|
"step": 1195,
|
|
"valid_targets_mean": 3717.2,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 1.9077901430842608,
|
|
"grad_norm": 0.3600027728462642,
|
|
"learning_rate": 3.64949395372593e-05,
|
|
"loss": 0.1823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17888964712619781,
|
|
"step": 1200,
|
|
"valid_targets_mean": 5276.8,
|
|
"valid_targets_min": 1291
|
|
},
|
|
{
|
|
"epoch": 1.9157392686804453,
|
|
"grad_norm": 0.4993758462348809,
|
|
"learning_rate": 3.644996961620378e-05,
|
|
"loss": 0.1838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17506229877471924,
|
|
"step": 1205,
|
|
"valid_targets_mean": 3077.0,
|
|
"valid_targets_min": 279
|
|
},
|
|
{
|
|
"epoch": 1.9236883942766294,
|
|
"grad_norm": 0.5207237968380335,
|
|
"learning_rate": 3.6404741126810854e-05,
|
|
"loss": 0.1777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18324558436870575,
|
|
"step": 1210,
|
|
"valid_targets_mean": 3340.1,
|
|
"valid_targets_min": 465
|
|
},
|
|
{
|
|
"epoch": 1.931637519872814,
|
|
"grad_norm": 0.5043242801398989,
|
|
"learning_rate": 3.635925478000315e-05,
|
|
"loss": 0.1955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19553515315055847,
|
|
"step": 1215,
|
|
"valid_targets_mean": 3118.6,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 1.9395866454689985,
|
|
"grad_norm": 0.3968462413963593,
|
|
"learning_rate": 3.631351129075638e-05,
|
|
"loss": 0.1762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19829094409942627,
|
|
"step": 1220,
|
|
"valid_targets_mean": 4999.4,
|
|
"valid_targets_min": 1898
|
|
},
|
|
{
|
|
"epoch": 1.9475357710651828,
|
|
"grad_norm": 0.5208428823666412,
|
|
"learning_rate": 3.6267511378088174e-05,
|
|
"loss": 0.1718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18420946598052979,
|
|
"step": 1225,
|
|
"valid_targets_mean": 4461.8,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 1.9554848966613672,
|
|
"grad_norm": 0.531760446350055,
|
|
"learning_rate": 3.622125576504674e-05,
|
|
"loss": 0.1776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2018202245235443,
|
|
"step": 1230,
|
|
"valid_targets_mean": 2726.6,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 1.9634340222575517,
|
|
"grad_norm": 0.5155602824722777,
|
|
"learning_rate": 3.6174745178699484e-05,
|
|
"loss": 0.1745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20093253254890442,
|
|
"step": 1235,
|
|
"valid_targets_mean": 2971.2,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 1.9713831478537363,
|
|
"grad_norm": 0.48459979840868433,
|
|
"learning_rate": 3.612798035012161e-05,
|
|
"loss": 0.1876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17393648624420166,
|
|
"step": 1240,
|
|
"valid_targets_mean": 3234.2,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 1.9793322734499204,
|
|
"grad_norm": 0.4476136626874852,
|
|
"learning_rate": 3.608096201438465e-05,
|
|
"loss": 0.1969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17231547832489014,
|
|
"step": 1245,
|
|
"valid_targets_mean": 3629.6,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 1.987281399046105,
|
|
"grad_norm": 0.430623482890761,
|
|
"learning_rate": 3.603369091054484e-05,
|
|
"loss": 0.195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1765379011631012,
|
|
"step": 1250,
|
|
"valid_targets_mean": 3597.4,
|
|
"valid_targets_min": 1075
|
|
},
|
|
{
|
|
"epoch": 1.9952305246422894,
|
|
"grad_norm": 0.436331980026303,
|
|
"learning_rate": 3.5986167781631556e-05,
|
|
"loss": 0.1838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1572694480419159,
|
|
"step": 1255,
|
|
"valid_targets_mean": 4041.3,
|
|
"valid_targets_min": 300
|
|
},
|
|
{
|
|
"epoch": 2.0031796502384736,
|
|
"grad_norm": 0.43495258070360787,
|
|
"learning_rate": 3.5938393374635634e-05,
|
|
"loss": 0.1868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17657440900802612,
|
|
"step": 1260,
|
|
"valid_targets_mean": 3687.1,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 2.011128775834658,
|
|
"grad_norm": 0.44734489727626625,
|
|
"learning_rate": 3.589036844049762e-05,
|
|
"loss": 0.1601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16207647323608398,
|
|
"step": 1265,
|
|
"valid_targets_mean": 3986.1,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 2.0190779014308426,
|
|
"grad_norm": 0.46524088697899335,
|
|
"learning_rate": 3.584209373409593e-05,
|
|
"loss": 0.1601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16202375292778015,
|
|
"step": 1270,
|
|
"valid_targets_mean": 3613.5,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 2.027027027027027,
|
|
"grad_norm": 0.45070406893947157,
|
|
"learning_rate": 3.579357001423505e-05,
|
|
"loss": 0.1664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17059147357940674,
|
|
"step": 1275,
|
|
"valid_targets_mean": 3577.6,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 2.0349761526232113,
|
|
"grad_norm": 0.4499960165350608,
|
|
"learning_rate": 3.5744798043633566e-05,
|
|
"loss": 0.1668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18004930019378662,
|
|
"step": 1280,
|
|
"valid_targets_mean": 4093.8,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 2.042925278219396,
|
|
"grad_norm": 0.5580419519655596,
|
|
"learning_rate": 3.569577858891219e-05,
|
|
"loss": 0.1683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1862042248249054,
|
|
"step": 1285,
|
|
"valid_targets_mean": 2967.2,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 2.0508744038155804,
|
|
"grad_norm": 0.4779943776711724,
|
|
"learning_rate": 3.56465124205817e-05,
|
|
"loss": 0.1714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22631016373634338,
|
|
"step": 1290,
|
|
"valid_targets_mean": 3858.1,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 2.0588235294117645,
|
|
"grad_norm": 0.4911452381940732,
|
|
"learning_rate": 3.559700031303082e-05,
|
|
"loss": 0.1865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16386955976486206,
|
|
"step": 1295,
|
|
"valid_targets_mean": 3233.1,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 2.066772655007949,
|
|
"grad_norm": 0.5288525437496359,
|
|
"learning_rate": 3.554724304451411e-05,
|
|
"loss": 0.1752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1542258858680725,
|
|
"step": 1300,
|
|
"valid_targets_mean": 3689.6,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 2.0747217806041336,
|
|
"grad_norm": 0.6612223596945518,
|
|
"learning_rate": 3.549724139713962e-05,
|
|
"loss": 0.1651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17203554511070251,
|
|
"step": 1305,
|
|
"valid_targets_mean": 3336.7,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 2.082670906200318,
|
|
"grad_norm": 0.4373776224124723,
|
|
"learning_rate": 3.544699615685671e-05,
|
|
"loss": 0.1637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14887259900569916,
|
|
"step": 1310,
|
|
"valid_targets_mean": 4026.8,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 2.0906200317965022,
|
|
"grad_norm": 0.5037762968017296,
|
|
"learning_rate": 3.539650811344363e-05,
|
|
"loss": 0.1716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1626054346561432,
|
|
"step": 1315,
|
|
"valid_targets_mean": 3763.6,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 2.098569157392687,
|
|
"grad_norm": 0.6313496812038843,
|
|
"learning_rate": 3.534577806049512e-05,
|
|
"loss": 0.1826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15829899907112122,
|
|
"step": 1320,
|
|
"valid_targets_mean": 3636.2,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 2.1065182829888713,
|
|
"grad_norm": 0.4425905414584769,
|
|
"learning_rate": 3.529480679540996e-05,
|
|
"loss": 0.1656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15318667888641357,
|
|
"step": 1325,
|
|
"valid_targets_mean": 4196.6,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 2.1144674085850554,
|
|
"grad_norm": 0.6185724943267303,
|
|
"learning_rate": 3.524359511937838e-05,
|
|
"loss": 0.1786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16072843968868256,
|
|
"step": 1330,
|
|
"valid_targets_mean": 2547.9,
|
|
"valid_targets_min": 263
|
|
},
|
|
{
|
|
"epoch": 2.12241653418124,
|
|
"grad_norm": 0.48298978637493106,
|
|
"learning_rate": 3.5192143837369523e-05,
|
|
"loss": 0.1727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19100898504257202,
|
|
"step": 1335,
|
|
"valid_targets_mean": 3434.8,
|
|
"valid_targets_min": 296
|
|
},
|
|
{
|
|
"epoch": 2.1303656597774245,
|
|
"grad_norm": 0.5247741664837559,
|
|
"learning_rate": 3.514045375811878e-05,
|
|
"loss": 0.18,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16440363228321075,
|
|
"step": 1340,
|
|
"valid_targets_mean": 2886.7,
|
|
"valid_targets_min": 201
|
|
},
|
|
{
|
|
"epoch": 2.138314785373609,
|
|
"grad_norm": 0.4645350321366234,
|
|
"learning_rate": 3.508852569411506e-05,
|
|
"loss": 0.1566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16781508922576904,
|
|
"step": 1345,
|
|
"valid_targets_mean": 3687.1,
|
|
"valid_targets_min": 1028
|
|
},
|
|
{
|
|
"epoch": 2.146263910969793,
|
|
"grad_norm": 0.4185673015572621,
|
|
"learning_rate": 3.503636046158803e-05,
|
|
"loss": 0.1605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15546417236328125,
|
|
"step": 1350,
|
|
"valid_targets_mean": 4516.6,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 2.1542130365659777,
|
|
"grad_norm": 0.49044816100084626,
|
|
"learning_rate": 3.498395888049526e-05,
|
|
"loss": 0.1865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19985926151275635,
|
|
"step": 1355,
|
|
"valid_targets_mean": 3536.6,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 2.1621621621621623,
|
|
"grad_norm": 0.4570482087101892,
|
|
"learning_rate": 3.4931321774509396e-05,
|
|
"loss": 0.1658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15912221372127533,
|
|
"step": 1360,
|
|
"valid_targets_mean": 3607.9,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 2.1701112877583464,
|
|
"grad_norm": 0.4292180835767507,
|
|
"learning_rate": 3.487844997100515e-05,
|
|
"loss": 0.1691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19553008675575256,
|
|
"step": 1365,
|
|
"valid_targets_mean": 4204.5,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 2.178060413354531,
|
|
"grad_norm": 0.524906614575372,
|
|
"learning_rate": 3.482534430104633e-05,
|
|
"loss": 0.1585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.174746572971344,
|
|
"step": 1370,
|
|
"valid_targets_mean": 3015.8,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 2.1860095389507155,
|
|
"grad_norm": 0.460503992627472,
|
|
"learning_rate": 3.4772005599372764e-05,
|
|
"loss": 0.1667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17967626452445984,
|
|
"step": 1375,
|
|
"valid_targets_mean": 3896.6,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 2.1939586645469,
|
|
"grad_norm": 0.44690523777838326,
|
|
"learning_rate": 3.4718434704387174e-05,
|
|
"loss": 0.1606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1710180938243866,
|
|
"step": 1380,
|
|
"valid_targets_mean": 4478.8,
|
|
"valid_targets_min": 1388
|
|
},
|
|
{
|
|
"epoch": 2.201907790143084,
|
|
"grad_norm": 0.41765151372647924,
|
|
"learning_rate": 3.4664632458142016e-05,
|
|
"loss": 0.1638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15442225337028503,
|
|
"step": 1385,
|
|
"valid_targets_mean": 4279.1,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 2.2098569157392687,
|
|
"grad_norm": 0.434025285839872,
|
|
"learning_rate": 3.461059970632622e-05,
|
|
"loss": 0.1681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14256054162979126,
|
|
"step": 1390,
|
|
"valid_targets_mean": 3645.9,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 2.2178060413354532,
|
|
"grad_norm": 0.39810648920310915,
|
|
"learning_rate": 3.4556337298251943e-05,
|
|
"loss": 0.174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14162832498550415,
|
|
"step": 1395,
|
|
"valid_targets_mean": 4033.1,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 2.2257551669316373,
|
|
"grad_norm": 0.5481937999092757,
|
|
"learning_rate": 3.450184608684114e-05,
|
|
"loss": 0.1687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1893032193183899,
|
|
"step": 1400,
|
|
"valid_targets_mean": 2773.9,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 2.233704292527822,
|
|
"grad_norm": 0.43591132617223516,
|
|
"learning_rate": 3.444712692861224e-05,
|
|
"loss": 0.1546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12860910594463348,
|
|
"step": 1405,
|
|
"valid_targets_mean": 3698.6,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 2.2416534181240064,
|
|
"grad_norm": 0.43573783141415084,
|
|
"learning_rate": 3.439218068366663e-05,
|
|
"loss": 0.1613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1642727553844452,
|
|
"step": 1410,
|
|
"valid_targets_mean": 3836.3,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 2.249602543720191,
|
|
"grad_norm": 0.4326278702113856,
|
|
"learning_rate": 3.433700821567516e-05,
|
|
"loss": 0.1661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14071246981620789,
|
|
"step": 1415,
|
|
"valid_targets_mean": 4719.1,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 2.257551669316375,
|
|
"grad_norm": 0.5549303616093954,
|
|
"learning_rate": 3.428161039186456e-05,
|
|
"loss": 0.1737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18680042028427124,
|
|
"step": 1420,
|
|
"valid_targets_mean": 3738.0,
|
|
"valid_targets_min": 327
|
|
},
|
|
{
|
|
"epoch": 2.2655007949125596,
|
|
"grad_norm": 0.8319518010193951,
|
|
"learning_rate": 3.42259880830038e-05,
|
|
"loss": 0.1682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17515277862548828,
|
|
"step": 1425,
|
|
"valid_targets_mean": 3830.8,
|
|
"valid_targets_min": 859
|
|
},
|
|
{
|
|
"epoch": 2.273449920508744,
|
|
"grad_norm": 0.4319372849950979,
|
|
"learning_rate": 3.417014216339043e-05,
|
|
"loss": 0.181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18945015966892242,
|
|
"step": 1430,
|
|
"valid_targets_mean": 4180.6,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 2.2813990461049283,
|
|
"grad_norm": 0.4645369639888501,
|
|
"learning_rate": 3.4114073510836794e-05,
|
|
"loss": 0.1854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17300865054130554,
|
|
"step": 1435,
|
|
"valid_targets_mean": 3716.4,
|
|
"valid_targets_min": 1337
|
|
},
|
|
{
|
|
"epoch": 2.289348171701113,
|
|
"grad_norm": 0.4624581509651132,
|
|
"learning_rate": 3.4057783006656274e-05,
|
|
"loss": 0.156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1803177446126938,
|
|
"step": 1440,
|
|
"valid_targets_mean": 3995.3,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 2.2972972972972974,
|
|
"grad_norm": 0.3990685274728258,
|
|
"learning_rate": 3.400127153564941e-05,
|
|
"loss": 0.1632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1542663425207138,
|
|
"step": 1445,
|
|
"valid_targets_mean": 4452.5,
|
|
"valid_targets_min": 1190
|
|
},
|
|
{
|
|
"epoch": 2.3052464228934815,
|
|
"grad_norm": 0.6187952481541217,
|
|
"learning_rate": 3.394453998609001e-05,
|
|
"loss": 0.1683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18409979343414307,
|
|
"step": 1450,
|
|
"valid_targets_mean": 2894.1,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 2.313195548489666,
|
|
"grad_norm": 0.49894187881909596,
|
|
"learning_rate": 3.388758924971117e-05,
|
|
"loss": 0.1743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18296250700950623,
|
|
"step": 1455,
|
|
"valid_targets_mean": 3416.9,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 2.3211446740858506,
|
|
"grad_norm": 0.5357473334854262,
|
|
"learning_rate": 3.3830420221691286e-05,
|
|
"loss": 0.1803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18557733297348022,
|
|
"step": 1460,
|
|
"valid_targets_mean": 3307.2,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 2.329093799682035,
|
|
"grad_norm": 0.3962622061174984,
|
|
"learning_rate": 3.377303380063995e-05,
|
|
"loss": 0.1686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14266249537467957,
|
|
"step": 1465,
|
|
"valid_targets_mean": 4369.6,
|
|
"valid_targets_min": 1371
|
|
},
|
|
{
|
|
"epoch": 2.337042925278219,
|
|
"grad_norm": 0.3785010810778657,
|
|
"learning_rate": 3.371543088858384e-05,
|
|
"loss": 0.1587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16492611169815063,
|
|
"step": 1470,
|
|
"valid_targets_mean": 5120.1,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 2.3449920508744038,
|
|
"grad_norm": 0.48881533304790475,
|
|
"learning_rate": 3.365761239095253e-05,
|
|
"loss": 0.167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16915087401866913,
|
|
"step": 1475,
|
|
"valid_targets_mean": 3925.3,
|
|
"valid_targets_min": 576
|
|
},
|
|
{
|
|
"epoch": 2.3529411764705883,
|
|
"grad_norm": 0.48720287373725557,
|
|
"learning_rate": 3.3599579216564314e-05,
|
|
"loss": 0.1604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17547118663787842,
|
|
"step": 1480,
|
|
"valid_targets_mean": 3980.4,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 2.360890302066773,
|
|
"grad_norm": 0.5303350453029894,
|
|
"learning_rate": 3.354133227761181e-05,
|
|
"loss": 0.1731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.172575443983078,
|
|
"step": 1485,
|
|
"valid_targets_mean": 4342.2,
|
|
"valid_targets_min": 1065
|
|
},
|
|
{
|
|
"epoch": 2.368839427662957,
|
|
"grad_norm": 0.5948787016804042,
|
|
"learning_rate": 3.3482872489647745e-05,
|
|
"loss": 0.1635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16050350666046143,
|
|
"step": 1490,
|
|
"valid_targets_mean": 4073.2,
|
|
"valid_targets_min": 279
|
|
},
|
|
{
|
|
"epoch": 2.3767885532591415,
|
|
"grad_norm": 0.4793825142162827,
|
|
"learning_rate": 3.342420077157047e-05,
|
|
"loss": 0.1839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1763351708650589,
|
|
"step": 1495,
|
|
"valid_targets_mean": 3483.2,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 2.384737678855326,
|
|
"grad_norm": 0.4530776431878804,
|
|
"learning_rate": 3.336531804560957e-05,
|
|
"loss": 0.1526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14177027344703674,
|
|
"step": 1500,
|
|
"valid_targets_mean": 3812.7,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 2.39268680445151,
|
|
"grad_norm": 0.48697483476860215,
|
|
"learning_rate": 3.330622523731136e-05,
|
|
"loss": 0.1662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15083934366703033,
|
|
"step": 1505,
|
|
"valid_targets_mean": 2873.0,
|
|
"valid_targets_min": 224
|
|
},
|
|
{
|
|
"epoch": 2.4006359300476947,
|
|
"grad_norm": 0.49665518829864685,
|
|
"learning_rate": 3.32469232755243e-05,
|
|
"loss": 0.1753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1856893002986908,
|
|
"step": 1510,
|
|
"valid_targets_mean": 3365.2,
|
|
"valid_targets_min": 904
|
|
},
|
|
{
|
|
"epoch": 2.4085850556438793,
|
|
"grad_norm": 0.45662940616841696,
|
|
"learning_rate": 3.318741309238444e-05,
|
|
"loss": 0.1697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18095698952674866,
|
|
"step": 1515,
|
|
"valid_targets_mean": 3817.8,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 2.4165341812400634,
|
|
"grad_norm": 0.4040458879915397,
|
|
"learning_rate": 3.312769562330075e-05,
|
|
"loss": 0.1761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1404886543750763,
|
|
"step": 1520,
|
|
"valid_targets_mean": 4547.6,
|
|
"valid_targets_min": 1168
|
|
},
|
|
{
|
|
"epoch": 2.424483306836248,
|
|
"grad_norm": 0.4028349948778861,
|
|
"learning_rate": 3.306777180694042e-05,
|
|
"loss": 0.1697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15197904407978058,
|
|
"step": 1525,
|
|
"valid_targets_mean": 4536.2,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 2.4324324324324325,
|
|
"grad_norm": 0.488660600681927,
|
|
"learning_rate": 3.30076425852141e-05,
|
|
"loss": 0.1594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18501275777816772,
|
|
"step": 1530,
|
|
"valid_targets_mean": 3903.4,
|
|
"valid_targets_min": 519
|
|
},
|
|
{
|
|
"epoch": 2.440381558028617,
|
|
"grad_norm": 0.4360649019785482,
|
|
"learning_rate": 3.294730890326109e-05,
|
|
"loss": 0.168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14416208863258362,
|
|
"step": 1535,
|
|
"valid_targets_mean": 3269.1,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 2.448330683624801,
|
|
"grad_norm": 0.3987623937481391,
|
|
"learning_rate": 3.2886771709434504e-05,
|
|
"loss": 0.1559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1603170931339264,
|
|
"step": 1540,
|
|
"valid_targets_mean": 4707.6,
|
|
"valid_targets_min": 1348
|
|
},
|
|
{
|
|
"epoch": 2.4562798092209857,
|
|
"grad_norm": 0.5797017322011377,
|
|
"learning_rate": 3.282603195528635e-05,
|
|
"loss": 0.1523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17639771103858948,
|
|
"step": 1545,
|
|
"valid_targets_mean": 3203.8,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 2.46422893481717,
|
|
"grad_norm": 0.44881682454570193,
|
|
"learning_rate": 3.276509059555257e-05,
|
|
"loss": 0.1704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15224912762641907,
|
|
"step": 1550,
|
|
"valid_targets_mean": 4156.8,
|
|
"valid_targets_min": 551
|
|
},
|
|
{
|
|
"epoch": 2.4721780604133547,
|
|
"grad_norm": 0.4299523456790547,
|
|
"learning_rate": 3.270394858813802e-05,
|
|
"loss": 0.1693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2051280438899994,
|
|
"step": 1555,
|
|
"valid_targets_mean": 4095.7,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 2.480127186009539,
|
|
"grad_norm": 0.46068757123222875,
|
|
"learning_rate": 3.264260689410147e-05,
|
|
"loss": 0.1661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15283241868019104,
|
|
"step": 1560,
|
|
"valid_targets_mean": 3226.4,
|
|
"valid_targets_min": 912
|
|
},
|
|
{
|
|
"epoch": 2.4880763116057234,
|
|
"grad_norm": 0.47403025787042874,
|
|
"learning_rate": 3.2581066477640435e-05,
|
|
"loss": 0.1681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15874740481376648,
|
|
"step": 1565,
|
|
"valid_targets_mean": 3952.1,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 2.496025437201908,
|
|
"grad_norm": 0.4626317047147037,
|
|
"learning_rate": 3.251932830607603e-05,
|
|
"loss": 0.1556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16611367464065552,
|
|
"step": 1570,
|
|
"valid_targets_mean": 3792.7,
|
|
"valid_targets_min": 450
|
|
},
|
|
{
|
|
"epoch": 2.503974562798092,
|
|
"grad_norm": 0.48592321395910304,
|
|
"learning_rate": 3.245739334983779e-05,
|
|
"loss": 0.1577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1791059672832489,
|
|
"step": 1575,
|
|
"valid_targets_mean": 3011.2,
|
|
"valid_targets_min": 305
|
|
},
|
|
{
|
|
"epoch": 2.5119236883942766,
|
|
"grad_norm": 0.48642110315097836,
|
|
"learning_rate": 3.239526258244842e-05,
|
|
"loss": 0.1622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18121963739395142,
|
|
"step": 1580,
|
|
"valid_targets_mean": 3705.4,
|
|
"valid_targets_min": 465
|
|
},
|
|
{
|
|
"epoch": 2.519872813990461,
|
|
"grad_norm": 0.4193245496234458,
|
|
"learning_rate": 3.233293698050845e-05,
|
|
"loss": 0.1638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15967342257499695,
|
|
"step": 1585,
|
|
"valid_targets_mean": 4023.4,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 2.5278219395866453,
|
|
"grad_norm": 0.5640866498973993,
|
|
"learning_rate": 3.227041752368091e-05,
|
|
"loss": 0.165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17153598368167877,
|
|
"step": 1590,
|
|
"valid_targets_mean": 2566.8,
|
|
"valid_targets_min": 217
|
|
},
|
|
{
|
|
"epoch": 2.53577106518283,
|
|
"grad_norm": 0.41665001246607075,
|
|
"learning_rate": 3.220770519467597e-05,
|
|
"loss": 0.1653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1515938937664032,
|
|
"step": 1595,
|
|
"valid_targets_mean": 4122.9,
|
|
"valid_targets_min": 237
|
|
},
|
|
{
|
|
"epoch": 2.5437201907790143,
|
|
"grad_norm": 0.39614139545188226,
|
|
"learning_rate": 3.214480097923542e-05,
|
|
"loss": 0.1769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18220946192741394,
|
|
"step": 1600,
|
|
"valid_targets_mean": 5175.8,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 2.551669316375199,
|
|
"grad_norm": 0.43229505102472626,
|
|
"learning_rate": 3.208170586611721e-05,
|
|
"loss": 0.1601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16309629380702972,
|
|
"step": 1605,
|
|
"valid_targets_mean": 3784.4,
|
|
"valid_targets_min": 584
|
|
},
|
|
{
|
|
"epoch": 2.559618441971383,
|
|
"grad_norm": 0.4243434467048783,
|
|
"learning_rate": 3.201842084707993e-05,
|
|
"loss": 0.1603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1563934087753296,
|
|
"step": 1610,
|
|
"valid_targets_mean": 4158.7,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 2.5675675675675675,
|
|
"grad_norm": 0.43578064511427506,
|
|
"learning_rate": 3.195494691686718e-05,
|
|
"loss": 0.1736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18895184993743896,
|
|
"step": 1615,
|
|
"valid_targets_mean": 4406.8,
|
|
"valid_targets_min": 1521
|
|
},
|
|
{
|
|
"epoch": 2.575516693163752,
|
|
"grad_norm": 0.4519962457233152,
|
|
"learning_rate": 3.189128507319197e-05,
|
|
"loss": 0.1681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17107641696929932,
|
|
"step": 1620,
|
|
"valid_targets_mean": 3569.7,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 2.5834658187599366,
|
|
"grad_norm": 0.5454247674899096,
|
|
"learning_rate": 3.182743631672102e-05,
|
|
"loss": 0.1639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17942318320274353,
|
|
"step": 1625,
|
|
"valid_targets_mean": 2771.4,
|
|
"valid_targets_min": 1372
|
|
},
|
|
{
|
|
"epoch": 2.5914149443561207,
|
|
"grad_norm": 0.4497356577426226,
|
|
"learning_rate": 3.1763401651059025e-05,
|
|
"loss": 0.1727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1738359034061432,
|
|
"step": 1630,
|
|
"valid_targets_mean": 3365.7,
|
|
"valid_targets_min": 911
|
|
},
|
|
{
|
|
"epoch": 2.5993640699523053,
|
|
"grad_norm": 0.7952703957802248,
|
|
"learning_rate": 3.1699182082732886e-05,
|
|
"loss": 0.1632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18283095955848694,
|
|
"step": 1635,
|
|
"valid_targets_mean": 2729.1,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 2.60731319554849,
|
|
"grad_norm": 0.39447463509094877,
|
|
"learning_rate": 3.1634778621175905e-05,
|
|
"loss": 0.1552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14707916975021362,
|
|
"step": 1640,
|
|
"valid_targets_mean": 4424.3,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 2.615262321144674,
|
|
"grad_norm": 0.5826754203242515,
|
|
"learning_rate": 3.157019227871189e-05,
|
|
"loss": 0.1583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18842703104019165,
|
|
"step": 1645,
|
|
"valid_targets_mean": 2496.5,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 2.6232114467408585,
|
|
"grad_norm": 0.4569413845264777,
|
|
"learning_rate": 3.150542407053927e-05,
|
|
"loss": 0.1649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15572036802768707,
|
|
"step": 1650,
|
|
"valid_targets_mean": 3717.3,
|
|
"valid_targets_min": 206
|
|
},
|
|
{
|
|
"epoch": 2.631160572337043,
|
|
"grad_norm": 0.3769631110508782,
|
|
"learning_rate": 3.144047501471511e-05,
|
|
"loss": 0.1596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15287700295448303,
|
|
"step": 1655,
|
|
"valid_targets_mean": 4491.1,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 2.639109697933227,
|
|
"grad_norm": 0.3834879165232079,
|
|
"learning_rate": 3.1375346132139135e-05,
|
|
"loss": 0.1736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14792010188102722,
|
|
"step": 1660,
|
|
"valid_targets_mean": 4377.1,
|
|
"valid_targets_min": 1297
|
|
},
|
|
{
|
|
"epoch": 2.6470588235294117,
|
|
"grad_norm": 0.40025749764942126,
|
|
"learning_rate": 3.131003844653766e-05,
|
|
"loss": 0.175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16569784283638,
|
|
"step": 1665,
|
|
"valid_targets_mean": 4147.7,
|
|
"valid_targets_min": 231
|
|
},
|
|
{
|
|
"epoch": 2.6550079491255962,
|
|
"grad_norm": 0.6151639522393043,
|
|
"learning_rate": 3.124455298444752e-05,
|
|
"loss": 0.1687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18059095740318298,
|
|
"step": 1670,
|
|
"valid_targets_mean": 3476.2,
|
|
"valid_targets_min": 205
|
|
},
|
|
{
|
|
"epoch": 2.6629570747217803,
|
|
"grad_norm": 0.4732700696731575,
|
|
"learning_rate": 3.1178890775199925e-05,
|
|
"loss": 0.169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1820976436138153,
|
|
"step": 1675,
|
|
"valid_targets_mean": 3768.2,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 2.670906200317965,
|
|
"grad_norm": 0.44427708127372323,
|
|
"learning_rate": 3.1113052850904275e-05,
|
|
"loss": 0.1541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14810726046562195,
|
|
"step": 1680,
|
|
"valid_targets_mean": 3540.6,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 2.6788553259141494,
|
|
"grad_norm": 0.47123009484821854,
|
|
"learning_rate": 3.1047040246431936e-05,
|
|
"loss": 0.1646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17077761888504028,
|
|
"step": 1685,
|
|
"valid_targets_mean": 3889.4,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 2.686804451510334,
|
|
"grad_norm": 0.4318567795086639,
|
|
"learning_rate": 3.098085399939998e-05,
|
|
"loss": 0.167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1917814016342163,
|
|
"step": 1690,
|
|
"valid_targets_mean": 4599.8,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 2.6947535771065185,
|
|
"grad_norm": 0.4721685189025569,
|
|
"learning_rate": 3.091449515015489e-05,
|
|
"loss": 0.1698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17242677509784698,
|
|
"step": 1695,
|
|
"valid_targets_mean": 3185.9,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 2.7027027027027026,
|
|
"grad_norm": 0.4250655218343069,
|
|
"learning_rate": 3.084796474175618e-05,
|
|
"loss": 0.167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1556561142206192,
|
|
"step": 1700,
|
|
"valid_targets_mean": 4078.5,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 2.710651828298887,
|
|
"grad_norm": 0.4447639847156236,
|
|
"learning_rate": 3.078126381996001e-05,
|
|
"loss": 0.1565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15216678380966187,
|
|
"step": 1705,
|
|
"valid_targets_mean": 4152.2,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 2.7186009538950717,
|
|
"grad_norm": 0.4579147243616527,
|
|
"learning_rate": 3.071439343320274e-05,
|
|
"loss": 0.1683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1592155396938324,
|
|
"step": 1710,
|
|
"valid_targets_mean": 3433.2,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 2.726550079491256,
|
|
"grad_norm": 0.4407571500778382,
|
|
"learning_rate": 3.064735463258449e-05,
|
|
"loss": 0.1698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17721107602119446,
|
|
"step": 1715,
|
|
"valid_targets_mean": 4108.9,
|
|
"valid_targets_min": 240
|
|
},
|
|
{
|
|
"epoch": 2.7344992050874404,
|
|
"grad_norm": 0.40400452078934607,
|
|
"learning_rate": 3.0580148471852544e-05,
|
|
"loss": 0.1612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15850304067134857,
|
|
"step": 1720,
|
|
"valid_targets_mean": 4096.9,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 2.742448330683625,
|
|
"grad_norm": 0.37213244183165023,
|
|
"learning_rate": 3.0512776007384882e-05,
|
|
"loss": 0.1601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14977069199085236,
|
|
"step": 1725,
|
|
"valid_targets_mean": 5645.9,
|
|
"valid_targets_min": 1122
|
|
},
|
|
{
|
|
"epoch": 2.750397456279809,
|
|
"grad_norm": 0.46209322559956156,
|
|
"learning_rate": 3.0445238298173492e-05,
|
|
"loss": 0.1703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1584990918636322,
|
|
"step": 1730,
|
|
"valid_targets_mean": 3129.1,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 2.7583465818759936,
|
|
"grad_norm": 0.4385757383753134,
|
|
"learning_rate": 3.0377536405807753e-05,
|
|
"loss": 0.1614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1700027585029602,
|
|
"step": 1735,
|
|
"valid_targets_mean": 4126.4,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 2.766295707472178,
|
|
"grad_norm": 0.5770273602388422,
|
|
"learning_rate": 3.030967139445776e-05,
|
|
"loss": 0.163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1636781394481659,
|
|
"step": 1740,
|
|
"valid_targets_mean": 2366.4,
|
|
"valid_targets_min": 330
|
|
},
|
|
{
|
|
"epoch": 2.7742448330683622,
|
|
"grad_norm": 0.46564547279320734,
|
|
"learning_rate": 3.0241644330857604e-05,
|
|
"loss": 0.158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15485122799873352,
|
|
"step": 1745,
|
|
"valid_targets_mean": 3985.4,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 2.7821939586645468,
|
|
"grad_norm": 0.47547498624328677,
|
|
"learning_rate": 3.0173456284288565e-05,
|
|
"loss": 0.1675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19546955823898315,
|
|
"step": 1750,
|
|
"valid_targets_mean": 4155.4,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 2.7901430842607313,
|
|
"grad_norm": 0.5158856359689629,
|
|
"learning_rate": 3.010510832656233e-05,
|
|
"loss": 0.1525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.161908358335495,
|
|
"step": 1755,
|
|
"valid_targets_mean": 3625.6,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 2.798092209856916,
|
|
"grad_norm": 0.4471113346837271,
|
|
"learning_rate": 3.0036601532004175e-05,
|
|
"loss": 0.1605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1580810248851776,
|
|
"step": 1760,
|
|
"valid_targets_mean": 3677.0,
|
|
"valid_targets_min": 413
|
|
},
|
|
{
|
|
"epoch": 2.8060413354531004,
|
|
"grad_norm": 0.4114471784729695,
|
|
"learning_rate": 2.996793697743601e-05,
|
|
"loss": 0.1736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1642129123210907,
|
|
"step": 1765,
|
|
"valid_targets_mean": 4178.1,
|
|
"valid_targets_min": 835
|
|
},
|
|
{
|
|
"epoch": 2.8139904610492845,
|
|
"grad_norm": 0.43668170542838514,
|
|
"learning_rate": 2.9899115742159512e-05,
|
|
"loss": 0.1751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16554221510887146,
|
|
"step": 1770,
|
|
"valid_targets_mean": 4644.9,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 2.821939586645469,
|
|
"grad_norm": 0.5043488707813197,
|
|
"learning_rate": 2.9830138907939137e-05,
|
|
"loss": 0.1689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16675223410129547,
|
|
"step": 1775,
|
|
"valid_targets_mean": 3199.9,
|
|
"valid_targets_min": 304
|
|
},
|
|
{
|
|
"epoch": 2.8298887122416536,
|
|
"grad_norm": 0.39898904679988073,
|
|
"learning_rate": 2.976100755898511e-05,
|
|
"loss": 0.1586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16235503554344177,
|
|
"step": 1780,
|
|
"valid_targets_mean": 4958.9,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 2.8378378378378377,
|
|
"grad_norm": 0.5087094254868092,
|
|
"learning_rate": 2.9691722781936398e-05,
|
|
"loss": 0.1638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16532549262046814,
|
|
"step": 1785,
|
|
"valid_targets_mean": 2913.4,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 2.8457869634340223,
|
|
"grad_norm": 0.514390244173884,
|
|
"learning_rate": 2.962228566584362e-05,
|
|
"loss": 0.1911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18927565217018127,
|
|
"step": 1790,
|
|
"valid_targets_mean": 3447.2,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 2.853736089030207,
|
|
"grad_norm": 0.4250826109776861,
|
|
"learning_rate": 2.9552697302151937e-05,
|
|
"loss": 0.1572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15817922353744507,
|
|
"step": 1795,
|
|
"valid_targets_mean": 4025.1,
|
|
"valid_targets_min": 779
|
|
},
|
|
{
|
|
"epoch": 2.861685214626391,
|
|
"grad_norm": 0.4104299314914552,
|
|
"learning_rate": 2.9482958784683883e-05,
|
|
"loss": 0.1668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1422051340341568,
|
|
"step": 1800,
|
|
"valid_targets_mean": 3494.4,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 2.8696343402225755,
|
|
"grad_norm": 0.41440908629922096,
|
|
"learning_rate": 2.9413071209622174e-05,
|
|
"loss": 0.1661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14636817574501038,
|
|
"step": 1805,
|
|
"valid_targets_mean": 4556.1,
|
|
"valid_targets_min": 847
|
|
},
|
|
{
|
|
"epoch": 2.87758346581876,
|
|
"grad_norm": 0.48515765879460354,
|
|
"learning_rate": 2.934303567549251e-05,
|
|
"loss": 0.1705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1678956151008606,
|
|
"step": 1810,
|
|
"valid_targets_mean": 3278.6,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 2.885532591414944,
|
|
"grad_norm": 0.514128862513967,
|
|
"learning_rate": 2.9272853283146255e-05,
|
|
"loss": 0.1739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16510531306266785,
|
|
"step": 1815,
|
|
"valid_targets_mean": 2977.9,
|
|
"valid_targets_min": 355
|
|
},
|
|
{
|
|
"epoch": 2.8934817170111287,
|
|
"grad_norm": 0.5493626194079975,
|
|
"learning_rate": 2.9202525135743158e-05,
|
|
"loss": 0.191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22584211826324463,
|
|
"step": 1820,
|
|
"valid_targets_mean": 2734.2,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 2.901430842607313,
|
|
"grad_norm": 0.4370645426098783,
|
|
"learning_rate": 2.9132052338734033e-05,
|
|
"loss": 0.1589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16311654448509216,
|
|
"step": 1825,
|
|
"valid_targets_mean": 3748.9,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 2.9093799682034978,
|
|
"grad_norm": 0.45957400888294386,
|
|
"learning_rate": 2.9061435999843354e-05,
|
|
"loss": 0.1657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15466034412384033,
|
|
"step": 1830,
|
|
"valid_targets_mean": 3323.8,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 2.9173290937996823,
|
|
"grad_norm": 0.39548994984246866,
|
|
"learning_rate": 2.8990677229051855e-05,
|
|
"loss": 0.1451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1540757715702057,
|
|
"step": 1835,
|
|
"valid_targets_mean": 4090.8,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 2.9252782193958664,
|
|
"grad_norm": 0.4905605606999161,
|
|
"learning_rate": 2.8919777138579074e-05,
|
|
"loss": 0.1724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16376616060733795,
|
|
"step": 1840,
|
|
"valid_targets_mean": 2992.8,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 2.933227344992051,
|
|
"grad_norm": 0.39954714024700316,
|
|
"learning_rate": 2.8848736842865893e-05,
|
|
"loss": 0.168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13295181095600128,
|
|
"step": 1845,
|
|
"valid_targets_mean": 3622.3,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 2.9411764705882355,
|
|
"grad_norm": 0.4109518903334489,
|
|
"learning_rate": 2.8777557458556993e-05,
|
|
"loss": 0.1554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15689434111118317,
|
|
"step": 1850,
|
|
"valid_targets_mean": 4599.6,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 2.9491255961844196,
|
|
"grad_norm": 0.4134036810952952,
|
|
"learning_rate": 2.870624010448332e-05,
|
|
"loss": 0.189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16236630082130432,
|
|
"step": 1855,
|
|
"valid_targets_mean": 4970.9,
|
|
"valid_targets_min": 525
|
|
},
|
|
{
|
|
"epoch": 2.957074721780604,
|
|
"grad_norm": 0.44310700881675275,
|
|
"learning_rate": 2.8634785901644497e-05,
|
|
"loss": 0.174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18711400032043457,
|
|
"step": 1860,
|
|
"valid_targets_mean": 4005.5,
|
|
"valid_targets_min": 368
|
|
},
|
|
{
|
|
"epoch": 2.9650238473767887,
|
|
"grad_norm": 0.3717450015352577,
|
|
"learning_rate": 2.856319597319119e-05,
|
|
"loss": 0.1531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14527782797813416,
|
|
"step": 1865,
|
|
"valid_targets_mean": 4636.0,
|
|
"valid_targets_min": 834
|
|
},
|
|
{
|
|
"epoch": 2.972972972972973,
|
|
"grad_norm": 0.4489188113660503,
|
|
"learning_rate": 2.849147144440747e-05,
|
|
"loss": 0.1744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18267220258712769,
|
|
"step": 1870,
|
|
"valid_targets_mean": 3598.4,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 2.9809220985691574,
|
|
"grad_norm": 0.463504579635663,
|
|
"learning_rate": 2.8419613442693127e-05,
|
|
"loss": 0.1825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16523773968219757,
|
|
"step": 1875,
|
|
"valid_targets_mean": 3690.0,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 2.988871224165342,
|
|
"grad_norm": 0.42098915439827383,
|
|
"learning_rate": 2.834762309754593e-05,
|
|
"loss": 0.1758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15425992012023926,
|
|
"step": 1880,
|
|
"valid_targets_mean": 3851.8,
|
|
"valid_targets_min": 1123
|
|
},
|
|
{
|
|
"epoch": 2.996820349761526,
|
|
"grad_norm": 0.44087606889969083,
|
|
"learning_rate": 2.8275501540543877e-05,
|
|
"loss": 0.177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1857764720916748,
|
|
"step": 1885,
|
|
"valid_targets_mean": 4492.8,
|
|
"valid_targets_min": 303
|
|
},
|
|
{
|
|
"epoch": 3.0047694753577106,
|
|
"grad_norm": 0.40019519904714795,
|
|
"learning_rate": 2.8203249905327434e-05,
|
|
"loss": 0.1439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14805257320404053,
|
|
"step": 1890,
|
|
"valid_targets_mean": 4000.0,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 3.012718600953895,
|
|
"grad_norm": 0.3925087002790559,
|
|
"learning_rate": 2.81308693275817e-05,
|
|
"loss": 0.1421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15407413244247437,
|
|
"step": 1895,
|
|
"valid_targets_mean": 5172.0,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 3.0206677265500796,
|
|
"grad_norm": 0.46275383155041416,
|
|
"learning_rate": 2.8058360945018518e-05,
|
|
"loss": 0.1496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13890619575977325,
|
|
"step": 1900,
|
|
"valid_targets_mean": 4028.7,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 3.0286168521462637,
|
|
"grad_norm": 0.4551559001740428,
|
|
"learning_rate": 2.7985725897358665e-05,
|
|
"loss": 0.1543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15436714887619019,
|
|
"step": 1905,
|
|
"valid_targets_mean": 3797.8,
|
|
"valid_targets_min": 565
|
|
},
|
|
{
|
|
"epoch": 3.0365659777424483,
|
|
"grad_norm": 0.4842383814763534,
|
|
"learning_rate": 2.791296532631389e-05,
|
|
"loss": 0.1553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14294975996017456,
|
|
"step": 1910,
|
|
"valid_targets_mean": 3047.1,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 3.044515103338633,
|
|
"grad_norm": 0.5397174824822419,
|
|
"learning_rate": 2.7840080375568964e-05,
|
|
"loss": 0.1452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1518700122833252,
|
|
"step": 1915,
|
|
"valid_targets_mean": 3124.6,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 3.0524642289348174,
|
|
"grad_norm": 0.5358906602930991,
|
|
"learning_rate": 2.7767072190763733e-05,
|
|
"loss": 0.1643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1446910947561264,
|
|
"step": 1920,
|
|
"valid_targets_mean": 2824.1,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 3.0604133545310015,
|
|
"grad_norm": 0.40186405732354413,
|
|
"learning_rate": 2.7693941919475076e-05,
|
|
"loss": 0.1517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1320028454065323,
|
|
"step": 1925,
|
|
"valid_targets_mean": 4311.8,
|
|
"valid_targets_min": 1384
|
|
},
|
|
{
|
|
"epoch": 3.068362480127186,
|
|
"grad_norm": 0.5420721530535801,
|
|
"learning_rate": 2.7620690711198906e-05,
|
|
"loss": 0.1525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16763970255851746,
|
|
"step": 1930,
|
|
"valid_targets_mean": 3246.2,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 3.0763116057233706,
|
|
"grad_norm": 0.46354880795162495,
|
|
"learning_rate": 2.7547319717332066e-05,
|
|
"loss": 0.1454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15304705500602722,
|
|
"step": 1935,
|
|
"valid_targets_mean": 3665.1,
|
|
"valid_targets_min": 304
|
|
},
|
|
{
|
|
"epoch": 3.0842607313195547,
|
|
"grad_norm": 0.4230495218281992,
|
|
"learning_rate": 2.7473830091154243e-05,
|
|
"loss": 0.1475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1243860200047493,
|
|
"step": 1940,
|
|
"valid_targets_mean": 3626.8,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 3.0922098569157392,
|
|
"grad_norm": 0.4361635236789642,
|
|
"learning_rate": 2.7400222987809856e-05,
|
|
"loss": 0.1448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12188908457756042,
|
|
"step": 1945,
|
|
"valid_targets_mean": 4010.5,
|
|
"valid_targets_min": 498
|
|
},
|
|
{
|
|
"epoch": 3.100158982511924,
|
|
"grad_norm": 0.4439974035967091,
|
|
"learning_rate": 2.7326499564289867e-05,
|
|
"loss": 0.1494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13789242506027222,
|
|
"step": 1950,
|
|
"valid_targets_mean": 3711.9,
|
|
"valid_targets_min": 454
|
|
},
|
|
{
|
|
"epoch": 3.108108108108108,
|
|
"grad_norm": 0.41877176013375694,
|
|
"learning_rate": 2.725266097941363e-05,
|
|
"loss": 0.1395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1316269338130951,
|
|
"step": 1955,
|
|
"valid_targets_mean": 4314.9,
|
|
"valid_targets_min": 1299
|
|
},
|
|
{
|
|
"epoch": 3.1160572337042924,
|
|
"grad_norm": 0.527858481472521,
|
|
"learning_rate": 2.717870839381066e-05,
|
|
"loss": 0.1489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15569709241390228,
|
|
"step": 1960,
|
|
"valid_targets_mean": 3514.8,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 3.124006359300477,
|
|
"grad_norm": 0.3850217928191781,
|
|
"learning_rate": 2.7104642969902357e-05,
|
|
"loss": 0.1367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1267361342906952,
|
|
"step": 1965,
|
|
"valid_targets_mean": 4886.9,
|
|
"valid_targets_min": 300
|
|
},
|
|
{
|
|
"epoch": 3.1319554848966615,
|
|
"grad_norm": 0.44634061646307166,
|
|
"learning_rate": 2.7030465871883812e-05,
|
|
"loss": 0.1478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15789856016635895,
|
|
"step": 1970,
|
|
"valid_targets_mean": 4405.6,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 3.1399046104928456,
|
|
"grad_norm": 0.44800018132825026,
|
|
"learning_rate": 2.6956178265705434e-05,
|
|
"loss": 0.1458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1587265133857727,
|
|
"step": 1975,
|
|
"valid_targets_mean": 4086.9,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 3.14785373608903,
|
|
"grad_norm": 0.4616270378131397,
|
|
"learning_rate": 2.688178131905465e-05,
|
|
"loss": 0.1431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1500604748725891,
|
|
"step": 1980,
|
|
"valid_targets_mean": 3678.9,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 3.1558028616852147,
|
|
"grad_norm": 0.5790165944659751,
|
|
"learning_rate": 2.680727620133757e-05,
|
|
"loss": 0.1504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15115083754062653,
|
|
"step": 1985,
|
|
"valid_targets_mean": 2691.0,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 3.1637519872813993,
|
|
"grad_norm": 0.4845439722921149,
|
|
"learning_rate": 2.673266408366057e-05,
|
|
"loss": 0.1364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12436607480049133,
|
|
"step": 1990,
|
|
"valid_targets_mean": 3094.4,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 3.1717011128775834,
|
|
"grad_norm": 0.49364688977299304,
|
|
"learning_rate": 2.6657946138811915e-05,
|
|
"loss": 0.1452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1511753499507904,
|
|
"step": 1995,
|
|
"valid_targets_mean": 3998.1,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 3.179650238473768,
|
|
"grad_norm": 0.488321490398884,
|
|
"learning_rate": 2.6583123541243302e-05,
|
|
"loss": 0.1433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17050573229789734,
|
|
"step": 2000,
|
|
"valid_targets_mean": 3620.8,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 3.1875993640699525,
|
|
"grad_norm": 0.40982555411387517,
|
|
"learning_rate": 2.6508197467051406e-05,
|
|
"loss": 0.1431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12908829748630524,
|
|
"step": 2005,
|
|
"valid_targets_mean": 4178.4,
|
|
"valid_targets_min": 1472
|
|
},
|
|
{
|
|
"epoch": 3.1955484896661366,
|
|
"grad_norm": 0.40055451862255126,
|
|
"learning_rate": 2.6433169093959405e-05,
|
|
"loss": 0.1441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14281156659126282,
|
|
"step": 2010,
|
|
"valid_targets_mean": 4660.8,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 3.203497615262321,
|
|
"grad_norm": 0.46116477905857267,
|
|
"learning_rate": 2.6358039601298454e-05,
|
|
"loss": 0.1515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13968366384506226,
|
|
"step": 2015,
|
|
"valid_targets_mean": 3884.2,
|
|
"valid_targets_min": 344
|
|
},
|
|
{
|
|
"epoch": 3.2114467408585057,
|
|
"grad_norm": 0.43289954051552515,
|
|
"learning_rate": 2.6282810169989158e-05,
|
|
"loss": 0.1531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1596599519252777,
|
|
"step": 2020,
|
|
"valid_targets_mean": 4390.9,
|
|
"valid_targets_min": 254
|
|
},
|
|
{
|
|
"epoch": 3.21939586645469,
|
|
"grad_norm": 0.5078592423856673,
|
|
"learning_rate": 2.6207481982523e-05,
|
|
"loss": 0.1546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1573963314294815,
|
|
"step": 2025,
|
|
"valid_targets_mean": 3303.6,
|
|
"valid_targets_min": 1122
|
|
},
|
|
{
|
|
"epoch": 3.2273449920508743,
|
|
"grad_norm": 0.5091146694996267,
|
|
"learning_rate": 2.6132056222943757e-05,
|
|
"loss": 0.154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1862080991268158,
|
|
"step": 2030,
|
|
"valid_targets_mean": 3232.9,
|
|
"valid_targets_min": 481
|
|
},
|
|
{
|
|
"epoch": 3.235294117647059,
|
|
"grad_norm": 0.5238546808532447,
|
|
"learning_rate": 2.6056534076828883e-05,
|
|
"loss": 0.1576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18422989547252655,
|
|
"step": 2035,
|
|
"valid_targets_mean": 3585.9,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 3.2432432432432434,
|
|
"grad_norm": 0.4363636469238868,
|
|
"learning_rate": 2.598091673127091e-05,
|
|
"loss": 0.1489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14309856295585632,
|
|
"step": 2040,
|
|
"valid_targets_mean": 3745.1,
|
|
"valid_targets_min": 921
|
|
},
|
|
{
|
|
"epoch": 3.2511923688394275,
|
|
"grad_norm": 0.4339103556613612,
|
|
"learning_rate": 2.5905205374858728e-05,
|
|
"loss": 0.1526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14386232197284698,
|
|
"step": 2045,
|
|
"valid_targets_mean": 4056.8,
|
|
"valid_targets_min": 1465
|
|
},
|
|
{
|
|
"epoch": 3.259141494435612,
|
|
"grad_norm": 0.42741463506623106,
|
|
"learning_rate": 2.5829401197658946e-05,
|
|
"loss": 0.1618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20502996444702148,
|
|
"step": 2050,
|
|
"valid_targets_mean": 5179.3,
|
|
"valid_targets_min": 1065
|
|
},
|
|
{
|
|
"epoch": 3.2670906200317966,
|
|
"grad_norm": 0.43626977002875506,
|
|
"learning_rate": 2.5753505391197173e-05,
|
|
"loss": 0.1436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.148350328207016,
|
|
"step": 2055,
|
|
"valid_targets_mean": 4743.6,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 3.275039745627981,
|
|
"grad_norm": 0.46840087217117315,
|
|
"learning_rate": 2.5677519148439286e-05,
|
|
"loss": 0.1576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15138383209705353,
|
|
"step": 2060,
|
|
"valid_targets_mean": 3961.1,
|
|
"valid_targets_min": 243
|
|
},
|
|
{
|
|
"epoch": 3.2829888712241653,
|
|
"grad_norm": 0.507365313224075,
|
|
"learning_rate": 2.56014436637727e-05,
|
|
"loss": 0.1432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13263681530952454,
|
|
"step": 2065,
|
|
"valid_targets_mean": 2687.8,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 3.29093799682035,
|
|
"grad_norm": 0.4749587295324443,
|
|
"learning_rate": 2.5525280132987544e-05,
|
|
"loss": 0.1503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13461729884147644,
|
|
"step": 2070,
|
|
"valid_targets_mean": 3908.1,
|
|
"valid_targets_min": 343
|
|
},
|
|
{
|
|
"epoch": 3.2988871224165344,
|
|
"grad_norm": 0.5571544935176302,
|
|
"learning_rate": 2.544902975325793e-05,
|
|
"loss": 0.1547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15287074446678162,
|
|
"step": 2075,
|
|
"valid_targets_mean": 2425.0,
|
|
"valid_targets_min": 205
|
|
},
|
|
{
|
|
"epoch": 3.3068362480127185,
|
|
"grad_norm": 0.45571560695399776,
|
|
"learning_rate": 2.5372693723123075e-05,
|
|
"loss": 0.1445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1337605118751526,
|
|
"step": 2080,
|
|
"valid_targets_mean": 3706.5,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 3.314785373608903,
|
|
"grad_norm": 0.4841238396850524,
|
|
"learning_rate": 2.5296273242468514e-05,
|
|
"loss": 0.1432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13751645386219025,
|
|
"step": 2085,
|
|
"valid_targets_mean": 3224.1,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 3.3227344992050876,
|
|
"grad_norm": 0.5882557810118016,
|
|
"learning_rate": 2.5219769512507202e-05,
|
|
"loss": 0.1449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15059013664722443,
|
|
"step": 2090,
|
|
"valid_targets_mean": 4222.4,
|
|
"valid_targets_min": 1837
|
|
},
|
|
{
|
|
"epoch": 3.3306836248012717,
|
|
"grad_norm": 0.4355529800792949,
|
|
"learning_rate": 2.5143183735760638e-05,
|
|
"loss": 0.1401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13719777762889862,
|
|
"step": 2095,
|
|
"valid_targets_mean": 5038.6,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 3.338632750397456,
|
|
"grad_norm": 0.48366910917541367,
|
|
"learning_rate": 2.5066517116039978e-05,
|
|
"loss": 0.1484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14523795247077942,
|
|
"step": 2100,
|
|
"valid_targets_mean": 3369.9,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 3.3465818759936408,
|
|
"grad_norm": 0.432180761274418,
|
|
"learning_rate": 2.4989770858427113e-05,
|
|
"loss": 0.1391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14896121621131897,
|
|
"step": 2105,
|
|
"valid_targets_mean": 4274.6,
|
|
"valid_targets_min": 1463
|
|
},
|
|
{
|
|
"epoch": 3.3545310015898253,
|
|
"grad_norm": 0.4673043662584066,
|
|
"learning_rate": 2.4912946169255722e-05,
|
|
"loss": 0.1482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14605014026165009,
|
|
"step": 2110,
|
|
"valid_targets_mean": 3783.2,
|
|
"valid_targets_min": 1054
|
|
},
|
|
{
|
|
"epoch": 3.3624801271860094,
|
|
"grad_norm": 0.4743882917216998,
|
|
"learning_rate": 2.4836044256092288e-05,
|
|
"loss": 0.151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1554591804742813,
|
|
"step": 2115,
|
|
"valid_targets_mean": 3935.7,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 3.370429252782194,
|
|
"grad_norm": 0.4815307642109087,
|
|
"learning_rate": 2.475906632771714e-05,
|
|
"loss": 0.1478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14014503359794617,
|
|
"step": 2120,
|
|
"valid_targets_mean": 3164.2,
|
|
"valid_targets_min": 373
|
|
},
|
|
{
|
|
"epoch": 3.3783783783783785,
|
|
"grad_norm": 0.40589792615723674,
|
|
"learning_rate": 2.468201359410548e-05,
|
|
"loss": 0.1402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14491534233093262,
|
|
"step": 2125,
|
|
"valid_targets_mean": 4419.9,
|
|
"valid_targets_min": 340
|
|
},
|
|
{
|
|
"epoch": 3.3863275039745626,
|
|
"grad_norm": 0.44260667005600757,
|
|
"learning_rate": 2.4604887266408304e-05,
|
|
"loss": 0.1451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1457509994506836,
|
|
"step": 2130,
|
|
"valid_targets_mean": 4288.4,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 3.394276629570747,
|
|
"grad_norm": 0.4581973653604292,
|
|
"learning_rate": 2.4527688556933402e-05,
|
|
"loss": 0.1517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15415199100971222,
|
|
"step": 2135,
|
|
"valid_targets_mean": 4045.8,
|
|
"valid_targets_min": 977
|
|
},
|
|
{
|
|
"epoch": 3.4022257551669317,
|
|
"grad_norm": 0.7140350335944461,
|
|
"learning_rate": 2.445041867912629e-05,
|
|
"loss": 0.1537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15349167585372925,
|
|
"step": 2140,
|
|
"valid_targets_mean": 2618.8,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 3.4101748807631163,
|
|
"grad_norm": 0.5034625114730497,
|
|
"learning_rate": 2.4373078847551154e-05,
|
|
"loss": 0.1456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14303407073020935,
|
|
"step": 2145,
|
|
"valid_targets_mean": 3076.3,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 3.4181240063593004,
|
|
"grad_norm": 0.48632708952047105,
|
|
"learning_rate": 2.4295670277871736e-05,
|
|
"loss": 0.1495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1465069055557251,
|
|
"step": 2150,
|
|
"valid_targets_mean": 3372.1,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 3.426073131955485,
|
|
"grad_norm": 0.4937147781905013,
|
|
"learning_rate": 2.4218194186832237e-05,
|
|
"loss": 0.1492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17899924516677856,
|
|
"step": 2155,
|
|
"valid_targets_mean": 3543.7,
|
|
"valid_targets_min": 204
|
|
},
|
|
{
|
|
"epoch": 3.4340222575516695,
|
|
"grad_norm": 0.446614778026149,
|
|
"learning_rate": 2.4140651792238193e-05,
|
|
"loss": 0.1502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16787397861480713,
|
|
"step": 2160,
|
|
"valid_targets_mean": 4232.8,
|
|
"valid_targets_min": 1131
|
|
},
|
|
{
|
|
"epoch": 3.4419713831478536,
|
|
"grad_norm": 0.5049089497785076,
|
|
"learning_rate": 2.4063044312937332e-05,
|
|
"loss": 0.1635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1745222806930542,
|
|
"step": 2165,
|
|
"valid_targets_mean": 3461.6,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 3.449920508744038,
|
|
"grad_norm": 0.41955260576113246,
|
|
"learning_rate": 2.3985372968800407e-05,
|
|
"loss": 0.1517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1277008205652237,
|
|
"step": 2170,
|
|
"valid_targets_mean": 3886.0,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 3.4578696343402227,
|
|
"grad_norm": 0.44853544938510986,
|
|
"learning_rate": 2.3907638980702043e-05,
|
|
"loss": 0.1454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13143455982208252,
|
|
"step": 2175,
|
|
"valid_targets_mean": 2990.2,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 3.4658187599364068,
|
|
"grad_norm": 0.5052915184377108,
|
|
"learning_rate": 2.382984357050151e-05,
|
|
"loss": 0.1592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1574200838804245,
|
|
"step": 2180,
|
|
"valid_targets_mean": 3370.8,
|
|
"valid_targets_min": 352
|
|
},
|
|
{
|
|
"epoch": 3.4737678855325913,
|
|
"grad_norm": 0.44259229283391566,
|
|
"learning_rate": 2.3751987961023545e-05,
|
|
"loss": 0.1378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13950997591018677,
|
|
"step": 2185,
|
|
"valid_targets_mean": 3953.6,
|
|
"valid_targets_min": 220
|
|
},
|
|
{
|
|
"epoch": 3.481717011128776,
|
|
"grad_norm": 0.48788565916051185,
|
|
"learning_rate": 2.3674073376039152e-05,
|
|
"loss": 0.1493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1728115677833557,
|
|
"step": 2190,
|
|
"valid_targets_mean": 3669.0,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 3.4896661367249604,
|
|
"grad_norm": 0.4492760385555854,
|
|
"learning_rate": 2.359610104024631e-05,
|
|
"loss": 0.1581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15678560733795166,
|
|
"step": 2195,
|
|
"valid_targets_mean": 3583.8,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 3.4976152623211445,
|
|
"grad_norm": 0.39986255476691307,
|
|
"learning_rate": 2.3518072179250753e-05,
|
|
"loss": 0.1462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14417177438735962,
|
|
"step": 2200,
|
|
"valid_targets_mean": 4219.2,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 3.505564387917329,
|
|
"grad_norm": 0.384596360059307,
|
|
"learning_rate": 2.343998801954673e-05,
|
|
"loss": 0.1377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11725412309169769,
|
|
"step": 2205,
|
|
"valid_targets_mean": 4539.5,
|
|
"valid_targets_min": 1403
|
|
},
|
|
{
|
|
"epoch": 3.5135135135135136,
|
|
"grad_norm": 0.3726022277230603,
|
|
"learning_rate": 2.3361849788497666e-05,
|
|
"loss": 0.1467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12121937423944473,
|
|
"step": 2210,
|
|
"valid_targets_mean": 4714.5,
|
|
"valid_targets_min": 1012
|
|
},
|
|
{
|
|
"epoch": 3.521462639109698,
|
|
"grad_norm": 0.426941312289138,
|
|
"learning_rate": 2.3283658714316935e-05,
|
|
"loss": 0.1498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13825076818466187,
|
|
"step": 2215,
|
|
"valid_targets_mean": 4406.4,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 3.5294117647058822,
|
|
"grad_norm": 0.4301504339821543,
|
|
"learning_rate": 2.320541602604851e-05,
|
|
"loss": 0.1424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13971027731895447,
|
|
"step": 2220,
|
|
"valid_targets_mean": 3877.9,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 3.537360890302067,
|
|
"grad_norm": 0.5113573188300341,
|
|
"learning_rate": 2.3127122953547663e-05,
|
|
"loss": 0.15,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1780821532011032,
|
|
"step": 2225,
|
|
"valid_targets_mean": 3525.3,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 3.5453100158982513,
|
|
"grad_norm": 0.5063219756936636,
|
|
"learning_rate": 2.3048780727461627e-05,
|
|
"loss": 0.1625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16249513626098633,
|
|
"step": 2230,
|
|
"valid_targets_mean": 3025.3,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 3.5532591414944354,
|
|
"grad_norm": 0.4792775762459798,
|
|
"learning_rate": 2.2970390579210246e-05,
|
|
"loss": 0.1414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1491563469171524,
|
|
"step": 2235,
|
|
"valid_targets_mean": 3644.2,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 3.56120826709062,
|
|
"grad_norm": 0.4620771758711201,
|
|
"learning_rate": 2.2891953740966643e-05,
|
|
"loss": 0.1509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16006189584732056,
|
|
"step": 2240,
|
|
"valid_targets_mean": 4079.1,
|
|
"valid_targets_min": 235
|
|
},
|
|
{
|
|
"epoch": 3.5691573926868045,
|
|
"grad_norm": 0.44785087939845997,
|
|
"learning_rate": 2.281347144563782e-05,
|
|
"loss": 0.1515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1455570012331009,
|
|
"step": 2245,
|
|
"valid_targets_mean": 4200.4,
|
|
"valid_targets_min": 562
|
|
},
|
|
{
|
|
"epoch": 3.5771065182829886,
|
|
"grad_norm": 0.5185565156273501,
|
|
"learning_rate": 2.273494492684531e-05,
|
|
"loss": 0.1497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16593654453754425,
|
|
"step": 2250,
|
|
"valid_targets_mean": 3104.9,
|
|
"valid_targets_min": 309
|
|
},
|
|
{
|
|
"epoch": 3.585055643879173,
|
|
"grad_norm": 0.4172228609919065,
|
|
"learning_rate": 2.265637541890577e-05,
|
|
"loss": 0.1484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1278490126132965,
|
|
"step": 2255,
|
|
"valid_targets_mean": 3515.1,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 3.5930047694753577,
|
|
"grad_norm": 0.4286795679059123,
|
|
"learning_rate": 2.2577764156811563e-05,
|
|
"loss": 0.1455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14926648139953613,
|
|
"step": 2260,
|
|
"valid_targets_mean": 4430.3,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 3.6009538950715423,
|
|
"grad_norm": 0.560172122368529,
|
|
"learning_rate": 2.2499112376211373e-05,
|
|
"loss": 0.1631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18510201573371887,
|
|
"step": 2265,
|
|
"valid_targets_mean": 3477.6,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 3.6089030206677264,
|
|
"grad_norm": 0.43226594819787095,
|
|
"learning_rate": 2.2420421313390776e-05,
|
|
"loss": 0.1498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14725884795188904,
|
|
"step": 2270,
|
|
"valid_targets_mean": 4508.8,
|
|
"valid_targets_min": 1163
|
|
},
|
|
{
|
|
"epoch": 3.616852146263911,
|
|
"grad_norm": 0.4437643653037656,
|
|
"learning_rate": 2.234169220525282e-05,
|
|
"loss": 0.1482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13907790184020996,
|
|
"step": 2275,
|
|
"valid_targets_mean": 4162.6,
|
|
"valid_targets_min": 1468
|
|
},
|
|
{
|
|
"epoch": 3.6248012718600955,
|
|
"grad_norm": 0.4837609246828785,
|
|
"learning_rate": 2.226292628929853e-05,
|
|
"loss": 0.1474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14810554683208466,
|
|
"step": 2280,
|
|
"valid_targets_mean": 3524.7,
|
|
"valid_targets_min": 947
|
|
},
|
|
{
|
|
"epoch": 3.63275039745628,
|
|
"grad_norm": 0.4869827211098654,
|
|
"learning_rate": 2.2184124803607525e-05,
|
|
"loss": 0.1466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15223252773284912,
|
|
"step": 2285,
|
|
"valid_targets_mean": 3463.3,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 3.640699523052464,
|
|
"grad_norm": 0.4108383751418014,
|
|
"learning_rate": 2.210528898681851e-05,
|
|
"loss": 0.1539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.142578586935997,
|
|
"step": 2290,
|
|
"valid_targets_mean": 4477.9,
|
|
"valid_targets_min": 1006
|
|
},
|
|
{
|
|
"epoch": 3.6486486486486487,
|
|
"grad_norm": 0.4566807244274852,
|
|
"learning_rate": 2.2026420078109825e-05,
|
|
"loss": 0.1373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13954466581344604,
|
|
"step": 2295,
|
|
"valid_targets_mean": 4329.2,
|
|
"valid_targets_min": 1167
|
|
},
|
|
{
|
|
"epoch": 3.6565977742448332,
|
|
"grad_norm": 0.4162315487171508,
|
|
"learning_rate": 2.1947519317179972e-05,
|
|
"loss": 0.1544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11526475101709366,
|
|
"step": 2300,
|
|
"valid_targets_mean": 4507.4,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 3.6645468998410173,
|
|
"grad_norm": 0.4900587733241069,
|
|
"learning_rate": 2.1868587944228118e-05,
|
|
"loss": 0.1595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15211628377437592,
|
|
"step": 2305,
|
|
"valid_targets_mean": 3048.8,
|
|
"valid_targets_min": 286
|
|
},
|
|
{
|
|
"epoch": 3.672496025437202,
|
|
"grad_norm": 0.5187421796257302,
|
|
"learning_rate": 2.1789627199934588e-05,
|
|
"loss": 0.1605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1537800431251526,
|
|
"step": 2310,
|
|
"valid_targets_mean": 3951.4,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 3.6804451510333864,
|
|
"grad_norm": 0.4523155098757611,
|
|
"learning_rate": 2.1710638325441408e-05,
|
|
"loss": 0.1445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15439185500144958,
|
|
"step": 2315,
|
|
"valid_targets_mean": 3848.8,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 3.6883942766295705,
|
|
"grad_norm": 0.5113186455328838,
|
|
"learning_rate": 2.1631622562332744e-05,
|
|
"loss": 0.1497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12993109226226807,
|
|
"step": 2320,
|
|
"valid_targets_mean": 3168.6,
|
|
"valid_targets_min": 203
|
|
},
|
|
{
|
|
"epoch": 3.696343402225755,
|
|
"grad_norm": 0.520264446513899,
|
|
"learning_rate": 2.155258115261542e-05,
|
|
"loss": 0.1453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1672590672969818,
|
|
"step": 2325,
|
|
"valid_targets_mean": 3197.3,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 3.7042925278219396,
|
|
"grad_norm": 0.397858988377642,
|
|
"learning_rate": 2.1473515338699383e-05,
|
|
"loss": 0.146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13418936729431152,
|
|
"step": 2330,
|
|
"valid_targets_mean": 4454.8,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 3.7122416534181237,
|
|
"grad_norm": 0.47038464892938564,
|
|
"learning_rate": 2.1394426363378186e-05,
|
|
"loss": 0.1567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14343999326229095,
|
|
"step": 2335,
|
|
"valid_targets_mean": 3456.6,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 3.7201907790143083,
|
|
"grad_norm": 0.4590271298948369,
|
|
"learning_rate": 2.1315315469809426e-05,
|
|
"loss": 0.1465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15839475393295288,
|
|
"step": 2340,
|
|
"valid_targets_mean": 3980.2,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 3.728139904610493,
|
|
"grad_norm": 0.526091163735105,
|
|
"learning_rate": 2.1236183901495236e-05,
|
|
"loss": 0.1668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21831142902374268,
|
|
"step": 2345,
|
|
"valid_targets_mean": 3343.4,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 3.7360890302066774,
|
|
"grad_norm": 0.4487683579294528,
|
|
"learning_rate": 2.1157032902262716e-05,
|
|
"loss": 0.1424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15747785568237305,
|
|
"step": 2350,
|
|
"valid_targets_mean": 4185.1,
|
|
"valid_targets_min": 834
|
|
},
|
|
{
|
|
"epoch": 3.744038155802862,
|
|
"grad_norm": 0.41019128305055114,
|
|
"learning_rate": 2.1077863716244388e-05,
|
|
"loss": 0.1415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12705016136169434,
|
|
"step": 2355,
|
|
"valid_targets_mean": 3949.4,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 3.751987281399046,
|
|
"grad_norm": 0.446236415640611,
|
|
"learning_rate": 2.099867758785866e-05,
|
|
"loss": 0.1476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14645332098007202,
|
|
"step": 2360,
|
|
"valid_targets_mean": 3793.7,
|
|
"valid_targets_min": 551
|
|
},
|
|
{
|
|
"epoch": 3.7599364069952306,
|
|
"grad_norm": 0.5374551107704829,
|
|
"learning_rate": 2.091947576179023e-05,
|
|
"loss": 0.1483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1566208451986313,
|
|
"step": 2365,
|
|
"valid_targets_mean": 3043.1,
|
|
"valid_targets_min": 279
|
|
},
|
|
{
|
|
"epoch": 3.767885532591415,
|
|
"grad_norm": 0.48392227359646184,
|
|
"learning_rate": 2.084025948297055e-05,
|
|
"loss": 0.1393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14955100417137146,
|
|
"step": 2370,
|
|
"valid_targets_mean": 3625.3,
|
|
"valid_targets_min": 777
|
|
},
|
|
{
|
|
"epoch": 3.7758346581875992,
|
|
"grad_norm": 0.4196458061277742,
|
|
"learning_rate": 2.0761029996558233e-05,
|
|
"loss": 0.1379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13161495327949524,
|
|
"step": 2375,
|
|
"valid_targets_mean": 3986.8,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 3.7837837837837838,
|
|
"grad_norm": 0.40047392629967743,
|
|
"learning_rate": 2.068178854791951e-05,
|
|
"loss": 0.1446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13223011791706085,
|
|
"step": 2380,
|
|
"valid_targets_mean": 4361.8,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 3.7917329093799683,
|
|
"grad_norm": 0.3964855035870811,
|
|
"learning_rate": 2.0602536382608638e-05,
|
|
"loss": 0.1452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12523694336414337,
|
|
"step": 2385,
|
|
"valid_targets_mean": 4463.9,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 3.7996820349761524,
|
|
"grad_norm": 0.4585272620018194,
|
|
"learning_rate": 2.0523274746348315e-05,
|
|
"loss": 0.1525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16742074489593506,
|
|
"step": 2390,
|
|
"valid_targets_mean": 4025.6,
|
|
"valid_targets_min": 1296
|
|
},
|
|
{
|
|
"epoch": 3.807631160572337,
|
|
"grad_norm": 0.5154964949124577,
|
|
"learning_rate": 2.0444004885010114e-05,
|
|
"loss": 0.1542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14283734560012817,
|
|
"step": 2395,
|
|
"valid_targets_mean": 2625.4,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 3.8155802861685215,
|
|
"grad_norm": 0.40675149816537554,
|
|
"learning_rate": 2.0364728044594897e-05,
|
|
"loss": 0.1434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13937920331954956,
|
|
"step": 2400,
|
|
"valid_targets_mean": 4620.8,
|
|
"valid_targets_min": 847
|
|
},
|
|
{
|
|
"epoch": 3.8235294117647056,
|
|
"grad_norm": 0.44791101289332774,
|
|
"learning_rate": 2.0285445471213218e-05,
|
|
"loss": 0.151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11926200985908508,
|
|
"step": 2405,
|
|
"valid_targets_mean": 3797.2,
|
|
"valid_targets_min": 280
|
|
},
|
|
{
|
|
"epoch": 3.83147853736089,
|
|
"grad_norm": 0.4690049435912802,
|
|
"learning_rate": 2.020615841106575e-05,
|
|
"loss": 0.1503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.158674418926239,
|
|
"step": 2410,
|
|
"valid_targets_mean": 3615.0,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 3.8394276629570747,
|
|
"grad_norm": 0.4778625950349475,
|
|
"learning_rate": 2.0126868110423685e-05,
|
|
"loss": 0.1582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1569017916917801,
|
|
"step": 2415,
|
|
"valid_targets_mean": 3607.1,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 3.8473767885532593,
|
|
"grad_norm": 0.5094399368297139,
|
|
"learning_rate": 2.0047575815609166e-05,
|
|
"loss": 0.1508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13234496116638184,
|
|
"step": 2420,
|
|
"valid_targets_mean": 4075.4,
|
|
"valid_targets_min": 1276
|
|
},
|
|
{
|
|
"epoch": 3.855325914149444,
|
|
"grad_norm": 0.4474580576793741,
|
|
"learning_rate": 1.996828277297566e-05,
|
|
"loss": 0.1469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1673652082681656,
|
|
"step": 2425,
|
|
"valid_targets_mean": 3956.6,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 3.863275039745628,
|
|
"grad_norm": 0.47557411799173016,
|
|
"learning_rate": 1.988899022888841e-05,
|
|
"loss": 0.1469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14662069082260132,
|
|
"step": 2430,
|
|
"valid_targets_mean": 3376.6,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 3.8712241653418125,
|
|
"grad_norm": 0.47253918336301953,
|
|
"learning_rate": 1.98096994297048e-05,
|
|
"loss": 0.156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17109301686286926,
|
|
"step": 2435,
|
|
"valid_targets_mean": 4009.4,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 3.879173290937997,
|
|
"grad_norm": 0.4811824636612629,
|
|
"learning_rate": 1.9730411621754798e-05,
|
|
"loss": 0.1507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16535258293151855,
|
|
"step": 2440,
|
|
"valid_targets_mean": 4401.9,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 3.887122416534181,
|
|
"grad_norm": 0.47574278128170894,
|
|
"learning_rate": 1.9651128051321376e-05,
|
|
"loss": 0.1568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16865494847297668,
|
|
"step": 2445,
|
|
"valid_targets_mean": 3826.2,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 3.8950715421303657,
|
|
"grad_norm": 0.4927720545302061,
|
|
"learning_rate": 1.9571849964620858e-05,
|
|
"loss": 0.149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1311875879764557,
|
|
"step": 2450,
|
|
"valid_targets_mean": 2777.1,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 3.90302066772655,
|
|
"grad_norm": 0.4520272890103647,
|
|
"learning_rate": 1.949257860778339e-05,
|
|
"loss": 0.1509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19364655017852783,
|
|
"step": 2455,
|
|
"valid_targets_mean": 4212.9,
|
|
"valid_targets_min": 304
|
|
},
|
|
{
|
|
"epoch": 3.9109697933227343,
|
|
"grad_norm": 0.48056289266042657,
|
|
"learning_rate": 1.9413315226833343e-05,
|
|
"loss": 0.1507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1471574306488037,
|
|
"step": 2460,
|
|
"valid_targets_mean": 3280.9,
|
|
"valid_targets_min": 988
|
|
},
|
|
{
|
|
"epoch": 3.918918918918919,
|
|
"grad_norm": 0.44927106422669333,
|
|
"learning_rate": 1.9334061067669725e-05,
|
|
"loss": 0.161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15913772583007812,
|
|
"step": 2465,
|
|
"valid_targets_mean": 4163.8,
|
|
"valid_targets_min": 525
|
|
},
|
|
{
|
|
"epoch": 3.9268680445151034,
|
|
"grad_norm": 0.4047406712647876,
|
|
"learning_rate": 1.9254817376046556e-05,
|
|
"loss": 0.1628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14912940561771393,
|
|
"step": 2470,
|
|
"valid_targets_mean": 4459.5,
|
|
"valid_targets_min": 1301
|
|
},
|
|
{
|
|
"epoch": 3.9348171701112875,
|
|
"grad_norm": 0.48354323358352247,
|
|
"learning_rate": 1.9175585397553368e-05,
|
|
"loss": 0.1459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14772868156433105,
|
|
"step": 2475,
|
|
"valid_targets_mean": 3460.9,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 3.942766295707472,
|
|
"grad_norm": 0.45165188294145747,
|
|
"learning_rate": 1.909636637759554e-05,
|
|
"loss": 0.1585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14915470778942108,
|
|
"step": 2480,
|
|
"valid_targets_mean": 3941.9,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 3.9507154213036566,
|
|
"grad_norm": 0.675364427291404,
|
|
"learning_rate": 1.9017161561374787e-05,
|
|
"loss": 0.1375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14537788927555084,
|
|
"step": 2485,
|
|
"valid_targets_mean": 3596.5,
|
|
"valid_targets_min": 271
|
|
},
|
|
{
|
|
"epoch": 3.958664546899841,
|
|
"grad_norm": 0.4529521240142203,
|
|
"learning_rate": 1.893797219386957e-05,
|
|
"loss": 0.1561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14005938172340393,
|
|
"step": 2490,
|
|
"valid_targets_mean": 3370.2,
|
|
"valid_targets_min": 491
|
|
},
|
|
{
|
|
"epoch": 3.9666136724960257,
|
|
"grad_norm": 0.43898112823019947,
|
|
"learning_rate": 1.885879951981549e-05,
|
|
"loss": 0.143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1563473492860794,
|
|
"step": 2495,
|
|
"valid_targets_mean": 4142.4,
|
|
"valid_targets_min": 519
|
|
},
|
|
{
|
|
"epoch": 3.97456279809221,
|
|
"grad_norm": 0.4632944157750017,
|
|
"learning_rate": 1.877964478368577e-05,
|
|
"loss": 0.1581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15442904829978943,
|
|
"step": 2500,
|
|
"valid_targets_mean": 3855.9,
|
|
"valid_targets_min": 302
|
|
},
|
|
{
|
|
"epoch": 3.9825119236883944,
|
|
"grad_norm": 0.4422485867941161,
|
|
"learning_rate": 1.8700509229671696e-05,
|
|
"loss": 0.1462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1370777189731598,
|
|
"step": 2505,
|
|
"valid_targets_mean": 3780.2,
|
|
"valid_targets_min": 1165
|
|
},
|
|
{
|
|
"epoch": 3.990461049284579,
|
|
"grad_norm": 0.40877054927258344,
|
|
"learning_rate": 1.8621394101663003e-05,
|
|
"loss": 0.1471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1423787623643875,
|
|
"step": 2510,
|
|
"valid_targets_mean": 4734.8,
|
|
"valid_targets_min": 519
|
|
},
|
|
{
|
|
"epoch": 3.998410174880763,
|
|
"grad_norm": 0.49482609642383296,
|
|
"learning_rate": 1.854230064322837e-05,
|
|
"loss": 0.1383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15848851203918457,
|
|
"step": 2515,
|
|
"valid_targets_mean": 3223.8,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 4.006359300476947,
|
|
"grad_norm": 0.3675406957911613,
|
|
"learning_rate": 1.8463230097595887e-05,
|
|
"loss": 0.1376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13189461827278137,
|
|
"step": 2520,
|
|
"valid_targets_mean": 4952.8,
|
|
"valid_targets_min": 1721
|
|
},
|
|
{
|
|
"epoch": 4.014308426073132,
|
|
"grad_norm": 0.507020553139243,
|
|
"learning_rate": 1.8384183707633475e-05,
|
|
"loss": 0.1316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1438116431236267,
|
|
"step": 2525,
|
|
"valid_targets_mean": 3300.6,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 4.022257551669316,
|
|
"grad_norm": 0.47001299296067484,
|
|
"learning_rate": 1.8305162715829348e-05,
|
|
"loss": 0.139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14781130850315094,
|
|
"step": 2530,
|
|
"valid_targets_mean": 3934.6,
|
|
"valid_targets_min": 921
|
|
},
|
|
{
|
|
"epoch": 4.030206677265501,
|
|
"grad_norm": 0.41847190140441093,
|
|
"learning_rate": 1.8226168364272534e-05,
|
|
"loss": 0.123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11341873556375504,
|
|
"step": 2535,
|
|
"valid_targets_mean": 4553.1,
|
|
"valid_targets_min": 1276
|
|
},
|
|
{
|
|
"epoch": 4.038155802861685,
|
|
"grad_norm": 0.5373424873152618,
|
|
"learning_rate": 1.8147201894633282e-05,
|
|
"loss": 0.1284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10829570144414902,
|
|
"step": 2540,
|
|
"valid_targets_mean": 4025.9,
|
|
"valid_targets_min": 551
|
|
},
|
|
{
|
|
"epoch": 4.046104928457869,
|
|
"grad_norm": 0.44151630488419186,
|
|
"learning_rate": 1.8068264548143605e-05,
|
|
"loss": 0.1361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13234928250312805,
|
|
"step": 2545,
|
|
"valid_targets_mean": 4406.9,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 4.054054054054054,
|
|
"grad_norm": 0.44111473492914083,
|
|
"learning_rate": 1.7989357565577746e-05,
|
|
"loss": 0.1209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12020754814147949,
|
|
"step": 2550,
|
|
"valid_targets_mean": 3875.1,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 4.0620031796502385,
|
|
"grad_norm": 0.497648457748331,
|
|
"learning_rate": 1.7910482187232643e-05,
|
|
"loss": 0.1271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13153505325317383,
|
|
"step": 2555,
|
|
"valid_targets_mean": 3588.8,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 4.069952305246423,
|
|
"grad_norm": 0.4466450096417644,
|
|
"learning_rate": 1.7831639652908507e-05,
|
|
"loss": 0.132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1519913375377655,
|
|
"step": 2560,
|
|
"valid_targets_mean": 4519.4,
|
|
"valid_targets_min": 373
|
|
},
|
|
{
|
|
"epoch": 4.077901430842608,
|
|
"grad_norm": 0.5133411375199687,
|
|
"learning_rate": 1.775283120188925e-05,
|
|
"loss": 0.12,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11747326701879501,
|
|
"step": 2565,
|
|
"valid_targets_mean": 3593.9,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 4.085850556438792,
|
|
"grad_norm": 0.5340060908823832,
|
|
"learning_rate": 1.7674058072923075e-05,
|
|
"loss": 0.1424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14561407268047333,
|
|
"step": 2570,
|
|
"valid_targets_mean": 2832.8,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 4.093799682034976,
|
|
"grad_norm": 0.5131875664235621,
|
|
"learning_rate": 1.7595321504202977e-05,
|
|
"loss": 0.1352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13636553287506104,
|
|
"step": 2575,
|
|
"valid_targets_mean": 3649.6,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 4.101748807631161,
|
|
"grad_norm": 0.49807691314048513,
|
|
"learning_rate": 1.751662273334725e-05,
|
|
"loss": 0.1355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14528346061706543,
|
|
"step": 2580,
|
|
"valid_targets_mean": 3304.9,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 4.109697933227345,
|
|
"grad_norm": 0.5137089285911086,
|
|
"learning_rate": 1.7437962997380093e-05,
|
|
"loss": 0.1315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12808027863502502,
|
|
"step": 2585,
|
|
"valid_targets_mean": 3785.6,
|
|
"valid_targets_min": 357
|
|
},
|
|
{
|
|
"epoch": 4.117647058823529,
|
|
"grad_norm": 0.4503928265620839,
|
|
"learning_rate": 1.7359343532712135e-05,
|
|
"loss": 0.1379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14875495433807373,
|
|
"step": 2590,
|
|
"valid_targets_mean": 4586.7,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 4.125596184419714,
|
|
"grad_norm": 0.4166285208229854,
|
|
"learning_rate": 1.7280765575120992e-05,
|
|
"loss": 0.136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11069852113723755,
|
|
"step": 2595,
|
|
"valid_targets_mean": 4363.1,
|
|
"valid_targets_min": 498
|
|
},
|
|
{
|
|
"epoch": 4.133545310015898,
|
|
"grad_norm": 0.5155386718641107,
|
|
"learning_rate": 1.7202230359731835e-05,
|
|
"loss": 0.1373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14892956614494324,
|
|
"step": 2600,
|
|
"valid_targets_mean": 3399.8,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 4.141494435612083,
|
|
"grad_norm": 0.526180170879376,
|
|
"learning_rate": 1.7123739120998033e-05,
|
|
"loss": 0.1252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.133394256234169,
|
|
"step": 2605,
|
|
"valid_targets_mean": 3551.6,
|
|
"valid_targets_min": 519
|
|
},
|
|
{
|
|
"epoch": 4.149443561208267,
|
|
"grad_norm": 0.43448892503799785,
|
|
"learning_rate": 1.7045293092681686e-05,
|
|
"loss": 0.1308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12770622968673706,
|
|
"step": 2610,
|
|
"valid_targets_mean": 4312.1,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 4.157392686804451,
|
|
"grad_norm": 0.47438188721000024,
|
|
"learning_rate": 1.6966893507834242e-05,
|
|
"loss": 0.1388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11475702375173569,
|
|
"step": 2615,
|
|
"valid_targets_mean": 3365.8,
|
|
"valid_targets_min": 527
|
|
},
|
|
{
|
|
"epoch": 4.165341812400636,
|
|
"grad_norm": 0.4734353597894468,
|
|
"learning_rate": 1.6888541598777167e-05,
|
|
"loss": 0.1413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14915436506271362,
|
|
"step": 2620,
|
|
"valid_targets_mean": 3915.1,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 4.17329093799682,
|
|
"grad_norm": 0.44954055606792176,
|
|
"learning_rate": 1.68102385970825e-05,
|
|
"loss": 0.1295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12147843092679977,
|
|
"step": 2625,
|
|
"valid_targets_mean": 4042.4,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 4.1812400635930045,
|
|
"grad_norm": 0.5461716268999647,
|
|
"learning_rate": 1.6731985733553545e-05,
|
|
"loss": 0.1351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1752236783504486,
|
|
"step": 2630,
|
|
"valid_targets_mean": 3508.4,
|
|
"valid_targets_min": 237
|
|
},
|
|
{
|
|
"epoch": 4.1891891891891895,
|
|
"grad_norm": 0.47376122084778394,
|
|
"learning_rate": 1.6653784238205525e-05,
|
|
"loss": 0.137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13985666632652283,
|
|
"step": 2635,
|
|
"valid_targets_mean": 3967.4,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 4.197138314785374,
|
|
"grad_norm": 0.49860840899961373,
|
|
"learning_rate": 1.6575635340246203e-05,
|
|
"loss": 0.142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13958841562271118,
|
|
"step": 2640,
|
|
"valid_targets_mean": 3776.4,
|
|
"valid_targets_min": 581
|
|
},
|
|
{
|
|
"epoch": 4.205087440381558,
|
|
"grad_norm": 0.5671244632485692,
|
|
"learning_rate": 1.649754026805662e-05,
|
|
"loss": 0.1334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17595896124839783,
|
|
"step": 2645,
|
|
"valid_targets_mean": 3767.5,
|
|
"valid_targets_min": 239
|
|
},
|
|
{
|
|
"epoch": 4.213036565977743,
|
|
"grad_norm": 0.4534522358165292,
|
|
"learning_rate": 1.6419500249171737e-05,
|
|
"loss": 0.1256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12649832665920258,
|
|
"step": 2650,
|
|
"valid_targets_mean": 3767.0,
|
|
"valid_targets_min": 300
|
|
},
|
|
{
|
|
"epoch": 4.220985691573927,
|
|
"grad_norm": 0.48461063763882706,
|
|
"learning_rate": 1.634151651026118e-05,
|
|
"loss": 0.1253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11336950957775116,
|
|
"step": 2655,
|
|
"valid_targets_mean": 3496.2,
|
|
"valid_targets_min": 337
|
|
},
|
|
{
|
|
"epoch": 4.228934817170111,
|
|
"grad_norm": 0.5517432754534229,
|
|
"learning_rate": 1.626359027710993e-05,
|
|
"loss": 0.1255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12822790443897247,
|
|
"step": 2660,
|
|
"valid_targets_mean": 2547.8,
|
|
"valid_targets_min": 368
|
|
},
|
|
{
|
|
"epoch": 4.236883942766296,
|
|
"grad_norm": 0.6506977836059051,
|
|
"learning_rate": 1.6185722774599064e-05,
|
|
"loss": 0.1323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13393157720565796,
|
|
"step": 2665,
|
|
"valid_targets_mean": 3280.5,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 4.24483306836248,
|
|
"grad_norm": 0.5560817254019538,
|
|
"learning_rate": 1.6107915226686504e-05,
|
|
"loss": 0.1324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1394142508506775,
|
|
"step": 2670,
|
|
"valid_targets_mean": 3167.7,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 4.252782193958664,
|
|
"grad_norm": 0.531467297115508,
|
|
"learning_rate": 1.603016885638779e-05,
|
|
"loss": 0.1431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14491944015026093,
|
|
"step": 2675,
|
|
"valid_targets_mean": 3221.8,
|
|
"valid_targets_min": 247
|
|
},
|
|
{
|
|
"epoch": 4.260731319554849,
|
|
"grad_norm": 0.4488475679526595,
|
|
"learning_rate": 1.5952484885756827e-05,
|
|
"loss": 0.1423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13313503563404083,
|
|
"step": 2680,
|
|
"valid_targets_mean": 3901.4,
|
|
"valid_targets_min": 317
|
|
},
|
|
{
|
|
"epoch": 4.268680445151033,
|
|
"grad_norm": 0.47568888206678156,
|
|
"learning_rate": 1.587486453586669e-05,
|
|
"loss": 0.1346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12674440443515778,
|
|
"step": 2685,
|
|
"valid_targets_mean": 3657.6,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 4.276629570747218,
|
|
"grad_norm": 0.46272650057707004,
|
|
"learning_rate": 1.579730902679045e-05,
|
|
"loss": 0.1339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15541914105415344,
|
|
"step": 2690,
|
|
"valid_targets_mean": 4025.2,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 4.284578696343402,
|
|
"grad_norm": 0.4472468025529375,
|
|
"learning_rate": 1.5719819577581982e-05,
|
|
"loss": 0.1223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12270389497280121,
|
|
"step": 2695,
|
|
"valid_targets_mean": 3892.1,
|
|
"valid_targets_min": 793
|
|
},
|
|
{
|
|
"epoch": 4.292527821939586,
|
|
"grad_norm": 0.5592143714752654,
|
|
"learning_rate": 1.5642397406256768e-05,
|
|
"loss": 0.1291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15055924654006958,
|
|
"step": 2700,
|
|
"valid_targets_mean": 2888.4,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 4.300476947535771,
|
|
"grad_norm": 0.6380121268077782,
|
|
"learning_rate": 1.556504372977283e-05,
|
|
"loss": 0.1457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17369496822357178,
|
|
"step": 2705,
|
|
"valid_targets_mean": 3295.8,
|
|
"valid_targets_min": 450
|
|
},
|
|
{
|
|
"epoch": 4.3084260731319555,
|
|
"grad_norm": 0.43592849432503594,
|
|
"learning_rate": 1.548775976401152e-05,
|
|
"loss": 0.1417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12458646297454834,
|
|
"step": 2710,
|
|
"valid_targets_mean": 4257.1,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 4.31637519872814,
|
|
"grad_norm": 0.5249006611498069,
|
|
"learning_rate": 1.5410546723758452e-05,
|
|
"loss": 0.1429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1341407299041748,
|
|
"step": 2715,
|
|
"valid_targets_mean": 3361.5,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 4.324324324324325,
|
|
"grad_norm": 0.4535416533335133,
|
|
"learning_rate": 1.5333405822684428e-05,
|
|
"loss": 0.1254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1214504987001419,
|
|
"step": 2720,
|
|
"valid_targets_mean": 4414.6,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 4.332273449920509,
|
|
"grad_norm": 0.4731025497262397,
|
|
"learning_rate": 1.5256338273326293e-05,
|
|
"loss": 0.127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13752460479736328,
|
|
"step": 2725,
|
|
"valid_targets_mean": 3609.0,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 4.340222575516693,
|
|
"grad_norm": 0.5291643855694953,
|
|
"learning_rate": 1.5179345287067935e-05,
|
|
"loss": 0.1349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14532646536827087,
|
|
"step": 2730,
|
|
"valid_targets_mean": 3274.8,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 4.348171701112878,
|
|
"grad_norm": 0.49276963970488963,
|
|
"learning_rate": 1.5102428074121222e-05,
|
|
"loss": 0.1359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13466641306877136,
|
|
"step": 2735,
|
|
"valid_targets_mean": 4083.1,
|
|
"valid_targets_min": 243
|
|
},
|
|
{
|
|
"epoch": 4.356120826709062,
|
|
"grad_norm": 0.5271105291571133,
|
|
"learning_rate": 1.5025587843506986e-05,
|
|
"loss": 0.1387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13063038885593414,
|
|
"step": 2740,
|
|
"valid_targets_mean": 3548.1,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 4.364069952305247,
|
|
"grad_norm": 0.5029010534111552,
|
|
"learning_rate": 1.4948825803035996e-05,
|
|
"loss": 0.1302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.115936279296875,
|
|
"step": 2745,
|
|
"valid_targets_mean": 4106.9,
|
|
"valid_targets_min": 581
|
|
},
|
|
{
|
|
"epoch": 4.372019077901431,
|
|
"grad_norm": 0.4305157347551604,
|
|
"learning_rate": 1.4872143159290016e-05,
|
|
"loss": 0.1379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1385708451271057,
|
|
"step": 2750,
|
|
"valid_targets_mean": 4007.7,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 4.379968203497615,
|
|
"grad_norm": 0.5872775238224067,
|
|
"learning_rate": 1.4795541117602808e-05,
|
|
"loss": 0.131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11982855200767517,
|
|
"step": 2755,
|
|
"valid_targets_mean": 3377.2,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 4.3879173290938,
|
|
"grad_norm": 0.4813077822857676,
|
|
"learning_rate": 1.4719020882041175e-05,
|
|
"loss": 0.1351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1347476989030838,
|
|
"step": 2760,
|
|
"valid_targets_mean": 3714.1,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 4.395866454689984,
|
|
"grad_norm": 0.5454824004525505,
|
|
"learning_rate": 1.4642583655386084e-05,
|
|
"loss": 0.1306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12626631557941437,
|
|
"step": 2765,
|
|
"valid_targets_mean": 3165.4,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 4.403815580286168,
|
|
"grad_norm": 0.4885305355016124,
|
|
"learning_rate": 1.4566230639113696e-05,
|
|
"loss": 0.1332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1025635302066803,
|
|
"step": 2770,
|
|
"valid_targets_mean": 4869.7,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 4.411764705882353,
|
|
"grad_norm": 0.4770935247503296,
|
|
"learning_rate": 1.448996303337654e-05,
|
|
"loss": 0.134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.124050073325634,
|
|
"step": 2775,
|
|
"valid_targets_mean": 3172.6,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 4.419713831478537,
|
|
"grad_norm": 0.46987397640146145,
|
|
"learning_rate": 1.4413782036984616e-05,
|
|
"loss": 0.1255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11378094553947449,
|
|
"step": 2780,
|
|
"valid_targets_mean": 3712.6,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 4.4276629570747215,
|
|
"grad_norm": 0.4291495568703118,
|
|
"learning_rate": 1.4337688847386542e-05,
|
|
"loss": 0.1251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10549108684062958,
|
|
"step": 2785,
|
|
"valid_targets_mean": 3789.5,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 4.4356120826709065,
|
|
"grad_norm": 0.48467330245194035,
|
|
"learning_rate": 1.426168466065077e-05,
|
|
"loss": 0.1378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14184287190437317,
|
|
"step": 2790,
|
|
"valid_targets_mean": 4098.1,
|
|
"valid_targets_min": 1006
|
|
},
|
|
{
|
|
"epoch": 4.443561208267091,
|
|
"grad_norm": 0.4320267387261653,
|
|
"learning_rate": 1.4185770671446743e-05,
|
|
"loss": 0.1397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11710529029369354,
|
|
"step": 2795,
|
|
"valid_targets_mean": 4207.4,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 4.451510333863275,
|
|
"grad_norm": 0.4899699268579415,
|
|
"learning_rate": 1.4109948073026153e-05,
|
|
"loss": 0.1378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13632246851921082,
|
|
"step": 2800,
|
|
"valid_targets_mean": 3296.7,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 4.45945945945946,
|
|
"grad_norm": 0.43337393856388357,
|
|
"learning_rate": 1.4034218057204165e-05,
|
|
"loss": 0.1368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12501519918441772,
|
|
"step": 2805,
|
|
"valid_targets_mean": 3881.5,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 4.467408585055644,
|
|
"grad_norm": 0.4398737794805095,
|
|
"learning_rate": 1.3958581814340679e-05,
|
|
"loss": 0.1274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11017243564128876,
|
|
"step": 2810,
|
|
"valid_targets_mean": 3680.9,
|
|
"valid_targets_min": 219
|
|
},
|
|
{
|
|
"epoch": 4.475357710651828,
|
|
"grad_norm": 0.5233156447822513,
|
|
"learning_rate": 1.3883040533321637e-05,
|
|
"loss": 0.1281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1441815048456192,
|
|
"step": 2815,
|
|
"valid_targets_mean": 3133.9,
|
|
"valid_targets_min": 341
|
|
},
|
|
{
|
|
"epoch": 4.483306836248013,
|
|
"grad_norm": 0.4598890957258285,
|
|
"learning_rate": 1.3807595401540322e-05,
|
|
"loss": 0.1356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14368480443954468,
|
|
"step": 2820,
|
|
"valid_targets_mean": 4432.7,
|
|
"valid_targets_min": 291
|
|
},
|
|
{
|
|
"epoch": 4.491255961844197,
|
|
"grad_norm": 0.5085977500037545,
|
|
"learning_rate": 1.3732247604878697e-05,
|
|
"loss": 0.1352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12015148252248764,
|
|
"step": 2825,
|
|
"valid_targets_mean": 3563.8,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 4.499205087440382,
|
|
"grad_norm": 0.5315330999004808,
|
|
"learning_rate": 1.3656998327688764e-05,
|
|
"loss": 0.1392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1353592574596405,
|
|
"step": 2830,
|
|
"valid_targets_mean": 3689.6,
|
|
"valid_targets_min": 204
|
|
},
|
|
{
|
|
"epoch": 4.507154213036566,
|
|
"grad_norm": 0.5305726504410712,
|
|
"learning_rate": 1.3581848752773961e-05,
|
|
"loss": 0.1219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11149744689464569,
|
|
"step": 2835,
|
|
"valid_targets_mean": 3077.8,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 4.51510333863275,
|
|
"grad_norm": 0.5139894765702253,
|
|
"learning_rate": 1.3506800061370555e-05,
|
|
"loss": 0.1471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14306671917438507,
|
|
"step": 2840,
|
|
"valid_targets_mean": 3728.9,
|
|
"valid_targets_min": 1152
|
|
},
|
|
{
|
|
"epoch": 4.523052464228935,
|
|
"grad_norm": 0.4814345025583722,
|
|
"learning_rate": 1.3431853433129058e-05,
|
|
"loss": 0.1246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11117015779018402,
|
|
"step": 2845,
|
|
"valid_targets_mean": 3341.5,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 4.531001589825119,
|
|
"grad_norm": 0.5079196992633277,
|
|
"learning_rate": 1.3357010046095741e-05,
|
|
"loss": 0.1378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17309923470020294,
|
|
"step": 2850,
|
|
"valid_targets_mean": 2918.5,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 4.538950715421303,
|
|
"grad_norm": 0.4517165892172054,
|
|
"learning_rate": 1.3282271076694052e-05,
|
|
"loss": 0.1296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1249191164970398,
|
|
"step": 2855,
|
|
"valid_targets_mean": 4362.0,
|
|
"valid_targets_min": 951
|
|
},
|
|
{
|
|
"epoch": 4.546899841017488,
|
|
"grad_norm": 0.47163152664152436,
|
|
"learning_rate": 1.3207637699706162e-05,
|
|
"loss": 0.1322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13854432106018066,
|
|
"step": 2860,
|
|
"valid_targets_mean": 3941.9,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 4.5548489666136724,
|
|
"grad_norm": 0.4474413787281936,
|
|
"learning_rate": 1.3133111088254507e-05,
|
|
"loss": 0.1267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11171597242355347,
|
|
"step": 2865,
|
|
"valid_targets_mean": 3772.6,
|
|
"valid_targets_min": 254
|
|
},
|
|
{
|
|
"epoch": 4.5627980922098565,
|
|
"grad_norm": 0.5078641249343999,
|
|
"learning_rate": 1.3058692413783307e-05,
|
|
"loss": 0.1407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13531452417373657,
|
|
"step": 2870,
|
|
"valid_targets_mean": 3739.0,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 4.5707472178060415,
|
|
"grad_norm": 0.7026774408330564,
|
|
"learning_rate": 1.2984382846040187e-05,
|
|
"loss": 0.1315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1399625539779663,
|
|
"step": 2875,
|
|
"valid_targets_mean": 4271.8,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 4.578696343402226,
|
|
"grad_norm": 0.46761769157477573,
|
|
"learning_rate": 1.2910183553057788e-05,
|
|
"loss": 0.1426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14058490097522736,
|
|
"step": 2880,
|
|
"valid_targets_mean": 3651.0,
|
|
"valid_targets_min": 310
|
|
},
|
|
{
|
|
"epoch": 4.586645468998411,
|
|
"grad_norm": 0.4383231851637327,
|
|
"learning_rate": 1.2836095701135398e-05,
|
|
"loss": 0.1322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11564694344997406,
|
|
"step": 2885,
|
|
"valid_targets_mean": 3662.7,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 4.594594594594595,
|
|
"grad_norm": 0.46123656999116863,
|
|
"learning_rate": 1.2762120454820628e-05,
|
|
"loss": 0.1274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1169496402144432,
|
|
"step": 2890,
|
|
"valid_targets_mean": 3931.8,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 4.602543720190779,
|
|
"grad_norm": 0.4338183551025682,
|
|
"learning_rate": 1.268825897689108e-05,
|
|
"loss": 0.1292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12584465742111206,
|
|
"step": 2895,
|
|
"valid_targets_mean": 4739.1,
|
|
"valid_targets_min": 1044
|
|
},
|
|
{
|
|
"epoch": 4.610492845786963,
|
|
"grad_norm": 0.4172369284320454,
|
|
"learning_rate": 1.2614512428336105e-05,
|
|
"loss": 0.1321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12070607393980026,
|
|
"step": 2900,
|
|
"valid_targets_mean": 5442.9,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 4.618441971383148,
|
|
"grad_norm": 0.47358327667927247,
|
|
"learning_rate": 1.254088196833855e-05,
|
|
"loss": 0.1318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1363537609577179,
|
|
"step": 2905,
|
|
"valid_targets_mean": 4575.1,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 4.626391096979332,
|
|
"grad_norm": 0.46039621956253657,
|
|
"learning_rate": 1.2467368754256513e-05,
|
|
"loss": 0.138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1305224895477295,
|
|
"step": 2910,
|
|
"valid_targets_mean": 4386.9,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 4.634340222575517,
|
|
"grad_norm": 0.48832499251309247,
|
|
"learning_rate": 1.2393973941605161e-05,
|
|
"loss": 0.1308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.125309556722641,
|
|
"step": 2915,
|
|
"valid_targets_mean": 3788.6,
|
|
"valid_targets_min": 988
|
|
},
|
|
{
|
|
"epoch": 4.642289348171701,
|
|
"grad_norm": 0.5232937532641884,
|
|
"learning_rate": 1.2320698684038599e-05,
|
|
"loss": 0.1371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1394532024860382,
|
|
"step": 2920,
|
|
"valid_targets_mean": 3360.9,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 4.650238473767885,
|
|
"grad_norm": 0.4558597876370874,
|
|
"learning_rate": 1.2247544133331681e-05,
|
|
"loss": 0.1303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11967042088508606,
|
|
"step": 2925,
|
|
"valid_targets_mean": 4315.3,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 4.65818759936407,
|
|
"grad_norm": 0.5566298887502414,
|
|
"learning_rate": 1.2174511439361943e-05,
|
|
"loss": 0.1467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1408625692129135,
|
|
"step": 2930,
|
|
"valid_targets_mean": 2664.5,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 4.666136724960254,
|
|
"grad_norm": 0.48634223320131703,
|
|
"learning_rate": 1.2101601750091528e-05,
|
|
"loss": 0.1379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16628813743591309,
|
|
"step": 2935,
|
|
"valid_targets_mean": 4301.5,
|
|
"valid_targets_min": 1436
|
|
},
|
|
{
|
|
"epoch": 4.674085850556438,
|
|
"grad_norm": 0.46133307731688006,
|
|
"learning_rate": 1.2028816211549117e-05,
|
|
"loss": 0.1206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12970274686813354,
|
|
"step": 2940,
|
|
"valid_targets_mean": 4872.5,
|
|
"valid_targets_min": 1521
|
|
},
|
|
{
|
|
"epoch": 4.682034976152623,
|
|
"grad_norm": 0.42861468167336814,
|
|
"learning_rate": 1.195615596781194e-05,
|
|
"loss": 0.1325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1275981068611145,
|
|
"step": 2945,
|
|
"valid_targets_mean": 4291.4,
|
|
"valid_targets_min": 1324
|
|
},
|
|
{
|
|
"epoch": 4.6899841017488075,
|
|
"grad_norm": 0.45633412060443734,
|
|
"learning_rate": 1.18836221609878e-05,
|
|
"loss": 0.123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12598268687725067,
|
|
"step": 2950,
|
|
"valid_targets_mean": 4175.3,
|
|
"valid_targets_min": 839
|
|
},
|
|
{
|
|
"epoch": 4.697933227344992,
|
|
"grad_norm": 0.5343832571290266,
|
|
"learning_rate": 1.1811215931197084e-05,
|
|
"loss": 0.1332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1605256199836731,
|
|
"step": 2955,
|
|
"valid_targets_mean": 3076.2,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 4.705882352941177,
|
|
"grad_norm": 0.4961038270131699,
|
|
"learning_rate": 1.1738938416554857e-05,
|
|
"loss": 0.134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13582147657871246,
|
|
"step": 2960,
|
|
"valid_targets_mean": 3582.1,
|
|
"valid_targets_min": 267
|
|
},
|
|
{
|
|
"epoch": 4.713831478537361,
|
|
"grad_norm": 0.5512004556887968,
|
|
"learning_rate": 1.1666790753153009e-05,
|
|
"loss": 0.1479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13612379133701324,
|
|
"step": 2965,
|
|
"valid_targets_mean": 2709.6,
|
|
"valid_targets_min": 977
|
|
},
|
|
{
|
|
"epoch": 4.721780604133546,
|
|
"grad_norm": 0.5381030618661898,
|
|
"learning_rate": 1.1594774075042345e-05,
|
|
"loss": 0.1307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1454896181821823,
|
|
"step": 2970,
|
|
"valid_targets_mean": 3670.9,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 4.72972972972973,
|
|
"grad_norm": 0.5509557031574198,
|
|
"learning_rate": 1.152288951421478e-05,
|
|
"loss": 0.1253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13313612341880798,
|
|
"step": 2975,
|
|
"valid_targets_mean": 2507.1,
|
|
"valid_targets_min": 344
|
|
},
|
|
{
|
|
"epoch": 4.737678855325914,
|
|
"grad_norm": 0.5619248692734761,
|
|
"learning_rate": 1.1451138200585567e-05,
|
|
"loss": 0.1301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13828039169311523,
|
|
"step": 2980,
|
|
"valid_targets_mean": 3192.6,
|
|
"valid_targets_min": 304
|
|
},
|
|
{
|
|
"epoch": 4.745627980922099,
|
|
"grad_norm": 0.6067851198602392,
|
|
"learning_rate": 1.13795212619755e-05,
|
|
"loss": 0.1338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1492369920015335,
|
|
"step": 2985,
|
|
"valid_targets_mean": 2374.6,
|
|
"valid_targets_min": 295
|
|
},
|
|
{
|
|
"epoch": 4.753577106518283,
|
|
"grad_norm": 0.4802919158480157,
|
|
"learning_rate": 1.1308039824093197e-05,
|
|
"loss": 0.1292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1495775282382965,
|
|
"step": 2990,
|
|
"valid_targets_mean": 3698.0,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 4.761526232114467,
|
|
"grad_norm": 0.4709231044726117,
|
|
"learning_rate": 1.1236695010517434e-05,
|
|
"loss": 0.143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1218520998954773,
|
|
"step": 2995,
|
|
"valid_targets_mean": 3672.4,
|
|
"valid_targets_min": 252
|
|
},
|
|
{
|
|
"epoch": 4.769475357710652,
|
|
"grad_norm": 0.43463931612817397,
|
|
"learning_rate": 1.116548794267945e-05,
|
|
"loss": 0.1253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12758862972259521,
|
|
"step": 3000,
|
|
"valid_targets_mean": 4420.1,
|
|
"valid_targets_min": 413
|
|
},
|
|
{
|
|
"epoch": 4.777424483306836,
|
|
"grad_norm": 0.45148983869993,
|
|
"learning_rate": 1.109441973984534e-05,
|
|
"loss": 0.1288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11758168786764145,
|
|
"step": 3005,
|
|
"valid_targets_mean": 3833.6,
|
|
"valid_targets_min": 199
|
|
},
|
|
{
|
|
"epoch": 4.78537360890302,
|
|
"grad_norm": 0.4726760123977448,
|
|
"learning_rate": 1.1023491519098439e-05,
|
|
"loss": 0.1328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15904685854911804,
|
|
"step": 3010,
|
|
"valid_targets_mean": 4469.8,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 4.793322734499205,
|
|
"grad_norm": 0.5308825581080399,
|
|
"learning_rate": 1.0952704395321781e-05,
|
|
"loss": 0.1256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13085004687309265,
|
|
"step": 3015,
|
|
"valid_targets_mean": 3272.6,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 4.801271860095389,
|
|
"grad_norm": 0.5180355797993398,
|
|
"learning_rate": 1.0882059481180588e-05,
|
|
"loss": 0.1264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11950570344924927,
|
|
"step": 3020,
|
|
"valid_targets_mean": 3030.1,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 4.809220985691574,
|
|
"grad_norm": 0.4600006136661297,
|
|
"learning_rate": 1.0811557887104747e-05,
|
|
"loss": 0.1316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13670320808887482,
|
|
"step": 3025,
|
|
"valid_targets_mean": 3798.9,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 4.8171701112877585,
|
|
"grad_norm": 0.5471165258975516,
|
|
"learning_rate": 1.074120072127137e-05,
|
|
"loss": 0.139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10859496891498566,
|
|
"step": 3030,
|
|
"valid_targets_mean": 4030.0,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 4.825119236883943,
|
|
"grad_norm": 0.4875896845337796,
|
|
"learning_rate": 1.0670989089587395e-05,
|
|
"loss": 0.1443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15141701698303223,
|
|
"step": 3035,
|
|
"valid_targets_mean": 3959.2,
|
|
"valid_targets_min": 348
|
|
},
|
|
{
|
|
"epoch": 4.833068362480127,
|
|
"grad_norm": 0.46841773833007866,
|
|
"learning_rate": 1.0600924095672184e-05,
|
|
"loss": 0.1317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1404210776090622,
|
|
"step": 3040,
|
|
"valid_targets_mean": 4134.9,
|
|
"valid_targets_min": 834
|
|
},
|
|
{
|
|
"epoch": 4.841017488076312,
|
|
"grad_norm": 0.4822180291377705,
|
|
"learning_rate": 1.0531006840840162e-05,
|
|
"loss": 0.151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15318647027015686,
|
|
"step": 3045,
|
|
"valid_targets_mean": 3816.3,
|
|
"valid_targets_min": 1540
|
|
},
|
|
{
|
|
"epoch": 4.848966613672496,
|
|
"grad_norm": 0.4598922245539829,
|
|
"learning_rate": 1.046123842408354e-05,
|
|
"loss": 0.1338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.142354816198349,
|
|
"step": 3050,
|
|
"valid_targets_mean": 4431.2,
|
|
"valid_targets_min": 309
|
|
},
|
|
{
|
|
"epoch": 4.856915739268681,
|
|
"grad_norm": 0.4289633212386471,
|
|
"learning_rate": 1.0391619942055007e-05,
|
|
"loss": 0.1313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1268487572669983,
|
|
"step": 3055,
|
|
"valid_targets_mean": 3981.8,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 4.864864864864865,
|
|
"grad_norm": 0.5512192977590487,
|
|
"learning_rate": 1.0322152489050508e-05,
|
|
"loss": 0.1344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1406165063381195,
|
|
"step": 3060,
|
|
"valid_targets_mean": 3987.3,
|
|
"valid_targets_min": 1264
|
|
},
|
|
{
|
|
"epoch": 4.872813990461049,
|
|
"grad_norm": 0.473626788275172,
|
|
"learning_rate": 1.0252837156992065e-05,
|
|
"loss": 0.1278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12075041234493256,
|
|
"step": 3065,
|
|
"valid_targets_mean": 3601.8,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 4.880763116057234,
|
|
"grad_norm": 0.4208027067707839,
|
|
"learning_rate": 1.018367503541057e-05,
|
|
"loss": 0.1275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11000724136829376,
|
|
"step": 3070,
|
|
"valid_targets_mean": 4098.9,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 4.888712241653418,
|
|
"grad_norm": 0.4042871962162526,
|
|
"learning_rate": 1.0114667211428675e-05,
|
|
"loss": 0.1473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12441422045230865,
|
|
"step": 3075,
|
|
"valid_targets_mean": 5265.6,
|
|
"valid_targets_min": 787
|
|
},
|
|
{
|
|
"epoch": 4.896661367249602,
|
|
"grad_norm": 0.48474645193150095,
|
|
"learning_rate": 1.0045814769743731e-05,
|
|
"loss": 0.1372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13803794980049133,
|
|
"step": 3080,
|
|
"valid_targets_mean": 3752.9,
|
|
"valid_targets_min": 725
|
|
},
|
|
{
|
|
"epoch": 4.904610492845787,
|
|
"grad_norm": 0.558064394873645,
|
|
"learning_rate": 9.977118792610719e-06,
|
|
"loss": 0.1415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12287827581167221,
|
|
"step": 3085,
|
|
"valid_targets_mean": 3135.6,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 4.912559618441971,
|
|
"grad_norm": 0.47354314556138427,
|
|
"learning_rate": 9.908580359825204e-06,
|
|
"loss": 0.136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11178020387887955,
|
|
"step": 3090,
|
|
"valid_targets_mean": 4488.8,
|
|
"valid_targets_min": 466
|
|
},
|
|
{
|
|
"epoch": 4.920508744038155,
|
|
"grad_norm": 0.4188496112443199,
|
|
"learning_rate": 9.840200548706435e-06,
|
|
"loss": 0.1311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1269189715385437,
|
|
"step": 3095,
|
|
"valid_targets_mean": 4419.6,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 4.92845786963434,
|
|
"grad_norm": 0.48069575474565923,
|
|
"learning_rate": 9.771980434080348e-06,
|
|
"loss": 0.1502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14761850237846375,
|
|
"step": 3100,
|
|
"valid_targets_mean": 4263.5,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 4.9364069952305245,
|
|
"grad_norm": 0.47361836995858364,
|
|
"learning_rate": 9.70392108826269e-06,
|
|
"loss": 0.1303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1389940083026886,
|
|
"step": 3105,
|
|
"valid_targets_mean": 3997.1,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 4.9443561208267095,
|
|
"grad_norm": 0.502593429400252,
|
|
"learning_rate": 9.636023581042191e-06,
|
|
"loss": 0.1352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1681274026632309,
|
|
"step": 3110,
|
|
"valid_targets_mean": 3780.9,
|
|
"valid_targets_min": 856
|
|
},
|
|
{
|
|
"epoch": 4.952305246422894,
|
|
"grad_norm": 0.6771515271251293,
|
|
"learning_rate": 9.5682889796637e-06,
|
|
"loss": 0.1403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1500101536512375,
|
|
"step": 3115,
|
|
"valid_targets_mean": 4520.4,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 4.960254372019078,
|
|
"grad_norm": 0.4872072441985026,
|
|
"learning_rate": 9.500718348811457e-06,
|
|
"loss": 0.1442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1341622769832611,
|
|
"step": 3120,
|
|
"valid_targets_mean": 3533.3,
|
|
"valid_targets_min": 264
|
|
},
|
|
{
|
|
"epoch": 4.968203497615263,
|
|
"grad_norm": 0.4872550048404107,
|
|
"learning_rate": 9.433312750592337e-06,
|
|
"loss": 0.1348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12632372975349426,
|
|
"step": 3125,
|
|
"valid_targets_mean": 3567.7,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 4.976152623211447,
|
|
"grad_norm": 0.4963406118007583,
|
|
"learning_rate": 9.366073244519124e-06,
|
|
"loss": 0.1262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12338396161794662,
|
|
"step": 3130,
|
|
"valid_targets_mean": 3381.1,
|
|
"valid_targets_min": 551
|
|
},
|
|
{
|
|
"epoch": 4.984101748807631,
|
|
"grad_norm": 0.5634271164967108,
|
|
"learning_rate": 9.299000887493934e-06,
|
|
"loss": 0.13,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14706090092658997,
|
|
"step": 3135,
|
|
"valid_targets_mean": 3212.7,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 4.992050874403816,
|
|
"grad_norm": 0.5169455768152297,
|
|
"learning_rate": 9.232096733791518e-06,
|
|
"loss": 0.1339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14801155030727386,
|
|
"step": 3140,
|
|
"valid_targets_mean": 3497.0,
|
|
"valid_targets_min": 1131
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.4146566743306906,
|
|
"learning_rate": 9.165361835042734e-06,
|
|
"loss": 0.1386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12379302084445953,
|
|
"step": 3145,
|
|
"valid_targets_mean": 4029.0,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 5.007949125596184,
|
|
"grad_norm": 0.457923014155659,
|
|
"learning_rate": 9.098797240218036e-06,
|
|
"loss": 0.121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10793712735176086,
|
|
"step": 3150,
|
|
"valid_targets_mean": 4072.9,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 5.015898251192369,
|
|
"grad_norm": 0.5279976098261123,
|
|
"learning_rate": 9.032403995610937e-06,
|
|
"loss": 0.1228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11182405054569244,
|
|
"step": 3155,
|
|
"valid_targets_mean": 2553.0,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 5.023847376788553,
|
|
"grad_norm": 0.48776564405835493,
|
|
"learning_rate": 8.966183144821583e-06,
|
|
"loss": 0.1147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10740777850151062,
|
|
"step": 3160,
|
|
"valid_targets_mean": 3739.8,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 5.031796502384737,
|
|
"grad_norm": 0.4595972840148685,
|
|
"learning_rate": 8.900135728740373e-06,
|
|
"loss": 0.1239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11679710447788239,
|
|
"step": 3165,
|
|
"valid_targets_mean": 4084.7,
|
|
"valid_targets_min": 276
|
|
},
|
|
{
|
|
"epoch": 5.039745627980922,
|
|
"grad_norm": 0.47396166912319276,
|
|
"learning_rate": 8.83426278553158e-06,
|
|
"loss": 0.1073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11660698056221008,
|
|
"step": 3170,
|
|
"valid_targets_mean": 3834.4,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 5.047694753577106,
|
|
"grad_norm": 0.5657974369615991,
|
|
"learning_rate": 8.768565350616998e-06,
|
|
"loss": 0.1366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16728852689266205,
|
|
"step": 3175,
|
|
"valid_targets_mean": 3509.4,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 5.0556438791732905,
|
|
"grad_norm": 0.4887634707217319,
|
|
"learning_rate": 8.703044456659741e-06,
|
|
"loss": 0.1238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11223262548446655,
|
|
"step": 3180,
|
|
"valid_targets_mean": 4200.7,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 5.0635930047694755,
|
|
"grad_norm": 0.5536746484103304,
|
|
"learning_rate": 8.63770113354794e-06,
|
|
"loss": 0.1229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12999343872070312,
|
|
"step": 3185,
|
|
"valid_targets_mean": 3016.6,
|
|
"valid_targets_min": 486
|
|
},
|
|
{
|
|
"epoch": 5.07154213036566,
|
|
"grad_norm": 0.43802408737800774,
|
|
"learning_rate": 8.572536408378587e-06,
|
|
"loss": 0.1221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0922771543264389,
|
|
"step": 3190,
|
|
"valid_targets_mean": 4741.3,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 5.079491255961845,
|
|
"grad_norm": 0.4652049118167889,
|
|
"learning_rate": 8.507551305441408e-06,
|
|
"loss": 0.1226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12782129645347595,
|
|
"step": 3195,
|
|
"valid_targets_mean": 4579.9,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 5.087440381558029,
|
|
"grad_norm": 0.43604310018158265,
|
|
"learning_rate": 8.442746846202711e-06,
|
|
"loss": 0.119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09423212707042694,
|
|
"step": 3200,
|
|
"valid_targets_mean": 4047.1,
|
|
"valid_targets_min": 1021
|
|
},
|
|
{
|
|
"epoch": 5.095389507154213,
|
|
"grad_norm": 0.49985284842571,
|
|
"learning_rate": 8.378124049289394e-06,
|
|
"loss": 0.1135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12812739610671997,
|
|
"step": 3205,
|
|
"valid_targets_mean": 3787.6,
|
|
"valid_targets_min": 576
|
|
},
|
|
{
|
|
"epoch": 5.103338632750398,
|
|
"grad_norm": 0.5306993606687345,
|
|
"learning_rate": 8.313683930472889e-06,
|
|
"loss": 0.1259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13846567273139954,
|
|
"step": 3210,
|
|
"valid_targets_mean": 3789.9,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 5.111287758346582,
|
|
"grad_norm": 0.526017364900564,
|
|
"learning_rate": 8.249427502653198e-06,
|
|
"loss": 0.1231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13277852535247803,
|
|
"step": 3215,
|
|
"valid_targets_mean": 3243.6,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 5.119236883942766,
|
|
"grad_norm": 0.4826797520729495,
|
|
"learning_rate": 8.185355775842982e-06,
|
|
"loss": 0.1424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1628974825143814,
|
|
"step": 3220,
|
|
"valid_targets_mean": 4196.1,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 5.127186009538951,
|
|
"grad_norm": 0.47266391553239756,
|
|
"learning_rate": 8.12146975715171e-06,
|
|
"loss": 0.1304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.124434694647789,
|
|
"step": 3225,
|
|
"valid_targets_mean": 4489.1,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 5.135135135135135,
|
|
"grad_norm": 0.5104351929242484,
|
|
"learning_rate": 8.057770450769771e-06,
|
|
"loss": 0.1269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14183968305587769,
|
|
"step": 3230,
|
|
"valid_targets_mean": 3486.3,
|
|
"valid_targets_min": 660
|
|
},
|
|
{
|
|
"epoch": 5.143084260731319,
|
|
"grad_norm": 0.5350401545785989,
|
|
"learning_rate": 7.994258857952748e-06,
|
|
"loss": 0.1194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14458492398262024,
|
|
"step": 3235,
|
|
"valid_targets_mean": 3730.2,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 5.151033386327504,
|
|
"grad_norm": 0.47568127630523516,
|
|
"learning_rate": 7.93093597700564e-06,
|
|
"loss": 0.1122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11599075794219971,
|
|
"step": 3240,
|
|
"valid_targets_mean": 3875.6,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 5.158982511923688,
|
|
"grad_norm": 0.5212929399191217,
|
|
"learning_rate": 7.867802803267182e-06,
|
|
"loss": 0.1249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11911657452583313,
|
|
"step": 3245,
|
|
"valid_targets_mean": 3369.2,
|
|
"valid_targets_min": 221
|
|
},
|
|
{
|
|
"epoch": 5.166931637519872,
|
|
"grad_norm": 0.44904326831297614,
|
|
"learning_rate": 7.80486032909421e-06,
|
|
"loss": 0.1283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13542252779006958,
|
|
"step": 3250,
|
|
"valid_targets_mean": 5412.6,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 5.174880763116057,
|
|
"grad_norm": 0.5816953917270461,
|
|
"learning_rate": 7.742109543846063e-06,
|
|
"loss": 0.1274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12738856673240662,
|
|
"step": 3255,
|
|
"valid_targets_mean": 2924.6,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 5.1828298887122415,
|
|
"grad_norm": 0.4706987710505265,
|
|
"learning_rate": 7.679551433869001e-06,
|
|
"loss": 0.1197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11224916577339172,
|
|
"step": 3260,
|
|
"valid_targets_mean": 4329.3,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 5.1907790143084265,
|
|
"grad_norm": 0.4351758065293425,
|
|
"learning_rate": 7.617186982480749e-06,
|
|
"loss": 0.1186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10177105665206909,
|
|
"step": 3265,
|
|
"valid_targets_mean": 3785.4,
|
|
"valid_targets_min": 1054
|
|
},
|
|
{
|
|
"epoch": 5.198728139904611,
|
|
"grad_norm": 0.5015716097475679,
|
|
"learning_rate": 7.5550171699549945e-06,
|
|
"loss": 0.1219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14602243900299072,
|
|
"step": 3270,
|
|
"valid_targets_mean": 3782.1,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 5.206677265500795,
|
|
"grad_norm": 0.4602593511104768,
|
|
"learning_rate": 7.493042973506e-06,
|
|
"loss": 0.1194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11888657510280609,
|
|
"step": 3275,
|
|
"valid_targets_mean": 4810.8,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 5.21462639109698,
|
|
"grad_norm": 0.5526497578097684,
|
|
"learning_rate": 7.431265367273268e-06,
|
|
"loss": 0.1144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11730851978063583,
|
|
"step": 3280,
|
|
"valid_targets_mean": 3694.4,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 5.222575516693164,
|
|
"grad_norm": 0.5252464567539773,
|
|
"learning_rate": 7.36968532230617e-06,
|
|
"loss": 0.1428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1225329339504242,
|
|
"step": 3285,
|
|
"valid_targets_mean": 3195.1,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 5.230524642289348,
|
|
"grad_norm": 0.5192954833459635,
|
|
"learning_rate": 7.308303806548742e-06,
|
|
"loss": 0.129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13095176219940186,
|
|
"step": 3290,
|
|
"valid_targets_mean": 3724.8,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 5.238473767885533,
|
|
"grad_norm": 0.5487669317072835,
|
|
"learning_rate": 7.247121784824445e-06,
|
|
"loss": 0.1258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13627754151821136,
|
|
"step": 3295,
|
|
"valid_targets_mean": 3452.6,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 5.246422893481717,
|
|
"grad_norm": 0.528698693229382,
|
|
"learning_rate": 7.186140218820979e-06,
|
|
"loss": 0.1303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12275183945894241,
|
|
"step": 3300,
|
|
"valid_targets_mean": 4150.6,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 5.254372019077901,
|
|
"grad_norm": 0.5046952538958149,
|
|
"learning_rate": 7.125360067075196e-06,
|
|
"loss": 0.1216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13393549621105194,
|
|
"step": 3305,
|
|
"valid_targets_mean": 3828.8,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 5.262321144674086,
|
|
"grad_norm": 0.5275289393785,
|
|
"learning_rate": 7.0647822849580385e-06,
|
|
"loss": 0.1117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11984727531671524,
|
|
"step": 3310,
|
|
"valid_targets_mean": 2990.9,
|
|
"valid_targets_min": 465
|
|
},
|
|
{
|
|
"epoch": 5.27027027027027,
|
|
"grad_norm": 0.5986098211183422,
|
|
"learning_rate": 7.004407824659491e-06,
|
|
"loss": 0.1223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12287823855876923,
|
|
"step": 3315,
|
|
"valid_targets_mean": 2975.7,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 5.278219395866454,
|
|
"grad_norm": 0.493508460343489,
|
|
"learning_rate": 6.944237635173627e-06,
|
|
"loss": 0.1373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1410299688577652,
|
|
"step": 3320,
|
|
"valid_targets_mean": 4679.4,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 5.286168521462639,
|
|
"grad_norm": 0.5340963975918633,
|
|
"learning_rate": 6.88427266228372e-06,
|
|
"loss": 0.1258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11206860840320587,
|
|
"step": 3325,
|
|
"valid_targets_mean": 2824.8,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 5.294117647058823,
|
|
"grad_norm": 0.5288120769728034,
|
|
"learning_rate": 6.824513848547323e-06,
|
|
"loss": 0.1258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13739335536956787,
|
|
"step": 3330,
|
|
"valid_targets_mean": 2994.0,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 5.302066772655008,
|
|
"grad_norm": 0.4460570196931101,
|
|
"learning_rate": 6.764962133281503e-06,
|
|
"loss": 0.1161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0984058827161789,
|
|
"step": 3335,
|
|
"valid_targets_mean": 4019.4,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 5.3100158982511925,
|
|
"grad_norm": 0.47811950799417524,
|
|
"learning_rate": 6.705618452548057e-06,
|
|
"loss": 0.1267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11787131428718567,
|
|
"step": 3340,
|
|
"valid_targets_mean": 4479.8,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 5.317965023847377,
|
|
"grad_norm": 0.4666566027641988,
|
|
"learning_rate": 6.646483739138778e-06,
|
|
"loss": 0.1112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12056516110897064,
|
|
"step": 3345,
|
|
"valid_targets_mean": 3949.9,
|
|
"valid_targets_min": 1048
|
|
},
|
|
{
|
|
"epoch": 5.325914149443562,
|
|
"grad_norm": 0.4975374351704887,
|
|
"learning_rate": 6.5875589225608376e-06,
|
|
"loss": 0.1224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11528253555297852,
|
|
"step": 3350,
|
|
"valid_targets_mean": 3459.3,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 5.333863275039746,
|
|
"grad_norm": 0.4880004851265583,
|
|
"learning_rate": 6.528844929022134e-06,
|
|
"loss": 0.1219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10715194791555405,
|
|
"step": 3355,
|
|
"valid_targets_mean": 3779.9,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 5.34181240063593,
|
|
"grad_norm": 0.5375997285477788,
|
|
"learning_rate": 6.4703426814167434e-06,
|
|
"loss": 0.1247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12751339375972748,
|
|
"step": 3360,
|
|
"valid_targets_mean": 3645.8,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 5.349761526232115,
|
|
"grad_norm": 0.5313239520073082,
|
|
"learning_rate": 6.412053099310449e-06,
|
|
"loss": 0.1167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12041355669498444,
|
|
"step": 3365,
|
|
"valid_targets_mean": 4356.8,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 5.357710651828299,
|
|
"grad_norm": 0.5428782069622755,
|
|
"learning_rate": 6.353977098926225e-06,
|
|
"loss": 0.1278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13842326402664185,
|
|
"step": 3370,
|
|
"valid_targets_mean": 3841.1,
|
|
"valid_targets_min": 279
|
|
},
|
|
{
|
|
"epoch": 5.365659777424483,
|
|
"grad_norm": 0.5475098794287568,
|
|
"learning_rate": 6.296115593129888e-06,
|
|
"loss": 0.1177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12307454645633698,
|
|
"step": 3375,
|
|
"valid_targets_mean": 4148.9,
|
|
"valid_targets_min": 519
|
|
},
|
|
{
|
|
"epoch": 5.373608903020668,
|
|
"grad_norm": 0.797877703342453,
|
|
"learning_rate": 6.238469491415728e-06,
|
|
"loss": 0.1184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13038906455039978,
|
|
"step": 3380,
|
|
"valid_targets_mean": 2991.3,
|
|
"valid_targets_min": 369
|
|
},
|
|
{
|
|
"epoch": 5.381558028616852,
|
|
"grad_norm": 0.48617214325748453,
|
|
"learning_rate": 6.181039699892206e-06,
|
|
"loss": 0.1255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11232590675354004,
|
|
"step": 3385,
|
|
"valid_targets_mean": 3804.4,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 5.389507154213036,
|
|
"grad_norm": 0.5086690584311961,
|
|
"learning_rate": 6.123827121267709e-06,
|
|
"loss": 0.1151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12465276569128036,
|
|
"step": 3390,
|
|
"valid_targets_mean": 4029.9,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 5.397456279809221,
|
|
"grad_norm": 0.4123929559301535,
|
|
"learning_rate": 6.066832654836396e-06,
|
|
"loss": 0.1228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10203292965888977,
|
|
"step": 3395,
|
|
"valid_targets_mean": 4760.1,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 5.405405405405405,
|
|
"grad_norm": 0.5479065770064241,
|
|
"learning_rate": 6.010057196464012e-06,
|
|
"loss": 0.1323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11120990663766861,
|
|
"step": 3400,
|
|
"valid_targets_mean": 3094.6,
|
|
"valid_targets_min": 221
|
|
},
|
|
{
|
|
"epoch": 5.413354531001589,
|
|
"grad_norm": 0.48420485798613333,
|
|
"learning_rate": 5.9535016385738335e-06,
|
|
"loss": 0.1226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10648505389690399,
|
|
"step": 3405,
|
|
"valid_targets_mean": 4042.4,
|
|
"valid_targets_min": 1384
|
|
},
|
|
{
|
|
"epoch": 5.421303656597774,
|
|
"grad_norm": 0.48392218431147466,
|
|
"learning_rate": 5.897166870132658e-06,
|
|
"loss": 0.116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11931923776865005,
|
|
"step": 3410,
|
|
"valid_targets_mean": 3597.8,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 5.4292527821939585,
|
|
"grad_norm": 0.5393253911493405,
|
|
"learning_rate": 5.841053776636781e-06,
|
|
"loss": 0.1157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12923894822597504,
|
|
"step": 3415,
|
|
"valid_targets_mean": 3682.3,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 5.4372019077901435,
|
|
"grad_norm": 0.5792385751129733,
|
|
"learning_rate": 5.7851632400981285e-06,
|
|
"loss": 0.1149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10763402283191681,
|
|
"step": 3420,
|
|
"valid_targets_mean": 4127.3,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 5.4451510333863276,
|
|
"grad_norm": 0.48182773969358605,
|
|
"learning_rate": 5.729496139030377e-06,
|
|
"loss": 0.1222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1422126591205597,
|
|
"step": 3425,
|
|
"valid_targets_mean": 4550.5,
|
|
"valid_targets_min": 1598
|
|
},
|
|
{
|
|
"epoch": 5.453100158982512,
|
|
"grad_norm": 0.41697301913199153,
|
|
"learning_rate": 5.67405334843512e-06,
|
|
"loss": 0.1165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10176099836826324,
|
|
"step": 3430,
|
|
"valid_targets_mean": 5134.2,
|
|
"valid_targets_min": 581
|
|
},
|
|
{
|
|
"epoch": 5.461049284578697,
|
|
"grad_norm": 0.5513948557355665,
|
|
"learning_rate": 5.618835739788136e-06,
|
|
"loss": 0.1175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12029505521059036,
|
|
"step": 3435,
|
|
"valid_targets_mean": 2973.1,
|
|
"valid_targets_min": 312
|
|
},
|
|
{
|
|
"epoch": 5.468998410174881,
|
|
"grad_norm": 0.5035419846944795,
|
|
"learning_rate": 5.563844181025706e-06,
|
|
"loss": 0.1216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11482226848602295,
|
|
"step": 3440,
|
|
"valid_targets_mean": 3700.4,
|
|
"valid_targets_min": 993
|
|
},
|
|
{
|
|
"epoch": 5.476947535771065,
|
|
"grad_norm": 0.5134434569519974,
|
|
"learning_rate": 5.509079536530939e-06,
|
|
"loss": 0.1313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14722689986228943,
|
|
"step": 3445,
|
|
"valid_targets_mean": 4011.1,
|
|
"valid_targets_min": 1226
|
|
},
|
|
{
|
|
"epoch": 5.48489666136725,
|
|
"grad_norm": 0.6045299385441325,
|
|
"learning_rate": 5.4545426671201905e-06,
|
|
"loss": 0.1411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12199525535106659,
|
|
"step": 3450,
|
|
"valid_targets_mean": 3293.1,
|
|
"valid_targets_min": 563
|
|
},
|
|
{
|
|
"epoch": 5.492845786963434,
|
|
"grad_norm": 0.550935705887857,
|
|
"learning_rate": 5.400234430029561e-06,
|
|
"loss": 0.136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14424438774585724,
|
|
"step": 3455,
|
|
"valid_targets_mean": 3206.7,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 5.500794912559618,
|
|
"grad_norm": 0.4951329559478596,
|
|
"learning_rate": 5.346155678901392e-06,
|
|
"loss": 0.1175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12117502838373184,
|
|
"step": 3460,
|
|
"valid_targets_mean": 3883.6,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 5.508744038155803,
|
|
"grad_norm": 0.531009459855668,
|
|
"learning_rate": 5.292307263770859e-06,
|
|
"loss": 0.1244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1292455941438675,
|
|
"step": 3465,
|
|
"valid_targets_mean": 3177.2,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 5.516693163751987,
|
|
"grad_norm": 0.5665502788954249,
|
|
"learning_rate": 5.238690031052603e-06,
|
|
"loss": 0.1166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11746140569448471,
|
|
"step": 3470,
|
|
"valid_targets_mean": 2960.9,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 5.524642289348172,
|
|
"grad_norm": 0.47470831904665967,
|
|
"learning_rate": 5.185304823527426e-06,
|
|
"loss": 0.1122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11849209666252136,
|
|
"step": 3475,
|
|
"valid_targets_mean": 4295.4,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 5.532591414944356,
|
|
"grad_norm": 0.5348564893263684,
|
|
"learning_rate": 5.132152480329072e-06,
|
|
"loss": 0.1252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13434147834777832,
|
|
"step": 3480,
|
|
"valid_targets_mean": 3653.6,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 5.54054054054054,
|
|
"grad_norm": 0.45313684699490947,
|
|
"learning_rate": 5.07923383693099e-06,
|
|
"loss": 0.1123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10309700667858124,
|
|
"step": 3485,
|
|
"valid_targets_mean": 3730.9,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 5.548489666136725,
|
|
"grad_norm": 0.5594176484506791,
|
|
"learning_rate": 5.0265497251332314e-06,
|
|
"loss": 0.1159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11101377010345459,
|
|
"step": 3490,
|
|
"valid_targets_mean": 3820.3,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 5.556438791732909,
|
|
"grad_norm": 0.45166597844599227,
|
|
"learning_rate": 4.974100973049385e-06,
|
|
"loss": 0.1247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10583409667015076,
|
|
"step": 3495,
|
|
"valid_targets_mean": 4374.6,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 5.5643879173290935,
|
|
"grad_norm": 0.5180648698026142,
|
|
"learning_rate": 4.921888405093525e-06,
|
|
"loss": 0.1212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11158320307731628,
|
|
"step": 3500,
|
|
"valid_targets_mean": 3499.5,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 5.5723370429252785,
|
|
"grad_norm": 0.4509178468225708,
|
|
"learning_rate": 4.869912841967286e-06,
|
|
"loss": 0.1149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11846384406089783,
|
|
"step": 3505,
|
|
"valid_targets_mean": 4619.4,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 5.580286168521463,
|
|
"grad_norm": 0.5462056280603287,
|
|
"learning_rate": 4.818175100646952e-06,
|
|
"loss": 0.1288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12996745109558105,
|
|
"step": 3510,
|
|
"valid_targets_mean": 3452.2,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 5.588235294117647,
|
|
"grad_norm": 0.4452887332184351,
|
|
"learning_rate": 4.766675994370598e-06,
|
|
"loss": 0.1148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12007399648427963,
|
|
"step": 3515,
|
|
"valid_targets_mean": 3976.9,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 5.596184419713832,
|
|
"grad_norm": 0.4955106517830543,
|
|
"learning_rate": 4.7154163326253265e-06,
|
|
"loss": 0.1311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1265922635793686,
|
|
"step": 3520,
|
|
"valid_targets_mean": 4157.2,
|
|
"valid_targets_min": 639
|
|
},
|
|
{
|
|
"epoch": 5.604133545310016,
|
|
"grad_norm": 0.4967818054308162,
|
|
"learning_rate": 4.664396921134551e-06,
|
|
"loss": 0.1307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12537996470928192,
|
|
"step": 3525,
|
|
"valid_targets_mean": 3731.1,
|
|
"valid_targets_min": 711
|
|
},
|
|
{
|
|
"epoch": 5.6120826709062,
|
|
"grad_norm": 0.5922113133845275,
|
|
"learning_rate": 4.613618561845306e-06,
|
|
"loss": 0.128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18149852752685547,
|
|
"step": 3530,
|
|
"valid_targets_mean": 3075.6,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 5.620031796502385,
|
|
"grad_norm": 0.4486775225512763,
|
|
"learning_rate": 4.563082052915649e-06,
|
|
"loss": 0.1233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10791641473770142,
|
|
"step": 3535,
|
|
"valid_targets_mean": 4184.4,
|
|
"valid_targets_min": 1044
|
|
},
|
|
{
|
|
"epoch": 5.627980922098569,
|
|
"grad_norm": 0.47196647754075016,
|
|
"learning_rate": 4.512788188702135e-06,
|
|
"loss": 0.1074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10782913118600845,
|
|
"step": 3540,
|
|
"valid_targets_mean": 4232.4,
|
|
"valid_targets_min": 286
|
|
},
|
|
{
|
|
"epoch": 5.635930047694753,
|
|
"grad_norm": 0.40473591866365777,
|
|
"learning_rate": 4.462737759747315e-06,
|
|
"loss": 0.1143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09814178198575974,
|
|
"step": 3545,
|
|
"valid_targets_mean": 5018.9,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 5.643879173290938,
|
|
"grad_norm": 0.4970418048544055,
|
|
"learning_rate": 4.412931552767295e-06,
|
|
"loss": 0.1194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12168466299772263,
|
|
"step": 3550,
|
|
"valid_targets_mean": 3969.1,
|
|
"valid_targets_min": 908
|
|
},
|
|
{
|
|
"epoch": 5.651828298887122,
|
|
"grad_norm": 0.4466747416437135,
|
|
"learning_rate": 4.363370350639405e-06,
|
|
"loss": 0.1236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11409342288970947,
|
|
"step": 3555,
|
|
"valid_targets_mean": 4624.2,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 5.659777424483307,
|
|
"grad_norm": 0.548867935644448,
|
|
"learning_rate": 4.314054932389859e-06,
|
|
"loss": 0.1214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13996033370494843,
|
|
"step": 3560,
|
|
"valid_targets_mean": 4342.4,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 5.667726550079491,
|
|
"grad_norm": 0.48475073547335606,
|
|
"learning_rate": 4.2649860731815255e-06,
|
|
"loss": 0.1262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12767939269542694,
|
|
"step": 3565,
|
|
"valid_targets_mean": 3903.4,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 5.675675675675675,
|
|
"grad_norm": 0.5810018741448055,
|
|
"learning_rate": 4.216164544301755e-06,
|
|
"loss": 0.1234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12019410729408264,
|
|
"step": 3570,
|
|
"valid_targets_mean": 2819.9,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 5.68362480127186,
|
|
"grad_norm": 0.45812190476555054,
|
|
"learning_rate": 4.167591113150225e-06,
|
|
"loss": 0.1177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0970279797911644,
|
|
"step": 3575,
|
|
"valid_targets_mean": 3835.1,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 5.6915739268680445,
|
|
"grad_norm": 0.4583758611151626,
|
|
"learning_rate": 4.119266543226921e-06,
|
|
"loss": 0.1146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11823764443397522,
|
|
"step": 3580,
|
|
"valid_targets_mean": 4078.4,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 5.699523052464229,
|
|
"grad_norm": 0.5300561778832891,
|
|
"learning_rate": 4.071191594120081e-06,
|
|
"loss": 0.1311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1100766509771347,
|
|
"step": 3585,
|
|
"valid_targets_mean": 3071.7,
|
|
"valid_targets_min": 259
|
|
},
|
|
{
|
|
"epoch": 5.707472178060414,
|
|
"grad_norm": 0.5096830083092221,
|
|
"learning_rate": 4.023367021494313e-06,
|
|
"loss": 0.1224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13244731724262238,
|
|
"step": 3590,
|
|
"valid_targets_mean": 3879.1,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 5.715421303656598,
|
|
"grad_norm": 0.6172482897906679,
|
|
"learning_rate": 3.975793577078682e-06,
|
|
"loss": 0.1147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11370215564966202,
|
|
"step": 3595,
|
|
"valid_targets_mean": 3879.4,
|
|
"valid_targets_min": 1163
|
|
},
|
|
{
|
|
"epoch": 5.723370429252782,
|
|
"grad_norm": 0.5043378824343994,
|
|
"learning_rate": 3.928472008654891e-06,
|
|
"loss": 0.1105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1176089495420456,
|
|
"step": 3600,
|
|
"valid_targets_mean": 4378.8,
|
|
"valid_targets_min": 819
|
|
},
|
|
{
|
|
"epoch": 5.731319554848967,
|
|
"grad_norm": 0.5132872802234864,
|
|
"learning_rate": 3.881403060045545e-06,
|
|
"loss": 0.1217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10852010548114777,
|
|
"step": 3605,
|
|
"valid_targets_mean": 3407.3,
|
|
"valid_targets_min": 859
|
|
},
|
|
{
|
|
"epoch": 5.739268680445151,
|
|
"grad_norm": 0.5063474175192859,
|
|
"learning_rate": 3.834587471102464e-06,
|
|
"loss": 0.1203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11489781737327576,
|
|
"step": 3610,
|
|
"valid_targets_mean": 3358.4,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 5.747217806041336,
|
|
"grad_norm": 0.44741625934657075,
|
|
"learning_rate": 3.7880259776950224e-06,
|
|
"loss": 0.1189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11125510931015015,
|
|
"step": 3615,
|
|
"valid_targets_mean": 4725.9,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 5.75516693163752,
|
|
"grad_norm": 0.5622460074052544,
|
|
"learning_rate": 3.741719311698608e-06,
|
|
"loss": 0.1254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1445854902267456,
|
|
"step": 3620,
|
|
"valid_targets_mean": 3845.8,
|
|
"valid_targets_min": 326
|
|
},
|
|
{
|
|
"epoch": 5.763116057233704,
|
|
"grad_norm": 0.5095441924194869,
|
|
"learning_rate": 3.69566820098312e-06,
|
|
"loss": 0.118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11811588704586029,
|
|
"step": 3625,
|
|
"valid_targets_mean": 3504.1,
|
|
"valid_targets_min": 499
|
|
},
|
|
{
|
|
"epoch": 5.771065182829888,
|
|
"grad_norm": 0.6091555550332428,
|
|
"learning_rate": 3.6498733694015197e-06,
|
|
"loss": 0.1204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15617358684539795,
|
|
"step": 3630,
|
|
"valid_targets_mean": 3396.4,
|
|
"valid_targets_min": 219
|
|
},
|
|
{
|
|
"epoch": 5.779014308426073,
|
|
"grad_norm": 0.5229211609685805,
|
|
"learning_rate": 3.604335536778434e-06,
|
|
"loss": 0.1146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13374817371368408,
|
|
"step": 3635,
|
|
"valid_targets_mean": 4108.9,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 5.786963434022257,
|
|
"grad_norm": 0.5225267465274916,
|
|
"learning_rate": 3.559055418898887e-06,
|
|
"loss": 0.1323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1492437720298767,
|
|
"step": 3640,
|
|
"valid_targets_mean": 3701.8,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 5.794912559618442,
|
|
"grad_norm": 0.49027694616246914,
|
|
"learning_rate": 3.5140337274970014e-06,
|
|
"loss": 0.1205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12383852899074554,
|
|
"step": 3645,
|
|
"valid_targets_mean": 5302.7,
|
|
"valid_targets_min": 992
|
|
},
|
|
{
|
|
"epoch": 5.802861685214626,
|
|
"grad_norm": 0.49546019569767785,
|
|
"learning_rate": 3.469271170244832e-06,
|
|
"loss": 0.1157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11465057730674744,
|
|
"step": 3650,
|
|
"valid_targets_mean": 3762.5,
|
|
"valid_targets_min": 1734
|
|
},
|
|
{
|
|
"epoch": 5.8108108108108105,
|
|
"grad_norm": 0.6328363349278949,
|
|
"learning_rate": 3.4247684507412605e-06,
|
|
"loss": 0.1242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13153359293937683,
|
|
"step": 3655,
|
|
"valid_targets_mean": 2784.2,
|
|
"valid_targets_min": 294
|
|
},
|
|
{
|
|
"epoch": 5.8187599364069955,
|
|
"grad_norm": 0.502167626760723,
|
|
"learning_rate": 3.380526268500892e-06,
|
|
"loss": 0.1181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12254327535629272,
|
|
"step": 3660,
|
|
"valid_targets_mean": 3786.4,
|
|
"valid_targets_min": 193
|
|
},
|
|
{
|
|
"epoch": 5.82670906200318,
|
|
"grad_norm": 0.45125186924627153,
|
|
"learning_rate": 3.3365453189430984e-06,
|
|
"loss": 0.1226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10970175266265869,
|
|
"step": 3665,
|
|
"valid_targets_mean": 4506.9,
|
|
"valid_targets_min": 1367
|
|
},
|
|
{
|
|
"epoch": 5.834658187599364,
|
|
"grad_norm": 0.5205657434846551,
|
|
"learning_rate": 3.292826293381071e-06,
|
|
"loss": 0.1272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13056659698486328,
|
|
"step": 3670,
|
|
"valid_targets_mean": 4076.0,
|
|
"valid_targets_min": 1257
|
|
},
|
|
{
|
|
"epoch": 5.842607313195549,
|
|
"grad_norm": 0.5429453705327059,
|
|
"learning_rate": 3.2493698790109664e-06,
|
|
"loss": 0.1225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12193150818347931,
|
|
"step": 3675,
|
|
"valid_targets_mean": 3906.0,
|
|
"valid_targets_min": 444
|
|
},
|
|
{
|
|
"epoch": 5.850556438791733,
|
|
"grad_norm": 0.6227078505185998,
|
|
"learning_rate": 3.2061767589010763e-06,
|
|
"loss": 0.1258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13154500722885132,
|
|
"step": 3680,
|
|
"valid_targets_mean": 2870.4,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 5.858505564387917,
|
|
"grad_norm": 0.49948300350719843,
|
|
"learning_rate": 3.1632476119811285e-06,
|
|
"loss": 0.1214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11474519222974777,
|
|
"step": 3685,
|
|
"valid_targets_mean": 4023.4,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 5.866454689984102,
|
|
"grad_norm": 0.5200212537952184,
|
|
"learning_rate": 3.120583113031579e-06,
|
|
"loss": 0.1275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13559041917324066,
|
|
"step": 3690,
|
|
"valid_targets_mean": 3464.1,
|
|
"valid_targets_min": 203
|
|
},
|
|
{
|
|
"epoch": 5.874403815580286,
|
|
"grad_norm": 0.46446067076843983,
|
|
"learning_rate": 3.07818393267304e-06,
|
|
"loss": 0.1177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09316763281822205,
|
|
"step": 3695,
|
|
"valid_targets_mean": 3673.7,
|
|
"valid_targets_min": 231
|
|
},
|
|
{
|
|
"epoch": 5.882352941176471,
|
|
"grad_norm": 0.4404533791714637,
|
|
"learning_rate": 3.036050737355709e-06,
|
|
"loss": 0.1165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09938673675060272,
|
|
"step": 3700,
|
|
"valid_targets_mean": 4383.9,
|
|
"valid_targets_min": 1208
|
|
},
|
|
{
|
|
"epoch": 5.890302066772655,
|
|
"grad_norm": 0.5161902873588942,
|
|
"learning_rate": 2.9941841893489075e-06,
|
|
"loss": 0.1276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1233370378613472,
|
|
"step": 3705,
|
|
"valid_targets_mean": 4478.1,
|
|
"valid_targets_min": 1631
|
|
},
|
|
{
|
|
"epoch": 5.898251192368839,
|
|
"grad_norm": 0.49950014205727683,
|
|
"learning_rate": 2.9525849467306766e-06,
|
|
"loss": 0.1286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1349901705980301,
|
|
"step": 3710,
|
|
"valid_targets_mean": 4360.8,
|
|
"valid_targets_min": 1207
|
|
},
|
|
{
|
|
"epoch": 5.906200317965024,
|
|
"grad_norm": 0.6255270102689031,
|
|
"learning_rate": 2.9112536633774245e-06,
|
|
"loss": 0.1297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14242327213287354,
|
|
"step": 3715,
|
|
"valid_targets_mean": 2748.8,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 5.914149443561208,
|
|
"grad_norm": 0.4795827841967414,
|
|
"learning_rate": 2.8701909889536384e-06,
|
|
"loss": 0.1223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12320660799741745,
|
|
"step": 3720,
|
|
"valid_targets_mean": 3648.8,
|
|
"valid_targets_min": 1105
|
|
},
|
|
{
|
|
"epoch": 5.922098569157392,
|
|
"grad_norm": 0.46609837304180957,
|
|
"learning_rate": 2.8293975689017018e-06,
|
|
"loss": 0.121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09863148629665375,
|
|
"step": 3725,
|
|
"valid_targets_mean": 3843.1,
|
|
"valid_targets_min": 1049
|
|
},
|
|
{
|
|
"epoch": 5.930047694753577,
|
|
"grad_norm": 0.5299211238306798,
|
|
"learning_rate": 2.788874044431722e-06,
|
|
"loss": 0.1191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11683914065361023,
|
|
"step": 3730,
|
|
"valid_targets_mean": 3052.8,
|
|
"valid_targets_min": 357
|
|
},
|
|
{
|
|
"epoch": 5.9379968203497615,
|
|
"grad_norm": 0.5208980690753293,
|
|
"learning_rate": 2.7486210525114533e-06,
|
|
"loss": 0.1133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12761931121349335,
|
|
"step": 3735,
|
|
"valid_targets_mean": 3831.2,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 5.945945945945946,
|
|
"grad_norm": 0.6678323734362,
|
|
"learning_rate": 2.708639225856311e-06,
|
|
"loss": 0.1237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12598900496959686,
|
|
"step": 3740,
|
|
"valid_targets_mean": 3501.5,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 5.953895071542131,
|
|
"grad_norm": 0.5264094063837836,
|
|
"learning_rate": 2.6689291929193962e-06,
|
|
"loss": 0.1282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13515892624855042,
|
|
"step": 3745,
|
|
"valid_targets_mean": 3571.8,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 5.961844197138315,
|
|
"grad_norm": 0.42300031769118174,
|
|
"learning_rate": 2.629491577881622e-06,
|
|
"loss": 0.1131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10418035089969635,
|
|
"step": 3750,
|
|
"valid_targets_mean": 4830.2,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 5.9697933227345,
|
|
"grad_norm": 0.4877715372350325,
|
|
"learning_rate": 2.5903270006419236e-06,
|
|
"loss": 0.1276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11131057143211365,
|
|
"step": 3755,
|
|
"valid_targets_mean": 3662.4,
|
|
"valid_targets_min": 384
|
|
},
|
|
{
|
|
"epoch": 5.977742448330684,
|
|
"grad_norm": 0.4822345208975422,
|
|
"learning_rate": 2.551436076807501e-06,
|
|
"loss": 0.1206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10331891477108002,
|
|
"step": 3760,
|
|
"valid_targets_mean": 3850.6,
|
|
"valid_targets_min": 1253
|
|
},
|
|
{
|
|
"epoch": 5.985691573926868,
|
|
"grad_norm": 0.5405507727305193,
|
|
"learning_rate": 2.5128194176841226e-06,
|
|
"loss": 0.1208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11677927523851395,
|
|
"step": 3765,
|
|
"valid_targets_mean": 3175.6,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 5.993640699523052,
|
|
"grad_norm": 0.6454948139161141,
|
|
"learning_rate": 2.4744776302665563e-06,
|
|
"loss": 0.1213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13788512349128723,
|
|
"step": 3770,
|
|
"valid_targets_mean": 3206.7,
|
|
"valid_targets_min": 452
|
|
},
|
|
{
|
|
"epoch": 6.001589825119237,
|
|
"grad_norm": 0.44867451795140395,
|
|
"learning_rate": 2.436411317228997e-06,
|
|
"loss": 0.1413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1155017837882042,
|
|
"step": 3775,
|
|
"valid_targets_mean": 3869.2,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 6.009538950715421,
|
|
"grad_norm": 0.48596893402956937,
|
|
"learning_rate": 2.3986210769155994e-06,
|
|
"loss": 0.1215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11859863996505737,
|
|
"step": 3780,
|
|
"valid_targets_mean": 3937.1,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 6.017488076311606,
|
|
"grad_norm": 0.522552014133015,
|
|
"learning_rate": 2.361107503331095e-06,
|
|
"loss": 0.1206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13054189085960388,
|
|
"step": 3785,
|
|
"valid_targets_mean": 3340.6,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 6.02543720190779,
|
|
"grad_norm": 0.5187257820275404,
|
|
"learning_rate": 2.3238711861314165e-06,
|
|
"loss": 0.1164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0982595682144165,
|
|
"step": 3790,
|
|
"valid_targets_mean": 3887.9,
|
|
"valid_targets_min": 225
|
|
},
|
|
{
|
|
"epoch": 6.033386327503974,
|
|
"grad_norm": 0.492221911506423,
|
|
"learning_rate": 2.2869127106144663e-06,
|
|
"loss": 0.1223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10179409384727478,
|
|
"step": 3795,
|
|
"valid_targets_mean": 3659.4,
|
|
"valid_targets_min": 581
|
|
},
|
|
{
|
|
"epoch": 6.041335453100159,
|
|
"grad_norm": 0.5706240801694208,
|
|
"learning_rate": 2.2502326577109e-06,
|
|
"loss": 0.1247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14859774708747864,
|
|
"step": 3800,
|
|
"valid_targets_mean": 3022.1,
|
|
"valid_targets_min": 576
|
|
},
|
|
{
|
|
"epoch": 6.049284578696343,
|
|
"grad_norm": 0.4903458403444158,
|
|
"learning_rate": 2.213831603974985e-06,
|
|
"loss": 0.1208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12874770164489746,
|
|
"step": 3805,
|
|
"valid_targets_mean": 3873.9,
|
|
"valid_targets_min": 1757
|
|
},
|
|
{
|
|
"epoch": 6.0572337042925275,
|
|
"grad_norm": 0.6672892423704999,
|
|
"learning_rate": 2.1777101215755624e-06,
|
|
"loss": 0.124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14127162098884583,
|
|
"step": 3810,
|
|
"valid_targets_mean": 2096.2,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 6.0651828298887125,
|
|
"grad_norm": 0.4545999025709332,
|
|
"learning_rate": 2.1418687782870284e-06,
|
|
"loss": 0.115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1308123916387558,
|
|
"step": 3815,
|
|
"valid_targets_mean": 4371.7,
|
|
"valid_targets_min": 911
|
|
},
|
|
{
|
|
"epoch": 6.073131955484897,
|
|
"grad_norm": 0.487382004726638,
|
|
"learning_rate": 2.1063081374804263e-06,
|
|
"loss": 0.1156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11653155088424683,
|
|
"step": 3820,
|
|
"valid_targets_mean": 4370.7,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 6.081081081081081,
|
|
"grad_norm": 0.5230128586154896,
|
|
"learning_rate": 2.0710287581145884e-06,
|
|
"loss": 0.1107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11809296905994415,
|
|
"step": 3825,
|
|
"valid_targets_mean": 3450.1,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 6.089030206677266,
|
|
"grad_norm": 0.5190144803094725,
|
|
"learning_rate": 2.036031194727346e-06,
|
|
"loss": 0.1299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13838720321655273,
|
|
"step": 3830,
|
|
"valid_targets_mean": 3676.1,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 6.09697933227345,
|
|
"grad_norm": 0.46738733555474804,
|
|
"learning_rate": 2.0013159974268094e-06,
|
|
"loss": 0.1182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11170323193073273,
|
|
"step": 3835,
|
|
"valid_targets_mean": 4278.0,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 6.104928457869635,
|
|
"grad_norm": 0.4624128199722627,
|
|
"learning_rate": 1.9668837118827346e-06,
|
|
"loss": 0.1101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09756626933813095,
|
|
"step": 3840,
|
|
"valid_targets_mean": 4007.2,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 6.112877583465819,
|
|
"grad_norm": 0.5119513810750553,
|
|
"learning_rate": 1.932734879317937e-06,
|
|
"loss": 0.1265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10754363238811493,
|
|
"step": 3845,
|
|
"valid_targets_mean": 3588.8,
|
|
"valid_targets_min": 295
|
|
},
|
|
{
|
|
"epoch": 6.120826709062003,
|
|
"grad_norm": 0.41100363613961804,
|
|
"learning_rate": 1.8988700364997758e-06,
|
|
"loss": 0.117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10548628866672516,
|
|
"step": 3850,
|
|
"valid_targets_mean": 4552.5,
|
|
"valid_targets_min": 1126
|
|
},
|
|
{
|
|
"epoch": 6.128775834658188,
|
|
"grad_norm": 0.5133116111241467,
|
|
"learning_rate": 1.8652897157317395e-06,
|
|
"loss": 0.1165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14871114492416382,
|
|
"step": 3855,
|
|
"valid_targets_mean": 3678.9,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 6.136724960254372,
|
|
"grad_norm": 0.5261537871220848,
|
|
"learning_rate": 1.8319944448450578e-06,
|
|
"loss": 0.1073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10628058761358261,
|
|
"step": 3860,
|
|
"valid_targets_mean": 3190.5,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 6.144674085850556,
|
|
"grad_norm": 0.4713576235713761,
|
|
"learning_rate": 1.7989847471904065e-06,
|
|
"loss": 0.1097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09839437156915665,
|
|
"step": 3865,
|
|
"valid_targets_mean": 4262.8,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 6.152623211446741,
|
|
"grad_norm": 0.6149608619383016,
|
|
"learning_rate": 1.766261141629706e-06,
|
|
"loss": 0.1184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10006123036146164,
|
|
"step": 3870,
|
|
"valid_targets_mean": 4775.4,
|
|
"valid_targets_min": 1547
|
|
},
|
|
{
|
|
"epoch": 6.160572337042925,
|
|
"grad_norm": 0.5676332722536679,
|
|
"learning_rate": 1.7338241425279244e-06,
|
|
"loss": 0.116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1152639240026474,
|
|
"step": 3875,
|
|
"valid_targets_mean": 2869.9,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 6.168521462639109,
|
|
"grad_norm": 0.43406528581225035,
|
|
"learning_rate": 1.7016742597450341e-06,
|
|
"loss": 0.1035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0894886776804924,
|
|
"step": 3880,
|
|
"valid_targets_mean": 4228.3,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 6.176470588235294,
|
|
"grad_norm": 0.4391775601429534,
|
|
"learning_rate": 1.6698119986279726e-06,
|
|
"loss": 0.1302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1086900532245636,
|
|
"step": 3885,
|
|
"valid_targets_mean": 4487.6,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 6.1844197138314785,
|
|
"grad_norm": 0.43257145105765316,
|
|
"learning_rate": 1.6382378600026982e-06,
|
|
"loss": 0.1129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09367109835147858,
|
|
"step": 3890,
|
|
"valid_targets_mean": 4436.7,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 6.192368839427663,
|
|
"grad_norm": 0.46585023110175705,
|
|
"learning_rate": 1.60695234016633e-06,
|
|
"loss": 0.1239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11722202599048615,
|
|
"step": 3895,
|
|
"valid_targets_mean": 4294.6,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 6.200317965023848,
|
|
"grad_norm": 0.4902572872103629,
|
|
"learning_rate": 1.5759559308793448e-06,
|
|
"loss": 0.1081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10776770859956741,
|
|
"step": 3900,
|
|
"valid_targets_mean": 3943.4,
|
|
"valid_targets_min": 271
|
|
},
|
|
{
|
|
"epoch": 6.208267090620032,
|
|
"grad_norm": 0.5594670199155989,
|
|
"learning_rate": 1.5452491193578412e-06,
|
|
"loss": 0.1202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12497895956039429,
|
|
"step": 3905,
|
|
"valid_targets_mean": 3098.4,
|
|
"valid_targets_min": 735
|
|
},
|
|
{
|
|
"epoch": 6.216216216216216,
|
|
"grad_norm": 0.5219935562912217,
|
|
"learning_rate": 1.5148323882658767e-06,
|
|
"loss": 0.1158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10808251798152924,
|
|
"step": 3910,
|
|
"valid_targets_mean": 4122.3,
|
|
"valid_targets_min": 283
|
|
},
|
|
{
|
|
"epoch": 6.224165341812401,
|
|
"grad_norm": 0.5005990647111696,
|
|
"learning_rate": 1.484706215707905e-06,
|
|
"loss": 0.1206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10552050173282623,
|
|
"step": 3915,
|
|
"valid_targets_mean": 3699.5,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 6.232114467408585,
|
|
"grad_norm": 0.6045839092717541,
|
|
"learning_rate": 1.4548710752212292e-06,
|
|
"loss": 0.1085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11157447099685669,
|
|
"step": 3920,
|
|
"valid_targets_mean": 2340.5,
|
|
"valid_targets_min": 254
|
|
},
|
|
{
|
|
"epoch": 6.24006359300477,
|
|
"grad_norm": 0.5250995042813899,
|
|
"learning_rate": 1.425327435768582e-06,
|
|
"loss": 0.1204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13459616899490356,
|
|
"step": 3925,
|
|
"valid_targets_mean": 3940.4,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 6.248012718600954,
|
|
"grad_norm": 0.472579910076907,
|
|
"learning_rate": 1.3960757617307486e-06,
|
|
"loss": 0.1204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12376942485570908,
|
|
"step": 3930,
|
|
"valid_targets_mean": 4687.8,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 6.255961844197138,
|
|
"grad_norm": 0.4618301620060851,
|
|
"learning_rate": 1.3671165128992514e-06,
|
|
"loss": 0.1095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10723580420017242,
|
|
"step": 3935,
|
|
"valid_targets_mean": 4437.8,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 6.263910969793323,
|
|
"grad_norm": 0.4999776109608244,
|
|
"learning_rate": 1.3384501444691544e-06,
|
|
"loss": 0.1047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10837283730506897,
|
|
"step": 3940,
|
|
"valid_targets_mean": 3520.4,
|
|
"valid_targets_min": 295
|
|
},
|
|
{
|
|
"epoch": 6.271860095389507,
|
|
"grad_norm": 0.5126636237250259,
|
|
"learning_rate": 1.3100771070318796e-06,
|
|
"loss": 0.1176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11529883742332458,
|
|
"step": 3945,
|
|
"valid_targets_mean": 4288.9,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 6.279809220985691,
|
|
"grad_norm": 0.47848229996426966,
|
|
"learning_rate": 1.2819978465681283e-06,
|
|
"loss": 0.1236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0940292477607727,
|
|
"step": 3950,
|
|
"valid_targets_mean": 3521.3,
|
|
"valid_targets_min": 498
|
|
},
|
|
{
|
|
"epoch": 6.287758346581876,
|
|
"grad_norm": 0.5732393529239456,
|
|
"learning_rate": 1.254212804440893e-06,
|
|
"loss": 0.1244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1338890790939331,
|
|
"step": 3955,
|
|
"valid_targets_mean": 3135.6,
|
|
"valid_targets_min": 312
|
|
},
|
|
{
|
|
"epoch": 6.29570747217806,
|
|
"grad_norm": 0.5326295342711036,
|
|
"learning_rate": 1.2267224173884929e-06,
|
|
"loss": 0.1145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13209447264671326,
|
|
"step": 3960,
|
|
"valid_targets_mean": 3774.2,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 6.3036565977742445,
|
|
"grad_norm": 0.4711791669124887,
|
|
"learning_rate": 1.199527117517727e-06,
|
|
"loss": 0.113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11155182123184204,
|
|
"step": 3965,
|
|
"valid_targets_mean": 3798.5,
|
|
"valid_targets_min": 276
|
|
},
|
|
{
|
|
"epoch": 6.3116057233704295,
|
|
"grad_norm": 0.4818693237836275,
|
|
"learning_rate": 1.172627332297076e-06,
|
|
"loss": 0.1146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10713420063257217,
|
|
"step": 3970,
|
|
"valid_targets_mean": 3704.2,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 6.319554848966614,
|
|
"grad_norm": 0.5058735177237607,
|
|
"learning_rate": 1.1460234845499763e-06,
|
|
"loss": 0.1135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.132293701171875,
|
|
"step": 3975,
|
|
"valid_targets_mean": 4033.2,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 6.327503974562799,
|
|
"grad_norm": 0.48180321982015545,
|
|
"learning_rate": 1.1197159924481804e-06,
|
|
"loss": 0.125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11559031903743744,
|
|
"step": 3980,
|
|
"valid_targets_mean": 4418.4,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 6.335453100158983,
|
|
"grad_norm": 0.6108262295945303,
|
|
"learning_rate": 1.0937052695051965e-06,
|
|
"loss": 0.1185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14231693744659424,
|
|
"step": 3985,
|
|
"valid_targets_mean": 4821.4,
|
|
"valid_targets_min": 1296
|
|
},
|
|
{
|
|
"epoch": 6.343402225755167,
|
|
"grad_norm": 0.5735633688087327,
|
|
"learning_rate": 1.067991724569759e-06,
|
|
"loss": 0.1169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12734287977218628,
|
|
"step": 3990,
|
|
"valid_targets_mean": 3712.1,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 6.351351351351352,
|
|
"grad_norm": 0.4962186042387327,
|
|
"learning_rate": 1.0425757618194265e-06,
|
|
"loss": 0.1143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11310100555419922,
|
|
"step": 3995,
|
|
"valid_targets_mean": 4223.6,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 6.359300476947536,
|
|
"grad_norm": 0.4808255029002646,
|
|
"learning_rate": 1.0174577807542273e-06,
|
|
"loss": 0.1242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10521212220191956,
|
|
"step": 4000,
|
|
"valid_targets_mean": 3883.7,
|
|
"valid_targets_min": 1113
|
|
},
|
|
{
|
|
"epoch": 6.36724960254372,
|
|
"grad_norm": 0.4740602995913292,
|
|
"learning_rate": 9.926381761903614e-07,
|
|
"loss": 0.1121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11520376056432724,
|
|
"step": 4005,
|
|
"valid_targets_mean": 4185.7,
|
|
"valid_targets_min": 1471
|
|
},
|
|
{
|
|
"epoch": 6.375198728139905,
|
|
"grad_norm": 0.4470619298701318,
|
|
"learning_rate": 9.681173382540177e-07,
|
|
"loss": 0.118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10057910531759262,
|
|
"step": 4010,
|
|
"valid_targets_mean": 4749.6,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 6.383147853736089,
|
|
"grad_norm": 0.5582730349142845,
|
|
"learning_rate": 9.438956523752263e-07,
|
|
"loss": 0.1194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14447925984859467,
|
|
"step": 4015,
|
|
"valid_targets_mean": 3494.1,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 6.391096979332273,
|
|
"grad_norm": 0.4602774808448654,
|
|
"learning_rate": 9.199734992818099e-07,
|
|
"loss": 0.1045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10162511467933655,
|
|
"step": 4020,
|
|
"valid_targets_mean": 4045.6,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 6.399046104928458,
|
|
"grad_norm": 0.9217957425599271,
|
|
"learning_rate": 8.963512549933795e-07,
|
|
"loss": 0.1213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10582847148180008,
|
|
"step": 4025,
|
|
"valid_targets_mean": 3972.8,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 6.406995230524642,
|
|
"grad_norm": 0.48625229787458,
|
|
"learning_rate": 8.730292908154614e-07,
|
|
"loss": 0.1203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13875703513622284,
|
|
"step": 4030,
|
|
"valid_targets_mean": 3860.8,
|
|
"valid_targets_min": 218
|
|
},
|
|
{
|
|
"epoch": 6.414944356120826,
|
|
"grad_norm": 0.4648812460273993,
|
|
"learning_rate": 8.500079733336175e-07,
|
|
"loss": 0.1114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1072784960269928,
|
|
"step": 4035,
|
|
"valid_targets_mean": 3870.5,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 6.422893481717011,
|
|
"grad_norm": 0.5971157428394955,
|
|
"learning_rate": 8.272876644077188e-07,
|
|
"loss": 0.1207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16263656318187714,
|
|
"step": 4040,
|
|
"valid_targets_mean": 3596.1,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 6.4308426073131955,
|
|
"grad_norm": 0.4392492224710974,
|
|
"learning_rate": 8.048687211662343e-07,
|
|
"loss": 0.1076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0819651186466217,
|
|
"step": 4045,
|
|
"valid_targets_mean": 3597.6,
|
|
"valid_targets_min": 296
|
|
},
|
|
{
|
|
"epoch": 6.43879173290938,
|
|
"grad_norm": 0.5349233453433273,
|
|
"learning_rate": 7.827514960006266e-07,
|
|
"loss": 0.1304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.153071790933609,
|
|
"step": 4050,
|
|
"valid_targets_mean": 4198.1,
|
|
"valid_targets_min": 203
|
|
},
|
|
{
|
|
"epoch": 6.4467408585055646,
|
|
"grad_norm": 0.5485637315230164,
|
|
"learning_rate": 7.609363365598165e-07,
|
|
"loss": 0.1192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09980130195617676,
|
|
"step": 4055,
|
|
"valid_targets_mean": 3078.7,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 6.454689984101749,
|
|
"grad_norm": 0.568411168151065,
|
|
"learning_rate": 7.394235857447119e-07,
|
|
"loss": 0.1233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12002958357334137,
|
|
"step": 4060,
|
|
"valid_targets_mean": 3055.5,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 6.462639109697934,
|
|
"grad_norm": 0.5213091406213507,
|
|
"learning_rate": 7.182135817028157e-07,
|
|
"loss": 0.109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11195459216833115,
|
|
"step": 4065,
|
|
"valid_targets_mean": 3587.9,
|
|
"valid_targets_min": 839
|
|
},
|
|
{
|
|
"epoch": 6.470588235294118,
|
|
"grad_norm": 0.5134486003820178,
|
|
"learning_rate": 6.973066578229248e-07,
|
|
"loss": 0.121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11719434708356857,
|
|
"step": 4070,
|
|
"valid_targets_mean": 4122.6,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 6.478537360890302,
|
|
"grad_norm": 0.4962383127871119,
|
|
"learning_rate": 6.767031427298687e-07,
|
|
"loss": 0.1113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11477995663881302,
|
|
"step": 4075,
|
|
"valid_targets_mean": 3880.5,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 6.486486486486487,
|
|
"grad_norm": 0.5116829216283925,
|
|
"learning_rate": 6.564033602793584e-07,
|
|
"loss": 0.112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10865809768438339,
|
|
"step": 4080,
|
|
"valid_targets_mean": 3936.4,
|
|
"valid_targets_min": 203
|
|
},
|
|
{
|
|
"epoch": 6.494435612082671,
|
|
"grad_norm": 0.49006467961889066,
|
|
"learning_rate": 6.364076295529042e-07,
|
|
"loss": 0.1224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11683492362499237,
|
|
"step": 4085,
|
|
"valid_targets_mean": 4282.9,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 6.502384737678855,
|
|
"grad_norm": 0.6452016504816608,
|
|
"learning_rate": 6.167162648527703e-07,
|
|
"loss": 0.1167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14076688885688782,
|
|
"step": 4090,
|
|
"valid_targets_mean": 3114.0,
|
|
"valid_targets_min": 562
|
|
},
|
|
{
|
|
"epoch": 6.51033386327504,
|
|
"grad_norm": 0.581962945018844,
|
|
"learning_rate": 5.973295756970653e-07,
|
|
"loss": 0.1153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12058502435684204,
|
|
"step": 4095,
|
|
"valid_targets_mean": 2951.9,
|
|
"valid_targets_min": 215
|
|
},
|
|
{
|
|
"epoch": 6.518282988871224,
|
|
"grad_norm": 0.5276399869645112,
|
|
"learning_rate": 5.782478668148672e-07,
|
|
"loss": 0.1185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12663058936595917,
|
|
"step": 4100,
|
|
"valid_targets_mean": 3469.2,
|
|
"valid_targets_min": 883
|
|
},
|
|
{
|
|
"epoch": 6.526232114467408,
|
|
"grad_norm": 0.47852804567749874,
|
|
"learning_rate": 5.59471438141419e-07,
|
|
"loss": 0.1063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09485980868339539,
|
|
"step": 4105,
|
|
"valid_targets_mean": 3878.8,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 6.534181240063593,
|
|
"grad_norm": 0.4656719855928257,
|
|
"learning_rate": 5.410005848134315e-07,
|
|
"loss": 0.1125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11063895374536514,
|
|
"step": 4110,
|
|
"valid_targets_mean": 4169.6,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 6.542130365659777,
|
|
"grad_norm": 0.582805206984922,
|
|
"learning_rate": 5.228355971644461e-07,
|
|
"loss": 0.1196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14886021614074707,
|
|
"step": 4115,
|
|
"valid_targets_mean": 3361.9,
|
|
"valid_targets_min": 205
|
|
},
|
|
{
|
|
"epoch": 6.550079491255962,
|
|
"grad_norm": 0.4719462940844744,
|
|
"learning_rate": 5.049767607202549e-07,
|
|
"loss": 0.1179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10012765973806381,
|
|
"step": 4120,
|
|
"valid_targets_mean": 4136.4,
|
|
"valid_targets_min": 490
|
|
},
|
|
{
|
|
"epoch": 6.558028616852146,
|
|
"grad_norm": 1.3556535238795349,
|
|
"learning_rate": 4.874243561944214e-07,
|
|
"loss": 0.1142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12124701589345932,
|
|
"step": 4125,
|
|
"valid_targets_mean": 3717.2,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 6.5659777424483305,
|
|
"grad_norm": 0.5426225357284815,
|
|
"learning_rate": 4.701786594838753e-07,
|
|
"loss": 0.1227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13302527368068695,
|
|
"step": 4130,
|
|
"valid_targets_mean": 3212.1,
|
|
"valid_targets_min": 519
|
|
},
|
|
{
|
|
"epoch": 6.573926868044515,
|
|
"grad_norm": 0.5217934884004445,
|
|
"learning_rate": 4.532399416645694e-07,
|
|
"loss": 0.1255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12307184189558029,
|
|
"step": 4135,
|
|
"valid_targets_mean": 3814.6,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 6.5818759936407,
|
|
"grad_norm": 0.46337006848354445,
|
|
"learning_rate": 4.366084689872074e-07,
|
|
"loss": 0.1306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.146135151386261,
|
|
"step": 4140,
|
|
"valid_targets_mean": 4986.4,
|
|
"valid_targets_min": 1521
|
|
},
|
|
{
|
|
"epoch": 6.589825119236884,
|
|
"grad_norm": 0.5660661020795137,
|
|
"learning_rate": 4.202845028730829e-07,
|
|
"loss": 0.1106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14232556521892548,
|
|
"step": 4145,
|
|
"valid_targets_mean": 3666.0,
|
|
"valid_targets_min": 748
|
|
},
|
|
{
|
|
"epoch": 6.597774244833069,
|
|
"grad_norm": 0.5103188227448534,
|
|
"learning_rate": 4.0426829990994677e-07,
|
|
"loss": 0.109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10854796320199966,
|
|
"step": 4150,
|
|
"valid_targets_mean": 3345.2,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 6.605723370429253,
|
|
"grad_norm": 0.5531875415363303,
|
|
"learning_rate": 3.885601118479909e-07,
|
|
"loss": 0.1201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13567741215229034,
|
|
"step": 4155,
|
|
"valid_targets_mean": 3677.8,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 6.613672496025437,
|
|
"grad_norm": 0.5680532693391935,
|
|
"learning_rate": 3.731601855958844e-07,
|
|
"loss": 0.1155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10990526527166367,
|
|
"step": 4160,
|
|
"valid_targets_mean": 3246.2,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 6.621621621621622,
|
|
"grad_norm": 0.8069806110549206,
|
|
"learning_rate": 3.5806876321688553e-07,
|
|
"loss": 0.1153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15170586109161377,
|
|
"step": 4165,
|
|
"valid_targets_mean": 3854.2,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 6.629570747217806,
|
|
"grad_norm": 0.5399036381526361,
|
|
"learning_rate": 3.4328608192505164e-07,
|
|
"loss": 0.1269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1231968104839325,
|
|
"step": 4170,
|
|
"valid_targets_mean": 3397.2,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 6.63751987281399,
|
|
"grad_norm": 0.43662516483746666,
|
|
"learning_rate": 3.288123740814997e-07,
|
|
"loss": 0.1229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11871956288814545,
|
|
"step": 4175,
|
|
"valid_targets_mean": 5077.1,
|
|
"valid_targets_min": 1399
|
|
},
|
|
{
|
|
"epoch": 6.645468998410175,
|
|
"grad_norm": 0.52274643112803,
|
|
"learning_rate": 3.1464786719075825e-07,
|
|
"loss": 0.1063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09565606713294983,
|
|
"step": 4180,
|
|
"valid_targets_mean": 3180.1,
|
|
"valid_targets_min": 220
|
|
},
|
|
{
|
|
"epoch": 6.653418124006359,
|
|
"grad_norm": 0.5634216558721306,
|
|
"learning_rate": 3.0079278389719246e-07,
|
|
"loss": 0.1169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10372507572174072,
|
|
"step": 4185,
|
|
"valid_targets_mean": 3673.8,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 6.661367249602543,
|
|
"grad_norm": 0.6083528281521314,
|
|
"learning_rate": 2.8724734198149585e-07,
|
|
"loss": 0.1228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09832460433244705,
|
|
"step": 4190,
|
|
"valid_targets_mean": 3061.6,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 6.669316375198728,
|
|
"grad_norm": 0.4419979233676959,
|
|
"learning_rate": 2.7401175435727735e-07,
|
|
"loss": 0.1118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09950825572013855,
|
|
"step": 4195,
|
|
"valid_targets_mean": 4492.2,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 6.677265500794912,
|
|
"grad_norm": 0.504982275604603,
|
|
"learning_rate": 2.61086229067713e-07,
|
|
"loss": 0.1111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10245563834905624,
|
|
"step": 4200,
|
|
"valid_targets_mean": 3867.2,
|
|
"valid_targets_min": 1379
|
|
},
|
|
{
|
|
"epoch": 6.685214626391097,
|
|
"grad_norm": 0.5140544947040371,
|
|
"learning_rate": 2.4847096928226846e-07,
|
|
"loss": 0.1193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10791807621717453,
|
|
"step": 4205,
|
|
"valid_targets_mean": 3571.3,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 6.6931637519872815,
|
|
"grad_norm": 0.5413468567786459,
|
|
"learning_rate": 2.3616617329351499e-07,
|
|
"loss": 0.1141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11113090813159943,
|
|
"step": 4210,
|
|
"valid_targets_mean": 3172.6,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 6.701112877583466,
|
|
"grad_norm": 0.49989740724965176,
|
|
"learning_rate": 2.2417203451400749e-07,
|
|
"loss": 0.118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11632831394672394,
|
|
"step": 4215,
|
|
"valid_targets_mean": 3993.2,
|
|
"valid_targets_min": 293
|
|
},
|
|
{
|
|
"epoch": 6.709062003179651,
|
|
"grad_norm": 0.5360781888423928,
|
|
"learning_rate": 2.124887414732424e-07,
|
|
"loss": 0.1151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1259898841381073,
|
|
"step": 4220,
|
|
"valid_targets_mean": 3856.2,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 6.717011128775835,
|
|
"grad_norm": 0.5026442130672338,
|
|
"learning_rate": 2.0111647781470233e-07,
|
|
"loss": 0.1178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10925573855638504,
|
|
"step": 4225,
|
|
"valid_targets_mean": 3679.4,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 6.724960254372019,
|
|
"grad_norm": 0.5622738756387758,
|
|
"learning_rate": 1.9005542229295848e-07,
|
|
"loss": 0.1117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11003442853689194,
|
|
"step": 4230,
|
|
"valid_targets_mean": 2934.6,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 6.732909379968204,
|
|
"grad_norm": 0.5361206317257745,
|
|
"learning_rate": 1.793057487708705e-07,
|
|
"loss": 0.1173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11059747636318207,
|
|
"step": 4235,
|
|
"valid_targets_mean": 3127.6,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 6.740858505564388,
|
|
"grad_norm": 0.48942654254164564,
|
|
"learning_rate": 1.688676262168465e-07,
|
|
"loss": 0.1137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11752250790596008,
|
|
"step": 4240,
|
|
"valid_targets_mean": 3818.7,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 6.748807631160572,
|
|
"grad_norm": 0.45231885595248883,
|
|
"learning_rate": 1.5874121870219415e-07,
|
|
"loss": 0.1154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11548950523138046,
|
|
"step": 4245,
|
|
"valid_targets_mean": 4585.7,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 6.756756756756757,
|
|
"grad_norm": 0.47395667412902187,
|
|
"learning_rate": 1.4892668539853606e-07,
|
|
"loss": 0.1143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1214282214641571,
|
|
"step": 4250,
|
|
"valid_targets_mean": 4161.2,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 6.764705882352941,
|
|
"grad_norm": 0.43529395808289495,
|
|
"learning_rate": 1.3942418057530714e-07,
|
|
"loss": 0.1038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09696638584136963,
|
|
"step": 4255,
|
|
"valid_targets_mean": 4235.6,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 6.772655007949125,
|
|
"grad_norm": 0.4448513095931578,
|
|
"learning_rate": 1.3023385359733687e-07,
|
|
"loss": 0.1119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10132372379302979,
|
|
"step": 4260,
|
|
"valid_targets_mean": 4057.3,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 6.78060413354531,
|
|
"grad_norm": 0.5547672548578956,
|
|
"learning_rate": 1.213558489224953e-07,
|
|
"loss": 0.1241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12664559483528137,
|
|
"step": 4265,
|
|
"valid_targets_mean": 3385.1,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 6.788553259141494,
|
|
"grad_norm": 0.4670519878770078,
|
|
"learning_rate": 1.1279030609942177e-07,
|
|
"loss": 0.1339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12070097029209137,
|
|
"step": 4270,
|
|
"valid_targets_mean": 5304.9,
|
|
"valid_targets_min": 2131
|
|
},
|
|
{
|
|
"epoch": 6.796502384737678,
|
|
"grad_norm": 1.1700155137729815,
|
|
"learning_rate": 1.0453735976533985e-07,
|
|
"loss": 0.1216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12248082458972931,
|
|
"step": 4275,
|
|
"valid_targets_mean": 4182.2,
|
|
"valid_targets_min": 584
|
|
},
|
|
{
|
|
"epoch": 6.804451510333863,
|
|
"grad_norm": 0.5171523736974354,
|
|
"learning_rate": 9.659713964392358e-08,
|
|
"loss": 0.1097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10080324113368988,
|
|
"step": 4280,
|
|
"valid_targets_mean": 3404.8,
|
|
"valid_targets_min": 304
|
|
},
|
|
{
|
|
"epoch": 6.8124006359300475,
|
|
"grad_norm": 0.51183917876764,
|
|
"learning_rate": 8.896977054328349e-08,
|
|
"loss": 0.1078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08926388621330261,
|
|
"step": 4285,
|
|
"valid_targets_mean": 3781.9,
|
|
"valid_targets_min": 334
|
|
},
|
|
{
|
|
"epoch": 6.8203497615262325,
|
|
"grad_norm": 0.5317954705582983,
|
|
"learning_rate": 8.165537235398146e-08,
|
|
"loss": 0.1094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10373544692993164,
|
|
"step": 4290,
|
|
"valid_targets_mean": 3098.6,
|
|
"valid_targets_min": 576
|
|
},
|
|
{
|
|
"epoch": 6.828298887122417,
|
|
"grad_norm": 0.46554008682031195,
|
|
"learning_rate": 7.465406004715903e-08,
|
|
"loss": 0.1124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11525758355855942,
|
|
"step": 4295,
|
|
"valid_targets_mean": 4444.8,
|
|
"valid_targets_min": 834
|
|
},
|
|
{
|
|
"epoch": 6.836248012718601,
|
|
"grad_norm": 0.45846708598172736,
|
|
"learning_rate": 6.796594367272535e-08,
|
|
"loss": 0.1077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09519517421722412,
|
|
"step": 4300,
|
|
"valid_targets_mean": 3990.9,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 6.844197138314786,
|
|
"grad_norm": 0.542174177410134,
|
|
"learning_rate": 6.159112835763204e-08,
|
|
"loss": 0.1073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09634318947792053,
|
|
"step": 4305,
|
|
"valid_targets_mean": 3841.8,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 6.85214626391097,
|
|
"grad_norm": 0.5687137465211649,
|
|
"learning_rate": 5.552971430421439e-08,
|
|
"loss": 0.1136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11241281777620316,
|
|
"step": 4310,
|
|
"valid_targets_mean": 2990.4,
|
|
"valid_targets_min": 469
|
|
},
|
|
{
|
|
"epoch": 6.860095389507154,
|
|
"grad_norm": 0.4978344358648119,
|
|
"learning_rate": 4.9781796788621605e-08,
|
|
"loss": 0.1135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11623929440975189,
|
|
"step": 4315,
|
|
"valid_targets_mean": 3618.5,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 6.868044515103339,
|
|
"grad_norm": 0.5297908608971749,
|
|
"learning_rate": 4.434746615932018e-08,
|
|
"loss": 0.123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11995728313922882,
|
|
"step": 4320,
|
|
"valid_targets_mean": 3083.2,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 6.875993640699523,
|
|
"grad_norm": 0.61054223355744,
|
|
"learning_rate": 3.922680783566168e-08,
|
|
"loss": 0.1114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11064666509628296,
|
|
"step": 4325,
|
|
"valid_targets_mean": 3176.7,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 6.883942766295707,
|
|
"grad_norm": 0.522298631432037,
|
|
"learning_rate": 3.441990230656167e-08,
|
|
"loss": 0.1108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11717766523361206,
|
|
"step": 4330,
|
|
"valid_targets_mean": 3661.9,
|
|
"valid_targets_min": 565
|
|
},
|
|
{
|
|
"epoch": 6.891891891891892,
|
|
"grad_norm": 0.49498352702719756,
|
|
"learning_rate": 2.992682512921175e-08,
|
|
"loss": 0.1336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12047946453094482,
|
|
"step": 4335,
|
|
"valid_targets_mean": 3766.9,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 6.899841017488076,
|
|
"grad_norm": 0.5212865153023899,
|
|
"learning_rate": 2.574764692790499e-08,
|
|
"loss": 0.1193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12060309201478958,
|
|
"step": 4340,
|
|
"valid_targets_mean": 4045.0,
|
|
"valid_targets_min": 1740
|
|
},
|
|
{
|
|
"epoch": 6.907790143084261,
|
|
"grad_norm": 0.53810487124348,
|
|
"learning_rate": 2.188243339292795e-08,
|
|
"loss": 0.1131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11383530497550964,
|
|
"step": 4345,
|
|
"valid_targets_mean": 4394.4,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 6.915739268680445,
|
|
"grad_norm": 0.44962307621678405,
|
|
"learning_rate": 1.8331245279517017e-08,
|
|
"loss": 0.109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09446686506271362,
|
|
"step": 4350,
|
|
"valid_targets_mean": 4012.4,
|
|
"valid_targets_min": 444
|
|
},
|
|
{
|
|
"epoch": 6.923688394276629,
|
|
"grad_norm": 0.5374076189204164,
|
|
"learning_rate": 1.509413840691476e-08,
|
|
"loss": 0.1113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11552655696868896,
|
|
"step": 4355,
|
|
"valid_targets_mean": 3191.7,
|
|
"valid_targets_min": 280
|
|
},
|
|
{
|
|
"epoch": 6.9316375198728135,
|
|
"grad_norm": 0.53179013058553,
|
|
"learning_rate": 1.2171163657481722e-08,
|
|
"loss": 0.1229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1528531163930893,
|
|
"step": 4360,
|
|
"valid_targets_mean": 3466.8,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 6.9395866454689985,
|
|
"grad_norm": 0.5741198365523206,
|
|
"learning_rate": 9.562366975910397e-09,
|
|
"loss": 0.1247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1264297068119049,
|
|
"step": 4365,
|
|
"valid_targets_mean": 3148.3,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 6.947535771065183,
|
|
"grad_norm": 0.5093734832614514,
|
|
"learning_rate": 7.2677893684880425e-09,
|
|
"loss": 0.1153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12815840542316437,
|
|
"step": 4370,
|
|
"valid_targets_mean": 3770.9,
|
|
"valid_targets_min": 295
|
|
},
|
|
{
|
|
"epoch": 6.955484896661368,
|
|
"grad_norm": 0.5049584504832715,
|
|
"learning_rate": 5.2874669024616246e-09,
|
|
"loss": 0.1168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1203048899769783,
|
|
"step": 4375,
|
|
"valid_targets_mean": 4105.9,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 6.963434022257552,
|
|
"grad_norm": 0.5411424955946904,
|
|
"learning_rate": 3.621430705467166e-09,
|
|
"loss": 0.118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1432824283838272,
|
|
"step": 4380,
|
|
"valid_targets_mean": 3633.6,
|
|
"valid_targets_min": 834
|
|
},
|
|
{
|
|
"epoch": 6.971383147853736,
|
|
"grad_norm": 0.508251429943314,
|
|
"learning_rate": 2.2697069650456927e-09,
|
|
"loss": 0.1183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11797773092985153,
|
|
"step": 4385,
|
|
"valid_targets_mean": 3701.4,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 6.979332273449921,
|
|
"grad_norm": 0.48126946316204494,
|
|
"learning_rate": 1.2323169282257852e-09,
|
|
"loss": 0.1223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12007807195186615,
|
|
"step": 4390,
|
|
"valid_targets_mean": 4153.2,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 6.987281399046105,
|
|
"grad_norm": 0.5042572172371226,
|
|
"learning_rate": 5.092769011860732e-10,
|
|
"loss": 0.1192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13380548357963562,
|
|
"step": 4395,
|
|
"valid_targets_mean": 3818.6,
|
|
"valid_targets_min": 1226
|
|
},
|
|
{
|
|
"epoch": 6.995230524642289,
|
|
"grad_norm": 0.5243147413835153,
|
|
"learning_rate": 1.0059824901098581e-10,
|
|
"loss": 0.106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10705508291721344,
|
|
"step": 4400,
|
|
"valid_targets_mean": 3303.4,
|
|
"valid_targets_min": 802
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10637553036212921,
|
|
"step": 4403,
|
|
"total_flos": 1578618973913088.0,
|
|
"train_loss": 0.16220748680723057,
|
|
"train_runtime": 24708.9634,
|
|
"train_samples_per_second": 2.847,
|
|
"train_steps_per_second": 0.178,
|
|
"valid_targets_mean": 4107.2,
|
|
"valid_targets_min": 717
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4403,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1578618973913088.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|