4646 lines
129 KiB
JSON
4646 lines
129 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 2093,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.016722408026755852,
|
|
"grad_norm": 16.65469389530231,
|
|
"learning_rate": 7.61904761904762e-07,
|
|
"loss": 0.9217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4541673958301544,
|
|
"step": 5,
|
|
"valid_targets_mean": 4281.0,
|
|
"valid_targets_min": 2292
|
|
},
|
|
{
|
|
"epoch": 0.033444816053511704,
|
|
"grad_norm": 9.593711548751804,
|
|
"learning_rate": 1.7142857142857145e-06,
|
|
"loss": 0.854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.393045037984848,
|
|
"step": 10,
|
|
"valid_targets_mean": 4455.8,
|
|
"valid_targets_min": 1901
|
|
},
|
|
{
|
|
"epoch": 0.05016722408026756,
|
|
"grad_norm": 1.570100983978469,
|
|
"learning_rate": 2.666666666666667e-06,
|
|
"loss": 0.664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2813848555088043,
|
|
"step": 15,
|
|
"valid_targets_mean": 5071.8,
|
|
"valid_targets_min": 1859
|
|
},
|
|
{
|
|
"epoch": 0.06688963210702341,
|
|
"grad_norm": 0.84750094814492,
|
|
"learning_rate": 3.6190476190476194e-06,
|
|
"loss": 0.5871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2861042320728302,
|
|
"step": 20,
|
|
"valid_targets_mean": 5426.4,
|
|
"valid_targets_min": 2338
|
|
},
|
|
{
|
|
"epoch": 0.08361204013377926,
|
|
"grad_norm": 0.5078810147061814,
|
|
"learning_rate": 4.571428571428572e-06,
|
|
"loss": 0.5483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25342974066734314,
|
|
"step": 25,
|
|
"valid_targets_mean": 5109.5,
|
|
"valid_targets_min": 1972
|
|
},
|
|
{
|
|
"epoch": 0.10033444816053512,
|
|
"grad_norm": 0.4701383513775354,
|
|
"learning_rate": 5.523809523809525e-06,
|
|
"loss": 0.5212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26867642998695374,
|
|
"step": 30,
|
|
"valid_targets_mean": 6256.6,
|
|
"valid_targets_min": 3286
|
|
},
|
|
{
|
|
"epoch": 0.11705685618729098,
|
|
"grad_norm": 0.38536922214818786,
|
|
"learning_rate": 6.476190476190477e-06,
|
|
"loss": 0.4845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25451430678367615,
|
|
"step": 35,
|
|
"valid_targets_mean": 5565.3,
|
|
"valid_targets_min": 2259
|
|
},
|
|
{
|
|
"epoch": 0.13377926421404682,
|
|
"grad_norm": 0.33409826139153187,
|
|
"learning_rate": 7.428571428571429e-06,
|
|
"loss": 0.4788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2341545969247818,
|
|
"step": 40,
|
|
"valid_targets_mean": 5089.4,
|
|
"valid_targets_min": 2021
|
|
},
|
|
{
|
|
"epoch": 0.1505016722408027,
|
|
"grad_norm": 0.29402767250702627,
|
|
"learning_rate": 8.380952380952382e-06,
|
|
"loss": 0.4532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2262199968099594,
|
|
"step": 45,
|
|
"valid_targets_mean": 5502.6,
|
|
"valid_targets_min": 2376
|
|
},
|
|
{
|
|
"epoch": 0.16722408026755853,
|
|
"grad_norm": 0.2656148146704675,
|
|
"learning_rate": 9.333333333333334e-06,
|
|
"loss": 0.4412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24271585047245026,
|
|
"step": 50,
|
|
"valid_targets_mean": 5302.3,
|
|
"valid_targets_min": 2051
|
|
},
|
|
{
|
|
"epoch": 0.18394648829431437,
|
|
"grad_norm": 0.3204626482537776,
|
|
"learning_rate": 1.0285714285714285e-05,
|
|
"loss": 0.3948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16809582710266113,
|
|
"step": 55,
|
|
"valid_targets_mean": 6117.5,
|
|
"valid_targets_min": 3261
|
|
},
|
|
{
|
|
"epoch": 0.20066889632107024,
|
|
"grad_norm": 0.2812887163361065,
|
|
"learning_rate": 1.1238095238095239e-05,
|
|
"loss": 0.319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15198282897472382,
|
|
"step": 60,
|
|
"valid_targets_mean": 6000.6,
|
|
"valid_targets_min": 3132
|
|
},
|
|
{
|
|
"epoch": 0.21739130434782608,
|
|
"grad_norm": 0.29854600716101576,
|
|
"learning_rate": 1.2190476190476192e-05,
|
|
"loss": 0.3087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15732724964618683,
|
|
"step": 65,
|
|
"valid_targets_mean": 6224.2,
|
|
"valid_targets_min": 1257
|
|
},
|
|
{
|
|
"epoch": 0.23411371237458195,
|
|
"grad_norm": 0.22782864305925915,
|
|
"learning_rate": 1.3142857142857145e-05,
|
|
"loss": 0.2941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1558077484369278,
|
|
"step": 70,
|
|
"valid_targets_mean": 6605.7,
|
|
"valid_targets_min": 2342
|
|
},
|
|
{
|
|
"epoch": 0.2508361204013378,
|
|
"grad_norm": 0.21436743354828944,
|
|
"learning_rate": 1.4095238095238097e-05,
|
|
"loss": 0.2847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1520961970090866,
|
|
"step": 75,
|
|
"valid_targets_mean": 6263.8,
|
|
"valid_targets_min": 3362
|
|
},
|
|
{
|
|
"epoch": 0.26755852842809363,
|
|
"grad_norm": 0.3788590808038562,
|
|
"learning_rate": 1.5047619047619049e-05,
|
|
"loss": 0.346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2589488923549652,
|
|
"step": 80,
|
|
"valid_targets_mean": 6788.9,
|
|
"valid_targets_min": 338
|
|
},
|
|
{
|
|
"epoch": 0.2842809364548495,
|
|
"grad_norm": 0.28475255883147405,
|
|
"learning_rate": 1.6000000000000003e-05,
|
|
"loss": 0.4682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2424606829881668,
|
|
"step": 85,
|
|
"valid_targets_mean": 7302.9,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 0.3010033444816054,
|
|
"grad_norm": 0.2729199003649453,
|
|
"learning_rate": 1.6952380952380955e-05,
|
|
"loss": 0.4518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2599203884601593,
|
|
"step": 90,
|
|
"valid_targets_mean": 8350.1,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 0.3177257525083612,
|
|
"grad_norm": 0.25467178487656233,
|
|
"learning_rate": 1.7904761904761907e-05,
|
|
"loss": 0.4346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23396766185760498,
|
|
"step": 95,
|
|
"valid_targets_mean": 7376.1,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 0.33444816053511706,
|
|
"grad_norm": 0.31673188804013747,
|
|
"learning_rate": 1.885714285714286e-05,
|
|
"loss": 0.3054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12907277047634125,
|
|
"step": 100,
|
|
"valid_targets_mean": 6250.0,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 0.3511705685618729,
|
|
"grad_norm": 0.29233038275986123,
|
|
"learning_rate": 1.980952380952381e-05,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12832678854465485,
|
|
"step": 105,
|
|
"valid_targets_mean": 6208.8,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 0.36789297658862874,
|
|
"grad_norm": 0.2541036396766231,
|
|
"learning_rate": 2.0761904761904767e-05,
|
|
"loss": 0.2461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11488709598779678,
|
|
"step": 110,
|
|
"valid_targets_mean": 5629.8,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 0.38461538461538464,
|
|
"grad_norm": 0.2723708632885485,
|
|
"learning_rate": 2.1714285714285715e-05,
|
|
"loss": 0.2311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11113937944173813,
|
|
"step": 115,
|
|
"valid_targets_mean": 5685.6,
|
|
"valid_targets_min": 971
|
|
},
|
|
{
|
|
"epoch": 0.4013377926421405,
|
|
"grad_norm": 0.22730208159217696,
|
|
"learning_rate": 2.2666666666666668e-05,
|
|
"loss": 0.2291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11613597720861435,
|
|
"step": 120,
|
|
"valid_targets_mean": 6092.8,
|
|
"valid_targets_min": 1144
|
|
},
|
|
{
|
|
"epoch": 0.4180602006688963,
|
|
"grad_norm": 0.25419858920754484,
|
|
"learning_rate": 2.361904761904762e-05,
|
|
"loss": 0.2256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11525461077690125,
|
|
"step": 125,
|
|
"valid_targets_mean": 6219.2,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 0.43478260869565216,
|
|
"grad_norm": 0.26108118197754543,
|
|
"learning_rate": 2.4571428571428575e-05,
|
|
"loss": 0.224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11281055212020874,
|
|
"step": 130,
|
|
"valid_targets_mean": 5671.3,
|
|
"valid_targets_min": 1221
|
|
},
|
|
{
|
|
"epoch": 0.451505016722408,
|
|
"grad_norm": 0.1981415430994643,
|
|
"learning_rate": 2.5523809523809524e-05,
|
|
"loss": 0.2181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11217977851629257,
|
|
"step": 135,
|
|
"valid_targets_mean": 7040.9,
|
|
"valid_targets_min": 1196
|
|
},
|
|
{
|
|
"epoch": 0.4682274247491639,
|
|
"grad_norm": 0.25750793385224546,
|
|
"learning_rate": 2.6476190476190476e-05,
|
|
"loss": 0.2219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10468745231628418,
|
|
"step": 140,
|
|
"valid_targets_mean": 5457.4,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 0.48494983277591974,
|
|
"grad_norm": 0.6227727198825252,
|
|
"learning_rate": 2.742857142857143e-05,
|
|
"loss": 0.3323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23783187568187714,
|
|
"step": 145,
|
|
"valid_targets_mean": 4684.6,
|
|
"valid_targets_min": 2287
|
|
},
|
|
{
|
|
"epoch": 0.5016722408026756,
|
|
"grad_norm": 0.49941907240735156,
|
|
"learning_rate": 2.8380952380952384e-05,
|
|
"loss": 0.4493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23639976978302002,
|
|
"step": 150,
|
|
"valid_targets_mean": 4927.4,
|
|
"valid_targets_min": 1920
|
|
},
|
|
{
|
|
"epoch": 0.5183946488294314,
|
|
"grad_norm": 0.39796557153947,
|
|
"learning_rate": 2.9333333333333333e-05,
|
|
"loss": 0.4398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2140408754348755,
|
|
"step": 155,
|
|
"valid_targets_mean": 4733.8,
|
|
"valid_targets_min": 2083
|
|
},
|
|
{
|
|
"epoch": 0.5351170568561873,
|
|
"grad_norm": 0.37126621077721994,
|
|
"learning_rate": 3.0285714285714288e-05,
|
|
"loss": 0.4196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.207777738571167,
|
|
"step": 160,
|
|
"valid_targets_mean": 4550.0,
|
|
"valid_targets_min": 1614
|
|
},
|
|
{
|
|
"epoch": 0.5518394648829431,
|
|
"grad_norm": 0.30166803473742143,
|
|
"learning_rate": 3.1238095238095244e-05,
|
|
"loss": 0.4087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19754235446453094,
|
|
"step": 165,
|
|
"valid_targets_mean": 4728.0,
|
|
"valid_targets_min": 1837
|
|
},
|
|
{
|
|
"epoch": 0.568561872909699,
|
|
"grad_norm": 0.3408739294459517,
|
|
"learning_rate": 3.219047619047619e-05,
|
|
"loss": 0.3979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19394855201244354,
|
|
"step": 170,
|
|
"valid_targets_mean": 4336.6,
|
|
"valid_targets_min": 1318
|
|
},
|
|
{
|
|
"epoch": 0.5852842809364549,
|
|
"grad_norm": 0.31019048370693647,
|
|
"learning_rate": 3.314285714285715e-05,
|
|
"loss": 0.405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20203059911727905,
|
|
"step": 175,
|
|
"valid_targets_mean": 4800.6,
|
|
"valid_targets_min": 1322
|
|
},
|
|
{
|
|
"epoch": 0.6020066889632107,
|
|
"grad_norm": 0.3839714662821696,
|
|
"learning_rate": 3.40952380952381e-05,
|
|
"loss": 0.3867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16273044049739838,
|
|
"step": 180,
|
|
"valid_targets_mean": 4165.9,
|
|
"valid_targets_min": 2248
|
|
},
|
|
{
|
|
"epoch": 0.6187290969899666,
|
|
"grad_norm": 0.4107593736826153,
|
|
"learning_rate": 3.504761904761905e-05,
|
|
"loss": 0.3502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18431758880615234,
|
|
"step": 185,
|
|
"valid_targets_mean": 4880.2,
|
|
"valid_targets_min": 1768
|
|
},
|
|
{
|
|
"epoch": 0.6354515050167224,
|
|
"grad_norm": 0.34114230080891156,
|
|
"learning_rate": 3.6e-05,
|
|
"loss": 0.3371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1555314064025879,
|
|
"step": 190,
|
|
"valid_targets_mean": 4474.0,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 0.6521739130434783,
|
|
"grad_norm": 0.37969459969458824,
|
|
"learning_rate": 3.6952380952380956e-05,
|
|
"loss": 0.3298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1679573506116867,
|
|
"step": 195,
|
|
"valid_targets_mean": 4282.3,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 0.6688963210702341,
|
|
"grad_norm": 0.389114179637553,
|
|
"learning_rate": 3.7904761904761905e-05,
|
|
"loss": 0.3195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1570255607366562,
|
|
"step": 200,
|
|
"valid_targets_mean": 4385.5,
|
|
"valid_targets_min": 2771
|
|
},
|
|
{
|
|
"epoch": 0.68561872909699,
|
|
"grad_norm": 0.38150130690993866,
|
|
"learning_rate": 3.885714285714286e-05,
|
|
"loss": 0.3137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15566571056842804,
|
|
"step": 205,
|
|
"valid_targets_mean": 4312.4,
|
|
"valid_targets_min": 1839
|
|
},
|
|
{
|
|
"epoch": 0.7023411371237458,
|
|
"grad_norm": 0.3822800230808161,
|
|
"learning_rate": 3.9809523809523816e-05,
|
|
"loss": 0.3204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16227751970291138,
|
|
"step": 210,
|
|
"valid_targets_mean": 4501.5,
|
|
"valid_targets_min": 383
|
|
},
|
|
{
|
|
"epoch": 0.7190635451505016,
|
|
"grad_norm": 0.36852764382049585,
|
|
"learning_rate": 3.999955463357239e-05,
|
|
"loss": 0.3277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16031832993030548,
|
|
"step": 215,
|
|
"valid_targets_mean": 4562.2,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 0.7357859531772575,
|
|
"grad_norm": 0.28587522453297876,
|
|
"learning_rate": 3.999774536645491e-05,
|
|
"loss": 0.314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1489599645137787,
|
|
"step": 220,
|
|
"valid_targets_mean": 4599.1,
|
|
"valid_targets_min": 379
|
|
},
|
|
{
|
|
"epoch": 0.7525083612040134,
|
|
"grad_norm": 0.7017797603303964,
|
|
"learning_rate": 3.999454448905291e-05,
|
|
"loss": 0.5877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.250739723443985,
|
|
"step": 225,
|
|
"valid_targets_mean": 4988.7,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 0.7692307692307693,
|
|
"grad_norm": 0.435345805517371,
|
|
"learning_rate": 3.998995222411022e-05,
|
|
"loss": 0.3291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1361171454191208,
|
|
"step": 230,
|
|
"valid_targets_mean": 2713.3,
|
|
"valid_targets_min": 403
|
|
},
|
|
{
|
|
"epoch": 0.7859531772575251,
|
|
"grad_norm": 0.7349361666208011,
|
|
"learning_rate": 3.9983968891194997e-05,
|
|
"loss": 0.6989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3085998594760895,
|
|
"step": 235,
|
|
"valid_targets_mean": 4844.5,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 0.802675585284281,
|
|
"grad_norm": 0.3866279789233474,
|
|
"learning_rate": 3.9976594906677575e-05,
|
|
"loss": 0.3249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16373255848884583,
|
|
"step": 240,
|
|
"valid_targets_mean": 4910.9,
|
|
"valid_targets_min": 370
|
|
},
|
|
{
|
|
"epoch": 0.8193979933110368,
|
|
"grad_norm": 0.9140356340504202,
|
|
"learning_rate": 3.996783078370143e-05,
|
|
"loss": 0.6117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3634010851383209,
|
|
"step": 245,
|
|
"valid_targets_mean": 728.3,
|
|
"valid_targets_min": 452
|
|
},
|
|
{
|
|
"epoch": 0.8361204013377926,
|
|
"grad_norm": 0.3349680479270844,
|
|
"learning_rate": 3.9957677132147496e-05,
|
|
"loss": 0.3343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16286388039588928,
|
|
"step": 250,
|
|
"valid_targets_mean": 4948.3,
|
|
"valid_targets_min": 406
|
|
},
|
|
{
|
|
"epoch": 0.8528428093645485,
|
|
"grad_norm": 0.9495016677582169,
|
|
"learning_rate": 3.994613465859173e-05,
|
|
"loss": 0.5705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40535733103752136,
|
|
"step": 255,
|
|
"valid_targets_mean": 736.7,
|
|
"valid_targets_min": 347
|
|
},
|
|
{
|
|
"epoch": 0.8695652173913043,
|
|
"grad_norm": 0.34347398365785503,
|
|
"learning_rate": 3.993320416625592e-05,
|
|
"loss": 0.3476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13200610876083374,
|
|
"step": 260,
|
|
"valid_targets_mean": 4206.8,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 0.8862876254180602,
|
|
"grad_norm": 0.8927378124011034,
|
|
"learning_rate": 3.991888655495181e-05,
|
|
"loss": 0.5929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3799232244491577,
|
|
"step": 265,
|
|
"valid_targets_mean": 769.7,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 0.903010033444816,
|
|
"grad_norm": 0.37028609234607096,
|
|
"learning_rate": 3.9903182821018484e-05,
|
|
"loss": 0.4121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17529194056987762,
|
|
"step": 270,
|
|
"valid_targets_mean": 5161.2,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 0.919732441471572,
|
|
"grad_norm": 1.197448383161608,
|
|
"learning_rate": 3.988609405725301e-05,
|
|
"loss": 0.5054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36851346492767334,
|
|
"step": 275,
|
|
"valid_targets_mean": 679.1,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 0.9364548494983278,
|
|
"grad_norm": 0.2958879336001699,
|
|
"learning_rate": 3.9867621452834413e-05,
|
|
"loss": 0.475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16947634518146515,
|
|
"step": 280,
|
|
"valid_targets_mean": 4858.9,
|
|
"valid_targets_min": 426
|
|
},
|
|
{
|
|
"epoch": 0.9531772575250836,
|
|
"grad_norm": 1.4787304729940822,
|
|
"learning_rate": 3.984776629324093e-05,
|
|
"loss": 0.4096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41241344809532166,
|
|
"step": 285,
|
|
"valid_targets_mean": 669.9,
|
|
"valid_targets_min": 317
|
|
},
|
|
{
|
|
"epoch": 0.9698996655518395,
|
|
"grad_norm": 0.3873562573360048,
|
|
"learning_rate": 3.982652996016053e-05,
|
|
"loss": 0.5384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1752433031797409,
|
|
"step": 290,
|
|
"valid_targets_mean": 4958.1,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 0.9866220735785953,
|
|
"grad_norm": 0.312381694758762,
|
|
"learning_rate": 3.9803913931394775e-05,
|
|
"loss": 0.319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15231667459011078,
|
|
"step": 295,
|
|
"valid_targets_mean": 2792.8,
|
|
"valid_targets_min": 227
|
|
},
|
|
{
|
|
"epoch": 1.0033444816053512,
|
|
"grad_norm": 0.5948962191188035,
|
|
"learning_rate": 3.977991978075602e-05,
|
|
"loss": 0.6545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2730309069156647,
|
|
"step": 300,
|
|
"valid_targets_mean": 5056.6,
|
|
"valid_targets_min": 2086
|
|
},
|
|
{
|
|
"epoch": 1.020066889632107,
|
|
"grad_norm": 0.38921571773674396,
|
|
"learning_rate": 3.975454917795781e-05,
|
|
"loss": 0.4976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24437852203845978,
|
|
"step": 305,
|
|
"valid_targets_mean": 4833.2,
|
|
"valid_targets_min": 1498
|
|
},
|
|
{
|
|
"epoch": 1.0367892976588629,
|
|
"grad_norm": 0.30272526953365064,
|
|
"learning_rate": 3.972780388849878e-05,
|
|
"loss": 0.4666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21178661286830902,
|
|
"step": 310,
|
|
"valid_targets_mean": 4094.9,
|
|
"valid_targets_min": 2016
|
|
},
|
|
{
|
|
"epoch": 1.0535117056856187,
|
|
"grad_norm": 0.2699551855197249,
|
|
"learning_rate": 3.9699685773539715e-05,
|
|
"loss": 0.4113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21475844085216522,
|
|
"step": 315,
|
|
"valid_targets_mean": 5057.2,
|
|
"valid_targets_min": 1996
|
|
},
|
|
{
|
|
"epoch": 1.0702341137123745,
|
|
"grad_norm": 0.21893353596472484,
|
|
"learning_rate": 3.967019678977411e-05,
|
|
"loss": 0.3813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19509156048297882,
|
|
"step": 320,
|
|
"valid_targets_mean": 5574.8,
|
|
"valid_targets_min": 2361
|
|
},
|
|
{
|
|
"epoch": 1.0869565217391304,
|
|
"grad_norm": 0.21729025220281234,
|
|
"learning_rate": 3.963933898929195e-05,
|
|
"loss": 0.3789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18215352296829224,
|
|
"step": 325,
|
|
"valid_targets_mean": 4868.2,
|
|
"valid_targets_min": 2343
|
|
},
|
|
{
|
|
"epoch": 1.1036789297658862,
|
|
"grad_norm": 0.1917178342232316,
|
|
"learning_rate": 3.9607114519436945e-05,
|
|
"loss": 0.3675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18778294324874878,
|
|
"step": 330,
|
|
"valid_targets_mean": 5462.7,
|
|
"valid_targets_min": 1895
|
|
},
|
|
{
|
|
"epoch": 1.120401337792642,
|
|
"grad_norm": 0.21079187892149256,
|
|
"learning_rate": 3.9573525622657055e-05,
|
|
"loss": 0.3605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1816849261522293,
|
|
"step": 335,
|
|
"valid_targets_mean": 4551.1,
|
|
"valid_targets_min": 1893
|
|
},
|
|
{
|
|
"epoch": 1.137123745819398,
|
|
"grad_norm": 0.20247511642068503,
|
|
"learning_rate": 3.9538574636348494e-05,
|
|
"loss": 0.3598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1871730089187622,
|
|
"step": 340,
|
|
"valid_targets_mean": 5626.3,
|
|
"valid_targets_min": 1920
|
|
},
|
|
{
|
|
"epoch": 1.1538461538461537,
|
|
"grad_norm": 0.1964361499904833,
|
|
"learning_rate": 3.950226399269305e-05,
|
|
"loss": 0.3461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18824917078018188,
|
|
"step": 345,
|
|
"valid_targets_mean": 6059.1,
|
|
"valid_targets_min": 2069
|
|
},
|
|
{
|
|
"epoch": 1.1705685618729098,
|
|
"grad_norm": 0.20328519707961729,
|
|
"learning_rate": 3.946459621848883e-05,
|
|
"loss": 0.3469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16231679916381836,
|
|
"step": 350,
|
|
"valid_targets_mean": 5260.9,
|
|
"valid_targets_min": 2059
|
|
},
|
|
{
|
|
"epoch": 1.1872909698996654,
|
|
"grad_norm": 0.2214119309392654,
|
|
"learning_rate": 3.9425573934974425e-05,
|
|
"loss": 0.292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13498373329639435,
|
|
"step": 355,
|
|
"valid_targets_mean": 6925.2,
|
|
"valid_targets_min": 3127
|
|
},
|
|
{
|
|
"epoch": 1.2040133779264215,
|
|
"grad_norm": 0.2033004885946869,
|
|
"learning_rate": 3.938519985764653e-05,
|
|
"loss": 0.2513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12404614686965942,
|
|
"step": 360,
|
|
"valid_targets_mean": 6540.8,
|
|
"valid_targets_min": 3272
|
|
},
|
|
{
|
|
"epoch": 1.2207357859531773,
|
|
"grad_norm": 0.16858665820396848,
|
|
"learning_rate": 3.93434767960709e-05,
|
|
"loss": 0.2456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11013186722993851,
|
|
"step": 365,
|
|
"valid_targets_mean": 6424.6,
|
|
"valid_targets_min": 3413
|
|
},
|
|
{
|
|
"epoch": 1.2374581939799332,
|
|
"grad_norm": 0.1813400664322866,
|
|
"learning_rate": 3.930040765368695e-05,
|
|
"loss": 0.2398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11691004782915115,
|
|
"step": 370,
|
|
"valid_targets_mean": 6344.0,
|
|
"valid_targets_min": 3854
|
|
},
|
|
{
|
|
"epoch": 1.254180602006689,
|
|
"grad_norm": 0.17239157532788563,
|
|
"learning_rate": 3.9255995427605615e-05,
|
|
"loss": 0.2372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11582476645708084,
|
|
"step": 375,
|
|
"valid_targets_mean": 6136.4,
|
|
"valid_targets_min": 2589
|
|
},
|
|
{
|
|
"epoch": 1.2709030100334449,
|
|
"grad_norm": 0.19587836745577725,
|
|
"learning_rate": 3.9210243208400826e-05,
|
|
"loss": 0.3144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2069324404001236,
|
|
"step": 380,
|
|
"valid_targets_mean": 7202.7,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 1.2876254180602007,
|
|
"grad_norm": 0.2042538628036545,
|
|
"learning_rate": 3.9163154179894455e-05,
|
|
"loss": 0.3901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21518640220165253,
|
|
"step": 385,
|
|
"valid_targets_mean": 7978.9,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 1.3043478260869565,
|
|
"grad_norm": 0.19036800922991576,
|
|
"learning_rate": 3.9114731618934704e-05,
|
|
"loss": 0.3787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1918071061372757,
|
|
"step": 390,
|
|
"valid_targets_mean": 7635.1,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 1.3210702341137124,
|
|
"grad_norm": 0.19179258334792504,
|
|
"learning_rate": 3.906497889516815e-05,
|
|
"loss": 0.3728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16743119060993195,
|
|
"step": 395,
|
|
"valid_targets_mean": 6776.2,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 1.3377926421404682,
|
|
"grad_norm": 0.22710264992687623,
|
|
"learning_rate": 3.901389947080518e-05,
|
|
"loss": 0.2177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10678008943796158,
|
|
"step": 400,
|
|
"valid_targets_mean": 6226.6,
|
|
"valid_targets_min": 1382
|
|
},
|
|
{
|
|
"epoch": 1.354515050167224,
|
|
"grad_norm": 0.20953843167123715,
|
|
"learning_rate": 3.896149690037912e-05,
|
|
"loss": 0.2041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10037939995527267,
|
|
"step": 405,
|
|
"valid_targets_mean": 6309.2,
|
|
"valid_targets_min": 2073
|
|
},
|
|
{
|
|
"epoch": 1.37123745819398,
|
|
"grad_norm": 0.2105128619910076,
|
|
"learning_rate": 3.890777483049887e-05,
|
|
"loss": 0.1983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09858713299036026,
|
|
"step": 410,
|
|
"valid_targets_mean": 5741.7,
|
|
"valid_targets_min": 912
|
|
},
|
|
{
|
|
"epoch": 1.3879598662207357,
|
|
"grad_norm": 0.2016867764429436,
|
|
"learning_rate": 3.88527369995951e-05,
|
|
"loss": 0.1898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09473591297864914,
|
|
"step": 415,
|
|
"valid_targets_mean": 5660.9,
|
|
"valid_targets_min": 928
|
|
},
|
|
{
|
|
"epoch": 1.4046822742474916,
|
|
"grad_norm": 0.18171828963973438,
|
|
"learning_rate": 3.8796387237660157e-05,
|
|
"loss": 0.1886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09629547595977783,
|
|
"step": 420,
|
|
"valid_targets_mean": 5903.8,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 1.4214046822742474,
|
|
"grad_norm": 0.1809288529713271,
|
|
"learning_rate": 3.8738729465981496e-05,
|
|
"loss": 0.1856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09508106857538223,
|
|
"step": 425,
|
|
"valid_targets_mean": 6225.6,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 1.4381270903010033,
|
|
"grad_norm": 0.18193132566668244,
|
|
"learning_rate": 3.8679767696868844e-05,
|
|
"loss": 0.1855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0870060920715332,
|
|
"step": 430,
|
|
"valid_targets_mean": 5885.9,
|
|
"valid_targets_min": 946
|
|
},
|
|
{
|
|
"epoch": 1.4548494983277591,
|
|
"grad_norm": 0.18537658312880498,
|
|
"learning_rate": 3.8619506033374956e-05,
|
|
"loss": 0.1868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09733594208955765,
|
|
"step": 435,
|
|
"valid_targets_mean": 6663.5,
|
|
"valid_targets_min": 1286
|
|
},
|
|
{
|
|
"epoch": 1.471571906354515,
|
|
"grad_norm": 0.20443477400285728,
|
|
"learning_rate": 3.855794866901011e-05,
|
|
"loss": 0.186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09635645896196365,
|
|
"step": 440,
|
|
"valid_targets_mean": 6012.0,
|
|
"valid_targets_min": 1131
|
|
},
|
|
{
|
|
"epoch": 1.488294314381271,
|
|
"grad_norm": 0.6900706239757075,
|
|
"learning_rate": 3.849509988745028e-05,
|
|
"loss": 0.3003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17612408101558685,
|
|
"step": 445,
|
|
"valid_targets_mean": 4919.0,
|
|
"valid_targets_min": 1840
|
|
},
|
|
{
|
|
"epoch": 1.5050167224080266,
|
|
"grad_norm": 0.3885784985540023,
|
|
"learning_rate": 3.843096406223903e-05,
|
|
"loss": 0.3485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18202216923236847,
|
|
"step": 450,
|
|
"valid_targets_mean": 5070.2,
|
|
"valid_targets_min": 2026
|
|
},
|
|
{
|
|
"epoch": 1.5217391304347827,
|
|
"grad_norm": 0.3267311956598327,
|
|
"learning_rate": 3.836554565648321e-05,
|
|
"loss": 0.3429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15799282491207123,
|
|
"step": 455,
|
|
"valid_targets_mean": 4403.6,
|
|
"valid_targets_min": 2244
|
|
},
|
|
{
|
|
"epoch": 1.5384615384615383,
|
|
"grad_norm": 0.30478812314393505,
|
|
"learning_rate": 3.8298849222542315e-05,
|
|
"loss": 0.3348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16468192636966705,
|
|
"step": 460,
|
|
"valid_targets_mean": 4542.4,
|
|
"valid_targets_min": 2002
|
|
},
|
|
{
|
|
"epoch": 1.5551839464882944,
|
|
"grad_norm": 0.29519322395234937,
|
|
"learning_rate": 3.8230879401711734e-05,
|
|
"loss": 0.3212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16495239734649658,
|
|
"step": 465,
|
|
"valid_targets_mean": 4503.7,
|
|
"valid_targets_min": 1789
|
|
},
|
|
{
|
|
"epoch": 1.57190635451505,
|
|
"grad_norm": 0.27399843731267437,
|
|
"learning_rate": 3.816164092389977e-05,
|
|
"loss": 0.321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1542332023382187,
|
|
"step": 470,
|
|
"valid_targets_mean": 4398.6,
|
|
"valid_targets_min": 1668
|
|
},
|
|
{
|
|
"epoch": 1.588628762541806,
|
|
"grad_norm": 0.27297396614450214,
|
|
"learning_rate": 3.809113860729849e-05,
|
|
"loss": 0.3219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1487680822610855,
|
|
"step": 475,
|
|
"valid_targets_mean": 4617.8,
|
|
"valid_targets_min": 1769
|
|
},
|
|
{
|
|
"epoch": 1.605351170568562,
|
|
"grad_norm": 0.3128606762569062,
|
|
"learning_rate": 3.801937735804838e-05,
|
|
"loss": 0.3063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14868587255477905,
|
|
"step": 480,
|
|
"valid_targets_mean": 4698.9,
|
|
"valid_targets_min": 1997
|
|
},
|
|
{
|
|
"epoch": 1.6220735785953178,
|
|
"grad_norm": 0.32206062182647344,
|
|
"learning_rate": 3.794636216989705e-05,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12840931117534637,
|
|
"step": 485,
|
|
"valid_targets_mean": 4525.5,
|
|
"valid_targets_min": 1280
|
|
},
|
|
{
|
|
"epoch": 1.6387959866220736,
|
|
"grad_norm": 0.3455642904980467,
|
|
"learning_rate": 3.787209812385161e-05,
|
|
"loss": 0.2679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1406206637620926,
|
|
"step": 490,
|
|
"valid_targets_mean": 4563.4,
|
|
"valid_targets_min": 2503
|
|
},
|
|
{
|
|
"epoch": 1.6555183946488294,
|
|
"grad_norm": 0.40105956618378386,
|
|
"learning_rate": 3.779659038782517e-05,
|
|
"loss": 0.2581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1317865252494812,
|
|
"step": 495,
|
|
"valid_targets_mean": 4560.3,
|
|
"valid_targets_min": 3580
|
|
},
|
|
{
|
|
"epoch": 1.6722408026755853,
|
|
"grad_norm": 0.3258999220225129,
|
|
"learning_rate": 3.771984421627717e-05,
|
|
"loss": 0.2575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1362745612859726,
|
|
"step": 500,
|
|
"valid_targets_mean": 4502.5,
|
|
"valid_targets_min": 1623
|
|
},
|
|
{
|
|
"epoch": 1.6889632107023411,
|
|
"grad_norm": 0.3752769496609806,
|
|
"learning_rate": 3.764186494984775e-05,
|
|
"loss": 0.2553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.128464937210083,
|
|
"step": 505,
|
|
"valid_targets_mean": 4578.2,
|
|
"valid_targets_min": 890
|
|
},
|
|
{
|
|
"epoch": 1.705685618729097,
|
|
"grad_norm": 0.3121498734366986,
|
|
"learning_rate": 3.75626580149861e-05,
|
|
"loss": 0.2615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12992092967033386,
|
|
"step": 510,
|
|
"valid_targets_mean": 4561.1,
|
|
"valid_targets_min": 2619
|
|
},
|
|
{
|
|
"epoch": 1.7224080267558528,
|
|
"grad_norm": 0.5595878804696304,
|
|
"learning_rate": 3.748222892357284e-05,
|
|
"loss": 0.2676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14274828135967255,
|
|
"step": 515,
|
|
"valid_targets_mean": 5120.4,
|
|
"valid_targets_min": 347
|
|
},
|
|
{
|
|
"epoch": 1.7391304347826086,
|
|
"grad_norm": 0.3265817128388542,
|
|
"learning_rate": 3.7400583272536465e-05,
|
|
"loss": 0.2562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09950075298547745,
|
|
"step": 520,
|
|
"valid_targets_mean": 717.9,
|
|
"valid_targets_min": 153
|
|
},
|
|
{
|
|
"epoch": 1.7558528428093645,
|
|
"grad_norm": 0.5675267325833321,
|
|
"learning_rate": 3.731772674346385e-05,
|
|
"loss": 0.3733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13803930580615997,
|
|
"step": 525,
|
|
"valid_targets_mean": 5254.4,
|
|
"valid_targets_min": 384
|
|
},
|
|
{
|
|
"epoch": 1.7725752508361206,
|
|
"grad_norm": 1.5124573377521735,
|
|
"learning_rate": 3.723366510220489e-05,
|
|
"loss": 0.3494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3287752568721771,
|
|
"step": 530,
|
|
"valid_targets_mean": 689.2,
|
|
"valid_targets_min": 388
|
|
},
|
|
{
|
|
"epoch": 1.7892976588628762,
|
|
"grad_norm": 0.4545429910503294,
|
|
"learning_rate": 3.714840419847126e-05,
|
|
"loss": 0.4001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15079976618289948,
|
|
"step": 535,
|
|
"valid_targets_mean": 4931.9,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 1.8060200668896322,
|
|
"grad_norm": 0.3052044654753761,
|
|
"learning_rate": 3.7061949965429335e-05,
|
|
"loss": 0.2786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14627830684185028,
|
|
"step": 540,
|
|
"valid_targets_mean": 4745.1,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 1.8227424749163879,
|
|
"grad_norm": 0.602161135060335,
|
|
"learning_rate": 3.697430841928733e-05,
|
|
"loss": 0.4611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19379179179668427,
|
|
"step": 545,
|
|
"valid_targets_mean": 4869.9,
|
|
"valid_targets_min": 362
|
|
},
|
|
{
|
|
"epoch": 1.839464882943144,
|
|
"grad_norm": 0.3729176823092706,
|
|
"learning_rate": 3.6885485658876616e-05,
|
|
"loss": 0.288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1470441073179245,
|
|
"step": 550,
|
|
"valid_targets_mean": 4980.0,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 1.8561872909698995,
|
|
"grad_norm": 0.5233193913553243,
|
|
"learning_rate": 3.6795487865227345e-05,
|
|
"loss": 0.4493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2374511957168579,
|
|
"step": 555,
|
|
"valid_targets_mean": 2875.1,
|
|
"valid_targets_min": 438
|
|
},
|
|
{
|
|
"epoch": 1.8729096989966556,
|
|
"grad_norm": 0.340377883110045,
|
|
"learning_rate": 3.6704321301138296e-05,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14260174334049225,
|
|
"step": 560,
|
|
"valid_targets_mean": 4406.9,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 1.8896321070234112,
|
|
"grad_norm": 0.6702280613849935,
|
|
"learning_rate": 3.6611992310741056e-05,
|
|
"loss": 0.4836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24722188711166382,
|
|
"step": 565,
|
|
"valid_targets_mean": 731.0,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 1.9063545150501673,
|
|
"grad_norm": 0.3176467670477657,
|
|
"learning_rate": 3.651850731905856e-05,
|
|
"loss": 0.2988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1421656757593155,
|
|
"step": 570,
|
|
"valid_targets_mean": 5006.6,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 1.9230769230769231,
|
|
"grad_norm": 0.7293779586925504,
|
|
"learning_rate": 3.642387283155797e-05,
|
|
"loss": 0.4605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2578056752681732,
|
|
"step": 575,
|
|
"valid_targets_mean": 746.1,
|
|
"valid_targets_min": 390
|
|
},
|
|
{
|
|
"epoch": 1.939799331103679,
|
|
"grad_norm": 0.26805611729698886,
|
|
"learning_rate": 3.632809543369798e-05,
|
|
"loss": 0.3331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14181950688362122,
|
|
"step": 580,
|
|
"valid_targets_mean": 4372.3,
|
|
"valid_targets_min": 355
|
|
},
|
|
{
|
|
"epoch": 1.9565217391304348,
|
|
"grad_norm": 1.1674722707679637,
|
|
"learning_rate": 3.6231181790470555e-05,
|
|
"loss": 0.4151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27499672770500183,
|
|
"step": 585,
|
|
"valid_targets_mean": 684.0,
|
|
"valid_targets_min": 158
|
|
},
|
|
{
|
|
"epoch": 1.9732441471571907,
|
|
"grad_norm": 0.3367419096913946,
|
|
"learning_rate": 3.613313864593708e-05,
|
|
"loss": 0.3716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1283898502588272,
|
|
"step": 590,
|
|
"valid_targets_mean": 4803.8,
|
|
"valid_targets_min": 395
|
|
},
|
|
{
|
|
"epoch": 1.9899665551839465,
|
|
"grad_norm": 2.15431163354833,
|
|
"learning_rate": 3.6033972822759095e-05,
|
|
"loss": 0.3568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3071735203266144,
|
|
"step": 595,
|
|
"valid_targets_mean": 638.7,
|
|
"valid_targets_min": 373
|
|
},
|
|
{
|
|
"epoch": 2.0066889632107023,
|
|
"grad_norm": 0.44330605535056045,
|
|
"learning_rate": 3.5933691221723525e-05,
|
|
"loss": 0.4513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2030048817396164,
|
|
"step": 600,
|
|
"valid_targets_mean": 4360.0,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 2.0234113712374584,
|
|
"grad_norm": 0.36351840961425325,
|
|
"learning_rate": 3.5832300821262416e-05,
|
|
"loss": 0.3896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1920592039823532,
|
|
"step": 605,
|
|
"valid_targets_mean": 4426.5,
|
|
"valid_targets_min": 2155
|
|
},
|
|
{
|
|
"epoch": 2.040133779264214,
|
|
"grad_norm": 0.3282830998305939,
|
|
"learning_rate": 3.572980867696735e-05,
|
|
"loss": 0.3641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17049561440944672,
|
|
"step": 610,
|
|
"valid_targets_mean": 5113.0,
|
|
"valid_targets_min": 1992
|
|
},
|
|
{
|
|
"epoch": 2.05685618729097,
|
|
"grad_norm": 0.22781340044070733,
|
|
"learning_rate": 3.562622192109848e-05,
|
|
"loss": 0.3313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15022476017475128,
|
|
"step": 615,
|
|
"valid_targets_mean": 5511.5,
|
|
"valid_targets_min": 2217
|
|
},
|
|
{
|
|
"epoch": 2.0735785953177257,
|
|
"grad_norm": 0.22041188225749428,
|
|
"learning_rate": 3.5521547762088164e-05,
|
|
"loss": 0.3187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16398446261882782,
|
|
"step": 620,
|
|
"valid_targets_mean": 5144.7,
|
|
"valid_targets_min": 2132
|
|
},
|
|
{
|
|
"epoch": 2.0903010033444818,
|
|
"grad_norm": 0.2057113626129649,
|
|
"learning_rate": 3.541579348403935e-05,
|
|
"loss": 0.3075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14786700904369354,
|
|
"step": 625,
|
|
"valid_targets_mean": 5073.6,
|
|
"valid_targets_min": 2153
|
|
},
|
|
{
|
|
"epoch": 2.1070234113712374,
|
|
"grad_norm": 0.2036725518881729,
|
|
"learning_rate": 3.530896644621871e-05,
|
|
"loss": 0.3032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15274439752101898,
|
|
"step": 630,
|
|
"valid_targets_mean": 5383.8,
|
|
"valid_targets_min": 1929
|
|
},
|
|
{
|
|
"epoch": 2.1237458193979935,
|
|
"grad_norm": 0.20466937311138828,
|
|
"learning_rate": 3.520107408254451e-05,
|
|
"loss": 0.2914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13721401989459991,
|
|
"step": 635,
|
|
"valid_targets_mean": 4890.5,
|
|
"valid_targets_min": 1777
|
|
},
|
|
{
|
|
"epoch": 2.140468227424749,
|
|
"grad_norm": 0.20020962832894687,
|
|
"learning_rate": 3.50921239010693e-05,
|
|
"loss": 0.2948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15264341235160828,
|
|
"step": 640,
|
|
"valid_targets_mean": 5238.5,
|
|
"valid_targets_min": 1745
|
|
},
|
|
{
|
|
"epoch": 2.157190635451505,
|
|
"grad_norm": 0.19000199622533884,
|
|
"learning_rate": 3.4982123483457405e-05,
|
|
"loss": 0.283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13296349346637726,
|
|
"step": 645,
|
|
"valid_targets_mean": 5576.4,
|
|
"valid_targets_min": 1934
|
|
},
|
|
{
|
|
"epoch": 2.1739130434782608,
|
|
"grad_norm": 0.20276515096848405,
|
|
"learning_rate": 3.4871080484457395e-05,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1579429805278778,
|
|
"step": 650,
|
|
"valid_targets_mean": 5300.1,
|
|
"valid_targets_min": 2036
|
|
},
|
|
{
|
|
"epoch": 2.190635451505017,
|
|
"grad_norm": 0.22148820248130507,
|
|
"learning_rate": 3.4759002631369356e-05,
|
|
"loss": 0.2167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09339672327041626,
|
|
"step": 655,
|
|
"valid_targets_mean": 6081.1,
|
|
"valid_targets_min": 3341
|
|
},
|
|
{
|
|
"epoch": 2.2073578595317724,
|
|
"grad_norm": 0.21073543865295008,
|
|
"learning_rate": 3.4645897723507156e-05,
|
|
"loss": 0.2052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09440391510725021,
|
|
"step": 660,
|
|
"valid_targets_mean": 6194.4,
|
|
"valid_targets_min": 1288
|
|
},
|
|
{
|
|
"epoch": 2.2240802675585285,
|
|
"grad_norm": 0.1957148496929314,
|
|
"learning_rate": 3.4531773631655745e-05,
|
|
"loss": 0.1945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10372451692819595,
|
|
"step": 665,
|
|
"valid_targets_mean": 7182.5,
|
|
"valid_targets_min": 3266
|
|
},
|
|
{
|
|
"epoch": 2.240802675585284,
|
|
"grad_norm": 0.2201334432956784,
|
|
"learning_rate": 3.4416638297523395e-05,
|
|
"loss": 0.1875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09747294336557388,
|
|
"step": 670,
|
|
"valid_targets_mean": 7253.9,
|
|
"valid_targets_min": 2166
|
|
},
|
|
{
|
|
"epoch": 2.25752508361204,
|
|
"grad_norm": 0.18483043482429706,
|
|
"learning_rate": 3.430049973318907e-05,
|
|
"loss": 0.1908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09588015824556351,
|
|
"step": 675,
|
|
"valid_targets_mean": 6681.4,
|
|
"valid_targets_min": 3666
|
|
},
|
|
{
|
|
"epoch": 2.274247491638796,
|
|
"grad_norm": 0.210656387754631,
|
|
"learning_rate": 3.4183366020544895e-05,
|
|
"loss": 0.2929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1745520979166031,
|
|
"step": 680,
|
|
"valid_targets_mean": 6956.1,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 2.290969899665552,
|
|
"grad_norm": 0.19961717806368412,
|
|
"learning_rate": 3.4065245310733715e-05,
|
|
"loss": 0.334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1634805053472519,
|
|
"step": 685,
|
|
"valid_targets_mean": 7114.2,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 2.3076923076923075,
|
|
"grad_norm": 0.23316580081649804,
|
|
"learning_rate": 3.394614582358189e-05,
|
|
"loss": 0.3259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17267517745494843,
|
|
"step": 690,
|
|
"valid_targets_mean": 7117.8,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 2.3244147157190636,
|
|
"grad_norm": 0.3010840545321891,
|
|
"learning_rate": 3.3826075847027306e-05,
|
|
"loss": 0.3011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08766163140535355,
|
|
"step": 695,
|
|
"valid_targets_mean": 6015.6,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 2.3411371237458196,
|
|
"grad_norm": 0.2814795279240952,
|
|
"learning_rate": 3.370504373654259e-05,
|
|
"loss": 0.1655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08037657290697098,
|
|
"step": 700,
|
|
"valid_targets_mean": 5990.2,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 2.3578595317725752,
|
|
"grad_norm": 0.23510298186866793,
|
|
"learning_rate": 3.358305791455371e-05,
|
|
"loss": 0.1602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07550784200429916,
|
|
"step": 705,
|
|
"valid_targets_mean": 5787.7,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 2.374581939799331,
|
|
"grad_norm": 0.20800639121912567,
|
|
"learning_rate": 3.3460126869853824e-05,
|
|
"loss": 0.1547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07413359731435776,
|
|
"step": 710,
|
|
"valid_targets_mean": 6570.8,
|
|
"valid_targets_min": 1213
|
|
},
|
|
{
|
|
"epoch": 2.391304347826087,
|
|
"grad_norm": 0.19167701910450277,
|
|
"learning_rate": 3.333625915701263e-05,
|
|
"loss": 0.1474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07030653953552246,
|
|
"step": 715,
|
|
"valid_targets_mean": 6088.4,
|
|
"valid_targets_min": 1120
|
|
},
|
|
{
|
|
"epoch": 2.408026755852843,
|
|
"grad_norm": 0.2087287620285402,
|
|
"learning_rate": 3.3211463395780994e-05,
|
|
"loss": 0.1472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07316835969686508,
|
|
"step": 720,
|
|
"valid_targets_mean": 6382.9,
|
|
"valid_targets_min": 902
|
|
},
|
|
{
|
|
"epoch": 2.4247491638795986,
|
|
"grad_norm": 0.1918675549640399,
|
|
"learning_rate": 3.3085748270491154e-05,
|
|
"loss": 0.1449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07695066928863525,
|
|
"step": 725,
|
|
"valid_targets_mean": 6330.3,
|
|
"valid_targets_min": 1271
|
|
},
|
|
{
|
|
"epoch": 2.4414715719063547,
|
|
"grad_norm": 0.21291899588138824,
|
|
"learning_rate": 3.2959122529452385e-05,
|
|
"loss": 0.1455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07570717483758926,
|
|
"step": 730,
|
|
"valid_targets_mean": 6307.4,
|
|
"valid_targets_min": 1088
|
|
},
|
|
{
|
|
"epoch": 2.4581939799331103,
|
|
"grad_norm": 0.1992039563986723,
|
|
"learning_rate": 3.283159498434222e-05,
|
|
"loss": 0.15,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07548505067825317,
|
|
"step": 735,
|
|
"valid_targets_mean": 6431.6,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 2.4749163879598663,
|
|
"grad_norm": 0.227273095203105,
|
|
"learning_rate": 3.270317450959327e-05,
|
|
"loss": 0.146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07153384387493134,
|
|
"step": 740,
|
|
"valid_targets_mean": 5631.5,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 2.491638795986622,
|
|
"grad_norm": 0.36461991973033014,
|
|
"learning_rate": 3.2573870041775605e-05,
|
|
"loss": 0.257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13246971368789673,
|
|
"step": 745,
|
|
"valid_targets_mean": 4485.4,
|
|
"valid_targets_min": 1841
|
|
},
|
|
{
|
|
"epoch": 2.508361204013378,
|
|
"grad_norm": 0.4368595090150272,
|
|
"learning_rate": 3.244369057897499e-05,
|
|
"loss": 0.2656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12643788754940033,
|
|
"step": 750,
|
|
"valid_targets_mean": 4666.4,
|
|
"valid_targets_min": 1679
|
|
},
|
|
{
|
|
"epoch": 2.5250836120401337,
|
|
"grad_norm": 0.43022834068848426,
|
|
"learning_rate": 3.2312645180166594e-05,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13408567011356354,
|
|
"step": 755,
|
|
"valid_targets_mean": 5017.3,
|
|
"valid_targets_min": 2363
|
|
},
|
|
{
|
|
"epoch": 2.5418060200668897,
|
|
"grad_norm": 0.3205636021737139,
|
|
"learning_rate": 3.21807429645847e-05,
|
|
"loss": 0.2543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11894502490758896,
|
|
"step": 760,
|
|
"valid_targets_mean": 4183.5,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 2.5585284280936453,
|
|
"grad_norm": 0.3517141085896595,
|
|
"learning_rate": 3.204799311108804e-05,
|
|
"loss": 0.2453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1104094386100769,
|
|
"step": 765,
|
|
"valid_targets_mean": 4283.4,
|
|
"valid_targets_min": 1979
|
|
},
|
|
{
|
|
"epoch": 2.5752508361204014,
|
|
"grad_norm": 0.337517465766499,
|
|
"learning_rate": 3.191440485752107e-05,
|
|
"loss": 0.2499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1099214181303978,
|
|
"step": 770,
|
|
"valid_targets_mean": 3987.4,
|
|
"valid_targets_min": 1948
|
|
},
|
|
{
|
|
"epoch": 2.591973244147157,
|
|
"grad_norm": 0.32119040206162525,
|
|
"learning_rate": 3.1779987500071164e-05,
|
|
"loss": 0.2513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13091079890727997,
|
|
"step": 775,
|
|
"valid_targets_mean": 4888.4,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 2.608695652173913,
|
|
"grad_norm": 0.2783326435164322,
|
|
"learning_rate": 3.1644750392621634e-05,
|
|
"loss": 0.2299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10899200290441513,
|
|
"step": 780,
|
|
"valid_targets_mean": 4523.4,
|
|
"valid_targets_min": 2893
|
|
},
|
|
{
|
|
"epoch": 2.625418060200669,
|
|
"grad_norm": 0.3442451875052845,
|
|
"learning_rate": 3.150870294610087e-05,
|
|
"loss": 0.2161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11050713062286377,
|
|
"step": 785,
|
|
"valid_targets_mean": 4427.6,
|
|
"valid_targets_min": 2169
|
|
},
|
|
{
|
|
"epoch": 2.6421404682274248,
|
|
"grad_norm": 0.4774968320705788,
|
|
"learning_rate": 3.137185462782744e-05,
|
|
"loss": 0.2136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10765110701322556,
|
|
"step": 790,
|
|
"valid_targets_mean": 4546.0,
|
|
"valid_targets_min": 3727
|
|
},
|
|
{
|
|
"epoch": 2.6588628762541804,
|
|
"grad_norm": 0.3647466799162243,
|
|
"learning_rate": 3.1234214960851236e-05,
|
|
"loss": 0.2078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09761184453964233,
|
|
"step": 795,
|
|
"valid_targets_mean": 4309.6,
|
|
"valid_targets_min": 441
|
|
},
|
|
{
|
|
"epoch": 2.6755852842809364,
|
|
"grad_norm": 0.2987685668941293,
|
|
"learning_rate": 3.1095793523290825e-05,
|
|
"loss": 0.204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10194583982229233,
|
|
"step": 800,
|
|
"valid_targets_mean": 4764.8,
|
|
"valid_targets_min": 1354
|
|
},
|
|
{
|
|
"epoch": 2.6923076923076925,
|
|
"grad_norm": 0.30481754920393417,
|
|
"learning_rate": 3.095659994766691e-05,
|
|
"loss": 0.2032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09286397695541382,
|
|
"step": 805,
|
|
"valid_targets_mean": 4231.1,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 2.709030100334448,
|
|
"grad_norm": 0.37844360697995544,
|
|
"learning_rate": 3.0816643920232e-05,
|
|
"loss": 0.2126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11299896985292435,
|
|
"step": 810,
|
|
"valid_targets_mean": 4443.2,
|
|
"valid_targets_min": 892
|
|
},
|
|
{
|
|
"epoch": 2.7257525083612038,
|
|
"grad_norm": 0.21622121491262486,
|
|
"learning_rate": 3.0675935180296375e-05,
|
|
"loss": 0.2179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1209292933344841,
|
|
"step": 815,
|
|
"valid_targets_mean": 5116.7,
|
|
"valid_targets_min": 396
|
|
},
|
|
{
|
|
"epoch": 2.74247491638796,
|
|
"grad_norm": 0.7617613264655086,
|
|
"learning_rate": 3.053448351955036e-05,
|
|
"loss": 0.2229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1357937455177307,
|
|
"step": 820,
|
|
"valid_targets_mean": 678.9,
|
|
"valid_targets_min": 411
|
|
},
|
|
{
|
|
"epoch": 2.759197324414716,
|
|
"grad_norm": 0.6412500416085034,
|
|
"learning_rate": 3.0392298781382918e-05,
|
|
"loss": 0.2261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13528214395046234,
|
|
"step": 825,
|
|
"valid_targets_mean": 5045.1,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 2.7759197324414715,
|
|
"grad_norm": 0.744391863791493,
|
|
"learning_rate": 3.024939086019664e-05,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1381472945213318,
|
|
"step": 830,
|
|
"valid_targets_mean": 677.9,
|
|
"valid_targets_min": 259
|
|
},
|
|
{
|
|
"epoch": 2.7926421404682276,
|
|
"grad_norm": 0.3546929694137922,
|
|
"learning_rate": 3.0105769700719255e-05,
|
|
"loss": 0.2505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1281571090221405,
|
|
"step": 835,
|
|
"valid_targets_mean": 4906.8,
|
|
"valid_targets_min": 2644
|
|
},
|
|
{
|
|
"epoch": 2.809364548494983,
|
|
"grad_norm": 0.6325706043429706,
|
|
"learning_rate": 2.9961445297311597e-05,
|
|
"loss": 0.2581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12168969959020615,
|
|
"step": 840,
|
|
"valid_targets_mean": 664.2,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 2.8260869565217392,
|
|
"grad_norm": 0.42510351874193775,
|
|
"learning_rate": 2.9816427693272044e-05,
|
|
"loss": 0.3101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11830226331949234,
|
|
"step": 845,
|
|
"valid_targets_mean": 4205.6,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 2.842809364548495,
|
|
"grad_norm": 0.27335649239404725,
|
|
"learning_rate": 2.9670726980137695e-05,
|
|
"loss": 0.2501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11671405285596848,
|
|
"step": 850,
|
|
"valid_targets_mean": 4656.9,
|
|
"valid_targets_min": 653
|
|
},
|
|
{
|
|
"epoch": 2.859531772575251,
|
|
"grad_norm": 0.4125244847863993,
|
|
"learning_rate": 2.9524353296982104e-05,
|
|
"loss": 0.3338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12057509273290634,
|
|
"step": 855,
|
|
"valid_targets_mean": 4234.8,
|
|
"valid_targets_min": 358
|
|
},
|
|
{
|
|
"epoch": 2.8762541806020065,
|
|
"grad_norm": 0.3077713827918398,
|
|
"learning_rate": 2.9377316829709675e-05,
|
|
"loss": 0.25,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08487764745950699,
|
|
"step": 860,
|
|
"valid_targets_mean": 1557.8,
|
|
"valid_targets_min": 386
|
|
},
|
|
{
|
|
"epoch": 2.8929765886287626,
|
|
"grad_norm": 0.5085424033939305,
|
|
"learning_rate": 2.9229627810346882e-05,
|
|
"loss": 0.3177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15053991973400116,
|
|
"step": 865,
|
|
"valid_targets_mean": 4253.9,
|
|
"valid_targets_min": 403
|
|
},
|
|
{
|
|
"epoch": 2.9096989966555182,
|
|
"grad_norm": 0.3019343753323513,
|
|
"learning_rate": 2.908129651633024e-05,
|
|
"loss": 0.2477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11827830225229263,
|
|
"step": 870,
|
|
"valid_targets_mean": 4932.3,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 2.9264214046822743,
|
|
"grad_norm": 0.5854816841905488,
|
|
"learning_rate": 2.8932333269791094e-05,
|
|
"loss": 0.3338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15972377359867096,
|
|
"step": 875,
|
|
"valid_targets_mean": 733.3,
|
|
"valid_targets_min": 404
|
|
},
|
|
{
|
|
"epoch": 2.94314381270903,
|
|
"grad_norm": 0.296472067853378,
|
|
"learning_rate": 2.8782748436837316e-05,
|
|
"loss": 0.2613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12671761214733124,
|
|
"step": 880,
|
|
"valid_targets_mean": 4460.5,
|
|
"valid_targets_min": 393
|
|
},
|
|
{
|
|
"epoch": 2.959866220735786,
|
|
"grad_norm": 0.6395374434495776,
|
|
"learning_rate": 2.8632552426831974e-05,
|
|
"loss": 0.329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16522859036922455,
|
|
"step": 885,
|
|
"valid_targets_mean": 747.6,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 2.976588628762542,
|
|
"grad_norm": 0.29360390623161053,
|
|
"learning_rate": 2.8481755691668947e-05,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13144955039024353,
|
|
"step": 890,
|
|
"valid_targets_mean": 4944.9,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 2.9933110367892977,
|
|
"grad_norm": 0.7053314197565032,
|
|
"learning_rate": 2.833036872504557e-05,
|
|
"loss": 0.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16579367220401764,
|
|
"step": 895,
|
|
"valid_targets_mean": 652.0,
|
|
"valid_targets_min": 339
|
|
},
|
|
{
|
|
"epoch": 3.0100334448160537,
|
|
"grad_norm": 0.44701962439061516,
|
|
"learning_rate": 2.8178402061732446e-05,
|
|
"loss": 0.3089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.177859827876091,
|
|
"step": 900,
|
|
"valid_targets_mean": 4865.0,
|
|
"valid_targets_min": 1380
|
|
},
|
|
{
|
|
"epoch": 3.0267558528428093,
|
|
"grad_norm": 0.39201988474144633,
|
|
"learning_rate": 2.802586627684028e-05,
|
|
"loss": 0.3051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14985615015029907,
|
|
"step": 905,
|
|
"valid_targets_mean": 4732.8,
|
|
"valid_targets_min": 2309
|
|
},
|
|
{
|
|
"epoch": 3.0434782608695654,
|
|
"grad_norm": 0.337444338673164,
|
|
"learning_rate": 2.787277198508406e-05,
|
|
"loss": 0.2951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15656296908855438,
|
|
"step": 910,
|
|
"valid_targets_mean": 5218.4,
|
|
"valid_targets_min": 1617
|
|
},
|
|
{
|
|
"epoch": 3.060200668896321,
|
|
"grad_norm": 0.25949553075077986,
|
|
"learning_rate": 2.7719129840044315e-05,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1286637783050537,
|
|
"step": 915,
|
|
"valid_targets_mean": 5345.2,
|
|
"valid_targets_min": 1714
|
|
},
|
|
{
|
|
"epoch": 3.076923076923077,
|
|
"grad_norm": 0.25673343017201206,
|
|
"learning_rate": 2.7564950533425796e-05,
|
|
"loss": 0.2697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12565726041793823,
|
|
"step": 920,
|
|
"valid_targets_mean": 4840.4,
|
|
"valid_targets_min": 1973
|
|
},
|
|
{
|
|
"epoch": 3.0936454849498327,
|
|
"grad_norm": 0.241397514642209,
|
|
"learning_rate": 2.7410244794313455e-05,
|
|
"loss": 0.2592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1276269406080246,
|
|
"step": 925,
|
|
"valid_targets_mean": 5385.5,
|
|
"valid_targets_min": 1742
|
|
},
|
|
{
|
|
"epoch": 3.1103678929765888,
|
|
"grad_norm": 0.22191745974908927,
|
|
"learning_rate": 2.7255023388425805e-05,
|
|
"loss": 0.2482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11525890976190567,
|
|
"step": 930,
|
|
"valid_targets_mean": 5173.5,
|
|
"valid_targets_min": 1992
|
|
},
|
|
{
|
|
"epoch": 3.1270903010033444,
|
|
"grad_norm": 0.22893723673109465,
|
|
"learning_rate": 2.709929711736578e-05,
|
|
"loss": 0.2366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12402933835983276,
|
|
"step": 935,
|
|
"valid_targets_mean": 5560.8,
|
|
"valid_targets_min": 1877
|
|
},
|
|
{
|
|
"epoch": 3.1438127090301005,
|
|
"grad_norm": 0.22283049610126812,
|
|
"learning_rate": 2.694307681786903e-05,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11081374436616898,
|
|
"step": 940,
|
|
"valid_targets_mean": 5264.2,
|
|
"valid_targets_min": 2236
|
|
},
|
|
{
|
|
"epoch": 3.160535117056856,
|
|
"grad_norm": 0.22449481338301094,
|
|
"learning_rate": 2.678637336104985e-05,
|
|
"loss": 0.23,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1140298843383789,
|
|
"step": 945,
|
|
"valid_targets_mean": 5910.9,
|
|
"valid_targets_min": 2212
|
|
},
|
|
{
|
|
"epoch": 3.177257525083612,
|
|
"grad_norm": 0.21351177098559027,
|
|
"learning_rate": 2.662919765164466e-05,
|
|
"loss": 0.229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11199861019849777,
|
|
"step": 950,
|
|
"valid_targets_mean": 5595.8,
|
|
"valid_targets_min": 1919
|
|
},
|
|
{
|
|
"epoch": 3.1939799331103678,
|
|
"grad_norm": 0.23132710216051663,
|
|
"learning_rate": 2.6471560627253152e-05,
|
|
"loss": 0.1607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08166929334402084,
|
|
"step": 955,
|
|
"valid_targets_mean": 6287.9,
|
|
"valid_targets_min": 2629
|
|
},
|
|
{
|
|
"epoch": 3.210702341137124,
|
|
"grad_norm": 0.2176969865354794,
|
|
"learning_rate": 2.631347325757717e-05,
|
|
"loss": 0.1623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08133003115653992,
|
|
"step": 960,
|
|
"valid_targets_mean": 6423.4,
|
|
"valid_targets_min": 2166
|
|
},
|
|
{
|
|
"epoch": 3.2274247491638794,
|
|
"grad_norm": 0.2683444169731619,
|
|
"learning_rate": 2.6154946543657372e-05,
|
|
"loss": 0.1524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07579614967107773,
|
|
"step": 965,
|
|
"valid_targets_mean": 6199.5,
|
|
"valid_targets_min": 3236
|
|
},
|
|
{
|
|
"epoch": 3.2441471571906355,
|
|
"grad_norm": 0.22458631112074987,
|
|
"learning_rate": 2.5995991517107633e-05,
|
|
"loss": 0.1484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07175298780202866,
|
|
"step": 970,
|
|
"valid_targets_mean": 6372.2,
|
|
"valid_targets_min": 3231
|
|
},
|
|
{
|
|
"epoch": 3.260869565217391,
|
|
"grad_norm": 0.23402373818201988,
|
|
"learning_rate": 2.5836619239347412e-05,
|
|
"loss": 0.1489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07491204142570496,
|
|
"step": 975,
|
|
"valid_targets_mean": 6481.0,
|
|
"valid_targets_min": 1654
|
|
},
|
|
{
|
|
"epoch": 3.277591973244147,
|
|
"grad_norm": 0.22944761830629365,
|
|
"learning_rate": 2.5676840800831997e-05,
|
|
"loss": 0.2711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14362378418445587,
|
|
"step": 980,
|
|
"valid_targets_mean": 6630.7,
|
|
"valid_targets_min": 467
|
|
},
|
|
{
|
|
"epoch": 3.294314381270903,
|
|
"grad_norm": 0.24063427299380527,
|
|
"learning_rate": 2.5516667320280727e-05,
|
|
"loss": 0.2928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14883650839328766,
|
|
"step": 985,
|
|
"valid_targets_mean": 6873.0,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 3.311036789297659,
|
|
"grad_norm": 0.23786512694842274,
|
|
"learning_rate": 2.5356109943903288e-05,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1342322677373886,
|
|
"step": 990,
|
|
"valid_targets_mean": 7058.8,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 3.327759197324415,
|
|
"grad_norm": 0.2614771742406844,
|
|
"learning_rate": 2.5195179844624018e-05,
|
|
"loss": 0.2325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07092095911502838,
|
|
"step": 995,
|
|
"valid_targets_mean": 6207.5,
|
|
"valid_targets_min": 912
|
|
},
|
|
{
|
|
"epoch": 3.3444816053511706,
|
|
"grad_norm": 0.2908016153116348,
|
|
"learning_rate": 2.503388822130446e-05,
|
|
"loss": 0.1327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06495915353298187,
|
|
"step": 1000,
|
|
"valid_targets_mean": 5640.7,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 3.361204013377926,
|
|
"grad_norm": 0.22988504493589393,
|
|
"learning_rate": 2.4872246297963997e-05,
|
|
"loss": 0.1283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.057750970125198364,
|
|
"step": 1005,
|
|
"valid_targets_mean": 6027.7,
|
|
"valid_targets_min": 1074
|
|
},
|
|
{
|
|
"epoch": 3.3779264214046822,
|
|
"grad_norm": 0.2412062417122858,
|
|
"learning_rate": 2.4710265322998834e-05,
|
|
"loss": 0.1227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06646300107240677,
|
|
"step": 1010,
|
|
"valid_targets_mean": 5895.6,
|
|
"valid_targets_min": 1305
|
|
},
|
|
{
|
|
"epoch": 3.3946488294314383,
|
|
"grad_norm": 0.24119910481944373,
|
|
"learning_rate": 2.4547956568399202e-05,
|
|
"loss": 0.1173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0636618435382843,
|
|
"step": 1015,
|
|
"valid_targets_mean": 6396.6,
|
|
"valid_targets_min": 939
|
|
},
|
|
{
|
|
"epoch": 3.411371237458194,
|
|
"grad_norm": 0.24188097213518175,
|
|
"learning_rate": 2.4385331328965005e-05,
|
|
"loss": 0.1154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05913702771067619,
|
|
"step": 1020,
|
|
"valid_targets_mean": 6244.1,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 3.42809364548495,
|
|
"grad_norm": 0.22182582646991983,
|
|
"learning_rate": 2.4222400921519792e-05,
|
|
"loss": 0.1129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05971468612551689,
|
|
"step": 1025,
|
|
"valid_targets_mean": 6428.5,
|
|
"valid_targets_min": 1022
|
|
},
|
|
{
|
|
"epoch": 3.4448160535117056,
|
|
"grad_norm": 0.22218115219544807,
|
|
"learning_rate": 2.4059176684123246e-05,
|
|
"loss": 0.1166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06487765908241272,
|
|
"step": 1030,
|
|
"valid_targets_mean": 7055.7,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 3.4615384615384617,
|
|
"grad_norm": 0.22612686583813624,
|
|
"learning_rate": 2.38956699752822e-05,
|
|
"loss": 0.119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05695559084415436,
|
|
"step": 1035,
|
|
"valid_targets_mean": 5752.0,
|
|
"valid_targets_min": 1421
|
|
},
|
|
{
|
|
"epoch": 3.4782608695652173,
|
|
"grad_norm": 0.2739772441298118,
|
|
"learning_rate": 2.3731892173160226e-05,
|
|
"loss": 0.1194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08695494383573532,
|
|
"step": 1040,
|
|
"valid_targets_mean": 5457.8,
|
|
"valid_targets_min": 1824
|
|
},
|
|
{
|
|
"epoch": 3.4949832775919734,
|
|
"grad_norm": 0.37370080964964203,
|
|
"learning_rate": 2.356785467478582e-05,
|
|
"loss": 0.2112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1005227193236351,
|
|
"step": 1045,
|
|
"valid_targets_mean": 4800.8,
|
|
"valid_targets_min": 2153
|
|
},
|
|
{
|
|
"epoch": 3.511705685618729,
|
|
"grad_norm": 0.5599331536046348,
|
|
"learning_rate": 2.3403568895259303e-05,
|
|
"loss": 0.2028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08755787461996078,
|
|
"step": 1050,
|
|
"valid_targets_mean": 4231.5,
|
|
"valid_targets_min": 1403
|
|
},
|
|
{
|
|
"epoch": 3.528428093645485,
|
|
"grad_norm": 0.4393565289201433,
|
|
"learning_rate": 2.3239046266958488e-05,
|
|
"loss": 0.1941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09696167707443237,
|
|
"step": 1055,
|
|
"valid_targets_mean": 4866.4,
|
|
"valid_targets_min": 2424
|
|
},
|
|
{
|
|
"epoch": 3.5451505016722407,
|
|
"grad_norm": 0.41357161002608117,
|
|
"learning_rate": 2.307429823874312e-05,
|
|
"loss": 0.189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09798931330442429,
|
|
"step": 1060,
|
|
"valid_targets_mean": 4928.8,
|
|
"valid_targets_min": 2069
|
|
},
|
|
{
|
|
"epoch": 3.5618729096989967,
|
|
"grad_norm": 0.3606826270490686,
|
|
"learning_rate": 2.2909336275158117e-05,
|
|
"loss": 0.181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09319043904542923,
|
|
"step": 1065,
|
|
"valid_targets_mean": 4796.2,
|
|
"valid_targets_min": 1738
|
|
},
|
|
{
|
|
"epoch": 3.5785953177257523,
|
|
"grad_norm": 0.37774206255103304,
|
|
"learning_rate": 2.2744171855635836e-05,
|
|
"loss": 0.1855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0886668860912323,
|
|
"step": 1070,
|
|
"valid_targets_mean": 4343.4,
|
|
"valid_targets_min": 1559
|
|
},
|
|
{
|
|
"epoch": 3.5953177257525084,
|
|
"grad_norm": 0.3273738626988227,
|
|
"learning_rate": 2.2578816473697204e-05,
|
|
"loss": 0.1862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08998367190361023,
|
|
"step": 1075,
|
|
"valid_targets_mean": 4528.2,
|
|
"valid_targets_min": 1869
|
|
},
|
|
{
|
|
"epoch": 3.6120401337792645,
|
|
"grad_norm": 0.322405593510531,
|
|
"learning_rate": 2.241328163615191e-05,
|
|
"loss": 0.1687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07671428471803665,
|
|
"step": 1080,
|
|
"valid_targets_mean": 4402.4,
|
|
"valid_targets_min": 1611
|
|
},
|
|
{
|
|
"epoch": 3.62876254180602,
|
|
"grad_norm": 0.3803032837129777,
|
|
"learning_rate": 2.2247578862297657e-05,
|
|
"loss": 0.1692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0829327329993248,
|
|
"step": 1085,
|
|
"valid_targets_mean": 4568.2,
|
|
"valid_targets_min": 2476
|
|
},
|
|
{
|
|
"epoch": 3.6454849498327757,
|
|
"grad_norm": 0.3587153954079582,
|
|
"learning_rate": 2.2081719683118568e-05,
|
|
"loss": 0.1686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07560330629348755,
|
|
"step": 1090,
|
|
"valid_targets_mean": 3986.7,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 3.6622073578595318,
|
|
"grad_norm": 0.35701773827065825,
|
|
"learning_rate": 2.1915715640482755e-05,
|
|
"loss": 0.1669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08737528324127197,
|
|
"step": 1095,
|
|
"valid_targets_mean": 4547.2,
|
|
"valid_targets_min": 1585
|
|
},
|
|
{
|
|
"epoch": 3.678929765886288,
|
|
"grad_norm": 0.3607940536533331,
|
|
"learning_rate": 2.1749578286339138e-05,
|
|
"loss": 0.1595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07769344002008438,
|
|
"step": 1100,
|
|
"valid_targets_mean": 4497.4,
|
|
"valid_targets_min": 2173
|
|
},
|
|
{
|
|
"epoch": 3.6956521739130435,
|
|
"grad_norm": 0.3530221032418747,
|
|
"learning_rate": 2.1583319181913565e-05,
|
|
"loss": 0.1588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08173205703496933,
|
|
"step": 1105,
|
|
"valid_targets_mean": 4407.4,
|
|
"valid_targets_min": 2182
|
|
},
|
|
{
|
|
"epoch": 3.712374581939799,
|
|
"grad_norm": 0.32995257382626914,
|
|
"learning_rate": 2.1416949896904288e-05,
|
|
"loss": 0.1659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08336662501096725,
|
|
"step": 1110,
|
|
"valid_targets_mean": 4492.7,
|
|
"valid_targets_min": 2241
|
|
},
|
|
{
|
|
"epoch": 3.729096989966555,
|
|
"grad_norm": 0.28792730828744123,
|
|
"learning_rate": 2.125048200867685e-05,
|
|
"loss": 0.1812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0954076424241066,
|
|
"step": 1115,
|
|
"valid_targets_mean": 5137.0,
|
|
"valid_targets_min": 362
|
|
},
|
|
{
|
|
"epoch": 3.745819397993311,
|
|
"grad_norm": 0.5540159205438687,
|
|
"learning_rate": 2.1083927101458417e-05,
|
|
"loss": 0.1623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06959410011768341,
|
|
"step": 1120,
|
|
"valid_targets_mean": 755.5,
|
|
"valid_targets_min": 402
|
|
},
|
|
{
|
|
"epoch": 3.762541806020067,
|
|
"grad_norm": 0.41490659622020165,
|
|
"learning_rate": 2.0917296765531677e-05,
|
|
"loss": 0.169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09585434198379517,
|
|
"step": 1125,
|
|
"valid_targets_mean": 4799.0,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 3.779264214046823,
|
|
"grad_norm": 0.6675066191531142,
|
|
"learning_rate": 2.0750602596428275e-05,
|
|
"loss": 0.1723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06180841848254204,
|
|
"step": 1130,
|
|
"valid_targets_mean": 759.5,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 3.7959866220735785,
|
|
"grad_norm": 0.27718860268904544,
|
|
"learning_rate": 2.05838561941219e-05,
|
|
"loss": 0.1981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11030929535627365,
|
|
"step": 1135,
|
|
"valid_targets_mean": 4424.9,
|
|
"valid_targets_min": 445
|
|
},
|
|
{
|
|
"epoch": 3.8127090301003346,
|
|
"grad_norm": 0.8108181973083735,
|
|
"learning_rate": 2.0417069162221063e-05,
|
|
"loss": 0.2147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11384409666061401,
|
|
"step": 1140,
|
|
"valid_targets_mean": 770.6,
|
|
"valid_targets_min": 348
|
|
},
|
|
{
|
|
"epoch": 3.82943143812709,
|
|
"grad_norm": 0.3619752853956528,
|
|
"learning_rate": 2.0250253107161614e-05,
|
|
"loss": 0.2123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11294696480035782,
|
|
"step": 1145,
|
|
"valid_targets_mean": 4803.6,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 3.8461538461538463,
|
|
"grad_norm": 0.6049777368581511,
|
|
"learning_rate": 2.00834196373991e-05,
|
|
"loss": 0.2184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08877743035554886,
|
|
"step": 1150,
|
|
"valid_targets_mean": 661.5,
|
|
"valid_targets_min": 367
|
|
},
|
|
{
|
|
"epoch": 3.862876254180602,
|
|
"grad_norm": 0.4163406659509841,
|
|
"learning_rate": 1.9916580362600907e-05,
|
|
"loss": 0.2229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.111849345266819,
|
|
"step": 1155,
|
|
"valid_targets_mean": 4626.3,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 3.879598662207358,
|
|
"grad_norm": 0.835357910237281,
|
|
"learning_rate": 1.974974689283839e-05,
|
|
"loss": 0.2136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11454364657402039,
|
|
"step": 1160,
|
|
"valid_targets_mean": 691.8,
|
|
"valid_targets_min": 315
|
|
},
|
|
{
|
|
"epoch": 3.8963210702341136,
|
|
"grad_norm": 0.45871333192913744,
|
|
"learning_rate": 1.9582930837778947e-05,
|
|
"loss": 0.1884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10185828804969788,
|
|
"step": 1165,
|
|
"valid_targets_mean": 4193.6,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 3.9130434782608696,
|
|
"grad_norm": 0.29745700853028983,
|
|
"learning_rate": 1.9416143805878106e-05,
|
|
"loss": 0.2128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11157973855733871,
|
|
"step": 1170,
|
|
"valid_targets_mean": 5181.1,
|
|
"valid_targets_min": 490
|
|
},
|
|
{
|
|
"epoch": 3.9297658862876252,
|
|
"grad_norm": 0.41693215690920316,
|
|
"learning_rate": 1.924939740357173e-05,
|
|
"loss": 0.2116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12292633205652237,
|
|
"step": 1175,
|
|
"valid_targets_mean": 5029.9,
|
|
"valid_targets_min": 440
|
|
},
|
|
{
|
|
"epoch": 3.9464882943143813,
|
|
"grad_norm": 0.2335139902990285,
|
|
"learning_rate": 1.9082703234468326e-05,
|
|
"loss": 0.2215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1099981889128685,
|
|
"step": 1180,
|
|
"valid_targets_mean": 5158.0,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 3.9632107023411374,
|
|
"grad_norm": 0.4371470339737226,
|
|
"learning_rate": 1.8916072898541583e-05,
|
|
"loss": 0.2076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11368042230606079,
|
|
"step": 1185,
|
|
"valid_targets_mean": 1348.2,
|
|
"valid_targets_min": 480
|
|
},
|
|
{
|
|
"epoch": 3.979933110367893,
|
|
"grad_norm": 0.25766869883268,
|
|
"learning_rate": 1.874951799132316e-05,
|
|
"loss": 0.2225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10243052989244461,
|
|
"step": 1190,
|
|
"valid_targets_mean": 4666.4,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 3.9966555183946486,
|
|
"grad_norm": 0.5494797191322752,
|
|
"learning_rate": 1.8583050103095722e-05,
|
|
"loss": 0.1978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08642536401748657,
|
|
"step": 1195,
|
|
"valid_targets_mean": 764.8,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 4.013377926421405,
|
|
"grad_norm": 0.3983142073852994,
|
|
"learning_rate": 1.8416680818086438e-05,
|
|
"loss": 0.2281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1318155825138092,
|
|
"step": 1200,
|
|
"valid_targets_mean": 5490.5,
|
|
"valid_targets_min": 1589
|
|
},
|
|
{
|
|
"epoch": 4.030100334448161,
|
|
"grad_norm": 0.4204852965261042,
|
|
"learning_rate": 1.8250421713660865e-05,
|
|
"loss": 0.2275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.120455302298069,
|
|
"step": 1205,
|
|
"valid_targets_mean": 4523.2,
|
|
"valid_targets_min": 1850
|
|
},
|
|
{
|
|
"epoch": 4.046822742474917,
|
|
"grad_norm": 0.33020963970724276,
|
|
"learning_rate": 1.8084284359517258e-05,
|
|
"loss": 0.2417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11507785320281982,
|
|
"step": 1210,
|
|
"valid_targets_mean": 5242.6,
|
|
"valid_targets_min": 1735
|
|
},
|
|
{
|
|
"epoch": 4.063545150501672,
|
|
"grad_norm": 0.3178176667224097,
|
|
"learning_rate": 1.791828031688144e-05,
|
|
"loss": 0.2387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12455209344625473,
|
|
"step": 1215,
|
|
"valid_targets_mean": 5423.4,
|
|
"valid_targets_min": 1415
|
|
},
|
|
{
|
|
"epoch": 4.080267558528428,
|
|
"grad_norm": 0.25458181387306383,
|
|
"learning_rate": 1.7752421137702347e-05,
|
|
"loss": 0.2255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10788310319185257,
|
|
"step": 1220,
|
|
"valid_targets_mean": 5258.6,
|
|
"valid_targets_min": 1506
|
|
},
|
|
{
|
|
"epoch": 4.096989966555184,
|
|
"grad_norm": 0.2565198321258556,
|
|
"learning_rate": 1.7586718363848093e-05,
|
|
"loss": 0.2166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10485157370567322,
|
|
"step": 1225,
|
|
"valid_targets_mean": 5040.1,
|
|
"valid_targets_min": 2277
|
|
},
|
|
{
|
|
"epoch": 4.11371237458194,
|
|
"grad_norm": 0.24096997241067106,
|
|
"learning_rate": 1.74211835263028e-05,
|
|
"loss": 0.1991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09773701429367065,
|
|
"step": 1230,
|
|
"valid_targets_mean": 5713.9,
|
|
"valid_targets_min": 2457
|
|
},
|
|
{
|
|
"epoch": 4.130434782608695,
|
|
"grad_norm": 0.25155433454532644,
|
|
"learning_rate": 1.7255828144364167e-05,
|
|
"loss": 0.1882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10125782340765,
|
|
"step": 1235,
|
|
"valid_targets_mean": 5547.8,
|
|
"valid_targets_min": 1819
|
|
},
|
|
{
|
|
"epoch": 4.147157190635451,
|
|
"grad_norm": 0.2677930316004813,
|
|
"learning_rate": 1.709066372484189e-05,
|
|
"loss": 0.1822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0887550488114357,
|
|
"step": 1240,
|
|
"valid_targets_mean": 5416.3,
|
|
"valid_targets_min": 2377
|
|
},
|
|
{
|
|
"epoch": 4.1638795986622075,
|
|
"grad_norm": 0.2989434272273992,
|
|
"learning_rate": 1.6925701761256888e-05,
|
|
"loss": 0.1798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09367116540670395,
|
|
"step": 1245,
|
|
"valid_targets_mean": 5154.9,
|
|
"valid_targets_min": 2114
|
|
},
|
|
{
|
|
"epoch": 4.1806020066889635,
|
|
"grad_norm": 0.27686637163209604,
|
|
"learning_rate": 1.6760953733041512e-05,
|
|
"loss": 0.1678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0609724260866642,
|
|
"step": 1250,
|
|
"valid_targets_mean": 6273.1,
|
|
"valid_targets_min": 3156
|
|
},
|
|
{
|
|
"epoch": 4.197324414715719,
|
|
"grad_norm": 0.28265745678346893,
|
|
"learning_rate": 1.6596431104740707e-05,
|
|
"loss": 0.127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06785546988248825,
|
|
"step": 1255,
|
|
"valid_targets_mean": 6151.7,
|
|
"valid_targets_min": 3558
|
|
},
|
|
{
|
|
"epoch": 4.214046822742475,
|
|
"grad_norm": 0.2520941933383287,
|
|
"learning_rate": 1.643214532521419e-05,
|
|
"loss": 0.129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.062067896127700806,
|
|
"step": 1260,
|
|
"valid_targets_mean": 6682.7,
|
|
"valid_targets_min": 2837
|
|
},
|
|
{
|
|
"epoch": 4.230769230769231,
|
|
"grad_norm": 0.25181616788847366,
|
|
"learning_rate": 1.626810782683978e-05,
|
|
"loss": 0.1203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05547105893492699,
|
|
"step": 1265,
|
|
"valid_targets_mean": 6249.6,
|
|
"valid_targets_min": 2702
|
|
},
|
|
{
|
|
"epoch": 4.247491638795987,
|
|
"grad_norm": 0.24833091198602608,
|
|
"learning_rate": 1.6104330024717803e-05,
|
|
"loss": 0.1123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05175550654530525,
|
|
"step": 1270,
|
|
"valid_targets_mean": 6152.8,
|
|
"valid_targets_min": 3000
|
|
},
|
|
{
|
|
"epoch": 4.264214046822742,
|
|
"grad_norm": 0.25871715466876993,
|
|
"learning_rate": 1.5940823315876765e-05,
|
|
"loss": 0.1252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10184700042009354,
|
|
"step": 1275,
|
|
"valid_targets_mean": 6167.8,
|
|
"valid_targets_min": 439
|
|
},
|
|
{
|
|
"epoch": 4.280936454849498,
|
|
"grad_norm": 0.2555385432900631,
|
|
"learning_rate": 1.577759907848022e-05,
|
|
"loss": 0.2469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12839829921722412,
|
|
"step": 1280,
|
|
"valid_targets_mean": 7075.5,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 4.297658862876254,
|
|
"grad_norm": 0.259330852439398,
|
|
"learning_rate": 1.5614668671035e-05,
|
|
"loss": 0.2502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1335558444261551,
|
|
"step": 1285,
|
|
"valid_targets_mean": 7309.6,
|
|
"valid_targets_min": 500
|
|
},
|
|
{
|
|
"epoch": 4.31438127090301,
|
|
"grad_norm": 0.3045285116635551,
|
|
"learning_rate": 1.54520434316008e-05,
|
|
"loss": 0.2418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12459353357553482,
|
|
"step": 1290,
|
|
"valid_targets_mean": 7088.9,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 4.331103678929766,
|
|
"grad_norm": 0.2615589809446335,
|
|
"learning_rate": 1.528973467700117e-05,
|
|
"loss": 0.1689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05501072108745575,
|
|
"step": 1295,
|
|
"valid_targets_mean": 6525.1,
|
|
"valid_targets_min": 787
|
|
},
|
|
{
|
|
"epoch": 4.3478260869565215,
|
|
"grad_norm": 0.19981406862826387,
|
|
"learning_rate": 1.5127753702036003e-05,
|
|
"loss": 0.1081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.054008498787879944,
|
|
"step": 1300,
|
|
"valid_targets_mean": 6203.9,
|
|
"valid_targets_min": 1088
|
|
},
|
|
{
|
|
"epoch": 4.364548494983278,
|
|
"grad_norm": 0.2353563493658018,
|
|
"learning_rate": 1.496611177869555e-05,
|
|
"loss": 0.1054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051377419382333755,
|
|
"step": 1305,
|
|
"valid_targets_mean": 6005.3,
|
|
"valid_targets_min": 1028
|
|
},
|
|
{
|
|
"epoch": 4.381270903010034,
|
|
"grad_norm": 0.21506349693812962,
|
|
"learning_rate": 1.4804820155375987e-05,
|
|
"loss": 0.098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0491340346634388,
|
|
"step": 1310,
|
|
"valid_targets_mean": 6106.2,
|
|
"valid_targets_min": 1271
|
|
},
|
|
{
|
|
"epoch": 4.39799331103679,
|
|
"grad_norm": 0.2284066698635598,
|
|
"learning_rate": 1.4643890056096717e-05,
|
|
"loss": 0.0935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0445077009499073,
|
|
"step": 1315,
|
|
"valid_targets_mean": 6005.2,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 4.414715719063545,
|
|
"grad_norm": 0.19313870171516026,
|
|
"learning_rate": 1.4483332679719273e-05,
|
|
"loss": 0.0914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04023582860827446,
|
|
"step": 1320,
|
|
"valid_targets_mean": 5468.1,
|
|
"valid_targets_min": 962
|
|
},
|
|
{
|
|
"epoch": 4.431438127090301,
|
|
"grad_norm": 0.2195867331925147,
|
|
"learning_rate": 1.4323159199168015e-05,
|
|
"loss": 0.0911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045754555612802505,
|
|
"step": 1325,
|
|
"valid_targets_mean": 6045.7,
|
|
"valid_targets_min": 1238
|
|
},
|
|
{
|
|
"epoch": 4.448160535117057,
|
|
"grad_norm": 0.21882337020400575,
|
|
"learning_rate": 1.4163380760652594e-05,
|
|
"loss": 0.0918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0426185317337513,
|
|
"step": 1330,
|
|
"valid_targets_mean": 6314.6,
|
|
"valid_targets_min": 1182
|
|
},
|
|
{
|
|
"epoch": 4.464882943143813,
|
|
"grad_norm": 0.20302154362576783,
|
|
"learning_rate": 1.4004008482892372e-05,
|
|
"loss": 0.0952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046527404338121414,
|
|
"step": 1335,
|
|
"valid_targets_mean": 5935.0,
|
|
"valid_targets_min": 1328
|
|
},
|
|
{
|
|
"epoch": 4.481605351170568,
|
|
"grad_norm": 0.4119166537133989,
|
|
"learning_rate": 1.3845053456342633e-05,
|
|
"loss": 0.1112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08957084268331528,
|
|
"step": 1340,
|
|
"valid_targets_mean": 5224.5,
|
|
"valid_targets_min": 1441
|
|
},
|
|
{
|
|
"epoch": 4.498327759197324,
|
|
"grad_norm": 0.40546925366754794,
|
|
"learning_rate": 1.368652674242283e-05,
|
|
"loss": 0.1529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07737911492586136,
|
|
"step": 1345,
|
|
"valid_targets_mean": 4458.2,
|
|
"valid_targets_min": 1825
|
|
},
|
|
{
|
|
"epoch": 4.51505016722408,
|
|
"grad_norm": 0.3714672129147885,
|
|
"learning_rate": 1.352843937274686e-05,
|
|
"loss": 0.1462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0723295584321022,
|
|
"step": 1350,
|
|
"valid_targets_mean": 4458.5,
|
|
"valid_targets_min": 1817
|
|
},
|
|
{
|
|
"epoch": 4.531772575250836,
|
|
"grad_norm": 0.42698962880542446,
|
|
"learning_rate": 1.337080234835535e-05,
|
|
"loss": 0.1349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06616537272930145,
|
|
"step": 1355,
|
|
"valid_targets_mean": 4560.5,
|
|
"valid_targets_min": 1508
|
|
},
|
|
{
|
|
"epoch": 4.548494983277592,
|
|
"grad_norm": 0.41413108828856393,
|
|
"learning_rate": 1.3213626638950152e-05,
|
|
"loss": 0.1324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0662195160984993,
|
|
"step": 1360,
|
|
"valid_targets_mean": 4229.3,
|
|
"valid_targets_min": 1621
|
|
},
|
|
{
|
|
"epoch": 4.565217391304348,
|
|
"grad_norm": 0.35862939257782944,
|
|
"learning_rate": 1.3056923182130974e-05,
|
|
"loss": 0.1291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07382261008024216,
|
|
"step": 1365,
|
|
"valid_targets_mean": 5184.2,
|
|
"valid_targets_min": 2222
|
|
},
|
|
{
|
|
"epoch": 4.581939799331104,
|
|
"grad_norm": 0.33416425401618666,
|
|
"learning_rate": 1.2900702882634227e-05,
|
|
"loss": 0.1289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05994178727269173,
|
|
"step": 1370,
|
|
"valid_targets_mean": 4821.6,
|
|
"valid_targets_min": 2090
|
|
},
|
|
{
|
|
"epoch": 4.59866220735786,
|
|
"grad_norm": 0.35073031768381946,
|
|
"learning_rate": 1.2744976611574198e-05,
|
|
"loss": 0.1303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0693393424153328,
|
|
"step": 1375,
|
|
"valid_targets_mean": 4852.0,
|
|
"valid_targets_min": 1973
|
|
},
|
|
{
|
|
"epoch": 4.615384615384615,
|
|
"grad_norm": 0.3643728181330638,
|
|
"learning_rate": 1.2589755205686552e-05,
|
|
"loss": 0.1243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06601840257644653,
|
|
"step": 1380,
|
|
"valid_targets_mean": 4581.0,
|
|
"valid_targets_min": 3280
|
|
},
|
|
{
|
|
"epoch": 4.632107023411371,
|
|
"grad_norm": 0.40297111807029706,
|
|
"learning_rate": 1.2435049466574212e-05,
|
|
"loss": 0.128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06454489380121231,
|
|
"step": 1385,
|
|
"valid_targets_mean": 4304.0,
|
|
"valid_targets_min": 1507
|
|
},
|
|
{
|
|
"epoch": 4.648829431438127,
|
|
"grad_norm": 0.41515506041295197,
|
|
"learning_rate": 1.228087015995569e-05,
|
|
"loss": 0.1317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07071829587221146,
|
|
"step": 1390,
|
|
"valid_targets_mean": 4750.5,
|
|
"valid_targets_min": 1991
|
|
},
|
|
{
|
|
"epoch": 4.665551839464883,
|
|
"grad_norm": 0.3704291621283176,
|
|
"learning_rate": 1.212722801491595e-05,
|
|
"loss": 0.1253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05995642766356468,
|
|
"step": 1395,
|
|
"valid_targets_mean": 4458.9,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 4.682274247491639,
|
|
"grad_norm": 0.346419851333213,
|
|
"learning_rate": 1.1974133723159727e-05,
|
|
"loss": 0.1205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05941097065806389,
|
|
"step": 1400,
|
|
"valid_targets_mean": 4414.5,
|
|
"valid_targets_min": 3146
|
|
},
|
|
{
|
|
"epoch": 4.698996655518394,
|
|
"grad_norm": 0.399905240779612,
|
|
"learning_rate": 1.182159793826756e-05,
|
|
"loss": 0.1233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07197066396474838,
|
|
"step": 1405,
|
|
"valid_targets_mean": 4694.9,
|
|
"valid_targets_min": 1356
|
|
},
|
|
{
|
|
"epoch": 4.7157190635451505,
|
|
"grad_norm": 0.3005392997985981,
|
|
"learning_rate": 1.1669631274954432e-05,
|
|
"loss": 0.1256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07580582797527313,
|
|
"step": 1410,
|
|
"valid_targets_mean": 4821.0,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 4.7324414715719065,
|
|
"grad_norm": 0.2617864462812205,
|
|
"learning_rate": 1.1518244308331056e-05,
|
|
"loss": 0.1472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07534201443195343,
|
|
"step": 1415,
|
|
"valid_targets_mean": 4934.1,
|
|
"valid_targets_min": 777
|
|
},
|
|
{
|
|
"epoch": 4.749163879598662,
|
|
"grad_norm": 0.43297181615656766,
|
|
"learning_rate": 1.1367447573168025e-05,
|
|
"loss": 0.0987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03398430347442627,
|
|
"step": 1420,
|
|
"valid_targets_mean": 686.5,
|
|
"valid_targets_min": 406
|
|
},
|
|
{
|
|
"epoch": 4.765886287625418,
|
|
"grad_norm": 0.39525436130072816,
|
|
"learning_rate": 1.1217251563162685e-05,
|
|
"loss": 0.1461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07678047567605972,
|
|
"step": 1425,
|
|
"valid_targets_mean": 4654.8,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 4.782608695652174,
|
|
"grad_norm": 0.43314877198061574,
|
|
"learning_rate": 1.106766673020891e-05,
|
|
"loss": 0.0903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.027931950986385345,
|
|
"step": 1430,
|
|
"valid_targets_mean": 720.2,
|
|
"valid_targets_min": 240
|
|
},
|
|
{
|
|
"epoch": 4.79933110367893,
|
|
"grad_norm": 0.26860268972223394,
|
|
"learning_rate": 1.091870348366976e-05,
|
|
"loss": 0.1729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08632151037454605,
|
|
"step": 1435,
|
|
"valid_targets_mean": 5107.3,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 4.816053511705686,
|
|
"grad_norm": 0.4656028655812997,
|
|
"learning_rate": 1.0770372189653124e-05,
|
|
"loss": 0.1327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043083954602479935,
|
|
"step": 1440,
|
|
"valid_targets_mean": 744.3,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 4.832775919732441,
|
|
"grad_norm": 0.257607348933929,
|
|
"learning_rate": 1.0622683170290337e-05,
|
|
"loss": 0.1622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0950133204460144,
|
|
"step": 1445,
|
|
"valid_targets_mean": 4939.2,
|
|
"valid_targets_min": 322
|
|
},
|
|
{
|
|
"epoch": 4.849498327759197,
|
|
"grad_norm": 0.5326252143263469,
|
|
"learning_rate": 1.0475646703017906e-05,
|
|
"loss": 0.1575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05841116979718208,
|
|
"step": 1450,
|
|
"valid_targets_mean": 715.5,
|
|
"valid_targets_min": 302
|
|
},
|
|
{
|
|
"epoch": 4.866220735785953,
|
|
"grad_norm": 0.2561988201946194,
|
|
"learning_rate": 1.0329273019862303e-05,
|
|
"loss": 0.1506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07884644716978073,
|
|
"step": 1455,
|
|
"valid_targets_mean": 4340.4,
|
|
"valid_targets_min": 221
|
|
},
|
|
{
|
|
"epoch": 4.882943143812709,
|
|
"grad_norm": 0.43011126467789684,
|
|
"learning_rate": 1.0183572306727968e-05,
|
|
"loss": 0.1315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04043419659137726,
|
|
"step": 1460,
|
|
"valid_targets_mean": 677.5,
|
|
"valid_targets_min": 332
|
|
},
|
|
{
|
|
"epoch": 4.8996655518394645,
|
|
"grad_norm": 0.29575509504666075,
|
|
"learning_rate": 1.0038554702688414e-05,
|
|
"loss": 0.1265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08426320552825928,
|
|
"step": 1465,
|
|
"valid_targets_mean": 4601.8,
|
|
"valid_targets_min": 320
|
|
},
|
|
{
|
|
"epoch": 4.916387959866221,
|
|
"grad_norm": 0.5391203808910142,
|
|
"learning_rate": 9.894230299280745e-06,
|
|
"loss": 0.1624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05296686291694641,
|
|
"step": 1470,
|
|
"valid_targets_mean": 691.7,
|
|
"valid_targets_min": 325
|
|
},
|
|
{
|
|
"epoch": 4.933110367892977,
|
|
"grad_norm": 0.2947872075814511,
|
|
"learning_rate": 9.750609139803366e-06,
|
|
"loss": 0.1202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.093158058822155,
|
|
"step": 1475,
|
|
"valid_targets_mean": 4822.0,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 4.949832775919733,
|
|
"grad_norm": 0.20260636225953862,
|
|
"learning_rate": 9.607701218617087e-06,
|
|
"loss": 0.1786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07647214084863663,
|
|
"step": 1480,
|
|
"valid_targets_mean": 3765.1,
|
|
"valid_targets_min": 396
|
|
},
|
|
{
|
|
"epoch": 4.966555183946488,
|
|
"grad_norm": 0.2896502864734037,
|
|
"learning_rate": 9.465516480449637e-06,
|
|
"loss": 0.1122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08254526555538177,
|
|
"step": 1485,
|
|
"valid_targets_mean": 4749.6,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 4.983277591973244,
|
|
"grad_norm": 0.19533525689441109,
|
|
"learning_rate": 9.324064819703627e-06,
|
|
"loss": 0.1806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07892594486474991,
|
|
"step": 1490,
|
|
"valid_targets_mean": 4327.7,
|
|
"valid_targets_min": 388
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.3530825490412881,
|
|
"learning_rate": 9.183356079768007e-06,
|
|
"loss": 0.0963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05275370553135872,
|
|
"step": 1495,
|
|
"valid_targets_mean": 2201.4,
|
|
"valid_targets_min": 325
|
|
},
|
|
{
|
|
"epoch": 5.016722408026756,
|
|
"grad_norm": 0.38060515370591574,
|
|
"learning_rate": 9.043400052333093e-06,
|
|
"loss": 0.1792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0847535952925682,
|
|
"step": 1500,
|
|
"valid_targets_mean": 4281.0,
|
|
"valid_targets_min": 2292
|
|
},
|
|
{
|
|
"epoch": 5.033444816053512,
|
|
"grad_norm": 0.4805998569165993,
|
|
"learning_rate": 8.90420647670918e-06,
|
|
"loss": 0.1706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08545965701341629,
|
|
"step": 1505,
|
|
"valid_targets_mean": 4455.8,
|
|
"valid_targets_min": 1901
|
|
},
|
|
{
|
|
"epoch": 5.050167224080267,
|
|
"grad_norm": 0.3902166653580631,
|
|
"learning_rate": 8.765785039148772e-06,
|
|
"loss": 0.2001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09431061893701553,
|
|
"step": 1510,
|
|
"valid_targets_mean": 5071.8,
|
|
"valid_targets_min": 1859
|
|
},
|
|
{
|
|
"epoch": 5.066889632107023,
|
|
"grad_norm": 0.30612331177016067,
|
|
"learning_rate": 8.62814537217257e-06,
|
|
"loss": 0.1975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09857640415430069,
|
|
"step": 1515,
|
|
"valid_targets_mean": 5426.4,
|
|
"valid_targets_min": 2338
|
|
},
|
|
{
|
|
"epoch": 5.083612040133779,
|
|
"grad_norm": 0.31377770155989654,
|
|
"learning_rate": 8.491297053899135e-06,
|
|
"loss": 0.1886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09046251326799393,
|
|
"step": 1520,
|
|
"valid_targets_mean": 5109.5,
|
|
"valid_targets_min": 1972
|
|
},
|
|
{
|
|
"epoch": 5.1003344481605355,
|
|
"grad_norm": 0.2686255916491811,
|
|
"learning_rate": 8.355249607378373e-06,
|
|
"loss": 0.1784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09167512506246567,
|
|
"step": 1525,
|
|
"valid_targets_mean": 6256.6,
|
|
"valid_targets_min": 3286
|
|
},
|
|
{
|
|
"epoch": 5.117056856187291,
|
|
"grad_norm": 0.26728067630148067,
|
|
"learning_rate": 8.220012499928843e-06,
|
|
"loss": 0.1598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08101589977741241,
|
|
"step": 1530,
|
|
"valid_targets_mean": 5565.3,
|
|
"valid_targets_min": 2259
|
|
},
|
|
{
|
|
"epoch": 5.133779264214047,
|
|
"grad_norm": 0.25700467135530397,
|
|
"learning_rate": 8.08559514247893e-06,
|
|
"loss": 0.1463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07393509894609451,
|
|
"step": 1535,
|
|
"valid_targets_mean": 5089.4,
|
|
"valid_targets_min": 2021
|
|
},
|
|
{
|
|
"epoch": 5.150501672240803,
|
|
"grad_norm": 0.28354142706216606,
|
|
"learning_rate": 7.952006888911965e-06,
|
|
"loss": 0.139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0719037726521492,
|
|
"step": 1540,
|
|
"valid_targets_mean": 5502.6,
|
|
"valid_targets_min": 2376
|
|
},
|
|
{
|
|
"epoch": 5.167224080267559,
|
|
"grad_norm": 0.2813252460267186,
|
|
"learning_rate": 7.819257035415302e-06,
|
|
"loss": 0.1379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07315998524427414,
|
|
"step": 1545,
|
|
"valid_targets_mean": 5302.3,
|
|
"valid_targets_min": 2051
|
|
},
|
|
{
|
|
"epoch": 5.183946488294314,
|
|
"grad_norm": 0.26797454522313097,
|
|
"learning_rate": 7.687354819833408e-06,
|
|
"loss": 0.1219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05114075541496277,
|
|
"step": 1550,
|
|
"valid_targets_mean": 6117.5,
|
|
"valid_targets_min": 3261
|
|
},
|
|
{
|
|
"epoch": 5.20066889632107,
|
|
"grad_norm": 0.23042517166164886,
|
|
"learning_rate": 7.556309421025026e-06,
|
|
"loss": 0.1019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05171547457575798,
|
|
"step": 1555,
|
|
"valid_targets_mean": 6000.6,
|
|
"valid_targets_min": 3132
|
|
},
|
|
{
|
|
"epoch": 5.217391304347826,
|
|
"grad_norm": 0.2212031720637949,
|
|
"learning_rate": 7.4261299582244014e-06,
|
|
"loss": 0.1023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05276946350932121,
|
|
"step": 1560,
|
|
"valid_targets_mean": 6224.2,
|
|
"valid_targets_min": 1257
|
|
},
|
|
{
|
|
"epoch": 5.234113712374582,
|
|
"grad_norm": 0.21614252871552436,
|
|
"learning_rate": 7.296825490406736e-06,
|
|
"loss": 0.0944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04958316683769226,
|
|
"step": 1565,
|
|
"valid_targets_mean": 6605.7,
|
|
"valid_targets_min": 2342
|
|
},
|
|
{
|
|
"epoch": 5.250836120401337,
|
|
"grad_norm": 0.23365537530043104,
|
|
"learning_rate": 7.1684050156577735e-06,
|
|
"loss": 0.085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0437626950442791,
|
|
"step": 1570,
|
|
"valid_targets_mean": 6263.8,
|
|
"valid_targets_min": 3362
|
|
},
|
|
{
|
|
"epoch": 5.2675585284280935,
|
|
"grad_norm": 0.34400224897248954,
|
|
"learning_rate": 7.040877470547618e-06,
|
|
"loss": 0.1236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11208627372980118,
|
|
"step": 1575,
|
|
"valid_targets_mean": 6788.9,
|
|
"valid_targets_min": 338
|
|
},
|
|
{
|
|
"epoch": 5.2842809364548495,
|
|
"grad_norm": 0.3075855071911673,
|
|
"learning_rate": 6.91425172950885e-06,
|
|
"loss": 0.2126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11405330896377563,
|
|
"step": 1580,
|
|
"valid_targets_mean": 7302.9,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 5.301003344481606,
|
|
"grad_norm": 0.29967218395541134,
|
|
"learning_rate": 6.788536604219012e-06,
|
|
"loss": 0.2146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12489277124404907,
|
|
"step": 1585,
|
|
"valid_targets_mean": 8350.1,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 5.317725752508361,
|
|
"grad_norm": 0.3077803724132993,
|
|
"learning_rate": 6.663740842987376e-06,
|
|
"loss": 0.2063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10921672731637955,
|
|
"step": 1590,
|
|
"valid_targets_mean": 7376.1,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 5.334448160535117,
|
|
"grad_norm": 0.27610331782419223,
|
|
"learning_rate": 6.53987313014618e-06,
|
|
"loss": 0.1203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04607057943940163,
|
|
"step": 1595,
|
|
"valid_targets_mean": 6250.0,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 5.351170568561873,
|
|
"grad_norm": 0.27196219134308003,
|
|
"learning_rate": 6.4169420854463e-06,
|
|
"loss": 0.0899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04740108177065849,
|
|
"step": 1600,
|
|
"valid_targets_mean": 6208.8,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 5.367892976588629,
|
|
"grad_norm": 0.213352435192677,
|
|
"learning_rate": 6.294956263457415e-06,
|
|
"loss": 0.0864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04106152430176735,
|
|
"step": 1605,
|
|
"valid_targets_mean": 5629.8,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 5.384615384615385,
|
|
"grad_norm": 0.19084010885821173,
|
|
"learning_rate": 6.173924152972697e-06,
|
|
"loss": 0.0782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03774537518620491,
|
|
"step": 1610,
|
|
"valid_targets_mean": 5685.6,
|
|
"valid_targets_min": 971
|
|
},
|
|
{
|
|
"epoch": 5.40133779264214,
|
|
"grad_norm": 0.18043966303248157,
|
|
"learning_rate": 6.053854176418108e-06,
|
|
"loss": 0.0764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038588013499975204,
|
|
"step": 1615,
|
|
"valid_targets_mean": 6092.8,
|
|
"valid_targets_min": 1144
|
|
},
|
|
{
|
|
"epoch": 5.418060200668896,
|
|
"grad_norm": 0.1693748076409906,
|
|
"learning_rate": 5.934754689266293e-06,
|
|
"loss": 0.0738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03586020693182945,
|
|
"step": 1620,
|
|
"valid_targets_mean": 6219.2,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 5.434782608695652,
|
|
"grad_norm": 0.17861772571200932,
|
|
"learning_rate": 5.816633979455111e-06,
|
|
"loss": 0.0727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03658032417297363,
|
|
"step": 1625,
|
|
"valid_targets_mean": 5671.3,
|
|
"valid_targets_min": 1221
|
|
},
|
|
{
|
|
"epoch": 5.451505016722408,
|
|
"grad_norm": 0.19246635632596268,
|
|
"learning_rate": 5.699500266810936e-06,
|
|
"loss": 0.0739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03878069296479225,
|
|
"step": 1630,
|
|
"valid_targets_mean": 7040.9,
|
|
"valid_targets_min": 1196
|
|
},
|
|
{
|
|
"epoch": 5.468227424749164,
|
|
"grad_norm": 0.18383058080835318,
|
|
"learning_rate": 5.583361702476615e-06,
|
|
"loss": 0.0751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03463970497250557,
|
|
"step": 1635,
|
|
"valid_targets_mean": 5457.4,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 5.48494983277592,
|
|
"grad_norm": 0.38378629675073944,
|
|
"learning_rate": 5.468226368344256e-06,
|
|
"loss": 0.0952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06299524009227753,
|
|
"step": 1640,
|
|
"valid_targets_mean": 4684.6,
|
|
"valid_targets_min": 2287
|
|
},
|
|
{
|
|
"epoch": 5.501672240802676,
|
|
"grad_norm": 0.3950735797517467,
|
|
"learning_rate": 5.354102276492847e-06,
|
|
"loss": 0.1133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06622468680143356,
|
|
"step": 1645,
|
|
"valid_targets_mean": 4927.4,
|
|
"valid_targets_min": 1920
|
|
},
|
|
{
|
|
"epoch": 5.518394648829432,
|
|
"grad_norm": 0.3243028511326553,
|
|
"learning_rate": 5.240997368630652e-06,
|
|
"loss": 0.1011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.047553639858961105,
|
|
"step": 1650,
|
|
"valid_targets_mean": 4733.8,
|
|
"valid_targets_min": 2083
|
|
},
|
|
{
|
|
"epoch": 5.535117056856187,
|
|
"grad_norm": 0.30525894241572327,
|
|
"learning_rate": 5.1289195155426075e-06,
|
|
"loss": 0.0955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042171042412519455,
|
|
"step": 1655,
|
|
"valid_targets_mean": 4550.0,
|
|
"valid_targets_min": 1614
|
|
},
|
|
{
|
|
"epoch": 5.551839464882943,
|
|
"grad_norm": 0.28662589252212267,
|
|
"learning_rate": 5.017876516542599e-06,
|
|
"loss": 0.0866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04329480230808258,
|
|
"step": 1660,
|
|
"valid_targets_mean": 4728.0,
|
|
"valid_targets_min": 1837
|
|
},
|
|
{
|
|
"epoch": 5.568561872909699,
|
|
"grad_norm": 0.2922360826520029,
|
|
"learning_rate": 4.907876098930715e-06,
|
|
"loss": 0.0889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04296515882015228,
|
|
"step": 1665,
|
|
"valid_targets_mean": 4336.6,
|
|
"valid_targets_min": 1318
|
|
},
|
|
{
|
|
"epoch": 5.585284280936455,
|
|
"grad_norm": 0.28983604310745525,
|
|
"learning_rate": 4.798925917455497e-06,
|
|
"loss": 0.0866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04725012183189392,
|
|
"step": 1670,
|
|
"valid_targets_mean": 4800.6,
|
|
"valid_targets_min": 1322
|
|
},
|
|
{
|
|
"epoch": 5.602006688963211,
|
|
"grad_norm": 0.29000437301509696,
|
|
"learning_rate": 4.691033553781293e-06,
|
|
"loss": 0.0873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04241691902279854,
|
|
"step": 1675,
|
|
"valid_targets_mean": 4165.9,
|
|
"valid_targets_min": 2248
|
|
},
|
|
{
|
|
"epoch": 5.618729096989966,
|
|
"grad_norm": 0.3495605735556065,
|
|
"learning_rate": 4.584206515960654e-06,
|
|
"loss": 0.0957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.049654100090265274,
|
|
"step": 1680,
|
|
"valid_targets_mean": 4880.2,
|
|
"valid_targets_min": 1768
|
|
},
|
|
{
|
|
"epoch": 5.635451505016722,
|
|
"grad_norm": 0.3155209940869205,
|
|
"learning_rate": 4.478452237911839e-06,
|
|
"loss": 0.0961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04520755633711815,
|
|
"step": 1685,
|
|
"valid_targets_mean": 4474.0,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 5.6521739130434785,
|
|
"grad_norm": 0.32693235199587467,
|
|
"learning_rate": 4.373778078901521e-06,
|
|
"loss": 0.0966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045738428831100464,
|
|
"step": 1690,
|
|
"valid_targets_mean": 4282.3,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 5.668896321070234,
|
|
"grad_norm": 0.26509778683991037,
|
|
"learning_rate": 4.270191323032651e-06,
|
|
"loss": 0.0936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048090968281030655,
|
|
"step": 1695,
|
|
"valid_targets_mean": 4385.5,
|
|
"valid_targets_min": 2771
|
|
},
|
|
{
|
|
"epoch": 5.68561872909699,
|
|
"grad_norm": 0.2744917762086293,
|
|
"learning_rate": 4.167699178737592e-06,
|
|
"loss": 0.0892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04659506306052208,
|
|
"step": 1700,
|
|
"valid_targets_mean": 4312.4,
|
|
"valid_targets_min": 1839
|
|
},
|
|
{
|
|
"epoch": 5.702341137123746,
|
|
"grad_norm": 0.24287839211852771,
|
|
"learning_rate": 4.06630877827648e-06,
|
|
"loss": 0.0881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042560476809740067,
|
|
"step": 1705,
|
|
"valid_targets_mean": 4501.5,
|
|
"valid_targets_min": 383
|
|
},
|
|
{
|
|
"epoch": 5.719063545150502,
|
|
"grad_norm": 0.3162022601781066,
|
|
"learning_rate": 3.966027177240905e-06,
|
|
"loss": 0.1,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05805010721087456,
|
|
"step": 1710,
|
|
"valid_targets_mean": 4562.2,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 5.735785953177258,
|
|
"grad_norm": 0.2582433466601274,
|
|
"learning_rate": 3.866861354062925e-06,
|
|
"loss": 0.1188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05881081148982048,
|
|
"step": 1715,
|
|
"valid_targets_mean": 4599.1,
|
|
"valid_targets_min": 379
|
|
},
|
|
{
|
|
"epoch": 5.752508361204013,
|
|
"grad_norm": 0.3355914174201087,
|
|
"learning_rate": 3.768818209529448e-06,
|
|
"loss": 0.0613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06443934142589569,
|
|
"step": 1720,
|
|
"valid_targets_mean": 4988.7,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 5.769230769230769,
|
|
"grad_norm": 0.26672346902575295,
|
|
"learning_rate": 3.6719045663020183e-06,
|
|
"loss": 0.1195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04287024214863777,
|
|
"step": 1725,
|
|
"valid_targets_mean": 2713.3,
|
|
"valid_targets_min": 403
|
|
},
|
|
{
|
|
"epoch": 5.785953177257525,
|
|
"grad_norm": 0.317874104850162,
|
|
"learning_rate": 3.576127168442034e-06,
|
|
"loss": 0.0503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08537133783102036,
|
|
"step": 1730,
|
|
"valid_targets_mean": 4844.5,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 5.802675585284281,
|
|
"grad_norm": 0.3195979630372435,
|
|
"learning_rate": 3.4814926809414493e-06,
|
|
"loss": 0.1485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07487867027521133,
|
|
"step": 1735,
|
|
"valid_targets_mean": 4910.9,
|
|
"valid_targets_min": 370
|
|
},
|
|
{
|
|
"epoch": 5.8193979933110365,
|
|
"grad_norm": 0.3311422146791468,
|
|
"learning_rate": 3.3880076892589518e-06,
|
|
"loss": 0.0687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01669185422360897,
|
|
"step": 1740,
|
|
"valid_targets_mean": 728.3,
|
|
"valid_targets_min": 452
|
|
},
|
|
{
|
|
"epoch": 5.8361204013377925,
|
|
"grad_norm": 0.23209035884451032,
|
|
"learning_rate": 3.295678698861711e-06,
|
|
"loss": 0.1552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0814889594912529,
|
|
"step": 1745,
|
|
"valid_targets_mean": 4948.3,
|
|
"valid_targets_min": 406
|
|
},
|
|
{
|
|
"epoch": 5.852842809364549,
|
|
"grad_norm": 0.2779057961501916,
|
|
"learning_rate": 3.2045121347726573e-06,
|
|
"loss": 0.0941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01734340935945511,
|
|
"step": 1750,
|
|
"valid_targets_mean": 736.7,
|
|
"valid_targets_min": 347
|
|
},
|
|
{
|
|
"epoch": 5.869565217391305,
|
|
"grad_norm": 0.22701135927973293,
|
|
"learning_rate": 3.1145143411233825e-06,
|
|
"loss": 0.1379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0628105103969574,
|
|
"step": 1755,
|
|
"valid_targets_mean": 4206.8,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 5.88628762541806,
|
|
"grad_norm": 0.2267474623544993,
|
|
"learning_rate": 3.0256915807126775e-06,
|
|
"loss": 0.0733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.013419833965599537,
|
|
"step": 1760,
|
|
"valid_targets_mean": 769.7,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 5.903010033444816,
|
|
"grad_norm": 0.2101566804170016,
|
|
"learning_rate": 2.93805003457067e-06,
|
|
"loss": 0.1156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0785791277885437,
|
|
"step": 1765,
|
|
"valid_targets_mean": 5161.2,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 5.919732441471572,
|
|
"grad_norm": 0.27739314875952154,
|
|
"learning_rate": 2.8515958015287458e-06,
|
|
"loss": 0.1038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01612609438598156,
|
|
"step": 1770,
|
|
"valid_targets_mean": 679.1,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 5.936454849498328,
|
|
"grad_norm": 0.2746264092864155,
|
|
"learning_rate": 2.766334897795113e-06,
|
|
"loss": 0.1099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08698531985282898,
|
|
"step": 1775,
|
|
"valid_targets_mean": 4858.9,
|
|
"valid_targets_min": 426
|
|
},
|
|
{
|
|
"epoch": 5.953177257525084,
|
|
"grad_norm": 0.3168949130208854,
|
|
"learning_rate": 2.6822732565361563e-06,
|
|
"loss": 0.1305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.020553024485707283,
|
|
"step": 1780,
|
|
"valid_targets_mean": 669.9,
|
|
"valid_targets_min": 317
|
|
},
|
|
{
|
|
"epoch": 5.969899665551839,
|
|
"grad_norm": 0.20213229107425051,
|
|
"learning_rate": 2.5994167274635374e-06,
|
|
"loss": 0.0858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08304500579833984,
|
|
"step": 1785,
|
|
"valid_targets_mean": 4958.1,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 5.986622073578595,
|
|
"grad_norm": 0.21174700725681858,
|
|
"learning_rate": 2.51777107642716e-06,
|
|
"loss": 0.1499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.057880375534296036,
|
|
"step": 1790,
|
|
"valid_targets_mean": 2792.8,
|
|
"valid_targets_min": 227
|
|
},
|
|
{
|
|
"epoch": 6.003344481605351,
|
|
"grad_norm": 0.3473576083007528,
|
|
"learning_rate": 2.437341985013901e-06,
|
|
"loss": 0.0531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08338546752929688,
|
|
"step": 1795,
|
|
"valid_targets_mean": 5056.6,
|
|
"valid_targets_min": 2086
|
|
},
|
|
{
|
|
"epoch": 6.0200668896321075,
|
|
"grad_norm": 0.4091754490201116,
|
|
"learning_rate": 2.3581350501522527e-06,
|
|
"loss": 0.138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07090913504362106,
|
|
"step": 1800,
|
|
"valid_targets_mean": 4833.2,
|
|
"valid_targets_min": 1498
|
|
},
|
|
{
|
|
"epoch": 6.036789297658863,
|
|
"grad_norm": 0.36177584819645003,
|
|
"learning_rate": 2.280155783722835e-06,
|
|
"loss": 0.1274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06082862615585327,
|
|
"step": 1805,
|
|
"valid_targets_mean": 4094.9,
|
|
"valid_targets_min": 2016
|
|
},
|
|
{
|
|
"epoch": 6.053511705685619,
|
|
"grad_norm": 0.4148628828591379,
|
|
"learning_rate": 2.2034096121748315e-06,
|
|
"loss": 0.1717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08808497339487076,
|
|
"step": 1810,
|
|
"valid_targets_mean": 5057.2,
|
|
"valid_targets_min": 1996
|
|
},
|
|
{
|
|
"epoch": 6.070234113712375,
|
|
"grad_norm": 0.325171344129949,
|
|
"learning_rate": 2.1279018761483904e-06,
|
|
"loss": 0.1723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0902898907661438,
|
|
"step": 1815,
|
|
"valid_targets_mean": 5574.8,
|
|
"valid_targets_min": 2361
|
|
},
|
|
{
|
|
"epoch": 6.086956521739131,
|
|
"grad_norm": 0.2940323995229,
|
|
"learning_rate": 2.0536378301029525e-06,
|
|
"loss": 0.1599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07505873590707779,
|
|
"step": 1820,
|
|
"valid_targets_mean": 4868.2,
|
|
"valid_targets_min": 2343
|
|
},
|
|
{
|
|
"epoch": 6.103678929765886,
|
|
"grad_norm": 0.2838435573628232,
|
|
"learning_rate": 1.9806226419516195e-06,
|
|
"loss": 0.1495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07449151575565338,
|
|
"step": 1825,
|
|
"valid_targets_mean": 5462.7,
|
|
"valid_targets_min": 1895
|
|
},
|
|
{
|
|
"epoch": 6.120401337792642,
|
|
"grad_norm": 0.29357688650009867,
|
|
"learning_rate": 1.9088613927015175e-06,
|
|
"loss": 0.1332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.057813793420791626,
|
|
"step": 1830,
|
|
"valid_targets_mean": 4551.1,
|
|
"valid_targets_min": 1893
|
|
},
|
|
{
|
|
"epoch": 6.137123745819398,
|
|
"grad_norm": 0.27448268906481116,
|
|
"learning_rate": 1.8383590761002267e-06,
|
|
"loss": 0.1247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06645906716585159,
|
|
"step": 1835,
|
|
"valid_targets_mean": 5626.3,
|
|
"valid_targets_min": 1920
|
|
},
|
|
{
|
|
"epoch": 6.153846153846154,
|
|
"grad_norm": 0.24594560064883572,
|
|
"learning_rate": 1.7691205982882674e-06,
|
|
"loss": 0.1145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06805051118135452,
|
|
"step": 1840,
|
|
"valid_targets_mean": 6059.1,
|
|
"valid_targets_min": 2069
|
|
},
|
|
{
|
|
"epoch": 6.170568561872909,
|
|
"grad_norm": 0.24201650093289612,
|
|
"learning_rate": 1.7011507774576919e-06,
|
|
"loss": 0.1121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05539209768176079,
|
|
"step": 1845,
|
|
"valid_targets_mean": 5260.9,
|
|
"valid_targets_min": 2059
|
|
},
|
|
{
|
|
"epoch": 6.187290969899665,
|
|
"grad_norm": 0.2477739054762657,
|
|
"learning_rate": 1.6344543435167982e-06,
|
|
"loss": 0.0913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0450078509747982,
|
|
"step": 1850,
|
|
"valid_targets_mean": 6925.2,
|
|
"valid_targets_min": 3127
|
|
},
|
|
{
|
|
"epoch": 6.2040133779264215,
|
|
"grad_norm": 0.28369018922342387,
|
|
"learning_rate": 1.5690359377609744e-06,
|
|
"loss": 0.087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043438881635665894,
|
|
"step": 1855,
|
|
"valid_targets_mean": 6540.8,
|
|
"valid_targets_min": 3272
|
|
},
|
|
{
|
|
"epoch": 6.2207357859531776,
|
|
"grad_norm": 0.24412443503877276,
|
|
"learning_rate": 1.5049001125497299e-06,
|
|
"loss": 0.0861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03809829428792,
|
|
"step": 1860,
|
|
"valid_targets_mean": 6424.6,
|
|
"valid_targets_min": 3413
|
|
},
|
|
{
|
|
"epoch": 6.237458193979933,
|
|
"grad_norm": 0.19920088549109813,
|
|
"learning_rate": 1.442051330989891e-06,
|
|
"loss": 0.0787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03498367220163345,
|
|
"step": 1865,
|
|
"valid_targets_mean": 6344.0,
|
|
"valid_targets_min": 3854
|
|
},
|
|
{
|
|
"epoch": 6.254180602006689,
|
|
"grad_norm": 0.19243055547290297,
|
|
"learning_rate": 1.3804939666250473e-06,
|
|
"loss": 0.0727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0376565121114254,
|
|
"step": 1870,
|
|
"valid_targets_mean": 6136.4,
|
|
"valid_targets_min": 2589
|
|
},
|
|
{
|
|
"epoch": 6.270903010033445,
|
|
"grad_norm": 0.2838700650979706,
|
|
"learning_rate": 1.3202323031311614e-06,
|
|
"loss": 0.126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0998857244849205,
|
|
"step": 1875,
|
|
"valid_targets_mean": 7202.7,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 6.287625418060201,
|
|
"grad_norm": 0.29085342449814766,
|
|
"learning_rate": 1.2612705340185106e-06,
|
|
"loss": 0.1931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11738074570894241,
|
|
"step": 1880,
|
|
"valid_targets_mean": 7978.9,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 6.304347826086957,
|
|
"grad_norm": 0.2592485275583357,
|
|
"learning_rate": 1.203612762339852e-06,
|
|
"loss": 0.1887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10271837562322617,
|
|
"step": 1885,
|
|
"valid_targets_mean": 7635.1,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 6.321070234113712,
|
|
"grad_norm": 0.2727012456159791,
|
|
"learning_rate": 1.1472630004049057e-06,
|
|
"loss": 0.1813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08445721864700317,
|
|
"step": 1890,
|
|
"valid_targets_mean": 6776.2,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 6.337792642140468,
|
|
"grad_norm": 0.2288111747264134,
|
|
"learning_rate": 1.0922251695011354e-06,
|
|
"loss": 0.0851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04259712994098663,
|
|
"step": 1895,
|
|
"valid_targets_mean": 6226.6,
|
|
"valid_targets_min": 1382
|
|
},
|
|
{
|
|
"epoch": 6.354515050167224,
|
|
"grad_norm": 0.19919464965423062,
|
|
"learning_rate": 1.03850309962088e-06,
|
|
"loss": 0.0787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03875719755887985,
|
|
"step": 1900,
|
|
"valid_targets_mean": 6309.2,
|
|
"valid_targets_min": 2073
|
|
},
|
|
{
|
|
"epoch": 6.3712374581939795,
|
|
"grad_norm": 0.17131171402393938,
|
|
"learning_rate": 9.861005291948244e-07,
|
|
"loss": 0.0744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03536094352602959,
|
|
"step": 1905,
|
|
"valid_targets_mean": 5741.7,
|
|
"valid_targets_min": 912
|
|
},
|
|
{
|
|
"epoch": 6.3879598662207355,
|
|
"grad_norm": 0.16122474878987092,
|
|
"learning_rate": 9.350211048318569e-07,
|
|
"loss": 0.069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03413437306880951,
|
|
"step": 1910,
|
|
"valid_targets_mean": 5660.9,
|
|
"valid_targets_min": 928
|
|
},
|
|
{
|
|
"epoch": 6.404682274247492,
|
|
"grad_norm": 0.14825250994360142,
|
|
"learning_rate": 8.852683810652984e-07,
|
|
"loss": 0.0663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032544225454330444,
|
|
"step": 1915,
|
|
"valid_targets_mean": 5903.8,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 6.421404682274248,
|
|
"grad_norm": 0.14726166989285255,
|
|
"learning_rate": 8.368458201055496e-07,
|
|
"loss": 0.063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03000800870358944,
|
|
"step": 1920,
|
|
"valid_targets_mean": 6225.6,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 6.438127090301004,
|
|
"grad_norm": 0.1396267954013971,
|
|
"learning_rate": 7.897567915991722e-07,
|
|
"loss": 0.0635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.028944382444024086,
|
|
"step": 1925,
|
|
"valid_targets_mean": 5885.9,
|
|
"valid_targets_min": 946
|
|
},
|
|
{
|
|
"epoch": 6.454849498327759,
|
|
"grad_norm": 0.13727210655438601,
|
|
"learning_rate": 7.440045723943878e-07,
|
|
"loss": 0.0646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03259645402431488,
|
|
"step": 1930,
|
|
"valid_targets_mean": 6663.5,
|
|
"valid_targets_min": 1286
|
|
},
|
|
{
|
|
"epoch": 6.471571906354515,
|
|
"grad_norm": 0.14223565108156394,
|
|
"learning_rate": 6.995923463130539e-07,
|
|
"loss": 0.0632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032681550830602646,
|
|
"step": 1935,
|
|
"valid_targets_mean": 6012.0,
|
|
"valid_targets_min": 1131
|
|
},
|
|
{
|
|
"epoch": 6.488294314381271,
|
|
"grad_norm": 0.2825635742642267,
|
|
"learning_rate": 6.565232039291025e-07,
|
|
"loss": 0.0835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048326071351766586,
|
|
"step": 1940,
|
|
"valid_targets_mean": 4919.0,
|
|
"valid_targets_min": 1840
|
|
},
|
|
{
|
|
"epoch": 6.505016722408027,
|
|
"grad_norm": 0.31361206565913763,
|
|
"learning_rate": 6.148001423534778e-07,
|
|
"loss": 0.0901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0449114553630352,
|
|
"step": 1945,
|
|
"valid_targets_mean": 5070.2,
|
|
"valid_targets_min": 2026
|
|
},
|
|
{
|
|
"epoch": 6.521739130434782,
|
|
"grad_norm": 0.29670625746396645,
|
|
"learning_rate": 5.744260650255728e-07,
|
|
"loss": 0.0813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04008527100086212,
|
|
"step": 1950,
|
|
"valid_targets_mean": 4403.6,
|
|
"valid_targets_min": 2244
|
|
},
|
|
{
|
|
"epoch": 6.538461538461538,
|
|
"grad_norm": 0.2716221458024064,
|
|
"learning_rate": 5.354037815111701e-07,
|
|
"loss": 0.0762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03740563616156578,
|
|
"step": 1955,
|
|
"valid_targets_mean": 4542.4,
|
|
"valid_targets_min": 2002
|
|
},
|
|
{
|
|
"epoch": 6.555183946488294,
|
|
"grad_norm": 0.2532032074830418,
|
|
"learning_rate": 4.977360073069526e-07,
|
|
"loss": 0.0689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03577180206775665,
|
|
"step": 1960,
|
|
"valid_targets_mean": 4503.7,
|
|
"valid_targets_min": 1789
|
|
},
|
|
{
|
|
"epoch": 6.5719063545150505,
|
|
"grad_norm": 0.22552989450775937,
|
|
"learning_rate": 4.6142536365151094e-07,
|
|
"loss": 0.0692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03444986417889595,
|
|
"step": 1965,
|
|
"valid_targets_mean": 4398.6,
|
|
"valid_targets_min": 1668
|
|
},
|
|
{
|
|
"epoch": 6.588628762541806,
|
|
"grad_norm": 0.20729385398915387,
|
|
"learning_rate": 4.264743773429514e-07,
|
|
"loss": 0.0657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03252232074737549,
|
|
"step": 1970,
|
|
"valid_targets_mean": 4617.8,
|
|
"valid_targets_min": 1769
|
|
},
|
|
{
|
|
"epoch": 6.605351170568562,
|
|
"grad_norm": 0.2990520979043523,
|
|
"learning_rate": 3.928854805630589e-07,
|
|
"loss": 0.0693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04175163805484772,
|
|
"step": 1975,
|
|
"valid_targets_mean": 4698.9,
|
|
"valid_targets_min": 1997
|
|
},
|
|
{
|
|
"epoch": 6.622073578595318,
|
|
"grad_norm": 0.33424990474677074,
|
|
"learning_rate": 3.606610107080455e-07,
|
|
"loss": 0.0806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03844280540943146,
|
|
"step": 1980,
|
|
"valid_targets_mean": 4525.5,
|
|
"valid_targets_min": 1280
|
|
},
|
|
{
|
|
"epoch": 6.638795986622074,
|
|
"grad_norm": 0.2978823977132389,
|
|
"learning_rate": 3.2980321022588966e-07,
|
|
"loss": 0.0823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039478056132793427,
|
|
"step": 1985,
|
|
"valid_targets_mean": 4563.4,
|
|
"valid_targets_min": 2503
|
|
},
|
|
{
|
|
"epoch": 6.65551839464883,
|
|
"grad_norm": 0.24932136960407367,
|
|
"learning_rate": 3.0031422646028543e-07,
|
|
"loss": 0.0819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04389968141913414,
|
|
"step": 1990,
|
|
"valid_targets_mean": 4560.3,
|
|
"valid_targets_min": 3580
|
|
},
|
|
{
|
|
"epoch": 6.672240802675585,
|
|
"grad_norm": 0.22617928000458856,
|
|
"learning_rate": 2.721961115012262e-07,
|
|
"loss": 0.0786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042046938091516495,
|
|
"step": 1995,
|
|
"valid_targets_mean": 4502.5,
|
|
"valid_targets_min": 1623
|
|
},
|
|
{
|
|
"epoch": 6.688963210702341,
|
|
"grad_norm": 0.22172661227009116,
|
|
"learning_rate": 2.4545082204219027e-07,
|
|
"loss": 0.0739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03488779440522194,
|
|
"step": 2000,
|
|
"valid_targets_mean": 4578.2,
|
|
"valid_targets_min": 890
|
|
},
|
|
{
|
|
"epoch": 6.705685618729097,
|
|
"grad_norm": 0.22569167167372475,
|
|
"learning_rate": 2.2008021924398326e-07,
|
|
"loss": 0.0747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038183216005563736,
|
|
"step": 2005,
|
|
"valid_targets_mean": 4561.1,
|
|
"valid_targets_min": 2619
|
|
},
|
|
{
|
|
"epoch": 6.722408026755852,
|
|
"grad_norm": 0.2548038543282001,
|
|
"learning_rate": 1.960860686052235e-07,
|
|
"loss": 0.0913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05467091500759125,
|
|
"step": 2010,
|
|
"valid_targets_mean": 5120.4,
|
|
"valid_targets_min": 347
|
|
},
|
|
{
|
|
"epoch": 6.739130434782608,
|
|
"grad_norm": 0.27709232779933785,
|
|
"learning_rate": 1.7347003983947618e-07,
|
|
"loss": 0.0973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.015401815064251423,
|
|
"step": 2015,
|
|
"valid_targets_mean": 717.9,
|
|
"valid_targets_min": 153
|
|
},
|
|
{
|
|
"epoch": 6.7558528428093645,
|
|
"grad_norm": 0.28067633113664575,
|
|
"learning_rate": 1.5223370675907512e-07,
|
|
"loss": 0.052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05179910361766815,
|
|
"step": 2020,
|
|
"valid_targets_mean": 5254.4,
|
|
"valid_targets_min": 384
|
|
},
|
|
{
|
|
"epoch": 6.7725752508361206,
|
|
"grad_norm": 0.21202211005636343,
|
|
"learning_rate": 1.3237854716558805e-07,
|
|
"loss": 0.0928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.013374594040215015,
|
|
"step": 2025,
|
|
"valid_targets_mean": 689.2,
|
|
"valid_targets_min": 388
|
|
},
|
|
{
|
|
"epoch": 6.789297658862877,
|
|
"grad_norm": 0.3264472072680639,
|
|
"learning_rate": 1.139059427469924e-07,
|
|
"loss": 0.0573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0691215768456459,
|
|
"step": 2030,
|
|
"valid_targets_mean": 4931.9,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 6.806020066889632,
|
|
"grad_norm": 0.4412563630604212,
|
|
"learning_rate": 9.681717898151643e-08,
|
|
"loss": 0.1359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07211461663246155,
|
|
"step": 2035,
|
|
"valid_targets_mean": 4745.1,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 6.822742474916388,
|
|
"grad_norm": 0.4366569231412587,
|
|
"learning_rate": 8.111344504818652e-08,
|
|
"loss": 0.0524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08174751698970795,
|
|
"step": 2040,
|
|
"valid_targets_mean": 4869.9,
|
|
"valid_targets_min": 362
|
|
},
|
|
{
|
|
"epoch": 6.839464882943144,
|
|
"grad_norm": 0.4366167223122456,
|
|
"learning_rate": 6.679583374407994e-08,
|
|
"loss": 0.1513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07840166240930557,
|
|
"step": 2045,
|
|
"valid_targets_mean": 4980.0,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 6.8561872909699,
|
|
"grad_norm": 0.3649814021288434,
|
|
"learning_rate": 5.386534140827016e-08,
|
|
"loss": 0.0666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05510362610220909,
|
|
"step": 2050,
|
|
"valid_targets_mean": 2875.1,
|
|
"valid_targets_min": 438
|
|
},
|
|
{
|
|
"epoch": 6.872909698996655,
|
|
"grad_norm": 0.36466189168717694,
|
|
"learning_rate": 4.2322867852504504e-08,
|
|
"loss": 0.1413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06946464627981186,
|
|
"step": 2055,
|
|
"valid_targets_mean": 4406.9,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 6.889632107023411,
|
|
"grad_norm": 0.28264506176242976,
|
|
"learning_rate": 3.216921629857428e-08,
|
|
"loss": 0.0406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.009398256428539753,
|
|
"step": 2060,
|
|
"valid_targets_mean": 731.0,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 6.906354515050167,
|
|
"grad_norm": 0.3636763763778237,
|
|
"learning_rate": 2.3405093322428352e-08,
|
|
"loss": 0.1323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0698225274682045,
|
|
"step": 2065,
|
|
"valid_targets_mean": 5006.6,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 6.923076923076923,
|
|
"grad_norm": 0.26979662455584824,
|
|
"learning_rate": 1.60311088050058e-08,
|
|
"loss": 0.0716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.009567172266542912,
|
|
"step": 2070,
|
|
"valid_targets_mean": 746.1,
|
|
"valid_targets_min": 390
|
|
},
|
|
{
|
|
"epoch": 6.9397993311036785,
|
|
"grad_norm": 0.4024366198247134,
|
|
"learning_rate": 1.0047775889785449e-08,
|
|
"loss": 0.1263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07361600548028946,
|
|
"step": 2075,
|
|
"valid_targets_mean": 4372.3,
|
|
"valid_targets_min": 355
|
|
},
|
|
{
|
|
"epoch": 6.956521739130435,
|
|
"grad_norm": 0.25377009064197936,
|
|
"learning_rate": 5.455510947089959e-09,
|
|
"loss": 0.0995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.012170721776783466,
|
|
"step": 2080,
|
|
"valid_targets_mean": 684.0,
|
|
"valid_targets_min": 158
|
|
},
|
|
{
|
|
"epoch": 6.973244147157191,
|
|
"grad_norm": 0.4334822839491044,
|
|
"learning_rate": 2.254633545097917e-09,
|
|
"loss": 0.1031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06972566246986389,
|
|
"step": 2085,
|
|
"valid_targets_mean": 4803.8,
|
|
"valid_targets_min": 395
|
|
},
|
|
{
|
|
"epoch": 6.989966555183947,
|
|
"grad_norm": 0.2520773089738265,
|
|
"learning_rate": 4.453664276171665e-10,
|
|
"loss": 0.1211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.012810945510864258,
|
|
"step": 2090,
|
|
"valid_targets_mean": 638.7,
|
|
"valid_targets_min": 373
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01995828002691269,
|
|
"step": 2093,
|
|
"total_flos": 6.2847112177181e+18,
|
|
"train_loss": 0.030045701052888524,
|
|
"train_runtime": 9894.0562,
|
|
"train_samples_per_second": 20.297,
|
|
"train_steps_per_second": 0.212,
|
|
"valid_targets_mean": 2201.4,
|
|
"valid_targets_min": 325
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 2093,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 750,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 6.2847112177181e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|