9695 lines
269 KiB
JSON
9695 lines
269 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4389,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.007974481658692184,
|
|
"grad_norm": 15.127922447384513,
|
|
"learning_rate": 3.644646924829157e-07,
|
|
"loss": 0.8496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8034714460372925,
|
|
"step": 5,
|
|
"valid_targets_mean": 3464.3,
|
|
"valid_targets_min": 1035
|
|
},
|
|
{
|
|
"epoch": 0.01594896331738437,
|
|
"grad_norm": 19.69733636281772,
|
|
"learning_rate": 8.200455580865605e-07,
|
|
"loss": 0.9079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.9585621953010559,
|
|
"step": 10,
|
|
"valid_targets_mean": 3331.6,
|
|
"valid_targets_min": 1734
|
|
},
|
|
{
|
|
"epoch": 0.023923444976076555,
|
|
"grad_norm": 14.431941657096576,
|
|
"learning_rate": 1.2756264236902052e-06,
|
|
"loss": 0.9144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8896681070327759,
|
|
"step": 15,
|
|
"valid_targets_mean": 3510.8,
|
|
"valid_targets_min": 1846
|
|
},
|
|
{
|
|
"epoch": 0.03189792663476874,
|
|
"grad_norm": 13.334887478820832,
|
|
"learning_rate": 1.7312072892938498e-06,
|
|
"loss": 0.827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.796250581741333,
|
|
"step": 20,
|
|
"valid_targets_mean": 2784.3,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 0.03987240829346093,
|
|
"grad_norm": 9.692569675376305,
|
|
"learning_rate": 2.1867881548974945e-06,
|
|
"loss": 0.8502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8489354848861694,
|
|
"step": 25,
|
|
"valid_targets_mean": 2541.0,
|
|
"valid_targets_min": 1060
|
|
},
|
|
{
|
|
"epoch": 0.04784688995215311,
|
|
"grad_norm": 4.674896503436323,
|
|
"learning_rate": 2.642369020501139e-06,
|
|
"loss": 0.7904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7984523773193359,
|
|
"step": 30,
|
|
"valid_targets_mean": 3130.2,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 0.05582137161084529,
|
|
"grad_norm": 2.865802787966796,
|
|
"learning_rate": 3.0979498861047843e-06,
|
|
"loss": 0.7596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6358535289764404,
|
|
"step": 35,
|
|
"valid_targets_mean": 3559.8,
|
|
"valid_targets_min": 865
|
|
},
|
|
{
|
|
"epoch": 0.06379585326953748,
|
|
"grad_norm": 2.1378353777481056,
|
|
"learning_rate": 3.5535307517084285e-06,
|
|
"loss": 0.7025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6606279611587524,
|
|
"step": 40,
|
|
"valid_targets_mean": 3011.1,
|
|
"valid_targets_min": 1505
|
|
},
|
|
{
|
|
"epoch": 0.07177033492822966,
|
|
"grad_norm": 1.7608165254529249,
|
|
"learning_rate": 4.009111617312073e-06,
|
|
"loss": 0.6654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6862376928329468,
|
|
"step": 45,
|
|
"valid_targets_mean": 2800.4,
|
|
"valid_targets_min": 913
|
|
},
|
|
{
|
|
"epoch": 0.07974481658692185,
|
|
"grad_norm": 1.2921596417750083,
|
|
"learning_rate": 4.464692482915718e-06,
|
|
"loss": 0.6513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6210326552391052,
|
|
"step": 50,
|
|
"valid_targets_mean": 3970.8,
|
|
"valid_targets_min": 1514
|
|
},
|
|
{
|
|
"epoch": 0.08771929824561403,
|
|
"grad_norm": 1.196200020012732,
|
|
"learning_rate": 4.920273348519363e-06,
|
|
"loss": 0.6339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5954710245132446,
|
|
"step": 55,
|
|
"valid_targets_mean": 2827.4,
|
|
"valid_targets_min": 1165
|
|
},
|
|
{
|
|
"epoch": 0.09569377990430622,
|
|
"grad_norm": 0.9890870548908984,
|
|
"learning_rate": 5.375854214123008e-06,
|
|
"loss": 0.5964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.56838059425354,
|
|
"step": 60,
|
|
"valid_targets_mean": 2744.9,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 0.10366826156299841,
|
|
"grad_norm": 0.9988611669224601,
|
|
"learning_rate": 5.831435079726651e-06,
|
|
"loss": 0.634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7371460199356079,
|
|
"step": 65,
|
|
"valid_targets_mean": 3436.1,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 0.11164274322169059,
|
|
"grad_norm": 0.8876613445056799,
|
|
"learning_rate": 6.287015945330297e-06,
|
|
"loss": 0.596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6225056648254395,
|
|
"step": 70,
|
|
"valid_targets_mean": 3361.5,
|
|
"valid_targets_min": 1117
|
|
},
|
|
{
|
|
"epoch": 0.11961722488038277,
|
|
"grad_norm": 1.2216199185383356,
|
|
"learning_rate": 6.742596810933942e-06,
|
|
"loss": 0.6353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6561897397041321,
|
|
"step": 75,
|
|
"valid_targets_mean": 3629.6,
|
|
"valid_targets_min": 1617
|
|
},
|
|
{
|
|
"epoch": 0.12759170653907495,
|
|
"grad_norm": 0.7437445344074957,
|
|
"learning_rate": 7.1981776765375854e-06,
|
|
"loss": 0.6016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5876457095146179,
|
|
"step": 80,
|
|
"valid_targets_mean": 3879.9,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 0.13556618819776714,
|
|
"grad_norm": 0.738513705168467,
|
|
"learning_rate": 7.65375854214123e-06,
|
|
"loss": 0.6265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.702168345451355,
|
|
"step": 85,
|
|
"valid_targets_mean": 4440.8,
|
|
"valid_targets_min": 1154
|
|
},
|
|
{
|
|
"epoch": 0.14354066985645933,
|
|
"grad_norm": 0.8652779608375268,
|
|
"learning_rate": 8.109339407744875e-06,
|
|
"loss": 0.5918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.53765869140625,
|
|
"step": 90,
|
|
"valid_targets_mean": 2699.6,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 0.15151515151515152,
|
|
"grad_norm": 0.7248877271796724,
|
|
"learning_rate": 8.564920273348521e-06,
|
|
"loss": 0.5826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5529447197914124,
|
|
"step": 95,
|
|
"valid_targets_mean": 3513.9,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 0.1594896331738437,
|
|
"grad_norm": 0.7720771528092698,
|
|
"learning_rate": 9.020501138952164e-06,
|
|
"loss": 0.5835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5463300347328186,
|
|
"step": 100,
|
|
"valid_targets_mean": 3194.2,
|
|
"valid_targets_min": 1403
|
|
},
|
|
{
|
|
"epoch": 0.1674641148325359,
|
|
"grad_norm": 0.7109031182973444,
|
|
"learning_rate": 9.47608200455581e-06,
|
|
"loss": 0.5573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5521876811981201,
|
|
"step": 105,
|
|
"valid_targets_mean": 3626.8,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 0.17543859649122806,
|
|
"grad_norm": 0.8165240717723814,
|
|
"learning_rate": 9.931662870159453e-06,
|
|
"loss": 0.5567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5793558359146118,
|
|
"step": 110,
|
|
"valid_targets_mean": 2847.6,
|
|
"valid_targets_min": 1199
|
|
},
|
|
{
|
|
"epoch": 0.18341307814992025,
|
|
"grad_norm": 0.7788709279878354,
|
|
"learning_rate": 1.03872437357631e-05,
|
|
"loss": 0.5526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5513651371002197,
|
|
"step": 115,
|
|
"valid_targets_mean": 2899.9,
|
|
"valid_targets_min": 1015
|
|
},
|
|
{
|
|
"epoch": 0.19138755980861244,
|
|
"grad_norm": 0.7186633508530215,
|
|
"learning_rate": 1.0842824601366744e-05,
|
|
"loss": 0.5376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.536964476108551,
|
|
"step": 120,
|
|
"valid_targets_mean": 3179.4,
|
|
"valid_targets_min": 1414
|
|
},
|
|
{
|
|
"epoch": 0.19936204146730463,
|
|
"grad_norm": 0.7618409228447418,
|
|
"learning_rate": 1.1298405466970387e-05,
|
|
"loss": 0.5654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5276836156845093,
|
|
"step": 125,
|
|
"valid_targets_mean": 3019.7,
|
|
"valid_targets_min": 1374
|
|
},
|
|
{
|
|
"epoch": 0.20733652312599682,
|
|
"grad_norm": 0.7548292779417054,
|
|
"learning_rate": 1.1753986332574032e-05,
|
|
"loss": 0.5545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5917087197303772,
|
|
"step": 130,
|
|
"valid_targets_mean": 3047.1,
|
|
"valid_targets_min": 953
|
|
},
|
|
{
|
|
"epoch": 0.215311004784689,
|
|
"grad_norm": 0.645242667197478,
|
|
"learning_rate": 1.2209567198177677e-05,
|
|
"loss": 0.5203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5084380507469177,
|
|
"step": 135,
|
|
"valid_targets_mean": 4194.2,
|
|
"valid_targets_min": 1235
|
|
},
|
|
{
|
|
"epoch": 0.22328548644338117,
|
|
"grad_norm": 0.7310092156703836,
|
|
"learning_rate": 1.2665148063781323e-05,
|
|
"loss": 0.5414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5333206653594971,
|
|
"step": 140,
|
|
"valid_targets_mean": 3039.2,
|
|
"valid_targets_min": 1089
|
|
},
|
|
{
|
|
"epoch": 0.23125996810207336,
|
|
"grad_norm": 0.8906109699349473,
|
|
"learning_rate": 1.3120728929384968e-05,
|
|
"loss": 0.5058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4827396869659424,
|
|
"step": 145,
|
|
"valid_targets_mean": 2157.5,
|
|
"valid_targets_min": 1062
|
|
},
|
|
{
|
|
"epoch": 0.23923444976076555,
|
|
"grad_norm": 0.878460548312033,
|
|
"learning_rate": 1.357630979498861e-05,
|
|
"loss": 0.5875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5706103444099426,
|
|
"step": 150,
|
|
"valid_targets_mean": 3064.0,
|
|
"valid_targets_min": 1263
|
|
},
|
|
{
|
|
"epoch": 0.24720893141945774,
|
|
"grad_norm": 0.6489549915146423,
|
|
"learning_rate": 1.4031890660592255e-05,
|
|
"loss": 0.4942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4534875452518463,
|
|
"step": 155,
|
|
"valid_targets_mean": 3274.9,
|
|
"valid_targets_min": 977
|
|
},
|
|
{
|
|
"epoch": 0.2551834130781499,
|
|
"grad_norm": 0.7303299756814815,
|
|
"learning_rate": 1.4487471526195902e-05,
|
|
"loss": 0.5109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5248881578445435,
|
|
"step": 160,
|
|
"valid_targets_mean": 3270.2,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 0.2631578947368421,
|
|
"grad_norm": 0.8298683265620993,
|
|
"learning_rate": 1.4943052391799546e-05,
|
|
"loss": 0.5226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5034124851226807,
|
|
"step": 165,
|
|
"valid_targets_mean": 2528.7,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 0.2711323763955343,
|
|
"grad_norm": 0.7598177413717587,
|
|
"learning_rate": 1.539863325740319e-05,
|
|
"loss": 0.5666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5285817384719849,
|
|
"step": 170,
|
|
"valid_targets_mean": 3129.4,
|
|
"valid_targets_min": 903
|
|
},
|
|
{
|
|
"epoch": 0.27910685805422647,
|
|
"grad_norm": 0.6921208171982842,
|
|
"learning_rate": 1.5854214123006836e-05,
|
|
"loss": 0.519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5039660930633545,
|
|
"step": 175,
|
|
"valid_targets_mean": 3425.8,
|
|
"valid_targets_min": 1457
|
|
},
|
|
{
|
|
"epoch": 0.28708133971291866,
|
|
"grad_norm": 0.7436673901842339,
|
|
"learning_rate": 1.630979498861048e-05,
|
|
"loss": 0.5171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5366975665092468,
|
|
"step": 180,
|
|
"valid_targets_mean": 3364.8,
|
|
"valid_targets_min": 1234
|
|
},
|
|
{
|
|
"epoch": 0.29505582137161085,
|
|
"grad_norm": 0.7732547566563825,
|
|
"learning_rate": 1.6765375854214125e-05,
|
|
"loss": 0.5292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4973922669887543,
|
|
"step": 185,
|
|
"valid_targets_mean": 2913.9,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 0.30303030303030304,
|
|
"grad_norm": 0.6866592156949076,
|
|
"learning_rate": 1.722095671981777e-05,
|
|
"loss": 0.498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.472371369600296,
|
|
"step": 190,
|
|
"valid_targets_mean": 3674.4,
|
|
"valid_targets_min": 1725
|
|
},
|
|
{
|
|
"epoch": 0.31100478468899523,
|
|
"grad_norm": 0.8739771965156021,
|
|
"learning_rate": 1.7676537585421415e-05,
|
|
"loss": 0.5472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5557119846343994,
|
|
"step": 195,
|
|
"valid_targets_mean": 3430.9,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 0.3189792663476874,
|
|
"grad_norm": 0.7437974196095252,
|
|
"learning_rate": 1.813211845102506e-05,
|
|
"loss": 0.5162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5431371331214905,
|
|
"step": 200,
|
|
"valid_targets_mean": 3698.8,
|
|
"valid_targets_min": 2127
|
|
},
|
|
{
|
|
"epoch": 0.3269537480063796,
|
|
"grad_norm": 1.1635344671006835,
|
|
"learning_rate": 1.8587699316628704e-05,
|
|
"loss": 0.5325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5302642583847046,
|
|
"step": 205,
|
|
"valid_targets_mean": 3076.0,
|
|
"valid_targets_min": 1469
|
|
},
|
|
{
|
|
"epoch": 0.3349282296650718,
|
|
"grad_norm": 0.6020654471372455,
|
|
"learning_rate": 1.904328018223235e-05,
|
|
"loss": 0.4567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4613984525203705,
|
|
"step": 210,
|
|
"valid_targets_mean": 4328.1,
|
|
"valid_targets_min": 1371
|
|
},
|
|
{
|
|
"epoch": 0.34290271132376393,
|
|
"grad_norm": 0.8502520460002825,
|
|
"learning_rate": 1.9498861047835993e-05,
|
|
"loss": 0.4723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4569597840309143,
|
|
"step": 215,
|
|
"valid_targets_mean": 2609.9,
|
|
"valid_targets_min": 885
|
|
},
|
|
{
|
|
"epoch": 0.3508771929824561,
|
|
"grad_norm": 0.7363086654666873,
|
|
"learning_rate": 1.9954441913439638e-05,
|
|
"loss": 0.5175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4768849313259125,
|
|
"step": 220,
|
|
"valid_targets_mean": 3625.3,
|
|
"valid_targets_min": 1377
|
|
},
|
|
{
|
|
"epoch": 0.3588516746411483,
|
|
"grad_norm": 0.7294115930930211,
|
|
"learning_rate": 2.0410022779043283e-05,
|
|
"loss": 0.4615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44677841663360596,
|
|
"step": 225,
|
|
"valid_targets_mean": 2950.6,
|
|
"valid_targets_min": 1121
|
|
},
|
|
{
|
|
"epoch": 0.3668261562998405,
|
|
"grad_norm": 0.7625867958551134,
|
|
"learning_rate": 2.0865603644646927e-05,
|
|
"loss": 0.5161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5774617195129395,
|
|
"step": 230,
|
|
"valid_targets_mean": 3574.1,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 0.3748006379585327,
|
|
"grad_norm": 0.8577344255267311,
|
|
"learning_rate": 2.1321184510250572e-05,
|
|
"loss": 0.5442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5697407722473145,
|
|
"step": 235,
|
|
"valid_targets_mean": 4210.1,
|
|
"valid_targets_min": 1295
|
|
},
|
|
{
|
|
"epoch": 0.3827751196172249,
|
|
"grad_norm": 0.6872145622727927,
|
|
"learning_rate": 2.1776765375854217e-05,
|
|
"loss": 0.4759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46876877546310425,
|
|
"step": 240,
|
|
"valid_targets_mean": 3643.3,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 0.39074960127591707,
|
|
"grad_norm": 0.690766988387977,
|
|
"learning_rate": 2.223234624145786e-05,
|
|
"loss": 0.489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4921073019504547,
|
|
"step": 245,
|
|
"valid_targets_mean": 3560.4,
|
|
"valid_targets_min": 1414
|
|
},
|
|
{
|
|
"epoch": 0.39872408293460926,
|
|
"grad_norm": 0.8826544208323784,
|
|
"learning_rate": 2.2687927107061506e-05,
|
|
"loss": 0.4549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4656578600406647,
|
|
"step": 250,
|
|
"valid_targets_mean": 3101.1,
|
|
"valid_targets_min": 1378
|
|
},
|
|
{
|
|
"epoch": 0.40669856459330145,
|
|
"grad_norm": 0.7846687411256017,
|
|
"learning_rate": 2.314350797266515e-05,
|
|
"loss": 0.4863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4907362759113312,
|
|
"step": 255,
|
|
"valid_targets_mean": 3228.5,
|
|
"valid_targets_min": 1289
|
|
},
|
|
{
|
|
"epoch": 0.41467304625199364,
|
|
"grad_norm": 0.7670642162448978,
|
|
"learning_rate": 2.3599088838268792e-05,
|
|
"loss": 0.4733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48784101009368896,
|
|
"step": 260,
|
|
"valid_targets_mean": 3500.4,
|
|
"valid_targets_min": 1065
|
|
},
|
|
{
|
|
"epoch": 0.4226475279106858,
|
|
"grad_norm": 0.6225554238523089,
|
|
"learning_rate": 2.4054669703872436e-05,
|
|
"loss": 0.4858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49528658390045166,
|
|
"step": 265,
|
|
"valid_targets_mean": 4491.2,
|
|
"valid_targets_min": 969
|
|
},
|
|
{
|
|
"epoch": 0.430622009569378,
|
|
"grad_norm": 0.7629950738750323,
|
|
"learning_rate": 2.4510250569476085e-05,
|
|
"loss": 0.5013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48723483085632324,
|
|
"step": 270,
|
|
"valid_targets_mean": 3000.6,
|
|
"valid_targets_min": 1421
|
|
},
|
|
{
|
|
"epoch": 0.43859649122807015,
|
|
"grad_norm": 0.8972591664356634,
|
|
"learning_rate": 2.496583143507973e-05,
|
|
"loss": 0.468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5626935958862305,
|
|
"step": 275,
|
|
"valid_targets_mean": 2843.7,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 0.44657097288676234,
|
|
"grad_norm": 0.7786772912165658,
|
|
"learning_rate": 2.5421412300683374e-05,
|
|
"loss": 0.5081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5341470241546631,
|
|
"step": 280,
|
|
"valid_targets_mean": 3316.4,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 0.45454545454545453,
|
|
"grad_norm": 0.75518168872564,
|
|
"learning_rate": 2.587699316628702e-05,
|
|
"loss": 0.4666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5116881132125854,
|
|
"step": 285,
|
|
"valid_targets_mean": 3286.8,
|
|
"valid_targets_min": 1128
|
|
},
|
|
{
|
|
"epoch": 0.4625199362041467,
|
|
"grad_norm": 0.8078369406824649,
|
|
"learning_rate": 2.6332574031890663e-05,
|
|
"loss": 0.5157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5708426237106323,
|
|
"step": 290,
|
|
"valid_targets_mean": 2961.6,
|
|
"valid_targets_min": 1296
|
|
},
|
|
{
|
|
"epoch": 0.4704944178628389,
|
|
"grad_norm": 0.8246121711278322,
|
|
"learning_rate": 2.6788154897494308e-05,
|
|
"loss": 0.4924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5241743326187134,
|
|
"step": 295,
|
|
"valid_targets_mean": 2755.7,
|
|
"valid_targets_min": 1365
|
|
},
|
|
{
|
|
"epoch": 0.4784688995215311,
|
|
"grad_norm": 0.8180474083390543,
|
|
"learning_rate": 2.7243735763097953e-05,
|
|
"loss": 0.5255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5246994495391846,
|
|
"step": 300,
|
|
"valid_targets_mean": 2879.9,
|
|
"valid_targets_min": 1055
|
|
},
|
|
{
|
|
"epoch": 0.4864433811802233,
|
|
"grad_norm": 0.7378505225677134,
|
|
"learning_rate": 2.7699316628701597e-05,
|
|
"loss": 0.4932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4941822290420532,
|
|
"step": 305,
|
|
"valid_targets_mean": 3403.6,
|
|
"valid_targets_min": 1986
|
|
},
|
|
{
|
|
"epoch": 0.4944178628389155,
|
|
"grad_norm": 0.7531487512729909,
|
|
"learning_rate": 2.815489749430524e-05,
|
|
"loss": 0.4933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4404856860637665,
|
|
"step": 310,
|
|
"valid_targets_mean": 3416.0,
|
|
"valid_targets_min": 1029
|
|
},
|
|
{
|
|
"epoch": 0.5023923444976076,
|
|
"grad_norm": 0.7745720365544442,
|
|
"learning_rate": 2.8610478359908883e-05,
|
|
"loss": 0.4695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.50621098279953,
|
|
"step": 315,
|
|
"valid_targets_mean": 2967.9,
|
|
"valid_targets_min": 1485
|
|
},
|
|
{
|
|
"epoch": 0.5103668261562998,
|
|
"grad_norm": 0.6650247958388963,
|
|
"learning_rate": 2.906605922551253e-05,
|
|
"loss": 0.4629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42790138721466064,
|
|
"step": 320,
|
|
"valid_targets_mean": 3460.0,
|
|
"valid_targets_min": 1079
|
|
},
|
|
{
|
|
"epoch": 0.518341307814992,
|
|
"grad_norm": 0.912794392682691,
|
|
"learning_rate": 2.9521640091116176e-05,
|
|
"loss": 0.4938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5048559904098511,
|
|
"step": 325,
|
|
"valid_targets_mean": 3103.2,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 0.5263157894736842,
|
|
"grad_norm": 0.8339178445435564,
|
|
"learning_rate": 2.997722095671982e-05,
|
|
"loss": 0.4908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5622161030769348,
|
|
"step": 330,
|
|
"valid_targets_mean": 2952.8,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 0.5342902711323764,
|
|
"grad_norm": 0.7293959717107233,
|
|
"learning_rate": 3.0432801822323465e-05,
|
|
"loss": 0.4815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4653390944004059,
|
|
"step": 335,
|
|
"valid_targets_mean": 3460.2,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 0.5422647527910686,
|
|
"grad_norm": 0.8213157388903748,
|
|
"learning_rate": 3.088838268792711e-05,
|
|
"loss": 0.4822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5032646059989929,
|
|
"step": 340,
|
|
"valid_targets_mean": 3149.8,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 0.5502392344497608,
|
|
"grad_norm": 0.6934731912913757,
|
|
"learning_rate": 3.1343963553530755e-05,
|
|
"loss": 0.4948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5283869504928589,
|
|
"step": 345,
|
|
"valid_targets_mean": 3777.6,
|
|
"valid_targets_min": 1373
|
|
},
|
|
{
|
|
"epoch": 0.5582137161084529,
|
|
"grad_norm": 0.8092216001786762,
|
|
"learning_rate": 3.17995444191344e-05,
|
|
"loss": 0.467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4412536025047302,
|
|
"step": 350,
|
|
"valid_targets_mean": 2992.0,
|
|
"valid_targets_min": 1428
|
|
},
|
|
{
|
|
"epoch": 0.5661881977671451,
|
|
"grad_norm": 0.6403582411150247,
|
|
"learning_rate": 3.2255125284738044e-05,
|
|
"loss": 0.4611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49964314699172974,
|
|
"step": 355,
|
|
"valid_targets_mean": 4235.7,
|
|
"valid_targets_min": 1258
|
|
},
|
|
{
|
|
"epoch": 0.5741626794258373,
|
|
"grad_norm": 0.7706254424279698,
|
|
"learning_rate": 3.271070615034169e-05,
|
|
"loss": 0.4659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.482266902923584,
|
|
"step": 360,
|
|
"valid_targets_mean": 3361.9,
|
|
"valid_targets_min": 965
|
|
},
|
|
{
|
|
"epoch": 0.5821371610845295,
|
|
"grad_norm": 1.0817990690164243,
|
|
"learning_rate": 3.316628701594533e-05,
|
|
"loss": 0.4599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47092992067337036,
|
|
"step": 365,
|
|
"valid_targets_mean": 3200.4,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 0.5901116427432217,
|
|
"grad_norm": 0.6808641417190957,
|
|
"learning_rate": 3.362186788154898e-05,
|
|
"loss": 0.4792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4793427884578705,
|
|
"step": 370,
|
|
"valid_targets_mean": 3397.7,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 0.5980861244019139,
|
|
"grad_norm": 0.7380006132214376,
|
|
"learning_rate": 3.407744874715262e-05,
|
|
"loss": 0.4732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4377143383026123,
|
|
"step": 375,
|
|
"valid_targets_mean": 3803.4,
|
|
"valid_targets_min": 1483
|
|
},
|
|
{
|
|
"epoch": 0.6060606060606061,
|
|
"grad_norm": 0.8350926523235963,
|
|
"learning_rate": 3.453302961275627e-05,
|
|
"loss": 0.4503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49169227480888367,
|
|
"step": 380,
|
|
"valid_targets_mean": 2857.6,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 0.6140350877192983,
|
|
"grad_norm": 0.7394535165923002,
|
|
"learning_rate": 3.498861047835991e-05,
|
|
"loss": 0.4658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5178069472312927,
|
|
"step": 385,
|
|
"valid_targets_mean": 3749.4,
|
|
"valid_targets_min": 1053
|
|
},
|
|
{
|
|
"epoch": 0.6220095693779905,
|
|
"grad_norm": 0.8655988585894333,
|
|
"learning_rate": 3.5444191343963557e-05,
|
|
"loss": 0.4595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43641331791877747,
|
|
"step": 390,
|
|
"valid_targets_mean": 3012.1,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 0.6299840510366826,
|
|
"grad_norm": 0.7220991949945159,
|
|
"learning_rate": 3.58997722095672e-05,
|
|
"loss": 0.4701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4417484700679779,
|
|
"step": 395,
|
|
"valid_targets_mean": 3444.9,
|
|
"valid_targets_min": 1451
|
|
},
|
|
{
|
|
"epoch": 0.6379585326953748,
|
|
"grad_norm": 0.78329388951421,
|
|
"learning_rate": 3.6355353075170846e-05,
|
|
"loss": 0.4628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4460381269454956,
|
|
"step": 400,
|
|
"valid_targets_mean": 2709.7,
|
|
"valid_targets_min": 1326
|
|
},
|
|
{
|
|
"epoch": 0.645933014354067,
|
|
"grad_norm": 0.665156372558353,
|
|
"learning_rate": 3.681093394077449e-05,
|
|
"loss": 0.4588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4657803773880005,
|
|
"step": 405,
|
|
"valid_targets_mean": 4157.2,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 0.6539074960127592,
|
|
"grad_norm": 0.7116868785014887,
|
|
"learning_rate": 3.7266514806378135e-05,
|
|
"loss": 0.461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41383352875709534,
|
|
"step": 410,
|
|
"valid_targets_mean": 3004.6,
|
|
"valid_targets_min": 1221
|
|
},
|
|
{
|
|
"epoch": 0.6618819776714514,
|
|
"grad_norm": 0.7703279183087242,
|
|
"learning_rate": 3.772209567198178e-05,
|
|
"loss": 0.4757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.505657970905304,
|
|
"step": 415,
|
|
"valid_targets_mean": 3329.6,
|
|
"valid_targets_min": 1558
|
|
},
|
|
{
|
|
"epoch": 0.6698564593301436,
|
|
"grad_norm": 0.788312020505803,
|
|
"learning_rate": 3.8177676537585425e-05,
|
|
"loss": 0.46,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4934982657432556,
|
|
"step": 420,
|
|
"valid_targets_mean": 2820.3,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 0.6778309409888357,
|
|
"grad_norm": 0.8425980827813794,
|
|
"learning_rate": 3.863325740318907e-05,
|
|
"loss": 0.4794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4820382595062256,
|
|
"step": 425,
|
|
"valid_targets_mean": 2876.2,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 0.6858054226475279,
|
|
"grad_norm": 0.770934748081358,
|
|
"learning_rate": 3.9088838268792714e-05,
|
|
"loss": 0.4944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5031993389129639,
|
|
"step": 430,
|
|
"valid_targets_mean": 4022.6,
|
|
"valid_targets_min": 1254
|
|
},
|
|
{
|
|
"epoch": 0.69377990430622,
|
|
"grad_norm": 0.6955481727635371,
|
|
"learning_rate": 3.954441913439636e-05,
|
|
"loss": 0.464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43387743830680847,
|
|
"step": 435,
|
|
"valid_targets_mean": 3336.1,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 0.7017543859649122,
|
|
"grad_norm": 0.7018082377308924,
|
|
"learning_rate": 4e-05,
|
|
"loss": 0.4901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44216442108154297,
|
|
"step": 440,
|
|
"valid_targets_mean": 3435.8,
|
|
"valid_targets_min": 998
|
|
},
|
|
{
|
|
"epoch": 0.7097288676236044,
|
|
"grad_norm": 0.7727970681425346,
|
|
"learning_rate": 3.9999841858814384e-05,
|
|
"loss": 0.4607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.421633243560791,
|
|
"step": 445,
|
|
"valid_targets_mean": 2995.8,
|
|
"valid_targets_min": 937
|
|
},
|
|
{
|
|
"epoch": 0.7177033492822966,
|
|
"grad_norm": 0.7024689370532011,
|
|
"learning_rate": 3.999936743775839e-05,
|
|
"loss": 0.4735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4753640592098236,
|
|
"step": 450,
|
|
"valid_targets_mean": 3742.2,
|
|
"valid_targets_min": 1168
|
|
},
|
|
{
|
|
"epoch": 0.7256778309409888,
|
|
"grad_norm": 0.7218810672300192,
|
|
"learning_rate": 3.9998576744334574e-05,
|
|
"loss": 0.4611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46897149085998535,
|
|
"step": 455,
|
|
"valid_targets_mean": 3290.3,
|
|
"valid_targets_min": 1013
|
|
},
|
|
{
|
|
"epoch": 0.733652312599681,
|
|
"grad_norm": 0.7677892675633117,
|
|
"learning_rate": 3.999746979104705e-05,
|
|
"loss": 0.4318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4421650767326355,
|
|
"step": 460,
|
|
"valid_targets_mean": 2847.1,
|
|
"valid_targets_min": 892
|
|
},
|
|
{
|
|
"epoch": 0.7416267942583732,
|
|
"grad_norm": 0.7166772406877355,
|
|
"learning_rate": 3.999604659540131e-05,
|
|
"loss": 0.4621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3989388346672058,
|
|
"step": 465,
|
|
"valid_targets_mean": 3179.4,
|
|
"valid_targets_min": 899
|
|
},
|
|
{
|
|
"epoch": 0.7496012759170654,
|
|
"grad_norm": 0.7435326987092978,
|
|
"learning_rate": 3.999430717990395e-05,
|
|
"loss": 0.4719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5204008221626282,
|
|
"step": 470,
|
|
"valid_targets_mean": 3188.2,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 0.7575757575757576,
|
|
"grad_norm": 0.755534387577385,
|
|
"learning_rate": 3.999225157206228e-05,
|
|
"loss": 0.4564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.445170521736145,
|
|
"step": 475,
|
|
"valid_targets_mean": 3302.5,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 0.7655502392344498,
|
|
"grad_norm": 0.7495457167389177,
|
|
"learning_rate": 3.998987980438393e-05,
|
|
"loss": 0.4609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49949586391448975,
|
|
"step": 480,
|
|
"valid_targets_mean": 3332.2,
|
|
"valid_targets_min": 1313
|
|
},
|
|
{
|
|
"epoch": 0.773524720893142,
|
|
"grad_norm": 0.754094408022391,
|
|
"learning_rate": 3.9987191914376306e-05,
|
|
"loss": 0.4879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47942936420440674,
|
|
"step": 485,
|
|
"valid_targets_mean": 3432.8,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 0.7814992025518341,
|
|
"grad_norm": 0.668414279105942,
|
|
"learning_rate": 3.998418794454604e-05,
|
|
"loss": 0.4653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4753269553184509,
|
|
"step": 490,
|
|
"valid_targets_mean": 3745.1,
|
|
"valid_targets_min": 1503
|
|
},
|
|
{
|
|
"epoch": 0.7894736842105263,
|
|
"grad_norm": 0.7025487477786932,
|
|
"learning_rate": 3.998086794239825e-05,
|
|
"loss": 0.4421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47894811630249023,
|
|
"step": 495,
|
|
"valid_targets_mean": 3535.8,
|
|
"valid_targets_min": 1513
|
|
},
|
|
{
|
|
"epoch": 0.7974481658692185,
|
|
"grad_norm": 0.6496308484564586,
|
|
"learning_rate": 3.997723196043585e-05,
|
|
"loss": 0.4548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47890549898147583,
|
|
"step": 500,
|
|
"valid_targets_mean": 4449.6,
|
|
"valid_targets_min": 1680
|
|
},
|
|
{
|
|
"epoch": 0.8054226475279107,
|
|
"grad_norm": 0.6650659619437272,
|
|
"learning_rate": 3.9973280056158695e-05,
|
|
"loss": 0.4606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4356272220611572,
|
|
"step": 505,
|
|
"valid_targets_mean": 3514.4,
|
|
"valid_targets_min": 1610
|
|
},
|
|
{
|
|
"epoch": 0.8133971291866029,
|
|
"grad_norm": 0.7073435846815495,
|
|
"learning_rate": 3.9969012292062655e-05,
|
|
"loss": 0.4391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4547845721244812,
|
|
"step": 510,
|
|
"valid_targets_mean": 3459.7,
|
|
"valid_targets_min": 1729
|
|
},
|
|
{
|
|
"epoch": 0.8213716108452951,
|
|
"grad_norm": 0.7566989621215652,
|
|
"learning_rate": 3.996442873563866e-05,
|
|
"loss": 0.4558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5277615785598755,
|
|
"step": 515,
|
|
"valid_targets_mean": 3295.5,
|
|
"valid_targets_min": 1255
|
|
},
|
|
{
|
|
"epoch": 0.8293460925039873,
|
|
"grad_norm": 0.666918299742626,
|
|
"learning_rate": 3.9959529459371624e-05,
|
|
"loss": 0.474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41587015986442566,
|
|
"step": 520,
|
|
"valid_targets_mean": 4048.8,
|
|
"valid_targets_min": 2234
|
|
},
|
|
{
|
|
"epoch": 0.8373205741626795,
|
|
"grad_norm": 0.7436322093062475,
|
|
"learning_rate": 3.9954314540739284e-05,
|
|
"loss": 0.4724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4899299144744873,
|
|
"step": 525,
|
|
"valid_targets_mean": 2825.4,
|
|
"valid_targets_min": 1292
|
|
},
|
|
{
|
|
"epoch": 0.8452950558213717,
|
|
"grad_norm": 0.6377246792826012,
|
|
"learning_rate": 3.994878406221097e-05,
|
|
"loss": 0.4422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4290744662284851,
|
|
"step": 530,
|
|
"valid_targets_mean": 3352.1,
|
|
"valid_targets_min": 1580
|
|
},
|
|
{
|
|
"epoch": 0.8532695374800638,
|
|
"grad_norm": 0.6902804734854149,
|
|
"learning_rate": 3.994293811124632e-05,
|
|
"loss": 0.4626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43337637186050415,
|
|
"step": 535,
|
|
"valid_targets_mean": 3419.5,
|
|
"valid_targets_min": 1375
|
|
},
|
|
{
|
|
"epoch": 0.861244019138756,
|
|
"grad_norm": 0.6040023334686732,
|
|
"learning_rate": 3.993677678029392e-05,
|
|
"loss": 0.4576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41659659147262573,
|
|
"step": 540,
|
|
"valid_targets_mean": 4390.6,
|
|
"valid_targets_min": 1680
|
|
},
|
|
{
|
|
"epoch": 0.8692185007974481,
|
|
"grad_norm": 0.6691584214509636,
|
|
"learning_rate": 3.9930300166789765e-05,
|
|
"loss": 0.4817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47513049840927124,
|
|
"step": 545,
|
|
"valid_targets_mean": 3866.2,
|
|
"valid_targets_min": 1213
|
|
},
|
|
{
|
|
"epoch": 0.8771929824561403,
|
|
"grad_norm": 0.9143543238079537,
|
|
"learning_rate": 3.992350837315581e-05,
|
|
"loss": 0.478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44165804982185364,
|
|
"step": 550,
|
|
"valid_targets_mean": 3577.1,
|
|
"valid_targets_min": 1276
|
|
},
|
|
{
|
|
"epoch": 0.8851674641148325,
|
|
"grad_norm": 0.865903583686957,
|
|
"learning_rate": 3.991640150679826e-05,
|
|
"loss": 0.4678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47674328088760376,
|
|
"step": 555,
|
|
"valid_targets_mean": 2551.1,
|
|
"valid_targets_min": 1292
|
|
},
|
|
{
|
|
"epoch": 0.8931419457735247,
|
|
"grad_norm": 0.6376038033002362,
|
|
"learning_rate": 3.990897968010596e-05,
|
|
"loss": 0.4384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4302416145801544,
|
|
"step": 560,
|
|
"valid_targets_mean": 4115.6,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 0.9011164274322169,
|
|
"grad_norm": 0.6922244219522204,
|
|
"learning_rate": 3.990124301044855e-05,
|
|
"loss": 0.4249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41978585720062256,
|
|
"step": 565,
|
|
"valid_targets_mean": 3209.1,
|
|
"valid_targets_min": 1199
|
|
},
|
|
{
|
|
"epoch": 0.9090909090909091,
|
|
"grad_norm": 0.7756822045017229,
|
|
"learning_rate": 3.989319162017465e-05,
|
|
"loss": 0.4415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4361206889152527,
|
|
"step": 570,
|
|
"valid_targets_mean": 2626.6,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 0.9170653907496013,
|
|
"grad_norm": 0.6737852778904551,
|
|
"learning_rate": 3.988482563660989e-05,
|
|
"loss": 0.4486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42350614070892334,
|
|
"step": 575,
|
|
"valid_targets_mean": 3167.2,
|
|
"valid_targets_min": 1370
|
|
},
|
|
{
|
|
"epoch": 0.9250398724082934,
|
|
"grad_norm": 0.6652706112241898,
|
|
"learning_rate": 3.987614519205493e-05,
|
|
"loss": 0.4373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43774667382240295,
|
|
"step": 580,
|
|
"valid_targets_mean": 3633.7,
|
|
"valid_targets_min": 1451
|
|
},
|
|
{
|
|
"epoch": 0.9330143540669856,
|
|
"grad_norm": 0.7054337748600209,
|
|
"learning_rate": 3.986715042378334e-05,
|
|
"loss": 0.4799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47987425327301025,
|
|
"step": 585,
|
|
"valid_targets_mean": 3179.0,
|
|
"valid_targets_min": 1314
|
|
},
|
|
{
|
|
"epoch": 0.9409888357256778,
|
|
"grad_norm": 0.8689490785153259,
|
|
"learning_rate": 3.985784147403947e-05,
|
|
"loss": 0.4485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4705367088317871,
|
|
"step": 590,
|
|
"valid_targets_mean": 3018.4,
|
|
"valid_targets_min": 1061
|
|
},
|
|
{
|
|
"epoch": 0.94896331738437,
|
|
"grad_norm": 0.572538557598008,
|
|
"learning_rate": 3.9848218490036144e-05,
|
|
"loss": 0.4469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43098652362823486,
|
|
"step": 595,
|
|
"valid_targets_mean": 4543.1,
|
|
"valid_targets_min": 1540
|
|
},
|
|
{
|
|
"epoch": 0.9569377990430622,
|
|
"grad_norm": 0.7410804929359192,
|
|
"learning_rate": 3.983828162395238e-05,
|
|
"loss": 0.464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44189587235450745,
|
|
"step": 600,
|
|
"valid_targets_mean": 3793.6,
|
|
"valid_targets_min": 1043
|
|
},
|
|
{
|
|
"epoch": 0.9649122807017544,
|
|
"grad_norm": 0.731695114070416,
|
|
"learning_rate": 3.9828031032930944e-05,
|
|
"loss": 0.4523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43939492106437683,
|
|
"step": 605,
|
|
"valid_targets_mean": 3041.2,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 0.9728867623604466,
|
|
"grad_norm": 0.7752732207678218,
|
|
"learning_rate": 3.98174668790759e-05,
|
|
"loss": 0.4484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4257933497428894,
|
|
"step": 610,
|
|
"valid_targets_mean": 2972.4,
|
|
"valid_targets_min": 1097
|
|
},
|
|
{
|
|
"epoch": 0.9808612440191388,
|
|
"grad_norm": 0.6620071649114668,
|
|
"learning_rate": 3.9806589329450045e-05,
|
|
"loss": 0.4275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39830583333969116,
|
|
"step": 615,
|
|
"valid_targets_mean": 3291.2,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 0.988835725677831,
|
|
"grad_norm": 0.7407661135345047,
|
|
"learning_rate": 3.979539855607222e-05,
|
|
"loss": 0.4752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43133264780044556,
|
|
"step": 620,
|
|
"valid_targets_mean": 3201.2,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 0.9968102073365231,
|
|
"grad_norm": 0.771869161730962,
|
|
"learning_rate": 3.9783894735914646e-05,
|
|
"loss": 0.4432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47225266695022583,
|
|
"step": 625,
|
|
"valid_targets_mean": 3336.6,
|
|
"valid_targets_min": 1565
|
|
},
|
|
{
|
|
"epoch": 1.0047846889952152,
|
|
"grad_norm": 0.7401539639465231,
|
|
"learning_rate": 3.9772078050900105e-05,
|
|
"loss": 0.4133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38412249088287354,
|
|
"step": 630,
|
|
"valid_targets_mean": 2690.7,
|
|
"valid_targets_min": 1233
|
|
},
|
|
{
|
|
"epoch": 1.0127591706539074,
|
|
"grad_norm": 0.7078952793203476,
|
|
"learning_rate": 3.9759948687899055e-05,
|
|
"loss": 0.4235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46491843461990356,
|
|
"step": 635,
|
|
"valid_targets_mean": 3437.4,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 1.0207336523125996,
|
|
"grad_norm": 0.7333208771121233,
|
|
"learning_rate": 3.974750683872667e-05,
|
|
"loss": 0.4048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36504125595092773,
|
|
"step": 640,
|
|
"valid_targets_mean": 2904.5,
|
|
"valid_targets_min": 1086
|
|
},
|
|
{
|
|
"epoch": 1.0287081339712918,
|
|
"grad_norm": 0.723647328274376,
|
|
"learning_rate": 3.973475270013984e-05,
|
|
"loss": 0.4358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4465000033378601,
|
|
"step": 645,
|
|
"valid_targets_mean": 3352.6,
|
|
"valid_targets_min": 1351
|
|
},
|
|
{
|
|
"epoch": 1.036682615629984,
|
|
"grad_norm": 0.6915942791040528,
|
|
"learning_rate": 3.972168647383402e-05,
|
|
"loss": 0.4276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40669485926628113,
|
|
"step": 650,
|
|
"valid_targets_mean": 3460.3,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 1.0446570972886762,
|
|
"grad_norm": 0.71523092308001,
|
|
"learning_rate": 3.970830836644006e-05,
|
|
"loss": 0.4408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4429106116294861,
|
|
"step": 655,
|
|
"valid_targets_mean": 3286.7,
|
|
"valid_targets_min": 1625
|
|
},
|
|
{
|
|
"epoch": 1.0526315789473684,
|
|
"grad_norm": 0.6795045845763161,
|
|
"learning_rate": 3.9694618589520945e-05,
|
|
"loss": 0.4296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.429425984621048,
|
|
"step": 660,
|
|
"valid_targets_mean": 4042.9,
|
|
"valid_targets_min": 1255
|
|
},
|
|
{
|
|
"epoch": 1.0606060606060606,
|
|
"grad_norm": 0.6519507165071796,
|
|
"learning_rate": 3.9680617359568414e-05,
|
|
"loss": 0.4316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4048612713813782,
|
|
"step": 665,
|
|
"valid_targets_mean": 3689.1,
|
|
"valid_targets_min": 1458
|
|
},
|
|
{
|
|
"epoch": 1.0685805422647527,
|
|
"grad_norm": 0.7729115179731982,
|
|
"learning_rate": 3.966630489799959e-05,
|
|
"loss": 0.4283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3845750391483307,
|
|
"step": 670,
|
|
"valid_targets_mean": 2459.6,
|
|
"valid_targets_min": 1232
|
|
},
|
|
{
|
|
"epoch": 1.076555023923445,
|
|
"grad_norm": 0.8291097212264954,
|
|
"learning_rate": 3.9651681431153445e-05,
|
|
"loss": 0.4294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41951286792755127,
|
|
"step": 675,
|
|
"valid_targets_mean": 2610.4,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 1.0845295055821371,
|
|
"grad_norm": 0.6862021487618009,
|
|
"learning_rate": 3.96367471902872e-05,
|
|
"loss": 0.4171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4087642431259155,
|
|
"step": 680,
|
|
"valid_targets_mean": 3439.5,
|
|
"valid_targets_min": 1785
|
|
},
|
|
{
|
|
"epoch": 1.0925039872408293,
|
|
"grad_norm": 1.1618441441958833,
|
|
"learning_rate": 3.9621502411572705e-05,
|
|
"loss": 0.4131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40604037046432495,
|
|
"step": 685,
|
|
"valid_targets_mean": 2812.4,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 1.1004784688995215,
|
|
"grad_norm": 0.6385036907491932,
|
|
"learning_rate": 3.960594733609273e-05,
|
|
"loss": 0.4172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3924773931503296,
|
|
"step": 690,
|
|
"valid_targets_mean": 3588.4,
|
|
"valid_targets_min": 1946
|
|
},
|
|
{
|
|
"epoch": 1.1084529505582137,
|
|
"grad_norm": 0.797336121375181,
|
|
"learning_rate": 3.9590082209837054e-05,
|
|
"loss": 0.4339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4019266366958618,
|
|
"step": 695,
|
|
"valid_targets_mean": 2833.1,
|
|
"valid_targets_min": 1022
|
|
},
|
|
{
|
|
"epoch": 1.1164274322169059,
|
|
"grad_norm": 0.6988304823846329,
|
|
"learning_rate": 3.957390728369867e-05,
|
|
"loss": 0.4615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4863991439342499,
|
|
"step": 700,
|
|
"valid_targets_mean": 3256.9,
|
|
"valid_targets_min": 1068
|
|
},
|
|
{
|
|
"epoch": 1.124401913875598,
|
|
"grad_norm": 0.752522117200672,
|
|
"learning_rate": 3.955742281346979e-05,
|
|
"loss": 0.4101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42362910509109497,
|
|
"step": 705,
|
|
"valid_targets_mean": 2987.2,
|
|
"valid_targets_min": 1273
|
|
},
|
|
{
|
|
"epoch": 1.1323763955342903,
|
|
"grad_norm": 0.6588597940064862,
|
|
"learning_rate": 3.9540629059837767e-05,
|
|
"loss": 0.4136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38508880138397217,
|
|
"step": 710,
|
|
"valid_targets_mean": 3352.6,
|
|
"valid_targets_min": 943
|
|
},
|
|
{
|
|
"epoch": 1.1403508771929824,
|
|
"grad_norm": 0.5916480838749906,
|
|
"learning_rate": 3.952352628838102e-05,
|
|
"loss": 0.4188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36647146940231323,
|
|
"step": 715,
|
|
"valid_targets_mean": 3808.9,
|
|
"valid_targets_min": 1550
|
|
},
|
|
{
|
|
"epoch": 1.1483253588516746,
|
|
"grad_norm": 0.7133118506508499,
|
|
"learning_rate": 3.95061147695648e-05,
|
|
"loss": 0.4305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4364580810070038,
|
|
"step": 720,
|
|
"valid_targets_mean": 2967.9,
|
|
"valid_targets_min": 938
|
|
},
|
|
{
|
|
"epoch": 1.1562998405103668,
|
|
"grad_norm": 0.6386688623792343,
|
|
"learning_rate": 3.9488394778736935e-05,
|
|
"loss": 0.4459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46748489141464233,
|
|
"step": 725,
|
|
"valid_targets_mean": 4425.9,
|
|
"valid_targets_min": 1081
|
|
},
|
|
{
|
|
"epoch": 1.164274322169059,
|
|
"grad_norm": 0.6195200532564845,
|
|
"learning_rate": 3.947036659612345e-05,
|
|
"loss": 0.401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3520451784133911,
|
|
"step": 730,
|
|
"valid_targets_mean": 3296.6,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 1.1722488038277512,
|
|
"grad_norm": 0.685264895185126,
|
|
"learning_rate": 3.945203050682418e-05,
|
|
"loss": 0.4086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3831281065940857,
|
|
"step": 735,
|
|
"valid_targets_mean": 3001.0,
|
|
"valid_targets_min": 1562
|
|
},
|
|
{
|
|
"epoch": 1.1802232854864434,
|
|
"grad_norm": 0.5989944902203753,
|
|
"learning_rate": 3.94333868008082e-05,
|
|
"loss": 0.4133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40738391876220703,
|
|
"step": 740,
|
|
"valid_targets_mean": 4224.7,
|
|
"valid_targets_min": 969
|
|
},
|
|
{
|
|
"epoch": 1.1881977671451356,
|
|
"grad_norm": 0.7137320217174784,
|
|
"learning_rate": 3.94144357729093e-05,
|
|
"loss": 0.4372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42201972007751465,
|
|
"step": 745,
|
|
"valid_targets_mean": 3505.2,
|
|
"valid_targets_min": 1074
|
|
},
|
|
{
|
|
"epoch": 1.1961722488038278,
|
|
"grad_norm": 0.6194283753521941,
|
|
"learning_rate": 3.939517772282127e-05,
|
|
"loss": 0.4405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38171854615211487,
|
|
"step": 750,
|
|
"valid_targets_mean": 4167.9,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 1.20414673046252,
|
|
"grad_norm": 0.6345896334419947,
|
|
"learning_rate": 3.93756129550932e-05,
|
|
"loss": 0.4081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4040636122226715,
|
|
"step": 755,
|
|
"valid_targets_mean": 3595.1,
|
|
"valid_targets_min": 1663
|
|
},
|
|
{
|
|
"epoch": 1.2121212121212122,
|
|
"grad_norm": 0.6360828239021951,
|
|
"learning_rate": 3.935574177912465e-05,
|
|
"loss": 0.4205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4120604991912842,
|
|
"step": 760,
|
|
"valid_targets_mean": 4046.0,
|
|
"valid_targets_min": 1346
|
|
},
|
|
{
|
|
"epoch": 1.2200956937799043,
|
|
"grad_norm": 0.6993877151640597,
|
|
"learning_rate": 3.9335564509160746e-05,
|
|
"loss": 0.4348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49273666739463806,
|
|
"step": 765,
|
|
"valid_targets_mean": 3515.2,
|
|
"valid_targets_min": 894
|
|
},
|
|
{
|
|
"epoch": 1.2280701754385965,
|
|
"grad_norm": 0.6990528252211622,
|
|
"learning_rate": 3.931508146428724e-05,
|
|
"loss": 0.4332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43578076362609863,
|
|
"step": 770,
|
|
"valid_targets_mean": 3406.3,
|
|
"valid_targets_min": 1536
|
|
},
|
|
{
|
|
"epoch": 1.2360446570972887,
|
|
"grad_norm": 0.65866418500165,
|
|
"learning_rate": 3.929429296842542e-05,
|
|
"loss": 0.4498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47980165481567383,
|
|
"step": 775,
|
|
"valid_targets_mean": 3736.8,
|
|
"valid_targets_min": 879
|
|
},
|
|
{
|
|
"epoch": 1.244019138755981,
|
|
"grad_norm": 0.6703613148448246,
|
|
"learning_rate": 3.927319935032703e-05,
|
|
"loss": 0.4297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3892901539802551,
|
|
"step": 780,
|
|
"valid_targets_mean": 3557.1,
|
|
"valid_targets_min": 1025
|
|
},
|
|
{
|
|
"epoch": 1.251993620414673,
|
|
"grad_norm": 0.5945967564368729,
|
|
"learning_rate": 3.925180094356905e-05,
|
|
"loss": 0.4284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4297284781932831,
|
|
"step": 785,
|
|
"valid_targets_mean": 3972.6,
|
|
"valid_targets_min": 1655
|
|
},
|
|
{
|
|
"epoch": 1.2599681020733653,
|
|
"grad_norm": 0.5955612176103448,
|
|
"learning_rate": 3.9230098086548414e-05,
|
|
"loss": 0.4144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3463818430900574,
|
|
"step": 790,
|
|
"valid_targets_mean": 3222.6,
|
|
"valid_targets_min": 1502
|
|
},
|
|
{
|
|
"epoch": 1.2679425837320575,
|
|
"grad_norm": 0.7628281028481103,
|
|
"learning_rate": 3.920809112247668e-05,
|
|
"loss": 0.4297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37421393394470215,
|
|
"step": 795,
|
|
"valid_targets_mean": 2711.8,
|
|
"valid_targets_min": 1117
|
|
},
|
|
{
|
|
"epoch": 1.2759170653907497,
|
|
"grad_norm": 0.765870039135906,
|
|
"learning_rate": 3.918578039937459e-05,
|
|
"loss": 0.4662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46083706617355347,
|
|
"step": 800,
|
|
"valid_targets_mean": 2796.8,
|
|
"valid_targets_min": 1335
|
|
},
|
|
{
|
|
"epoch": 1.2838915470494419,
|
|
"grad_norm": 0.6669250931322693,
|
|
"learning_rate": 3.916316627006656e-05,
|
|
"loss": 0.4083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3843752145767212,
|
|
"step": 805,
|
|
"valid_targets_mean": 2955.2,
|
|
"valid_targets_min": 1414
|
|
},
|
|
{
|
|
"epoch": 1.291866028708134,
|
|
"grad_norm": 0.6243416431878199,
|
|
"learning_rate": 3.914024909217511e-05,
|
|
"loss": 0.4189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39225244522094727,
|
|
"step": 810,
|
|
"valid_targets_mean": 3466.9,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 1.2998405103668262,
|
|
"grad_norm": 0.6638287346283385,
|
|
"learning_rate": 3.911702922811522e-05,
|
|
"loss": 0.4524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43244144320487976,
|
|
"step": 815,
|
|
"valid_targets_mean": 3593.4,
|
|
"valid_targets_min": 1527
|
|
},
|
|
{
|
|
"epoch": 1.3078149920255182,
|
|
"grad_norm": 0.724096091285797,
|
|
"learning_rate": 3.909350704508856e-05,
|
|
"loss": 0.4348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4547302722930908,
|
|
"step": 820,
|
|
"valid_targets_mean": 3239.9,
|
|
"valid_targets_min": 1310
|
|
},
|
|
{
|
|
"epoch": 1.3157894736842106,
|
|
"grad_norm": 0.6420135116987287,
|
|
"learning_rate": 3.906968291507773e-05,
|
|
"loss": 0.4287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40994346141815186,
|
|
"step": 825,
|
|
"valid_targets_mean": 3795.1,
|
|
"valid_targets_min": 1507
|
|
},
|
|
{
|
|
"epoch": 1.3237639553429026,
|
|
"grad_norm": 0.7028438010381166,
|
|
"learning_rate": 3.904555721484034e-05,
|
|
"loss": 0.4344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3853358030319214,
|
|
"step": 830,
|
|
"valid_targets_mean": 2687.6,
|
|
"valid_targets_min": 1573
|
|
},
|
|
{
|
|
"epoch": 1.331738437001595,
|
|
"grad_norm": 0.6453816592314697,
|
|
"learning_rate": 3.9021130325903076e-05,
|
|
"loss": 0.4451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40860795974731445,
|
|
"step": 835,
|
|
"valid_targets_mean": 3654.7,
|
|
"valid_targets_min": 1296
|
|
},
|
|
{
|
|
"epoch": 1.339712918660287,
|
|
"grad_norm": 0.9493972155572922,
|
|
"learning_rate": 3.899640263455566e-05,
|
|
"loss": 0.4007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43106314539909363,
|
|
"step": 840,
|
|
"valid_targets_mean": 3383.8,
|
|
"valid_targets_min": 1637
|
|
},
|
|
{
|
|
"epoch": 1.3476874003189794,
|
|
"grad_norm": 0.6313187590116264,
|
|
"learning_rate": 3.897137453184472e-05,
|
|
"loss": 0.4318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40859609842300415,
|
|
"step": 845,
|
|
"valid_targets_mean": 3895.8,
|
|
"valid_targets_min": 1472
|
|
},
|
|
{
|
|
"epoch": 1.3556618819776713,
|
|
"grad_norm": 0.6738154141622964,
|
|
"learning_rate": 3.894604641356767e-05,
|
|
"loss": 0.4617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46169257164001465,
|
|
"step": 850,
|
|
"valid_targets_mean": 3727.9,
|
|
"valid_targets_min": 1094
|
|
},
|
|
{
|
|
"epoch": 1.3636363636363638,
|
|
"grad_norm": 0.7623426422194148,
|
|
"learning_rate": 3.8920418680266346e-05,
|
|
"loss": 0.3987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42094945907592773,
|
|
"step": 855,
|
|
"valid_targets_mean": 2532.9,
|
|
"valid_targets_min": 825
|
|
},
|
|
{
|
|
"epoch": 1.3716108452950557,
|
|
"grad_norm": 0.7424583031747671,
|
|
"learning_rate": 3.889449173722077e-05,
|
|
"loss": 0.4445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47202539443969727,
|
|
"step": 860,
|
|
"valid_targets_mean": 3271.7,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 1.3795853269537481,
|
|
"grad_norm": 0.6885879440227077,
|
|
"learning_rate": 3.8868265994442694e-05,
|
|
"loss": 0.4162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41570407152175903,
|
|
"step": 865,
|
|
"valid_targets_mean": 3455.1,
|
|
"valid_targets_min": 1101
|
|
},
|
|
{
|
|
"epoch": 1.38755980861244,
|
|
"grad_norm": 0.6739403810509708,
|
|
"learning_rate": 3.8841741866669126e-05,
|
|
"loss": 0.4356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48149287700653076,
|
|
"step": 870,
|
|
"valid_targets_mean": 3788.0,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 1.3955342902711323,
|
|
"grad_norm": 0.6473182655690478,
|
|
"learning_rate": 3.881491977335577e-05,
|
|
"loss": 0.4118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40731143951416016,
|
|
"step": 875,
|
|
"valid_targets_mean": 3900.2,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 1.4035087719298245,
|
|
"grad_norm": 0.6838958350692489,
|
|
"learning_rate": 3.878780013867038e-05,
|
|
"loss": 0.4002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35340070724487305,
|
|
"step": 880,
|
|
"valid_targets_mean": 3568.2,
|
|
"valid_targets_min": 879
|
|
},
|
|
{
|
|
"epoch": 1.4114832535885167,
|
|
"grad_norm": 0.7695517185572368,
|
|
"learning_rate": 3.8760383391486074e-05,
|
|
"loss": 0.4419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4930613338947296,
|
|
"step": 885,
|
|
"valid_targets_mean": 2920.6,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 1.4194577352472089,
|
|
"grad_norm": 0.5791355375281413,
|
|
"learning_rate": 3.873266996537456e-05,
|
|
"loss": 0.4146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37575191259384155,
|
|
"step": 890,
|
|
"valid_targets_mean": 4208.9,
|
|
"valid_targets_min": 1884
|
|
},
|
|
{
|
|
"epoch": 1.427432216905901,
|
|
"grad_norm": 0.7292399275131892,
|
|
"learning_rate": 3.8704660298599225e-05,
|
|
"loss": 0.4335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39419451355934143,
|
|
"step": 895,
|
|
"valid_targets_mean": 2970.6,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 1.4354066985645932,
|
|
"grad_norm": 0.5895098519561811,
|
|
"learning_rate": 3.867635483410827e-05,
|
|
"loss": 0.4094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40455514192581177,
|
|
"step": 900,
|
|
"valid_targets_mean": 4065.5,
|
|
"valid_targets_min": 1595
|
|
},
|
|
{
|
|
"epoch": 1.4433811802232854,
|
|
"grad_norm": 0.7189396859653461,
|
|
"learning_rate": 3.864775401952767e-05,
|
|
"loss": 0.4346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4476749300956726,
|
|
"step": 905,
|
|
"valid_targets_mean": 2895.7,
|
|
"valid_targets_min": 1395
|
|
},
|
|
{
|
|
"epoch": 1.4513556618819776,
|
|
"grad_norm": 0.6366469405334749,
|
|
"learning_rate": 3.8618858307154085e-05,
|
|
"loss": 0.4361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4621413052082062,
|
|
"step": 910,
|
|
"valid_targets_mean": 4018.6,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 1.4593301435406698,
|
|
"grad_norm": 0.6802024200964512,
|
|
"learning_rate": 3.8589668153947743e-05,
|
|
"loss": 0.4185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43520981073379517,
|
|
"step": 915,
|
|
"valid_targets_mean": 3291.3,
|
|
"valid_targets_min": 1490
|
|
},
|
|
{
|
|
"epoch": 1.467304625199362,
|
|
"grad_norm": 0.6992909869822863,
|
|
"learning_rate": 3.8560184021525194e-05,
|
|
"loss": 0.393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39124223589897156,
|
|
"step": 920,
|
|
"valid_targets_mean": 2746.3,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 1.4752791068580542,
|
|
"grad_norm": 0.7273148053795566,
|
|
"learning_rate": 3.853040637615199e-05,
|
|
"loss": 0.4256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39543312788009644,
|
|
"step": 925,
|
|
"valid_targets_mean": 2544.5,
|
|
"valid_targets_min": 912
|
|
},
|
|
{
|
|
"epoch": 1.4832535885167464,
|
|
"grad_norm": 0.7228792882922969,
|
|
"learning_rate": 3.850033568873536e-05,
|
|
"loss": 0.4147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5022583603858948,
|
|
"step": 930,
|
|
"valid_targets_mean": 3204.9,
|
|
"valid_targets_min": 1677
|
|
},
|
|
{
|
|
"epoch": 1.4912280701754386,
|
|
"grad_norm": 0.7236283242675637,
|
|
"learning_rate": 3.8469972434816706e-05,
|
|
"loss": 0.4042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41342538595199585,
|
|
"step": 935,
|
|
"valid_targets_mean": 3097.8,
|
|
"valid_targets_min": 1337
|
|
},
|
|
{
|
|
"epoch": 1.4992025518341308,
|
|
"grad_norm": 0.6367133418778236,
|
|
"learning_rate": 3.843931709456414e-05,
|
|
"loss": 0.4004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43486636877059937,
|
|
"step": 940,
|
|
"valid_targets_mean": 4237.7,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 1.507177033492823,
|
|
"grad_norm": 0.6354510711054572,
|
|
"learning_rate": 3.840837015276483e-05,
|
|
"loss": 0.4241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45768535137176514,
|
|
"step": 945,
|
|
"valid_targets_mean": 4090.1,
|
|
"valid_targets_min": 1726
|
|
},
|
|
{
|
|
"epoch": 1.5151515151515151,
|
|
"grad_norm": 0.7382041088566098,
|
|
"learning_rate": 3.83771320988174e-05,
|
|
"loss": 0.4053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4071834683418274,
|
|
"step": 950,
|
|
"valid_targets_mean": 2436.1,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 1.5231259968102073,
|
|
"grad_norm": 0.7063705188485128,
|
|
"learning_rate": 3.834560342672413e-05,
|
|
"loss": 0.413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41264647245407104,
|
|
"step": 955,
|
|
"valid_targets_mean": 3089.2,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 1.5311004784688995,
|
|
"grad_norm": 0.6569971909214346,
|
|
"learning_rate": 3.831378463508318e-05,
|
|
"loss": 0.4195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4057466983795166,
|
|
"step": 960,
|
|
"valid_targets_mean": 3574.0,
|
|
"valid_targets_min": 1374
|
|
},
|
|
{
|
|
"epoch": 1.5390749601275917,
|
|
"grad_norm": 0.6810391332756381,
|
|
"learning_rate": 3.8281676227080694e-05,
|
|
"loss": 0.3836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3792906403541565,
|
|
"step": 965,
|
|
"valid_targets_mean": 3348.1,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 1.547049441786284,
|
|
"grad_norm": 0.6353534546017063,
|
|
"learning_rate": 3.824927871048284e-05,
|
|
"loss": 0.4271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42927268147468567,
|
|
"step": 970,
|
|
"valid_targets_mean": 3880.4,
|
|
"valid_targets_min": 1446
|
|
},
|
|
{
|
|
"epoch": 1.555023923444976,
|
|
"grad_norm": 0.7296769363631778,
|
|
"learning_rate": 3.8216592597627797e-05,
|
|
"loss": 0.414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41197121143341064,
|
|
"step": 975,
|
|
"valid_targets_mean": 3109.5,
|
|
"valid_targets_min": 1447
|
|
},
|
|
{
|
|
"epoch": 1.5629984051036683,
|
|
"grad_norm": 0.6724477148474506,
|
|
"learning_rate": 3.818361840541761e-05,
|
|
"loss": 0.4132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.388108491897583,
|
|
"step": 980,
|
|
"valid_targets_mean": 3087.5,
|
|
"valid_targets_min": 420
|
|
},
|
|
{
|
|
"epoch": 1.5709728867623605,
|
|
"grad_norm": 0.6669439859024038,
|
|
"learning_rate": 3.815035665531008e-05,
|
|
"loss": 0.3966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37243035435676575,
|
|
"step": 985,
|
|
"valid_targets_mean": 3532.2,
|
|
"valid_targets_min": 1275
|
|
},
|
|
{
|
|
"epoch": 1.5789473684210527,
|
|
"grad_norm": 0.6148608282276289,
|
|
"learning_rate": 3.811680787331047e-05,
|
|
"loss": 0.4286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42982906103134155,
|
|
"step": 990,
|
|
"valid_targets_mean": 3934.0,
|
|
"valid_targets_min": 1742
|
|
},
|
|
{
|
|
"epoch": 1.5869218500797448,
|
|
"grad_norm": 0.6297959985151552,
|
|
"learning_rate": 3.8082972589963175e-05,
|
|
"loss": 0.3997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4003561735153198,
|
|
"step": 995,
|
|
"valid_targets_mean": 3886.8,
|
|
"valid_targets_min": 1562
|
|
},
|
|
{
|
|
"epoch": 1.594896331738437,
|
|
"grad_norm": 0.6755599963523873,
|
|
"learning_rate": 3.80488513403434e-05,
|
|
"loss": 0.4167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40847086906433105,
|
|
"step": 1000,
|
|
"valid_targets_mean": 3062.8,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 1.6028708133971292,
|
|
"grad_norm": 0.7505876659296074,
|
|
"learning_rate": 3.8014444664048616e-05,
|
|
"loss": 0.4172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41980603337287903,
|
|
"step": 1005,
|
|
"valid_targets_mean": 2926.9,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 1.6108452950558214,
|
|
"grad_norm": 0.6589637575412,
|
|
"learning_rate": 3.797975310519009e-05,
|
|
"loss": 0.4112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38760387897491455,
|
|
"step": 1010,
|
|
"valid_targets_mean": 3761.8,
|
|
"valid_targets_min": 1513
|
|
},
|
|
{
|
|
"epoch": 1.6188197767145136,
|
|
"grad_norm": 0.7574908069191536,
|
|
"learning_rate": 3.794477721238425e-05,
|
|
"loss": 0.4272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5113911628723145,
|
|
"step": 1015,
|
|
"valid_targets_mean": 2719.2,
|
|
"valid_targets_min": 1031
|
|
},
|
|
{
|
|
"epoch": 1.6267942583732058,
|
|
"grad_norm": 0.6788457265269795,
|
|
"learning_rate": 3.7909517538744e-05,
|
|
"loss": 0.4217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3992574214935303,
|
|
"step": 1020,
|
|
"valid_targets_mean": 3254.7,
|
|
"valid_targets_min": 1331
|
|
},
|
|
{
|
|
"epoch": 1.6347687400318978,
|
|
"grad_norm": 0.6233500327411822,
|
|
"learning_rate": 3.7873974641870006e-05,
|
|
"loss": 0.4074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3768187165260315,
|
|
"step": 1025,
|
|
"valid_targets_mean": 3937.4,
|
|
"valid_targets_min": 1601
|
|
},
|
|
{
|
|
"epoch": 1.6427432216905902,
|
|
"grad_norm": 0.6142151321352362,
|
|
"learning_rate": 3.7838149083841856e-05,
|
|
"loss": 0.4173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39406371116638184,
|
|
"step": 1030,
|
|
"valid_targets_mean": 3506.6,
|
|
"valid_targets_min": 1148
|
|
},
|
|
{
|
|
"epoch": 1.6507177033492821,
|
|
"grad_norm": 0.6578894673369284,
|
|
"learning_rate": 3.7802041431209166e-05,
|
|
"loss": 0.4114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38979512453079224,
|
|
"step": 1035,
|
|
"valid_targets_mean": 3113.4,
|
|
"valid_targets_min": 1062
|
|
},
|
|
{
|
|
"epoch": 1.6586921850079746,
|
|
"grad_norm": 0.6589543499720713,
|
|
"learning_rate": 3.776565225498264e-05,
|
|
"loss": 0.3992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4063750207424164,
|
|
"step": 1040,
|
|
"valid_targets_mean": 3225.6,
|
|
"valid_targets_min": 1043
|
|
},
|
|
{
|
|
"epoch": 1.6666666666666665,
|
|
"grad_norm": 0.6677782025577473,
|
|
"learning_rate": 3.7728982130625025e-05,
|
|
"loss": 0.4015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.391085147857666,
|
|
"step": 1045,
|
|
"valid_targets_mean": 3264.9,
|
|
"valid_targets_min": 1245
|
|
},
|
|
{
|
|
"epoch": 1.674641148325359,
|
|
"grad_norm": 0.6981747159611452,
|
|
"learning_rate": 3.769203163804202e-05,
|
|
"loss": 0.4252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3903113603591919,
|
|
"step": 1050,
|
|
"valid_targets_mean": 3292.9,
|
|
"valid_targets_min": 989
|
|
},
|
|
{
|
|
"epoch": 1.682615629984051,
|
|
"grad_norm": 0.7315445640505674,
|
|
"learning_rate": 3.7654801361573076e-05,
|
|
"loss": 0.4261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5633702874183655,
|
|
"step": 1055,
|
|
"valid_targets_mean": 4292.4,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 1.6905901116427433,
|
|
"grad_norm": 1.819141570666334,
|
|
"learning_rate": 3.761729188998222e-05,
|
|
"loss": 0.4167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39686328172683716,
|
|
"step": 1060,
|
|
"valid_targets_mean": 2768.1,
|
|
"valid_targets_min": 1062
|
|
},
|
|
{
|
|
"epoch": 1.6985645933014353,
|
|
"grad_norm": 0.6335018816731464,
|
|
"learning_rate": 3.757950381644868e-05,
|
|
"loss": 0.443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36634722352027893,
|
|
"step": 1065,
|
|
"valid_targets_mean": 3580.9,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 1.7065390749601277,
|
|
"grad_norm": 0.6866678670030197,
|
|
"learning_rate": 3.7541437738557524e-05,
|
|
"loss": 0.4405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5142737627029419,
|
|
"step": 1070,
|
|
"valid_targets_mean": 3518.6,
|
|
"valid_targets_min": 1775
|
|
},
|
|
{
|
|
"epoch": 1.7145135566188197,
|
|
"grad_norm": 0.6703668667066753,
|
|
"learning_rate": 3.750309425829022e-05,
|
|
"loss": 0.4255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3926396369934082,
|
|
"step": 1075,
|
|
"valid_targets_mean": 3453.3,
|
|
"valid_targets_min": 1560
|
|
},
|
|
{
|
|
"epoch": 1.722488038277512,
|
|
"grad_norm": 0.6652907238295139,
|
|
"learning_rate": 3.746447398201512e-05,
|
|
"loss": 0.4251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40042924880981445,
|
|
"step": 1080,
|
|
"valid_targets_mean": 2986.4,
|
|
"valid_targets_min": 1453
|
|
},
|
|
{
|
|
"epoch": 1.730462519936204,
|
|
"grad_norm": 0.6669225793236236,
|
|
"learning_rate": 3.7425577520477846e-05,
|
|
"loss": 0.3977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4015754163265228,
|
|
"step": 1085,
|
|
"valid_targets_mean": 3179.0,
|
|
"valid_targets_min": 1207
|
|
},
|
|
{
|
|
"epoch": 1.7384370015948964,
|
|
"grad_norm": 0.6862832700588433,
|
|
"learning_rate": 3.738640548879166e-05,
|
|
"loss": 0.402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4010501503944397,
|
|
"step": 1090,
|
|
"valid_targets_mean": 3138.1,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 1.7464114832535884,
|
|
"grad_norm": 0.7227448074140783,
|
|
"learning_rate": 3.7346958506427696e-05,
|
|
"loss": 0.4269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4000108242034912,
|
|
"step": 1095,
|
|
"valid_targets_mean": 2936.7,
|
|
"valid_targets_min": 1292
|
|
},
|
|
{
|
|
"epoch": 1.7543859649122808,
|
|
"grad_norm": 0.6464004923750059,
|
|
"learning_rate": 3.730723719720523e-05,
|
|
"loss": 0.3975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41491401195526123,
|
|
"step": 1100,
|
|
"valid_targets_mean": 3414.4,
|
|
"valid_targets_min": 1258
|
|
},
|
|
{
|
|
"epoch": 1.7623604465709728,
|
|
"grad_norm": 0.6090150844553947,
|
|
"learning_rate": 3.7267242189281746e-05,
|
|
"loss": 0.4286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38227415084838867,
|
|
"step": 1105,
|
|
"valid_targets_mean": 3707.8,
|
|
"valid_targets_min": 1184
|
|
},
|
|
{
|
|
"epoch": 1.7703349282296652,
|
|
"grad_norm": 0.6130264789325811,
|
|
"learning_rate": 3.722697411514305e-05,
|
|
"loss": 0.4006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4005066454410553,
|
|
"step": 1110,
|
|
"valid_targets_mean": 3463.8,
|
|
"valid_targets_min": 1097
|
|
},
|
|
{
|
|
"epoch": 1.7783094098883572,
|
|
"grad_norm": 0.6303751090768865,
|
|
"learning_rate": 3.7186433611593225e-05,
|
|
"loss": 0.4131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39449629187583923,
|
|
"step": 1115,
|
|
"valid_targets_mean": 3947.3,
|
|
"valid_targets_min": 1329
|
|
},
|
|
{
|
|
"epoch": 1.7862838915470496,
|
|
"grad_norm": 0.6538336426404057,
|
|
"learning_rate": 3.7145621319744614e-05,
|
|
"loss": 0.3967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4242631196975708,
|
|
"step": 1120,
|
|
"valid_targets_mean": 4217.5,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 1.7942583732057416,
|
|
"grad_norm": 0.6908094084737153,
|
|
"learning_rate": 3.7104537885007635e-05,
|
|
"loss": 0.4082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3897255063056946,
|
|
"step": 1125,
|
|
"valid_targets_mean": 3000.1,
|
|
"valid_targets_min": 1231
|
|
},
|
|
{
|
|
"epoch": 1.802232854864434,
|
|
"grad_norm": 0.6659689657111252,
|
|
"learning_rate": 3.7063183957080594e-05,
|
|
"loss": 0.3922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37051305174827576,
|
|
"step": 1130,
|
|
"valid_targets_mean": 3206.4,
|
|
"valid_targets_min": 1175
|
|
},
|
|
{
|
|
"epoch": 1.810207336523126,
|
|
"grad_norm": 0.7253370502232225,
|
|
"learning_rate": 3.7021560189939416e-05,
|
|
"loss": 0.412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43550822138786316,
|
|
"step": 1135,
|
|
"valid_targets_mean": 3459.0,
|
|
"valid_targets_min": 1178
|
|
},
|
|
{
|
|
"epoch": 1.8181818181818183,
|
|
"grad_norm": 0.6678071596919457,
|
|
"learning_rate": 3.697966724182729e-05,
|
|
"loss": 0.3795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4003176689147949,
|
|
"step": 1140,
|
|
"valid_targets_mean": 3299.6,
|
|
"valid_targets_min": 1164
|
|
},
|
|
{
|
|
"epoch": 1.8261562998405103,
|
|
"grad_norm": 0.6418928682541023,
|
|
"learning_rate": 3.6937505775244246e-05,
|
|
"loss": 0.3826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37100380659103394,
|
|
"step": 1145,
|
|
"valid_targets_mean": 3666.0,
|
|
"valid_targets_min": 1592
|
|
},
|
|
{
|
|
"epoch": 1.8341307814992025,
|
|
"grad_norm": 0.6928332366094467,
|
|
"learning_rate": 3.689507645693674e-05,
|
|
"loss": 0.4125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39010167121887207,
|
|
"step": 1150,
|
|
"valid_targets_mean": 2818.2,
|
|
"valid_targets_min": 1053
|
|
},
|
|
{
|
|
"epoch": 1.8421052631578947,
|
|
"grad_norm": 0.565397847619194,
|
|
"learning_rate": 3.6852379957887025e-05,
|
|
"loss": 0.3931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4160916209220886,
|
|
"step": 1155,
|
|
"valid_targets_mean": 4141.2,
|
|
"valid_targets_min": 1338
|
|
},
|
|
{
|
|
"epoch": 1.8500797448165869,
|
|
"grad_norm": 0.6735693400789545,
|
|
"learning_rate": 3.6809416953302606e-05,
|
|
"loss": 0.4255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45080703496932983,
|
|
"step": 1160,
|
|
"valid_targets_mean": 3745.6,
|
|
"valid_targets_min": 962
|
|
},
|
|
{
|
|
"epoch": 1.858054226475279,
|
|
"grad_norm": 0.7203009612916168,
|
|
"learning_rate": 3.676618812260553e-05,
|
|
"loss": 0.4209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4387802481651306,
|
|
"step": 1165,
|
|
"valid_targets_mean": 3261.7,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 1.8660287081339713,
|
|
"grad_norm": 0.6365421138878193,
|
|
"learning_rate": 3.672269414942166e-05,
|
|
"loss": 0.4333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4012058973312378,
|
|
"step": 1170,
|
|
"valid_targets_mean": 3368.2,
|
|
"valid_targets_min": 1577
|
|
},
|
|
{
|
|
"epoch": 1.8740031897926634,
|
|
"grad_norm": 0.7505855056869178,
|
|
"learning_rate": 3.6678935721569825e-05,
|
|
"loss": 0.4316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42188188433647156,
|
|
"step": 1175,
|
|
"valid_targets_mean": 2791.4,
|
|
"valid_targets_min": 1129
|
|
},
|
|
{
|
|
"epoch": 1.8819776714513556,
|
|
"grad_norm": 0.5981214970638279,
|
|
"learning_rate": 3.663491353105101e-05,
|
|
"loss": 0.4118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43208760023117065,
|
|
"step": 1180,
|
|
"valid_targets_mean": 4071.8,
|
|
"valid_targets_min": 1513
|
|
},
|
|
{
|
|
"epoch": 1.8899521531100478,
|
|
"grad_norm": 0.7223705503857738,
|
|
"learning_rate": 3.659062827403735e-05,
|
|
"loss": 0.4432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4402581751346588,
|
|
"step": 1185,
|
|
"valid_targets_mean": 2942.9,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 1.89792663476874,
|
|
"grad_norm": 0.6913053869689189,
|
|
"learning_rate": 3.654608065086115e-05,
|
|
"loss": 0.4032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4274940490722656,
|
|
"step": 1190,
|
|
"valid_targets_mean": 3440.4,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 1.9059011164274322,
|
|
"grad_norm": 0.8045269656870567,
|
|
"learning_rate": 3.650127136600379e-05,
|
|
"loss": 0.4162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3979170322418213,
|
|
"step": 1195,
|
|
"valid_targets_mean": 3487.2,
|
|
"valid_targets_min": 1335
|
|
},
|
|
{
|
|
"epoch": 1.9138755980861244,
|
|
"grad_norm": 0.669792327162365,
|
|
"learning_rate": 3.645620112808464e-05,
|
|
"loss": 0.4302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4759683609008789,
|
|
"step": 1200,
|
|
"valid_targets_mean": 3269.3,
|
|
"valid_targets_min": 1281
|
|
},
|
|
{
|
|
"epoch": 1.9218500797448166,
|
|
"grad_norm": 0.70511456257013,
|
|
"learning_rate": 3.641087064984977e-05,
|
|
"loss": 0.4161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.459990531206131,
|
|
"step": 1205,
|
|
"valid_targets_mean": 2946.3,
|
|
"valid_targets_min": 1691
|
|
},
|
|
{
|
|
"epoch": 1.9298245614035088,
|
|
"grad_norm": 0.6363699429905626,
|
|
"learning_rate": 3.636528064816073e-05,
|
|
"loss": 0.4166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4155214726924896,
|
|
"step": 1210,
|
|
"valid_targets_mean": 3987.8,
|
|
"valid_targets_min": 1902
|
|
},
|
|
{
|
|
"epoch": 1.937799043062201,
|
|
"grad_norm": 0.7214558423452982,
|
|
"learning_rate": 3.6319431843983223e-05,
|
|
"loss": 0.4097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4036198854446411,
|
|
"step": 1215,
|
|
"valid_targets_mean": 2635.2,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 1.9457735247208932,
|
|
"grad_norm": 0.5992903041087578,
|
|
"learning_rate": 3.6273324962375676e-05,
|
|
"loss": 0.4179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41311943531036377,
|
|
"step": 1220,
|
|
"valid_targets_mean": 4070.4,
|
|
"valid_targets_min": 1852
|
|
},
|
|
{
|
|
"epoch": 1.9537480063795853,
|
|
"grad_norm": 0.6654748242145356,
|
|
"learning_rate": 3.622696073247777e-05,
|
|
"loss": 0.4207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3969907760620117,
|
|
"step": 1225,
|
|
"valid_targets_mean": 3008.8,
|
|
"valid_targets_min": 1025
|
|
},
|
|
{
|
|
"epoch": 1.9617224880382775,
|
|
"grad_norm": 0.6688857607369519,
|
|
"learning_rate": 3.6180339887498953e-05,
|
|
"loss": 0.3891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35892730951309204,
|
|
"step": 1230,
|
|
"valid_targets_mean": 2779.9,
|
|
"valid_targets_min": 1184
|
|
},
|
|
{
|
|
"epoch": 1.9696969696969697,
|
|
"grad_norm": 0.7525356233857121,
|
|
"learning_rate": 3.613346316470678e-05,
|
|
"loss": 0.4114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4026384949684143,
|
|
"step": 1235,
|
|
"valid_targets_mean": 2345.8,
|
|
"valid_targets_min": 1100
|
|
},
|
|
{
|
|
"epoch": 1.977671451355662,
|
|
"grad_norm": 0.7036805946382984,
|
|
"learning_rate": 3.60863313054153e-05,
|
|
"loss": 0.4144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4080306887626648,
|
|
"step": 1240,
|
|
"valid_targets_mean": 3360.4,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 1.985645933014354,
|
|
"grad_norm": 0.577670016294368,
|
|
"learning_rate": 3.6038945054973334e-05,
|
|
"loss": 0.4081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.390597403049469,
|
|
"step": 1245,
|
|
"valid_targets_mean": 4169.4,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 1.9936204146730463,
|
|
"grad_norm": 0.6108269940269941,
|
|
"learning_rate": 3.599130516275266e-05,
|
|
"loss": 0.4106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4361282289028168,
|
|
"step": 1250,
|
|
"valid_targets_mean": 3937.7,
|
|
"valid_targets_min": 1371
|
|
},
|
|
{
|
|
"epoch": 2.0015948963317385,
|
|
"grad_norm": 0.7713164135130286,
|
|
"learning_rate": 3.594341238213618e-05,
|
|
"loss": 0.4047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42938780784606934,
|
|
"step": 1255,
|
|
"valid_targets_mean": 3104.2,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 2.0095693779904304,
|
|
"grad_norm": 0.7771663764451714,
|
|
"learning_rate": 3.589526747050601e-05,
|
|
"loss": 0.3853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3887682259082794,
|
|
"step": 1260,
|
|
"valid_targets_mean": 2839.8,
|
|
"valid_targets_min": 1164
|
|
},
|
|
{
|
|
"epoch": 2.017543859649123,
|
|
"grad_norm": 0.7043651308334903,
|
|
"learning_rate": 3.584687118923149e-05,
|
|
"loss": 0.3526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39590615034103394,
|
|
"step": 1265,
|
|
"valid_targets_mean": 3327.2,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 2.025518341307815,
|
|
"grad_norm": 0.6814058692265738,
|
|
"learning_rate": 3.579822430365714e-05,
|
|
"loss": 0.3791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38219624757766724,
|
|
"step": 1270,
|
|
"valid_targets_mean": 3160.0,
|
|
"valid_targets_min": 962
|
|
},
|
|
{
|
|
"epoch": 2.0334928229665072,
|
|
"grad_norm": 0.7327248182586243,
|
|
"learning_rate": 3.57493275830906e-05,
|
|
"loss": 0.3823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4009716510772705,
|
|
"step": 1275,
|
|
"valid_targets_mean": 3403.6,
|
|
"valid_targets_min": 1262
|
|
},
|
|
{
|
|
"epoch": 2.041467304625199,
|
|
"grad_norm": 0.6562037781363829,
|
|
"learning_rate": 3.570018180079037e-05,
|
|
"loss": 0.3527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36178916692733765,
|
|
"step": 1280,
|
|
"valid_targets_mean": 3575.4,
|
|
"valid_targets_min": 1100
|
|
},
|
|
{
|
|
"epoch": 2.0494417862838916,
|
|
"grad_norm": 0.6486353726245339,
|
|
"learning_rate": 3.5650787733953715e-05,
|
|
"loss": 0.3638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3657630383968353,
|
|
"step": 1285,
|
|
"valid_targets_mean": 3565.6,
|
|
"valid_targets_min": 1552
|
|
},
|
|
{
|
|
"epoch": 2.0574162679425836,
|
|
"grad_norm": 0.788793620527162,
|
|
"learning_rate": 3.560114616370425e-05,
|
|
"loss": 0.371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37835225462913513,
|
|
"step": 1290,
|
|
"valid_targets_mean": 2611.6,
|
|
"valid_targets_min": 532
|
|
},
|
|
{
|
|
"epoch": 2.065390749601276,
|
|
"grad_norm": 0.6546578082419797,
|
|
"learning_rate": 3.555125787507964e-05,
|
|
"loss": 0.3956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3609480857849121,
|
|
"step": 1295,
|
|
"valid_targets_mean": 3295.5,
|
|
"valid_targets_min": 985
|
|
},
|
|
{
|
|
"epoch": 2.073365231259968,
|
|
"grad_norm": 1.0740627228302269,
|
|
"learning_rate": 3.550112365701921e-05,
|
|
"loss": 0.3896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3922559916973114,
|
|
"step": 1300,
|
|
"valid_targets_mean": 3213.5,
|
|
"valid_targets_min": 1203
|
|
},
|
|
{
|
|
"epoch": 2.0813397129186604,
|
|
"grad_norm": 0.7134354386595515,
|
|
"learning_rate": 3.545074430235142e-05,
|
|
"loss": 0.3705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3848370313644409,
|
|
"step": 1305,
|
|
"valid_targets_mean": 2954.8,
|
|
"valid_targets_min": 1041
|
|
},
|
|
{
|
|
"epoch": 2.0893141945773523,
|
|
"grad_norm": 0.6921772500440376,
|
|
"learning_rate": 3.540012060778137e-05,
|
|
"loss": 0.3679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40677371621131897,
|
|
"step": 1310,
|
|
"valid_targets_mean": 3284.5,
|
|
"valid_targets_min": 823
|
|
},
|
|
{
|
|
"epoch": 2.0972886762360448,
|
|
"grad_norm": 0.6885635082386192,
|
|
"learning_rate": 3.534925337387816e-05,
|
|
"loss": 0.3658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3504820466041565,
|
|
"step": 1315,
|
|
"valid_targets_mean": 2812.0,
|
|
"valid_targets_min": 847
|
|
},
|
|
{
|
|
"epoch": 2.1052631578947367,
|
|
"grad_norm": 0.7341871254520772,
|
|
"learning_rate": 3.529814340506226e-05,
|
|
"loss": 0.3617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36829423904418945,
|
|
"step": 1320,
|
|
"valid_targets_mean": 3311.9,
|
|
"valid_targets_min": 1290
|
|
},
|
|
{
|
|
"epoch": 2.113237639553429,
|
|
"grad_norm": 0.7603204501924026,
|
|
"learning_rate": 3.524679150959277e-05,
|
|
"loss": 0.3689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4063563346862793,
|
|
"step": 1325,
|
|
"valid_targets_mean": 2711.4,
|
|
"valid_targets_min": 1466
|
|
},
|
|
{
|
|
"epoch": 2.121212121212121,
|
|
"grad_norm": 0.7913752815865426,
|
|
"learning_rate": 3.519519849955466e-05,
|
|
"loss": 0.388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.405544638633728,
|
|
"step": 1330,
|
|
"valid_targets_mean": 3593.8,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 2.1291866028708135,
|
|
"grad_norm": 0.6724442108328078,
|
|
"learning_rate": 3.514336519084591e-05,
|
|
"loss": 0.3829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4006918668746948,
|
|
"step": 1335,
|
|
"valid_targets_mean": 3708.7,
|
|
"valid_targets_min": 1234
|
|
},
|
|
{
|
|
"epoch": 2.1371610845295055,
|
|
"grad_norm": 0.6227593502649125,
|
|
"learning_rate": 3.509129240316461e-05,
|
|
"loss": 0.3549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3489978313446045,
|
|
"step": 1340,
|
|
"valid_targets_mean": 3649.8,
|
|
"valid_targets_min": 1415
|
|
},
|
|
{
|
|
"epoch": 2.145135566188198,
|
|
"grad_norm": 0.6347475129327896,
|
|
"learning_rate": 3.5038980959995985e-05,
|
|
"loss": 0.3839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3530866205692291,
|
|
"step": 1345,
|
|
"valid_targets_mean": 3586.9,
|
|
"valid_targets_min": 1284
|
|
},
|
|
{
|
|
"epoch": 2.15311004784689,
|
|
"grad_norm": 0.7992730609473288,
|
|
"learning_rate": 3.498643168859941e-05,
|
|
"loss": 0.3879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4214326739311218,
|
|
"step": 1350,
|
|
"valid_targets_mean": 3066.7,
|
|
"valid_targets_min": 915
|
|
},
|
|
{
|
|
"epoch": 2.1610845295055823,
|
|
"grad_norm": 0.6347568996456564,
|
|
"learning_rate": 3.493364541999529e-05,
|
|
"loss": 0.3735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3516305088996887,
|
|
"step": 1355,
|
|
"valid_targets_mean": 3693.1,
|
|
"valid_targets_min": 1487
|
|
},
|
|
{
|
|
"epoch": 2.1690590111642742,
|
|
"grad_norm": 0.7532717186730438,
|
|
"learning_rate": 3.488062298895194e-05,
|
|
"loss": 0.3519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4031350016593933,
|
|
"step": 1360,
|
|
"valid_targets_mean": 2934.2,
|
|
"valid_targets_min": 1069
|
|
},
|
|
{
|
|
"epoch": 2.1770334928229667,
|
|
"grad_norm": 0.644574029137327,
|
|
"learning_rate": 3.482736523397237e-05,
|
|
"loss": 0.363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3530963659286499,
|
|
"step": 1365,
|
|
"valid_targets_mean": 3380.9,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 2.1850079744816586,
|
|
"grad_norm": 0.6463594743344104,
|
|
"learning_rate": 3.4773872997281026e-05,
|
|
"loss": 0.3466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32944726943969727,
|
|
"step": 1370,
|
|
"valid_targets_mean": 3447.6,
|
|
"valid_targets_min": 1367
|
|
},
|
|
{
|
|
"epoch": 2.192982456140351,
|
|
"grad_norm": 0.6985753338690244,
|
|
"learning_rate": 3.472014712481048e-05,
|
|
"loss": 0.3672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4293551445007324,
|
|
"step": 1375,
|
|
"valid_targets_mean": 3218.7,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 2.200956937799043,
|
|
"grad_norm": 0.8096347685279716,
|
|
"learning_rate": 3.466618846618806e-05,
|
|
"loss": 0.3882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36100339889526367,
|
|
"step": 1380,
|
|
"valid_targets_mean": 2829.8,
|
|
"valid_targets_min": 1384
|
|
},
|
|
{
|
|
"epoch": 2.2089314194577354,
|
|
"grad_norm": 0.6862581357845517,
|
|
"learning_rate": 3.461199787472238e-05,
|
|
"loss": 0.3504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3710808753967285,
|
|
"step": 1385,
|
|
"valid_targets_mean": 3267.7,
|
|
"valid_targets_min": 989
|
|
},
|
|
{
|
|
"epoch": 2.2169059011164274,
|
|
"grad_norm": 0.8092467875990395,
|
|
"learning_rate": 3.455757620738989e-05,
|
|
"loss": 0.388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4684341251850128,
|
|
"step": 1390,
|
|
"valid_targets_mean": 3134.1,
|
|
"valid_targets_min": 865
|
|
},
|
|
{
|
|
"epoch": 2.22488038277512,
|
|
"grad_norm": 0.6733678609118939,
|
|
"learning_rate": 3.450292432482127e-05,
|
|
"loss": 0.3864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.367912620306015,
|
|
"step": 1395,
|
|
"valid_targets_mean": 3314.7,
|
|
"valid_targets_min": 1090
|
|
},
|
|
{
|
|
"epoch": 2.2328548644338118,
|
|
"grad_norm": 0.7023060377926819,
|
|
"learning_rate": 3.444804309128789e-05,
|
|
"loss": 0.3807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43316739797592163,
|
|
"step": 1400,
|
|
"valid_targets_mean": 3503.3,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 2.240829346092504,
|
|
"grad_norm": 0.6740533242642961,
|
|
"learning_rate": 3.439293337468808e-05,
|
|
"loss": 0.361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3444497287273407,
|
|
"step": 1405,
|
|
"valid_targets_mean": 3606.4,
|
|
"valid_targets_min": 1380
|
|
},
|
|
{
|
|
"epoch": 2.248803827751196,
|
|
"grad_norm": 0.6203876823119541,
|
|
"learning_rate": 3.4337596046533426e-05,
|
|
"loss": 0.3759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3932915925979614,
|
|
"step": 1410,
|
|
"valid_targets_mean": 3932.6,
|
|
"valid_targets_min": 1161
|
|
},
|
|
{
|
|
"epoch": 2.2567783094098885,
|
|
"grad_norm": 0.6524203560744807,
|
|
"learning_rate": 3.4282031981935e-05,
|
|
"loss": 0.3844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37616485357284546,
|
|
"step": 1415,
|
|
"valid_targets_mean": 3452.6,
|
|
"valid_targets_min": 1275
|
|
},
|
|
{
|
|
"epoch": 2.2647527910685805,
|
|
"grad_norm": 0.7596455453556623,
|
|
"learning_rate": 3.42262420595895e-05,
|
|
"loss": 0.3583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3516044020652771,
|
|
"step": 1420,
|
|
"valid_targets_mean": 2676.4,
|
|
"valid_targets_min": 1055
|
|
},
|
|
{
|
|
"epoch": 2.2727272727272725,
|
|
"grad_norm": 0.590574799426514,
|
|
"learning_rate": 3.417022716176539e-05,
|
|
"loss": 0.4071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.362797349691391,
|
|
"step": 1425,
|
|
"valid_targets_mean": 4440.1,
|
|
"valid_targets_min": 1120
|
|
},
|
|
{
|
|
"epoch": 2.280701754385965,
|
|
"grad_norm": 0.755339570874472,
|
|
"learning_rate": 3.411398817428889e-05,
|
|
"loss": 0.3675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39096003770828247,
|
|
"step": 1430,
|
|
"valid_targets_mean": 2734.0,
|
|
"valid_targets_min": 1138
|
|
},
|
|
{
|
|
"epoch": 2.2886762360446573,
|
|
"grad_norm": 0.6772818983126377,
|
|
"learning_rate": 3.4057525986530016e-05,
|
|
"loss": 0.3675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3835941553115845,
|
|
"step": 1435,
|
|
"valid_targets_mean": 3422.3,
|
|
"valid_targets_min": 1072
|
|
},
|
|
{
|
|
"epoch": 2.2966507177033493,
|
|
"grad_norm": 0.6965469175897261,
|
|
"learning_rate": 3.400084149138851e-05,
|
|
"loss": 0.3651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3296578824520111,
|
|
"step": 1440,
|
|
"valid_targets_mean": 2941.1,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 2.3046251993620412,
|
|
"grad_norm": 0.6843454376990589,
|
|
"learning_rate": 3.394393558527969e-05,
|
|
"loss": 0.3735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35185760259628296,
|
|
"step": 1445,
|
|
"valid_targets_mean": 3044.4,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 2.3125996810207337,
|
|
"grad_norm": 0.654891330451907,
|
|
"learning_rate": 3.388680916812031e-05,
|
|
"loss": 0.3918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3985101282596588,
|
|
"step": 1450,
|
|
"valid_targets_mean": 3410.7,
|
|
"valid_targets_min": 1053
|
|
},
|
|
{
|
|
"epoch": 2.320574162679426,
|
|
"grad_norm": 0.7694671062612541,
|
|
"learning_rate": 3.382946314331429e-05,
|
|
"loss": 0.3714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37215566635131836,
|
|
"step": 1455,
|
|
"valid_targets_mean": 2893.6,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 2.328548644338118,
|
|
"grad_norm": 0.8255646269856286,
|
|
"learning_rate": 3.377189841773848e-05,
|
|
"loss": 0.3856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43123891949653625,
|
|
"step": 1460,
|
|
"valid_targets_mean": 2943.9,
|
|
"valid_targets_min": 918
|
|
},
|
|
{
|
|
"epoch": 2.33652312599681,
|
|
"grad_norm": 0.8170664772071144,
|
|
"learning_rate": 3.371411590172827e-05,
|
|
"loss": 0.3809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4071405529975891,
|
|
"step": 1465,
|
|
"valid_targets_mean": 2653.4,
|
|
"valid_targets_min": 1105
|
|
},
|
|
{
|
|
"epoch": 2.3444976076555024,
|
|
"grad_norm": 0.789393341086325,
|
|
"learning_rate": 3.365611650906321e-05,
|
|
"loss": 0.3909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4238942861557007,
|
|
"step": 1470,
|
|
"valid_targets_mean": 3011.8,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 2.352472089314195,
|
|
"grad_norm": 0.7464713473849459,
|
|
"learning_rate": 3.359790115695259e-05,
|
|
"loss": 0.3754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3934614360332489,
|
|
"step": 1475,
|
|
"valid_targets_mean": 3862.6,
|
|
"valid_targets_min": 1291
|
|
},
|
|
{
|
|
"epoch": 2.360446570972887,
|
|
"grad_norm": 0.629832597290583,
|
|
"learning_rate": 3.353947076602088e-05,
|
|
"loss": 0.3815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35173749923706055,
|
|
"step": 1480,
|
|
"valid_targets_mean": 3304.6,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 2.3684210526315788,
|
|
"grad_norm": 0.7941895502858402,
|
|
"learning_rate": 3.34808262602932e-05,
|
|
"loss": 0.3716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37625449895858765,
|
|
"step": 1485,
|
|
"valid_targets_mean": 2570.2,
|
|
"valid_targets_min": 1337
|
|
},
|
|
{
|
|
"epoch": 2.376395534290271,
|
|
"grad_norm": 0.6206999903627516,
|
|
"learning_rate": 3.342196856718074e-05,
|
|
"loss": 0.3701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35230153799057007,
|
|
"step": 1490,
|
|
"valid_targets_mean": 3973.2,
|
|
"valid_targets_min": 1479
|
|
},
|
|
{
|
|
"epoch": 2.384370015948963,
|
|
"grad_norm": 0.7134960021882305,
|
|
"learning_rate": 3.336289861746602e-05,
|
|
"loss": 0.4021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42916515469551086,
|
|
"step": 1495,
|
|
"valid_targets_mean": 3089.9,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 2.3923444976076556,
|
|
"grad_norm": 0.6570474111897717,
|
|
"learning_rate": 3.330361734528823e-05,
|
|
"loss": 0.3746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32198184728622437,
|
|
"step": 1500,
|
|
"valid_targets_mean": 2991.1,
|
|
"valid_targets_min": 892
|
|
},
|
|
{
|
|
"epoch": 2.4003189792663475,
|
|
"grad_norm": 0.7591576345758384,
|
|
"learning_rate": 3.324412568812844e-05,
|
|
"loss": 0.378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39926987886428833,
|
|
"step": 1505,
|
|
"valid_targets_mean": 2589.1,
|
|
"valid_targets_min": 1484
|
|
},
|
|
{
|
|
"epoch": 2.40829346092504,
|
|
"grad_norm": 0.6234558160870561,
|
|
"learning_rate": 3.318442458679477e-05,
|
|
"loss": 0.3567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3213271498680115,
|
|
"step": 1510,
|
|
"valid_targets_mean": 3192.2,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 2.416267942583732,
|
|
"grad_norm": 0.7385476016902303,
|
|
"learning_rate": 3.312451498540751e-05,
|
|
"loss": 0.3759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37497782707214355,
|
|
"step": 1515,
|
|
"valid_targets_mean": 2520.6,
|
|
"valid_targets_min": 1067
|
|
},
|
|
{
|
|
"epoch": 2.4242424242424243,
|
|
"grad_norm": 0.8089967030460891,
|
|
"learning_rate": 3.306439783138421e-05,
|
|
"loss": 0.3857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.442488431930542,
|
|
"step": 1520,
|
|
"valid_targets_mean": 3322.2,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 2.4322169059011163,
|
|
"grad_norm": 0.8267427094181998,
|
|
"learning_rate": 3.3004074075424666e-05,
|
|
"loss": 0.3722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3569936156272888,
|
|
"step": 1525,
|
|
"valid_targets_mean": 2490.6,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 2.4401913875598087,
|
|
"grad_norm": 0.694246313180276,
|
|
"learning_rate": 3.29435446714959e-05,
|
|
"loss": 0.3722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3653833568096161,
|
|
"step": 1530,
|
|
"valid_targets_mean": 3317.6,
|
|
"valid_targets_min": 1553
|
|
},
|
|
{
|
|
"epoch": 2.4481658692185007,
|
|
"grad_norm": 0.6738800152614978,
|
|
"learning_rate": 3.288281057681709e-05,
|
|
"loss": 0.3572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35374999046325684,
|
|
"step": 1535,
|
|
"valid_targets_mean": 3545.5,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 2.456140350877193,
|
|
"grad_norm": 0.687086231438634,
|
|
"learning_rate": 3.28218727518444e-05,
|
|
"loss": 0.3943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38243716955184937,
|
|
"step": 1540,
|
|
"valid_targets_mean": 3304.3,
|
|
"valid_targets_min": 1445
|
|
},
|
|
{
|
|
"epoch": 2.464114832535885,
|
|
"grad_norm": 0.7069285245138436,
|
|
"learning_rate": 3.2760732160255835e-05,
|
|
"loss": 0.3567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3574937582015991,
|
|
"step": 1545,
|
|
"valid_targets_mean": 3231.9,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 2.4720893141945774,
|
|
"grad_norm": 0.6595616831329948,
|
|
"learning_rate": 3.269938976893595e-05,
|
|
"loss": 0.3702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31367528438568115,
|
|
"step": 1550,
|
|
"valid_targets_mean": 3229.1,
|
|
"valid_targets_min": 1276
|
|
},
|
|
{
|
|
"epoch": 2.4800637958532694,
|
|
"grad_norm": 0.8045456663432187,
|
|
"learning_rate": 3.2637846547960596e-05,
|
|
"loss": 0.379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43471935391426086,
|
|
"step": 1555,
|
|
"valid_targets_mean": 2528.9,
|
|
"valid_targets_min": 378
|
|
},
|
|
{
|
|
"epoch": 2.488038277511962,
|
|
"grad_norm": 0.7375313806783964,
|
|
"learning_rate": 3.2576103470581564e-05,
|
|
"loss": 0.3901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4245631694793701,
|
|
"step": 1560,
|
|
"valid_targets_mean": 2941.9,
|
|
"valid_targets_min": 1384
|
|
},
|
|
{
|
|
"epoch": 2.496012759170654,
|
|
"grad_norm": 0.582977938856741,
|
|
"learning_rate": 3.25141615132112e-05,
|
|
"loss": 0.3546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3379749655723572,
|
|
"step": 1565,
|
|
"valid_targets_mean": 4142.6,
|
|
"valid_targets_min": 1234
|
|
},
|
|
{
|
|
"epoch": 2.503987240829346,
|
|
"grad_norm": 0.594045099871453,
|
|
"learning_rate": 3.245202165540697e-05,
|
|
"loss": 0.3677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41011056303977966,
|
|
"step": 1570,
|
|
"valid_targets_mean": 4688.1,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 2.511961722488038,
|
|
"grad_norm": 0.6788994854982295,
|
|
"learning_rate": 3.238968487985594e-05,
|
|
"loss": 0.3762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39587777853012085,
|
|
"step": 1575,
|
|
"valid_targets_mean": 3588.1,
|
|
"valid_targets_min": 1236
|
|
},
|
|
{
|
|
"epoch": 2.5199362041467306,
|
|
"grad_norm": 0.6696971603918799,
|
|
"learning_rate": 3.232715217235927e-05,
|
|
"loss": 0.3636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3593067526817322,
|
|
"step": 1580,
|
|
"valid_targets_mean": 3162.4,
|
|
"valid_targets_min": 1148
|
|
},
|
|
{
|
|
"epoch": 2.5279106858054226,
|
|
"grad_norm": 0.813542398756882,
|
|
"learning_rate": 3.226442452181662e-05,
|
|
"loss": 0.3723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36189043521881104,
|
|
"step": 1585,
|
|
"valid_targets_mean": 2782.6,
|
|
"valid_targets_min": 1208
|
|
},
|
|
{
|
|
"epoch": 2.535885167464115,
|
|
"grad_norm": 0.6989961116436353,
|
|
"learning_rate": 3.220150292021049e-05,
|
|
"loss": 0.3984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4006919860839844,
|
|
"step": 1590,
|
|
"valid_targets_mean": 3299.0,
|
|
"valid_targets_min": 1264
|
|
},
|
|
{
|
|
"epoch": 2.543859649122807,
|
|
"grad_norm": 0.6951995943515534,
|
|
"learning_rate": 3.213838836259055e-05,
|
|
"loss": 0.3776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38397926092147827,
|
|
"step": 1595,
|
|
"valid_targets_mean": 3410.5,
|
|
"valid_targets_min": 1356
|
|
},
|
|
{
|
|
"epoch": 2.5518341307814993,
|
|
"grad_norm": 0.7193206005631059,
|
|
"learning_rate": 3.2075081847057886e-05,
|
|
"loss": 0.3907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3920392692089081,
|
|
"step": 1600,
|
|
"valid_targets_mean": 3327.3,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 2.5598086124401913,
|
|
"grad_norm": 0.6750256666707704,
|
|
"learning_rate": 3.201158437474925e-05,
|
|
"loss": 0.4157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4216896891593933,
|
|
"step": 1605,
|
|
"valid_targets_mean": 3629.1,
|
|
"valid_targets_min": 1145
|
|
},
|
|
{
|
|
"epoch": 2.5677830940988837,
|
|
"grad_norm": 0.6935798314532866,
|
|
"learning_rate": 3.194789694982119e-05,
|
|
"loss": 0.3869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37298423051834106,
|
|
"step": 1610,
|
|
"valid_targets_mean": 3069.4,
|
|
"valid_targets_min": 1651
|
|
},
|
|
{
|
|
"epoch": 2.5757575757575757,
|
|
"grad_norm": 0.7754570197213737,
|
|
"learning_rate": 3.1884020579434216e-05,
|
|
"loss": 0.378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33575713634490967,
|
|
"step": 1615,
|
|
"valid_targets_mean": 2850.7,
|
|
"valid_targets_min": 1233
|
|
},
|
|
{
|
|
"epoch": 2.583732057416268,
|
|
"grad_norm": 0.6630331045318754,
|
|
"learning_rate": 3.181995627373679e-05,
|
|
"loss": 0.3791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39484184980392456,
|
|
"step": 1620,
|
|
"valid_targets_mean": 3725.4,
|
|
"valid_targets_min": 1768
|
|
},
|
|
{
|
|
"epoch": 2.59170653907496,
|
|
"grad_norm": 0.6882483355577453,
|
|
"learning_rate": 3.1755705045849465e-05,
|
|
"loss": 0.3883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35860878229141235,
|
|
"step": 1625,
|
|
"valid_targets_mean": 3084.6,
|
|
"valid_targets_min": 989
|
|
},
|
|
{
|
|
"epoch": 2.5996810207336525,
|
|
"grad_norm": 0.914173612442749,
|
|
"learning_rate": 3.1691267911848765e-05,
|
|
"loss": 0.3963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4026825726032257,
|
|
"step": 1630,
|
|
"valid_targets_mean": 3004.3,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 2.6076555023923444,
|
|
"grad_norm": 0.6662339131010145,
|
|
"learning_rate": 3.1626645890751167e-05,
|
|
"loss": 0.37,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3396124839782715,
|
|
"step": 1635,
|
|
"valid_targets_mean": 3313.6,
|
|
"valid_targets_min": 1766
|
|
},
|
|
{
|
|
"epoch": 2.6156299840510364,
|
|
"grad_norm": 0.7241562109483987,
|
|
"learning_rate": 3.156184000449697e-05,
|
|
"loss": 0.3901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40760475397109985,
|
|
"step": 1640,
|
|
"valid_targets_mean": 3923.2,
|
|
"valid_targets_min": 1501
|
|
},
|
|
{
|
|
"epoch": 2.623604465709729,
|
|
"grad_norm": 0.656899010884032,
|
|
"learning_rate": 3.149685127793415e-05,
|
|
"loss": 0.3984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42402899265289307,
|
|
"step": 1645,
|
|
"valid_targets_mean": 3807.3,
|
|
"valid_targets_min": 1438
|
|
},
|
|
{
|
|
"epoch": 2.6315789473684212,
|
|
"grad_norm": 0.6406715356256204,
|
|
"learning_rate": 3.143168073880214e-05,
|
|
"loss": 0.3835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39723414182662964,
|
|
"step": 1650,
|
|
"valid_targets_mean": 3872.0,
|
|
"valid_targets_min": 1286
|
|
},
|
|
{
|
|
"epoch": 2.639553429027113,
|
|
"grad_norm": 0.6758721230831286,
|
|
"learning_rate": 3.1366329417715556e-05,
|
|
"loss": 0.3718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3360089659690857,
|
|
"step": 1655,
|
|
"valid_targets_mean": 3342.1,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 2.647527910685805,
|
|
"grad_norm": 0.5962178054786375,
|
|
"learning_rate": 3.1300798348147954e-05,
|
|
"loss": 0.3903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41695430874824524,
|
|
"step": 1660,
|
|
"valid_targets_mean": 4067.1,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 2.6555023923444976,
|
|
"grad_norm": 0.7037158149267086,
|
|
"learning_rate": 3.123508856641542e-05,
|
|
"loss": 0.3909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36286890506744385,
|
|
"step": 1665,
|
|
"valid_targets_mean": 3117.3,
|
|
"valid_targets_min": 1303
|
|
},
|
|
{
|
|
"epoch": 2.66347687400319,
|
|
"grad_norm": 0.6470542337738016,
|
|
"learning_rate": 3.116920111166025e-05,
|
|
"loss": 0.3489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3440346121788025,
|
|
"step": 1670,
|
|
"valid_targets_mean": 3737.6,
|
|
"valid_targets_min": 1400
|
|
},
|
|
{
|
|
"epoch": 2.671451355661882,
|
|
"grad_norm": 0.7299166839228555,
|
|
"learning_rate": 3.1103137025834456e-05,
|
|
"loss": 0.3732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.409359872341156,
|
|
"step": 1675,
|
|
"valid_targets_mean": 3226.1,
|
|
"valid_targets_min": 1223
|
|
},
|
|
{
|
|
"epoch": 2.679425837320574,
|
|
"grad_norm": 0.6068757159371367,
|
|
"learning_rate": 3.103689735368333e-05,
|
|
"loss": 0.3471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34454524517059326,
|
|
"step": 1680,
|
|
"valid_targets_mean": 3672.1,
|
|
"valid_targets_min": 1337
|
|
},
|
|
{
|
|
"epoch": 2.6874003189792663,
|
|
"grad_norm": 0.6400668512481118,
|
|
"learning_rate": 3.097048314272889e-05,
|
|
"loss": 0.3615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34516024589538574,
|
|
"step": 1685,
|
|
"valid_targets_mean": 3511.6,
|
|
"valid_targets_min": 1123
|
|
},
|
|
{
|
|
"epoch": 2.6953748006379588,
|
|
"grad_norm": 0.727001029574095,
|
|
"learning_rate": 3.090389544325335e-05,
|
|
"loss": 0.3997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37679344415664673,
|
|
"step": 1690,
|
|
"valid_targets_mean": 3585.9,
|
|
"valid_targets_min": 1321
|
|
},
|
|
{
|
|
"epoch": 2.7033492822966507,
|
|
"grad_norm": 0.6610282182343522,
|
|
"learning_rate": 3.08371353082825e-05,
|
|
"loss": 0.3712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3914487957954407,
|
|
"step": 1695,
|
|
"valid_targets_mean": 3458.6,
|
|
"valid_targets_min": 1546
|
|
},
|
|
{
|
|
"epoch": 2.7113237639553427,
|
|
"grad_norm": 0.7181695423413437,
|
|
"learning_rate": 3.0770203793568994e-05,
|
|
"loss": 0.3736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3519781827926636,
|
|
"step": 1700,
|
|
"valid_targets_mean": 2930.5,
|
|
"valid_targets_min": 1437
|
|
},
|
|
{
|
|
"epoch": 2.719298245614035,
|
|
"grad_norm": 0.6762363814685325,
|
|
"learning_rate": 3.0703101957575765e-05,
|
|
"loss": 0.415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42757710814476013,
|
|
"step": 1705,
|
|
"valid_targets_mean": 4165.9,
|
|
"valid_targets_min": 2001
|
|
},
|
|
{
|
|
"epoch": 2.7272727272727275,
|
|
"grad_norm": 0.6207304909204282,
|
|
"learning_rate": 3.0635830861459204e-05,
|
|
"loss": 0.3617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3620043992996216,
|
|
"step": 1710,
|
|
"valid_targets_mean": 4295.6,
|
|
"valid_targets_min": 1291
|
|
},
|
|
{
|
|
"epoch": 2.7352472089314195,
|
|
"grad_norm": 0.7169729288468969,
|
|
"learning_rate": 3.056839156905239e-05,
|
|
"loss": 0.3749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36832132935523987,
|
|
"step": 1715,
|
|
"valid_targets_mean": 2686.6,
|
|
"valid_targets_min": 1032
|
|
},
|
|
{
|
|
"epoch": 2.7432216905901115,
|
|
"grad_norm": 0.6805074587103233,
|
|
"learning_rate": 3.0500785146848303e-05,
|
|
"loss": 0.3563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.325675904750824,
|
|
"step": 1720,
|
|
"valid_targets_mean": 3018.7,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 2.751196172248804,
|
|
"grad_norm": 0.6711332601920913,
|
|
"learning_rate": 3.04330126639829e-05,
|
|
"loss": 0.3634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3771580457687378,
|
|
"step": 1725,
|
|
"valid_targets_mean": 3769.1,
|
|
"valid_targets_min": 1502
|
|
},
|
|
{
|
|
"epoch": 2.7591706539074963,
|
|
"grad_norm": 0.6782728844052294,
|
|
"learning_rate": 3.0365075192218278e-05,
|
|
"loss": 0.4027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36162233352661133,
|
|
"step": 1730,
|
|
"valid_targets_mean": 3022.1,
|
|
"valid_targets_min": 1528
|
|
},
|
|
{
|
|
"epoch": 2.7671451355661882,
|
|
"grad_norm": 0.6732586783026472,
|
|
"learning_rate": 3.0296973805925663e-05,
|
|
"loss": 0.4092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36110275983810425,
|
|
"step": 1735,
|
|
"valid_targets_mean": 3330.6,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 2.77511961722488,
|
|
"grad_norm": 0.6090539220109752,
|
|
"learning_rate": 3.022870958206845e-05,
|
|
"loss": 0.3569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36692139506340027,
|
|
"step": 1740,
|
|
"valid_targets_mean": 3984.5,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 2.7830940988835726,
|
|
"grad_norm": 0.663345530981874,
|
|
"learning_rate": 3.0160283600185168e-05,
|
|
"loss": 0.3738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36079245805740356,
|
|
"step": 1745,
|
|
"valid_targets_mean": 3678.8,
|
|
"valid_targets_min": 1082
|
|
},
|
|
{
|
|
"epoch": 2.7910685805422646,
|
|
"grad_norm": 0.6172614656553852,
|
|
"learning_rate": 3.0091696942372412e-05,
|
|
"loss": 0.3859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3773835301399231,
|
|
"step": 1750,
|
|
"valid_targets_mean": 3677.6,
|
|
"valid_targets_min": 1490
|
|
},
|
|
{
|
|
"epoch": 2.799043062200957,
|
|
"grad_norm": 0.6553584489707749,
|
|
"learning_rate": 3.002295069326772e-05,
|
|
"loss": 0.3711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37102147936820984,
|
|
"step": 1755,
|
|
"valid_targets_mean": 3305.1,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 2.807017543859649,
|
|
"grad_norm": 0.6584188015026995,
|
|
"learning_rate": 2.9954045940032423e-05,
|
|
"loss": 0.3705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37571462988853455,
|
|
"step": 1760,
|
|
"valid_targets_mean": 3672.1,
|
|
"valid_targets_min": 1220
|
|
},
|
|
{
|
|
"epoch": 2.8149920255183414,
|
|
"grad_norm": 0.6473248650439101,
|
|
"learning_rate": 2.988498377233446e-05,
|
|
"loss": 0.3535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33738934993743896,
|
|
"step": 1765,
|
|
"valid_targets_mean": 3494.4,
|
|
"valid_targets_min": 879
|
|
},
|
|
{
|
|
"epoch": 2.8229665071770333,
|
|
"grad_norm": 0.6456769797049253,
|
|
"learning_rate": 2.981576528233114e-05,
|
|
"loss": 0.3695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4076976776123047,
|
|
"step": 1770,
|
|
"valid_targets_mean": 3999.9,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 2.8309409888357258,
|
|
"grad_norm": 0.6820793389973177,
|
|
"learning_rate": 2.9746391564651867e-05,
|
|
"loss": 0.3855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3809807598590851,
|
|
"step": 1775,
|
|
"valid_targets_mean": 3234.6,
|
|
"valid_targets_min": 1414
|
|
},
|
|
{
|
|
"epoch": 2.8389154704944177,
|
|
"grad_norm": 0.6226126228653166,
|
|
"learning_rate": 2.9676863716380845e-05,
|
|
"loss": 0.3488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3555409014225006,
|
|
"step": 1780,
|
|
"valid_targets_mean": 3744.9,
|
|
"valid_targets_min": 857
|
|
},
|
|
{
|
|
"epoch": 2.84688995215311,
|
|
"grad_norm": 0.7112308339744315,
|
|
"learning_rate": 2.9607182837039697e-05,
|
|
"loss": 0.3702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32321643829345703,
|
|
"step": 1785,
|
|
"valid_targets_mean": 2452.6,
|
|
"valid_targets_min": 1089
|
|
},
|
|
{
|
|
"epoch": 2.854864433811802,
|
|
"grad_norm": 0.7072881297985282,
|
|
"learning_rate": 2.9537350028570126e-05,
|
|
"loss": 0.3775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3759729862213135,
|
|
"step": 1790,
|
|
"valid_targets_mean": 3052.1,
|
|
"valid_targets_min": 1320
|
|
},
|
|
{
|
|
"epoch": 2.8628389154704945,
|
|
"grad_norm": 0.6641930218108315,
|
|
"learning_rate": 2.946736639531643e-05,
|
|
"loss": 0.3867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3872828483581543,
|
|
"step": 1795,
|
|
"valid_targets_mean": 3660.1,
|
|
"valid_targets_min": 1085
|
|
},
|
|
{
|
|
"epoch": 2.8708133971291865,
|
|
"grad_norm": 0.7884681486570855,
|
|
"learning_rate": 2.9397233044008092e-05,
|
|
"loss": 0.3756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4154924750328064,
|
|
"step": 1800,
|
|
"valid_targets_mean": 2720.0,
|
|
"valid_targets_min": 1204
|
|
},
|
|
{
|
|
"epoch": 2.878787878787879,
|
|
"grad_norm": 0.6144761007750891,
|
|
"learning_rate": 2.9326951083742243e-05,
|
|
"loss": 0.4005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3193194270133972,
|
|
"step": 1805,
|
|
"valid_targets_mean": 3506.2,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 2.886762360446571,
|
|
"grad_norm": 0.651001910145529,
|
|
"learning_rate": 2.925652162596613e-05,
|
|
"loss": 0.3905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43179601430892944,
|
|
"step": 1810,
|
|
"valid_targets_mean": 3889.9,
|
|
"valid_targets_min": 819
|
|
},
|
|
{
|
|
"epoch": 2.8947368421052633,
|
|
"grad_norm": 0.597956387043226,
|
|
"learning_rate": 2.9185945784459558e-05,
|
|
"loss": 0.3721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34256434440612793,
|
|
"step": 1815,
|
|
"valid_targets_mean": 3664.3,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 2.9027113237639552,
|
|
"grad_norm": 0.6632019316326107,
|
|
"learning_rate": 2.9115224675317252e-05,
|
|
"loss": 0.3685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3724680542945862,
|
|
"step": 1820,
|
|
"valid_targets_mean": 3418.6,
|
|
"valid_targets_min": 1089
|
|
},
|
|
{
|
|
"epoch": 2.9106858054226477,
|
|
"grad_norm": 0.7089357912630404,
|
|
"learning_rate": 2.9044359416931206e-05,
|
|
"loss": 0.4039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4182717502117157,
|
|
"step": 1825,
|
|
"valid_targets_mean": 3266.9,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 2.9186602870813396,
|
|
"grad_norm": 0.7088032858967475,
|
|
"learning_rate": 2.897335112997302e-05,
|
|
"loss": 0.3722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3508828282356262,
|
|
"step": 1830,
|
|
"valid_targets_mean": 2722.4,
|
|
"valid_targets_min": 1087
|
|
},
|
|
{
|
|
"epoch": 2.926634768740032,
|
|
"grad_norm": 0.6885707226042495,
|
|
"learning_rate": 2.8902200937376173e-05,
|
|
"loss": 0.3639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35092300176620483,
|
|
"step": 1835,
|
|
"valid_targets_mean": 3096.8,
|
|
"valid_targets_min": 1255
|
|
},
|
|
{
|
|
"epoch": 2.934609250398724,
|
|
"grad_norm": 0.629581413496703,
|
|
"learning_rate": 2.8830909964318242e-05,
|
|
"loss": 0.3756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36820733547210693,
|
|
"step": 1840,
|
|
"valid_targets_mean": 3331.4,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 2.9425837320574164,
|
|
"grad_norm": 0.7052623536521537,
|
|
"learning_rate": 2.875947933820312e-05,
|
|
"loss": 0.3864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3617854118347168,
|
|
"step": 1845,
|
|
"valid_targets_mean": 2692.1,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 2.9505582137161084,
|
|
"grad_norm": 0.6364086927514756,
|
|
"learning_rate": 2.868791018864321e-05,
|
|
"loss": 0.3497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36003005504608154,
|
|
"step": 1850,
|
|
"valid_targets_mean": 3828.5,
|
|
"valid_targets_min": 1108
|
|
},
|
|
{
|
|
"epoch": 2.958532695374801,
|
|
"grad_norm": 0.6764106975374632,
|
|
"learning_rate": 2.861620364744151e-05,
|
|
"loss": 0.3627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37287288904190063,
|
|
"step": 1855,
|
|
"valid_targets_mean": 3544.1,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 2.9665071770334928,
|
|
"grad_norm": 0.6690216236205346,
|
|
"learning_rate": 2.854436084857379e-05,
|
|
"loss": 0.374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32605046033859253,
|
|
"step": 1860,
|
|
"valid_targets_mean": 2863.2,
|
|
"valid_targets_min": 825
|
|
},
|
|
{
|
|
"epoch": 2.974481658692185,
|
|
"grad_norm": 0.7331545474099052,
|
|
"learning_rate": 2.847238292817057e-05,
|
|
"loss": 0.3709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3661121726036072,
|
|
"step": 1865,
|
|
"valid_targets_mean": 2836.2,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 2.982456140350877,
|
|
"grad_norm": 0.7237151113115085,
|
|
"learning_rate": 2.8400271024499212e-05,
|
|
"loss": 0.3784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38563573360443115,
|
|
"step": 1870,
|
|
"valid_targets_mean": 3023.7,
|
|
"valid_targets_min": 1254
|
|
},
|
|
{
|
|
"epoch": 2.990430622009569,
|
|
"grad_norm": 0.6964348317166811,
|
|
"learning_rate": 2.832802627794593e-05,
|
|
"loss": 0.3824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3342386484146118,
|
|
"step": 1875,
|
|
"valid_targets_mean": 2729.8,
|
|
"valid_targets_min": 1525
|
|
},
|
|
{
|
|
"epoch": 2.9984051036682615,
|
|
"grad_norm": 0.6410633546297118,
|
|
"learning_rate": 2.8255649830997704e-05,
|
|
"loss": 0.3526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3735944330692291,
|
|
"step": 1880,
|
|
"valid_targets_mean": 4313.7,
|
|
"valid_targets_min": 1375
|
|
},
|
|
{
|
|
"epoch": 3.006379585326954,
|
|
"grad_norm": 0.6636837977043534,
|
|
"learning_rate": 2.8183142828224238e-05,
|
|
"loss": 0.3741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3632535934448242,
|
|
"step": 1885,
|
|
"valid_targets_mean": 3643.6,
|
|
"valid_targets_min": 1937
|
|
},
|
|
{
|
|
"epoch": 3.014354066985646,
|
|
"grad_norm": 0.7913607303006132,
|
|
"learning_rate": 2.811050641625988e-05,
|
|
"loss": 0.344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3004690110683441,
|
|
"step": 1890,
|
|
"valid_targets_mean": 2965.2,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 3.0223285486443383,
|
|
"grad_norm": 0.7004515987252902,
|
|
"learning_rate": 2.8037741743785458e-05,
|
|
"loss": 0.333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3426717519760132,
|
|
"step": 1895,
|
|
"valid_targets_mean": 3850.6,
|
|
"valid_targets_min": 1221
|
|
},
|
|
{
|
|
"epoch": 3.0303030303030303,
|
|
"grad_norm": 0.7699624028710709,
|
|
"learning_rate": 2.796484996151013e-05,
|
|
"loss": 0.3251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37789401412010193,
|
|
"step": 1900,
|
|
"valid_targets_mean": 2968.8,
|
|
"valid_targets_min": 1200
|
|
},
|
|
{
|
|
"epoch": 3.0382775119617227,
|
|
"grad_norm": 0.745677378106637,
|
|
"learning_rate": 2.7891832222153188e-05,
|
|
"loss": 0.3433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2869911789894104,
|
|
"step": 1905,
|
|
"valid_targets_mean": 3075.0,
|
|
"valid_targets_min": 1209
|
|
},
|
|
{
|
|
"epoch": 3.0462519936204147,
|
|
"grad_norm": 0.6963384258947051,
|
|
"learning_rate": 2.7818689680425807e-05,
|
|
"loss": 0.3288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3869955241680145,
|
|
"step": 1910,
|
|
"valid_targets_mean": 3743.2,
|
|
"valid_targets_min": 1551
|
|
},
|
|
{
|
|
"epoch": 3.054226475279107,
|
|
"grad_norm": 0.6062766596858246,
|
|
"learning_rate": 2.7745423493012827e-05,
|
|
"loss": 0.3449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.350485622882843,
|
|
"step": 1915,
|
|
"valid_targets_mean": 4463.9,
|
|
"valid_targets_min": 2097
|
|
},
|
|
{
|
|
"epoch": 3.062200956937799,
|
|
"grad_norm": 0.68166256422052,
|
|
"learning_rate": 2.767203481855441e-05,
|
|
"loss": 0.3504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30649104714393616,
|
|
"step": 1920,
|
|
"valid_targets_mean": 3113.1,
|
|
"valid_targets_min": 1154
|
|
},
|
|
{
|
|
"epoch": 3.0701754385964914,
|
|
"grad_norm": 0.7336461815836415,
|
|
"learning_rate": 2.7598524817627764e-05,
|
|
"loss": 0.3395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29599595069885254,
|
|
"step": 1925,
|
|
"valid_targets_mean": 2818.1,
|
|
"valid_targets_min": 1255
|
|
},
|
|
{
|
|
"epoch": 3.0781499202551834,
|
|
"grad_norm": 0.7208313369258172,
|
|
"learning_rate": 2.7524894652728754e-05,
|
|
"loss": 0.3645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3971787691116333,
|
|
"step": 1930,
|
|
"valid_targets_mean": 3344.1,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 3.0861244019138754,
|
|
"grad_norm": 0.6796133930339058,
|
|
"learning_rate": 2.7451145488253544e-05,
|
|
"loss": 0.3369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4038735628128052,
|
|
"step": 1935,
|
|
"valid_targets_mean": 3832.6,
|
|
"valid_targets_min": 1297
|
|
},
|
|
{
|
|
"epoch": 3.094098883572568,
|
|
"grad_norm": 0.6712513757118379,
|
|
"learning_rate": 2.7377278490480157e-05,
|
|
"loss": 0.3504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3190050721168518,
|
|
"step": 1940,
|
|
"valid_targets_mean": 3270.6,
|
|
"valid_targets_min": 1350
|
|
},
|
|
{
|
|
"epoch": 3.1020733652312598,
|
|
"grad_norm": 0.6146967793779301,
|
|
"learning_rate": 2.730329482755006e-05,
|
|
"loss": 0.363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3272600769996643,
|
|
"step": 1945,
|
|
"valid_targets_mean": 3766.5,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 3.110047846889952,
|
|
"grad_norm": 0.7039756549715139,
|
|
"learning_rate": 2.7229195669449667e-05,
|
|
"loss": 0.3306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3352798521518707,
|
|
"step": 1950,
|
|
"valid_targets_mean": 3478.8,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 3.118022328548644,
|
|
"grad_norm": 0.7375700571000556,
|
|
"learning_rate": 2.7154982187991855e-05,
|
|
"loss": 0.3562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3656879663467407,
|
|
"step": 1955,
|
|
"valid_targets_mean": 2951.7,
|
|
"valid_targets_min": 1398
|
|
},
|
|
{
|
|
"epoch": 3.1259968102073366,
|
|
"grad_norm": 0.693947733808338,
|
|
"learning_rate": 2.7080655556797406e-05,
|
|
"loss": 0.3437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3600881099700928,
|
|
"step": 1960,
|
|
"valid_targets_mean": 3462.1,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 3.1339712918660285,
|
|
"grad_norm": 0.7100202879346965,
|
|
"learning_rate": 2.700621695127649e-05,
|
|
"loss": 0.3663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3497456908226013,
|
|
"step": 1965,
|
|
"valid_targets_mean": 3272.7,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 3.141945773524721,
|
|
"grad_norm": 0.6433601636362585,
|
|
"learning_rate": 2.693166754861003e-05,
|
|
"loss": 0.3165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2964088022708893,
|
|
"step": 1970,
|
|
"valid_targets_mean": 3232.2,
|
|
"valid_targets_min": 1290
|
|
},
|
|
{
|
|
"epoch": 3.149920255183413,
|
|
"grad_norm": 0.6647546242671697,
|
|
"learning_rate": 2.685700852773113e-05,
|
|
"loss": 0.3483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3770093023777008,
|
|
"step": 1975,
|
|
"valid_targets_mean": 4035.8,
|
|
"valid_targets_min": 1955
|
|
},
|
|
{
|
|
"epoch": 3.1578947368421053,
|
|
"grad_norm": 0.6492905530504308,
|
|
"learning_rate": 2.6782241069306395e-05,
|
|
"loss": 0.3571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39621323347091675,
|
|
"step": 1980,
|
|
"valid_targets_mean": 3770.6,
|
|
"valid_targets_min": 938
|
|
},
|
|
{
|
|
"epoch": 3.1658692185007973,
|
|
"grad_norm": 0.7967318845885408,
|
|
"learning_rate": 2.6707366355717268e-05,
|
|
"loss": 0.3462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33766570687294006,
|
|
"step": 1985,
|
|
"valid_targets_mean": 3101.4,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 3.1738437001594897,
|
|
"grad_norm": 0.7485866484963103,
|
|
"learning_rate": 2.663238557104136e-05,
|
|
"loss": 0.3324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33724066615104675,
|
|
"step": 1990,
|
|
"valid_targets_mean": 2947.5,
|
|
"valid_targets_min": 1135
|
|
},
|
|
{
|
|
"epoch": 3.1818181818181817,
|
|
"grad_norm": 0.6729643383611308,
|
|
"learning_rate": 2.655729990103368e-05,
|
|
"loss": 0.3258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3188331127166748,
|
|
"step": 1995,
|
|
"valid_targets_mean": 3452.9,
|
|
"valid_targets_min": 1217
|
|
},
|
|
{
|
|
"epoch": 3.189792663476874,
|
|
"grad_norm": 0.7447318137092777,
|
|
"learning_rate": 2.648211053310792e-05,
|
|
"loss": 0.3365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3603431284427643,
|
|
"step": 2000,
|
|
"valid_targets_mean": 2968.2,
|
|
"valid_targets_min": 966
|
|
},
|
|
{
|
|
"epoch": 3.197767145135566,
|
|
"grad_norm": 0.6703208939741206,
|
|
"learning_rate": 2.6406818656317654e-05,
|
|
"loss": 0.3258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3488501012325287,
|
|
"step": 2005,
|
|
"valid_targets_mean": 3599.8,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 3.2057416267942584,
|
|
"grad_norm": 0.6502391251429906,
|
|
"learning_rate": 2.6331425461337557e-05,
|
|
"loss": 0.3181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3210432529449463,
|
|
"step": 2010,
|
|
"valid_targets_mean": 3798.7,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 3.2137161084529504,
|
|
"grad_norm": 0.8022848644954198,
|
|
"learning_rate": 2.6255932140444546e-05,
|
|
"loss": 0.361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3998960852622986,
|
|
"step": 2015,
|
|
"valid_targets_mean": 3594.8,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 3.221690590111643,
|
|
"grad_norm": 0.6666192394779106,
|
|
"learning_rate": 2.618033988749895e-05,
|
|
"loss": 0.3546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3319270610809326,
|
|
"step": 2020,
|
|
"valid_targets_mean": 3700.9,
|
|
"valid_targets_min": 1313
|
|
},
|
|
{
|
|
"epoch": 3.229665071770335,
|
|
"grad_norm": 0.6981120310256206,
|
|
"learning_rate": 2.6104649897925622e-05,
|
|
"loss": 0.3442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3497855067253113,
|
|
"step": 2025,
|
|
"valid_targets_mean": 3612.4,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 3.237639553429027,
|
|
"grad_norm": 0.7391079202051112,
|
|
"learning_rate": 2.602886336869503e-05,
|
|
"loss": 0.3217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31525206565856934,
|
|
"step": 2030,
|
|
"valid_targets_mean": 2837.6,
|
|
"valid_targets_min": 1437
|
|
},
|
|
{
|
|
"epoch": 3.245614035087719,
|
|
"grad_norm": 0.6756398513475517,
|
|
"learning_rate": 2.595298149830433e-05,
|
|
"loss": 0.3402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34850138425827026,
|
|
"step": 2035,
|
|
"valid_targets_mean": 3398.4,
|
|
"valid_targets_min": 1367
|
|
},
|
|
{
|
|
"epoch": 3.2535885167464116,
|
|
"grad_norm": 0.7472274895881388,
|
|
"learning_rate": 2.5877005486758424e-05,
|
|
"loss": 0.317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30729418992996216,
|
|
"step": 2040,
|
|
"valid_targets_mean": 2733.2,
|
|
"valid_targets_min": 1022
|
|
},
|
|
{
|
|
"epoch": 3.2615629984051036,
|
|
"grad_norm": 0.7407995326743878,
|
|
"learning_rate": 2.5800936535550954e-05,
|
|
"loss": 0.352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35543033480644226,
|
|
"step": 2045,
|
|
"valid_targets_mean": 3055.9,
|
|
"valid_targets_min": 885
|
|
},
|
|
{
|
|
"epoch": 3.269537480063796,
|
|
"grad_norm": 0.7148626272486822,
|
|
"learning_rate": 2.5724775847645345e-05,
|
|
"loss": 0.3493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3244854509830475,
|
|
"step": 2050,
|
|
"valid_targets_mean": 3271.9,
|
|
"valid_targets_min": 1178
|
|
},
|
|
{
|
|
"epoch": 3.277511961722488,
|
|
"grad_norm": 0.7918345219351905,
|
|
"learning_rate": 2.5648524627455738e-05,
|
|
"loss": 0.3458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.340337336063385,
|
|
"step": 2055,
|
|
"valid_targets_mean": 2546.3,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 3.2854864433811803,
|
|
"grad_norm": 0.7249235523649565,
|
|
"learning_rate": 2.557218408082798e-05,
|
|
"loss": 0.3285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32011061906814575,
|
|
"step": 2060,
|
|
"valid_targets_mean": 3126.1,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 3.2934609250398723,
|
|
"grad_norm": 0.7991925589887385,
|
|
"learning_rate": 2.5495755415020504e-05,
|
|
"loss": 0.3503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.322753369808197,
|
|
"step": 2065,
|
|
"valid_targets_mean": 2929.4,
|
|
"valid_targets_min": 1398
|
|
},
|
|
{
|
|
"epoch": 3.3014354066985647,
|
|
"grad_norm": 0.6495407410133522,
|
|
"learning_rate": 2.5419239838685325e-05,
|
|
"loss": 0.3361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2992458641529083,
|
|
"step": 2070,
|
|
"valid_targets_mean": 3445.9,
|
|
"valid_targets_min": 1269
|
|
},
|
|
{
|
|
"epoch": 3.3094098883572567,
|
|
"grad_norm": 0.6209586702491424,
|
|
"learning_rate": 2.5342638561848817e-05,
|
|
"loss": 0.3422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3132351338863373,
|
|
"step": 2075,
|
|
"valid_targets_mean": 4094.6,
|
|
"valid_targets_min": 1025
|
|
},
|
|
{
|
|
"epoch": 3.317384370015949,
|
|
"grad_norm": 0.8237035517435888,
|
|
"learning_rate": 2.526595279589265e-05,
|
|
"loss": 0.3532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3527466058731079,
|
|
"step": 2080,
|
|
"valid_targets_mean": 2905.6,
|
|
"valid_targets_min": 1288
|
|
},
|
|
{
|
|
"epoch": 3.325358851674641,
|
|
"grad_norm": 0.6682122635199401,
|
|
"learning_rate": 2.5189183753534634e-05,
|
|
"loss": 0.3331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3185807168483734,
|
|
"step": 2085,
|
|
"valid_targets_mean": 3866.2,
|
|
"valid_targets_min": 1386
|
|
},
|
|
{
|
|
"epoch": 3.3333333333333335,
|
|
"grad_norm": 0.7727166779224482,
|
|
"learning_rate": 2.5112332648809495e-05,
|
|
"loss": 0.3292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3343910574913025,
|
|
"step": 2090,
|
|
"valid_targets_mean": 2838.6,
|
|
"valid_targets_min": 1234
|
|
},
|
|
{
|
|
"epoch": 3.3413078149920254,
|
|
"grad_norm": 0.6488263901406396,
|
|
"learning_rate": 2.503540069704973e-05,
|
|
"loss": 0.3431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33391863107681274,
|
|
"step": 2095,
|
|
"valid_targets_mean": 4033.4,
|
|
"valid_targets_min": 1395
|
|
},
|
|
{
|
|
"epoch": 3.349282296650718,
|
|
"grad_norm": 0.8037241038543373,
|
|
"learning_rate": 2.4958389114866326e-05,
|
|
"loss": 0.3345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31732261180877686,
|
|
"step": 2100,
|
|
"valid_targets_mean": 3249.5,
|
|
"valid_targets_min": 1281
|
|
},
|
|
{
|
|
"epoch": 3.35725677830941,
|
|
"grad_norm": 0.6690720167392082,
|
|
"learning_rate": 2.488129912012958e-05,
|
|
"loss": 0.3394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3379870653152466,
|
|
"step": 2105,
|
|
"valid_targets_mean": 3448.5,
|
|
"valid_targets_min": 1354
|
|
},
|
|
{
|
|
"epoch": 3.3652312599681022,
|
|
"grad_norm": 0.7495167194312311,
|
|
"learning_rate": 2.4804131931949823e-05,
|
|
"loss": 0.3361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3339096009731293,
|
|
"step": 2110,
|
|
"valid_targets_mean": 2800.4,
|
|
"valid_targets_min": 1497
|
|
},
|
|
{
|
|
"epoch": 3.373205741626794,
|
|
"grad_norm": 0.726534238259944,
|
|
"learning_rate": 2.4726888770658103e-05,
|
|
"loss": 0.3596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3416109085083008,
|
|
"step": 2115,
|
|
"valid_targets_mean": 3138.2,
|
|
"valid_targets_min": 998
|
|
},
|
|
{
|
|
"epoch": 3.3811802232854866,
|
|
"grad_norm": 0.6978493283608483,
|
|
"learning_rate": 2.4649570857786928e-05,
|
|
"loss": 0.3363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31508249044418335,
|
|
"step": 2120,
|
|
"valid_targets_mean": 2986.4,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 3.3891547049441786,
|
|
"grad_norm": 0.7801088560621459,
|
|
"learning_rate": 2.4572179416050953e-05,
|
|
"loss": 0.3414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3553147315979004,
|
|
"step": 2125,
|
|
"valid_targets_mean": 2816.1,
|
|
"valid_targets_min": 1022
|
|
},
|
|
{
|
|
"epoch": 3.397129186602871,
|
|
"grad_norm": 0.814273690983606,
|
|
"learning_rate": 2.449471566932761e-05,
|
|
"loss": 0.3535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3594319820404053,
|
|
"step": 2130,
|
|
"valid_targets_mean": 2554.2,
|
|
"valid_targets_min": 1315
|
|
},
|
|
{
|
|
"epoch": 3.405103668261563,
|
|
"grad_norm": 0.7793445594534658,
|
|
"learning_rate": 2.4417180842637764e-05,
|
|
"loss": 0.3513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3512042164802551,
|
|
"step": 2135,
|
|
"valid_targets_mean": 2841.2,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 3.4130781499202554,
|
|
"grad_norm": 0.7250774245332084,
|
|
"learning_rate": 2.4339576162126362e-05,
|
|
"loss": 0.3202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37423276901245117,
|
|
"step": 2140,
|
|
"valid_targets_mean": 3434.9,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 3.4210526315789473,
|
|
"grad_norm": 0.5919083730994342,
|
|
"learning_rate": 2.4261902855043027e-05,
|
|
"loss": 0.3196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3036159873008728,
|
|
"step": 2145,
|
|
"valid_targets_mean": 3921.9,
|
|
"valid_targets_min": 1043
|
|
},
|
|
{
|
|
"epoch": 3.4290271132376393,
|
|
"grad_norm": 0.6365275948845664,
|
|
"learning_rate": 2.418416214972265e-05,
|
|
"loss": 0.3468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2949761152267456,
|
|
"step": 2150,
|
|
"valid_targets_mean": 3674.8,
|
|
"valid_targets_min": 1220
|
|
},
|
|
{
|
|
"epoch": 3.4370015948963317,
|
|
"grad_norm": 0.6572573306425243,
|
|
"learning_rate": 2.4106355275565937e-05,
|
|
"loss": 0.3237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.308928906917572,
|
|
"step": 2155,
|
|
"valid_targets_mean": 3448.0,
|
|
"valid_targets_min": 951
|
|
},
|
|
{
|
|
"epoch": 3.444976076555024,
|
|
"grad_norm": 0.8043810211407846,
|
|
"learning_rate": 2.4028483463020053e-05,
|
|
"loss": 0.3578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3475992679595947,
|
|
"step": 2160,
|
|
"valid_targets_mean": 3189.1,
|
|
"valid_targets_min": 1175
|
|
},
|
|
{
|
|
"epoch": 3.452950558213716,
|
|
"grad_norm": 0.6719412176663501,
|
|
"learning_rate": 2.3950547943559056e-05,
|
|
"loss": 0.3344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3477882742881775,
|
|
"step": 2165,
|
|
"valid_targets_mean": 3367.3,
|
|
"valid_targets_min": 1013
|
|
},
|
|
{
|
|
"epoch": 3.460925039872408,
|
|
"grad_norm": 0.7034232604750902,
|
|
"learning_rate": 2.3872549949664493e-05,
|
|
"loss": 0.3538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3525240123271942,
|
|
"step": 2170,
|
|
"valid_targets_mean": 3394.3,
|
|
"valid_targets_min": 1544
|
|
},
|
|
{
|
|
"epoch": 3.4688995215311005,
|
|
"grad_norm": 0.7249691954033293,
|
|
"learning_rate": 2.3794490714805897e-05,
|
|
"loss": 0.3396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34647244215011597,
|
|
"step": 2175,
|
|
"valid_targets_mean": 3241.5,
|
|
"valid_targets_min": 1518
|
|
},
|
|
{
|
|
"epoch": 3.476874003189793,
|
|
"grad_norm": 0.6704560447648356,
|
|
"learning_rate": 2.3716371473421242e-05,
|
|
"loss": 0.3182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31735607981681824,
|
|
"step": 2180,
|
|
"valid_targets_mean": 3621.4,
|
|
"valid_targets_min": 1392
|
|
},
|
|
{
|
|
"epoch": 3.484848484848485,
|
|
"grad_norm": 0.6969193720058093,
|
|
"learning_rate": 2.36381934608975e-05,
|
|
"loss": 0.3312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3780801296234131,
|
|
"step": 2185,
|
|
"valid_targets_mean": 3780.2,
|
|
"valid_targets_min": 1398
|
|
},
|
|
{
|
|
"epoch": 3.492822966507177,
|
|
"grad_norm": 0.6412783302780224,
|
|
"learning_rate": 2.3559957913551014e-05,
|
|
"loss": 0.3526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32190749049186707,
|
|
"step": 2190,
|
|
"valid_targets_mean": 3535.0,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 3.5007974481658692,
|
|
"grad_norm": 0.6438390870649656,
|
|
"learning_rate": 2.3481666068607994e-05,
|
|
"loss": 0.3507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3409099578857422,
|
|
"step": 2195,
|
|
"valid_targets_mean": 3838.1,
|
|
"valid_targets_min": 1120
|
|
},
|
|
{
|
|
"epoch": 3.5087719298245617,
|
|
"grad_norm": 0.7146545620321331,
|
|
"learning_rate": 2.3403319164184986e-05,
|
|
"loss": 0.34,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35894519090652466,
|
|
"step": 2200,
|
|
"valid_targets_mean": 3199.6,
|
|
"valid_targets_min": 1245
|
|
},
|
|
{
|
|
"epoch": 3.5167464114832536,
|
|
"grad_norm": 0.8025376759175539,
|
|
"learning_rate": 2.3324918439269206e-05,
|
|
"loss": 0.3477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33247920870780945,
|
|
"step": 2205,
|
|
"valid_targets_mean": 3264.5,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 3.5247208931419456,
|
|
"grad_norm": 0.8056587179616345,
|
|
"learning_rate": 2.3246465133699024e-05,
|
|
"loss": 0.3417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3728751242160797,
|
|
"step": 2210,
|
|
"valid_targets_mean": 2605.6,
|
|
"valid_targets_min": 879
|
|
},
|
|
{
|
|
"epoch": 3.532695374800638,
|
|
"grad_norm": 0.6532868757465472,
|
|
"learning_rate": 2.31679604881443e-05,
|
|
"loss": 0.354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34238746762275696,
|
|
"step": 2215,
|
|
"valid_targets_mean": 3685.9,
|
|
"valid_targets_min": 1388
|
|
},
|
|
{
|
|
"epoch": 3.5406698564593304,
|
|
"grad_norm": 0.6027346187288216,
|
|
"learning_rate": 2.3089405744086823e-05,
|
|
"loss": 0.3325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3245461881160736,
|
|
"step": 2220,
|
|
"valid_targets_mean": 3837.6,
|
|
"valid_targets_min": 1545
|
|
},
|
|
{
|
|
"epoch": 3.5486443381180224,
|
|
"grad_norm": 0.6819919279712949,
|
|
"learning_rate": 2.3010802143800626e-05,
|
|
"loss": 0.3266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3117847442626953,
|
|
"step": 2225,
|
|
"valid_targets_mean": 3520.8,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 3.5566188197767143,
|
|
"grad_norm": 0.7431414770408725,
|
|
"learning_rate": 2.2932150930332363e-05,
|
|
"loss": 0.3221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3068672716617584,
|
|
"step": 2230,
|
|
"valid_targets_mean": 3372.3,
|
|
"valid_targets_min": 1293
|
|
},
|
|
{
|
|
"epoch": 3.5645933014354068,
|
|
"grad_norm": 0.7321801527590662,
|
|
"learning_rate": 2.2853453347481635e-05,
|
|
"loss": 0.3237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33019566535949707,
|
|
"step": 2235,
|
|
"valid_targets_mean": 3249.2,
|
|
"valid_targets_min": 914
|
|
},
|
|
{
|
|
"epoch": 3.5725677830940987,
|
|
"grad_norm": 0.8719146380418346,
|
|
"learning_rate": 2.277471063978137e-05,
|
|
"loss": 0.345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3312244117259979,
|
|
"step": 2240,
|
|
"valid_targets_mean": 2604.1,
|
|
"valid_targets_min": 1321
|
|
},
|
|
{
|
|
"epoch": 3.580542264752791,
|
|
"grad_norm": 0.759097004538618,
|
|
"learning_rate": 2.2695924052478065e-05,
|
|
"loss": 0.3286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32476186752319336,
|
|
"step": 2245,
|
|
"valid_targets_mean": 2999.3,
|
|
"valid_targets_min": 1311
|
|
},
|
|
{
|
|
"epoch": 3.588516746411483,
|
|
"grad_norm": 0.8400350903338049,
|
|
"learning_rate": 2.2617094831512167e-05,
|
|
"loss": 0.3588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37403565645217896,
|
|
"step": 2250,
|
|
"valid_targets_mean": 2598.8,
|
|
"valid_targets_min": 857
|
|
},
|
|
{
|
|
"epoch": 3.5964912280701755,
|
|
"grad_norm": 0.7592438164701092,
|
|
"learning_rate": 2.253822422349831e-05,
|
|
"loss": 0.3649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39340460300445557,
|
|
"step": 2255,
|
|
"valid_targets_mean": 2922.0,
|
|
"valid_targets_min": 760
|
|
},
|
|
{
|
|
"epoch": 3.6044657097288675,
|
|
"grad_norm": 0.7216060759573354,
|
|
"learning_rate": 2.2459313475705645e-05,
|
|
"loss": 0.3239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3181716799736023,
|
|
"step": 2260,
|
|
"valid_targets_mean": 3004.3,
|
|
"valid_targets_min": 976
|
|
},
|
|
{
|
|
"epoch": 3.61244019138756,
|
|
"grad_norm": 0.6535173179651595,
|
|
"learning_rate": 2.23803638360381e-05,
|
|
"loss": 0.3277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30530452728271484,
|
|
"step": 2265,
|
|
"valid_targets_mean": 3350.6,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 3.620414673046252,
|
|
"grad_norm": 0.7203061769887287,
|
|
"learning_rate": 2.2301376553014625e-05,
|
|
"loss": 0.339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2965018153190613,
|
|
"step": 2270,
|
|
"valid_targets_mean": 2995.0,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 3.6283891547049443,
|
|
"grad_norm": 0.7492851382207867,
|
|
"learning_rate": 2.2222352875749493e-05,
|
|
"loss": 0.3438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3546943664550781,
|
|
"step": 2275,
|
|
"valid_targets_mean": 2803.8,
|
|
"valid_targets_min": 1356
|
|
},
|
|
{
|
|
"epoch": 3.6363636363636362,
|
|
"grad_norm": 0.6501942032757967,
|
|
"learning_rate": 2.214329405393249e-05,
|
|
"loss": 0.3504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3669120669364929,
|
|
"step": 2280,
|
|
"valid_targets_mean": 3456.5,
|
|
"valid_targets_min": 1455
|
|
},
|
|
{
|
|
"epoch": 3.6443381180223287,
|
|
"grad_norm": 0.6759481101133017,
|
|
"learning_rate": 2.2064201337809212e-05,
|
|
"loss": 0.3399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34218889474868774,
|
|
"step": 2285,
|
|
"valid_targets_mean": 3528.9,
|
|
"valid_targets_min": 1275
|
|
},
|
|
{
|
|
"epoch": 3.6523125996810206,
|
|
"grad_norm": 0.6774347231385998,
|
|
"learning_rate": 2.1985075978161236e-05,
|
|
"loss": 0.323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32096028327941895,
|
|
"step": 2290,
|
|
"valid_targets_mean": 3088.6,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 3.660287081339713,
|
|
"grad_norm": 0.7754142706284111,
|
|
"learning_rate": 2.1905919226286385e-05,
|
|
"loss": 0.3539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3991492688655853,
|
|
"step": 2295,
|
|
"valid_targets_mean": 3377.6,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 3.668261562998405,
|
|
"grad_norm": 0.6383037557741134,
|
|
"learning_rate": 2.1826732333978924e-05,
|
|
"loss": 0.3565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2896692156791687,
|
|
"step": 2300,
|
|
"valid_targets_mean": 3331.6,
|
|
"valid_targets_min": 1471
|
|
},
|
|
{
|
|
"epoch": 3.6762360446570974,
|
|
"grad_norm": 0.7556281893602906,
|
|
"learning_rate": 2.1747516553509755e-05,
|
|
"loss": 0.314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.340459942817688,
|
|
"step": 2305,
|
|
"valid_targets_mean": 2939.3,
|
|
"valid_targets_min": 985
|
|
},
|
|
{
|
|
"epoch": 3.6842105263157894,
|
|
"grad_norm": 0.6669054271312059,
|
|
"learning_rate": 2.166827313760662e-05,
|
|
"loss": 0.3554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37772446870803833,
|
|
"step": 2310,
|
|
"valid_targets_mean": 3515.9,
|
|
"valid_targets_min": 1761
|
|
},
|
|
{
|
|
"epoch": 3.692185007974482,
|
|
"grad_norm": 0.765547026671147,
|
|
"learning_rate": 2.1589003339434292e-05,
|
|
"loss": 0.3561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3578113913536072,
|
|
"step": 2315,
|
|
"valid_targets_mean": 3179.1,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 3.7001594896331738,
|
|
"grad_norm": 0.7141525647785049,
|
|
"learning_rate": 2.150970841257476e-05,
|
|
"loss": 0.3512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37463968992233276,
|
|
"step": 2320,
|
|
"valid_targets_mean": 3176.3,
|
|
"valid_targets_min": 1568
|
|
},
|
|
{
|
|
"epoch": 3.708133971291866,
|
|
"grad_norm": 0.7458605904809033,
|
|
"learning_rate": 2.1430389611007393e-05,
|
|
"loss": 0.3324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3165624737739563,
|
|
"step": 2325,
|
|
"valid_targets_mean": 2523.6,
|
|
"valid_targets_min": 965
|
|
},
|
|
{
|
|
"epoch": 3.716108452950558,
|
|
"grad_norm": 0.7633722554550901,
|
|
"learning_rate": 2.135104818908913e-05,
|
|
"loss": 0.3341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.300517737865448,
|
|
"step": 2330,
|
|
"valid_targets_mean": 2713.4,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 3.7240829346092506,
|
|
"grad_norm": 0.6798427976940601,
|
|
"learning_rate": 2.1271685401534617e-05,
|
|
"loss": 0.3519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2998039126396179,
|
|
"step": 2335,
|
|
"valid_targets_mean": 2916.8,
|
|
"valid_targets_min": 1355
|
|
},
|
|
{
|
|
"epoch": 3.7320574162679425,
|
|
"grad_norm": 0.6956893791514964,
|
|
"learning_rate": 2.11923025033964e-05,
|
|
"loss": 0.3579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3145022690296173,
|
|
"step": 2340,
|
|
"valid_targets_mean": 3335.2,
|
|
"valid_targets_min": 1060
|
|
},
|
|
{
|
|
"epoch": 3.740031897926635,
|
|
"grad_norm": 0.7078829176038507,
|
|
"learning_rate": 2.111290075004503e-05,
|
|
"loss": 0.3269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3304348886013031,
|
|
"step": 2345,
|
|
"valid_targets_mean": 3434.8,
|
|
"valid_targets_min": 1476
|
|
},
|
|
{
|
|
"epoch": 3.748006379585327,
|
|
"grad_norm": 0.6014321009772997,
|
|
"learning_rate": 2.103348139714925e-05,
|
|
"loss": 0.3502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35806751251220703,
|
|
"step": 2350,
|
|
"valid_targets_mean": 4172.1,
|
|
"valid_targets_min": 1150
|
|
},
|
|
{
|
|
"epoch": 3.7559808612440193,
|
|
"grad_norm": 0.6850627039128502,
|
|
"learning_rate": 2.0954045700656126e-05,
|
|
"loss": 0.3422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3409198224544525,
|
|
"step": 2355,
|
|
"valid_targets_mean": 3117.0,
|
|
"valid_targets_min": 1440
|
|
},
|
|
{
|
|
"epoch": 3.7639553429027113,
|
|
"grad_norm": 0.6613908534398819,
|
|
"learning_rate": 2.0874594916771174e-05,
|
|
"loss": 0.3364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3665401041507721,
|
|
"step": 2360,
|
|
"valid_targets_mean": 3493.3,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 3.7719298245614032,
|
|
"grad_norm": 0.746983118477907,
|
|
"learning_rate": 2.079513030193852e-05,
|
|
"loss": 0.363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4050191044807434,
|
|
"step": 2365,
|
|
"valid_targets_mean": 3352.1,
|
|
"valid_targets_min": 1183
|
|
},
|
|
{
|
|
"epoch": 3.7799043062200957,
|
|
"grad_norm": 0.720244055411808,
|
|
"learning_rate": 2.071565311282099e-05,
|
|
"loss": 0.3572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.394406259059906,
|
|
"step": 2370,
|
|
"valid_targets_mean": 3272.1,
|
|
"valid_targets_min": 653
|
|
},
|
|
{
|
|
"epoch": 3.787878787878788,
|
|
"grad_norm": 0.7309183426032821,
|
|
"learning_rate": 2.063616460628029e-05,
|
|
"loss": 0.3396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34431731700897217,
|
|
"step": 2375,
|
|
"valid_targets_mean": 3395.9,
|
|
"valid_targets_min": 1680
|
|
},
|
|
{
|
|
"epoch": 3.79585326953748,
|
|
"grad_norm": 0.8148103331670786,
|
|
"learning_rate": 2.0556666039357084e-05,
|
|
"loss": 0.3354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32172083854675293,
|
|
"step": 2380,
|
|
"valid_targets_mean": 2152.2,
|
|
"valid_targets_min": 1262
|
|
},
|
|
{
|
|
"epoch": 3.803827751196172,
|
|
"grad_norm": 0.7895226862175505,
|
|
"learning_rate": 2.047715866925113e-05,
|
|
"loss": 0.3543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3987800180912018,
|
|
"step": 2385,
|
|
"valid_targets_mean": 2768.7,
|
|
"valid_targets_min": 1232
|
|
},
|
|
{
|
|
"epoch": 3.8118022328548644,
|
|
"grad_norm": 0.6835321489924063,
|
|
"learning_rate": 2.0397643753301403e-05,
|
|
"loss": 0.3277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3152831196784973,
|
|
"step": 2390,
|
|
"valid_targets_mean": 3351.9,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 3.819776714513557,
|
|
"grad_norm": 0.7918038333821873,
|
|
"learning_rate": 2.0318122548966225e-05,
|
|
"loss": 0.3461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32996347546577454,
|
|
"step": 2395,
|
|
"valid_targets_mean": 2416.5,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 3.827751196172249,
|
|
"grad_norm": 0.6526070050102301,
|
|
"learning_rate": 2.0238596313803337e-05,
|
|
"loss": 0.3606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44588369131088257,
|
|
"step": 2400,
|
|
"valid_targets_mean": 4357.1,
|
|
"valid_targets_min": 1022
|
|
},
|
|
{
|
|
"epoch": 3.8357256778309408,
|
|
"grad_norm": 0.6935137685290869,
|
|
"learning_rate": 2.015906630545005e-05,
|
|
"loss": 0.3323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3234461545944214,
|
|
"step": 2405,
|
|
"valid_targets_mean": 3182.6,
|
|
"valid_targets_min": 1032
|
|
},
|
|
{
|
|
"epoch": 3.843700159489633,
|
|
"grad_norm": 0.6867465762369088,
|
|
"learning_rate": 2.0079533781603352e-05,
|
|
"loss": 0.3152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31318309903144836,
|
|
"step": 2410,
|
|
"valid_targets_mean": 3286.3,
|
|
"valid_targets_min": 1477
|
|
},
|
|
{
|
|
"epoch": 3.8516746411483256,
|
|
"grad_norm": 0.665596229295388,
|
|
"learning_rate": 2e-05,
|
|
"loss": 0.2987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32160598039627075,
|
|
"step": 2415,
|
|
"valid_targets_mean": 3151.1,
|
|
"valid_targets_min": 1400
|
|
},
|
|
{
|
|
"epoch": 3.8596491228070176,
|
|
"grad_norm": 0.8464200930017578,
|
|
"learning_rate": 1.9920466218396655e-05,
|
|
"loss": 0.3344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32876816391944885,
|
|
"step": 2420,
|
|
"valid_targets_mean": 2595.0,
|
|
"valid_targets_min": 1020
|
|
},
|
|
{
|
|
"epoch": 3.8676236044657095,
|
|
"grad_norm": 0.7104989515060451,
|
|
"learning_rate": 1.9840933694549956e-05,
|
|
"loss": 0.324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3736070394515991,
|
|
"step": 2425,
|
|
"valid_targets_mean": 3088.2,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 3.875598086124402,
|
|
"grad_norm": 0.7071699330405526,
|
|
"learning_rate": 1.976140368619667e-05,
|
|
"loss": 0.352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3520479202270508,
|
|
"step": 2430,
|
|
"valid_targets_mean": 3062.1,
|
|
"valid_targets_min": 1311
|
|
},
|
|
{
|
|
"epoch": 3.8835725677830943,
|
|
"grad_norm": 0.6729607657029636,
|
|
"learning_rate": 1.9681877451033778e-05,
|
|
"loss": 0.3315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35349786281585693,
|
|
"step": 2435,
|
|
"valid_targets_mean": 3632.2,
|
|
"valid_targets_min": 871
|
|
},
|
|
{
|
|
"epoch": 3.8915470494417863,
|
|
"grad_norm": 0.7632233604415041,
|
|
"learning_rate": 1.9602356246698597e-05,
|
|
"loss": 0.3434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.353920578956604,
|
|
"step": 2440,
|
|
"valid_targets_mean": 2736.2,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 3.8995215311004783,
|
|
"grad_norm": 0.6135051873598603,
|
|
"learning_rate": 1.9522841330748877e-05,
|
|
"loss": 0.3635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3720296025276184,
|
|
"step": 2445,
|
|
"valid_targets_mean": 4268.2,
|
|
"valid_targets_min": 2247
|
|
},
|
|
{
|
|
"epoch": 3.9074960127591707,
|
|
"grad_norm": 0.7561218085350198,
|
|
"learning_rate": 1.944333396064292e-05,
|
|
"loss": 0.348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36515435576438904,
|
|
"step": 2450,
|
|
"valid_targets_mean": 3652.4,
|
|
"valid_targets_min": 1691
|
|
},
|
|
{
|
|
"epoch": 3.915470494417863,
|
|
"grad_norm": 0.7144546711025309,
|
|
"learning_rate": 1.936383539371971e-05,
|
|
"loss": 0.3521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3405328392982483,
|
|
"step": 2455,
|
|
"valid_targets_mean": 2971.2,
|
|
"valid_targets_min": 1263
|
|
},
|
|
{
|
|
"epoch": 3.923444976076555,
|
|
"grad_norm": 0.6672199165496382,
|
|
"learning_rate": 1.9284346887179016e-05,
|
|
"loss": 0.3399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3292880654335022,
|
|
"step": 2460,
|
|
"valid_targets_mean": 3476.6,
|
|
"valid_targets_min": 1417
|
|
},
|
|
{
|
|
"epoch": 3.931419457735247,
|
|
"grad_norm": 0.6680955551350991,
|
|
"learning_rate": 1.9204869698061493e-05,
|
|
"loss": 0.3555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3599945902824402,
|
|
"step": 2465,
|
|
"valid_targets_mean": 3399.4,
|
|
"valid_targets_min": 1420
|
|
},
|
|
{
|
|
"epoch": 3.9393939393939394,
|
|
"grad_norm": 0.6513185249743233,
|
|
"learning_rate": 1.9125405083228833e-05,
|
|
"loss": 0.3277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3338255286216736,
|
|
"step": 2470,
|
|
"valid_targets_mean": 3636.8,
|
|
"valid_targets_min": 1477
|
|
},
|
|
{
|
|
"epoch": 3.9473684210526314,
|
|
"grad_norm": 0.667556171677643,
|
|
"learning_rate": 1.9045954299343884e-05,
|
|
"loss": 0.3446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.330007404088974,
|
|
"step": 2475,
|
|
"valid_targets_mean": 3692.1,
|
|
"valid_targets_min": 857
|
|
},
|
|
{
|
|
"epoch": 3.955342902711324,
|
|
"grad_norm": 0.715457382213996,
|
|
"learning_rate": 1.896651860285076e-05,
|
|
"loss": 0.3416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3423001766204834,
|
|
"step": 2480,
|
|
"valid_targets_mean": 3261.5,
|
|
"valid_targets_min": 892
|
|
},
|
|
{
|
|
"epoch": 3.963317384370016,
|
|
"grad_norm": 0.7097617519271261,
|
|
"learning_rate": 1.8887099249954976e-05,
|
|
"loss": 0.3326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3196974992752075,
|
|
"step": 2485,
|
|
"valid_targets_mean": 2966.3,
|
|
"valid_targets_min": 1159
|
|
},
|
|
{
|
|
"epoch": 3.971291866028708,
|
|
"grad_norm": 0.8275318693620048,
|
|
"learning_rate": 1.8807697496603604e-05,
|
|
"loss": 0.3535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3298540711402893,
|
|
"step": 2490,
|
|
"valid_targets_mean": 2314.2,
|
|
"valid_targets_min": 965
|
|
},
|
|
{
|
|
"epoch": 3.9792663476874,
|
|
"grad_norm": 0.8106609826475644,
|
|
"learning_rate": 1.8728314598465386e-05,
|
|
"loss": 0.3462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33071044087409973,
|
|
"step": 2495,
|
|
"valid_targets_mean": 2284.5,
|
|
"valid_targets_min": 989
|
|
},
|
|
{
|
|
"epoch": 3.9872408293460926,
|
|
"grad_norm": 0.6213619849864003,
|
|
"learning_rate": 1.8648951810910878e-05,
|
|
"loss": 0.3409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37467679381370544,
|
|
"step": 2500,
|
|
"valid_targets_mean": 4229.3,
|
|
"valid_targets_min": 1403
|
|
},
|
|
{
|
|
"epoch": 3.9952153110047846,
|
|
"grad_norm": 0.6682020874571176,
|
|
"learning_rate": 1.856961038899261e-05,
|
|
"loss": 0.3281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33383795619010925,
|
|
"step": 2505,
|
|
"valid_targets_mean": 3242.3,
|
|
"valid_targets_min": 1457
|
|
},
|
|
{
|
|
"epoch": 4.003189792663477,
|
|
"grad_norm": 0.6798843912721649,
|
|
"learning_rate": 1.849029158742525e-05,
|
|
"loss": 0.3043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3023320734500885,
|
|
"step": 2510,
|
|
"valid_targets_mean": 3349.3,
|
|
"valid_targets_min": 1135
|
|
},
|
|
{
|
|
"epoch": 4.011164274322169,
|
|
"grad_norm": 0.7138558982007717,
|
|
"learning_rate": 1.8410996660565714e-05,
|
|
"loss": 0.3165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3142721354961395,
|
|
"step": 2515,
|
|
"valid_targets_mean": 3140.0,
|
|
"valid_targets_min": 871
|
|
},
|
|
{
|
|
"epoch": 4.019138755980861,
|
|
"grad_norm": 0.7445737866266949,
|
|
"learning_rate": 1.8331726862393385e-05,
|
|
"loss": 0.3201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28572627902030945,
|
|
"step": 2520,
|
|
"valid_targets_mean": 3110.7,
|
|
"valid_targets_min": 1415
|
|
},
|
|
{
|
|
"epoch": 4.027113237639553,
|
|
"grad_norm": 0.7136212433333143,
|
|
"learning_rate": 1.8252483446490245e-05,
|
|
"loss": 0.3068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.296284019947052,
|
|
"step": 2525,
|
|
"valid_targets_mean": 2991.2,
|
|
"valid_targets_min": 1292
|
|
},
|
|
{
|
|
"epoch": 4.035087719298246,
|
|
"grad_norm": 0.789354403648822,
|
|
"learning_rate": 1.8173267666021075e-05,
|
|
"loss": 0.3183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37759897112846375,
|
|
"step": 2530,
|
|
"valid_targets_mean": 3433.6,
|
|
"valid_targets_min": 1282
|
|
},
|
|
{
|
|
"epoch": 4.043062200956938,
|
|
"grad_norm": 0.7274944214377269,
|
|
"learning_rate": 1.8094080773713615e-05,
|
|
"loss": 0.3153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3111798167228699,
|
|
"step": 2535,
|
|
"valid_targets_mean": 3153.6,
|
|
"valid_targets_min": 1022
|
|
},
|
|
{
|
|
"epoch": 4.05103668261563,
|
|
"grad_norm": 0.7876444666956668,
|
|
"learning_rate": 1.8014924021838774e-05,
|
|
"loss": 0.3359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2966739535331726,
|
|
"step": 2540,
|
|
"valid_targets_mean": 2667.6,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 4.059011164274322,
|
|
"grad_norm": 0.7244220066917392,
|
|
"learning_rate": 1.7935798662190798e-05,
|
|
"loss": 0.2937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2928505837917328,
|
|
"step": 2545,
|
|
"valid_targets_mean": 3271.7,
|
|
"valid_targets_min": 1101
|
|
},
|
|
{
|
|
"epoch": 4.0669856459330145,
|
|
"grad_norm": 0.7842083024029242,
|
|
"learning_rate": 1.785670594606752e-05,
|
|
"loss": 0.327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3623940348625183,
|
|
"step": 2550,
|
|
"valid_targets_mean": 3346.1,
|
|
"valid_targets_min": 1035
|
|
},
|
|
{
|
|
"epoch": 4.074960127591707,
|
|
"grad_norm": 0.6763717973478656,
|
|
"learning_rate": 1.777764712425052e-05,
|
|
"loss": 0.3361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3145608901977539,
|
|
"step": 2555,
|
|
"valid_targets_mean": 3932.8,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 4.082934609250398,
|
|
"grad_norm": 0.7785794401216987,
|
|
"learning_rate": 1.7698623446985378e-05,
|
|
"loss": 0.3136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3303467035293579,
|
|
"step": 2560,
|
|
"valid_targets_mean": 3286.2,
|
|
"valid_targets_min": 1060
|
|
},
|
|
{
|
|
"epoch": 4.090909090909091,
|
|
"grad_norm": 0.8555026630090804,
|
|
"learning_rate": 1.7619636163961907e-05,
|
|
"loss": 0.2927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2977840304374695,
|
|
"step": 2565,
|
|
"valid_targets_mean": 2805.2,
|
|
"valid_targets_min": 1103
|
|
},
|
|
{
|
|
"epoch": 4.098883572567783,
|
|
"grad_norm": 0.673108293936476,
|
|
"learning_rate": 1.754068652429436e-05,
|
|
"loss": 0.3225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2916679382324219,
|
|
"step": 2570,
|
|
"valid_targets_mean": 3603.2,
|
|
"valid_targets_min": 853
|
|
},
|
|
{
|
|
"epoch": 4.106858054226476,
|
|
"grad_norm": 0.9057538985520027,
|
|
"learning_rate": 1.74617757765017e-05,
|
|
"loss": 0.3111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.327725350856781,
|
|
"step": 2575,
|
|
"valid_targets_mean": 2445.5,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 4.114832535885167,
|
|
"grad_norm": 0.709372336281818,
|
|
"learning_rate": 1.738290516848784e-05,
|
|
"loss": 0.3028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26606905460357666,
|
|
"step": 2580,
|
|
"valid_targets_mean": 2960.3,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 4.12280701754386,
|
|
"grad_norm": 0.763987383867807,
|
|
"learning_rate": 1.730407594752194e-05,
|
|
"loss": 0.3082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3407701849937439,
|
|
"step": 2585,
|
|
"valid_targets_mean": 3421.9,
|
|
"valid_targets_min": 912
|
|
},
|
|
{
|
|
"epoch": 4.130781499202552,
|
|
"grad_norm": 0.797234958522927,
|
|
"learning_rate": 1.722528936021864e-05,
|
|
"loss": 0.3098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3326779901981354,
|
|
"step": 2590,
|
|
"valid_targets_mean": 2752.8,
|
|
"valid_targets_min": 1013
|
|
},
|
|
{
|
|
"epoch": 4.138755980861244,
|
|
"grad_norm": 0.6588633866472032,
|
|
"learning_rate": 1.714654665251837e-05,
|
|
"loss": 0.3276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36316221952438354,
|
|
"step": 2595,
|
|
"valid_targets_mean": 4095.1,
|
|
"valid_targets_min": 1200
|
|
},
|
|
{
|
|
"epoch": 4.146730462519936,
|
|
"grad_norm": 1.301621502711309,
|
|
"learning_rate": 1.7067849069667644e-05,
|
|
"loss": 0.3223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3349881172180176,
|
|
"step": 2600,
|
|
"valid_targets_mean": 2848.1,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 4.154704944178628,
|
|
"grad_norm": 0.8051799894518866,
|
|
"learning_rate": 1.6989197856199377e-05,
|
|
"loss": 0.3198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3687628507614136,
|
|
"step": 2605,
|
|
"valid_targets_mean": 2747.1,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 4.162679425837321,
|
|
"grad_norm": 0.7574767271278734,
|
|
"learning_rate": 1.6910594255913177e-05,
|
|
"loss": 0.3139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3026912808418274,
|
|
"step": 2610,
|
|
"valid_targets_mean": 3538.0,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 4.170653907496013,
|
|
"grad_norm": 0.700784077750503,
|
|
"learning_rate": 1.6832039511855702e-05,
|
|
"loss": 0.3043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3029630482196808,
|
|
"step": 2615,
|
|
"valid_targets_mean": 3521.2,
|
|
"valid_targets_min": 1299
|
|
},
|
|
{
|
|
"epoch": 4.178628389154705,
|
|
"grad_norm": 0.7272848277564268,
|
|
"learning_rate": 1.6753534866300983e-05,
|
|
"loss": 0.3038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31462332606315613,
|
|
"step": 2620,
|
|
"valid_targets_mean": 3495.2,
|
|
"valid_targets_min": 1025
|
|
},
|
|
{
|
|
"epoch": 4.186602870813397,
|
|
"grad_norm": 0.7820920005603447,
|
|
"learning_rate": 1.6675081560730804e-05,
|
|
"loss": 0.3044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30004727840423584,
|
|
"step": 2625,
|
|
"valid_targets_mean": 2895.2,
|
|
"valid_targets_min": 1166
|
|
},
|
|
{
|
|
"epoch": 4.1945773524720895,
|
|
"grad_norm": 0.6383966968357844,
|
|
"learning_rate": 1.6596680835815024e-05,
|
|
"loss": 0.2913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3003702163696289,
|
|
"step": 2630,
|
|
"valid_targets_mean": 4205.9,
|
|
"valid_targets_min": 1577
|
|
},
|
|
{
|
|
"epoch": 4.202551834130782,
|
|
"grad_norm": 0.717483411522531,
|
|
"learning_rate": 1.651833393139201e-05,
|
|
"loss": 0.2903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2812724709510803,
|
|
"step": 2635,
|
|
"valid_targets_mean": 3303.8,
|
|
"valid_targets_min": 943
|
|
},
|
|
{
|
|
"epoch": 4.2105263157894735,
|
|
"grad_norm": 0.7092984169114671,
|
|
"learning_rate": 1.6440042086449e-05,
|
|
"loss": 0.294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29016828536987305,
|
|
"step": 2640,
|
|
"valid_targets_mean": 3394.0,
|
|
"valid_targets_min": 1286
|
|
},
|
|
{
|
|
"epoch": 4.218500797448166,
|
|
"grad_norm": 0.6629715477873863,
|
|
"learning_rate": 1.6361806539102508e-05,
|
|
"loss": 0.3256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3291637897491455,
|
|
"step": 2645,
|
|
"valid_targets_mean": 4108.1,
|
|
"valid_targets_min": 1226
|
|
},
|
|
{
|
|
"epoch": 4.226475279106858,
|
|
"grad_norm": 0.7522348082987887,
|
|
"learning_rate": 1.628362852657876e-05,
|
|
"loss": 0.3171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3314817547798157,
|
|
"step": 2650,
|
|
"valid_targets_mean": 3096.4,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 4.23444976076555,
|
|
"grad_norm": 0.6352560589410327,
|
|
"learning_rate": 1.620550928519411e-05,
|
|
"loss": 0.2982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28492623567581177,
|
|
"step": 2655,
|
|
"valid_targets_mean": 3972.4,
|
|
"valid_targets_min": 1896
|
|
},
|
|
{
|
|
"epoch": 4.242424242424242,
|
|
"grad_norm": 0.7686634614599898,
|
|
"learning_rate": 1.612745005033551e-05,
|
|
"loss": 0.317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3498157560825348,
|
|
"step": 2660,
|
|
"valid_targets_mean": 3326.0,
|
|
"valid_targets_min": 1181
|
|
},
|
|
{
|
|
"epoch": 4.250398724082935,
|
|
"grad_norm": 0.6826376916125342,
|
|
"learning_rate": 1.6049452056440948e-05,
|
|
"loss": 0.3337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3425927758216858,
|
|
"step": 2665,
|
|
"valid_targets_mean": 4086.0,
|
|
"valid_targets_min": 1169
|
|
},
|
|
{
|
|
"epoch": 4.258373205741627,
|
|
"grad_norm": 0.8794209715537231,
|
|
"learning_rate": 1.597151653697995e-05,
|
|
"loss": 0.3348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33404669165611267,
|
|
"step": 2670,
|
|
"valid_targets_mean": 3023.2,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 4.266347687400319,
|
|
"grad_norm": 0.7566974310644822,
|
|
"learning_rate": 1.5893644724434066e-05,
|
|
"loss": 0.3145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3593965172767639,
|
|
"step": 2675,
|
|
"valid_targets_mean": 3333.2,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 4.274322169059011,
|
|
"grad_norm": 0.7387889498305469,
|
|
"learning_rate": 1.581583785027736e-05,
|
|
"loss": 0.3221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2826180160045624,
|
|
"step": 2680,
|
|
"valid_targets_mean": 3119.8,
|
|
"valid_targets_min": 1029
|
|
},
|
|
{
|
|
"epoch": 4.282296650717703,
|
|
"grad_norm": 0.6561729316459983,
|
|
"learning_rate": 1.5738097144956976e-05,
|
|
"loss": 0.3055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.318328857421875,
|
|
"step": 2685,
|
|
"valid_targets_mean": 4558.8,
|
|
"valid_targets_min": 2087
|
|
},
|
|
{
|
|
"epoch": 4.290271132376396,
|
|
"grad_norm": 0.7242158676092938,
|
|
"learning_rate": 1.566042383787364e-05,
|
|
"loss": 0.3177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2921149730682373,
|
|
"step": 2690,
|
|
"valid_targets_mean": 3022.1,
|
|
"valid_targets_min": 1230
|
|
},
|
|
{
|
|
"epoch": 4.298245614035087,
|
|
"grad_norm": 0.7414839339889141,
|
|
"learning_rate": 1.558281915736224e-05,
|
|
"loss": 0.3058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30103516578674316,
|
|
"step": 2695,
|
|
"valid_targets_mean": 3197.7,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 4.30622009569378,
|
|
"grad_norm": 0.8169019704288297,
|
|
"learning_rate": 1.550528433067239e-05,
|
|
"loss": 0.3075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3514130711555481,
|
|
"step": 2700,
|
|
"valid_targets_mean": 2887.1,
|
|
"valid_targets_min": 1157
|
|
},
|
|
{
|
|
"epoch": 4.314194577352472,
|
|
"grad_norm": 0.7795246919074701,
|
|
"learning_rate": 1.5427820583949054e-05,
|
|
"loss": 0.2961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.300483375787735,
|
|
"step": 2705,
|
|
"valid_targets_mean": 3239.7,
|
|
"valid_targets_min": 1526
|
|
},
|
|
{
|
|
"epoch": 4.3221690590111645,
|
|
"grad_norm": 0.8330423257697352,
|
|
"learning_rate": 1.5350429142213075e-05,
|
|
"loss": 0.2986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32835930585861206,
|
|
"step": 2710,
|
|
"valid_targets_mean": 2760.4,
|
|
"valid_targets_min": 1220
|
|
},
|
|
{
|
|
"epoch": 4.330143540669856,
|
|
"grad_norm": 0.8409700250166574,
|
|
"learning_rate": 1.5273111229341907e-05,
|
|
"loss": 0.2944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3193663954734802,
|
|
"step": 2715,
|
|
"valid_targets_mean": 2679.3,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 4.3381180223285485,
|
|
"grad_norm": 0.7894009674085395,
|
|
"learning_rate": 1.5195868068050185e-05,
|
|
"loss": 0.3183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.351798951625824,
|
|
"step": 2720,
|
|
"valid_targets_mean": 3079.6,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 4.346092503987241,
|
|
"grad_norm": 0.7213333471218348,
|
|
"learning_rate": 1.5118700879870426e-05,
|
|
"loss": 0.3198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31772780418395996,
|
|
"step": 2725,
|
|
"valid_targets_mean": 3642.4,
|
|
"valid_targets_min": 1157
|
|
},
|
|
{
|
|
"epoch": 4.354066985645933,
|
|
"grad_norm": 0.8266106900026843,
|
|
"learning_rate": 1.5041610885133681e-05,
|
|
"loss": 0.3232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35368919372558594,
|
|
"step": 2730,
|
|
"valid_targets_mean": 2665.3,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 4.362041467304625,
|
|
"grad_norm": 0.808575346077874,
|
|
"learning_rate": 1.496459930295028e-05,
|
|
"loss": 0.3182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3841049373149872,
|
|
"step": 2735,
|
|
"valid_targets_mean": 3057.1,
|
|
"valid_targets_min": 1452
|
|
},
|
|
{
|
|
"epoch": 4.370015948963317,
|
|
"grad_norm": 0.7520724264110302,
|
|
"learning_rate": 1.4887667351190508e-05,
|
|
"loss": 0.308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30727773904800415,
|
|
"step": 2740,
|
|
"valid_targets_mean": 3277.1,
|
|
"valid_targets_min": 1199
|
|
},
|
|
{
|
|
"epoch": 4.37799043062201,
|
|
"grad_norm": 0.8935642179513363,
|
|
"learning_rate": 1.4810816246465374e-05,
|
|
"loss": 0.3162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3343425989151001,
|
|
"step": 2745,
|
|
"valid_targets_mean": 3314.3,
|
|
"valid_targets_min": 1699
|
|
},
|
|
{
|
|
"epoch": 4.385964912280702,
|
|
"grad_norm": 0.7987513409121181,
|
|
"learning_rate": 1.4734047204107358e-05,
|
|
"loss": 0.3219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3002738654613495,
|
|
"step": 2750,
|
|
"valid_targets_mean": 2681.8,
|
|
"valid_targets_min": 1103
|
|
},
|
|
{
|
|
"epoch": 4.393939393939394,
|
|
"grad_norm": 0.63687908996254,
|
|
"learning_rate": 1.4657361438151192e-05,
|
|
"loss": 0.3215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3182310163974762,
|
|
"step": 2755,
|
|
"valid_targets_mean": 4120.0,
|
|
"valid_targets_min": 1273
|
|
},
|
|
{
|
|
"epoch": 4.401913875598086,
|
|
"grad_norm": 0.765743691389168,
|
|
"learning_rate": 1.458076016131468e-05,
|
|
"loss": 0.3247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32906395196914673,
|
|
"step": 2760,
|
|
"valid_targets_mean": 3590.9,
|
|
"valid_targets_min": 1473
|
|
},
|
|
{
|
|
"epoch": 4.409888357256778,
|
|
"grad_norm": 0.7314266309488221,
|
|
"learning_rate": 1.4504244584979494e-05,
|
|
"loss": 0.3193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29726940393447876,
|
|
"step": 2765,
|
|
"valid_targets_mean": 3364.9,
|
|
"valid_targets_min": 1094
|
|
},
|
|
{
|
|
"epoch": 4.417862838915471,
|
|
"grad_norm": 0.6984075571490638,
|
|
"learning_rate": 1.442781591917203e-05,
|
|
"loss": 0.3295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30236005783081055,
|
|
"step": 2770,
|
|
"valid_targets_mean": 3590.2,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 4.425837320574162,
|
|
"grad_norm": 0.7013843344357578,
|
|
"learning_rate": 1.4351475372544262e-05,
|
|
"loss": 0.2919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26208868622779846,
|
|
"step": 2775,
|
|
"valid_targets_mean": 3071.1,
|
|
"valid_targets_min": 1164
|
|
},
|
|
{
|
|
"epoch": 4.433811802232855,
|
|
"grad_norm": 0.7395192342996243,
|
|
"learning_rate": 1.4275224152354658e-05,
|
|
"loss": 0.3199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30977094173431396,
|
|
"step": 2780,
|
|
"valid_targets_mean": 3221.2,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 4.441786283891547,
|
|
"grad_norm": 0.7151093422792792,
|
|
"learning_rate": 1.4199063464449047e-05,
|
|
"loss": 0.3093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30665749311447144,
|
|
"step": 2785,
|
|
"valid_targets_mean": 3510.1,
|
|
"valid_targets_min": 1313
|
|
},
|
|
{
|
|
"epoch": 4.44976076555024,
|
|
"grad_norm": 0.6501107796340024,
|
|
"learning_rate": 1.4122994513241587e-05,
|
|
"loss": 0.3044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2921556234359741,
|
|
"step": 2790,
|
|
"valid_targets_mean": 3882.2,
|
|
"valid_targets_min": 1832
|
|
},
|
|
{
|
|
"epoch": 4.457735247208931,
|
|
"grad_norm": 0.7083752808497087,
|
|
"learning_rate": 1.4047018501695677e-05,
|
|
"loss": 0.3007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2962319552898407,
|
|
"step": 2795,
|
|
"valid_targets_mean": 3678.4,
|
|
"valid_targets_min": 1338
|
|
},
|
|
{
|
|
"epoch": 4.4657097288676235,
|
|
"grad_norm": 0.7297984140388221,
|
|
"learning_rate": 1.3971136631304978e-05,
|
|
"loss": 0.3268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2842541038990021,
|
|
"step": 2800,
|
|
"valid_targets_mean": 3077.6,
|
|
"valid_targets_min": 1078
|
|
},
|
|
{
|
|
"epoch": 4.473684210526316,
|
|
"grad_norm": 0.7259945147882136,
|
|
"learning_rate": 1.3895350102074386e-05,
|
|
"loss": 0.3222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30145084857940674,
|
|
"step": 2805,
|
|
"valid_targets_mean": 3074.6,
|
|
"valid_targets_min": 1247
|
|
},
|
|
{
|
|
"epoch": 4.481658692185008,
|
|
"grad_norm": 0.7472840763273451,
|
|
"learning_rate": 1.3819660112501054e-05,
|
|
"loss": 0.3248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3042908310890198,
|
|
"step": 2810,
|
|
"valid_targets_mean": 3219.1,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 4.4896331738437,
|
|
"grad_norm": 0.7439394669755198,
|
|
"learning_rate": 1.3744067859555461e-05,
|
|
"loss": 0.3272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30904334783554077,
|
|
"step": 2815,
|
|
"valid_targets_mean": 3547.2,
|
|
"valid_targets_min": 1250
|
|
},
|
|
{
|
|
"epoch": 4.497607655502392,
|
|
"grad_norm": 2.6617934829341614,
|
|
"learning_rate": 1.3668574538662451e-05,
|
|
"loss": 0.3016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.281440407037735,
|
|
"step": 2820,
|
|
"valid_targets_mean": 3352.5,
|
|
"valid_targets_min": 1060
|
|
},
|
|
{
|
|
"epoch": 4.505582137161085,
|
|
"grad_norm": 0.7954915248870515,
|
|
"learning_rate": 1.3593181343682353e-05,
|
|
"loss": 0.3037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3072655200958252,
|
|
"step": 2825,
|
|
"valid_targets_mean": 2635.6,
|
|
"valid_targets_min": 1034
|
|
},
|
|
{
|
|
"epoch": 4.513556618819777,
|
|
"grad_norm": 0.7375870487764873,
|
|
"learning_rate": 1.3517889466892085e-05,
|
|
"loss": 0.3031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2905675768852234,
|
|
"step": 2830,
|
|
"valid_targets_mean": 2986.1,
|
|
"valid_targets_min": 1031
|
|
},
|
|
{
|
|
"epoch": 4.521531100478469,
|
|
"grad_norm": 0.841146351180395,
|
|
"learning_rate": 1.3442700098966326e-05,
|
|
"loss": 0.299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2761963903903961,
|
|
"step": 2835,
|
|
"valid_targets_mean": 2456.3,
|
|
"valid_targets_min": 1288
|
|
},
|
|
{
|
|
"epoch": 4.529505582137161,
|
|
"grad_norm": 0.8015720518308336,
|
|
"learning_rate": 1.3367614428958646e-05,
|
|
"loss": 0.3256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3150325417518616,
|
|
"step": 2840,
|
|
"valid_targets_mean": 2477.7,
|
|
"valid_targets_min": 1179
|
|
},
|
|
{
|
|
"epoch": 4.5374800637958534,
|
|
"grad_norm": 0.8575662432601303,
|
|
"learning_rate": 1.3292633644282735e-05,
|
|
"loss": 0.3238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.369368314743042,
|
|
"step": 2845,
|
|
"valid_targets_mean": 2970.8,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 4.545454545454545,
|
|
"grad_norm": 0.6256871875913504,
|
|
"learning_rate": 1.3217758930693608e-05,
|
|
"loss": 0.3075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30744612216949463,
|
|
"step": 2850,
|
|
"valid_targets_mean": 4485.2,
|
|
"valid_targets_min": 1604
|
|
},
|
|
{
|
|
"epoch": 4.553429027113237,
|
|
"grad_norm": 0.6972451298731225,
|
|
"learning_rate": 1.314299147226887e-05,
|
|
"loss": 0.3092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2642960846424103,
|
|
"step": 2855,
|
|
"valid_targets_mean": 3622.3,
|
|
"valid_targets_min": 1486
|
|
},
|
|
{
|
|
"epoch": 4.56140350877193,
|
|
"grad_norm": 0.7168042682886268,
|
|
"learning_rate": 1.3068332451389969e-05,
|
|
"loss": 0.3116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2901843190193176,
|
|
"step": 2860,
|
|
"valid_targets_mean": 3444.8,
|
|
"valid_targets_min": 1289
|
|
},
|
|
{
|
|
"epoch": 4.569377990430622,
|
|
"grad_norm": 0.7798665992858413,
|
|
"learning_rate": 1.2993783048723515e-05,
|
|
"loss": 0.2915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27249443531036377,
|
|
"step": 2865,
|
|
"valid_targets_mean": 2573.5,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 4.577352472089315,
|
|
"grad_norm": 0.7646199415496282,
|
|
"learning_rate": 1.2919344443202602e-05,
|
|
"loss": 0.3,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3488754332065582,
|
|
"step": 2870,
|
|
"valid_targets_mean": 3272.3,
|
|
"valid_targets_min": 1555
|
|
},
|
|
{
|
|
"epoch": 4.585326953748006,
|
|
"grad_norm": 0.7521686370872009,
|
|
"learning_rate": 1.2845017812008158e-05,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2862269878387451,
|
|
"step": 2875,
|
|
"valid_targets_mean": 2951.1,
|
|
"valid_targets_min": 1363
|
|
},
|
|
{
|
|
"epoch": 4.5933014354066986,
|
|
"grad_norm": 0.7060854926857518,
|
|
"learning_rate": 1.277080433055034e-05,
|
|
"loss": 0.2972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3114144802093506,
|
|
"step": 2880,
|
|
"valid_targets_mean": 3681.5,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 4.601275917065391,
|
|
"grad_norm": 0.6915946240239654,
|
|
"learning_rate": 1.2696705172449944e-05,
|
|
"loss": 0.3208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3670293092727661,
|
|
"step": 2885,
|
|
"valid_targets_mean": 3655.9,
|
|
"valid_targets_min": 1259
|
|
},
|
|
{
|
|
"epoch": 4.6092503987240825,
|
|
"grad_norm": 0.6786728369517215,
|
|
"learning_rate": 1.2622721509519846e-05,
|
|
"loss": 0.3201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3293587267398834,
|
|
"step": 2890,
|
|
"valid_targets_mean": 4200.0,
|
|
"valid_targets_min": 1789
|
|
},
|
|
{
|
|
"epoch": 4.617224880382775,
|
|
"grad_norm": 0.6547625025620735,
|
|
"learning_rate": 1.2548854511746461e-05,
|
|
"loss": 0.2979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28593114018440247,
|
|
"step": 2895,
|
|
"valid_targets_mean": 4056.7,
|
|
"valid_targets_min": 1503
|
|
},
|
|
{
|
|
"epoch": 4.625199362041467,
|
|
"grad_norm": 0.8451418027175611,
|
|
"learning_rate": 1.247510534727125e-05,
|
|
"loss": 0.2983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29237547516822815,
|
|
"step": 2900,
|
|
"valid_targets_mean": 2525.2,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 4.63317384370016,
|
|
"grad_norm": 0.8184805487357323,
|
|
"learning_rate": 1.240147518237224e-05,
|
|
"loss": 0.3355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2751353979110718,
|
|
"step": 2905,
|
|
"valid_targets_mean": 3232.1,
|
|
"valid_targets_min": 1019
|
|
},
|
|
{
|
|
"epoch": 4.641148325358852,
|
|
"grad_norm": 0.7451980833903311,
|
|
"learning_rate": 1.2327965181445593e-05,
|
|
"loss": 0.3054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26416337490081787,
|
|
"step": 2910,
|
|
"valid_targets_mean": 2847.1,
|
|
"valid_targets_min": 1289
|
|
},
|
|
{
|
|
"epoch": 4.649122807017544,
|
|
"grad_norm": 0.7880003669764357,
|
|
"learning_rate": 1.2254576506987182e-05,
|
|
"loss": 0.3191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3492470681667328,
|
|
"step": 2915,
|
|
"valid_targets_mean": 3011.7,
|
|
"valid_targets_min": 1645
|
|
},
|
|
{
|
|
"epoch": 4.657097288676236,
|
|
"grad_norm": 0.6826872453391979,
|
|
"learning_rate": 1.21813103195742e-05,
|
|
"loss": 0.2933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26810115575790405,
|
|
"step": 2920,
|
|
"valid_targets_mean": 3272.8,
|
|
"valid_targets_min": 1535
|
|
},
|
|
{
|
|
"epoch": 4.6650717703349285,
|
|
"grad_norm": 0.7521215740292294,
|
|
"learning_rate": 1.2108167777846815e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2664267420768738,
|
|
"step": 2925,
|
|
"valid_targets_mean": 3117.4,
|
|
"valid_targets_min": 1559
|
|
},
|
|
{
|
|
"epoch": 4.67304625199362,
|
|
"grad_norm": 0.6854209141092252,
|
|
"learning_rate": 1.203515003848987e-05,
|
|
"loss": 0.3152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2957659363746643,
|
|
"step": 2930,
|
|
"valid_targets_mean": 3404.8,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 4.681020733652312,
|
|
"grad_norm": 0.739848102357362,
|
|
"learning_rate": 1.1962258256214545e-05,
|
|
"loss": 0.3063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3138800859451294,
|
|
"step": 2935,
|
|
"valid_targets_mean": 3109.7,
|
|
"valid_targets_min": 1427
|
|
},
|
|
{
|
|
"epoch": 4.688995215311005,
|
|
"grad_norm": 0.7689672817868513,
|
|
"learning_rate": 1.1889493583740124e-05,
|
|
"loss": 0.3089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.313041627407074,
|
|
"step": 2940,
|
|
"valid_targets_mean": 3008.9,
|
|
"valid_targets_min": 1207
|
|
},
|
|
{
|
|
"epoch": 4.696969696969697,
|
|
"grad_norm": 0.7184915261952176,
|
|
"learning_rate": 1.1816857171775767e-05,
|
|
"loss": 0.303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3128308653831482,
|
|
"step": 2945,
|
|
"valid_targets_mean": 3663.8,
|
|
"valid_targets_min": 1275
|
|
},
|
|
{
|
|
"epoch": 4.70494417862839,
|
|
"grad_norm": 0.6922626057485617,
|
|
"learning_rate": 1.1744350169002308e-05,
|
|
"loss": 0.2979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2551296353340149,
|
|
"step": 2950,
|
|
"valid_targets_mean": 3011.0,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 4.712918660287081,
|
|
"grad_norm": 0.7548300822392016,
|
|
"learning_rate": 1.1671973722054077e-05,
|
|
"loss": 0.3141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30510175228118896,
|
|
"step": 2955,
|
|
"valid_targets_mean": 2951.2,
|
|
"valid_targets_min": 953
|
|
},
|
|
{
|
|
"epoch": 4.720893141945774,
|
|
"grad_norm": 0.7959987672279856,
|
|
"learning_rate": 1.159972897550079e-05,
|
|
"loss": 0.3124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30906587839126587,
|
|
"step": 2960,
|
|
"valid_targets_mean": 2991.6,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 4.728867623604466,
|
|
"grad_norm": 0.697993583287317,
|
|
"learning_rate": 1.1527617071829447e-05,
|
|
"loss": 0.3319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3346853256225586,
|
|
"step": 2965,
|
|
"valid_targets_mean": 3543.8,
|
|
"valid_targets_min": 1204
|
|
},
|
|
{
|
|
"epoch": 4.7368421052631575,
|
|
"grad_norm": 0.6946051854766289,
|
|
"learning_rate": 1.145563915142622e-05,
|
|
"loss": 0.3045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32877376675605774,
|
|
"step": 2970,
|
|
"valid_targets_mean": 3803.6,
|
|
"valid_targets_min": 1467
|
|
},
|
|
{
|
|
"epoch": 4.74481658692185,
|
|
"grad_norm": 0.7157887232155995,
|
|
"learning_rate": 1.1383796352558489e-05,
|
|
"loss": 0.3014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2445966899394989,
|
|
"step": 2975,
|
|
"valid_targets_mean": 3293.1,
|
|
"valid_targets_min": 1291
|
|
},
|
|
{
|
|
"epoch": 4.752791068580542,
|
|
"grad_norm": 0.7866869264629741,
|
|
"learning_rate": 1.1312089811356803e-05,
|
|
"loss": 0.3162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3185698091983795,
|
|
"step": 2980,
|
|
"valid_targets_mean": 2774.6,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 4.760765550239235,
|
|
"grad_norm": 0.6987141430990652,
|
|
"learning_rate": 1.1240520661796882e-05,
|
|
"loss": 0.3372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30906355381011963,
|
|
"step": 2985,
|
|
"valid_targets_mean": 3509.4,
|
|
"valid_targets_min": 1628
|
|
},
|
|
{
|
|
"epoch": 4.768740031897926,
|
|
"grad_norm": 0.7667396150062957,
|
|
"learning_rate": 1.1169090035681772e-05,
|
|
"loss": 0.3087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3126988410949707,
|
|
"step": 2990,
|
|
"valid_targets_mean": 3334.3,
|
|
"valid_targets_min": 1154
|
|
},
|
|
{
|
|
"epoch": 4.776714513556619,
|
|
"grad_norm": 0.7378470249274736,
|
|
"learning_rate": 1.109779906262383e-05,
|
|
"loss": 0.3096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3087505102157593,
|
|
"step": 2995,
|
|
"valid_targets_mean": 2949.7,
|
|
"valid_targets_min": 1163
|
|
},
|
|
{
|
|
"epoch": 4.784688995215311,
|
|
"grad_norm": 0.7309815454995062,
|
|
"learning_rate": 1.102664887002698e-05,
|
|
"loss": 0.305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29310843348503113,
|
|
"step": 3000,
|
|
"valid_targets_mean": 3472.3,
|
|
"valid_targets_min": 1032
|
|
},
|
|
{
|
|
"epoch": 4.7926634768740035,
|
|
"grad_norm": 0.8897522554840841,
|
|
"learning_rate": 1.0955640583068802e-05,
|
|
"loss": 0.3217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33130979537963867,
|
|
"step": 3005,
|
|
"valid_targets_mean": 2333.4,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 4.800637958532695,
|
|
"grad_norm": 0.7351601907631145,
|
|
"learning_rate": 1.0884775324682755e-05,
|
|
"loss": 0.2873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27004849910736084,
|
|
"step": 3010,
|
|
"valid_targets_mean": 2920.6,
|
|
"valid_targets_min": 971
|
|
},
|
|
{
|
|
"epoch": 4.8086124401913874,
|
|
"grad_norm": 0.8435319397058453,
|
|
"learning_rate": 1.081405421554044e-05,
|
|
"loss": 0.297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3315794765949249,
|
|
"step": 3015,
|
|
"valid_targets_mean": 2625.4,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 4.81658692185008,
|
|
"grad_norm": 0.8083791322876364,
|
|
"learning_rate": 1.074347837403387e-05,
|
|
"loss": 0.3038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30663663148880005,
|
|
"step": 3020,
|
|
"valid_targets_mean": 2688.2,
|
|
"valid_targets_min": 873
|
|
},
|
|
{
|
|
"epoch": 4.824561403508772,
|
|
"grad_norm": 0.7815384549379466,
|
|
"learning_rate": 1.067304891625776e-05,
|
|
"loss": 0.3165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3033127784729004,
|
|
"step": 3025,
|
|
"valid_targets_mean": 2833.2,
|
|
"valid_targets_min": 1199
|
|
},
|
|
{
|
|
"epoch": 4.832535885167464,
|
|
"grad_norm": 0.8299929744078244,
|
|
"learning_rate": 1.0602766955991913e-05,
|
|
"loss": 0.3295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3745916485786438,
|
|
"step": 3030,
|
|
"valid_targets_mean": 2914.3,
|
|
"valid_targets_min": 1329
|
|
},
|
|
{
|
|
"epoch": 4.840510366826156,
|
|
"grad_norm": 0.7483910786119474,
|
|
"learning_rate": 1.053263360468358e-05,
|
|
"loss": 0.3003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29096633195877075,
|
|
"step": 3035,
|
|
"valid_targets_mean": 2980.6,
|
|
"valid_targets_min": 1544
|
|
},
|
|
{
|
|
"epoch": 4.848484848484849,
|
|
"grad_norm": 0.831073914728117,
|
|
"learning_rate": 1.0462649971429884e-05,
|
|
"loss": 0.2994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30728650093078613,
|
|
"step": 3040,
|
|
"valid_targets_mean": 2524.6,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 4.856459330143541,
|
|
"grad_norm": 0.9208989032905411,
|
|
"learning_rate": 1.0392817162960304e-05,
|
|
"loss": 0.3071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3363386392593384,
|
|
"step": 3045,
|
|
"valid_targets_mean": 2211.4,
|
|
"valid_targets_min": 1231
|
|
},
|
|
{
|
|
"epoch": 4.8644338118022326,
|
|
"grad_norm": 0.8316624791913371,
|
|
"learning_rate": 1.0323136283619167e-05,
|
|
"loss": 0.327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30167120695114136,
|
|
"step": 3050,
|
|
"valid_targets_mean": 2670.7,
|
|
"valid_targets_min": 1388
|
|
},
|
|
{
|
|
"epoch": 4.872408293460925,
|
|
"grad_norm": 0.7448292090937383,
|
|
"learning_rate": 1.0253608435348136e-05,
|
|
"loss": 0.3116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3208216726779938,
|
|
"step": 3055,
|
|
"valid_targets_mean": 3202.4,
|
|
"valid_targets_min": 1362
|
|
},
|
|
{
|
|
"epoch": 4.880382775119617,
|
|
"grad_norm": 0.9487354451736023,
|
|
"learning_rate": 1.0184234717668867e-05,
|
|
"loss": 0.3373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3511803150177002,
|
|
"step": 3060,
|
|
"valid_targets_mean": 2281.9,
|
|
"valid_targets_min": 1105
|
|
},
|
|
{
|
|
"epoch": 4.88835725677831,
|
|
"grad_norm": 0.7573693698618885,
|
|
"learning_rate": 1.0115016227665544e-05,
|
|
"loss": 0.3137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3294350206851959,
|
|
"step": 3065,
|
|
"valid_targets_mean": 3406.2,
|
|
"valid_targets_min": 1399
|
|
},
|
|
{
|
|
"epoch": 4.896331738437001,
|
|
"grad_norm": 0.7024419856128182,
|
|
"learning_rate": 1.0045954059967577e-05,
|
|
"loss": 0.3068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27417218685150146,
|
|
"step": 3070,
|
|
"valid_targets_mean": 3185.2,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 4.904306220095694,
|
|
"grad_norm": 0.6971859477541038,
|
|
"learning_rate": 9.977049306732287e-06,
|
|
"loss": 0.3352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3165581524372101,
|
|
"step": 3075,
|
|
"valid_targets_mean": 3701.2,
|
|
"valid_targets_min": 1646
|
|
},
|
|
{
|
|
"epoch": 4.912280701754386,
|
|
"grad_norm": 0.7132227070393632,
|
|
"learning_rate": 9.908303057627591e-06,
|
|
"loss": 0.3115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34644725918769836,
|
|
"step": 3080,
|
|
"valid_targets_mean": 3485.7,
|
|
"valid_targets_min": 1593
|
|
},
|
|
{
|
|
"epoch": 4.920255183413078,
|
|
"grad_norm": 0.6776155953264424,
|
|
"learning_rate": 9.83971639981484e-06,
|
|
"loss": 0.3087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29809799790382385,
|
|
"step": 3085,
|
|
"valid_targets_mean": 3347.2,
|
|
"valid_targets_min": 865
|
|
},
|
|
{
|
|
"epoch": 4.92822966507177,
|
|
"grad_norm": 0.6603183712624823,
|
|
"learning_rate": 9.771290417931559e-06,
|
|
"loss": 0.3104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2726275324821472,
|
|
"step": 3090,
|
|
"valid_targets_mean": 3642.6,
|
|
"valid_targets_min": 1367
|
|
},
|
|
{
|
|
"epoch": 4.9362041467304625,
|
|
"grad_norm": 0.809303990918829,
|
|
"learning_rate": 9.703026194074342e-06,
|
|
"loss": 0.3167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2747498154640198,
|
|
"step": 3095,
|
|
"valid_targets_mean": 2573.3,
|
|
"valid_targets_min": 1221
|
|
},
|
|
{
|
|
"epoch": 4.944178628389155,
|
|
"grad_norm": 0.731696411055998,
|
|
"learning_rate": 9.634924807781729e-06,
|
|
"loss": 0.3193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3218362629413605,
|
|
"step": 3100,
|
|
"valid_targets_mean": 3320.7,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 4.952153110047847,
|
|
"grad_norm": 0.7203488177334941,
|
|
"learning_rate": 9.566987336017102e-06,
|
|
"loss": 0.2991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31702274084091187,
|
|
"step": 3105,
|
|
"valid_targets_mean": 3243.9,
|
|
"valid_targets_min": 1355
|
|
},
|
|
{
|
|
"epoch": 4.960127591706539,
|
|
"grad_norm": 0.7667974320868555,
|
|
"learning_rate": 9.499214853151699e-06,
|
|
"loss": 0.3485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3820187449455261,
|
|
"step": 3110,
|
|
"valid_targets_mean": 3377.4,
|
|
"valid_targets_min": 1349
|
|
},
|
|
{
|
|
"epoch": 4.968102073365231,
|
|
"grad_norm": 0.747437046921312,
|
|
"learning_rate": 9.431608430947619e-06,
|
|
"loss": 0.3194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3390301764011383,
|
|
"step": 3115,
|
|
"valid_targets_mean": 3451.5,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 4.976076555023924,
|
|
"grad_norm": 0.8836632702080381,
|
|
"learning_rate": 9.364169138540805e-06,
|
|
"loss": 0.3255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3598892092704773,
|
|
"step": 3120,
|
|
"valid_targets_mean": 2496.3,
|
|
"valid_targets_min": 1323
|
|
},
|
|
{
|
|
"epoch": 4.984051036682615,
|
|
"grad_norm": 0.8541254738304676,
|
|
"learning_rate": 9.296898042424237e-06,
|
|
"loss": 0.314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35240429639816284,
|
|
"step": 3125,
|
|
"valid_targets_mean": 2605.9,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 4.992025518341308,
|
|
"grad_norm": 0.8902039511073574,
|
|
"learning_rate": 9.229796206431015e-06,
|
|
"loss": 0.3116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31266897916793823,
|
|
"step": 3130,
|
|
"valid_targets_mean": 2928.4,
|
|
"valid_targets_min": 1330
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.6716371325925632,
|
|
"learning_rate": 9.162864691717513e-06,
|
|
"loss": 0.2895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27087920904159546,
|
|
"step": 3135,
|
|
"valid_targets_mean": 3227.2,
|
|
"valid_targets_min": 1351
|
|
},
|
|
{
|
|
"epoch": 5.007974481658692,
|
|
"grad_norm": 0.710714358010972,
|
|
"learning_rate": 9.096104556746654e-06,
|
|
"loss": 0.2967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2968267798423767,
|
|
"step": 3140,
|
|
"valid_targets_mean": 3516.1,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 5.015948963317384,
|
|
"grad_norm": 0.7471434449801442,
|
|
"learning_rate": 9.029516857271115e-06,
|
|
"loss": 0.2874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2739573121070862,
|
|
"step": 3145,
|
|
"valid_targets_mean": 3110.4,
|
|
"valid_targets_min": 944
|
|
},
|
|
{
|
|
"epoch": 5.023923444976076,
|
|
"grad_norm": 0.7761914152822722,
|
|
"learning_rate": 8.963102646316677e-06,
|
|
"loss": 0.3108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29111355543136597,
|
|
"step": 3150,
|
|
"valid_targets_mean": 3050.1,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 5.031897926634769,
|
|
"grad_norm": 0.8813953032846399,
|
|
"learning_rate": 8.896862974165553e-06,
|
|
"loss": 0.2856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3010531961917877,
|
|
"step": 3155,
|
|
"valid_targets_mean": 2616.4,
|
|
"valid_targets_min": 1019
|
|
},
|
|
{
|
|
"epoch": 5.039872408293461,
|
|
"grad_norm": 0.7306095373587296,
|
|
"learning_rate": 8.830798888339756e-06,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30917805433273315,
|
|
"step": 3160,
|
|
"valid_targets_mean": 3652.0,
|
|
"valid_targets_min": 1596
|
|
},
|
|
{
|
|
"epoch": 5.047846889952153,
|
|
"grad_norm": 0.7679864304188719,
|
|
"learning_rate": 8.764911433584581e-06,
|
|
"loss": 0.3108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28775179386138916,
|
|
"step": 3165,
|
|
"valid_targets_mean": 3484.0,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 5.055821371610845,
|
|
"grad_norm": 0.7312794585195165,
|
|
"learning_rate": 8.699201651852056e-06,
|
|
"loss": 0.2884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2922794818878174,
|
|
"step": 3170,
|
|
"valid_targets_mean": 3225.9,
|
|
"valid_targets_min": 1451
|
|
},
|
|
{
|
|
"epoch": 5.0637958532695375,
|
|
"grad_norm": 0.8204437547869823,
|
|
"learning_rate": 8.633670582284446e-06,
|
|
"loss": 0.2868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30454614758491516,
|
|
"step": 3175,
|
|
"valid_targets_mean": 3331.8,
|
|
"valid_targets_min": 969
|
|
},
|
|
{
|
|
"epoch": 5.07177033492823,
|
|
"grad_norm": 0.7900885923075235,
|
|
"learning_rate": 8.56831926119787e-06,
|
|
"loss": 0.3084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2870129346847534,
|
|
"step": 3180,
|
|
"valid_targets_mean": 3147.5,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 5.0797448165869215,
|
|
"grad_norm": 0.7484754280989424,
|
|
"learning_rate": 8.503148722065851e-06,
|
|
"loss": 0.2953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2726858854293823,
|
|
"step": 3185,
|
|
"valid_targets_mean": 3332.2,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 5.087719298245614,
|
|
"grad_norm": 0.7583645292590403,
|
|
"learning_rate": 8.43815999550303e-06,
|
|
"loss": 0.2775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29640626907348633,
|
|
"step": 3190,
|
|
"valid_targets_mean": 3712.4,
|
|
"valid_targets_min": 1503
|
|
},
|
|
{
|
|
"epoch": 5.095693779904306,
|
|
"grad_norm": 0.7986770482964909,
|
|
"learning_rate": 8.373354109248842e-06,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3314608335494995,
|
|
"step": 3195,
|
|
"valid_targets_mean": 3252.4,
|
|
"valid_targets_min": 1351
|
|
},
|
|
{
|
|
"epoch": 5.103668261562999,
|
|
"grad_norm": 0.7136003424302804,
|
|
"learning_rate": 8.308732088151245e-06,
|
|
"loss": 0.3037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2969920039176941,
|
|
"step": 3200,
|
|
"valid_targets_mean": 3675.1,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 5.11164274322169,
|
|
"grad_norm": 0.7607229475388195,
|
|
"learning_rate": 8.24429495415054e-06,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.284313440322876,
|
|
"step": 3205,
|
|
"valid_targets_mean": 3511.3,
|
|
"valid_targets_min": 1351
|
|
},
|
|
{
|
|
"epoch": 5.119617224880383,
|
|
"grad_norm": 0.7433169454906013,
|
|
"learning_rate": 8.180043726263216e-06,
|
|
"loss": 0.2929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27382850646972656,
|
|
"step": 3210,
|
|
"valid_targets_mean": 3673.4,
|
|
"valid_targets_min": 1544
|
|
},
|
|
{
|
|
"epoch": 5.127591706539075,
|
|
"grad_norm": 0.7666960695758672,
|
|
"learning_rate": 8.115979420565794e-06,
|
|
"loss": 0.3091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29559946060180664,
|
|
"step": 3215,
|
|
"valid_targets_mean": 3228.2,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 5.1355661881977674,
|
|
"grad_norm": 1.0675625887989877,
|
|
"learning_rate": 8.052103050178806e-06,
|
|
"loss": 0.2945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28551188111305237,
|
|
"step": 3220,
|
|
"valid_targets_mean": 3047.9,
|
|
"valid_targets_min": 1062
|
|
},
|
|
{
|
|
"epoch": 5.143540669856459,
|
|
"grad_norm": 0.7952321712947749,
|
|
"learning_rate": 7.988415625250755e-06,
|
|
"loss": 0.2962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30445098876953125,
|
|
"step": 3225,
|
|
"valid_targets_mean": 3130.4,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 5.151515151515151,
|
|
"grad_norm": 0.8302086374615822,
|
|
"learning_rate": 7.924918152942117e-06,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27466854453086853,
|
|
"step": 3230,
|
|
"valid_targets_mean": 3180.1,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 5.159489633173844,
|
|
"grad_norm": 0.824058512121437,
|
|
"learning_rate": 7.861611637409462e-06,
|
|
"loss": 0.2789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2754451036453247,
|
|
"step": 3235,
|
|
"valid_targets_mean": 2769.5,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 5.167464114832536,
|
|
"grad_norm": 0.8092771218603739,
|
|
"learning_rate": 7.798497079789513e-06,
|
|
"loss": 0.2886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3613300621509552,
|
|
"step": 3240,
|
|
"valid_targets_mean": 3296.7,
|
|
"valid_targets_min": 1365
|
|
},
|
|
{
|
|
"epoch": 5.175438596491228,
|
|
"grad_norm": 0.697044287152732,
|
|
"learning_rate": 7.735575478183381e-06,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25999772548675537,
|
|
"step": 3245,
|
|
"valid_targets_mean": 3779.8,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 5.18341307814992,
|
|
"grad_norm": 0.837389212331934,
|
|
"learning_rate": 7.672847827640735e-06,
|
|
"loss": 0.3029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26358726620674133,
|
|
"step": 3250,
|
|
"valid_targets_mean": 2823.2,
|
|
"valid_targets_min": 1335
|
|
},
|
|
{
|
|
"epoch": 5.1913875598086126,
|
|
"grad_norm": 0.8760147554368255,
|
|
"learning_rate": 7.610315120144067e-06,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29566675424575806,
|
|
"step": 3255,
|
|
"valid_targets_mean": 2634.8,
|
|
"valid_targets_min": 1169
|
|
},
|
|
{
|
|
"epoch": 5.199362041467305,
|
|
"grad_norm": 0.7709043474487708,
|
|
"learning_rate": 7.5479783445930414e-06,
|
|
"loss": 0.3256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27149730920791626,
|
|
"step": 3260,
|
|
"valid_targets_mean": 3377.8,
|
|
"valid_targets_min": 1178
|
|
},
|
|
{
|
|
"epoch": 5.2073365231259965,
|
|
"grad_norm": 0.8112964564697657,
|
|
"learning_rate": 7.485838486788803e-06,
|
|
"loss": 0.2968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2754499316215515,
|
|
"step": 3265,
|
|
"valid_targets_mean": 3050.4,
|
|
"valid_targets_min": 1224
|
|
},
|
|
{
|
|
"epoch": 5.215311004784689,
|
|
"grad_norm": 0.7290855330598013,
|
|
"learning_rate": 7.4238965294184374e-06,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29471445083618164,
|
|
"step": 3270,
|
|
"valid_targets_mean": 3946.7,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 5.223285486443381,
|
|
"grad_norm": 0.7498182458811173,
|
|
"learning_rate": 7.362153452039409e-06,
|
|
"loss": 0.2774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2554609179496765,
|
|
"step": 3275,
|
|
"valid_targets_mean": 3265.6,
|
|
"valid_targets_min": 1128
|
|
},
|
|
{
|
|
"epoch": 5.231259968102074,
|
|
"grad_norm": 0.7071995159264484,
|
|
"learning_rate": 7.300610231064056e-06,
|
|
"loss": 0.2941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.347484290599823,
|
|
"step": 3280,
|
|
"valid_targets_mean": 4258.6,
|
|
"valid_targets_min": 1294
|
|
},
|
|
{
|
|
"epoch": 5.239234449760765,
|
|
"grad_norm": 0.9148638643916521,
|
|
"learning_rate": 7.239267839744166e-06,
|
|
"loss": 0.266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2665436565876007,
|
|
"step": 3285,
|
|
"valid_targets_mean": 2360.8,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 5.247208931419458,
|
|
"grad_norm": 0.7488373849426715,
|
|
"learning_rate": 7.178127248155604e-06,
|
|
"loss": 0.2783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28261643648147583,
|
|
"step": 3290,
|
|
"valid_targets_mean": 3355.0,
|
|
"valid_targets_min": 1634
|
|
},
|
|
{
|
|
"epoch": 5.25518341307815,
|
|
"grad_norm": 0.7620638055914893,
|
|
"learning_rate": 7.117189423182917e-06,
|
|
"loss": 0.2836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27844250202178955,
|
|
"step": 3295,
|
|
"valid_targets_mean": 3004.2,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 5.2631578947368425,
|
|
"grad_norm": 0.8412098691644836,
|
|
"learning_rate": 7.056455328504104e-06,
|
|
"loss": 0.3038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3144789934158325,
|
|
"step": 3300,
|
|
"valid_targets_mean": 3071.9,
|
|
"valid_targets_min": 1349
|
|
},
|
|
{
|
|
"epoch": 5.271132376395534,
|
|
"grad_norm": 0.7756265647960496,
|
|
"learning_rate": 6.995925924575342e-06,
|
|
"loss": 0.2995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29475539922714233,
|
|
"step": 3305,
|
|
"valid_targets_mean": 3563.2,
|
|
"valid_targets_min": 1305
|
|
},
|
|
{
|
|
"epoch": 5.279106858054226,
|
|
"grad_norm": 0.7349133254115108,
|
|
"learning_rate": 6.935602168615792e-06,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25601115822792053,
|
|
"step": 3310,
|
|
"valid_targets_mean": 3368.3,
|
|
"valid_targets_min": 862
|
|
},
|
|
{
|
|
"epoch": 5.287081339712919,
|
|
"grad_norm": 0.7538592280865347,
|
|
"learning_rate": 6.875485014592493e-06,
|
|
"loss": 0.2955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.305560439825058,
|
|
"step": 3315,
|
|
"valid_targets_mean": 3466.9,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 5.295055821371611,
|
|
"grad_norm": 0.837003744003305,
|
|
"learning_rate": 6.815575413205235e-06,
|
|
"loss": 0.3112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.314494788646698,
|
|
"step": 3320,
|
|
"valid_targets_mean": 2824.2,
|
|
"valid_targets_min": 864
|
|
},
|
|
{
|
|
"epoch": 5.303030303030303,
|
|
"grad_norm": 0.8284729276693038,
|
|
"learning_rate": 6.755874311871562e-06,
|
|
"loss": 0.2842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2890750765800476,
|
|
"step": 3325,
|
|
"valid_targets_mean": 2576.9,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 5.311004784688995,
|
|
"grad_norm": 0.6873560917457716,
|
|
"learning_rate": 6.696382654711777e-06,
|
|
"loss": 0.2807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2673877775669098,
|
|
"step": 3330,
|
|
"valid_targets_mean": 3786.9,
|
|
"valid_targets_min": 1085
|
|
},
|
|
{
|
|
"epoch": 5.318979266347688,
|
|
"grad_norm": 0.9259745362358232,
|
|
"learning_rate": 6.637101382533986e-06,
|
|
"loss": 0.3024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29916876554489136,
|
|
"step": 3335,
|
|
"valid_targets_mean": 2488.7,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 5.32695374800638,
|
|
"grad_norm": 0.6846952582421462,
|
|
"learning_rate": 6.578031432819263e-06,
|
|
"loss": 0.314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3273669183254242,
|
|
"step": 3340,
|
|
"valid_targets_mean": 3781.2,
|
|
"valid_targets_min": 1121
|
|
},
|
|
{
|
|
"epoch": 5.3349282296650715,
|
|
"grad_norm": 0.7474281457428978,
|
|
"learning_rate": 6.5191737397068015e-06,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3178284168243408,
|
|
"step": 3345,
|
|
"valid_targets_mean": 3507.8,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 5.342902711323764,
|
|
"grad_norm": 0.7916681044465806,
|
|
"learning_rate": 6.460529233979127e-06,
|
|
"loss": 0.2668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29731759428977966,
|
|
"step": 3350,
|
|
"valid_targets_mean": 3076.4,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 5.350877192982456,
|
|
"grad_norm": 0.7039358638749136,
|
|
"learning_rate": 6.402098843047417e-06,
|
|
"loss": 0.3086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2724928855895996,
|
|
"step": 3355,
|
|
"valid_targets_mean": 3446.7,
|
|
"valid_targets_min": 1417
|
|
},
|
|
{
|
|
"epoch": 5.358851674641148,
|
|
"grad_norm": 0.8272084279345132,
|
|
"learning_rate": 6.343883490936791e-06,
|
|
"loss": 0.2999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29124242067337036,
|
|
"step": 3360,
|
|
"valid_targets_mean": 2747.8,
|
|
"valid_targets_min": 1400
|
|
},
|
|
{
|
|
"epoch": 5.36682615629984,
|
|
"grad_norm": 0.671196359563464,
|
|
"learning_rate": 6.285884098271739e-06,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24441444873809814,
|
|
"step": 3365,
|
|
"valid_targets_mean": 3710.4,
|
|
"valid_targets_min": 1188
|
|
},
|
|
{
|
|
"epoch": 5.374800637958533,
|
|
"grad_norm": 0.7688609079335349,
|
|
"learning_rate": 6.228101582261532e-06,
|
|
"loss": 0.3118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28145867586135864,
|
|
"step": 3370,
|
|
"valid_targets_mean": 3017.2,
|
|
"valid_targets_min": 1424
|
|
},
|
|
{
|
|
"epoch": 5.382775119617225,
|
|
"grad_norm": 0.8242665981720759,
|
|
"learning_rate": 6.170536856685716e-06,
|
|
"loss": 0.2885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28621822595596313,
|
|
"step": 3375,
|
|
"valid_targets_mean": 2631.4,
|
|
"valid_targets_min": 1046
|
|
},
|
|
{
|
|
"epoch": 5.3907496012759175,
|
|
"grad_norm": 0.7740512205546519,
|
|
"learning_rate": 6.113190831879698e-06,
|
|
"loss": 0.2885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28994134068489075,
|
|
"step": 3380,
|
|
"valid_targets_mean": 3237.6,
|
|
"valid_targets_min": 1022
|
|
},
|
|
{
|
|
"epoch": 5.398724082934609,
|
|
"grad_norm": 0.8131116018037541,
|
|
"learning_rate": 6.056064414720317e-06,
|
|
"loss": 0.2909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29924651980400085,
|
|
"step": 3385,
|
|
"valid_targets_mean": 3130.8,
|
|
"valid_targets_min": 1053
|
|
},
|
|
{
|
|
"epoch": 5.4066985645933014,
|
|
"grad_norm": 0.7866601845943676,
|
|
"learning_rate": 5.999158508611496e-06,
|
|
"loss": 0.2815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25244852900505066,
|
|
"step": 3390,
|
|
"valid_targets_mean": 2728.0,
|
|
"valid_targets_min": 951
|
|
},
|
|
{
|
|
"epoch": 5.414673046251994,
|
|
"grad_norm": 0.6788666232762772,
|
|
"learning_rate": 5.942474013469983e-06,
|
|
"loss": 0.2819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2859055995941162,
|
|
"step": 3395,
|
|
"valid_targets_mean": 3834.1,
|
|
"valid_targets_min": 1503
|
|
},
|
|
{
|
|
"epoch": 5.422647527910685,
|
|
"grad_norm": 0.8519626651447585,
|
|
"learning_rate": 5.886011825711117e-06,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2878015637397766,
|
|
"step": 3400,
|
|
"valid_targets_mean": 2866.2,
|
|
"valid_targets_min": 1693
|
|
},
|
|
{
|
|
"epoch": 5.430622009569378,
|
|
"grad_norm": 0.8330351227125117,
|
|
"learning_rate": 5.829772838234615e-06,
|
|
"loss": 0.3131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.336703896522522,
|
|
"step": 3405,
|
|
"valid_targets_mean": 3246.6,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 5.43859649122807,
|
|
"grad_norm": 0.7736881113319958,
|
|
"learning_rate": 5.773757940410503e-06,
|
|
"loss": 0.2973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3133225440979004,
|
|
"step": 3410,
|
|
"valid_targets_mean": 3233.7,
|
|
"valid_targets_min": 879
|
|
},
|
|
{
|
|
"epoch": 5.446570972886763,
|
|
"grad_norm": 0.7327836514414126,
|
|
"learning_rate": 5.7179680180650055e-06,
|
|
"loss": 0.276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2733174264431,
|
|
"step": 3415,
|
|
"valid_targets_mean": 3465.8,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 5.454545454545454,
|
|
"grad_norm": 0.6537813664226828,
|
|
"learning_rate": 5.6624039534665775e-06,
|
|
"loss": 0.2927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34024161100387573,
|
|
"step": 3420,
|
|
"valid_targets_mean": 4219.4,
|
|
"valid_targets_min": 1227
|
|
},
|
|
{
|
|
"epoch": 5.4625199362041466,
|
|
"grad_norm": 0.8326737321984348,
|
|
"learning_rate": 5.607066625311925e-06,
|
|
"loss": 0.2959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2964819073677063,
|
|
"step": 3425,
|
|
"valid_targets_mean": 3127.6,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 5.470494417862839,
|
|
"grad_norm": 0.9160991918175211,
|
|
"learning_rate": 5.55195690871211e-06,
|
|
"loss": 0.284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27323150634765625,
|
|
"step": 3430,
|
|
"valid_targets_mean": 2081.2,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 5.478468899521531,
|
|
"grad_norm": 1.2903598210927032,
|
|
"learning_rate": 5.497075675178727e-06,
|
|
"loss": 0.2782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30534014105796814,
|
|
"step": 3435,
|
|
"valid_targets_mean": 3697.5,
|
|
"valid_targets_min": 1721
|
|
},
|
|
{
|
|
"epoch": 5.486443381180223,
|
|
"grad_norm": 0.840947118660976,
|
|
"learning_rate": 5.442423792610118e-06,
|
|
"loss": 0.2968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2796475887298584,
|
|
"step": 3440,
|
|
"valid_targets_mean": 2588.4,
|
|
"valid_targets_min": 879
|
|
},
|
|
{
|
|
"epoch": 5.494417862838915,
|
|
"grad_norm": 0.7735441801901292,
|
|
"learning_rate": 5.388002125277627e-06,
|
|
"loss": 0.2913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2658176124095917,
|
|
"step": 3445,
|
|
"valid_targets_mean": 2953.5,
|
|
"valid_targets_min": 1581
|
|
},
|
|
{
|
|
"epoch": 5.502392344497608,
|
|
"grad_norm": 0.7487224135681535,
|
|
"learning_rate": 5.333811533811945e-06,
|
|
"loss": 0.3016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29886364936828613,
|
|
"step": 3450,
|
|
"valid_targets_mean": 3503.8,
|
|
"valid_targets_min": 1246
|
|
},
|
|
{
|
|
"epoch": 5.5103668261563,
|
|
"grad_norm": 0.7840199970728534,
|
|
"learning_rate": 5.2798528751895265e-06,
|
|
"loss": 0.2972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3059585690498352,
|
|
"step": 3455,
|
|
"valid_targets_mean": 3144.6,
|
|
"valid_targets_min": 420
|
|
},
|
|
{
|
|
"epoch": 5.518341307814992,
|
|
"grad_norm": 0.6706717817951494,
|
|
"learning_rate": 5.226127002718984e-06,
|
|
"loss": 0.2846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3075452744960785,
|
|
"step": 3460,
|
|
"valid_targets_mean": 4317.6,
|
|
"valid_targets_min": 1320
|
|
},
|
|
{
|
|
"epoch": 5.526315789473684,
|
|
"grad_norm": 0.7721944974335879,
|
|
"learning_rate": 5.1726347660276424e-06,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29788467288017273,
|
|
"step": 3465,
|
|
"valid_targets_mean": 3175.2,
|
|
"valid_targets_min": 1264
|
|
},
|
|
{
|
|
"epoch": 5.5342902711323765,
|
|
"grad_norm": 0.797522768029407,
|
|
"learning_rate": 5.119377011048066e-06,
|
|
"loss": 0.2918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2965890169143677,
|
|
"step": 3470,
|
|
"valid_targets_mean": 2753.1,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 5.542264752791069,
|
|
"grad_norm": 0.7233645755401543,
|
|
"learning_rate": 5.066354580004713e-06,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2789902091026306,
|
|
"step": 3475,
|
|
"valid_targets_mean": 3653.4,
|
|
"valid_targets_min": 1710
|
|
},
|
|
{
|
|
"epoch": 5.55023923444976,
|
|
"grad_norm": 0.7456991094402688,
|
|
"learning_rate": 5.013568311400599e-06,
|
|
"loss": 0.2968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24442625045776367,
|
|
"step": 3480,
|
|
"valid_targets_mean": 3075.9,
|
|
"valid_targets_min": 1034
|
|
},
|
|
{
|
|
"epoch": 5.558213716108453,
|
|
"grad_norm": 0.7198853970658616,
|
|
"learning_rate": 4.96101904000402e-06,
|
|
"loss": 0.2817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3117629289627075,
|
|
"step": 3485,
|
|
"valid_targets_mean": 3802.4,
|
|
"valid_targets_min": 1135
|
|
},
|
|
{
|
|
"epoch": 5.566188197767145,
|
|
"grad_norm": 0.6787102990984744,
|
|
"learning_rate": 4.908707596835396e-06,
|
|
"loss": 0.2943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29401254653930664,
|
|
"step": 3490,
|
|
"valid_targets_mean": 4449.2,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 5.574162679425838,
|
|
"grad_norm": 0.7817511151693457,
|
|
"learning_rate": 4.856634809154093e-06,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28079456090927124,
|
|
"step": 3495,
|
|
"valid_targets_mean": 3252.1,
|
|
"valid_targets_min": 819
|
|
},
|
|
{
|
|
"epoch": 5.582137161084529,
|
|
"grad_norm": 0.7062136240467745,
|
|
"learning_rate": 4.804801500445338e-06,
|
|
"loss": 0.2901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30768612027168274,
|
|
"step": 3500,
|
|
"valid_targets_mean": 4188.6,
|
|
"valid_targets_min": 1259
|
|
},
|
|
{
|
|
"epoch": 5.590111642743222,
|
|
"grad_norm": 0.7104528564852672,
|
|
"learning_rate": 4.753208490407233e-06,
|
|
"loss": 0.2579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25615689158439636,
|
|
"step": 3505,
|
|
"valid_targets_mean": 3572.3,
|
|
"valid_targets_min": 1873
|
|
},
|
|
{
|
|
"epoch": 5.598086124401914,
|
|
"grad_norm": 0.7531974441600975,
|
|
"learning_rate": 4.701856594937744e-06,
|
|
"loss": 0.3061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2587321102619171,
|
|
"step": 3510,
|
|
"valid_targets_mean": 3055.4,
|
|
"valid_targets_min": 871
|
|
},
|
|
{
|
|
"epoch": 5.606060606060606,
|
|
"grad_norm": 0.7219059789703632,
|
|
"learning_rate": 4.650746626121838e-06,
|
|
"loss": 0.2876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25097543001174927,
|
|
"step": 3515,
|
|
"valid_targets_mean": 3382.1,
|
|
"valid_targets_min": 1687
|
|
},
|
|
{
|
|
"epoch": 5.614035087719298,
|
|
"grad_norm": 0.7369615680768942,
|
|
"learning_rate": 4.5998793922186315e-06,
|
|
"loss": 0.2957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2753133177757263,
|
|
"step": 3520,
|
|
"valid_targets_mean": 3676.9,
|
|
"valid_targets_min": 1323
|
|
},
|
|
{
|
|
"epoch": 5.62200956937799,
|
|
"grad_norm": 0.8557090862234061,
|
|
"learning_rate": 4.549255697648576e-06,
|
|
"loss": 0.3108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2856610417366028,
|
|
"step": 3525,
|
|
"valid_targets_mean": 2679.6,
|
|
"valid_targets_min": 1103
|
|
},
|
|
{
|
|
"epoch": 5.629984051036683,
|
|
"grad_norm": 0.7251856778025971,
|
|
"learning_rate": 4.498876342980796e-06,
|
|
"loss": 0.2782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24028414487838745,
|
|
"step": 3530,
|
|
"valid_targets_mean": 3044.9,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 5.637958532695375,
|
|
"grad_norm": 0.7864840870150974,
|
|
"learning_rate": 4.448742124920368e-06,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2869475185871124,
|
|
"step": 3535,
|
|
"valid_targets_mean": 3141.7,
|
|
"valid_targets_min": 1401
|
|
},
|
|
{
|
|
"epoch": 5.645933014354067,
|
|
"grad_norm": 0.7110241641503189,
|
|
"learning_rate": 4.39885383629576e-06,
|
|
"loss": 0.3264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3586641252040863,
|
|
"step": 3540,
|
|
"valid_targets_mean": 3827.6,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 5.653907496012759,
|
|
"grad_norm": 0.7348179291937081,
|
|
"learning_rate": 4.349212266046285e-06,
|
|
"loss": 0.2864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2735123038291931,
|
|
"step": 3545,
|
|
"valid_targets_mean": 3672.6,
|
|
"valid_targets_min": 992
|
|
},
|
|
{
|
|
"epoch": 5.6618819776714515,
|
|
"grad_norm": 0.8495984635098721,
|
|
"learning_rate": 4.299818199209629e-06,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2979012131690979,
|
|
"step": 3550,
|
|
"valid_targets_mean": 2957.8,
|
|
"valid_targets_min": 1723
|
|
},
|
|
{
|
|
"epoch": 5.669856459330144,
|
|
"grad_norm": 0.7679653379811294,
|
|
"learning_rate": 4.250672416909407e-06,
|
|
"loss": 0.3014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3230304718017578,
|
|
"step": 3555,
|
|
"valid_targets_mean": 3302.6,
|
|
"valid_targets_min": 1237
|
|
},
|
|
{
|
|
"epoch": 5.6778309409888355,
|
|
"grad_norm": 0.6283130703480063,
|
|
"learning_rate": 4.201775696342862e-06,
|
|
"loss": 0.2975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2580801844596863,
|
|
"step": 3560,
|
|
"valid_targets_mean": 4984.8,
|
|
"valid_targets_min": 1392
|
|
},
|
|
{
|
|
"epoch": 5.685805422647528,
|
|
"grad_norm": 0.7526283613691929,
|
|
"learning_rate": 4.153128810768517e-06,
|
|
"loss": 0.2779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2530617415904999,
|
|
"step": 3565,
|
|
"valid_targets_mean": 3903.7,
|
|
"valid_targets_min": 1743
|
|
},
|
|
{
|
|
"epoch": 5.69377990430622,
|
|
"grad_norm": 0.7895154771381323,
|
|
"learning_rate": 4.104732529493991e-06,
|
|
"loss": 0.2933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3062116503715515,
|
|
"step": 3570,
|
|
"valid_targets_mean": 3920.7,
|
|
"valid_targets_min": 1646
|
|
},
|
|
{
|
|
"epoch": 5.701754385964913,
|
|
"grad_norm": 0.7934202072183948,
|
|
"learning_rate": 4.056587617863825e-06,
|
|
"loss": 0.2928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2501561641693115,
|
|
"step": 3575,
|
|
"valid_targets_mean": 2928.9,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 5.709728867623604,
|
|
"grad_norm": 0.8166759222086794,
|
|
"learning_rate": 4.008694837247345e-06,
|
|
"loss": 0.2979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3217408061027527,
|
|
"step": 3580,
|
|
"valid_targets_mean": 3681.3,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 5.717703349282297,
|
|
"grad_norm": 0.8292119649057055,
|
|
"learning_rate": 3.961054945026674e-06,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2800944447517395,
|
|
"step": 3585,
|
|
"valid_targets_mean": 3050.6,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 5.725677830940989,
|
|
"grad_norm": 0.7657800096671613,
|
|
"learning_rate": 3.913668694584705e-06,
|
|
"loss": 0.2925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2820630669593811,
|
|
"step": 3590,
|
|
"valid_targets_mean": 3334.1,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 5.733652312599681,
|
|
"grad_norm": 0.7630886815787318,
|
|
"learning_rate": 3.866536835293227e-06,
|
|
"loss": 0.2999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34516584873199463,
|
|
"step": 3595,
|
|
"valid_targets_mean": 3878.1,
|
|
"valid_targets_min": 1329
|
|
},
|
|
{
|
|
"epoch": 5.741626794258373,
|
|
"grad_norm": 0.8649689502986208,
|
|
"learning_rate": 3.819660112501053e-06,
|
|
"loss": 0.305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34323447942733765,
|
|
"step": 3600,
|
|
"valid_targets_mean": 2760.4,
|
|
"valid_targets_min": 1503
|
|
},
|
|
{
|
|
"epoch": 5.749601275917065,
|
|
"grad_norm": 0.7619895352765311,
|
|
"learning_rate": 3.773039267522227e-06,
|
|
"loss": 0.2824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2712087035179138,
|
|
"step": 3605,
|
|
"valid_targets_mean": 3101.5,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 5.757575757575758,
|
|
"grad_norm": 0.8445010622725413,
|
|
"learning_rate": 3.72667503762433e-06,
|
|
"loss": 0.3153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27487391233444214,
|
|
"step": 3610,
|
|
"valid_targets_mean": 3087.4,
|
|
"valid_targets_min": 1112
|
|
},
|
|
{
|
|
"epoch": 5.76555023923445,
|
|
"grad_norm": 0.699688970476364,
|
|
"learning_rate": 3.680568156016786e-06,
|
|
"loss": 0.2917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.272575318813324,
|
|
"step": 3615,
|
|
"valid_targets_mean": 4070.9,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 5.773524720893142,
|
|
"grad_norm": 0.7794711223279197,
|
|
"learning_rate": 3.6347193518392776e-06,
|
|
"loss": 0.2779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28461533784866333,
|
|
"step": 3620,
|
|
"valid_targets_mean": 3133.4,
|
|
"valid_targets_min": 1431
|
|
},
|
|
{
|
|
"epoch": 5.781499202551834,
|
|
"grad_norm": 0.6929691440803091,
|
|
"learning_rate": 3.58912935015024e-06,
|
|
"loss": 0.293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24106857180595398,
|
|
"step": 3625,
|
|
"valid_targets_mean": 3540.1,
|
|
"valid_targets_min": 422
|
|
},
|
|
{
|
|
"epoch": 5.7894736842105265,
|
|
"grad_norm": 0.8001798868362564,
|
|
"learning_rate": 3.543798871915367e-06,
|
|
"loss": 0.3068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3152342140674591,
|
|
"step": 3630,
|
|
"valid_targets_mean": 3537.1,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 5.797448165869218,
|
|
"grad_norm": 0.7751974549806043,
|
|
"learning_rate": 3.498728633996209e-06,
|
|
"loss": 0.2854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30529099702835083,
|
|
"step": 3635,
|
|
"valid_targets_mean": 3213.6,
|
|
"valid_targets_min": 1546
|
|
},
|
|
{
|
|
"epoch": 5.8054226475279105,
|
|
"grad_norm": 0.7392426930112831,
|
|
"learning_rate": 3.453919349138859e-06,
|
|
"loss": 0.2958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30453023314476013,
|
|
"step": 3640,
|
|
"valid_targets_mean": 3634.0,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 5.813397129186603,
|
|
"grad_norm": 0.7831809516470786,
|
|
"learning_rate": 3.4093717259626514e-06,
|
|
"loss": 0.2939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28058892488479614,
|
|
"step": 3645,
|
|
"valid_targets_mean": 3135.0,
|
|
"valid_targets_min": 979
|
|
},
|
|
{
|
|
"epoch": 5.821371610845295,
|
|
"grad_norm": 0.8149020436059916,
|
|
"learning_rate": 3.365086468948988e-06,
|
|
"loss": 0.304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31160250306129456,
|
|
"step": 3650,
|
|
"valid_targets_mean": 3394.0,
|
|
"valid_targets_min": 1761
|
|
},
|
|
{
|
|
"epoch": 5.829346092503988,
|
|
"grad_norm": 0.9060044585538425,
|
|
"learning_rate": 3.321064278430175e-06,
|
|
"loss": 0.2955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.320772260427475,
|
|
"step": 3655,
|
|
"valid_targets_mean": 2646.4,
|
|
"valid_targets_min": 1022
|
|
},
|
|
{
|
|
"epoch": 5.837320574162679,
|
|
"grad_norm": 0.7242362392758929,
|
|
"learning_rate": 3.277305850578345e-06,
|
|
"loss": 0.3012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2689828872680664,
|
|
"step": 3660,
|
|
"valid_targets_mean": 3484.1,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 5.845295055821372,
|
|
"grad_norm": 0.7771330054394557,
|
|
"learning_rate": 3.2338118773944684e-06,
|
|
"loss": 0.2931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2721349000930786,
|
|
"step": 3665,
|
|
"valid_targets_mean": 2851.6,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 5.853269537480064,
|
|
"grad_norm": 0.694415448405407,
|
|
"learning_rate": 3.1905830466973975e-06,
|
|
"loss": 0.2958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2633028030395508,
|
|
"step": 3670,
|
|
"valid_targets_mean": 3929.9,
|
|
"valid_targets_min": 1350
|
|
},
|
|
{
|
|
"epoch": 5.861244019138756,
|
|
"grad_norm": 0.7910294534726202,
|
|
"learning_rate": 3.14762004211298e-06,
|
|
"loss": 0.2921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31551462411880493,
|
|
"step": 3675,
|
|
"valid_targets_mean": 3557.8,
|
|
"valid_targets_min": 2057
|
|
},
|
|
{
|
|
"epoch": 5.869218500797448,
|
|
"grad_norm": 0.7634984881053162,
|
|
"learning_rate": 3.1049235430632696e-06,
|
|
"loss": 0.2989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30648574233055115,
|
|
"step": 3680,
|
|
"valid_targets_mean": 3140.0,
|
|
"valid_targets_min": 1081
|
|
},
|
|
{
|
|
"epoch": 5.87719298245614,
|
|
"grad_norm": 0.8791701716670745,
|
|
"learning_rate": 3.062494224755759e-06,
|
|
"loss": 0.2986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3178468942642212,
|
|
"step": 3685,
|
|
"valid_targets_mean": 2702.6,
|
|
"valid_targets_min": 1249
|
|
},
|
|
{
|
|
"epoch": 5.885167464114833,
|
|
"grad_norm": 0.8104924406322477,
|
|
"learning_rate": 3.0203327581727195e-06,
|
|
"loss": 0.2942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2999758720397949,
|
|
"step": 3690,
|
|
"valid_targets_mean": 3018.0,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 5.893141945773524,
|
|
"grad_norm": 0.7416267578110668,
|
|
"learning_rate": 2.9784398100605937e-06,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2717277407646179,
|
|
"step": 3695,
|
|
"valid_targets_mean": 3577.7,
|
|
"valid_targets_min": 1335
|
|
},
|
|
{
|
|
"epoch": 5.901116427432217,
|
|
"grad_norm": 0.7841489737429255,
|
|
"learning_rate": 2.9368160429194127e-06,
|
|
"loss": 0.294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27620965242385864,
|
|
"step": 3700,
|
|
"valid_targets_mean": 3221.7,
|
|
"valid_targets_min": 977
|
|
},
|
|
{
|
|
"epoch": 5.909090909090909,
|
|
"grad_norm": 0.7143024454926995,
|
|
"learning_rate": 2.895462114992371e-06,
|
|
"loss": 0.297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2524372637271881,
|
|
"step": 3705,
|
|
"valid_targets_mean": 3235.1,
|
|
"valid_targets_min": 1236
|
|
},
|
|
{
|
|
"epoch": 5.917065390749602,
|
|
"grad_norm": 0.7034631827985708,
|
|
"learning_rate": 2.8543786802553943e-06,
|
|
"loss": 0.2885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2638762593269348,
|
|
"step": 3710,
|
|
"valid_targets_mean": 3184.6,
|
|
"valid_targets_min": 1258
|
|
},
|
|
{
|
|
"epoch": 5.925039872408293,
|
|
"grad_norm": 0.7035768608970581,
|
|
"learning_rate": 2.813566388406781e-06,
|
|
"loss": 0.3003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2590932846069336,
|
|
"step": 3715,
|
|
"valid_targets_mean": 3645.1,
|
|
"valid_targets_min": 1367
|
|
},
|
|
{
|
|
"epoch": 5.9330143540669855,
|
|
"grad_norm": 0.8605725969248682,
|
|
"learning_rate": 2.773025884856957e-06,
|
|
"loss": 0.2642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2713908851146698,
|
|
"step": 3720,
|
|
"valid_targets_mean": 2531.6,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 5.940988835725678,
|
|
"grad_norm": 0.7499282180292243,
|
|
"learning_rate": 2.7327578107182585e-06,
|
|
"loss": 0.2841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29156097769737244,
|
|
"step": 3725,
|
|
"valid_targets_mean": 3615.2,
|
|
"valid_targets_min": 653
|
|
},
|
|
{
|
|
"epoch": 5.94896331738437,
|
|
"grad_norm": 0.7200832783103432,
|
|
"learning_rate": 2.692762802794775e-06,
|
|
"loss": 0.3055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2945834994316101,
|
|
"step": 3730,
|
|
"valid_targets_mean": 3942.2,
|
|
"valid_targets_min": 1559
|
|
},
|
|
{
|
|
"epoch": 5.956937799043062,
|
|
"grad_norm": 0.6892179330043087,
|
|
"learning_rate": 2.6530414935723104e-06,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30010709166526794,
|
|
"step": 3735,
|
|
"valid_targets_mean": 4309.0,
|
|
"valid_targets_min": 1537
|
|
},
|
|
{
|
|
"epoch": 5.964912280701754,
|
|
"grad_norm": 0.8211555748772531,
|
|
"learning_rate": 2.6135945112083506e-06,
|
|
"loss": 0.2971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3027190566062927,
|
|
"step": 3740,
|
|
"valid_targets_mean": 2867.8,
|
|
"valid_targets_min": 1168
|
|
},
|
|
{
|
|
"epoch": 5.972886762360447,
|
|
"grad_norm": 0.8921002206117652,
|
|
"learning_rate": 2.574422479522156e-06,
|
|
"loss": 0.2808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29479217529296875,
|
|
"step": 3745,
|
|
"valid_targets_mean": 2485.8,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 5.980861244019139,
|
|
"grad_norm": 0.7851163716809045,
|
|
"learning_rate": 2.535526017984884e-06,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3180612027645111,
|
|
"step": 3750,
|
|
"valid_targets_mean": 3481.7,
|
|
"valid_targets_min": 1185
|
|
},
|
|
{
|
|
"epoch": 5.988835725677831,
|
|
"grad_norm": 0.7873549583062809,
|
|
"learning_rate": 2.4969057417097807e-06,
|
|
"loss": 0.2939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27344661951065063,
|
|
"step": 3755,
|
|
"valid_targets_mean": 3306.9,
|
|
"valid_targets_min": 532
|
|
},
|
|
{
|
|
"epoch": 5.996810207336523,
|
|
"grad_norm": 0.8252224664461691,
|
|
"learning_rate": 2.458562261442483e-06,
|
|
"loss": 0.2985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32259202003479004,
|
|
"step": 3760,
|
|
"valid_targets_mean": 2873.8,
|
|
"valid_targets_min": 1386
|
|
},
|
|
{
|
|
"epoch": 6.0047846889952154,
|
|
"grad_norm": 0.7974715268930842,
|
|
"learning_rate": 2.4204961835513263e-06,
|
|
"loss": 0.303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31450459361076355,
|
|
"step": 3765,
|
|
"valid_targets_mean": 3169.5,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 6.012759170653908,
|
|
"grad_norm": 0.7430227006086101,
|
|
"learning_rate": 2.3827081100177797e-06,
|
|
"loss": 0.2772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2682039439678192,
|
|
"step": 3770,
|
|
"valid_targets_mean": 3057.6,
|
|
"valid_targets_min": 1473
|
|
},
|
|
{
|
|
"epoch": 6.020733652312599,
|
|
"grad_norm": 0.8192137838147895,
|
|
"learning_rate": 2.3451986384269266e-06,
|
|
"loss": 0.2587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2474377155303955,
|
|
"step": 3775,
|
|
"valid_targets_mean": 2550.8,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 6.028708133971292,
|
|
"grad_norm": 0.7930597022762513,
|
|
"learning_rate": 2.307968361957993e-06,
|
|
"loss": 0.2782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2826935350894928,
|
|
"step": 3780,
|
|
"valid_targets_mean": 3171.9,
|
|
"valid_targets_min": 995
|
|
},
|
|
{
|
|
"epoch": 6.036682615629984,
|
|
"grad_norm": 0.7969501011926802,
|
|
"learning_rate": 2.2710178693749805e-06,
|
|
"loss": 0.2903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29617172479629517,
|
|
"step": 3785,
|
|
"valid_targets_mean": 3261.6,
|
|
"valid_targets_min": 1593
|
|
},
|
|
{
|
|
"epoch": 6.044657097288677,
|
|
"grad_norm": 0.6807343010501113,
|
|
"learning_rate": 2.2343477450173665e-06,
|
|
"loss": 0.285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3070717453956604,
|
|
"step": 3790,
|
|
"valid_targets_mean": 4419.9,
|
|
"valid_targets_min": 915
|
|
},
|
|
{
|
|
"epoch": 6.052631578947368,
|
|
"grad_norm": 0.6647856883220293,
|
|
"learning_rate": 2.197958568790839e-06,
|
|
"loss": 0.2738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2669077515602112,
|
|
"step": 3795,
|
|
"valid_targets_mean": 4130.1,
|
|
"valid_targets_min": 1173
|
|
},
|
|
{
|
|
"epoch": 6.0606060606060606,
|
|
"grad_norm": 1.0021170006130768,
|
|
"learning_rate": 2.161850916158148e-06,
|
|
"loss": 0.2933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32475006580352783,
|
|
"step": 3800,
|
|
"valid_targets_mean": 2529.1,
|
|
"valid_targets_min": 1081
|
|
},
|
|
{
|
|
"epoch": 6.068580542264753,
|
|
"grad_norm": 0.7364824009867521,
|
|
"learning_rate": 2.1260253581299996e-06,
|
|
"loss": 0.2906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2675652503967285,
|
|
"step": 3805,
|
|
"valid_targets_mean": 3359.9,
|
|
"valid_targets_min": 1138
|
|
},
|
|
{
|
|
"epoch": 6.076555023923445,
|
|
"grad_norm": 0.9180628419689219,
|
|
"learning_rate": 2.0904824612560046e-06,
|
|
"loss": 0.2866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28436872363090515,
|
|
"step": 3810,
|
|
"valid_targets_mean": 2637.1,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 6.084529505582137,
|
|
"grad_norm": 0.7936732400598879,
|
|
"learning_rate": 2.0552227876157536e-06,
|
|
"loss": 0.2817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2766130864620209,
|
|
"step": 3815,
|
|
"valid_targets_mean": 3107.8,
|
|
"valid_targets_min": 1291
|
|
},
|
|
{
|
|
"epoch": 6.092503987240829,
|
|
"grad_norm": 0.7272076152013754,
|
|
"learning_rate": 2.020246894809912e-06,
|
|
"loss": 0.2995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36514467000961304,
|
|
"step": 3820,
|
|
"valid_targets_mean": 3962.5,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 6.100478468899522,
|
|
"grad_norm": 0.8156621071496624,
|
|
"learning_rate": 1.9855553359513836e-06,
|
|
"loss": 0.2756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26616501808166504,
|
|
"step": 3825,
|
|
"valid_targets_mean": 2777.8,
|
|
"valid_targets_min": 1677
|
|
},
|
|
{
|
|
"epoch": 6.108452950558214,
|
|
"grad_norm": 0.7062812539751833,
|
|
"learning_rate": 1.9511486596566054e-06,
|
|
"loss": 0.2985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2987385094165802,
|
|
"step": 3830,
|
|
"valid_targets_mean": 3728.7,
|
|
"valid_targets_min": 1398
|
|
},
|
|
{
|
|
"epoch": 6.116427432216906,
|
|
"grad_norm": 0.7529011735577514,
|
|
"learning_rate": 1.917027410036825e-06,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2866564393043518,
|
|
"step": 3835,
|
|
"valid_targets_mean": 3176.4,
|
|
"valid_targets_min": 1254
|
|
},
|
|
{
|
|
"epoch": 6.124401913875598,
|
|
"grad_norm": 0.7759698008544635,
|
|
"learning_rate": 1.8831921266895348e-06,
|
|
"loss": 0.2751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2970890998840332,
|
|
"step": 3840,
|
|
"valid_targets_mean": 4038.4,
|
|
"valid_targets_min": 873
|
|
},
|
|
{
|
|
"epoch": 6.1323763955342905,
|
|
"grad_norm": 0.8074794899189528,
|
|
"learning_rate": 1.8496433446899197e-06,
|
|
"loss": 0.288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24439358711242676,
|
|
"step": 3845,
|
|
"valid_targets_mean": 2845.8,
|
|
"valid_targets_min": 943
|
|
},
|
|
{
|
|
"epoch": 6.140350877192983,
|
|
"grad_norm": 0.6898043592327477,
|
|
"learning_rate": 1.8163815945823881e-06,
|
|
"loss": 0.2724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2419794499874115,
|
|
"step": 3850,
|
|
"valid_targets_mean": 3956.2,
|
|
"valid_targets_min": 1299
|
|
},
|
|
{
|
|
"epoch": 6.148325358851674,
|
|
"grad_norm": 0.9031012877118172,
|
|
"learning_rate": 1.7834074023722082e-06,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26671546697616577,
|
|
"step": 3855,
|
|
"valid_targets_mean": 2401.6,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 6.156299840510367,
|
|
"grad_norm": 0.8011034119625807,
|
|
"learning_rate": 1.7507212895171632e-06,
|
|
"loss": 0.2976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3011058568954468,
|
|
"step": 3860,
|
|
"valid_targets_mean": 3336.1,
|
|
"valid_targets_min": 1032
|
|
},
|
|
{
|
|
"epoch": 6.164274322169059,
|
|
"grad_norm": 0.7467429605008289,
|
|
"learning_rate": 1.7183237729193081e-06,
|
|
"loss": 0.2813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2877018451690674,
|
|
"step": 3865,
|
|
"valid_targets_mean": 3870.4,
|
|
"valid_targets_min": 1716
|
|
},
|
|
{
|
|
"epoch": 6.172248803827751,
|
|
"grad_norm": 0.783716759095167,
|
|
"learning_rate": 1.6862153649168211e-06,
|
|
"loss": 0.2746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2814072370529175,
|
|
"step": 3870,
|
|
"valid_targets_mean": 3224.5,
|
|
"valid_targets_min": 1183
|
|
},
|
|
{
|
|
"epoch": 6.180223285486443,
|
|
"grad_norm": 0.7399110728245858,
|
|
"learning_rate": 1.6543965732758737e-06,
|
|
"loss": 0.2823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2924342155456543,
|
|
"step": 3875,
|
|
"valid_targets_mean": 3872.3,
|
|
"valid_targets_min": 1840
|
|
},
|
|
{
|
|
"epoch": 6.188197767145136,
|
|
"grad_norm": 0.7273846391286767,
|
|
"learning_rate": 1.6228679011826032e-06,
|
|
"loss": 0.2835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2778133153915405,
|
|
"step": 3880,
|
|
"valid_targets_mean": 4085.9,
|
|
"valid_targets_min": 1354
|
|
},
|
|
{
|
|
"epoch": 6.196172248803828,
|
|
"grad_norm": 0.8959125976220985,
|
|
"learning_rate": 1.591629847235172e-06,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28340858221054077,
|
|
"step": 3885,
|
|
"valid_targets_mean": 2557.8,
|
|
"valid_targets_min": 1166
|
|
},
|
|
{
|
|
"epoch": 6.2041467304625195,
|
|
"grad_norm": 0.8088800365262653,
|
|
"learning_rate": 1.5606829054358686e-06,
|
|
"loss": 0.2841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29278314113616943,
|
|
"step": 3890,
|
|
"valid_targets_mean": 2953.4,
|
|
"valid_targets_min": 1373
|
|
},
|
|
{
|
|
"epoch": 6.212121212121212,
|
|
"grad_norm": 0.8130181864911936,
|
|
"learning_rate": 1.5300275651832963e-06,
|
|
"loss": 0.2859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27339938282966614,
|
|
"step": 3895,
|
|
"valid_targets_mean": 3460.8,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 6.220095693779904,
|
|
"grad_norm": 0.8268069862607291,
|
|
"learning_rate": 1.499664311264648e-06,
|
|
"loss": 0.2849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27889516949653625,
|
|
"step": 3900,
|
|
"valid_targets_mean": 2982.8,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 6.228070175438597,
|
|
"grad_norm": 0.7158470446181221,
|
|
"learning_rate": 1.4695936238480135e-06,
|
|
"loss": 0.2705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.239360973238945,
|
|
"step": 3905,
|
|
"valid_targets_mean": 3583.8,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 6.236044657097288,
|
|
"grad_norm": 0.8204092497568253,
|
|
"learning_rate": 1.4398159784748144e-06,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2355031967163086,
|
|
"step": 3910,
|
|
"valid_targets_mean": 2498.1,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 6.244019138755981,
|
|
"grad_norm": 0.7434970325539655,
|
|
"learning_rate": 1.4103318460522598e-06,
|
|
"loss": 0.3006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24456895887851715,
|
|
"step": 3915,
|
|
"valid_targets_mean": 3054.6,
|
|
"valid_targets_min": 1390
|
|
},
|
|
{
|
|
"epoch": 6.251993620414673,
|
|
"grad_norm": 0.7382990572168812,
|
|
"learning_rate": 1.3811416928459177e-06,
|
|
"loss": 0.2527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2701323330402374,
|
|
"step": 3920,
|
|
"valid_targets_mean": 3795.4,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 6.2599681020733655,
|
|
"grad_norm": 0.8395387379831285,
|
|
"learning_rate": 1.3522459804723353e-06,
|
|
"loss": 0.282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2629268169403076,
|
|
"step": 3925,
|
|
"valid_targets_mean": 2798.2,
|
|
"valid_targets_min": 1384
|
|
},
|
|
{
|
|
"epoch": 6.267942583732057,
|
|
"grad_norm": 0.7977068963204613,
|
|
"learning_rate": 1.3236451658917293e-06,
|
|
"loss": 0.2925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29388511180877686,
|
|
"step": 3930,
|
|
"valid_targets_mean": 3395.2,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 6.2759170653907494,
|
|
"grad_norm": 0.8613157870285739,
|
|
"learning_rate": 1.2953397014007728e-06,
|
|
"loss": 0.2946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2841698229312897,
|
|
"step": 3935,
|
|
"valid_targets_mean": 2708.6,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 6.283891547049442,
|
|
"grad_norm": 0.7596441613284619,
|
|
"learning_rate": 1.2673300346254447e-06,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2941230535507202,
|
|
"step": 3940,
|
|
"valid_targets_mean": 3576.6,
|
|
"valid_targets_min": 1766
|
|
},
|
|
{
|
|
"epoch": 6.291866028708134,
|
|
"grad_norm": 0.8219071424379902,
|
|
"learning_rate": 1.239616608513925e-06,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2530288100242615,
|
|
"step": 3945,
|
|
"valid_targets_mean": 3618.8,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 6.299840510366826,
|
|
"grad_norm": 0.8169388577488748,
|
|
"learning_rate": 1.2121998613296259e-06,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27685093879699707,
|
|
"step": 3950,
|
|
"valid_targets_mean": 2957.6,
|
|
"valid_targets_min": 1462
|
|
},
|
|
{
|
|
"epoch": 6.307814992025518,
|
|
"grad_norm": 0.819846440088174,
|
|
"learning_rate": 1.1850802266442396e-06,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2383117973804474,
|
|
"step": 3955,
|
|
"valid_targets_mean": 2856.3,
|
|
"valid_targets_min": 879
|
|
},
|
|
{
|
|
"epoch": 6.315789473684211,
|
|
"grad_norm": 0.8156744936722151,
|
|
"learning_rate": 1.1582581333308784e-06,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27129244804382324,
|
|
"step": 3960,
|
|
"valid_targets_mean": 3501.8,
|
|
"valid_targets_min": 823
|
|
},
|
|
{
|
|
"epoch": 6.323763955342903,
|
|
"grad_norm": 0.7486283247759367,
|
|
"learning_rate": 1.1317340055573122e-06,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2514297366142273,
|
|
"step": 3965,
|
|
"valid_targets_mean": 3596.1,
|
|
"valid_targets_min": 2039
|
|
},
|
|
{
|
|
"epoch": 6.3317384370015946,
|
|
"grad_norm": 1.0539919375433147,
|
|
"learning_rate": 1.1055082627792357e-06,
|
|
"loss": 0.2783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3055712580680847,
|
|
"step": 3970,
|
|
"valid_targets_mean": 3362.6,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 6.339712918660287,
|
|
"grad_norm": 0.8199147609602492,
|
|
"learning_rate": 1.0795813197336602e-06,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.241234689950943,
|
|
"step": 3975,
|
|
"valid_targets_mean": 2670.2,
|
|
"valid_targets_min": 1442
|
|
},
|
|
{
|
|
"epoch": 6.347687400318979,
|
|
"grad_norm": 0.7670654615425087,
|
|
"learning_rate": 1.0539535864323391e-06,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33173882961273193,
|
|
"step": 3980,
|
|
"valid_targets_mean": 3764.7,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 6.355661881977672,
|
|
"grad_norm": 0.7337344351711541,
|
|
"learning_rate": 1.0286254681552777e-06,
|
|
"loss": 0.3032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3128550052642822,
|
|
"step": 3985,
|
|
"valid_targets_mean": 3599.2,
|
|
"valid_targets_min": 1104
|
|
},
|
|
{
|
|
"epoch": 6.363636363636363,
|
|
"grad_norm": 0.7497608589961262,
|
|
"learning_rate": 1.0035973654443466e-06,
|
|
"loss": 0.2943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.297832727432251,
|
|
"step": 3990,
|
|
"valid_targets_mean": 3702.3,
|
|
"valid_targets_min": 1120
|
|
},
|
|
{
|
|
"epoch": 6.371610845295056,
|
|
"grad_norm": 0.898324812220322,
|
|
"learning_rate": 9.788696740969295e-07,
|
|
"loss": 0.2852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2749426066875458,
|
|
"step": 3995,
|
|
"valid_targets_mean": 2732.2,
|
|
"valid_targets_min": 1337
|
|
},
|
|
{
|
|
"epoch": 6.379585326953748,
|
|
"grad_norm": 0.809451988161517,
|
|
"learning_rate": 9.544427851596661e-07,
|
|
"loss": 0.2836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31416672468185425,
|
|
"step": 4000,
|
|
"valid_targets_mean": 3079.9,
|
|
"valid_targets_min": 1346
|
|
},
|
|
{
|
|
"epoch": 6.3875598086124405,
|
|
"grad_norm": 0.8766199834991626,
|
|
"learning_rate": 9.303170849222764e-07,
|
|
"loss": 0.2848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2843509316444397,
|
|
"step": 4005,
|
|
"valid_targets_mean": 2680.5,
|
|
"valid_targets_min": 1190
|
|
},
|
|
{
|
|
"epoch": 6.395534290271132,
|
|
"grad_norm": 0.9355781681512354,
|
|
"learning_rate": 9.064929549114421e-07,
|
|
"loss": 0.276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31102293729782104,
|
|
"step": 4010,
|
|
"valid_targets_mean": 2565.0,
|
|
"valid_targets_min": 1043
|
|
},
|
|
{
|
|
"epoch": 6.4035087719298245,
|
|
"grad_norm": 0.7649640822207412,
|
|
"learning_rate": 8.829707718847835e-07,
|
|
"loss": 0.267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2721022367477417,
|
|
"step": 4015,
|
|
"valid_targets_mean": 3392.1,
|
|
"valid_targets_min": 1184
|
|
},
|
|
{
|
|
"epoch": 6.411483253588517,
|
|
"grad_norm": 1.0222195411108324,
|
|
"learning_rate": 8.597509078248923e-07,
|
|
"loss": 0.2746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2733663022518158,
|
|
"step": 4020,
|
|
"valid_targets_mean": 2933.9,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 6.419457735247209,
|
|
"grad_norm": 0.8000088389097196,
|
|
"learning_rate": 8.368337299334461e-07,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2713809311389923,
|
|
"step": 4025,
|
|
"valid_targets_mean": 3406.8,
|
|
"valid_targets_min": 1522
|
|
},
|
|
{
|
|
"epoch": 6.427432216905901,
|
|
"grad_norm": 0.8024039200092558,
|
|
"learning_rate": 8.142196006254144e-07,
|
|
"loss": 0.2813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29235154390335083,
|
|
"step": 4030,
|
|
"valid_targets_mean": 2990.3,
|
|
"valid_targets_min": 1047
|
|
},
|
|
{
|
|
"epoch": 6.435406698564593,
|
|
"grad_norm": 0.7124946721040755,
|
|
"learning_rate": 7.919088775233264e-07,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31047379970550537,
|
|
"step": 4035,
|
|
"valid_targets_mean": 3673.7,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 6.443381180223286,
|
|
"grad_norm": 0.7144386618639592,
|
|
"learning_rate": 7.699019134515917e-07,
|
|
"loss": 0.2838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26780247688293457,
|
|
"step": 4040,
|
|
"valid_targets_mean": 3647.6,
|
|
"valid_targets_min": 1175
|
|
},
|
|
{
|
|
"epoch": 6.451355661881978,
|
|
"grad_norm": 0.8643030305178355,
|
|
"learning_rate": 7.48199056430956e-07,
|
|
"loss": 0.2974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3704472482204437,
|
|
"step": 4045,
|
|
"valid_targets_mean": 3557.1,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 6.45933014354067,
|
|
"grad_norm": 0.7420714929369399,
|
|
"learning_rate": 7.268006496729762e-07,
|
|
"loss": 0.2885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27698734402656555,
|
|
"step": 4050,
|
|
"valid_targets_mean": 3721.9,
|
|
"valid_targets_min": 1065
|
|
},
|
|
{
|
|
"epoch": 6.467304625199362,
|
|
"grad_norm": 0.8807597694314151,
|
|
"learning_rate": 7.057070315745851e-07,
|
|
"loss": 0.2837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30026280879974365,
|
|
"step": 4055,
|
|
"valid_targets_mean": 2657.8,
|
|
"valid_targets_min": 1035
|
|
},
|
|
{
|
|
"epoch": 6.475279106858054,
|
|
"grad_norm": 0.6936189572411893,
|
|
"learning_rate": 6.849185357127686e-07,
|
|
"loss": 0.2811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3064683973789215,
|
|
"step": 4060,
|
|
"valid_targets_mean": 4207.0,
|
|
"valid_targets_min": 1401
|
|
},
|
|
{
|
|
"epoch": 6.483253588516747,
|
|
"grad_norm": 0.8460305078527767,
|
|
"learning_rate": 6.64435490839257e-07,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24955013394355774,
|
|
"step": 4065,
|
|
"valid_targets_mean": 2585.5,
|
|
"valid_targets_min": 1068
|
|
},
|
|
{
|
|
"epoch": 6.491228070175438,
|
|
"grad_norm": 0.76273537186481,
|
|
"learning_rate": 6.442582208753578e-07,
|
|
"loss": 0.2766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25004202127456665,
|
|
"step": 4070,
|
|
"valid_targets_mean": 3283.9,
|
|
"valid_targets_min": 1526
|
|
},
|
|
{
|
|
"epoch": 6.499202551834131,
|
|
"grad_norm": 0.7238461145978691,
|
|
"learning_rate": 6.243870449068068e-07,
|
|
"loss": 0.2812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29826968908309937,
|
|
"step": 4075,
|
|
"valid_targets_mean": 3690.1,
|
|
"valid_targets_min": 1094
|
|
},
|
|
{
|
|
"epoch": 6.507177033492823,
|
|
"grad_norm": 0.9109208163608681,
|
|
"learning_rate": 6.048222771787382e-07,
|
|
"loss": 0.3005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26449793577194214,
|
|
"step": 4080,
|
|
"valid_targets_mean": 2396.9,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 6.515151515151516,
|
|
"grad_norm": 0.7947185635572114,
|
|
"learning_rate": 5.85564227090707e-07,
|
|
"loss": 0.2762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26804065704345703,
|
|
"step": 4085,
|
|
"valid_targets_mean": 3060.6,
|
|
"valid_targets_min": 1335
|
|
},
|
|
{
|
|
"epoch": 6.523125996810207,
|
|
"grad_norm": 0.7622093987893789,
|
|
"learning_rate": 5.666131991917989e-07,
|
|
"loss": 0.2829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28946974873542786,
|
|
"step": 4090,
|
|
"valid_targets_mean": 3373.7,
|
|
"valid_targets_min": 1430
|
|
},
|
|
{
|
|
"epoch": 6.5311004784688995,
|
|
"grad_norm": 0.7550679332516894,
|
|
"learning_rate": 5.479694931758194e-07,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2812947630882263,
|
|
"step": 4095,
|
|
"valid_targets_mean": 3302.4,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 6.539074960127592,
|
|
"grad_norm": 0.8544383711374757,
|
|
"learning_rate": 5.296334038765483e-07,
|
|
"loss": 0.2896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2988916039466858,
|
|
"step": 4100,
|
|
"valid_targets_mean": 2972.1,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 6.5470494417862835,
|
|
"grad_norm": 0.8178568207638477,
|
|
"learning_rate": 5.116052212630696e-07,
|
|
"loss": 0.2642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2973562180995941,
|
|
"step": 4105,
|
|
"valid_targets_mean": 3012.4,
|
|
"valid_targets_min": 1123
|
|
},
|
|
{
|
|
"epoch": 6.555023923444976,
|
|
"grad_norm": 0.8833994115882965,
|
|
"learning_rate": 4.938852304352026e-07,
|
|
"loss": 0.3018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2733122706413269,
|
|
"step": 4110,
|
|
"valid_targets_mean": 2883.2,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 6.562998405103668,
|
|
"grad_norm": 0.6603706578424878,
|
|
"learning_rate": 4.7647371161898547e-07,
|
|
"loss": 0.2788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3092110753059387,
|
|
"step": 4115,
|
|
"valid_targets_mean": 4563.2,
|
|
"valid_targets_min": 1297
|
|
},
|
|
{
|
|
"epoch": 6.570972886762361,
|
|
"grad_norm": 0.7378334587942197,
|
|
"learning_rate": 4.593709401622359e-07,
|
|
"loss": 0.2887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31666845083236694,
|
|
"step": 4120,
|
|
"valid_targets_mean": 3965.4,
|
|
"valid_targets_min": 1640
|
|
},
|
|
{
|
|
"epoch": 6.578947368421053,
|
|
"grad_norm": 0.8194664700995854,
|
|
"learning_rate": 4.425771865302153e-07,
|
|
"loss": 0.2815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24312043190002441,
|
|
"step": 4125,
|
|
"valid_targets_mean": 2733.4,
|
|
"valid_targets_min": 1129
|
|
},
|
|
{
|
|
"epoch": 6.586921850079745,
|
|
"grad_norm": 0.8300117776862517,
|
|
"learning_rate": 4.2609271630133174e-07,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2855415642261505,
|
|
"step": 4130,
|
|
"valid_targets_mean": 3026.8,
|
|
"valid_targets_min": 1448
|
|
},
|
|
{
|
|
"epoch": 6.594896331738437,
|
|
"grad_norm": 0.7563673630469645,
|
|
"learning_rate": 4.099177901629525e-07,
|
|
"loss": 0.2821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29849427938461304,
|
|
"step": 4135,
|
|
"valid_targets_mean": 4127.1,
|
|
"valid_targets_min": 1193
|
|
},
|
|
{
|
|
"epoch": 6.6028708133971294,
|
|
"grad_norm": 0.8609110082950004,
|
|
"learning_rate": 3.9405266390727836e-07,
|
|
"loss": 0.3012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31364691257476807,
|
|
"step": 4140,
|
|
"valid_targets_mean": 3414.8,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 6.610845295055821,
|
|
"grad_norm": 0.7498677479295537,
|
|
"learning_rate": 3.7849758842729344e-07,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26106780767440796,
|
|
"step": 4145,
|
|
"valid_targets_mean": 3164.6,
|
|
"valid_targets_min": 1025
|
|
},
|
|
{
|
|
"epoch": 6.618819776714513,
|
|
"grad_norm": 0.7999124140910767,
|
|
"learning_rate": 3.632528097128085e-07,
|
|
"loss": 0.2741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.267208456993103,
|
|
"step": 4150,
|
|
"valid_targets_mean": 3083.7,
|
|
"valid_targets_min": 1032
|
|
},
|
|
{
|
|
"epoch": 6.626794258373206,
|
|
"grad_norm": 0.8540186670468961,
|
|
"learning_rate": 3.48318568846564e-07,
|
|
"loss": 0.2849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2639211416244507,
|
|
"step": 4155,
|
|
"valid_targets_mean": 2732.8,
|
|
"valid_targets_min": 1255
|
|
},
|
|
{
|
|
"epoch": 6.634768740031898,
|
|
"grad_norm": 0.8479901024549839,
|
|
"learning_rate": 3.336951020004087e-07,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25870245695114136,
|
|
"step": 4160,
|
|
"valid_targets_mean": 2755.1,
|
|
"valid_targets_min": 1214
|
|
},
|
|
{
|
|
"epoch": 6.64274322169059,
|
|
"grad_norm": 0.955339184810887,
|
|
"learning_rate": 3.1938264043158694e-07,
|
|
"loss": 0.3032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2837870121002197,
|
|
"step": 4165,
|
|
"valid_targets_mean": 2464.6,
|
|
"valid_targets_min": 855
|
|
},
|
|
{
|
|
"epoch": 6.650717703349282,
|
|
"grad_norm": 0.7536797103608006,
|
|
"learning_rate": 3.05381410479062e-07,
|
|
"loss": 0.2909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31108590960502625,
|
|
"step": 4170,
|
|
"valid_targets_mean": 3465.0,
|
|
"valid_targets_min": 979
|
|
},
|
|
{
|
|
"epoch": 6.6586921850079746,
|
|
"grad_norm": 0.7328580352048765,
|
|
"learning_rate": 2.916916335599407e-07,
|
|
"loss": 0.2682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25500962138175964,
|
|
"step": 4175,
|
|
"valid_targets_mean": 3717.8,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 6.666666666666667,
|
|
"grad_norm": 0.8329622604602505,
|
|
"learning_rate": 2.783135261659831e-07,
|
|
"loss": 0.271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27851560711860657,
|
|
"step": 4180,
|
|
"valid_targets_mean": 2676.5,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 6.6746411483253585,
|
|
"grad_norm": 0.7800164718190925,
|
|
"learning_rate": 2.6524729986016293e-07,
|
|
"loss": 0.2882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3040604591369629,
|
|
"step": 4185,
|
|
"valid_targets_mean": 3490.9,
|
|
"valid_targets_min": 1451
|
|
},
|
|
{
|
|
"epoch": 6.682615629984051,
|
|
"grad_norm": 0.7665644146192355,
|
|
"learning_rate": 2.524931612733328e-07,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2704489231109619,
|
|
"step": 4190,
|
|
"valid_targets_mean": 3531.0,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 6.690590111642743,
|
|
"grad_norm": 0.7580580612416418,
|
|
"learning_rate": 2.400513121009529e-07,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2547246813774109,
|
|
"step": 4195,
|
|
"valid_targets_mean": 3438.0,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 6.698564593301436,
|
|
"grad_norm": 0.8007582754539122,
|
|
"learning_rate": 2.279219490998985e-07,
|
|
"loss": 0.2781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3226009011268616,
|
|
"step": 4200,
|
|
"valid_targets_mean": 3253.1,
|
|
"valid_targets_min": 1702
|
|
},
|
|
{
|
|
"epoch": 6.706539074960127,
|
|
"grad_norm": 0.7766830678250739,
|
|
"learning_rate": 2.161052640853578e-07,
|
|
"loss": 0.2978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29874488711357117,
|
|
"step": 4205,
|
|
"valid_targets_mean": 3905.3,
|
|
"valid_targets_min": 939
|
|
},
|
|
{
|
|
"epoch": 6.71451355661882,
|
|
"grad_norm": 0.7264566338788268,
|
|
"learning_rate": 2.0460144392778768e-07,
|
|
"loss": 0.2902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28135061264038086,
|
|
"step": 4210,
|
|
"valid_targets_mean": 4193.4,
|
|
"valid_targets_min": 1899
|
|
},
|
|
{
|
|
"epoch": 6.722488038277512,
|
|
"grad_norm": 0.7700720739771919,
|
|
"learning_rate": 1.9341067054996277e-07,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28918081521987915,
|
|
"step": 4215,
|
|
"valid_targets_mean": 3738.1,
|
|
"valid_targets_min": 1102
|
|
},
|
|
{
|
|
"epoch": 6.7304625199362045,
|
|
"grad_norm": 0.8484380875806526,
|
|
"learning_rate": 1.8253312092409992e-07,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3091745376586914,
|
|
"step": 4220,
|
|
"valid_targets_mean": 3354.8,
|
|
"valid_targets_min": 1577
|
|
},
|
|
{
|
|
"epoch": 6.738437001594896,
|
|
"grad_norm": 0.6977068543745993,
|
|
"learning_rate": 1.7196896706906273e-07,
|
|
"loss": 0.2917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3119521737098694,
|
|
"step": 4225,
|
|
"valid_targets_mean": 4126.1,
|
|
"valid_targets_min": 1531
|
|
},
|
|
{
|
|
"epoch": 6.746411483253588,
|
|
"grad_norm": 0.7055113617368801,
|
|
"learning_rate": 1.6171837604762597e-07,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26250123977661133,
|
|
"step": 4230,
|
|
"valid_targets_mean": 3763.4,
|
|
"valid_targets_min": 1513
|
|
},
|
|
{
|
|
"epoch": 6.754385964912281,
|
|
"grad_norm": 0.7047378528091365,
|
|
"learning_rate": 1.5178150996385755e-07,
|
|
"loss": 0.2882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2571178674697876,
|
|
"step": 4235,
|
|
"valid_targets_mean": 4024.6,
|
|
"valid_targets_min": 1545
|
|
},
|
|
{
|
|
"epoch": 6.762360446570973,
|
|
"grad_norm": 0.7896137342599928,
|
|
"learning_rate": 1.421585259605318e-07,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30015867948532104,
|
|
"step": 4240,
|
|
"valid_targets_mean": 3500.8,
|
|
"valid_targets_min": 1061
|
|
},
|
|
{
|
|
"epoch": 6.770334928229665,
|
|
"grad_norm": 0.7938480604878547,
|
|
"learning_rate": 1.3284957621666039e-07,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24995727837085724,
|
|
"step": 4245,
|
|
"valid_targets_mean": 3312.6,
|
|
"valid_targets_min": 1925
|
|
},
|
|
{
|
|
"epoch": 6.778309409888357,
|
|
"grad_norm": 0.7393072388386471,
|
|
"learning_rate": 1.2385480794507853e-07,
|
|
"loss": 0.2807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2663888931274414,
|
|
"step": 4250,
|
|
"valid_targets_mean": 3451.4,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 6.78628389154705,
|
|
"grad_norm": 0.7485217962877782,
|
|
"learning_rate": 1.1517436339011589e-07,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2690439224243164,
|
|
"step": 4255,
|
|
"valid_targets_mean": 3895.0,
|
|
"valid_targets_min": 1546
|
|
},
|
|
{
|
|
"epoch": 6.794258373205742,
|
|
"grad_norm": 0.809301204178066,
|
|
"learning_rate": 1.0680837982535607e-07,
|
|
"loss": 0.2855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3254355192184448,
|
|
"step": 4260,
|
|
"valid_targets_mean": 3506.1,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 6.8022328548644335,
|
|
"grad_norm": 0.9485819118798953,
|
|
"learning_rate": 9.875698955145174e-08,
|
|
"loss": 0.2844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33554935455322266,
|
|
"step": 4265,
|
|
"valid_targets_mean": 2606.9,
|
|
"valid_targets_min": 1057
|
|
},
|
|
{
|
|
"epoch": 6.810207336523126,
|
|
"grad_norm": 0.7361374487056215,
|
|
"learning_rate": 9.102031989404403e-08,
|
|
"loss": 0.2716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24248762428760529,
|
|
"step": 4270,
|
|
"valid_targets_mean": 3634.4,
|
|
"valid_targets_min": 1284
|
|
},
|
|
{
|
|
"epoch": 6.818181818181818,
|
|
"grad_norm": 0.7540744340766455,
|
|
"learning_rate": 8.359849320174196e-08,
|
|
"loss": 0.2908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27458927035331726,
|
|
"step": 4275,
|
|
"valid_targets_mean": 3289.6,
|
|
"valid_targets_min": 1321
|
|
},
|
|
{
|
|
"epoch": 6.826156299840511,
|
|
"grad_norm": 0.868235784954899,
|
|
"learning_rate": 7.649162684419731e-08,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2793521285057068,
|
|
"step": 4280,
|
|
"valid_targets_mean": 2904.8,
|
|
"valid_targets_min": 1209
|
|
},
|
|
{
|
|
"epoch": 6.834130781499202,
|
|
"grad_norm": 0.7861614089297881,
|
|
"learning_rate": 6.969983321023499e-08,
|
|
"loss": 0.2861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3022116422653198,
|
|
"step": 4285,
|
|
"valid_targets_mean": 3410.6,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 6.842105263157895,
|
|
"grad_norm": 0.8100919452976064,
|
|
"learning_rate": 6.322321970608337e-08,
|
|
"loss": 0.3042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29975757002830505,
|
|
"step": 4290,
|
|
"valid_targets_mean": 3057.0,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 6.850079744816587,
|
|
"grad_norm": 0.7565777371934128,
|
|
"learning_rate": 5.7061888753677796e-08,
|
|
"loss": 0.2709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2968496084213257,
|
|
"step": 4295,
|
|
"valid_targets_mean": 3787.8,
|
|
"valid_targets_min": 1290
|
|
},
|
|
{
|
|
"epoch": 6.858054226475279,
|
|
"grad_norm": 0.9421160543666579,
|
|
"learning_rate": 5.121593778903755e-08,
|
|
"loss": 0.2631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2672404944896698,
|
|
"step": 4300,
|
|
"valid_targets_mean": 3039.3,
|
|
"valid_targets_min": 1301
|
|
},
|
|
{
|
|
"epoch": 6.866028708133971,
|
|
"grad_norm": 0.873088134601706,
|
|
"learning_rate": 4.5685459260722544e-08,
|
|
"loss": 0.2963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3209856152534485,
|
|
"step": 4305,
|
|
"valid_targets_mean": 3169.1,
|
|
"valid_targets_min": 1403
|
|
},
|
|
{
|
|
"epoch": 6.8740031897926634,
|
|
"grad_norm": 0.6448693360564675,
|
|
"learning_rate": 4.047054062837452e-08,
|
|
"loss": 0.2902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38163435459136963,
|
|
"step": 4310,
|
|
"valid_targets_mean": 5327.4,
|
|
"valid_targets_min": 1862
|
|
},
|
|
{
|
|
"epoch": 6.881977671451356,
|
|
"grad_norm": 0.8022857480226772,
|
|
"learning_rate": 3.55712643613404e-08,
|
|
"loss": 0.2884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3100656270980835,
|
|
"step": 4315,
|
|
"valid_targets_mean": 3153.1,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 6.889952153110048,
|
|
"grad_norm": 0.754398525226754,
|
|
"learning_rate": 3.0987707937351066e-08,
|
|
"loss": 0.2672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2644553780555725,
|
|
"step": 4320,
|
|
"valid_targets_mean": 3510.8,
|
|
"valid_targets_min": 985
|
|
},
|
|
{
|
|
"epoch": 6.89792663476874,
|
|
"grad_norm": 0.8184214095383463,
|
|
"learning_rate": 2.6719943841311268e-08,
|
|
"loss": 0.2748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3004235327243805,
|
|
"step": 4325,
|
|
"valid_targets_mean": 3118.9,
|
|
"valid_targets_min": 1672
|
|
},
|
|
{
|
|
"epoch": 6.905901116427432,
|
|
"grad_norm": 0.7591860783810543,
|
|
"learning_rate": 2.2768039564151635e-08,
|
|
"loss": 0.2906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2757605314254761,
|
|
"step": 4330,
|
|
"valid_targets_mean": 3500.9,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 6.913875598086125,
|
|
"grad_norm": 0.7736059420444852,
|
|
"learning_rate": 1.913205760175174e-08,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31430119276046753,
|
|
"step": 4335,
|
|
"valid_targets_mean": 3761.9,
|
|
"valid_targets_min": 1148
|
|
},
|
|
{
|
|
"epoch": 6.921850079744816,
|
|
"grad_norm": 0.722091299372459,
|
|
"learning_rate": 1.5812055453963136e-08,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25637492537498474,
|
|
"step": 4340,
|
|
"valid_targets_mean": 3674.6,
|
|
"valid_targets_min": 1060
|
|
},
|
|
{
|
|
"epoch": 6.9298245614035086,
|
|
"grad_norm": 0.7644597501267615,
|
|
"learning_rate": 1.280808562369229e-08,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.269060343503952,
|
|
"step": 4345,
|
|
"valid_targets_mean": 3619.8,
|
|
"valid_targets_min": 1171
|
|
},
|
|
{
|
|
"epoch": 6.937799043062201,
|
|
"grad_norm": 0.7816896013646375,
|
|
"learning_rate": 1.01201956160768e-08,
|
|
"loss": 0.2948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3388156592845917,
|
|
"step": 4350,
|
|
"valid_targets_mean": 3373.9,
|
|
"valid_targets_min": 1474
|
|
},
|
|
{
|
|
"epoch": 6.945773524720893,
|
|
"grad_norm": 0.8588949061684745,
|
|
"learning_rate": 7.74842793772601e-09,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2762036919593811,
|
|
"step": 4355,
|
|
"valid_targets_mean": 2608.9,
|
|
"valid_targets_min": 805
|
|
},
|
|
{
|
|
"epoch": 6.953748006379586,
|
|
"grad_norm": 0.688075645724166,
|
|
"learning_rate": 5.692820096054874e-09,
|
|
"loss": 0.2714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26201221346855164,
|
|
"step": 4360,
|
|
"valid_targets_mean": 3907.6,
|
|
"valid_targets_min": 1335
|
|
},
|
|
{
|
|
"epoch": 6.961722488038277,
|
|
"grad_norm": 0.8725285267430308,
|
|
"learning_rate": 3.9534045986888706e-09,
|
|
"loss": 0.2665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30014878511428833,
|
|
"step": 4365,
|
|
"valid_targets_mean": 2716.9,
|
|
"valid_targets_min": 1122
|
|
},
|
|
{
|
|
"epoch": 6.96969696969697,
|
|
"grad_norm": 0.7874273746802156,
|
|
"learning_rate": 2.530208952953306e-09,
|
|
"loss": 0.2562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24479547142982483,
|
|
"step": 4370,
|
|
"valid_targets_mean": 2847.1,
|
|
"valid_targets_min": 1123
|
|
},
|
|
{
|
|
"epoch": 6.977671451355662,
|
|
"grad_norm": 0.8538141551046308,
|
|
"learning_rate": 1.4232556654314445e-09,
|
|
"loss": 0.2854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27175116539001465,
|
|
"step": 4375,
|
|
"valid_targets_mean": 3139.9,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 6.985645933014354,
|
|
"grad_norm": 1.0388601799303985,
|
|
"learning_rate": 6.325622416136767e-10,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25292569398880005,
|
|
"step": 4380,
|
|
"valid_targets_mean": 2725.7,
|
|
"valid_targets_min": 760
|
|
},
|
|
{
|
|
"epoch": 6.993620414673046,
|
|
"grad_norm": 0.6368291411870719,
|
|
"learning_rate": 1.581411856199644e-10,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2768556475639343,
|
|
"step": 4385,
|
|
"valid_targets_mean": 4455.6,
|
|
"valid_targets_min": 1263
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32130008935928345,
|
|
"step": 4389,
|
|
"total_flos": 1003723615371264.0,
|
|
"train_loss": 0.36283224716521145,
|
|
"train_runtime": 20822.9549,
|
|
"train_samples_per_second": 3.369,
|
|
"train_steps_per_second": 0.211,
|
|
"valid_targets_mean": 3016.4,
|
|
"valid_targets_min": 723
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4389,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1003723615371264.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|