9189 lines
255 KiB
JSON
9189 lines
255 KiB
JSON
|
|
{
|
||
|
|
"best_global_step": null,
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 7.0,
|
||
|
|
"eval_steps": 500,
|
||
|
|
"global_step": 4158,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"epoch": 0.015822784810126583,
|
||
|
|
"grad_norm": 19.635326284997042,
|
||
|
|
"learning_rate": 7.207207207207208e-07,
|
||
|
|
"loss": 0.8894,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.8479753136634827,
|
||
|
|
"step": 5,
|
||
|
|
"valid_targets_mean": 6356.2,
|
||
|
|
"valid_targets_min": 4626
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.03164556962025317,
|
||
|
|
"grad_norm": 16.13080891734784,
|
||
|
|
"learning_rate": 1.6216216216216219e-06,
|
||
|
|
"loss": 0.8811,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.8280214071273804,
|
||
|
|
"step": 10,
|
||
|
|
"valid_targets_mean": 5469.8,
|
||
|
|
"valid_targets_min": 1984
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.04746835443037975,
|
||
|
|
"grad_norm": 11.991696300533674,
|
||
|
|
"learning_rate": 2.5225225225225225e-06,
|
||
|
|
"loss": 0.8509,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.8600161075592041,
|
||
|
|
"step": 15,
|
||
|
|
"valid_targets_mean": 4922.1,
|
||
|
|
"valid_targets_min": 963
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.06329113924050633,
|
||
|
|
"grad_norm": 5.138935831764275,
|
||
|
|
"learning_rate": 3.423423423423424e-06,
|
||
|
|
"loss": 0.7628,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.730722188949585,
|
||
|
|
"step": 20,
|
||
|
|
"valid_targets_mean": 5286.3,
|
||
|
|
"valid_targets_min": 2311
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07911392405063292,
|
||
|
|
"grad_norm": 2.5378388792138282,
|
||
|
|
"learning_rate": 4.324324324324325e-06,
|
||
|
|
"loss": 0.7133,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.7108891010284424,
|
||
|
|
"step": 25,
|
||
|
|
"valid_targets_mean": 6008.8,
|
||
|
|
"valid_targets_min": 2554
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0949367088607595,
|
||
|
|
"grad_norm": 1.7389619679720372,
|
||
|
|
"learning_rate": 5.225225225225226e-06,
|
||
|
|
"loss": 0.6769,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.6590981483459473,
|
||
|
|
"step": 30,
|
||
|
|
"valid_targets_mean": 5323.6,
|
||
|
|
"valid_targets_min": 738
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.11075949367088607,
|
||
|
|
"grad_norm": 1.3640080618334556,
|
||
|
|
"learning_rate": 6.126126126126126e-06,
|
||
|
|
"loss": 0.6836,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.618993878364563,
|
||
|
|
"step": 35,
|
||
|
|
"valid_targets_mean": 6272.8,
|
||
|
|
"valid_targets_min": 4170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.12658227848101267,
|
||
|
|
"grad_norm": 1.1186375634687915,
|
||
|
|
"learning_rate": 7.027027027027028e-06,
|
||
|
|
"loss": 0.6246,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.6017647981643677,
|
||
|
|
"step": 40,
|
||
|
|
"valid_targets_mean": 5764.1,
|
||
|
|
"valid_targets_min": 4916
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.14240506329113925,
|
||
|
|
"grad_norm": 0.9386209237716225,
|
||
|
|
"learning_rate": 7.927927927927929e-06,
|
||
|
|
"loss": 0.6045,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.59061598777771,
|
||
|
|
"step": 45,
|
||
|
|
"valid_targets_mean": 5393.1,
|
||
|
|
"valid_targets_min": 2068
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.15822784810126583,
|
||
|
|
"grad_norm": 0.8597710318195954,
|
||
|
|
"learning_rate": 8.82882882882883e-06,
|
||
|
|
"loss": 0.587,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.5794469714164734,
|
||
|
|
"step": 50,
|
||
|
|
"valid_targets_mean": 5057.8,
|
||
|
|
"valid_targets_min": 529
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.17405063291139242,
|
||
|
|
"grad_norm": 0.7380078972896661,
|
||
|
|
"learning_rate": 9.729729729729732e-06,
|
||
|
|
"loss": 0.5877,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.5776603817939758,
|
||
|
|
"step": 55,
|
||
|
|
"valid_targets_mean": 5990.2,
|
||
|
|
"valid_targets_min": 2501
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.189873417721519,
|
||
|
|
"grad_norm": 0.7385128432786578,
|
||
|
|
"learning_rate": 1.0630630630630632e-05,
|
||
|
|
"loss": 0.5414,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.5155864357948303,
|
||
|
|
"step": 60,
|
||
|
|
"valid_targets_mean": 5768.9,
|
||
|
|
"valid_targets_min": 2221
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.20569620253164558,
|
||
|
|
"grad_norm": 0.6209730262155323,
|
||
|
|
"learning_rate": 1.1531531531531532e-05,
|
||
|
|
"loss": 0.5315,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.5242434740066528,
|
||
|
|
"step": 65,
|
||
|
|
"valid_targets_mean": 5509.4,
|
||
|
|
"valid_targets_min": 936
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22151898734177214,
|
||
|
|
"grad_norm": 0.6528264506538175,
|
||
|
|
"learning_rate": 1.2432432432432433e-05,
|
||
|
|
"loss": 0.512,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.4938278794288635,
|
||
|
|
"step": 70,
|
||
|
|
"valid_targets_mean": 5477.1,
|
||
|
|
"valid_targets_min": 706
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.23734177215189872,
|
||
|
|
"grad_norm": 0.5670440656251462,
|
||
|
|
"learning_rate": 1.3333333333333333e-05,
|
||
|
|
"loss": 0.4835,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.490886926651001,
|
||
|
|
"step": 75,
|
||
|
|
"valid_targets_mean": 6629.6,
|
||
|
|
"valid_targets_min": 2375
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.25316455696202533,
|
||
|
|
"grad_norm": 0.5622722280452882,
|
||
|
|
"learning_rate": 1.4234234234234234e-05,
|
||
|
|
"loss": 0.466,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.45689529180526733,
|
||
|
|
"step": 80,
|
||
|
|
"valid_targets_mean": 5843.8,
|
||
|
|
"valid_targets_min": 3654
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2689873417721519,
|
||
|
|
"grad_norm": 0.605023842930082,
|
||
|
|
"learning_rate": 1.5135135135135138e-05,
|
||
|
|
"loss": 0.4579,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.4165900945663452,
|
||
|
|
"step": 85,
|
||
|
|
"valid_targets_mean": 5597.8,
|
||
|
|
"valid_targets_min": 1965
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2848101265822785,
|
||
|
|
"grad_norm": 0.5554544705328142,
|
||
|
|
"learning_rate": 1.6036036036036036e-05,
|
||
|
|
"loss": 0.4421,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.46295350790023804,
|
||
|
|
"step": 90,
|
||
|
|
"valid_targets_mean": 5449.9,
|
||
|
|
"valid_targets_min": 572
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.30063291139240506,
|
||
|
|
"grad_norm": 0.6041342893699451,
|
||
|
|
"learning_rate": 1.693693693693694e-05,
|
||
|
|
"loss": 0.4352,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.44789403676986694,
|
||
|
|
"step": 95,
|
||
|
|
"valid_targets_mean": 5553.9,
|
||
|
|
"valid_targets_min": 607
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.31645569620253167,
|
||
|
|
"grad_norm": 0.7659279709674273,
|
||
|
|
"learning_rate": 1.783783783783784e-05,
|
||
|
|
"loss": 0.4242,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.4142349660396576,
|
||
|
|
"step": 100,
|
||
|
|
"valid_targets_mean": 5741.9,
|
||
|
|
"valid_targets_min": 1930
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3322784810126582,
|
||
|
|
"grad_norm": 0.6209067476802824,
|
||
|
|
"learning_rate": 1.873873873873874e-05,
|
||
|
|
"loss": 0.4183,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.4335191547870636,
|
||
|
|
"step": 105,
|
||
|
|
"valid_targets_mean": 6375.3,
|
||
|
|
"valid_targets_min": 4533
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.34810126582278483,
|
||
|
|
"grad_norm": 0.581107613504689,
|
||
|
|
"learning_rate": 1.963963963963964e-05,
|
||
|
|
"loss": 0.4175,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.41051313281059265,
|
||
|
|
"step": 110,
|
||
|
|
"valid_targets_mean": 5599.6,
|
||
|
|
"valid_targets_min": 1732
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3639240506329114,
|
||
|
|
"grad_norm": 0.6038416216295759,
|
||
|
|
"learning_rate": 2.054054054054054e-05,
|
||
|
|
"loss": 0.3992,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.4384707808494568,
|
||
|
|
"step": 115,
|
||
|
|
"valid_targets_mean": 5761.0,
|
||
|
|
"valid_targets_min": 809
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.379746835443038,
|
||
|
|
"grad_norm": 0.5907567666475526,
|
||
|
|
"learning_rate": 2.1441441441441442e-05,
|
||
|
|
"loss": 0.3927,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.40794870257377625,
|
||
|
|
"step": 120,
|
||
|
|
"valid_targets_mean": 5991.1,
|
||
|
|
"valid_targets_min": 710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.39556962025316456,
|
||
|
|
"grad_norm": 0.5764381955083145,
|
||
|
|
"learning_rate": 2.234234234234234e-05,
|
||
|
|
"loss": 0.4041,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.4018586277961731,
|
||
|
|
"step": 125,
|
||
|
|
"valid_targets_mean": 5456.4,
|
||
|
|
"valid_targets_min": 1555
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.41139240506329117,
|
||
|
|
"grad_norm": 0.5755518106976973,
|
||
|
|
"learning_rate": 2.3243243243243243e-05,
|
||
|
|
"loss": 0.3897,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3841174840927124,
|
||
|
|
"step": 130,
|
||
|
|
"valid_targets_mean": 5648.2,
|
||
|
|
"valid_targets_min": 3331
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4272151898734177,
|
||
|
|
"grad_norm": 0.7549254910637737,
|
||
|
|
"learning_rate": 2.414414414414415e-05,
|
||
|
|
"loss": 0.3976,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.36377841234207153,
|
||
|
|
"step": 135,
|
||
|
|
"valid_targets_mean": 4960.0,
|
||
|
|
"valid_targets_min": 685
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4430379746835443,
|
||
|
|
"grad_norm": 0.646228878541494,
|
||
|
|
"learning_rate": 2.5045045045045047e-05,
|
||
|
|
"loss": 0.39,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3614453077316284,
|
||
|
|
"step": 140,
|
||
|
|
"valid_targets_mean": 6316.5,
|
||
|
|
"valid_targets_min": 3461
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4588607594936709,
|
||
|
|
"grad_norm": 0.6223647658352607,
|
||
|
|
"learning_rate": 2.594594594594595e-05,
|
||
|
|
"loss": 0.3932,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3782048523426056,
|
||
|
|
"step": 145,
|
||
|
|
"valid_targets_mean": 5925.8,
|
||
|
|
"valid_targets_min": 962
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.47468354430379744,
|
||
|
|
"grad_norm": 0.5319538222768427,
|
||
|
|
"learning_rate": 2.6846846846846852e-05,
|
||
|
|
"loss": 0.3691,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.35863935947418213,
|
||
|
|
"step": 150,
|
||
|
|
"valid_targets_mean": 6230.6,
|
||
|
|
"valid_targets_min": 4410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.49050632911392406,
|
||
|
|
"grad_norm": 0.5959100850015796,
|
||
|
|
"learning_rate": 2.774774774774775e-05,
|
||
|
|
"loss": 0.3906,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.4075894355773926,
|
||
|
|
"step": 155,
|
||
|
|
"valid_targets_mean": 5685.1,
|
||
|
|
"valid_targets_min": 2217
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5063291139240507,
|
||
|
|
"grad_norm": 0.6104000907758355,
|
||
|
|
"learning_rate": 2.8648648648648653e-05,
|
||
|
|
"loss": 0.3632,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3834109902381897,
|
||
|
|
"step": 160,
|
||
|
|
"valid_targets_mean": 5372.4,
|
||
|
|
"valid_targets_min": 2488
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5221518987341772,
|
||
|
|
"grad_norm": 0.5943597918399458,
|
||
|
|
"learning_rate": 2.954954954954955e-05,
|
||
|
|
"loss": 0.3611,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.34721097350120544,
|
||
|
|
"step": 165,
|
||
|
|
"valid_targets_mean": 6153.5,
|
||
|
|
"valid_targets_min": 3625
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5379746835443038,
|
||
|
|
"grad_norm": 0.8779668397824408,
|
||
|
|
"learning_rate": 3.0450450450450454e-05,
|
||
|
|
"loss": 0.3584,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3657713830471039,
|
||
|
|
"step": 170,
|
||
|
|
"valid_targets_mean": 6067.9,
|
||
|
|
"valid_targets_min": 2587
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5537974683544303,
|
||
|
|
"grad_norm": 0.6223661471412824,
|
||
|
|
"learning_rate": 3.135135135135135e-05,
|
||
|
|
"loss": 0.3584,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3509843051433563,
|
||
|
|
"step": 175,
|
||
|
|
"valid_targets_mean": 5559.8,
|
||
|
|
"valid_targets_min": 2152
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.569620253164557,
|
||
|
|
"grad_norm": 0.6222537770299622,
|
||
|
|
"learning_rate": 3.225225225225225e-05,
|
||
|
|
"loss": 0.3558,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3183259963989258,
|
||
|
|
"step": 180,
|
||
|
|
"valid_targets_mean": 5944.1,
|
||
|
|
"valid_targets_min": 806
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5854430379746836,
|
||
|
|
"grad_norm": 0.6005449566296928,
|
||
|
|
"learning_rate": 3.3153153153153157e-05,
|
||
|
|
"loss": 0.3585,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.35840126872062683,
|
||
|
|
"step": 185,
|
||
|
|
"valid_targets_mean": 5495.9,
|
||
|
|
"valid_targets_min": 3212
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6012658227848101,
|
||
|
|
"grad_norm": 0.6023686554503512,
|
||
|
|
"learning_rate": 3.4054054054054055e-05,
|
||
|
|
"loss": 0.361,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3845367431640625,
|
||
|
|
"step": 190,
|
||
|
|
"valid_targets_mean": 6150.2,
|
||
|
|
"valid_targets_min": 4223
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6170886075949367,
|
||
|
|
"grad_norm": 0.5614758195645654,
|
||
|
|
"learning_rate": 3.4954954954954954e-05,
|
||
|
|
"loss": 0.3677,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.35003113746643066,
|
||
|
|
"step": 195,
|
||
|
|
"valid_targets_mean": 6085.7,
|
||
|
|
"valid_targets_min": 648
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6329113924050633,
|
||
|
|
"grad_norm": 0.5669477947277907,
|
||
|
|
"learning_rate": 3.585585585585586e-05,
|
||
|
|
"loss": 0.3482,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3124888241291046,
|
||
|
|
"step": 200,
|
||
|
|
"valid_targets_mean": 5489.0,
|
||
|
|
"valid_targets_min": 3469
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6487341772151899,
|
||
|
|
"grad_norm": 0.5792807141463496,
|
||
|
|
"learning_rate": 3.6756756756756765e-05,
|
||
|
|
"loss": 0.3437,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3375263214111328,
|
||
|
|
"step": 205,
|
||
|
|
"valid_targets_mean": 6183.4,
|
||
|
|
"valid_targets_min": 1949
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6645569620253164,
|
||
|
|
"grad_norm": 0.5605895836940232,
|
||
|
|
"learning_rate": 3.7657657657657664e-05,
|
||
|
|
"loss": 0.3365,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3439105153083801,
|
||
|
|
"step": 210,
|
||
|
|
"valid_targets_mean": 6067.1,
|
||
|
|
"valid_targets_min": 3691
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.680379746835443,
|
||
|
|
"grad_norm": 0.5409677012753102,
|
||
|
|
"learning_rate": 3.855855855855856e-05,
|
||
|
|
"loss": 0.3559,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.32987433671951294,
|
||
|
|
"step": 215,
|
||
|
|
"valid_targets_mean": 5943.6,
|
||
|
|
"valid_targets_min": 2565
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6962025316455697,
|
||
|
|
"grad_norm": 0.5651963802609234,
|
||
|
|
"learning_rate": 3.945945945945946e-05,
|
||
|
|
"loss": 0.3547,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3495904207229614,
|
||
|
|
"step": 220,
|
||
|
|
"valid_targets_mean": 5503.1,
|
||
|
|
"valid_targets_min": 769
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7120253164556962,
|
||
|
|
"grad_norm": 0.6159705821271058,
|
||
|
|
"learning_rate": 3.999990030962651e-05,
|
||
|
|
"loss": 0.3513,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3498522639274597,
|
||
|
|
"step": 225,
|
||
|
|
"valid_targets_mean": 5073.9,
|
||
|
|
"valid_targets_min": 916
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7278481012658228,
|
||
|
|
"grad_norm": 0.5769256804710682,
|
||
|
|
"learning_rate": 3.9998778804338035e-05,
|
||
|
|
"loss": 0.3352,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3386867046356201,
|
||
|
|
"step": 230,
|
||
|
|
"valid_targets_mean": 5822.4,
|
||
|
|
"valid_targets_min": 823
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7436708860759493,
|
||
|
|
"grad_norm": 0.5997266023641833,
|
||
|
|
"learning_rate": 3.9996411250903884e-05,
|
||
|
|
"loss": 0.338,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.32479092478752136,
|
||
|
|
"step": 235,
|
||
|
|
"valid_targets_mean": 5829.8,
|
||
|
|
"valid_targets_min": 2857
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.759493670886076,
|
||
|
|
"grad_norm": 0.5809144885862758,
|
||
|
|
"learning_rate": 3.9992797796837354e-05,
|
||
|
|
"loss": 0.3587,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3352227807044983,
|
||
|
|
"step": 240,
|
||
|
|
"valid_targets_mean": 5659.2,
|
||
|
|
"valid_targets_min": 1761
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7753164556962026,
|
||
|
|
"grad_norm": 0.5987836701077235,
|
||
|
|
"learning_rate": 3.9987938667279065e-05,
|
||
|
|
"loss": 0.3381,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.33761051297187805,
|
||
|
|
"step": 245,
|
||
|
|
"valid_targets_mean": 6031.9,
|
||
|
|
"valid_targets_min": 650
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7911392405063291,
|
||
|
|
"grad_norm": 0.5120036432889168,
|
||
|
|
"learning_rate": 3.998183416498299e-05,
|
||
|
|
"loss": 0.3385,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.32812052965164185,
|
||
|
|
"step": 250,
|
||
|
|
"valid_targets_mean": 6019.6,
|
||
|
|
"valid_targets_min": 3347
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8069620253164557,
|
||
|
|
"grad_norm": 0.5533906259307175,
|
||
|
|
"learning_rate": 3.9974484670297515e-05,
|
||
|
|
"loss": 0.3453,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.33938735723495483,
|
||
|
|
"step": 255,
|
||
|
|
"valid_targets_mean": 6177.3,
|
||
|
|
"valid_targets_min": 2736
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8227848101265823,
|
||
|
|
"grad_norm": 0.5463218213680792,
|
||
|
|
"learning_rate": 3.996589064114183e-05,
|
||
|
|
"loss": 0.3388,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.33829447627067566,
|
||
|
|
"step": 260,
|
||
|
|
"valid_targets_mean": 5798.7,
|
||
|
|
"valid_targets_min": 2076
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8386075949367089,
|
||
|
|
"grad_norm": 0.5858802651211497,
|
||
|
|
"learning_rate": 3.995605261297733e-05,
|
||
|
|
"loss": 0.3421,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.32265397906303406,
|
||
|
|
"step": 265,
|
||
|
|
"valid_targets_mean": 5114.2,
|
||
|
|
"valid_targets_min": 669
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8544303797468354,
|
||
|
|
"grad_norm": 0.5957957307763452,
|
||
|
|
"learning_rate": 3.994497119877429e-05,
|
||
|
|
"loss": 0.3462,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.36409419775009155,
|
||
|
|
"step": 270,
|
||
|
|
"valid_targets_mean": 5744.9,
|
||
|
|
"valid_targets_min": 2013
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.870253164556962,
|
||
|
|
"grad_norm": 0.5392854104650391,
|
||
|
|
"learning_rate": 3.9932647088973635e-05,
|
||
|
|
"loss": 0.3229,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3287871181964874,
|
||
|
|
"step": 275,
|
||
|
|
"valid_targets_mean": 5633.2,
|
||
|
|
"valid_targets_min": 1960
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8860759493670886,
|
||
|
|
"grad_norm": 0.544848398408457,
|
||
|
|
"learning_rate": 3.9919081051443974e-05,
|
||
|
|
"loss": 0.3361,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3490983247756958,
|
||
|
|
"step": 280,
|
||
|
|
"valid_targets_mean": 6091.4,
|
||
|
|
"valid_targets_min": 2448
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9018987341772152,
|
||
|
|
"grad_norm": 0.5207114070396386,
|
||
|
|
"learning_rate": 3.990427393143369e-05,
|
||
|
|
"loss": 0.337,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3348560333251953,
|
||
|
|
"step": 285,
|
||
|
|
"valid_targets_mean": 5576.4,
|
||
|
|
"valid_targets_min": 886
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9177215189873418,
|
||
|
|
"grad_norm": 0.5913839083299426,
|
||
|
|
"learning_rate": 3.988822665151836e-05,
|
||
|
|
"loss": 0.3334,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.35333555936813354,
|
||
|
|
"step": 290,
|
||
|
|
"valid_targets_mean": 5589.2,
|
||
|
|
"valid_targets_min": 766
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9335443037974683,
|
||
|
|
"grad_norm": 0.6143720688184718,
|
||
|
|
"learning_rate": 3.987094021154319e-05,
|
||
|
|
"loss": 0.3388,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3340070843696594,
|
||
|
|
"step": 295,
|
||
|
|
"valid_targets_mean": 5667.8,
|
||
|
|
"valid_targets_min": 2662
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9493670886075949,
|
||
|
|
"grad_norm": 0.5592248016927946,
|
||
|
|
"learning_rate": 3.985241568856077e-05,
|
||
|
|
"loss": 0.3329,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30103063583374023,
|
||
|
|
"step": 300,
|
||
|
|
"valid_targets_mean": 5012.6,
|
||
|
|
"valid_targets_min": 2048
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9651898734177216,
|
||
|
|
"grad_norm": 0.5453255678206526,
|
||
|
|
"learning_rate": 3.983265423676396e-05,
|
||
|
|
"loss": 0.3393,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29531943798065186,
|
||
|
|
"step": 305,
|
||
|
|
"valid_targets_mean": 5888.4,
|
||
|
|
"valid_targets_min": 1837
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9810126582278481,
|
||
|
|
"grad_norm": 0.5365359931030111,
|
||
|
|
"learning_rate": 3.9811657087413955e-05,
|
||
|
|
"loss": 0.3291,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.32722777128219604,
|
||
|
|
"step": 310,
|
||
|
|
"valid_targets_mean": 5922.3,
|
||
|
|
"valid_targets_min": 1486
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9968354430379747,
|
||
|
|
"grad_norm": 0.5234404324989647,
|
||
|
|
"learning_rate": 3.9789425548763574e-05,
|
||
|
|
"loss": 0.3202,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30988502502441406,
|
||
|
|
"step": 315,
|
||
|
|
"valid_targets_mean": 6443.0,
|
||
|
|
"valid_targets_min": 3893
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0126582278481013,
|
||
|
|
"grad_norm": 0.5183687500516394,
|
||
|
|
"learning_rate": 3.976596100597579e-05,
|
||
|
|
"loss": 0.3204,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3249934911727905,
|
||
|
|
"step": 320,
|
||
|
|
"valid_targets_mean": 5519.9,
|
||
|
|
"valid_targets_min": 1847
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0284810126582278,
|
||
|
|
"grad_norm": 0.5331896984808763,
|
||
|
|
"learning_rate": 3.974126492103736e-05,
|
||
|
|
"loss": 0.3159,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3015314042568207,
|
||
|
|
"step": 325,
|
||
|
|
"valid_targets_mean": 6262.5,
|
||
|
|
"valid_targets_min": 2170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0443037974683544,
|
||
|
|
"grad_norm": 0.5855356182197418,
|
||
|
|
"learning_rate": 3.971533883266778e-05,
|
||
|
|
"loss": 0.3294,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3428266644477844,
|
||
|
|
"step": 330,
|
||
|
|
"valid_targets_mean": 4937.9,
|
||
|
|
"valid_targets_min": 1619
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0601265822784811,
|
||
|
|
"grad_norm": 0.5338122150976636,
|
||
|
|
"learning_rate": 3.9688184356223406e-05,
|
||
|
|
"loss": 0.3224,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31056129932403564,
|
||
|
|
"step": 335,
|
||
|
|
"valid_targets_mean": 5959.5,
|
||
|
|
"valid_targets_min": 3731
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0759493670886076,
|
||
|
|
"grad_norm": 0.5365587414802077,
|
||
|
|
"learning_rate": 3.9659803183596794e-05,
|
||
|
|
"loss": 0.3242,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.33881163597106934,
|
||
|
|
"step": 340,
|
||
|
|
"valid_targets_mean": 6117.9,
|
||
|
|
"valid_targets_min": 4169
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0917721518987342,
|
||
|
|
"grad_norm": 0.5883328441611787,
|
||
|
|
"learning_rate": 3.963019708311129e-05,
|
||
|
|
"loss": 0.3151,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2953089475631714,
|
||
|
|
"step": 345,
|
||
|
|
"valid_targets_mean": 5483.4,
|
||
|
|
"valid_targets_min": 3383
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1075949367088607,
|
||
|
|
"grad_norm": 0.5639266443192867,
|
||
|
|
"learning_rate": 3.9599367899410865e-05,
|
||
|
|
"loss": 0.3246,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3456162214279175,
|
||
|
|
"step": 350,
|
||
|
|
"valid_targets_mean": 5594.0,
|
||
|
|
"valid_targets_min": 2189
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1234177215189873,
|
||
|
|
"grad_norm": 0.4882291609214383,
|
||
|
|
"learning_rate": 3.956731755334516e-05,
|
||
|
|
"loss": 0.3251,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30995047092437744,
|
||
|
|
"step": 355,
|
||
|
|
"valid_targets_mean": 6331.2,
|
||
|
|
"valid_targets_min": 2270
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.139240506329114,
|
||
|
|
"grad_norm": 0.5272761657710062,
|
||
|
|
"learning_rate": 3.953404804184982e-05,
|
||
|
|
"loss": 0.313,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3174111247062683,
|
||
|
|
"step": 360,
|
||
|
|
"valid_targets_mean": 5742.4,
|
||
|
|
"valid_targets_min": 3466
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1550632911392404,
|
||
|
|
"grad_norm": 0.5368848055456228,
|
||
|
|
"learning_rate": 3.949956143782208e-05,
|
||
|
|
"loss": 0.3268,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.32597553730010986,
|
||
|
|
"step": 365,
|
||
|
|
"valid_targets_mean": 6278.2,
|
||
|
|
"valid_targets_min": 809
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1708860759493671,
|
||
|
|
"grad_norm": 0.46231267116022384,
|
||
|
|
"learning_rate": 3.946385988999158e-05,
|
||
|
|
"loss": 0.3025,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2897875905036926,
|
||
|
|
"step": 370,
|
||
|
|
"valid_targets_mean": 6156.8,
|
||
|
|
"valid_targets_min": 2261
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1867088607594938,
|
||
|
|
"grad_norm": 0.5279582306080264,
|
||
|
|
"learning_rate": 3.942694562278652e-05,
|
||
|
|
"loss": 0.3228,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.33394286036491394,
|
||
|
|
"step": 375,
|
||
|
|
"valid_targets_mean": 5678.4,
|
||
|
|
"valid_targets_min": 3621
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2025316455696202,
|
||
|
|
"grad_norm": 0.5047890297509585,
|
||
|
|
"learning_rate": 3.938882093619505e-05,
|
||
|
|
"loss": 0.3206,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.32703208923339844,
|
||
|
|
"step": 380,
|
||
|
|
"valid_targets_mean": 5641.8,
|
||
|
|
"valid_targets_min": 677
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2183544303797469,
|
||
|
|
"grad_norm": 0.5044680379524039,
|
||
|
|
"learning_rate": 3.934948820562196e-05,
|
||
|
|
"loss": 0.3143,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3180423676967621,
|
||
|
|
"step": 385,
|
||
|
|
"valid_targets_mean": 6107.4,
|
||
|
|
"valid_targets_min": 4917
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2341772151898733,
|
||
|
|
"grad_norm": 0.5054464486878625,
|
||
|
|
"learning_rate": 3.9308949881740684e-05,
|
||
|
|
"loss": 0.3133,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3271169662475586,
|
||
|
|
"step": 390,
|
||
|
|
"valid_targets_mean": 5758.1,
|
||
|
|
"valid_targets_min": 693
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.25,
|
||
|
|
"grad_norm": 0.4722764193174416,
|
||
|
|
"learning_rate": 3.926720849034062e-05,
|
||
|
|
"loss": 0.308,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2783935070037842,
|
||
|
|
"step": 395,
|
||
|
|
"valid_targets_mean": 5941.9,
|
||
|
|
"valid_targets_min": 4132
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2658227848101267,
|
||
|
|
"grad_norm": 0.5869772158307476,
|
||
|
|
"learning_rate": 3.922426663216973e-05,
|
||
|
|
"loss": 0.3223,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31632736325263977,
|
||
|
|
"step": 400,
|
||
|
|
"valid_targets_mean": 5153.4,
|
||
|
|
"valid_targets_min": 806
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2816455696202531,
|
||
|
|
"grad_norm": 0.4936808719044175,
|
||
|
|
"learning_rate": 3.9180126982772524e-05,
|
||
|
|
"loss": 0.313,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.32065725326538086,
|
||
|
|
"step": 405,
|
||
|
|
"valid_targets_mean": 5738.6,
|
||
|
|
"valid_targets_min": 3262
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2974683544303798,
|
||
|
|
"grad_norm": 0.49156656318553427,
|
||
|
|
"learning_rate": 3.9134792292323343e-05,
|
||
|
|
"loss": 0.3147,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2900536060333252,
|
||
|
|
"step": 410,
|
||
|
|
"valid_targets_mean": 6236.5,
|
||
|
|
"valid_targets_min": 2029
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3132911392405062,
|
||
|
|
"grad_norm": 0.48351243867680455,
|
||
|
|
"learning_rate": 3.9088265385454995e-05,
|
||
|
|
"loss": 0.3082,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2834135591983795,
|
||
|
|
"step": 415,
|
||
|
|
"valid_targets_mean": 5412.0,
|
||
|
|
"valid_targets_min": 753
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3291139240506329,
|
||
|
|
"grad_norm": 0.5149369616768734,
|
||
|
|
"learning_rate": 3.9040549161082804e-05,
|
||
|
|
"loss": 0.3137,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3001251220703125,
|
||
|
|
"step": 420,
|
||
|
|
"valid_targets_mean": 6026.9,
|
||
|
|
"valid_targets_min": 4356
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3449367088607596,
|
||
|
|
"grad_norm": 0.5056930922739988,
|
||
|
|
"learning_rate": 3.8991646592223936e-05,
|
||
|
|
"loss": 0.3226,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.32380956411361694,
|
||
|
|
"step": 425,
|
||
|
|
"valid_targets_mean": 6292.7,
|
||
|
|
"valid_targets_min": 5079
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.360759493670886,
|
||
|
|
"grad_norm": 0.4975035489093388,
|
||
|
|
"learning_rate": 3.8941560725812196e-05,
|
||
|
|
"loss": 0.316,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3131970167160034,
|
||
|
|
"step": 430,
|
||
|
|
"valid_targets_mean": 6082.8,
|
||
|
|
"valid_targets_min": 4291
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3765822784810127,
|
||
|
|
"grad_norm": 0.5135806794137311,
|
||
|
|
"learning_rate": 3.889029468250819e-05,
|
||
|
|
"loss": 0.3164,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31978845596313477,
|
||
|
|
"step": 435,
|
||
|
|
"valid_targets_mean": 5666.0,
|
||
|
|
"valid_targets_min": 3586
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3924050632911391,
|
||
|
|
"grad_norm": 0.517191652020098,
|
||
|
|
"learning_rate": 3.883785165650486e-05,
|
||
|
|
"loss": 0.3227,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.34643927216529846,
|
||
|
|
"step": 440,
|
||
|
|
"valid_targets_mean": 6099.8,
|
||
|
|
"valid_targets_min": 4447
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4082278481012658,
|
||
|
|
"grad_norm": 0.46886883431653514,
|
||
|
|
"learning_rate": 3.878423491532848e-05,
|
||
|
|
"loss": 0.3221,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29647165536880493,
|
||
|
|
"step": 445,
|
||
|
|
"valid_targets_mean": 5554.6,
|
||
|
|
"valid_targets_min": 2158
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4240506329113924,
|
||
|
|
"grad_norm": 0.5009776650584222,
|
||
|
|
"learning_rate": 3.872944779963508e-05,
|
||
|
|
"loss": 0.3069,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2930678725242615,
|
||
|
|
"step": 450,
|
||
|
|
"valid_targets_mean": 5905.5,
|
||
|
|
"valid_targets_min": 4156
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.439873417721519,
|
||
|
|
"grad_norm": 0.5581179183711339,
|
||
|
|
"learning_rate": 3.8673493723002295e-05,
|
||
|
|
"loss": 0.3207,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3513834476470947,
|
||
|
|
"step": 455,
|
||
|
|
"valid_targets_mean": 4625.1,
|
||
|
|
"valid_targets_min": 628
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4556962025316456,
|
||
|
|
"grad_norm": 0.4973056639296272,
|
||
|
|
"learning_rate": 3.861637617171666e-05,
|
||
|
|
"loss": 0.305,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3165348172187805,
|
||
|
|
"step": 460,
|
||
|
|
"valid_targets_mean": 6393.6,
|
||
|
|
"valid_targets_min": 842
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4715189873417722,
|
||
|
|
"grad_norm": 0.5759427318216085,
|
||
|
|
"learning_rate": 3.855809870455642e-05,
|
||
|
|
"loss": 0.323,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3243919014930725,
|
||
|
|
"step": 465,
|
||
|
|
"valid_targets_mean": 5460.6,
|
||
|
|
"valid_targets_min": 771
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4873417721518987,
|
||
|
|
"grad_norm": 0.4546482939790815,
|
||
|
|
"learning_rate": 3.8498664952569776e-05,
|
||
|
|
"loss": 0.3156,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31235501170158386,
|
||
|
|
"step": 470,
|
||
|
|
"valid_targets_mean": 6463.9,
|
||
|
|
"valid_targets_min": 3142
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5031645569620253,
|
||
|
|
"grad_norm": 0.6343641310574493,
|
||
|
|
"learning_rate": 3.843807861884866e-05,
|
||
|
|
"loss": 0.3175,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2982757091522217,
|
||
|
|
"step": 475,
|
||
|
|
"valid_targets_mean": 4136.8,
|
||
|
|
"valid_targets_min": 648
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.518987341772152,
|
||
|
|
"grad_norm": 0.5069046822030032,
|
||
|
|
"learning_rate": 3.8376343478298016e-05,
|
||
|
|
"loss": 0.3086,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29897549748420715,
|
||
|
|
"step": 480,
|
||
|
|
"valid_targets_mean": 6222.6,
|
||
|
|
"valid_targets_min": 5441
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5348101265822784,
|
||
|
|
"grad_norm": 0.5330238248658747,
|
||
|
|
"learning_rate": 3.831346337740057e-05,
|
||
|
|
"loss": 0.3173,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31486159563064575,
|
||
|
|
"step": 485,
|
||
|
|
"valid_targets_mean": 5535.6,
|
||
|
|
"valid_targets_min": 514
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5506329113924051,
|
||
|
|
"grad_norm": 0.48002428336966424,
|
||
|
|
"learning_rate": 3.824944223397721e-05,
|
||
|
|
"loss": 0.3333,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.32473498582839966,
|
||
|
|
"step": 490,
|
||
|
|
"valid_targets_mean": 5520.2,
|
||
|
|
"valid_targets_min": 806
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5664556962025316,
|
||
|
|
"grad_norm": 0.46876258971251733,
|
||
|
|
"learning_rate": 3.818428403694283e-05,
|
||
|
|
"loss": 0.3074,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3067629933357239,
|
||
|
|
"step": 495,
|
||
|
|
"valid_targets_mean": 6308.1,
|
||
|
|
"valid_targets_min": 4391
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5822784810126582,
|
||
|
|
"grad_norm": 0.5196291770297198,
|
||
|
|
"learning_rate": 3.811799284605787e-05,
|
||
|
|
"loss": 0.3081,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3322131633758545,
|
||
|
|
"step": 500,
|
||
|
|
"valid_targets_mean": 5697.0,
|
||
|
|
"valid_targets_min": 799
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5981012658227849,
|
||
|
|
"grad_norm": 0.523916980014032,
|
||
|
|
"learning_rate": 3.805057279167529e-05,
|
||
|
|
"loss": 0.31,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3147963881492615,
|
||
|
|
"step": 505,
|
||
|
|
"valid_targets_mean": 5464.8,
|
||
|
|
"valid_targets_min": 744
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6139240506329116,
|
||
|
|
"grad_norm": 0.5145689742215844,
|
||
|
|
"learning_rate": 3.798202807448328e-05,
|
||
|
|
"loss": 0.3121,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3126614987850189,
|
||
|
|
"step": 510,
|
||
|
|
"valid_targets_mean": 5357.8,
|
||
|
|
"valid_targets_min": 2133
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.629746835443038,
|
||
|
|
"grad_norm": 0.522545617708332,
|
||
|
|
"learning_rate": 3.791236296524349e-05,
|
||
|
|
"loss": 0.3017,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31563282012939453,
|
||
|
|
"step": 515,
|
||
|
|
"valid_targets_mean": 6165.4,
|
||
|
|
"valid_targets_min": 4172
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6455696202531644,
|
||
|
|
"grad_norm": 0.4587360075244091,
|
||
|
|
"learning_rate": 3.7841581804524966e-05,
|
||
|
|
"loss": 0.3049,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3099702298641205,
|
||
|
|
"step": 520,
|
||
|
|
"valid_targets_mean": 6268.2,
|
||
|
|
"valid_targets_min": 5153
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6613924050632911,
|
||
|
|
"grad_norm": 0.48285835961523327,
|
||
|
|
"learning_rate": 3.776968900243369e-05,
|
||
|
|
"loss": 0.3118,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3159397840499878,
|
||
|
|
"step": 525,
|
||
|
|
"valid_targets_mean": 6025.0,
|
||
|
|
"valid_targets_min": 882
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6772151898734178,
|
||
|
|
"grad_norm": 0.5027822640943034,
|
||
|
|
"learning_rate": 3.7696689038337807e-05,
|
||
|
|
"loss": 0.3132,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3112294673919678,
|
||
|
|
"step": 530,
|
||
|
|
"valid_targets_mean": 6107.8,
|
||
|
|
"valid_targets_min": 3057
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6930379746835444,
|
||
|
|
"grad_norm": 0.49706927167370746,
|
||
|
|
"learning_rate": 3.762258646058854e-05,
|
||
|
|
"loss": 0.3073,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2948613166809082,
|
||
|
|
"step": 535,
|
||
|
|
"valid_targets_mean": 5631.9,
|
||
|
|
"valid_targets_min": 2175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7088607594936709,
|
||
|
|
"grad_norm": 0.5035384651568894,
|
||
|
|
"learning_rate": 3.754738588623679e-05,
|
||
|
|
"loss": 0.3039,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3071019649505615,
|
||
|
|
"step": 540,
|
||
|
|
"valid_targets_mean": 6088.6,
|
||
|
|
"valid_targets_min": 2811
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7246835443037973,
|
||
|
|
"grad_norm": 0.5147424772974462,
|
||
|
|
"learning_rate": 3.747109200074544e-05,
|
||
|
|
"loss": 0.2913,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2903684973716736,
|
||
|
|
"step": 545,
|
||
|
|
"valid_targets_mean": 5766.8,
|
||
|
|
"valid_targets_min": 1985
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.740506329113924,
|
||
|
|
"grad_norm": 0.5282616133475547,
|
||
|
|
"learning_rate": 3.739370955769748e-05,
|
||
|
|
"loss": 0.3054,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.305397093296051,
|
||
|
|
"step": 550,
|
||
|
|
"valid_targets_mean": 5689.4,
|
||
|
|
"valid_targets_min": 2557
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7563291139240507,
|
||
|
|
"grad_norm": 0.4682192117249516,
|
||
|
|
"learning_rate": 3.73152433784998e-05,
|
||
|
|
"loss": 0.2931,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3013456165790558,
|
||
|
|
"step": 555,
|
||
|
|
"valid_targets_mean": 5368.7,
|
||
|
|
"valid_targets_min": 1986
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7721518987341773,
|
||
|
|
"grad_norm": 0.44057671857937225,
|
||
|
|
"learning_rate": 3.723569835208276e-05,
|
||
|
|
"loss": 0.2935,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2914057672023773,
|
||
|
|
"step": 560,
|
||
|
|
"valid_targets_mean": 6597.2,
|
||
|
|
"valid_targets_min": 5193
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7879746835443038,
|
||
|
|
"grad_norm": 0.4796159961996015,
|
||
|
|
"learning_rate": 3.715507943459561e-05,
|
||
|
|
"loss": 0.2996,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2853228449821472,
|
||
|
|
"step": 565,
|
||
|
|
"valid_targets_mean": 5998.0,
|
||
|
|
"valid_targets_min": 3590
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8037974683544302,
|
||
|
|
"grad_norm": 0.4610958393107323,
|
||
|
|
"learning_rate": 3.70733916490977e-05,
|
||
|
|
"loss": 0.2774,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2611140012741089,
|
||
|
|
"step": 570,
|
||
|
|
"valid_targets_mean": 5264.6,
|
||
|
|
"valid_targets_min": 2780
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8196202531645569,
|
||
|
|
"grad_norm": 0.49037791791585045,
|
||
|
|
"learning_rate": 3.699064008524548e-05,
|
||
|
|
"loss": 0.315,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.32969820499420166,
|
||
|
|
"step": 575,
|
||
|
|
"valid_targets_mean": 5529.9,
|
||
|
|
"valid_targets_min": 2026
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8354430379746836,
|
||
|
|
"grad_norm": 0.4797430506584315,
|
||
|
|
"learning_rate": 3.690682989897539e-05,
|
||
|
|
"loss": 0.2992,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2864863872528076,
|
||
|
|
"step": 580,
|
||
|
|
"valid_targets_mean": 5474.2,
|
||
|
|
"valid_targets_min": 2469
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8512658227848102,
|
||
|
|
"grad_norm": 0.49388198180056775,
|
||
|
|
"learning_rate": 3.682196631218267e-05,
|
||
|
|
"loss": 0.307,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3122454881668091,
|
||
|
|
"step": 585,
|
||
|
|
"valid_targets_mean": 5575.6,
|
||
|
|
"valid_targets_min": 2994
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8670886075949367,
|
||
|
|
"grad_norm": 0.45749314010557557,
|
||
|
|
"learning_rate": 3.67360546123959e-05,
|
||
|
|
"loss": 0.3121,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2997322082519531,
|
||
|
|
"step": 590,
|
||
|
|
"valid_targets_mean": 5467.4,
|
||
|
|
"valid_targets_min": 767
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8829113924050633,
|
||
|
|
"grad_norm": 0.45411543089255624,
|
||
|
|
"learning_rate": 3.6649100152447636e-05,
|
||
|
|
"loss": 0.3055,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30825352668762207,
|
||
|
|
"step": 595,
|
||
|
|
"valid_targets_mean": 6071.1,
|
||
|
|
"valid_targets_min": 3451
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8987341772151898,
|
||
|
|
"grad_norm": 0.44164946101678815,
|
||
|
|
"learning_rate": 3.6561108350140876e-05,
|
||
|
|
"loss": 0.3003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2762806713581085,
|
||
|
|
"step": 600,
|
||
|
|
"valid_targets_mean": 6110.0,
|
||
|
|
"valid_targets_min": 4094
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9145569620253164,
|
||
|
|
"grad_norm": 0.4843182843398918,
|
||
|
|
"learning_rate": 3.647208468791147e-05,
|
||
|
|
"loss": 0.3137,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3104550838470459,
|
||
|
|
"step": 605,
|
||
|
|
"valid_targets_mean": 5450.8,
|
||
|
|
"valid_targets_min": 2042
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9303797468354431,
|
||
|
|
"grad_norm": 0.5246679183938616,
|
||
|
|
"learning_rate": 3.638203471248656e-05,
|
||
|
|
"loss": 0.2969,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.311735063791275,
|
||
|
|
"step": 610,
|
||
|
|
"valid_targets_mean": 6342.0,
|
||
|
|
"valid_targets_min": 4203
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9462025316455698,
|
||
|
|
"grad_norm": 0.4739098308477039,
|
||
|
|
"learning_rate": 3.6290964034539e-05,
|
||
|
|
"loss": 0.2882,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2983429729938507,
|
||
|
|
"step": 615,
|
||
|
|
"valid_targets_mean": 6205.6,
|
||
|
|
"valid_targets_min": 805
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9620253164556962,
|
||
|
|
"grad_norm": 0.43160749249022473,
|
||
|
|
"learning_rate": 3.61988783283377e-05,
|
||
|
|
"loss": 0.3155,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31190475821495056,
|
||
|
|
"step": 620,
|
||
|
|
"valid_targets_mean": 6585.2,
|
||
|
|
"valid_targets_min": 3950
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9778481012658227,
|
||
|
|
"grad_norm": 0.48372069468120066,
|
||
|
|
"learning_rate": 3.610578333139418e-05,
|
||
|
|
"loss": 0.2993,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30041295289993286,
|
||
|
|
"step": 625,
|
||
|
|
"valid_targets_mean": 5826.0,
|
||
|
|
"valid_targets_min": 2238
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9936708860759493,
|
||
|
|
"grad_norm": 0.5074154638338687,
|
||
|
|
"learning_rate": 3.601168484410503e-05,
|
||
|
|
"loss": 0.2986,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29460474848747253,
|
||
|
|
"step": 630,
|
||
|
|
"valid_targets_mean": 5909.8,
|
||
|
|
"valid_targets_min": 3580
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.009493670886076,
|
||
|
|
"grad_norm": 0.4582513785903484,
|
||
|
|
"learning_rate": 3.591658872939051e-05,
|
||
|
|
"loss": 0.2956,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2935415804386139,
|
||
|
|
"step": 635,
|
||
|
|
"valid_targets_mean": 6060.9,
|
||
|
|
"valid_targets_min": 685
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0253164556962027,
|
||
|
|
"grad_norm": 0.4865535063786641,
|
||
|
|
"learning_rate": 3.582050091232927e-05,
|
||
|
|
"loss": 0.2933,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30103033781051636,
|
||
|
|
"step": 640,
|
||
|
|
"valid_targets_mean": 5298.8,
|
||
|
|
"valid_targets_min": 2244
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.041139240506329,
|
||
|
|
"grad_norm": 0.4899472148100494,
|
||
|
|
"learning_rate": 3.572342737978919e-05,
|
||
|
|
"loss": 0.2926,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2633832097053528,
|
||
|
|
"step": 645,
|
||
|
|
"valid_targets_mean": 5349.4,
|
||
|
|
"valid_targets_min": 1965
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0569620253164556,
|
||
|
|
"grad_norm": 0.4592971443361485,
|
||
|
|
"learning_rate": 3.562537418005433e-05,
|
||
|
|
"loss": 0.291,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26928383111953735,
|
||
|
|
"step": 650,
|
||
|
|
"valid_targets_mean": 5509.2,
|
||
|
|
"valid_targets_min": 767
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0727848101265822,
|
||
|
|
"grad_norm": 0.45093775520530355,
|
||
|
|
"learning_rate": 3.5526347422448115e-05,
|
||
|
|
"loss": 0.2957,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30223190784454346,
|
||
|
|
"step": 655,
|
||
|
|
"valid_targets_mean": 5825.9,
|
||
|
|
"valid_targets_min": 3179
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.088607594936709,
|
||
|
|
"grad_norm": 0.46619156750938906,
|
||
|
|
"learning_rate": 3.5426353276952664e-05,
|
||
|
|
"loss": 0.297,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31004923582077026,
|
||
|
|
"step": 660,
|
||
|
|
"valid_targets_mean": 6050.8,
|
||
|
|
"valid_targets_min": 836
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1044303797468356,
|
||
|
|
"grad_norm": 0.5198681015561224,
|
||
|
|
"learning_rate": 3.532539797382438e-05,
|
||
|
|
"loss": 0.3003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30181536078453064,
|
||
|
|
"step": 665,
|
||
|
|
"valid_targets_mean": 5796.1,
|
||
|
|
"valid_targets_min": 2469
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1202531645569622,
|
||
|
|
"grad_norm": 0.48495786428109977,
|
||
|
|
"learning_rate": 3.5223487803205745e-05,
|
||
|
|
"loss": 0.2896,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28098708391189575,
|
||
|
|
"step": 670,
|
||
|
|
"valid_targets_mean": 5556.3,
|
||
|
|
"valid_targets_min": 3519
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1360759493670884,
|
||
|
|
"grad_norm": 0.4697953936662229,
|
||
|
|
"learning_rate": 3.512062911473342e-05,
|
||
|
|
"loss": 0.3005,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2885569930076599,
|
||
|
|
"step": 675,
|
||
|
|
"valid_targets_mean": 5751.8,
|
||
|
|
"valid_targets_min": 3446
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.151898734177215,
|
||
|
|
"grad_norm": 0.4778592999393128,
|
||
|
|
"learning_rate": 3.501682831714263e-05,
|
||
|
|
"loss": 0.2828,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2793217897415161,
|
||
|
|
"step": 680,
|
||
|
|
"valid_targets_mean": 6121.1,
|
||
|
|
"valid_targets_min": 5274
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1677215189873418,
|
||
|
|
"grad_norm": 0.49097382184697463,
|
||
|
|
"learning_rate": 3.491209187786784e-05,
|
||
|
|
"loss": 0.2908,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2594742774963379,
|
||
|
|
"step": 685,
|
||
|
|
"valid_targets_mean": 5144.1,
|
||
|
|
"valid_targets_min": 1753
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1835443037974684,
|
||
|
|
"grad_norm": 0.48053227810239074,
|
||
|
|
"learning_rate": 3.480642632263981e-05,
|
||
|
|
"loss": 0.3004,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3140348792076111,
|
||
|
|
"step": 690,
|
||
|
|
"valid_targets_mean": 5762.4,
|
||
|
|
"valid_targets_min": 1713
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.199367088607595,
|
||
|
|
"grad_norm": 0.4554353384848606,
|
||
|
|
"learning_rate": 3.469983823507898e-05,
|
||
|
|
"loss": 0.3007,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29892221093177795,
|
||
|
|
"step": 695,
|
||
|
|
"valid_targets_mean": 5548.7,
|
||
|
|
"valid_targets_min": 642
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2151898734177213,
|
||
|
|
"grad_norm": 0.47227747671392983,
|
||
|
|
"learning_rate": 3.4592334256285306e-05,
|
||
|
|
"loss": 0.288,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2790769338607788,
|
||
|
|
"step": 700,
|
||
|
|
"valid_targets_mean": 5659.8,
|
||
|
|
"valid_targets_min": 2595
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.231012658227848,
|
||
|
|
"grad_norm": 0.46014971292294476,
|
||
|
|
"learning_rate": 3.4483921084424446e-05,
|
||
|
|
"loss": 0.2845,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2855474352836609,
|
||
|
|
"step": 705,
|
||
|
|
"valid_targets_mean": 5672.0,
|
||
|
|
"valid_targets_min": 2942
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2468354430379747,
|
||
|
|
"grad_norm": 0.45834270629586105,
|
||
|
|
"learning_rate": 3.437460547431044e-05,
|
||
|
|
"loss": 0.2943,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2855965793132782,
|
||
|
|
"step": 710,
|
||
|
|
"valid_targets_mean": 6076.5,
|
||
|
|
"valid_targets_min": 2378
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2626582278481013,
|
||
|
|
"grad_norm": 0.4760222160081626,
|
||
|
|
"learning_rate": 3.426439423698483e-05,
|
||
|
|
"loss": 0.2948,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30656492710113525,
|
||
|
|
"step": 715,
|
||
|
|
"valid_targets_mean": 6437.0,
|
||
|
|
"valid_targets_min": 4826
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.278481012658228,
|
||
|
|
"grad_norm": 0.46990296888744815,
|
||
|
|
"learning_rate": 3.41532942392923e-05,
|
||
|
|
"loss": 0.2817,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3031311333179474,
|
||
|
|
"step": 720,
|
||
|
|
"valid_targets_mean": 6133.1,
|
||
|
|
"valid_targets_min": 3111
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2943037974683547,
|
||
|
|
"grad_norm": 0.4571705839965309,
|
||
|
|
"learning_rate": 3.404131240345281e-05,
|
||
|
|
"loss": 0.2894,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29287195205688477,
|
||
|
|
"step": 725,
|
||
|
|
"valid_targets_mean": 6849.6,
|
||
|
|
"valid_targets_min": 5307
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.310126582278481,
|
||
|
|
"grad_norm": 0.45070130004909464,
|
||
|
|
"learning_rate": 3.3928455706630354e-05,
|
||
|
|
"loss": 0.3008,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29879462718963623,
|
||
|
|
"step": 730,
|
||
|
|
"valid_targets_mean": 6255.3,
|
||
|
|
"valid_targets_min": 4712
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3259493670886076,
|
||
|
|
"grad_norm": 0.46137028628139504,
|
||
|
|
"learning_rate": 3.3814731180498166e-05,
|
||
|
|
"loss": 0.2942,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2994469702243805,
|
||
|
|
"step": 735,
|
||
|
|
"valid_targets_mean": 6294.0,
|
||
|
|
"valid_targets_min": 3962
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3417721518987342,
|
||
|
|
"grad_norm": 0.46762059401539763,
|
||
|
|
"learning_rate": 3.370014591080064e-05,
|
||
|
|
"loss": 0.2816,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26314711570739746,
|
||
|
|
"step": 740,
|
||
|
|
"valid_targets_mean": 5497.4,
|
||
|
|
"valid_targets_min": 2189
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.357594936708861,
|
||
|
|
"grad_norm": 0.4572706524806831,
|
||
|
|
"learning_rate": 3.358470703691184e-05,
|
||
|
|
"loss": 0.296,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28679436445236206,
|
||
|
|
"step": 745,
|
||
|
|
"valid_targets_mean": 5952.6,
|
||
|
|
"valid_targets_min": 3586
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3734177215189876,
|
||
|
|
"grad_norm": 0.4595477740780959,
|
||
|
|
"learning_rate": 3.34684217513907e-05,
|
||
|
|
"loss": 0.2921,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2892051041126251,
|
||
|
|
"step": 750,
|
||
|
|
"valid_targets_mean": 5905.7,
|
||
|
|
"valid_targets_min": 2447
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3892405063291138,
|
||
|
|
"grad_norm": 0.46680713190521667,
|
||
|
|
"learning_rate": 3.335129729953282e-05,
|
||
|
|
"loss": 0.2934,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31123167276382446,
|
||
|
|
"step": 755,
|
||
|
|
"valid_targets_mean": 6154.4,
|
||
|
|
"valid_targets_min": 4720
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4050632911392404,
|
||
|
|
"grad_norm": 0.486405918820402,
|
||
|
|
"learning_rate": 3.323334097891908e-05,
|
||
|
|
"loss": 0.2874,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2746376097202301,
|
||
|
|
"step": 760,
|
||
|
|
"valid_targets_mean": 5427.4,
|
||
|
|
"valid_targets_min": 2031
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.420886075949367,
|
||
|
|
"grad_norm": 0.48586284114044387,
|
||
|
|
"learning_rate": 3.311456013896099e-05,
|
||
|
|
"loss": 0.2919,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3024941384792328,
|
||
|
|
"step": 765,
|
||
|
|
"valid_targets_mean": 5223.7,
|
||
|
|
"valid_targets_min": 778
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4367088607594938,
|
||
|
|
"grad_norm": 0.5025764682649462,
|
||
|
|
"learning_rate": 3.299496218044269e-05,
|
||
|
|
"loss": 0.2936,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.315251886844635,
|
||
|
|
"step": 770,
|
||
|
|
"valid_targets_mean": 4921.6,
|
||
|
|
"valid_targets_min": 596
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4525316455696204,
|
||
|
|
"grad_norm": 0.4549092472858824,
|
||
|
|
"learning_rate": 3.287455455505991e-05,
|
||
|
|
"loss": 0.2828,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.293832927942276,
|
||
|
|
"step": 775,
|
||
|
|
"valid_targets_mean": 6210.0,
|
||
|
|
"valid_targets_min": 775
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4683544303797467,
|
||
|
|
"grad_norm": 0.5687625275372207,
|
||
|
|
"learning_rate": 3.275334476495564e-05,
|
||
|
|
"loss": 0.2832,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25202175974845886,
|
||
|
|
"step": 780,
|
||
|
|
"valid_targets_mean": 5337.6,
|
||
|
|
"valid_targets_min": 2448
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4841772151898733,
|
||
|
|
"grad_norm": 0.5152415549279067,
|
||
|
|
"learning_rate": 3.2631340362252725e-05,
|
||
|
|
"loss": 0.2845,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29954302310943604,
|
||
|
|
"step": 785,
|
||
|
|
"valid_targets_mean": 4789.7,
|
||
|
|
"valid_targets_min": 743
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5,
|
||
|
|
"grad_norm": 0.4500139404772333,
|
||
|
|
"learning_rate": 3.25085489485833e-05,
|
||
|
|
"loss": 0.2918,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3050997257232666,
|
||
|
|
"step": 790,
|
||
|
|
"valid_targets_mean": 6118.1,
|
||
|
|
"valid_targets_min": 5123
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5158227848101267,
|
||
|
|
"grad_norm": 0.4158348076851014,
|
||
|
|
"learning_rate": 3.238497817461519e-05,
|
||
|
|
"loss": 0.3008,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27680695056915283,
|
||
|
|
"step": 795,
|
||
|
|
"valid_targets_mean": 6113.9,
|
||
|
|
"valid_targets_min": 3881
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5316455696202533,
|
||
|
|
"grad_norm": 0.4603920238857949,
|
||
|
|
"learning_rate": 3.226063573957518e-05,
|
||
|
|
"loss": 0.2984,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30450594425201416,
|
||
|
|
"step": 800,
|
||
|
|
"valid_targets_mean": 5632.4,
|
||
|
|
"valid_targets_min": 2030
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5474683544303796,
|
||
|
|
"grad_norm": 0.42817337943378414,
|
||
|
|
"learning_rate": 3.2135529390769364e-05,
|
||
|
|
"loss": 0.2987,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30379846692085266,
|
||
|
|
"step": 805,
|
||
|
|
"valid_targets_mean": 6379.4,
|
||
|
|
"valid_targets_min": 2224
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5632911392405062,
|
||
|
|
"grad_norm": 0.4610425941332099,
|
||
|
|
"learning_rate": 3.200966692310038e-05,
|
||
|
|
"loss": 0.2829,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2888951599597931,
|
||
|
|
"step": 810,
|
||
|
|
"valid_targets_mean": 6147.2,
|
||
|
|
"valid_targets_min": 2198
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.579113924050633,
|
||
|
|
"grad_norm": 0.4942290487075236,
|
||
|
|
"learning_rate": 3.1883056178581806e-05,
|
||
|
|
"loss": 0.282,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28200429677963257,
|
||
|
|
"step": 815,
|
||
|
|
"valid_targets_mean": 5177.9,
|
||
|
|
"valid_targets_min": 685
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5949367088607596,
|
||
|
|
"grad_norm": 0.4388572833069293,
|
||
|
|
"learning_rate": 3.1755705045849465e-05,
|
||
|
|
"loss": 0.2951,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3148422837257385,
|
||
|
|
"step": 820,
|
||
|
|
"valid_targets_mean": 6577.1,
|
||
|
|
"valid_targets_min": 3687
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6107594936708862,
|
||
|
|
"grad_norm": 0.4512748381822771,
|
||
|
|
"learning_rate": 3.162762145967001e-05,
|
||
|
|
"loss": 0.2858,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28762495517730713,
|
||
|
|
"step": 825,
|
||
|
|
"valid_targets_mean": 5885.2,
|
||
|
|
"valid_targets_min": 2707
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6265822784810124,
|
||
|
|
"grad_norm": 0.4612075125257181,
|
||
|
|
"learning_rate": 3.149881340044646e-05,
|
||
|
|
"loss": 0.2991,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2912214398384094,
|
||
|
|
"step": 830,
|
||
|
|
"valid_targets_mean": 5434.2,
|
||
|
|
"valid_targets_min": 766
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.642405063291139,
|
||
|
|
"grad_norm": 0.4802794790594333,
|
||
|
|
"learning_rate": 3.1369288893721036e-05,
|
||
|
|
"loss": 0.2867,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3072575628757477,
|
||
|
|
"step": 835,
|
||
|
|
"valid_targets_mean": 5911.4,
|
||
|
|
"valid_targets_min": 1609
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6582278481012658,
|
||
|
|
"grad_norm": 0.44392324432819513,
|
||
|
|
"learning_rate": 3.123905600967506e-05,
|
||
|
|
"loss": 0.2835,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2849636971950531,
|
||
|
|
"step": 840,
|
||
|
|
"valid_targets_mean": 5639.6,
|
||
|
|
"valid_targets_min": 2609
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6740506329113924,
|
||
|
|
"grad_norm": 0.4632523986474561,
|
||
|
|
"learning_rate": 3.110812286262618e-05,
|
||
|
|
"loss": 0.2963,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2830730974674225,
|
||
|
|
"step": 845,
|
||
|
|
"valid_targets_mean": 5890.8,
|
||
|
|
"valid_targets_min": 3121
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.689873417721519,
|
||
|
|
"grad_norm": 0.4501317007069297,
|
||
|
|
"learning_rate": 3.097649761052278e-05,
|
||
|
|
"loss": 0.2933,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3051697313785553,
|
||
|
|
"step": 850,
|
||
|
|
"valid_targets_mean": 5598.3,
|
||
|
|
"valid_targets_min": 2592
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7056962025316453,
|
||
|
|
"grad_norm": 0.46325393649233176,
|
||
|
|
"learning_rate": 3.084418845443566e-05,
|
||
|
|
"loss": 0.2935,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28344061970710754,
|
||
|
|
"step": 855,
|
||
|
|
"valid_targets_mean": 5272.2,
|
||
|
|
"valid_targets_min": 570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.721518987341772,
|
||
|
|
"grad_norm": 0.43355255805031406,
|
||
|
|
"learning_rate": 3.0711203638047124e-05,
|
||
|
|
"loss": 0.3005,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.280727356672287,
|
||
|
|
"step": 860,
|
||
|
|
"valid_targets_mean": 6016.7,
|
||
|
|
"valid_targets_min": 3875
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7373417721518987,
|
||
|
|
"grad_norm": 0.5243573469765044,
|
||
|
|
"learning_rate": 3.05775514471373e-05,
|
||
|
|
"loss": 0.2819,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2778775095939636,
|
||
|
|
"step": 865,
|
||
|
|
"valid_targets_mean": 5218.6,
|
||
|
|
"valid_targets_min": 805
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7531645569620253,
|
||
|
|
"grad_norm": 0.45747491693031095,
|
||
|
|
"learning_rate": 3.0443240209067863e-05,
|
||
|
|
"loss": 0.2944,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25808537006378174,
|
||
|
|
"step": 870,
|
||
|
|
"valid_targets_mean": 5862.0,
|
||
|
|
"valid_targets_min": 2501
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.768987341772152,
|
||
|
|
"grad_norm": 0.49891165828308487,
|
||
|
|
"learning_rate": 3.0308278292263266e-05,
|
||
|
|
"loss": 0.2771,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29467886686325073,
|
||
|
|
"step": 875,
|
||
|
|
"valid_targets_mean": 5912.4,
|
||
|
|
"valid_targets_min": 3287
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7848101265822782,
|
||
|
|
"grad_norm": 0.48235098411869953,
|
||
|
|
"learning_rate": 3.0172674105689256e-05,
|
||
|
|
"loss": 0.2862,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31870269775390625,
|
||
|
|
"step": 880,
|
||
|
|
"valid_targets_mean": 5790.2,
|
||
|
|
"valid_targets_min": 642
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8006329113924053,
|
||
|
|
"grad_norm": 0.41869426026137824,
|
||
|
|
"learning_rate": 3.003643609832899e-05,
|
||
|
|
"loss": 0.2836,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27100610733032227,
|
||
|
|
"step": 885,
|
||
|
|
"valid_targets_mean": 5882.7,
|
||
|
|
"valid_targets_min": 2918
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8164556962025316,
|
||
|
|
"grad_norm": 0.49003311128203797,
|
||
|
|
"learning_rate": 2.98995727586566e-05,
|
||
|
|
"loss": 0.2965,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28583741188049316,
|
||
|
|
"step": 890,
|
||
|
|
"valid_targets_mean": 5098.6,
|
||
|
|
"valid_targets_min": 1482
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8322784810126582,
|
||
|
|
"grad_norm": 0.4911196460814385,
|
||
|
|
"learning_rate": 2.9762092614108288e-05,
|
||
|
|
"loss": 0.293,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2707781195640564,
|
||
|
|
"step": 895,
|
||
|
|
"valid_targets_mean": 5186.9,
|
||
|
|
"valid_targets_min": 2133
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.848101265822785,
|
||
|
|
"grad_norm": 0.4509284368354034,
|
||
|
|
"learning_rate": 2.9624004230551056e-05,
|
||
|
|
"loss": 0.2733,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2792275547981262,
|
||
|
|
"step": 900,
|
||
|
|
"valid_targets_mean": 6446.4,
|
||
|
|
"valid_targets_min": 4394
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8639240506329116,
|
||
|
|
"grad_norm": 0.4697430774538071,
|
||
|
|
"learning_rate": 2.9485316211748947e-05,
|
||
|
|
"loss": 0.2895,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2734847664833069,
|
||
|
|
"step": 905,
|
||
|
|
"valid_targets_mean": 5766.9,
|
||
|
|
"valid_targets_min": 3057
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.879746835443038,
|
||
|
|
"grad_norm": 0.4562524053984699,
|
||
|
|
"learning_rate": 2.934603719882703e-05,
|
||
|
|
"loss": 0.2971,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3326224684715271,
|
||
|
|
"step": 910,
|
||
|
|
"valid_targets_mean": 5974.5,
|
||
|
|
"valid_targets_min": 465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8955696202531644,
|
||
|
|
"grad_norm": 0.45011259088694505,
|
||
|
|
"learning_rate": 2.920617586973297e-05,
|
||
|
|
"loss": 0.2885,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2704487442970276,
|
||
|
|
"step": 915,
|
||
|
|
"valid_targets_mean": 5862.4,
|
||
|
|
"valid_targets_min": 2292
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.911392405063291,
|
||
|
|
"grad_norm": 0.4772485229953272,
|
||
|
|
"learning_rate": 2.9065740938696333e-05,
|
||
|
|
"loss": 0.2894,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2805355191230774,
|
||
|
|
"step": 920,
|
||
|
|
"valid_targets_mean": 5758.2,
|
||
|
|
"valid_targets_min": 3948
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9272151898734178,
|
||
|
|
"grad_norm": 0.4727148738831468,
|
||
|
|
"learning_rate": 2.8924741155685675e-05,
|
||
|
|
"loss": 0.2906,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30549630522727966,
|
||
|
|
"step": 925,
|
||
|
|
"valid_targets_mean": 6252.5,
|
||
|
|
"valid_targets_min": 1967
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9430379746835444,
|
||
|
|
"grad_norm": 0.45775895170790065,
|
||
|
|
"learning_rate": 2.8783185305863307e-05,
|
||
|
|
"loss": 0.2865,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2964749336242676,
|
||
|
|
"step": 930,
|
||
|
|
"valid_targets_mean": 5598.0,
|
||
|
|
"valid_targets_min": 834
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.958860759493671,
|
||
|
|
"grad_norm": 0.4337416998396072,
|
||
|
|
"learning_rate": 2.8641082209037984e-05,
|
||
|
|
"loss": 0.2827,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2781994938850403,
|
||
|
|
"step": 935,
|
||
|
|
"valid_targets_mean": 6104.6,
|
||
|
|
"valid_targets_min": 2587
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9746835443037973,
|
||
|
|
"grad_norm": 0.43295170811481387,
|
||
|
|
"learning_rate": 2.8498440719115344e-05,
|
||
|
|
"loss": 0.2897,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29673928022384644,
|
||
|
|
"step": 940,
|
||
|
|
"valid_targets_mean": 6292.1,
|
||
|
|
"valid_targets_min": 1984
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.990506329113924,
|
||
|
|
"grad_norm": 0.44789535466180536,
|
||
|
|
"learning_rate": 2.8355269723546234e-05,
|
||
|
|
"loss": 0.296,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27013325691223145,
|
||
|
|
"step": 945,
|
||
|
|
"valid_targets_mean": 5349.1,
|
||
|
|
"valid_targets_min": 1760
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0063291139240507,
|
||
|
|
"grad_norm": 0.42162207814788233,
|
||
|
|
"learning_rate": 2.821157814277304e-05,
|
||
|
|
"loss": 0.2872,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27115151286125183,
|
||
|
|
"step": 950,
|
||
|
|
"valid_targets_mean": 5983.3,
|
||
|
|
"valid_targets_min": 3679
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0221518987341773,
|
||
|
|
"grad_norm": 0.4416759250384479,
|
||
|
|
"learning_rate": 2.806737492967378e-05,
|
||
|
|
"loss": 0.2879,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2797127366065979,
|
||
|
|
"step": 955,
|
||
|
|
"valid_targets_mean": 6223.0,
|
||
|
|
"valid_targets_min": 3355
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.037974683544304,
|
||
|
|
"grad_norm": 0.4679293009398521,
|
||
|
|
"learning_rate": 2.7922669069004394e-05,
|
||
|
|
"loss": 0.2789,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27050942182540894,
|
||
|
|
"step": 960,
|
||
|
|
"valid_targets_mean": 5775.5,
|
||
|
|
"valid_targets_min": 2546
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0537974683544302,
|
||
|
|
"grad_norm": 0.41810285637714373,
|
||
|
|
"learning_rate": 2.7777469576838868e-05,
|
||
|
|
"loss": 0.2876,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2595946192741394,
|
||
|
|
"step": 965,
|
||
|
|
"valid_targets_mean": 5585.8,
|
||
|
|
"valid_targets_min": 2470
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.069620253164557,
|
||
|
|
"grad_norm": 0.41670836098856057,
|
||
|
|
"learning_rate": 2.76317855000075e-05,
|
||
|
|
"loss": 0.2713,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26501375436782837,
|
||
|
|
"step": 970,
|
||
|
|
"valid_targets_mean": 6174.8,
|
||
|
|
"valid_targets_min": 3179
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0854430379746836,
|
||
|
|
"grad_norm": 0.4176187275484247,
|
||
|
|
"learning_rate": 2.7485625915533215e-05,
|
||
|
|
"loss": 0.274,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2671218514442444,
|
||
|
|
"step": 975,
|
||
|
|
"valid_targets_mean": 6132.2,
|
||
|
|
"valid_targets_min": 2633
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1012658227848102,
|
||
|
|
"grad_norm": 0.46082577065514035,
|
||
|
|
"learning_rate": 2.7338999930066016e-05,
|
||
|
|
"loss": 0.2793,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2930499315261841,
|
||
|
|
"step": 980,
|
||
|
|
"valid_targets_mean": 5562.1,
|
||
|
|
"valid_targets_min": 809
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.117088607594937,
|
||
|
|
"grad_norm": 0.42281633576231314,
|
||
|
|
"learning_rate": 2.719191667931561e-05,
|
||
|
|
"loss": 0.2773,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26106882095336914,
|
||
|
|
"step": 985,
|
||
|
|
"valid_targets_mean": 5840.2,
|
||
|
|
"valid_targets_min": 900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.132911392405063,
|
||
|
|
"grad_norm": 0.47392007897126087,
|
||
|
|
"learning_rate": 2.7044385327482135e-05,
|
||
|
|
"loss": 0.2888,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29251885414123535,
|
||
|
|
"step": 990,
|
||
|
|
"valid_targets_mean": 5295.4,
|
||
|
|
"valid_targets_min": 548
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1487341772151898,
|
||
|
|
"grad_norm": 0.47021600771641175,
|
||
|
|
"learning_rate": 2.689641506668525e-05,
|
||
|
|
"loss": 0.2759,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.290133535861969,
|
||
|
|
"step": 995,
|
||
|
|
"valid_targets_mean": 5718.2,
|
||
|
|
"valid_targets_min": 849
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1645569620253164,
|
||
|
|
"grad_norm": 0.45553216877342095,
|
||
|
|
"learning_rate": 2.674801511639133e-05,
|
||
|
|
"loss": 0.2776,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2877347767353058,
|
||
|
|
"step": 1000,
|
||
|
|
"valid_targets_mean": 6502.6,
|
||
|
|
"valid_targets_min": 5276
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.180379746835443,
|
||
|
|
"grad_norm": 0.4491892439143873,
|
||
|
|
"learning_rate": 2.6599194722839097e-05,
|
||
|
|
"loss": 0.275,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28108590841293335,
|
||
|
|
"step": 1005,
|
||
|
|
"valid_targets_mean": 5940.3,
|
||
|
|
"valid_targets_min": 1960
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1962025316455698,
|
||
|
|
"grad_norm": 0.4448846946970026,
|
||
|
|
"learning_rate": 2.64499631584635e-05,
|
||
|
|
"loss": 0.2855,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2802794575691223,
|
||
|
|
"step": 1010,
|
||
|
|
"valid_targets_mean": 5671.4,
|
||
|
|
"valid_targets_min": 2455
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.212025316455696,
|
||
|
|
"grad_norm": 0.4484367055515372,
|
||
|
|
"learning_rate": 2.6300329721317973e-05,
|
||
|
|
"loss": 0.2813,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26574909687042236,
|
||
|
|
"step": 1015,
|
||
|
|
"valid_targets_mean": 5304.2,
|
||
|
|
"valid_targets_min": 789
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2278481012658227,
|
||
|
|
"grad_norm": 0.43854176405636597,
|
||
|
|
"learning_rate": 2.6150303734495138e-05,
|
||
|
|
"loss": 0.2746,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2753145694732666,
|
||
|
|
"step": 1020,
|
||
|
|
"valid_targets_mean": 5756.1,
|
||
|
|
"valid_targets_min": 823
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2436708860759493,
|
||
|
|
"grad_norm": 0.42425115614897546,
|
||
|
|
"learning_rate": 2.599989454554587e-05,
|
||
|
|
"loss": 0.268,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2607611417770386,
|
||
|
|
"step": 1025,
|
||
|
|
"valid_targets_mean": 5802.4,
|
||
|
|
"valid_targets_min": 3111
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.259493670886076,
|
||
|
|
"grad_norm": 0.4504789142568739,
|
||
|
|
"learning_rate": 2.584911152589695e-05,
|
||
|
|
"loss": 0.2688,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28150200843811035,
|
||
|
|
"step": 1030,
|
||
|
|
"valid_targets_mean": 5944.2,
|
||
|
|
"valid_targets_min": 650
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2753164556962027,
|
||
|
|
"grad_norm": 0.4525200916510691,
|
||
|
|
"learning_rate": 2.569796407026711e-05,
|
||
|
|
"loss": 0.2741,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27071693539619446,
|
||
|
|
"step": 1035,
|
||
|
|
"valid_targets_mean": 5265.6,
|
||
|
|
"valid_targets_min": 1782
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.291139240506329,
|
||
|
|
"grad_norm": 0.4478586364611807,
|
||
|
|
"learning_rate": 2.5546461596081727e-05,
|
||
|
|
"loss": 0.2895,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28119248151779175,
|
||
|
|
"step": 1040,
|
||
|
|
"valid_targets_mean": 5817.7,
|
||
|
|
"valid_targets_min": 771
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3069620253164556,
|
||
|
|
"grad_norm": 0.4994557894280622,
|
||
|
|
"learning_rate": 2.539461354288602e-05,
|
||
|
|
"loss": 0.2693,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2649917006492615,
|
||
|
|
"step": 1045,
|
||
|
|
"valid_targets_mean": 5387.4,
|
||
|
|
"valid_targets_min": 2027
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3227848101265822,
|
||
|
|
"grad_norm": 0.4212634476803461,
|
||
|
|
"learning_rate": 2.5242429371756936e-05,
|
||
|
|
"loss": 0.2861,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2844800353050232,
|
||
|
|
"step": 1050,
|
||
|
|
"valid_targets_mean": 6229.3,
|
||
|
|
"valid_targets_min": 3601
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.338607594936709,
|
||
|
|
"grad_norm": 0.47147792069734795,
|
||
|
|
"learning_rate": 2.508991856471366e-05,
|
||
|
|
"loss": 0.2717,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2720435559749603,
|
||
|
|
"step": 1055,
|
||
|
|
"valid_targets_mean": 5872.4,
|
||
|
|
"valid_targets_min": 4683
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3544303797468356,
|
||
|
|
"grad_norm": 0.45341683692445545,
|
||
|
|
"learning_rate": 2.493709062412682e-05,
|
||
|
|
"loss": 0.274,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2766611576080322,
|
||
|
|
"step": 1060,
|
||
|
|
"valid_targets_mean": 6110.9,
|
||
|
|
"valid_targets_min": 4224
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.370253164556962,
|
||
|
|
"grad_norm": 0.42859174931725985,
|
||
|
|
"learning_rate": 2.4783955072126436e-05,
|
||
|
|
"loss": 0.2812,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2609805166721344,
|
||
|
|
"step": 1065,
|
||
|
|
"valid_targets_mean": 6118.9,
|
||
|
|
"valid_targets_min": 4341
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3860759493670884,
|
||
|
|
"grad_norm": 0.40346214436942013,
|
||
|
|
"learning_rate": 2.463052145000863e-05,
|
||
|
|
"loss": 0.2749,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26767057180404663,
|
||
|
|
"step": 1070,
|
||
|
|
"valid_targets_mean": 6161.4,
|
||
|
|
"valid_targets_min": 3469
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.401898734177215,
|
||
|
|
"grad_norm": 0.46918080352369473,
|
||
|
|
"learning_rate": 2.4476799317641126e-05,
|
||
|
|
"loss": 0.2756,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30101579427719116,
|
||
|
|
"step": 1075,
|
||
|
|
"valid_targets_mean": 5820.3,
|
||
|
|
"valid_targets_min": 685
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4177215189873418,
|
||
|
|
"grad_norm": 0.4695439709744784,
|
||
|
|
"learning_rate": 2.432279825286766e-05,
|
||
|
|
"loss": 0.2678,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2902735769748688,
|
||
|
|
"step": 1080,
|
||
|
|
"valid_targets_mean": 5656.1,
|
||
|
|
"valid_targets_min": 2328
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4335443037974684,
|
||
|
|
"grad_norm": 0.4464240769598974,
|
||
|
|
"learning_rate": 2.4168527850911167e-05,
|
||
|
|
"loss": 0.2835,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27806729078292847,
|
||
|
|
"step": 1085,
|
||
|
|
"valid_targets_mean": 5907.1,
|
||
|
|
"valid_targets_min": 1952
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.449367088607595,
|
||
|
|
"grad_norm": 0.45916771177387666,
|
||
|
|
"learning_rate": 2.401399772377597e-05,
|
||
|
|
"loss": 0.2751,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26612377166748047,
|
||
|
|
"step": 1090,
|
||
|
|
"valid_targets_mean": 5740.3,
|
||
|
|
"valid_targets_min": 2251
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4651898734177213,
|
||
|
|
"grad_norm": 0.43614438276566764,
|
||
|
|
"learning_rate": 2.3859217499648893e-05,
|
||
|
|
"loss": 0.2928,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2967340052127838,
|
||
|
|
"step": 1095,
|
||
|
|
"valid_targets_mean": 5992.1,
|
||
|
|
"valid_targets_min": 767
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.481012658227848,
|
||
|
|
"grad_norm": 0.42047467518959086,
|
||
|
|
"learning_rate": 2.3704196822299322e-05,
|
||
|
|
"loss": 0.2726,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25916093587875366,
|
||
|
|
"step": 1100,
|
||
|
|
"valid_targets_mean": 5764.1,
|
||
|
|
"valid_targets_min": 1060
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4968354430379747,
|
||
|
|
"grad_norm": 0.4842338297714059,
|
||
|
|
"learning_rate": 2.3548945350478416e-05,
|
||
|
|
"loss": 0.2894,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.32087451219558716,
|
||
|
|
"step": 1105,
|
||
|
|
"valid_targets_mean": 5177.1,
|
||
|
|
"valid_targets_min": 753
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5126582278481013,
|
||
|
|
"grad_norm": 0.46656590339014214,
|
||
|
|
"learning_rate": 2.3393472757317236e-05,
|
||
|
|
"loss": 0.2771,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.295026957988739,
|
||
|
|
"step": 1110,
|
||
|
|
"valid_targets_mean": 5936.8,
|
||
|
|
"valid_targets_min": 3333
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.528481012658228,
|
||
|
|
"grad_norm": 0.4972487400584799,
|
||
|
|
"learning_rate": 2.323778872972408e-05,
|
||
|
|
"loss": 0.2808,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2944778501987457,
|
||
|
|
"step": 1115,
|
||
|
|
"valid_targets_mean": 4811.2,
|
||
|
|
"valid_targets_min": 877
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5443037974683547,
|
||
|
|
"grad_norm": 0.45881784037617374,
|
||
|
|
"learning_rate": 2.3081902967780935e-05,
|
||
|
|
"loss": 0.2747,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2754046618938446,
|
||
|
|
"step": 1120,
|
||
|
|
"valid_targets_mean": 5554.3,
|
||
|
|
"valid_targets_min": 2591
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.560126582278481,
|
||
|
|
"grad_norm": 0.43038062477562644,
|
||
|
|
"learning_rate": 2.292582518413908e-05,
|
||
|
|
"loss": 0.2771,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2747959792613983,
|
||
|
|
"step": 1125,
|
||
|
|
"valid_targets_mean": 5968.6,
|
||
|
|
"valid_targets_min": 3979
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5759493670886076,
|
||
|
|
"grad_norm": 0.4607696578365557,
|
||
|
|
"learning_rate": 2.2769565103413935e-05,
|
||
|
|
"loss": 0.2734,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25683754682540894,
|
||
|
|
"step": 1130,
|
||
|
|
"valid_targets_mean": 5161.8,
|
||
|
|
"valid_targets_min": 1842
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5917721518987342,
|
||
|
|
"grad_norm": 0.47711868798365914,
|
||
|
|
"learning_rate": 2.2613132461579186e-05,
|
||
|
|
"loss": 0.2782,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28505629301071167,
|
||
|
|
"step": 1135,
|
||
|
|
"valid_targets_mean": 5645.9,
|
||
|
|
"valid_targets_min": 706
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.607594936708861,
|
||
|
|
"grad_norm": 0.4546045209649017,
|
||
|
|
"learning_rate": 2.2456537005360105e-05,
|
||
|
|
"loss": 0.2861,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2820706069469452,
|
||
|
|
"step": 1140,
|
||
|
|
"valid_targets_mean": 6324.2,
|
||
|
|
"valid_targets_min": 4468
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6234177215189876,
|
||
|
|
"grad_norm": 0.4540725645210422,
|
||
|
|
"learning_rate": 2.2299788491626362e-05,
|
||
|
|
"loss": 0.2888,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30551356077194214,
|
||
|
|
"step": 1145,
|
||
|
|
"valid_targets_mean": 6043.6,
|
||
|
|
"valid_targets_min": 1900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6392405063291138,
|
||
|
|
"grad_norm": 0.4607470691585838,
|
||
|
|
"learning_rate": 2.2142896686784017e-05,
|
||
|
|
"loss": 0.2844,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3159760534763336,
|
||
|
|
"step": 1150,
|
||
|
|
"valid_targets_mean": 5909.0,
|
||
|
|
"valid_targets_min": 669
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6550632911392404,
|
||
|
|
"grad_norm": 0.485657713940311,
|
||
|
|
"learning_rate": 2.198587136616708e-05,
|
||
|
|
"loss": 0.2865,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2861446440219879,
|
||
|
|
"step": 1155,
|
||
|
|
"valid_targets_mean": 5887.2,
|
||
|
|
"valid_targets_min": 1033
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.670886075949367,
|
||
|
|
"grad_norm": 0.44431348288850725,
|
||
|
|
"learning_rate": 2.1828722313428425e-05,
|
||
|
|
"loss": 0.288,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2906510829925537,
|
||
|
|
"step": 1160,
|
||
|
|
"valid_targets_mean": 6221.4,
|
||
|
|
"valid_targets_min": 4137
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6867088607594938,
|
||
|
|
"grad_norm": 0.43442541085036207,
|
||
|
|
"learning_rate": 2.167145931993019e-05,
|
||
|
|
"loss": 0.2761,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2862897515296936,
|
||
|
|
"step": 1165,
|
||
|
|
"valid_targets_mean": 6012.8,
|
||
|
|
"valid_targets_min": 2815
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7025316455696204,
|
||
|
|
"grad_norm": 0.38176947662087063,
|
||
|
|
"learning_rate": 2.151409218413374e-05,
|
||
|
|
"loss": 0.2722,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2606070041656494,
|
||
|
|
"step": 1170,
|
||
|
|
"valid_targets_mean": 6584.1,
|
||
|
|
"valid_targets_min": 4706
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7183544303797467,
|
||
|
|
"grad_norm": 0.4536298731246358,
|
||
|
|
"learning_rate": 2.135663071098915e-05,
|
||
|
|
"loss": 0.2738,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2756196856498718,
|
||
|
|
"step": 1175,
|
||
|
|
"valid_targets_mean": 5809.6,
|
||
|
|
"valid_targets_min": 2501
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7341772151898733,
|
||
|
|
"grad_norm": 0.44313775169197916,
|
||
|
|
"learning_rate": 2.1199084711324298e-05,
|
||
|
|
"loss": 0.287,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2668633759021759,
|
||
|
|
"step": 1180,
|
||
|
|
"valid_targets_mean": 5518.7,
|
||
|
|
"valid_targets_min": 2497
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.75,
|
||
|
|
"grad_norm": 0.4299516471156008,
|
||
|
|
"learning_rate": 2.1041464001233595e-05,
|
||
|
|
"loss": 0.2694,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2561643719673157,
|
||
|
|
"step": 1185,
|
||
|
|
"valid_targets_mean": 5885.6,
|
||
|
|
"valid_targets_min": 3359
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7658227848101267,
|
||
|
|
"grad_norm": 0.4478929646947557,
|
||
|
|
"learning_rate": 2.0883778401466364e-05,
|
||
|
|
"loss": 0.279,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2736479341983795,
|
||
|
|
"step": 1190,
|
||
|
|
"valid_targets_mean": 5109.4,
|
||
|
|
"valid_targets_min": 1954
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7816455696202533,
|
||
|
|
"grad_norm": 0.41061449931460264,
|
||
|
|
"learning_rate": 2.0726037736814963e-05,
|
||
|
|
"loss": 0.2741,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2618325352668762,
|
||
|
|
"step": 1195,
|
||
|
|
"valid_targets_mean": 6287.6,
|
||
|
|
"valid_targets_min": 4065
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7974683544303796,
|
||
|
|
"grad_norm": 0.4605604729306755,
|
||
|
|
"learning_rate": 2.0568251835502647e-05,
|
||
|
|
"loss": 0.2744,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26115620136260986,
|
||
|
|
"step": 1200,
|
||
|
|
"valid_targets_mean": 5568.8,
|
||
|
|
"valid_targets_min": 2346
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8132911392405062,
|
||
|
|
"grad_norm": 0.45057688611815,
|
||
|
|
"learning_rate": 2.0410430528571174e-05,
|
||
|
|
"loss": 0.2714,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2877865433692932,
|
||
|
|
"step": 1205,
|
||
|
|
"valid_targets_mean": 6449.9,
|
||
|
|
"valid_targets_min": 3503
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.829113924050633,
|
||
|
|
"grad_norm": 0.5086771269895078,
|
||
|
|
"learning_rate": 2.02525836492683e-05,
|
||
|
|
"loss": 0.2897,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3087032437324524,
|
||
|
|
"step": 1210,
|
||
|
|
"valid_targets_mean": 5531.9,
|
||
|
|
"valid_targets_min": 3800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8449367088607596,
|
||
|
|
"grad_norm": 0.4452489781308896,
|
||
|
|
"learning_rate": 2.009472103243511e-05,
|
||
|
|
"loss": 0.2939,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3064022660255432,
|
||
|
|
"step": 1215,
|
||
|
|
"valid_targets_mean": 5917.0,
|
||
|
|
"valid_targets_min": 4748
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8607594936708862,
|
||
|
|
"grad_norm": 0.4863342707710525,
|
||
|
|
"learning_rate": 1.993685251389322e-05,
|
||
|
|
"loss": 0.2745,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2625889182090759,
|
||
|
|
"step": 1220,
|
||
|
|
"valid_targets_mean": 5559.6,
|
||
|
|
"valid_targets_min": 2258
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8765822784810124,
|
||
|
|
"grad_norm": 0.4290653882064235,
|
||
|
|
"learning_rate": 1.9778987929831972e-05,
|
||
|
|
"loss": 0.2781,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29007023572921753,
|
||
|
|
"step": 1225,
|
||
|
|
"valid_targets_mean": 6616.1,
|
||
|
|
"valid_targets_min": 5869
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.892405063291139,
|
||
|
|
"grad_norm": 0.4296040763825853,
|
||
|
|
"learning_rate": 1.9621137116195548e-05,
|
||
|
|
"loss": 0.2749,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2632428705692291,
|
||
|
|
"step": 1230,
|
||
|
|
"valid_targets_mean": 5761.6,
|
||
|
|
"valid_targets_min": 3731
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9082278481012658,
|
||
|
|
"grad_norm": 0.623945942618191,
|
||
|
|
"learning_rate": 1.9463309908070164e-05,
|
||
|
|
"loss": 0.2693,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26622751355171204,
|
||
|
|
"step": 1235,
|
||
|
|
"valid_targets_mean": 6114.1,
|
||
|
|
"valid_targets_min": 4962
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9240506329113924,
|
||
|
|
"grad_norm": 0.4793044781966833,
|
||
|
|
"learning_rate": 1.9305516139071264e-05,
|
||
|
|
"loss": 0.2667,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2645919919013977,
|
||
|
|
"step": 1240,
|
||
|
|
"valid_targets_mean": 5016.1,
|
||
|
|
"valid_targets_min": 740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.939873417721519,
|
||
|
|
"grad_norm": 0.45305985353592,
|
||
|
|
"learning_rate": 1.9147765640730803e-05,
|
||
|
|
"loss": 0.2867,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2768091559410095,
|
||
|
|
"step": 1245,
|
||
|
|
"valid_targets_mean": 5664.0,
|
||
|
|
"valid_targets_min": 3534
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9556962025316453,
|
||
|
|
"grad_norm": 0.4605421239088336,
|
||
|
|
"learning_rate": 1.8990068241884748e-05,
|
||
|
|
"loss": 0.2795,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2963486313819885,
|
||
|
|
"step": 1250,
|
||
|
|
"valid_targets_mean": 5677.1,
|
||
|
|
"valid_targets_min": 570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.971518987341772,
|
||
|
|
"grad_norm": 0.4597763295363824,
|
||
|
|
"learning_rate": 1.88324337680606e-05,
|
||
|
|
"loss": 0.276,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2958153486251831,
|
||
|
|
"step": 1255,
|
||
|
|
"valid_targets_mean": 5865.9,
|
||
|
|
"valid_targets_min": 2303
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9873417721518987,
|
||
|
|
"grad_norm": 0.47799649075734507,
|
||
|
|
"learning_rate": 1.8674872040865264e-05,
|
||
|
|
"loss": 0.2823,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3197573125362396,
|
||
|
|
"step": 1260,
|
||
|
|
"valid_targets_mean": 5307.5,
|
||
|
|
"valid_targets_min": 599
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.003164556962025,
|
||
|
|
"grad_norm": 0.43937378661184534,
|
||
|
|
"learning_rate": 1.8517392877373062e-05,
|
||
|
|
"loss": 0.2702,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25840771198272705,
|
||
|
|
"step": 1265,
|
||
|
|
"valid_targets_mean": 5393.1,
|
||
|
|
"valid_targets_min": 1782
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.018987341772152,
|
||
|
|
"grad_norm": 0.4766511193828852,
|
||
|
|
"learning_rate": 1.8360006089514083e-05,
|
||
|
|
"loss": 0.2712,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2725266218185425,
|
||
|
|
"step": 1270,
|
||
|
|
"valid_targets_mean": 5521.6,
|
||
|
|
"valid_targets_min": 3121
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.034810126582278,
|
||
|
|
"grad_norm": 0.4148691145081977,
|
||
|
|
"learning_rate": 1.8202721483462864e-05,
|
||
|
|
"loss": 0.2706,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29157158732414246,
|
||
|
|
"step": 1275,
|
||
|
|
"valid_targets_mean": 6168.4,
|
||
|
|
"valid_targets_min": 2189
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.050632911392405,
|
||
|
|
"grad_norm": 0.4270401645062619,
|
||
|
|
"learning_rate": 1.8045548859027354e-05,
|
||
|
|
"loss": 0.2625,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2323746681213379,
|
||
|
|
"step": 1280,
|
||
|
|
"valid_targets_mean": 5276.9,
|
||
|
|
"valid_targets_min": 599
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.0664556962025316,
|
||
|
|
"grad_norm": 0.4282844537809516,
|
||
|
|
"learning_rate": 1.7888498009038368e-05,
|
||
|
|
"loss": 0.269,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2751336097717285,
|
||
|
|
"step": 1285,
|
||
|
|
"valid_targets_mean": 5892.6,
|
||
|
|
"valid_targets_min": 2244
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.082278481012658,
|
||
|
|
"grad_norm": 0.47316582639020416,
|
||
|
|
"learning_rate": 1.7731578718739414e-05,
|
||
|
|
"loss": 0.2768,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30304911732673645,
|
||
|
|
"step": 1290,
|
||
|
|
"valid_targets_mean": 6108.2,
|
||
|
|
"valid_targets_min": 2388
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.098101265822785,
|
||
|
|
"grad_norm": 0.4803746298155582,
|
||
|
|
"learning_rate": 1.7574800765176994e-05,
|
||
|
|
"loss": 0.2809,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2848498225212097,
|
||
|
|
"step": 1295,
|
||
|
|
"valid_targets_mean": 4842.9,
|
||
|
|
"valid_targets_min": 465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.113924050632911,
|
||
|
|
"grad_norm": 0.4312417867244571,
|
||
|
|
"learning_rate": 1.741817391659149e-05,
|
||
|
|
"loss": 0.2756,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26076751947402954,
|
||
|
|
"step": 1300,
|
||
|
|
"valid_targets_mean": 5613.5,
|
||
|
|
"valid_targets_min": 2346
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.129746835443038,
|
||
|
|
"grad_norm": 0.4307261808714215,
|
||
|
|
"learning_rate": 1.726170793180848e-05,
|
||
|
|
"loss": 0.2766,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28067219257354736,
|
||
|
|
"step": 1305,
|
||
|
|
"valid_targets_mean": 6023.4,
|
||
|
|
"valid_targets_min": 1905
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.1455696202531644,
|
||
|
|
"grad_norm": 0.4568416978649484,
|
||
|
|
"learning_rate": 1.7105412559630735e-05,
|
||
|
|
"loss": 0.2752,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27373552322387695,
|
||
|
|
"step": 1310,
|
||
|
|
"valid_targets_mean": 6107.5,
|
||
|
|
"valid_targets_min": 2019
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.1613924050632916,
|
||
|
|
"grad_norm": 0.4500355857613447,
|
||
|
|
"learning_rate": 1.6949297538230792e-05,
|
||
|
|
"loss": 0.2634,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27843713760375977,
|
||
|
|
"step": 1315,
|
||
|
|
"valid_targets_mean": 5768.1,
|
||
|
|
"valid_targets_min": 2215
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.177215189873418,
|
||
|
|
"grad_norm": 0.4388953953436056,
|
||
|
|
"learning_rate": 1.6793372594544224e-05,
|
||
|
|
"loss": 0.2671,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27434033155441284,
|
||
|
|
"step": 1320,
|
||
|
|
"valid_targets_mean": 5503.1,
|
||
|
|
"valid_targets_min": 900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.193037974683544,
|
||
|
|
"grad_norm": 0.4576763460557553,
|
||
|
|
"learning_rate": 1.6637647443663593e-05,
|
||
|
|
"loss": 0.2683,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2750262916088104,
|
||
|
|
"step": 1325,
|
||
|
|
"valid_targets_mean": 6009.2,
|
||
|
|
"valid_targets_min": 4215
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.208860759493671,
|
||
|
|
"grad_norm": 0.46329839465044464,
|
||
|
|
"learning_rate": 1.6482131788233097e-05,
|
||
|
|
"loss": 0.2742,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28142502903938293,
|
||
|
|
"step": 1330,
|
||
|
|
"valid_targets_mean": 5375.4,
|
||
|
|
"valid_targets_min": 767
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.224683544303797,
|
||
|
|
"grad_norm": 0.4434699499274513,
|
||
|
|
"learning_rate": 1.6326835317844096e-05,
|
||
|
|
"loss": 0.2743,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2833135724067688,
|
||
|
|
"step": 1335,
|
||
|
|
"valid_targets_mean": 5825.7,
|
||
|
|
"valid_targets_min": 3592
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.2405063291139244,
|
||
|
|
"grad_norm": 0.44275524559779117,
|
||
|
|
"learning_rate": 1.6171767708431343e-05,
|
||
|
|
"loss": 0.2607,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24540019035339355,
|
||
|
|
"step": 1340,
|
||
|
|
"valid_targets_mean": 5436.6,
|
||
|
|
"valid_targets_min": 973
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.256329113924051,
|
||
|
|
"grad_norm": 0.4253187110223974,
|
||
|
|
"learning_rate": 1.601693862167013e-05,
|
||
|
|
"loss": 0.2654,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27130672335624695,
|
||
|
|
"step": 1345,
|
||
|
|
"valid_targets_mean": 6132.4,
|
||
|
|
"valid_targets_min": 3411
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.272151898734177,
|
||
|
|
"grad_norm": 0.44888455434609287,
|
||
|
|
"learning_rate": 1.5862357704374328e-05,
|
||
|
|
"loss": 0.27,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29880720376968384,
|
||
|
|
"step": 1350,
|
||
|
|
"valid_targets_mean": 5640.2,
|
||
|
|
"valid_targets_min": 577
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.287974683544304,
|
||
|
|
"grad_norm": 0.4447433594075796,
|
||
|
|
"learning_rate": 1.5708034587895278e-05,
|
||
|
|
"loss": 0.2682,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2827964425086975,
|
||
|
|
"step": 1355,
|
||
|
|
"valid_targets_mean": 5510.6,
|
||
|
|
"valid_targets_min": 1400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.30379746835443,
|
||
|
|
"grad_norm": 0.5005058691286197,
|
||
|
|
"learning_rate": 1.5553978887521756e-05,
|
||
|
|
"loss": 0.2661,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24379396438598633,
|
||
|
|
"step": 1360,
|
||
|
|
"valid_targets_mean": 5517.3,
|
||
|
|
"valid_targets_min": 1860
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.319620253164557,
|
||
|
|
"grad_norm": 0.4277824202684447,
|
||
|
|
"learning_rate": 1.5400200201880832e-05,
|
||
|
|
"loss": 0.2693,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2706229090690613,
|
||
|
|
"step": 1365,
|
||
|
|
"valid_targets_mean": 6064.0,
|
||
|
|
"valid_targets_min": 3083
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.3354430379746836,
|
||
|
|
"grad_norm": 0.42891831640248634,
|
||
|
|
"learning_rate": 1.5246708112339841e-05,
|
||
|
|
"loss": 0.2635,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2757185697555542,
|
||
|
|
"step": 1370,
|
||
|
|
"valid_targets_mean": 6370.2,
|
||
|
|
"valid_targets_min": 3108
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.35126582278481,
|
||
|
|
"grad_norm": 0.5680716223524007,
|
||
|
|
"learning_rate": 1.5093512182409426e-05,
|
||
|
|
"loss": 0.2696,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27294766902923584,
|
||
|
|
"step": 1375,
|
||
|
|
"valid_targets_mean": 5403.8,
|
||
|
|
"valid_targets_min": 2218
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.367088607594937,
|
||
|
|
"grad_norm": 0.43343129168281397,
|
||
|
|
"learning_rate": 1.4940621957147604e-05,
|
||
|
|
"loss": 0.2701,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2662773132324219,
|
||
|
|
"step": 1380,
|
||
|
|
"valid_targets_mean": 5882.5,
|
||
|
|
"valid_targets_min": 4166
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.382911392405063,
|
||
|
|
"grad_norm": 0.41494002682269865,
|
||
|
|
"learning_rate": 1.4788046962565137e-05,
|
||
|
|
"loss": 0.2766,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2607043981552124,
|
||
|
|
"step": 1385,
|
||
|
|
"valid_targets_mean": 6177.9,
|
||
|
|
"valid_targets_min": 2455
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.39873417721519,
|
||
|
|
"grad_norm": 0.43843034588316787,
|
||
|
|
"learning_rate": 1.4635796705031921e-05,
|
||
|
|
"loss": 0.2651,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27472174167633057,
|
||
|
|
"step": 1390,
|
||
|
|
"valid_targets_mean": 6159.9,
|
||
|
|
"valid_targets_min": 3177
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.4145569620253164,
|
||
|
|
"grad_norm": 0.44829735270254406,
|
||
|
|
"learning_rate": 1.4483880670684734e-05,
|
||
|
|
"loss": 0.2675,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2803511619567871,
|
||
|
|
"step": 1395,
|
||
|
|
"valid_targets_mean": 6351.8,
|
||
|
|
"valid_targets_min": 3409
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.430379746835443,
|
||
|
|
"grad_norm": 0.43784463660660367,
|
||
|
|
"learning_rate": 1.4332308324836175e-05,
|
||
|
|
"loss": 0.2719,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2583090662956238,
|
||
|
|
"step": 1400,
|
||
|
|
"valid_targets_mean": 5574.8,
|
||
|
|
"valid_targets_min": 851
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.44620253164557,
|
||
|
|
"grad_norm": 0.41049241534830344,
|
||
|
|
"learning_rate": 1.4181089111384897e-05,
|
||
|
|
"loss": 0.2687,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2622532248497009,
|
||
|
|
"step": 1405,
|
||
|
|
"valid_targets_mean": 6493.8,
|
||
|
|
"valid_targets_min": 5483
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.462025316455696,
|
||
|
|
"grad_norm": 0.4438465909763393,
|
||
|
|
"learning_rate": 1.4030232452227247e-05,
|
||
|
|
"loss": 0.2711,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2785758376121521,
|
||
|
|
"step": 1410,
|
||
|
|
"valid_targets_mean": 5877.3,
|
||
|
|
"valid_targets_min": 3379
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.477848101265823,
|
||
|
|
"grad_norm": 0.45409808589805667,
|
||
|
|
"learning_rate": 1.3879747746670153e-05,
|
||
|
|
"loss": 0.2674,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28627505898475647,
|
||
|
|
"step": 1415,
|
||
|
|
"valid_targets_mean": 5778.6,
|
||
|
|
"valid_targets_min": 1930
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.493670886075949,
|
||
|
|
"grad_norm": 0.44565153269075114,
|
||
|
|
"learning_rate": 1.3729644370845527e-05,
|
||
|
|
"loss": 0.2824,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2990872263908386,
|
||
|
|
"step": 1420,
|
||
|
|
"valid_targets_mean": 6347.4,
|
||
|
|
"valid_targets_min": 886
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.509493670886076,
|
||
|
|
"grad_norm": 0.4867505301206323,
|
||
|
|
"learning_rate": 1.3579931677126095e-05,
|
||
|
|
"loss": 0.2715,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2777697443962097,
|
||
|
|
"step": 1425,
|
||
|
|
"valid_targets_mean": 5492.8,
|
||
|
|
"valid_targets_min": 963
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.525316455696203,
|
||
|
|
"grad_norm": 0.46892411857600824,
|
||
|
|
"learning_rate": 1.3430618993542624e-05,
|
||
|
|
"loss": 0.2706,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27069422602653503,
|
||
|
|
"step": 1430,
|
||
|
|
"valid_targets_mean": 4925.8,
|
||
|
|
"valid_targets_min": 514
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.541139240506329,
|
||
|
|
"grad_norm": 0.4258887412808488,
|
||
|
|
"learning_rate": 1.3281715623202805e-05,
|
||
|
|
"loss": 0.2701,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25205692648887634,
|
||
|
|
"step": 1435,
|
||
|
|
"valid_targets_mean": 6138.2,
|
||
|
|
"valid_targets_min": 1828
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.556962025316456,
|
||
|
|
"grad_norm": 0.43025376755770534,
|
||
|
|
"learning_rate": 1.3133230843711542e-05,
|
||
|
|
"loss": 0.2753,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2578020989894867,
|
||
|
|
"step": 1440,
|
||
|
|
"valid_targets_mean": 5396.9,
|
||
|
|
"valid_targets_min": 648
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.572784810126582,
|
||
|
|
"grad_norm": 0.4121061717464447,
|
||
|
|
"learning_rate": 1.2985173906592938e-05,
|
||
|
|
"loss": 0.2621,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2575612962245941,
|
||
|
|
"step": 1445,
|
||
|
|
"valid_targets_mean": 6095.8,
|
||
|
|
"valid_targets_min": 3363
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.588607594936709,
|
||
|
|
"grad_norm": 0.4455401669257146,
|
||
|
|
"learning_rate": 1.283755403671386e-05,
|
||
|
|
"loss": 0.2656,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2595190107822418,
|
||
|
|
"step": 1450,
|
||
|
|
"valid_targets_mean": 5895.9,
|
||
|
|
"valid_targets_min": 765
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.6044303797468356,
|
||
|
|
"grad_norm": 0.47995584984074363,
|
||
|
|
"learning_rate": 1.2690380431709164e-05,
|
||
|
|
"loss": 0.2717,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25681594014167786,
|
||
|
|
"step": 1455,
|
||
|
|
"valid_targets_mean": 5262.5,
|
||
|
|
"valid_targets_min": 896
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.620253164556962,
|
||
|
|
"grad_norm": 0.44625916208811905,
|
||
|
|
"learning_rate": 1.2543662261408651e-05,
|
||
|
|
"loss": 0.2766,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26975834369659424,
|
||
|
|
"step": 1460,
|
||
|
|
"valid_targets_mean": 5787.6,
|
||
|
|
"valid_targets_min": 3012
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.636075949367089,
|
||
|
|
"grad_norm": 0.45780589523131776,
|
||
|
|
"learning_rate": 1.239740866726569e-05,
|
||
|
|
"loss": 0.2609,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2560340166091919,
|
||
|
|
"step": 1465,
|
||
|
|
"valid_targets_mean": 5298.8,
|
||
|
|
"valid_targets_min": 2302
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.651898734177215,
|
||
|
|
"grad_norm": 0.4374203751318027,
|
||
|
|
"learning_rate": 1.2251628761787676e-05,
|
||
|
|
"loss": 0.2656,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.269504189491272,
|
||
|
|
"step": 1470,
|
||
|
|
"valid_targets_mean": 5891.4,
|
||
|
|
"valid_targets_min": 976
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.667721518987342,
|
||
|
|
"grad_norm": 0.48912556917169475,
|
||
|
|
"learning_rate": 1.2106331627968268e-05,
|
||
|
|
"loss": 0.2668,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2673969268798828,
|
||
|
|
"step": 1475,
|
||
|
|
"valid_targets_mean": 5466.0,
|
||
|
|
"valid_targets_min": 628
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.6835443037974684,
|
||
|
|
"grad_norm": 0.44150795560819456,
|
||
|
|
"learning_rate": 1.1961526318721429e-05,
|
||
|
|
"loss": 0.2685,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28151217103004456,
|
||
|
|
"step": 1480,
|
||
|
|
"valid_targets_mean": 5715.2,
|
||
|
|
"valid_targets_min": 963
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.699367088607595,
|
||
|
|
"grad_norm": 0.42332732047910177,
|
||
|
|
"learning_rate": 1.1817221856317426e-05,
|
||
|
|
"loss": 0.2675,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2617891728878021,
|
||
|
|
"step": 1485,
|
||
|
|
"valid_targets_mean": 5987.8,
|
||
|
|
"valid_targets_min": 3898
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.715189873417722,
|
||
|
|
"grad_norm": 0.4331817000713787,
|
||
|
|
"learning_rate": 1.1673427231820637e-05,
|
||
|
|
"loss": 0.2699,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26096978783607483,
|
||
|
|
"step": 1490,
|
||
|
|
"valid_targets_mean": 6009.0,
|
||
|
|
"valid_targets_min": 753
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.731012658227848,
|
||
|
|
"grad_norm": 0.43190281742549974,
|
||
|
|
"learning_rate": 1.1530151404529362e-05,
|
||
|
|
"loss": 0.2754,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3024364113807678,
|
||
|
|
"step": 1495,
|
||
|
|
"valid_targets_mean": 6329.8,
|
||
|
|
"valid_targets_min": 4300
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.746835443037975,
|
||
|
|
"grad_norm": 0.4270377021411673,
|
||
|
|
"learning_rate": 1.1387403301417645e-05,
|
||
|
|
"loss": 0.2789,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2895660400390625,
|
||
|
|
"step": 1500,
|
||
|
|
"valid_targets_mean": 5815.9,
|
||
|
|
"valid_targets_min": 2026
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5336700336700337,
|
||
|
|
"grad_norm": 0.47175833690416435,
|
||
|
|
"learning_rate": 3.222069246473091e-05,
|
||
|
|
"loss": 0.2636,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2578871548175812,
|
||
|
|
"step": 1505,
|
||
|
|
"valid_targets_mean": 5995.9,
|
||
|
|
"valid_targets_min": 514
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.542087542087542,
|
||
|
|
"grad_norm": 0.4988144120109162,
|
||
|
|
"learning_rate": 3.2154125959136395e-05,
|
||
|
|
"loss": 0.2606,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26715636253356934,
|
||
|
|
"step": 1510,
|
||
|
|
"valid_targets_mean": 5125.2,
|
||
|
|
"valid_targets_min": 809
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5505050505050506,
|
||
|
|
"grad_norm": 0.49561294420836705,
|
||
|
|
"learning_rate": 3.208734528519581e-05,
|
||
|
|
"loss": 0.2635,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27658218145370483,
|
||
|
|
"step": 1515,
|
||
|
|
"valid_targets_mean": 5416.2,
|
||
|
|
"valid_targets_min": 524
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.558922558922559,
|
||
|
|
"grad_norm": 0.4909723100319413,
|
||
|
|
"learning_rate": 3.20203516196541e-05,
|
||
|
|
"loss": 0.2682,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2683098316192627,
|
||
|
|
"step": 1520,
|
||
|
|
"valid_targets_mean": 5724.5,
|
||
|
|
"valid_targets_min": 3716
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5673400673400675,
|
||
|
|
"grad_norm": 0.46049058044117985,
|
||
|
|
"learning_rate": 3.1953146143009367e-05,
|
||
|
|
"loss": 0.273,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28621405363082886,
|
||
|
|
"step": 1525,
|
||
|
|
"valid_targets_mean": 6398.9,
|
||
|
|
"valid_targets_min": 5081
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5757575757575757,
|
||
|
|
"grad_norm": 0.5068485562732447,
|
||
|
|
"learning_rate": 3.188573003949203e-05,
|
||
|
|
"loss": 0.2749,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2784101963043213,
|
||
|
|
"step": 1530,
|
||
|
|
"valid_targets_mean": 5874.6,
|
||
|
|
"valid_targets_min": 1807
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.584175084175084,
|
||
|
|
"grad_norm": 0.49170049168708563,
|
||
|
|
"learning_rate": 3.181810449704399e-05,
|
||
|
|
"loss": 0.2675,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2548806071281433,
|
||
|
|
"step": 1535,
|
||
|
|
"valid_targets_mean": 5808.4,
|
||
|
|
"valid_targets_min": 2488
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5925925925925926,
|
||
|
|
"grad_norm": 0.46857660364859843,
|
||
|
|
"learning_rate": 3.175027070729768e-05,
|
||
|
|
"loss": 0.2607,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27297866344451904,
|
||
|
|
"step": 1540,
|
||
|
|
"valid_targets_mean": 5892.1,
|
||
|
|
"valid_targets_min": 1373
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.601010101010101,
|
||
|
|
"grad_norm": 0.4623443280958736,
|
||
|
|
"learning_rate": 3.168222986555504e-05,
|
||
|
|
"loss": 0.2649,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25697314739227295,
|
||
|
|
"step": 1545,
|
||
|
|
"valid_targets_mean": 5303.7,
|
||
|
|
"valid_targets_min": 1735
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6094276094276094,
|
||
|
|
"grad_norm": 0.4793035936728254,
|
||
|
|
"learning_rate": 3.161398317076652e-05,
|
||
|
|
"loss": 0.2702,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2764521837234497,
|
||
|
|
"step": 1550,
|
||
|
|
"valid_targets_mean": 5819.5,
|
||
|
|
"valid_targets_min": 765
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6178451178451176,
|
||
|
|
"grad_norm": 0.4668860860510128,
|
||
|
|
"learning_rate": 3.154553182550986e-05,
|
||
|
|
"loss": 0.2608,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26006996631622314,
|
||
|
|
"step": 1555,
|
||
|
|
"valid_targets_mean": 5489.5,
|
||
|
|
"valid_targets_min": 3047
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6262626262626263,
|
||
|
|
"grad_norm": 0.47796110140270853,
|
||
|
|
"learning_rate": 3.1476877035969024e-05,
|
||
|
|
"loss": 0.2765,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27874821424484253,
|
||
|
|
"step": 1560,
|
||
|
|
"valid_targets_mean": 5781.8,
|
||
|
|
"valid_targets_min": 963
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.634680134680135,
|
||
|
|
"grad_norm": 0.4581773426950499,
|
||
|
|
"learning_rate": 3.140802001191283e-05,
|
||
|
|
"loss": 0.2604,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.237541064620018,
|
||
|
|
"step": 1565,
|
||
|
|
"valid_targets_mean": 6255.6,
|
||
|
|
"valid_targets_min": 3544
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.643097643097643,
|
||
|
|
"grad_norm": 0.48335835010448197,
|
||
|
|
"learning_rate": 3.133896196667369e-05,
|
||
|
|
"loss": 0.2652,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29038745164871216,
|
||
|
|
"step": 1570,
|
||
|
|
"valid_targets_mean": 5953.4,
|
||
|
|
"valid_targets_min": 3524
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6515151515151514,
|
||
|
|
"grad_norm": 0.5426689425154643,
|
||
|
|
"learning_rate": 3.1269704117126206e-05,
|
||
|
|
"loss": 0.2768,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2984479069709778,
|
||
|
|
"step": 1575,
|
||
|
|
"valid_targets_mean": 5832.0,
|
||
|
|
"valid_targets_min": 886
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.65993265993266,
|
||
|
|
"grad_norm": 0.45988998500840067,
|
||
|
|
"learning_rate": 3.120024768366576e-05,
|
||
|
|
"loss": 0.2775,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2751707434654236,
|
||
|
|
"step": 1580,
|
||
|
|
"valid_targets_mean": 5657.8,
|
||
|
|
"valid_targets_min": 744
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6683501683501682,
|
||
|
|
"grad_norm": 0.4792673772983562,
|
||
|
|
"learning_rate": 3.113059389018699e-05,
|
||
|
|
"loss": 0.2617,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2405269742012024,
|
||
|
|
"step": 1585,
|
||
|
|
"valid_targets_mean": 5396.2,
|
||
|
|
"valid_targets_min": 1555
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.676767676767677,
|
||
|
|
"grad_norm": 0.5103650573018135,
|
||
|
|
"learning_rate": 3.1060743964062194e-05,
|
||
|
|
"loss": 0.2493,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23580007255077362,
|
||
|
|
"step": 1590,
|
||
|
|
"valid_targets_mean": 6230.6,
|
||
|
|
"valid_targets_min": 4426
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.685185185185185,
|
||
|
|
"grad_norm": 0.4756410298275338,
|
||
|
|
"learning_rate": 3.099069913611977e-05,
|
||
|
|
"loss": 0.2682,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28566116094589233,
|
||
|
|
"step": 1595,
|
||
|
|
"valid_targets_mean": 5506.6,
|
||
|
|
"valid_targets_min": 775
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6936026936026938,
|
||
|
|
"grad_norm": 0.4745602293201921,
|
||
|
|
"learning_rate": 3.0920460640622464e-05,
|
||
|
|
"loss": 0.2891,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2973249554634094,
|
||
|
|
"step": 1600,
|
||
|
|
"valid_targets_mean": 6110.0,
|
||
|
|
"valid_targets_min": 3333
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.702020202020202,
|
||
|
|
"grad_norm": 0.43970970369378504,
|
||
|
|
"learning_rate": 3.085002971524564e-05,
|
||
|
|
"loss": 0.2725,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26657000184059143,
|
||
|
|
"step": 1605,
|
||
|
|
"valid_targets_mean": 6084.0,
|
||
|
|
"valid_targets_min": 5257
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.71043771043771,
|
||
|
|
"grad_norm": 0.4464368757498652,
|
||
|
|
"learning_rate": 3.077940760105551e-05,
|
||
|
|
"loss": 0.2553,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22137847542762756,
|
||
|
|
"step": 1610,
|
||
|
|
"valid_targets_mean": 6026.0,
|
||
|
|
"valid_targets_min": 3654
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.718855218855219,
|
||
|
|
"grad_norm": 0.4996610996027433,
|
||
|
|
"learning_rate": 3.070859554248719e-05,
|
||
|
|
"loss": 0.2641,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26007986068725586,
|
||
|
|
"step": 1615,
|
||
|
|
"valid_targets_mean": 4993.4,
|
||
|
|
"valid_targets_min": 1273
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7272727272727275,
|
||
|
|
"grad_norm": 0.4658302368436588,
|
||
|
|
"learning_rate": 3.063759478732284e-05,
|
||
|
|
"loss": 0.2611,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25886788964271545,
|
||
|
|
"step": 1620,
|
||
|
|
"valid_targets_mean": 5908.8,
|
||
|
|
"valid_targets_min": 1880
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7356902356902357,
|
||
|
|
"grad_norm": 0.4881933303892004,
|
||
|
|
"learning_rate": 3.056640658666965e-05,
|
||
|
|
"loss": 0.2724,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2800159454345703,
|
||
|
|
"step": 1625,
|
||
|
|
"valid_targets_mean": 5548.6,
|
||
|
|
"valid_targets_min": 3026
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.744107744107744,
|
||
|
|
"grad_norm": 0.4622627848229377,
|
||
|
|
"learning_rate": 3.0495032194937785e-05,
|
||
|
|
"loss": 0.2787,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29500848054885864,
|
||
|
|
"step": 1630,
|
||
|
|
"valid_targets_mean": 6286.5,
|
||
|
|
"valid_targets_min": 1868
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7525252525252526,
|
||
|
|
"grad_norm": 0.5224900072775827,
|
||
|
|
"learning_rate": 3.042347286981829e-05,
|
||
|
|
"loss": 0.2638,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26520195603370667,
|
||
|
|
"step": 1635,
|
||
|
|
"valid_targets_mean": 5687.2,
|
||
|
|
"valid_targets_min": 767
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.760942760942761,
|
||
|
|
"grad_norm": 0.48341273547659325,
|
||
|
|
"learning_rate": 3.0351729872260935e-05,
|
||
|
|
"loss": 0.2699,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26769256591796875,
|
||
|
|
"step": 1640,
|
||
|
|
"valid_targets_mean": 5853.3,
|
||
|
|
"valid_targets_min": 963
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7693602693602695,
|
||
|
|
"grad_norm": 0.4759754441664444,
|
||
|
|
"learning_rate": 3.0279804466451996e-05,
|
||
|
|
"loss": 0.2709,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27780115604400635,
|
||
|
|
"step": 1645,
|
||
|
|
"valid_targets_mean": 5861.6,
|
||
|
|
"valid_targets_min": 2244
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7777777777777777,
|
||
|
|
"grad_norm": 0.4934818313537275,
|
||
|
|
"learning_rate": 3.0207697919791953e-05,
|
||
|
|
"loss": 0.2583,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2537994384765625,
|
||
|
|
"step": 1650,
|
||
|
|
"valid_targets_mean": 5367.6,
|
||
|
|
"valid_targets_min": 2169
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7861952861952863,
|
||
|
|
"grad_norm": 0.4802875983555916,
|
||
|
|
"learning_rate": 3.0135411502873188e-05,
|
||
|
|
"loss": 0.2731,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2672436535358429,
|
||
|
|
"step": 1655,
|
||
|
|
"valid_targets_mean": 5591.4,
|
||
|
|
"valid_targets_min": 2148
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7946127946127945,
|
||
|
|
"grad_norm": 0.4919537763945096,
|
||
|
|
"learning_rate": 3.0062946489457588e-05,
|
||
|
|
"loss": 0.2683,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2736820578575134,
|
||
|
|
"step": 1660,
|
||
|
|
"valid_targets_mean": 5776.6,
|
||
|
|
"valid_targets_min": 2095
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8030303030303028,
|
||
|
|
"grad_norm": 0.4874244346144825,
|
||
|
|
"learning_rate": 2.9990304156454088e-05,
|
||
|
|
"loss": 0.2665,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24358776211738586,
|
||
|
|
"step": 1665,
|
||
|
|
"valid_targets_mean": 5696.8,
|
||
|
|
"valid_targets_min": 2170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8114478114478114,
|
||
|
|
"grad_norm": 0.4736894046023768,
|
||
|
|
"learning_rate": 2.9917485783896184e-05,
|
||
|
|
"loss": 0.278,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28212714195251465,
|
||
|
|
"step": 1670,
|
||
|
|
"valid_targets_mean": 5973.2,
|
||
|
|
"valid_targets_min": 804
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.81986531986532,
|
||
|
|
"grad_norm": 0.5180701591667134,
|
||
|
|
"learning_rate": 2.984449265491937e-05,
|
||
|
|
"loss": 0.2755,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2965032160282135,
|
||
|
|
"step": 1675,
|
||
|
|
"valid_targets_mean": 5538.2,
|
||
|
|
"valid_targets_min": 642
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8282828282828283,
|
||
|
|
"grad_norm": 0.48472099517020967,
|
||
|
|
"learning_rate": 2.9771326055738524e-05,
|
||
|
|
"loss": 0.2609,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.257454514503479,
|
||
|
|
"step": 1680,
|
||
|
|
"valid_targets_mean": 5996.2,
|
||
|
|
"valid_targets_min": 4286
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8367003367003365,
|
||
|
|
"grad_norm": 0.4708230007906033,
|
||
|
|
"learning_rate": 2.969798727562526e-05,
|
||
|
|
"loss": 0.2622,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27739977836608887,
|
||
|
|
"step": 1685,
|
||
|
|
"valid_targets_mean": 5786.6,
|
||
|
|
"valid_targets_min": 2952
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.845117845117845,
|
||
|
|
"grad_norm": 0.49972614462585313,
|
||
|
|
"learning_rate": 2.9624477606885196e-05,
|
||
|
|
"loss": 0.2673,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2510569989681244,
|
||
|
|
"step": 1690,
|
||
|
|
"valid_targets_mean": 5390.0,
|
||
|
|
"valid_targets_min": 728
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8535353535353534,
|
||
|
|
"grad_norm": 0.4488084533581549,
|
||
|
|
"learning_rate": 2.9550798344835176e-05,
|
||
|
|
"loss": 0.2701,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25537407398223877,
|
||
|
|
"step": 1695,
|
||
|
|
"valid_targets_mean": 6486.0,
|
||
|
|
"valid_targets_min": 4394
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.861952861952862,
|
||
|
|
"grad_norm": 0.44624289950556423,
|
||
|
|
"learning_rate": 2.947695078778047e-05,
|
||
|
|
"loss": 0.2644,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24400271475315094,
|
||
|
|
"step": 1700,
|
||
|
|
"valid_targets_mean": 5361.6,
|
||
|
|
"valid_targets_min": 1273
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8703703703703702,
|
||
|
|
"grad_norm": 0.49323369912437803,
|
||
|
|
"learning_rate": 2.940293623699187e-05,
|
||
|
|
"loss": 0.2659,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2662852108478546,
|
||
|
|
"step": 1705,
|
||
|
|
"valid_targets_mean": 5423.9,
|
||
|
|
"valid_targets_min": 1761
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.878787878787879,
|
||
|
|
"grad_norm": 0.461964707436428,
|
||
|
|
"learning_rate": 2.9328755996682784e-05,
|
||
|
|
"loss": 0.2649,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2640298008918762,
|
||
|
|
"step": 1710,
|
||
|
|
"valid_targets_mean": 5557.8,
|
||
|
|
"valid_targets_min": 1847
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.887205387205387,
|
||
|
|
"grad_norm": 0.4842785667166425,
|
||
|
|
"learning_rate": 2.9254411373986218e-05,
|
||
|
|
"loss": 0.2625,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2701316475868225,
|
||
|
|
"step": 1715,
|
||
|
|
"valid_targets_mean": 5645.8,
|
||
|
|
"valid_targets_min": 2087
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8956228956228958,
|
||
|
|
"grad_norm": 0.4400726168230355,
|
||
|
|
"learning_rate": 2.9179903678931798e-05,
|
||
|
|
"loss": 0.2659,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2720245122909546,
|
||
|
|
"step": 1720,
|
||
|
|
"valid_targets_mean": 6178.3,
|
||
|
|
"valid_targets_min": 2283
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.904040404040404,
|
||
|
|
"grad_norm": 0.5039310956644923,
|
||
|
|
"learning_rate": 2.910523422442262e-05,
|
||
|
|
"loss": 0.259,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2551863193511963,
|
||
|
|
"step": 1725,
|
||
|
|
"valid_targets_mean": 5084.2,
|
||
|
|
"valid_targets_min": 572
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9124579124579126,
|
||
|
|
"grad_norm": 0.48130526767640697,
|
||
|
|
"learning_rate": 2.903040432621218e-05,
|
||
|
|
"loss": 0.2712,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2813180088996887,
|
||
|
|
"step": 1730,
|
||
|
|
"valid_targets_mean": 5492.9,
|
||
|
|
"valid_targets_min": 642
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.920875420875421,
|
||
|
|
"grad_norm": 0.4450566158621727,
|
||
|
|
"learning_rate": 2.895541530288115e-05,
|
||
|
|
"loss": 0.2694,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24626556038856506,
|
||
|
|
"step": 1735,
|
||
|
|
"valid_targets_mean": 5779.6,
|
||
|
|
"valid_targets_min": 833
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.929292929292929,
|
||
|
|
"grad_norm": 0.4628884880600571,
|
||
|
|
"learning_rate": 2.8880268475814132e-05,
|
||
|
|
"loss": 0.2646,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2738983929157257,
|
||
|
|
"step": 1740,
|
||
|
|
"valid_targets_mean": 5713.9,
|
||
|
|
"valid_targets_min": 2152
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9377104377104377,
|
||
|
|
"grad_norm": 0.47146875329522886,
|
||
|
|
"learning_rate": 2.880496516917642e-05,
|
||
|
|
"loss": 0.2618,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25897499918937683,
|
||
|
|
"step": 1745,
|
||
|
|
"valid_targets_mean": 5837.1,
|
||
|
|
"valid_targets_min": 1954
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9461279461279464,
|
||
|
|
"grad_norm": 0.44589205425484196,
|
||
|
|
"learning_rate": 2.8729506709890645e-05,
|
||
|
|
"loss": 0.2714,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26478633284568787,
|
||
|
|
"step": 1750,
|
||
|
|
"valid_targets_mean": 6132.0,
|
||
|
|
"valid_targets_min": 3592
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9545454545454546,
|
||
|
|
"grad_norm": 0.45812178416316796,
|
||
|
|
"learning_rate": 2.865389442761336e-05,
|
||
|
|
"loss": 0.2654,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26409947872161865,
|
||
|
|
"step": 1755,
|
||
|
|
"valid_targets_mean": 5849.6,
|
||
|
|
"valid_targets_min": 3131
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.962962962962963,
|
||
|
|
"grad_norm": 0.4801446094642877,
|
||
|
|
"learning_rate": 2.857812965471166e-05,
|
||
|
|
"loss": 0.2649,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2876226603984833,
|
||
|
|
"step": 1760,
|
||
|
|
"valid_targets_mean": 5856.8,
|
||
|
|
"valid_targets_min": 804
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9713804713804715,
|
||
|
|
"grad_norm": 0.48367462710997594,
|
||
|
|
"learning_rate": 2.8502213726239678e-05,
|
||
|
|
"loss": 0.2654,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26591169834136963,
|
||
|
|
"step": 1765,
|
||
|
|
"valid_targets_mean": 5456.8,
|
||
|
|
"valid_targets_min": 2276
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9797979797979797,
|
||
|
|
"grad_norm": 0.4506297725367813,
|
||
|
|
"learning_rate": 2.8426147979915067e-05,
|
||
|
|
"loss": 0.2663,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24496473371982574,
|
||
|
|
"step": 1770,
|
||
|
|
"valid_targets_mean": 5691.6,
|
||
|
|
"valid_targets_min": 3015
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9882154882154883,
|
||
|
|
"grad_norm": 0.46527654760090237,
|
||
|
|
"learning_rate": 2.8349933756095427e-05,
|
||
|
|
"loss": 0.2668,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26362547278404236,
|
||
|
|
"step": 1775,
|
||
|
|
"valid_targets_mean": 6209.3,
|
||
|
|
"valid_targets_min": 4577
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9966329966329965,
|
||
|
|
"grad_norm": 0.44774308378539096,
|
||
|
|
"learning_rate": 2.827357239775468e-05,
|
||
|
|
"loss": 0.2622,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.265911340713501,
|
||
|
|
"step": 1780,
|
||
|
|
"valid_targets_mean": 5725.1,
|
||
|
|
"valid_targets_min": 2060
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.005050505050505,
|
||
|
|
"grad_norm": 0.4228834306946797,
|
||
|
|
"learning_rate": 2.8197065250459422e-05,
|
||
|
|
"loss": 0.2486,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24547988176345825,
|
||
|
|
"step": 1785,
|
||
|
|
"valid_targets_mean": 5958.1,
|
||
|
|
"valid_targets_min": 2375
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0134680134680134,
|
||
|
|
"grad_norm": 0.5040780268885077,
|
||
|
|
"learning_rate": 2.81204136623452e-05,
|
||
|
|
"loss": 0.2534,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26460325717926025,
|
||
|
|
"step": 1790,
|
||
|
|
"valid_targets_mean": 5342.4,
|
||
|
|
"valid_targets_min": 2552
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.021885521885522,
|
||
|
|
"grad_norm": 0.44455255554024603,
|
||
|
|
"learning_rate": 2.804361898409274e-05,
|
||
|
|
"loss": 0.2603,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.250008225440979,
|
||
|
|
"step": 1795,
|
||
|
|
"valid_targets_mean": 6092.6,
|
||
|
|
"valid_targets_min": 3132
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0303030303030303,
|
||
|
|
"grad_norm": 0.4888540010472703,
|
||
|
|
"learning_rate": 2.796668256890419e-05,
|
||
|
|
"loss": 0.2601,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.255595326423645,
|
||
|
|
"step": 1800,
|
||
|
|
"valid_targets_mean": 5412.1,
|
||
|
|
"valid_targets_min": 599
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.038720538720539,
|
||
|
|
"grad_norm": 0.456272332197701,
|
||
|
|
"learning_rate": 2.7889605772479233e-05,
|
||
|
|
"loss": 0.2565,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26009058952331543,
|
||
|
|
"step": 1805,
|
||
|
|
"valid_targets_mean": 5873.6,
|
||
|
|
"valid_targets_min": 3667
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.047138047138047,
|
||
|
|
"grad_norm": 0.47526162068172445,
|
||
|
|
"learning_rate": 2.7812389952991234e-05,
|
||
|
|
"loss": 0.2651,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2717510163784027,
|
||
|
|
"step": 1810,
|
||
|
|
"valid_targets_mean": 5074.5,
|
||
|
|
"valid_targets_min": 465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0555555555555554,
|
||
|
|
"grad_norm": 0.46603159599538324,
|
||
|
|
"learning_rate": 2.7735036471063265e-05,
|
||
|
|
"loss": 0.258,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24621015787124634,
|
||
|
|
"step": 1815,
|
||
|
|
"valid_targets_mean": 5842.6,
|
||
|
|
"valid_targets_min": 4595
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.063973063973064,
|
||
|
|
"grad_norm": 0.4562445024563088,
|
||
|
|
"learning_rate": 2.7657546689744167e-05,
|
||
|
|
"loss": 0.2554,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23575446009635925,
|
||
|
|
"step": 1820,
|
||
|
|
"valid_targets_mean": 5539.0,
|
||
|
|
"valid_targets_min": 877
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0723905723905722,
|
||
|
|
"grad_norm": 0.43940550371469234,
|
||
|
|
"learning_rate": 2.7579921974484532e-05,
|
||
|
|
"loss": 0.2521,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2583876848220825,
|
||
|
|
"step": 1825,
|
||
|
|
"valid_targets_mean": 6058.7,
|
||
|
|
"valid_targets_min": 3841
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.080808080808081,
|
||
|
|
"grad_norm": 0.4757427901719824,
|
||
|
|
"learning_rate": 2.7502163693112595e-05,
|
||
|
|
"loss": 0.2637,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.287031352519989,
|
||
|
|
"step": 1830,
|
||
|
|
"valid_targets_mean": 5939.2,
|
||
|
|
"valid_targets_min": 1761
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.089225589225589,
|
||
|
|
"grad_norm": 0.47089901333182543,
|
||
|
|
"learning_rate": 2.7424273215810192e-05,
|
||
|
|
"loss": 0.2602,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25683337450027466,
|
||
|
|
"step": 1835,
|
||
|
|
"valid_targets_mean": 6200.4,
|
||
|
|
"valid_targets_min": 4841
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0976430976430978,
|
||
|
|
"grad_norm": 0.47079150260275704,
|
||
|
|
"learning_rate": 2.7346251915088574e-05,
|
||
|
|
"loss": 0.2744,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2823716402053833,
|
||
|
|
"step": 1840,
|
||
|
|
"valid_targets_mean": 5534.8,
|
||
|
|
"valid_targets_min": 3315
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.106060606060606,
|
||
|
|
"grad_norm": 0.41384136717630904,
|
||
|
|
"learning_rate": 2.7268101165764256e-05,
|
||
|
|
"loss": 0.2567,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24914711713790894,
|
||
|
|
"step": 1845,
|
||
|
|
"valid_targets_mean": 5942.4,
|
||
|
|
"valid_targets_min": 495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1144781144781146,
|
||
|
|
"grad_norm": 0.47799928718479734,
|
||
|
|
"learning_rate": 2.7189822344934746e-05,
|
||
|
|
"loss": 0.2545,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2740170359611511,
|
||
|
|
"step": 1850,
|
||
|
|
"valid_targets_mean": 5577.8,
|
||
|
|
"valid_targets_min": 754
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.122895622895623,
|
||
|
|
"grad_norm": 0.4393256529310863,
|
||
|
|
"learning_rate": 2.7111416831954324e-05,
|
||
|
|
"loss": 0.2745,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2606737017631531,
|
||
|
|
"step": 1855,
|
||
|
|
"valid_targets_mean": 6356.8,
|
||
|
|
"valid_targets_min": 4689
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1313131313131315,
|
||
|
|
"grad_norm": 0.49227894096611585,
|
||
|
|
"learning_rate": 2.7032886008409707e-05,
|
||
|
|
"loss": 0.2697,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2675955891609192,
|
||
|
|
"step": 1860,
|
||
|
|
"valid_targets_mean": 5334.9,
|
||
|
|
"valid_targets_min": 3591
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1397306397306397,
|
||
|
|
"grad_norm": 0.44300019232744026,
|
||
|
|
"learning_rate": 2.6954231258095728e-05,
|
||
|
|
"loss": 0.2522,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24864640831947327,
|
||
|
|
"step": 1865,
|
||
|
|
"valid_targets_mean": 6035.8,
|
||
|
|
"valid_targets_min": 5344
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.148148148148148,
|
||
|
|
"grad_norm": 0.48096371062581594,
|
||
|
|
"learning_rate": 2.6875453966990924e-05,
|
||
|
|
"loss": 0.256,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24091598391532898,
|
||
|
|
"step": 1870,
|
||
|
|
"valid_targets_mean": 5171.2,
|
||
|
|
"valid_targets_min": 896
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1565656565656566,
|
||
|
|
"grad_norm": 0.4485606822042903,
|
||
|
|
"learning_rate": 2.679655552323313e-05,
|
||
|
|
"loss": 0.2631,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2668924927711487,
|
||
|
|
"step": 1875,
|
||
|
|
"valid_targets_mean": 6351.5,
|
||
|
|
"valid_targets_min": 3570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.164983164983165,
|
||
|
|
"grad_norm": 0.47490151861785507,
|
||
|
|
"learning_rate": 2.671753731709503e-05,
|
||
|
|
"loss": 0.269,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2609248161315918,
|
||
|
|
"step": 1880,
|
||
|
|
"valid_targets_mean": 6502.5,
|
||
|
|
"valid_targets_min": 4606
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1734006734006734,
|
||
|
|
"grad_norm": 0.48491249046654883,
|
||
|
|
"learning_rate": 2.663840074095963e-05,
|
||
|
|
"loss": 0.25,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2430686056613922,
|
||
|
|
"step": 1885,
|
||
|
|
"valid_targets_mean": 5501.4,
|
||
|
|
"valid_targets_min": 834
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1818181818181817,
|
||
|
|
"grad_norm": 0.48579484347272883,
|
||
|
|
"learning_rate": 2.655914718929575e-05,
|
||
|
|
"loss": 0.2585,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24881702661514282,
|
||
|
|
"step": 1890,
|
||
|
|
"valid_targets_mean": 5342.8,
|
||
|
|
"valid_targets_min": 3266
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1902356902356903,
|
||
|
|
"grad_norm": 0.46339440739579857,
|
||
|
|
"learning_rate": 2.6479778058633426e-05,
|
||
|
|
"loss": 0.2598,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2772354185581207,
|
||
|
|
"step": 1895,
|
||
|
|
"valid_targets_mean": 6097.9,
|
||
|
|
"valid_targets_min": 4994
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1986531986531985,
|
||
|
|
"grad_norm": 0.47252401948247413,
|
||
|
|
"learning_rate": 2.6400294747539338e-05,
|
||
|
|
"loss": 0.2666,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2552820146083832,
|
||
|
|
"step": 1900,
|
||
|
|
"valid_targets_mean": 6086.3,
|
||
|
|
"valid_targets_min": 3383
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.207070707070707,
|
||
|
|
"grad_norm": 0.4733757827268643,
|
||
|
|
"learning_rate": 2.6320698656592126e-05,
|
||
|
|
"loss": 0.2608,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2829735279083252,
|
||
|
|
"step": 1905,
|
||
|
|
"valid_targets_mean": 6193.1,
|
||
|
|
"valid_targets_min": 4736
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2154882154882154,
|
||
|
|
"grad_norm": 0.4545516302082259,
|
||
|
|
"learning_rate": 2.6240991188357736e-05,
|
||
|
|
"loss": 0.2672,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2541789412498474,
|
||
|
|
"step": 1910,
|
||
|
|
"valid_targets_mean": 5517.2,
|
||
|
|
"valid_targets_min": 765
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.223905723905724,
|
||
|
|
"grad_norm": 0.477035335388407,
|
||
|
|
"learning_rate": 2.6161173747364694e-05,
|
||
|
|
"loss": 0.265,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29022878408432007,
|
||
|
|
"step": 1915,
|
||
|
|
"valid_targets_mean": 6209.6,
|
||
|
|
"valid_targets_min": 1807
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2323232323232323,
|
||
|
|
"grad_norm": 0.4721123241784799,
|
||
|
|
"learning_rate": 2.6081247740079363e-05,
|
||
|
|
"loss": 0.2637,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27373164892196655,
|
||
|
|
"step": 1920,
|
||
|
|
"valid_targets_mean": 5637.5,
|
||
|
|
"valid_targets_min": 720
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.240740740740741,
|
||
|
|
"grad_norm": 0.49909640283340984,
|
||
|
|
"learning_rate": 2.600121457488116e-05,
|
||
|
|
"loss": 0.2623,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26537537574768066,
|
||
|
|
"step": 1925,
|
||
|
|
"valid_targets_mean": 4531.2,
|
||
|
|
"valid_targets_min": 753
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.249158249158249,
|
||
|
|
"grad_norm": 0.4508067303088388,
|
||
|
|
"learning_rate": 2.5921075662037726e-05,
|
||
|
|
"loss": 0.2575,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2458057701587677,
|
||
|
|
"step": 1930,
|
||
|
|
"valid_targets_mean": 5773.4,
|
||
|
|
"valid_targets_min": 2719
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.257575757575758,
|
||
|
|
"grad_norm": 0.4986299908186157,
|
||
|
|
"learning_rate": 2.5840832413680096e-05,
|
||
|
|
"loss": 0.2603,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.255989670753479,
|
||
|
|
"step": 1935,
|
||
|
|
"valid_targets_mean": 5769.0,
|
||
|
|
"valid_targets_min": 973
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.265993265993266,
|
||
|
|
"grad_norm": 0.4517947133476855,
|
||
|
|
"learning_rate": 2.5760486243777797e-05,
|
||
|
|
"loss": 0.2545,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27278995513916016,
|
||
|
|
"step": 1940,
|
||
|
|
"valid_targets_mean": 6153.6,
|
||
|
|
"valid_targets_min": 954
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.274410774410774,
|
||
|
|
"grad_norm": 0.45871371182609605,
|
||
|
|
"learning_rate": 2.5680038568113944e-05,
|
||
|
|
"loss": 0.2514,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2577679753303528,
|
||
|
|
"step": 1945,
|
||
|
|
"valid_targets_mean": 5734.0,
|
||
|
|
"valid_targets_min": 3637
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.282828282828283,
|
||
|
|
"grad_norm": 0.4618326850988252,
|
||
|
|
"learning_rate": 2.5599490804260305e-05,
|
||
|
|
"loss": 0.2694,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27027878165245056,
|
||
|
|
"step": 1950,
|
||
|
|
"valid_targets_mean": 6097.2,
|
||
|
|
"valid_targets_min": 2811
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.291245791245791,
|
||
|
|
"grad_norm": 0.49755407687551717,
|
||
|
|
"learning_rate": 2.551884437155228e-05,
|
||
|
|
"loss": 0.263,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2811235785484314,
|
||
|
|
"step": 1955,
|
||
|
|
"valid_targets_mean": 5254.4,
|
||
|
|
"valid_targets_min": 789
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2996632996632997,
|
||
|
|
"grad_norm": 0.457846159639027,
|
||
|
|
"learning_rate": 2.5438100691063922e-05,
|
||
|
|
"loss": 0.2685,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24545416235923767,
|
||
|
|
"step": 1960,
|
||
|
|
"valid_targets_mean": 6070.6,
|
||
|
|
"valid_targets_min": 1828
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.308080808080808,
|
||
|
|
"grad_norm": 0.4758750298348739,
|
||
|
|
"learning_rate": 2.5357261185582905e-05,
|
||
|
|
"loss": 0.2637,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27742427587509155,
|
||
|
|
"step": 1965,
|
||
|
|
"valid_targets_mean": 5808.8,
|
||
|
|
"valid_targets_min": 1796
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3164983164983166,
|
||
|
|
"grad_norm": 0.46197179559249013,
|
||
|
|
"learning_rate": 2.527632727958543e-05,
|
||
|
|
"loss": 0.266,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2675663232803345,
|
||
|
|
"step": 1970,
|
||
|
|
"valid_targets_mean": 6366.4,
|
||
|
|
"valid_targets_min": 5123
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.324915824915825,
|
||
|
|
"grad_norm": 0.4867263212000984,
|
||
|
|
"learning_rate": 2.5195300399211137e-05,
|
||
|
|
"loss": 0.2626,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2639455199241638,
|
||
|
|
"step": 1975,
|
||
|
|
"valid_targets_mean": 4910.4,
|
||
|
|
"valid_targets_min": 634
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3333333333333335,
|
||
|
|
"grad_norm": 0.4284195435461982,
|
||
|
|
"learning_rate": 2.511418197223796e-05,
|
||
|
|
"loss": 0.2619,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24187898635864258,
|
||
|
|
"step": 1980,
|
||
|
|
"valid_targets_mean": 5845.8,
|
||
|
|
"valid_targets_min": 923
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3417508417508417,
|
||
|
|
"grad_norm": 0.4135358228574883,
|
||
|
|
"learning_rate": 2.5032973428057e-05,
|
||
|
|
"loss": 0.2594,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2515963912010193,
|
||
|
|
"step": 1985,
|
||
|
|
"valid_targets_mean": 6830.1,
|
||
|
|
"valid_targets_min": 5004
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3501683501683504,
|
||
|
|
"grad_norm": 0.4377565753960238,
|
||
|
|
"learning_rate": 2.49516761976473e-05,
|
||
|
|
"loss": 0.2451,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23978541791439056,
|
||
|
|
"step": 1990,
|
||
|
|
"valid_targets_mean": 6151.1,
|
||
|
|
"valid_targets_min": 5254
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3585858585858586,
|
||
|
|
"grad_norm": 0.4993668860572999,
|
||
|
|
"learning_rate": 2.4870291713550648e-05,
|
||
|
|
"loss": 0.261,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27316054701805115,
|
||
|
|
"step": 1995,
|
||
|
|
"valid_targets_mean": 5725.8,
|
||
|
|
"valid_targets_min": 2261
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3670033670033668,
|
||
|
|
"grad_norm": 0.46810011894975945,
|
||
|
|
"learning_rate": 2.4788821409846334e-05,
|
||
|
|
"loss": 0.2529,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24408294260501862,
|
||
|
|
"step": 2000,
|
||
|
|
"valid_targets_mean": 5991.2,
|
||
|
|
"valid_targets_min": 3469
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3754208754208754,
|
||
|
|
"grad_norm": 0.44354178389049237,
|
||
|
|
"learning_rate": 2.4707266722125888e-05,
|
||
|
|
"loss": 0.2604,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24701344966888428,
|
||
|
|
"step": 2005,
|
||
|
|
"valid_targets_mean": 5817.7,
|
||
|
|
"valid_targets_min": 3108
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3838383838383836,
|
||
|
|
"grad_norm": 0.4616401687442844,
|
||
|
|
"learning_rate": 2.4625629087467776e-05,
|
||
|
|
"loss": 0.2597,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26236820220947266,
|
||
|
|
"step": 2010,
|
||
|
|
"valid_targets_mean": 5806.8,
|
||
|
|
"valid_targets_min": 2318
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3922558922558923,
|
||
|
|
"grad_norm": 0.46918202560215155,
|
||
|
|
"learning_rate": 2.4543909944412048e-05,
|
||
|
|
"loss": 0.2614,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2738293409347534,
|
||
|
|
"step": 2015,
|
||
|
|
"valid_targets_mean": 6169.3,
|
||
|
|
"valid_targets_min": 3881
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4006734006734005,
|
||
|
|
"grad_norm": 0.45482866759373874,
|
||
|
|
"learning_rate": 2.4462110732935043e-05,
|
||
|
|
"loss": 0.2547,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25623035430908203,
|
||
|
|
"step": 2020,
|
||
|
|
"valid_targets_mean": 5538.8,
|
||
|
|
"valid_targets_min": 710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.409090909090909,
|
||
|
|
"grad_norm": 0.4410889962382104,
|
||
|
|
"learning_rate": 2.438023289442399e-05,
|
||
|
|
"loss": 0.2637,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26775211095809937,
|
||
|
|
"step": 2025,
|
||
|
|
"valid_targets_mean": 6071.1,
|
||
|
|
"valid_targets_min": 2483
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4175084175084174,
|
||
|
|
"grad_norm": 0.45428199396855873,
|
||
|
|
"learning_rate": 2.4298277871651585e-05,
|
||
|
|
"loss": 0.2606,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25729498267173767,
|
||
|
|
"step": 2030,
|
||
|
|
"valid_targets_mean": 6182.1,
|
||
|
|
"valid_targets_min": 688
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.425925925925926,
|
||
|
|
"grad_norm": 0.4801004820149534,
|
||
|
|
"learning_rate": 2.4216247108750613e-05,
|
||
|
|
"loss": 0.259,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24965059757232666,
|
||
|
|
"step": 2035,
|
||
|
|
"valid_targets_mean": 5397.3,
|
||
|
|
"valid_targets_min": 775
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4343434343434343,
|
||
|
|
"grad_norm": 0.4373441581395516,
|
||
|
|
"learning_rate": 2.413414205118847e-05,
|
||
|
|
"loss": 0.2644,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2545247972011566,
|
||
|
|
"step": 2040,
|
||
|
|
"valid_targets_mean": 6557.8,
|
||
|
|
"valid_targets_min": 4539
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.442760942760943,
|
||
|
|
"grad_norm": 0.47614455354626983,
|
||
|
|
"learning_rate": 2.405196414574171e-05,
|
||
|
|
"loss": 0.2604,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2595536708831787,
|
||
|
|
"step": 2045,
|
||
|
|
"valid_targets_mean": 6224.1,
|
||
|
|
"valid_targets_min": 4957
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.451178451178451,
|
||
|
|
"grad_norm": 0.4427047407389265,
|
||
|
|
"learning_rate": 2.3969714840470526e-05,
|
||
|
|
"loss": 0.2577,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25375697016716003,
|
||
|
|
"step": 2050,
|
||
|
|
"valid_targets_mean": 6163.6,
|
||
|
|
"valid_targets_min": 4911
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.45959595959596,
|
||
|
|
"grad_norm": 0.4350812895960367,
|
||
|
|
"learning_rate": 2.3887395584693258e-05,
|
||
|
|
"loss": 0.253,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22944723069667816,
|
||
|
|
"step": 2055,
|
||
|
|
"valid_targets_mean": 5471.1,
|
||
|
|
"valid_targets_min": 1468
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.468013468013468,
|
||
|
|
"grad_norm": 0.44240321353041623,
|
||
|
|
"learning_rate": 2.3805007828960855e-05,
|
||
|
|
"loss": 0.2485,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2613484263420105,
|
||
|
|
"step": 2060,
|
||
|
|
"valid_targets_mean": 6239.8,
|
||
|
|
"valid_targets_min": 5408
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4764309764309766,
|
||
|
|
"grad_norm": 0.44476788322817057,
|
||
|
|
"learning_rate": 2.3722553025031304e-05,
|
||
|
|
"loss": 0.2557,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2447347342967987,
|
||
|
|
"step": 2065,
|
||
|
|
"valid_targets_mean": 5525.6,
|
||
|
|
"valid_targets_min": 650
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.484848484848485,
|
||
|
|
"grad_norm": 0.47337002524786304,
|
||
|
|
"learning_rate": 2.364003262584403e-05,
|
||
|
|
"loss": 0.2539,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26840806007385254,
|
||
|
|
"step": 2070,
|
||
|
|
"valid_targets_mean": 6076.1,
|
||
|
|
"valid_targets_min": 3742
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.493265993265993,
|
||
|
|
"grad_norm": 0.46884526171525137,
|
||
|
|
"learning_rate": 2.3557448085494343e-05,
|
||
|
|
"loss": 0.2585,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2716432511806488,
|
||
|
|
"step": 2075,
|
||
|
|
"valid_targets_mean": 5966.1,
|
||
|
|
"valid_targets_min": 4596
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5016835016835017,
|
||
|
|
"grad_norm": 0.4782651673999994,
|
||
|
|
"learning_rate": 2.347480085920778e-05,
|
||
|
|
"loss": 0.2537,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24839454889297485,
|
||
|
|
"step": 2080,
|
||
|
|
"valid_targets_mean": 5711.4,
|
||
|
|
"valid_targets_min": 2042
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.51010101010101,
|
||
|
|
"grad_norm": 0.47733783992450113,
|
||
|
|
"learning_rate": 2.3392092403314447e-05,
|
||
|
|
"loss": 0.2618,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2837388515472412,
|
||
|
|
"step": 2085,
|
||
|
|
"valid_targets_mean": 6067.6,
|
||
|
|
"valid_targets_min": 2048
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5185185185185186,
|
||
|
|
"grad_norm": 0.4782687051339394,
|
||
|
|
"learning_rate": 2.3309324175223415e-05,
|
||
|
|
"loss": 0.2542,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2605840563774109,
|
||
|
|
"step": 2090,
|
||
|
|
"valid_targets_mean": 5370.0,
|
||
|
|
"valid_targets_min": 799
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.526936026936027,
|
||
|
|
"grad_norm": 0.4872076274161844,
|
||
|
|
"learning_rate": 2.3226497633396978e-05,
|
||
|
|
"loss": 0.258,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26101091504096985,
|
||
|
|
"step": 2095,
|
||
|
|
"valid_targets_mean": 5702.2,
|
||
|
|
"valid_targets_min": 2030
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5353535353535355,
|
||
|
|
"grad_norm": 0.47862375556228315,
|
||
|
|
"learning_rate": 2.3143614237324986e-05,
|
||
|
|
"loss": 0.2538,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24673721194267273,
|
||
|
|
"step": 2100,
|
||
|
|
"valid_targets_mean": 5217.2,
|
||
|
|
"valid_targets_min": 882
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5437710437710437,
|
||
|
|
"grad_norm": 0.46837329902348335,
|
||
|
|
"learning_rate": 2.3060675447499116e-05,
|
||
|
|
"loss": 0.2566,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2669448256492615,
|
||
|
|
"step": 2105,
|
||
|
|
"valid_targets_mean": 5955.2,
|
||
|
|
"valid_targets_min": 3111
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5521885521885523,
|
||
|
|
"grad_norm": 0.5013173665418452,
|
||
|
|
"learning_rate": 2.2977682725387154e-05,
|
||
|
|
"loss": 0.2634,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26355600357055664,
|
||
|
|
"step": 2110,
|
||
|
|
"valid_targets_mean": 5380.4,
|
||
|
|
"valid_targets_min": 634
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5606060606060606,
|
||
|
|
"grad_norm": 0.4850228761732901,
|
||
|
|
"learning_rate": 2.2894637533407212e-05,
|
||
|
|
"loss": 0.2596,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2803172469139099,
|
||
|
|
"step": 2115,
|
||
|
|
"valid_targets_mean": 5126.5,
|
||
|
|
"valid_targets_min": 828
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.569023569023569,
|
||
|
|
"grad_norm": 0.523494145167435,
|
||
|
|
"learning_rate": 2.2811541334901993e-05,
|
||
|
|
"loss": 0.2635,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2550443410873413,
|
||
|
|
"step": 2120,
|
||
|
|
"valid_targets_mean": 5063.9,
|
||
|
|
"valid_targets_min": 1239
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5774410774410774,
|
||
|
|
"grad_norm": 0.4942692493742798,
|
||
|
|
"learning_rate": 2.2728395594112965e-05,
|
||
|
|
"loss": 0.2469,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2536007761955261,
|
||
|
|
"step": 2125,
|
||
|
|
"valid_targets_mean": 5600.1,
|
||
|
|
"valid_targets_min": 1971
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5858585858585856,
|
||
|
|
"grad_norm": 0.4616263962222559,
|
||
|
|
"learning_rate": 2.264520177615461e-05,
|
||
|
|
"loss": 0.2542,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2446945160627365,
|
||
|
|
"step": 2130,
|
||
|
|
"valid_targets_mean": 5717.0,
|
||
|
|
"valid_targets_min": 710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5942760942760943,
|
||
|
|
"grad_norm": 0.47379121647220346,
|
||
|
|
"learning_rate": 2.2561961346988553e-05,
|
||
|
|
"loss": 0.2607,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2528979778289795,
|
||
|
|
"step": 2135,
|
||
|
|
"valid_targets_mean": 5486.9,
|
||
|
|
"valid_targets_min": 1782
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.602693602693603,
|
||
|
|
"grad_norm": 0.4624142451949401,
|
||
|
|
"learning_rate": 2.2478675773397775e-05,
|
||
|
|
"loss": 0.2639,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.255023330450058,
|
||
|
|
"step": 2140,
|
||
|
|
"valid_targets_mean": 6134.9,
|
||
|
|
"valid_targets_min": 2221
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.611111111111111,
|
||
|
|
"grad_norm": 0.4935821545925253,
|
||
|
|
"learning_rate": 2.2395346522960742e-05,
|
||
|
|
"loss": 0.2539,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2875485420227051,
|
||
|
|
"step": 2145,
|
||
|
|
"valid_targets_mean": 5438.0,
|
||
|
|
"valid_targets_min": 1249
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6195286195286194,
|
||
|
|
"grad_norm": 0.4709964419857675,
|
||
|
|
"learning_rate": 2.231197506402556e-05,
|
||
|
|
"loss": 0.2523,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2643469572067261,
|
||
|
|
"step": 2150,
|
||
|
|
"valid_targets_mean": 5791.2,
|
||
|
|
"valid_targets_min": 916
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.627946127946128,
|
||
|
|
"grad_norm": 0.44196735316646596,
|
||
|
|
"learning_rate": 2.222856286568408e-05,
|
||
|
|
"loss": 0.2611,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.262684166431427,
|
||
|
|
"step": 2155,
|
||
|
|
"valid_targets_mean": 6122.8,
|
||
|
|
"valid_targets_min": 4714
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6363636363636362,
|
||
|
|
"grad_norm": 0.43497103101851703,
|
||
|
|
"learning_rate": 2.2145111397746027e-05,
|
||
|
|
"loss": 0.2607,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26448798179626465,
|
||
|
|
"step": 2160,
|
||
|
|
"valid_targets_mean": 6194.3,
|
||
|
|
"valid_targets_min": 2164
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.644781144781145,
|
||
|
|
"grad_norm": 0.4655861221362141,
|
||
|
|
"learning_rate": 2.2061622130713097e-05,
|
||
|
|
"loss": 0.2559,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26554441452026367,
|
||
|
|
"step": 2165,
|
||
|
|
"valid_targets_mean": 5773.9,
|
||
|
|
"valid_targets_min": 702
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.653198653198653,
|
||
|
|
"grad_norm": 0.44414860807418716,
|
||
|
|
"learning_rate": 2.197809653575306e-05,
|
||
|
|
"loss": 0.2446,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.247327983379364,
|
||
|
|
"step": 2170,
|
||
|
|
"valid_targets_mean": 5814.2,
|
||
|
|
"valid_targets_min": 2019
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6616161616161618,
|
||
|
|
"grad_norm": 0.4752089032694026,
|
||
|
|
"learning_rate": 2.18945360846738e-05,
|
||
|
|
"loss": 0.2513,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26908397674560547,
|
||
|
|
"step": 2175,
|
||
|
|
"valid_targets_mean": 6000.1,
|
||
|
|
"valid_targets_min": 1468
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.67003367003367,
|
||
|
|
"grad_norm": 0.4721150110395281,
|
||
|
|
"learning_rate": 2.1810942249897416e-05,
|
||
|
|
"loss": 0.2505,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2538966238498688,
|
||
|
|
"step": 2180,
|
||
|
|
"valid_targets_mean": 5865.1,
|
||
|
|
"valid_targets_min": 3149
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.678451178451178,
|
||
|
|
"grad_norm": 0.4513682828405357,
|
||
|
|
"learning_rate": 2.172731650443425e-05,
|
||
|
|
"loss": 0.2483,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2598488926887512,
|
||
|
|
"step": 2185,
|
||
|
|
"valid_targets_mean": 5767.9,
|
||
|
|
"valid_targets_min": 2936
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.686868686868687,
|
||
|
|
"grad_norm": 0.4840032550986964,
|
||
|
|
"learning_rate": 2.1643660321856973e-05,
|
||
|
|
"loss": 0.2659,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2745859622955322,
|
||
|
|
"step": 2190,
|
||
|
|
"valid_targets_mean": 5705.9,
|
||
|
|
"valid_targets_min": 728
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6952861952861955,
|
||
|
|
"grad_norm": 0.4658741775575672,
|
||
|
|
"learning_rate": 2.1559975176274545e-05,
|
||
|
|
"loss": 0.2594,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24418890476226807,
|
||
|
|
"step": 2195,
|
||
|
|
"valid_targets_mean": 5492.0,
|
||
|
|
"valid_targets_min": 677
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7037037037037037,
|
||
|
|
"grad_norm": 0.4825279150558414,
|
||
|
|
"learning_rate": 2.147626254230631e-05,
|
||
|
|
"loss": 0.2695,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28623268008232117,
|
||
|
|
"step": 2200,
|
||
|
|
"valid_targets_mean": 5611.0,
|
||
|
|
"valid_targets_min": 627
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.712121212121212,
|
||
|
|
"grad_norm": 0.47492010284265773,
|
||
|
|
"learning_rate": 2.1392523895056e-05,
|
||
|
|
"loss": 0.2508,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2459857016801834,
|
||
|
|
"step": 2205,
|
||
|
|
"valid_targets_mean": 6181.9,
|
||
|
|
"valid_targets_min": 2042
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7205387205387206,
|
||
|
|
"grad_norm": 0.4330257446398321,
|
||
|
|
"learning_rate": 2.1308760710085706e-05,
|
||
|
|
"loss": 0.2605,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23315058648586273,
|
||
|
|
"step": 2210,
|
||
|
|
"valid_targets_mean": 6284.3,
|
||
|
|
"valid_targets_min": 3800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.728956228956229,
|
||
|
|
"grad_norm": 0.4590373305942424,
|
||
|
|
"learning_rate": 2.1224974463389892e-05,
|
||
|
|
"loss": 0.2519,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24978694319725037,
|
||
|
|
"step": 2215,
|
||
|
|
"valid_targets_mean": 6140.1,
|
||
|
|
"valid_targets_min": 3554
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7373737373737375,
|
||
|
|
"grad_norm": 0.48529430064128043,
|
||
|
|
"learning_rate": 2.1141166631369418e-05,
|
||
|
|
"loss": 0.2479,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24444152414798737,
|
||
|
|
"step": 2220,
|
||
|
|
"valid_targets_mean": 4900.2,
|
||
|
|
"valid_targets_min": 834
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7457912457912457,
|
||
|
|
"grad_norm": 0.48562007061040907,
|
||
|
|
"learning_rate": 2.1057338690805485e-05,
|
||
|
|
"loss": 0.2666,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2763746678829193,
|
||
|
|
"step": 2225,
|
||
|
|
"valid_targets_mean": 5667.4,
|
||
|
|
"valid_targets_min": 2152
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7542087542087543,
|
||
|
|
"grad_norm": 0.45153067684291887,
|
||
|
|
"learning_rate": 2.0973492118833633e-05,
|
||
|
|
"loss": 0.2574,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2593878209590912,
|
||
|
|
"step": 2230,
|
||
|
|
"valid_targets_mean": 5811.3,
|
||
|
|
"valid_targets_min": 2868
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7626262626262625,
|
||
|
|
"grad_norm": 0.46947043053986687,
|
||
|
|
"learning_rate": 2.08896283929177e-05,
|
||
|
|
"loss": 0.2617,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2528648376464844,
|
||
|
|
"step": 2235,
|
||
|
|
"valid_targets_mean": 5667.5,
|
||
|
|
"valid_targets_min": 577
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.771043771043771,
|
||
|
|
"grad_norm": 0.443062772560876,
|
||
|
|
"learning_rate": 2.0805748990823808e-05,
|
||
|
|
"loss": 0.2467,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23090139031410217,
|
||
|
|
"step": 2240,
|
||
|
|
"valid_targets_mean": 5894.3,
|
||
|
|
"valid_targets_min": 4097
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7794612794612794,
|
||
|
|
"grad_norm": 0.46928488809946345,
|
||
|
|
"learning_rate": 2.0721855390594294e-05,
|
||
|
|
"loss": 0.2524,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23324617743492126,
|
||
|
|
"step": 2245,
|
||
|
|
"valid_targets_mean": 6142.8,
|
||
|
|
"valid_targets_min": 3662
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.787878787878788,
|
||
|
|
"grad_norm": 0.4713367550706847,
|
||
|
|
"learning_rate": 2.0637949070521688e-05,
|
||
|
|
"loss": 0.2596,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2696024775505066,
|
||
|
|
"step": 2250,
|
||
|
|
"valid_targets_mean": 6141.5,
|
||
|
|
"valid_targets_min": 2133
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7962962962962963,
|
||
|
|
"grad_norm": 0.4392650932413215,
|
||
|
|
"learning_rate": 2.0554031509122656e-05,
|
||
|
|
"loss": 0.2445,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2330874651670456,
|
||
|
|
"step": 2255,
|
||
|
|
"valid_targets_mean": 5963.0,
|
||
|
|
"valid_targets_min": 3554
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8047138047138045,
|
||
|
|
"grad_norm": 0.5082714907464522,
|
||
|
|
"learning_rate": 2.0470104185111946e-05,
|
||
|
|
"loss": 0.2619,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24720750749111176,
|
||
|
|
"step": 2260,
|
||
|
|
"valid_targets_mean": 5415.8,
|
||
|
|
"valid_targets_min": 1997
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.813131313131313,
|
||
|
|
"grad_norm": 0.4335382849960993,
|
||
|
|
"learning_rate": 2.0386168577376346e-05,
|
||
|
|
"loss": 0.26,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2274424135684967,
|
||
|
|
"step": 2265,
|
||
|
|
"valid_targets_mean": 5680.9,
|
||
|
|
"valid_targets_min": 529
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.821548821548822,
|
||
|
|
"grad_norm": 0.46601152865749573,
|
||
|
|
"learning_rate": 2.0302226164948592e-05,
|
||
|
|
"loss": 0.2604,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2622186541557312,
|
||
|
|
"step": 2270,
|
||
|
|
"valid_targets_mean": 5564.1,
|
||
|
|
"valid_targets_min": 1668
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.82996632996633,
|
||
|
|
"grad_norm": 0.4340781482822856,
|
||
|
|
"learning_rate": 2.0218278426981332e-05,
|
||
|
|
"loss": 0.2473,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23421743512153625,
|
||
|
|
"step": 2275,
|
||
|
|
"valid_targets_mean": 5899.7,
|
||
|
|
"valid_targets_min": 738
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8383838383838382,
|
||
|
|
"grad_norm": 0.4734907148873857,
|
||
|
|
"learning_rate": 2.013432684272107e-05,
|
||
|
|
"loss": 0.253,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2441825568675995,
|
||
|
|
"step": 2280,
|
||
|
|
"valid_targets_mean": 5232.0,
|
||
|
|
"valid_targets_min": 572
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.846801346801347,
|
||
|
|
"grad_norm": 0.5215053718016555,
|
||
|
|
"learning_rate": 2.0050372891482065e-05,
|
||
|
|
"loss": 0.2548,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26573556661605835,
|
||
|
|
"step": 2285,
|
||
|
|
"valid_targets_mean": 5406.9,
|
||
|
|
"valid_targets_min": 877
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.855218855218855,
|
||
|
|
"grad_norm": 0.43539608646753364,
|
||
|
|
"learning_rate": 1.99664180526203e-05,
|
||
|
|
"loss": 0.253,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2478955239057541,
|
||
|
|
"step": 2290,
|
||
|
|
"valid_targets_mean": 6055.0,
|
||
|
|
"valid_targets_min": 3749
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8636363636363638,
|
||
|
|
"grad_norm": 0.44650430172333616,
|
||
|
|
"learning_rate": 1.988246380550739e-05,
|
||
|
|
"loss": 0.2451,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23736608028411865,
|
||
|
|
"step": 2295,
|
||
|
|
"valid_targets_mean": 5916.8,
|
||
|
|
"valid_targets_min": 766
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.872053872053872,
|
||
|
|
"grad_norm": 0.4753554150730085,
|
||
|
|
"learning_rate": 1.9798511629504526e-05,
|
||
|
|
"loss": 0.2566,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26632919907569885,
|
||
|
|
"step": 2300,
|
||
|
|
"valid_targets_mean": 5875.3,
|
||
|
|
"valid_targets_min": 2644
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8804713804713806,
|
||
|
|
"grad_norm": 0.663332930647706,
|
||
|
|
"learning_rate": 1.9714563003936414e-05,
|
||
|
|
"loss": 0.2632,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28136366605758667,
|
||
|
|
"step": 2305,
|
||
|
|
"valid_targets_mean": 5596.1,
|
||
|
|
"valid_targets_min": 851
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.888888888888889,
|
||
|
|
"grad_norm": 0.4370023219399334,
|
||
|
|
"learning_rate": 1.9630619408065166e-05,
|
||
|
|
"loss": 0.2532,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23587965965270996,
|
||
|
|
"step": 2310,
|
||
|
|
"valid_targets_mean": 5893.2,
|
||
|
|
"valid_targets_min": 570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.897306397306397,
|
||
|
|
"grad_norm": 0.46508686514621095,
|
||
|
|
"learning_rate": 1.9546682321064296e-05,
|
||
|
|
"loss": 0.2474,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2453293353319168,
|
||
|
|
"step": 2315,
|
||
|
|
"valid_targets_mean": 5837.5,
|
||
|
|
"valid_targets_min": 2215
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9057239057239057,
|
||
|
|
"grad_norm": 0.4982192014247366,
|
||
|
|
"learning_rate": 1.9462753221992625e-05,
|
||
|
|
"loss": 0.2487,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24440611898899078,
|
||
|
|
"step": 2320,
|
||
|
|
"valid_targets_mean": 5267.2,
|
||
|
|
"valid_targets_min": 465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9141414141414144,
|
||
|
|
"grad_norm": 0.4805922369728175,
|
||
|
|
"learning_rate": 1.9378833589768196e-05,
|
||
|
|
"loss": 0.2552,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24339064955711365,
|
||
|
|
"step": 2325,
|
||
|
|
"valid_targets_mean": 5242.2,
|
||
|
|
"valid_targets_min": 2388
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9225589225589226,
|
||
|
|
"grad_norm": 0.451836362923565,
|
||
|
|
"learning_rate": 1.9294924903142252e-05,
|
||
|
|
"loss": 0.2684,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2769237458705902,
|
||
|
|
"step": 2330,
|
||
|
|
"valid_targets_mean": 6302.6,
|
||
|
|
"valid_targets_min": 2157
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.930976430976431,
|
||
|
|
"grad_norm": 0.4735168791034422,
|
||
|
|
"learning_rate": 1.9211028640673173e-05,
|
||
|
|
"loss": 0.252,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25596874952316284,
|
||
|
|
"step": 2335,
|
||
|
|
"valid_targets_mean": 5781.4,
|
||
|
|
"valid_targets_min": 1809
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9393939393939394,
|
||
|
|
"grad_norm": 0.49285584030359986,
|
||
|
|
"learning_rate": 1.9127146280700384e-05,
|
||
|
|
"loss": 0.251,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2413105070590973,
|
||
|
|
"step": 2340,
|
||
|
|
"valid_targets_mean": 5354.1,
|
||
|
|
"valid_targets_min": 580
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9478114478114477,
|
||
|
|
"grad_norm": 0.45921191146130463,
|
||
|
|
"learning_rate": 1.9043279301318365e-05,
|
||
|
|
"loss": 0.2615,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2670944333076477,
|
||
|
|
"step": 2345,
|
||
|
|
"valid_targets_mean": 5593.6,
|
||
|
|
"valid_targets_min": 736
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9562289562289563,
|
||
|
|
"grad_norm": 0.46211290571675284,
|
||
|
|
"learning_rate": 1.895942918035054e-05,
|
||
|
|
"loss": 0.2517,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24511289596557617,
|
||
|
|
"step": 2350,
|
||
|
|
"valid_targets_mean": 5811.6,
|
||
|
|
"valid_targets_min": 2208
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9646464646464645,
|
||
|
|
"grad_norm": 0.4794002903772811,
|
||
|
|
"learning_rate": 1.88755973953233e-05,
|
||
|
|
"loss": 0.2466,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2600766122341156,
|
||
|
|
"step": 2355,
|
||
|
|
"valid_targets_mean": 5570.6,
|
||
|
|
"valid_targets_min": 3574
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.973063973063973,
|
||
|
|
"grad_norm": 0.4611418225959471,
|
||
|
|
"learning_rate": 1.8791785423439926e-05,
|
||
|
|
"loss": 0.2487,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2635408043861389,
|
||
|
|
"step": 2360,
|
||
|
|
"valid_targets_mean": 5771.7,
|
||
|
|
"valid_targets_min": 753
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9814814814814814,
|
||
|
|
"grad_norm": 0.4364240728827987,
|
||
|
|
"learning_rate": 1.8707994741554565e-05,
|
||
|
|
"loss": 0.2497,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24071215093135834,
|
||
|
|
"step": 2365,
|
||
|
|
"valid_targets_mean": 6022.0,
|
||
|
|
"valid_targets_min": 2766
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.98989898989899,
|
||
|
|
"grad_norm": 0.4868677738278521,
|
||
|
|
"learning_rate": 1.8624226826146228e-05,
|
||
|
|
"loss": 0.2606,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2623073160648346,
|
||
|
|
"step": 2370,
|
||
|
|
"valid_targets_mean": 5395.6,
|
||
|
|
"valid_targets_min": 2961
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9983164983164983,
|
||
|
|
"grad_norm": 0.43425493407176013,
|
||
|
|
"learning_rate": 1.8540483153292736e-05,
|
||
|
|
"loss": 0.2448,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24014857411384583,
|
||
|
|
"step": 2375,
|
||
|
|
"valid_targets_mean": 6008.5,
|
||
|
|
"valid_targets_min": 4815
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.006734006734007,
|
||
|
|
"grad_norm": 0.47405743239962633,
|
||
|
|
"learning_rate": 1.8456765198644747e-05,
|
||
|
|
"loss": 0.2477,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2570481300354004,
|
||
|
|
"step": 2380,
|
||
|
|
"valid_targets_mean": 5911.1,
|
||
|
|
"valid_targets_min": 5134
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.015151515151516,
|
||
|
|
"grad_norm": 0.4869860772722204,
|
||
|
|
"learning_rate": 1.8373074437399744e-05,
|
||
|
|
"loss": 0.2455,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23833802342414856,
|
||
|
|
"step": 2385,
|
||
|
|
"valid_targets_mean": 5891.4,
|
||
|
|
"valid_targets_min": 3355
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.023569023569023,
|
||
|
|
"grad_norm": 0.4846976942136745,
|
||
|
|
"learning_rate": 1.8289412344276004e-05,
|
||
|
|
"loss": 0.2534,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2526353597640991,
|
||
|
|
"step": 2390,
|
||
|
|
"valid_targets_mean": 5712.9,
|
||
|
|
"valid_targets_min": 1880
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.031986531986532,
|
||
|
|
"grad_norm": 0.47446175331935553,
|
||
|
|
"learning_rate": 1.8205780393486675e-05,
|
||
|
|
"loss": 0.2351,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24016064405441284,
|
||
|
|
"step": 2395,
|
||
|
|
"valid_targets_mean": 6065.6,
|
||
|
|
"valid_targets_min": 3815
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.040404040404041,
|
||
|
|
"grad_norm": 0.5119722118605593,
|
||
|
|
"learning_rate": 1.8122180058713738e-05,
|
||
|
|
"loss": 0.2477,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2695065438747406,
|
||
|
|
"step": 2400,
|
||
|
|
"valid_targets_mean": 5231.4,
|
||
|
|
"valid_targets_min": 2031
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.048821548821548,
|
||
|
|
"grad_norm": 0.4738469599038624,
|
||
|
|
"learning_rate": 1.8038612813082084e-05,
|
||
|
|
"loss": 0.2388,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23778414726257324,
|
||
|
|
"step": 2405,
|
||
|
|
"valid_targets_mean": 5650.5,
|
||
|
|
"valid_targets_min": 777
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.057239057239057,
|
||
|
|
"grad_norm": 0.46284060268484284,
|
||
|
|
"learning_rate": 1.7955080129133543e-05,
|
||
|
|
"loss": 0.2403,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24119344353675842,
|
||
|
|
"step": 2410,
|
||
|
|
"valid_targets_mean": 6040.1,
|
||
|
|
"valid_targets_min": 3473
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.065656565656566,
|
||
|
|
"grad_norm": 0.4970401933263894,
|
||
|
|
"learning_rate": 1.7871583478800915e-05,
|
||
|
|
"loss": 0.2361,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24557490646839142,
|
||
|
|
"step": 2415,
|
||
|
|
"valid_targets_mean": 5264.2,
|
||
|
|
"valid_targets_min": 1960
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.074074074074074,
|
||
|
|
"grad_norm": 0.45239648329446647,
|
||
|
|
"learning_rate": 1.7788124333382064e-05,
|
||
|
|
"loss": 0.2592,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2602667510509491,
|
||
|
|
"step": 2420,
|
||
|
|
"valid_targets_mean": 6043.5,
|
||
|
|
"valid_targets_min": 2042
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.082491582491582,
|
||
|
|
"grad_norm": 0.4456410655388535,
|
||
|
|
"learning_rate": 1.770470416351398e-05,
|
||
|
|
"loss": 0.248,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.234627828001976,
|
||
|
|
"step": 2425,
|
||
|
|
"valid_targets_mean": 5970.1,
|
||
|
|
"valid_targets_min": 2800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.090909090909091,
|
||
|
|
"grad_norm": 0.5192498075506568,
|
||
|
|
"learning_rate": 1.7621324439146843e-05,
|
||
|
|
"loss": 0.2493,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2598259150981903,
|
||
|
|
"step": 2430,
|
||
|
|
"valid_targets_mean": 5802.1,
|
||
|
|
"valid_targets_min": 1828
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.0993265993265995,
|
||
|
|
"grad_norm": 0.5023479734758095,
|
||
|
|
"learning_rate": 1.753798662951816e-05,
|
||
|
|
"loss": 0.2375,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23720863461494446,
|
||
|
|
"step": 2435,
|
||
|
|
"valid_targets_mean": 5042.8,
|
||
|
|
"valid_targets_min": 1555
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.107744107744108,
|
||
|
|
"grad_norm": 0.5293355450065484,
|
||
|
|
"learning_rate": 1.745469220312685e-05,
|
||
|
|
"loss": 0.25,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24346238374710083,
|
||
|
|
"step": 2440,
|
||
|
|
"valid_targets_mean": 6005.5,
|
||
|
|
"valid_targets_min": 3947
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.116161616161616,
|
||
|
|
"grad_norm": 0.46485453409872673,
|
||
|
|
"learning_rate": 1.7371442627707385e-05,
|
||
|
|
"loss": 0.2437,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24842385947704315,
|
||
|
|
"step": 2445,
|
||
|
|
"valid_targets_mean": 6210.0,
|
||
|
|
"valid_targets_min": 2189
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.124579124579125,
|
||
|
|
"grad_norm": 0.4401011838255257,
|
||
|
|
"learning_rate": 1.72882393702039e-05,
|
||
|
|
"loss": 0.2512,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2509979009628296,
|
||
|
|
"step": 2450,
|
||
|
|
"valid_targets_mean": 5898.5,
|
||
|
|
"valid_targets_min": 2221
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.132996632996633,
|
||
|
|
"grad_norm": 0.4977857748823478,
|
||
|
|
"learning_rate": 1.7205083896744365e-05,
|
||
|
|
"loss": 0.2403,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24902662634849548,
|
||
|
|
"step": 2455,
|
||
|
|
"valid_targets_mean": 5446.1,
|
||
|
|
"valid_targets_min": 2169
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.141414141414141,
|
||
|
|
"grad_norm": 0.4736888568431637,
|
||
|
|
"learning_rate": 1.712197767261476e-05,
|
||
|
|
"loss": 0.247,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24341030418872833,
|
||
|
|
"step": 2460,
|
||
|
|
"valid_targets_mean": 5585.1,
|
||
|
|
"valid_targets_min": 789
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.14983164983165,
|
||
|
|
"grad_norm": 0.47946329643831576,
|
||
|
|
"learning_rate": 1.7038922162233224e-05,
|
||
|
|
"loss": 0.2448,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23274879157543182,
|
||
|
|
"step": 2465,
|
||
|
|
"valid_targets_mean": 5161.8,
|
||
|
|
"valid_targets_min": 1937
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.158249158249158,
|
||
|
|
"grad_norm": 0.4776304140534067,
|
||
|
|
"learning_rate": 1.6955918829124252e-05,
|
||
|
|
"loss": 0.2443,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2704820930957794,
|
||
|
|
"step": 2470,
|
||
|
|
"valid_targets_mean": 6214.0,
|
||
|
|
"valid_targets_min": 2880
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.166666666666667,
|
||
|
|
"grad_norm": 0.48354690048207816,
|
||
|
|
"learning_rate": 1.6872969135892963e-05,
|
||
|
|
"loss": 0.2396,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.261080801486969,
|
||
|
|
"step": 2475,
|
||
|
|
"valid_targets_mean": 5749.2,
|
||
|
|
"valid_targets_min": 2662
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.175084175084175,
|
||
|
|
"grad_norm": 0.51929036494525,
|
||
|
|
"learning_rate": 1.6790074544199257e-05,
|
||
|
|
"loss": 0.2433,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2342805117368698,
|
||
|
|
"step": 2480,
|
||
|
|
"valid_targets_mean": 4718.4,
|
||
|
|
"valid_targets_min": 596
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.183501683501683,
|
||
|
|
"grad_norm": 0.4767487596980206,
|
||
|
|
"learning_rate": 1.670723651473209e-05,
|
||
|
|
"loss": 0.2416,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2481677085161209,
|
||
|
|
"step": 2485,
|
||
|
|
"valid_targets_mean": 6451.2,
|
||
|
|
"valid_targets_min": 3580
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.191919191919192,
|
||
|
|
"grad_norm": 0.4908992003019385,
|
||
|
|
"learning_rate": 1.6624456507183735e-05,
|
||
|
|
"loss": 0.2504,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2497548609972,
|
||
|
|
"step": 2490,
|
||
|
|
"valid_targets_mean": 5495.7,
|
||
|
|
"valid_targets_min": 524
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.200336700336701,
|
||
|
|
"grad_norm": 0.464864052610427,
|
||
|
|
"learning_rate": 1.654173598022407e-05,
|
||
|
|
"loss": 0.2471,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23745915293693542,
|
||
|
|
"step": 2495,
|
||
|
|
"valid_targets_mean": 5795.8,
|
||
|
|
"valid_targets_min": 744
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.2087542087542085,
|
||
|
|
"grad_norm": 0.5125678578793912,
|
||
|
|
"learning_rate": 1.645907639147484e-05,
|
||
|
|
"loss": 0.2468,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27014249563217163,
|
||
|
|
"step": 2500,
|
||
|
|
"valid_targets_mean": 5685.8,
|
||
|
|
"valid_targets_min": 778
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.217171717171717,
|
||
|
|
"grad_norm": 0.47049554426126494,
|
||
|
|
"learning_rate": 1.6376479197484028e-05,
|
||
|
|
"loss": 0.246,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21898922324180603,
|
||
|
|
"step": 2505,
|
||
|
|
"valid_targets_mean": 5405.2,
|
||
|
|
"valid_targets_min": 1019
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.225589225589226,
|
||
|
|
"grad_norm": 0.47870546675653486,
|
||
|
|
"learning_rate": 1.629394585370011e-05,
|
||
|
|
"loss": 0.249,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25341683626174927,
|
||
|
|
"step": 2510,
|
||
|
|
"valid_targets_mean": 6033.2,
|
||
|
|
"valid_targets_min": 1807
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.2340067340067336,
|
||
|
|
"grad_norm": 0.46327108777649934,
|
||
|
|
"learning_rate": 1.6211477814446498e-05,
|
||
|
|
"loss": 0.238,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25074249505996704,
|
||
|
|
"step": 2515,
|
||
|
|
"valid_targets_mean": 5824.4,
|
||
|
|
"valid_targets_min": 2207
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.242424242424242,
|
||
|
|
"grad_norm": 0.46668716627675316,
|
||
|
|
"learning_rate": 1.6129076532895853e-05,
|
||
|
|
"loss": 0.2492,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23657959699630737,
|
||
|
|
"step": 2520,
|
||
|
|
"valid_targets_mean": 5824.7,
|
||
|
|
"valid_targets_min": 2845
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.250841750841751,
|
||
|
|
"grad_norm": 0.5225102540153259,
|
||
|
|
"learning_rate": 1.604674346104449e-05,
|
||
|
|
"loss": 0.2492,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26022762060165405,
|
||
|
|
"step": 2525,
|
||
|
|
"valid_targets_mean": 4983.4,
|
||
|
|
"valid_targets_min": 809
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.2592592592592595,
|
||
|
|
"grad_norm": 0.47507759485734535,
|
||
|
|
"learning_rate": 1.596448004968681e-05,
|
||
|
|
"loss": 0.2442,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21946153044700623,
|
||
|
|
"step": 2530,
|
||
|
|
"valid_targets_mean": 5204.6,
|
||
|
|
"valid_targets_min": 806
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.267676767676767,
|
||
|
|
"grad_norm": 0.4691020718582385,
|
||
|
|
"learning_rate": 1.5882287748389716e-05,
|
||
|
|
"loss": 0.2472,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2251463383436203,
|
||
|
|
"step": 2535,
|
||
|
|
"valid_targets_mean": 5837.6,
|
||
|
|
"valid_targets_min": 3772
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.276094276094276,
|
||
|
|
"grad_norm": 0.508551212418667,
|
||
|
|
"learning_rate": 1.5800168005467083e-05,
|
||
|
|
"loss": 0.2493,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24717667698860168,
|
||
|
|
"step": 2540,
|
||
|
|
"valid_targets_mean": 5223.6,
|
||
|
|
"valid_targets_min": 1273
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.284511784511785,
|
||
|
|
"grad_norm": 0.49267867623234085,
|
||
|
|
"learning_rate": 1.5718122267954232e-05,
|
||
|
|
"loss": 0.2448,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2596457302570343,
|
||
|
|
"step": 2545,
|
||
|
|
"valid_targets_mean": 5383.6,
|
||
|
|
"valid_targets_min": 1619
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.292929292929293,
|
||
|
|
"grad_norm": 0.48887693296457063,
|
||
|
|
"learning_rate": 1.5636151981582417e-05,
|
||
|
|
"loss": 0.2552,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2524750232696533,
|
||
|
|
"step": 2550,
|
||
|
|
"valid_targets_mean": 5214.6,
|
||
|
|
"valid_targets_min": 2469
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.301346801346801,
|
||
|
|
"grad_norm": 0.47058944318885165,
|
||
|
|
"learning_rate": 1.5554258590753388e-05,
|
||
|
|
"loss": 0.241,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24721063673496246,
|
||
|
|
"step": 2555,
|
||
|
|
"valid_targets_mean": 5628.5,
|
||
|
|
"valid_targets_min": 3056
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.30976430976431,
|
||
|
|
"grad_norm": 0.4695975900983712,
|
||
|
|
"learning_rate": 1.5472443538513894e-05,
|
||
|
|
"loss": 0.2441,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23654913902282715,
|
||
|
|
"step": 2560,
|
||
|
|
"valid_targets_mean": 5601.7,
|
||
|
|
"valid_targets_min": 648
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.318181818181818,
|
||
|
|
"grad_norm": 0.4853336295956766,
|
||
|
|
"learning_rate": 1.5390708266530288e-05,
|
||
|
|
"loss": 0.2493,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.257931113243103,
|
||
|
|
"step": 2565,
|
||
|
|
"valid_targets_mean": 5604.9,
|
||
|
|
"valid_targets_min": 706
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.326599326599327,
|
||
|
|
"grad_norm": 0.469469857279881,
|
||
|
|
"learning_rate": 1.530905421506312e-05,
|
||
|
|
"loss": 0.248,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25359490513801575,
|
||
|
|
"step": 2570,
|
||
|
|
"valid_targets_mean": 5639.9,
|
||
|
|
"valid_targets_min": 1609
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.335016835016835,
|
||
|
|
"grad_norm": 0.49705236539026776,
|
||
|
|
"learning_rate": 1.5227482822941725e-05,
|
||
|
|
"loss": 0.2528,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2658868134021759,
|
||
|
|
"step": 2575,
|
||
|
|
"valid_targets_mean": 5859.4,
|
||
|
|
"valid_targets_min": 702
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.343434343434343,
|
||
|
|
"grad_norm": 0.5576981250686313,
|
||
|
|
"learning_rate": 1.5145995527538918e-05,
|
||
|
|
"loss": 0.2451,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2369997501373291,
|
||
|
|
"step": 2580,
|
||
|
|
"valid_targets_mean": 5897.0,
|
||
|
|
"valid_targets_min": 1842
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.351851851851852,
|
||
|
|
"grad_norm": 0.4688656894324779,
|
||
|
|
"learning_rate": 1.506459376474564e-05,
|
||
|
|
"loss": 0.2399,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24687042832374573,
|
||
|
|
"step": 2585,
|
||
|
|
"valid_targets_mean": 5883.9,
|
||
|
|
"valid_targets_min": 3493
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.36026936026936,
|
||
|
|
"grad_norm": 0.47447175952384196,
|
||
|
|
"learning_rate": 1.4983278968945638e-05,
|
||
|
|
"loss": 0.2439,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24589446187019348,
|
||
|
|
"step": 2590,
|
||
|
|
"valid_targets_mean": 6050.8,
|
||
|
|
"valid_targets_min": 3455
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.3686868686868685,
|
||
|
|
"grad_norm": 0.4834177789093981,
|
||
|
|
"learning_rate": 1.4902052572990236e-05,
|
||
|
|
"loss": 0.2384,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2334013432264328,
|
||
|
|
"step": 2595,
|
||
|
|
"valid_targets_mean": 5976.0,
|
||
|
|
"valid_targets_min": 4277
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.377104377104377,
|
||
|
|
"grad_norm": 0.4725416804232908,
|
||
|
|
"learning_rate": 1.4820916008173039e-05,
|
||
|
|
"loss": 0.2518,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24257586896419525,
|
||
|
|
"step": 2600,
|
||
|
|
"valid_targets_mean": 6083.2,
|
||
|
|
"valid_targets_min": 5017
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.385521885521886,
|
||
|
|
"grad_norm": 0.4945988617073285,
|
||
|
|
"learning_rate": 1.4739870704204746e-05,
|
||
|
|
"loss": 0.2515,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2596173882484436,
|
||
|
|
"step": 2605,
|
||
|
|
"valid_targets_mean": 5484.8,
|
||
|
|
"valid_targets_min": 495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.393939393939394,
|
||
|
|
"grad_norm": 0.4455712778477704,
|
||
|
|
"learning_rate": 1.4658918089187952e-05,
|
||
|
|
"loss": 0.2387,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2324620634317398,
|
||
|
|
"step": 2610,
|
||
|
|
"valid_targets_mean": 6543.8,
|
||
|
|
"valid_targets_min": 5223
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.402356902356902,
|
||
|
|
"grad_norm": 0.4682972971703617,
|
||
|
|
"learning_rate": 1.4578059589591953e-05,
|
||
|
|
"loss": 0.2492,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24419406056404114,
|
||
|
|
"step": 2615,
|
||
|
|
"valid_targets_mean": 6016.2,
|
||
|
|
"valid_targets_min": 3048
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.410774410774411,
|
||
|
|
"grad_norm": 0.7488896871745445,
|
||
|
|
"learning_rate": 1.4497296630227658e-05,
|
||
|
|
"loss": 0.2416,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2361116111278534,
|
||
|
|
"step": 2620,
|
||
|
|
"valid_targets_mean": 4575.4,
|
||
|
|
"valid_targets_min": 509
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.41919191919192,
|
||
|
|
"grad_norm": 0.49298367487554073,
|
||
|
|
"learning_rate": 1.4416630634222449e-05,
|
||
|
|
"loss": 0.2458,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2516939043998718,
|
||
|
|
"step": 2625,
|
||
|
|
"valid_targets_mean": 6001.4,
|
||
|
|
"valid_targets_min": 4759
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.427609427609427,
|
||
|
|
"grad_norm": 0.4773695575753377,
|
||
|
|
"learning_rate": 1.4336063022995102e-05,
|
||
|
|
"loss": 0.2378,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2478029727935791,
|
||
|
|
"step": 2630,
|
||
|
|
"valid_targets_mean": 6015.2,
|
||
|
|
"valid_targets_min": 1735
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.436026936026936,
|
||
|
|
"grad_norm": 0.4858496060742026,
|
||
|
|
"learning_rate": 1.4255595216230753e-05,
|
||
|
|
"loss": 0.2392,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2524571418762207,
|
||
|
|
"step": 2635,
|
||
|
|
"valid_targets_mean": 5901.1,
|
||
|
|
"valid_targets_min": 4608
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.444444444444445,
|
||
|
|
"grad_norm": 0.46779566482936225,
|
||
|
|
"learning_rate": 1.4175228631855902e-05,
|
||
|
|
"loss": 0.257,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27151983976364136,
|
||
|
|
"step": 2640,
|
||
|
|
"valid_targets_mean": 6261.8,
|
||
|
|
"valid_targets_min": 2048
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.452861952861953,
|
||
|
|
"grad_norm": 0.5098921296763699,
|
||
|
|
"learning_rate": 1.4094964686013381e-05,
|
||
|
|
"loss": 0.2487,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2560115456581116,
|
||
|
|
"step": 2645,
|
||
|
|
"valid_targets_mean": 5527.9,
|
||
|
|
"valid_targets_min": 771
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.461279461279461,
|
||
|
|
"grad_norm": 0.4963397756532333,
|
||
|
|
"learning_rate": 1.4014804793037434e-05,
|
||
|
|
"loss": 0.2407,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24187448620796204,
|
||
|
|
"step": 2650,
|
||
|
|
"valid_targets_mean": 5302.6,
|
||
|
|
"valid_targets_min": 791
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.46969696969697,
|
||
|
|
"grad_norm": 0.4720446760534927,
|
||
|
|
"learning_rate": 1.3934750365428783e-05,
|
||
|
|
"loss": 0.2402,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24175631999969482,
|
||
|
|
"step": 2655,
|
||
|
|
"valid_targets_mean": 6306.9,
|
||
|
|
"valid_targets_min": 3272
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.478114478114478,
|
||
|
|
"grad_norm": 0.4494062792812156,
|
||
|
|
"learning_rate": 1.3854802813829744e-05,
|
||
|
|
"loss": 0.2384,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23085647821426392,
|
||
|
|
"step": 2660,
|
||
|
|
"valid_targets_mean": 6597.3,
|
||
|
|
"valid_targets_min": 5296
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.486531986531986,
|
||
|
|
"grad_norm": 0.5505872162256524,
|
||
|
|
"learning_rate": 1.3774963546999364e-05,
|
||
|
|
"loss": 0.2387,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2597641348838806,
|
||
|
|
"step": 2665,
|
||
|
|
"valid_targets_mean": 4935.4,
|
||
|
|
"valid_targets_min": 728
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.494949494949495,
|
||
|
|
"grad_norm": 0.4547480815464649,
|
||
|
|
"learning_rate": 1.3695233971788592e-05,
|
||
|
|
"loss": 0.2373,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2259209305047989,
|
||
|
|
"step": 2670,
|
||
|
|
"valid_targets_mean": 5772.8,
|
||
|
|
"valid_targets_min": 1067
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.5033670033670035,
|
||
|
|
"grad_norm": 0.49796826408398476,
|
||
|
|
"learning_rate": 1.3615615493115509e-05,
|
||
|
|
"loss": 0.237,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2491885870695114,
|
||
|
|
"step": 2675,
|
||
|
|
"valid_targets_mean": 5509.8,
|
||
|
|
"valid_targets_min": 2095
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.511784511784512,
|
||
|
|
"grad_norm": 0.4675189339816609,
|
||
|
|
"learning_rate": 1.3536109513940556e-05,
|
||
|
|
"loss": 0.2588,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2503166198730469,
|
||
|
|
"step": 2680,
|
||
|
|
"valid_targets_mean": 6271.0,
|
||
|
|
"valid_targets_min": 3815
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.52020202020202,
|
||
|
|
"grad_norm": 0.4910407602321847,
|
||
|
|
"learning_rate": 1.3456717435241808e-05,
|
||
|
|
"loss": 0.2468,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2446977198123932,
|
||
|
|
"step": 2685,
|
||
|
|
"valid_targets_mean": 5576.9,
|
||
|
|
"valid_targets_min": 3428
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.5286195286195285,
|
||
|
|
"grad_norm": 0.4834526315851509,
|
||
|
|
"learning_rate": 1.3377440655990307e-05,
|
||
|
|
"loss": 0.2604,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2552727460861206,
|
||
|
|
"step": 2690,
|
||
|
|
"valid_targets_mean": 5595.2,
|
||
|
|
"valid_targets_min": 1965
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.537037037037037,
|
||
|
|
"grad_norm": 0.501007163784396,
|
||
|
|
"learning_rate": 1.3298280573125392e-05,
|
||
|
|
"loss": 0.2339,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22512459754943848,
|
||
|
|
"step": 2695,
|
||
|
|
"valid_targets_mean": 5356.6,
|
||
|
|
"valid_targets_min": 2819
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.545454545454545,
|
||
|
|
"grad_norm": 0.49703879208215895,
|
||
|
|
"learning_rate": 1.321923858153009e-05,
|
||
|
|
"loss": 0.2332,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24406014382839203,
|
||
|
|
"step": 2700,
|
||
|
|
"valid_targets_mean": 5361.9,
|
||
|
|
"valid_targets_min": 875
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.553872053872054,
|
||
|
|
"grad_norm": 0.4660714609745113,
|
||
|
|
"learning_rate": 1.3140316074006545e-05,
|
||
|
|
"loss": 0.2392,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22664999961853027,
|
||
|
|
"step": 2705,
|
||
|
|
"valid_targets_mean": 5331.6,
|
||
|
|
"valid_targets_min": 2224
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.562289562289562,
|
||
|
|
"grad_norm": 0.49471186723664107,
|
||
|
|
"learning_rate": 1.3061514441251446e-05,
|
||
|
|
"loss": 0.2442,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2508906424045563,
|
||
|
|
"step": 2710,
|
||
|
|
"valid_targets_mean": 5700.0,
|
||
|
|
"valid_targets_min": 3778
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.570707070707071,
|
||
|
|
"grad_norm": 0.48226739181141126,
|
||
|
|
"learning_rate": 1.2982835071831565e-05,
|
||
|
|
"loss": 0.246,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2557341456413269,
|
||
|
|
"step": 2715,
|
||
|
|
"valid_targets_mean": 6064.6,
|
||
|
|
"valid_targets_min": 3898
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.57912457912458,
|
||
|
|
"grad_norm": 0.5920571149767186,
|
||
|
|
"learning_rate": 1.2904279352159264e-05,
|
||
|
|
"loss": 0.232,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23203839361667633,
|
||
|
|
"step": 2720,
|
||
|
|
"valid_targets_mean": 5828.5,
|
||
|
|
"valid_targets_min": 3282
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.587542087542087,
|
||
|
|
"grad_norm": 0.5112076721317061,
|
||
|
|
"learning_rate": 1.2825848666468052e-05,
|
||
|
|
"loss": 0.2479,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26419830322265625,
|
||
|
|
"step": 2725,
|
||
|
|
"valid_targets_mean": 6003.7,
|
||
|
|
"valid_targets_min": 4170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.595959595959596,
|
||
|
|
"grad_norm": 0.5154589913874446,
|
||
|
|
"learning_rate": 1.274754439678823e-05,
|
||
|
|
"loss": 0.2438,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2602817416191101,
|
||
|
|
"step": 2730,
|
||
|
|
"valid_targets_mean": 5174.1,
|
||
|
|
"valid_targets_min": 1954
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.604377104377105,
|
||
|
|
"grad_norm": 0.4792667230253282,
|
||
|
|
"learning_rate": 1.2669367922922504e-05,
|
||
|
|
"loss": 0.2413,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24768483638763428,
|
||
|
|
"step": 2735,
|
||
|
|
"valid_targets_mean": 6099.4,
|
||
|
|
"valid_targets_min": 4942
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.6127946127946124,
|
||
|
|
"grad_norm": 0.48102648961761757,
|
||
|
|
"learning_rate": 1.2591320622421686e-05,
|
||
|
|
"loss": 0.2411,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23812749981880188,
|
||
|
|
"step": 2740,
|
||
|
|
"valid_targets_mean": 5808.0,
|
||
|
|
"valid_targets_min": 2587
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.621212121212121,
|
||
|
|
"grad_norm": 0.48280565166454636,
|
||
|
|
"learning_rate": 1.2513403870560429e-05,
|
||
|
|
"loss": 0.2452,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2352055013179779,
|
||
|
|
"step": 2745,
|
||
|
|
"valid_targets_mean": 5633.4,
|
||
|
|
"valid_targets_min": 3291
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.62962962962963,
|
||
|
|
"grad_norm": 0.5394317361742055,
|
||
|
|
"learning_rate": 1.2435619040312963e-05,
|
||
|
|
"loss": 0.2328,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2418336272239685,
|
||
|
|
"step": 2750,
|
||
|
|
"valid_targets_mean": 6345.4,
|
||
|
|
"valid_targets_min": 4447
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.638047138047138,
|
||
|
|
"grad_norm": 0.5264541632883329,
|
||
|
|
"learning_rate": 1.2357967502328943e-05,
|
||
|
|
"loss": 0.2528,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2687990367412567,
|
||
|
|
"step": 2755,
|
||
|
|
"valid_targets_mean": 6275.9,
|
||
|
|
"valid_targets_min": 4262
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.646464646464646,
|
||
|
|
"grad_norm": 0.49314757440762375,
|
||
|
|
"learning_rate": 1.2280450624909257e-05,
|
||
|
|
"loss": 0.2513,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2628011107444763,
|
||
|
|
"step": 2760,
|
||
|
|
"valid_targets_mean": 5708.2,
|
||
|
|
"valid_targets_min": 806
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.654882154882155,
|
||
|
|
"grad_norm": 0.4595389422928486,
|
||
|
|
"learning_rate": 1.2203069773981953e-05,
|
||
|
|
"loss": 0.2432,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2331043779850006,
|
||
|
|
"step": 2765,
|
||
|
|
"valid_targets_mean": 6120.2,
|
||
|
|
"valid_targets_min": 2674
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.6632996632996635,
|
||
|
|
"grad_norm": 0.5303579086202546,
|
||
|
|
"learning_rate": 1.2125826313078144e-05,
|
||
|
|
"loss": 0.2473,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24351969361305237,
|
||
|
|
"step": 2770,
|
||
|
|
"valid_targets_mean": 4967.9,
|
||
|
|
"valid_targets_min": 789
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.671717171717171,
|
||
|
|
"grad_norm": 0.4757122640440963,
|
||
|
|
"learning_rate": 1.2048721603307971e-05,
|
||
|
|
"loss": 0.2283,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21351343393325806,
|
||
|
|
"step": 2775,
|
||
|
|
"valid_targets_mean": 5402.2,
|
||
|
|
"valid_targets_min": 2497
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.68013468013468,
|
||
|
|
"grad_norm": 0.49558312945900046,
|
||
|
|
"learning_rate": 1.1971757003336661e-05,
|
||
|
|
"loss": 0.244,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23598149418830872,
|
||
|
|
"step": 2780,
|
||
|
|
"valid_targets_mean": 5560.8,
|
||
|
|
"valid_targets_min": 3411
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.688552188552189,
|
||
|
|
"grad_norm": 0.5070715903913475,
|
||
|
|
"learning_rate": 1.1894933869360555e-05,
|
||
|
|
"loss": 0.2492,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2461165487766266,
|
||
|
|
"step": 2785,
|
||
|
|
"valid_targets_mean": 5727.7,
|
||
|
|
"valid_targets_min": 4203
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.696969696969697,
|
||
|
|
"grad_norm": 0.5205203781186487,
|
||
|
|
"learning_rate": 1.1818253555083192e-05,
|
||
|
|
"loss": 0.246,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27181577682495117,
|
||
|
|
"step": 2790,
|
||
|
|
"valid_targets_mean": 5783.8,
|
||
|
|
"valid_targets_min": 1486
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.705387205387205,
|
||
|
|
"grad_norm": 0.4816612147414662,
|
||
|
|
"learning_rate": 1.1741717411691509e-05,
|
||
|
|
"loss": 0.2421,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2371402531862259,
|
||
|
|
"step": 2795,
|
||
|
|
"valid_targets_mean": 5769.2,
|
||
|
|
"valid_targets_min": 1968
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.713804713804714,
|
||
|
|
"grad_norm": 0.47718296915439024,
|
||
|
|
"learning_rate": 1.1665326787831983e-05,
|
||
|
|
"loss": 0.2395,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22365979850292206,
|
||
|
|
"step": 2800,
|
||
|
|
"valid_targets_mean": 6007.6,
|
||
|
|
"valid_targets_min": 3942
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.722222222222222,
|
||
|
|
"grad_norm": 0.4796220783634332,
|
||
|
|
"learning_rate": 1.1589083029586906e-05,
|
||
|
|
"loss": 0.2553,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23196230828762054,
|
||
|
|
"step": 2805,
|
||
|
|
"valid_targets_mean": 5937.4,
|
||
|
|
"valid_targets_min": 2363
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.730639730639731,
|
||
|
|
"grad_norm": 0.47684003132237357,
|
||
|
|
"learning_rate": 1.1512987480450612e-05,
|
||
|
|
"loss": 0.2471,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2604638934135437,
|
||
|
|
"step": 2810,
|
||
|
|
"valid_targets_mean": 5854.0,
|
||
|
|
"valid_targets_min": 2407
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.739057239057239,
|
||
|
|
"grad_norm": 0.4874748967420592,
|
||
|
|
"learning_rate": 1.1437041481305877e-05,
|
||
|
|
"loss": 0.2376,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24680189788341522,
|
||
|
|
"step": 2815,
|
||
|
|
"valid_targets_mean": 5404.6,
|
||
|
|
"valid_targets_min": 688
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.747474747474747,
|
||
|
|
"grad_norm": 0.48612021159408475,
|
||
|
|
"learning_rate": 1.1361246370400221e-05,
|
||
|
|
"loss": 0.2453,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24513094127178192,
|
||
|
|
"step": 2820,
|
||
|
|
"valid_targets_mean": 5797.8,
|
||
|
|
"valid_targets_min": 2659
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.755892255892256,
|
||
|
|
"grad_norm": 0.48208763672646515,
|
||
|
|
"learning_rate": 1.1285603483322362e-05,
|
||
|
|
"loss": 0.2505,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26105183362960815,
|
||
|
|
"step": 2825,
|
||
|
|
"valid_targets_mean": 5589.4,
|
||
|
|
"valid_targets_min": 572
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.764309764309765,
|
||
|
|
"grad_norm": 0.5015272529735295,
|
||
|
|
"learning_rate": 1.1210114152978683e-05,
|
||
|
|
"loss": 0.2345,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23148992657661438,
|
||
|
|
"step": 2830,
|
||
|
|
"valid_targets_mean": 5191.4,
|
||
|
|
"valid_targets_min": 1249
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.7727272727272725,
|
||
|
|
"grad_norm": 0.47511244937891034,
|
||
|
|
"learning_rate": 1.1134779709569732e-05,
|
||
|
|
"loss": 0.2331,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22913163900375366,
|
||
|
|
"step": 2835,
|
||
|
|
"valid_targets_mean": 6066.2,
|
||
|
|
"valid_targets_min": 963
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.781144781144781,
|
||
|
|
"grad_norm": 0.47372641919213665,
|
||
|
|
"learning_rate": 1.1059601480566781e-05,
|
||
|
|
"loss": 0.2425,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24431616067886353,
|
||
|
|
"step": 2840,
|
||
|
|
"valid_targets_mean": 6332.1,
|
||
|
|
"valid_targets_min": 4314
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.78956228956229,
|
||
|
|
"grad_norm": 0.49151338626590396,
|
||
|
|
"learning_rate": 1.0984580790688463e-05,
|
||
|
|
"loss": 0.2415,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.257941335439682,
|
||
|
|
"step": 2845,
|
||
|
|
"valid_targets_mean": 5998.0,
|
||
|
|
"valid_targets_min": 4609
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.797979797979798,
|
||
|
|
"grad_norm": 0.4624557972804474,
|
||
|
|
"learning_rate": 1.0909718961877385e-05,
|
||
|
|
"loss": 0.235,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21516473591327667,
|
||
|
|
"step": 2850,
|
||
|
|
"valid_targets_mean": 6487.0,
|
||
|
|
"valid_targets_min": 4000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.806397306397306,
|
||
|
|
"grad_norm": 0.48375209307777617,
|
||
|
|
"learning_rate": 1.0835017313276874e-05,
|
||
|
|
"loss": 0.2323,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22575807571411133,
|
||
|
|
"step": 2855,
|
||
|
|
"valid_targets_mean": 5556.9,
|
||
|
|
"valid_targets_min": 807
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.814814814814815,
|
||
|
|
"grad_norm": 0.47388280379263475,
|
||
|
|
"learning_rate": 1.0760477161207707e-05,
|
||
|
|
"loss": 0.2341,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21567316353321075,
|
||
|
|
"step": 2860,
|
||
|
|
"valid_targets_mean": 5498.6,
|
||
|
|
"valid_targets_min": 737
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.8232323232323235,
|
||
|
|
"grad_norm": 0.45953983274977706,
|
||
|
|
"learning_rate": 1.068609981914492e-05,
|
||
|
|
"loss": 0.2513,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24462053179740906,
|
||
|
|
"step": 2865,
|
||
|
|
"valid_targets_mean": 5789.3,
|
||
|
|
"valid_targets_min": 1753
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.831649831649831,
|
||
|
|
"grad_norm": 0.6013089443836538,
|
||
|
|
"learning_rate": 1.0611886597694672e-05,
|
||
|
|
"loss": 0.2326,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22891020774841309,
|
||
|
|
"step": 2870,
|
||
|
|
"valid_targets_mean": 6236.3,
|
||
|
|
"valid_targets_min": 2824
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.84006734006734,
|
||
|
|
"grad_norm": 0.4765798783055637,
|
||
|
|
"learning_rate": 1.0537838804571145e-05,
|
||
|
|
"loss": 0.2391,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2332342267036438,
|
||
|
|
"step": 2875,
|
||
|
|
"valid_targets_mean": 5363.1,
|
||
|
|
"valid_targets_min": 1997
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.848484848484849,
|
||
|
|
"grad_norm": 0.4850073815980291,
|
||
|
|
"learning_rate": 1.0463957744573502e-05,
|
||
|
|
"loss": 0.2498,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23898157477378845,
|
||
|
|
"step": 2880,
|
||
|
|
"valid_targets_mean": 5507.9,
|
||
|
|
"valid_targets_min": 1004
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.856902356902357,
|
||
|
|
"grad_norm": 0.4732284576996267,
|
||
|
|
"learning_rate": 1.0390244719562901e-05,
|
||
|
|
"loss": 0.2385,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2569975256919861,
|
||
|
|
"step": 2885,
|
||
|
|
"valid_targets_mean": 6173.3,
|
||
|
|
"valid_targets_min": 2158
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.865319865319865,
|
||
|
|
"grad_norm": 0.5066251528378924,
|
||
|
|
"learning_rate": 1.031670102843954e-05,
|
||
|
|
"loss": 0.2455,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23153352737426758,
|
||
|
|
"step": 2890,
|
||
|
|
"valid_targets_mean": 5168.5,
|
||
|
|
"valid_targets_min": 786
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.873737373737374,
|
||
|
|
"grad_norm": 0.4927142942425226,
|
||
|
|
"learning_rate": 1.0243327967119772e-05,
|
||
|
|
"loss": 0.2522,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24447093904018402,
|
||
|
|
"step": 2895,
|
||
|
|
"valid_targets_mean": 5767.2,
|
||
|
|
"valid_targets_min": 3057
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.882154882154882,
|
||
|
|
"grad_norm": 0.47799262367577505,
|
||
|
|
"learning_rate": 1.0170126828513301e-05,
|
||
|
|
"loss": 0.239,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23044052720069885,
|
||
|
|
"step": 2900,
|
||
|
|
"valid_targets_mean": 5838.8,
|
||
|
|
"valid_targets_min": 2031
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.890572390572391,
|
||
|
|
"grad_norm": 0.47677839550243056,
|
||
|
|
"learning_rate": 1.0097098902500346e-05,
|
||
|
|
"loss": 0.2428,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23265433311462402,
|
||
|
|
"step": 2905,
|
||
|
|
"valid_targets_mean": 5649.4,
|
||
|
|
"valid_targets_min": 713
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.898989898989899,
|
||
|
|
"grad_norm": 0.46809374636357964,
|
||
|
|
"learning_rate": 1.0024245475908956e-05,
|
||
|
|
"loss": 0.2405,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2440536618232727,
|
||
|
|
"step": 2910,
|
||
|
|
"valid_targets_mean": 6046.9,
|
||
|
|
"valid_targets_min": 4710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.907407407407407,
|
||
|
|
"grad_norm": 0.5025997220995649,
|
||
|
|
"learning_rate": 9.951567832492308e-06,
|
||
|
|
"loss": 0.2489,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2694641351699829,
|
||
|
|
"step": 2915,
|
||
|
|
"valid_targets_mean": 5369.6,
|
||
|
|
"valid_targets_min": 1971
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.915824915824916,
|
||
|
|
"grad_norm": 0.5128476244209108,
|
||
|
|
"learning_rate": 9.879067252906098e-06,
|
||
|
|
"loss": 0.2421,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2652130126953125,
|
||
|
|
"step": 2920,
|
||
|
|
"valid_targets_mean": 5546.7,
|
||
|
|
"valid_targets_min": 628
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.924242424242424,
|
||
|
|
"grad_norm": 0.46637613972668973,
|
||
|
|
"learning_rate": 9.80674501468599e-06,
|
||
|
|
"loss": 0.241,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2279430627822876,
|
||
|
|
"step": 2925,
|
||
|
|
"valid_targets_mean": 5847.3,
|
||
|
|
"valid_targets_min": 2707
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.9326599326599325,
|
||
|
|
"grad_norm": 0.48287729943481417,
|
||
|
|
"learning_rate": 9.734602392225068e-06,
|
||
|
|
"loss": 0.2304,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22361034154891968,
|
||
|
|
"step": 2930,
|
||
|
|
"valid_targets_mean": 5502.1,
|
||
|
|
"valid_targets_min": 3942
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.941077441077441,
|
||
|
|
"grad_norm": 0.46786670784817613,
|
||
|
|
"learning_rate": 9.662640656751396e-06,
|
||
|
|
"loss": 0.2415,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2460593581199646,
|
||
|
|
"step": 2935,
|
||
|
|
"valid_targets_mean": 6188.2,
|
||
|
|
"valid_targets_min": 3514
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.94949494949495,
|
||
|
|
"grad_norm": 0.5252202199058081,
|
||
|
|
"learning_rate": 9.59086107630565e-06,
|
||
|
|
"loss": 0.2404,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2338843047618866,
|
||
|
|
"step": 2940,
|
||
|
|
"valid_targets_mean": 4835.6,
|
||
|
|
"valid_targets_min": 923
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.957912457912458,
|
||
|
|
"grad_norm": 0.4519621375125666,
|
||
|
|
"learning_rate": 9.519264915718717e-06,
|
||
|
|
"loss": 0.2315,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22045472264289856,
|
||
|
|
"step": 2945,
|
||
|
|
"valid_targets_mean": 6158.5,
|
||
|
|
"valid_targets_min": 2283
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.966329966329966,
|
||
|
|
"grad_norm": 0.4651056142527175,
|
||
|
|
"learning_rate": 9.447853436589444e-06,
|
||
|
|
"loss": 0.2273,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23641371726989746,
|
||
|
|
"step": 2950,
|
||
|
|
"valid_targets_mean": 5869.6,
|
||
|
|
"valid_targets_min": 2270
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.974747474747475,
|
||
|
|
"grad_norm": 0.48650730476506754,
|
||
|
|
"learning_rate": 9.3766278972624e-06,
|
||
|
|
"loss": 0.2393,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2378322184085846,
|
||
|
|
"step": 2955,
|
||
|
|
"valid_targets_mean": 5616.4,
|
||
|
|
"valid_targets_min": 627
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.983164983164983,
|
||
|
|
"grad_norm": 0.48351848812987125,
|
||
|
|
"learning_rate": 9.305589552805691e-06,
|
||
|
|
"loss": 0.2335,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21766245365142822,
|
||
|
|
"step": 2960,
|
||
|
|
"valid_targets_mean": 5461.8,
|
||
|
|
"valid_targets_min": 524
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.991582491582491,
|
||
|
|
"grad_norm": 0.4855323304121959,
|
||
|
|
"learning_rate": 9.234739654988879e-06,
|
||
|
|
"loss": 0.2347,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22696718573570251,
|
||
|
|
"step": 2965,
|
||
|
|
"valid_targets_mean": 6261.7,
|
||
|
|
"valid_targets_min": 1713
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.0,
|
||
|
|
"grad_norm": 0.4832993346496243,
|
||
|
|
"learning_rate": 9.164079452260874e-06,
|
||
|
|
"loss": 0.2415,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23409955203533173,
|
||
|
|
"step": 2970,
|
||
|
|
"valid_targets_mean": 5369.6,
|
||
|
|
"valid_targets_min": 976
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.008417508417509,
|
||
|
|
"grad_norm": 0.485975550813503,
|
||
|
|
"learning_rate": 9.09361018972797e-06,
|
||
|
|
"loss": 0.233,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2630172371864319,
|
||
|
|
"step": 2975,
|
||
|
|
"valid_targets_mean": 6245.8,
|
||
|
|
"valid_targets_min": 875
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.016835016835016,
|
||
|
|
"grad_norm": 0.5183945191375198,
|
||
|
|
"learning_rate": 9.02333310913191e-06,
|
||
|
|
"loss": 0.2259,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22702479362487793,
|
||
|
|
"step": 2980,
|
||
|
|
"valid_targets_mean": 6026.0,
|
||
|
|
"valid_targets_min": 2934
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.025252525252525,
|
||
|
|
"grad_norm": 0.5186739982406766,
|
||
|
|
"learning_rate": 8.953249448827971e-06,
|
||
|
|
"loss": 0.2318,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22462819516658783,
|
||
|
|
"step": 2985,
|
||
|
|
"valid_targets_mean": 5457.3,
|
||
|
|
"valid_targets_min": 2304
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.033670033670034,
|
||
|
|
"grad_norm": 0.47871125321551317,
|
||
|
|
"learning_rate": 8.883360443763174e-06,
|
||
|
|
"loss": 0.2266,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22352522611618042,
|
||
|
|
"step": 2990,
|
||
|
|
"valid_targets_mean": 5989.8,
|
||
|
|
"valid_targets_min": 3942
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.042087542087542,
|
||
|
|
"grad_norm": 0.47853857657725507,
|
||
|
|
"learning_rate": 8.813667325454507e-06,
|
||
|
|
"loss": 0.2269,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21468524634838104,
|
||
|
|
"step": 2995,
|
||
|
|
"valid_targets_mean": 5990.2,
|
||
|
|
"valid_targets_min": 4922
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.05050505050505,
|
||
|
|
"grad_norm": 0.5165248585624271,
|
||
|
|
"learning_rate": 8.744171321967233e-06,
|
||
|
|
"loss": 0.246,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2386196106672287,
|
||
|
|
"step": 3000,
|
||
|
|
"valid_targets_mean": 5385.9,
|
||
|
|
"valid_targets_min": 642
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.058922558922559,
|
||
|
|
"grad_norm": 0.46595854294199934,
|
||
|
|
"learning_rate": 8.674873657893236e-06,
|
||
|
|
"loss": 0.2368,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23446813225746155,
|
||
|
|
"step": 3005,
|
||
|
|
"valid_targets_mean": 6161.8,
|
||
|
|
"valid_targets_min": 3469
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.0673400673400675,
|
||
|
|
"grad_norm": 0.5168319359581155,
|
||
|
|
"learning_rate": 8.605775554329486e-06,
|
||
|
|
"loss": 0.2326,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23876196146011353,
|
||
|
|
"step": 3010,
|
||
|
|
"valid_targets_mean": 5390.1,
|
||
|
|
"valid_targets_min": 577
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.075757575757576,
|
||
|
|
"grad_norm": 0.46775467689427314,
|
||
|
|
"learning_rate": 8.536878228856447e-06,
|
||
|
|
"loss": 0.2345,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2268284261226654,
|
||
|
|
"step": 3015,
|
||
|
|
"valid_targets_mean": 6205.7,
|
||
|
|
"valid_targets_min": 2397
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.084175084175084,
|
||
|
|
"grad_norm": 0.5205111573678236,
|
||
|
|
"learning_rate": 8.468182895516685e-06,
|
||
|
|
"loss": 0.2473,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2528153657913208,
|
||
|
|
"step": 3020,
|
||
|
|
"valid_targets_mean": 6040.9,
|
||
|
|
"valid_targets_min": 1732
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.092592592592593,
|
||
|
|
"grad_norm": 0.5195827014329844,
|
||
|
|
"learning_rate": 8.399690764793464e-06,
|
||
|
|
"loss": 0.2403,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22744807600975037,
|
||
|
|
"step": 3025,
|
||
|
|
"valid_targets_mean": 5111.1,
|
||
|
|
"valid_targets_min": 882
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.101010101010101,
|
||
|
|
"grad_norm": 0.49170918864423774,
|
||
|
|
"learning_rate": 8.331403043589387e-06,
|
||
|
|
"loss": 0.2298,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2277529537677765,
|
||
|
|
"step": 3030,
|
||
|
|
"valid_targets_mean": 5587.4,
|
||
|
|
"valid_targets_min": 3947
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.109427609427609,
|
||
|
|
"grad_norm": 0.47195012869371583,
|
||
|
|
"learning_rate": 8.26332093520516e-06,
|
||
|
|
"loss": 0.2364,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2412089854478836,
|
||
|
|
"step": 3035,
|
||
|
|
"valid_targets_mean": 6275.1,
|
||
|
|
"valid_targets_min": 3853
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.117845117845118,
|
||
|
|
"grad_norm": 0.5116529738273514,
|
||
|
|
"learning_rate": 8.195445639318369e-06,
|
||
|
|
"loss": 0.2284,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22236831486225128,
|
||
|
|
"step": 3040,
|
||
|
|
"valid_targets_mean": 5911.4,
|
||
|
|
"valid_targets_min": 963
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.126262626262626,
|
||
|
|
"grad_norm": 0.5188254282626279,
|
||
|
|
"learning_rate": 8.127778351962349e-06,
|
||
|
|
"loss": 0.2449,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2555454969406128,
|
||
|
|
"step": 3045,
|
||
|
|
"valid_targets_mean": 5581.8,
|
||
|
|
"valid_targets_min": 834
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.134680134680135,
|
||
|
|
"grad_norm": 0.47352791612280154,
|
||
|
|
"learning_rate": 8.06032026550513e-06,
|
||
|
|
"loss": 0.2243,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22536420822143555,
|
||
|
|
"step": 3050,
|
||
|
|
"valid_targets_mean": 6212.1,
|
||
|
|
"valid_targets_min": 3591
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.143097643097643,
|
||
|
|
"grad_norm": 0.5161817528182612,
|
||
|
|
"learning_rate": 7.993072568628378e-06,
|
||
|
|
"loss": 0.2334,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21615377068519592,
|
||
|
|
"step": 3055,
|
||
|
|
"valid_targets_mean": 5326.1,
|
||
|
|
"valid_targets_min": 1984
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.151515151515151,
|
||
|
|
"grad_norm": 0.4674652659739291,
|
||
|
|
"learning_rate": 7.926036446306473e-06,
|
||
|
|
"loss": 0.2336,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23871754109859467,
|
||
|
|
"step": 3060,
|
||
|
|
"valid_targets_mean": 6839.7,
|
||
|
|
"valid_targets_min": 4102
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.15993265993266,
|
||
|
|
"grad_norm": 0.5193304210527502,
|
||
|
|
"learning_rate": 7.859213079785665e-06,
|
||
|
|
"loss": 0.2356,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21915632486343384,
|
||
|
|
"step": 3065,
|
||
|
|
"valid_targets_mean": 5766.3,
|
||
|
|
"valid_targets_min": 3637
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.168350168350169,
|
||
|
|
"grad_norm": 0.52661788686289,
|
||
|
|
"learning_rate": 7.79260364656319e-06,
|
||
|
|
"loss": 0.2354,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24809540808200836,
|
||
|
|
"step": 3070,
|
||
|
|
"valid_targets_mean": 5621.6,
|
||
|
|
"valid_targets_min": 744
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.1767676767676765,
|
||
|
|
"grad_norm": 0.5034046143594989,
|
||
|
|
"learning_rate": 7.726209320366575e-06,
|
||
|
|
"loss": 0.2372,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22819620370864868,
|
||
|
|
"step": 3075,
|
||
|
|
"valid_targets_mean": 6095.1,
|
||
|
|
"valid_targets_min": 3789
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.185185185185185,
|
||
|
|
"grad_norm": 0.5157047863687968,
|
||
|
|
"learning_rate": 7.660031271132933e-06,
|
||
|
|
"loss": 0.2261,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2319592982530594,
|
||
|
|
"step": 3080,
|
||
|
|
"valid_targets_mean": 5742.2,
|
||
|
|
"valid_targets_min": 2157
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.193602693602694,
|
||
|
|
"grad_norm": 0.5392072929319364,
|
||
|
|
"learning_rate": 7.594070664988338e-06,
|
||
|
|
"loss": 0.2313,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2188304364681244,
|
||
|
|
"step": 3085,
|
||
|
|
"valid_targets_mean": 5194.3,
|
||
|
|
"valid_targets_min": 828
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.202020202020202,
|
||
|
|
"grad_norm": 0.529921176347278,
|
||
|
|
"learning_rate": 7.528328664227324e-06,
|
||
|
|
"loss": 0.2346,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.244046151638031,
|
||
|
|
"step": 3090,
|
||
|
|
"valid_targets_mean": 5709.8,
|
||
|
|
"valid_targets_min": 628
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.21043771043771,
|
||
|
|
"grad_norm": 0.5240479163794681,
|
||
|
|
"learning_rate": 7.462806427292342e-06,
|
||
|
|
"loss": 0.2278,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22765925526618958,
|
||
|
|
"step": 3095,
|
||
|
|
"valid_targets_mean": 5149.2,
|
||
|
|
"valid_targets_min": 753
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.218855218855219,
|
||
|
|
"grad_norm": 0.5136782077308949,
|
||
|
|
"learning_rate": 7.397505108753378e-06,
|
||
|
|
"loss": 0.2351,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2189491093158722,
|
||
|
|
"step": 3100,
|
||
|
|
"valid_targets_mean": 5187.2,
|
||
|
|
"valid_targets_min": 1968
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.2272727272727275,
|
||
|
|
"grad_norm": 0.5058540114550893,
|
||
|
|
"learning_rate": 7.332425859287624e-06,
|
||
|
|
"loss": 0.2432,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.262953519821167,
|
||
|
|
"step": 3105,
|
||
|
|
"valid_targets_mean": 6108.0,
|
||
|
|
"valid_targets_min": 2633
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.235690235690235,
|
||
|
|
"grad_norm": 0.5158981797860003,
|
||
|
|
"learning_rate": 7.2675698256591735e-06,
|
||
|
|
"loss": 0.2351,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22714978456497192,
|
||
|
|
"step": 3110,
|
||
|
|
"valid_targets_mean": 5883.8,
|
||
|
|
"valid_targets_min": 2261
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.244107744107744,
|
||
|
|
"grad_norm": 0.5372180066384188,
|
||
|
|
"learning_rate": 7.202938150698804e-06,
|
||
|
|
"loss": 0.2331,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22206753492355347,
|
||
|
|
"step": 3115,
|
||
|
|
"valid_targets_mean": 4753.1,
|
||
|
|
"valid_targets_min": 769
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.252525252525253,
|
||
|
|
"grad_norm": 0.46082204133963633,
|
||
|
|
"learning_rate": 7.138531973283898e-06,
|
||
|
|
"loss": 0.2368,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2284386157989502,
|
||
|
|
"step": 3120,
|
||
|
|
"valid_targets_mean": 6732.4,
|
||
|
|
"valid_targets_min": 4606
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.260942760942761,
|
||
|
|
"grad_norm": 0.5258612308182962,
|
||
|
|
"learning_rate": 7.074352428318312e-06,
|
||
|
|
"loss": 0.2376,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23692598938941956,
|
||
|
|
"step": 3125,
|
||
|
|
"valid_targets_mean": 5280.9,
|
||
|
|
"valid_targets_min": 927
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.269360269360269,
|
||
|
|
"grad_norm": 0.47852423674654065,
|
||
|
|
"learning_rate": 7.010400646712401e-06,
|
||
|
|
"loss": 0.2232,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20909680426120758,
|
||
|
|
"step": 3130,
|
||
|
|
"valid_targets_mean": 5772.5,
|
||
|
|
"valid_targets_min": 2157
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.277777777777778,
|
||
|
|
"grad_norm": 0.4670349699487819,
|
||
|
|
"learning_rate": 6.946677755363116e-06,
|
||
|
|
"loss": 0.231,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21536147594451904,
|
||
|
|
"step": 3135,
|
||
|
|
"valid_targets_mean": 6276.1,
|
||
|
|
"valid_targets_min": 3836
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.286195286195286,
|
||
|
|
"grad_norm": 0.47317451479187395,
|
||
|
|
"learning_rate": 6.883184877134099e-06,
|
||
|
|
"loss": 0.2362,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2287386655807495,
|
||
|
|
"step": 3140,
|
||
|
|
"valid_targets_mean": 6471.6,
|
||
|
|
"valid_targets_min": 2095
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.294612794612795,
|
||
|
|
"grad_norm": 0.4992948685768268,
|
||
|
|
"learning_rate": 6.819923130835926e-06,
|
||
|
|
"loss": 0.231,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22403103113174438,
|
||
|
|
"step": 3145,
|
||
|
|
"valid_targets_mean": 5905.2,
|
||
|
|
"valid_targets_min": 2270
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.303030303030303,
|
||
|
|
"grad_norm": 0.49261776085093156,
|
||
|
|
"learning_rate": 6.756893631206409e-06,
|
||
|
|
"loss": 0.2338,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24229532480239868,
|
||
|
|
"step": 3150,
|
||
|
|
"valid_targets_mean": 6232.4,
|
||
|
|
"valid_targets_min": 2027
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.311447811447811,
|
||
|
|
"grad_norm": 0.5028192511116332,
|
||
|
|
"learning_rate": 6.694097488890889e-06,
|
||
|
|
"loss": 0.2345,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.260204553604126,
|
||
|
|
"step": 3155,
|
||
|
|
"valid_targets_mean": 5657.8,
|
||
|
|
"valid_targets_min": 1273
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.31986531986532,
|
||
|
|
"grad_norm": 0.48478369016215206,
|
||
|
|
"learning_rate": 6.63153581042276e-06,
|
||
|
|
"loss": 0.2384,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23380783200263977,
|
||
|
|
"step": 3160,
|
||
|
|
"valid_targets_mean": 6147.7,
|
||
|
|
"valid_targets_min": 2819
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.328282828282829,
|
||
|
|
"grad_norm": 0.5458848519398222,
|
||
|
|
"learning_rate": 6.56920969820388e-06,
|
||
|
|
"loss": 0.2361,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24366281926631927,
|
||
|
|
"step": 3165,
|
||
|
|
"valid_targets_mean": 5250.1,
|
||
|
|
"valid_targets_min": 693
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.3367003367003365,
|
||
|
|
"grad_norm": 0.534336924740091,
|
||
|
|
"learning_rate": 6.507120250485195e-06,
|
||
|
|
"loss": 0.2351,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.225617915391922,
|
||
|
|
"step": 3170,
|
||
|
|
"valid_targets_mean": 5071.1,
|
||
|
|
"valid_targets_min": 743
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.345117845117845,
|
||
|
|
"grad_norm": 0.5311919196381524,
|
||
|
|
"learning_rate": 6.445268561347393e-06,
|
||
|
|
"loss": 0.2261,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23042187094688416,
|
||
|
|
"step": 3175,
|
||
|
|
"valid_targets_mean": 5667.2,
|
||
|
|
"valid_targets_min": 1807
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.353535353535354,
|
||
|
|
"grad_norm": 0.6646559705802462,
|
||
|
|
"learning_rate": 6.383655720681588e-06,
|
||
|
|
"loss": 0.2362,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22798696160316467,
|
||
|
|
"step": 3180,
|
||
|
|
"valid_targets_mean": 5714.7,
|
||
|
|
"valid_targets_min": 2030
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.361952861952862,
|
||
|
|
"grad_norm": 0.48527450594210503,
|
||
|
|
"learning_rate": 6.3222828141701335e-06,
|
||
|
|
"loss": 0.2304,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22262442111968994,
|
||
|
|
"step": 3185,
|
||
|
|
"valid_targets_mean": 5532.1,
|
||
|
|
"valid_targets_min": 849
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.37037037037037,
|
||
|
|
"grad_norm": 0.5089773350495819,
|
||
|
|
"learning_rate": 6.261150923267525e-06,
|
||
|
|
"loss": 0.2366,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22977544367313385,
|
||
|
|
"step": 3190,
|
||
|
|
"valid_targets_mean": 5720.7,
|
||
|
|
"valid_targets_min": 2546
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.378787878787879,
|
||
|
|
"grad_norm": 0.48608571702885467,
|
||
|
|
"learning_rate": 6.200261125181262e-06,
|
||
|
|
"loss": 0.2349,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.258963018655777,
|
||
|
|
"step": 3195,
|
||
|
|
"valid_targets_mean": 6741.2,
|
||
|
|
"valid_targets_min": 5102
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.3872053872053876,
|
||
|
|
"grad_norm": 0.5024329241116928,
|
||
|
|
"learning_rate": 6.1396144928529614e-06,
|
||
|
|
"loss": 0.234,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2263956516981125,
|
||
|
|
"step": 3200,
|
||
|
|
"valid_targets_mean": 5581.6,
|
||
|
|
"valid_targets_min": 529
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.395622895622895,
|
||
|
|
"grad_norm": 0.49165929762887833,
|
||
|
|
"learning_rate": 6.0792120949393885e-06,
|
||
|
|
"loss": 0.2302,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21596331894397736,
|
||
|
|
"step": 3205,
|
||
|
|
"valid_targets_mean": 5900.8,
|
||
|
|
"valid_targets_min": 4528
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.404040404040404,
|
||
|
|
"grad_norm": 0.4624623310895367,
|
||
|
|
"learning_rate": 6.019054995793634e-06,
|
||
|
|
"loss": 0.2387,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24683353304862976,
|
||
|
|
"step": 3210,
|
||
|
|
"valid_targets_mean": 6548.0,
|
||
|
|
"valid_targets_min": 4096
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.412457912457913,
|
||
|
|
"grad_norm": 0.4918872774950123,
|
||
|
|
"learning_rate": 5.959144255446392e-06,
|
||
|
|
"loss": 0.2387,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22914943099021912,
|
||
|
|
"step": 3215,
|
||
|
|
"valid_targets_mean": 5636.7,
|
||
|
|
"valid_targets_min": 2221
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.420875420875421,
|
||
|
|
"grad_norm": 0.5236989395915803,
|
||
|
|
"learning_rate": 5.899480929587243e-06,
|
||
|
|
"loss": 0.2369,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23069006204605103,
|
||
|
|
"step": 3220,
|
||
|
|
"valid_targets_mean": 5107.2,
|
||
|
|
"valid_targets_min": 2261
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.429292929292929,
|
||
|
|
"grad_norm": 0.49362777785329914,
|
||
|
|
"learning_rate": 5.840066069546062e-06,
|
||
|
|
"loss": 0.2258,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23647350072860718,
|
||
|
|
"step": 3225,
|
||
|
|
"valid_targets_mean": 6049.1,
|
||
|
|
"valid_targets_min": 2780
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.437710437710438,
|
||
|
|
"grad_norm": 0.5387261419400055,
|
||
|
|
"learning_rate": 5.780900722274522e-06,
|
||
|
|
"loss": 0.2388,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25978630781173706,
|
||
|
|
"step": 3230,
|
||
|
|
"valid_targets_mean": 5692.9,
|
||
|
|
"valid_targets_min": 2371
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.446127946127946,
|
||
|
|
"grad_norm": 0.5097787365388916,
|
||
|
|
"learning_rate": 5.721985930327585e-06,
|
||
|
|
"loss": 0.229,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22551800310611725,
|
||
|
|
"step": 3235,
|
||
|
|
"valid_targets_mean": 5662.1,
|
||
|
|
"valid_targets_min": 1900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.454545454545454,
|
||
|
|
"grad_norm": 0.4732570960654139,
|
||
|
|
"learning_rate": 5.663322731845204e-06,
|
||
|
|
"loss": 0.2252,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21448811888694763,
|
||
|
|
"step": 3240,
|
||
|
|
"valid_targets_mean": 6084.2,
|
||
|
|
"valid_targets_min": 5120
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.462962962962963,
|
||
|
|
"grad_norm": 0.504245619409664,
|
||
|
|
"learning_rate": 5.604912160533968e-06,
|
||
|
|
"loss": 0.2374,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23949390649795532,
|
||
|
|
"step": 3245,
|
||
|
|
"valid_targets_mean": 6064.9,
|
||
|
|
"valid_targets_min": 4162
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.4713804713804715,
|
||
|
|
"grad_norm": 0.4844841529391725,
|
||
|
|
"learning_rate": 5.546755245648918e-06,
|
||
|
|
"loss": 0.2325,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.214034304022789,
|
||
|
|
"step": 3250,
|
||
|
|
"valid_targets_mean": 5519.1,
|
||
|
|
"valid_targets_min": 1900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.47979797979798,
|
||
|
|
"grad_norm": 0.5148425473218765,
|
||
|
|
"learning_rate": 5.488853011975421e-06,
|
||
|
|
"loss": 0.2294,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2293565422296524,
|
||
|
|
"step": 3255,
|
||
|
|
"valid_targets_mean": 5810.1,
|
||
|
|
"valid_targets_min": 3554
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.488215488215488,
|
||
|
|
"grad_norm": 0.5043263239532035,
|
||
|
|
"learning_rate": 5.431206479811076e-06,
|
||
|
|
"loss": 0.2288,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23567970097064972,
|
||
|
|
"step": 3260,
|
||
|
|
"valid_targets_mean": 5856.5,
|
||
|
|
"valid_targets_min": 3514
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.4966329966329965,
|
||
|
|
"grad_norm": 0.5234246855324006,
|
||
|
|
"learning_rate": 5.373816664947753e-06,
|
||
|
|
"loss": 0.2318,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2325543314218521,
|
||
|
|
"step": 3265,
|
||
|
|
"valid_targets_mean": 5819.9,
|
||
|
|
"valid_targets_min": 2170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.505050505050505,
|
||
|
|
"grad_norm": 0.48084978877344153,
|
||
|
|
"learning_rate": 5.316684578653728e-06,
|
||
|
|
"loss": 0.2286,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23692665994167328,
|
||
|
|
"step": 3270,
|
||
|
|
"valid_targets_mean": 6192.4,
|
||
|
|
"valid_targets_min": 4568
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.513468013468014,
|
||
|
|
"grad_norm": 0.46469157525134236,
|
||
|
|
"learning_rate": 5.259811227655789e-06,
|
||
|
|
"loss": 0.2311,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23742267489433289,
|
||
|
|
"step": 3275,
|
||
|
|
"valid_targets_mean": 6736.9,
|
||
|
|
"valid_targets_min": 4696
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.521885521885522,
|
||
|
|
"grad_norm": 0.5248519679171413,
|
||
|
|
"learning_rate": 5.203197614121558e-06,
|
||
|
|
"loss": 0.2417,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25347089767456055,
|
||
|
|
"step": 3280,
|
||
|
|
"valid_targets_mean": 5189.9,
|
||
|
|
"valid_targets_min": 962
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.53030303030303,
|
||
|
|
"grad_norm": 0.5158347840082577,
|
||
|
|
"learning_rate": 5.146844735641827e-06,
|
||
|
|
"loss": 0.2404,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2295902818441391,
|
||
|
|
"step": 3285,
|
||
|
|
"valid_targets_mean": 5830.0,
|
||
|
|
"valid_targets_min": 1486
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.538720538720539,
|
||
|
|
"grad_norm": 0.5028953980064815,
|
||
|
|
"learning_rate": 5.090753585212947e-06,
|
||
|
|
"loss": 0.2402,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24436822533607483,
|
||
|
|
"step": 3290,
|
||
|
|
"valid_targets_mean": 5826.0,
|
||
|
|
"valid_targets_min": 851
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.547138047138047,
|
||
|
|
"grad_norm": 0.4936768840129265,
|
||
|
|
"learning_rate": 5.034925151219343e-06,
|
||
|
|
"loss": 0.2325,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23498250544071198,
|
||
|
|
"step": 3295,
|
||
|
|
"valid_targets_mean": 5751.9,
|
||
|
|
"valid_targets_min": 2591
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.555555555555555,
|
||
|
|
"grad_norm": 0.5050995601396032,
|
||
|
|
"learning_rate": 4.979360417416126e-06,
|
||
|
|
"loss": 0.2344,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24720489978790283,
|
||
|
|
"step": 3300,
|
||
|
|
"valid_targets_mean": 5693.1,
|
||
|
|
"valid_targets_min": 4156
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.563973063973064,
|
||
|
|
"grad_norm": 0.4969926232957146,
|
||
|
|
"learning_rate": 4.9240603629117175e-06,
|
||
|
|
"loss": 0.2271,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20771655440330505,
|
||
|
|
"step": 3305,
|
||
|
|
"valid_targets_mean": 5534.0,
|
||
|
|
"valid_targets_min": 3133
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.572390572390573,
|
||
|
|
"grad_norm": 0.5530553002623905,
|
||
|
|
"learning_rate": 4.869025962150617e-06,
|
||
|
|
"loss": 0.2355,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2520415186882019,
|
||
|
|
"step": 3310,
|
||
|
|
"valid_targets_mean": 5264.3,
|
||
|
|
"valid_targets_min": 1400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.58080808080808,
|
||
|
|
"grad_norm": 0.47824826642439083,
|
||
|
|
"learning_rate": 4.814258184896234e-06,
|
||
|
|
"loss": 0.2309,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23448395729064941,
|
||
|
|
"step": 3315,
|
||
|
|
"valid_targets_mean": 5964.6,
|
||
|
|
"valid_targets_min": 3968
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.589225589225589,
|
||
|
|
"grad_norm": 0.5333562093887002,
|
||
|
|
"learning_rate": 4.759757996213783e-06,
|
||
|
|
"loss": 0.232,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21456602215766907,
|
||
|
|
"step": 3320,
|
||
|
|
"valid_targets_mean": 5262.3,
|
||
|
|
"valid_targets_min": 1919
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.597643097643098,
|
||
|
|
"grad_norm": 0.5291019973267682,
|
||
|
|
"learning_rate": 4.7055263564533096e-06,
|
||
|
|
"loss": 0.2234,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22403772175312042,
|
||
|
|
"step": 3325,
|
||
|
|
"valid_targets_mean": 4866.6,
|
||
|
|
"valid_targets_min": 740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.606060606060606,
|
||
|
|
"grad_norm": 0.5520805495799307,
|
||
|
|
"learning_rate": 4.651564221232728e-06,
|
||
|
|
"loss": 0.2361,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23195771872997284,
|
||
|
|
"step": 3330,
|
||
|
|
"valid_targets_mean": 4537.1,
|
||
|
|
"valid_targets_min": 775
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.614478114478114,
|
||
|
|
"grad_norm": 0.5108188611657087,
|
||
|
|
"learning_rate": 4.597872541421007e-06,
|
||
|
|
"loss": 0.2462,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23533491790294647,
|
||
|
|
"step": 3335,
|
||
|
|
"valid_targets_mean": 5605.8,
|
||
|
|
"valid_targets_min": 706
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.622895622895623,
|
||
|
|
"grad_norm": 0.5157543076277402,
|
||
|
|
"learning_rate": 4.544452263121422e-06,
|
||
|
|
"loss": 0.233,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2332892119884491,
|
||
|
|
"step": 3340,
|
||
|
|
"valid_targets_mean": 5331.5,
|
||
|
|
"valid_targets_min": 577
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.6313131313131315,
|
||
|
|
"grad_norm": 0.49379242379442295,
|
||
|
|
"learning_rate": 4.491304327654855e-06,
|
||
|
|
"loss": 0.2115,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21516942977905273,
|
||
|
|
"step": 3345,
|
||
|
|
"valid_targets_mean": 5680.6,
|
||
|
|
"valid_targets_min": 1033
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.63973063973064,
|
||
|
|
"grad_norm": 0.46555609809855103,
|
||
|
|
"learning_rate": 4.438429671543234e-06,
|
||
|
|
"loss": 0.234,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2280246913433075,
|
||
|
|
"step": 3350,
|
||
|
|
"valid_targets_mean": 6279.8,
|
||
|
|
"valid_targets_min": 4022
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.648148148148148,
|
||
|
|
"grad_norm": 0.4853473764029863,
|
||
|
|
"learning_rate": 4.385829226493015e-06,
|
||
|
|
"loss": 0.2366,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.231397345662117,
|
||
|
|
"step": 3355,
|
||
|
|
"valid_targets_mean": 5583.8,
|
||
|
|
"valid_targets_min": 2088
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.656565656565657,
|
||
|
|
"grad_norm": 0.4657647618680769,
|
||
|
|
"learning_rate": 4.333503919378767e-06,
|
||
|
|
"loss": 0.2337,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24527719616889954,
|
||
|
|
"step": 3360,
|
||
|
|
"valid_targets_mean": 6640.7,
|
||
|
|
"valid_targets_min": 4507
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.664983164983165,
|
||
|
|
"grad_norm": 0.5317871094974028,
|
||
|
|
"learning_rate": 4.2814546722268595e-06,
|
||
|
|
"loss": 0.2347,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23336941003799438,
|
||
|
|
"step": 3365,
|
||
|
|
"valid_targets_mean": 5179.6,
|
||
|
|
"valid_targets_min": 753
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.673400673400673,
|
||
|
|
"grad_norm": 0.532977311370893,
|
||
|
|
"learning_rate": 4.229682402199175e-06,
|
||
|
|
"loss": 0.2332,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23605574667453766,
|
||
|
|
"step": 3370,
|
||
|
|
"valid_targets_mean": 5920.1,
|
||
|
|
"valid_targets_min": 2042
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.681818181818182,
|
||
|
|
"grad_norm": 0.5750924142313466,
|
||
|
|
"learning_rate": 4.178188021576983e-06,
|
||
|
|
"loss": 0.2308,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23879092931747437,
|
||
|
|
"step": 3375,
|
||
|
|
"valid_targets_mean": 5165.8,
|
||
|
|
"valid_targets_min": 529
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.69023569023569,
|
||
|
|
"grad_norm": 0.48765554257352756,
|
||
|
|
"learning_rate": 4.126972437744861e-06,
|
||
|
|
"loss": 0.2368,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23397740721702576,
|
||
|
|
"step": 3380,
|
||
|
|
"valid_targets_mean": 5988.1,
|
||
|
|
"valid_targets_min": 4242
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.698653198653199,
|
||
|
|
"grad_norm": 0.5294602681979758,
|
||
|
|
"learning_rate": 4.076036553174678e-06,
|
||
|
|
"loss": 0.2395,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23284104466438293,
|
||
|
|
"step": 3385,
|
||
|
|
"valid_targets_mean": 5681.9,
|
||
|
|
"valid_targets_min": 2217
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.707070707070707,
|
||
|
|
"grad_norm": 0.49114433501009724,
|
||
|
|
"learning_rate": 4.025381265409722e-06,
|
||
|
|
"loss": 0.2268,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22848021984100342,
|
||
|
|
"step": 3390,
|
||
|
|
"valid_targets_mean": 6036.4,
|
||
|
|
"valid_targets_min": 4350
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.715488215488215,
|
||
|
|
"grad_norm": 0.5110269277385011,
|
||
|
|
"learning_rate": 3.975007467048875e-06,
|
||
|
|
"loss": 0.2384,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22457490861415863,
|
||
|
|
"step": 3395,
|
||
|
|
"valid_targets_mean": 5598.8,
|
||
|
|
"valid_targets_min": 495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.723905723905724,
|
||
|
|
"grad_norm": 0.5155941569501963,
|
||
|
|
"learning_rate": 3.924916045730873e-06,
|
||
|
|
"loss": 0.2325,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20673015713691711,
|
||
|
|
"step": 3400,
|
||
|
|
"valid_targets_mean": 4903.1,
|
||
|
|
"valid_targets_min": 1735
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.732323232323233,
|
||
|
|
"grad_norm": 0.5433799296336833,
|
||
|
|
"learning_rate": 3.875107884118681e-06,
|
||
|
|
"loss": 0.2291,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26549580693244934,
|
||
|
|
"step": 3405,
|
||
|
|
"valid_targets_mean": 5974.6,
|
||
|
|
"valid_targets_min": 648
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.7407407407407405,
|
||
|
|
"grad_norm": 0.5256583886737612,
|
||
|
|
"learning_rate": 3.825583859883941e-06,
|
||
|
|
"loss": 0.2332,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23430989682674408,
|
||
|
|
"step": 3410,
|
||
|
|
"valid_targets_mean": 5242.2,
|
||
|
|
"valid_targets_min": 836
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.749158249158249,
|
||
|
|
"grad_norm": 0.4716795001316659,
|
||
|
|
"learning_rate": 3.7763448456914886e-06,
|
||
|
|
"loss": 0.2323,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23861783742904663,
|
||
|
|
"step": 3415,
|
||
|
|
"valid_targets_mean": 6166.8,
|
||
|
|
"valid_targets_min": 2042
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.757575757575758,
|
||
|
|
"grad_norm": 0.5261853664922447,
|
||
|
|
"learning_rate": 3.7273917091839806e-06,
|
||
|
|
"loss": 0.2383,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25470608472824097,
|
||
|
|
"step": 3420,
|
||
|
|
"valid_targets_mean": 5424.5,
|
||
|
|
"valid_targets_min": 634
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.7659932659932664,
|
||
|
|
"grad_norm": 0.4962287138405268,
|
||
|
|
"learning_rate": 3.678725312966629e-06,
|
||
|
|
"loss": 0.2377,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23568958044052124,
|
||
|
|
"step": 3425,
|
||
|
|
"valid_targets_mean": 5845.6,
|
||
|
|
"valid_targets_min": 2110
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.774410774410774,
|
||
|
|
"grad_norm": 0.45971665253736965,
|
||
|
|
"learning_rate": 3.6303465145919716e-06,
|
||
|
|
"loss": 0.2256,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18748649954795837,
|
||
|
|
"step": 3430,
|
||
|
|
"valid_targets_mean": 6514.1,
|
||
|
|
"valid_targets_min": 4917
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.782828282828283,
|
||
|
|
"grad_norm": 0.5045763625955956,
|
||
|
|
"learning_rate": 3.582256166544773e-06,
|
||
|
|
"loss": 0.2326,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23577597737312317,
|
||
|
|
"step": 3435,
|
||
|
|
"valid_targets_mean": 5670.4,
|
||
|
|
"valid_targets_min": 2205
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.7912457912457915,
|
||
|
|
"grad_norm": 0.5026506603551945,
|
||
|
|
"learning_rate": 3.534455116227005e-06,
|
||
|
|
"loss": 0.2312,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24811536073684692,
|
||
|
|
"step": 3440,
|
||
|
|
"valid_targets_mean": 6188.9,
|
||
|
|
"valid_targets_min": 3727
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.799663299663299,
|
||
|
|
"grad_norm": 0.5013307207280829,
|
||
|
|
"learning_rate": 3.4869442059429084e-06,
|
||
|
|
"loss": 0.2337,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24296782910823822,
|
||
|
|
"step": 3445,
|
||
|
|
"valid_targets_mean": 5887.4,
|
||
|
|
"valid_targets_min": 2936
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.808080808080808,
|
||
|
|
"grad_norm": 0.4857730396730159,
|
||
|
|
"learning_rate": 3.4397242728841663e-06,
|
||
|
|
"loss": 0.2314,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2121039479970932,
|
||
|
|
"step": 3450,
|
||
|
|
"valid_targets_mean": 5653.2,
|
||
|
|
"valid_targets_min": 677
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.816498316498317,
|
||
|
|
"grad_norm": 0.5330733750805351,
|
||
|
|
"learning_rate": 3.392796149115132e-06,
|
||
|
|
"loss": 0.2316,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2211214303970337,
|
||
|
|
"step": 3455,
|
||
|
|
"valid_targets_mean": 5979.4,
|
||
|
|
"valid_targets_min": 3875
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.824915824915825,
|
||
|
|
"grad_norm": 0.5031682106996872,
|
||
|
|
"learning_rate": 3.3461606615581686e-06,
|
||
|
|
"loss": 0.2294,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22831107676029205,
|
||
|
|
"step": 3460,
|
||
|
|
"valid_targets_mean": 5962.1,
|
||
|
|
"valid_targets_min": 1753
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.833333333333333,
|
||
|
|
"grad_norm": 0.5018170197197533,
|
||
|
|
"learning_rate": 3.2998186319791037e-06,
|
||
|
|
"loss": 0.2359,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22891631722450256,
|
||
|
|
"step": 3465,
|
||
|
|
"valid_targets_mean": 5357.0,
|
||
|
|
"valid_targets_min": 710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.841750841750842,
|
||
|
|
"grad_norm": 0.5144545245009204,
|
||
|
|
"learning_rate": 3.2537708769727105e-06,
|
||
|
|
"loss": 0.242,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23250479996204376,
|
||
|
|
"step": 3470,
|
||
|
|
"valid_targets_mean": 5865.8,
|
||
|
|
"valid_targets_min": 976
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.85016835016835,
|
||
|
|
"grad_norm": 0.5293606642107064,
|
||
|
|
"learning_rate": 3.2080182079483492e-06,
|
||
|
|
"loss": 0.2283,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23509103059768677,
|
||
|
|
"step": 3475,
|
||
|
|
"valid_targets_mean": 4859.6,
|
||
|
|
"valid_targets_min": 642
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.858585858585858,
|
||
|
|
"grad_norm": 0.5144848394075487,
|
||
|
|
"learning_rate": 3.162561431115654e-06,
|
||
|
|
"loss": 0.2433,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23617658019065857,
|
||
|
|
"step": 3480,
|
||
|
|
"valid_targets_mean": 5443.1,
|
||
|
|
"valid_targets_min": 1760
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.867003367003367,
|
||
|
|
"grad_norm": 0.5254868769972713,
|
||
|
|
"learning_rate": 3.1174013474703255e-06,
|
||
|
|
"loss": 0.2264,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2453891634941101,
|
||
|
|
"step": 3485,
|
||
|
|
"valid_targets_mean": 5495.3,
|
||
|
|
"valid_targets_min": 786
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.875420875420875,
|
||
|
|
"grad_norm": 0.4776346524861737,
|
||
|
|
"learning_rate": 3.0725387527800388e-06,
|
||
|
|
"loss": 0.2222,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2222374677658081,
|
||
|
|
"step": 3490,
|
||
|
|
"valid_targets_mean": 6261.4,
|
||
|
|
"valid_targets_min": 3908
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.883838383838384,
|
||
|
|
"grad_norm": 0.5207427790341015,
|
||
|
|
"learning_rate": 3.027974437570389e-06,
|
||
|
|
"loss": 0.2353,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24584221839904785,
|
||
|
|
"step": 3495,
|
||
|
|
"valid_targets_mean": 5397.8,
|
||
|
|
"valid_targets_min": 1098
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.892255892255893,
|
||
|
|
"grad_norm": 0.5309523002539125,
|
||
|
|
"learning_rate": 2.9837091871109748e-06,
|
||
|
|
"loss": 0.2347,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2331022322177887,
|
||
|
|
"step": 3500,
|
||
|
|
"valid_targets_mean": 5541.5,
|
||
|
|
"valid_targets_min": 805
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.9006734006734005,
|
||
|
|
"grad_norm": 0.5265535546852831,
|
||
|
|
"learning_rate": 2.939743781401576e-06,
|
||
|
|
"loss": 0.225,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24659153819084167,
|
||
|
|
"step": 3505,
|
||
|
|
"valid_targets_mean": 5344.6,
|
||
|
|
"valid_targets_min": 1761
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.909090909090909,
|
||
|
|
"grad_norm": 0.5269585514846513,
|
||
|
|
"learning_rate": 2.896078995158391e-06,
|
||
|
|
"loss": 0.2333,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2496645152568817,
|
||
|
|
"step": 3510,
|
||
|
|
"valid_targets_mean": 5882.9,
|
||
|
|
"valid_targets_min": 823
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.917508417508418,
|
||
|
|
"grad_norm": 0.5318764988027825,
|
||
|
|
"learning_rate": 2.852715597800373e-06,
|
||
|
|
"loss": 0.2289,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2297423630952835,
|
||
|
|
"step": 3515,
|
||
|
|
"valid_targets_mean": 5516.7,
|
||
|
|
"valid_targets_min": 599
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.925925925925926,
|
||
|
|
"grad_norm": 0.4895912672050829,
|
||
|
|
"learning_rate": 2.8096543534357157e-06,
|
||
|
|
"loss": 0.2384,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22827956080436707,
|
||
|
|
"step": 3520,
|
||
|
|
"valid_targets_mean": 6107.5,
|
||
|
|
"valid_targets_min": 4393
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.934343434343434,
|
||
|
|
"grad_norm": 0.5091664341563568,
|
||
|
|
"learning_rate": 2.766896020848351e-06,
|
||
|
|
"loss": 0.2322,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24848194420337677,
|
||
|
|
"step": 3525,
|
||
|
|
"valid_targets_mean": 6239.6,
|
||
|
|
"valid_targets_min": 4263
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.942760942760943,
|
||
|
|
"grad_norm": 0.4896189673623001,
|
||
|
|
"learning_rate": 2.7244413534845813e-06,
|
||
|
|
"loss": 0.2364,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2458949238061905,
|
||
|
|
"step": 3530,
|
||
|
|
"valid_targets_mean": 5956.6,
|
||
|
|
"valid_targets_min": 2262
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.951178451178452,
|
||
|
|
"grad_norm": 0.4985274922172781,
|
||
|
|
"learning_rate": 2.6822910994398334e-06,
|
||
|
|
"loss": 0.2407,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2408095747232437,
|
||
|
|
"step": 3535,
|
||
|
|
"valid_targets_mean": 5447.9,
|
||
|
|
"valid_targets_min": 737
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.959595959595959,
|
||
|
|
"grad_norm": 0.5025455962690482,
|
||
|
|
"learning_rate": 2.6404460014454356e-06,
|
||
|
|
"loss": 0.2291,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22609840333461761,
|
||
|
|
"step": 3540,
|
||
|
|
"valid_targets_mean": 5844.9,
|
||
|
|
"valid_targets_min": 833
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.968013468013468,
|
||
|
|
"grad_norm": 0.5777884151750781,
|
||
|
|
"learning_rate": 2.5989067968555514e-06,
|
||
|
|
"loss": 0.2236,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2250966876745224,
|
||
|
|
"step": 3545,
|
||
|
|
"valid_targets_mean": 5791.6,
|
||
|
|
"valid_targets_min": 1732
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.976430976430977,
|
||
|
|
"grad_norm": 0.47037374955087863,
|
||
|
|
"learning_rate": 2.557674217634196e-06,
|
||
|
|
"loss": 0.231,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2266923189163208,
|
||
|
|
"step": 3550,
|
||
|
|
"valid_targets_mean": 6096.9,
|
||
|
|
"valid_targets_min": 3177
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.984848484848484,
|
||
|
|
"grad_norm": 0.4729716390407066,
|
||
|
|
"learning_rate": 2.516748990342317e-06,
|
||
|
|
"loss": 0.217,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2235698103904724,
|
||
|
|
"step": 3555,
|
||
|
|
"valid_targets_mean": 5944.9,
|
||
|
|
"valid_targets_min": 2345
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.993265993265993,
|
||
|
|
"grad_norm": 0.5070240661629777,
|
||
|
|
"learning_rate": 2.476131836125e-06,
|
||
|
|
"loss": 0.2192,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21875323355197906,
|
||
|
|
"step": 3560,
|
||
|
|
"valid_targets_mean": 5720.9,
|
||
|
|
"valid_targets_min": 3382
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.001683501683502,
|
||
|
|
"grad_norm": 0.5114000681337469,
|
||
|
|
"learning_rate": 2.435823470698768e-06,
|
||
|
|
"loss": 0.2307,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23242336511611938,
|
||
|
|
"step": 3565,
|
||
|
|
"valid_targets_mean": 5812.8,
|
||
|
|
"valid_targets_min": 2026
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.01010101010101,
|
||
|
|
"grad_norm": 0.4877299717933894,
|
||
|
|
"learning_rate": 2.3958246043389566e-06,
|
||
|
|
"loss": 0.2286,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22799277305603027,
|
||
|
|
"step": 3570,
|
||
|
|
"valid_targets_mean": 6143.9,
|
||
|
|
"valid_targets_min": 3948
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.018518518518518,
|
||
|
|
"grad_norm": 0.5104477619265424,
|
||
|
|
"learning_rate": 2.356135941867217e-06,
|
||
|
|
"loss": 0.2292,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2472635954618454,
|
||
|
|
"step": 3575,
|
||
|
|
"valid_targets_mean": 6150.5,
|
||
|
|
"valid_targets_min": 5193
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.026936026936027,
|
||
|
|
"grad_norm": 0.5257410926565665,
|
||
|
|
"learning_rate": 2.3167581826390806e-06,
|
||
|
|
"loss": 0.2268,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22177693247795105,
|
||
|
|
"step": 3580,
|
||
|
|
"valid_targets_mean": 6363.8,
|
||
|
|
"valid_targets_min": 1273
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.0353535353535355,
|
||
|
|
"grad_norm": 0.5340948925922618,
|
||
|
|
"learning_rate": 2.2776920205316276e-06,
|
||
|
|
"loss": 0.2217,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21528631448745728,
|
||
|
|
"step": 3585,
|
||
|
|
"valid_targets_mean": 5297.4,
|
||
|
|
"valid_targets_min": 740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.043771043771044,
|
||
|
|
"grad_norm": 0.49589683213064645,
|
||
|
|
"learning_rate": 2.2389381439312973e-06,
|
||
|
|
"loss": 0.2285,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2327052652835846,
|
||
|
|
"step": 3590,
|
||
|
|
"valid_targets_mean": 6229.2,
|
||
|
|
"valid_targets_min": 4307
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.052188552188552,
|
||
|
|
"grad_norm": 0.5405859979341503,
|
||
|
|
"learning_rate": 2.2004972357217146e-06,
|
||
|
|
"loss": 0.2386,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2503454089164734,
|
||
|
|
"step": 3595,
|
||
|
|
"valid_targets_mean": 5809.4,
|
||
|
|
"valid_targets_min": 2837
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.0606060606060606,
|
||
|
|
"grad_norm": 0.5362685969042656,
|
||
|
|
"learning_rate": 2.1623699732716787e-06,
|
||
|
|
"loss": 0.2297,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.210447758436203,
|
||
|
|
"step": 3600,
|
||
|
|
"valid_targets_mean": 5844.3,
|
||
|
|
"valid_targets_min": 2311
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.069023569023569,
|
||
|
|
"grad_norm": 0.4857377618243944,
|
||
|
|
"learning_rate": 2.1245570284232263e-06,
|
||
|
|
"loss": 0.2273,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2283036708831787,
|
||
|
|
"step": 3605,
|
||
|
|
"valid_targets_mean": 5930.3,
|
||
|
|
"valid_targets_min": 3133
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.077441077441078,
|
||
|
|
"grad_norm": 0.5069274043988372,
|
||
|
|
"learning_rate": 2.0870590674797884e-06,
|
||
|
|
"loss": 0.2324,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24341779947280884,
|
||
|
|
"step": 3610,
|
||
|
|
"valid_targets_mean": 6278.7,
|
||
|
|
"valid_targets_min": 809
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.085858585858586,
|
||
|
|
"grad_norm": 0.5185395351533187,
|
||
|
|
"learning_rate": 2.049876751194464e-06,
|
||
|
|
"loss": 0.2275,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2473084181547165,
|
||
|
|
"step": 3615,
|
||
|
|
"valid_targets_mean": 6127.9,
|
||
|
|
"valid_targets_min": 4402
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.094276094276094,
|
||
|
|
"grad_norm": 0.5140171375506261,
|
||
|
|
"learning_rate": 2.0130107347583492e-06,
|
||
|
|
"loss": 0.2246,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22461938858032227,
|
||
|
|
"step": 3620,
|
||
|
|
"valid_targets_mean": 5651.7,
|
||
|
|
"valid_targets_min": 3026
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.102693602693603,
|
||
|
|
"grad_norm": 0.5106698343028654,
|
||
|
|
"learning_rate": 1.976461667789009e-06,
|
||
|
|
"loss": 0.2361,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2493961602449417,
|
||
|
|
"step": 3625,
|
||
|
|
"valid_targets_mean": 6109.2,
|
||
|
|
"valid_targets_min": 775
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.111111111111111,
|
||
|
|
"grad_norm": 0.5052132229100402,
|
||
|
|
"learning_rate": 1.940230194319044e-06,
|
||
|
|
"loss": 0.2348,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22754473984241486,
|
||
|
|
"step": 3630,
|
||
|
|
"valid_targets_mean": 5725.6,
|
||
|
|
"valid_targets_min": 1919
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.119528619528619,
|
||
|
|
"grad_norm": 0.5159221563265757,
|
||
|
|
"learning_rate": 1.9043169527847106e-06,
|
||
|
|
"loss": 0.2367,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24825209379196167,
|
||
|
|
"step": 3635,
|
||
|
|
"valid_targets_mean": 5646.5,
|
||
|
|
"valid_targets_min": 2609
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.127946127946128,
|
||
|
|
"grad_norm": 0.5586060335280434,
|
||
|
|
"learning_rate": 1.8687225760146922e-06,
|
||
|
|
"loss": 0.2297,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22073093056678772,
|
||
|
|
"step": 3640,
|
||
|
|
"valid_targets_mean": 5455.7,
|
||
|
|
"valid_targets_min": 2205
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.136363636363637,
|
||
|
|
"grad_norm": 0.5243551621539269,
|
||
|
|
"learning_rate": 1.833447691218948e-06,
|
||
|
|
"loss": 0.2196,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22795337438583374,
|
||
|
|
"step": 3645,
|
||
|
|
"valid_targets_mean": 5602.2,
|
||
|
|
"valid_targets_min": 2095
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.1447811447811445,
|
||
|
|
"grad_norm": 0.4906849438414537,
|
||
|
|
"learning_rate": 1.7984929199776457e-06,
|
||
|
|
"loss": 0.2184,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19058871269226074,
|
||
|
|
"step": 3650,
|
||
|
|
"valid_targets_mean": 5773.0,
|
||
|
|
"valid_targets_min": 2224
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.153198653198653,
|
||
|
|
"grad_norm": 0.5073904766406345,
|
||
|
|
"learning_rate": 1.7638588782302336e-06,
|
||
|
|
"loss": 0.2325,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22966495156288147,
|
||
|
|
"step": 3655,
|
||
|
|
"valid_targets_mean": 5965.8,
|
||
|
|
"valid_targets_min": 2918
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.161616161616162,
|
||
|
|
"grad_norm": 0.5236586043176525,
|
||
|
|
"learning_rate": 1.7295461762645626e-06,
|
||
|
|
"loss": 0.2259,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.207240492105484,
|
||
|
|
"step": 3660,
|
||
|
|
"valid_targets_mean": 5210.7,
|
||
|
|
"valid_targets_min": 607
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.17003367003367,
|
||
|
|
"grad_norm": 0.49544814137145954,
|
||
|
|
"learning_rate": 1.6955554187061406e-06,
|
||
|
|
"loss": 0.2226,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22393043339252472,
|
||
|
|
"step": 3665,
|
||
|
|
"valid_targets_mean": 6129.8,
|
||
|
|
"valid_targets_min": 2636
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.178451178451178,
|
||
|
|
"grad_norm": 0.5200333003420963,
|
||
|
|
"learning_rate": 1.6618872045074929e-06,
|
||
|
|
"loss": 0.2194,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2077876478433609,
|
||
|
|
"step": 3670,
|
||
|
|
"valid_targets_mean": 6336.3,
|
||
|
|
"valid_targets_min": 2918
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.186868686868687,
|
||
|
|
"grad_norm": 0.5493760889830712,
|
||
|
|
"learning_rate": 1.6285421269375823e-06,
|
||
|
|
"loss": 0.2324,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23120251297950745,
|
||
|
|
"step": 3675,
|
||
|
|
"valid_targets_mean": 5103.4,
|
||
|
|
"valid_targets_min": 886
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.1952861952861955,
|
||
|
|
"grad_norm": 0.4952648358724955,
|
||
|
|
"learning_rate": 1.595520773571364e-06,
|
||
|
|
"loss": 0.2335,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2233111560344696,
|
||
|
|
"step": 3680,
|
||
|
|
"valid_targets_mean": 6430.8,
|
||
|
|
"valid_targets_min": 4065
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.203703703703703,
|
||
|
|
"grad_norm": 0.5308289686736317,
|
||
|
|
"learning_rate": 1.5628237262794544e-06,
|
||
|
|
"loss": 0.2223,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23317524790763855,
|
||
|
|
"step": 3685,
|
||
|
|
"valid_targets_mean": 5724.6,
|
||
|
|
"valid_targets_min": 3592
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.212121212121212,
|
||
|
|
"grad_norm": 0.5204100323050561,
|
||
|
|
"learning_rate": 1.5304515612178428e-06,
|
||
|
|
"loss": 0.2188,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21899659931659698,
|
||
|
|
"step": 3690,
|
||
|
|
"valid_targets_mean": 5272.9,
|
||
|
|
"valid_targets_min": 795
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.220538720538721,
|
||
|
|
"grad_norm": 0.5395423268893877,
|
||
|
|
"learning_rate": 1.4984048488177628e-06,
|
||
|
|
"loss": 0.2262,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23501020669937134,
|
||
|
|
"step": 3695,
|
||
|
|
"valid_targets_mean": 5511.4,
|
||
|
|
"valid_targets_min": 524
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.228956228956229,
|
||
|
|
"grad_norm": 0.5078500154199302,
|
||
|
|
"learning_rate": 1.466684153775635e-06,
|
||
|
|
"loss": 0.2268,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21515780687332153,
|
||
|
|
"step": 3700,
|
||
|
|
"valid_targets_mean": 5566.1,
|
||
|
|
"valid_targets_min": 3132
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.237373737373737,
|
||
|
|
"grad_norm": 0.49003697819144293,
|
||
|
|
"learning_rate": 1.435290035043111e-06,
|
||
|
|
"loss": 0.2313,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2258685827255249,
|
||
|
|
"step": 3705,
|
||
|
|
"valid_targets_mean": 6305.3,
|
||
|
|
"valid_targets_min": 4132
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.245791245791246,
|
||
|
|
"grad_norm": 0.5234990052252189,
|
||
|
|
"learning_rate": 1.4042230458172257e-06,
|
||
|
|
"loss": 0.2399,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23973727226257324,
|
||
|
|
"step": 3710,
|
||
|
|
"valid_targets_mean": 5435.1,
|
||
|
|
"valid_targets_min": 2469
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.254208754208754,
|
||
|
|
"grad_norm": 0.5091769248174087,
|
||
|
|
"learning_rate": 1.373483733530665e-06,
|
||
|
|
"loss": 0.2245,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24551436305046082,
|
||
|
|
"step": 3715,
|
||
|
|
"valid_targets_mean": 6078.8,
|
||
|
|
"valid_targets_min": 4434
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.262626262626263,
|
||
|
|
"grad_norm": 0.5297523589562412,
|
||
|
|
"learning_rate": 1.3430726398420846e-06,
|
||
|
|
"loss": 0.2268,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23026011884212494,
|
||
|
|
"step": 3720,
|
||
|
|
"valid_targets_mean": 5890.8,
|
||
|
|
"valid_targets_min": 2292
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.271043771043771,
|
||
|
|
"grad_norm": 0.5205027082734003,
|
||
|
|
"learning_rate": 1.3129903006266065e-06,
|
||
|
|
"loss": 0.2243,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.212215855717659,
|
||
|
|
"step": 3725,
|
||
|
|
"valid_targets_mean": 5467.5,
|
||
|
|
"valid_targets_min": 2309
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.279461279461279,
|
||
|
|
"grad_norm": 0.5248938296372679,
|
||
|
|
"learning_rate": 1.2832372459663445e-06,
|
||
|
|
"loss": 0.2257,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22421753406524658,
|
||
|
|
"step": 3730,
|
||
|
|
"valid_targets_mean": 5535.9,
|
||
|
|
"valid_targets_min": 875
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.287878787878788,
|
||
|
|
"grad_norm": 0.5597486286171638,
|
||
|
|
"learning_rate": 1.2538140001410826e-06,
|
||
|
|
"loss": 0.2392,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22579503059387207,
|
||
|
|
"step": 3735,
|
||
|
|
"valid_targets_mean": 5427.9,
|
||
|
|
"valid_targets_min": 702
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.296296296296296,
|
||
|
|
"grad_norm": 0.5237046035694222,
|
||
|
|
"learning_rate": 1.224721081619029e-06,
|
||
|
|
"loss": 0.2294,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24596376717090607,
|
||
|
|
"step": 3740,
|
||
|
|
"valid_targets_mean": 5579.4,
|
||
|
|
"valid_targets_min": 923
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.3047138047138045,
|
||
|
|
"grad_norm": 0.5378460293231798,
|
||
|
|
"learning_rate": 1.1959590030476798e-06,
|
||
|
|
"loss": 0.2251,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.223431795835495,
|
||
|
|
"step": 3745,
|
||
|
|
"valid_targets_mean": 5233.2,
|
||
|
|
"valid_targets_min": 529
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.313131313131313,
|
||
|
|
"grad_norm": 0.4948826998786642,
|
||
|
|
"learning_rate": 1.1675282712447821e-06,
|
||
|
|
"loss": 0.2184,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2282189428806305,
|
||
|
|
"step": 3750,
|
||
|
|
"valid_targets_mean": 6037.9,
|
||
|
|
"valid_targets_min": 4773
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.321548821548822,
|
||
|
|
"grad_norm": 0.5783981006909941,
|
||
|
|
"learning_rate": 1.1394293871894258e-06,
|
||
|
|
"loss": 0.2351,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23504894971847534,
|
||
|
|
"step": 3755,
|
||
|
|
"valid_targets_mean": 4965.1,
|
||
|
|
"valid_targets_min": 828
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.32996632996633,
|
||
|
|
"grad_norm": 0.5040102749412789,
|
||
|
|
"learning_rate": 1.1116628460131684e-06,
|
||
|
|
"loss": 0.2229,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21298432350158691,
|
||
|
|
"step": 3760,
|
||
|
|
"valid_targets_mean": 5762.1,
|
||
|
|
"valid_targets_min": 2447
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.338383838383838,
|
||
|
|
"grad_norm": 0.4952294476096354,
|
||
|
|
"learning_rate": 1.0842291369913705e-06,
|
||
|
|
"loss": 0.2272,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22944945096969604,
|
||
|
|
"step": 3765,
|
||
|
|
"valid_targets_mean": 6218.2,
|
||
|
|
"valid_targets_min": 2281
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.346801346801347,
|
||
|
|
"grad_norm": 0.5230154051584566,
|
||
|
|
"learning_rate": 1.0571287435345256e-06,
|
||
|
|
"loss": 0.2353,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23738403618335724,
|
||
|
|
"step": 3770,
|
||
|
|
"valid_targets_mean": 5475.8,
|
||
|
|
"valid_targets_min": 2815
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.3552188552188555,
|
||
|
|
"grad_norm": 0.5584119544200802,
|
||
|
|
"learning_rate": 1.0303621431797638e-06,
|
||
|
|
"loss": 0.2179,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21592649817466736,
|
||
|
|
"step": 3775,
|
||
|
|
"valid_targets_mean": 5428.8,
|
||
|
|
"valid_targets_min": 805
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.363636363636363,
|
||
|
|
"grad_norm": 0.5252472014851841,
|
||
|
|
"learning_rate": 1.0039298075824377e-06,
|
||
|
|
"loss": 0.2254,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21030861139297485,
|
||
|
|
"step": 3780,
|
||
|
|
"valid_targets_mean": 6101.9,
|
||
|
|
"valid_targets_min": 4283
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.372053872053872,
|
||
|
|
"grad_norm": 0.510018527842041,
|
||
|
|
"learning_rate": 9.778322025078025e-07,
|
||
|
|
"loss": 0.2188,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22540400922298431,
|
||
|
|
"step": 3785,
|
||
|
|
"valid_targets_mean": 6047.4,
|
||
|
|
"valid_targets_min": 3342
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.380471380471381,
|
||
|
|
"grad_norm": 0.4966811461897099,
|
||
|
|
"learning_rate": 9.520697878228136e-07,
|
||
|
|
"loss": 0.2257,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2320118099451065,
|
||
|
|
"step": 3790,
|
||
|
|
"valid_targets_mean": 5937.1,
|
||
|
|
"valid_targets_min": 2198
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.388888888888889,
|
||
|
|
"grad_norm": 0.5169653382390963,
|
||
|
|
"learning_rate": 9.266430174880314e-07,
|
||
|
|
"loss": 0.2356,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24477005004882812,
|
||
|
|
"step": 3795,
|
||
|
|
"valid_targets_mean": 5758.1,
|
||
|
|
"valid_targets_min": 895
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.397306397306397,
|
||
|
|
"grad_norm": 0.4975857094960845,
|
||
|
|
"learning_rate": 9.015523395495963e-07,
|
||
|
|
"loss": 0.223,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22504638135433197,
|
||
|
|
"step": 3800,
|
||
|
|
"valid_targets_mean": 5927.6,
|
||
|
|
"valid_targets_min": 524
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.405723905723906,
|
||
|
|
"grad_norm": 0.5210723802099997,
|
||
|
|
"learning_rate": 8.767981961313632e-07,
|
||
|
|
"loss": 0.2298,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23169521987438202,
|
||
|
|
"step": 3805,
|
||
|
|
"valid_targets_mean": 6047.5,
|
||
|
|
"valid_targets_min": 2719
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.414141414141414,
|
||
|
|
"grad_norm": 0.507675266076043,
|
||
|
|
"learning_rate": 8.523810234271024e-07,
|
||
|
|
"loss": 0.2266,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21925891935825348,
|
||
|
|
"step": 3810,
|
||
|
|
"valid_targets_mean": 6002.9,
|
||
|
|
"valid_targets_min": 3674
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.422558922558922,
|
||
|
|
"grad_norm": 0.5044432912409538,
|
||
|
|
"learning_rate": 8.283012516927979e-07,
|
||
|
|
"loss": 0.2308,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2430918663740158,
|
||
|
|
"step": 3815,
|
||
|
|
"valid_targets_mean": 6220.0,
|
||
|
|
"valid_targets_min": 3282
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.430976430976431,
|
||
|
|
"grad_norm": 0.5539200125899316,
|
||
|
|
"learning_rate": 8.045593052390832e-07,
|
||
|
|
"loss": 0.2267,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22239992022514343,
|
||
|
|
"step": 3820,
|
||
|
|
"valid_targets_mean": 5531.8,
|
||
|
|
"valid_targets_min": 3670
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.4393939393939394,
|
||
|
|
"grad_norm": 0.5212600886414331,
|
||
|
|
"learning_rate": 7.811556024237644e-07,
|
||
|
|
"loss": 0.2254,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23301962018013,
|
||
|
|
"step": 3825,
|
||
|
|
"valid_targets_mean": 6046.6,
|
||
|
|
"valid_targets_min": 2085
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.447811447811448,
|
||
|
|
"grad_norm": 0.543517601490649,
|
||
|
|
"learning_rate": 7.580905556444351e-07,
|
||
|
|
"loss": 0.2244,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2337494194507599,
|
||
|
|
"step": 3830,
|
||
|
|
"valid_targets_mean": 5565.8,
|
||
|
|
"valid_targets_min": 3133
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.456228956228956,
|
||
|
|
"grad_norm": 0.5789718366274003,
|
||
|
|
"learning_rate": 7.353645713312164e-07,
|
||
|
|
"loss": 0.2225,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22291827201843262,
|
||
|
|
"step": 3835,
|
||
|
|
"valid_targets_mean": 5005.3,
|
||
|
|
"valid_targets_min": 548
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.4646464646464645,
|
||
|
|
"grad_norm": 0.5259856372538945,
|
||
|
|
"learning_rate": 7.12978049939601e-07,
|
||
|
|
"loss": 0.2296,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21570321917533875,
|
||
|
|
"step": 3840,
|
||
|
|
"valid_targets_mean": 5165.9,
|
||
|
|
"valid_targets_min": 2304
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.473063973063973,
|
||
|
|
"grad_norm": 0.5136883078710031,
|
||
|
|
"learning_rate": 6.909313859433874e-07,
|
||
|
|
"loss": 0.232,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2724156975746155,
|
||
|
|
"step": 3845,
|
||
|
|
"valid_targets_mean": 6629.4,
|
||
|
|
"valid_targets_min": 823
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.481481481481482,
|
||
|
|
"grad_norm": 0.4915282768697449,
|
||
|
|
"learning_rate": 6.692249678277373e-07,
|
||
|
|
"loss": 0.2313,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21886561810970306,
|
||
|
|
"step": 3850,
|
||
|
|
"valid_targets_mean": 5787.4,
|
||
|
|
"valid_targets_min": 3854
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.48989898989899,
|
||
|
|
"grad_norm": 0.5101943668044208,
|
||
|
|
"learning_rate": 6.478591780823262e-07,
|
||
|
|
"loss": 0.2305,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21968825161457062,
|
||
|
|
"step": 3855,
|
||
|
|
"valid_targets_mean": 6163.6,
|
||
|
|
"valid_targets_min": 3495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.498316498316498,
|
||
|
|
"grad_norm": 0.5021067934809788,
|
||
|
|
"learning_rate": 6.268343931945952e-07,
|
||
|
|
"loss": 0.218,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22854362428188324,
|
||
|
|
"step": 3860,
|
||
|
|
"valid_targets_mean": 5995.1,
|
||
|
|
"valid_targets_min": 3142
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.506734006734007,
|
||
|
|
"grad_norm": 0.557332449113888,
|
||
|
|
"learning_rate": 6.061509836431367e-07,
|
||
|
|
"loss": 0.2152,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21220096945762634,
|
||
|
|
"step": 3865,
|
||
|
|
"valid_targets_mean": 5461.5,
|
||
|
|
"valid_targets_min": 3963
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.515151515151516,
|
||
|
|
"grad_norm": 0.5186256456829461,
|
||
|
|
"learning_rate": 5.85809313891148e-07,
|
||
|
|
"loss": 0.2151,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20755138993263245,
|
||
|
|
"step": 3870,
|
||
|
|
"valid_targets_mean": 5407.1,
|
||
|
|
"valid_targets_min": 1868
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.523569023569023,
|
||
|
|
"grad_norm": 0.49245121143841225,
|
||
|
|
"learning_rate": 5.658097423800124e-07,
|
||
|
|
"loss": 0.2256,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24901744723320007,
|
||
|
|
"step": 3875,
|
||
|
|
"valid_targets_mean": 6360.4,
|
||
|
|
"valid_targets_min": 4581
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.531986531986532,
|
||
|
|
"grad_norm": 0.5838948599522868,
|
||
|
|
"learning_rate": 5.461526215229929e-07,
|
||
|
|
"loss": 0.2333,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2362005114555359,
|
||
|
|
"step": 3880,
|
||
|
|
"valid_targets_mean": 5119.0,
|
||
|
|
"valid_targets_min": 628
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.540404040404041,
|
||
|
|
"grad_norm": 0.4873484403671387,
|
||
|
|
"learning_rate": 5.268382976990083e-07,
|
||
|
|
"loss": 0.2325,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24804040789604187,
|
||
|
|
"step": 3885,
|
||
|
|
"valid_targets_mean": 6448.1,
|
||
|
|
"valid_targets_min": 3836
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.548821548821548,
|
||
|
|
"grad_norm": 0.47878246569387206,
|
||
|
|
"learning_rate": 5.078671112465472e-07,
|
||
|
|
"loss": 0.2364,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2332177460193634,
|
||
|
|
"step": 3890,
|
||
|
|
"valid_targets_mean": 6442.3,
|
||
|
|
"valid_targets_min": 4637
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.557239057239057,
|
||
|
|
"grad_norm": 0.5247985422667938,
|
||
|
|
"learning_rate": 4.892393964576547e-07,
|
||
|
|
"loss": 0.2326,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22282634675502777,
|
||
|
|
"step": 3895,
|
||
|
|
"valid_targets_mean": 5424.9,
|
||
|
|
"valid_targets_min": 669
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.565656565656566,
|
||
|
|
"grad_norm": 0.5485608251182884,
|
||
|
|
"learning_rate": 4.7095548157204184e-07,
|
||
|
|
"loss": 0.224,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22252830862998962,
|
||
|
|
"step": 3900,
|
||
|
|
"valid_targets_mean": 5017.9,
|
||
|
|
"valid_targets_min": 771
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.574074074074074,
|
||
|
|
"grad_norm": 0.5514701095369009,
|
||
|
|
"learning_rate": 4.5301568877132106e-07,
|
||
|
|
"loss": 0.2155,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1949262022972107,
|
||
|
|
"step": 3905,
|
||
|
|
"valid_targets_mean": 6186.4,
|
||
|
|
"valid_targets_min": 2158
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.582491582491582,
|
||
|
|
"grad_norm": 0.49508090440885005,
|
||
|
|
"learning_rate": 4.3542033417330433e-07,
|
||
|
|
"loss": 0.2189,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22400163114070892,
|
||
|
|
"step": 3910,
|
||
|
|
"valid_targets_mean": 5883.4,
|
||
|
|
"valid_targets_min": 3182
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.590909090909091,
|
||
|
|
"grad_norm": 0.5226134266773029,
|
||
|
|
"learning_rate": 4.181697278264496e-07,
|
||
|
|
"loss": 0.2253,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22463016211986542,
|
||
|
|
"step": 3915,
|
||
|
|
"valid_targets_mean": 5738.9,
|
||
|
|
"valid_targets_min": 4297
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.5993265993265995,
|
||
|
|
"grad_norm": 0.46667116305509626,
|
||
|
|
"learning_rate": 4.012641737043899e-07,
|
||
|
|
"loss": 0.2297,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22830307483673096,
|
||
|
|
"step": 3920,
|
||
|
|
"valid_targets_mean": 6400.0,
|
||
|
|
"valid_targets_min": 2262
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.607744107744107,
|
||
|
|
"grad_norm": 0.5006237626497043,
|
||
|
|
"learning_rate": 3.8470396970057946e-07,
|
||
|
|
"loss": 0.2275,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2222435474395752,
|
||
|
|
"step": 3925,
|
||
|
|
"valid_targets_mean": 5903.4,
|
||
|
|
"valid_targets_min": 4945
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.616161616161616,
|
||
|
|
"grad_norm": 0.5174878932627028,
|
||
|
|
"learning_rate": 3.6848940762304277e-07,
|
||
|
|
"loss": 0.2292,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23240050673484802,
|
||
|
|
"step": 3930,
|
||
|
|
"valid_targets_mean": 5703.8,
|
||
|
|
"valid_targets_min": 1984
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.624579124579125,
|
||
|
|
"grad_norm": 0.5198759175442372,
|
||
|
|
"learning_rate": 3.5262077318923615e-07,
|
||
|
|
"loss": 0.2296,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2365482747554779,
|
||
|
|
"step": 3935,
|
||
|
|
"valid_targets_mean": 6016.2,
|
||
|
|
"valid_targets_min": 4563
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.632996632996633,
|
||
|
|
"grad_norm": 0.5115968930294122,
|
||
|
|
"learning_rate": 3.37098346021012e-07,
|
||
|
|
"loss": 0.2262,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22688379883766174,
|
||
|
|
"step": 3940,
|
||
|
|
"valid_targets_mean": 5793.9,
|
||
|
|
"valid_targets_min": 4283
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.641414141414142,
|
||
|
|
"grad_norm": 0.5152584191758609,
|
||
|
|
"learning_rate": 3.2192239963968477e-07,
|
||
|
|
"loss": 0.225,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22982186079025269,
|
||
|
|
"step": 3945,
|
||
|
|
"valid_targets_mean": 5751.3,
|
||
|
|
"valid_targets_min": 1486
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.64983164983165,
|
||
|
|
"grad_norm": 0.4903080196046145,
|
||
|
|
"learning_rate": 3.07093201461226e-07,
|
||
|
|
"loss": 0.2293,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2335420846939087,
|
||
|
|
"step": 3950,
|
||
|
|
"valid_targets_mean": 6418.2,
|
||
|
|
"valid_targets_min": 3667
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.658249158249158,
|
||
|
|
"grad_norm": 0.5411356489778124,
|
||
|
|
"learning_rate": 2.926110127915327e-07,
|
||
|
|
"loss": 0.2241,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22956421971321106,
|
||
|
|
"step": 3955,
|
||
|
|
"valid_targets_mean": 5280.3,
|
||
|
|
"valid_targets_min": 580
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.666666666666667,
|
||
|
|
"grad_norm": 0.48921399133046517,
|
||
|
|
"learning_rate": 2.7847608882184186e-07,
|
||
|
|
"loss": 0.2168,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21850576996803284,
|
||
|
|
"step": 3960,
|
||
|
|
"valid_targets_mean": 6326.2,
|
||
|
|
"valid_targets_min": 4168
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.675084175084175,
|
||
|
|
"grad_norm": 0.5082833393301474,
|
||
|
|
"learning_rate": 2.6468867862421865e-07,
|
||
|
|
"loss": 0.2276,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.228357195854187,
|
||
|
|
"step": 3965,
|
||
|
|
"valid_targets_mean": 5868.0,
|
||
|
|
"valid_targets_min": 1900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.683501683501683,
|
||
|
|
"grad_norm": 0.48858787085087396,
|
||
|
|
"learning_rate": 2.5124902514717773e-07,
|
||
|
|
"loss": 0.2263,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22642362117767334,
|
||
|
|
"step": 3970,
|
||
|
|
"valid_targets_mean": 6190.9,
|
||
|
|
"valid_targets_min": 4478
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.691919191919192,
|
||
|
|
"grad_norm": 0.48981711224027497,
|
||
|
|
"learning_rate": 2.3815736521139775e-07,
|
||
|
|
"loss": 0.2237,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22989197075366974,
|
||
|
|
"step": 3975,
|
||
|
|
"valid_targets_mean": 6458.6,
|
||
|
|
"valid_targets_min": 3865
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.700336700336701,
|
||
|
|
"grad_norm": 0.5396742819020192,
|
||
|
|
"learning_rate": 2.254139295055513e-07,
|
||
|
|
"loss": 0.2253,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21652349829673767,
|
||
|
|
"step": 3980,
|
||
|
|
"valid_targets_mean": 5217.4,
|
||
|
|
"valid_targets_min": 1809
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.7087542087542085,
|
||
|
|
"grad_norm": 0.538771879253705,
|
||
|
|
"learning_rate": 2.1301894258223045e-07,
|
||
|
|
"loss": 0.2444,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2519531846046448,
|
||
|
|
"step": 3985,
|
||
|
|
"valid_targets_mean": 5681.4,
|
||
|
|
"valid_targets_min": 2085
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.717171717171717,
|
||
|
|
"grad_norm": 0.5053903866002492,
|
||
|
|
"learning_rate": 2.0097262285400764e-07,
|
||
|
|
"loss": 0.2285,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21983280777931213,
|
||
|
|
"step": 3990,
|
||
|
|
"valid_targets_mean": 5716.7,
|
||
|
|
"valid_targets_min": 2388
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.725589225589226,
|
||
|
|
"grad_norm": 0.5128327242924421,
|
||
|
|
"learning_rate": 1.8927518258957e-07,
|
||
|
|
"loss": 0.2269,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20815545320510864,
|
||
|
|
"step": 3995,
|
||
|
|
"valid_targets_mean": 5685.9,
|
||
|
|
"valid_targets_min": 882
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.7340067340067336,
|
||
|
|
"grad_norm": 0.5155864351532222,
|
||
|
|
"learning_rate": 1.7792682790998217e-07,
|
||
|
|
"loss": 0.2449,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25162017345428467,
|
||
|
|
"step": 4000,
|
||
|
|
"valid_targets_mean": 6619.4,
|
||
|
|
"valid_targets_min": 2331
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.742424242424242,
|
||
|
|
"grad_norm": 0.5572123489167949,
|
||
|
|
"learning_rate": 1.6692775878506705e-07,
|
||
|
|
"loss": 0.2375,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23857121169567108,
|
||
|
|
"step": 4005,
|
||
|
|
"valid_targets_mean": 5601.3,
|
||
|
|
"valid_targets_min": 2042
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.750841750841751,
|
||
|
|
"grad_norm": 0.5386724169939262,
|
||
|
|
"learning_rate": 1.5627816902986204e-07,
|
||
|
|
"loss": 0.2322,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2514593005180359,
|
||
|
|
"step": 4010,
|
||
|
|
"valid_targets_mean": 5908.2,
|
||
|
|
"valid_targets_min": 3662
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.7592592592592595,
|
||
|
|
"grad_norm": 0.5142136892512724,
|
||
|
|
"learning_rate": 1.4597824630122604e-07,
|
||
|
|
"loss": 0.2217,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22154447436332703,
|
||
|
|
"step": 4015,
|
||
|
|
"valid_targets_mean": 5728.4,
|
||
|
|
"valid_targets_min": 3307
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.767676767676767,
|
||
|
|
"grad_norm": 0.505999257157595,
|
||
|
|
"learning_rate": 1.360281720945089e-07,
|
||
|
|
"loss": 0.2331,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22565945982933044,
|
||
|
|
"step": 4020,
|
||
|
|
"valid_targets_mean": 6077.9,
|
||
|
|
"valid_targets_min": 3131
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.776094276094276,
|
||
|
|
"grad_norm": 0.5091773084597425,
|
||
|
|
"learning_rate": 1.2642812174037621e-07,
|
||
|
|
"loss": 0.2252,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24219831824302673,
|
||
|
|
"step": 4025,
|
||
|
|
"valid_targets_mean": 5903.4,
|
||
|
|
"valid_targets_min": 713
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.784511784511785,
|
||
|
|
"grad_norm": 0.469852641795815,
|
||
|
|
"learning_rate": 1.1717826440170054e-07,
|
||
|
|
"loss": 0.2231,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21723823249340057,
|
||
|
|
"step": 4030,
|
||
|
|
"valid_targets_mean": 6776.9,
|
||
|
|
"valid_targets_min": 5320
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.792929292929293,
|
||
|
|
"grad_norm": 0.4814380129058402,
|
||
|
|
"learning_rate": 1.0827876307059503e-07,
|
||
|
|
"loss": 0.224,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22944395244121552,
|
||
|
|
"step": 4035,
|
||
|
|
"valid_targets_mean": 6529.6,
|
||
|
|
"valid_targets_min": 4226
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.801346801346801,
|
||
|
|
"grad_norm": 0.5218042402047304,
|
||
|
|
"learning_rate": 9.972977456553345e-08,
|
||
|
|
"loss": 0.2304,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2270907461643219,
|
||
|
|
"step": 4040,
|
||
|
|
"valid_targets_mean": 5496.2,
|
||
|
|
"valid_targets_min": 2157
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.80976430976431,
|
||
|
|
"grad_norm": 0.5691017326768867,
|
||
|
|
"learning_rate": 9.153144952859017e-08,
|
||
|
|
"loss": 0.2246,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22278842329978943,
|
||
|
|
"step": 4045,
|
||
|
|
"valid_targets_mean": 5140.4,
|
||
|
|
"valid_targets_min": 767
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.818181818181818,
|
||
|
|
"grad_norm": 0.5499421984771002,
|
||
|
|
"learning_rate": 8.368393242277784e-08,
|
||
|
|
"loss": 0.2336,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2250651717185974,
|
||
|
|
"step": 4050,
|
||
|
|
"valid_targets_mean": 5465.9,
|
||
|
|
"valid_targets_min": 791
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.826599326599327,
|
||
|
|
"grad_norm": 0.5378879282700434,
|
||
|
|
"learning_rate": 7.618736152951611e-08,
|
||
|
|
"loss": 0.2326,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.253796249628067,
|
||
|
|
"step": 4055,
|
||
|
|
"valid_targets_mean": 5403.1,
|
||
|
|
"valid_targets_min": 1986
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.835016835016835,
|
||
|
|
"grad_norm": 0.5589319216560491,
|
||
|
|
"learning_rate": 6.904186894618692e-08,
|
||
|
|
"loss": 0.2263,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2060225009918213,
|
||
|
|
"step": 4060,
|
||
|
|
"valid_targets_mean": 6291.4,
|
||
|
|
"valid_targets_min": 3416
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.843434343434343,
|
||
|
|
"grad_norm": 0.5563582211872528,
|
||
|
|
"learning_rate": 6.224758058380298e-08,
|
||
|
|
"loss": 0.2288,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23780454695224762,
|
||
|
|
"step": 4065,
|
||
|
|
"valid_targets_mean": 5445.9,
|
||
|
|
"valid_targets_min": 1482
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.851851851851852,
|
||
|
|
"grad_norm": 0.49591666289659025,
|
||
|
|
"learning_rate": 5.580461616479183e-08,
|
||
|
|
"loss": 0.2183,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2293078452348709,
|
||
|
|
"step": 4070,
|
||
|
|
"valid_targets_mean": 5957.1,
|
||
|
|
"valid_targets_min": 4006
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.86026936026936,
|
||
|
|
"grad_norm": 0.5195416954225124,
|
||
|
|
"learning_rate": 4.971308922089746e-08,
|
||
|
|
"loss": 0.2267,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21978473663330078,
|
||
|
|
"step": 4075,
|
||
|
|
"valid_targets_mean": 5605.7,
|
||
|
|
"valid_targets_min": 2270
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.8686868686868685,
|
||
|
|
"grad_norm": 0.6230757006742872,
|
||
|
|
"learning_rate": 4.397310709115532e-08,
|
||
|
|
"loss": 0.2219,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21849480271339417,
|
||
|
|
"step": 4080,
|
||
|
|
"valid_targets_mean": 4612.2,
|
||
|
|
"valid_targets_min": 804
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.877104377104377,
|
||
|
|
"grad_norm": 0.5136176458728036,
|
||
|
|
"learning_rate": 3.85847709200271e-08,
|
||
|
|
"loss": 0.232,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23595461249351501,
|
||
|
|
"step": 4085,
|
||
|
|
"valid_targets_mean": 5972.6,
|
||
|
|
"valid_targets_min": 5081
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.885521885521886,
|
||
|
|
"grad_norm": 0.48471813785882656,
|
||
|
|
"learning_rate": 3.35481756556022e-08,
|
||
|
|
"loss": 0.2263,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23174874484539032,
|
||
|
|
"step": 4090,
|
||
|
|
"valid_targets_mean": 5933.6,
|
||
|
|
"valid_targets_min": 1004
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.893939393939394,
|
||
|
|
"grad_norm": 0.5181480546027055,
|
||
|
|
"learning_rate": 2.886341004793014e-08,
|
||
|
|
"loss": 0.227,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22367450594902039,
|
||
|
|
"step": 4095,
|
||
|
|
"valid_targets_mean": 5459.8,
|
||
|
|
"valid_targets_min": 3090
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.902356902356902,
|
||
|
|
"grad_norm": 0.5259358606796519,
|
||
|
|
"learning_rate": 2.4530556647457404e-08,
|
||
|
|
"loss": 0.2296,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2420312762260437,
|
||
|
|
"step": 4100,
|
||
|
|
"valid_targets_mean": 5836.4,
|
||
|
|
"valid_targets_min": 2388
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.910774410774411,
|
||
|
|
"grad_norm": 0.5216774951720721,
|
||
|
|
"learning_rate": 2.0549691803566362e-08,
|
||
|
|
"loss": 0.2345,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24406731128692627,
|
||
|
|
"step": 4105,
|
||
|
|
"valid_targets_mean": 5652.4,
|
||
|
|
"valid_targets_min": 963
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.91919191919192,
|
||
|
|
"grad_norm": 0.5064354053022379,
|
||
|
|
"learning_rate": 1.6920885663238573e-08,
|
||
|
|
"loss": 0.221,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2080550193786621,
|
||
|
|
"step": 4110,
|
||
|
|
"valid_targets_mean": 5772.5,
|
||
|
|
"valid_targets_min": 3893
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.927609427609427,
|
||
|
|
"grad_norm": 0.48979470818174814,
|
||
|
|
"learning_rate": 1.3644202169813547e-08,
|
||
|
|
"loss": 0.2353,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2382018268108368,
|
||
|
|
"step": 4115,
|
||
|
|
"valid_targets_mean": 6401.1,
|
||
|
|
"valid_targets_min": 2133
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.936026936026936,
|
||
|
|
"grad_norm": 0.5657014297746613,
|
||
|
|
"learning_rate": 1.0719699061865208e-08,
|
||
|
|
"loss": 0.2202,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2425881028175354,
|
||
|
|
"step": 4120,
|
||
|
|
"valid_targets_mean": 5984.1,
|
||
|
|
"valid_targets_min": 4215
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.944444444444445,
|
||
|
|
"grad_norm": 0.49423121741118176,
|
||
|
|
"learning_rate": 8.147427872180481e-09,
|
||
|
|
"loss": 0.2315,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20437312126159668,
|
||
|
|
"step": 4125,
|
||
|
|
"valid_targets_mean": 5447.6,
|
||
|
|
"valid_targets_min": 728
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.952861952861953,
|
||
|
|
"grad_norm": 0.5045817158250394,
|
||
|
|
"learning_rate": 5.927433926855575e-09,
|
||
|
|
"loss": 0.2344,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24840131402015686,
|
||
|
|
"step": 4130,
|
||
|
|
"valid_targets_mean": 6229.9,
|
||
|
|
"valid_targets_min": 5249
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.961279461279461,
|
||
|
|
"grad_norm": 0.5657405963065263,
|
||
|
|
"learning_rate": 4.059756344492183e-09,
|
||
|
|
"loss": 0.2236,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.222096785902977,
|
||
|
|
"step": 4135,
|
||
|
|
"valid_targets_mean": 5216.6,
|
||
|
|
"valid_targets_min": 1782
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.96969696969697,
|
||
|
|
"grad_norm": 0.5185358892889281,
|
||
|
|
"learning_rate": 2.5444280355135796e-09,
|
||
|
|
"loss": 0.2234,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22283129394054413,
|
||
|
|
"step": 4140,
|
||
|
|
"valid_targets_mean": 6054.6,
|
||
|
|
"valid_targets_min": 2148
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.978114478114478,
|
||
|
|
"grad_norm": 0.6277226876991597,
|
||
|
|
"learning_rate": 1.381475701580648e-09,
|
||
|
|
"loss": 0.2137,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2108631134033203,
|
||
|
|
"step": 4145,
|
||
|
|
"valid_targets_mean": 5032.8,
|
||
|
|
"valid_targets_min": 896
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.986531986531986,
|
||
|
|
"grad_norm": 0.5184384890847686,
|
||
|
|
"learning_rate": 5.709198351233624e-10,
|
||
|
|
"loss": 0.2301,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2267255336046219,
|
||
|
|
"step": 4150,
|
||
|
|
"valid_targets_mean": 5675.1,
|
||
|
|
"valid_targets_min": 570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.994949494949495,
|
||
|
|
"grad_norm": 0.5444858373108672,
|
||
|
|
"learning_rate": 1.1277471898107905e-10,
|
||
|
|
"loss": 0.2356,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21790213882923126,
|
||
|
|
"step": 4155,
|
||
|
|
"valid_targets_mean": 5063.0,
|
||
|
|
"valid_targets_min": 650
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 7.0,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23528757691383362,
|
||
|
|
"step": 4158,
|
||
|
|
"total_flos": 2225784336678912.0,
|
||
|
|
"train_loss": 0.1554051661732221,
|
||
|
|
"train_runtime": 19007.0278,
|
||
|
|
"train_samples_per_second": 3.495,
|
||
|
|
"train_steps_per_second": 0.219,
|
||
|
|
"valid_targets_mean": 5790.4,
|
||
|
|
"valid_targets_min": 2026
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 5,
|
||
|
|
"max_steps": 4158,
|
||
|
|
"num_input_tokens_seen": 0,
|
||
|
|
"num_train_epochs": 7,
|
||
|
|
"save_steps": 1500,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": true,
|
||
|
|
"should_training_stop": true
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 2225784336678912.0,
|
||
|
|
"train_batch_size": 1,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|