Model: laion/exp-syh-r2egym-swesmith-mixed_glm_4_7_traces_jupiter Source: Original Platform
7957 lines
221 KiB
JSON
7957 lines
221 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 3598,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.009737098344693282,
|
|
"grad_norm": 23.118894280668776,
|
|
"learning_rate": 4.444444444444445e-07,
|
|
"loss": 0.8845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44496992230415344,
|
|
"step": 5,
|
|
"valid_targets_mean": 4557.9,
|
|
"valid_targets_min": 3428
|
|
},
|
|
{
|
|
"epoch": 0.019474196689386564,
|
|
"grad_norm": 18.20757555459068,
|
|
"learning_rate": 1.0000000000000002e-06,
|
|
"loss": 0.8703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40375640988349915,
|
|
"step": 10,
|
|
"valid_targets_mean": 4226.9,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 0.029211295034079845,
|
|
"grad_norm": 17.163536853020847,
|
|
"learning_rate": 1.5555555555555558e-06,
|
|
"loss": 0.862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4125989079475403,
|
|
"step": 15,
|
|
"valid_targets_mean": 4368.9,
|
|
"valid_targets_min": 3337
|
|
},
|
|
{
|
|
"epoch": 0.03894839337877313,
|
|
"grad_norm": 10.759049970128293,
|
|
"learning_rate": 2.1111111111111114e-06,
|
|
"loss": 0.793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3467726707458496,
|
|
"step": 20,
|
|
"valid_targets_mean": 3963.6,
|
|
"valid_targets_min": 3066
|
|
},
|
|
{
|
|
"epoch": 0.04868549172346641,
|
|
"grad_norm": 5.752975899301949,
|
|
"learning_rate": 2.666666666666667e-06,
|
|
"loss": 0.7362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35217881202697754,
|
|
"step": 25,
|
|
"valid_targets_mean": 4167.4,
|
|
"valid_targets_min": 1111
|
|
},
|
|
{
|
|
"epoch": 0.05842259006815969,
|
|
"grad_norm": 3.181712614596086,
|
|
"learning_rate": 3.2222222222222227e-06,
|
|
"loss": 0.7161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31451717019081116,
|
|
"step": 30,
|
|
"valid_targets_mean": 3717.9,
|
|
"valid_targets_min": 824
|
|
},
|
|
{
|
|
"epoch": 0.06815968841285297,
|
|
"grad_norm": 2.104356488152819,
|
|
"learning_rate": 3.777777777777778e-06,
|
|
"loss": 0.663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2986396849155426,
|
|
"step": 35,
|
|
"valid_targets_mean": 4273.5,
|
|
"valid_targets_min": 3190
|
|
},
|
|
{
|
|
"epoch": 0.07789678675754626,
|
|
"grad_norm": 1.747451505893338,
|
|
"learning_rate": 4.333333333333334e-06,
|
|
"loss": 0.6479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36746519804000854,
|
|
"step": 40,
|
|
"valid_targets_mean": 4585.8,
|
|
"valid_targets_min": 3646
|
|
},
|
|
{
|
|
"epoch": 0.08763388510223953,
|
|
"grad_norm": 1.4018106341346264,
|
|
"learning_rate": 4.888888888888889e-06,
|
|
"loss": 0.6132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2811136245727539,
|
|
"step": 45,
|
|
"valid_targets_mean": 4117.2,
|
|
"valid_targets_min": 3317
|
|
},
|
|
{
|
|
"epoch": 0.09737098344693282,
|
|
"grad_norm": 0.9532297822853748,
|
|
"learning_rate": 5.444444444444445e-06,
|
|
"loss": 0.575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2593851387500763,
|
|
"step": 50,
|
|
"valid_targets_mean": 5093.6,
|
|
"valid_targets_min": 2856
|
|
},
|
|
{
|
|
"epoch": 0.10710808179162609,
|
|
"grad_norm": 0.9254315700338628,
|
|
"learning_rate": 6e-06,
|
|
"loss": 0.5861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2671780586242676,
|
|
"step": 55,
|
|
"valid_targets_mean": 4203.6,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 0.11684518013631938,
|
|
"grad_norm": 0.9230508754491137,
|
|
"learning_rate": 6.555555555555556e-06,
|
|
"loss": 0.5542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3054065704345703,
|
|
"step": 60,
|
|
"valid_targets_mean": 3866.5,
|
|
"valid_targets_min": 2940
|
|
},
|
|
{
|
|
"epoch": 0.12658227848101267,
|
|
"grad_norm": 0.7785329416348905,
|
|
"learning_rate": 7.111111111111112e-06,
|
|
"loss": 0.5499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23154807090759277,
|
|
"step": 65,
|
|
"valid_targets_mean": 4170.0,
|
|
"valid_targets_min": 3412
|
|
},
|
|
{
|
|
"epoch": 0.13631937682570594,
|
|
"grad_norm": 0.8291010793663051,
|
|
"learning_rate": 7.666666666666667e-06,
|
|
"loss": 0.5404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2817365527153015,
|
|
"step": 70,
|
|
"valid_targets_mean": 4144.8,
|
|
"valid_targets_min": 1295
|
|
},
|
|
{
|
|
"epoch": 0.1460564751703992,
|
|
"grad_norm": 0.6498603596886385,
|
|
"learning_rate": 8.222222222222222e-06,
|
|
"loss": 0.5136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2053757905960083,
|
|
"step": 75,
|
|
"valid_targets_mean": 4598.1,
|
|
"valid_targets_min": 3318
|
|
},
|
|
{
|
|
"epoch": 0.15579357351509251,
|
|
"grad_norm": 0.7555006740365169,
|
|
"learning_rate": 8.777777777777778e-06,
|
|
"loss": 0.515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26266223192214966,
|
|
"step": 80,
|
|
"valid_targets_mean": 3878.2,
|
|
"valid_targets_min": 978
|
|
},
|
|
{
|
|
"epoch": 0.1655306718597858,
|
|
"grad_norm": 2.0689702978780855,
|
|
"learning_rate": 9.333333333333334e-06,
|
|
"loss": 0.4948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2561669945716858,
|
|
"step": 85,
|
|
"valid_targets_mean": 4380.8,
|
|
"valid_targets_min": 3373
|
|
},
|
|
{
|
|
"epoch": 0.17526777020447906,
|
|
"grad_norm": 0.6915310477602487,
|
|
"learning_rate": 9.88888888888889e-06,
|
|
"loss": 0.4966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24356107413768768,
|
|
"step": 90,
|
|
"valid_targets_mean": 4549.0,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 0.18500486854917234,
|
|
"grad_norm": 0.6982060079865288,
|
|
"learning_rate": 1.0444444444444445e-05,
|
|
"loss": 0.5124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2629743814468384,
|
|
"step": 95,
|
|
"valid_targets_mean": 3903.0,
|
|
"valid_targets_min": 857
|
|
},
|
|
{
|
|
"epoch": 0.19474196689386564,
|
|
"grad_norm": 0.6567694540972767,
|
|
"learning_rate": 1.1000000000000001e-05,
|
|
"loss": 0.4656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2269618809223175,
|
|
"step": 100,
|
|
"valid_targets_mean": 4326.8,
|
|
"valid_targets_min": 3638
|
|
},
|
|
{
|
|
"epoch": 0.2044790652385589,
|
|
"grad_norm": 0.6764309558951358,
|
|
"learning_rate": 1.1555555555555556e-05,
|
|
"loss": 0.4742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24546125531196594,
|
|
"step": 105,
|
|
"valid_targets_mean": 4178.0,
|
|
"valid_targets_min": 3704
|
|
},
|
|
{
|
|
"epoch": 0.21421616358325218,
|
|
"grad_norm": 0.6657658572559553,
|
|
"learning_rate": 1.211111111111111e-05,
|
|
"loss": 0.4607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22610408067703247,
|
|
"step": 110,
|
|
"valid_targets_mean": 5350.2,
|
|
"valid_targets_min": 3504
|
|
},
|
|
{
|
|
"epoch": 0.22395326192794549,
|
|
"grad_norm": 0.6812819460216482,
|
|
"learning_rate": 1.2666666666666667e-05,
|
|
"loss": 0.4476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21488001942634583,
|
|
"step": 115,
|
|
"valid_targets_mean": 4650.6,
|
|
"valid_targets_min": 1143
|
|
},
|
|
{
|
|
"epoch": 0.23369036027263876,
|
|
"grad_norm": 0.6719297028546384,
|
|
"learning_rate": 1.3222222222222223e-05,
|
|
"loss": 0.4579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2021813690662384,
|
|
"step": 120,
|
|
"valid_targets_mean": 3420.8,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 0.24342745861733203,
|
|
"grad_norm": 0.7134508385172472,
|
|
"learning_rate": 1.377777777777778e-05,
|
|
"loss": 0.4624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22716659307479858,
|
|
"step": 125,
|
|
"valid_targets_mean": 4303.2,
|
|
"valid_targets_min": 3452
|
|
},
|
|
{
|
|
"epoch": 0.25316455696202533,
|
|
"grad_norm": 0.6539962297564527,
|
|
"learning_rate": 1.4333333333333334e-05,
|
|
"loss": 0.4488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22620850801467896,
|
|
"step": 130,
|
|
"valid_targets_mean": 5172.8,
|
|
"valid_targets_min": 3925
|
|
},
|
|
{
|
|
"epoch": 0.2629016553067186,
|
|
"grad_norm": 0.6692439195316646,
|
|
"learning_rate": 1.488888888888889e-05,
|
|
"loss": 0.4374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21539512276649475,
|
|
"step": 135,
|
|
"valid_targets_mean": 4375.2,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 0.2726387536514119,
|
|
"grad_norm": 0.6105804659736762,
|
|
"learning_rate": 1.5444444444444446e-05,
|
|
"loss": 0.4635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24036164581775665,
|
|
"step": 140,
|
|
"valid_targets_mean": 5055.6,
|
|
"valid_targets_min": 3058
|
|
},
|
|
{
|
|
"epoch": 0.28237585199610515,
|
|
"grad_norm": 0.736115990989608,
|
|
"learning_rate": 1.6000000000000003e-05,
|
|
"loss": 0.4285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16464482247829437,
|
|
"step": 145,
|
|
"valid_targets_mean": 3126.5,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 0.2921129503407984,
|
|
"grad_norm": 1.3117904270902987,
|
|
"learning_rate": 1.6555555555555556e-05,
|
|
"loss": 0.4317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1798105686903,
|
|
"step": 150,
|
|
"valid_targets_mean": 4147.5,
|
|
"valid_targets_min": 3211
|
|
},
|
|
{
|
|
"epoch": 0.3018500486854917,
|
|
"grad_norm": 0.8818126643515471,
|
|
"learning_rate": 1.7111111111111112e-05,
|
|
"loss": 0.4322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24699127674102783,
|
|
"step": 155,
|
|
"valid_targets_mean": 3863.1,
|
|
"valid_targets_min": 956
|
|
},
|
|
{
|
|
"epoch": 0.31158714703018503,
|
|
"grad_norm": 0.6558286204768558,
|
|
"learning_rate": 1.7666666666666668e-05,
|
|
"loss": 0.4338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17653688788414001,
|
|
"step": 160,
|
|
"valid_targets_mean": 3518.4,
|
|
"valid_targets_min": 1139
|
|
},
|
|
{
|
|
"epoch": 0.3213242453748783,
|
|
"grad_norm": 0.6672104486072807,
|
|
"learning_rate": 1.8222222222222224e-05,
|
|
"loss": 0.4299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18814575672149658,
|
|
"step": 165,
|
|
"valid_targets_mean": 3876.2,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 0.3310613437195716,
|
|
"grad_norm": 0.6416782890906239,
|
|
"learning_rate": 1.877777777777778e-05,
|
|
"loss": 0.4149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16728775203227997,
|
|
"step": 170,
|
|
"valid_targets_mean": 3383.9,
|
|
"valid_targets_min": 1453
|
|
},
|
|
{
|
|
"epoch": 0.34079844206426485,
|
|
"grad_norm": 0.7239962544924382,
|
|
"learning_rate": 1.9333333333333333e-05,
|
|
"loss": 0.418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19848763942718506,
|
|
"step": 175,
|
|
"valid_targets_mean": 4528.2,
|
|
"valid_targets_min": 1029
|
|
},
|
|
{
|
|
"epoch": 0.3505355404089581,
|
|
"grad_norm": 0.7745664978311559,
|
|
"learning_rate": 1.988888888888889e-05,
|
|
"loss": 0.4246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2215413749217987,
|
|
"step": 180,
|
|
"valid_targets_mean": 3834.4,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 0.3602726387536514,
|
|
"grad_norm": 0.6749053142831414,
|
|
"learning_rate": 2.0444444444444446e-05,
|
|
"loss": 0.4128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18815171718597412,
|
|
"step": 185,
|
|
"valid_targets_mean": 4859.8,
|
|
"valid_targets_min": 3753
|
|
},
|
|
{
|
|
"epoch": 0.37000973709834467,
|
|
"grad_norm": 0.8176016579754839,
|
|
"learning_rate": 2.1000000000000002e-05,
|
|
"loss": 0.4137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20829084515571594,
|
|
"step": 190,
|
|
"valid_targets_mean": 4058.4,
|
|
"valid_targets_min": 2002
|
|
},
|
|
{
|
|
"epoch": 0.379746835443038,
|
|
"grad_norm": 0.6584754889654173,
|
|
"learning_rate": 2.1555555555555555e-05,
|
|
"loss": 0.4067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1744072288274765,
|
|
"step": 195,
|
|
"valid_targets_mean": 4416.4,
|
|
"valid_targets_min": 3252
|
|
},
|
|
{
|
|
"epoch": 0.3894839337877313,
|
|
"grad_norm": 0.6767016018050401,
|
|
"learning_rate": 2.2111111111111115e-05,
|
|
"loss": 0.4062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19470298290252686,
|
|
"step": 200,
|
|
"valid_targets_mean": 4403.0,
|
|
"valid_targets_min": 3480
|
|
},
|
|
{
|
|
"epoch": 0.39922103213242455,
|
|
"grad_norm": 0.775144326550449,
|
|
"learning_rate": 2.2666666666666668e-05,
|
|
"loss": 0.3916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20275896787643433,
|
|
"step": 205,
|
|
"valid_targets_mean": 4417.1,
|
|
"valid_targets_min": 3425
|
|
},
|
|
{
|
|
"epoch": 0.4089581304771178,
|
|
"grad_norm": 0.7300868695719883,
|
|
"learning_rate": 2.3222222222222227e-05,
|
|
"loss": 0.4201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29349997639656067,
|
|
"step": 210,
|
|
"valid_targets_mean": 6009.2,
|
|
"valid_targets_min": 1608
|
|
},
|
|
{
|
|
"epoch": 0.4186952288218111,
|
|
"grad_norm": 0.6897090851188222,
|
|
"learning_rate": 2.377777777777778e-05,
|
|
"loss": 0.4184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21879325807094574,
|
|
"step": 215,
|
|
"valid_targets_mean": 4199.0,
|
|
"valid_targets_min": 3242
|
|
},
|
|
{
|
|
"epoch": 0.42843232716650437,
|
|
"grad_norm": 0.6925917180708949,
|
|
"learning_rate": 2.4333333333333333e-05,
|
|
"loss": 0.392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17544391751289368,
|
|
"step": 220,
|
|
"valid_targets_mean": 3835.2,
|
|
"valid_targets_min": 1036
|
|
},
|
|
{
|
|
"epoch": 0.43816942551119764,
|
|
"grad_norm": 0.6605511360195409,
|
|
"learning_rate": 2.4888888888888893e-05,
|
|
"loss": 0.4039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1834697276353836,
|
|
"step": 225,
|
|
"valid_targets_mean": 4260.5,
|
|
"valid_targets_min": 2304
|
|
},
|
|
{
|
|
"epoch": 0.44790652385589097,
|
|
"grad_norm": 0.5777182162922304,
|
|
"learning_rate": 2.5444444444444446e-05,
|
|
"loss": 0.3919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24139994382858276,
|
|
"step": 230,
|
|
"valid_targets_mean": 5811.8,
|
|
"valid_targets_min": 4310
|
|
},
|
|
{
|
|
"epoch": 0.45764362220058424,
|
|
"grad_norm": 0.7226453858676247,
|
|
"learning_rate": 2.6000000000000002e-05,
|
|
"loss": 0.3874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2048182636499405,
|
|
"step": 235,
|
|
"valid_targets_mean": 4898.6,
|
|
"valid_targets_min": 3473
|
|
},
|
|
{
|
|
"epoch": 0.4673807205452775,
|
|
"grad_norm": 0.6611768065959763,
|
|
"learning_rate": 2.6555555555555558e-05,
|
|
"loss": 0.3904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19229689240455627,
|
|
"step": 240,
|
|
"valid_targets_mean": 4399.5,
|
|
"valid_targets_min": 3633
|
|
},
|
|
{
|
|
"epoch": 0.4771178188899708,
|
|
"grad_norm": 0.6683314067792802,
|
|
"learning_rate": 2.7111111111111114e-05,
|
|
"loss": 0.3684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1989506483078003,
|
|
"step": 245,
|
|
"valid_targets_mean": 5267.8,
|
|
"valid_targets_min": 3228
|
|
},
|
|
{
|
|
"epoch": 0.48685491723466406,
|
|
"grad_norm": 0.7449427318485009,
|
|
"learning_rate": 2.7666666666666667e-05,
|
|
"loss": 0.3909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22531406581401825,
|
|
"step": 250,
|
|
"valid_targets_mean": 4148.8,
|
|
"valid_targets_min": 817
|
|
},
|
|
{
|
|
"epoch": 0.49659201557935734,
|
|
"grad_norm": 0.6875225240479791,
|
|
"learning_rate": 2.8222222222222227e-05,
|
|
"loss": 0.3919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.240289568901062,
|
|
"step": 255,
|
|
"valid_targets_mean": 4402.1,
|
|
"valid_targets_min": 3586
|
|
},
|
|
{
|
|
"epoch": 0.5063291139240507,
|
|
"grad_norm": 0.6980597143084923,
|
|
"learning_rate": 2.877777777777778e-05,
|
|
"loss": 0.3848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16534990072250366,
|
|
"step": 260,
|
|
"valid_targets_mean": 3970.9,
|
|
"valid_targets_min": 2911
|
|
},
|
|
{
|
|
"epoch": 0.5160662122687439,
|
|
"grad_norm": 0.7148073487775419,
|
|
"learning_rate": 2.9333333333333333e-05,
|
|
"loss": 0.3815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19760876893997192,
|
|
"step": 265,
|
|
"valid_targets_mean": 4875.1,
|
|
"valid_targets_min": 2971
|
|
},
|
|
{
|
|
"epoch": 0.5258033106134372,
|
|
"grad_norm": 0.6980379940260597,
|
|
"learning_rate": 2.9888888888888892e-05,
|
|
"loss": 0.3892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1551802158355713,
|
|
"step": 270,
|
|
"valid_targets_mean": 3418.5,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 0.5355404089581305,
|
|
"grad_norm": 0.6304591414413756,
|
|
"learning_rate": 3.0444444444444445e-05,
|
|
"loss": 0.3808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20365405082702637,
|
|
"step": 275,
|
|
"valid_targets_mean": 4439.2,
|
|
"valid_targets_min": 2437
|
|
},
|
|
{
|
|
"epoch": 0.5452775073028238,
|
|
"grad_norm": 0.8756759484752723,
|
|
"learning_rate": 3.1e-05,
|
|
"loss": 0.3879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2187492549419403,
|
|
"step": 280,
|
|
"valid_targets_mean": 3823.9,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 0.555014605647517,
|
|
"grad_norm": 0.6925384162720198,
|
|
"learning_rate": 3.155555555555556e-05,
|
|
"loss": 0.3893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20272786915302277,
|
|
"step": 285,
|
|
"valid_targets_mean": 4757.5,
|
|
"valid_targets_min": 3393
|
|
},
|
|
{
|
|
"epoch": 0.5647517039922103,
|
|
"grad_norm": 0.7364360129118342,
|
|
"learning_rate": 3.2111111111111114e-05,
|
|
"loss": 0.3771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23127461969852448,
|
|
"step": 290,
|
|
"valid_targets_mean": 4591.4,
|
|
"valid_targets_min": 3502
|
|
},
|
|
{
|
|
"epoch": 0.5744888023369036,
|
|
"grad_norm": 0.6154305473597166,
|
|
"learning_rate": 3.266666666666667e-05,
|
|
"loss": 0.3835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18655447661876678,
|
|
"step": 295,
|
|
"valid_targets_mean": 4427.8,
|
|
"valid_targets_min": 3700
|
|
},
|
|
{
|
|
"epoch": 0.5842259006815969,
|
|
"grad_norm": 0.6881642873057486,
|
|
"learning_rate": 3.3222222222222226e-05,
|
|
"loss": 0.3863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19806928932666779,
|
|
"step": 300,
|
|
"valid_targets_mean": 4328.2,
|
|
"valid_targets_min": 3043
|
|
},
|
|
{
|
|
"epoch": 0.5939629990262901,
|
|
"grad_norm": 0.6438307753318919,
|
|
"learning_rate": 3.377777777777778e-05,
|
|
"loss": 0.3962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1805514693260193,
|
|
"step": 305,
|
|
"valid_targets_mean": 4537.4,
|
|
"valid_targets_min": 1026
|
|
},
|
|
{
|
|
"epoch": 0.6037000973709834,
|
|
"grad_norm": 0.7124154337044125,
|
|
"learning_rate": 3.433333333333333e-05,
|
|
"loss": 0.373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19304269552230835,
|
|
"step": 310,
|
|
"valid_targets_mean": 4386.0,
|
|
"valid_targets_min": 3137
|
|
},
|
|
{
|
|
"epoch": 0.6134371957156767,
|
|
"grad_norm": 0.6401560840195569,
|
|
"learning_rate": 3.4888888888888895e-05,
|
|
"loss": 0.3779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17118550837039948,
|
|
"step": 315,
|
|
"valid_targets_mean": 4464.1,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 0.6231742940603701,
|
|
"grad_norm": 0.729992124621984,
|
|
"learning_rate": 3.5444444444444445e-05,
|
|
"loss": 0.3895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2159796953201294,
|
|
"step": 320,
|
|
"valid_targets_mean": 4337.1,
|
|
"valid_targets_min": 3397
|
|
},
|
|
{
|
|
"epoch": 0.6329113924050633,
|
|
"grad_norm": 0.6756411225774193,
|
|
"learning_rate": 3.6e-05,
|
|
"loss": 0.378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2206284999847412,
|
|
"step": 325,
|
|
"valid_targets_mean": 4809.0,
|
|
"valid_targets_min": 2999
|
|
},
|
|
{
|
|
"epoch": 0.6426484907497566,
|
|
"grad_norm": 0.8071912360810334,
|
|
"learning_rate": 3.655555555555556e-05,
|
|
"loss": 0.378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21432818472385406,
|
|
"step": 330,
|
|
"valid_targets_mean": 4727.4,
|
|
"valid_targets_min": 3967
|
|
},
|
|
{
|
|
"epoch": 0.6523855890944499,
|
|
"grad_norm": 0.6867194176879942,
|
|
"learning_rate": 3.7111111111111113e-05,
|
|
"loss": 0.367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16056394577026367,
|
|
"step": 335,
|
|
"valid_targets_mean": 3940.0,
|
|
"valid_targets_min": 2953
|
|
},
|
|
{
|
|
"epoch": 0.6621226874391432,
|
|
"grad_norm": 0.6751773593057298,
|
|
"learning_rate": 3.766666666666667e-05,
|
|
"loss": 0.3904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18800443410873413,
|
|
"step": 340,
|
|
"valid_targets_mean": 4233.9,
|
|
"valid_targets_min": 3162
|
|
},
|
|
{
|
|
"epoch": 0.6718597857838364,
|
|
"grad_norm": 0.8257749702852918,
|
|
"learning_rate": 3.8222222222222226e-05,
|
|
"loss": 0.3769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1990666687488556,
|
|
"step": 345,
|
|
"valid_targets_mean": 4171.2,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 0.6815968841285297,
|
|
"grad_norm": 0.9924626388160639,
|
|
"learning_rate": 3.877777777777778e-05,
|
|
"loss": 0.3677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21095314621925354,
|
|
"step": 350,
|
|
"valid_targets_mean": 4846.0,
|
|
"valid_targets_min": 3425
|
|
},
|
|
{
|
|
"epoch": 0.691333982473223,
|
|
"grad_norm": 0.7020761469375107,
|
|
"learning_rate": 3.933333333333333e-05,
|
|
"loss": 0.3734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1791234016418457,
|
|
"step": 355,
|
|
"valid_targets_mean": 4898.1,
|
|
"valid_targets_min": 3918
|
|
},
|
|
{
|
|
"epoch": 0.7010710808179162,
|
|
"grad_norm": 0.6418609790138166,
|
|
"learning_rate": 3.9888888888888895e-05,
|
|
"loss": 0.3695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20117180049419403,
|
|
"step": 360,
|
|
"valid_targets_mean": 4694.0,
|
|
"valid_targets_min": 3481
|
|
},
|
|
{
|
|
"epoch": 0.7108081791626095,
|
|
"grad_norm": 0.6657235308214312,
|
|
"learning_rate": 3.9999849385947476e-05,
|
|
"loss": 0.3689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19872650504112244,
|
|
"step": 365,
|
|
"valid_targets_mean": 5165.4,
|
|
"valid_targets_min": 1978
|
|
},
|
|
{
|
|
"epoch": 0.7205452775073028,
|
|
"grad_norm": 0.5628424240381045,
|
|
"learning_rate": 3.999923752024691e-05,
|
|
"loss": 0.377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15092147886753082,
|
|
"step": 370,
|
|
"valid_targets_mean": 4257.0,
|
|
"valid_targets_min": 1065
|
|
},
|
|
{
|
|
"epoch": 0.7302823758519961,
|
|
"grad_norm": 0.7874439802927935,
|
|
"learning_rate": 3.999815500390824e-05,
|
|
"loss": 0.3708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2088533639907837,
|
|
"step": 375,
|
|
"valid_targets_mean": 4398.4,
|
|
"valid_targets_min": 3126
|
|
},
|
|
{
|
|
"epoch": 0.7400194741966893,
|
|
"grad_norm": 0.813377438148282,
|
|
"learning_rate": 3.9996601862406804e-05,
|
|
"loss": 0.3699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19973430037498474,
|
|
"step": 380,
|
|
"valid_targets_mean": 4125.2,
|
|
"valid_targets_min": 3527
|
|
},
|
|
{
|
|
"epoch": 0.7497565725413826,
|
|
"grad_norm": 0.7659873084812755,
|
|
"learning_rate": 3.999457813229333e-05,
|
|
"loss": 0.3789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17584359645843506,
|
|
"step": 385,
|
|
"valid_targets_mean": 3499.8,
|
|
"valid_targets_min": 894
|
|
},
|
|
{
|
|
"epoch": 0.759493670886076,
|
|
"grad_norm": 0.7873349990956648,
|
|
"learning_rate": 3.999208386119314e-05,
|
|
"loss": 0.3744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16551227867603302,
|
|
"step": 390,
|
|
"valid_targets_mean": 3595.4,
|
|
"valid_targets_min": 1132
|
|
},
|
|
{
|
|
"epoch": 0.7692307692307693,
|
|
"grad_norm": 4.887804203127558,
|
|
"learning_rate": 3.998911910780497e-05,
|
|
"loss": 0.3757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22275911271572113,
|
|
"step": 395,
|
|
"valid_targets_mean": 6269.8,
|
|
"valid_targets_min": 2934
|
|
},
|
|
{
|
|
"epoch": 0.7789678675754625,
|
|
"grad_norm": 0.7130696585369338,
|
|
"learning_rate": 3.9985683941899654e-05,
|
|
"loss": 0.3759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1910901963710785,
|
|
"step": 400,
|
|
"valid_targets_mean": 4176.4,
|
|
"valid_targets_min": 1208
|
|
},
|
|
{
|
|
"epoch": 0.7887049659201558,
|
|
"grad_norm": 0.7069241977582585,
|
|
"learning_rate": 3.9981778444318414e-05,
|
|
"loss": 0.346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18419122695922852,
|
|
"step": 405,
|
|
"valid_targets_mean": 4007.8,
|
|
"valid_targets_min": 1155
|
|
},
|
|
{
|
|
"epoch": 0.7984420642648491,
|
|
"grad_norm": 0.7089301143975381,
|
|
"learning_rate": 3.9977402706971e-05,
|
|
"loss": 0.3726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17921440303325653,
|
|
"step": 410,
|
|
"valid_targets_mean": 4235.0,
|
|
"valid_targets_min": 3444
|
|
},
|
|
{
|
|
"epoch": 0.8081791626095424,
|
|
"grad_norm": 0.6495013262151981,
|
|
"learning_rate": 3.997255683283352e-05,
|
|
"loss": 0.3494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15462112426757812,
|
|
"step": 415,
|
|
"valid_targets_mean": 4498.2,
|
|
"valid_targets_min": 2870
|
|
},
|
|
{
|
|
"epoch": 0.8179162609542356,
|
|
"grad_norm": 0.7086966629425835,
|
|
"learning_rate": 3.9967240935946004e-05,
|
|
"loss": 0.3637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1628843992948532,
|
|
"step": 420,
|
|
"valid_targets_mean": 3420.5,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 0.8276533592989289,
|
|
"grad_norm": 0.6508253350898261,
|
|
"learning_rate": 3.996145514140974e-05,
|
|
"loss": 0.3718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1732420027256012,
|
|
"step": 425,
|
|
"valid_targets_mean": 4132.9,
|
|
"valid_targets_min": 3245
|
|
},
|
|
{
|
|
"epoch": 0.8373904576436222,
|
|
"grad_norm": 0.7151576501999023,
|
|
"learning_rate": 3.995519958538431e-05,
|
|
"loss": 0.3606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20288825035095215,
|
|
"step": 430,
|
|
"valid_targets_mean": 5239.4,
|
|
"valid_targets_min": 2900
|
|
},
|
|
{
|
|
"epoch": 0.8471275559883155,
|
|
"grad_norm": 0.7006918695243219,
|
|
"learning_rate": 3.99484744150844e-05,
|
|
"loss": 0.3611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15888266265392303,
|
|
"step": 435,
|
|
"valid_targets_mean": 4164.4,
|
|
"valid_targets_min": 2532
|
|
},
|
|
{
|
|
"epoch": 0.8568646543330087,
|
|
"grad_norm": 0.6708952844523018,
|
|
"learning_rate": 3.9941279788776324e-05,
|
|
"loss": 0.3583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17105917632579803,
|
|
"step": 440,
|
|
"valid_targets_mean": 4686.4,
|
|
"valid_targets_min": 3353
|
|
},
|
|
{
|
|
"epoch": 0.866601752677702,
|
|
"grad_norm": 0.6807208807268397,
|
|
"learning_rate": 3.9933615875774314e-05,
|
|
"loss": 0.3568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16278314590454102,
|
|
"step": 445,
|
|
"valid_targets_mean": 3676.5,
|
|
"valid_targets_min": 977
|
|
},
|
|
{
|
|
"epoch": 0.8763388510223953,
|
|
"grad_norm": 0.6809046096024852,
|
|
"learning_rate": 3.992548285643653e-05,
|
|
"loss": 0.3525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1752071976661682,
|
|
"step": 450,
|
|
"valid_targets_mean": 3966.8,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 0.8860759493670886,
|
|
"grad_norm": 0.6165006271847027,
|
|
"learning_rate": 3.99168809221608e-05,
|
|
"loss": 0.3589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16164737939834595,
|
|
"step": 455,
|
|
"valid_targets_mean": 4148.8,
|
|
"valid_targets_min": 3007
|
|
},
|
|
{
|
|
"epoch": 0.8958130477117819,
|
|
"grad_norm": 0.9369745333324089,
|
|
"learning_rate": 3.9907810275380133e-05,
|
|
"loss": 0.3567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15810510516166687,
|
|
"step": 460,
|
|
"valid_targets_mean": 4040.5,
|
|
"valid_targets_min": 3204
|
|
},
|
|
{
|
|
"epoch": 0.9055501460564752,
|
|
"grad_norm": 0.7111339776253992,
|
|
"learning_rate": 3.989827112955794e-05,
|
|
"loss": 0.3598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22363640367984772,
|
|
"step": 465,
|
|
"valid_targets_mean": 5090.4,
|
|
"valid_targets_min": 3853
|
|
},
|
|
{
|
|
"epoch": 0.9152872444011685,
|
|
"grad_norm": 0.6788966159986422,
|
|
"learning_rate": 3.988826370918306e-05,
|
|
"loss": 0.3577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17157606780529022,
|
|
"step": 470,
|
|
"valid_targets_mean": 3225.8,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 0.9250243427458618,
|
|
"grad_norm": 0.5910545724823087,
|
|
"learning_rate": 3.987778824976439e-05,
|
|
"loss": 0.3668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14586105942726135,
|
|
"step": 475,
|
|
"valid_targets_mean": 4006.1,
|
|
"valid_targets_min": 930
|
|
},
|
|
{
|
|
"epoch": 0.934761441090555,
|
|
"grad_norm": 0.6343251918862248,
|
|
"learning_rate": 3.9866844997825415e-05,
|
|
"loss": 0.3669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19073855876922607,
|
|
"step": 480,
|
|
"valid_targets_mean": 4539.5,
|
|
"valid_targets_min": 3121
|
|
},
|
|
{
|
|
"epoch": 0.9444985394352483,
|
|
"grad_norm": 0.586057970732359,
|
|
"learning_rate": 3.985543421089839e-05,
|
|
"loss": 0.3462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1797710657119751,
|
|
"step": 485,
|
|
"valid_targets_mean": 5746.5,
|
|
"valid_targets_min": 3749
|
|
},
|
|
{
|
|
"epoch": 0.9542356377799416,
|
|
"grad_norm": 0.7576728228823382,
|
|
"learning_rate": 3.984355615751824e-05,
|
|
"loss": 0.3519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16860824823379517,
|
|
"step": 490,
|
|
"valid_targets_mean": 3673.6,
|
|
"valid_targets_min": 3347
|
|
},
|
|
{
|
|
"epoch": 0.9639727361246349,
|
|
"grad_norm": 0.6047838804348274,
|
|
"learning_rate": 3.983121111721631e-05,
|
|
"loss": 0.355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1805913895368576,
|
|
"step": 495,
|
|
"valid_targets_mean": 4506.4,
|
|
"valid_targets_min": 3192
|
|
},
|
|
{
|
|
"epoch": 0.9737098344693281,
|
|
"grad_norm": 0.7091348845871056,
|
|
"learning_rate": 3.981839938051372e-05,
|
|
"loss": 0.3541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18835121393203735,
|
|
"step": 500,
|
|
"valid_targets_mean": 4159.5,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 0.9834469328140214,
|
|
"grad_norm": 1.1000070436324856,
|
|
"learning_rate": 3.980512124891458e-05,
|
|
"loss": 0.3448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18073980510234833,
|
|
"step": 505,
|
|
"valid_targets_mean": 4863.8,
|
|
"valid_targets_min": 1502
|
|
},
|
|
{
|
|
"epoch": 0.9931840311587147,
|
|
"grad_norm": 0.7257275042131754,
|
|
"learning_rate": 3.979137703489883e-05,
|
|
"loss": 0.3489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17253929376602173,
|
|
"step": 510,
|
|
"valid_targets_mean": 4703.0,
|
|
"valid_targets_min": 3474
|
|
},
|
|
{
|
|
"epoch": 1.0019474196689386,
|
|
"grad_norm": 0.6913753699346316,
|
|
"learning_rate": 3.9777167061915e-05,
|
|
"loss": 0.3572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1802053153514862,
|
|
"step": 515,
|
|
"valid_targets_mean": 4860.0,
|
|
"valid_targets_min": 3877
|
|
},
|
|
{
|
|
"epoch": 1.011684518013632,
|
|
"grad_norm": 0.6242824703734373,
|
|
"learning_rate": 3.976249166437243e-05,
|
|
"loss": 0.3537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1576920747756958,
|
|
"step": 520,
|
|
"valid_targets_mean": 4406.5,
|
|
"valid_targets_min": 3229
|
|
},
|
|
{
|
|
"epoch": 1.0214216163583252,
|
|
"grad_norm": 0.6750185091695285,
|
|
"learning_rate": 3.974735118763359e-05,
|
|
"loss": 0.344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15952584147453308,
|
|
"step": 525,
|
|
"valid_targets_mean": 4164.1,
|
|
"valid_targets_min": 3141
|
|
},
|
|
{
|
|
"epoch": 1.0311587147030186,
|
|
"grad_norm": 0.6658277383697083,
|
|
"learning_rate": 3.97317459880058e-05,
|
|
"loss": 0.367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14350742101669312,
|
|
"step": 530,
|
|
"valid_targets_mean": 3377.6,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 1.0408958130477117,
|
|
"grad_norm": 0.7400005484213806,
|
|
"learning_rate": 3.971567643273292e-05,
|
|
"loss": 0.3635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17670761048793793,
|
|
"step": 535,
|
|
"valid_targets_mean": 3588.6,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 1.0506329113924051,
|
|
"grad_norm": 0.5702088025195045,
|
|
"learning_rate": 3.9699142899986685e-05,
|
|
"loss": 0.3365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1621592938899994,
|
|
"step": 540,
|
|
"valid_targets_mean": 5561.0,
|
|
"valid_targets_min": 4228
|
|
},
|
|
{
|
|
"epoch": 1.0603700097370983,
|
|
"grad_norm": 0.5360192650840298,
|
|
"learning_rate": 3.968214577885782e-05,
|
|
"loss": 0.3288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19491013884544373,
|
|
"step": 545,
|
|
"valid_targets_mean": 6266.8,
|
|
"valid_targets_min": 3349
|
|
},
|
|
{
|
|
"epoch": 1.0701071080817917,
|
|
"grad_norm": 0.6459768744810942,
|
|
"learning_rate": 3.966468546934687e-05,
|
|
"loss": 0.3519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18637236952781677,
|
|
"step": 550,
|
|
"valid_targets_mean": 4426.6,
|
|
"valid_targets_min": 3476
|
|
},
|
|
{
|
|
"epoch": 1.0798442064264848,
|
|
"grad_norm": 0.5899914630150387,
|
|
"learning_rate": 3.964676238235479e-05,
|
|
"loss": 0.352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19765692949295044,
|
|
"step": 555,
|
|
"valid_targets_mean": 5279.5,
|
|
"valid_targets_min": 3574
|
|
},
|
|
{
|
|
"epoch": 1.0895813047711782,
|
|
"grad_norm": 0.6297912988991564,
|
|
"learning_rate": 3.9628376939673275e-05,
|
|
"loss": 0.3538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1682511270046234,
|
|
"step": 560,
|
|
"valid_targets_mean": 4767.6,
|
|
"valid_targets_min": 3447
|
|
},
|
|
{
|
|
"epoch": 1.0993184031158714,
|
|
"grad_norm": 0.6625458925495679,
|
|
"learning_rate": 3.960952957397484e-05,
|
|
"loss": 0.3488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1940007507801056,
|
|
"step": 565,
|
|
"valid_targets_mean": 4963.5,
|
|
"valid_targets_min": 3316
|
|
},
|
|
{
|
|
"epoch": 1.1090555014605648,
|
|
"grad_norm": 0.5869505477237944,
|
|
"learning_rate": 3.9590220728802625e-05,
|
|
"loss": 0.3311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17819081246852875,
|
|
"step": 570,
|
|
"valid_targets_mean": 5148.1,
|
|
"valid_targets_min": 3943
|
|
},
|
|
{
|
|
"epoch": 1.1187925998052581,
|
|
"grad_norm": 0.6109185331055149,
|
|
"learning_rate": 3.9570450858559975e-05,
|
|
"loss": 0.3539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13978806138038635,
|
|
"step": 575,
|
|
"valid_targets_mean": 4263.9,
|
|
"valid_targets_min": 3170
|
|
},
|
|
{
|
|
"epoch": 1.1285296981499513,
|
|
"grad_norm": 0.7478744405338642,
|
|
"learning_rate": 3.9550220428499725e-05,
|
|
"loss": 0.3425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17585650086402893,
|
|
"step": 580,
|
|
"valid_targets_mean": 3399.4,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 1.1382667964946447,
|
|
"grad_norm": 0.6085289979330304,
|
|
"learning_rate": 3.9529529914713266e-05,
|
|
"loss": 0.3538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22081728279590607,
|
|
"step": 585,
|
|
"valid_targets_mean": 5320.0,
|
|
"valid_targets_min": 3697
|
|
},
|
|
{
|
|
"epoch": 1.1480038948393378,
|
|
"grad_norm": 0.5898435042077811,
|
|
"learning_rate": 3.950837980411934e-05,
|
|
"loss": 0.3502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16094085574150085,
|
|
"step": 590,
|
|
"valid_targets_mean": 4559.2,
|
|
"valid_targets_min": 3464
|
|
},
|
|
{
|
|
"epoch": 1.1577409931840312,
|
|
"grad_norm": 0.705932213352843,
|
|
"learning_rate": 3.948677059445257e-05,
|
|
"loss": 0.3381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1893872320652008,
|
|
"step": 595,
|
|
"valid_targets_mean": 3764.2,
|
|
"valid_targets_min": 918
|
|
},
|
|
{
|
|
"epoch": 1.1674780915287244,
|
|
"grad_norm": 0.6879944126389513,
|
|
"learning_rate": 3.946470279425177e-05,
|
|
"loss": 0.3509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18740157783031464,
|
|
"step": 600,
|
|
"valid_targets_mean": 5641.4,
|
|
"valid_targets_min": 3749
|
|
},
|
|
{
|
|
"epoch": 1.1772151898734178,
|
|
"grad_norm": 0.5632271616115904,
|
|
"learning_rate": 3.944217692284794e-05,
|
|
"loss": 0.3496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2088479995727539,
|
|
"step": 605,
|
|
"valid_targets_mean": 5663.2,
|
|
"valid_targets_min": 3212
|
|
},
|
|
{
|
|
"epoch": 1.186952288218111,
|
|
"grad_norm": 0.6829030682356111,
|
|
"learning_rate": 3.941919351035208e-05,
|
|
"loss": 0.3315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1710636168718338,
|
|
"step": 610,
|
|
"valid_targets_mean": 3843.6,
|
|
"valid_targets_min": 2966
|
|
},
|
|
{
|
|
"epoch": 1.1966893865628043,
|
|
"grad_norm": 0.6123893566411411,
|
|
"learning_rate": 3.9395753097642703e-05,
|
|
"loss": 0.3358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16163797676563263,
|
|
"step": 615,
|
|
"valid_targets_mean": 4377.5,
|
|
"valid_targets_min": 1000
|
|
},
|
|
{
|
|
"epoch": 1.2064264849074975,
|
|
"grad_norm": 0.6669525552357072,
|
|
"learning_rate": 3.937185623635309e-05,
|
|
"loss": 0.3499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15360671281814575,
|
|
"step": 620,
|
|
"valid_targets_mean": 3847.6,
|
|
"valid_targets_min": 856
|
|
},
|
|
{
|
|
"epoch": 1.2161635832521909,
|
|
"grad_norm": 0.6172113771284536,
|
|
"learning_rate": 3.934750348885835e-05,
|
|
"loss": 0.3509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1759207546710968,
|
|
"step": 625,
|
|
"valid_targets_mean": 4304.6,
|
|
"valid_targets_min": 3328
|
|
},
|
|
{
|
|
"epoch": 1.225900681596884,
|
|
"grad_norm": 0.5598570443758227,
|
|
"learning_rate": 3.932269542826211e-05,
|
|
"loss": 0.3318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13947413861751556,
|
|
"step": 630,
|
|
"valid_targets_mean": 4682.0,
|
|
"valid_targets_min": 1139
|
|
},
|
|
{
|
|
"epoch": 1.2356377799415774,
|
|
"grad_norm": 0.6695863027447954,
|
|
"learning_rate": 3.9297432638383133e-05,
|
|
"loss": 0.3555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17652413249015808,
|
|
"step": 635,
|
|
"valid_targets_mean": 4701.8,
|
|
"valid_targets_min": 3484
|
|
},
|
|
{
|
|
"epoch": 1.2453748782862708,
|
|
"grad_norm": 0.6044650007938313,
|
|
"learning_rate": 3.9271715713741484e-05,
|
|
"loss": 0.347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17632779479026794,
|
|
"step": 640,
|
|
"valid_targets_mean": 4647.2,
|
|
"valid_targets_min": 4157
|
|
},
|
|
{
|
|
"epoch": 1.255111976630964,
|
|
"grad_norm": 0.565415725601775,
|
|
"learning_rate": 3.9245545259544575e-05,
|
|
"loss": 0.3407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18473611772060394,
|
|
"step": 645,
|
|
"valid_targets_mean": 5276.8,
|
|
"valid_targets_min": 2969
|
|
},
|
|
{
|
|
"epoch": 1.2648490749756571,
|
|
"grad_norm": 0.6648539720597427,
|
|
"learning_rate": 3.9218921891672955e-05,
|
|
"loss": 0.3403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16993564367294312,
|
|
"step": 650,
|
|
"valid_targets_mean": 4524.9,
|
|
"valid_targets_min": 2912
|
|
},
|
|
{
|
|
"epoch": 1.2745861733203505,
|
|
"grad_norm": 0.5878924178522696,
|
|
"learning_rate": 3.919184623666575e-05,
|
|
"loss": 0.3414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1747129261493683,
|
|
"step": 655,
|
|
"valid_targets_mean": 4582.0,
|
|
"valid_targets_min": 862
|
|
},
|
|
{
|
|
"epoch": 1.284323271665044,
|
|
"grad_norm": 0.5369825238186156,
|
|
"learning_rate": 3.916431893170596e-05,
|
|
"loss": 0.3356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1542832851409912,
|
|
"step": 660,
|
|
"valid_targets_mean": 5046.2,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 1.294060370009737,
|
|
"grad_norm": 0.5746035578362171,
|
|
"learning_rate": 3.913634062460548e-05,
|
|
"loss": 0.3277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18797087669372559,
|
|
"step": 665,
|
|
"valid_targets_mean": 4773.5,
|
|
"valid_targets_min": 3311
|
|
},
|
|
{
|
|
"epoch": 1.3037974683544304,
|
|
"grad_norm": 0.5518753089791267,
|
|
"learning_rate": 3.910791197378982e-05,
|
|
"loss": 0.3526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1611878126859665,
|
|
"step": 670,
|
|
"valid_targets_mean": 4323.8,
|
|
"valid_targets_min": 3052
|
|
},
|
|
{
|
|
"epoch": 1.3135345666991236,
|
|
"grad_norm": 0.6171209886908569,
|
|
"learning_rate": 3.9079033648282595e-05,
|
|
"loss": 0.3445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15734905004501343,
|
|
"step": 675,
|
|
"valid_targets_mean": 4047.4,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 1.323271665043817,
|
|
"grad_norm": 0.6224968861674886,
|
|
"learning_rate": 3.9049706327689844e-05,
|
|
"loss": 0.324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17927172780036926,
|
|
"step": 680,
|
|
"valid_targets_mean": 4603.4,
|
|
"valid_targets_min": 3735
|
|
},
|
|
{
|
|
"epoch": 1.3330087633885102,
|
|
"grad_norm": 0.5698572717191832,
|
|
"learning_rate": 3.9019930702183995e-05,
|
|
"loss": 0.3416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19674797356128693,
|
|
"step": 685,
|
|
"valid_targets_mean": 5559.0,
|
|
"valid_targets_min": 3683
|
|
},
|
|
{
|
|
"epoch": 1.3427458617332035,
|
|
"grad_norm": 0.568448928459607,
|
|
"learning_rate": 3.8989707472487605e-05,
|
|
"loss": 0.345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1570851057767868,
|
|
"step": 690,
|
|
"valid_targets_mean": 4524.1,
|
|
"valid_targets_min": 3631
|
|
},
|
|
{
|
|
"epoch": 1.352482960077897,
|
|
"grad_norm": 0.9315757333471183,
|
|
"learning_rate": 3.8959037349856915e-05,
|
|
"loss": 0.3331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16120608150959015,
|
|
"step": 695,
|
|
"valid_targets_mean": 4775.5,
|
|
"valid_targets_min": 3790
|
|
},
|
|
{
|
|
"epoch": 1.36222005842259,
|
|
"grad_norm": 0.6226245068931342,
|
|
"learning_rate": 3.8927921056065085e-05,
|
|
"loss": 0.3386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16979745030403137,
|
|
"step": 700,
|
|
"valid_targets_mean": 4089.8,
|
|
"valid_targets_min": 2312
|
|
},
|
|
{
|
|
"epoch": 1.3719571567672832,
|
|
"grad_norm": 0.5787345580527016,
|
|
"learning_rate": 3.8896359323385196e-05,
|
|
"loss": 0.3434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19725698232650757,
|
|
"step": 705,
|
|
"valid_targets_mean": 4358.8,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 1.3816942551119766,
|
|
"grad_norm": 0.5884063178612424,
|
|
"learning_rate": 3.886435289457306e-05,
|
|
"loss": 0.3407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1875753551721573,
|
|
"step": 710,
|
|
"valid_targets_mean": 4473.2,
|
|
"valid_targets_min": 3380
|
|
},
|
|
{
|
|
"epoch": 1.39143135345667,
|
|
"grad_norm": 0.5246635437913529,
|
|
"learning_rate": 3.883190252284969e-05,
|
|
"loss": 0.3215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1934608817100525,
|
|
"step": 715,
|
|
"valid_targets_mean": 5352.0,
|
|
"valid_targets_min": 3471
|
|
},
|
|
{
|
|
"epoch": 1.4011684518013632,
|
|
"grad_norm": 0.514659332271549,
|
|
"learning_rate": 3.879900897188361e-05,
|
|
"loss": 0.3318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1622016280889511,
|
|
"step": 720,
|
|
"valid_targets_mean": 4736.0,
|
|
"valid_targets_min": 2764
|
|
},
|
|
{
|
|
"epoch": 1.4109055501460563,
|
|
"grad_norm": 0.6102138458061687,
|
|
"learning_rate": 3.876567301577287e-05,
|
|
"loss": 0.3215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17146432399749756,
|
|
"step": 725,
|
|
"valid_targets_mean": 3958.9,
|
|
"valid_targets_min": 2135
|
|
},
|
|
{
|
|
"epoch": 1.4206426484907497,
|
|
"grad_norm": 0.6309198818185169,
|
|
"learning_rate": 3.873189543902683e-05,
|
|
"loss": 0.3461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20825514197349548,
|
|
"step": 730,
|
|
"valid_targets_mean": 5724.9,
|
|
"valid_targets_min": 3655
|
|
},
|
|
{
|
|
"epoch": 1.4303797468354431,
|
|
"grad_norm": 0.5423095282228948,
|
|
"learning_rate": 3.86976770365477e-05,
|
|
"loss": 0.3371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1908440887928009,
|
|
"step": 735,
|
|
"valid_targets_mean": 5605.0,
|
|
"valid_targets_min": 4149
|
|
},
|
|
{
|
|
"epoch": 1.4401168451801363,
|
|
"grad_norm": 0.5953169726854945,
|
|
"learning_rate": 3.8663018613611824e-05,
|
|
"loss": 0.3297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14922137558460236,
|
|
"step": 740,
|
|
"valid_targets_mean": 4322.8,
|
|
"valid_targets_min": 3711
|
|
},
|
|
{
|
|
"epoch": 1.4498539435248297,
|
|
"grad_norm": 0.6381764782176631,
|
|
"learning_rate": 3.862792098585074e-05,
|
|
"loss": 0.3386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21677705645561218,
|
|
"step": 745,
|
|
"valid_targets_mean": 4385.9,
|
|
"valid_targets_min": 831
|
|
},
|
|
{
|
|
"epoch": 1.4595910418695228,
|
|
"grad_norm": 0.5555832839990852,
|
|
"learning_rate": 3.8592384979231984e-05,
|
|
"loss": 0.338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18439984321594238,
|
|
"step": 750,
|
|
"valid_targets_mean": 4975.5,
|
|
"valid_targets_min": 2397
|
|
},
|
|
{
|
|
"epoch": 1.4693281402142162,
|
|
"grad_norm": 0.6034849154056584,
|
|
"learning_rate": 3.8556411430039656e-05,
|
|
"loss": 0.329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.160589799284935,
|
|
"step": 755,
|
|
"valid_targets_mean": 4349.0,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 1.4790652385589094,
|
|
"grad_norm": 0.5463379544896788,
|
|
"learning_rate": 3.852000118485471e-05,
|
|
"loss": 0.3262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16344690322875977,
|
|
"step": 760,
|
|
"valid_targets_mean": 4050.9,
|
|
"valid_targets_min": 3413
|
|
},
|
|
{
|
|
"epoch": 1.4888023369036028,
|
|
"grad_norm": 0.538144454746949,
|
|
"learning_rate": 3.8483155100535074e-05,
|
|
"loss": 0.3286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2017931342124939,
|
|
"step": 765,
|
|
"valid_targets_mean": 5685.1,
|
|
"valid_targets_min": 3969
|
|
},
|
|
{
|
|
"epoch": 1.4985394352482961,
|
|
"grad_norm": 0.5278935727103556,
|
|
"learning_rate": 3.8445874044195475e-05,
|
|
"loss": 0.3252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14791113138198853,
|
|
"step": 770,
|
|
"valid_targets_mean": 4189.1,
|
|
"valid_targets_min": 3396
|
|
},
|
|
{
|
|
"epoch": 1.5082765335929893,
|
|
"grad_norm": 0.5990293287045305,
|
|
"learning_rate": 3.840815889318699e-05,
|
|
"loss": 0.3271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18097707629203796,
|
|
"step": 775,
|
|
"valid_targets_mean": 3896.0,
|
|
"valid_targets_min": 1193
|
|
},
|
|
{
|
|
"epoch": 1.5180136319376825,
|
|
"grad_norm": 0.5482519342505475,
|
|
"learning_rate": 3.837001053507645e-05,
|
|
"loss": 0.3369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12885573506355286,
|
|
"step": 780,
|
|
"valid_targets_mean": 3838.6,
|
|
"valid_targets_min": 1878
|
|
},
|
|
{
|
|
"epoch": 1.5277507302823758,
|
|
"grad_norm": 0.5931806259151262,
|
|
"learning_rate": 3.833142986762556e-05,
|
|
"loss": 0.3311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1717328429222107,
|
|
"step": 785,
|
|
"valid_targets_mean": 4410.5,
|
|
"valid_targets_min": 3582
|
|
},
|
|
{
|
|
"epoch": 1.5374878286270692,
|
|
"grad_norm": 0.5117694497218511,
|
|
"learning_rate": 3.82924177987697e-05,
|
|
"loss": 0.3348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13753187656402588,
|
|
"step": 790,
|
|
"valid_targets_mean": 4291.2,
|
|
"valid_targets_min": 1017
|
|
},
|
|
{
|
|
"epoch": 1.5472249269717624,
|
|
"grad_norm": 0.6078453733478832,
|
|
"learning_rate": 3.8252975246596634e-05,
|
|
"loss": 0.3369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16878488659858704,
|
|
"step": 795,
|
|
"valid_targets_mean": 4458.0,
|
|
"valid_targets_min": 3700
|
|
},
|
|
{
|
|
"epoch": 1.5569620253164556,
|
|
"grad_norm": 0.5971417813267725,
|
|
"learning_rate": 3.821310313932486e-05,
|
|
"loss": 0.3384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1788347363471985,
|
|
"step": 800,
|
|
"valid_targets_mean": 4847.5,
|
|
"valid_targets_min": 3328
|
|
},
|
|
{
|
|
"epoch": 1.566699123661149,
|
|
"grad_norm": 0.5858426391310002,
|
|
"learning_rate": 3.8172802415281796e-05,
|
|
"loss": 0.3368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1780751645565033,
|
|
"step": 805,
|
|
"valid_targets_mean": 4821.9,
|
|
"valid_targets_min": 3804
|
|
},
|
|
{
|
|
"epoch": 1.5764362220058423,
|
|
"grad_norm": 0.5843861600416765,
|
|
"learning_rate": 3.813207402288167e-05,
|
|
"loss": 0.3431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1434214562177658,
|
|
"step": 810,
|
|
"valid_targets_mean": 3885.6,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 1.5861733203505355,
|
|
"grad_norm": 0.5951781181184778,
|
|
"learning_rate": 3.8090918920603214e-05,
|
|
"loss": 0.3209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.158657968044281,
|
|
"step": 815,
|
|
"valid_targets_mean": 4067.1,
|
|
"valid_targets_min": 3131
|
|
},
|
|
{
|
|
"epoch": 1.5959104186952289,
|
|
"grad_norm": 0.5198602516353442,
|
|
"learning_rate": 3.804933807696712e-05,
|
|
"loss": 0.342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16447174549102783,
|
|
"step": 820,
|
|
"valid_targets_mean": 4880.2,
|
|
"valid_targets_min": 2800
|
|
},
|
|
{
|
|
"epoch": 1.6056475170399223,
|
|
"grad_norm": 0.6307341038381309,
|
|
"learning_rate": 3.80073324705132e-05,
|
|
"loss": 0.3302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15612384676933289,
|
|
"step": 825,
|
|
"valid_targets_mean": 4144.4,
|
|
"valid_targets_min": 3474
|
|
},
|
|
{
|
|
"epoch": 1.6153846153846154,
|
|
"grad_norm": 0.5315493079155139,
|
|
"learning_rate": 3.7964903089777443e-05,
|
|
"loss": 0.3338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18811750411987305,
|
|
"step": 830,
|
|
"valid_targets_mean": 5177.2,
|
|
"valid_targets_min": 3489
|
|
},
|
|
{
|
|
"epoch": 1.6251217137293086,
|
|
"grad_norm": 0.44026095814162886,
|
|
"learning_rate": 3.792205093326865e-05,
|
|
"loss": 0.3129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13885745406150818,
|
|
"step": 835,
|
|
"valid_targets_mean": 6850.1,
|
|
"valid_targets_min": 3856
|
|
},
|
|
{
|
|
"epoch": 1.634858812074002,
|
|
"grad_norm": 0.6239019005370933,
|
|
"learning_rate": 3.7878777009445025e-05,
|
|
"loss": 0.3238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16471534967422485,
|
|
"step": 840,
|
|
"valid_targets_mean": 3539.1,
|
|
"valid_targets_min": 839
|
|
},
|
|
{
|
|
"epoch": 1.6445959104186954,
|
|
"grad_norm": 0.5957746473054812,
|
|
"learning_rate": 3.7835082336690376e-05,
|
|
"loss": 0.335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16049107909202576,
|
|
"step": 845,
|
|
"valid_targets_mean": 3975.8,
|
|
"valid_targets_min": 3441
|
|
},
|
|
{
|
|
"epoch": 1.6543330087633885,
|
|
"grad_norm": 0.5481274199856965,
|
|
"learning_rate": 3.7790967943290186e-05,
|
|
"loss": 0.3289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16004453599452972,
|
|
"step": 850,
|
|
"valid_targets_mean": 5271.0,
|
|
"valid_targets_min": 3463
|
|
},
|
|
{
|
|
"epoch": 1.6640701071080817,
|
|
"grad_norm": 0.5872201220355594,
|
|
"learning_rate": 3.7746434867407405e-05,
|
|
"loss": 0.3286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16967305541038513,
|
|
"step": 855,
|
|
"valid_targets_mean": 5490.1,
|
|
"valid_targets_min": 896
|
|
},
|
|
{
|
|
"epoch": 1.673807205452775,
|
|
"grad_norm": 0.5762659495897475,
|
|
"learning_rate": 3.7701484157058016e-05,
|
|
"loss": 0.3431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17216044664382935,
|
|
"step": 860,
|
|
"valid_targets_mean": 4733.0,
|
|
"valid_targets_min": 3811
|
|
},
|
|
{
|
|
"epoch": 1.6835443037974684,
|
|
"grad_norm": 0.5721111304528664,
|
|
"learning_rate": 3.7656116870086376e-05,
|
|
"loss": 0.3447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13863375782966614,
|
|
"step": 865,
|
|
"valid_targets_mean": 3824.9,
|
|
"valid_targets_min": 1184
|
|
},
|
|
{
|
|
"epoch": 1.6932814021421616,
|
|
"grad_norm": 0.6302614388804196,
|
|
"learning_rate": 3.761033407414032e-05,
|
|
"loss": 0.3407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16379553079605103,
|
|
"step": 870,
|
|
"valid_targets_mean": 4153.2,
|
|
"valid_targets_min": 2275
|
|
},
|
|
{
|
|
"epoch": 1.7030185004868548,
|
|
"grad_norm": 0.5474823695849069,
|
|
"learning_rate": 3.756413684664602e-05,
|
|
"loss": 0.3283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14791777729988098,
|
|
"step": 875,
|
|
"valid_targets_mean": 4692.0,
|
|
"valid_targets_min": 3221
|
|
},
|
|
{
|
|
"epoch": 1.7127555988315482,
|
|
"grad_norm": 0.6013805243068079,
|
|
"learning_rate": 3.751752627478266e-05,
|
|
"loss": 0.3432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17561230063438416,
|
|
"step": 880,
|
|
"valid_targets_mean": 4641.8,
|
|
"valid_targets_min": 4118
|
|
},
|
|
{
|
|
"epoch": 1.7224926971762415,
|
|
"grad_norm": 0.6047033407359883,
|
|
"learning_rate": 3.747050345545683e-05,
|
|
"loss": 0.3333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15203335881233215,
|
|
"step": 885,
|
|
"valid_targets_mean": 3712.0,
|
|
"valid_targets_min": 2209
|
|
},
|
|
{
|
|
"epoch": 1.7322297955209347,
|
|
"grad_norm": 0.537236757613599,
|
|
"learning_rate": 3.7423069495276705e-05,
|
|
"loss": 0.3269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1560620814561844,
|
|
"step": 890,
|
|
"valid_targets_mean": 4875.8,
|
|
"valid_targets_min": 3234
|
|
},
|
|
{
|
|
"epoch": 1.741966893865628,
|
|
"grad_norm": 0.5019465253265417,
|
|
"learning_rate": 3.737522551052603e-05,
|
|
"loss": 0.3398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16537125408649445,
|
|
"step": 895,
|
|
"valid_targets_mean": 5448.4,
|
|
"valid_targets_min": 3481
|
|
},
|
|
{
|
|
"epoch": 1.7517039922103215,
|
|
"grad_norm": 0.5974952266273105,
|
|
"learning_rate": 3.732697262713783e-05,
|
|
"loss": 0.3317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17743460834026337,
|
|
"step": 900,
|
|
"valid_targets_mean": 4350.5,
|
|
"valid_targets_min": 3080
|
|
},
|
|
{
|
|
"epoch": 1.7614410905550146,
|
|
"grad_norm": 0.5639957174514908,
|
|
"learning_rate": 3.727831198066791e-05,
|
|
"loss": 0.3203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1562909483909607,
|
|
"step": 905,
|
|
"valid_targets_mean": 4166.9,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 1.7711781888997078,
|
|
"grad_norm": 0.6170631485068467,
|
|
"learning_rate": 3.722924471626815e-05,
|
|
"loss": 0.3294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17804409563541412,
|
|
"step": 910,
|
|
"valid_targets_mean": 4821.1,
|
|
"valid_targets_min": 3630
|
|
},
|
|
{
|
|
"epoch": 1.7809152872444012,
|
|
"grad_norm": 0.4673158328554574,
|
|
"learning_rate": 3.7179771988659526e-05,
|
|
"loss": 0.3314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21074894070625305,
|
|
"step": 915,
|
|
"valid_targets_mean": 6402.4,
|
|
"valid_targets_min": 2390
|
|
},
|
|
{
|
|
"epoch": 1.7906523855890946,
|
|
"grad_norm": 0.5399548888910377,
|
|
"learning_rate": 3.712989496210497e-05,
|
|
"loss": 0.3253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16420333087444305,
|
|
"step": 920,
|
|
"valid_targets_mean": 4586.5,
|
|
"valid_targets_min": 3330
|
|
},
|
|
{
|
|
"epoch": 1.8003894839337877,
|
|
"grad_norm": 0.6395265590171174,
|
|
"learning_rate": 3.707961481038196e-05,
|
|
"loss": 0.344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16306795179843903,
|
|
"step": 925,
|
|
"valid_targets_mean": 3683.4,
|
|
"valid_targets_min": 944
|
|
},
|
|
{
|
|
"epoch": 1.810126582278481,
|
|
"grad_norm": 0.582459961196632,
|
|
"learning_rate": 3.702893271675487e-05,
|
|
"loss": 0.3328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1666254699230194,
|
|
"step": 930,
|
|
"valid_targets_mean": 4838.0,
|
|
"valid_targets_min": 3779
|
|
},
|
|
{
|
|
"epoch": 1.8198636806231743,
|
|
"grad_norm": 0.5790814344575442,
|
|
"learning_rate": 3.697784987394716e-05,
|
|
"loss": 0.3218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16780097782611847,
|
|
"step": 935,
|
|
"valid_targets_mean": 4165.1,
|
|
"valid_targets_min": 3656
|
|
},
|
|
{
|
|
"epoch": 1.8296007789678677,
|
|
"grad_norm": 0.6077344380147001,
|
|
"learning_rate": 3.692636748411329e-05,
|
|
"loss": 0.3184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15567153692245483,
|
|
"step": 940,
|
|
"valid_targets_mean": 3334.8,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 1.8393378773125608,
|
|
"grad_norm": 0.5401163680599803,
|
|
"learning_rate": 3.687448675881043e-05,
|
|
"loss": 0.3369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16284507513046265,
|
|
"step": 945,
|
|
"valid_targets_mean": 4593.9,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 1.849074975657254,
|
|
"grad_norm": 0.5365013569314557,
|
|
"learning_rate": 3.682220891896995e-05,
|
|
"loss": 0.3259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18380087614059448,
|
|
"step": 950,
|
|
"valid_targets_mean": 4784.4,
|
|
"valid_targets_min": 902
|
|
},
|
|
{
|
|
"epoch": 1.8588120740019474,
|
|
"grad_norm": 0.524376576602583,
|
|
"learning_rate": 3.676953519486868e-05,
|
|
"loss": 0.3252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15939775109291077,
|
|
"step": 955,
|
|
"valid_targets_mean": 4947.6,
|
|
"valid_targets_min": 3542
|
|
},
|
|
{
|
|
"epoch": 1.8685491723466408,
|
|
"grad_norm": 0.538526758648797,
|
|
"learning_rate": 3.671646682609999e-05,
|
|
"loss": 0.3282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1511407345533371,
|
|
"step": 960,
|
|
"valid_targets_mean": 3909.6,
|
|
"valid_targets_min": 1034
|
|
},
|
|
{
|
|
"epoch": 1.878286270691334,
|
|
"grad_norm": 0.5976696663245833,
|
|
"learning_rate": 3.666300506154455e-05,
|
|
"loss": 0.3301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15569263696670532,
|
|
"step": 965,
|
|
"valid_targets_mean": 4446.4,
|
|
"valid_targets_min": 3368
|
|
},
|
|
{
|
|
"epoch": 1.8880233690360273,
|
|
"grad_norm": 0.5183784588140344,
|
|
"learning_rate": 3.660915115934103e-05,
|
|
"loss": 0.3282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15463094413280487,
|
|
"step": 970,
|
|
"valid_targets_mean": 4198.8,
|
|
"valid_targets_min": 1312
|
|
},
|
|
{
|
|
"epoch": 1.8977604673807207,
|
|
"grad_norm": 0.49812783952355444,
|
|
"learning_rate": 3.6554906386856394e-05,
|
|
"loss": 0.3311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1338355392217636,
|
|
"step": 975,
|
|
"valid_targets_mean": 4279.0,
|
|
"valid_targets_min": 3723
|
|
},
|
|
{
|
|
"epoch": 1.9074975657254138,
|
|
"grad_norm": 0.599851452429201,
|
|
"learning_rate": 3.650027202065617e-05,
|
|
"loss": 0.3293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1384090781211853,
|
|
"step": 980,
|
|
"valid_targets_mean": 3824.5,
|
|
"valid_targets_min": 3166
|
|
},
|
|
{
|
|
"epoch": 1.917234664070107,
|
|
"grad_norm": 0.6980758322611927,
|
|
"learning_rate": 3.644524934647432e-05,
|
|
"loss": 0.3288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15351882576942444,
|
|
"step": 985,
|
|
"valid_targets_mean": 4324.6,
|
|
"valid_targets_min": 3715
|
|
},
|
|
{
|
|
"epoch": 1.9269717624148004,
|
|
"grad_norm": 0.5080846132356038,
|
|
"learning_rate": 3.6389839659183064e-05,
|
|
"loss": 0.3338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12791533768177032,
|
|
"step": 990,
|
|
"valid_targets_mean": 3750.4,
|
|
"valid_targets_min": 3293
|
|
},
|
|
{
|
|
"epoch": 1.9367088607594938,
|
|
"grad_norm": 0.6054785224835317,
|
|
"learning_rate": 3.633404426276234e-05,
|
|
"loss": 0.33,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16290783882141113,
|
|
"step": 995,
|
|
"valid_targets_mean": 4358.2,
|
|
"valid_targets_min": 2759
|
|
},
|
|
{
|
|
"epoch": 1.946445959104187,
|
|
"grad_norm": 0.542886736319438,
|
|
"learning_rate": 3.6277864470269164e-05,
|
|
"loss": 0.3336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17033496499061584,
|
|
"step": 1000,
|
|
"valid_targets_mean": 4465.5,
|
|
"valid_targets_min": 3579
|
|
},
|
|
{
|
|
"epoch": 1.95618305744888,
|
|
"grad_norm": 0.5553705816899526,
|
|
"learning_rate": 3.622130160380668e-05,
|
|
"loss": 0.3289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1488012969493866,
|
|
"step": 1005,
|
|
"valid_targets_mean": 4739.0,
|
|
"valid_targets_min": 3394
|
|
},
|
|
{
|
|
"epoch": 1.9659201557935735,
|
|
"grad_norm": 0.5666918487923883,
|
|
"learning_rate": 3.616435699449311e-05,
|
|
"loss": 0.3392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.193047434091568,
|
|
"step": 1010,
|
|
"valid_targets_mean": 5256.9,
|
|
"valid_targets_min": 4252
|
|
},
|
|
{
|
|
"epoch": 1.9756572541382669,
|
|
"grad_norm": 0.5702605885274633,
|
|
"learning_rate": 3.6107031982430374e-05,
|
|
"loss": 0.3192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15549835562705994,
|
|
"step": 1015,
|
|
"valid_targets_mean": 4033.8,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 1.98539435248296,
|
|
"grad_norm": 0.5839914548334156,
|
|
"learning_rate": 3.604932791667258e-05,
|
|
"loss": 0.3079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1451077163219452,
|
|
"step": 1020,
|
|
"valid_targets_mean": 3714.9,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 1.9951314508276532,
|
|
"grad_norm": 0.5179190161445116,
|
|
"learning_rate": 3.599124615519427e-05,
|
|
"loss": 0.3269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1830562800168991,
|
|
"step": 1025,
|
|
"valid_targets_mean": 5324.0,
|
|
"valid_targets_min": 3764
|
|
},
|
|
{
|
|
"epoch": 2.0038948393378773,
|
|
"grad_norm": 0.5062141740709912,
|
|
"learning_rate": 3.5932788064858454e-05,
|
|
"loss": 0.323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12352689355611801,
|
|
"step": 1030,
|
|
"valid_targets_mean": 4541.1,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 2.0136319376825704,
|
|
"grad_norm": 0.5982437604050643,
|
|
"learning_rate": 3.5873955021384464e-05,
|
|
"loss": 0.3139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1479397416114807,
|
|
"step": 1035,
|
|
"valid_targets_mean": 4528.1,
|
|
"valid_targets_min": 2932
|
|
},
|
|
{
|
|
"epoch": 2.023369036027264,
|
|
"grad_norm": 0.5331262104040699,
|
|
"learning_rate": 3.5814748409315545e-05,
|
|
"loss": 0.3063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12414468824863434,
|
|
"step": 1040,
|
|
"valid_targets_mean": 4133.8,
|
|
"valid_targets_min": 2274
|
|
},
|
|
{
|
|
"epoch": 2.033106134371957,
|
|
"grad_norm": 0.6100922068681757,
|
|
"learning_rate": 3.575516962198632e-05,
|
|
"loss": 0.3076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1860358715057373,
|
|
"step": 1045,
|
|
"valid_targets_mean": 4050.4,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 2.0428432327166504,
|
|
"grad_norm": 0.591663776472911,
|
|
"learning_rate": 3.5695220061489935e-05,
|
|
"loss": 0.3077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1805332452058792,
|
|
"step": 1050,
|
|
"valid_targets_mean": 4804.6,
|
|
"valid_targets_min": 3410
|
|
},
|
|
{
|
|
"epoch": 2.0525803310613435,
|
|
"grad_norm": 0.5881192373727118,
|
|
"learning_rate": 3.563490113864514e-05,
|
|
"loss": 0.3123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14763674139976501,
|
|
"step": 1055,
|
|
"valid_targets_mean": 4956.6,
|
|
"valid_targets_min": 3359
|
|
},
|
|
{
|
|
"epoch": 2.062317429406037,
|
|
"grad_norm": 0.5637764211473857,
|
|
"learning_rate": 3.557421427296302e-05,
|
|
"loss": 0.3269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1598266214132309,
|
|
"step": 1060,
|
|
"valid_targets_mean": 4222.1,
|
|
"valid_targets_min": 3046
|
|
},
|
|
{
|
|
"epoch": 2.0720545277507303,
|
|
"grad_norm": 0.5619488862108217,
|
|
"learning_rate": 3.551316089261363e-05,
|
|
"loss": 0.31,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1785147488117218,
|
|
"step": 1065,
|
|
"valid_targets_mean": 4655.8,
|
|
"valid_targets_min": 3556
|
|
},
|
|
{
|
|
"epoch": 2.0817916260954235,
|
|
"grad_norm": 0.5786529359794729,
|
|
"learning_rate": 3.545174243439236e-05,
|
|
"loss": 0.3296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13969169557094574,
|
|
"step": 1070,
|
|
"valid_targets_mean": 3962.9,
|
|
"valid_targets_min": 2532
|
|
},
|
|
{
|
|
"epoch": 2.091528724440117,
|
|
"grad_norm": 0.5835797067821024,
|
|
"learning_rate": 3.538996034368615e-05,
|
|
"loss": 0.3076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14176321029663086,
|
|
"step": 1075,
|
|
"valid_targets_mean": 5522.0,
|
|
"valid_targets_min": 3169
|
|
},
|
|
{
|
|
"epoch": 2.1012658227848102,
|
|
"grad_norm": 0.5648054676557743,
|
|
"learning_rate": 3.5327816074439445e-05,
|
|
"loss": 0.3145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17459940910339355,
|
|
"step": 1080,
|
|
"valid_targets_mean": 5075.1,
|
|
"valid_targets_min": 2892
|
|
},
|
|
{
|
|
"epoch": 2.1110029211295034,
|
|
"grad_norm": 0.6489311229405051,
|
|
"learning_rate": 3.526531108912e-05,
|
|
"loss": 0.3144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1332150101661682,
|
|
"step": 1085,
|
|
"valid_targets_mean": 3368.8,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 2.1207400194741965,
|
|
"grad_norm": 0.494709264377525,
|
|
"learning_rate": 3.520244685868446e-05,
|
|
"loss": 0.3106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19108489155769348,
|
|
"step": 1090,
|
|
"valid_targets_mean": 5825.9,
|
|
"valid_targets_min": 3682
|
|
},
|
|
{
|
|
"epoch": 2.13047711781889,
|
|
"grad_norm": 0.6281778006653698,
|
|
"learning_rate": 3.513922486254373e-05,
|
|
"loss": 0.3042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1603708565235138,
|
|
"step": 1095,
|
|
"valid_targets_mean": 3378.2,
|
|
"valid_targets_min": 956
|
|
},
|
|
{
|
|
"epoch": 2.1402142161635833,
|
|
"grad_norm": 0.5106745404768562,
|
|
"learning_rate": 3.507564658852817e-05,
|
|
"loss": 0.3025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15806594491004944,
|
|
"step": 1100,
|
|
"valid_targets_mean": 5692.6,
|
|
"valid_targets_min": 3474
|
|
},
|
|
{
|
|
"epoch": 2.1499513145082765,
|
|
"grad_norm": 0.5744368733768405,
|
|
"learning_rate": 3.501171353285258e-05,
|
|
"loss": 0.3221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14167222380638123,
|
|
"step": 1105,
|
|
"valid_targets_mean": 4465.9,
|
|
"valid_targets_min": 3456
|
|
},
|
|
{
|
|
"epoch": 2.1596884128529696,
|
|
"grad_norm": 0.5212004839518923,
|
|
"learning_rate": 3.494742720008102e-05,
|
|
"loss": 0.3096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15998679399490356,
|
|
"step": 1110,
|
|
"valid_targets_mean": 5271.0,
|
|
"valid_targets_min": 3923
|
|
},
|
|
{
|
|
"epoch": 2.1694255111976632,
|
|
"grad_norm": 0.577219955451033,
|
|
"learning_rate": 3.488278910309131e-05,
|
|
"loss": 0.3078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17543968558311462,
|
|
"step": 1115,
|
|
"valid_targets_mean": 4961.6,
|
|
"valid_targets_min": 3974
|
|
},
|
|
{
|
|
"epoch": 2.1791626095423564,
|
|
"grad_norm": 0.5594038062707075,
|
|
"learning_rate": 3.481780076303953e-05,
|
|
"loss": 0.3083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13161903619766235,
|
|
"step": 1120,
|
|
"valid_targets_mean": 4576.1,
|
|
"valid_targets_min": 3519
|
|
},
|
|
{
|
|
"epoch": 2.1888997078870496,
|
|
"grad_norm": 0.5322052353521118,
|
|
"learning_rate": 3.475246370932419e-05,
|
|
"loss": 0.3072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13524478673934937,
|
|
"step": 1125,
|
|
"valid_targets_mean": 4187.4,
|
|
"valid_targets_min": 3659
|
|
},
|
|
{
|
|
"epoch": 2.1986368062317427,
|
|
"grad_norm": 0.5286884721648863,
|
|
"learning_rate": 3.468677947955019e-05,
|
|
"loss": 0.3095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13178390264511108,
|
|
"step": 1130,
|
|
"valid_targets_mean": 4223.1,
|
|
"valid_targets_min": 3501
|
|
},
|
|
{
|
|
"epoch": 2.2083739045764363,
|
|
"grad_norm": 0.5918475213999014,
|
|
"learning_rate": 3.4620749619492704e-05,
|
|
"loss": 0.3071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1466686874628067,
|
|
"step": 1135,
|
|
"valid_targets_mean": 4163.9,
|
|
"valid_targets_min": 3078
|
|
},
|
|
{
|
|
"epoch": 2.2181110029211295,
|
|
"grad_norm": 0.949871270636262,
|
|
"learning_rate": 3.455437568306076e-05,
|
|
"loss": 0.2989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15687699615955353,
|
|
"step": 1140,
|
|
"valid_targets_mean": 4895.0,
|
|
"valid_targets_min": 3380
|
|
},
|
|
{
|
|
"epoch": 2.2278481012658227,
|
|
"grad_norm": 0.5576871609528415,
|
|
"learning_rate": 3.448765923226069e-05,
|
|
"loss": 0.3123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1623878926038742,
|
|
"step": 1145,
|
|
"valid_targets_mean": 4699.9,
|
|
"valid_targets_min": 2929
|
|
},
|
|
{
|
|
"epoch": 2.2375851996105163,
|
|
"grad_norm": 0.6209223489561736,
|
|
"learning_rate": 3.442060183715935e-05,
|
|
"loss": 0.3164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18064065277576447,
|
|
"step": 1150,
|
|
"valid_targets_mean": 4818.5,
|
|
"valid_targets_min": 3741
|
|
},
|
|
{
|
|
"epoch": 2.2473222979552094,
|
|
"grad_norm": 0.5753288910537513,
|
|
"learning_rate": 3.4353205075847195e-05,
|
|
"loss": 0.3038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1549452543258667,
|
|
"step": 1155,
|
|
"valid_targets_mean": 3921.2,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 2.2570593962999026,
|
|
"grad_norm": 0.6114731147497137,
|
|
"learning_rate": 3.428547053440113e-05,
|
|
"loss": 0.3139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16155298054218292,
|
|
"step": 1160,
|
|
"valid_targets_mean": 3402.0,
|
|
"valid_targets_min": 847
|
|
},
|
|
{
|
|
"epoch": 2.2667964946445958,
|
|
"grad_norm": 0.5699595421030039,
|
|
"learning_rate": 3.421739980684718e-05,
|
|
"loss": 0.3043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14192327857017517,
|
|
"step": 1165,
|
|
"valid_targets_mean": 4583.9,
|
|
"valid_targets_min": 3196
|
|
},
|
|
{
|
|
"epoch": 2.2765335929892894,
|
|
"grad_norm": 0.5395074172555943,
|
|
"learning_rate": 3.4148994495123e-05,
|
|
"loss": 0.297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14800921082496643,
|
|
"step": 1170,
|
|
"valid_targets_mean": 4952.8,
|
|
"valid_targets_min": 3083
|
|
},
|
|
{
|
|
"epoch": 2.2862706913339825,
|
|
"grad_norm": 0.5246294090811836,
|
|
"learning_rate": 3.408025620904012e-05,
|
|
"loss": 0.3043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14312931895256042,
|
|
"step": 1175,
|
|
"valid_targets_mean": 4658.1,
|
|
"valid_targets_min": 993
|
|
},
|
|
{
|
|
"epoch": 2.2960077896786757,
|
|
"grad_norm": 0.537381381891859,
|
|
"learning_rate": 3.401118656624613e-05,
|
|
"loss": 0.2966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14508512616157532,
|
|
"step": 1180,
|
|
"valid_targets_mean": 5010.2,
|
|
"valid_targets_min": 3947
|
|
},
|
|
{
|
|
"epoch": 2.305744888023369,
|
|
"grad_norm": 0.5312342214151107,
|
|
"learning_rate": 3.3941787192186584e-05,
|
|
"loss": 0.3045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17704224586486816,
|
|
"step": 1185,
|
|
"valid_targets_mean": 5460.2,
|
|
"valid_targets_min": 2632
|
|
},
|
|
{
|
|
"epoch": 2.3154819863680625,
|
|
"grad_norm": 0.5439839749528556,
|
|
"learning_rate": 3.387205972006671e-05,
|
|
"loss": 0.3117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1809322088956833,
|
|
"step": 1190,
|
|
"valid_targets_mean": 4715.5,
|
|
"valid_targets_min": 3856
|
|
},
|
|
{
|
|
"epoch": 2.3252190847127556,
|
|
"grad_norm": 0.6164616753321168,
|
|
"learning_rate": 3.3802005790813046e-05,
|
|
"loss": 0.3083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13872000575065613,
|
|
"step": 1195,
|
|
"valid_targets_mean": 3101.9,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 2.334956183057449,
|
|
"grad_norm": 0.5386625547789149,
|
|
"learning_rate": 3.373162705303476e-05,
|
|
"loss": 0.3051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16491888463497162,
|
|
"step": 1200,
|
|
"valid_targets_mean": 5674.6,
|
|
"valid_targets_min": 3462
|
|
},
|
|
{
|
|
"epoch": 2.344693281402142,
|
|
"grad_norm": 0.5548428906068497,
|
|
"learning_rate": 3.366092516298489e-05,
|
|
"loss": 0.3007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15847313404083252,
|
|
"step": 1205,
|
|
"valid_targets_mean": 4145.0,
|
|
"valid_targets_min": 603
|
|
},
|
|
{
|
|
"epoch": 2.3544303797468356,
|
|
"grad_norm": 0.5549765263309128,
|
|
"learning_rate": 3.358990178452135e-05,
|
|
"loss": 0.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14063230156898499,
|
|
"step": 1210,
|
|
"valid_targets_mean": 3901.8,
|
|
"valid_targets_min": 2271
|
|
},
|
|
{
|
|
"epoch": 2.3641674780915287,
|
|
"grad_norm": 0.5697552170803372,
|
|
"learning_rate": 3.351855858906778e-05,
|
|
"loss": 0.3283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1691378951072693,
|
|
"step": 1215,
|
|
"valid_targets_mean": 4227.8,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 2.373904576436222,
|
|
"grad_norm": 0.6390756448819453,
|
|
"learning_rate": 3.344689725557422e-05,
|
|
"loss": 0.3156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17019158601760864,
|
|
"step": 1220,
|
|
"valid_targets_mean": 3547.5,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 2.3836416747809155,
|
|
"grad_norm": 0.6261244446363918,
|
|
"learning_rate": 3.337491947047757e-05,
|
|
"loss": 0.3041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16938380897045135,
|
|
"step": 1225,
|
|
"valid_targets_mean": 5079.8,
|
|
"valid_targets_min": 3861
|
|
},
|
|
{
|
|
"epoch": 2.3933787731256086,
|
|
"grad_norm": 0.5859230761108937,
|
|
"learning_rate": 3.330262692766193e-05,
|
|
"loss": 0.3172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19337376952171326,
|
|
"step": 1230,
|
|
"valid_targets_mean": 5368.2,
|
|
"valid_targets_min": 3945
|
|
},
|
|
{
|
|
"epoch": 2.403115871470302,
|
|
"grad_norm": 0.680801190103709,
|
|
"learning_rate": 3.323002132841875e-05,
|
|
"loss": 0.3125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16228210926055908,
|
|
"step": 1235,
|
|
"valid_targets_mean": 3661.8,
|
|
"valid_targets_min": 918
|
|
},
|
|
{
|
|
"epoch": 2.412852969814995,
|
|
"grad_norm": 0.5495257553775102,
|
|
"learning_rate": 3.315710438140674e-05,
|
|
"loss": 0.3096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14480452239513397,
|
|
"step": 1240,
|
|
"valid_targets_mean": 3951.8,
|
|
"valid_targets_min": 1345
|
|
},
|
|
{
|
|
"epoch": 2.4225900681596886,
|
|
"grad_norm": 0.5236898539896812,
|
|
"learning_rate": 3.308387780261169e-05,
|
|
"loss": 0.3138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1490398347377777,
|
|
"step": 1245,
|
|
"valid_targets_mean": 4787.9,
|
|
"valid_targets_min": 1021
|
|
},
|
|
{
|
|
"epoch": 2.4323271665043817,
|
|
"grad_norm": 0.5065248416968627,
|
|
"learning_rate": 3.3010343315306106e-05,
|
|
"loss": 0.3121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13232021033763885,
|
|
"step": 1250,
|
|
"valid_targets_mean": 4457.4,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 2.442064264849075,
|
|
"grad_norm": 0.5715277917419054,
|
|
"learning_rate": 3.293650265000864e-05,
|
|
"loss": 0.3055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16457292437553406,
|
|
"step": 1255,
|
|
"valid_targets_mean": 5246.2,
|
|
"valid_targets_min": 3369
|
|
},
|
|
{
|
|
"epoch": 2.451801363193768,
|
|
"grad_norm": 0.6035199329795341,
|
|
"learning_rate": 3.286235754444335e-05,
|
|
"loss": 0.2985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1417427361011505,
|
|
"step": 1260,
|
|
"valid_targets_mean": 4964.5,
|
|
"valid_targets_min": 3370
|
|
},
|
|
{
|
|
"epoch": 2.4615384615384617,
|
|
"grad_norm": 0.5442116744097487,
|
|
"learning_rate": 3.278790974349882e-05,
|
|
"loss": 0.3089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14931772649288177,
|
|
"step": 1265,
|
|
"valid_targets_mean": 4670.9,
|
|
"valid_targets_min": 3923
|
|
},
|
|
{
|
|
"epoch": 2.471275559883155,
|
|
"grad_norm": 0.559333461619273,
|
|
"learning_rate": 3.271316099918709e-05,
|
|
"loss": 0.3086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14295917749404907,
|
|
"step": 1270,
|
|
"valid_targets_mean": 3777.9,
|
|
"valid_targets_min": 2508
|
|
},
|
|
{
|
|
"epoch": 2.481012658227848,
|
|
"grad_norm": 0.5582745118297089,
|
|
"learning_rate": 3.2638113070602436e-05,
|
|
"loss": 0.3066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1525634229183197,
|
|
"step": 1275,
|
|
"valid_targets_mean": 4427.1,
|
|
"valid_targets_min": 2023
|
|
},
|
|
{
|
|
"epoch": 2.4907497565725416,
|
|
"grad_norm": 0.4842882335527433,
|
|
"learning_rate": 3.256276772387997e-05,
|
|
"loss": 0.3021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11070093512535095,
|
|
"step": 1280,
|
|
"valid_targets_mean": 4191.4,
|
|
"valid_targets_min": 2641
|
|
},
|
|
{
|
|
"epoch": 2.5004868549172348,
|
|
"grad_norm": 0.43041408650781615,
|
|
"learning_rate": 3.248712673215405e-05,
|
|
"loss": 0.3038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14166027307510376,
|
|
"step": 1285,
|
|
"valid_targets_mean": 6169.1,
|
|
"valid_targets_min": 3099
|
|
},
|
|
{
|
|
"epoch": 2.510223953261928,
|
|
"grad_norm": 0.5312349566335257,
|
|
"learning_rate": 3.241119187551661e-05,
|
|
"loss": 0.3251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18950393795967102,
|
|
"step": 1290,
|
|
"valid_targets_mean": 5119.1,
|
|
"valid_targets_min": 3304
|
|
},
|
|
{
|
|
"epoch": 2.519961051606621,
|
|
"grad_norm": 0.5658429474177419,
|
|
"learning_rate": 3.233496494097521e-05,
|
|
"loss": 0.3155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17059403657913208,
|
|
"step": 1295,
|
|
"valid_targets_mean": 4803.5,
|
|
"valid_targets_min": 3540
|
|
},
|
|
{
|
|
"epoch": 2.5296981499513143,
|
|
"grad_norm": 0.5807236823843408,
|
|
"learning_rate": 3.2258447722411e-05,
|
|
"loss": 0.3093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14119458198547363,
|
|
"step": 1300,
|
|
"valid_targets_mean": 3788.0,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 2.539435248296008,
|
|
"grad_norm": 0.5288057155805473,
|
|
"learning_rate": 3.2181642020536536e-05,
|
|
"loss": 0.3271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12025199830532074,
|
|
"step": 1305,
|
|
"valid_targets_mean": 4173.2,
|
|
"valid_targets_min": 941
|
|
},
|
|
{
|
|
"epoch": 2.549172346640701,
|
|
"grad_norm": 0.5875797606727595,
|
|
"learning_rate": 3.210454964285333e-05,
|
|
"loss": 0.3073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1568223237991333,
|
|
"step": 1310,
|
|
"valid_targets_mean": 4531.1,
|
|
"valid_targets_min": 3127
|
|
},
|
|
{
|
|
"epoch": 2.558909444985394,
|
|
"grad_norm": 0.5805603082044997,
|
|
"learning_rate": 3.202717240360941e-05,
|
|
"loss": 0.3003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17503443360328674,
|
|
"step": 1315,
|
|
"valid_targets_mean": 4729.4,
|
|
"valid_targets_min": 3798
|
|
},
|
|
{
|
|
"epoch": 2.568646543330088,
|
|
"grad_norm": 0.5516596770086013,
|
|
"learning_rate": 3.194951212375654e-05,
|
|
"loss": 0.2953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15626661479473114,
|
|
"step": 1320,
|
|
"valid_targets_mean": 4995.6,
|
|
"valid_targets_min": 3708
|
|
},
|
|
{
|
|
"epoch": 2.578383641674781,
|
|
"grad_norm": 0.5398796088767358,
|
|
"learning_rate": 3.18715706309074e-05,
|
|
"loss": 0.3114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1588321179151535,
|
|
"step": 1325,
|
|
"valid_targets_mean": 4792.9,
|
|
"valid_targets_min": 3465
|
|
},
|
|
{
|
|
"epoch": 2.588120740019474,
|
|
"grad_norm": 0.5827115145582986,
|
|
"learning_rate": 3.17933497592926e-05,
|
|
"loss": 0.2977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13239231705665588,
|
|
"step": 1330,
|
|
"valid_targets_mean": 4697.9,
|
|
"valid_targets_min": 3551
|
|
},
|
|
{
|
|
"epoch": 2.5978578383641677,
|
|
"grad_norm": 0.5311381122705868,
|
|
"learning_rate": 3.171485134971748e-05,
|
|
"loss": 0.3241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14801988005638123,
|
|
"step": 1335,
|
|
"valid_targets_mean": 4075.0,
|
|
"valid_targets_min": 3568
|
|
},
|
|
{
|
|
"epoch": 2.607594936708861,
|
|
"grad_norm": 0.5912447143993821,
|
|
"learning_rate": 3.163607724951877e-05,
|
|
"loss": 0.3094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14190785586833954,
|
|
"step": 1340,
|
|
"valid_targets_mean": 4143.0,
|
|
"valid_targets_min": 3443
|
|
},
|
|
{
|
|
"epoch": 2.617332035053554,
|
|
"grad_norm": 0.5384191302770057,
|
|
"learning_rate": 3.155702931252119e-05,
|
|
"loss": 0.3078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1374586820602417,
|
|
"step": 1345,
|
|
"valid_targets_mean": 4380.6,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 2.627069133398247,
|
|
"grad_norm": 0.5562861038499344,
|
|
"learning_rate": 3.147770939899376e-05,
|
|
"loss": 0.3026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1505712866783142,
|
|
"step": 1350,
|
|
"valid_targets_mean": 4433.2,
|
|
"valid_targets_min": 3710
|
|
},
|
|
{
|
|
"epoch": 2.6368062317429404,
|
|
"grad_norm": 0.5389095108329163,
|
|
"learning_rate": 3.1398119375606046e-05,
|
|
"loss": 0.3018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13420599699020386,
|
|
"step": 1355,
|
|
"valid_targets_mean": 3818.4,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 2.646543330087634,
|
|
"grad_norm": 0.5844352106901917,
|
|
"learning_rate": 3.13182611153842e-05,
|
|
"loss": 0.3081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18089349567890167,
|
|
"step": 1360,
|
|
"valid_targets_mean": 4613.8,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 2.656280428432327,
|
|
"grad_norm": 0.538351884420079,
|
|
"learning_rate": 3.1238136497666944e-05,
|
|
"loss": 0.3024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16512431204319,
|
|
"step": 1365,
|
|
"valid_targets_mean": 5107.4,
|
|
"valid_targets_min": 4051
|
|
},
|
|
{
|
|
"epoch": 2.6660175267770203,
|
|
"grad_norm": 1.1550577087046785,
|
|
"learning_rate": 3.115774740806128e-05,
|
|
"loss": 0.3045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15896329283714294,
|
|
"step": 1370,
|
|
"valid_targets_mean": 3995.5,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 2.675754625121714,
|
|
"grad_norm": 0.582180853176557,
|
|
"learning_rate": 3.1077095738398153e-05,
|
|
"loss": 0.3033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17014168202877045,
|
|
"step": 1375,
|
|
"valid_targets_mean": 4367.5,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 2.685491723466407,
|
|
"grad_norm": 0.5934598260961282,
|
|
"learning_rate": 3.099618338668791e-05,
|
|
"loss": 0.3131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12493668496608734,
|
|
"step": 1380,
|
|
"valid_targets_mean": 4327.0,
|
|
"valid_targets_min": 3714
|
|
},
|
|
{
|
|
"epoch": 2.6952288218111002,
|
|
"grad_norm": 0.5259583870899158,
|
|
"learning_rate": 3.0915012257075635e-05,
|
|
"loss": 0.2915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14063671231269836,
|
|
"step": 1385,
|
|
"valid_targets_mean": 4856.4,
|
|
"valid_targets_min": 3505
|
|
},
|
|
{
|
|
"epoch": 2.704965920155794,
|
|
"grad_norm": 0.5535675261760915,
|
|
"learning_rate": 3.083358425979637e-05,
|
|
"loss": 0.3149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16535528004169464,
|
|
"step": 1390,
|
|
"valid_targets_mean": 4588.6,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 2.714703018500487,
|
|
"grad_norm": 0.5361465180818037,
|
|
"learning_rate": 3.075190131113009e-05,
|
|
"loss": 0.298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1534717082977295,
|
|
"step": 1395,
|
|
"valid_targets_mean": 4369.6,
|
|
"valid_targets_min": 3242
|
|
},
|
|
{
|
|
"epoch": 2.72444011684518,
|
|
"grad_norm": 0.5351953528412584,
|
|
"learning_rate": 3.0669965333356695e-05,
|
|
"loss": 0.3008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12435472011566162,
|
|
"step": 1400,
|
|
"valid_targets_mean": 3751.4,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 2.7341772151898733,
|
|
"grad_norm": 0.5756275556181341,
|
|
"learning_rate": 3.058777825471071e-05,
|
|
"loss": 0.3216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13730017840862274,
|
|
"step": 1405,
|
|
"valid_targets_mean": 3602.5,
|
|
"valid_targets_min": 1111
|
|
},
|
|
{
|
|
"epoch": 2.7439143135345665,
|
|
"grad_norm": 0.5469536506954087,
|
|
"learning_rate": 3.0505342009335898e-05,
|
|
"loss": 0.3085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13161402940750122,
|
|
"step": 1410,
|
|
"valid_targets_mean": 3866.4,
|
|
"valid_targets_min": 3365
|
|
},
|
|
{
|
|
"epoch": 2.75365141187926,
|
|
"grad_norm": 0.5827655936407213,
|
|
"learning_rate": 3.0422658537239823e-05,
|
|
"loss": 0.3087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13761383295059204,
|
|
"step": 1415,
|
|
"valid_targets_mean": 4115.8,
|
|
"valid_targets_min": 3294
|
|
},
|
|
{
|
|
"epoch": 2.7633885102239533,
|
|
"grad_norm": 0.5649461258398,
|
|
"learning_rate": 3.0339729784248103e-05,
|
|
"loss": 0.2975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15500204265117645,
|
|
"step": 1420,
|
|
"valid_targets_mean": 4271.4,
|
|
"valid_targets_min": 3406
|
|
},
|
|
{
|
|
"epoch": 2.7731256085686464,
|
|
"grad_norm": 0.5793711003701393,
|
|
"learning_rate": 3.025655770195866e-05,
|
|
"loss": 0.293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1798117309808731,
|
|
"step": 1425,
|
|
"valid_targets_mean": 4510.8,
|
|
"valid_targets_min": 2712
|
|
},
|
|
{
|
|
"epoch": 2.78286270691334,
|
|
"grad_norm": 0.5177877954454848,
|
|
"learning_rate": 3.0173144247695796e-05,
|
|
"loss": 0.32,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1714254915714264,
|
|
"step": 1430,
|
|
"valid_targets_mean": 5389.1,
|
|
"valid_targets_min": 3922
|
|
},
|
|
{
|
|
"epoch": 2.792599805258033,
|
|
"grad_norm": 0.49950463829089403,
|
|
"learning_rate": 3.008949138446413e-05,
|
|
"loss": 0.3015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1443287432193756,
|
|
"step": 1435,
|
|
"valid_targets_mean": 4833.9,
|
|
"valid_targets_min": 3476
|
|
},
|
|
{
|
|
"epoch": 2.8023369036027264,
|
|
"grad_norm": 0.5247189977861658,
|
|
"learning_rate": 3.0005601080902376e-05,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17010296881198883,
|
|
"step": 1440,
|
|
"valid_targets_mean": 5573.1,
|
|
"valid_targets_min": 3676
|
|
},
|
|
{
|
|
"epoch": 2.8120740019474195,
|
|
"grad_norm": 0.5612910980221456,
|
|
"learning_rate": 2.9921475311237055e-05,
|
|
"loss": 0.2989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17654839158058167,
|
|
"step": 1445,
|
|
"valid_targets_mean": 4656.1,
|
|
"valid_targets_min": 3715
|
|
},
|
|
{
|
|
"epoch": 2.8218111002921127,
|
|
"grad_norm": 0.5074720786944287,
|
|
"learning_rate": 2.9837116055236e-05,
|
|
"loss": 0.3006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14683783054351807,
|
|
"step": 1450,
|
|
"valid_targets_mean": 4678.2,
|
|
"valid_targets_min": 3345
|
|
},
|
|
{
|
|
"epoch": 2.8315481986368063,
|
|
"grad_norm": 0.6824579003436837,
|
|
"learning_rate": 2.975252529816178e-05,
|
|
"loss": 0.3024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13158957660198212,
|
|
"step": 1455,
|
|
"valid_targets_mean": 3765.4,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 2.8412852969814995,
|
|
"grad_norm": 0.5591052774964402,
|
|
"learning_rate": 2.9667705030724976e-05,
|
|
"loss": 0.3172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2231038212776184,
|
|
"step": 1460,
|
|
"valid_targets_mean": 4964.5,
|
|
"valid_targets_min": 3417
|
|
},
|
|
{
|
|
"epoch": 2.8510223953261926,
|
|
"grad_norm": 0.5562122280848797,
|
|
"learning_rate": 2.9582657249037335e-05,
|
|
"loss": 0.3067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1449592113494873,
|
|
"step": 1465,
|
|
"valid_targets_mean": 4370.0,
|
|
"valid_targets_min": 3510
|
|
},
|
|
{
|
|
"epoch": 2.8607594936708862,
|
|
"grad_norm": 0.568215618071862,
|
|
"learning_rate": 2.9497383954564807e-05,
|
|
"loss": 0.2983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1569761335849762,
|
|
"step": 1470,
|
|
"valid_targets_mean": 4728.9,
|
|
"valid_targets_min": 2500
|
|
},
|
|
{
|
|
"epoch": 2.8704965920155794,
|
|
"grad_norm": 0.49601170795295146,
|
|
"learning_rate": 2.941188715408041e-05,
|
|
"loss": 0.3133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1577417403459549,
|
|
"step": 1475,
|
|
"valid_targets_mean": 5978.9,
|
|
"valid_targets_min": 3426
|
|
},
|
|
{
|
|
"epoch": 2.8802336903602725,
|
|
"grad_norm": 0.5893755134602132,
|
|
"learning_rate": 2.9326168859617044e-05,
|
|
"loss": 0.3135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18798495829105377,
|
|
"step": 1480,
|
|
"valid_targets_mean": 4515.2,
|
|
"valid_targets_min": 2565
|
|
},
|
|
{
|
|
"epoch": 2.889970788704966,
|
|
"grad_norm": 0.5602906459329979,
|
|
"learning_rate": 2.9240231088420112e-05,
|
|
"loss": 0.3116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18006697297096252,
|
|
"step": 1485,
|
|
"valid_targets_mean": 5029.5,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 2.8997078870496593,
|
|
"grad_norm": 0.5646142998572641,
|
|
"learning_rate": 2.9154075862900047e-05,
|
|
"loss": 0.3128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17120343446731567,
|
|
"step": 1490,
|
|
"valid_targets_mean": 4255.9,
|
|
"valid_targets_min": 3711
|
|
},
|
|
{
|
|
"epoch": 2.9094449853943525,
|
|
"grad_norm": 0.564805662671005,
|
|
"learning_rate": 2.9067705210584764e-05,
|
|
"loss": 0.3136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1788097769021988,
|
|
"step": 1495,
|
|
"valid_targets_mean": 4762.5,
|
|
"valid_targets_min": 3633
|
|
},
|
|
{
|
|
"epoch": 2.9191820837390456,
|
|
"grad_norm": 0.6174628930697116,
|
|
"learning_rate": 2.898112116407186e-05,
|
|
"loss": 0.3172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16867046058177948,
|
|
"step": 1500,
|
|
"valid_targets_mean": 4821.1,
|
|
"valid_targets_min": 3266
|
|
},
|
|
{
|
|
"epoch": 2.928919182083739,
|
|
"grad_norm": 0.5749250150204451,
|
|
"learning_rate": 2.889432576098087e-05,
|
|
"loss": 0.3255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14719168841838837,
|
|
"step": 1505,
|
|
"valid_targets_mean": 4338.9,
|
|
"valid_targets_min": 3455
|
|
},
|
|
{
|
|
"epoch": 2.9386562804284324,
|
|
"grad_norm": 0.5750089330289814,
|
|
"learning_rate": 2.8807321043905253e-05,
|
|
"loss": 0.3101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14039798080921173,
|
|
"step": 1510,
|
|
"valid_targets_mean": 3943.5,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 2.9483933787731256,
|
|
"grad_norm": 0.5128539306501144,
|
|
"learning_rate": 2.872010906036435e-05,
|
|
"loss": 0.307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1284104585647583,
|
|
"step": 1515,
|
|
"valid_targets_mean": 4067.5,
|
|
"valid_targets_min": 2981
|
|
},
|
|
{
|
|
"epoch": 2.9581304771178187,
|
|
"grad_norm": 0.6991441353480424,
|
|
"learning_rate": 2.863269186275519e-05,
|
|
"loss": 0.3049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14474327862262726,
|
|
"step": 1520,
|
|
"valid_targets_mean": 3958.2,
|
|
"valid_targets_min": 944
|
|
},
|
|
{
|
|
"epoch": 2.9678675754625123,
|
|
"grad_norm": 0.5849315103365564,
|
|
"learning_rate": 2.8545071508304193e-05,
|
|
"loss": 0.3165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14652173221111298,
|
|
"step": 1525,
|
|
"valid_targets_mean": 3729.0,
|
|
"valid_targets_min": 963
|
|
},
|
|
{
|
|
"epoch": 2.9776046738072055,
|
|
"grad_norm": 0.49959866035083783,
|
|
"learning_rate": 2.8457250059018765e-05,
|
|
"loss": 0.303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1424277126789093,
|
|
"step": 1530,
|
|
"valid_targets_mean": 4692.8,
|
|
"valid_targets_min": 3413
|
|
},
|
|
{
|
|
"epoch": 2.9873417721518987,
|
|
"grad_norm": 0.5461571110524959,
|
|
"learning_rate": 2.836922958163875e-05,
|
|
"loss": 0.3026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14327436685562134,
|
|
"step": 1535,
|
|
"valid_targets_mean": 4981.6,
|
|
"valid_targets_min": 870
|
|
},
|
|
{
|
|
"epoch": 2.9970788704965923,
|
|
"grad_norm": 0.5916489668113946,
|
|
"learning_rate": 2.8281012147587816e-05,
|
|
"loss": 0.3005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1285589635372162,
|
|
"step": 1540,
|
|
"valid_targets_mean": 3224.1,
|
|
"valid_targets_min": 900
|
|
},
|
|
{
|
|
"epoch": 3.005842259006816,
|
|
"grad_norm": 0.5260468736879659,
|
|
"learning_rate": 2.8192599832924677e-05,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1306810975074768,
|
|
"step": 1545,
|
|
"valid_targets_mean": 4352.6,
|
|
"valid_targets_min": 3437
|
|
},
|
|
{
|
|
"epoch": 3.015579357351509,
|
|
"grad_norm": 0.613461419110215,
|
|
"learning_rate": 2.810399471829429e-05,
|
|
"loss": 0.2929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1785038709640503,
|
|
"step": 1550,
|
|
"valid_targets_mean": 4464.5,
|
|
"valid_targets_min": 1065
|
|
},
|
|
{
|
|
"epoch": 3.0253164556962027,
|
|
"grad_norm": 0.5673533735219711,
|
|
"learning_rate": 2.8015198888878835e-05,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14384561777114868,
|
|
"step": 1555,
|
|
"valid_targets_mean": 4750.8,
|
|
"valid_targets_min": 3953
|
|
},
|
|
{
|
|
"epoch": 3.035053554040896,
|
|
"grad_norm": 0.565253904430208,
|
|
"learning_rate": 2.7926214434348645e-05,
|
|
"loss": 0.2942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14825423061847687,
|
|
"step": 1560,
|
|
"valid_targets_mean": 4587.1,
|
|
"valid_targets_min": 3610
|
|
},
|
|
{
|
|
"epoch": 3.044790652385589,
|
|
"grad_norm": 0.5577420057055872,
|
|
"learning_rate": 2.78370434488131e-05,
|
|
"loss": 0.2872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13824856281280518,
|
|
"step": 1565,
|
|
"valid_targets_mean": 3871.2,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 3.0545277507302826,
|
|
"grad_norm": 0.5137625048643271,
|
|
"learning_rate": 2.7747688030771253e-05,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14553338289260864,
|
|
"step": 1570,
|
|
"valid_targets_mean": 4825.0,
|
|
"valid_targets_min": 3620
|
|
},
|
|
{
|
|
"epoch": 3.0642648490749758,
|
|
"grad_norm": 0.5968584656427234,
|
|
"learning_rate": 2.765815028306249e-05,
|
|
"loss": 0.2887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15466415882110596,
|
|
"step": 1575,
|
|
"valid_targets_mean": 4244.1,
|
|
"valid_targets_min": 1246
|
|
},
|
|
{
|
|
"epoch": 3.074001947419669,
|
|
"grad_norm": 0.5701188989154224,
|
|
"learning_rate": 2.756843231281707e-05,
|
|
"loss": 0.2971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17137470841407776,
|
|
"step": 1580,
|
|
"valid_targets_mean": 4704.4,
|
|
"valid_targets_min": 3481
|
|
},
|
|
{
|
|
"epoch": 3.083739045764362,
|
|
"grad_norm": 0.5932389465647907,
|
|
"learning_rate": 2.747853623140646e-05,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1456705778837204,
|
|
"step": 1585,
|
|
"valid_targets_mean": 4333.1,
|
|
"valid_targets_min": 3412
|
|
},
|
|
{
|
|
"epoch": 3.0934761441090557,
|
|
"grad_norm": 1.1639637682347415,
|
|
"learning_rate": 2.7388464154393735e-05,
|
|
"loss": 0.2961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11643892526626587,
|
|
"step": 1590,
|
|
"valid_targets_mean": 3620.2,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 3.103213242453749,
|
|
"grad_norm": 0.5443951877519766,
|
|
"learning_rate": 2.7298218201483725e-05,
|
|
"loss": 0.2805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13159920275211334,
|
|
"step": 1595,
|
|
"valid_targets_mean": 4375.2,
|
|
"valid_targets_min": 847
|
|
},
|
|
{
|
|
"epoch": 3.112950340798442,
|
|
"grad_norm": 0.6565290190153463,
|
|
"learning_rate": 2.7207800496473157e-05,
|
|
"loss": 0.2846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1461639404296875,
|
|
"step": 1600,
|
|
"valid_targets_mean": 3693.1,
|
|
"valid_targets_min": 1017
|
|
},
|
|
{
|
|
"epoch": 3.122687439143135,
|
|
"grad_norm": 0.5838526029422603,
|
|
"learning_rate": 2.7117213167200696e-05,
|
|
"loss": 0.3073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16136851906776428,
|
|
"step": 1605,
|
|
"valid_targets_mean": 3947.2,
|
|
"valid_targets_min": 1163
|
|
},
|
|
{
|
|
"epoch": 3.132424537487829,
|
|
"grad_norm": 0.6509334579262541,
|
|
"learning_rate": 2.7026458345496826e-05,
|
|
"loss": 0.3047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19014398753643036,
|
|
"step": 1610,
|
|
"valid_targets_mean": 4305.9,
|
|
"valid_targets_min": 1073
|
|
},
|
|
{
|
|
"epoch": 3.142161635832522,
|
|
"grad_norm": 0.6046997299463505,
|
|
"learning_rate": 2.6935538167133702e-05,
|
|
"loss": 0.2936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17839956283569336,
|
|
"step": 1615,
|
|
"valid_targets_mean": 4219.6,
|
|
"valid_targets_min": 2892
|
|
},
|
|
{
|
|
"epoch": 3.151898734177215,
|
|
"grad_norm": 0.543645233726318,
|
|
"learning_rate": 2.6844454771774898e-05,
|
|
"loss": 0.287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13276870548725128,
|
|
"step": 1620,
|
|
"valid_targets_mean": 5300.0,
|
|
"valid_targets_min": 3132
|
|
},
|
|
{
|
|
"epoch": 3.1616358325219083,
|
|
"grad_norm": 0.5582937777224337,
|
|
"learning_rate": 2.6753210302925044e-05,
|
|
"loss": 0.2866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18694062530994415,
|
|
"step": 1625,
|
|
"valid_targets_mean": 5042.0,
|
|
"valid_targets_min": 1463
|
|
},
|
|
{
|
|
"epoch": 3.171372930866602,
|
|
"grad_norm": 0.5302062621160738,
|
|
"learning_rate": 2.6661806907879385e-05,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16426973044872284,
|
|
"step": 1630,
|
|
"valid_targets_mean": 5698.5,
|
|
"valid_targets_min": 3997
|
|
},
|
|
{
|
|
"epoch": 3.181110029211295,
|
|
"grad_norm": 0.5218898199389939,
|
|
"learning_rate": 2.6570246737673216e-05,
|
|
"loss": 0.2789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1338643878698349,
|
|
"step": 1635,
|
|
"valid_targets_mean": 5401.1,
|
|
"valid_targets_min": 2999
|
|
},
|
|
{
|
|
"epoch": 3.190847127555988,
|
|
"grad_norm": 0.5457509565915137,
|
|
"learning_rate": 2.6478531947031325e-05,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1352064609527588,
|
|
"step": 1640,
|
|
"valid_targets_mean": 4577.5,
|
|
"valid_targets_min": 3313
|
|
},
|
|
{
|
|
"epoch": 3.2005842259006814,
|
|
"grad_norm": 0.5221241813266677,
|
|
"learning_rate": 2.638666469431723e-05,
|
|
"loss": 0.2857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16360121965408325,
|
|
"step": 1645,
|
|
"valid_targets_mean": 6039.2,
|
|
"valid_targets_min": 4422
|
|
},
|
|
{
|
|
"epoch": 3.210321324245375,
|
|
"grad_norm": 0.5383552979573822,
|
|
"learning_rate": 2.6294647141482413e-05,
|
|
"loss": 0.2845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1205267682671547,
|
|
"step": 1650,
|
|
"valid_targets_mean": 3710.4,
|
|
"valid_targets_min": 805
|
|
},
|
|
{
|
|
"epoch": 3.220058422590068,
|
|
"grad_norm": 0.5510664272717279,
|
|
"learning_rate": 2.6202481454015426e-05,
|
|
"loss": 0.2857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15154537558555603,
|
|
"step": 1655,
|
|
"valid_targets_mean": 4707.9,
|
|
"valid_targets_min": 3403
|
|
},
|
|
{
|
|
"epoch": 3.2297955209347613,
|
|
"grad_norm": 0.6134241750336596,
|
|
"learning_rate": 2.6110169800890937e-05,
|
|
"loss": 0.2867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1724124699831009,
|
|
"step": 1660,
|
|
"valid_targets_mean": 4382.6,
|
|
"valid_targets_min": 2870
|
|
},
|
|
{
|
|
"epoch": 3.239532619279455,
|
|
"grad_norm": 0.7623792747817205,
|
|
"learning_rate": 2.6017714354518696e-05,
|
|
"loss": 0.3016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14434286952018738,
|
|
"step": 1665,
|
|
"valid_targets_mean": 4364.2,
|
|
"valid_targets_min": 3705
|
|
},
|
|
{
|
|
"epoch": 3.249269717624148,
|
|
"grad_norm": 0.5777416446525403,
|
|
"learning_rate": 2.592511729069239e-05,
|
|
"loss": 0.2978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1274181604385376,
|
|
"step": 1670,
|
|
"valid_targets_mean": 3835.0,
|
|
"valid_targets_min": 1155
|
|
},
|
|
{
|
|
"epoch": 3.2590068159688412,
|
|
"grad_norm": 0.5485897640559537,
|
|
"learning_rate": 2.5832380788538457e-05,
|
|
"loss": 0.294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13670822978019714,
|
|
"step": 1675,
|
|
"valid_targets_mean": 3975.5,
|
|
"valid_targets_min": 1105
|
|
},
|
|
{
|
|
"epoch": 3.2687439143135344,
|
|
"grad_norm": 0.5198559660018749,
|
|
"learning_rate": 2.573950703046479e-05,
|
|
"loss": 0.2802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15408968925476074,
|
|
"step": 1680,
|
|
"valid_targets_mean": 5415.4,
|
|
"valid_targets_min": 3539
|
|
},
|
|
{
|
|
"epoch": 3.278481012658228,
|
|
"grad_norm": 0.5719344459529572,
|
|
"learning_rate": 2.564649820210939e-05,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14029723405838013,
|
|
"step": 1685,
|
|
"valid_targets_mean": 4609.9,
|
|
"valid_targets_min": 2505
|
|
},
|
|
{
|
|
"epoch": 3.288218111002921,
|
|
"grad_norm": 0.5380223254879148,
|
|
"learning_rate": 2.5553356492288933e-05,
|
|
"loss": 0.2848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14935359358787537,
|
|
"step": 1690,
|
|
"valid_targets_mean": 5267.2,
|
|
"valid_targets_min": 3694
|
|
},
|
|
{
|
|
"epoch": 3.2979552093476143,
|
|
"grad_norm": 0.5084028591808477,
|
|
"learning_rate": 2.5460084092947233e-05,
|
|
"loss": 0.2811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12564489245414734,
|
|
"step": 1695,
|
|
"valid_targets_mean": 4477.4,
|
|
"valid_targets_min": 3452
|
|
},
|
|
{
|
|
"epoch": 3.3076923076923075,
|
|
"grad_norm": 0.8648494099648836,
|
|
"learning_rate": 2.5366683199103692e-05,
|
|
"loss": 0.2969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16778680682182312,
|
|
"step": 1700,
|
|
"valid_targets_mean": 4279.0,
|
|
"valid_targets_min": 3471
|
|
},
|
|
{
|
|
"epoch": 3.317429406037001,
|
|
"grad_norm": 0.6045428648420095,
|
|
"learning_rate": 2.5273156008801614e-05,
|
|
"loss": 0.3027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14208298921585083,
|
|
"step": 1705,
|
|
"valid_targets_mean": 3909.2,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 3.3271665043816943,
|
|
"grad_norm": 0.581595997149262,
|
|
"learning_rate": 2.517950472305651e-05,
|
|
"loss": 0.2844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1563170850276947,
|
|
"step": 1710,
|
|
"valid_targets_mean": 4329.1,
|
|
"valid_targets_min": 3063
|
|
},
|
|
{
|
|
"epoch": 3.3369036027263874,
|
|
"grad_norm": 0.5940529837089468,
|
|
"learning_rate": 2.508573154580427e-05,
|
|
"loss": 0.2852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14828792214393616,
|
|
"step": 1715,
|
|
"valid_targets_mean": 4038.0,
|
|
"valid_targets_min": 1077
|
|
},
|
|
{
|
|
"epoch": 3.346640701071081,
|
|
"grad_norm": 0.5083181698359721,
|
|
"learning_rate": 2.4991838683849306e-05,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15568482875823975,
|
|
"step": 1720,
|
|
"valid_targets_mean": 5898.8,
|
|
"valid_targets_min": 3812
|
|
},
|
|
{
|
|
"epoch": 3.356377799415774,
|
|
"grad_norm": 0.5645820229503116,
|
|
"learning_rate": 2.4897828346812637e-05,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12027935683727264,
|
|
"step": 1725,
|
|
"valid_targets_mean": 4065.9,
|
|
"valid_targets_min": 2764
|
|
},
|
|
{
|
|
"epoch": 3.3661148977604674,
|
|
"grad_norm": 0.6090201084886291,
|
|
"learning_rate": 2.4803702747079858e-05,
|
|
"loss": 0.2874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14703837037086487,
|
|
"step": 1730,
|
|
"valid_targets_mean": 4114.9,
|
|
"valid_targets_min": 2407
|
|
},
|
|
{
|
|
"epoch": 3.3758519961051605,
|
|
"grad_norm": 0.5764853595735026,
|
|
"learning_rate": 2.4709464099749093e-05,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11748038232326508,
|
|
"step": 1735,
|
|
"valid_targets_mean": 3635.8,
|
|
"valid_targets_min": 1094
|
|
},
|
|
{
|
|
"epoch": 3.385589094449854,
|
|
"grad_norm": 0.6258474949813664,
|
|
"learning_rate": 2.4615114622578875e-05,
|
|
"loss": 0.2688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15654751658439636,
|
|
"step": 1740,
|
|
"valid_targets_mean": 3771.4,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 3.3953261927945473,
|
|
"grad_norm": 0.5368536640282213,
|
|
"learning_rate": 2.4520656535935927e-05,
|
|
"loss": 0.2789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12668928503990173,
|
|
"step": 1745,
|
|
"valid_targets_mean": 4120.4,
|
|
"valid_targets_min": 1354
|
|
},
|
|
{
|
|
"epoch": 3.4050632911392404,
|
|
"grad_norm": 0.5442086380240969,
|
|
"learning_rate": 2.4426092062742933e-05,
|
|
"loss": 0.285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1398339718580246,
|
|
"step": 1750,
|
|
"valid_targets_mean": 4653.0,
|
|
"valid_targets_min": 3225
|
|
},
|
|
{
|
|
"epoch": 3.4148003894839336,
|
|
"grad_norm": 0.5526100632903542,
|
|
"learning_rate": 2.4331423428426227e-05,
|
|
"loss": 0.288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12823820114135742,
|
|
"step": 1755,
|
|
"valid_targets_mean": 4305.2,
|
|
"valid_targets_min": 3318
|
|
},
|
|
{
|
|
"epoch": 3.424537487828627,
|
|
"grad_norm": 0.5532058792386103,
|
|
"learning_rate": 2.4236652860863396e-05,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14938613772392273,
|
|
"step": 1760,
|
|
"valid_targets_mean": 4481.2,
|
|
"valid_targets_min": 3624
|
|
},
|
|
{
|
|
"epoch": 3.4342745861733204,
|
|
"grad_norm": 0.5374042900443554,
|
|
"learning_rate": 2.414178259033087e-05,
|
|
"loss": 0.2838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12356485426425934,
|
|
"step": 1765,
|
|
"valid_targets_mean": 4096.5,
|
|
"valid_targets_min": 2509
|
|
},
|
|
{
|
|
"epoch": 3.4440116845180135,
|
|
"grad_norm": 0.547795350043253,
|
|
"learning_rate": 2.404681484945143e-05,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12814772129058838,
|
|
"step": 1770,
|
|
"valid_targets_mean": 4457.9,
|
|
"valid_targets_min": 3025
|
|
},
|
|
{
|
|
"epoch": 3.453748782862707,
|
|
"grad_norm": 0.573682880868741,
|
|
"learning_rate": 2.3951751873141683e-05,
|
|
"loss": 0.3032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16075778007507324,
|
|
"step": 1775,
|
|
"valid_targets_mean": 4378.9,
|
|
"valid_targets_min": 3659
|
|
},
|
|
{
|
|
"epoch": 3.4634858812074003,
|
|
"grad_norm": 0.6700701681577663,
|
|
"learning_rate": 2.385659589855944e-05,
|
|
"loss": 0.2735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12645533680915833,
|
|
"step": 1780,
|
|
"valid_targets_mean": 3833.9,
|
|
"valid_targets_min": 3130
|
|
},
|
|
{
|
|
"epoch": 3.4732229795520935,
|
|
"grad_norm": 0.516920158125621,
|
|
"learning_rate": 2.3761349165051075e-05,
|
|
"loss": 0.2836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13252399861812592,
|
|
"step": 1785,
|
|
"valid_targets_mean": 4755.9,
|
|
"valid_targets_min": 3471
|
|
},
|
|
{
|
|
"epoch": 3.4829600778967866,
|
|
"grad_norm": 0.5407516671910045,
|
|
"learning_rate": 2.366601391409884e-05,
|
|
"loss": 0.2846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15991093218326569,
|
|
"step": 1790,
|
|
"valid_targets_mean": 4990.1,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 3.49269717624148,
|
|
"grad_norm": 0.5399070345981802,
|
|
"learning_rate": 2.35705923892681e-05,
|
|
"loss": 0.2901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12803298234939575,
|
|
"step": 1795,
|
|
"valid_targets_mean": 3880.9,
|
|
"valid_targets_min": 1047
|
|
},
|
|
{
|
|
"epoch": 3.5024342745861734,
|
|
"grad_norm": 0.6051604699248941,
|
|
"learning_rate": 2.3475086836154547e-05,
|
|
"loss": 0.294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1268686205148697,
|
|
"step": 1800,
|
|
"valid_targets_mean": 3766.1,
|
|
"valid_targets_min": 3080
|
|
},
|
|
{
|
|
"epoch": 3.5121713729308666,
|
|
"grad_norm": 0.5592469483450967,
|
|
"learning_rate": 2.3379499502331347e-05,
|
|
"loss": 0.2975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1607351005077362,
|
|
"step": 1805,
|
|
"valid_targets_mean": 4104.2,
|
|
"valid_targets_min": 1010
|
|
},
|
|
{
|
|
"epoch": 3.5219084712755597,
|
|
"grad_norm": 0.5472048813949635,
|
|
"learning_rate": 2.328383263729624e-05,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1516656130552292,
|
|
"step": 1810,
|
|
"valid_targets_mean": 4345.8,
|
|
"valid_targets_min": 3417
|
|
},
|
|
{
|
|
"epoch": 3.5316455696202533,
|
|
"grad_norm": 0.5660511838449515,
|
|
"learning_rate": 2.318808849241861e-05,
|
|
"loss": 0.2978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17257198691368103,
|
|
"step": 1815,
|
|
"valid_targets_mean": 4635.9,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 3.5413826679649465,
|
|
"grad_norm": 0.6086328989938601,
|
|
"learning_rate": 2.3092269320886497e-05,
|
|
"loss": 0.2856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1785932332277298,
|
|
"step": 1820,
|
|
"valid_targets_mean": 4519.5,
|
|
"valid_targets_min": 1159
|
|
},
|
|
{
|
|
"epoch": 3.5511197663096397,
|
|
"grad_norm": 0.560412233251361,
|
|
"learning_rate": 2.299637737765358e-05,
|
|
"loss": 0.2952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.138437420129776,
|
|
"step": 1825,
|
|
"valid_targets_mean": 4574.2,
|
|
"valid_targets_min": 3341
|
|
},
|
|
{
|
|
"epoch": 3.5608568646543333,
|
|
"grad_norm": 0.5688189435596688,
|
|
"learning_rate": 2.290041491938612e-05,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13171690702438354,
|
|
"step": 1830,
|
|
"valid_targets_mean": 3840.8,
|
|
"valid_targets_min": 3129
|
|
},
|
|
{
|
|
"epoch": 3.5705939629990264,
|
|
"grad_norm": 0.5321971426477093,
|
|
"learning_rate": 2.280438420440981e-05,
|
|
"loss": 0.285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15251140296459198,
|
|
"step": 1835,
|
|
"valid_targets_mean": 5420.9,
|
|
"valid_targets_min": 3663
|
|
},
|
|
{
|
|
"epoch": 3.5803310613437196,
|
|
"grad_norm": 0.5278436022313175,
|
|
"learning_rate": 2.2708287492656675e-05,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1386672854423523,
|
|
"step": 1840,
|
|
"valid_targets_mean": 5215.6,
|
|
"valid_targets_min": 2875
|
|
},
|
|
{
|
|
"epoch": 3.5900681596884128,
|
|
"grad_norm": 0.598027527879423,
|
|
"learning_rate": 2.2612127045611867e-05,
|
|
"loss": 0.2919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19816479086875916,
|
|
"step": 1845,
|
|
"valid_targets_mean": 4695.1,
|
|
"valid_targets_min": 2115
|
|
},
|
|
{
|
|
"epoch": 3.599805258033106,
|
|
"grad_norm": 0.5860358117843885,
|
|
"learning_rate": 2.2515905126260455e-05,
|
|
"loss": 0.2972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15322990715503693,
|
|
"step": 1850,
|
|
"valid_targets_mean": 4438.0,
|
|
"valid_targets_min": 1823
|
|
},
|
|
{
|
|
"epoch": 3.6095423563777995,
|
|
"grad_norm": 0.5690131258642075,
|
|
"learning_rate": 2.2419623999034146e-05,
|
|
"loss": 0.2881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14129850268363953,
|
|
"step": 1855,
|
|
"valid_targets_mean": 4125.4,
|
|
"valid_targets_min": 2981
|
|
},
|
|
{
|
|
"epoch": 3.6192794547224927,
|
|
"grad_norm": 0.552589033864262,
|
|
"learning_rate": 2.232328592975802e-05,
|
|
"loss": 0.2849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1523721069097519,
|
|
"step": 1860,
|
|
"valid_targets_mean": 4640.0,
|
|
"valid_targets_min": 3695
|
|
},
|
|
{
|
|
"epoch": 3.629016553067186,
|
|
"grad_norm": 0.5140072456027895,
|
|
"learning_rate": 2.2226893185597204e-05,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16340777277946472,
|
|
"step": 1865,
|
|
"valid_targets_mean": 5004.8,
|
|
"valid_targets_min": 3676
|
|
},
|
|
{
|
|
"epoch": 3.6387536514118795,
|
|
"grad_norm": 0.5551126966184123,
|
|
"learning_rate": 2.213044803500351e-05,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14633576571941376,
|
|
"step": 1870,
|
|
"valid_targets_mean": 4514.2,
|
|
"valid_targets_min": 3420
|
|
},
|
|
{
|
|
"epoch": 3.6484907497565726,
|
|
"grad_norm": 0.5296584477717181,
|
|
"learning_rate": 2.2033952747662052e-05,
|
|
"loss": 0.2874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16924160718917847,
|
|
"step": 1875,
|
|
"valid_targets_mean": 5243.0,
|
|
"valid_targets_min": 3647
|
|
},
|
|
{
|
|
"epoch": 3.6582278481012658,
|
|
"grad_norm": 0.5411777729687121,
|
|
"learning_rate": 2.193740959443782e-05,
|
|
"loss": 0.2929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16427026689052582,
|
|
"step": 1880,
|
|
"valid_targets_mean": 5358.1,
|
|
"valid_targets_min": 3712
|
|
},
|
|
{
|
|
"epoch": 3.667964946445959,
|
|
"grad_norm": 0.6392163613800248,
|
|
"learning_rate": 2.1840820847322274e-05,
|
|
"loss": 0.3019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14590109884738922,
|
|
"step": 1885,
|
|
"valid_targets_mean": 3963.8,
|
|
"valid_targets_min": 2978
|
|
},
|
|
{
|
|
"epoch": 3.6777020447906525,
|
|
"grad_norm": 0.5446464017065908,
|
|
"learning_rate": 2.1744188779379844e-05,
|
|
"loss": 0.2796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11845506727695465,
|
|
"step": 1890,
|
|
"valid_targets_mean": 3757.0,
|
|
"valid_targets_min": 1175
|
|
},
|
|
{
|
|
"epoch": 3.6874391431353457,
|
|
"grad_norm": 0.6307138115490523,
|
|
"learning_rate": 2.1647515664694455e-05,
|
|
"loss": 0.2885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.155528262257576,
|
|
"step": 1895,
|
|
"valid_targets_mean": 3430.5,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 3.697176241480039,
|
|
"grad_norm": 0.4966267118193543,
|
|
"learning_rate": 2.155080377831599e-05,
|
|
"loss": 0.296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11575086414813995,
|
|
"step": 1900,
|
|
"valid_targets_mean": 5169.8,
|
|
"valid_targets_min": 3527
|
|
},
|
|
{
|
|
"epoch": 3.706913339824732,
|
|
"grad_norm": 0.6032965411677166,
|
|
"learning_rate": 2.145405539620677e-05,
|
|
"loss": 0.2847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13318978250026703,
|
|
"step": 1905,
|
|
"valid_targets_mean": 3789.5,
|
|
"valid_targets_min": 864
|
|
},
|
|
{
|
|
"epoch": 3.7166504381694256,
|
|
"grad_norm": 0.4879618884378425,
|
|
"learning_rate": 2.135727279518798e-05,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11494357138872147,
|
|
"step": 1910,
|
|
"valid_targets_mean": 4792.5,
|
|
"valid_targets_min": 3024
|
|
},
|
|
{
|
|
"epoch": 3.726387536514119,
|
|
"grad_norm": 0.6041615973146506,
|
|
"learning_rate": 2.1260458252886117e-05,
|
|
"loss": 0.2992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15611885488033295,
|
|
"step": 1915,
|
|
"valid_targets_mean": 4290.2,
|
|
"valid_targets_min": 3156
|
|
},
|
|
{
|
|
"epoch": 3.736124634858812,
|
|
"grad_norm": 0.5471319297852897,
|
|
"learning_rate": 2.1163614047679322e-05,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1619880199432373,
|
|
"step": 1920,
|
|
"valid_targets_mean": 5631.6,
|
|
"valid_targets_min": 3536
|
|
},
|
|
{
|
|
"epoch": 3.7458617332035056,
|
|
"grad_norm": 0.5433298412415333,
|
|
"learning_rate": 2.1066742458643853e-05,
|
|
"loss": 0.2931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12154314666986465,
|
|
"step": 1925,
|
|
"valid_targets_mean": 3650.4,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 3.7555988315481987,
|
|
"grad_norm": 0.5467862498289342,
|
|
"learning_rate": 2.096984576550037e-05,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14659389853477478,
|
|
"step": 1930,
|
|
"valid_targets_mean": 5092.9,
|
|
"valid_targets_min": 2971
|
|
},
|
|
{
|
|
"epoch": 3.765335929892892,
|
|
"grad_norm": 0.5483320511170172,
|
|
"learning_rate": 2.087292624856033e-05,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13486263155937195,
|
|
"step": 1935,
|
|
"valid_targets_mean": 4104.2,
|
|
"valid_targets_min": 1752
|
|
},
|
|
{
|
|
"epoch": 3.775073028237585,
|
|
"grad_norm": 0.5817233307820583,
|
|
"learning_rate": 2.0775986188672324e-05,
|
|
"loss": 0.2888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15036016702651978,
|
|
"step": 1940,
|
|
"valid_targets_mean": 4762.9,
|
|
"valid_targets_min": 4132
|
|
},
|
|
{
|
|
"epoch": 3.7848101265822782,
|
|
"grad_norm": 0.6344443115544806,
|
|
"learning_rate": 2.067902786716836e-05,
|
|
"loss": 0.2959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14191430807113647,
|
|
"step": 1945,
|
|
"valid_targets_mean": 4761.0,
|
|
"valid_targets_min": 2819
|
|
},
|
|
{
|
|
"epoch": 3.794547224926972,
|
|
"grad_norm": 0.609004672155014,
|
|
"learning_rate": 2.0582053565810242e-05,
|
|
"loss": 0.2909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1991158127784729,
|
|
"step": 1950,
|
|
"valid_targets_mean": 4473.6,
|
|
"valid_targets_min": 3793
|
|
},
|
|
{
|
|
"epoch": 3.804284323271665,
|
|
"grad_norm": 0.5034242782357946,
|
|
"learning_rate": 2.0485065566735795e-05,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1523382067680359,
|
|
"step": 1955,
|
|
"valid_targets_mean": 5951.2,
|
|
"valid_targets_min": 4099
|
|
},
|
|
{
|
|
"epoch": 3.814021421616358,
|
|
"grad_norm": 0.6063258740549531,
|
|
"learning_rate": 2.038806615240523e-05,
|
|
"loss": 0.2979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14123478531837463,
|
|
"step": 1960,
|
|
"valid_targets_mean": 3855.9,
|
|
"valid_targets_min": 832
|
|
},
|
|
{
|
|
"epoch": 3.8237585199610518,
|
|
"grad_norm": 0.5470703757609693,
|
|
"learning_rate": 2.0291057605547393e-05,
|
|
"loss": 0.2832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13107328116893768,
|
|
"step": 1965,
|
|
"valid_targets_mean": 4028.1,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 3.833495618305745,
|
|
"grad_norm": 0.58379490266951,
|
|
"learning_rate": 2.0194042209106023e-05,
|
|
"loss": 0.2915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1519891619682312,
|
|
"step": 1970,
|
|
"valid_targets_mean": 4293.8,
|
|
"valid_targets_min": 3617
|
|
},
|
|
{
|
|
"epoch": 3.843232716650438,
|
|
"grad_norm": 0.6057702397815357,
|
|
"learning_rate": 2.0097022246186092e-05,
|
|
"loss": 0.2906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14032156765460968,
|
|
"step": 1975,
|
|
"valid_targets_mean": 4505.0,
|
|
"valid_targets_min": 3610
|
|
},
|
|
{
|
|
"epoch": 3.8529698149951317,
|
|
"grad_norm": 0.5960263185366536,
|
|
"learning_rate": 2e-05,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14757369458675385,
|
|
"step": 1980,
|
|
"valid_targets_mean": 4093.4,
|
|
"valid_targets_min": 3564
|
|
},
|
|
{
|
|
"epoch": 3.862706913339825,
|
|
"grad_norm": 0.5121200999748795,
|
|
"learning_rate": 1.9902977753813918e-05,
|
|
"loss": 0.2773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12379224598407745,
|
|
"step": 1985,
|
|
"valid_targets_mean": 4788.9,
|
|
"valid_targets_min": 2999
|
|
},
|
|
{
|
|
"epoch": 3.872444011684518,
|
|
"grad_norm": 0.5547844848078258,
|
|
"learning_rate": 1.980595779089398e-05,
|
|
"loss": 0.2753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1422676295042038,
|
|
"step": 1990,
|
|
"valid_targets_mean": 4921.5,
|
|
"valid_targets_min": 1738
|
|
},
|
|
{
|
|
"epoch": 3.882181110029211,
|
|
"grad_norm": 0.5389421099899807,
|
|
"learning_rate": 1.9708942394452613e-05,
|
|
"loss": 0.2909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14590831100940704,
|
|
"step": 1995,
|
|
"valid_targets_mean": 4425.5,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 3.8919182083739043,
|
|
"grad_norm": 0.5340208097427439,
|
|
"learning_rate": 1.9611933847594775e-05,
|
|
"loss": 0.2961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14527465403079987,
|
|
"step": 2000,
|
|
"valid_targets_mean": 4698.1,
|
|
"valid_targets_min": 3293
|
|
},
|
|
{
|
|
"epoch": 3.901655306718598,
|
|
"grad_norm": 0.5545048413553086,
|
|
"learning_rate": 1.951493443326421e-05,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11707796156406403,
|
|
"step": 2005,
|
|
"valid_targets_mean": 4093.0,
|
|
"valid_targets_min": 2641
|
|
},
|
|
{
|
|
"epoch": 3.911392405063291,
|
|
"grad_norm": 0.5970160483314149,
|
|
"learning_rate": 1.9417946434189768e-05,
|
|
"loss": 0.3076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1793132722377777,
|
|
"step": 2010,
|
|
"valid_targets_mean": 3792.6,
|
|
"valid_targets_min": 1045
|
|
},
|
|
{
|
|
"epoch": 3.9211295034079843,
|
|
"grad_norm": 0.5572763457549483,
|
|
"learning_rate": 1.9320972132831643e-05,
|
|
"loss": 0.2973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13952180743217468,
|
|
"step": 2015,
|
|
"valid_targets_mean": 4292.2,
|
|
"valid_targets_min": 2048
|
|
},
|
|
{
|
|
"epoch": 3.930866601752678,
|
|
"grad_norm": 0.5536248691901265,
|
|
"learning_rate": 1.9224013811327686e-05,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14861848950386047,
|
|
"step": 2020,
|
|
"valid_targets_mean": 4233.6,
|
|
"valid_targets_min": 828
|
|
},
|
|
{
|
|
"epoch": 3.940603700097371,
|
|
"grad_norm": 0.5220787788234639,
|
|
"learning_rate": 1.912707375143967e-05,
|
|
"loss": 0.293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1583413928747177,
|
|
"step": 2025,
|
|
"valid_targets_mean": 5155.9,
|
|
"valid_targets_min": 3517
|
|
},
|
|
{
|
|
"epoch": 3.950340798442064,
|
|
"grad_norm": 0.5066297771243922,
|
|
"learning_rate": 1.9030154234499635e-05,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11751598119735718,
|
|
"step": 2030,
|
|
"valid_targets_mean": 4495.5,
|
|
"valid_targets_min": 3007
|
|
},
|
|
{
|
|
"epoch": 3.960077896786758,
|
|
"grad_norm": 0.5482199893330084,
|
|
"learning_rate": 1.8933257541356154e-05,
|
|
"loss": 0.2842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14266768097877502,
|
|
"step": 2035,
|
|
"valid_targets_mean": 5122.5,
|
|
"valid_targets_min": 4211
|
|
},
|
|
{
|
|
"epoch": 3.969814995131451,
|
|
"grad_norm": 0.557872657595188,
|
|
"learning_rate": 1.8836385952320685e-05,
|
|
"loss": 0.2745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14202716946601868,
|
|
"step": 2040,
|
|
"valid_targets_mean": 4115.1,
|
|
"valid_targets_min": 3417
|
|
},
|
|
{
|
|
"epoch": 3.979552093476144,
|
|
"grad_norm": 0.5311877613148388,
|
|
"learning_rate": 1.873954174711389e-05,
|
|
"loss": 0.2876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14849787950515747,
|
|
"step": 2045,
|
|
"valid_targets_mean": 4303.6,
|
|
"valid_targets_min": 3366
|
|
},
|
|
{
|
|
"epoch": 3.9892891918208373,
|
|
"grad_norm": 0.5168859850941633,
|
|
"learning_rate": 1.8642727204812025e-05,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13526809215545654,
|
|
"step": 2050,
|
|
"valid_targets_mean": 4980.6,
|
|
"valid_targets_min": 3593
|
|
},
|
|
{
|
|
"epoch": 3.9990262901655305,
|
|
"grad_norm": 0.5787749198333305,
|
|
"learning_rate": 1.854594460379324e-05,
|
|
"loss": 0.2882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14266636967658997,
|
|
"step": 2055,
|
|
"valid_targets_mean": 4507.5,
|
|
"valid_targets_min": 3729
|
|
},
|
|
{
|
|
"epoch": 4.0077896786757545,
|
|
"grad_norm": 0.5136859761839889,
|
|
"learning_rate": 1.8449196221684015e-05,
|
|
"loss": 0.276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16664046049118042,
|
|
"step": 2060,
|
|
"valid_targets_mean": 5604.5,
|
|
"valid_targets_min": 3549
|
|
},
|
|
{
|
|
"epoch": 4.017526777020448,
|
|
"grad_norm": 0.5981774935499524,
|
|
"learning_rate": 1.835248433530555e-05,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15837013721466064,
|
|
"step": 2065,
|
|
"valid_targets_mean": 4229.9,
|
|
"valid_targets_min": 922
|
|
},
|
|
{
|
|
"epoch": 4.027263875365141,
|
|
"grad_norm": 0.5362352570374173,
|
|
"learning_rate": 1.825581122062016e-05,
|
|
"loss": 0.2705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13771720230579376,
|
|
"step": 2070,
|
|
"valid_targets_mean": 5174.4,
|
|
"valid_targets_min": 2819
|
|
},
|
|
{
|
|
"epoch": 4.0370009737098345,
|
|
"grad_norm": 0.5239106785501526,
|
|
"learning_rate": 1.815917915267773e-05,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12104038149118423,
|
|
"step": 2075,
|
|
"valid_targets_mean": 5380.5,
|
|
"valid_targets_min": 2971
|
|
},
|
|
{
|
|
"epoch": 4.046738072054528,
|
|
"grad_norm": 0.5568779725421424,
|
|
"learning_rate": 1.8062590405562185e-05,
|
|
"loss": 0.2718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11859002709388733,
|
|
"step": 2080,
|
|
"valid_targets_mean": 3867.0,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 4.056475170399221,
|
|
"grad_norm": 0.5844880108896232,
|
|
"learning_rate": 1.7966047252337958e-05,
|
|
"loss": 0.2819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16946063935756683,
|
|
"step": 2085,
|
|
"valid_targets_mean": 4974.0,
|
|
"valid_targets_min": 3894
|
|
},
|
|
{
|
|
"epoch": 4.066212268743914,
|
|
"grad_norm": 0.5293538400053209,
|
|
"learning_rate": 1.78695519649965e-05,
|
|
"loss": 0.2754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1562759131193161,
|
|
"step": 2090,
|
|
"valid_targets_mean": 5811.5,
|
|
"valid_targets_min": 3870
|
|
},
|
|
{
|
|
"epoch": 4.075949367088608,
|
|
"grad_norm": 0.5845566950272216,
|
|
"learning_rate": 1.7773106814402796e-05,
|
|
"loss": 0.2719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14184260368347168,
|
|
"step": 2095,
|
|
"valid_targets_mean": 4350.2,
|
|
"valid_targets_min": 3555
|
|
},
|
|
{
|
|
"epoch": 4.085686465433301,
|
|
"grad_norm": 0.5410430670507724,
|
|
"learning_rate": 1.7676714070241987e-05,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1313467174768448,
|
|
"step": 2100,
|
|
"valid_targets_mean": 4300.6,
|
|
"valid_targets_min": 3564
|
|
},
|
|
{
|
|
"epoch": 4.095423563777994,
|
|
"grad_norm": 0.5567674742100422,
|
|
"learning_rate": 1.7580376000965868e-05,
|
|
"loss": 0.2786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14477092027664185,
|
|
"step": 2105,
|
|
"valid_targets_mean": 4909.0,
|
|
"valid_targets_min": 3737
|
|
},
|
|
{
|
|
"epoch": 4.105160662122687,
|
|
"grad_norm": 0.5619393312312974,
|
|
"learning_rate": 1.748409487373955e-05,
|
|
"loss": 0.2683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13284417986869812,
|
|
"step": 2110,
|
|
"valid_targets_mean": 4546.5,
|
|
"valid_targets_min": 1502
|
|
},
|
|
{
|
|
"epoch": 4.114897760467381,
|
|
"grad_norm": 0.5824019392896247,
|
|
"learning_rate": 1.7387872954388136e-05,
|
|
"loss": 0.2615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13246268033981323,
|
|
"step": 2115,
|
|
"valid_targets_mean": 4690.9,
|
|
"valid_targets_min": 1099
|
|
},
|
|
{
|
|
"epoch": 4.124634858812074,
|
|
"grad_norm": 0.5531557239293169,
|
|
"learning_rate": 1.729171250734333e-05,
|
|
"loss": 0.2595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11213093996047974,
|
|
"step": 2120,
|
|
"valid_targets_mean": 4267.1,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 4.134371957156767,
|
|
"grad_norm": 0.6032030320614948,
|
|
"learning_rate": 1.71956157955902e-05,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1515222191810608,
|
|
"step": 2125,
|
|
"valid_targets_mean": 4064.1,
|
|
"valid_targets_min": 3638
|
|
},
|
|
{
|
|
"epoch": 4.144109055501461,
|
|
"grad_norm": 0.5377391811617392,
|
|
"learning_rate": 1.7099585080613884e-05,
|
|
"loss": 0.2656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14780691266059875,
|
|
"step": 2130,
|
|
"valid_targets_mean": 4704.5,
|
|
"valid_targets_min": 3896
|
|
},
|
|
{
|
|
"epoch": 4.153846153846154,
|
|
"grad_norm": 0.5296855380182146,
|
|
"learning_rate": 1.7003622622346424e-05,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10799191892147064,
|
|
"step": 2135,
|
|
"valid_targets_mean": 3958.4,
|
|
"valid_targets_min": 3331
|
|
},
|
|
{
|
|
"epoch": 4.163583252190847,
|
|
"grad_norm": 0.5733981369338119,
|
|
"learning_rate": 1.6907730679113513e-05,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14801648259162903,
|
|
"step": 2140,
|
|
"valid_targets_mean": 4299.1,
|
|
"valid_targets_min": 2304
|
|
},
|
|
{
|
|
"epoch": 4.1733203505355405,
|
|
"grad_norm": 0.5691736630969233,
|
|
"learning_rate": 1.6811911507581394e-05,
|
|
"loss": 0.2757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12390045821666718,
|
|
"step": 2145,
|
|
"valid_targets_mean": 3538.9,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 4.183057448880234,
|
|
"grad_norm": 0.7425856634700476,
|
|
"learning_rate": 1.6716167362703764e-05,
|
|
"loss": 0.2717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12949338555335999,
|
|
"step": 2150,
|
|
"valid_targets_mean": 4258.9,
|
|
"valid_targets_min": 2917
|
|
},
|
|
{
|
|
"epoch": 4.192794547224927,
|
|
"grad_norm": 0.5129347984318157,
|
|
"learning_rate": 1.6620500497668656e-05,
|
|
"loss": 0.2792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1561252623796463,
|
|
"step": 2155,
|
|
"valid_targets_mean": 6050.6,
|
|
"valid_targets_min": 3633
|
|
},
|
|
{
|
|
"epoch": 4.2025316455696204,
|
|
"grad_norm": 0.5775971462437329,
|
|
"learning_rate": 1.6524913163845456e-05,
|
|
"loss": 0.2597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1378484070301056,
|
|
"step": 2160,
|
|
"valid_targets_mean": 4225.6,
|
|
"valid_targets_min": 1065
|
|
},
|
|
{
|
|
"epoch": 4.212268743914313,
|
|
"grad_norm": 0.5195669326608295,
|
|
"learning_rate": 1.6429407610731902e-05,
|
|
"loss": 0.2794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16003721952438354,
|
|
"step": 2165,
|
|
"valid_targets_mean": 4790.5,
|
|
"valid_targets_min": 3512
|
|
},
|
|
{
|
|
"epoch": 4.222005842259007,
|
|
"grad_norm": 0.5838830036782895,
|
|
"learning_rate": 1.6333986085901167e-05,
|
|
"loss": 0.2566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12029451876878738,
|
|
"step": 2170,
|
|
"valid_targets_mean": 3395.4,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 4.2317429406037,
|
|
"grad_norm": 0.5925278744937844,
|
|
"learning_rate": 1.6238650834948935e-05,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.132302924990654,
|
|
"step": 2175,
|
|
"valid_targets_mean": 4403.1,
|
|
"valid_targets_min": 3407
|
|
},
|
|
{
|
|
"epoch": 4.241480038948393,
|
|
"grad_norm": 0.5497005624299525,
|
|
"learning_rate": 1.614340410144056e-05,
|
|
"loss": 0.2714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13596339523792267,
|
|
"step": 2180,
|
|
"valid_targets_mean": 4251.1,
|
|
"valid_targets_min": 3058
|
|
},
|
|
{
|
|
"epoch": 4.251217137293087,
|
|
"grad_norm": 0.5607598997414214,
|
|
"learning_rate": 1.604824812685832e-05,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12437552213668823,
|
|
"step": 2185,
|
|
"valid_targets_mean": 3670.5,
|
|
"valid_targets_min": 1221
|
|
},
|
|
{
|
|
"epoch": 4.26095423563778,
|
|
"grad_norm": 0.5247628330014403,
|
|
"learning_rate": 1.5953185150548574e-05,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14278113842010498,
|
|
"step": 2190,
|
|
"valid_targets_mean": 4467.4,
|
|
"valid_targets_min": 3341
|
|
},
|
|
{
|
|
"epoch": 4.270691333982473,
|
|
"grad_norm": 0.6155120865012138,
|
|
"learning_rate": 1.5858217409669138e-05,
|
|
"loss": 0.2832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1712673306465149,
|
|
"step": 2195,
|
|
"valid_targets_mean": 4592.9,
|
|
"valid_targets_min": 2662
|
|
},
|
|
{
|
|
"epoch": 4.280428432327167,
|
|
"grad_norm": 0.5352335544552204,
|
|
"learning_rate": 1.576334713913661e-05,
|
|
"loss": 0.2744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1256646066904068,
|
|
"step": 2200,
|
|
"valid_targets_mean": 4240.1,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 4.29016553067186,
|
|
"grad_norm": 0.5217129928467933,
|
|
"learning_rate": 1.566857657157378e-05,
|
|
"loss": 0.2783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13561739027500153,
|
|
"step": 2205,
|
|
"valid_targets_mean": 5007.6,
|
|
"valid_targets_min": 3606
|
|
},
|
|
{
|
|
"epoch": 4.299902629016553,
|
|
"grad_norm": 0.5360187581454293,
|
|
"learning_rate": 1.557390793725707e-05,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14662693440914154,
|
|
"step": 2210,
|
|
"valid_targets_mean": 4260.2,
|
|
"valid_targets_min": 1106
|
|
},
|
|
{
|
|
"epoch": 4.309639727361247,
|
|
"grad_norm": 0.6110785559660482,
|
|
"learning_rate": 1.5479343464064077e-05,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13063398003578186,
|
|
"step": 2215,
|
|
"valid_targets_mean": 3515.4,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 4.319376825705939,
|
|
"grad_norm": 0.6872788257615631,
|
|
"learning_rate": 1.5384885377421132e-05,
|
|
"loss": 0.2748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13563059270381927,
|
|
"step": 2220,
|
|
"valid_targets_mean": 4153.5,
|
|
"valid_targets_min": 3033
|
|
},
|
|
{
|
|
"epoch": 4.329113924050633,
|
|
"grad_norm": 0.5910646433861839,
|
|
"learning_rate": 1.5290535900250914e-05,
|
|
"loss": 0.2846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13082250952720642,
|
|
"step": 2225,
|
|
"valid_targets_mean": 4175.8,
|
|
"valid_targets_min": 951
|
|
},
|
|
{
|
|
"epoch": 4.3388510223953265,
|
|
"grad_norm": 0.6146744901962076,
|
|
"learning_rate": 1.5196297252920142e-05,
|
|
"loss": 0.2918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14690689742565155,
|
|
"step": 2230,
|
|
"valid_targets_mean": 3832.9,
|
|
"valid_targets_min": 1045
|
|
},
|
|
{
|
|
"epoch": 4.348588120740019,
|
|
"grad_norm": 0.5401253600776769,
|
|
"learning_rate": 1.5102171653187367e-05,
|
|
"loss": 0.2721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15970611572265625,
|
|
"step": 2235,
|
|
"valid_targets_mean": 5439.5,
|
|
"valid_targets_min": 3874
|
|
},
|
|
{
|
|
"epoch": 4.358325219084713,
|
|
"grad_norm": 0.5870942049027378,
|
|
"learning_rate": 1.5008161316150699e-05,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14684417843818665,
|
|
"step": 2240,
|
|
"valid_targets_mean": 4266.9,
|
|
"valid_targets_min": 3163
|
|
},
|
|
{
|
|
"epoch": 4.368062317429406,
|
|
"grad_norm": 0.5692162308171781,
|
|
"learning_rate": 1.4914268454195737e-05,
|
|
"loss": 0.2612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11460793763399124,
|
|
"step": 2245,
|
|
"valid_targets_mean": 3348.4,
|
|
"valid_targets_min": 849
|
|
},
|
|
{
|
|
"epoch": 4.377799415774099,
|
|
"grad_norm": 0.5322408190990068,
|
|
"learning_rate": 1.4820495276943491e-05,
|
|
"loss": 0.2687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1452890932559967,
|
|
"step": 2250,
|
|
"valid_targets_mean": 5687.5,
|
|
"valid_targets_min": 3793
|
|
},
|
|
{
|
|
"epoch": 4.387536514118793,
|
|
"grad_norm": 0.6296043507226554,
|
|
"learning_rate": 1.4726843991198389e-05,
|
|
"loss": 0.2841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17112836241722107,
|
|
"step": 2255,
|
|
"valid_targets_mean": 5177.5,
|
|
"valid_targets_min": 3601
|
|
},
|
|
{
|
|
"epoch": 4.3972736124634855,
|
|
"grad_norm": 0.5817379841473254,
|
|
"learning_rate": 1.4633316800896316e-05,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17200392484664917,
|
|
"step": 2260,
|
|
"valid_targets_mean": 5279.5,
|
|
"valid_targets_min": 3815
|
|
},
|
|
{
|
|
"epoch": 4.407010710808179,
|
|
"grad_norm": 0.5878664275101592,
|
|
"learning_rate": 1.453991590705277e-05,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11871537566184998,
|
|
"step": 2265,
|
|
"valid_targets_mean": 3587.9,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 4.416747809152873,
|
|
"grad_norm": 0.5938421904495325,
|
|
"learning_rate": 1.4446643507711068e-05,
|
|
"loss": 0.292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15642127394676208,
|
|
"step": 2270,
|
|
"valid_targets_mean": 4670.1,
|
|
"valid_targets_min": 3445
|
|
},
|
|
{
|
|
"epoch": 4.426484907497565,
|
|
"grad_norm": 0.5575374570304052,
|
|
"learning_rate": 1.4353501797890617e-05,
|
|
"loss": 0.2765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11765633523464203,
|
|
"step": 2275,
|
|
"valid_targets_mean": 4103.6,
|
|
"valid_targets_min": 3729
|
|
},
|
|
{
|
|
"epoch": 4.436222005842259,
|
|
"grad_norm": 0.641354225313106,
|
|
"learning_rate": 1.4260492969535218e-05,
|
|
"loss": 0.2842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13833260536193848,
|
|
"step": 2280,
|
|
"valid_targets_mean": 3654.1,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 4.445959104186953,
|
|
"grad_norm": 0.5659868843679989,
|
|
"learning_rate": 1.416761921146155e-05,
|
|
"loss": 0.2738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14027290046215057,
|
|
"step": 2285,
|
|
"valid_targets_mean": 4713.5,
|
|
"valid_targets_min": 3550
|
|
},
|
|
{
|
|
"epoch": 4.455696202531645,
|
|
"grad_norm": 0.6072066527265636,
|
|
"learning_rate": 1.4074882709307617e-05,
|
|
"loss": 0.2642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12903058528900146,
|
|
"step": 2290,
|
|
"valid_targets_mean": 3712.9,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 4.465433300876339,
|
|
"grad_norm": 0.5966008564633976,
|
|
"learning_rate": 1.398228564548131e-05,
|
|
"loss": 0.2733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13171996176242828,
|
|
"step": 2295,
|
|
"valid_targets_mean": 3790.0,
|
|
"valid_targets_min": 3024
|
|
},
|
|
{
|
|
"epoch": 4.4751703992210325,
|
|
"grad_norm": 0.629784304838426,
|
|
"learning_rate": 1.3889830199109064e-05,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14193607866764069,
|
|
"step": 2300,
|
|
"valid_targets_mean": 3831.9,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 4.484907497565725,
|
|
"grad_norm": 0.5644757434124769,
|
|
"learning_rate": 1.3797518545984577e-05,
|
|
"loss": 0.2697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.157664954662323,
|
|
"step": 2305,
|
|
"valid_targets_mean": 4961.6,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 4.494644595910419,
|
|
"grad_norm": 0.5271108170203499,
|
|
"learning_rate": 1.3705352858517596e-05,
|
|
"loss": 0.2661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13701586425304413,
|
|
"step": 2310,
|
|
"valid_targets_mean": 4128.4,
|
|
"valid_targets_min": 2626
|
|
},
|
|
{
|
|
"epoch": 4.504381694255112,
|
|
"grad_norm": 0.5304460719469469,
|
|
"learning_rate": 1.3613335305682769e-05,
|
|
"loss": 0.2658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13064759969711304,
|
|
"step": 2315,
|
|
"valid_targets_mean": 5048.8,
|
|
"valid_targets_min": 3473
|
|
},
|
|
{
|
|
"epoch": 4.514118792599805,
|
|
"grad_norm": 0.5458627218972989,
|
|
"learning_rate": 1.3521468052968682e-05,
|
|
"loss": 0.2681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14502672851085663,
|
|
"step": 2320,
|
|
"valid_targets_mean": 4709.1,
|
|
"valid_targets_min": 2260
|
|
},
|
|
{
|
|
"epoch": 4.523855890944499,
|
|
"grad_norm": 0.5759038949296822,
|
|
"learning_rate": 1.3429753262326793e-05,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1461823284626007,
|
|
"step": 2325,
|
|
"valid_targets_mean": 4782.9,
|
|
"valid_targets_min": 3318
|
|
},
|
|
{
|
|
"epoch": 4.5335929892891915,
|
|
"grad_norm": 0.567960701771148,
|
|
"learning_rate": 1.3338193092120628e-05,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12537768483161926,
|
|
"step": 2330,
|
|
"valid_targets_mean": 3912.6,
|
|
"valid_targets_min": 3326
|
|
},
|
|
{
|
|
"epoch": 4.543330087633885,
|
|
"grad_norm": 0.7052317228799738,
|
|
"learning_rate": 1.3246789697074958e-05,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16390681266784668,
|
|
"step": 2335,
|
|
"valid_targets_mean": 3901.9,
|
|
"valid_targets_min": 1196
|
|
},
|
|
{
|
|
"epoch": 4.553067185978579,
|
|
"grad_norm": 0.5953304567242834,
|
|
"learning_rate": 1.3155545228225104e-05,
|
|
"loss": 0.2789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12468735128641129,
|
|
"step": 2340,
|
|
"valid_targets_mean": 4355.1,
|
|
"valid_targets_min": 3303
|
|
},
|
|
{
|
|
"epoch": 4.5628042843232715,
|
|
"grad_norm": 0.6039311040992758,
|
|
"learning_rate": 1.3064461832866304e-05,
|
|
"loss": 0.2702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1164741963148117,
|
|
"step": 2345,
|
|
"valid_targets_mean": 3679.5,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 4.572541382667965,
|
|
"grad_norm": 0.5664262024793001,
|
|
"learning_rate": 1.297354165450318e-05,
|
|
"loss": 0.2789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14275625348091125,
|
|
"step": 2350,
|
|
"valid_targets_mean": 3972.5,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 4.582278481012658,
|
|
"grad_norm": 0.5409513205584667,
|
|
"learning_rate": 1.2882786832799305e-05,
|
|
"loss": 0.2714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12675923109054565,
|
|
"step": 2355,
|
|
"valid_targets_mean": 4927.1,
|
|
"valid_targets_min": 3510
|
|
},
|
|
{
|
|
"epoch": 4.592015579357351,
|
|
"grad_norm": 0.5457666608911914,
|
|
"learning_rate": 1.2792199503526848e-05,
|
|
"loss": 0.2529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13596731424331665,
|
|
"step": 2360,
|
|
"valid_targets_mean": 4412.2,
|
|
"valid_targets_min": 2978
|
|
},
|
|
{
|
|
"epoch": 4.601752677702045,
|
|
"grad_norm": 0.5609504378399859,
|
|
"learning_rate": 1.2701781798516288e-05,
|
|
"loss": 0.2657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1252172887325287,
|
|
"step": 2365,
|
|
"valid_targets_mean": 4228.0,
|
|
"valid_targets_min": 3245
|
|
},
|
|
{
|
|
"epoch": 4.611489776046738,
|
|
"grad_norm": 0.5923372193100374,
|
|
"learning_rate": 1.2611535845606273e-05,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1578305959701538,
|
|
"step": 2370,
|
|
"valid_targets_mean": 4186.9,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 4.621226874391431,
|
|
"grad_norm": 0.5169443996407771,
|
|
"learning_rate": 1.2521463768593545e-05,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.126631960272789,
|
|
"step": 2375,
|
|
"valid_targets_mean": 4827.5,
|
|
"valid_targets_min": 3755
|
|
},
|
|
{
|
|
"epoch": 4.630963972736125,
|
|
"grad_norm": 0.5610815881727654,
|
|
"learning_rate": 1.2431567687182939e-05,
|
|
"loss": 0.2796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13907116651535034,
|
|
"step": 2380,
|
|
"valid_targets_mean": 4431.8,
|
|
"valid_targets_min": 1010
|
|
},
|
|
{
|
|
"epoch": 4.640701071080818,
|
|
"grad_norm": 0.5842041962165991,
|
|
"learning_rate": 1.234184971693751e-05,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15577971935272217,
|
|
"step": 2385,
|
|
"valid_targets_mean": 4680.5,
|
|
"valid_targets_min": 2636
|
|
},
|
|
{
|
|
"epoch": 4.650438169425511,
|
|
"grad_norm": 0.5969770329317248,
|
|
"learning_rate": 1.2252311969228754e-05,
|
|
"loss": 0.2775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16110055148601532,
|
|
"step": 2390,
|
|
"valid_targets_mean": 4422.2,
|
|
"valid_targets_min": 3361
|
|
},
|
|
{
|
|
"epoch": 4.660175267770205,
|
|
"grad_norm": 0.7217644614322357,
|
|
"learning_rate": 1.2162956551186904e-05,
|
|
"loss": 0.2603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13301247358322144,
|
|
"step": 2395,
|
|
"valid_targets_mean": 4543.1,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 4.669912366114898,
|
|
"grad_norm": 0.5894166571790098,
|
|
"learning_rate": 1.2073785565651362e-05,
|
|
"loss": 0.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13496392965316772,
|
|
"step": 2400,
|
|
"valid_targets_mean": 4744.4,
|
|
"valid_targets_min": 2098
|
|
},
|
|
{
|
|
"epoch": 4.679649464459591,
|
|
"grad_norm": 0.6092894055973721,
|
|
"learning_rate": 1.1984801111121173e-05,
|
|
"loss": 0.276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15692128241062164,
|
|
"step": 2405,
|
|
"valid_targets_mean": 4867.6,
|
|
"valid_targets_min": 3485
|
|
},
|
|
{
|
|
"epoch": 4.689386562804284,
|
|
"grad_norm": 0.5771258746266464,
|
|
"learning_rate": 1.1896005281705717e-05,
|
|
"loss": 0.2821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13456031680107117,
|
|
"step": 2410,
|
|
"valid_targets_mean": 4214.1,
|
|
"valid_targets_min": 3396
|
|
},
|
|
{
|
|
"epoch": 4.6991236611489775,
|
|
"grad_norm": 0.5568575483086141,
|
|
"learning_rate": 1.180740016707533e-05,
|
|
"loss": 0.27,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12436433881521225,
|
|
"step": 2415,
|
|
"valid_targets_mean": 4504.8,
|
|
"valid_targets_min": 3181
|
|
},
|
|
{
|
|
"epoch": 4.708860759493671,
|
|
"grad_norm": 0.5399990244638656,
|
|
"learning_rate": 1.1718987852412193e-05,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14934614300727844,
|
|
"step": 2420,
|
|
"valid_targets_mean": 5030.1,
|
|
"valid_targets_min": 3875
|
|
},
|
|
{
|
|
"epoch": 4.718597857838364,
|
|
"grad_norm": 0.5341269094156897,
|
|
"learning_rate": 1.1630770418361255e-05,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1262548565864563,
|
|
"step": 2425,
|
|
"valid_targets_mean": 4268.9,
|
|
"valid_targets_min": 3474
|
|
},
|
|
{
|
|
"epoch": 4.728334956183057,
|
|
"grad_norm": 0.56704398213233,
|
|
"learning_rate": 1.154274994098124e-05,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15214189887046814,
|
|
"step": 2430,
|
|
"valid_targets_mean": 4421.8,
|
|
"valid_targets_min": 3771
|
|
},
|
|
{
|
|
"epoch": 4.738072054527751,
|
|
"grad_norm": 0.5708035935238708,
|
|
"learning_rate": 1.1454928491695803e-05,
|
|
"loss": 0.2842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.175454780459404,
|
|
"step": 2435,
|
|
"valid_targets_mean": 4808.1,
|
|
"valid_targets_min": 3429
|
|
},
|
|
{
|
|
"epoch": 4.747809152872444,
|
|
"grad_norm": 0.5961648042694737,
|
|
"learning_rate": 1.1367308137244817e-05,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15418508648872375,
|
|
"step": 2440,
|
|
"valid_targets_mean": 4309.5,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 4.757546251217137,
|
|
"grad_norm": 0.5454320270405673,
|
|
"learning_rate": 1.1279890939635659e-05,
|
|
"loss": 0.2692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14200162887573242,
|
|
"step": 2445,
|
|
"valid_targets_mean": 4722.0,
|
|
"valid_targets_min": 1345
|
|
},
|
|
{
|
|
"epoch": 4.767283349561831,
|
|
"grad_norm": 0.5671420334055753,
|
|
"learning_rate": 1.1192678956094753e-05,
|
|
"loss": 0.2722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14497379958629608,
|
|
"step": 2450,
|
|
"valid_targets_mean": 4579.5,
|
|
"valid_targets_min": 3584
|
|
},
|
|
{
|
|
"epoch": 4.777020447906524,
|
|
"grad_norm": 0.570717589059506,
|
|
"learning_rate": 1.1105674239019132e-05,
|
|
"loss": 0.2676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17003610730171204,
|
|
"step": 2455,
|
|
"valid_targets_mean": 5249.0,
|
|
"valid_targets_min": 3724
|
|
},
|
|
{
|
|
"epoch": 4.786757546251217,
|
|
"grad_norm": 0.5791111279823757,
|
|
"learning_rate": 1.1018878835928142e-05,
|
|
"loss": 0.2751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18683591485023499,
|
|
"step": 2460,
|
|
"valid_targets_mean": 5686.0,
|
|
"valid_targets_min": 2981
|
|
},
|
|
{
|
|
"epoch": 4.79649464459591,
|
|
"grad_norm": 0.6089182003826573,
|
|
"learning_rate": 1.093229478941525e-05,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14613917469978333,
|
|
"step": 2465,
|
|
"valid_targets_mean": 3757.4,
|
|
"valid_targets_min": 1021
|
|
},
|
|
{
|
|
"epoch": 4.806231742940604,
|
|
"grad_norm": 0.6245899796007326,
|
|
"learning_rate": 1.0845924137099953e-05,
|
|
"loss": 0.2789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12675762176513672,
|
|
"step": 2470,
|
|
"valid_targets_mean": 3290.8,
|
|
"valid_targets_min": 1113
|
|
},
|
|
{
|
|
"epoch": 4.815968841285297,
|
|
"grad_norm": 0.5812675378587401,
|
|
"learning_rate": 1.0759768911579896e-05,
|
|
"loss": 0.272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12059226632118225,
|
|
"step": 2475,
|
|
"valid_targets_mean": 4403.4,
|
|
"valid_targets_min": 3315
|
|
},
|
|
{
|
|
"epoch": 4.82570593962999,
|
|
"grad_norm": 0.6272705385483619,
|
|
"learning_rate": 1.0673831140382962e-05,
|
|
"loss": 0.2658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13646277785301208,
|
|
"step": 2480,
|
|
"valid_targets_mean": 4803.5,
|
|
"valid_targets_min": 857
|
|
},
|
|
{
|
|
"epoch": 4.8354430379746836,
|
|
"grad_norm": 0.5983443056746343,
|
|
"learning_rate": 1.0588112845919594e-05,
|
|
"loss": 0.2792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1404026746749878,
|
|
"step": 2485,
|
|
"valid_targets_mean": 4592.2,
|
|
"valid_targets_min": 3043
|
|
},
|
|
{
|
|
"epoch": 4.845180136319377,
|
|
"grad_norm": 0.583017939789062,
|
|
"learning_rate": 1.0502616045435202e-05,
|
|
"loss": 0.2605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11172003298997879,
|
|
"step": 2490,
|
|
"valid_targets_mean": 5374.1,
|
|
"valid_targets_min": 956
|
|
},
|
|
{
|
|
"epoch": 4.85491723466407,
|
|
"grad_norm": 0.620001414479601,
|
|
"learning_rate": 1.0417342750962671e-05,
|
|
"loss": 0.2644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14799468219280243,
|
|
"step": 2495,
|
|
"valid_targets_mean": 4181.1,
|
|
"valid_targets_min": 978
|
|
},
|
|
{
|
|
"epoch": 4.8646543330087635,
|
|
"grad_norm": 0.5976939250938321,
|
|
"learning_rate": 1.0332294969275032e-05,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1401589810848236,
|
|
"step": 2500,
|
|
"valid_targets_mean": 4004.1,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 4.874391431353457,
|
|
"grad_norm": 0.5520969320433156,
|
|
"learning_rate": 1.0247474701838222e-05,
|
|
"loss": 0.2743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16280868649482727,
|
|
"step": 2505,
|
|
"valid_targets_mean": 4841.1,
|
|
"valid_targets_min": 3213
|
|
},
|
|
{
|
|
"epoch": 4.88412852969815,
|
|
"grad_norm": 0.5691344701092215,
|
|
"learning_rate": 1.0162883944763999e-05,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11899834871292114,
|
|
"step": 2510,
|
|
"valid_targets_mean": 3853.6,
|
|
"valid_targets_min": 793
|
|
},
|
|
{
|
|
"epoch": 4.893865628042843,
|
|
"grad_norm": 0.5327517354734008,
|
|
"learning_rate": 1.0078524688762955e-05,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13255907595157623,
|
|
"step": 2515,
|
|
"valid_targets_mean": 4446.5,
|
|
"valid_targets_min": 2942
|
|
},
|
|
{
|
|
"epoch": 4.903602726387536,
|
|
"grad_norm": 0.5585537320901789,
|
|
"learning_rate": 9.994398919097634e-06,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15005439519882202,
|
|
"step": 2520,
|
|
"valid_targets_mean": 4569.1,
|
|
"valid_targets_min": 3356
|
|
},
|
|
{
|
|
"epoch": 4.91333982473223,
|
|
"grad_norm": 0.5600367879730025,
|
|
"learning_rate": 9.910508615535876e-06,
|
|
"loss": 0.2845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12984417378902435,
|
|
"step": 2525,
|
|
"valid_targets_mean": 4127.2,
|
|
"valid_targets_min": 3519
|
|
},
|
|
{
|
|
"epoch": 4.923076923076923,
|
|
"grad_norm": 0.5601312156014363,
|
|
"learning_rate": 9.826855752304209e-06,
|
|
"loss": 0.2793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1686951071023941,
|
|
"step": 2530,
|
|
"valid_targets_mean": 4656.6,
|
|
"valid_targets_min": 3366
|
|
},
|
|
{
|
|
"epoch": 4.932814021421616,
|
|
"grad_norm": 0.5479108158473612,
|
|
"learning_rate": 9.743442298041348e-06,
|
|
"loss": 0.2794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13912788033485413,
|
|
"step": 2535,
|
|
"valid_targets_mean": 4483.1,
|
|
"valid_targets_min": 2814
|
|
},
|
|
{
|
|
"epoch": 4.94255111976631,
|
|
"grad_norm": 0.5440732727837874,
|
|
"learning_rate": 9.660270215751905e-06,
|
|
"loss": 0.2711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12737274169921875,
|
|
"step": 2540,
|
|
"valid_targets_mean": 4983.1,
|
|
"valid_targets_min": 3555
|
|
},
|
|
{
|
|
"epoch": 4.952288218111003,
|
|
"grad_norm": 0.5550387817773772,
|
|
"learning_rate": 9.577341462760182e-06,
|
|
"loss": 0.278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10945715010166168,
|
|
"step": 2545,
|
|
"valid_targets_mean": 4190.8,
|
|
"valid_targets_min": 3008
|
|
},
|
|
{
|
|
"epoch": 4.962025316455696,
|
|
"grad_norm": 0.5509232769715511,
|
|
"learning_rate": 9.494657990664105e-06,
|
|
"loss": 0.2794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1368618607521057,
|
|
"step": 2550,
|
|
"valid_targets_mean": 4022.4,
|
|
"valid_targets_min": 1103
|
|
},
|
|
{
|
|
"epoch": 4.97176241480039,
|
|
"grad_norm": 0.5410045293695397,
|
|
"learning_rate": 9.412221745289296e-06,
|
|
"loss": 0.2693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12477469444274902,
|
|
"step": 2555,
|
|
"valid_targets_mean": 4277.1,
|
|
"valid_targets_min": 3511
|
|
},
|
|
{
|
|
"epoch": 4.981499513145083,
|
|
"grad_norm": 0.5971026784303557,
|
|
"learning_rate": 9.330034666643304e-06,
|
|
"loss": 0.288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1349230855703354,
|
|
"step": 2560,
|
|
"valid_targets_mean": 3912.9,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 4.991236611489776,
|
|
"grad_norm": 0.5876186069657041,
|
|
"learning_rate": 9.248098688869917e-06,
|
|
"loss": 0.2581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1389990895986557,
|
|
"step": 2565,
|
|
"valid_targets_mean": 4729.9,
|
|
"valid_targets_min": 3588
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.6652018241186674,
|
|
"learning_rate": 9.166415740203644e-06,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2316880077123642,
|
|
"step": 2570,
|
|
"valid_targets_mean": 5446.8,
|
|
"valid_targets_min": 1743
|
|
},
|
|
{
|
|
"epoch": 5.009737098344694,
|
|
"grad_norm": 0.5288795494831254,
|
|
"learning_rate": 9.084987742924365e-06,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15422624349594116,
|
|
"step": 2575,
|
|
"valid_targets_mean": 5806.0,
|
|
"valid_targets_min": 2647
|
|
},
|
|
{
|
|
"epoch": 5.019474196689386,
|
|
"grad_norm": 0.5879412791025103,
|
|
"learning_rate": 9.003816613312097e-06,
|
|
"loss": 0.2715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13253945112228394,
|
|
"step": 2580,
|
|
"valid_targets_mean": 4170.8,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 5.02921129503408,
|
|
"grad_norm": 0.5583616674162757,
|
|
"learning_rate": 8.922904261601853e-06,
|
|
"loss": 0.2559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1358906626701355,
|
|
"step": 2585,
|
|
"valid_targets_mean": 5283.0,
|
|
"valid_targets_min": 3474
|
|
},
|
|
{
|
|
"epoch": 5.0389483933787735,
|
|
"grad_norm": 0.5520309454233593,
|
|
"learning_rate": 8.842252591938725e-06,
|
|
"loss": 0.27,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11403089016675949,
|
|
"step": 2590,
|
|
"valid_targets_mean": 4317.2,
|
|
"valid_targets_min": 579
|
|
},
|
|
{
|
|
"epoch": 5.048685491723466,
|
|
"grad_norm": 0.5307420709186887,
|
|
"learning_rate": 8.76186350233306e-06,
|
|
"loss": 0.2599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15633825957775116,
|
|
"step": 2595,
|
|
"valid_targets_mean": 4873.6,
|
|
"valid_targets_min": 2532
|
|
},
|
|
{
|
|
"epoch": 5.05842259006816,
|
|
"grad_norm": 0.5771965979889756,
|
|
"learning_rate": 8.681738884615805e-06,
|
|
"loss": 0.2578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11819124221801758,
|
|
"step": 2600,
|
|
"valid_targets_mean": 4147.9,
|
|
"valid_targets_min": 3700
|
|
},
|
|
{
|
|
"epoch": 5.068159688412853,
|
|
"grad_norm": 0.540471816725143,
|
|
"learning_rate": 8.601880624393964e-06,
|
|
"loss": 0.263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15742212533950806,
|
|
"step": 2605,
|
|
"valid_targets_mean": 5679.2,
|
|
"valid_targets_min": 3510
|
|
},
|
|
{
|
|
"epoch": 5.077896786757546,
|
|
"grad_norm": 0.5534055847740066,
|
|
"learning_rate": 8.522290601006245e-06,
|
|
"loss": 0.2536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15606442093849182,
|
|
"step": 2610,
|
|
"valid_targets_mean": 4930.2,
|
|
"valid_targets_min": 3773
|
|
},
|
|
{
|
|
"epoch": 5.08763388510224,
|
|
"grad_norm": 0.512109161015483,
|
|
"learning_rate": 8.442970687478815e-06,
|
|
"loss": 0.2668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11657322943210602,
|
|
"step": 2615,
|
|
"valid_targets_mean": 4701.5,
|
|
"valid_targets_min": 3649
|
|
},
|
|
{
|
|
"epoch": 5.0973709834469325,
|
|
"grad_norm": 0.5499587264369343,
|
|
"learning_rate": 8.363922750481239e-06,
|
|
"loss": 0.2575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12532466650009155,
|
|
"step": 2620,
|
|
"valid_targets_mean": 5028.0,
|
|
"valid_targets_min": 3340
|
|
},
|
|
{
|
|
"epoch": 5.107108081791626,
|
|
"grad_norm": 0.6864978487855258,
|
|
"learning_rate": 8.28514865028253e-06,
|
|
"loss": 0.2665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1322849690914154,
|
|
"step": 2625,
|
|
"valid_targets_mean": 3335.9,
|
|
"valid_targets_min": 1020
|
|
},
|
|
{
|
|
"epoch": 5.11684518013632,
|
|
"grad_norm": 0.5347325764819136,
|
|
"learning_rate": 8.2066502407074e-06,
|
|
"loss": 0.2721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1218637079000473,
|
|
"step": 2630,
|
|
"valid_targets_mean": 5235.8,
|
|
"valid_targets_min": 3984
|
|
},
|
|
{
|
|
"epoch": 5.1265822784810124,
|
|
"grad_norm": 0.5416486232067882,
|
|
"learning_rate": 8.128429369092599e-06,
|
|
"loss": 0.2492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11215312033891678,
|
|
"step": 2635,
|
|
"valid_targets_mean": 3770.8,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 5.136319376825706,
|
|
"grad_norm": 0.545270846539876,
|
|
"learning_rate": 8.050487876243473e-06,
|
|
"loss": 0.2573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1312100738286972,
|
|
"step": 2640,
|
|
"valid_targets_mean": 4953.8,
|
|
"valid_targets_min": 3655
|
|
},
|
|
{
|
|
"epoch": 5.146056475170399,
|
|
"grad_norm": 0.578536734373265,
|
|
"learning_rate": 7.972827596390594e-06,
|
|
"loss": 0.2563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14729297161102295,
|
|
"step": 2645,
|
|
"valid_targets_mean": 5177.2,
|
|
"valid_targets_min": 3693
|
|
},
|
|
{
|
|
"epoch": 5.155793573515092,
|
|
"grad_norm": 0.6341996643049761,
|
|
"learning_rate": 7.89545035714667e-06,
|
|
"loss": 0.2702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12493026256561279,
|
|
"step": 2650,
|
|
"valid_targets_mean": 3539.8,
|
|
"valid_targets_min": 1060
|
|
},
|
|
{
|
|
"epoch": 5.165530671859786,
|
|
"grad_norm": 0.5945101604753489,
|
|
"learning_rate": 7.81835797946347e-06,
|
|
"loss": 0.2597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15727218985557556,
|
|
"step": 2655,
|
|
"valid_targets_mean": 5600.0,
|
|
"valid_targets_min": 3922
|
|
},
|
|
{
|
|
"epoch": 5.175267770204479,
|
|
"grad_norm": 0.46822873296134326,
|
|
"learning_rate": 7.741552277589e-06,
|
|
"loss": 0.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10561253875494003,
|
|
"step": 2660,
|
|
"valid_targets_mean": 6128.2,
|
|
"valid_targets_min": 3621
|
|
},
|
|
{
|
|
"epoch": 5.185004868549172,
|
|
"grad_norm": 0.568516292388697,
|
|
"learning_rate": 7.665035059024794e-06,
|
|
"loss": 0.2596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12044404447078705,
|
|
"step": 2665,
|
|
"valid_targets_mean": 4455.0,
|
|
"valid_targets_min": 3070
|
|
},
|
|
{
|
|
"epoch": 5.194741966893866,
|
|
"grad_norm": 0.6118147617843488,
|
|
"learning_rate": 7.5888081244833936e-06,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13256195187568665,
|
|
"step": 2670,
|
|
"valid_targets_mean": 3765.0,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 5.204479065238559,
|
|
"grad_norm": 0.6113159519914808,
|
|
"learning_rate": 7.512873267845948e-06,
|
|
"loss": 0.258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12021034955978394,
|
|
"step": 2675,
|
|
"valid_targets_mean": 3537.5,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 5.214216163583252,
|
|
"grad_norm": 0.5371681464248128,
|
|
"learning_rate": 7.437232276120037e-06,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16698968410491943,
|
|
"step": 2680,
|
|
"valid_targets_mean": 6355.2,
|
|
"valid_targets_min": 3737
|
|
},
|
|
{
|
|
"epoch": 5.223953261927946,
|
|
"grad_norm": 0.5624774569171574,
|
|
"learning_rate": 7.361886929397568e-06,
|
|
"loss": 0.2537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11259446293115616,
|
|
"step": 2685,
|
|
"valid_targets_mean": 4247.9,
|
|
"valid_targets_min": 2854
|
|
},
|
|
{
|
|
"epoch": 5.233690360272639,
|
|
"grad_norm": 0.6004229193200243,
|
|
"learning_rate": 7.286839000812922e-06,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11398347467184067,
|
|
"step": 2690,
|
|
"valid_targets_mean": 4567.8,
|
|
"valid_targets_min": 4048
|
|
},
|
|
{
|
|
"epoch": 5.243427458617332,
|
|
"grad_norm": 0.5253484451508934,
|
|
"learning_rate": 7.212090256501185e-06,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17712914943695068,
|
|
"step": 2695,
|
|
"valid_targets_mean": 5988.6,
|
|
"valid_targets_min": 3836
|
|
},
|
|
{
|
|
"epoch": 5.253164556962025,
|
|
"grad_norm": 0.5895239470905921,
|
|
"learning_rate": 7.137642455556654e-06,
|
|
"loss": 0.264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.144203782081604,
|
|
"step": 2700,
|
|
"valid_targets_mean": 4873.8,
|
|
"valid_targets_min": 3394
|
|
},
|
|
{
|
|
"epoch": 5.2629016553067185,
|
|
"grad_norm": 0.6872523595647029,
|
|
"learning_rate": 7.063497349991364e-06,
|
|
"loss": 0.2439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11664122343063354,
|
|
"step": 2705,
|
|
"valid_targets_mean": 4521.0,
|
|
"valid_targets_min": 887
|
|
},
|
|
{
|
|
"epoch": 5.272638753651412,
|
|
"grad_norm": 0.6073837322330058,
|
|
"learning_rate": 6.9896566846939e-06,
|
|
"loss": 0.2531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1288275569677353,
|
|
"step": 2710,
|
|
"valid_targets_mean": 4056.2,
|
|
"valid_targets_min": 3558
|
|
},
|
|
{
|
|
"epoch": 5.282375851996105,
|
|
"grad_norm": 0.6071066812873193,
|
|
"learning_rate": 6.916122197388322e-06,
|
|
"loss": 0.2668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12028621882200241,
|
|
"step": 2715,
|
|
"valid_targets_mean": 4991.9,
|
|
"valid_targets_min": 3867
|
|
},
|
|
{
|
|
"epoch": 5.292112950340798,
|
|
"grad_norm": 0.723151831785113,
|
|
"learning_rate": 6.842895618593275e-06,
|
|
"loss": 0.272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19154182076454163,
|
|
"step": 2720,
|
|
"valid_targets_mean": 5541.2,
|
|
"valid_targets_min": 3173
|
|
},
|
|
{
|
|
"epoch": 5.301850048685492,
|
|
"grad_norm": 0.5998593796028454,
|
|
"learning_rate": 6.769978671581257e-06,
|
|
"loss": 0.2585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10106934607028961,
|
|
"step": 2725,
|
|
"valid_targets_mean": 3416.6,
|
|
"valid_targets_min": 1094
|
|
},
|
|
{
|
|
"epoch": 5.311587147030185,
|
|
"grad_norm": 0.5195959110632384,
|
|
"learning_rate": 6.697373072338065e-06,
|
|
"loss": 0.2692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10479360818862915,
|
|
"step": 2730,
|
|
"valid_targets_mean": 4033.4,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 5.321324245374878,
|
|
"grad_norm": 0.5572841060095769,
|
|
"learning_rate": 6.625080529522432e-06,
|
|
"loss": 0.2606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10030199587345123,
|
|
"step": 2735,
|
|
"valid_targets_mean": 3960.1,
|
|
"valid_targets_min": 1305
|
|
},
|
|
{
|
|
"epoch": 5.331061343719572,
|
|
"grad_norm": 0.5876187645850159,
|
|
"learning_rate": 6.55310274442579e-06,
|
|
"loss": 0.2682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14326044917106628,
|
|
"step": 2740,
|
|
"valid_targets_mean": 4218.1,
|
|
"valid_targets_min": 1005
|
|
},
|
|
{
|
|
"epoch": 5.340798442064265,
|
|
"grad_norm": 0.5809716381084999,
|
|
"learning_rate": 6.481441410932221e-06,
|
|
"loss": 0.2653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11509965360164642,
|
|
"step": 2745,
|
|
"valid_targets_mean": 4032.2,
|
|
"valid_targets_min": 2890
|
|
},
|
|
{
|
|
"epoch": 5.350535540408958,
|
|
"grad_norm": 0.8427088337150904,
|
|
"learning_rate": 6.410098215478655e-06,
|
|
"loss": 0.263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13928726315498352,
|
|
"step": 2750,
|
|
"valid_targets_mean": 5008.1,
|
|
"valid_targets_min": 3982
|
|
},
|
|
{
|
|
"epoch": 5.360272638753651,
|
|
"grad_norm": 0.5167702046128454,
|
|
"learning_rate": 6.339074837015115e-06,
|
|
"loss": 0.2608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15279874205589294,
|
|
"step": 2755,
|
|
"valid_targets_mean": 5539.8,
|
|
"valid_targets_min": 3893
|
|
},
|
|
{
|
|
"epoch": 5.370009737098345,
|
|
"grad_norm": 0.6280450199376403,
|
|
"learning_rate": 6.268372946965245e-06,
|
|
"loss": 0.2559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14101547002792358,
|
|
"step": 2760,
|
|
"valid_targets_mean": 4100.6,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 5.379746835443038,
|
|
"grad_norm": 0.627642627610836,
|
|
"learning_rate": 6.197994209186959e-06,
|
|
"loss": 0.2733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1552235633134842,
|
|
"step": 2765,
|
|
"valid_targets_mean": 4326.5,
|
|
"valid_targets_min": 3174
|
|
},
|
|
{
|
|
"epoch": 5.389483933787731,
|
|
"grad_norm": 0.6262255156817504,
|
|
"learning_rate": 6.1279402799332934e-06,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11484979093074799,
|
|
"step": 2770,
|
|
"valid_targets_mean": 3308.5,
|
|
"valid_targets_min": 815
|
|
},
|
|
{
|
|
"epoch": 5.3992210321324245,
|
|
"grad_norm": 0.5695957620487605,
|
|
"learning_rate": 6.058212807813426e-06,
|
|
"loss": 0.2574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13625118136405945,
|
|
"step": 2775,
|
|
"valid_targets_mean": 4530.5,
|
|
"valid_targets_min": 2763
|
|
},
|
|
{
|
|
"epoch": 5.408958130477118,
|
|
"grad_norm": 0.6125202278142433,
|
|
"learning_rate": 5.988813433753869e-06,
|
|
"loss": 0.2672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12049006670713425,
|
|
"step": 2780,
|
|
"valid_targets_mean": 3673.1,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 5.418695228821811,
|
|
"grad_norm": 0.5750127017558843,
|
|
"learning_rate": 5.919743790959888e-06,
|
|
"loss": 0.2556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13272026181221008,
|
|
"step": 2785,
|
|
"valid_targets_mean": 5180.9,
|
|
"valid_targets_min": 3144
|
|
},
|
|
{
|
|
"epoch": 5.4284323271665045,
|
|
"grad_norm": 0.6018892527349212,
|
|
"learning_rate": 5.851005504877012e-06,
|
|
"loss": 0.2672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13688334822654724,
|
|
"step": 2790,
|
|
"valid_targets_mean": 4414.6,
|
|
"valid_targets_min": 2723
|
|
},
|
|
{
|
|
"epoch": 5.438169425511198,
|
|
"grad_norm": 0.5494577555889572,
|
|
"learning_rate": 5.782600193152819e-06,
|
|
"loss": 0.2709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14231738448143005,
|
|
"step": 2795,
|
|
"valid_targets_mean": 4667.2,
|
|
"valid_targets_min": 3274
|
|
},
|
|
{
|
|
"epoch": 5.447906523855891,
|
|
"grad_norm": 0.6622453560914412,
|
|
"learning_rate": 5.714529465598872e-06,
|
|
"loss": 0.2572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13254985213279724,
|
|
"step": 2800,
|
|
"valid_targets_mean": 4097.4,
|
|
"valid_targets_min": 817
|
|
},
|
|
{
|
|
"epoch": 5.457643622200584,
|
|
"grad_norm": 0.5317379386292963,
|
|
"learning_rate": 5.646794924152808e-06,
|
|
"loss": 0.2597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11913903057575226,
|
|
"step": 2805,
|
|
"valid_targets_mean": 4744.9,
|
|
"valid_targets_min": 3284
|
|
},
|
|
{
|
|
"epoch": 5.467380720545277,
|
|
"grad_norm": 0.5818749298261658,
|
|
"learning_rate": 5.579398162840655e-06,
|
|
"loss": 0.2703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1679440438747406,
|
|
"step": 2810,
|
|
"valid_targets_mean": 4563.0,
|
|
"valid_targets_min": 3303
|
|
},
|
|
{
|
|
"epoch": 5.477117818889971,
|
|
"grad_norm": 0.5413551686425397,
|
|
"learning_rate": 5.512340767739315e-06,
|
|
"loss": 0.2568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12620975077152252,
|
|
"step": 2815,
|
|
"valid_targets_mean": 4558.0,
|
|
"valid_targets_min": 3316
|
|
},
|
|
{
|
|
"epoch": 5.486854917234664,
|
|
"grad_norm": 0.5757817513815339,
|
|
"learning_rate": 5.445624316939244e-06,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12110395729541779,
|
|
"step": 2820,
|
|
"valid_targets_mean": 4123.9,
|
|
"valid_targets_min": 1036
|
|
},
|
|
{
|
|
"epoch": 5.496592015579357,
|
|
"grad_norm": 0.572096076627566,
|
|
"learning_rate": 5.379250380507302e-06,
|
|
"loss": 0.2672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14133252203464508,
|
|
"step": 2825,
|
|
"valid_targets_mean": 4435.5,
|
|
"valid_targets_min": 1097
|
|
},
|
|
{
|
|
"epoch": 5.506329113924051,
|
|
"grad_norm": 0.5335056653420006,
|
|
"learning_rate": 5.313220520449811e-06,
|
|
"loss": 0.2615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12444727122783661,
|
|
"step": 2830,
|
|
"valid_targets_mean": 4310.5,
|
|
"valid_targets_min": 3149
|
|
},
|
|
{
|
|
"epoch": 5.516066212268744,
|
|
"grad_norm": 0.5314357184193337,
|
|
"learning_rate": 5.247536290675821e-06,
|
|
"loss": 0.2608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13497817516326904,
|
|
"step": 2835,
|
|
"valid_targets_mean": 5504.6,
|
|
"valid_targets_min": 3408
|
|
},
|
|
{
|
|
"epoch": 5.525803310613437,
|
|
"grad_norm": 0.6197150637076682,
|
|
"learning_rate": 5.182199236960476e-06,
|
|
"loss": 0.2581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12460529804229736,
|
|
"step": 2840,
|
|
"valid_targets_mean": 3932.4,
|
|
"valid_targets_min": 3555
|
|
},
|
|
{
|
|
"epoch": 5.535540408958131,
|
|
"grad_norm": 0.5554177542411431,
|
|
"learning_rate": 5.117210896908702e-06,
|
|
"loss": 0.2561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11300884932279587,
|
|
"step": 2845,
|
|
"valid_targets_mean": 4468.1,
|
|
"valid_targets_min": 3298
|
|
},
|
|
{
|
|
"epoch": 5.545277507302824,
|
|
"grad_norm": 0.5428969300824971,
|
|
"learning_rate": 5.0525727999189865e-06,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13841629028320312,
|
|
"step": 2850,
|
|
"valid_targets_mean": 5221.8,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 5.555014605647517,
|
|
"grad_norm": 0.5487562725991112,
|
|
"learning_rate": 4.988286467147416e-06,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12354198843240738,
|
|
"step": 2855,
|
|
"valid_targets_mean": 4753.5,
|
|
"valid_targets_min": 3200
|
|
},
|
|
{
|
|
"epoch": 5.5647517039922105,
|
|
"grad_norm": 0.6157379936764658,
|
|
"learning_rate": 4.924353411471834e-06,
|
|
"loss": 0.2631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13908755779266357,
|
|
"step": 2860,
|
|
"valid_targets_mean": 4255.0,
|
|
"valid_targets_min": 3570
|
|
},
|
|
{
|
|
"epoch": 5.574488802336903,
|
|
"grad_norm": 0.6194877458178691,
|
|
"learning_rate": 4.860775137456275e-06,
|
|
"loss": 0.2748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15384067595005035,
|
|
"step": 2865,
|
|
"valid_targets_mean": 4724.9,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 5.584225900681597,
|
|
"grad_norm": 0.6122741656212982,
|
|
"learning_rate": 4.797553141315543e-06,
|
|
"loss": 0.2591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12587867677211761,
|
|
"step": 2870,
|
|
"valid_targets_mean": 4549.8,
|
|
"valid_targets_min": 3489
|
|
},
|
|
{
|
|
"epoch": 5.5939629990262905,
|
|
"grad_norm": 0.6552174659046002,
|
|
"learning_rate": 4.734688910880001e-06,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12267832458019257,
|
|
"step": 2875,
|
|
"valid_targets_mean": 3143.9,
|
|
"valid_targets_min": 392
|
|
},
|
|
{
|
|
"epoch": 5.603700097370983,
|
|
"grad_norm": 0.6454771690548284,
|
|
"learning_rate": 4.672183925560559e-06,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14382687211036682,
|
|
"step": 2880,
|
|
"valid_targets_mean": 4443.1,
|
|
"valid_targets_min": 3484
|
|
},
|
|
{
|
|
"epoch": 5.613437195715677,
|
|
"grad_norm": 0.5996830256797818,
|
|
"learning_rate": 4.610039656313854e-06,
|
|
"loss": 0.2661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15515586733818054,
|
|
"step": 2885,
|
|
"valid_targets_mean": 4243.8,
|
|
"valid_targets_min": 3004
|
|
},
|
|
{
|
|
"epoch": 5.62317429406037,
|
|
"grad_norm": 0.5692227834245153,
|
|
"learning_rate": 4.548257565607643e-06,
|
|
"loss": 0.2556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13581958413124084,
|
|
"step": 2890,
|
|
"valid_targets_mean": 5126.1,
|
|
"valid_targets_min": 3410
|
|
},
|
|
{
|
|
"epoch": 5.632911392405063,
|
|
"grad_norm": 0.6015762804363665,
|
|
"learning_rate": 4.486839107386378e-06,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14768774807453156,
|
|
"step": 2895,
|
|
"valid_targets_mean": 4318.8,
|
|
"valid_targets_min": 3837
|
|
},
|
|
{
|
|
"epoch": 5.642648490749757,
|
|
"grad_norm": 0.5684074934654639,
|
|
"learning_rate": 4.425785727036979e-06,
|
|
"loss": 0.2604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1376716047525406,
|
|
"step": 2900,
|
|
"valid_targets_mean": 4161.8,
|
|
"valid_targets_min": 3423
|
|
},
|
|
{
|
|
"epoch": 5.65238558909445,
|
|
"grad_norm": 0.58973420331503,
|
|
"learning_rate": 4.365098861354862e-06,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14269214868545532,
|
|
"step": 2905,
|
|
"valid_targets_mean": 4265.8,
|
|
"valid_targets_min": 3175
|
|
},
|
|
{
|
|
"epoch": 5.662122687439143,
|
|
"grad_norm": 0.6085675319443372,
|
|
"learning_rate": 4.304779938510073e-06,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14632467925548553,
|
|
"step": 2910,
|
|
"valid_targets_mean": 4133.9,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 5.671859785783837,
|
|
"grad_norm": 0.6079693750729607,
|
|
"learning_rate": 4.244830378013689e-06,
|
|
"loss": 0.2662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1428033411502838,
|
|
"step": 2915,
|
|
"valid_targets_mean": 4243.4,
|
|
"valid_targets_min": 3191
|
|
},
|
|
{
|
|
"epoch": 5.681596884128529,
|
|
"grad_norm": 0.5606109685233575,
|
|
"learning_rate": 4.185251590684458e-06,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13005805015563965,
|
|
"step": 2920,
|
|
"valid_targets_mean": 4592.1,
|
|
"valid_targets_min": 1788
|
|
},
|
|
{
|
|
"epoch": 5.691333982473223,
|
|
"grad_norm": 0.5564845154951865,
|
|
"learning_rate": 4.126044978615544e-06,
|
|
"loss": 0.2573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1190466582775116,
|
|
"step": 2925,
|
|
"valid_targets_mean": 4435.8,
|
|
"valid_targets_min": 3534
|
|
},
|
|
{
|
|
"epoch": 5.701071080817917,
|
|
"grad_norm": 0.5668478065834343,
|
|
"learning_rate": 4.067211935141551e-06,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14294397830963135,
|
|
"step": 2930,
|
|
"valid_targets_mean": 5025.0,
|
|
"valid_targets_min": 3876
|
|
},
|
|
{
|
|
"epoch": 5.710808179162609,
|
|
"grad_norm": 0.6147896620823001,
|
|
"learning_rate": 4.008753844805737e-06,
|
|
"loss": 0.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14136391878128052,
|
|
"step": 2935,
|
|
"valid_targets_mean": 4029.4,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 5.720545277507303,
|
|
"grad_norm": 0.5626589156836649,
|
|
"learning_rate": 3.950672083327425e-06,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13519398868083954,
|
|
"step": 2940,
|
|
"valid_targets_mean": 4848.5,
|
|
"valid_targets_min": 3126
|
|
},
|
|
{
|
|
"epoch": 5.730282375851996,
|
|
"grad_norm": 0.62572827845321,
|
|
"learning_rate": 3.8929680175696315e-06,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11717302352190018,
|
|
"step": 2945,
|
|
"valid_targets_mean": 3703.6,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 5.740019474196689,
|
|
"grad_norm": 0.6432964638404618,
|
|
"learning_rate": 3.835643005506893e-06,
|
|
"loss": 0.2539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.122787706553936,
|
|
"step": 2950,
|
|
"valid_targets_mean": 4017.5,
|
|
"valid_targets_min": 2278
|
|
},
|
|
{
|
|
"epoch": 5.749756572541383,
|
|
"grad_norm": 0.5038658324601383,
|
|
"learning_rate": 3.7786983961933234e-06,
|
|
"loss": 0.2643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12044684588909149,
|
|
"step": 2955,
|
|
"valid_targets_mean": 4827.1,
|
|
"valid_targets_min": 3860
|
|
},
|
|
{
|
|
"epoch": 5.759493670886076,
|
|
"grad_norm": 0.5385138670002907,
|
|
"learning_rate": 3.7221355297308483e-06,
|
|
"loss": 0.2585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12703923881053925,
|
|
"step": 2960,
|
|
"valid_targets_mean": 4848.0,
|
|
"valid_targets_min": 1263
|
|
},
|
|
{
|
|
"epoch": 5.769230769230769,
|
|
"grad_norm": 0.5700915903494469,
|
|
"learning_rate": 3.665955737237665e-06,
|
|
"loss": 0.2581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11475673317909241,
|
|
"step": 2965,
|
|
"valid_targets_mean": 3782.5,
|
|
"valid_targets_min": 1143
|
|
},
|
|
{
|
|
"epoch": 5.778967867575463,
|
|
"grad_norm": 0.5538767219295648,
|
|
"learning_rate": 3.6101603408169373e-06,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13634178042411804,
|
|
"step": 2970,
|
|
"valid_targets_mean": 4513.9,
|
|
"valid_targets_min": 3322
|
|
},
|
|
{
|
|
"epoch": 5.7887049659201555,
|
|
"grad_norm": 0.5696905873896441,
|
|
"learning_rate": 3.5547506535256825e-06,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11470770090818405,
|
|
"step": 2975,
|
|
"valid_targets_mean": 4806.4,
|
|
"valid_targets_min": 3429
|
|
},
|
|
{
|
|
"epoch": 5.798442064264849,
|
|
"grad_norm": 0.5688987863983855,
|
|
"learning_rate": 3.4997279793438387e-06,
|
|
"loss": 0.2656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12858857214450836,
|
|
"step": 2980,
|
|
"valid_targets_mean": 4567.4,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 5.808179162609543,
|
|
"grad_norm": 0.6043518178012176,
|
|
"learning_rate": 3.445093613143611e-06,
|
|
"loss": 0.2631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12447791546583176,
|
|
"step": 2985,
|
|
"valid_targets_mean": 3964.2,
|
|
"valid_targets_min": 1000
|
|
},
|
|
{
|
|
"epoch": 5.817916260954235,
|
|
"grad_norm": 0.6618079554381421,
|
|
"learning_rate": 3.390848840658978e-06,
|
|
"loss": 0.2732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13681542873382568,
|
|
"step": 2990,
|
|
"valid_targets_mean": 4346.1,
|
|
"valid_targets_min": 3241
|
|
},
|
|
{
|
|
"epoch": 5.827653359298929,
|
|
"grad_norm": 0.48326011452912454,
|
|
"learning_rate": 3.3369949384554514e-06,
|
|
"loss": 0.2679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.130824476480484,
|
|
"step": 2995,
|
|
"valid_targets_mean": 6315.1,
|
|
"valid_targets_min": 3519
|
|
},
|
|
{
|
|
"epoch": 5.837390457643622,
|
|
"grad_norm": 0.47435965638735184,
|
|
"learning_rate": 3.2835331739000178e-06,
|
|
"loss": 0.2487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11557748913764954,
|
|
"step": 3000,
|
|
"valid_targets_mean": 5293.5,
|
|
"valid_targets_min": 3633
|
|
},
|
|
{
|
|
"epoch": 5.847127555988315,
|
|
"grad_norm": 0.5591996348165854,
|
|
"learning_rate": 3.2304648051313146e-06,
|
|
"loss": 0.2561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12140397727489471,
|
|
"step": 3005,
|
|
"valid_targets_mean": 3925.2,
|
|
"valid_targets_min": 1227
|
|
},
|
|
{
|
|
"epoch": 5.856864654333009,
|
|
"grad_norm": 0.5969681588740798,
|
|
"learning_rate": 3.177791081030057e-06,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1455400437116623,
|
|
"step": 3010,
|
|
"valid_targets_mean": 4608.8,
|
|
"valid_targets_min": 3316
|
|
},
|
|
{
|
|
"epoch": 5.866601752677702,
|
|
"grad_norm": 0.492090203647912,
|
|
"learning_rate": 3.125513241189575e-06,
|
|
"loss": 0.2504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10691028833389282,
|
|
"step": 3015,
|
|
"valid_targets_mean": 4801.2,
|
|
"valid_targets_min": 3630
|
|
},
|
|
{
|
|
"epoch": 5.876338851022395,
|
|
"grad_norm": 0.596202748537758,
|
|
"learning_rate": 3.073632515886711e-06,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1574542075395584,
|
|
"step": 3020,
|
|
"valid_targets_mean": 4729.8,
|
|
"valid_targets_min": 3596
|
|
},
|
|
{
|
|
"epoch": 5.886075949367089,
|
|
"grad_norm": 0.5007005108922521,
|
|
"learning_rate": 3.0221501260528406e-06,
|
|
"loss": 0.2522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14202313125133514,
|
|
"step": 3025,
|
|
"valid_targets_mean": 5916.8,
|
|
"valid_targets_min": 3666
|
|
},
|
|
{
|
|
"epoch": 5.895813047711782,
|
|
"grad_norm": 0.5919767954319199,
|
|
"learning_rate": 2.9710672832451303e-06,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1491570770740509,
|
|
"step": 3030,
|
|
"valid_targets_mean": 4450.6,
|
|
"valid_targets_min": 1068
|
|
},
|
|
{
|
|
"epoch": 5.905550146056475,
|
|
"grad_norm": 0.5951433087758027,
|
|
"learning_rate": 2.9203851896180422e-06,
|
|
"loss": 0.259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.130938321352005,
|
|
"step": 3035,
|
|
"valid_targets_mean": 3789.5,
|
|
"valid_targets_min": 2617
|
|
},
|
|
{
|
|
"epoch": 5.915287244401169,
|
|
"grad_norm": 0.5996825938141891,
|
|
"learning_rate": 2.8701050378950303e-06,
|
|
"loss": 0.2586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13092640042304993,
|
|
"step": 3040,
|
|
"valid_targets_mean": 4583.9,
|
|
"valid_targets_min": 1878
|
|
},
|
|
{
|
|
"epoch": 5.9250243427458615,
|
|
"grad_norm": 0.6157833129416928,
|
|
"learning_rate": 2.820228011340478e-06,
|
|
"loss": 0.2662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11334574222564697,
|
|
"step": 3045,
|
|
"valid_targets_mean": 3525.5,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 5.934761441090555,
|
|
"grad_norm": 0.5758296075292605,
|
|
"learning_rate": 2.7707552837318584e-06,
|
|
"loss": 0.2503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1297803372144699,
|
|
"step": 3050,
|
|
"valid_targets_mean": 5045.0,
|
|
"valid_targets_min": 888
|
|
},
|
|
{
|
|
"epoch": 5.944498539435248,
|
|
"grad_norm": 0.5250840577539552,
|
|
"learning_rate": 2.7216880193320915e-06,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1023487001657486,
|
|
"step": 3055,
|
|
"valid_targets_mean": 4394.6,
|
|
"valid_targets_min": 829
|
|
},
|
|
{
|
|
"epoch": 5.9542356377799415,
|
|
"grad_norm": 1.7369997573826805,
|
|
"learning_rate": 2.673027372862178e-06,
|
|
"loss": 0.2605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12410314381122589,
|
|
"step": 3060,
|
|
"valid_targets_mean": 5099.9,
|
|
"valid_targets_min": 3699
|
|
},
|
|
{
|
|
"epoch": 5.963972736124635,
|
|
"grad_norm": 0.5927136750448293,
|
|
"learning_rate": 2.624774489473978e-06,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13789965212345123,
|
|
"step": 3065,
|
|
"valid_targets_mean": 4792.8,
|
|
"valid_targets_min": 1828
|
|
},
|
|
{
|
|
"epoch": 5.973709834469328,
|
|
"grad_norm": 0.5867435459835871,
|
|
"learning_rate": 2.5769305047233004e-06,
|
|
"loss": 0.2659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.126939058303833,
|
|
"step": 3070,
|
|
"valid_targets_mean": 3818.9,
|
|
"valid_targets_min": 3190
|
|
},
|
|
{
|
|
"epoch": 5.983446932814021,
|
|
"grad_norm": 0.618242013062481,
|
|
"learning_rate": 2.5294965445431775e-06,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14879021048545837,
|
|
"step": 3075,
|
|
"valid_targets_mean": 3982.0,
|
|
"valid_targets_min": 2229
|
|
},
|
|
{
|
|
"epoch": 5.993184031158715,
|
|
"grad_norm": 0.5438479266146226,
|
|
"learning_rate": 2.4824737252173447e-06,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11985643953084946,
|
|
"step": 3080,
|
|
"valid_targets_mean": 4163.5,
|
|
"valid_targets_min": 3439
|
|
},
|
|
{
|
|
"epoch": 6.001947419668939,
|
|
"grad_norm": 0.6338602864986878,
|
|
"learning_rate": 2.4358631533539902e-06,
|
|
"loss": 0.2546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11650922894477844,
|
|
"step": 3085,
|
|
"valid_targets_mean": 4022.9,
|
|
"valid_targets_min": 971
|
|
},
|
|
{
|
|
"epoch": 6.011684518013632,
|
|
"grad_norm": 0.5338895226760605,
|
|
"learning_rate": 2.3896659258596877e-06,
|
|
"loss": 0.2636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13872568309307098,
|
|
"step": 3090,
|
|
"valid_targets_mean": 5218.1,
|
|
"valid_targets_min": 4045
|
|
},
|
|
{
|
|
"epoch": 6.021421616358325,
|
|
"grad_norm": 0.6098014560953198,
|
|
"learning_rate": 2.3438831299136265e-06,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12719812989234924,
|
|
"step": 3095,
|
|
"valid_targets_mean": 4604.1,
|
|
"valid_targets_min": 2617
|
|
},
|
|
{
|
|
"epoch": 6.031158714703018,
|
|
"grad_norm": 0.5305405824387005,
|
|
"learning_rate": 2.2985158429419885e-06,
|
|
"loss": 0.2585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12109411507844925,
|
|
"step": 3100,
|
|
"valid_targets_mean": 4571.6,
|
|
"valid_targets_min": 2854
|
|
},
|
|
{
|
|
"epoch": 6.040895813047712,
|
|
"grad_norm": 0.5509847213792466,
|
|
"learning_rate": 2.2535651325926012e-06,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14724647998809814,
|
|
"step": 3105,
|
|
"valid_targets_mean": 4772.4,
|
|
"valid_targets_min": 3479
|
|
},
|
|
{
|
|
"epoch": 6.050632911392405,
|
|
"grad_norm": 0.5966951243296615,
|
|
"learning_rate": 2.209032056709821e-06,
|
|
"loss": 0.2591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12193121016025543,
|
|
"step": 3110,
|
|
"valid_targets_mean": 4448.4,
|
|
"valid_targets_min": 2721
|
|
},
|
|
{
|
|
"epoch": 6.060370009737098,
|
|
"grad_norm": 0.557424087505133,
|
|
"learning_rate": 2.1649176633096312e-06,
|
|
"loss": 0.2501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13333502411842346,
|
|
"step": 3115,
|
|
"valid_targets_mean": 4766.8,
|
|
"valid_targets_min": 3381
|
|
},
|
|
{
|
|
"epoch": 6.070107108081792,
|
|
"grad_norm": 0.6299305432207086,
|
|
"learning_rate": 2.121222990554981e-06,
|
|
"loss": 0.2602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12951087951660156,
|
|
"step": 3120,
|
|
"valid_targets_mean": 3683.9,
|
|
"valid_targets_min": 1047
|
|
},
|
|
{
|
|
"epoch": 6.079844206426485,
|
|
"grad_norm": 0.5172467244063791,
|
|
"learning_rate": 2.0779490667313485e-06,
|
|
"loss": 0.2603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13096672296524048,
|
|
"step": 3125,
|
|
"valid_targets_mean": 5114.8,
|
|
"valid_targets_min": 2826
|
|
},
|
|
{
|
|
"epoch": 6.089581304771178,
|
|
"grad_norm": 0.7297506514243797,
|
|
"learning_rate": 2.0350969102225603e-06,
|
|
"loss": 0.2571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13555462658405304,
|
|
"step": 3130,
|
|
"valid_targets_mean": 4284.8,
|
|
"valid_targets_min": 3234
|
|
},
|
|
{
|
|
"epoch": 6.099318403115872,
|
|
"grad_norm": 0.5958563577782829,
|
|
"learning_rate": 1.9926675294868024e-06,
|
|
"loss": 0.2608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12135843932628632,
|
|
"step": 3135,
|
|
"valid_targets_mean": 3860.8,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 6.109055501460565,
|
|
"grad_norm": 0.66806359721193,
|
|
"learning_rate": 1.9506619230328884e-06,
|
|
"loss": 0.2605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1265953630208969,
|
|
"step": 3140,
|
|
"valid_targets_mean": 4137.6,
|
|
"valid_targets_min": 2348
|
|
},
|
|
{
|
|
"epoch": 6.118792599805258,
|
|
"grad_norm": 0.5474345433744974,
|
|
"learning_rate": 1.9090810793967885e-06,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09850633889436722,
|
|
"step": 3145,
|
|
"valid_targets_mean": 3770.9,
|
|
"valid_targets_min": 1174
|
|
},
|
|
{
|
|
"epoch": 6.1285296981499515,
|
|
"grad_norm": 0.5736547523307033,
|
|
"learning_rate": 1.8679259771183322e-06,
|
|
"loss": 0.2506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12463327497243881,
|
|
"step": 3150,
|
|
"valid_targets_mean": 4120.9,
|
|
"valid_targets_min": 3250
|
|
},
|
|
{
|
|
"epoch": 6.138266796494644,
|
|
"grad_norm": 0.5548063306347019,
|
|
"learning_rate": 1.8271975847182056e-06,
|
|
"loss": 0.2462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12539273500442505,
|
|
"step": 3155,
|
|
"valid_targets_mean": 4375.4,
|
|
"valid_targets_min": 3353
|
|
},
|
|
{
|
|
"epoch": 6.148003894839338,
|
|
"grad_norm": 0.6116148977471212,
|
|
"learning_rate": 1.7868968606751425e-06,
|
|
"loss": 0.2516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14930123090744019,
|
|
"step": 3160,
|
|
"valid_targets_mean": 4701.0,
|
|
"valid_targets_min": 3962
|
|
},
|
|
{
|
|
"epoch": 6.1577409931840315,
|
|
"grad_norm": 0.5601857951268385,
|
|
"learning_rate": 1.7470247534033703e-06,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12585139274597168,
|
|
"step": 3165,
|
|
"valid_targets_mean": 3999.2,
|
|
"valid_targets_min": 1061
|
|
},
|
|
{
|
|
"epoch": 6.167478091528724,
|
|
"grad_norm": 0.5550093380076379,
|
|
"learning_rate": 1.7075822012303022e-06,
|
|
"loss": 0.2542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12577325105667114,
|
|
"step": 3170,
|
|
"valid_targets_mean": 4218.4,
|
|
"valid_targets_min": 2883
|
|
},
|
|
{
|
|
"epoch": 6.177215189873418,
|
|
"grad_norm": 0.570349205112702,
|
|
"learning_rate": 1.6685701323744386e-06,
|
|
"loss": 0.2586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1341044008731842,
|
|
"step": 3175,
|
|
"valid_targets_mean": 4057.8,
|
|
"valid_targets_min": 1121
|
|
},
|
|
{
|
|
"epoch": 6.186952288218111,
|
|
"grad_norm": 0.6203481316172027,
|
|
"learning_rate": 1.6299894649235448e-06,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12967824935913086,
|
|
"step": 3180,
|
|
"valid_targets_mean": 3175.0,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 6.196689386562804,
|
|
"grad_norm": 0.5558153886719512,
|
|
"learning_rate": 1.591841106813017e-06,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11848209798336029,
|
|
"step": 3185,
|
|
"valid_targets_mean": 4083.8,
|
|
"valid_targets_min": 1073
|
|
},
|
|
{
|
|
"epoch": 6.206426484907498,
|
|
"grad_norm": 0.5686596540616676,
|
|
"learning_rate": 1.5541259558045308e-06,
|
|
"loss": 0.2553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.128545880317688,
|
|
"step": 3190,
|
|
"valid_targets_mean": 4163.1,
|
|
"valid_targets_min": 2841
|
|
},
|
|
{
|
|
"epoch": 6.21616358325219,
|
|
"grad_norm": 0.5771593380601775,
|
|
"learning_rate": 1.5168448994649266e-06,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14863663911819458,
|
|
"step": 3195,
|
|
"valid_targets_mean": 4561.0,
|
|
"valid_targets_min": 3303
|
|
},
|
|
{
|
|
"epoch": 6.225900681596884,
|
|
"grad_norm": 0.6037081214346742,
|
|
"learning_rate": 1.479998815145296e-06,
|
|
"loss": 0.2516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12713660299777985,
|
|
"step": 3200,
|
|
"valid_targets_mean": 4676.5,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 6.235637779941578,
|
|
"grad_norm": 0.579098707024515,
|
|
"learning_rate": 1.4435885699603524e-06,
|
|
"loss": 0.2604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13061289489269257,
|
|
"step": 3205,
|
|
"valid_targets_mean": 4608.2,
|
|
"valid_targets_min": 2764
|
|
},
|
|
{
|
|
"epoch": 6.24537487828627,
|
|
"grad_norm": 0.6047379254303943,
|
|
"learning_rate": 1.4076150207680185e-06,
|
|
"loss": 0.2618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12266331166028976,
|
|
"step": 3210,
|
|
"valid_targets_mean": 4430.4,
|
|
"valid_targets_min": 3637
|
|
},
|
|
{
|
|
"epoch": 6.255111976630964,
|
|
"grad_norm": 0.5613330703715288,
|
|
"learning_rate": 1.3720790141492657e-06,
|
|
"loss": 0.2657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14952513575553894,
|
|
"step": 3215,
|
|
"valid_targets_mean": 5048.0,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 6.264849074975658,
|
|
"grad_norm": 0.5548976762045761,
|
|
"learning_rate": 1.336981386388183e-06,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13214871287345886,
|
|
"step": 3220,
|
|
"valid_targets_mean": 4700.4,
|
|
"valid_targets_min": 3797
|
|
},
|
|
{
|
|
"epoch": 6.27458617332035,
|
|
"grad_norm": 0.5455007242031316,
|
|
"learning_rate": 1.3023229634523027e-06,
|
|
"loss": 0.2523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12056834250688553,
|
|
"step": 3225,
|
|
"valid_targets_mean": 4660.1,
|
|
"valid_targets_min": 1175
|
|
},
|
|
{
|
|
"epoch": 6.284323271665044,
|
|
"grad_norm": 0.5596956774026515,
|
|
"learning_rate": 1.2681045609731756e-06,
|
|
"loss": 0.2716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13410544395446777,
|
|
"step": 3230,
|
|
"valid_targets_mean": 5073.1,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 6.2940603700097375,
|
|
"grad_norm": 0.8305948996853828,
|
|
"learning_rate": 1.2343269842271366e-06,
|
|
"loss": 0.26,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16496288776397705,
|
|
"step": 3235,
|
|
"valid_targets_mean": 4975.1,
|
|
"valid_targets_min": 3407
|
|
},
|
|
{
|
|
"epoch": 6.30379746835443,
|
|
"grad_norm": 0.5792805407733531,
|
|
"learning_rate": 1.2009910281163962e-06,
|
|
"loss": 0.2605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15418891608715057,
|
|
"step": 3240,
|
|
"valid_targets_mean": 4802.9,
|
|
"valid_targets_min": 745
|
|
},
|
|
{
|
|
"epoch": 6.313534566699124,
|
|
"grad_norm": 0.6412591020908027,
|
|
"learning_rate": 1.168097477150314e-06,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11696699261665344,
|
|
"step": 3245,
|
|
"valid_targets_mean": 4022.8,
|
|
"valid_targets_min": 1263
|
|
},
|
|
{
|
|
"epoch": 6.3232716650438165,
|
|
"grad_norm": 0.550247607229888,
|
|
"learning_rate": 1.1356471054269447e-06,
|
|
"loss": 0.2565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1333034187555313,
|
|
"step": 3250,
|
|
"valid_targets_mean": 4622.4,
|
|
"valid_targets_min": 3099
|
|
},
|
|
{
|
|
"epoch": 6.33300876338851,
|
|
"grad_norm": 0.5972533526918419,
|
|
"learning_rate": 1.1036406766148033e-06,
|
|
"loss": 0.2532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1299445629119873,
|
|
"step": 3255,
|
|
"valid_targets_mean": 4425.6,
|
|
"valid_targets_min": 3656
|
|
},
|
|
{
|
|
"epoch": 6.342745861733204,
|
|
"grad_norm": 0.5560017228765931,
|
|
"learning_rate": 1.0720789439349177e-06,
|
|
"loss": 0.264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15433597564697266,
|
|
"step": 3260,
|
|
"valid_targets_mean": 5480.0,
|
|
"valid_targets_min": 859
|
|
},
|
|
{
|
|
"epoch": 6.3524829600778965,
|
|
"grad_norm": 0.6172818811859626,
|
|
"learning_rate": 1.0409626501430847e-06,
|
|
"loss": 0.2547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1500224769115448,
|
|
"step": 3265,
|
|
"valid_targets_mean": 4562.9,
|
|
"valid_targets_min": 3067
|
|
},
|
|
{
|
|
"epoch": 6.36222005842259,
|
|
"grad_norm": 0.5722658055251421,
|
|
"learning_rate": 1.0102925275123954e-06,
|
|
"loss": 0.2529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.115928053855896,
|
|
"step": 3270,
|
|
"valid_targets_mean": 4861.5,
|
|
"valid_targets_min": 3402
|
|
},
|
|
{
|
|
"epoch": 6.371957156767284,
|
|
"grad_norm": 0.5999231505340598,
|
|
"learning_rate": 9.800692978160064e-07,
|
|
"loss": 0.2633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14334945380687714,
|
|
"step": 3275,
|
|
"valid_targets_mean": 4504.1,
|
|
"valid_targets_min": 2003
|
|
},
|
|
{
|
|
"epoch": 6.381694255111976,
|
|
"grad_norm": 0.5654502118435346,
|
|
"learning_rate": 9.502936723101563e-07,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11749427020549774,
|
|
"step": 3280,
|
|
"valid_targets_mean": 4144.2,
|
|
"valid_targets_min": 1185
|
|
},
|
|
{
|
|
"epoch": 6.39143135345667,
|
|
"grad_norm": 0.6262391676147439,
|
|
"learning_rate": 9.209663517174094e-07,
|
|
"loss": 0.2701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14620202779769897,
|
|
"step": 3285,
|
|
"valid_targets_mean": 4256.8,
|
|
"valid_targets_min": 2985
|
|
},
|
|
{
|
|
"epoch": 6.401168451801363,
|
|
"grad_norm": 0.565140273467742,
|
|
"learning_rate": 8.920880262101894e-07,
|
|
"loss": 0.2534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11826826632022858,
|
|
"step": 3290,
|
|
"valid_targets_mean": 4537.8,
|
|
"valid_targets_min": 3102
|
|
},
|
|
{
|
|
"epoch": 6.410905550146056,
|
|
"grad_norm": 0.5564152053695781,
|
|
"learning_rate": 8.636593753945188e-07,
|
|
"loss": 0.2517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14234530925750732,
|
|
"step": 3295,
|
|
"valid_targets_mean": 4763.9,
|
|
"valid_targets_min": 3920
|
|
},
|
|
{
|
|
"epoch": 6.42064264849075,
|
|
"grad_norm": 0.5961241948152944,
|
|
"learning_rate": 8.356810682940386e-07,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13406327366828918,
|
|
"step": 3300,
|
|
"valid_targets_mean": 4606.6,
|
|
"valid_targets_min": 1000
|
|
},
|
|
{
|
|
"epoch": 6.430379746835443,
|
|
"grad_norm": 0.5463667595224455,
|
|
"learning_rate": 8.081537633342562e-07,
|
|
"loss": 0.2494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1459270864725113,
|
|
"step": 3305,
|
|
"valid_targets_mean": 5078.8,
|
|
"valid_targets_min": 3699
|
|
},
|
|
{
|
|
"epoch": 6.440116845180136,
|
|
"grad_norm": 0.5777925140682082,
|
|
"learning_rate": 7.810781083270491e-07,
|
|
"loss": 0.2593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11504077911376953,
|
|
"step": 3310,
|
|
"valid_targets_mean": 3968.4,
|
|
"valid_targets_min": 1370
|
|
},
|
|
{
|
|
"epoch": 6.44985394352483,
|
|
"grad_norm": 0.536522147999086,
|
|
"learning_rate": 7.544547404554236e-07,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13226105272769928,
|
|
"step": 3315,
|
|
"valid_targets_mean": 4502.2,
|
|
"valid_targets_min": 3273
|
|
},
|
|
{
|
|
"epoch": 6.459591041869523,
|
|
"grad_norm": 0.6201364700559213,
|
|
"learning_rate": 7.282842862585204e-07,
|
|
"loss": 0.2503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15036822855472565,
|
|
"step": 3320,
|
|
"valid_targets_mean": 3782.2,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 6.469328140214216,
|
|
"grad_norm": 0.5845177136011124,
|
|
"learning_rate": 7.025673616168704e-07,
|
|
"loss": 0.2494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10761187970638275,
|
|
"step": 3325,
|
|
"valid_targets_mean": 4047.9,
|
|
"valid_targets_min": 3451
|
|
},
|
|
{
|
|
"epoch": 6.47906523855891,
|
|
"grad_norm": 0.6483669364419883,
|
|
"learning_rate": 6.773045717378912e-07,
|
|
"loss": 0.2471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13007041811943054,
|
|
"step": 3330,
|
|
"valid_targets_mean": 3900.2,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 6.4888023369036025,
|
|
"grad_norm": 0.5733772441529873,
|
|
"learning_rate": 6.524965111416604e-07,
|
|
"loss": 0.2482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13473814725875854,
|
|
"step": 3335,
|
|
"valid_targets_mean": 4968.9,
|
|
"valid_targets_min": 1495
|
|
},
|
|
{
|
|
"epoch": 6.498539435248296,
|
|
"grad_norm": 0.6029767744349168,
|
|
"learning_rate": 6.281437636469135e-07,
|
|
"loss": 0.2642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13791951537132263,
|
|
"step": 3340,
|
|
"valid_targets_mean": 4719.6,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 6.508276533592989,
|
|
"grad_norm": 0.5790751433998983,
|
|
"learning_rate": 6.042469023573016e-07,
|
|
"loss": 0.2583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16216571629047394,
|
|
"step": 3345,
|
|
"valid_targets_mean": 5296.2,
|
|
"valid_targets_min": 3498
|
|
},
|
|
{
|
|
"epoch": 6.5180136319376825,
|
|
"grad_norm": 0.5336902757553303,
|
|
"learning_rate": 5.808064896479227e-07,
|
|
"loss": 0.2532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09904158115386963,
|
|
"step": 3350,
|
|
"valid_targets_mean": 4873.9,
|
|
"valid_targets_min": 3494
|
|
},
|
|
{
|
|
"epoch": 6.527750730282376,
|
|
"grad_norm": 0.5422114050455206,
|
|
"learning_rate": 5.578230771520665e-07,
|
|
"loss": 0.2373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10119602829217911,
|
|
"step": 3355,
|
|
"valid_targets_mean": 4950.1,
|
|
"valid_targets_min": 3204
|
|
},
|
|
{
|
|
"epoch": 6.537487828627069,
|
|
"grad_norm": 0.5350177720269675,
|
|
"learning_rate": 5.352972057482375e-07,
|
|
"loss": 0.2615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11143676936626434,
|
|
"step": 3360,
|
|
"valid_targets_mean": 4541.0,
|
|
"valid_targets_min": 2917
|
|
},
|
|
{
|
|
"epoch": 6.547224926971762,
|
|
"grad_norm": 0.5752818105313237,
|
|
"learning_rate": 5.132294055474307e-07,
|
|
"loss": 0.2484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13125720620155334,
|
|
"step": 3365,
|
|
"valid_targets_mean": 4442.2,
|
|
"valid_targets_min": 3625
|
|
},
|
|
{
|
|
"epoch": 6.556962025316456,
|
|
"grad_norm": 0.5791605604567498,
|
|
"learning_rate": 4.916201958806621e-07,
|
|
"loss": 0.2682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13184475898742676,
|
|
"step": 3370,
|
|
"valid_targets_mean": 4955.4,
|
|
"valid_targets_min": 3225
|
|
},
|
|
{
|
|
"epoch": 6.566699123661149,
|
|
"grad_norm": 0.6338878577366508,
|
|
"learning_rate": 4.704700852867361e-07,
|
|
"loss": 0.2658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1653052717447281,
|
|
"step": 3375,
|
|
"valid_targets_mean": 4628.2,
|
|
"valid_targets_min": 3874
|
|
},
|
|
{
|
|
"epoch": 6.576436222005842,
|
|
"grad_norm": 0.5848248037217846,
|
|
"learning_rate": 4.497795715002795e-07,
|
|
"loss": 0.2553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0983453243970871,
|
|
"step": 3380,
|
|
"valid_targets_mean": 3992.5,
|
|
"valid_targets_min": 2759
|
|
},
|
|
{
|
|
"epoch": 6.586173320350536,
|
|
"grad_norm": 0.5568697497738493,
|
|
"learning_rate": 4.295491414400288e-07,
|
|
"loss": 0.2591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12542709708213806,
|
|
"step": 3385,
|
|
"valid_targets_mean": 4685.4,
|
|
"valid_targets_min": 3412
|
|
},
|
|
{
|
|
"epoch": 6.595910418695229,
|
|
"grad_norm": 0.5304602856207518,
|
|
"learning_rate": 4.0977927119737695e-07,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12435789406299591,
|
|
"step": 3390,
|
|
"valid_targets_mean": 5304.8,
|
|
"valid_targets_min": 956
|
|
},
|
|
{
|
|
"epoch": 6.605647517039922,
|
|
"grad_norm": 0.512373148121127,
|
|
"learning_rate": 3.9047042602516237e-07,
|
|
"loss": 0.2462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10368811339139938,
|
|
"step": 3395,
|
|
"valid_targets_mean": 4513.0,
|
|
"valid_targets_min": 2011
|
|
},
|
|
{
|
|
"epoch": 6.615384615384615,
|
|
"grad_norm": 0.5521385688601449,
|
|
"learning_rate": 3.716230603267268e-07,
|
|
"loss": 0.255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12800955772399902,
|
|
"step": 3400,
|
|
"valid_targets_mean": 4877.2,
|
|
"valid_targets_min": 3671
|
|
},
|
|
{
|
|
"epoch": 6.625121713729309,
|
|
"grad_norm": 0.5966593594879436,
|
|
"learning_rate": 3.532376176452146e-07,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13831618428230286,
|
|
"step": 3405,
|
|
"valid_targets_mean": 4648.6,
|
|
"valid_targets_min": 3463
|
|
},
|
|
{
|
|
"epoch": 6.634858812074002,
|
|
"grad_norm": 0.562594079404017,
|
|
"learning_rate": 3.353145306531347e-07,
|
|
"loss": 0.2549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12337220460176468,
|
|
"step": 3410,
|
|
"valid_targets_mean": 4300.8,
|
|
"valid_targets_min": 2931
|
|
},
|
|
{
|
|
"epoch": 6.644595910418695,
|
|
"grad_norm": 0.5953344375676264,
|
|
"learning_rate": 3.17854221142182e-07,
|
|
"loss": 0.2519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12950412929058075,
|
|
"step": 3415,
|
|
"valid_targets_mean": 4138.1,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 6.6543330087633885,
|
|
"grad_norm": 0.6001428116488979,
|
|
"learning_rate": 3.008571000133187e-07,
|
|
"loss": 0.272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18734675645828247,
|
|
"step": 3420,
|
|
"valid_targets_mean": 5734.4,
|
|
"valid_targets_min": 3462
|
|
},
|
|
{
|
|
"epoch": 6.664070107108082,
|
|
"grad_norm": 0.6238577956752472,
|
|
"learning_rate": 2.843235672670841e-07,
|
|
"loss": 0.252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10213933885097504,
|
|
"step": 3425,
|
|
"valid_targets_mean": 4204.6,
|
|
"valid_targets_min": 3431
|
|
},
|
|
{
|
|
"epoch": 6.673807205452775,
|
|
"grad_norm": 0.5246539879686309,
|
|
"learning_rate": 2.682540119942023e-07,
|
|
"loss": 0.26,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11438605189323425,
|
|
"step": 3430,
|
|
"valid_targets_mean": 4997.4,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 6.6835443037974684,
|
|
"grad_norm": 0.5616102543677218,
|
|
"learning_rate": 2.5264881236640947e-07,
|
|
"loss": 0.2592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13888661563396454,
|
|
"step": 3435,
|
|
"valid_targets_mean": 5347.2,
|
|
"valid_targets_min": 3348
|
|
},
|
|
{
|
|
"epoch": 6.693281402142162,
|
|
"grad_norm": 0.6382127778058433,
|
|
"learning_rate": 2.375083356275676e-07,
|
|
"loss": 0.2589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1229659765958786,
|
|
"step": 3440,
|
|
"valid_targets_mean": 4152.2,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 6.703018500486855,
|
|
"grad_norm": 0.6128412645192438,
|
|
"learning_rate": 2.2283293808501138e-07,
|
|
"loss": 0.2456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11142013967037201,
|
|
"step": 3445,
|
|
"valid_targets_mean": 3643.0,
|
|
"valid_targets_min": 824
|
|
},
|
|
{
|
|
"epoch": 6.712755598831548,
|
|
"grad_norm": 0.537730487076946,
|
|
"learning_rate": 2.086229651011684e-07,
|
|
"loss": 0.2503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1253141313791275,
|
|
"step": 3450,
|
|
"valid_targets_mean": 5924.1,
|
|
"valid_targets_min": 4004
|
|
},
|
|
{
|
|
"epoch": 6.722492697176241,
|
|
"grad_norm": 0.5645898925278727,
|
|
"learning_rate": 1.9487875108542997e-07,
|
|
"loss": 0.2599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16142700612545013,
|
|
"step": 3455,
|
|
"valid_targets_mean": 5317.0,
|
|
"valid_targets_min": 3600
|
|
},
|
|
{
|
|
"epoch": 6.732229795520935,
|
|
"grad_norm": 0.6030651917500885,
|
|
"learning_rate": 1.8160061948628406e-07,
|
|
"loss": 0.2502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10611419379711151,
|
|
"step": 3460,
|
|
"valid_targets_mean": 4151.0,
|
|
"valid_targets_min": 3321
|
|
},
|
|
{
|
|
"epoch": 6.741966893865628,
|
|
"grad_norm": 0.6373287790671797,
|
|
"learning_rate": 1.6878888278369255e-07,
|
|
"loss": 0.2553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11590185761451721,
|
|
"step": 3465,
|
|
"valid_targets_mean": 4123.8,
|
|
"valid_targets_min": 823
|
|
},
|
|
{
|
|
"epoch": 6.751703992210321,
|
|
"grad_norm": 0.5845102835734832,
|
|
"learning_rate": 1.5644384248176159e-07,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1272096335887909,
|
|
"step": 3470,
|
|
"valid_targets_mean": 3837.5,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 6.761441090555015,
|
|
"grad_norm": 0.5989467111830217,
|
|
"learning_rate": 1.445657891016161e-07,
|
|
"loss": 0.2631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13397274911403656,
|
|
"step": 3475,
|
|
"valid_targets_mean": 4363.5,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 6.771178188899708,
|
|
"grad_norm": 0.5698769042550108,
|
|
"learning_rate": 1.3315500217458533e-07,
|
|
"loss": 0.2529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14439411461353302,
|
|
"step": 3480,
|
|
"valid_targets_mean": 5124.2,
|
|
"valid_targets_min": 3339
|
|
},
|
|
{
|
|
"epoch": 6.780915287244401,
|
|
"grad_norm": 0.5016809565704832,
|
|
"learning_rate": 1.2221175023561238e-07,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12245055288076401,
|
|
"step": 3485,
|
|
"valid_targets_mean": 5968.0,
|
|
"valid_targets_min": 3231
|
|
},
|
|
{
|
|
"epoch": 6.790652385589095,
|
|
"grad_norm": 0.5226761574566673,
|
|
"learning_rate": 1.1173629081694392e-07,
|
|
"loss": 0.2586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11084966361522675,
|
|
"step": 3490,
|
|
"valid_targets_mean": 5004.0,
|
|
"valid_targets_min": 3166
|
|
},
|
|
{
|
|
"epoch": 6.800389483933788,
|
|
"grad_norm": 0.5444726652241876,
|
|
"learning_rate": 1.0172887044205936e-07,
|
|
"loss": 0.2495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11974833905696869,
|
|
"step": 3495,
|
|
"valid_targets_mean": 4302.9,
|
|
"valid_targets_min": 1020
|
|
},
|
|
{
|
|
"epoch": 6.810126582278481,
|
|
"grad_norm": 0.5573574539229955,
|
|
"learning_rate": 9.21897246198733e-08,
|
|
"loss": 0.2532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12142840027809143,
|
|
"step": 3500,
|
|
"valid_targets_mean": 4211.2,
|
|
"valid_targets_min": 2278
|
|
},
|
|
{
|
|
"epoch": 6.8198636806231745,
|
|
"grad_norm": 0.602159621339256,
|
|
"learning_rate": 8.311907783920437e-08,
|
|
"loss": 0.2473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12724614143371582,
|
|
"step": 3505,
|
|
"valid_targets_mean": 4097.1,
|
|
"valid_targets_min": 1126
|
|
},
|
|
{
|
|
"epoch": 6.829600778967867,
|
|
"grad_norm": 0.5548278130139928,
|
|
"learning_rate": 7.451714356347062e-08,
|
|
"loss": 0.2567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12457823753356934,
|
|
"step": 3510,
|
|
"valid_targets_mean": 4514.8,
|
|
"valid_targets_min": 939
|
|
},
|
|
{
|
|
"epoch": 6.839337877312561,
|
|
"grad_norm": 0.6795066527322409,
|
|
"learning_rate": 6.638412422568463e-08,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1567058116197586,
|
|
"step": 3515,
|
|
"valid_targets_mean": 4655.0,
|
|
"valid_targets_min": 3487
|
|
},
|
|
{
|
|
"epoch": 6.849074975657254,
|
|
"grad_norm": 0.600295513403086,
|
|
"learning_rate": 5.8720211223677324e-08,
|
|
"loss": 0.2681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11704479157924652,
|
|
"step": 3520,
|
|
"valid_targets_mean": 4077.5,
|
|
"valid_targets_min": 1234
|
|
},
|
|
{
|
|
"epoch": 6.858812074001947,
|
|
"grad_norm": 0.5317290732973234,
|
|
"learning_rate": 5.1525584915605996e-08,
|
|
"loss": 0.2507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11121546477079391,
|
|
"step": 3525,
|
|
"valid_targets_mean": 4620.9,
|
|
"valid_targets_min": 3664
|
|
},
|
|
{
|
|
"epoch": 6.868549172346641,
|
|
"grad_norm": 0.5758246663130877,
|
|
"learning_rate": 4.480041461569773e-08,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1220594123005867,
|
|
"step": 3530,
|
|
"valid_targets_mean": 4087.6,
|
|
"valid_targets_min": 2812
|
|
},
|
|
{
|
|
"epoch": 6.878286270691334,
|
|
"grad_norm": 0.5958116802229949,
|
|
"learning_rate": 3.854485859026591e-08,
|
|
"loss": 0.2531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12879547476768494,
|
|
"step": 3535,
|
|
"valid_targets_mean": 4854.9,
|
|
"valid_targets_min": 3686
|
|
},
|
|
{
|
|
"epoch": 6.888023369036027,
|
|
"grad_norm": 0.5564425582970186,
|
|
"learning_rate": 3.2759064054002086e-08,
|
|
"loss": 0.2553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10474040359258652,
|
|
"step": 3540,
|
|
"valid_targets_mean": 3980.8,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 6.897760467380721,
|
|
"grad_norm": 0.5278837918654566,
|
|
"learning_rate": 2.744316716648543e-08,
|
|
"loss": 0.2495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15160135924816132,
|
|
"step": 3545,
|
|
"valid_targets_mean": 6259.8,
|
|
"valid_targets_min": 3911
|
|
},
|
|
{
|
|
"epoch": 6.907497565725414,
|
|
"grad_norm": 0.5147662495970018,
|
|
"learning_rate": 2.259729302900304e-08,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13430249691009521,
|
|
"step": 3550,
|
|
"valid_targets_mean": 5264.2,
|
|
"valid_targets_min": 3511
|
|
},
|
|
{
|
|
"epoch": 6.917234664070107,
|
|
"grad_norm": 0.5422874133842184,
|
|
"learning_rate": 1.822155568159012e-08,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1191805824637413,
|
|
"step": 3555,
|
|
"valid_targets_mean": 4269.1,
|
|
"valid_targets_min": 581
|
|
},
|
|
{
|
|
"epoch": 6.926971762414801,
|
|
"grad_norm": 0.5968655366009338,
|
|
"learning_rate": 1.431605810034764e-08,
|
|
"loss": 0.2603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11171600222587585,
|
|
"step": 3560,
|
|
"valid_targets_mean": 3575.4,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 6.936708860759493,
|
|
"grad_norm": 0.5923220185254061,
|
|
"learning_rate": 1.0880892195028747e-08,
|
|
"loss": 0.2627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12644121050834656,
|
|
"step": 3565,
|
|
"valid_targets_mean": 4888.5,
|
|
"valid_targets_min": 953
|
|
},
|
|
{
|
|
"epoch": 6.946445959104187,
|
|
"grad_norm": 0.5362126127938313,
|
|
"learning_rate": 7.916138806864927e-09,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13645507395267487,
|
|
"step": 3570,
|
|
"valid_targets_mean": 4954.9,
|
|
"valid_targets_min": 3907
|
|
},
|
|
{
|
|
"epoch": 6.9561830574488805,
|
|
"grad_norm": 0.6191550564671183,
|
|
"learning_rate": 5.421867706671968e-09,
|
|
"loss": 0.2443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13485130667686462,
|
|
"step": 3575,
|
|
"valid_targets_mean": 4264.5,
|
|
"valid_targets_min": 3473
|
|
},
|
|
{
|
|
"epoch": 6.965920155793573,
|
|
"grad_norm": 0.6276254874417744,
|
|
"learning_rate": 3.3981375932001792e-09,
|
|
"loss": 0.2546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11372117698192596,
|
|
"step": 3580,
|
|
"valid_targets_mean": 3629.0,
|
|
"valid_targets_min": 832
|
|
},
|
|
{
|
|
"epoch": 6.975657254138267,
|
|
"grad_norm": 0.5743287211412047,
|
|
"learning_rate": 1.8449960917599207e-09,
|
|
"loss": 0.2423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12705141305923462,
|
|
"step": 3585,
|
|
"valid_targets_mean": 4258.5,
|
|
"valid_targets_min": 3710
|
|
},
|
|
{
|
|
"epoch": 6.98539435248296,
|
|
"grad_norm": 0.5803787544717841,
|
|
"learning_rate": 7.624797530958461e-10,
|
|
"loss": 0.2558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13234540820121765,
|
|
"step": 3590,
|
|
"valid_targets_mean": 4884.5,
|
|
"valid_targets_min": 3974
|
|
},
|
|
{
|
|
"epoch": 6.995131450827653,
|
|
"grad_norm": 0.584921438228633,
|
|
"learning_rate": 1.5061405252980721e-10,
|
|
"loss": 0.2518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1520397961139679,
|
|
"step": 3595,
|
|
"valid_targets_mean": 5244.6,
|
|
"valid_targets_min": 3575
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2848636209964752,
|
|
"step": 3598,
|
|
"total_flos": 2.2384198545393582e+18,
|
|
"train_loss": 0.3086844535421305,
|
|
"train_runtime": 48838.3115,
|
|
"train_samples_per_second": 1.177,
|
|
"train_steps_per_second": 0.074,
|
|
"valid_targets_mean": 4286.8,
|
|
"valid_targets_min": 2135
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 3598,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 2.2384198545393582e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|