10179 lines
282 KiB
JSON
10179 lines
282 KiB
JSON
|
|
{
|
||
|
|
"best_global_step": null,
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 7.0,
|
||
|
|
"eval_steps": 500,
|
||
|
|
"global_step": 4606,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"epoch": 0.007598784194528876,
|
||
|
|
"grad_norm": 16.54100795724015,
|
||
|
|
"learning_rate": 3.4707158351409984e-07,
|
||
|
|
"loss": 0.6415,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.6890859603881836,
|
||
|
|
"step": 5,
|
||
|
|
"valid_targets_mean": 4029.6,
|
||
|
|
"valid_targets_min": 742
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.015197568389057751,
|
||
|
|
"grad_norm": 17.760320126405695,
|
||
|
|
"learning_rate": 7.809110629067245e-07,
|
||
|
|
"loss": 0.6735,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.6768203973770142,
|
||
|
|
"step": 10,
|
||
|
|
"valid_targets_mean": 4724.0,
|
||
|
|
"valid_targets_min": 333
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.022796352583586626,
|
||
|
|
"grad_norm": 14.700502278309516,
|
||
|
|
"learning_rate": 1.2147505422993492e-06,
|
||
|
|
"loss": 0.6621,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.6386404633522034,
|
||
|
|
"step": 15,
|
||
|
|
"valid_targets_mean": 4853.7,
|
||
|
|
"valid_targets_min": 1059
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.030395136778115502,
|
||
|
|
"grad_norm": 12.016024137339164,
|
||
|
|
"learning_rate": 1.6485900216919743e-06,
|
||
|
|
"loss": 0.6268,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.6092511415481567,
|
||
|
|
"step": 20,
|
||
|
|
"valid_targets_mean": 5505.4,
|
||
|
|
"valid_targets_min": 2608
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.037993920972644375,
|
||
|
|
"grad_norm": 7.202624222295684,
|
||
|
|
"learning_rate": 2.0824295010845986e-06,
|
||
|
|
"loss": 0.5318,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.4805890917778015,
|
||
|
|
"step": 25,
|
||
|
|
"valid_targets_mean": 4894.8,
|
||
|
|
"valid_targets_min": 746
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.04559270516717325,
|
||
|
|
"grad_norm": 5.248039681477442,
|
||
|
|
"learning_rate": 2.516268980477224e-06,
|
||
|
|
"loss": 0.5034,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.5152153372764587,
|
||
|
|
"step": 30,
|
||
|
|
"valid_targets_mean": 4687.7,
|
||
|
|
"valid_targets_min": 1705
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.05319148936170213,
|
||
|
|
"grad_norm": 2.714176181066118,
|
||
|
|
"learning_rate": 2.950108459869848e-06,
|
||
|
|
"loss": 0.4815,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.4753047823905945,
|
||
|
|
"step": 35,
|
||
|
|
"valid_targets_mean": 4859.7,
|
||
|
|
"valid_targets_min": 456
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.060790273556231005,
|
||
|
|
"grad_norm": 1.7777692231211755,
|
||
|
|
"learning_rate": 3.383947939262473e-06,
|
||
|
|
"loss": 0.4372,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.4611857831478119,
|
||
|
|
"step": 40,
|
||
|
|
"valid_targets_mean": 4217.9,
|
||
|
|
"valid_targets_min": 741
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.06838905775075987,
|
||
|
|
"grad_norm": 1.3787959870798525,
|
||
|
|
"learning_rate": 3.817787418655098e-06,
|
||
|
|
"loss": 0.4164,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.4521799385547638,
|
||
|
|
"step": 45,
|
||
|
|
"valid_targets_mean": 3577.2,
|
||
|
|
"valid_targets_min": 525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07598784194528875,
|
||
|
|
"grad_norm": 1.0709482777091444,
|
||
|
|
"learning_rate": 4.251626898047723e-06,
|
||
|
|
"loss": 0.4129,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.45762407779693604,
|
||
|
|
"step": 50,
|
||
|
|
"valid_targets_mean": 4046.8,
|
||
|
|
"valid_targets_min": 847
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.08358662613981763,
|
||
|
|
"grad_norm": 0.8316164594444523,
|
||
|
|
"learning_rate": 4.685466377440348e-06,
|
||
|
|
"loss": 0.3775,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.38839221000671387,
|
||
|
|
"step": 55,
|
||
|
|
"valid_targets_mean": 5905.9,
|
||
|
|
"valid_targets_min": 1232
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0911854103343465,
|
||
|
|
"grad_norm": 0.7446660666424425,
|
||
|
|
"learning_rate": 5.1193058568329725e-06,
|
||
|
|
"loss": 0.3891,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.34218358993530273,
|
||
|
|
"step": 60,
|
||
|
|
"valid_targets_mean": 4599.2,
|
||
|
|
"valid_targets_min": 554
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.09878419452887538,
|
||
|
|
"grad_norm": 0.8425007884341953,
|
||
|
|
"learning_rate": 5.5531453362255974e-06,
|
||
|
|
"loss": 0.3781,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.36715853214263916,
|
||
|
|
"step": 65,
|
||
|
|
"valid_targets_mean": 4719.6,
|
||
|
|
"valid_targets_min": 590
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10638297872340426,
|
||
|
|
"grad_norm": 0.6619149306280321,
|
||
|
|
"learning_rate": 5.986984815618222e-06,
|
||
|
|
"loss": 0.3861,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3819279968738556,
|
||
|
|
"step": 70,
|
||
|
|
"valid_targets_mean": 4757.8,
|
||
|
|
"valid_targets_min": 881
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.11398176291793313,
|
||
|
|
"grad_norm": 0.5731111234637317,
|
||
|
|
"learning_rate": 6.420824295010846e-06,
|
||
|
|
"loss": 0.3629,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3706345558166504,
|
||
|
|
"step": 75,
|
||
|
|
"valid_targets_mean": 5672.5,
|
||
|
|
"valid_targets_min": 1128
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.12158054711246201,
|
||
|
|
"grad_norm": 0.6080686326455707,
|
||
|
|
"learning_rate": 6.854663774403471e-06,
|
||
|
|
"loss": 0.3312,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.38144651055336,
|
||
|
|
"step": 80,
|
||
|
|
"valid_targets_mean": 4179.6,
|
||
|
|
"valid_targets_min": 814
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.12917933130699089,
|
||
|
|
"grad_norm": 0.6499495432455763,
|
||
|
|
"learning_rate": 7.288503253796096e-06,
|
||
|
|
"loss": 0.3446,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3940460979938507,
|
||
|
|
"step": 85,
|
||
|
|
"valid_targets_mean": 4595.6,
|
||
|
|
"valid_targets_min": 583
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.13677811550151975,
|
||
|
|
"grad_norm": 0.4843533852497594,
|
||
|
|
"learning_rate": 7.722342733188721e-06,
|
||
|
|
"loss": 0.332,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3182428181171417,
|
||
|
|
"step": 90,
|
||
|
|
"valid_targets_mean": 5585.4,
|
||
|
|
"valid_targets_min": 740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.14437689969604864,
|
||
|
|
"grad_norm": 0.6242189491842878,
|
||
|
|
"learning_rate": 8.156182212581345e-06,
|
||
|
|
"loss": 0.3316,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3119521737098694,
|
||
|
|
"step": 95,
|
||
|
|
"valid_targets_mean": 5690.2,
|
||
|
|
"valid_targets_min": 792
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1519756838905775,
|
||
|
|
"grad_norm": 0.5346923069880523,
|
||
|
|
"learning_rate": 8.59002169197397e-06,
|
||
|
|
"loss": 0.3313,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31788039207458496,
|
||
|
|
"step": 100,
|
||
|
|
"valid_targets_mean": 4587.9,
|
||
|
|
"valid_targets_min": 767
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1595744680851064,
|
||
|
|
"grad_norm": 0.5073268077298685,
|
||
|
|
"learning_rate": 9.023861171366595e-06,
|
||
|
|
"loss": 0.3194,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3051730990409851,
|
||
|
|
"step": 105,
|
||
|
|
"valid_targets_mean": 5143.8,
|
||
|
|
"valid_targets_min": 823
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.16717325227963525,
|
||
|
|
"grad_norm": 0.5467162692473684,
|
||
|
|
"learning_rate": 9.457700650759219e-06,
|
||
|
|
"loss": 0.3018,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.32401126623153687,
|
||
|
|
"step": 110,
|
||
|
|
"valid_targets_mean": 5420.0,
|
||
|
|
"valid_targets_min": 674
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.17477203647416414,
|
||
|
|
"grad_norm": 0.5541588789236693,
|
||
|
|
"learning_rate": 9.891540130151845e-06,
|
||
|
|
"loss": 0.3334,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3607725501060486,
|
||
|
|
"step": 115,
|
||
|
|
"valid_targets_mean": 5116.0,
|
||
|
|
"valid_targets_min": 569
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.182370820668693,
|
||
|
|
"grad_norm": 0.5403092031436869,
|
||
|
|
"learning_rate": 1.032537960954447e-05,
|
||
|
|
"loss": 0.2937,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.290938138961792,
|
||
|
|
"step": 120,
|
||
|
|
"valid_targets_mean": 4455.6,
|
||
|
|
"valid_targets_min": 1819
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1899696048632219,
|
||
|
|
"grad_norm": 0.5924655899271373,
|
||
|
|
"learning_rate": 1.0759219088937095e-05,
|
||
|
|
"loss": 0.3343,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31885042786598206,
|
||
|
|
"step": 125,
|
||
|
|
"valid_targets_mean": 3642.6,
|
||
|
|
"valid_targets_min": 401
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.19756838905775076,
|
||
|
|
"grad_norm": 0.49980402806514684,
|
||
|
|
"learning_rate": 1.119305856832972e-05,
|
||
|
|
"loss": 0.2857,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2728749215602875,
|
||
|
|
"step": 130,
|
||
|
|
"valid_targets_mean": 4462.3,
|
||
|
|
"valid_targets_min": 585
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.20516717325227962,
|
||
|
|
"grad_norm": 0.5224338478901207,
|
||
|
|
"learning_rate": 1.1626898047722344e-05,
|
||
|
|
"loss": 0.2962,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29036590456962585,
|
||
|
|
"step": 135,
|
||
|
|
"valid_targets_mean": 4953.9,
|
||
|
|
"valid_targets_min": 901
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2127659574468085,
|
||
|
|
"grad_norm": 0.5647977318046398,
|
||
|
|
"learning_rate": 1.2060737527114967e-05,
|
||
|
|
"loss": 0.2992,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.30531400442123413,
|
||
|
|
"step": 140,
|
||
|
|
"valid_targets_mean": 3919.9,
|
||
|
|
"valid_targets_min": 797
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22036474164133737,
|
||
|
|
"grad_norm": 0.5768676708119788,
|
||
|
|
"learning_rate": 1.2494577006507593e-05,
|
||
|
|
"loss": 0.3144,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3926551342010498,
|
||
|
|
"step": 145,
|
||
|
|
"valid_targets_mean": 4445.3,
|
||
|
|
"valid_targets_min": 595
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22796352583586627,
|
||
|
|
"grad_norm": 0.46004731662025494,
|
||
|
|
"learning_rate": 1.2928416485900217e-05,
|
||
|
|
"loss": 0.2964,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23685210943222046,
|
||
|
|
"step": 150,
|
||
|
|
"valid_targets_mean": 4963.2,
|
||
|
|
"valid_targets_min": 1646
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.23556231003039513,
|
||
|
|
"grad_norm": 0.6456923331623713,
|
||
|
|
"learning_rate": 1.3362255965292842e-05,
|
||
|
|
"loss": 0.2947,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.32530516386032104,
|
||
|
|
"step": 155,
|
||
|
|
"valid_targets_mean": 3147.6,
|
||
|
|
"valid_targets_min": 713
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24316109422492402,
|
||
|
|
"grad_norm": 0.5132446277120045,
|
||
|
|
"learning_rate": 1.3796095444685466e-05,
|
||
|
|
"loss": 0.2866,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3017147481441498,
|
||
|
|
"step": 160,
|
||
|
|
"valid_targets_mean": 5447.2,
|
||
|
|
"valid_targets_min": 662
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2507598784194529,
|
||
|
|
"grad_norm": 0.5526604062654956,
|
||
|
|
"learning_rate": 1.4229934924078092e-05,
|
||
|
|
"loss": 0.2781,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2687798738479614,
|
||
|
|
"step": 165,
|
||
|
|
"valid_targets_mean": 4341.5,
|
||
|
|
"valid_targets_min": 756
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.25835866261398177,
|
||
|
|
"grad_norm": 0.5293460369161004,
|
||
|
|
"learning_rate": 1.4663774403470716e-05,
|
||
|
|
"loss": 0.2804,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28431305289268494,
|
||
|
|
"step": 170,
|
||
|
|
"valid_targets_mean": 6130.4,
|
||
|
|
"valid_targets_min": 1724
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.26595744680851063,
|
||
|
|
"grad_norm": 0.5138145073678304,
|
||
|
|
"learning_rate": 1.5097613882863342e-05,
|
||
|
|
"loss": 0.2892,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.29994410276412964,
|
||
|
|
"step": 175,
|
||
|
|
"valid_targets_mean": 4394.1,
|
||
|
|
"valid_targets_min": 589
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2735562310030395,
|
||
|
|
"grad_norm": 0.5043230432581788,
|
||
|
|
"learning_rate": 1.5531453362255964e-05,
|
||
|
|
"loss": 0.2841,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2635194659233093,
|
||
|
|
"step": 180,
|
||
|
|
"valid_targets_mean": 4637.4,
|
||
|
|
"valid_targets_min": 588
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2811550151975684,
|
||
|
|
"grad_norm": 0.5633614566403176,
|
||
|
|
"learning_rate": 1.5965292841648592e-05,
|
||
|
|
"loss": 0.2694,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.349490761756897,
|
||
|
|
"step": 185,
|
||
|
|
"valid_targets_mean": 5181.4,
|
||
|
|
"valid_targets_min": 1743
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2887537993920973,
|
||
|
|
"grad_norm": 0.5633432190899903,
|
||
|
|
"learning_rate": 1.6399132321041216e-05,
|
||
|
|
"loss": 0.2859,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28400588035583496,
|
||
|
|
"step": 190,
|
||
|
|
"valid_targets_mean": 4035.6,
|
||
|
|
"valid_targets_min": 1318
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.29635258358662614,
|
||
|
|
"grad_norm": 0.48584306761961266,
|
||
|
|
"learning_rate": 1.6832971800433843e-05,
|
||
|
|
"loss": 0.2778,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2302139550447464,
|
||
|
|
"step": 195,
|
||
|
|
"valid_targets_mean": 5217.0,
|
||
|
|
"valid_targets_min": 1825
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.303951367781155,
|
||
|
|
"grad_norm": 0.5423337122654377,
|
||
|
|
"learning_rate": 1.7266811279826464e-05,
|
||
|
|
"loss": 0.265,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2699054479598999,
|
||
|
|
"step": 200,
|
||
|
|
"valid_targets_mean": 4060.4,
|
||
|
|
"valid_targets_min": 824
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.31155015197568386,
|
||
|
|
"grad_norm": 0.583883380975703,
|
||
|
|
"learning_rate": 1.770065075921909e-05,
|
||
|
|
"loss": 0.263,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28162840008735657,
|
||
|
|
"step": 205,
|
||
|
|
"valid_targets_mean": 4595.1,
|
||
|
|
"valid_targets_min": 446
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3191489361702128,
|
||
|
|
"grad_norm": 0.5187106940915953,
|
||
|
|
"learning_rate": 1.8134490238611715e-05,
|
||
|
|
"loss": 0.2586,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25653451681137085,
|
||
|
|
"step": 210,
|
||
|
|
"valid_targets_mean": 4721.1,
|
||
|
|
"valid_targets_min": 885
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.32674772036474165,
|
||
|
|
"grad_norm": 0.5079241607768279,
|
||
|
|
"learning_rate": 1.856832971800434e-05,
|
||
|
|
"loss": 0.2524,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.257007360458374,
|
||
|
|
"step": 215,
|
||
|
|
"valid_targets_mean": 5210.9,
|
||
|
|
"valid_targets_min": 784
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3343465045592705,
|
||
|
|
"grad_norm": 0.6159525490464162,
|
||
|
|
"learning_rate": 1.9002169197396964e-05,
|
||
|
|
"loss": 0.2707,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3035469651222229,
|
||
|
|
"step": 220,
|
||
|
|
"valid_targets_mean": 4253.7,
|
||
|
|
"valid_targets_min": 670
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.34194528875379937,
|
||
|
|
"grad_norm": 0.618048775817979,
|
||
|
|
"learning_rate": 1.9436008676789588e-05,
|
||
|
|
"loss": 0.2914,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3232946991920471,
|
||
|
|
"step": 225,
|
||
|
|
"valid_targets_mean": 3915.6,
|
||
|
|
"valid_targets_min": 686
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3495440729483283,
|
||
|
|
"grad_norm": 0.4988454732686451,
|
||
|
|
"learning_rate": 1.9869848156182215e-05,
|
||
|
|
"loss": 0.2604,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2464175820350647,
|
||
|
|
"step": 230,
|
||
|
|
"valid_targets_mean": 4478.2,
|
||
|
|
"valid_targets_min": 1934
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.35714285714285715,
|
||
|
|
"grad_norm": 0.5788630961414701,
|
||
|
|
"learning_rate": 2.030368763557484e-05,
|
||
|
|
"loss": 0.2697,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25093919038772583,
|
||
|
|
"step": 235,
|
||
|
|
"valid_targets_mean": 4403.9,
|
||
|
|
"valid_targets_min": 2002
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.364741641337386,
|
||
|
|
"grad_norm": 0.5727233609316779,
|
||
|
|
"learning_rate": 2.0737527114967463e-05,
|
||
|
|
"loss": 0.2787,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2930532693862915,
|
||
|
|
"step": 240,
|
||
|
|
"valid_targets_mean": 3878.2,
|
||
|
|
"valid_targets_min": 683
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3723404255319149,
|
||
|
|
"grad_norm": 0.507643333171839,
|
||
|
|
"learning_rate": 2.117136659436009e-05,
|
||
|
|
"loss": 0.2612,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26503849029541016,
|
||
|
|
"step": 245,
|
||
|
|
"valid_targets_mean": 5116.3,
|
||
|
|
"valid_targets_min": 1771
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3799392097264438,
|
||
|
|
"grad_norm": 0.5575549222120635,
|
||
|
|
"learning_rate": 2.160520607375271e-05,
|
||
|
|
"loss": 0.239,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24154013395309448,
|
||
|
|
"step": 250,
|
||
|
|
"valid_targets_mean": 4581.1,
|
||
|
|
"valid_targets_min": 446
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.38753799392097266,
|
||
|
|
"grad_norm": 0.5133849867561285,
|
||
|
|
"learning_rate": 2.203904555314534e-05,
|
||
|
|
"loss": 0.2536,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2547852694988251,
|
||
|
|
"step": 255,
|
||
|
|
"valid_targets_mean": 4651.8,
|
||
|
|
"valid_targets_min": 434
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3951367781155015,
|
||
|
|
"grad_norm": 0.46783460540928934,
|
||
|
|
"learning_rate": 2.2472885032537963e-05,
|
||
|
|
"loss": 0.2714,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23877258598804474,
|
||
|
|
"step": 260,
|
||
|
|
"valid_targets_mean": 5490.8,
|
||
|
|
"valid_targets_min": 1221
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4027355623100304,
|
||
|
|
"grad_norm": 0.5446072363730794,
|
||
|
|
"learning_rate": 2.290672451193059e-05,
|
||
|
|
"loss": 0.2643,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23716062307357788,
|
||
|
|
"step": 265,
|
||
|
|
"valid_targets_mean": 4415.1,
|
||
|
|
"valid_targets_min": 567
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.41033434650455924,
|
||
|
|
"grad_norm": 0.5076237944415188,
|
||
|
|
"learning_rate": 2.334056399132321e-05,
|
||
|
|
"loss": 0.2406,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2342528998851776,
|
||
|
|
"step": 270,
|
||
|
|
"valid_targets_mean": 4982.2,
|
||
|
|
"valid_targets_min": 867
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.41793313069908816,
|
||
|
|
"grad_norm": 0.5204833234773354,
|
||
|
|
"learning_rate": 2.3774403470715835e-05,
|
||
|
|
"loss": 0.2722,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24817609786987305,
|
||
|
|
"step": 275,
|
||
|
|
"valid_targets_mean": 4886.1,
|
||
|
|
"valid_targets_min": 1124
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.425531914893617,
|
||
|
|
"grad_norm": 1.1929428353263904,
|
||
|
|
"learning_rate": 2.4208242950108462e-05,
|
||
|
|
"loss": 0.2534,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2741398513317108,
|
||
|
|
"step": 280,
|
||
|
|
"valid_targets_mean": 5717.5,
|
||
|
|
"valid_targets_min": 863
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4331306990881459,
|
||
|
|
"grad_norm": 0.585914448752194,
|
||
|
|
"learning_rate": 2.464208242950109e-05,
|
||
|
|
"loss": 0.2498,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27346092462539673,
|
||
|
|
"step": 285,
|
||
|
|
"valid_targets_mean": 4714.8,
|
||
|
|
"valid_targets_min": 738
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.44072948328267475,
|
||
|
|
"grad_norm": 0.6700936401277653,
|
||
|
|
"learning_rate": 2.507592190889371e-05,
|
||
|
|
"loss": 0.2714,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3218214511871338,
|
||
|
|
"step": 290,
|
||
|
|
"valid_targets_mean": 3352.8,
|
||
|
|
"valid_targets_min": 665
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.44832826747720367,
|
||
|
|
"grad_norm": 0.5819949812246825,
|
||
|
|
"learning_rate": 2.5509761388286335e-05,
|
||
|
|
"loss": 0.2454,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24475443363189697,
|
||
|
|
"step": 295,
|
||
|
|
"valid_targets_mean": 5024.5,
|
||
|
|
"valid_targets_min": 1008
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.45592705167173253,
|
||
|
|
"grad_norm": 0.5564851893611434,
|
||
|
|
"learning_rate": 2.5943600867678962e-05,
|
||
|
|
"loss": 0.2715,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25335440039634705,
|
||
|
|
"step": 300,
|
||
|
|
"valid_targets_mean": 4271.6,
|
||
|
|
"valid_targets_min": 495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4635258358662614,
|
||
|
|
"grad_norm": 0.6776311836920197,
|
||
|
|
"learning_rate": 2.637744034707159e-05,
|
||
|
|
"loss": 0.2637,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26621827483177185,
|
||
|
|
"step": 305,
|
||
|
|
"valid_targets_mean": 4792.2,
|
||
|
|
"valid_targets_min": 585
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.47112462006079026,
|
||
|
|
"grad_norm": 0.5014128476072633,
|
||
|
|
"learning_rate": 2.681127982646421e-05,
|
||
|
|
"loss": 0.2433,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23652535676956177,
|
||
|
|
"step": 310,
|
||
|
|
"valid_targets_mean": 5203.9,
|
||
|
|
"valid_targets_min": 631
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4787234042553192,
|
||
|
|
"grad_norm": 0.5471285546055437,
|
||
|
|
"learning_rate": 2.7245119305856834e-05,
|
||
|
|
"loss": 0.2392,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2725940942764282,
|
||
|
|
"step": 315,
|
||
|
|
"valid_targets_mean": 4777.1,
|
||
|
|
"valid_targets_min": 1762
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.48632218844984804,
|
||
|
|
"grad_norm": 0.5215837029470914,
|
||
|
|
"learning_rate": 2.7678958785249462e-05,
|
||
|
|
"loss": 0.2357,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2674940228462219,
|
||
|
|
"step": 320,
|
||
|
|
"valid_targets_mean": 4912.2,
|
||
|
|
"valid_targets_min": 1286
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4939209726443769,
|
||
|
|
"grad_norm": 0.5545464098953832,
|
||
|
|
"learning_rate": 2.8112798264642082e-05,
|
||
|
|
"loss": 0.2555,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27066224813461304,
|
||
|
|
"step": 325,
|
||
|
|
"valid_targets_mean": 4713.8,
|
||
|
|
"valid_targets_min": 581
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5015197568389058,
|
||
|
|
"grad_norm": 0.5291981229160326,
|
||
|
|
"learning_rate": 2.854663774403471e-05,
|
||
|
|
"loss": 0.2646,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27261027693748474,
|
||
|
|
"step": 330,
|
||
|
|
"valid_targets_mean": 5018.4,
|
||
|
|
"valid_targets_min": 825
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5091185410334347,
|
||
|
|
"grad_norm": 0.5927074347019183,
|
||
|
|
"learning_rate": 2.8980477223427334e-05,
|
||
|
|
"loss": 0.249,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23593679070472717,
|
||
|
|
"step": 335,
|
||
|
|
"valid_targets_mean": 4369.9,
|
||
|
|
"valid_targets_min": 852
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5167173252279635,
|
||
|
|
"grad_norm": 0.5613566212263488,
|
||
|
|
"learning_rate": 2.941431670281996e-05,
|
||
|
|
"loss": 0.246,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25743183493614197,
|
||
|
|
"step": 340,
|
||
|
|
"valid_targets_mean": 4878.5,
|
||
|
|
"valid_targets_min": 736
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5243161094224924,
|
||
|
|
"grad_norm": 0.43333351737553155,
|
||
|
|
"learning_rate": 2.9848156182212582e-05,
|
||
|
|
"loss": 0.2418,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22708739340305328,
|
||
|
|
"step": 345,
|
||
|
|
"valid_targets_mean": 5272.6,
|
||
|
|
"valid_targets_min": 2600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5319148936170213,
|
||
|
|
"grad_norm": 0.5349091750560264,
|
||
|
|
"learning_rate": 3.028199566160521e-05,
|
||
|
|
"loss": 0.2484,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2423931062221527,
|
||
|
|
"step": 350,
|
||
|
|
"valid_targets_mean": 4710.4,
|
||
|
|
"valid_targets_min": 1708
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5395136778115501,
|
||
|
|
"grad_norm": 0.552342573882699,
|
||
|
|
"learning_rate": 3.0715835140997834e-05,
|
||
|
|
"loss": 0.2515,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2209261655807495,
|
||
|
|
"step": 355,
|
||
|
|
"valid_targets_mean": 4220.2,
|
||
|
|
"valid_targets_min": 498
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.547112462006079,
|
||
|
|
"grad_norm": 0.47358599117445827,
|
||
|
|
"learning_rate": 3.114967462039046e-05,
|
||
|
|
"loss": 0.2431,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22403742372989655,
|
||
|
|
"step": 360,
|
||
|
|
"valid_targets_mean": 5009.1,
|
||
|
|
"valid_targets_min": 826
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5547112462006079,
|
||
|
|
"grad_norm": 0.5508791979851065,
|
||
|
|
"learning_rate": 3.158351409978308e-05,
|
||
|
|
"loss": 0.271,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2744586765766144,
|
||
|
|
"step": 365,
|
||
|
|
"valid_targets_mean": 4221.0,
|
||
|
|
"valid_targets_min": 658
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5623100303951368,
|
||
|
|
"grad_norm": 0.4880656851972933,
|
||
|
|
"learning_rate": 3.2017353579175706e-05,
|
||
|
|
"loss": 0.257,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2102406919002533,
|
||
|
|
"step": 370,
|
||
|
|
"valid_targets_mean": 5021.4,
|
||
|
|
"valid_targets_min": 422
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5699088145896657,
|
||
|
|
"grad_norm": 0.5066085991817625,
|
||
|
|
"learning_rate": 3.2451193058568337e-05,
|
||
|
|
"loss": 0.2493,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.220636785030365,
|
||
|
|
"step": 375,
|
||
|
|
"valid_targets_mean": 4511.9,
|
||
|
|
"valid_targets_min": 904
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5775075987841946,
|
||
|
|
"grad_norm": 0.7189915330518424,
|
||
|
|
"learning_rate": 3.288503253796096e-05,
|
||
|
|
"loss": 0.2561,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2566250264644623,
|
||
|
|
"step": 380,
|
||
|
|
"valid_targets_mean": 4385.5,
|
||
|
|
"valid_targets_min": 551
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5851063829787234,
|
||
|
|
"grad_norm": 0.548460587196264,
|
||
|
|
"learning_rate": 3.331887201735358e-05,
|
||
|
|
"loss": 0.259,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23478102684020996,
|
||
|
|
"step": 385,
|
||
|
|
"valid_targets_mean": 4521.1,
|
||
|
|
"valid_targets_min": 940
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5927051671732523,
|
||
|
|
"grad_norm": 0.7285447421019886,
|
||
|
|
"learning_rate": 3.375271149674621e-05,
|
||
|
|
"loss": 0.2528,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2555179297924042,
|
||
|
|
"step": 390,
|
||
|
|
"valid_targets_mean": 3723.9,
|
||
|
|
"valid_targets_min": 409
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6003039513677811,
|
||
|
|
"grad_norm": 0.5544467667445715,
|
||
|
|
"learning_rate": 3.418655097613883e-05,
|
||
|
|
"loss": 0.2459,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2515088617801666,
|
||
|
|
"step": 395,
|
||
|
|
"valid_targets_mean": 4729.4,
|
||
|
|
"valid_targets_min": 791
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.60790273556231,
|
||
|
|
"grad_norm": 0.53995409546225,
|
||
|
|
"learning_rate": 3.462039045553146e-05,
|
||
|
|
"loss": 0.2477,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23457960784435272,
|
||
|
|
"step": 400,
|
||
|
|
"valid_targets_mean": 5069.6,
|
||
|
|
"valid_targets_min": 489
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6155015197568389,
|
||
|
|
"grad_norm": 0.9196646418934739,
|
||
|
|
"learning_rate": 3.505422993492408e-05,
|
||
|
|
"loss": 0.2449,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23785921931266785,
|
||
|
|
"step": 405,
|
||
|
|
"valid_targets_mean": 4393.4,
|
||
|
|
"valid_targets_min": 829
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6231003039513677,
|
||
|
|
"grad_norm": 0.6099790951053485,
|
||
|
|
"learning_rate": 3.5488069414316705e-05,
|
||
|
|
"loss": 0.2363,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23619459569454193,
|
||
|
|
"step": 410,
|
||
|
|
"valid_targets_mean": 3865.0,
|
||
|
|
"valid_targets_min": 1002
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6306990881458967,
|
||
|
|
"grad_norm": 0.5925560150305617,
|
||
|
|
"learning_rate": 3.592190889370933e-05,
|
||
|
|
"loss": 0.2534,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24648156762123108,
|
||
|
|
"step": 415,
|
||
|
|
"valid_targets_mean": 5077.6,
|
||
|
|
"valid_targets_min": 1028
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6382978723404256,
|
||
|
|
"grad_norm": 0.5334746742630121,
|
||
|
|
"learning_rate": 3.635574837310195e-05,
|
||
|
|
"loss": 0.2337,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21448864042758942,
|
||
|
|
"step": 420,
|
||
|
|
"valid_targets_mean": 4616.1,
|
||
|
|
"valid_targets_min": 574
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6458966565349544,
|
||
|
|
"grad_norm": 0.5167724016109878,
|
||
|
|
"learning_rate": 3.678958785249458e-05,
|
||
|
|
"loss": 0.2433,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.230168879032135,
|
||
|
|
"step": 425,
|
||
|
|
"valid_targets_mean": 4870.9,
|
||
|
|
"valid_targets_min": 921
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6534954407294833,
|
||
|
|
"grad_norm": 0.5454984899416742,
|
||
|
|
"learning_rate": 3.722342733188721e-05,
|
||
|
|
"loss": 0.2439,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.262331485748291,
|
||
|
|
"step": 430,
|
||
|
|
"valid_targets_mean": 5432.7,
|
||
|
|
"valid_targets_min": 933
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6610942249240122,
|
||
|
|
"grad_norm": 0.7273049295570767,
|
||
|
|
"learning_rate": 3.765726681127983e-05,
|
||
|
|
"loss": 0.2398,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25644373893737793,
|
||
|
|
"step": 435,
|
||
|
|
"valid_targets_mean": 3536.0,
|
||
|
|
"valid_targets_min": 710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.668693009118541,
|
||
|
|
"grad_norm": 0.5453606831740409,
|
||
|
|
"learning_rate": 3.8091106290672456e-05,
|
||
|
|
"loss": 0.2312,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24661529064178467,
|
||
|
|
"step": 440,
|
||
|
|
"valid_targets_mean": 4021.8,
|
||
|
|
"valid_targets_min": 599
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6762917933130699,
|
||
|
|
"grad_norm": 0.5397724365767885,
|
||
|
|
"learning_rate": 3.852494577006508e-05,
|
||
|
|
"loss": 0.2536,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2421499490737915,
|
||
|
|
"step": 445,
|
||
|
|
"valid_targets_mean": 4597.5,
|
||
|
|
"valid_targets_min": 370
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6838905775075987,
|
||
|
|
"grad_norm": 0.5764238756332084,
|
||
|
|
"learning_rate": 3.8958785249457704e-05,
|
||
|
|
"loss": 0.2261,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2172556221485138,
|
||
|
|
"step": 450,
|
||
|
|
"valid_targets_mean": 5077.4,
|
||
|
|
"valid_targets_min": 1299
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6914893617021277,
|
||
|
|
"grad_norm": 0.5511870862131859,
|
||
|
|
"learning_rate": 3.939262472885033e-05,
|
||
|
|
"loss": 0.2546,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23796507716178894,
|
||
|
|
"step": 455,
|
||
|
|
"valid_targets_mean": 3540.4,
|
||
|
|
"valid_targets_min": 781
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6990881458966566,
|
||
|
|
"grad_norm": 0.5240137590905279,
|
||
|
|
"learning_rate": 3.982646420824295e-05,
|
||
|
|
"loss": 0.2749,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3238080143928528,
|
||
|
|
"step": 460,
|
||
|
|
"valid_targets_mean": 6050.4,
|
||
|
|
"valid_targets_min": 921
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7066869300911854,
|
||
|
|
"grad_norm": 0.504788443919895,
|
||
|
|
"learning_rate": 3.999994829970777e-05,
|
||
|
|
"loss": 0.2519,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2535270154476166,
|
||
|
|
"step": 465,
|
||
|
|
"valid_targets_mean": 4623.6,
|
||
|
|
"valid_targets_min": 642
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7142857142857143,
|
||
|
|
"grad_norm": 0.6552382106268326,
|
||
|
|
"learning_rate": 3.999963235444541e-05,
|
||
|
|
"loss": 0.249,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23992504179477692,
|
||
|
|
"step": 470,
|
||
|
|
"valid_targets_mean": 5108.4,
|
||
|
|
"valid_targets_min": 702
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7218844984802432,
|
||
|
|
"grad_norm": 0.5338400861286152,
|
||
|
|
"learning_rate": 3.999902919083712e-05,
|
||
|
|
"loss": 0.2426,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2298986315727234,
|
||
|
|
"step": 475,
|
||
|
|
"valid_targets_mean": 4551.4,
|
||
|
|
"valid_targets_min": 827
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.729483282674772,
|
||
|
|
"grad_norm": 0.4717163253476549,
|
||
|
|
"learning_rate": 3.999813881754504e-05,
|
||
|
|
"loss": 0.2367,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22416530549526215,
|
||
|
|
"step": 480,
|
||
|
|
"valid_targets_mean": 5166.6,
|
||
|
|
"valid_targets_min": 2461
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7370820668693009,
|
||
|
|
"grad_norm": 0.46569192135259724,
|
||
|
|
"learning_rate": 3.999696124735598e-05,
|
||
|
|
"loss": 0.2388,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20350381731987,
|
||
|
|
"step": 485,
|
||
|
|
"valid_targets_mean": 5345.7,
|
||
|
|
"valid_targets_min": 1890
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7446808510638298,
|
||
|
|
"grad_norm": 0.42173419222407094,
|
||
|
|
"learning_rate": 3.999549649718124e-05,
|
||
|
|
"loss": 0.2325,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21917913854122162,
|
||
|
|
"step": 490,
|
||
|
|
"valid_targets_mean": 6391.4,
|
||
|
|
"valid_targets_min": 3738
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7522796352583586,
|
||
|
|
"grad_norm": 0.5432626540059464,
|
||
|
|
"learning_rate": 3.999374458805636e-05,
|
||
|
|
"loss": 0.2436,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2308879792690277,
|
||
|
|
"step": 495,
|
||
|
|
"valid_targets_mean": 4123.1,
|
||
|
|
"valid_targets_min": 597
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7598784194528876,
|
||
|
|
"grad_norm": 0.5137145588974806,
|
||
|
|
"learning_rate": 3.999170554514082e-05,
|
||
|
|
"loss": 0.2331,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23714055120944977,
|
||
|
|
"step": 500,
|
||
|
|
"valid_targets_mean": 4805.9,
|
||
|
|
"valid_targets_min": 605
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7674772036474165,
|
||
|
|
"grad_norm": 0.49268000212812557,
|
||
|
|
"learning_rate": 3.998937939771771e-05,
|
||
|
|
"loss": 0.2222,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24276787042617798,
|
||
|
|
"step": 505,
|
||
|
|
"valid_targets_mean": 4071.9,
|
||
|
|
"valid_targets_min": 790
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7750759878419453,
|
||
|
|
"grad_norm": 0.6202296018779553,
|
||
|
|
"learning_rate": 3.998676617919322e-05,
|
||
|
|
"loss": 0.2437,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27594321966171265,
|
||
|
|
"step": 510,
|
||
|
|
"valid_targets_mean": 3380.7,
|
||
|
|
"valid_targets_min": 440
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7826747720364742,
|
||
|
|
"grad_norm": 0.4863803507783552,
|
||
|
|
"learning_rate": 3.9983865927096276e-05,
|
||
|
|
"loss": 0.2384,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24749267101287842,
|
||
|
|
"step": 515,
|
||
|
|
"valid_targets_mean": 5073.1,
|
||
|
|
"valid_targets_min": 777
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.790273556231003,
|
||
|
|
"grad_norm": 0.5836827968647129,
|
||
|
|
"learning_rate": 3.998067868307792e-05,
|
||
|
|
"loss": 0.2272,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23861373960971832,
|
||
|
|
"step": 520,
|
||
|
|
"valid_targets_mean": 5020.6,
|
||
|
|
"valid_targets_min": 570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7978723404255319,
|
||
|
|
"grad_norm": 0.5231558404449659,
|
||
|
|
"learning_rate": 3.9977204492910744e-05,
|
||
|
|
"loss": 0.233,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22653838992118835,
|
||
|
|
"step": 525,
|
||
|
|
"valid_targets_mean": 4575.4,
|
||
|
|
"valid_targets_min": 565
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8054711246200608,
|
||
|
|
"grad_norm": 0.5430934870471972,
|
||
|
|
"learning_rate": 3.997344340648822e-05,
|
||
|
|
"loss": 0.2358,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22603115439414978,
|
||
|
|
"step": 530,
|
||
|
|
"valid_targets_mean": 4679.3,
|
||
|
|
"valid_targets_min": 937
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8130699088145896,
|
||
|
|
"grad_norm": 0.5286884499015065,
|
||
|
|
"learning_rate": 3.996939547782399e-05,
|
||
|
|
"loss": 0.2352,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22073689103126526,
|
||
|
|
"step": 535,
|
||
|
|
"valid_targets_mean": 4476.1,
|
||
|
|
"valid_targets_min": 718
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8206686930091185,
|
||
|
|
"grad_norm": 0.5785998410503295,
|
||
|
|
"learning_rate": 3.996506076505109e-05,
|
||
|
|
"loss": 0.2497,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2660371661186218,
|
||
|
|
"step": 540,
|
||
|
|
"valid_targets_mean": 4106.3,
|
||
|
|
"valid_targets_min": 487
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8282674772036475,
|
||
|
|
"grad_norm": 0.6951317726718936,
|
||
|
|
"learning_rate": 3.996043933042112e-05,
|
||
|
|
"loss": 0.2367,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24442562460899353,
|
||
|
|
"step": 545,
|
||
|
|
"valid_targets_mean": 4844.5,
|
||
|
|
"valid_targets_min": 834
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8358662613981763,
|
||
|
|
"grad_norm": 0.43506966442749545,
|
||
|
|
"learning_rate": 3.995553124030334e-05,
|
||
|
|
"loss": 0.2214,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20776590704917908,
|
||
|
|
"step": 550,
|
||
|
|
"valid_targets_mean": 4980.6,
|
||
|
|
"valid_targets_min": 465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8434650455927052,
|
||
|
|
"grad_norm": 0.4706049122050459,
|
||
|
|
"learning_rate": 3.9950336565183725e-05,
|
||
|
|
"loss": 0.2308,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.27285271883010864,
|
||
|
|
"step": 555,
|
||
|
|
"valid_targets_mean": 5395.9,
|
||
|
|
"valid_targets_min": 990
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.851063829787234,
|
||
|
|
"grad_norm": 0.49091088519408277,
|
||
|
|
"learning_rate": 3.994485537966394e-05,
|
||
|
|
"loss": 0.221,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2088671624660492,
|
||
|
|
"step": 560,
|
||
|
|
"valid_targets_mean": 4588.9,
|
||
|
|
"valid_targets_min": 930
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8586626139817629,
|
||
|
|
"grad_norm": 0.5986083125056836,
|
||
|
|
"learning_rate": 3.993908776246029e-05,
|
||
|
|
"loss": 0.2397,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24598953127861023,
|
||
|
|
"step": 565,
|
||
|
|
"valid_targets_mean": 3864.4,
|
||
|
|
"valid_targets_min": 694
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8662613981762918,
|
||
|
|
"grad_norm": 0.5161879038146385,
|
||
|
|
"learning_rate": 3.993303379640256e-05,
|
||
|
|
"loss": 0.2433,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24257272481918335,
|
||
|
|
"step": 570,
|
||
|
|
"valid_targets_mean": 3828.1,
|
||
|
|
"valid_targets_min": 714
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8738601823708206,
|
||
|
|
"grad_norm": 0.4991892963677317,
|
||
|
|
"learning_rate": 3.992669356843287e-05,
|
||
|
|
"loss": 0.218,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2508493661880493,
|
||
|
|
"step": 575,
|
||
|
|
"valid_targets_mean": 4322.0,
|
||
|
|
"valid_targets_min": 1183
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8814589665653495,
|
||
|
|
"grad_norm": 0.46377204664593574,
|
||
|
|
"learning_rate": 3.992006716960437e-05,
|
||
|
|
"loss": 0.2293,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2126588225364685,
|
||
|
|
"step": 580,
|
||
|
|
"valid_targets_mean": 4726.4,
|
||
|
|
"valid_targets_min": 771
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8890577507598785,
|
||
|
|
"grad_norm": 0.4484638597015378,
|
||
|
|
"learning_rate": 3.9913154695079983e-05,
|
||
|
|
"loss": 0.2479,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2277049422264099,
|
||
|
|
"step": 585,
|
||
|
|
"valid_targets_mean": 5441.6,
|
||
|
|
"valid_targets_min": 672
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8966565349544073,
|
||
|
|
"grad_norm": 0.473975742493428,
|
||
|
|
"learning_rate": 3.9905956244131e-05,
|
||
|
|
"loss": 0.2406,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2310577630996704,
|
||
|
|
"step": 590,
|
||
|
|
"valid_targets_mean": 4966.7,
|
||
|
|
"valid_targets_min": 171
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9042553191489362,
|
||
|
|
"grad_norm": 0.5269660383414424,
|
||
|
|
"learning_rate": 3.989847192013569e-05,
|
||
|
|
"loss": 0.2439,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20536382496356964,
|
||
|
|
"step": 595,
|
||
|
|
"valid_targets_mean": 4305.1,
|
||
|
|
"valid_targets_min": 818
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9118541033434651,
|
||
|
|
"grad_norm": 0.5290469023785411,
|
||
|
|
"learning_rate": 3.9890701830577784e-05,
|
||
|
|
"loss": 0.2519,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19890496134757996,
|
||
|
|
"step": 600,
|
||
|
|
"valid_targets_mean": 3980.8,
|
||
|
|
"valid_targets_min": 911
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9194528875379939,
|
||
|
|
"grad_norm": 0.44803784041894573,
|
||
|
|
"learning_rate": 3.9882646087044944e-05,
|
||
|
|
"loss": 0.2219,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2399585247039795,
|
||
|
|
"step": 605,
|
||
|
|
"valid_targets_mean": 5333.2,
|
||
|
|
"valid_targets_min": 280
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9270516717325228,
|
||
|
|
"grad_norm": 0.4328101297576573,
|
||
|
|
"learning_rate": 3.987430480522717e-05,
|
||
|
|
"loss": 0.2227,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21868915855884552,
|
||
|
|
"step": 610,
|
||
|
|
"valid_targets_mean": 5678.4,
|
||
|
|
"valid_targets_min": 1871
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9346504559270516,
|
||
|
|
"grad_norm": 0.49304307903824257,
|
||
|
|
"learning_rate": 3.986567810491511e-05,
|
||
|
|
"loss": 0.2557,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2420819252729416,
|
||
|
|
"step": 615,
|
||
|
|
"valid_targets_mean": 4536.2,
|
||
|
|
"valid_targets_min": 809
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9422492401215805,
|
||
|
|
"grad_norm": 0.5700813425381566,
|
||
|
|
"learning_rate": 3.9856766109998376e-05,
|
||
|
|
"loss": 0.2407,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21311193704605103,
|
||
|
|
"step": 620,
|
||
|
|
"valid_targets_mean": 4451.4,
|
||
|
|
"valid_targets_min": 593
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9498480243161094,
|
||
|
|
"grad_norm": 0.6294683305416847,
|
||
|
|
"learning_rate": 3.9847568948463754e-05,
|
||
|
|
"loss": 0.2508,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20918530225753784,
|
||
|
|
"step": 625,
|
||
|
|
"valid_targets_mean": 4797.4,
|
||
|
|
"valid_targets_min": 847
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9574468085106383,
|
||
|
|
"grad_norm": 0.49380029688075006,
|
||
|
|
"learning_rate": 3.983808675239333e-05,
|
||
|
|
"loss": 0.2399,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2152310311794281,
|
||
|
|
"step": 630,
|
||
|
|
"valid_targets_mean": 4176.9,
|
||
|
|
"valid_targets_min": 692
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9650455927051672,
|
||
|
|
"grad_norm": 0.5200102205973911,
|
||
|
|
"learning_rate": 3.9828319657962655e-05,
|
||
|
|
"loss": 0.2211,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2529393136501312,
|
||
|
|
"step": 635,
|
||
|
|
"valid_targets_mean": 4521.0,
|
||
|
|
"valid_targets_min": 1069
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9726443768996961,
|
||
|
|
"grad_norm": 0.5009014898507517,
|
||
|
|
"learning_rate": 3.981826780543873e-05,
|
||
|
|
"loss": 0.2398,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2124088704586029,
|
||
|
|
"step": 640,
|
||
|
|
"valid_targets_mean": 4639.8,
|
||
|
|
"valid_targets_min": 268
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9802431610942249,
|
||
|
|
"grad_norm": 0.47854286438275906,
|
||
|
|
"learning_rate": 3.980793133917805e-05,
|
||
|
|
"loss": 0.2284,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22133532166481018,
|
||
|
|
"step": 645,
|
||
|
|
"valid_targets_mean": 4939.1,
|
||
|
|
"valid_targets_min": 1059
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9878419452887538,
|
||
|
|
"grad_norm": 0.44229863697562477,
|
||
|
|
"learning_rate": 3.979731040762446e-05,
|
||
|
|
"loss": 0.2596,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20959582924842834,
|
||
|
|
"step": 650,
|
||
|
|
"valid_targets_mean": 5179.6,
|
||
|
|
"valid_targets_min": 805
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9954407294832827,
|
||
|
|
"grad_norm": 0.4641726376092792,
|
||
|
|
"learning_rate": 3.97864051633071e-05,
|
||
|
|
"loss": 0.2249,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22157493233680725,
|
||
|
|
"step": 655,
|
||
|
|
"valid_targets_mean": 4819.9,
|
||
|
|
"valid_targets_min": 646
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0030395136778116,
|
||
|
|
"grad_norm": 0.7931644284514437,
|
||
|
|
"learning_rate": 3.977521576283815e-05,
|
||
|
|
"loss": 0.2343,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19440427422523499,
|
||
|
|
"step": 660,
|
||
|
|
"valid_targets_mean": 4668.7,
|
||
|
|
"valid_targets_min": 915
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0106382978723405,
|
||
|
|
"grad_norm": 0.48788029210277306,
|
||
|
|
"learning_rate": 3.9763742366910626e-05,
|
||
|
|
"loss": 0.206,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1956978291273117,
|
||
|
|
"step": 665,
|
||
|
|
"valid_targets_mean": 4805.8,
|
||
|
|
"valid_targets_min": 694
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0182370820668694,
|
||
|
|
"grad_norm": 0.5298990414713055,
|
||
|
|
"learning_rate": 3.975198514029604e-05,
|
||
|
|
"loss": 0.2228,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2972638010978699,
|
||
|
|
"step": 670,
|
||
|
|
"valid_targets_mean": 4853.2,
|
||
|
|
"valid_targets_min": 512
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0258358662613982,
|
||
|
|
"grad_norm": 0.5473369439399378,
|
||
|
|
"learning_rate": 3.9739944251842054e-05,
|
||
|
|
"loss": 0.2088,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21648220717906952,
|
||
|
|
"step": 675,
|
||
|
|
"valid_targets_mean": 5254.8,
|
||
|
|
"valid_targets_min": 1077
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.033434650455927,
|
||
|
|
"grad_norm": 0.8965405989782883,
|
||
|
|
"learning_rate": 3.9727619874470066e-05,
|
||
|
|
"loss": 0.2229,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24239912629127502,
|
||
|
|
"step": 680,
|
||
|
|
"valid_targets_mean": 4575.9,
|
||
|
|
"valid_targets_min": 847
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.041033434650456,
|
||
|
|
"grad_norm": 0.45053643384905995,
|
||
|
|
"learning_rate": 3.971501218517267e-05,
|
||
|
|
"loss": 0.2202,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20542305707931519,
|
||
|
|
"step": 685,
|
||
|
|
"valid_targets_mean": 4701.5,
|
||
|
|
"valid_targets_min": 359
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0486322188449848,
|
||
|
|
"grad_norm": 0.5355621304269382,
|
||
|
|
"learning_rate": 3.9702121365011194e-05,
|
||
|
|
"loss": 0.2427,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24637873470783234,
|
||
|
|
"step": 690,
|
||
|
|
"valid_targets_mean": 4662.4,
|
||
|
|
"valid_targets_min": 797
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0562310030395137,
|
||
|
|
"grad_norm": 0.46031160112965963,
|
||
|
|
"learning_rate": 3.968894759911304e-05,
|
||
|
|
"loss": 0.2221,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22839926183223724,
|
||
|
|
"step": 695,
|
||
|
|
"valid_targets_mean": 5164.2,
|
||
|
|
"valid_targets_min": 904
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0638297872340425,
|
||
|
|
"grad_norm": 0.4910433091107382,
|
||
|
|
"learning_rate": 3.9675491076669043e-05,
|
||
|
|
"loss": 0.2247,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21850205957889557,
|
||
|
|
"step": 700,
|
||
|
|
"valid_targets_mean": 4950.7,
|
||
|
|
"valid_targets_min": 478
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0714285714285714,
|
||
|
|
"grad_norm": 0.5876241654950701,
|
||
|
|
"learning_rate": 3.966175199093077e-05,
|
||
|
|
"loss": 0.2152,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23394906520843506,
|
||
|
|
"step": 705,
|
||
|
|
"valid_targets_mean": 3169.8,
|
||
|
|
"valid_targets_min": 809
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0790273556231003,
|
||
|
|
"grad_norm": 0.46432326611030017,
|
||
|
|
"learning_rate": 3.9647730539207715e-05,
|
||
|
|
"loss": 0.2216,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2200213074684143,
|
||
|
|
"step": 710,
|
||
|
|
"valid_targets_mean": 5574.2,
|
||
|
|
"valid_targets_min": 2786
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0866261398176291,
|
||
|
|
"grad_norm": 0.4275229879620877,
|
||
|
|
"learning_rate": 3.963342692286449e-05,
|
||
|
|
"loss": 0.2161,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20706182718276978,
|
||
|
|
"step": 715,
|
||
|
|
"valid_targets_mean": 5002.6,
|
||
|
|
"valid_targets_min": 1028
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.094224924012158,
|
||
|
|
"grad_norm": 0.5268786606408785,
|
||
|
|
"learning_rate": 3.9618841347317925e-05,
|
||
|
|
"loss": 0.2099,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20836099982261658,
|
||
|
|
"step": 720,
|
||
|
|
"valid_targets_mean": 3824.6,
|
||
|
|
"valid_targets_min": 834
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1018237082066868,
|
||
|
|
"grad_norm": 0.44352266391750467,
|
||
|
|
"learning_rate": 3.9603974022034117e-05,
|
||
|
|
"loss": 0.2158,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1991317719221115,
|
||
|
|
"step": 725,
|
||
|
|
"valid_targets_mean": 5762.8,
|
||
|
|
"valid_targets_min": 2565
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1094224924012157,
|
||
|
|
"grad_norm": 0.5059377730602702,
|
||
|
|
"learning_rate": 3.9588825160525406e-05,
|
||
|
|
"loss": 0.2101,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2332250475883484,
|
||
|
|
"step": 730,
|
||
|
|
"valid_targets_mean": 4394.6,
|
||
|
|
"valid_targets_min": 778
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1170212765957448,
|
||
|
|
"grad_norm": 0.5277265421398942,
|
||
|
|
"learning_rate": 3.9573394980347354e-05,
|
||
|
|
"loss": 0.2138,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23748330771923065,
|
||
|
|
"step": 735,
|
||
|
|
"valid_targets_mean": 4523.4,
|
||
|
|
"valid_targets_min": 588
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1246200607902737,
|
||
|
|
"grad_norm": 0.46757311577973365,
|
||
|
|
"learning_rate": 3.9557683703095564e-05,
|
||
|
|
"loss": 0.2074,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20653977990150452,
|
||
|
|
"step": 740,
|
||
|
|
"valid_targets_mean": 5119.2,
|
||
|
|
"valid_targets_min": 446
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1322188449848025,
|
||
|
|
"grad_norm": 0.4423041648030623,
|
||
|
|
"learning_rate": 3.954169155440255e-05,
|
||
|
|
"loss": 0.2234,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2072131335735321,
|
||
|
|
"step": 745,
|
||
|
|
"valid_targets_mean": 4472.0,
|
||
|
|
"valid_targets_min": 271
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1398176291793314,
|
||
|
|
"grad_norm": 0.4428410311750491,
|
||
|
|
"learning_rate": 3.952541876393444e-05,
|
||
|
|
"loss": 0.2206,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23159939050674438,
|
||
|
|
"step": 750,
|
||
|
|
"valid_targets_mean": 4634.8,
|
||
|
|
"valid_targets_min": 810
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1474164133738602,
|
||
|
|
"grad_norm": 0.5243183428010766,
|
||
|
|
"learning_rate": 3.9508865565387745e-05,
|
||
|
|
"loss": 0.2137,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22126150131225586,
|
||
|
|
"step": 755,
|
||
|
|
"valid_targets_mean": 4285.5,
|
||
|
|
"valid_targets_min": 702
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.155015197568389,
|
||
|
|
"grad_norm": 0.45761304225615707,
|
||
|
|
"learning_rate": 3.949203219648594e-05,
|
||
|
|
"loss": 0.217,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20640668272972107,
|
||
|
|
"step": 760,
|
||
|
|
"valid_targets_mean": 4922.4,
|
||
|
|
"valid_targets_min": 229
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.162613981762918,
|
||
|
|
"grad_norm": 0.4830790302776904,
|
||
|
|
"learning_rate": 3.94749188989761e-05,
|
||
|
|
"loss": 0.2241,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2300214022397995,
|
||
|
|
"step": 765,
|
||
|
|
"valid_targets_mean": 4988.9,
|
||
|
|
"valid_targets_min": 2275
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1702127659574468,
|
||
|
|
"grad_norm": 0.6344964312381591,
|
||
|
|
"learning_rate": 3.945752591862538e-05,
|
||
|
|
"loss": 0.2075,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22842663526535034,
|
||
|
|
"step": 770,
|
||
|
|
"valid_targets_mean": 3627.2,
|
||
|
|
"valid_targets_min": 662
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1778115501519757,
|
||
|
|
"grad_norm": 0.48098689360030655,
|
||
|
|
"learning_rate": 3.943985350521753e-05,
|
||
|
|
"loss": 0.2306,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2292681783437729,
|
||
|
|
"step": 775,
|
||
|
|
"valid_targets_mean": 4028.1,
|
||
|
|
"valid_targets_min": 732
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1854103343465046,
|
||
|
|
"grad_norm": 0.5336086809835088,
|
||
|
|
"learning_rate": 3.942190191254928e-05,
|
||
|
|
"loss": 0.2216,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22446206212043762,
|
||
|
|
"step": 780,
|
||
|
|
"valid_targets_mean": 4275.8,
|
||
|
|
"valid_targets_min": 741
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1930091185410334,
|
||
|
|
"grad_norm": 0.613832091134627,
|
||
|
|
"learning_rate": 3.9403671398426705e-05,
|
||
|
|
"loss": 0.2138,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2014659196138382,
|
||
|
|
"step": 785,
|
||
|
|
"valid_targets_mean": 5527.8,
|
||
|
|
"valid_targets_min": 2519
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2006079027355623,
|
||
|
|
"grad_norm": 0.7772463802272693,
|
||
|
|
"learning_rate": 3.938516222466153e-05,
|
||
|
|
"loss": 0.2149,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20852729678153992,
|
||
|
|
"step": 790,
|
||
|
|
"valid_targets_mean": 4968.9,
|
||
|
|
"valid_targets_min": 605
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2082066869300911,
|
||
|
|
"grad_norm": 0.5158401148303159,
|
||
|
|
"learning_rate": 3.936637465706735e-05,
|
||
|
|
"loss": 0.213,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25038856267929077,
|
||
|
|
"step": 795,
|
||
|
|
"valid_targets_mean": 4784.4,
|
||
|
|
"valid_targets_min": 655
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.21580547112462,
|
||
|
|
"grad_norm": 1.031320737042501,
|
||
|
|
"learning_rate": 3.934730896545583e-05,
|
||
|
|
"loss": 0.2212,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21661558747291565,
|
||
|
|
"step": 800,
|
||
|
|
"valid_targets_mean": 3556.4,
|
||
|
|
"valid_targets_min": 653
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2234042553191489,
|
||
|
|
"grad_norm": 0.46779923698778514,
|
||
|
|
"learning_rate": 3.932796542363283e-05,
|
||
|
|
"loss": 0.2113,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19566161930561066,
|
||
|
|
"step": 805,
|
||
|
|
"valid_targets_mean": 4376.0,
|
||
|
|
"valid_targets_min": 810
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2310030395136777,
|
||
|
|
"grad_norm": 0.4326481517949445,
|
||
|
|
"learning_rate": 3.930834430939444e-05,
|
||
|
|
"loss": 0.2145,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19606706500053406,
|
||
|
|
"step": 810,
|
||
|
|
"valid_targets_mean": 5616.6,
|
||
|
|
"valid_targets_min": 2732
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2386018237082066,
|
||
|
|
"grad_norm": 0.47029688029877836,
|
||
|
|
"learning_rate": 3.9288445904523063e-05,
|
||
|
|
"loss": 0.219,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1963462084531784,
|
||
|
|
"step": 815,
|
||
|
|
"valid_targets_mean": 4256.6,
|
||
|
|
"valid_targets_min": 1593
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2462006079027357,
|
||
|
|
"grad_norm": 0.4914352971694942,
|
||
|
|
"learning_rate": 3.926827049478329e-05,
|
||
|
|
"loss": 0.2273,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2369862049818039,
|
||
|
|
"step": 820,
|
||
|
|
"valid_targets_mean": 5430.8,
|
||
|
|
"valid_targets_min": 465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2537993920972643,
|
||
|
|
"grad_norm": 0.4983986981283837,
|
||
|
|
"learning_rate": 3.924781836991783e-05,
|
||
|
|
"loss": 0.2175,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2174890637397766,
|
||
|
|
"step": 825,
|
||
|
|
"valid_targets_mean": 4507.7,
|
||
|
|
"valid_targets_min": 801
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2613981762917934,
|
||
|
|
"grad_norm": 0.4385888971144186,
|
||
|
|
"learning_rate": 3.922708982364337e-05,
|
||
|
|
"loss": 0.2118,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21676486730575562,
|
||
|
|
"step": 830,
|
||
|
|
"valid_targets_mean": 5402.2,
|
||
|
|
"valid_targets_min": 933
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2689969604863223,
|
||
|
|
"grad_norm": 0.45249778212242275,
|
||
|
|
"learning_rate": 3.920608515364631e-05,
|
||
|
|
"loss": 0.2245,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20077060163021088,
|
||
|
|
"step": 835,
|
||
|
|
"valid_targets_mean": 4443.8,
|
||
|
|
"valid_targets_min": 491
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2765957446808511,
|
||
|
|
"grad_norm": 0.47777362802657686,
|
||
|
|
"learning_rate": 3.9184804661578535e-05,
|
||
|
|
"loss": 0.2268,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23784896731376648,
|
||
|
|
"step": 840,
|
||
|
|
"valid_targets_mean": 5061.4,
|
||
|
|
"valid_targets_min": 846
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.28419452887538,
|
||
|
|
"grad_norm": 0.4135085697969651,
|
||
|
|
"learning_rate": 3.9163248653053033e-05,
|
||
|
|
"loss": 0.2066,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20635738968849182,
|
||
|
|
"step": 845,
|
||
|
|
"valid_targets_mean": 5489.4,
|
||
|
|
"valid_targets_min": 1148
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2917933130699089,
|
||
|
|
"grad_norm": 0.4117297091921728,
|
||
|
|
"learning_rate": 3.9141417437639566e-05,
|
||
|
|
"loss": 0.2018,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18252739310264587,
|
||
|
|
"step": 850,
|
||
|
|
"valid_targets_mean": 4780.3,
|
||
|
|
"valid_targets_min": 799
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2993920972644377,
|
||
|
|
"grad_norm": 0.48551756342329966,
|
||
|
|
"learning_rate": 3.911931132886016e-05,
|
||
|
|
"loss": 0.2228,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24173299968242645,
|
||
|
|
"step": 855,
|
||
|
|
"valid_targets_mean": 5113.2,
|
||
|
|
"valid_targets_min": 756
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3069908814589666,
|
||
|
|
"grad_norm": 0.4727140541170104,
|
||
|
|
"learning_rate": 3.9096930644184674e-05,
|
||
|
|
"loss": 0.1978,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1915874183177948,
|
||
|
|
"step": 860,
|
||
|
|
"valid_targets_mean": 4394.3,
|
||
|
|
"valid_targets_min": 685
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3145896656534954,
|
||
|
|
"grad_norm": 0.48025400940036744,
|
||
|
|
"learning_rate": 3.907427570502616e-05,
|
||
|
|
"loss": 0.2019,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2418954074382782,
|
||
|
|
"step": 865,
|
||
|
|
"valid_targets_mean": 3894.8,
|
||
|
|
"valid_targets_min": 502
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3221884498480243,
|
||
|
|
"grad_norm": 0.4849042187708983,
|
||
|
|
"learning_rate": 3.90513468367363e-05,
|
||
|
|
"loss": 0.2157,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2329632043838501,
|
||
|
|
"step": 870,
|
||
|
|
"valid_targets_mean": 4571.7,
|
||
|
|
"valid_targets_min": 1992
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3297872340425532,
|
||
|
|
"grad_norm": 0.5027508815921192,
|
||
|
|
"learning_rate": 3.9028144368600746e-05,
|
||
|
|
"loss": 0.2072,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19249165058135986,
|
||
|
|
"step": 875,
|
||
|
|
"valid_targets_mean": 4092.1,
|
||
|
|
"valid_targets_min": 490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.337386018237082,
|
||
|
|
"grad_norm": 0.43340094826535847,
|
||
|
|
"learning_rate": 3.900466863383434e-05,
|
||
|
|
"loss": 0.2074,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16721683740615845,
|
||
|
|
"step": 880,
|
||
|
|
"valid_targets_mean": 4360.1,
|
||
|
|
"valid_targets_min": 678
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.344984802431611,
|
||
|
|
"grad_norm": 0.42492899268845147,
|
||
|
|
"learning_rate": 3.898091996957638e-05,
|
||
|
|
"loss": 0.2218,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18311715126037598,
|
||
|
|
"step": 885,
|
||
|
|
"valid_targets_mean": 5269.4,
|
||
|
|
"valid_targets_min": 2234
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3525835866261398,
|
||
|
|
"grad_norm": 0.45185682893053764,
|
||
|
|
"learning_rate": 3.895689871688571e-05,
|
||
|
|
"loss": 0.2277,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2394876629114151,
|
||
|
|
"step": 890,
|
||
|
|
"valid_targets_mean": 5222.2,
|
||
|
|
"valid_targets_min": 779
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3601823708206686,
|
||
|
|
"grad_norm": 0.4595708686629068,
|
||
|
|
"learning_rate": 3.893260522073591e-05,
|
||
|
|
"loss": 0.2037,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19897980988025665,
|
||
|
|
"step": 895,
|
||
|
|
"valid_targets_mean": 4327.2,
|
||
|
|
"valid_targets_min": 1913
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3677811550151975,
|
||
|
|
"grad_norm": 0.5538183693714539,
|
||
|
|
"learning_rate": 3.8908039830010296e-05,
|
||
|
|
"loss": 0.2085,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21077513694763184,
|
||
|
|
"step": 900,
|
||
|
|
"valid_targets_mean": 4477.2,
|
||
|
|
"valid_targets_min": 1914
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3753799392097266,
|
||
|
|
"grad_norm": 0.4826233268835563,
|
||
|
|
"learning_rate": 3.888320289749687e-05,
|
||
|
|
"loss": 0.2167,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20590105652809143,
|
||
|
|
"step": 905,
|
||
|
|
"valid_targets_mean": 4855.0,
|
||
|
|
"valid_targets_min": 987
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3829787234042552,
|
||
|
|
"grad_norm": 0.45816874548401804,
|
||
|
|
"learning_rate": 3.885809477988334e-05,
|
||
|
|
"loss": 0.2051,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18594498932361603,
|
||
|
|
"step": 910,
|
||
|
|
"valid_targets_mean": 4613.8,
|
||
|
|
"valid_targets_min": 486
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3905775075987843,
|
||
|
|
"grad_norm": 0.46458650367475696,
|
||
|
|
"learning_rate": 3.883271583775194e-05,
|
||
|
|
"loss": 0.2166,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20757240056991577,
|
||
|
|
"step": 915,
|
||
|
|
"valid_targets_mean": 5135.2,
|
||
|
|
"valid_targets_min": 567
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3981762917933132,
|
||
|
|
"grad_norm": 0.49573212835944025,
|
||
|
|
"learning_rate": 3.880706643557425e-05,
|
||
|
|
"loss": 0.2267,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20922252535820007,
|
||
|
|
"step": 920,
|
||
|
|
"valid_targets_mean": 4993.9,
|
||
|
|
"valid_targets_min": 1608
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.405775075987842,
|
||
|
|
"grad_norm": 0.5065076653284654,
|
||
|
|
"learning_rate": 3.8781146941705975e-05,
|
||
|
|
"loss": 0.219,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22266633808612823,
|
||
|
|
"step": 925,
|
||
|
|
"valid_targets_mean": 3984.9,
|
||
|
|
"valid_targets_min": 663
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4133738601823709,
|
||
|
|
"grad_norm": 0.46389395621934165,
|
||
|
|
"learning_rate": 3.8754957728381676e-05,
|
||
|
|
"loss": 0.2122,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1952955424785614,
|
||
|
|
"step": 930,
|
||
|
|
"valid_targets_mean": 4305.5,
|
||
|
|
"valid_targets_min": 659
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4209726443768997,
|
||
|
|
"grad_norm": 0.4244364300260148,
|
||
|
|
"learning_rate": 3.87284991717094e-05,
|
||
|
|
"loss": 0.2062,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19909913837909698,
|
||
|
|
"step": 935,
|
||
|
|
"valid_targets_mean": 5106.8,
|
||
|
|
"valid_targets_min": 692
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4285714285714286,
|
||
|
|
"grad_norm": 0.48753574099908936,
|
||
|
|
"learning_rate": 3.870177165166526e-05,
|
||
|
|
"loss": 0.2278,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22329512238502502,
|
||
|
|
"step": 940,
|
||
|
|
"valid_targets_mean": 4227.9,
|
||
|
|
"valid_targets_min": 960
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4361702127659575,
|
||
|
|
"grad_norm": 0.46921808961532296,
|
||
|
|
"learning_rate": 3.8674775552088034e-05,
|
||
|
|
"loss": 0.21,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1966555416584015,
|
||
|
|
"step": 945,
|
||
|
|
"valid_targets_mean": 4668.8,
|
||
|
|
"valid_targets_min": 1045
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4437689969604863,
|
||
|
|
"grad_norm": 0.5011462054285303,
|
||
|
|
"learning_rate": 3.864751126067359e-05,
|
||
|
|
"loss": 0.2288,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.3067967891693115,
|
||
|
|
"step": 950,
|
||
|
|
"valid_targets_mean": 5225.8,
|
||
|
|
"valid_targets_min": 982
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4513677811550152,
|
||
|
|
"grad_norm": 0.45640279658712546,
|
||
|
|
"learning_rate": 3.861997916896937e-05,
|
||
|
|
"loss": 0.2035,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20878750085830688,
|
||
|
|
"step": 955,
|
||
|
|
"valid_targets_mean": 4429.0,
|
||
|
|
"valid_targets_min": 401
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.458966565349544,
|
||
|
|
"grad_norm": 0.4318101933694944,
|
||
|
|
"learning_rate": 3.859217967236872e-05,
|
||
|
|
"loss": 0.2142,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21009142696857452,
|
||
|
|
"step": 960,
|
||
|
|
"valid_targets_mean": 5187.9,
|
||
|
|
"valid_targets_min": 1245
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.466565349544073,
|
||
|
|
"grad_norm": 0.4639537052196582,
|
||
|
|
"learning_rate": 3.856411317010525e-05,
|
||
|
|
"loss": 0.2253,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20182490348815918,
|
||
|
|
"step": 965,
|
||
|
|
"valid_targets_mean": 4228.9,
|
||
|
|
"valid_targets_min": 1395
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4741641337386018,
|
||
|
|
"grad_norm": 0.5830750830049942,
|
||
|
|
"learning_rate": 3.853578006524711e-05,
|
||
|
|
"loss": 0.2198,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25057747960090637,
|
||
|
|
"step": 970,
|
||
|
|
"valid_targets_mean": 3692.9,
|
||
|
|
"valid_targets_min": 853
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4817629179331306,
|
||
|
|
"grad_norm": 0.6135846306078634,
|
||
|
|
"learning_rate": 3.8507180764691134e-05,
|
||
|
|
"loss": 0.231,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26475679874420166,
|
||
|
|
"step": 975,
|
||
|
|
"valid_targets_mean": 5166.6,
|
||
|
|
"valid_targets_min": 2214
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4893617021276595,
|
||
|
|
"grad_norm": 0.42930269291397805,
|
||
|
|
"learning_rate": 3.847831567915706e-05,
|
||
|
|
"loss": 0.23,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22688277065753937,
|
||
|
|
"step": 980,
|
||
|
|
"valid_targets_mean": 5970.2,
|
||
|
|
"valid_targets_min": 1687
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4969604863221884,
|
||
|
|
"grad_norm": 0.44889401709296317,
|
||
|
|
"learning_rate": 3.844918522318164e-05,
|
||
|
|
"loss": 0.2144,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2054261565208435,
|
||
|
|
"step": 985,
|
||
|
|
"valid_targets_mean": 4887.9,
|
||
|
|
"valid_targets_min": 1082
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5045592705167175,
|
||
|
|
"grad_norm": 0.4849219730059719,
|
||
|
|
"learning_rate": 3.84197898151126e-05,
|
||
|
|
"loss": 0.22,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2193393111228943,
|
||
|
|
"step": 990,
|
||
|
|
"valid_targets_mean": 5518.3,
|
||
|
|
"valid_targets_min": 348
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.512158054711246,
|
||
|
|
"grad_norm": 0.47364060795273005,
|
||
|
|
"learning_rate": 3.839012987710275e-05,
|
||
|
|
"loss": 0.208,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2376159131526947,
|
||
|
|
"step": 995,
|
||
|
|
"valid_targets_mean": 4844.3,
|
||
|
|
"valid_targets_min": 873
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5197568389057752,
|
||
|
|
"grad_norm": 0.477005009585305,
|
||
|
|
"learning_rate": 3.836020583510382e-05,
|
||
|
|
"loss": 0.2223,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23394693434238434,
|
||
|
|
"step": 1000,
|
||
|
|
"valid_targets_mean": 4632.9,
|
||
|
|
"valid_targets_min": 763
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5273556231003038,
|
||
|
|
"grad_norm": 0.43901547735782315,
|
||
|
|
"learning_rate": 3.833001811886041e-05,
|
||
|
|
"loss": 0.2131,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2290436327457428,
|
||
|
|
"step": 1005,
|
||
|
|
"valid_targets_mean": 4733.1,
|
||
|
|
"valid_targets_min": 729
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.534954407294833,
|
||
|
|
"grad_norm": 0.4128584010075957,
|
||
|
|
"learning_rate": 3.8299567161903787e-05,
|
||
|
|
"loss": 0.2134,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18785551190376282,
|
||
|
|
"step": 1010,
|
||
|
|
"valid_targets_mean": 5352.2,
|
||
|
|
"valid_targets_min": 2335
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5425531914893615,
|
||
|
|
"grad_norm": 0.5109402768329384,
|
||
|
|
"learning_rate": 3.826885340154566e-05,
|
||
|
|
"loss": 0.2272,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25076034665107727,
|
||
|
|
"step": 1015,
|
||
|
|
"valid_targets_mean": 4775.5,
|
||
|
|
"valid_targets_min": 891
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5501519756838906,
|
||
|
|
"grad_norm": 0.4615963241009783,
|
||
|
|
"learning_rate": 3.8237877278871916e-05,
|
||
|
|
"loss": 0.2257,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22021520137786865,
|
||
|
|
"step": 1020,
|
||
|
|
"valid_targets_mean": 4804.6,
|
||
|
|
"valid_targets_min": 1759
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5577507598784195,
|
||
|
|
"grad_norm": 0.41168421067320043,
|
||
|
|
"learning_rate": 3.820663923873626e-05,
|
||
|
|
"loss": 0.2219,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22017160058021545,
|
||
|
|
"step": 1025,
|
||
|
|
"valid_targets_mean": 5034.6,
|
||
|
|
"valid_targets_min": 689
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5653495440729484,
|
||
|
|
"grad_norm": 0.5570301288304671,
|
||
|
|
"learning_rate": 3.817513972975385e-05,
|
||
|
|
"loss": 0.2154,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21190384030342102,
|
||
|
|
"step": 1030,
|
||
|
|
"valid_targets_mean": 3886.7,
|
||
|
|
"valid_targets_min": 816
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5729483282674772,
|
||
|
|
"grad_norm": 0.48710822220429656,
|
||
|
|
"learning_rate": 3.814337920429485e-05,
|
||
|
|
"loss": 0.2128,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21960590779781342,
|
||
|
|
"step": 1035,
|
||
|
|
"valid_targets_mean": 4042.9,
|
||
|
|
"valid_targets_min": 546
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.580547112462006,
|
||
|
|
"grad_norm": 0.4956003916710543,
|
||
|
|
"learning_rate": 3.811135811847792e-05,
|
||
|
|
"loss": 0.2138,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2413313090801239,
|
||
|
|
"step": 1040,
|
||
|
|
"valid_targets_mean": 4299.2,
|
||
|
|
"valid_targets_min": 605
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.588145896656535,
|
||
|
|
"grad_norm": 0.5142660653107718,
|
||
|
|
"learning_rate": 3.807907693216368e-05,
|
||
|
|
"loss": 0.2109,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23288393020629883,
|
||
|
|
"step": 1045,
|
||
|
|
"valid_targets_mean": 4768.5,
|
||
|
|
"valid_targets_min": 811
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5957446808510638,
|
||
|
|
"grad_norm": 0.5129928776520155,
|
||
|
|
"learning_rate": 3.804653610894811e-05,
|
||
|
|
"loss": 0.2157,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20630177855491638,
|
||
|
|
"step": 1050,
|
||
|
|
"valid_targets_mean": 3965.7,
|
||
|
|
"valid_targets_min": 1768
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6033434650455927,
|
||
|
|
"grad_norm": 0.4428766734377924,
|
||
|
|
"learning_rate": 3.801373611615585e-05,
|
||
|
|
"loss": 0.218,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2139415144920349,
|
||
|
|
"step": 1055,
|
||
|
|
"valid_targets_mean": 4501.2,
|
||
|
|
"valid_targets_min": 528
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6109422492401215,
|
||
|
|
"grad_norm": 0.3976871596351703,
|
||
|
|
"learning_rate": 3.798067742483355e-05,
|
||
|
|
"loss": 0.2055,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1983199268579483,
|
||
|
|
"step": 1060,
|
||
|
|
"valid_targets_mean": 4932.3,
|
||
|
|
"valid_targets_min": 593
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6185410334346506,
|
||
|
|
"grad_norm": 0.43538181451443764,
|
||
|
|
"learning_rate": 3.794736050974308e-05,
|
||
|
|
"loss": 0.2291,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21794393658638,
|
||
|
|
"step": 1065,
|
||
|
|
"valid_targets_mean": 5077.6,
|
||
|
|
"valid_targets_min": 828
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6261398176291793,
|
||
|
|
"grad_norm": 0.41261656921486206,
|
||
|
|
"learning_rate": 3.7913785849354693e-05,
|
||
|
|
"loss": 0.2125,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18821759521961212,
|
||
|
|
"step": 1070,
|
||
|
|
"valid_targets_mean": 4659.0,
|
||
|
|
"valid_targets_min": 776
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6337386018237083,
|
||
|
|
"grad_norm": 0.4607656496157841,
|
||
|
|
"learning_rate": 3.787995392584017e-05,
|
||
|
|
"loss": 0.2155,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23070788383483887,
|
||
|
|
"step": 1075,
|
||
|
|
"valid_targets_mean": 5037.2,
|
||
|
|
"valid_targets_min": 767
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.641337386018237,
|
||
|
|
"grad_norm": 0.521744416704874,
|
||
|
|
"learning_rate": 3.784586522506589e-05,
|
||
|
|
"loss": 0.2356,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23326215147972107,
|
||
|
|
"step": 1080,
|
||
|
|
"valid_targets_mean": 4300.6,
|
||
|
|
"valid_targets_min": 752
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.648936170212766,
|
||
|
|
"grad_norm": 0.5411151907665869,
|
||
|
|
"learning_rate": 3.781152023658588e-05,
|
||
|
|
"loss": 0.2076,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19801482558250427,
|
||
|
|
"step": 1085,
|
||
|
|
"valid_targets_mean": 4037.4,
|
||
|
|
"valid_targets_min": 565
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6565349544072947,
|
||
|
|
"grad_norm": 0.4881180236795167,
|
||
|
|
"learning_rate": 3.7776919453634735e-05,
|
||
|
|
"loss": 0.2217,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20285995304584503,
|
||
|
|
"step": 1090,
|
||
|
|
"valid_targets_mean": 4716.8,
|
||
|
|
"valid_targets_min": 1812
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6641337386018238,
|
||
|
|
"grad_norm": 0.3892565204932596,
|
||
|
|
"learning_rate": 3.774206337312058e-05,
|
||
|
|
"loss": 0.2146,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19384154677391052,
|
||
|
|
"step": 1095,
|
||
|
|
"valid_targets_mean": 5548.1,
|
||
|
|
"valid_targets_min": 1723
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6717325227963524,
|
||
|
|
"grad_norm": 0.6079201585046432,
|
||
|
|
"learning_rate": 3.7706952495617895e-05,
|
||
|
|
"loss": 0.2228,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2272331863641739,
|
||
|
|
"step": 1100,
|
||
|
|
"valid_targets_mean": 4440.2,
|
||
|
|
"valid_targets_min": 754
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6793313069908815,
|
||
|
|
"grad_norm": 0.4392880697961998,
|
||
|
|
"learning_rate": 3.767158732536037e-05,
|
||
|
|
"loss": 0.2057,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20622758567333221,
|
||
|
|
"step": 1105,
|
||
|
|
"valid_targets_mean": 5115.8,
|
||
|
|
"valid_targets_min": 2178
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6869300911854104,
|
||
|
|
"grad_norm": 0.4760913157214835,
|
||
|
|
"learning_rate": 3.7635968370233625e-05,
|
||
|
|
"loss": 0.1947,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22895006835460663,
|
||
|
|
"step": 1110,
|
||
|
|
"valid_targets_mean": 4197.4,
|
||
|
|
"valid_targets_min": 664
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6945288753799392,
|
||
|
|
"grad_norm": 0.42921725245111436,
|
||
|
|
"learning_rate": 3.760009614176792e-05,
|
||
|
|
"loss": 0.2092,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20630860328674316,
|
||
|
|
"step": 1115,
|
||
|
|
"valid_targets_mean": 5539.1,
|
||
|
|
"valid_targets_min": 1629
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.702127659574468,
|
||
|
|
"grad_norm": 0.47421657315391796,
|
||
|
|
"learning_rate": 3.7563971155130834e-05,
|
||
|
|
"loss": 0.2088,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19295352697372437,
|
||
|
|
"step": 1120,
|
||
|
|
"valid_targets_mean": 4742.3,
|
||
|
|
"valid_targets_min": 900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.709726443768997,
|
||
|
|
"grad_norm": 0.44277966038489874,
|
||
|
|
"learning_rate": 3.752759392911986e-05,
|
||
|
|
"loss": 0.2121,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21693378686904907,
|
||
|
|
"step": 1125,
|
||
|
|
"valid_targets_mean": 5488.9,
|
||
|
|
"valid_targets_min": 2673
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7173252279635258,
|
||
|
|
"grad_norm": 0.35764926472016434,
|
||
|
|
"learning_rate": 3.7490964986154936e-05,
|
||
|
|
"loss": 0.2056,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1632017195224762,
|
||
|
|
"step": 1130,
|
||
|
|
"valid_targets_mean": 5798.2,
|
||
|
|
"valid_targets_min": 2382
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7249240121580547,
|
||
|
|
"grad_norm": 0.46310293456133733,
|
||
|
|
"learning_rate": 3.745408485227094e-05,
|
||
|
|
"loss": 0.2166,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2125653326511383,
|
||
|
|
"step": 1135,
|
||
|
|
"valid_targets_mean": 4493.4,
|
||
|
|
"valid_targets_min": 703
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7325227963525835,
|
||
|
|
"grad_norm": 0.48402636671915206,
|
||
|
|
"learning_rate": 3.7416954057110165e-05,
|
||
|
|
"loss": 0.2155,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2212226539850235,
|
||
|
|
"step": 1140,
|
||
|
|
"valid_targets_mean": 4891.2,
|
||
|
|
"valid_targets_min": 422
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7401215805471124,
|
||
|
|
"grad_norm": 0.48544415851378125,
|
||
|
|
"learning_rate": 3.7379573133914686e-05,
|
||
|
|
"loss": 0.2204,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24577516317367554,
|
||
|
|
"step": 1145,
|
||
|
|
"valid_targets_mean": 4704.4,
|
||
|
|
"valid_targets_min": 683
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7477203647416415,
|
||
|
|
"grad_norm": 0.4686217061157862,
|
||
|
|
"learning_rate": 3.7341942619518736e-05,
|
||
|
|
"loss": 0.2134,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20010778307914734,
|
||
|
|
"step": 1150,
|
||
|
|
"valid_targets_mean": 4060.2,
|
||
|
|
"valid_targets_min": 740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7553191489361701,
|
||
|
|
"grad_norm": 0.456474362675113,
|
||
|
|
"learning_rate": 3.730406305434093e-05,
|
||
|
|
"loss": 0.2183,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21529576182365417,
|
||
|
|
"step": 1155,
|
||
|
|
"valid_targets_mean": 4461.8,
|
||
|
|
"valid_targets_min": 407
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7629179331306992,
|
||
|
|
"grad_norm": 0.39386216864446827,
|
||
|
|
"learning_rate": 3.726593498237659e-05,
|
||
|
|
"loss": 0.2173,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20742088556289673,
|
||
|
|
"step": 1160,
|
||
|
|
"valid_targets_mean": 5696.4,
|
||
|
|
"valid_targets_min": 832
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7705167173252279,
|
||
|
|
"grad_norm": 0.4954398039746765,
|
||
|
|
"learning_rate": 3.7227558951189866e-05,
|
||
|
|
"loss": 0.221,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2127082198858261,
|
||
|
|
"step": 1165,
|
||
|
|
"valid_targets_mean": 4003.1,
|
||
|
|
"valid_targets_min": 1097
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.778115501519757,
|
||
|
|
"grad_norm": 0.47012021916770536,
|
||
|
|
"learning_rate": 3.7188935511905895e-05,
|
||
|
|
"loss": 0.2208,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2261389195919037,
|
||
|
|
"step": 1170,
|
||
|
|
"valid_targets_mean": 5014.4,
|
||
|
|
"valid_targets_min": 1036
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7857142857142856,
|
||
|
|
"grad_norm": 0.4746767287862107,
|
||
|
|
"learning_rate": 3.715006521920289e-05,
|
||
|
|
"loss": 0.2202,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19433888792991638,
|
||
|
|
"step": 1175,
|
||
|
|
"valid_targets_mean": 4523.8,
|
||
|
|
"valid_targets_min": 625
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7933130699088147,
|
||
|
|
"grad_norm": 0.4791758218613005,
|
||
|
|
"learning_rate": 3.711094863130417e-05,
|
||
|
|
"loss": 0.2161,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22052708268165588,
|
||
|
|
"step": 1180,
|
||
|
|
"valid_targets_mean": 5126.8,
|
||
|
|
"valid_targets_min": 1164
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8009118541033433,
|
||
|
|
"grad_norm": 0.43857817303439106,
|
||
|
|
"learning_rate": 3.707158630997015e-05,
|
||
|
|
"loss": 0.2186,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21629557013511658,
|
||
|
|
"step": 1185,
|
||
|
|
"valid_targets_mean": 5371.6,
|
||
|
|
"valid_targets_min": 728
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8085106382978724,
|
||
|
|
"grad_norm": 0.5348041335538533,
|
||
|
|
"learning_rate": 3.703197882049026e-05,
|
||
|
|
"loss": 0.21,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2324264943599701,
|
||
|
|
"step": 1190,
|
||
|
|
"valid_targets_mean": 3775.2,
|
||
|
|
"valid_targets_min": 637
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8161094224924013,
|
||
|
|
"grad_norm": 0.3889586019552747,
|
||
|
|
"learning_rate": 3.699212673167484e-05,
|
||
|
|
"loss": 0.2116,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2274361550807953,
|
||
|
|
"step": 1195,
|
||
|
|
"valid_targets_mean": 6306.4,
|
||
|
|
"valid_targets_min": 622
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8237082066869301,
|
||
|
|
"grad_norm": 0.46857624747519244,
|
||
|
|
"learning_rate": 3.695203061584695e-05,
|
||
|
|
"loss": 0.2068,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23175036907196045,
|
||
|
|
"step": 1200,
|
||
|
|
"valid_targets_mean": 4628.2,
|
||
|
|
"valid_targets_min": 429
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.831306990881459,
|
||
|
|
"grad_norm": 0.4885718145421046,
|
||
|
|
"learning_rate": 3.69116910488342e-05,
|
||
|
|
"loss": 0.2214,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22779184579849243,
|
||
|
|
"step": 1205,
|
||
|
|
"valid_targets_mean": 4191.6,
|
||
|
|
"valid_targets_min": 156
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8389057750759878,
|
||
|
|
"grad_norm": 0.4639904198544056,
|
||
|
|
"learning_rate": 3.687110860996041e-05,
|
||
|
|
"loss": 0.2239,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2027101218700409,
|
||
|
|
"step": 1210,
|
||
|
|
"valid_targets_mean": 4831.6,
|
||
|
|
"valid_targets_min": 699
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8465045592705167,
|
||
|
|
"grad_norm": 0.46833719588845396,
|
||
|
|
"learning_rate": 3.6830283882037335e-05,
|
||
|
|
"loss": 0.2165,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20325730741024017,
|
||
|
|
"step": 1215,
|
||
|
|
"valid_targets_mean": 5097.8,
|
||
|
|
"valid_targets_min": 465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8541033434650456,
|
||
|
|
"grad_norm": 0.41463391792421084,
|
||
|
|
"learning_rate": 3.678921745135631e-05,
|
||
|
|
"loss": 0.2038,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20085079967975616,
|
||
|
|
"step": 1220,
|
||
|
|
"valid_targets_mean": 5023.6,
|
||
|
|
"valid_targets_min": 633
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8617021276595744,
|
||
|
|
"grad_norm": 0.44749514129370893,
|
||
|
|
"learning_rate": 3.674790990767979e-05,
|
||
|
|
"loss": 0.2289,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20688676834106445,
|
||
|
|
"step": 1225,
|
||
|
|
"valid_targets_mean": 4845.9,
|
||
|
|
"valid_targets_min": 667
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8693009118541033,
|
||
|
|
"grad_norm": 0.4502221753599573,
|
||
|
|
"learning_rate": 3.670636184423288e-05,
|
||
|
|
"loss": 0.2103,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19071269035339355,
|
||
|
|
"step": 1230,
|
||
|
|
"valid_targets_mean": 4245.0,
|
||
|
|
"valid_targets_min": 500
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8768996960486324,
|
||
|
|
"grad_norm": 0.5334681412726286,
|
||
|
|
"learning_rate": 3.666457385769487e-05,
|
||
|
|
"loss": 0.2042,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2248763144016266,
|
||
|
|
"step": 1235,
|
||
|
|
"valid_targets_mean": 4481.5,
|
||
|
|
"valid_targets_min": 770
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.884498480243161,
|
||
|
|
"grad_norm": 0.4644916347166466,
|
||
|
|
"learning_rate": 3.66225465481906e-05,
|
||
|
|
"loss": 0.2091,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2305462807416916,
|
||
|
|
"step": 1240,
|
||
|
|
"valid_targets_mean": 4303.9,
|
||
|
|
"valid_targets_min": 630
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.89209726443769,
|
||
|
|
"grad_norm": 0.4267832479199372,
|
||
|
|
"learning_rate": 3.658028051928189e-05,
|
||
|
|
"loss": 0.2092,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18598158657550812,
|
||
|
|
"step": 1245,
|
||
|
|
"valid_targets_mean": 4759.4,
|
||
|
|
"valid_targets_min": 965
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8996960486322187,
|
||
|
|
"grad_norm": 0.46975601834979414,
|
||
|
|
"learning_rate": 3.6537776377958836e-05,
|
||
|
|
"loss": 0.2302,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2438843995332718,
|
||
|
|
"step": 1250,
|
||
|
|
"valid_targets_mean": 4358.6,
|
||
|
|
"valid_targets_min": 545
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9072948328267478,
|
||
|
|
"grad_norm": 0.41467959071923455,
|
||
|
|
"learning_rate": 3.649503473463112e-05,
|
||
|
|
"loss": 0.1974,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18656185269355774,
|
||
|
|
"step": 1255,
|
||
|
|
"valid_targets_mean": 4789.7,
|
||
|
|
"valid_targets_min": 622
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9148936170212765,
|
||
|
|
"grad_norm": 0.48720410950048604,
|
||
|
|
"learning_rate": 3.645205620311923e-05,
|
||
|
|
"loss": 0.1892,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2179066389799118,
|
||
|
|
"step": 1260,
|
||
|
|
"valid_targets_mean": 4316.3,
|
||
|
|
"valid_targets_min": 718
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9224924012158056,
|
||
|
|
"grad_norm": 0.47737074347118913,
|
||
|
|
"learning_rate": 3.6408841400645644e-05,
|
||
|
|
"loss": 0.2311,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20159170031547546,
|
||
|
|
"step": 1265,
|
||
|
|
"valid_targets_mean": 4676.4,
|
||
|
|
"valid_targets_min": 920
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9300911854103342,
|
||
|
|
"grad_norm": 0.38561599683027503,
|
||
|
|
"learning_rate": 3.636539094782598e-05,
|
||
|
|
"loss": 0.2151,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18330830335617065,
|
||
|
|
"step": 1270,
|
||
|
|
"valid_targets_mean": 4813.8,
|
||
|
|
"valid_targets_min": 811
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9376899696048633,
|
||
|
|
"grad_norm": 0.4850605122437156,
|
||
|
|
"learning_rate": 3.632170546866007e-05,
|
||
|
|
"loss": 0.2168,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22532254457473755,
|
||
|
|
"step": 1275,
|
||
|
|
"valid_targets_mean": 3973.6,
|
||
|
|
"valid_targets_min": 335
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9452887537993921,
|
||
|
|
"grad_norm": 0.43992666790664475,
|
||
|
|
"learning_rate": 3.6277785590523e-05,
|
||
|
|
"loss": 0.2201,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24039697647094727,
|
||
|
|
"step": 1280,
|
||
|
|
"valid_targets_mean": 5707.9,
|
||
|
|
"valid_targets_min": 511
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.952887537993921,
|
||
|
|
"grad_norm": 3.9664174496666083,
|
||
|
|
"learning_rate": 3.623363194415609e-05,
|
||
|
|
"loss": 0.214,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21847763657569885,
|
||
|
|
"step": 1285,
|
||
|
|
"valid_targets_mean": 4531.5,
|
||
|
|
"valid_targets_min": 991
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9604863221884499,
|
||
|
|
"grad_norm": 0.4595577415716105,
|
||
|
|
"learning_rate": 3.618924516365788e-05,
|
||
|
|
"loss": 0.2042,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20071235299110413,
|
||
|
|
"step": 1290,
|
||
|
|
"valid_targets_mean": 4902.6,
|
||
|
|
"valid_targets_min": 777
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9680851063829787,
|
||
|
|
"grad_norm": 0.44567806699122325,
|
||
|
|
"learning_rate": 3.614462588647495e-05,
|
||
|
|
"loss": 0.2243,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.24957430362701416,
|
||
|
|
"step": 1295,
|
||
|
|
"valid_targets_mean": 4669.9,
|
||
|
|
"valid_targets_min": 1479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9756838905775076,
|
||
|
|
"grad_norm": 0.42210130028239157,
|
||
|
|
"learning_rate": 3.609977475339284e-05,
|
||
|
|
"loss": 0.2064,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2057563215494156,
|
||
|
|
"step": 1300,
|
||
|
|
"valid_targets_mean": 5229.6,
|
||
|
|
"valid_targets_min": 969
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9832826747720365,
|
||
|
|
"grad_norm": 0.48785866462041777,
|
||
|
|
"learning_rate": 3.6054692408526806e-05,
|
||
|
|
"loss": 0.1967,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2031518667936325,
|
||
|
|
"step": 1305,
|
||
|
|
"valid_targets_mean": 3585.4,
|
||
|
|
"valid_targets_min": 843
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9908814589665653,
|
||
|
|
"grad_norm": 0.4191280920943274,
|
||
|
|
"learning_rate": 3.6009379499312563e-05,
|
||
|
|
"loss": 0.2142,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21392418444156647,
|
||
|
|
"step": 1310,
|
||
|
|
"valid_targets_mean": 5448.1,
|
||
|
|
"valid_targets_min": 2162
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9984802431610942,
|
||
|
|
"grad_norm": 0.571865229628622,
|
||
|
|
"learning_rate": 3.5963836676497034e-05,
|
||
|
|
"loss": 0.2043,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21553698182106018,
|
||
|
|
"step": 1315,
|
||
|
|
"valid_targets_mean": 3434.1,
|
||
|
|
"valid_targets_min": 460
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0060790273556233,
|
||
|
|
"grad_norm": 0.4580819817800281,
|
||
|
|
"learning_rate": 3.5918064594128946e-05,
|
||
|
|
"loss": 0.1941,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18602877855300903,
|
||
|
|
"step": 1320,
|
||
|
|
"valid_targets_mean": 4337.8,
|
||
|
|
"valid_targets_min": 527
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.013677811550152,
|
||
|
|
"grad_norm": 0.4308848554547568,
|
||
|
|
"learning_rate": 3.5872063909549465e-05,
|
||
|
|
"loss": 0.2003,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1809544563293457,
|
||
|
|
"step": 1325,
|
||
|
|
"valid_targets_mean": 4842.1,
|
||
|
|
"valid_targets_min": 305
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.021276595744681,
|
||
|
|
"grad_norm": 0.4304570423447681,
|
||
|
|
"learning_rate": 3.5825835283382754e-05,
|
||
|
|
"loss": 0.1893,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17694029211997986,
|
||
|
|
"step": 1330,
|
||
|
|
"valid_targets_mean": 5139.7,
|
||
|
|
"valid_targets_min": 1709
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0288753799392096,
|
||
|
|
"grad_norm": 0.4201481077802898,
|
||
|
|
"learning_rate": 3.5779379379526516e-05,
|
||
|
|
"loss": 0.1944,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17606091499328613,
|
||
|
|
"step": 1335,
|
||
|
|
"valid_targets_mean": 5894.2,
|
||
|
|
"valid_targets_min": 348
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0364741641337387,
|
||
|
|
"grad_norm": 0.43796821938518954,
|
||
|
|
"learning_rate": 3.57326968651424e-05,
|
||
|
|
"loss": 0.1873,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19579800963401794,
|
||
|
|
"step": 1340,
|
||
|
|
"valid_targets_mean": 4780.1,
|
||
|
|
"valid_targets_min": 1077
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0440729483282674,
|
||
|
|
"grad_norm": 0.431578307043594,
|
||
|
|
"learning_rate": 3.5685788410646455e-05,
|
||
|
|
"loss": 0.1859,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17831829190254211,
|
||
|
|
"step": 1345,
|
||
|
|
"valid_targets_mean": 4907.6,
|
||
|
|
"valid_targets_min": 590
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0516717325227964,
|
||
|
|
"grad_norm": 0.4188648321222995,
|
||
|
|
"learning_rate": 3.5638654689699493e-05,
|
||
|
|
"loss": 0.1895,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15893448889255524,
|
||
|
|
"step": 1350,
|
||
|
|
"valid_targets_mean": 5083.1,
|
||
|
|
"valid_targets_min": 1164
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.059270516717325,
|
||
|
|
"grad_norm": 0.4452501919855169,
|
||
|
|
"learning_rate": 3.559129637919744e-05,
|
||
|
|
"loss": 0.194,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19416233897209167,
|
||
|
|
"step": 1355,
|
||
|
|
"valid_targets_mean": 4871.6,
|
||
|
|
"valid_targets_min": 1124
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.066869300911854,
|
||
|
|
"grad_norm": 0.5486933381235863,
|
||
|
|
"learning_rate": 3.5543714159261576e-05,
|
||
|
|
"loss": 0.1991,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21765755116939545,
|
||
|
|
"step": 1360,
|
||
|
|
"valid_targets_mean": 4549.6,
|
||
|
|
"valid_targets_min": 756
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.074468085106383,
|
||
|
|
"grad_norm": 0.46291383560602983,
|
||
|
|
"learning_rate": 3.5495908713228774e-05,
|
||
|
|
"loss": 0.1955,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22245173156261444,
|
||
|
|
"step": 1365,
|
||
|
|
"valid_targets_mean": 4870.3,
|
||
|
|
"valid_targets_min": 829
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.082066869300912,
|
||
|
|
"grad_norm": 0.43346588262932606,
|
||
|
|
"learning_rate": 3.544788072764173e-05,
|
||
|
|
"loss": 0.1822,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16855968534946442,
|
||
|
|
"step": 1370,
|
||
|
|
"valid_targets_mean": 5086.5,
|
||
|
|
"valid_targets_min": 1568
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0896656534954405,
|
||
|
|
"grad_norm": 0.46753186284834775,
|
||
|
|
"learning_rate": 3.5399630892239036e-05,
|
||
|
|
"loss": 0.1951,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17375516891479492,
|
||
|
|
"step": 1375,
|
||
|
|
"valid_targets_mean": 4855.5,
|
||
|
|
"valid_targets_min": 1441
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0972644376899696,
|
||
|
|
"grad_norm": 0.5417497219166529,
|
||
|
|
"learning_rate": 3.535115989994533e-05,
|
||
|
|
"loss": 0.2062,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22097179293632507,
|
||
|
|
"step": 1380,
|
||
|
|
"valid_targets_mean": 4113.6,
|
||
|
|
"valid_targets_min": 608
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1048632218844983,
|
||
|
|
"grad_norm": 0.5031450181476247,
|
||
|
|
"learning_rate": 3.530246844686133e-05,
|
||
|
|
"loss": 0.1948,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20294921100139618,
|
||
|
|
"step": 1385,
|
||
|
|
"valid_targets_mean": 4546.1,
|
||
|
|
"valid_targets_min": 610
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1124620060790273,
|
||
|
|
"grad_norm": 0.4352408641314383,
|
||
|
|
"learning_rate": 3.5253557232253805e-05,
|
||
|
|
"loss": 0.2062,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18328158557415009,
|
||
|
|
"step": 1390,
|
||
|
|
"valid_targets_mean": 4523.4,
|
||
|
|
"valid_targets_min": 317
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1200607902735564,
|
||
|
|
"grad_norm": 0.4965079663934899,
|
||
|
|
"learning_rate": 3.520442695854558e-05,
|
||
|
|
"loss": 0.1903,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19759127497673035,
|
||
|
|
"step": 1395,
|
||
|
|
"valid_targets_mean": 4122.6,
|
||
|
|
"valid_targets_min": 651
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.127659574468085,
|
||
|
|
"grad_norm": 0.4372048480171316,
|
||
|
|
"learning_rate": 3.515507833130543e-05,
|
||
|
|
"loss": 0.2099,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.260989785194397,
|
||
|
|
"step": 1400,
|
||
|
|
"valid_targets_mean": 5767.1,
|
||
|
|
"valid_targets_min": 647
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.135258358662614,
|
||
|
|
"grad_norm": 0.4432824108788825,
|
||
|
|
"learning_rate": 3.510551205923793e-05,
|
||
|
|
"loss": 0.2024,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1798432469367981,
|
||
|
|
"step": 1405,
|
||
|
|
"valid_targets_mean": 4649.9,
|
||
|
|
"valid_targets_min": 887
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.142857142857143,
|
||
|
|
"grad_norm": 0.47056895174749847,
|
||
|
|
"learning_rate": 3.50557288541733e-05,
|
||
|
|
"loss": 0.2022,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28239643573760986,
|
||
|
|
"step": 1410,
|
||
|
|
"valid_targets_mean": 5144.9,
|
||
|
|
"valid_targets_min": 668
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.150455927051672,
|
||
|
|
"grad_norm": 0.48033852287738354,
|
||
|
|
"learning_rate": 3.5005729431057176e-05,
|
||
|
|
"loss": 0.1882,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18898963928222656,
|
||
|
|
"step": 1415,
|
||
|
|
"valid_targets_mean": 4188.7,
|
||
|
|
"valid_targets_min": 642
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1580547112462005,
|
||
|
|
"grad_norm": 0.44907430023415723,
|
||
|
|
"learning_rate": 3.4955514507940335e-05,
|
||
|
|
"loss": 0.1932,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21947622299194336,
|
||
|
|
"step": 1420,
|
||
|
|
"valid_targets_mean": 4689.1,
|
||
|
|
"valid_targets_min": 811
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1656534954407296,
|
||
|
|
"grad_norm": 0.48987352731998796,
|
||
|
|
"learning_rate": 3.490508480596839e-05,
|
||
|
|
"loss": 0.2001,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23712974786758423,
|
||
|
|
"step": 1425,
|
||
|
|
"valid_targets_mean": 4526.9,
|
||
|
|
"valid_targets_min": 1906
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1732522796352582,
|
||
|
|
"grad_norm": 0.4635255839622089,
|
||
|
|
"learning_rate": 3.485444104937144e-05,
|
||
|
|
"loss": 0.188,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19113010168075562,
|
||
|
|
"step": 1430,
|
||
|
|
"valid_targets_mean": 5611.0,
|
||
|
|
"valid_targets_min": 506
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1808510638297873,
|
||
|
|
"grad_norm": 0.5259676202088338,
|
||
|
|
"learning_rate": 3.4803583965453635e-05,
|
||
|
|
"loss": 0.1946,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1842001974582672,
|
||
|
|
"step": 1435,
|
||
|
|
"valid_targets_mean": 5102.6,
|
||
|
|
"valid_targets_min": 2495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.188449848024316,
|
||
|
|
"grad_norm": 0.44606267698952673,
|
||
|
|
"learning_rate": 3.475251428458281e-05,
|
||
|
|
"loss": 0.1778,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16230902075767517,
|
||
|
|
"step": 1440,
|
||
|
|
"valid_targets_mean": 4659.9,
|
||
|
|
"valid_targets_min": 559
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.196048632218845,
|
||
|
|
"grad_norm": 0.4688428145128331,
|
||
|
|
"learning_rate": 3.4701232740179876e-05,
|
||
|
|
"loss": 0.19,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20408767461776733,
|
||
|
|
"step": 1445,
|
||
|
|
"valid_targets_mean": 4121.8,
|
||
|
|
"valid_targets_min": 589
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2036474164133737,
|
||
|
|
"grad_norm": 0.5991278401653526,
|
||
|
|
"learning_rate": 3.464974006870841e-05,
|
||
|
|
"loss": 0.1828,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1730174869298935,
|
||
|
|
"step": 1450,
|
||
|
|
"valid_targets_mean": 4380.2,
|
||
|
|
"valid_targets_min": 872
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.211246200607903,
|
||
|
|
"grad_norm": 0.5317059372292597,
|
||
|
|
"learning_rate": 3.4598037009664e-05,
|
||
|
|
"loss": 0.188,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21209636330604553,
|
||
|
|
"step": 1455,
|
||
|
|
"valid_targets_mean": 4217.3,
|
||
|
|
"valid_targets_min": 576
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2188449848024314,
|
||
|
|
"grad_norm": 0.430168348759962,
|
||
|
|
"learning_rate": 3.454612430556365e-05,
|
||
|
|
"loss": 0.1926,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1825931817293167,
|
||
|
|
"step": 1460,
|
||
|
|
"valid_targets_mean": 5261.9,
|
||
|
|
"valid_targets_min": 605
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2264437689969605,
|
||
|
|
"grad_norm": 0.3749747542503873,
|
||
|
|
"learning_rate": 3.44940027019351e-05,
|
||
|
|
"loss": 0.1878,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17250074446201324,
|
||
|
|
"step": 1465,
|
||
|
|
"valid_targets_mean": 5484.4,
|
||
|
|
"valid_targets_min": 702
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2340425531914896,
|
||
|
|
"grad_norm": 0.4590123136778495,
|
||
|
|
"learning_rate": 3.444167294730617e-05,
|
||
|
|
"loss": 0.205,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21813246607780457,
|
||
|
|
"step": 1470,
|
||
|
|
"valid_targets_mean": 5121.4,
|
||
|
|
"valid_targets_min": 306
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2416413373860182,
|
||
|
|
"grad_norm": 0.49931169623759,
|
||
|
|
"learning_rate": 3.4389135793193935e-05,
|
||
|
|
"loss": 0.196,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.218278706073761,
|
||
|
|
"step": 1475,
|
||
|
|
"valid_targets_mean": 3389.2,
|
||
|
|
"valid_targets_min": 545
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2492401215805473,
|
||
|
|
"grad_norm": 0.44543315364919134,
|
||
|
|
"learning_rate": 3.4336391994094e-05,
|
||
|
|
"loss": 0.1931,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18760555982589722,
|
||
|
|
"step": 1480,
|
||
|
|
"valid_targets_mean": 4307.0,
|
||
|
|
"valid_targets_min": 666
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.256838905775076,
|
||
|
|
"grad_norm": 0.4886663621134491,
|
||
|
|
"learning_rate": 3.4283442307469625e-05,
|
||
|
|
"loss": 0.1915,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2062349021434784,
|
||
|
|
"step": 1485,
|
||
|
|
"valid_targets_mean": 4356.3,
|
||
|
|
"valid_targets_min": 766
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.264437689969605,
|
||
|
|
"grad_norm": 0.4376731027831436,
|
||
|
|
"learning_rate": 3.423028749374086e-05,
|
||
|
|
"loss": 0.1772,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1697554588317871,
|
||
|
|
"step": 1490,
|
||
|
|
"valid_targets_mean": 4621.5,
|
||
|
|
"valid_targets_min": 626
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2720364741641337,
|
||
|
|
"grad_norm": 0.44145953072536115,
|
||
|
|
"learning_rate": 3.417692831627361e-05,
|
||
|
|
"loss": 0.1953,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17671933770179749,
|
||
|
|
"step": 1495,
|
||
|
|
"valid_targets_mean": 4243.6,
|
||
|
|
"valid_targets_min": 490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2796352583586628,
|
||
|
|
"grad_norm": 0.46231753746429277,
|
||
|
|
"learning_rate": 3.412336554136871e-05,
|
||
|
|
"loss": 0.189,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.217167928814888,
|
||
|
|
"step": 1500,
|
||
|
|
"valid_targets_mean": 4889.9,
|
||
|
|
"valid_targets_min": 940
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2872340425531914,
|
||
|
|
"grad_norm": 0.41849411158618643,
|
||
|
|
"learning_rate": 3.406959993825088e-05,
|
||
|
|
"loss": 0.1879,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1857946813106537,
|
||
|
|
"step": 1505,
|
||
|
|
"valid_targets_mean": 4538.6,
|
||
|
|
"valid_targets_min": 1136
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2948328267477205,
|
||
|
|
"grad_norm": 0.42440454933579597,
|
||
|
|
"learning_rate": 3.4015632279057675e-05,
|
||
|
|
"loss": 0.1913,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19058769941329956,
|
||
|
|
"step": 1510,
|
||
|
|
"valid_targets_mean": 5174.3,
|
||
|
|
"valid_targets_min": 2350
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.302431610942249,
|
||
|
|
"grad_norm": 0.4531043322688376,
|
||
|
|
"learning_rate": 3.396146333882846e-05,
|
||
|
|
"loss": 0.2017,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.207040473818779,
|
||
|
|
"step": 1515,
|
||
|
|
"valid_targets_mean": 5085.9,
|
||
|
|
"valid_targets_min": 1122
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.310030395136778,
|
||
|
|
"grad_norm": 0.43663796857915893,
|
||
|
|
"learning_rate": 3.3907093895493186e-05,
|
||
|
|
"loss": 0.2011,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19464045763015747,
|
||
|
|
"step": 1520,
|
||
|
|
"valid_targets_mean": 4950.9,
|
||
|
|
"valid_targets_min": 709
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.317629179331307,
|
||
|
|
"grad_norm": 0.42579767171142163,
|
||
|
|
"learning_rate": 3.385252472986129e-05,
|
||
|
|
"loss": 0.1882,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17376163601875305,
|
||
|
|
"step": 1525,
|
||
|
|
"valid_targets_mean": 5796.6,
|
||
|
|
"valid_targets_min": 829
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.325227963525836,
|
||
|
|
"grad_norm": 0.4254323477143335,
|
||
|
|
"learning_rate": 3.379775662561045e-05,
|
||
|
|
"loss": 0.2029,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20658773183822632,
|
||
|
|
"step": 1530,
|
||
|
|
"valid_targets_mean": 4603.7,
|
||
|
|
"valid_targets_min": 937
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3328267477203646,
|
||
|
|
"grad_norm": 0.40841513731217177,
|
||
|
|
"learning_rate": 3.374279036927535e-05,
|
||
|
|
"loss": 0.1922,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18336258828639984,
|
||
|
|
"step": 1535,
|
||
|
|
"valid_targets_mean": 4597.2,
|
||
|
|
"valid_targets_min": 376
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3404255319148937,
|
||
|
|
"grad_norm": 0.40660499202393396,
|
||
|
|
"learning_rate": 3.368762675023635e-05,
|
||
|
|
"loss": 0.1931,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17899194359779358,
|
||
|
|
"step": 1540,
|
||
|
|
"valid_targets_mean": 5368.1,
|
||
|
|
"valid_targets_min": 748
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3480243161094223,
|
||
|
|
"grad_norm": 0.4869497094603196,
|
||
|
|
"learning_rate": 3.363226656070819e-05,
|
||
|
|
"loss": 0.1872,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20963457226753235,
|
||
|
|
"step": 1545,
|
||
|
|
"valid_targets_mean": 4233.9,
|
||
|
|
"valid_targets_min": 799
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3556231003039514,
|
||
|
|
"grad_norm": 0.42891641992075347,
|
||
|
|
"learning_rate": 3.3576710595728586e-05,
|
||
|
|
"loss": 0.2024,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18602633476257324,
|
||
|
|
"step": 1550,
|
||
|
|
"valid_targets_mean": 4363.8,
|
||
|
|
"valid_targets_min": 156
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.36322188449848,
|
||
|
|
"grad_norm": 0.4784395146562949,
|
||
|
|
"learning_rate": 3.352095965314682e-05,
|
||
|
|
"loss": 0.1974,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20554772019386292,
|
||
|
|
"step": 1555,
|
||
|
|
"valid_targets_mean": 4412.3,
|
||
|
|
"valid_targets_min": 732
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.370820668693009,
|
||
|
|
"grad_norm": 0.3940116430034305,
|
||
|
|
"learning_rate": 3.3465014533612295e-05,
|
||
|
|
"loss": 0.1797,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16700813174247742,
|
||
|
|
"step": 1560,
|
||
|
|
"valid_targets_mean": 4859.4,
|
||
|
|
"valid_targets_min": 2115
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.378419452887538,
|
||
|
|
"grad_norm": 0.4053824724130465,
|
||
|
|
"learning_rate": 3.340887604056301e-05,
|
||
|
|
"loss": 0.1959,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19125846028327942,
|
||
|
|
"step": 1565,
|
||
|
|
"valid_targets_mean": 5586.5,
|
||
|
|
"valid_targets_min": 855
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.386018237082067,
|
||
|
|
"grad_norm": 0.467001631524023,
|
||
|
|
"learning_rate": 3.335254498021404e-05,
|
||
|
|
"loss": 0.1987,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21299326419830322,
|
||
|
|
"step": 1570,
|
||
|
|
"valid_targets_mean": 4965.8,
|
||
|
|
"valid_targets_min": 333
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.393617021276596,
|
||
|
|
"grad_norm": 0.46807262963090795,
|
||
|
|
"learning_rate": 3.329602216154594e-05,
|
||
|
|
"loss": 0.2011,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21306315064430237,
|
||
|
|
"step": 1575,
|
||
|
|
"valid_targets_mean": 4142.6,
|
||
|
|
"valid_targets_min": 618
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4012158054711246,
|
||
|
|
"grad_norm": 0.4351369955863477,
|
||
|
|
"learning_rate": 3.323930839629318e-05,
|
||
|
|
"loss": 0.1885,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.192465141415596,
|
||
|
|
"step": 1580,
|
||
|
|
"valid_targets_mean": 4713.5,
|
||
|
|
"valid_targets_min": 805
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4088145896656536,
|
||
|
|
"grad_norm": 0.4664276306785285,
|
||
|
|
"learning_rate": 3.318240449893242e-05,
|
||
|
|
"loss": 0.1994,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20806226134300232,
|
||
|
|
"step": 1585,
|
||
|
|
"valid_targets_mean": 4086.8,
|
||
|
|
"valid_targets_min": 616
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4164133738601823,
|
||
|
|
"grad_norm": 0.4691257656452498,
|
||
|
|
"learning_rate": 3.3125311286670836e-05,
|
||
|
|
"loss": 0.1982,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23262707889080048,
|
||
|
|
"step": 1590,
|
||
|
|
"valid_targets_mean": 4931.9,
|
||
|
|
"valid_targets_min": 633
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4240121580547114,
|
||
|
|
"grad_norm": 0.42906847559664973,
|
||
|
|
"learning_rate": 3.3068029579434404e-05,
|
||
|
|
"loss": 0.196,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19915705919265747,
|
||
|
|
"step": 1595,
|
||
|
|
"valid_targets_mean": 4431.0,
|
||
|
|
"valid_targets_min": 397
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.43161094224924,
|
||
|
|
"grad_norm": 0.4018736513467274,
|
||
|
|
"learning_rate": 3.3010560199856105e-05,
|
||
|
|
"loss": 0.1935,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16372954845428467,
|
||
|
|
"step": 1600,
|
||
|
|
"valid_targets_mean": 5064.1,
|
||
|
|
"valid_targets_min": 1229
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.439209726443769,
|
||
|
|
"grad_norm": 0.4520461458479404,
|
||
|
|
"learning_rate": 3.2952903973264115e-05,
|
||
|
|
"loss": 0.2062,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19510574638843536,
|
||
|
|
"step": 1605,
|
||
|
|
"valid_targets_mean": 4399.9,
|
||
|
|
"valid_targets_min": 646
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4468085106382977,
|
||
|
|
"grad_norm": 0.3918033831100767,
|
||
|
|
"learning_rate": 3.289506172766997e-05,
|
||
|
|
"loss": 0.1992,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1935116946697235,
|
||
|
|
"step": 1610,
|
||
|
|
"valid_targets_mean": 5341.8,
|
||
|
|
"valid_targets_min": 801
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.454407294832827,
|
||
|
|
"grad_norm": 0.43294616241650447,
|
||
|
|
"learning_rate": 3.283703429375663e-05,
|
||
|
|
"loss": 0.1906,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16868659853935242,
|
||
|
|
"step": 1615,
|
||
|
|
"valid_targets_mean": 4395.4,
|
||
|
|
"valid_targets_min": 518
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4620060790273555,
|
||
|
|
"grad_norm": 0.5245421221092823,
|
||
|
|
"learning_rate": 3.2778822504866594e-05,
|
||
|
|
"loss": 0.1863,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2138800024986267,
|
||
|
|
"step": 1620,
|
||
|
|
"valid_targets_mean": 3523.9,
|
||
|
|
"valid_targets_min": 736
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4696048632218845,
|
||
|
|
"grad_norm": 0.44947552448463496,
|
||
|
|
"learning_rate": 3.272042719698992e-05,
|
||
|
|
"loss": 0.1986,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2089487910270691,
|
||
|
|
"step": 1625,
|
||
|
|
"valid_targets_mean": 4868.6,
|
||
|
|
"valid_targets_min": 708
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.477203647416413,
|
||
|
|
"grad_norm": 0.5745948604608038,
|
||
|
|
"learning_rate": 3.2661849208752205e-05,
|
||
|
|
"loss": 0.1774,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15748457610607147,
|
||
|
|
"step": 1630,
|
||
|
|
"valid_targets_mean": 5253.4,
|
||
|
|
"valid_targets_min": 1100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4848024316109423,
|
||
|
|
"grad_norm": 0.466310291931274,
|
||
|
|
"learning_rate": 3.2603089381402574e-05,
|
||
|
|
"loss": 0.2008,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18000131845474243,
|
||
|
|
"step": 1635,
|
||
|
|
"valid_targets_mean": 4540.9,
|
||
|
|
"valid_targets_min": 594
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4924012158054714,
|
||
|
|
"grad_norm": 0.4887847049484213,
|
||
|
|
"learning_rate": 3.254414855880155e-05,
|
||
|
|
"loss": 0.1979,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19799655675888062,
|
||
|
|
"step": 1640,
|
||
|
|
"valid_targets_mean": 3895.6,
|
||
|
|
"valid_targets_min": 632
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5,
|
||
|
|
"grad_norm": 0.44432072579472404,
|
||
|
|
"learning_rate": 3.2485027587408965e-05,
|
||
|
|
"loss": 0.1925,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18566831946372986,
|
||
|
|
"step": 1645,
|
||
|
|
"valid_targets_mean": 4716.8,
|
||
|
|
"valid_targets_min": 785
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5075987841945286,
|
||
|
|
"grad_norm": 0.7254579276839739,
|
||
|
|
"learning_rate": 3.2425727316271814e-05,
|
||
|
|
"loss": 0.2156,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2835049033164978,
|
||
|
|
"step": 1650,
|
||
|
|
"valid_targets_mean": 4848.5,
|
||
|
|
"valid_targets_min": 1102
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5151975683890577,
|
||
|
|
"grad_norm": 0.45281348742054633,
|
||
|
|
"learning_rate": 3.2366248597012037e-05,
|
||
|
|
"loss": 0.1921,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21764954924583435,
|
||
|
|
"step": 1655,
|
||
|
|
"valid_targets_mean": 4480.8,
|
||
|
|
"valid_targets_min": 808
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.522796352583587,
|
||
|
|
"grad_norm": 0.45329008128640347,
|
||
|
|
"learning_rate": 3.230659228381432e-05,
|
||
|
|
"loss": 0.1872,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19859766960144043,
|
||
|
|
"step": 1660,
|
||
|
|
"valid_targets_mean": 4694.1,
|
||
|
|
"valid_targets_min": 710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5303951367781155,
|
||
|
|
"grad_norm": 0.3991066726887112,
|
||
|
|
"learning_rate": 3.2246759233413765e-05,
|
||
|
|
"loss": 0.2077,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18775928020477295,
|
||
|
|
"step": 1665,
|
||
|
|
"valid_targets_mean": 5537.6,
|
||
|
|
"valid_targets_min": 2270
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5379939209726445,
|
||
|
|
"grad_norm": 0.45660075830572106,
|
||
|
|
"learning_rate": 3.218675030508367e-05,
|
||
|
|
"loss": 0.1992,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21159949898719788,
|
||
|
|
"step": 1670,
|
||
|
|
"valid_targets_mean": 4234.2,
|
||
|
|
"valid_targets_min": 465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.545592705167173,
|
||
|
|
"grad_norm": 0.47845524476355505,
|
||
|
|
"learning_rate": 3.212656636062314e-05,
|
||
|
|
"loss": 0.2034,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18367314338684082,
|
||
|
|
"step": 1675,
|
||
|
|
"valid_targets_mean": 3884.0,
|
||
|
|
"valid_targets_min": 609
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5531914893617023,
|
||
|
|
"grad_norm": 0.47337775511493685,
|
||
|
|
"learning_rate": 3.2066208264344695e-05,
|
||
|
|
"loss": 0.1954,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19395776093006134,
|
||
|
|
"step": 1680,
|
||
|
|
"valid_targets_mean": 4240.3,
|
||
|
|
"valid_targets_min": 1731
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.560790273556231,
|
||
|
|
"grad_norm": 0.49086260546972615,
|
||
|
|
"learning_rate": 3.200567688306192e-05,
|
||
|
|
"loss": 0.2118,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2288236916065216,
|
||
|
|
"step": 1685,
|
||
|
|
"valid_targets_mean": 3920.8,
|
||
|
|
"valid_targets_min": 658
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.56838905775076,
|
||
|
|
"grad_norm": 0.48820274570312466,
|
||
|
|
"learning_rate": 3.194497308607694e-05,
|
||
|
|
"loss": 0.2084,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2082967460155487,
|
||
|
|
"step": 1690,
|
||
|
|
"valid_targets_mean": 4256.8,
|
||
|
|
"valid_targets_min": 440
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5759878419452886,
|
||
|
|
"grad_norm": 0.43205681075466934,
|
||
|
|
"learning_rate": 3.1884097745167986e-05,
|
||
|
|
"loss": 0.1852,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1597200632095337,
|
||
|
|
"step": 1695,
|
||
|
|
"valid_targets_mean": 4130.8,
|
||
|
|
"valid_targets_min": 878
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5835866261398177,
|
||
|
|
"grad_norm": 0.5054254906668799,
|
||
|
|
"learning_rate": 3.182305173457688e-05,
|
||
|
|
"loss": 0.2034,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2262079417705536,
|
||
|
|
"step": 1700,
|
||
|
|
"valid_targets_mean": 4460.6,
|
||
|
|
"valid_targets_min": 401
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5911854103343464,
|
||
|
|
"grad_norm": 0.504689704310772,
|
||
|
|
"learning_rate": 3.1761835930996424e-05,
|
||
|
|
"loss": 0.1949,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1995968520641327,
|
||
|
|
"step": 1705,
|
||
|
|
"valid_targets_mean": 3648.3,
|
||
|
|
"valid_targets_min": 558
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5987841945288754,
|
||
|
|
"grad_norm": 0.4392316385094666,
|
||
|
|
"learning_rate": 3.1700451213557896e-05,
|
||
|
|
"loss": 0.2026,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20353609323501587,
|
||
|
|
"step": 1710,
|
||
|
|
"valid_targets_mean": 4641.1,
|
||
|
|
"valid_targets_min": 845
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6063829787234045,
|
||
|
|
"grad_norm": 0.44339996982872454,
|
||
|
|
"learning_rate": 3.1638898463818336e-05,
|
||
|
|
"loss": 0.189,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15977585315704346,
|
||
|
|
"step": 1715,
|
||
|
|
"valid_targets_mean": 4352.1,
|
||
|
|
"valid_targets_min": 827
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.613981762917933,
|
||
|
|
"grad_norm": 0.5398477413573792,
|
||
|
|
"learning_rate": 3.157717856574794e-05,
|
||
|
|
"loss": 0.1842,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18378981947898865,
|
||
|
|
"step": 1720,
|
||
|
|
"valid_targets_mean": 4131.6,
|
||
|
|
"valid_targets_min": 331
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.621580547112462,
|
||
|
|
"grad_norm": 0.39672349034501203,
|
||
|
|
"learning_rate": 3.151529240571737e-05,
|
||
|
|
"loss": 0.2048,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19032993912696838,
|
||
|
|
"step": 1725,
|
||
|
|
"valid_targets_mean": 5932.7,
|
||
|
|
"valid_targets_min": 2148
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.629179331306991,
|
||
|
|
"grad_norm": 0.4826721393487148,
|
||
|
|
"learning_rate": 3.1453240872484974e-05,
|
||
|
|
"loss": 0.2209,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21052716672420502,
|
||
|
|
"step": 1730,
|
||
|
|
"valid_targets_mean": 4064.6,
|
||
|
|
"valid_targets_min": 713
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.63677811550152,
|
||
|
|
"grad_norm": 0.40797959815259904,
|
||
|
|
"learning_rate": 3.139102485718407e-05,
|
||
|
|
"loss": 0.1932,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18385851383209229,
|
||
|
|
"step": 1735,
|
||
|
|
"valid_targets_mean": 5094.5,
|
||
|
|
"valid_targets_min": 774
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6443768996960486,
|
||
|
|
"grad_norm": 0.49388764627269777,
|
||
|
|
"learning_rate": 3.1328645253310136e-05,
|
||
|
|
"loss": 0.1837,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20751240849494934,
|
||
|
|
"step": 1740,
|
||
|
|
"valid_targets_mean": 4489.2,
|
||
|
|
"valid_targets_min": 668
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6519756838905773,
|
||
|
|
"grad_norm": 0.41221441649011,
|
||
|
|
"learning_rate": 3.126610295670798e-05,
|
||
|
|
"loss": 0.1966,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18350102007389069,
|
||
|
|
"step": 1745,
|
||
|
|
"valid_targets_mean": 5068.5,
|
||
|
|
"valid_targets_min": 1047
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6595744680851063,
|
||
|
|
"grad_norm": 0.43487564674132706,
|
||
|
|
"learning_rate": 3.120339886555885e-05,
|
||
|
|
"loss": 0.2039,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1929788887500763,
|
||
|
|
"step": 1750,
|
||
|
|
"valid_targets_mean": 4648.7,
|
||
|
|
"valid_targets_min": 1712
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6671732522796354,
|
||
|
|
"grad_norm": 0.3931845374369816,
|
||
|
|
"learning_rate": 3.114053388036757e-05,
|
||
|
|
"loss": 0.1831,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17840635776519775,
|
||
|
|
"step": 1755,
|
||
|
|
"valid_targets_mean": 5762.4,
|
||
|
|
"valid_targets_min": 537
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.674772036474164,
|
||
|
|
"grad_norm": 0.46325220533318656,
|
||
|
|
"learning_rate": 3.1077508903949594e-05,
|
||
|
|
"loss": 0.1857,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15973162651062012,
|
||
|
|
"step": 1760,
|
||
|
|
"valid_targets_mean": 4577.9,
|
||
|
|
"valid_targets_min": 1190
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.682370820668693,
|
||
|
|
"grad_norm": 0.448314100365069,
|
||
|
|
"learning_rate": 3.1014324841418025e-05,
|
||
|
|
"loss": 0.1863,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21308580040931702,
|
||
|
|
"step": 1765,
|
||
|
|
"valid_targets_mean": 4523.5,
|
||
|
|
"valid_targets_min": 689
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.689969604863222,
|
||
|
|
"grad_norm": 0.4126949676808008,
|
||
|
|
"learning_rate": 3.095098260017065e-05,
|
||
|
|
"loss": 0.1889,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17054617404937744,
|
||
|
|
"step": 1770,
|
||
|
|
"valid_targets_mean": 4661.3,
|
||
|
|
"valid_targets_min": 940
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.697568389057751,
|
||
|
|
"grad_norm": 0.520594987471646,
|
||
|
|
"learning_rate": 3.088748308987687e-05,
|
||
|
|
"loss": 0.1926,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18412724137306213,
|
||
|
|
"step": 1775,
|
||
|
|
"valid_targets_mean": 4541.3,
|
||
|
|
"valid_targets_min": 425
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7051671732522795,
|
||
|
|
"grad_norm": 0.4292929031518839,
|
||
|
|
"learning_rate": 3.082382722246467e-05,
|
||
|
|
"loss": 0.2083,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18564680218696594,
|
||
|
|
"step": 1780,
|
||
|
|
"valid_targets_mean": 4726.2,
|
||
|
|
"valid_targets_min": 849
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7127659574468086,
|
||
|
|
"grad_norm": 0.4143100769110258,
|
||
|
|
"learning_rate": 3.07600159121075e-05,
|
||
|
|
"loss": 0.1997,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19952227175235748,
|
||
|
|
"step": 1785,
|
||
|
|
"valid_targets_mean": 6396.9,
|
||
|
|
"valid_targets_min": 2406
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7203647416413372,
|
||
|
|
"grad_norm": 0.5138412636257836,
|
||
|
|
"learning_rate": 3.069605007521115e-05,
|
||
|
|
"loss": 0.198,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21767953038215637,
|
||
|
|
"step": 1790,
|
||
|
|
"valid_targets_mean": 4080.7,
|
||
|
|
"valid_targets_min": 855
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7279635258358663,
|
||
|
|
"grad_norm": 0.9274624474305112,
|
||
|
|
"learning_rate": 3.063193063040061e-05,
|
||
|
|
"loss": 0.1951,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2023240178823471,
|
||
|
|
"step": 1795,
|
||
|
|
"valid_targets_mean": 4271.2,
|
||
|
|
"valid_targets_min": 299
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.735562310030395,
|
||
|
|
"grad_norm": 0.4505218736377883,
|
||
|
|
"learning_rate": 3.0567658498506835e-05,
|
||
|
|
"loss": 0.2006,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19997933506965637,
|
||
|
|
"step": 1800,
|
||
|
|
"valid_targets_mean": 6215.2,
|
||
|
|
"valid_targets_min": 834
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.743161094224924,
|
||
|
|
"grad_norm": 0.45112279832870705,
|
||
|
|
"learning_rate": 3.050323460255359e-05,
|
||
|
|
"loss": 0.1985,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20109710097312927,
|
||
|
|
"step": 1805,
|
||
|
|
"valid_targets_mean": 4547.4,
|
||
|
|
"valid_targets_min": 594
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.750759878419453,
|
||
|
|
"grad_norm": 0.45439465696192066,
|
||
|
|
"learning_rate": 3.0438659867744104e-05,
|
||
|
|
"loss": 0.2029,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22971929609775543,
|
||
|
|
"step": 1810,
|
||
|
|
"valid_targets_mean": 4679.8,
|
||
|
|
"valid_targets_min": 630
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7583586626139818,
|
||
|
|
"grad_norm": 0.4239951858010868,
|
||
|
|
"learning_rate": 3.0373935221447846e-05,
|
||
|
|
"loss": 0.1861,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18463487923145294,
|
||
|
|
"step": 1815,
|
||
|
|
"valid_targets_mean": 4548.8,
|
||
|
|
"valid_targets_min": 832
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7659574468085104,
|
||
|
|
"grad_norm": 0.46571130038179387,
|
||
|
|
"learning_rate": 3.030906159318721e-05,
|
||
|
|
"loss": 0.2062,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2257537692785263,
|
||
|
|
"step": 1820,
|
||
|
|
"valid_targets_mean": 4535.1,
|
||
|
|
"valid_targets_min": 822
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7735562310030395,
|
||
|
|
"grad_norm": 0.5251100785646777,
|
||
|
|
"learning_rate": 3.0244039914624127e-05,
|
||
|
|
"loss": 0.1944,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16839849948883057,
|
||
|
|
"step": 1825,
|
||
|
|
"valid_targets_mean": 4547.6,
|
||
|
|
"valid_targets_min": 551
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7811550151975686,
|
||
|
|
"grad_norm": 0.46551771612143145,
|
||
|
|
"learning_rate": 3.017887111954671e-05,
|
||
|
|
"loss": 0.2063,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21392841637134552,
|
||
|
|
"step": 1830,
|
||
|
|
"valid_targets_mean": 4989.8,
|
||
|
|
"valid_targets_min": 1092
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.788753799392097,
|
||
|
|
"grad_norm": 0.45282545824018966,
|
||
|
|
"learning_rate": 3.0113556143855836e-05,
|
||
|
|
"loss": 0.1769,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17507150769233704,
|
||
|
|
"step": 1835,
|
||
|
|
"valid_targets_mean": 3937.5,
|
||
|
|
"valid_targets_min": 433
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7963525835866263,
|
||
|
|
"grad_norm": 0.8573211013484289,
|
||
|
|
"learning_rate": 3.004809592555172e-05,
|
||
|
|
"loss": 0.1924,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1929873377084732,
|
||
|
|
"step": 1840,
|
||
|
|
"valid_targets_mean": 4398.6,
|
||
|
|
"valid_targets_min": 447
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.803951367781155,
|
||
|
|
"grad_norm": 0.4511946895966249,
|
||
|
|
"learning_rate": 2.9982491404720408e-05,
|
||
|
|
"loss": 0.2171,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.25569894909858704,
|
||
|
|
"step": 1845,
|
||
|
|
"valid_targets_mean": 5263.1,
|
||
|
|
"valid_targets_min": 704
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.811550151975684,
|
||
|
|
"grad_norm": 0.4075365034632511,
|
||
|
|
"learning_rate": 2.9916743523520336e-05,
|
||
|
|
"loss": 0.1807,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18254083395004272,
|
||
|
|
"step": 1850,
|
||
|
|
"valid_targets_mean": 5106.1,
|
||
|
|
"valid_targets_min": 1353
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8191489361702127,
|
||
|
|
"grad_norm": 0.39703735203471957,
|
||
|
|
"learning_rate": 2.9850853226168738e-05,
|
||
|
|
"loss": 0.1819,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1689942181110382,
|
||
|
|
"step": 1855,
|
||
|
|
"valid_targets_mean": 5267.1,
|
||
|
|
"valid_targets_min": 1235
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8267477203647418,
|
||
|
|
"grad_norm": 0.4300034911707786,
|
||
|
|
"learning_rate": 2.9784821458928116e-05,
|
||
|
|
"loss": 0.1834,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19870391488075256,
|
||
|
|
"step": 1860,
|
||
|
|
"valid_targets_mean": 4619.8,
|
||
|
|
"valid_targets_min": 672
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8343465045592704,
|
||
|
|
"grad_norm": 0.4731017750575238,
|
||
|
|
"learning_rate": 2.9718649170092653e-05,
|
||
|
|
"loss": 0.2051,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2331341654062271,
|
||
|
|
"step": 1865,
|
||
|
|
"valid_targets_mean": 4517.1,
|
||
|
|
"valid_targets_min": 673
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8419452887537995,
|
||
|
|
"grad_norm": 0.47588128492307696,
|
||
|
|
"learning_rate": 2.9652337309974582e-05,
|
||
|
|
"loss": 0.1766,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17996634542942047,
|
||
|
|
"step": 1870,
|
||
|
|
"valid_targets_mean": 3985.2,
|
||
|
|
"valid_targets_min": 962
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.849544072948328,
|
||
|
|
"grad_norm": 0.5647869677542844,
|
||
|
|
"learning_rate": 2.958588683089056e-05,
|
||
|
|
"loss": 0.194,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21768032014369965,
|
||
|
|
"step": 1875,
|
||
|
|
"valid_targets_mean": 3708.1,
|
||
|
|
"valid_targets_min": 342
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.857142857142857,
|
||
|
|
"grad_norm": 0.4169198649338515,
|
||
|
|
"learning_rate": 2.9519298687147945e-05,
|
||
|
|
"loss": 0.1901,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17996545135974884,
|
||
|
|
"step": 1880,
|
||
|
|
"valid_targets_mean": 4830.1,
|
||
|
|
"valid_targets_min": 834
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8647416413373863,
|
||
|
|
"grad_norm": 0.46342593085414296,
|
||
|
|
"learning_rate": 2.9452573835031154e-05,
|
||
|
|
"loss": 0.189,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17930959165096283,
|
||
|
|
"step": 1885,
|
||
|
|
"valid_targets_mean": 5006.9,
|
||
|
|
"valid_targets_min": 1043
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.872340425531915,
|
||
|
|
"grad_norm": 0.5952920705102613,
|
||
|
|
"learning_rate": 2.9385713232787877e-05,
|
||
|
|
"loss": 0.1878,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17503748834133148,
|
||
|
|
"step": 1890,
|
||
|
|
"valid_targets_mean": 4345.5,
|
||
|
|
"valid_targets_min": 702
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8799392097264436,
|
||
|
|
"grad_norm": 1.5179001765998326,
|
||
|
|
"learning_rate": 2.9318717840615352e-05,
|
||
|
|
"loss": 0.1895,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17403315007686615,
|
||
|
|
"step": 1895,
|
||
|
|
"valid_targets_mean": 5409.5,
|
||
|
|
"valid_targets_min": 2159
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8875379939209727,
|
||
|
|
"grad_norm": 0.45039073631427357,
|
||
|
|
"learning_rate": 2.9251588620646543e-05,
|
||
|
|
"loss": 0.2208,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19473648071289062,
|
||
|
|
"step": 1900,
|
||
|
|
"valid_targets_mean": 4456.6,
|
||
|
|
"valid_targets_min": 561
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8951367781155017,
|
||
|
|
"grad_norm": 0.5386037897816643,
|
||
|
|
"learning_rate": 2.9184326536936356e-05,
|
||
|
|
"loss": 0.1999,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20459288358688354,
|
||
|
|
"step": 1905,
|
||
|
|
"valid_targets_mean": 5313.1,
|
||
|
|
"valid_targets_min": 1889
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9027355623100304,
|
||
|
|
"grad_norm": 0.5171036731287691,
|
||
|
|
"learning_rate": 2.9116932555447764e-05,
|
||
|
|
"loss": 0.1924,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19427910447120667,
|
||
|
|
"step": 1910,
|
||
|
|
"valid_targets_mean": 4551.8,
|
||
|
|
"valid_targets_min": 640
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.910334346504559,
|
||
|
|
"grad_norm": 0.3956323253175454,
|
||
|
|
"learning_rate": 2.904940764403795e-05,
|
||
|
|
"loss": 0.1921,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16807705163955688,
|
||
|
|
"step": 1915,
|
||
|
|
"valid_targets_mean": 5148.9,
|
||
|
|
"valid_targets_min": 1114
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.917933130699088,
|
||
|
|
"grad_norm": 0.4609179179948364,
|
||
|
|
"learning_rate": 2.898175277244441e-05,
|
||
|
|
"loss": 0.1774,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18651928007602692,
|
||
|
|
"step": 1920,
|
||
|
|
"valid_targets_mean": 4270.8,
|
||
|
|
"valid_targets_min": 829
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.925531914893617,
|
||
|
|
"grad_norm": 0.3746013132633794,
|
||
|
|
"learning_rate": 2.891396891227103e-05,
|
||
|
|
"loss": 0.1966,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18432965874671936,
|
||
|
|
"step": 1925,
|
||
|
|
"valid_targets_mean": 5464.6,
|
||
|
|
"valid_targets_min": 869
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.933130699088146,
|
||
|
|
"grad_norm": 0.44757073319717156,
|
||
|
|
"learning_rate": 2.88460570369741e-05,
|
||
|
|
"loss": 0.1966,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2037007212638855,
|
||
|
|
"step": 1930,
|
||
|
|
"valid_targets_mean": 5044.7,
|
||
|
|
"valid_targets_min": 650
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.940729483282675,
|
||
|
|
"grad_norm": 0.5165497089688482,
|
||
|
|
"learning_rate": 2.877801812184838e-05,
|
||
|
|
"loss": 0.1865,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19577309489250183,
|
||
|
|
"step": 1935,
|
||
|
|
"valid_targets_mean": 3723.6,
|
||
|
|
"valid_targets_min": 263
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9483282674772036,
|
||
|
|
"grad_norm": 0.5248044839976569,
|
||
|
|
"learning_rate": 2.8709853144013063e-05,
|
||
|
|
"loss": 0.2038,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17888793349266052,
|
||
|
|
"step": 1940,
|
||
|
|
"valid_targets_mean": 5473.5,
|
||
|
|
"valid_targets_min": 2971
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9559270516717326,
|
||
|
|
"grad_norm": 0.46290978562548923,
|
||
|
|
"learning_rate": 2.8641563082397755e-05,
|
||
|
|
"loss": 0.1794,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19724613428115845,
|
||
|
|
"step": 1945,
|
||
|
|
"valid_targets_mean": 4504.6,
|
||
|
|
"valid_targets_min": 683
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9635258358662613,
|
||
|
|
"grad_norm": 0.47247130378721536,
|
||
|
|
"learning_rate": 2.857314891772841e-05,
|
||
|
|
"loss": 0.2019,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1742001324892044,
|
||
|
|
"step": 1950,
|
||
|
|
"valid_targets_mean": 4378.8,
|
||
|
|
"valid_targets_min": 900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9711246200607904,
|
||
|
|
"grad_norm": 0.38540265276997787,
|
||
|
|
"learning_rate": 2.850461163251325e-05,
|
||
|
|
"loss": 0.1819,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1545667052268982,
|
||
|
|
"step": 1955,
|
||
|
|
"valid_targets_mean": 4485.9,
|
||
|
|
"valid_targets_min": 825
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.978723404255319,
|
||
|
|
"grad_norm": 0.4375842807396514,
|
||
|
|
"learning_rate": 2.8435952211028652e-05,
|
||
|
|
"loss": 0.189,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20349517464637756,
|
||
|
|
"step": 1960,
|
||
|
|
"valid_targets_mean": 4611.9,
|
||
|
|
"valid_targets_min": 1259
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.986322188449848,
|
||
|
|
"grad_norm": 0.393339700104493,
|
||
|
|
"learning_rate": 2.8367171639305014e-05,
|
||
|
|
"loss": 0.1937,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1762651801109314,
|
||
|
|
"step": 1965,
|
||
|
|
"valid_targets_mean": 4854.0,
|
||
|
|
"valid_targets_min": 1188
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9939209726443767,
|
||
|
|
"grad_norm": 0.4408583709677064,
|
||
|
|
"learning_rate": 2.8298270905112598e-05,
|
||
|
|
"loss": 0.2093,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17071180045604706,
|
||
|
|
"step": 1970,
|
||
|
|
"valid_targets_mean": 4334.9,
|
||
|
|
"valid_targets_min": 1033
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.001519756838906,
|
||
|
|
"grad_norm": 0.4509769720521314,
|
||
|
|
"learning_rate": 2.8229250997947338e-05,
|
||
|
|
"loss": 0.1899,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19761110842227936,
|
||
|
|
"step": 1975,
|
||
|
|
"valid_targets_mean": 5020.1,
|
||
|
|
"valid_targets_min": 464
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0091185410334345,
|
||
|
|
"grad_norm": 0.44397107618825393,
|
||
|
|
"learning_rate": 2.8160112909016627e-05,
|
||
|
|
"loss": 0.1964,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15861201286315918,
|
||
|
|
"step": 1980,
|
||
|
|
"valid_targets_mean": 4464.9,
|
||
|
|
"valid_targets_min": 434
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0167173252279635,
|
||
|
|
"grad_norm": 0.4590917607720272,
|
||
|
|
"learning_rate": 2.8090857631225105e-05,
|
||
|
|
"loss": 0.1901,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17721785604953766,
|
||
|
|
"step": 1985,
|
||
|
|
"valid_targets_mean": 4993.2,
|
||
|
|
"valid_targets_min": 443
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.024316109422492,
|
||
|
|
"grad_norm": 1.6476449501443067,
|
||
|
|
"learning_rate": 2.8021486159160365e-05,
|
||
|
|
"loss": 0.1809,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21059048175811768,
|
||
|
|
"step": 1990,
|
||
|
|
"valid_targets_mean": 3777.8,
|
||
|
|
"valid_targets_min": 567
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0319148936170213,
|
||
|
|
"grad_norm": 0.3955766292317692,
|
||
|
|
"learning_rate": 2.7951999489078697e-05,
|
||
|
|
"loss": 0.1775,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15698814392089844,
|
||
|
|
"step": 1995,
|
||
|
|
"valid_targets_mean": 5706.5,
|
||
|
|
"valid_targets_min": 1789
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0395136778115504,
|
||
|
|
"grad_norm": 0.6060112559647506,
|
||
|
|
"learning_rate": 2.7882398618890763e-05,
|
||
|
|
"loss": 0.1787,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17217731475830078,
|
||
|
|
"step": 2000,
|
||
|
|
"valid_targets_mean": 4799.2,
|
||
|
|
"valid_targets_min": 675
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.047112462006079,
|
||
|
|
"grad_norm": 0.48214170254530214,
|
||
|
|
"learning_rate": 2.781268454814728e-05,
|
||
|
|
"loss": 0.1743,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18182724714279175,
|
||
|
|
"step": 2005,
|
||
|
|
"valid_targets_mean": 4482.8,
|
||
|
|
"valid_targets_min": 268
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.054711246200608,
|
||
|
|
"grad_norm": 0.4254186774774426,
|
||
|
|
"learning_rate": 2.7742858278024657e-05,
|
||
|
|
"loss": 0.1831,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17712415754795074,
|
||
|
|
"step": 2010,
|
||
|
|
"valid_targets_mean": 5016.2,
|
||
|
|
"valid_targets_min": 933
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0623100303951367,
|
||
|
|
"grad_norm": 0.4561837122624953,
|
||
|
|
"learning_rate": 2.7672920811310615e-05,
|
||
|
|
"loss": 0.1813,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1763693392276764,
|
||
|
|
"step": 2015,
|
||
|
|
"valid_targets_mean": 5105.2,
|
||
|
|
"valid_targets_min": 1198
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.069908814589666,
|
||
|
|
"grad_norm": 0.6538535804895353,
|
||
|
|
"learning_rate": 2.7602873152389795e-05,
|
||
|
|
"loss": 0.1796,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15733420848846436,
|
||
|
|
"step": 2020,
|
||
|
|
"valid_targets_mean": 4136.8,
|
||
|
|
"valid_targets_min": 1002
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0775075987841944,
|
||
|
|
"grad_norm": 0.4383597724717559,
|
||
|
|
"learning_rate": 2.7532716307229325e-05,
|
||
|
|
"loss": 0.1855,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18817880749702454,
|
||
|
|
"step": 2025,
|
||
|
|
"valid_targets_mean": 4361.4,
|
||
|
|
"valid_targets_min": 566
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0851063829787235,
|
||
|
|
"grad_norm": 0.4468187218329756,
|
||
|
|
"learning_rate": 2.7462451283364372e-05,
|
||
|
|
"loss": 0.1707,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18754516541957855,
|
||
|
|
"step": 2030,
|
||
|
|
"valid_targets_mean": 5809.6,
|
||
|
|
"valid_targets_min": 2367
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.092705167173252,
|
||
|
|
"grad_norm": 0.4923380379793573,
|
||
|
|
"learning_rate": 2.739207908988369e-05,
|
||
|
|
"loss": 0.1697,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1901119500398636,
|
||
|
|
"step": 2035,
|
||
|
|
"valid_targets_mean": 4648.6,
|
||
|
|
"valid_targets_min": 651
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1003039513677813,
|
||
|
|
"grad_norm": 0.43950335258891504,
|
||
|
|
"learning_rate": 2.7321600737415103e-05,
|
||
|
|
"loss": 0.1882,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1579056680202484,
|
||
|
|
"step": 2040,
|
||
|
|
"valid_targets_mean": 4848.3,
|
||
|
|
"valid_targets_min": 513
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.10790273556231,
|
||
|
|
"grad_norm": 0.43941575889354745,
|
||
|
|
"learning_rate": 2.7251017238111014e-05,
|
||
|
|
"loss": 0.1818,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17752686142921448,
|
||
|
|
"step": 2045,
|
||
|
|
"valid_targets_mean": 4887.9,
|
||
|
|
"valid_targets_min": 552
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.115501519756839,
|
||
|
|
"grad_norm": 0.43006557381731564,
|
||
|
|
"learning_rate": 2.718032960563384e-05,
|
||
|
|
"loss": 0.1937,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1756466031074524,
|
||
|
|
"step": 2050,
|
||
|
|
"valid_targets_mean": 5296.2,
|
||
|
|
"valid_targets_min": 1395
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1231003039513676,
|
||
|
|
"grad_norm": 0.3962603202091622,
|
||
|
|
"learning_rate": 2.710953885514149e-05,
|
||
|
|
"loss": 0.1642,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13223829865455627,
|
||
|
|
"step": 2055,
|
||
|
|
"valid_targets_mean": 4657.5,
|
||
|
|
"valid_targets_min": 1941
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1306990881458967,
|
||
|
|
"grad_norm": 0.47787066171111203,
|
||
|
|
"learning_rate": 2.7038646003272778e-05,
|
||
|
|
"loss": 0.178,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1752995252609253,
|
||
|
|
"step": 2060,
|
||
|
|
"valid_targets_mean": 4332.5,
|
||
|
|
"valid_targets_min": 523
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1382978723404253,
|
||
|
|
"grad_norm": 0.49333053682614264,
|
||
|
|
"learning_rate": 2.6967652068132787e-05,
|
||
|
|
"loss": 0.1647,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1864144653081894,
|
||
|
|
"step": 2065,
|
||
|
|
"valid_targets_mean": 3611.1,
|
||
|
|
"valid_targets_min": 567
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1458966565349544,
|
||
|
|
"grad_norm": 0.4781276995658994,
|
||
|
|
"learning_rate": 2.6896558069278294e-05,
|
||
|
|
"loss": 0.1792,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14743542671203613,
|
||
|
|
"step": 2070,
|
||
|
|
"valid_targets_mean": 4010.8,
|
||
|
|
"valid_targets_min": 543
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1534954407294835,
|
||
|
|
"grad_norm": 0.4481532756441824,
|
||
|
|
"learning_rate": 2.6825365027703104e-05,
|
||
|
|
"loss": 0.1789,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1980597823858261,
|
||
|
|
"step": 2075,
|
||
|
|
"valid_targets_mean": 5291.1,
|
||
|
|
"valid_targets_min": 446
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.161094224924012,
|
||
|
|
"grad_norm": 0.44883490224285716,
|
||
|
|
"learning_rate": 2.6754073965823394e-05,
|
||
|
|
"loss": 0.1741,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19897349178791046,
|
||
|
|
"step": 2080,
|
||
|
|
"valid_targets_mean": 5601.1,
|
||
|
|
"valid_targets_min": 460
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1686930091185412,
|
||
|
|
"grad_norm": 0.47818122842888633,
|
||
|
|
"learning_rate": 2.668268590746303e-05,
|
||
|
|
"loss": 0.17,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16886334121227264,
|
||
|
|
"step": 2085,
|
||
|
|
"valid_targets_mean": 3956.9,
|
||
|
|
"valid_targets_min": 672
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.17629179331307,
|
||
|
|
"grad_norm": 0.44699975246028895,
|
||
|
|
"learning_rate": 2.6611201877838858e-05,
|
||
|
|
"loss": 0.173,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1886739879846573,
|
||
|
|
"step": 2090,
|
||
|
|
"valid_targets_mean": 5496.0,
|
||
|
|
"valid_targets_min": 1891
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.183890577507599,
|
||
|
|
"grad_norm": 0.46241733640368204,
|
||
|
|
"learning_rate": 2.6539622903545992e-05,
|
||
|
|
"loss": 0.1736,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16013459861278534,
|
||
|
|
"step": 2095,
|
||
|
|
"valid_targets_mean": 5061.5,
|
||
|
|
"valid_targets_min": 842
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1914893617021276,
|
||
|
|
"grad_norm": 0.419537783672445,
|
||
|
|
"learning_rate": 2.6467950012543055e-05,
|
||
|
|
"loss": 0.1806,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15453095734119415,
|
||
|
|
"step": 2100,
|
||
|
|
"valid_targets_mean": 5147.8,
|
||
|
|
"valid_targets_min": 1776
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1990881458966567,
|
||
|
|
"grad_norm": 0.4430059744559326,
|
||
|
|
"learning_rate": 2.639618423413742e-05,
|
||
|
|
"loss": 0.1786,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15962053835391998,
|
||
|
|
"step": 2105,
|
||
|
|
"valid_targets_mean": 4258.2,
|
||
|
|
"valid_targets_min": 719
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2066869300911853,
|
||
|
|
"grad_norm": 0.4601551349059534,
|
||
|
|
"learning_rate": 2.6324326598970447e-05,
|
||
|
|
"loss": 0.1801,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19752000272274017,
|
||
|
|
"step": 2110,
|
||
|
|
"valid_targets_mean": 5594.7,
|
||
|
|
"valid_targets_min": 2157
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2142857142857144,
|
||
|
|
"grad_norm": 0.4432941671854632,
|
||
|
|
"learning_rate": 2.6252378139002666e-05,
|
||
|
|
"loss": 0.177,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16792172193527222,
|
||
|
|
"step": 2115,
|
||
|
|
"valid_targets_mean": 4681.6,
|
||
|
|
"valid_targets_min": 831
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.221884498480243,
|
||
|
|
"grad_norm": 1.0521842556961734,
|
||
|
|
"learning_rate": 2.618033988749895e-05,
|
||
|
|
"loss": 0.1804,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18343770503997803,
|
||
|
|
"step": 2120,
|
||
|
|
"valid_targets_mean": 4442.8,
|
||
|
|
"valid_targets_min": 593
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.229483282674772,
|
||
|
|
"grad_norm": 0.4703582807896684,
|
||
|
|
"learning_rate": 2.6108212879013694e-05,
|
||
|
|
"loss": 0.1775,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.182417631149292,
|
||
|
|
"step": 2125,
|
||
|
|
"valid_targets_mean": 5050.2,
|
||
|
|
"valid_targets_min": 882
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.237082066869301,
|
||
|
|
"grad_norm": 0.5066204484636513,
|
||
|
|
"learning_rate": 2.6035998149375928e-05,
|
||
|
|
"loss": 0.1856,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23296314477920532,
|
||
|
|
"step": 2130,
|
||
|
|
"valid_targets_mean": 4509.6,
|
||
|
|
"valid_targets_min": 585
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.24468085106383,
|
||
|
|
"grad_norm": 0.47427217437454394,
|
||
|
|
"learning_rate": 2.5963696735674487e-05,
|
||
|
|
"loss": 0.1738,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1860450804233551,
|
||
|
|
"step": 2135,
|
||
|
|
"valid_targets_mean": 4647.2,
|
||
|
|
"valid_targets_min": 697
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2522796352583585,
|
||
|
|
"grad_norm": 0.4250698588091916,
|
||
|
|
"learning_rate": 2.5891309676243084e-05,
|
||
|
|
"loss": 0.185,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16689786314964294,
|
||
|
|
"step": 2140,
|
||
|
|
"valid_targets_mean": 4561.6,
|
||
|
|
"valid_targets_min": 756
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2598784194528876,
|
||
|
|
"grad_norm": 0.47029762963920585,
|
||
|
|
"learning_rate": 2.5818838010645393e-05,
|
||
|
|
"loss": 0.1641,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15192772448062897,
|
||
|
|
"step": 2145,
|
||
|
|
"valid_targets_mean": 4186.0,
|
||
|
|
"valid_targets_min": 434
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2674772036474162,
|
||
|
|
"grad_norm": 0.4554599632142264,
|
||
|
|
"learning_rate": 2.5746282779660145e-05,
|
||
|
|
"loss": 0.1752,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17261144518852234,
|
||
|
|
"step": 2150,
|
||
|
|
"valid_targets_mean": 4568.7,
|
||
|
|
"valid_targets_min": 1854
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2750759878419453,
|
||
|
|
"grad_norm": 0.4502827257632833,
|
||
|
|
"learning_rate": 2.5673645025266174e-05,
|
||
|
|
"loss": 0.178,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1779678761959076,
|
||
|
|
"step": 2155,
|
||
|
|
"valid_targets_mean": 4289.7,
|
||
|
|
"valid_targets_min": 781
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.282674772036474,
|
||
|
|
"grad_norm": 0.4787827101748988,
|
||
|
|
"learning_rate": 2.5600925790627423e-05,
|
||
|
|
"loss": 0.1774,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1940232217311859,
|
||
|
|
"step": 2160,
|
||
|
|
"valid_targets_mean": 4486.1,
|
||
|
|
"valid_targets_min": 171
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.290273556231003,
|
||
|
|
"grad_norm": 0.45438086048067683,
|
||
|
|
"learning_rate": 2.5528126120078018e-05,
|
||
|
|
"loss": 0.1949,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19441285729408264,
|
||
|
|
"step": 2165,
|
||
|
|
"valid_targets_mean": 5050.3,
|
||
|
|
"valid_targets_min": 1728
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.297872340425532,
|
||
|
|
"grad_norm": 0.40566641587346486,
|
||
|
|
"learning_rate": 2.545524705910722e-05,
|
||
|
|
"loss": 0.1775,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17091290652751923,
|
||
|
|
"step": 2170,
|
||
|
|
"valid_targets_mean": 5804.4,
|
||
|
|
"valid_targets_min": 1156
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3054711246200608,
|
||
|
|
"grad_norm": 0.42598008462425585,
|
||
|
|
"learning_rate": 2.5382289654344433e-05,
|
||
|
|
"loss": 0.1803,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16645470261573792,
|
||
|
|
"step": 2175,
|
||
|
|
"valid_targets_mean": 4971.6,
|
||
|
|
"valid_targets_min": 673
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.31306990881459,
|
||
|
|
"grad_norm": 0.440353868517696,
|
||
|
|
"learning_rate": 2.530925495354418e-05,
|
||
|
|
"loss": 0.1739,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17304345965385437,
|
||
|
|
"step": 2180,
|
||
|
|
"valid_targets_mean": 4552.6,
|
||
|
|
"valid_targets_min": 1465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3206686930091185,
|
||
|
|
"grad_norm": 0.45749279193739023,
|
||
|
|
"learning_rate": 2.523614400557103e-05,
|
||
|
|
"loss": 0.167,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16047537326812744,
|
||
|
|
"step": 2185,
|
||
|
|
"valid_targets_mean": 4131.4,
|
||
|
|
"valid_targets_min": 641
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3282674772036476,
|
||
|
|
"grad_norm": 0.42763280388882297,
|
||
|
|
"learning_rate": 2.516295786038457e-05,
|
||
|
|
"loss": 0.179,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19560299813747406,
|
||
|
|
"step": 2190,
|
||
|
|
"valid_targets_mean": 5307.1,
|
||
|
|
"valid_targets_min": 686
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.335866261398176,
|
||
|
|
"grad_norm": 0.45229487644284055,
|
||
|
|
"learning_rate": 2.5089697569024293e-05,
|
||
|
|
"loss": 0.1744,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17956998944282532,
|
||
|
|
"step": 2195,
|
||
|
|
"valid_targets_mean": 4415.1,
|
||
|
|
"valid_targets_min": 479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3434650455927053,
|
||
|
|
"grad_norm": 0.49947402828635423,
|
||
|
|
"learning_rate": 2.501636418359453e-05,
|
||
|
|
"loss": 0.1826,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19726476073265076,
|
||
|
|
"step": 2200,
|
||
|
|
"valid_targets_mean": 4160.0,
|
||
|
|
"valid_targets_min": 702
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.351063829787234,
|
||
|
|
"grad_norm": 0.5624015949940533,
|
||
|
|
"learning_rate": 2.4942958757249322e-05,
|
||
|
|
"loss": 0.1751,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17548981308937073,
|
||
|
|
"step": 2205,
|
||
|
|
"valid_targets_mean": 4641.6,
|
||
|
|
"valid_targets_min": 741
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.358662613981763,
|
||
|
|
"grad_norm": 0.47188014259395983,
|
||
|
|
"learning_rate": 2.4869482344177297e-05,
|
||
|
|
"loss": 0.1912,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.191510871052742,
|
||
|
|
"step": 2210,
|
||
|
|
"valid_targets_mean": 5098.9,
|
||
|
|
"valid_targets_min": 732
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3662613981762917,
|
||
|
|
"grad_norm": 0.4044372525372168,
|
||
|
|
"learning_rate": 2.479593599958655e-05,
|
||
|
|
"loss": 0.1633,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1595427691936493,
|
||
|
|
"step": 2215,
|
||
|
|
"valid_targets_mean": 4987.1,
|
||
|
|
"valid_targets_min": 2150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3738601823708207,
|
||
|
|
"grad_norm": 0.4610514723176418,
|
||
|
|
"learning_rate": 2.472232077968947e-05,
|
||
|
|
"loss": 0.1863,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18796592950820923,
|
||
|
|
"step": 2220,
|
||
|
|
"valid_targets_mean": 4755.8,
|
||
|
|
"valid_targets_min": 804
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3814589665653494,
|
||
|
|
"grad_norm": 0.4762033973991841,
|
||
|
|
"learning_rate": 2.4648637741687572e-05,
|
||
|
|
"loss": 0.1773,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17647765576839447,
|
||
|
|
"step": 2225,
|
||
|
|
"valid_targets_mean": 4052.8,
|
||
|
|
"valid_targets_min": 1097
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3890577507598785,
|
||
|
|
"grad_norm": 0.5674700704733731,
|
||
|
|
"learning_rate": 2.4574887943756322e-05,
|
||
|
|
"loss": 0.1841,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21014189720153809,
|
||
|
|
"step": 2230,
|
||
|
|
"valid_targets_mean": 5642.9,
|
||
|
|
"valid_targets_min": 887
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.396656534954407,
|
||
|
|
"grad_norm": 0.45407587371796526,
|
||
|
|
"learning_rate": 2.450107244502994e-05,
|
||
|
|
"loss": 0.1911,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18419310450553894,
|
||
|
|
"step": 2235,
|
||
|
|
"valid_targets_mean": 4964.2,
|
||
|
|
"valid_targets_min": 631
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.404255319148936,
|
||
|
|
"grad_norm": 0.6036076286770431,
|
||
|
|
"learning_rate": 2.442719230558619e-05,
|
||
|
|
"loss": 0.1935,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19704486429691315,
|
||
|
|
"step": 2240,
|
||
|
|
"valid_targets_mean": 3990.9,
|
||
|
|
"valid_targets_min": 879
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4118541033434653,
|
||
|
|
"grad_norm": 0.5044426709371705,
|
||
|
|
"learning_rate": 2.435324858643114e-05,
|
||
|
|
"loss": 0.1888,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17976920306682587,
|
||
|
|
"step": 2245,
|
||
|
|
"valid_targets_mean": 4607.1,
|
||
|
|
"valid_targets_min": 276
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.419452887537994,
|
||
|
|
"grad_norm": 0.47427419752248345,
|
||
|
|
"learning_rate": 2.4279242349483945e-05,
|
||
|
|
"loss": 0.1869,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18903280794620514,
|
||
|
|
"step": 2250,
|
||
|
|
"valid_targets_mean": 4716.1,
|
||
|
|
"valid_targets_min": 1806
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4270516717325226,
|
||
|
|
"grad_norm": 0.5315098096977057,
|
||
|
|
"learning_rate": 2.42051746575616e-05,
|
||
|
|
"loss": 0.1807,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22392138838768005,
|
||
|
|
"step": 2255,
|
||
|
|
"valid_targets_mean": 3496.4,
|
||
|
|
"valid_targets_min": 263
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4346504559270516,
|
||
|
|
"grad_norm": 0.39060444130604355,
|
||
|
|
"learning_rate": 2.413104657436365e-05,
|
||
|
|
"loss": 0.1621,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14147084951400757,
|
||
|
|
"step": 2260,
|
||
|
|
"valid_targets_mean": 4482.1,
|
||
|
|
"valid_targets_min": 527
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4422492401215807,
|
||
|
|
"grad_norm": 0.44338388859066824,
|
||
|
|
"learning_rate": 2.4056859164456933e-05,
|
||
|
|
"loss": 0.1787,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20094552636146545,
|
||
|
|
"step": 2265,
|
||
|
|
"valid_targets_mean": 5268.6,
|
||
|
|
"valid_targets_min": 601
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4498480243161094,
|
||
|
|
"grad_norm": 0.4361432780966179,
|
||
|
|
"learning_rate": 2.3982613493260298e-05,
|
||
|
|
"loss": 0.1887,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1768036186695099,
|
||
|
|
"step": 2270,
|
||
|
|
"valid_targets_mean": 5291.0,
|
||
|
|
"valid_targets_min": 506
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4574468085106385,
|
||
|
|
"grad_norm": 0.5069263015357972,
|
||
|
|
"learning_rate": 2.390831062702929e-05,
|
||
|
|
"loss": 0.1829,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19090662896633148,
|
||
|
|
"step": 2275,
|
||
|
|
"valid_targets_mean": 4792.0,
|
||
|
|
"valid_targets_min": 361
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.465045592705167,
|
||
|
|
"grad_norm": 0.40616123029264917,
|
||
|
|
"learning_rate": 2.383395163284083e-05,
|
||
|
|
"loss": 0.1709,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15730628371238708,
|
||
|
|
"step": 2280,
|
||
|
|
"valid_targets_mean": 5398.0,
|
||
|
|
"valid_targets_min": 1965
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.472644376899696,
|
||
|
|
"grad_norm": 0.4339438381996928,
|
||
|
|
"learning_rate": 2.3759537578577926e-05,
|
||
|
|
"loss": 0.1815,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16227596998214722,
|
||
|
|
"step": 2285,
|
||
|
|
"valid_targets_mean": 5389.4,
|
||
|
|
"valid_targets_min": 682
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.480243161094225,
|
||
|
|
"grad_norm": 0.44867888022495267,
|
||
|
|
"learning_rate": 2.3685069532914292e-05,
|
||
|
|
"loss": 0.1797,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1895052194595337,
|
||
|
|
"step": 2290,
|
||
|
|
"valid_targets_mean": 5373.5,
|
||
|
|
"valid_targets_min": 1378
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.487841945288754,
|
||
|
|
"grad_norm": 0.45859302831224075,
|
||
|
|
"learning_rate": 2.3610548565299044e-05,
|
||
|
|
"loss": 0.1759,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.161161869764328,
|
||
|
|
"step": 2295,
|
||
|
|
"valid_targets_mean": 4974.8,
|
||
|
|
"valid_targets_min": 963
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4954407294832825,
|
||
|
|
"grad_norm": 0.47990973790811997,
|
||
|
|
"learning_rate": 2.35359757459413e-05,
|
||
|
|
"loss": 0.1861,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1807297170162201,
|
||
|
|
"step": 2300,
|
||
|
|
"valid_targets_mean": 3966.6,
|
||
|
|
"valid_targets_min": 594
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5030395136778116,
|
||
|
|
"grad_norm": 0.4458814501241054,
|
||
|
|
"learning_rate": 2.3461352145794835e-05,
|
||
|
|
"loss": 0.1871,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1584734320640564,
|
||
|
|
"step": 2305,
|
||
|
|
"valid_targets_mean": 4710.0,
|
||
|
|
"valid_targets_min": 1068
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5106382978723403,
|
||
|
|
"grad_norm": 0.5204277337187776,
|
||
|
|
"learning_rate": 2.338667883654271e-05,
|
||
|
|
"loss": 0.1726,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19432294368743896,
|
||
|
|
"step": 2310,
|
||
|
|
"valid_targets_mean": 3841.7,
|
||
|
|
"valid_targets_min": 403
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5182370820668694,
|
||
|
|
"grad_norm": 0.5002703947790278,
|
||
|
|
"learning_rate": 2.3311956890581845e-05,
|
||
|
|
"loss": 0.1985,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1830371916294098,
|
||
|
|
"step": 2315,
|
||
|
|
"valid_targets_mean": 4266.8,
|
||
|
|
"valid_targets_min": 588
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5258358662613984,
|
||
|
|
"grad_norm": 0.45671055489988216,
|
||
|
|
"learning_rate": 2.3237187381007654e-05,
|
||
|
|
"loss": 0.1758,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17926675081253052,
|
||
|
|
"step": 2320,
|
||
|
|
"valid_targets_mean": 4515.0,
|
||
|
|
"valid_targets_min": 654
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.533434650455927,
|
||
|
|
"grad_norm": 0.44706162372061575,
|
||
|
|
"learning_rate": 2.3162371381598627e-05,
|
||
|
|
"loss": 0.1722,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17958760261535645,
|
||
|
|
"step": 2325,
|
||
|
|
"valid_targets_mean": 4538.9,
|
||
|
|
"valid_targets_min": 646
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5410334346504557,
|
||
|
|
"grad_norm": 0.48526337740475023,
|
||
|
|
"learning_rate": 2.308750996680089e-05,
|
||
|
|
"loss": 0.1857,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1857941448688507,
|
||
|
|
"step": 2330,
|
||
|
|
"valid_targets_mean": 4541.9,
|
||
|
|
"valid_targets_min": 718
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.548632218844985,
|
||
|
|
"grad_norm": 0.6863209953782875,
|
||
|
|
"learning_rate": 2.3012604211712787e-05,
|
||
|
|
"loss": 0.1801,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15989352762699127,
|
||
|
|
"step": 2335,
|
||
|
|
"valid_targets_mean": 4711.3,
|
||
|
|
"valid_targets_min": 905
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.556231003039514,
|
||
|
|
"grad_norm": 0.43601817882878313,
|
||
|
|
"learning_rate": 2.2937655192069453e-05,
|
||
|
|
"loss": 0.1727,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15695396065711975,
|
||
|
|
"step": 2340,
|
||
|
|
"valid_targets_mean": 4525.1,
|
||
|
|
"valid_targets_min": 554
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5638297872340425,
|
||
|
|
"grad_norm": 0.4384690019868411,
|
||
|
|
"learning_rate": 2.286266398422734e-05,
|
||
|
|
"loss": 0.1885,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17277196049690247,
|
||
|
|
"step": 2345,
|
||
|
|
"valid_targets_mean": 5536.7,
|
||
|
|
"valid_targets_min": 966
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.571428571428571,
|
||
|
|
"grad_norm": 0.50720927906344,
|
||
|
|
"learning_rate": 2.278763166514879e-05,
|
||
|
|
"loss": 0.1735,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1746547669172287,
|
||
|
|
"step": 2350,
|
||
|
|
"valid_targets_mean": 4262.4,
|
||
|
|
"valid_targets_min": 688
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5790273556231003,
|
||
|
|
"grad_norm": 0.5082859794711881,
|
||
|
|
"learning_rate": 2.2712559312386525e-05,
|
||
|
|
"loss": 0.1545,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16094228625297546,
|
||
|
|
"step": 2355,
|
||
|
|
"valid_targets_mean": 4750.5,
|
||
|
|
"valid_targets_min": 354
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5866261398176293,
|
||
|
|
"grad_norm": 0.5324693739038185,
|
||
|
|
"learning_rate": 2.2637448004068227e-05,
|
||
|
|
"loss": 0.186,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1898549497127533,
|
||
|
|
"step": 2360,
|
||
|
|
"valid_targets_mean": 4889.1,
|
||
|
|
"valid_targets_min": 868
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.594224924012158,
|
||
|
|
"grad_norm": 0.40332096235924436,
|
||
|
|
"learning_rate": 2.2562298818881005e-05,
|
||
|
|
"loss": 0.1729,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1677415668964386,
|
||
|
|
"step": 2365,
|
||
|
|
"valid_targets_mean": 4815.6,
|
||
|
|
"valid_targets_min": 449
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.601823708206687,
|
||
|
|
"grad_norm": 0.4975962462446565,
|
||
|
|
"learning_rate": 2.2487112836055932e-05,
|
||
|
|
"loss": 0.1714,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1909307986497879,
|
||
|
|
"step": 2370,
|
||
|
|
"valid_targets_mean": 4240.7,
|
||
|
|
"valid_targets_min": 873
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6094224924012157,
|
||
|
|
"grad_norm": 0.4333440171846238,
|
||
|
|
"learning_rate": 2.241189113535253e-05,
|
||
|
|
"loss": 0.1729,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17210185527801514,
|
||
|
|
"step": 2375,
|
||
|
|
"valid_targets_mean": 4546.3,
|
||
|
|
"valid_targets_min": 814
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.617021276595745,
|
||
|
|
"grad_norm": 0.4286229722867218,
|
||
|
|
"learning_rate": 2.2336634797043294e-05,
|
||
|
|
"loss": 0.1767,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17077794671058655,
|
||
|
|
"step": 2380,
|
||
|
|
"valid_targets_mean": 4828.9,
|
||
|
|
"valid_targets_min": 2075
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6246200607902734,
|
||
|
|
"grad_norm": 0.5351152453484341,
|
||
|
|
"learning_rate": 2.226134490189813e-05,
|
||
|
|
"loss": 0.1864,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20225580036640167,
|
||
|
|
"step": 2385,
|
||
|
|
"valid_targets_mean": 4493.8,
|
||
|
|
"valid_targets_min": 816
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6322188449848025,
|
||
|
|
"grad_norm": 0.4595793337143316,
|
||
|
|
"learning_rate": 2.2186022531168877e-05,
|
||
|
|
"loss": 0.1806,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18067389726638794,
|
||
|
|
"step": 2390,
|
||
|
|
"valid_targets_mean": 4226.4,
|
||
|
|
"valid_targets_min": 876
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.639817629179331,
|
||
|
|
"grad_norm": 0.4376288036773449,
|
||
|
|
"learning_rate": 2.2110668766573756e-05,
|
||
|
|
"loss": 0.1839,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17458736896514893,
|
||
|
|
"step": 2395,
|
||
|
|
"valid_targets_mean": 4607.4,
|
||
|
|
"valid_targets_min": 378
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6474164133738602,
|
||
|
|
"grad_norm": 0.4235273238288212,
|
||
|
|
"learning_rate": 2.2035284690281835e-05,
|
||
|
|
"loss": 0.1662,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16206535696983337,
|
||
|
|
"step": 2400,
|
||
|
|
"valid_targets_mean": 4671.6,
|
||
|
|
"valid_targets_min": 427
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.655015197568389,
|
||
|
|
"grad_norm": 0.4645835996278293,
|
||
|
|
"learning_rate": 2.1959871384897508e-05,
|
||
|
|
"loss": 0.168,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1612105816602707,
|
||
|
|
"step": 2405,
|
||
|
|
"valid_targets_mean": 4012.2,
|
||
|
|
"valid_targets_min": 736
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.662613981762918,
|
||
|
|
"grad_norm": 0.4339263612443116,
|
||
|
|
"learning_rate": 2.188442993344492e-05,
|
||
|
|
"loss": 0.1828,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18110080063343048,
|
||
|
|
"step": 2410,
|
||
|
|
"valid_targets_mean": 5371.9,
|
||
|
|
"valid_targets_min": 1880
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.670212765957447,
|
||
|
|
"grad_norm": 0.5156390412963044,
|
||
|
|
"learning_rate": 2.1808961419352433e-05,
|
||
|
|
"loss": 0.1856,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19328826665878296,
|
||
|
|
"step": 2415,
|
||
|
|
"valid_targets_mean": 5049.5,
|
||
|
|
"valid_targets_min": 1733
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6778115501519757,
|
||
|
|
"grad_norm": 0.4257958182746545,
|
||
|
|
"learning_rate": 2.173346692643706e-05,
|
||
|
|
"loss": 0.165,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15053753554821014,
|
||
|
|
"step": 2420,
|
||
|
|
"valid_targets_mean": 4545.5,
|
||
|
|
"valid_targets_min": 845
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6854103343465043,
|
||
|
|
"grad_norm": 0.41421446171755727,
|
||
|
|
"learning_rate": 2.1657947538888907e-05,
|
||
|
|
"loss": 0.1751,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15670928359031677,
|
||
|
|
"step": 2425,
|
||
|
|
"valid_targets_mean": 4735.4,
|
||
|
|
"valid_targets_min": 2303
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6930091185410334,
|
||
|
|
"grad_norm": 0.46488209722389967,
|
||
|
|
"learning_rate": 2.158240434125557e-05,
|
||
|
|
"loss": 0.1809,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17502865195274353,
|
||
|
|
"step": 2430,
|
||
|
|
"valid_targets_mean": 4069.9,
|
||
|
|
"valid_targets_min": 843
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7006079027355625,
|
||
|
|
"grad_norm": 0.44273639823588856,
|
||
|
|
"learning_rate": 2.150683841842662e-05,
|
||
|
|
"loss": 0.1701,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17152325809001923,
|
||
|
|
"step": 2435,
|
||
|
|
"valid_targets_mean": 4846.6,
|
||
|
|
"valid_targets_min": 1167
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.708206686930091,
|
||
|
|
"grad_norm": 0.41156415643010313,
|
||
|
|
"learning_rate": 2.1431250855617978e-05,
|
||
|
|
"loss": 0.1742,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15151536464691162,
|
||
|
|
"step": 2440,
|
||
|
|
"valid_targets_mean": 4472.1,
|
||
|
|
"valid_targets_min": 280
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.71580547112462,
|
||
|
|
"grad_norm": 0.45636761630984557,
|
||
|
|
"learning_rate": 2.1355642738356327e-05,
|
||
|
|
"loss": 0.1666,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16730879247188568,
|
||
|
|
"step": 2445,
|
||
|
|
"valid_targets_mean": 3877.2,
|
||
|
|
"valid_targets_min": 406
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.723404255319149,
|
||
|
|
"grad_norm": 0.41821546008836596,
|
||
|
|
"learning_rate": 2.128001515246355e-05,
|
||
|
|
"loss": 0.1752,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1652117371559143,
|
||
|
|
"step": 2450,
|
||
|
|
"valid_targets_mean": 5117.6,
|
||
|
|
"valid_targets_min": 1054
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.731003039513678,
|
||
|
|
"grad_norm": 0.5195131380448494,
|
||
|
|
"learning_rate": 2.1204369184041115e-05,
|
||
|
|
"loss": 0.1768,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21220675110816956,
|
||
|
|
"step": 2455,
|
||
|
|
"valid_targets_mean": 3935.9,
|
||
|
|
"valid_targets_min": 608
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7386018237082066,
|
||
|
|
"grad_norm": 0.4759365332843418,
|
||
|
|
"learning_rate": 2.1128705919454488e-05,
|
||
|
|
"loss": 0.1819,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21190626919269562,
|
||
|
|
"step": 2460,
|
||
|
|
"valid_targets_mean": 5204.1,
|
||
|
|
"valid_targets_min": 776
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7462006079027357,
|
||
|
|
"grad_norm": 0.44340296838544296,
|
||
|
|
"learning_rate": 2.1053026445317534e-05,
|
||
|
|
"loss": 0.1841,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16357441246509552,
|
||
|
|
"step": 2465,
|
||
|
|
"valid_targets_mean": 5137.4,
|
||
|
|
"valid_targets_min": 531
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7537993920972643,
|
||
|
|
"grad_norm": 0.602232408360396,
|
||
|
|
"learning_rate": 2.09773318484769e-05,
|
||
|
|
"loss": 0.1669,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14004534482955933,
|
||
|
|
"step": 2470,
|
||
|
|
"valid_targets_mean": 4292.4,
|
||
|
|
"valid_targets_min": 861
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7613981762917934,
|
||
|
|
"grad_norm": 0.4860093104756001,
|
||
|
|
"learning_rate": 2.0901623215996406e-05,
|
||
|
|
"loss": 0.1819,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17179439961910248,
|
||
|
|
"step": 2475,
|
||
|
|
"valid_targets_mean": 4222.4,
|
||
|
|
"valid_targets_min": 2111
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.768996960486322,
|
||
|
|
"grad_norm": 0.5345377630387919,
|
||
|
|
"learning_rate": 2.082590163514146e-05,
|
||
|
|
"loss": 0.188,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19142243266105652,
|
||
|
|
"step": 2480,
|
||
|
|
"valid_targets_mean": 3549.6,
|
||
|
|
"valid_targets_min": 650
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.776595744680851,
|
||
|
|
"grad_norm": 0.5131611773395347,
|
||
|
|
"learning_rate": 2.07501681933634e-05,
|
||
|
|
"loss": 0.1842,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23167961835861206,
|
||
|
|
"step": 2485,
|
||
|
|
"valid_targets_mean": 4783.1,
|
||
|
|
"valid_targets_min": 422
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.78419452887538,
|
||
|
|
"grad_norm": 0.43772278574910306,
|
||
|
|
"learning_rate": 2.0674423978283924e-05,
|
||
|
|
"loss": 0.1702,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.159462109208107,
|
||
|
|
"step": 2490,
|
||
|
|
"valid_targets_mean": 4151.8,
|
||
|
|
"valid_targets_min": 1045
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.791793313069909,
|
||
|
|
"grad_norm": 0.48634040579611115,
|
||
|
|
"learning_rate": 2.059867007767943e-05,
|
||
|
|
"loss": 0.1729,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1738453060388565,
|
||
|
|
"step": 2495,
|
||
|
|
"valid_targets_mean": 4146.1,
|
||
|
|
"valid_targets_min": 314
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7993920972644375,
|
||
|
|
"grad_norm": 0.45093092640477583,
|
||
|
|
"learning_rate": 2.0522907579465413e-05,
|
||
|
|
"loss": 0.1753,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1663065254688263,
|
||
|
|
"step": 2500,
|
||
|
|
"valid_targets_mean": 3941.2,
|
||
|
|
"valid_targets_min": 1010
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8069908814589666,
|
||
|
|
"grad_norm": 0.50654438664862,
|
||
|
|
"learning_rate": 2.0447137571680856e-05,
|
||
|
|
"loss": 0.1978,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20836275815963745,
|
||
|
|
"step": 2505,
|
||
|
|
"valid_targets_mean": 4301.9,
|
||
|
|
"valid_targets_min": 870
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8145896656534957,
|
||
|
|
"grad_norm": 0.47213930537918863,
|
||
|
|
"learning_rate": 2.037136114247257e-05,
|
||
|
|
"loss": 0.1906,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19136860966682434,
|
||
|
|
"step": 2510,
|
||
|
|
"valid_targets_mean": 4427.4,
|
||
|
|
"valid_targets_min": 571
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8221884498480243,
|
||
|
|
"grad_norm": 0.6652881182583804,
|
||
|
|
"learning_rate": 2.0295579380079596e-05,
|
||
|
|
"loss": 0.1825,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16894681751728058,
|
||
|
|
"step": 2515,
|
||
|
|
"valid_targets_mean": 4250.4,
|
||
|
|
"valid_targets_min": 473
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.829787234042553,
|
||
|
|
"grad_norm": 0.4220064998153064,
|
||
|
|
"learning_rate": 2.0219793372817557e-05,
|
||
|
|
"loss": 0.1764,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1527949720621109,
|
||
|
|
"step": 2520,
|
||
|
|
"valid_targets_mean": 4826.4,
|
||
|
|
"valid_targets_min": 827
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.837386018237082,
|
||
|
|
"grad_norm": 0.43668863176276407,
|
||
|
|
"learning_rate": 2.0144004209063042e-05,
|
||
|
|
"loss": 0.176,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1740725338459015,
|
||
|
|
"step": 2525,
|
||
|
|
"valid_targets_mean": 4150.2,
|
||
|
|
"valid_targets_min": 772
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.844984802431611,
|
||
|
|
"grad_norm": 0.3823174418425911,
|
||
|
|
"learning_rate": 2.0068212977237983e-05,
|
||
|
|
"loss": 0.1655,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14751499891281128,
|
||
|
|
"step": 2530,
|
||
|
|
"valid_targets_mean": 5399.6,
|
||
|
|
"valid_targets_min": 2779
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8525835866261398,
|
||
|
|
"grad_norm": 0.4587095534889756,
|
||
|
|
"learning_rate": 1.999242076579398e-05,
|
||
|
|
"loss": 0.1775,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19451266527175903,
|
||
|
|
"step": 2535,
|
||
|
|
"valid_targets_mean": 4256.1,
|
||
|
|
"valid_targets_min": 743
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.860182370820669,
|
||
|
|
"grad_norm": 0.45169233975095224,
|
||
|
|
"learning_rate": 1.9916628663196743e-05,
|
||
|
|
"loss": 0.1892,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17704080045223236,
|
||
|
|
"step": 2540,
|
||
|
|
"valid_targets_mean": 4775.8,
|
||
|
|
"valid_targets_min": 2350
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8677811550151975,
|
||
|
|
"grad_norm": 1.675127625058686,
|
||
|
|
"learning_rate": 1.9840837757910383e-05,
|
||
|
|
"loss": 0.1858,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1759578138589859,
|
||
|
|
"step": 2545,
|
||
|
|
"valid_targets_mean": 4113.9,
|
||
|
|
"valid_targets_min": 720
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8753799392097266,
|
||
|
|
"grad_norm": 0.43203621299681155,
|
||
|
|
"learning_rate": 1.976504913838184e-05,
|
||
|
|
"loss": 0.1811,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1715025007724762,
|
||
|
|
"step": 2550,
|
||
|
|
"valid_targets_mean": 5221.4,
|
||
|
|
"valid_targets_min": 1839
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.882978723404255,
|
||
|
|
"grad_norm": 0.4384904057940854,
|
||
|
|
"learning_rate": 1.968926389302521e-05,
|
||
|
|
"loss": 0.1636,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16733646392822266,
|
||
|
|
"step": 2555,
|
||
|
|
"valid_targets_mean": 4565.3,
|
||
|
|
"valid_targets_min": 609
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8905775075987843,
|
||
|
|
"grad_norm": 0.4481617113710614,
|
||
|
|
"learning_rate": 1.9613483110206154e-05,
|
||
|
|
"loss": 0.1701,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17321643233299255,
|
||
|
|
"step": 2560,
|
||
|
|
"valid_targets_mean": 4512.8,
|
||
|
|
"valid_targets_min": 895
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.898176291793313,
|
||
|
|
"grad_norm": 0.45819511671682567,
|
||
|
|
"learning_rate": 1.9537707878226216e-05,
|
||
|
|
"loss": 0.1737,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17141106724739075,
|
||
|
|
"step": 2565,
|
||
|
|
"valid_targets_mean": 4315.5,
|
||
|
|
"valid_targets_min": 440
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.905775075987842,
|
||
|
|
"grad_norm": 0.4764235545857351,
|
||
|
|
"learning_rate": 1.9461939285307247e-05,
|
||
|
|
"loss": 0.1797,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1686917096376419,
|
||
|
|
"step": 2570,
|
||
|
|
"valid_targets_mean": 3827.4,
|
||
|
|
"valid_targets_min": 622
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9133738601823707,
|
||
|
|
"grad_norm": 0.4437209902659711,
|
||
|
|
"learning_rate": 1.9386178419575745e-05,
|
||
|
|
"loss": 0.1714,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16246762871742249,
|
||
|
|
"step": 2575,
|
||
|
|
"valid_targets_mean": 4545.2,
|
||
|
|
"valid_targets_min": 746
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9209726443768997,
|
||
|
|
"grad_norm": 0.3844261379651969,
|
||
|
|
"learning_rate": 1.931042636904724e-05,
|
||
|
|
"loss": 0.1759,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17688338458538055,
|
||
|
|
"step": 2580,
|
||
|
|
"valid_targets_mean": 5911.6,
|
||
|
|
"valid_targets_min": 3130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.928571428571429,
|
||
|
|
"grad_norm": 0.4052859116056726,
|
||
|
|
"learning_rate": 1.923468422161066e-05,
|
||
|
|
"loss": 0.1789,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16950541734695435,
|
||
|
|
"step": 2585,
|
||
|
|
"valid_targets_mean": 5731.0,
|
||
|
|
"valid_targets_min": 509
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9361702127659575,
|
||
|
|
"grad_norm": 0.4236870787112286,
|
||
|
|
"learning_rate": 1.9158953065012715e-05,
|
||
|
|
"loss": 0.1718,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15792736411094666,
|
||
|
|
"step": 2590,
|
||
|
|
"valid_targets_mean": 5720.6,
|
||
|
|
"valid_targets_min": 2293
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.943768996960486,
|
||
|
|
"grad_norm": 0.46365326810020263,
|
||
|
|
"learning_rate": 1.9083233986842275e-05,
|
||
|
|
"loss": 0.1689,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17180944979190826,
|
||
|
|
"step": 2595,
|
||
|
|
"valid_targets_mean": 5083.0,
|
||
|
|
"valid_targets_min": 711
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.951367781155015,
|
||
|
|
"grad_norm": 0.44863750873052993,
|
||
|
|
"learning_rate": 1.900752807451475e-05,
|
||
|
|
"loss": 0.1793,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16900160908699036,
|
||
|
|
"step": 2600,
|
||
|
|
"valid_targets_mean": 4392.1,
|
||
|
|
"valid_targets_min": 810
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9589665653495443,
|
||
|
|
"grad_norm": 0.462622398920299,
|
||
|
|
"learning_rate": 1.8931836415256468e-05,
|
||
|
|
"loss": 0.1788,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16800816357135773,
|
||
|
|
"step": 2605,
|
||
|
|
"valid_targets_mean": 4713.2,
|
||
|
|
"valid_targets_min": 740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.966565349544073,
|
||
|
|
"grad_norm": 0.46050975543187894,
|
||
|
|
"learning_rate": 1.885616009608907e-05,
|
||
|
|
"loss": 0.1654,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14823880791664124,
|
||
|
|
"step": 2610,
|
||
|
|
"valid_targets_mean": 4082.6,
|
||
|
|
"valid_targets_min": 401
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9741641337386016,
|
||
|
|
"grad_norm": 0.46421973514436904,
|
||
|
|
"learning_rate": 1.8780500203813902e-05,
|
||
|
|
"loss": 0.1726,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18864090740680695,
|
||
|
|
"step": 2615,
|
||
|
|
"valid_targets_mean": 4411.6,
|
||
|
|
"valid_targets_min": 655
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9817629179331306,
|
||
|
|
"grad_norm": 0.4291031448782404,
|
||
|
|
"learning_rate": 1.870485782499638e-05,
|
||
|
|
"loss": 0.1723,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17856952548027039,
|
||
|
|
"step": 2620,
|
||
|
|
"valid_targets_mean": 4765.4,
|
||
|
|
"valid_targets_min": 348
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9893617021276597,
|
||
|
|
"grad_norm": 0.44985063891512206,
|
||
|
|
"learning_rate": 1.8629234045950434e-05,
|
||
|
|
"loss": 0.1904,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16248270869255066,
|
||
|
|
"step": 2625,
|
||
|
|
"valid_targets_mean": 4526.0,
|
||
|
|
"valid_targets_min": 1027
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9969604863221884,
|
||
|
|
"grad_norm": 0.4752973434947488,
|
||
|
|
"learning_rate": 1.8553629952722853e-05,
|
||
|
|
"loss": 0.1636,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17370517551898956,
|
||
|
|
"step": 2630,
|
||
|
|
"valid_targets_mean": 4128.8,
|
||
|
|
"valid_targets_min": 843
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.004559270516717,
|
||
|
|
"grad_norm": 0.4277885641586408,
|
||
|
|
"learning_rate": 1.8478046631077734e-05,
|
||
|
|
"loss": 0.167,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1758805513381958,
|
||
|
|
"step": 2635,
|
||
|
|
"valid_targets_mean": 4960.5,
|
||
|
|
"valid_targets_min": 647
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.0121580547112465,
|
||
|
|
"grad_norm": 0.4340489860844752,
|
||
|
|
"learning_rate": 1.8402485166480854e-05,
|
||
|
|
"loss": 0.1694,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1884659081697464,
|
||
|
|
"step": 2640,
|
||
|
|
"valid_targets_mean": 5273.1,
|
||
|
|
"valid_targets_min": 787
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.019756838905775,
|
||
|
|
"grad_norm": 0.4325490917240746,
|
||
|
|
"learning_rate": 1.8326946644084112e-05,
|
||
|
|
"loss": 0.1573,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1543048769235611,
|
||
|
|
"step": 2645,
|
||
|
|
"valid_targets_mean": 4604.4,
|
||
|
|
"valid_targets_min": 547
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.027355623100304,
|
||
|
|
"grad_norm": 0.5514612078158647,
|
||
|
|
"learning_rate": 1.8251432148709912e-05,
|
||
|
|
"loss": 0.1535,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1609114408493042,
|
||
|
|
"step": 2650,
|
||
|
|
"valid_targets_mean": 3597.3,
|
||
|
|
"valid_targets_min": 695
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.0349544072948325,
|
||
|
|
"grad_norm": 0.43126338580266377,
|
||
|
|
"learning_rate": 1.817594276483563e-05,
|
||
|
|
"loss": 0.1579,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18365256488323212,
|
||
|
|
"step": 2655,
|
||
|
|
"valid_targets_mean": 5326.8,
|
||
|
|
"valid_targets_min": 666
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.042553191489362,
|
||
|
|
"grad_norm": 0.4737332570423805,
|
||
|
|
"learning_rate": 1.8100479576577973e-05,
|
||
|
|
"loss": 0.1625,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17224054038524628,
|
||
|
|
"step": 2660,
|
||
|
|
"valid_targets_mean": 4818.4,
|
||
|
|
"valid_targets_min": 918
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.050151975683891,
|
||
|
|
"grad_norm": 0.4438849641405412,
|
||
|
|
"learning_rate": 1.8025043667677484e-05,
|
||
|
|
"loss": 0.166,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1628122627735138,
|
||
|
|
"step": 2665,
|
||
|
|
"valid_targets_mean": 4852.9,
|
||
|
|
"valid_targets_min": 1421
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.057750759878419,
|
||
|
|
"grad_norm": 0.5200587086302579,
|
||
|
|
"learning_rate": 1.794963612148291e-05,
|
||
|
|
"loss": 0.1565,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1755530685186386,
|
||
|
|
"step": 2670,
|
||
|
|
"valid_targets_mean": 3932.9,
|
||
|
|
"valid_targets_min": 844
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.065349544072948,
|
||
|
|
"grad_norm": 0.5142004311196928,
|
||
|
|
"learning_rate": 1.7874258020935708e-05,
|
||
|
|
"loss": 0.1792,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16136983036994934,
|
||
|
|
"step": 2675,
|
||
|
|
"valid_targets_mean": 3818.9,
|
||
|
|
"valid_targets_min": 758
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.072948328267477,
|
||
|
|
"grad_norm": 0.47669155432301397,
|
||
|
|
"learning_rate": 1.7798910448554433e-05,
|
||
|
|
"loss": 0.1749,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17607629299163818,
|
||
|
|
"step": 2680,
|
||
|
|
"valid_targets_mean": 4376.6,
|
||
|
|
"valid_targets_min": 1368
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.080547112462006,
|
||
|
|
"grad_norm": 0.4655137661586852,
|
||
|
|
"learning_rate": 1.7723594486419245e-05,
|
||
|
|
"loss": 0.163,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15961049497127533,
|
||
|
|
"step": 2685,
|
||
|
|
"valid_targets_mean": 4512.1,
|
||
|
|
"valid_targets_min": 1601
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.088145896656535,
|
||
|
|
"grad_norm": 0.7525257215139017,
|
||
|
|
"learning_rate": 1.764831121615631e-05,
|
||
|
|
"loss": 0.1675,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.21296311914920807,
|
||
|
|
"step": 2690,
|
||
|
|
"valid_targets_mean": 4298.8,
|
||
|
|
"valid_targets_min": 756
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.095744680851064,
|
||
|
|
"grad_norm": 0.4433451433036404,
|
||
|
|
"learning_rate": 1.7573061718922347e-05,
|
||
|
|
"loss": 0.1641,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16739048063755035,
|
||
|
|
"step": 2695,
|
||
|
|
"valid_targets_mean": 5009.2,
|
||
|
|
"valid_targets_min": 853
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.103343465045593,
|
||
|
|
"grad_norm": 0.4830355618281073,
|
||
|
|
"learning_rate": 1.7497847075389004e-05,
|
||
|
|
"loss": 0.1621,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16623979806900024,
|
||
|
|
"step": 2700,
|
||
|
|
"valid_targets_mean": 4105.4,
|
||
|
|
"valid_targets_min": 714
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.1109422492401215,
|
||
|
|
"grad_norm": 0.4179613714457159,
|
||
|
|
"learning_rate": 1.7422668365727428e-05,
|
||
|
|
"loss": 0.1572,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16168740391731262,
|
||
|
|
"step": 2705,
|
||
|
|
"valid_targets_mean": 5317.4,
|
||
|
|
"valid_targets_min": 1811
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.11854103343465,
|
||
|
|
"grad_norm": 0.5056361715181314,
|
||
|
|
"learning_rate": 1.734752666959268e-05,
|
||
|
|
"loss": 0.1659,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16789555549621582,
|
||
|
|
"step": 2710,
|
||
|
|
"valid_targets_mean": 4172.6,
|
||
|
|
"valid_targets_min": 666
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.12613981762918,
|
||
|
|
"grad_norm": 0.48722281404971973,
|
||
|
|
"learning_rate": 1.7272423066108306e-05,
|
||
|
|
"loss": 0.171,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17382162809371948,
|
||
|
|
"step": 2715,
|
||
|
|
"valid_targets_mean": 4376.5,
|
||
|
|
"valid_targets_min": 263
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.133738601823708,
|
||
|
|
"grad_norm": 0.4790825604864442,
|
||
|
|
"learning_rate": 1.7197358633850744e-05,
|
||
|
|
"loss": 0.1566,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16230902075767517,
|
||
|
|
"step": 2720,
|
||
|
|
"valid_targets_mean": 4323.8,
|
||
|
|
"valid_targets_min": 370
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.141337386018237,
|
||
|
|
"grad_norm": 0.41425680904063444,
|
||
|
|
"learning_rate": 1.7122334450833933e-05,
|
||
|
|
"loss": 0.1532,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18430155515670776,
|
||
|
|
"step": 2725,
|
||
|
|
"valid_targets_mean": 6086.1,
|
||
|
|
"valid_targets_min": 567
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.148936170212766,
|
||
|
|
"grad_norm": 0.4752171364109056,
|
||
|
|
"learning_rate": 1.7047351594493755e-05,
|
||
|
|
"loss": 0.1739,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1950949728488922,
|
||
|
|
"step": 2730,
|
||
|
|
"valid_targets_mean": 5050.2,
|
||
|
|
"valid_targets_min": 804
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.156534954407295,
|
||
|
|
"grad_norm": 0.4757519244575605,
|
||
|
|
"learning_rate": 1.6972411141672614e-05,
|
||
|
|
"loss": 0.1621,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1867167055606842,
|
||
|
|
"step": 2735,
|
||
|
|
"valid_targets_mean": 4470.1,
|
||
|
|
"valid_targets_min": 828
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.164133738601824,
|
||
|
|
"grad_norm": 0.4513608069410081,
|
||
|
|
"learning_rate": 1.6897514168603924e-05,
|
||
|
|
"loss": 0.1696,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1613789200782776,
|
||
|
|
"step": 2740,
|
||
|
|
"valid_targets_mean": 4477.4,
|
||
|
|
"valid_targets_min": 742
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.171732522796352,
|
||
|
|
"grad_norm": 0.5141640145999388,
|
||
|
|
"learning_rate": 1.6822661750896707e-05,
|
||
|
|
"loss": 0.1524,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19608411192893982,
|
||
|
|
"step": 2745,
|
||
|
|
"valid_targets_mean": 4257.2,
|
||
|
|
"valid_targets_min": 892
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.179331306990881,
|
||
|
|
"grad_norm": 0.4724318719284284,
|
||
|
|
"learning_rate": 1.67478549635201e-05,
|
||
|
|
"loss": 0.1741,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1456223726272583,
|
||
|
|
"step": 2750,
|
||
|
|
"valid_targets_mean": 4336.9,
|
||
|
|
"valid_targets_min": 541
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.186930091185411,
|
||
|
|
"grad_norm": 0.4970941462233452,
|
||
|
|
"learning_rate": 1.6673094880787933e-05,
|
||
|
|
"loss": 0.1661,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1658686399459839,
|
||
|
|
"step": 2755,
|
||
|
|
"valid_targets_mean": 4121.6,
|
||
|
|
"valid_targets_min": 601
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.194528875379939,
|
||
|
|
"grad_norm": 0.5000186608612818,
|
||
|
|
"learning_rate": 1.6598382576343312e-05,
|
||
|
|
"loss": 0.1657,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16827929019927979,
|
||
|
|
"step": 2760,
|
||
|
|
"valid_targets_mean": 5388.8,
|
||
|
|
"valid_targets_min": 1463
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.202127659574468,
|
||
|
|
"grad_norm": 0.4684433964610707,
|
||
|
|
"learning_rate": 1.6523719123143186e-05,
|
||
|
|
"loss": 0.1547,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15693283081054688,
|
||
|
|
"step": 2765,
|
||
|
|
"valid_targets_mean": 4821.7,
|
||
|
|
"valid_targets_min": 746
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.2097264437689965,
|
||
|
|
"grad_norm": 0.45967354792383214,
|
||
|
|
"learning_rate": 1.6449105593442936e-05,
|
||
|
|
"loss": 0.1779,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19241458177566528,
|
||
|
|
"step": 2770,
|
||
|
|
"valid_targets_mean": 4597.6,
|
||
|
|
"valid_targets_min": 855
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.217325227963526,
|
||
|
|
"grad_norm": 0.4733160030781719,
|
||
|
|
"learning_rate": 1.6374543058780998e-05,
|
||
|
|
"loss": 0.1576,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16462013125419617,
|
||
|
|
"step": 2775,
|
||
|
|
"valid_targets_mean": 4405.4,
|
||
|
|
"valid_targets_min": 850
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.224924012158055,
|
||
|
|
"grad_norm": 0.4815352707164652,
|
||
|
|
"learning_rate": 1.6300032589963436e-05,
|
||
|
|
"loss": 0.1669,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15910238027572632,
|
||
|
|
"step": 2780,
|
||
|
|
"valid_targets_mean": 4541.5,
|
||
|
|
"valid_targets_min": 909
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.232522796352583,
|
||
|
|
"grad_norm": 0.47250106872988024,
|
||
|
|
"learning_rate": 1.6225575257048622e-05,
|
||
|
|
"loss": 0.1587,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1517636775970459,
|
||
|
|
"step": 2785,
|
||
|
|
"valid_targets_mean": 4741.7,
|
||
|
|
"valid_targets_min": 1120
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.240121580547113,
|
||
|
|
"grad_norm": 1.1194758985013034,
|
||
|
|
"learning_rate": 1.6151172129331786e-05,
|
||
|
|
"loss": 0.1722,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16665083169937134,
|
||
|
|
"step": 2790,
|
||
|
|
"valid_targets_mean": 4795.9,
|
||
|
|
"valid_targets_min": 567
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.2477203647416415,
|
||
|
|
"grad_norm": 0.5081276624925015,
|
||
|
|
"learning_rate": 1.6076824275329758e-05,
|
||
|
|
"loss": 0.1719,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16445952653884888,
|
||
|
|
"step": 2795,
|
||
|
|
"valid_targets_mean": 4864.7,
|
||
|
|
"valid_targets_min": 1608
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.25531914893617,
|
||
|
|
"grad_norm": 0.48328270331504203,
|
||
|
|
"learning_rate": 1.6002532762765524e-05,
|
||
|
|
"loss": 0.164,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17975090444087982,
|
||
|
|
"step": 2800,
|
||
|
|
"valid_targets_mean": 4240.7,
|
||
|
|
"valid_targets_min": 616
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.262917933130699,
|
||
|
|
"grad_norm": 0.46912231409179,
|
||
|
|
"learning_rate": 1.5928298658552988e-05,
|
||
|
|
"loss": 0.169,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1919006109237671,
|
||
|
|
"step": 2805,
|
||
|
|
"valid_targets_mean": 5055.8,
|
||
|
|
"valid_targets_min": 588
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.270516717325228,
|
||
|
|
"grad_norm": 0.6210976552983519,
|
||
|
|
"learning_rate": 1.5854123028781557e-05,
|
||
|
|
"loss": 0.1585,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1582549810409546,
|
||
|
|
"step": 2810,
|
||
|
|
"valid_targets_mean": 3868.8,
|
||
|
|
"valid_targets_min": 365
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.278115501519757,
|
||
|
|
"grad_norm": 0.4072288493810071,
|
||
|
|
"learning_rate": 1.5780006938700917e-05,
|
||
|
|
"loss": 0.1584,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16181963682174683,
|
||
|
|
"step": 2815,
|
||
|
|
"valid_targets_mean": 5906.6,
|
||
|
|
"valid_targets_min": 1016
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.285714285714286,
|
||
|
|
"grad_norm": 0.45582906884120683,
|
||
|
|
"learning_rate": 1.5705951452705654e-05,
|
||
|
|
"loss": 0.1661,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14693260192871094,
|
||
|
|
"step": 2820,
|
||
|
|
"valid_targets_mean": 4234.7,
|
||
|
|
"valid_targets_min": 811
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.293313069908814,
|
||
|
|
"grad_norm": 0.5429695412336001,
|
||
|
|
"learning_rate": 1.5631957634320048e-05,
|
||
|
|
"loss": 0.1697,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1935248076915741,
|
||
|
|
"step": 2825,
|
||
|
|
"valid_targets_mean": 4832.9,
|
||
|
|
"valid_targets_min": 774
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.300911854103344,
|
||
|
|
"grad_norm": 0.46340922363130344,
|
||
|
|
"learning_rate": 1.555802654618272e-05,
|
||
|
|
"loss": 0.1622,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17274600267410278,
|
||
|
|
"step": 2830,
|
||
|
|
"valid_targets_mean": 5227.2,
|
||
|
|
"valid_targets_min": 933
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.308510638297872,
|
||
|
|
"grad_norm": 0.8213856644150885,
|
||
|
|
"learning_rate": 1.5484159250031445e-05,
|
||
|
|
"loss": 0.1661,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17811016738414764,
|
||
|
|
"step": 2835,
|
||
|
|
"valid_targets_mean": 3817.1,
|
||
|
|
"valid_targets_min": 692
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.316109422492401,
|
||
|
|
"grad_norm": 0.4391910613617335,
|
||
|
|
"learning_rate": 1.541035680668785e-05,
|
||
|
|
"loss": 0.1592,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17826610803604126,
|
||
|
|
"step": 2840,
|
||
|
|
"valid_targets_mean": 5220.5,
|
||
|
|
"valid_targets_min": 571
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.32370820668693,
|
||
|
|
"grad_norm": 0.5400136046572052,
|
||
|
|
"learning_rate": 1.5336620276042193e-05,
|
||
|
|
"loss": 0.1738,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17823699116706848,
|
||
|
|
"step": 2845,
|
||
|
|
"valid_targets_mean": 4212.0,
|
||
|
|
"valid_targets_min": 156
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.331306990881459,
|
||
|
|
"grad_norm": 0.4689908498540122,
|
||
|
|
"learning_rate": 1.526295071703817e-05,
|
||
|
|
"loss": 0.1559,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14596086740493774,
|
||
|
|
"step": 2850,
|
||
|
|
"valid_targets_mean": 4071.1,
|
||
|
|
"valid_targets_min": 842
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.338905775075988,
|
||
|
|
"grad_norm": 0.43293471403933603,
|
||
|
|
"learning_rate": 1.5189349187657665e-05,
|
||
|
|
"loss": 0.1746,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1921626329421997,
|
||
|
|
"step": 2855,
|
||
|
|
"valid_targets_mean": 5745.9,
|
||
|
|
"valid_targets_min": 899
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.3465045592705165,
|
||
|
|
"grad_norm": 0.4470655930449328,
|
||
|
|
"learning_rate": 1.5115816744905596e-05,
|
||
|
|
"loss": 0.1616,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.135872483253479,
|
||
|
|
"step": 2860,
|
||
|
|
"valid_targets_mean": 4759.4,
|
||
|
|
"valid_targets_min": 872
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.354103343465045,
|
||
|
|
"grad_norm": 0.5712823358033652,
|
||
|
|
"learning_rate": 1.504235444479469e-05,
|
||
|
|
"loss": 0.1648,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20416559278964996,
|
||
|
|
"step": 2865,
|
||
|
|
"valid_targets_mean": 4337.3,
|
||
|
|
"valid_targets_min": 361
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.361702127659575,
|
||
|
|
"grad_norm": 0.4182532739028534,
|
||
|
|
"learning_rate": 1.4968963342330369e-05,
|
||
|
|
"loss": 0.1715,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17232292890548706,
|
||
|
|
"step": 2870,
|
||
|
|
"valid_targets_mean": 5397.1,
|
||
|
|
"valid_targets_min": 1864
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.369300911854103,
|
||
|
|
"grad_norm": 0.4274336280548651,
|
||
|
|
"learning_rate": 1.4895644491495547e-05,
|
||
|
|
"loss": 0.1657,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17278841137886047,
|
||
|
|
"step": 2875,
|
||
|
|
"valid_targets_mean": 5085.6,
|
||
|
|
"valid_targets_min": 758
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.376899696048632,
|
||
|
|
"grad_norm": 0.4542314839910377,
|
||
|
|
"learning_rate": 1.4822398945235545e-05,
|
||
|
|
"loss": 0.1539,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15121108293533325,
|
||
|
|
"step": 2880,
|
||
|
|
"valid_targets_mean": 4447.8,
|
||
|
|
"valid_targets_min": 422
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.3844984802431615,
|
||
|
|
"grad_norm": 0.4154772615137915,
|
||
|
|
"learning_rate": 1.4749227755442927e-05,
|
||
|
|
"loss": 0.1761,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1821514070034027,
|
||
|
|
"step": 2885,
|
||
|
|
"valid_targets_mean": 5593.2,
|
||
|
|
"valid_targets_min": 2877
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.39209726443769,
|
||
|
|
"grad_norm": 0.4559773456464244,
|
||
|
|
"learning_rate": 1.4676131972942416e-05,
|
||
|
|
"loss": 0.1633,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16653896868228912,
|
||
|
|
"step": 2890,
|
||
|
|
"valid_targets_mean": 4738.8,
|
||
|
|
"valid_targets_min": 920
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.399696048632219,
|
||
|
|
"grad_norm": 0.41739506385968844,
|
||
|
|
"learning_rate": 1.4603112647475795e-05,
|
||
|
|
"loss": 0.1681,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15362367033958435,
|
||
|
|
"step": 2895,
|
||
|
|
"valid_targets_mean": 5320.4,
|
||
|
|
"valid_targets_min": 552
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.407294832826747,
|
||
|
|
"grad_norm": 0.3845071167642644,
|
||
|
|
"learning_rate": 1.4530170827686831e-05,
|
||
|
|
"loss": 0.1633,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13495799899101257,
|
||
|
|
"step": 2900,
|
||
|
|
"valid_targets_mean": 5222.8,
|
||
|
|
"valid_targets_min": 583
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.414893617021277,
|
||
|
|
"grad_norm": 0.44390729359629055,
|
||
|
|
"learning_rate": 1.4457307561106226e-05,
|
||
|
|
"loss": 0.1618,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15813395380973816,
|
||
|
|
"step": 2905,
|
||
|
|
"valid_targets_mean": 4969.7,
|
||
|
|
"valid_targets_min": 776
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.422492401215806,
|
||
|
|
"grad_norm": 0.5018673994147254,
|
||
|
|
"learning_rate": 1.438452389413656e-05,
|
||
|
|
"loss": 0.174,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15857061743736267,
|
||
|
|
"step": 2910,
|
||
|
|
"valid_targets_mean": 3932.9,
|
||
|
|
"valid_targets_min": 633
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.430091185410334,
|
||
|
|
"grad_norm": 0.4806146977648294,
|
||
|
|
"learning_rate": 1.4311820872037264e-05,
|
||
|
|
"loss": 0.1684,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1731853187084198,
|
||
|
|
"step": 2915,
|
||
|
|
"valid_targets_mean": 4577.8,
|
||
|
|
"valid_targets_min": 328
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.437689969604863,
|
||
|
|
"grad_norm": 0.4617241179372537,
|
||
|
|
"learning_rate": 1.423919953890963e-05,
|
||
|
|
"loss": 0.1666,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18794915080070496,
|
||
|
|
"step": 2920,
|
||
|
|
"valid_targets_mean": 5022.4,
|
||
|
|
"valid_targets_min": 629
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.445288753799392,
|
||
|
|
"grad_norm": 0.48939540976668194,
|
||
|
|
"learning_rate": 1.4166660937681771e-05,
|
||
|
|
"loss": 0.1658,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17850425839424133,
|
||
|
|
"step": 2925,
|
||
|
|
"valid_targets_mean": 4809.8,
|
||
|
|
"valid_targets_min": 409
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.452887537993921,
|
||
|
|
"grad_norm": 0.4842928277510236,
|
||
|
|
"learning_rate": 1.4094206110093712e-05,
|
||
|
|
"loss": 0.1736,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1715683937072754,
|
||
|
|
"step": 2930,
|
||
|
|
"valid_targets_mean": 4601.0,
|
||
|
|
"valid_targets_min": 1634
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.46048632218845,
|
||
|
|
"grad_norm": 0.4074913023031158,
|
||
|
|
"learning_rate": 1.4021836096682343e-05,
|
||
|
|
"loss": 0.1575,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1516132950782776,
|
||
|
|
"step": 2935,
|
||
|
|
"valid_targets_mean": 5510.4,
|
||
|
|
"valid_targets_min": 778
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.468085106382979,
|
||
|
|
"grad_norm": 0.42946649016106,
|
||
|
|
"learning_rate": 1.394955193676657e-05,
|
||
|
|
"loss": 0.1594,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16095909476280212,
|
||
|
|
"step": 2940,
|
||
|
|
"valid_targets_mean": 5310.4,
|
||
|
|
"valid_targets_min": 1020
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.475683890577508,
|
||
|
|
"grad_norm": 0.4385638298379317,
|
||
|
|
"learning_rate": 1.3877354668432297e-05,
|
||
|
|
"loss": 0.1606,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15726953744888306,
|
||
|
|
"step": 2945,
|
||
|
|
"valid_targets_mean": 4561.4,
|
||
|
|
"valid_targets_min": 590
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.4832826747720365,
|
||
|
|
"grad_norm": 0.5193379752723524,
|
||
|
|
"learning_rate": 1.38052453285176e-05,
|
||
|
|
"loss": 0.1845,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16541394591331482,
|
||
|
|
"step": 2950,
|
||
|
|
"valid_targets_mean": 3761.6,
|
||
|
|
"valid_targets_min": 561
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.490881458966565,
|
||
|
|
"grad_norm": 0.4490166063535176,
|
||
|
|
"learning_rate": 1.3733224952597764e-05,
|
||
|
|
"loss": 0.1583,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1475997269153595,
|
||
|
|
"step": 2955,
|
||
|
|
"valid_targets_mean": 4226.3,
|
||
|
|
"valid_targets_min": 666
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.498480243161095,
|
||
|
|
"grad_norm": 0.5152090350550662,
|
||
|
|
"learning_rate": 1.3661294574970485e-05,
|
||
|
|
"loss": 0.1717,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18321773409843445,
|
||
|
|
"step": 2960,
|
||
|
|
"valid_targets_mean": 4961.2,
|
||
|
|
"valid_targets_min": 602
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.506079027355623,
|
||
|
|
"grad_norm": 0.43396945945061266,
|
||
|
|
"learning_rate": 1.3589455228640938e-05,
|
||
|
|
"loss": 0.1754,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17606866359710693,
|
||
|
|
"step": 2965,
|
||
|
|
"valid_targets_mean": 5181.6,
|
||
|
|
"valid_targets_min": 1460
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.513677811550152,
|
||
|
|
"grad_norm": 0.4690277765034569,
|
||
|
|
"learning_rate": 1.3517707945307013e-05,
|
||
|
|
"loss": 0.1655,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18834683299064636,
|
||
|
|
"step": 2970,
|
||
|
|
"valid_targets_mean": 4646.1,
|
||
|
|
"valid_targets_min": 953
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.5212765957446805,
|
||
|
|
"grad_norm": 0.5066167184858231,
|
||
|
|
"learning_rate": 1.3446053755344439e-05,
|
||
|
|
"loss": 0.172,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15756574273109436,
|
||
|
|
"step": 2975,
|
||
|
|
"valid_targets_mean": 4516.5,
|
||
|
|
"valid_targets_min": 844
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.52887537993921,
|
||
|
|
"grad_norm": 0.5134300889609936,
|
||
|
|
"learning_rate": 1.3374493687792045e-05,
|
||
|
|
"loss": 0.1675,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19803106784820557,
|
||
|
|
"step": 2980,
|
||
|
|
"valid_targets_mean": 4092.7,
|
||
|
|
"valid_targets_min": 719
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.536474164133739,
|
||
|
|
"grad_norm": 0.4463665768466012,
|
||
|
|
"learning_rate": 1.3303028770336914e-05,
|
||
|
|
"loss": 0.1685,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1606081873178482,
|
||
|
|
"step": 2985,
|
||
|
|
"valid_targets_mean": 4620.2,
|
||
|
|
"valid_targets_min": 714
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.544072948328267,
|
||
|
|
"grad_norm": 0.4159202471735973,
|
||
|
|
"learning_rate": 1.3231660029299703e-05,
|
||
|
|
"loss": 0.1536,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16770178079605103,
|
||
|
|
"step": 2990,
|
||
|
|
"valid_targets_mean": 5974.0,
|
||
|
|
"valid_targets_min": 1221
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.551671732522796,
|
||
|
|
"grad_norm": 0.4174566017417907,
|
||
|
|
"learning_rate": 1.316038848961982e-05,
|
||
|
|
"loss": 0.1715,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16645801067352295,
|
||
|
|
"step": 2995,
|
||
|
|
"valid_targets_mean": 5945.2,
|
||
|
|
"valid_targets_min": 1900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.5592705167173255,
|
||
|
|
"grad_norm": 0.41700946512428255,
|
||
|
|
"learning_rate": 1.3089215174840783e-05,
|
||
|
|
"loss": 0.1569,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15380007028579712,
|
||
|
|
"step": 3000,
|
||
|
|
"valid_targets_mean": 5489.1,
|
||
|
|
"valid_targets_min": 894
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.566869300911854,
|
||
|
|
"grad_norm": 0.5304865164006446,
|
||
|
|
"learning_rate": 1.3018141107095455e-05,
|
||
|
|
"loss": 0.1625,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16447263956069946,
|
||
|
|
"step": 3005,
|
||
|
|
"valid_targets_mean": 3494.4,
|
||
|
|
"valid_targets_min": 491
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.574468085106383,
|
||
|
|
"grad_norm": 0.48520946915581575,
|
||
|
|
"learning_rate": 1.2947167307091424e-05,
|
||
|
|
"loss": 0.1534,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15445218980312347,
|
||
|
|
"step": 3010,
|
||
|
|
"valid_targets_mean": 4516.8,
|
||
|
|
"valid_targets_min": 664
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.5820668693009114,
|
||
|
|
"grad_norm": 0.47660891700948743,
|
||
|
|
"learning_rate": 1.287629479409628e-05,
|
||
|
|
"loss": 0.154,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15592724084854126,
|
||
|
|
"step": 3015,
|
||
|
|
"valid_targets_mean": 4057.2,
|
||
|
|
"valid_targets_min": 905
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.589665653495441,
|
||
|
|
"grad_norm": 0.41867433836985307,
|
||
|
|
"learning_rate": 1.2805524585923048e-05,
|
||
|
|
"loss": 0.1577,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1499272882938385,
|
||
|
|
"step": 3020,
|
||
|
|
"valid_targets_mean": 4869.1,
|
||
|
|
"valid_targets_min": 501
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.59726443768997,
|
||
|
|
"grad_norm": 0.40048802671676303,
|
||
|
|
"learning_rate": 1.2734857698915502e-05,
|
||
|
|
"loss": 0.1615,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13745658099651337,
|
||
|
|
"step": 3025,
|
||
|
|
"valid_targets_mean": 5079.4,
|
||
|
|
"valid_targets_min": 1254
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.604863221884498,
|
||
|
|
"grad_norm": 0.4149531859078102,
|
||
|
|
"learning_rate": 1.266429514793363e-05,
|
||
|
|
"loss": 0.1555,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15537427365779877,
|
||
|
|
"step": 3030,
|
||
|
|
"valid_targets_mean": 5211.1,
|
||
|
|
"valid_targets_min": 861
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.612462006079028,
|
||
|
|
"grad_norm": 0.6490039513363777,
|
||
|
|
"learning_rate": 1.2593837946339008e-05,
|
||
|
|
"loss": 0.1618,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18460866808891296,
|
||
|
|
"step": 3035,
|
||
|
|
"valid_targets_mean": 4625.2,
|
||
|
|
"valid_targets_min": 918
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.620060790273556,
|
||
|
|
"grad_norm": 0.4992835453594247,
|
||
|
|
"learning_rate": 1.252348710598029e-05,
|
||
|
|
"loss": 0.1736,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.167076975107193,
|
||
|
|
"step": 3040,
|
||
|
|
"valid_targets_mean": 4100.6,
|
||
|
|
"valid_targets_min": 432
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.627659574468085,
|
||
|
|
"grad_norm": 0.4615786144106656,
|
||
|
|
"learning_rate": 1.245324363717864e-05,
|
||
|
|
"loss": 0.169,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16426023840904236,
|
||
|
|
"step": 3045,
|
||
|
|
"valid_targets_mean": 4565.8,
|
||
|
|
"valid_targets_min": 828
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.635258358662614,
|
||
|
|
"grad_norm": 0.41965629757500533,
|
||
|
|
"learning_rate": 1.2383108548713254e-05,
|
||
|
|
"loss": 0.1669,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1467415690422058,
|
||
|
|
"step": 3050,
|
||
|
|
"valid_targets_mean": 5088.9,
|
||
|
|
"valid_targets_min": 1159
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.642857142857143,
|
||
|
|
"grad_norm": 0.4441828730212559,
|
||
|
|
"learning_rate": 1.2313082847806852e-05,
|
||
|
|
"loss": 0.1776,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1823854148387909,
|
||
|
|
"step": 3055,
|
||
|
|
"valid_targets_mean": 4715.0,
|
||
|
|
"valid_targets_min": 741
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.650455927051672,
|
||
|
|
"grad_norm": 0.459237281832204,
|
||
|
|
"learning_rate": 1.2243167540111216e-05,
|
||
|
|
"loss": 0.1831,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14467957615852356,
|
||
|
|
"step": 3060,
|
||
|
|
"valid_targets_mean": 3859.3,
|
||
|
|
"valid_targets_min": 673
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.6580547112462005,
|
||
|
|
"grad_norm": 0.4946697227727552,
|
||
|
|
"learning_rate": 1.2173363629692756e-05,
|
||
|
|
"loss": 0.1732,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19268517196178436,
|
||
|
|
"step": 3065,
|
||
|
|
"valid_targets_mean": 3953.9,
|
||
|
|
"valid_targets_min": 931
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.665653495440729,
|
||
|
|
"grad_norm": 0.43385480300907053,
|
||
|
|
"learning_rate": 1.2103672119018086e-05,
|
||
|
|
"loss": 0.1481,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15391963720321655,
|
||
|
|
"step": 3070,
|
||
|
|
"valid_targets_mean": 4950.1,
|
||
|
|
"valid_targets_min": 1644
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.673252279635259,
|
||
|
|
"grad_norm": 0.47954284812840575,
|
||
|
|
"learning_rate": 1.2034094008939624e-05,
|
||
|
|
"loss": 0.1781,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19548840820789337,
|
||
|
|
"step": 3075,
|
||
|
|
"valid_targets_mean": 5021.8,
|
||
|
|
"valid_targets_min": 774
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.680851063829787,
|
||
|
|
"grad_norm": 0.4636621474235311,
|
||
|
|
"learning_rate": 1.1964630298681221e-05,
|
||
|
|
"loss": 0.1679,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14948098361492157,
|
||
|
|
"step": 3080,
|
||
|
|
"valid_targets_mean": 3846.4,
|
||
|
|
"valid_targets_min": 940
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.688449848024316,
|
||
|
|
"grad_norm": 1.0079316283200346,
|
||
|
|
"learning_rate": 1.1895281985823815e-05,
|
||
|
|
"loss": 0.1756,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1952032595872879,
|
||
|
|
"step": 3085,
|
||
|
|
"valid_targets_mean": 3557.1,
|
||
|
|
"valid_targets_min": 570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.696048632218845,
|
||
|
|
"grad_norm": 0.4689910480215768,
|
||
|
|
"learning_rate": 1.1826050066291097e-05,
|
||
|
|
"loss": 0.1685,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16652071475982666,
|
||
|
|
"step": 3090,
|
||
|
|
"valid_targets_mean": 4480.4,
|
||
|
|
"valid_targets_min": 728
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.703647416413374,
|
||
|
|
"grad_norm": 0.451296162182404,
|
||
|
|
"learning_rate": 1.1756935534335212e-05,
|
||
|
|
"loss": 0.1625,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13410750031471252,
|
||
|
|
"step": 3095,
|
||
|
|
"valid_targets_mean": 4405.3,
|
||
|
|
"valid_targets_min": 1325
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.711246200607903,
|
||
|
|
"grad_norm": 0.42448672661829445,
|
||
|
|
"learning_rate": 1.1687939382522493e-05,
|
||
|
|
"loss": 0.1619,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1605660319328308,
|
||
|
|
"step": 3100,
|
||
|
|
"valid_targets_mean": 4885.4,
|
||
|
|
"valid_targets_min": 1135
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.718844984802431,
|
||
|
|
"grad_norm": 0.4707749031713044,
|
||
|
|
"learning_rate": 1.1619062601719171e-05,
|
||
|
|
"loss": 0.1609,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1604931652545929,
|
||
|
|
"step": 3105,
|
||
|
|
"valid_targets_mean": 4170.2,
|
||
|
|
"valid_targets_min": 857
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.72644376899696,
|
||
|
|
"grad_norm": 0.4670653605297488,
|
||
|
|
"learning_rate": 1.15503061810772e-05,
|
||
|
|
"loss": 0.1665,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16604188084602356,
|
||
|
|
"step": 3110,
|
||
|
|
"valid_targets_mean": 5626.7,
|
||
|
|
"valid_targets_min": 414
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.73404255319149,
|
||
|
|
"grad_norm": 0.4609235488117921,
|
||
|
|
"learning_rate": 1.1481671108019984e-05,
|
||
|
|
"loss": 0.1752,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23836404085159302,
|
||
|
|
"step": 3115,
|
||
|
|
"valid_targets_mean": 5899.8,
|
||
|
|
"valid_targets_min": 543
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.741641337386018,
|
||
|
|
"grad_norm": 0.46042215360634425,
|
||
|
|
"learning_rate": 1.1413158368228249e-05,
|
||
|
|
"loss": 0.1571,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.163685142993927,
|
||
|
|
"step": 3120,
|
||
|
|
"valid_targets_mean": 4402.4,
|
||
|
|
"valid_targets_min": 512
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.749240121580547,
|
||
|
|
"grad_norm": 0.5338506865095787,
|
||
|
|
"learning_rate": 1.1344768945625884e-05,
|
||
|
|
"loss": 0.1723,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1888251006603241,
|
||
|
|
"step": 3125,
|
||
|
|
"valid_targets_mean": 4155.4,
|
||
|
|
"valid_targets_min": 537
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.756838905775076,
|
||
|
|
"grad_norm": 0.40137648604950027,
|
||
|
|
"learning_rate": 1.127650382236578e-05,
|
||
|
|
"loss": 0.1618,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14810852706432343,
|
||
|
|
"step": 3130,
|
||
|
|
"valid_targets_mean": 5314.3,
|
||
|
|
"valid_targets_min": 354
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.764437689969605,
|
||
|
|
"grad_norm": 0.5240009462617735,
|
||
|
|
"learning_rate": 1.1208363978815746e-05,
|
||
|
|
"loss": 0.1679,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18549123406410217,
|
||
|
|
"step": 3135,
|
||
|
|
"valid_targets_mean": 4340.2,
|
||
|
|
"valid_targets_min": 593
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.772036474164134,
|
||
|
|
"grad_norm": 0.43622202592017323,
|
||
|
|
"learning_rate": 1.1140350393544422e-05,
|
||
|
|
"loss": 0.1593,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15057966113090515,
|
||
|
|
"step": 3140,
|
||
|
|
"valid_targets_mean": 4826.1,
|
||
|
|
"valid_targets_min": 794
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.779635258358662,
|
||
|
|
"grad_norm": 0.46748847050986225,
|
||
|
|
"learning_rate": 1.1072464043307259e-05,
|
||
|
|
"loss": 0.1647,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20890364050865173,
|
||
|
|
"step": 3145,
|
||
|
|
"valid_targets_mean": 4810.9,
|
||
|
|
"valid_targets_min": 434
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.787234042553192,
|
||
|
|
"grad_norm": 0.4709284580160048,
|
||
|
|
"learning_rate": 1.1004705903032406e-05,
|
||
|
|
"loss": 0.1661,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17572824656963348,
|
||
|
|
"step": 3150,
|
||
|
|
"valid_targets_mean": 4275.6,
|
||
|
|
"valid_targets_min": 787
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.7948328267477205,
|
||
|
|
"grad_norm": 0.4947193520586711,
|
||
|
|
"learning_rate": 1.0937076945806837e-05,
|
||
|
|
"loss": 0.1567,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1653721183538437,
|
||
|
|
"step": 3155,
|
||
|
|
"valid_targets_mean": 3996.0,
|
||
|
|
"valid_targets_min": 608
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.802431610942249,
|
||
|
|
"grad_norm": 0.95424384071522,
|
||
|
|
"learning_rate": 1.0869578142862228e-05,
|
||
|
|
"loss": 0.1561,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15056976675987244,
|
||
|
|
"step": 3160,
|
||
|
|
"valid_targets_mean": 5264.1,
|
||
|
|
"valid_targets_min": 911
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.810030395136778,
|
||
|
|
"grad_norm": 0.436658942790459,
|
||
|
|
"learning_rate": 1.0802210463561166e-05,
|
||
|
|
"loss": 0.1709,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15242451429367065,
|
||
|
|
"step": 3165,
|
||
|
|
"valid_targets_mean": 4430.6,
|
||
|
|
"valid_targets_min": 1307
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.817629179331307,
|
||
|
|
"grad_norm": 0.4940935042193495,
|
||
|
|
"learning_rate": 1.0734974875383066e-05,
|
||
|
|
"loss": 0.1646,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15941178798675537,
|
||
|
|
"step": 3170,
|
||
|
|
"valid_targets_mean": 3658.3,
|
||
|
|
"valid_targets_min": 640
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.825227963525836,
|
||
|
|
"grad_norm": 0.5857976461421992,
|
||
|
|
"learning_rate": 1.0667872343910432e-05,
|
||
|
|
"loss": 0.1681,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16998814046382904,
|
||
|
|
"step": 3175,
|
||
|
|
"valid_targets_mean": 4214.1,
|
||
|
|
"valid_targets_min": 619
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.832826747720365,
|
||
|
|
"grad_norm": 0.447470023536544,
|
||
|
|
"learning_rate": 1.0600903832814856e-05,
|
||
|
|
"loss": 0.1525,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16528019309043884,
|
||
|
|
"step": 3180,
|
||
|
|
"valid_targets_mean": 4406.2,
|
||
|
|
"valid_targets_min": 774
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.840425531914894,
|
||
|
|
"grad_norm": 0.45582176893752013,
|
||
|
|
"learning_rate": 1.0534070303843294e-05,
|
||
|
|
"loss": 0.1619,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16366976499557495,
|
||
|
|
"step": 3185,
|
||
|
|
"valid_targets_mean": 4874.4,
|
||
|
|
"valid_targets_min": 531
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.848024316109423,
|
||
|
|
"grad_norm": 0.491211928315914,
|
||
|
|
"learning_rate": 1.0467372716804141e-05,
|
||
|
|
"loss": 0.18,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18734398484230042,
|
||
|
|
"step": 3190,
|
||
|
|
"valid_targets_mean": 4573.7,
|
||
|
|
"valid_targets_min": 655
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.855623100303951,
|
||
|
|
"grad_norm": 0.5889214979063572,
|
||
|
|
"learning_rate": 1.0400812029553569e-05,
|
||
|
|
"loss": 0.1707,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18033604323863983,
|
||
|
|
"step": 3195,
|
||
|
|
"valid_targets_mean": 3925.6,
|
||
|
|
"valid_targets_min": 545
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.86322188449848,
|
||
|
|
"grad_norm": 0.4334778399672226,
|
||
|
|
"learning_rate": 1.0334389197981638e-05,
|
||
|
|
"loss": 0.1741,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14970465004444122,
|
||
|
|
"step": 3200,
|
||
|
|
"valid_targets_mean": 4557.2,
|
||
|
|
"valid_targets_min": 736
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.870820668693009,
|
||
|
|
"grad_norm": 0.5178203832731765,
|
||
|
|
"learning_rate": 1.0268105175998713e-05,
|
||
|
|
"loss": 0.1667,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16531500220298767,
|
||
|
|
"step": 3205,
|
||
|
|
"valid_targets_mean": 3934.7,
|
||
|
|
"valid_targets_min": 616
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.878419452887538,
|
||
|
|
"grad_norm": 0.4677336984690735,
|
||
|
|
"learning_rate": 1.0201960915521614e-05,
|
||
|
|
"loss": 0.1832,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15597450733184814,
|
||
|
|
"step": 3210,
|
||
|
|
"valid_targets_mean": 4830.2,
|
||
|
|
"valid_targets_min": 631
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.886018237082067,
|
||
|
|
"grad_norm": 0.4582200792232965,
|
||
|
|
"learning_rate": 1.0135957366460087e-05,
|
||
|
|
"loss": 0.1608,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14683008193969727,
|
||
|
|
"step": 3215,
|
||
|
|
"valid_targets_mean": 4210.8,
|
||
|
|
"valid_targets_min": 1151
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.8936170212765955,
|
||
|
|
"grad_norm": 2.6017818537468114,
|
||
|
|
"learning_rate": 1.0070095476703036e-05,
|
||
|
|
"loss": 0.1694,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17961879074573517,
|
||
|
|
"step": 3220,
|
||
|
|
"valid_targets_mean": 4255.6,
|
||
|
|
"valid_targets_min": 544
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.901215805471125,
|
||
|
|
"grad_norm": 0.48429935318886524,
|
||
|
|
"learning_rate": 1.0004376192105032e-05,
|
||
|
|
"loss": 0.1751,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1917518824338913,
|
||
|
|
"step": 3225,
|
||
|
|
"valid_targets_mean": 4164.9,
|
||
|
|
"valid_targets_min": 683
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.908814589665654,
|
||
|
|
"grad_norm": 0.408755975822661,
|
||
|
|
"learning_rate": 9.938800456472603e-06,
|
||
|
|
"loss": 0.1578,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15598323941230774,
|
||
|
|
"step": 3230,
|
||
|
|
"valid_targets_mean": 5235.6,
|
||
|
|
"valid_targets_min": 673
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.916413373860182,
|
||
|
|
"grad_norm": 0.5687286004991109,
|
||
|
|
"learning_rate": 9.87336921155081e-06,
|
||
|
|
"loss": 0.1596,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1737002432346344,
|
||
|
|
"step": 3235,
|
||
|
|
"valid_targets_mean": 4874.2,
|
||
|
|
"valid_targets_min": 648
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.924012158054711,
|
||
|
|
"grad_norm": 0.4449749826597429,
|
||
|
|
"learning_rate": 9.80808339700959e-06,
|
||
|
|
"loss": 0.1652,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18869557976722717,
|
||
|
|
"step": 3240,
|
||
|
|
"valid_targets_mean": 5077.7,
|
||
|
|
"valid_targets_min": 778
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.9316109422492405,
|
||
|
|
"grad_norm": 0.43592967879817196,
|
||
|
|
"learning_rate": 9.74294395043039e-06,
|
||
|
|
"loss": 0.1634,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16350185871124268,
|
||
|
|
"step": 3245,
|
||
|
|
"valid_targets_mean": 4365.2,
|
||
|
|
"valid_targets_min": 579
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.939209726443769,
|
||
|
|
"grad_norm": 0.44671726836927617,
|
||
|
|
"learning_rate": 9.677951807292584e-06,
|
||
|
|
"loss": 0.1554,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1710178405046463,
|
||
|
|
"step": 3250,
|
||
|
|
"valid_targets_mean": 4973.2,
|
||
|
|
"valid_targets_min": 456
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.946808510638298,
|
||
|
|
"grad_norm": 0.4273982901555888,
|
||
|
|
"learning_rate": 9.61310790096015e-06,
|
||
|
|
"loss": 0.1588,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13666585087776184,
|
||
|
|
"step": 3255,
|
||
|
|
"valid_targets_mean": 4339.3,
|
||
|
|
"valid_targets_min": 970
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.954407294832826,
|
||
|
|
"grad_norm": 0.4111092281316594,
|
||
|
|
"learning_rate": 9.548413162668155e-06,
|
||
|
|
"loss": 0.1578,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13993242383003235,
|
||
|
|
"step": 3260,
|
||
|
|
"valid_targets_mean": 5151.4,
|
||
|
|
"valid_targets_min": 1606
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.962006079027356,
|
||
|
|
"grad_norm": 0.4678973949276577,
|
||
|
|
"learning_rate": 9.483868521509492e-06,
|
||
|
|
"loss": 0.1565,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17497044801712036,
|
||
|
|
"step": 3265,
|
||
|
|
"valid_targets_mean": 4569.8,
|
||
|
|
"valid_targets_min": 507
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.9696048632218845,
|
||
|
|
"grad_norm": 0.48923608505182986,
|
||
|
|
"learning_rate": 9.419474904421422e-06,
|
||
|
|
"loss": 0.161,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17405647039413452,
|
||
|
|
"step": 3270,
|
||
|
|
"valid_targets_mean": 4378.5,
|
||
|
|
"valid_targets_min": 646
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.977203647416413,
|
||
|
|
"grad_norm": 0.42080134203036884,
|
||
|
|
"learning_rate": 9.355233236172381e-06,
|
||
|
|
"loss": 0.1507,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14896854758262634,
|
||
|
|
"step": 3275,
|
||
|
|
"valid_targets_mean": 4978.7,
|
||
|
|
"valid_targets_min": 1936
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.984802431610943,
|
||
|
|
"grad_norm": 0.45247817713087146,
|
||
|
|
"learning_rate": 9.2911444393486e-06,
|
||
|
|
"loss": 0.1697,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14354351162910461,
|
||
|
|
"step": 3280,
|
||
|
|
"valid_targets_mean": 5089.9,
|
||
|
|
"valid_targets_min": 754
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.992401215805471,
|
||
|
|
"grad_norm": 0.4892191547497894,
|
||
|
|
"learning_rate": 9.227209434340914e-06,
|
||
|
|
"loss": 0.1628,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1533418595790863,
|
||
|
|
"step": 3285,
|
||
|
|
"valid_targets_mean": 4154.1,
|
||
|
|
"valid_targets_min": 434
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.0,
|
||
|
|
"grad_norm": 0.4367103289579237,
|
||
|
|
"learning_rate": 9.163429139331516e-06,
|
||
|
|
"loss": 0.1656,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1589938849210739,
|
||
|
|
"step": 3290,
|
||
|
|
"valid_targets_mean": 4928.7,
|
||
|
|
"valid_targets_min": 567
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.007598784194529,
|
||
|
|
"grad_norm": 0.5506166462705645,
|
||
|
|
"learning_rate": 9.099804470280791e-06,
|
||
|
|
"loss": 0.1619,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1861366331577301,
|
||
|
|
"step": 3295,
|
||
|
|
"valid_targets_mean": 3553.4,
|
||
|
|
"valid_targets_min": 519
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.015197568389058,
|
||
|
|
"grad_norm": 0.44365514163643444,
|
||
|
|
"learning_rate": 9.036336340914138e-06,
|
||
|
|
"loss": 0.1662,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18212465941905975,
|
||
|
|
"step": 3300,
|
||
|
|
"valid_targets_mean": 4840.4,
|
||
|
|
"valid_targets_min": 465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.022796352583587,
|
||
|
|
"grad_norm": 0.5002686598117616,
|
||
|
|
"learning_rate": 8.973025662708875e-06,
|
||
|
|
"loss": 0.1501,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16633297502994537,
|
||
|
|
"step": 3305,
|
||
|
|
"valid_targets_mean": 5309.5,
|
||
|
|
"valid_targets_min": 834
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.0303951367781155,
|
||
|
|
"grad_norm": 0.5141187748402867,
|
||
|
|
"learning_rate": 8.909873344881125e-06,
|
||
|
|
"loss": 0.1566,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1726076900959015,
|
||
|
|
"step": 3310,
|
||
|
|
"valid_targets_mean": 4149.5,
|
||
|
|
"valid_targets_min": 1217
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.037993920972644,
|
||
|
|
"grad_norm": 0.42838999609845296,
|
||
|
|
"learning_rate": 8.846880294372777e-06,
|
||
|
|
"loss": 0.1601,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1435108482837677,
|
||
|
|
"step": 3315,
|
||
|
|
"valid_targets_mean": 5703.1,
|
||
|
|
"valid_targets_min": 781
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.045592705167174,
|
||
|
|
"grad_norm": 0.4990807265797361,
|
||
|
|
"learning_rate": 8.784047415838446e-06,
|
||
|
|
"loss": 0.1636,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15767478942871094,
|
||
|
|
"step": 3320,
|
||
|
|
"valid_targets_mean": 4732.1,
|
||
|
|
"valid_targets_min": 998
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.053191489361702,
|
||
|
|
"grad_norm": 0.4244905713932716,
|
||
|
|
"learning_rate": 8.721375611632494e-06,
|
||
|
|
"loss": 0.1497,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15482358634471893,
|
||
|
|
"step": 3325,
|
||
|
|
"valid_targets_mean": 5283.4,
|
||
|
|
"valid_targets_min": 977
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.060790273556231,
|
||
|
|
"grad_norm": 0.5268949634896493,
|
||
|
|
"learning_rate": 8.658865781796059e-06,
|
||
|
|
"loss": 0.1534,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16202573478221893,
|
||
|
|
"step": 3330,
|
||
|
|
"valid_targets_mean": 4009.4,
|
||
|
|
"valid_targets_min": 736
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.0683890577507595,
|
||
|
|
"grad_norm": 0.5222959380611226,
|
||
|
|
"learning_rate": 8.596518824044145e-06,
|
||
|
|
"loss": 0.1628,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1700512319803238,
|
||
|
|
"step": 3335,
|
||
|
|
"valid_targets_mean": 4815.2,
|
||
|
|
"valid_targets_min": 1896
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.075987841945289,
|
||
|
|
"grad_norm": 0.48817810574292153,
|
||
|
|
"learning_rate": 8.53433563375271e-06,
|
||
|
|
"loss": 0.1583,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15692004561424255,
|
||
|
|
"step": 3340,
|
||
|
|
"valid_targets_mean": 5081.4,
|
||
|
|
"valid_targets_min": 604
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.083586626139818,
|
||
|
|
"grad_norm": 0.5145519787891805,
|
||
|
|
"learning_rate": 8.472317103945827e-06,
|
||
|
|
"loss": 0.1554,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15433713793754578,
|
||
|
|
"step": 3345,
|
||
|
|
"valid_targets_mean": 4923.5,
|
||
|
|
"valid_targets_min": 664
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.091185410334346,
|
||
|
|
"grad_norm": 0.49860669438741656,
|
||
|
|
"learning_rate": 8.410464125282842e-06,
|
||
|
|
"loss": 0.1597,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18030579388141632,
|
||
|
|
"step": 3350,
|
||
|
|
"valid_targets_mean": 4054.4,
|
||
|
|
"valid_targets_min": 849
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.098784194528875,
|
||
|
|
"grad_norm": 0.4977104637629043,
|
||
|
|
"learning_rate": 8.348777586045599e-06,
|
||
|
|
"loss": 0.1591,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16232091188430786,
|
||
|
|
"step": 3355,
|
||
|
|
"valid_targets_mean": 4112.1,
|
||
|
|
"valid_targets_min": 342
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.1063829787234045,
|
||
|
|
"grad_norm": 0.40792452628529047,
|
||
|
|
"learning_rate": 8.287258372125666e-06,
|
||
|
|
"loss": 0.1496,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14980870485305786,
|
||
|
|
"step": 3360,
|
||
|
|
"valid_targets_mean": 5441.6,
|
||
|
|
"valid_targets_min": 828
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.113981762917933,
|
||
|
|
"grad_norm": 0.48624828153735117,
|
||
|
|
"learning_rate": 8.22590736701163e-06,
|
||
|
|
"loss": 0.1599,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15524911880493164,
|
||
|
|
"step": 3365,
|
||
|
|
"valid_targets_mean": 4029.7,
|
||
|
|
"valid_targets_min": 527
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.121580547112462,
|
||
|
|
"grad_norm": 0.41260477481757823,
|
||
|
|
"learning_rate": 8.164725451776396e-06,
|
||
|
|
"loss": 0.139,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11952009797096252,
|
||
|
|
"step": 3370,
|
||
|
|
"valid_targets_mean": 5127.4,
|
||
|
|
"valid_targets_min": 1259
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.129179331306991,
|
||
|
|
"grad_norm": 0.5121037283737604,
|
||
|
|
"learning_rate": 8.103713505064542e-06,
|
||
|
|
"loss": 0.147,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13970546424388885,
|
||
|
|
"step": 3375,
|
||
|
|
"valid_targets_mean": 5086.4,
|
||
|
|
"valid_targets_min": 447
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.13677811550152,
|
||
|
|
"grad_norm": 0.554785879889902,
|
||
|
|
"learning_rate": 8.042872403079695e-06,
|
||
|
|
"loss": 0.1718,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17848604917526245,
|
||
|
|
"step": 3380,
|
||
|
|
"valid_targets_mean": 4835.1,
|
||
|
|
"valid_targets_min": 229
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.144376899696049,
|
||
|
|
"grad_norm": 0.4543169082658653,
|
||
|
|
"learning_rate": 7.982203019571951e-06,
|
||
|
|
"loss": 0.1633,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15885761380195618,
|
||
|
|
"step": 3385,
|
||
|
|
"valid_targets_mean": 4715.2,
|
||
|
|
"valid_targets_min": 614
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.151975683890577,
|
||
|
|
"grad_norm": 0.49519076224489234,
|
||
|
|
"learning_rate": 7.921706225825323e-06,
|
||
|
|
"loss": 0.1553,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14728990197181702,
|
||
|
|
"step": 3390,
|
||
|
|
"valid_targets_mean": 4132.9,
|
||
|
|
"valid_targets_min": 705
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.159574468085107,
|
||
|
|
"grad_norm": 0.4692483655123735,
|
||
|
|
"learning_rate": 7.861382890645235e-06,
|
||
|
|
"loss": 0.1666,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1677330732345581,
|
||
|
|
"step": 3395,
|
||
|
|
"valid_targets_mean": 4542.6,
|
||
|
|
"valid_targets_min": 571
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.167173252279635,
|
||
|
|
"grad_norm": 0.4981198573213941,
|
||
|
|
"learning_rate": 7.801233880346044e-06,
|
||
|
|
"loss": 0.1537,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.152068629860878,
|
||
|
|
"step": 3400,
|
||
|
|
"valid_targets_mean": 4878.0,
|
||
|
|
"valid_targets_min": 1568
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.174772036474164,
|
||
|
|
"grad_norm": 0.496973792548942,
|
||
|
|
"learning_rate": 7.741260058738576e-06,
|
||
|
|
"loss": 0.1678,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1845749020576477,
|
||
|
|
"step": 3405,
|
||
|
|
"valid_targets_mean": 4564.4,
|
||
|
|
"valid_targets_min": 333
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.182370820668693,
|
||
|
|
"grad_norm": 0.4801400629594902,
|
||
|
|
"learning_rate": 7.681462287117769e-06,
|
||
|
|
"loss": 0.1599,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15920531749725342,
|
||
|
|
"step": 3410,
|
||
|
|
"valid_targets_mean": 4416.2,
|
||
|
|
"valid_targets_min": 618
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.189969604863222,
|
||
|
|
"grad_norm": 0.44717438624237565,
|
||
|
|
"learning_rate": 7.62184142425026e-06,
|
||
|
|
"loss": 0.1586,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14159642159938812,
|
||
|
|
"step": 3415,
|
||
|
|
"valid_targets_mean": 5296.9,
|
||
|
|
"valid_targets_min": 1679
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.197568389057751,
|
||
|
|
"grad_norm": 0.5694531326181947,
|
||
|
|
"learning_rate": 7.562398326362068e-06,
|
||
|
|
"loss": 0.1603,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1987701654434204,
|
||
|
|
"step": 3420,
|
||
|
|
"valid_targets_mean": 4228.2,
|
||
|
|
"valid_targets_min": 500
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.2051671732522795,
|
||
|
|
"grad_norm": 0.4837511107077152,
|
||
|
|
"learning_rate": 7.503133847126298e-06,
|
||
|
|
"loss": 0.1446,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14547014236450195,
|
||
|
|
"step": 3425,
|
||
|
|
"valid_targets_mean": 4541.6,
|
||
|
|
"valid_targets_min": 537
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.212765957446808,
|
||
|
|
"grad_norm": 0.46751291957082003,
|
||
|
|
"learning_rate": 7.444048837650879e-06,
|
||
|
|
"loss": 0.1547,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1482163369655609,
|
||
|
|
"step": 3430,
|
||
|
|
"valid_targets_mean": 4358.3,
|
||
|
|
"valid_targets_min": 464
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.220364741641338,
|
||
|
|
"grad_norm": 0.48046465195883326,
|
||
|
|
"learning_rate": 7.3851441464663455e-06,
|
||
|
|
"loss": 0.1505,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15912845730781555,
|
||
|
|
"step": 3435,
|
||
|
|
"valid_targets_mean": 4258.4,
|
||
|
|
"valid_targets_min": 1734
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.227963525835866,
|
||
|
|
"grad_norm": 0.5201595463216373,
|
||
|
|
"learning_rate": 7.326420619513645e-06,
|
||
|
|
"loss": 0.144,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14452405273914337,
|
||
|
|
"step": 3440,
|
||
|
|
"valid_targets_mean": 4945.8,
|
||
|
|
"valid_targets_min": 2273
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.235562310030395,
|
||
|
|
"grad_norm": 0.4662188330800987,
|
||
|
|
"learning_rate": 7.267879100131996e-06,
|
||
|
|
"loss": 0.1601,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16573692858219147,
|
||
|
|
"step": 3445,
|
||
|
|
"valid_targets_mean": 5350.9,
|
||
|
|
"valid_targets_min": 1272
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.243161094224924,
|
||
|
|
"grad_norm": 0.5445679437490333,
|
||
|
|
"learning_rate": 7.209520429046768e-06,
|
||
|
|
"loss": 0.1508,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14232224225997925,
|
||
|
|
"step": 3450,
|
||
|
|
"valid_targets_mean": 3531.5,
|
||
|
|
"valid_targets_min": 756
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.250759878419453,
|
||
|
|
"grad_norm": 0.48341944333718034,
|
||
|
|
"learning_rate": 7.151345444357418e-06,
|
||
|
|
"loss": 0.1778,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15717411041259766,
|
||
|
|
"step": 3455,
|
||
|
|
"valid_targets_mean": 4357.9,
|
||
|
|
"valid_targets_min": 410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.258358662613982,
|
||
|
|
"grad_norm": 0.46774893892784875,
|
||
|
|
"learning_rate": 7.0933549815254465e-06,
|
||
|
|
"loss": 0.1592,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16652044653892517,
|
||
|
|
"step": 3460,
|
||
|
|
"valid_targets_mean": 5190.8,
|
||
|
|
"valid_targets_min": 433
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.26595744680851,
|
||
|
|
"grad_norm": 0.47602862093431514,
|
||
|
|
"learning_rate": 7.035549873362406e-06,
|
||
|
|
"loss": 0.1648,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14074617624282837,
|
||
|
|
"step": 3465,
|
||
|
|
"valid_targets_mean": 4641.2,
|
||
|
|
"valid_targets_min": 1537
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.27355623100304,
|
||
|
|
"grad_norm": 0.49943887077257887,
|
||
|
|
"learning_rate": 6.97793095001793e-06,
|
||
|
|
"loss": 0.1609,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15125201642513275,
|
||
|
|
"step": 3470,
|
||
|
|
"valid_targets_mean": 4864.9,
|
||
|
|
"valid_targets_min": 754
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.281155015197569,
|
||
|
|
"grad_norm": 0.56912423597624,
|
||
|
|
"learning_rate": 6.920499038967825e-06,
|
||
|
|
"loss": 0.1524,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1387101113796234,
|
||
|
|
"step": 3475,
|
||
|
|
"valid_targets_mean": 4497.3,
|
||
|
|
"valid_targets_min": 780
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.288753799392097,
|
||
|
|
"grad_norm": 0.6031298612179781,
|
||
|
|
"learning_rate": 6.863254965002178e-06,
|
||
|
|
"loss": 0.1563,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17738491296768188,
|
||
|
|
"step": 3480,
|
||
|
|
"valid_targets_mean": 2821.3,
|
||
|
|
"valid_targets_min": 271
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.296352583586626,
|
||
|
|
"grad_norm": 0.42528213654923797,
|
||
|
|
"learning_rate": 6.80619955021351e-06,
|
||
|
|
"loss": 0.1575,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13962329924106598,
|
||
|
|
"step": 3485,
|
||
|
|
"valid_targets_mean": 5226.1,
|
||
|
|
"valid_targets_min": 1805
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.303951367781155,
|
||
|
|
"grad_norm": 0.4641682316415047,
|
||
|
|
"learning_rate": 6.749333613984979e-06,
|
||
|
|
"loss": 0.1638,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17883500456809998,
|
||
|
|
"step": 3490,
|
||
|
|
"valid_targets_mean": 5505.0,
|
||
|
|
"valid_targets_min": 2227
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.311550151975684,
|
||
|
|
"grad_norm": 0.41287597531580333,
|
||
|
|
"learning_rate": 6.6926579729786025e-06,
|
||
|
|
"loss": 0.1569,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13076046109199524,
|
||
|
|
"step": 3495,
|
||
|
|
"valid_targets_mean": 5253.1,
|
||
|
|
"valid_targets_min": 624
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.319148936170213,
|
||
|
|
"grad_norm": 0.47804228992277803,
|
||
|
|
"learning_rate": 6.636173441123537e-06,
|
||
|
|
"loss": 0.1507,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13578727841377258,
|
||
|
|
"step": 3500,
|
||
|
|
"valid_targets_mean": 4141.2,
|
||
|
|
"valid_targets_min": 974
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.326747720364741,
|
||
|
|
"grad_norm": 0.6043580771376642,
|
||
|
|
"learning_rate": 6.5798808296043835e-06,
|
||
|
|
"loss": 0.1569,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1580887734889984,
|
||
|
|
"step": 3505,
|
||
|
|
"valid_targets_mean": 3810.6,
|
||
|
|
"valid_targets_min": 769
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.334346504559271,
|
||
|
|
"grad_norm": 0.5078101575478104,
|
||
|
|
"learning_rate": 6.52378094684954e-06,
|
||
|
|
"loss": 0.1674,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17107361555099487,
|
||
|
|
"step": 3510,
|
||
|
|
"valid_targets_mean": 3934.2,
|
||
|
|
"valid_targets_min": 280
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.3419452887537995,
|
||
|
|
"grad_norm": 0.4141414829732433,
|
||
|
|
"learning_rate": 6.467874598519597e-06,
|
||
|
|
"loss": 0.1603,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14241334795951843,
|
||
|
|
"step": 3515,
|
||
|
|
"valid_targets_mean": 5362.5,
|
||
|
|
"valid_targets_min": 2329
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.349544072948328,
|
||
|
|
"grad_norm": 0.4458140032344141,
|
||
|
|
"learning_rate": 6.412162587495754e-06,
|
||
|
|
"loss": 0.1719,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13986560702323914,
|
||
|
|
"step": 3520,
|
||
|
|
"valid_targets_mean": 4640.0,
|
||
|
|
"valid_targets_min": 593
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.357142857142857,
|
||
|
|
"grad_norm": 0.49980298962190683,
|
||
|
|
"learning_rate": 6.3566457138683015e-06,
|
||
|
|
"loss": 0.1519,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1622404009103775,
|
||
|
|
"step": 3525,
|
||
|
|
"valid_targets_mean": 4144.8,
|
||
|
|
"valid_targets_min": 523
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.364741641337386,
|
||
|
|
"grad_norm": 0.5309190846014655,
|
||
|
|
"learning_rate": 6.301324774925128e-06,
|
||
|
|
"loss": 0.1449,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1419864147901535,
|
||
|
|
"step": 3530,
|
||
|
|
"valid_targets_mean": 4773.5,
|
||
|
|
"valid_targets_min": 601
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.372340425531915,
|
||
|
|
"grad_norm": 0.4394509557877868,
|
||
|
|
"learning_rate": 6.246200565140266e-06,
|
||
|
|
"loss": 0.1382,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1346133053302765,
|
||
|
|
"step": 3535,
|
||
|
|
"valid_targets_mean": 5314.5,
|
||
|
|
"valid_targets_min": 1831
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.379939209726444,
|
||
|
|
"grad_norm": 0.6157551616826415,
|
||
|
|
"learning_rate": 6.191273876162487e-06,
|
||
|
|
"loss": 0.1559,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16742759943008423,
|
||
|
|
"step": 3540,
|
||
|
|
"valid_targets_mean": 4461.9,
|
||
|
|
"valid_targets_min": 632
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.387537993920972,
|
||
|
|
"grad_norm": 0.5207893779334158,
|
||
|
|
"learning_rate": 6.136545496803925e-06,
|
||
|
|
"loss": 0.1576,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.173904150724411,
|
||
|
|
"step": 3545,
|
||
|
|
"valid_targets_mean": 4806.9,
|
||
|
|
"valid_targets_min": 816
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.395136778115502,
|
||
|
|
"grad_norm": 0.41444365615563894,
|
||
|
|
"learning_rate": 6.082016213028761e-06,
|
||
|
|
"loss": 0.1605,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14027008414268494,
|
||
|
|
"step": 3550,
|
||
|
|
"valid_targets_mean": 5664.4,
|
||
|
|
"valid_targets_min": 840
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.40273556231003,
|
||
|
|
"grad_norm": 0.4453222293778615,
|
||
|
|
"learning_rate": 6.0276868079419235e-06,
|
||
|
|
"loss": 0.1496,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1503615528345108,
|
||
|
|
"step": 3555,
|
||
|
|
"valid_targets_mean": 5143.4,
|
||
|
|
"valid_targets_min": 979
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.410334346504559,
|
||
|
|
"grad_norm": 0.4781236892192554,
|
||
|
|
"learning_rate": 5.973558061777849e-06,
|
||
|
|
"loss": 0.1529,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15819337964057922,
|
||
|
|
"step": 3560,
|
||
|
|
"valid_targets_mean": 5053.2,
|
||
|
|
"valid_targets_min": 664
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.4179331306990886,
|
||
|
|
"grad_norm": 0.41198950921824895,
|
||
|
|
"learning_rate": 5.919630751889274e-06,
|
||
|
|
"loss": 0.1599,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13387863337993622,
|
||
|
|
"step": 3565,
|
||
|
|
"valid_targets_mean": 5390.4,
|
||
|
|
"valid_targets_min": 1116
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.425531914893617,
|
||
|
|
"grad_norm": 0.516010838368697,
|
||
|
|
"learning_rate": 5.865905652736072e-06,
|
||
|
|
"loss": 0.1584,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17436817288398743,
|
||
|
|
"step": 3570,
|
||
|
|
"valid_targets_mean": 4064.7,
|
||
|
|
"valid_targets_min": 894
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.433130699088146,
|
||
|
|
"grad_norm": 0.5141562761832758,
|
||
|
|
"learning_rate": 5.812383535874131e-06,
|
||
|
|
"loss": 0.1428,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13484925031661987,
|
||
|
|
"step": 3575,
|
||
|
|
"valid_targets_mean": 3949.4,
|
||
|
|
"valid_targets_min": 1312
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.4407294832826745,
|
||
|
|
"grad_norm": 0.45662210514471807,
|
||
|
|
"learning_rate": 5.759065169944274e-06,
|
||
|
|
"loss": 0.1434,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1350744068622589,
|
||
|
|
"step": 3580,
|
||
|
|
"valid_targets_mean": 4565.4,
|
||
|
|
"valid_targets_min": 381
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.448328267477204,
|
||
|
|
"grad_norm": 0.49698187827992,
|
||
|
|
"learning_rate": 5.705951320661222e-06,
|
||
|
|
"loss": 0.1411,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14174014329910278,
|
||
|
|
"step": 3585,
|
||
|
|
"valid_targets_mean": 4722.2,
|
||
|
|
"valid_targets_min": 2183
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.455927051671733,
|
||
|
|
"grad_norm": 0.45234338868163015,
|
||
|
|
"learning_rate": 5.653042750802591e-06,
|
||
|
|
"loss": 0.1678,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14616911113262177,
|
||
|
|
"step": 3590,
|
||
|
|
"valid_targets_mean": 4943.4,
|
||
|
|
"valid_targets_min": 1261
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.463525835866261,
|
||
|
|
"grad_norm": 0.4806481881918089,
|
||
|
|
"learning_rate": 5.600340220197946e-06,
|
||
|
|
"loss": 0.1523,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17162683606147766,
|
||
|
|
"step": 3595,
|
||
|
|
"valid_targets_mean": 4331.8,
|
||
|
|
"valid_targets_min": 414
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.47112462006079,
|
||
|
|
"grad_norm": 0.46035606110531224,
|
||
|
|
"learning_rate": 5.547844485717884e-06,
|
||
|
|
"loss": 0.1479,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14485681056976318,
|
||
|
|
"step": 3600,
|
||
|
|
"valid_targets_mean": 4167.1,
|
||
|
|
"valid_targets_min": 1119
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.4787234042553195,
|
||
|
|
"grad_norm": 0.4684818904402137,
|
||
|
|
"learning_rate": 5.4955563012631606e-06,
|
||
|
|
"loss": 0.1449,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14429020881652832,
|
||
|
|
"step": 3605,
|
||
|
|
"valid_targets_mean": 5013.6,
|
||
|
|
"valid_targets_min": 1788
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.486322188449848,
|
||
|
|
"grad_norm": 0.6076128140539062,
|
||
|
|
"learning_rate": 5.443476417753877e-06,
|
||
|
|
"loss": 0.1681,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17048346996307373,
|
||
|
|
"step": 3610,
|
||
|
|
"valid_targets_mean": 2878.3,
|
||
|
|
"valid_targets_min": 491
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.493920972644377,
|
||
|
|
"grad_norm": 0.46825002881973943,
|
||
|
|
"learning_rate": 5.39160558311868e-06,
|
||
|
|
"loss": 0.1569,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17676037549972534,
|
||
|
|
"step": 3615,
|
||
|
|
"valid_targets_mean": 4354.2,
|
||
|
|
"valid_targets_min": 646
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.501519756838906,
|
||
|
|
"grad_norm": 0.49007855328228217,
|
||
|
|
"learning_rate": 5.33994454228403e-06,
|
||
|
|
"loss": 0.1608,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17580349743366241,
|
||
|
|
"step": 3620,
|
||
|
|
"valid_targets_mean": 5169.6,
|
||
|
|
"valid_targets_min": 953
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.509118541033435,
|
||
|
|
"grad_norm": 0.44768382223068853,
|
||
|
|
"learning_rate": 5.2884940371634915e-06,
|
||
|
|
"loss": 0.1533,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14162276685237885,
|
||
|
|
"step": 3625,
|
||
|
|
"valid_targets_mean": 5179.9,
|
||
|
|
"valid_targets_min": 1051
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.5167173252279635,
|
||
|
|
"grad_norm": 0.4352121315996926,
|
||
|
|
"learning_rate": 5.237254806647117e-06,
|
||
|
|
"loss": 0.1508,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1292266994714737,
|
||
|
|
"step": 3630,
|
||
|
|
"valid_targets_mean": 4936.1,
|
||
|
|
"valid_targets_min": 912
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.524316109422492,
|
||
|
|
"grad_norm": 0.5433474427694438,
|
||
|
|
"learning_rate": 5.1862275865907575e-06,
|
||
|
|
"loss": 0.1517,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1526874601840973,
|
||
|
|
"step": 3635,
|
||
|
|
"valid_targets_mean": 3939.9,
|
||
|
|
"valid_targets_min": 1082
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.531914893617021,
|
||
|
|
"grad_norm": 0.4664875248042157,
|
||
|
|
"learning_rate": 5.135413109805596e-06,
|
||
|
|
"loss": 0.1601,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14586284756660461,
|
||
|
|
"step": 3640,
|
||
|
|
"valid_targets_mean": 4819.6,
|
||
|
|
"valid_targets_min": 1708
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.53951367781155,
|
||
|
|
"grad_norm": 0.4479408250207293,
|
||
|
|
"learning_rate": 5.084812106047525e-06,
|
||
|
|
"loss": 0.1325,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13202941417694092,
|
||
|
|
"step": 3645,
|
||
|
|
"valid_targets_mean": 5479.3,
|
||
|
|
"valid_targets_min": 1475
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.547112462006079,
|
||
|
|
"grad_norm": 0.46875681135292097,
|
||
|
|
"learning_rate": 5.034425302006751e-06,
|
||
|
|
"loss": 0.1482,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15239191055297852,
|
||
|
|
"step": 3650,
|
||
|
|
"valid_targets_mean": 4168.7,
|
||
|
|
"valid_targets_min": 534
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.554711246200608,
|
||
|
|
"grad_norm": 0.5099225903083285,
|
||
|
|
"learning_rate": 4.984253421297285e-06,
|
||
|
|
"loss": 0.1634,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15130409598350525,
|
||
|
|
"step": 3655,
|
||
|
|
"valid_targets_mean": 3672.7,
|
||
|
|
"valid_targets_min": 465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.562310030395137,
|
||
|
|
"grad_norm": 0.44549612335892824,
|
||
|
|
"learning_rate": 4.934297184446617e-06,
|
||
|
|
"loss": 0.1636,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15832307934761047,
|
||
|
|
"step": 3660,
|
||
|
|
"valid_targets_mean": 5213.5,
|
||
|
|
"valid_targets_min": 1002
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.569908814589666,
|
||
|
|
"grad_norm": 0.5093597450581251,
|
||
|
|
"learning_rate": 4.884557308885302e-06,
|
||
|
|
"loss": 0.1496,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16333602368831635,
|
||
|
|
"step": 3665,
|
||
|
|
"valid_targets_mean": 4690.4,
|
||
|
|
"valid_targets_min": 876
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.577507598784194,
|
||
|
|
"grad_norm": 0.5062778786472242,
|
||
|
|
"learning_rate": 4.835034508936736e-06,
|
||
|
|
"loss": 0.152,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15072304010391235,
|
||
|
|
"step": 3670,
|
||
|
|
"valid_targets_mean": 4472.8,
|
||
|
|
"valid_targets_min": 863
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.585106382978723,
|
||
|
|
"grad_norm": 0.4887232514300285,
|
||
|
|
"learning_rate": 4.785729495806804e-06,
|
||
|
|
"loss": 0.1489,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1506553441286087,
|
||
|
|
"step": 3675,
|
||
|
|
"valid_targets_mean": 4468.6,
|
||
|
|
"valid_targets_min": 1870
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.592705167173253,
|
||
|
|
"grad_norm": 0.5178118286048965,
|
||
|
|
"learning_rate": 4.736642977573745e-06,
|
||
|
|
"loss": 0.1443,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14603829383850098,
|
||
|
|
"step": 3680,
|
||
|
|
"valid_targets_mean": 3900.1,
|
||
|
|
"valid_targets_min": 314
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.600303951367781,
|
||
|
|
"grad_norm": 0.49021784557999176,
|
||
|
|
"learning_rate": 4.6877756591779465e-06,
|
||
|
|
"loss": 0.1579,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1614750772714615,
|
||
|
|
"step": 3685,
|
||
|
|
"valid_targets_mean": 4549.1,
|
||
|
|
"valid_targets_min": 1530
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.60790273556231,
|
||
|
|
"grad_norm": 0.46685253564728685,
|
||
|
|
"learning_rate": 4.63912824241183e-06,
|
||
|
|
"loss": 0.1479,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15664935111999512,
|
||
|
|
"step": 3690,
|
||
|
|
"valid_targets_mean": 4078.2,
|
||
|
|
"valid_targets_min": 702
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.6155015197568385,
|
||
|
|
"grad_norm": 0.4476391544726595,
|
||
|
|
"learning_rate": 4.590701425909763e-06,
|
||
|
|
"loss": 0.1633,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1614711433649063,
|
||
|
|
"step": 3695,
|
||
|
|
"valid_targets_mean": 5204.6,
|
||
|
|
"valid_targets_min": 1871
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.623100303951368,
|
||
|
|
"grad_norm": 0.46752237532336416,
|
||
|
|
"learning_rate": 4.5424959051380376e-06,
|
||
|
|
"loss": 0.1423,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14600232243537903,
|
||
|
|
"step": 3700,
|
||
|
|
"valid_targets_mean": 4345.4,
|
||
|
|
"valid_targets_min": 1418
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.630699088145897,
|
||
|
|
"grad_norm": 0.498907377725208,
|
||
|
|
"learning_rate": 4.4945123723848785e-06,
|
||
|
|
"loss": 0.1668,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16958801448345184,
|
||
|
|
"step": 3705,
|
||
|
|
"valid_targets_mean": 4744.0,
|
||
|
|
"valid_targets_min": 655
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.638297872340425,
|
||
|
|
"grad_norm": 0.47032248993505454,
|
||
|
|
"learning_rate": 4.446751516750496e-06,
|
||
|
|
"loss": 0.1441,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15484699606895447,
|
||
|
|
"step": 3710,
|
||
|
|
"valid_targets_mean": 4455.9,
|
||
|
|
"valid_targets_min": 513
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.645896656534955,
|
||
|
|
"grad_norm": 0.49291725666506586,
|
||
|
|
"learning_rate": 4.399214024137199e-06,
|
||
|
|
"loss": 0.1519,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1515854448080063,
|
||
|
|
"step": 3715,
|
||
|
|
"valid_targets_mean": 4026.5,
|
||
|
|
"valid_targets_min": 401
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.6534954407294835,
|
||
|
|
"grad_norm": 0.42768293201723206,
|
||
|
|
"learning_rate": 4.351900577239534e-06,
|
||
|
|
"loss": 0.1508,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1308768093585968,
|
||
|
|
"step": 3720,
|
||
|
|
"valid_targets_mean": 5203.2,
|
||
|
|
"valid_targets_min": 511
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.661094224924012,
|
||
|
|
"grad_norm": 0.419913357256391,
|
||
|
|
"learning_rate": 4.30481185553449e-06,
|
||
|
|
"loss": 0.143,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15996746718883514,
|
||
|
|
"step": 3725,
|
||
|
|
"valid_targets_mean": 6209.4,
|
||
|
|
"valid_targets_min": 3964
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.668693009118541,
|
||
|
|
"grad_norm": 0.5024514745126758,
|
||
|
|
"learning_rate": 4.2579485352717365e-06,
|
||
|
|
"loss": 0.1429,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14928898215293884,
|
||
|
|
"step": 3730,
|
||
|
|
"valid_targets_mean": 5061.5,
|
||
|
|
"valid_targets_min": 854
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.676291793313069,
|
||
|
|
"grad_norm": 0.6107697257765264,
|
||
|
|
"learning_rate": 4.211311289463913e-06,
|
||
|
|
"loss": 0.1791,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.212265282869339,
|
||
|
|
"step": 3735,
|
||
|
|
"valid_targets_mean": 2980.9,
|
||
|
|
"valid_targets_min": 613
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.683890577507599,
|
||
|
|
"grad_norm": 0.40762228356266644,
|
||
|
|
"learning_rate": 4.164900787876958e-06,
|
||
|
|
"loss": 0.1572,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15725256502628326,
|
||
|
|
"step": 3740,
|
||
|
|
"valid_targets_mean": 5801.9,
|
||
|
|
"valid_targets_min": 1647
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.691489361702128,
|
||
|
|
"grad_norm": 0.42068151608687643,
|
||
|
|
"learning_rate": 4.118717697020503e-06,
|
||
|
|
"loss": 0.1494,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15178656578063965,
|
||
|
|
"step": 3745,
|
||
|
|
"valid_targets_mean": 5270.4,
|
||
|
|
"valid_targets_min": 714
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.699088145896656,
|
||
|
|
"grad_norm": 0.510742750219421,
|
||
|
|
"learning_rate": 4.072762680138283e-06,
|
||
|
|
"loss": 0.1561,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16748473048210144,
|
||
|
|
"step": 3750,
|
||
|
|
"valid_targets_mean": 4111.5,
|
||
|
|
"valid_targets_min": 690
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.706686930091186,
|
||
|
|
"grad_norm": 0.4725979697075687,
|
||
|
|
"learning_rate": 4.02703639719863e-06,
|
||
|
|
"loss": 0.1675,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16421647369861603,
|
||
|
|
"step": 3755,
|
||
|
|
"valid_targets_mean": 5007.1,
|
||
|
|
"valid_targets_min": 1593
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.714285714285714,
|
||
|
|
"grad_norm": 0.4858255836304888,
|
||
|
|
"learning_rate": 3.981539504884975e-06,
|
||
|
|
"loss": 0.1585,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1452382504940033,
|
||
|
|
"step": 3760,
|
||
|
|
"valid_targets_mean": 4335.0,
|
||
|
|
"valid_targets_min": 268
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.721884498480243,
|
||
|
|
"grad_norm": 0.496365033025088,
|
||
|
|
"learning_rate": 3.936272656586455e-06,
|
||
|
|
"loss": 0.1639,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1607646495103836,
|
||
|
|
"step": 3765,
|
||
|
|
"valid_targets_mean": 4691.0,
|
||
|
|
"valid_targets_min": 801
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.729483282674772,
|
||
|
|
"grad_norm": 0.49339220005728074,
|
||
|
|
"learning_rate": 3.891236502388463e-06,
|
||
|
|
"loss": 0.1399,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12876826524734497,
|
||
|
|
"step": 3770,
|
||
|
|
"valid_targets_mean": 3568.1,
|
||
|
|
"valid_targets_min": 629
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.737082066869301,
|
||
|
|
"grad_norm": 0.42864321603538963,
|
||
|
|
"learning_rate": 3.846431689063395e-06,
|
||
|
|
"loss": 0.1598,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14634914696216583,
|
||
|
|
"step": 3775,
|
||
|
|
"valid_targets_mean": 5067.4,
|
||
|
|
"valid_targets_min": 1740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.74468085106383,
|
||
|
|
"grad_norm": 0.4858809112101771,
|
||
|
|
"learning_rate": 3.801858860061276e-06,
|
||
|
|
"loss": 0.1707,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15183153748512268,
|
||
|
|
"step": 3780,
|
||
|
|
"valid_targets_mean": 4308.7,
|
||
|
|
"valid_targets_min": 2342
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.7522796352583585,
|
||
|
|
"grad_norm": 0.430010773677836,
|
||
|
|
"learning_rate": 3.757518655500607e-06,
|
||
|
|
"loss": 0.1413,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11490476876497269,
|
||
|
|
"step": 3785,
|
||
|
|
"valid_targets_mean": 4636.7,
|
||
|
|
"valid_targets_min": 1770
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.759878419452887,
|
||
|
|
"grad_norm": 0.44137928337029025,
|
||
|
|
"learning_rate": 3.7134117121590783e-06,
|
||
|
|
"loss": 0.1477,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1602618247270584,
|
||
|
|
"step": 3790,
|
||
|
|
"valid_targets_mean": 5387.0,
|
||
|
|
"valid_targets_min": 1520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.767477203647417,
|
||
|
|
"grad_norm": 0.49716171756169814,
|
||
|
|
"learning_rate": 3.6695386634645268e-06,
|
||
|
|
"loss": 0.1568,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17046484351158142,
|
||
|
|
"step": 3795,
|
||
|
|
"valid_targets_mean": 3979.0,
|
||
|
|
"valid_targets_min": 637
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.775075987841945,
|
||
|
|
"grad_norm": 0.6225472640133536,
|
||
|
|
"learning_rate": 3.625900139485732e-06,
|
||
|
|
"loss": 0.1483,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16933000087738037,
|
||
|
|
"step": 3800,
|
||
|
|
"valid_targets_mean": 3247.3,
|
||
|
|
"valid_targets_min": 403
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.782674772036474,
|
||
|
|
"grad_norm": 0.4826561130766123,
|
||
|
|
"learning_rate": 3.5824967669234712e-06,
|
||
|
|
"loss": 0.1677,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14927777647972107,
|
||
|
|
"step": 3805,
|
||
|
|
"valid_targets_mean": 4297.8,
|
||
|
|
"valid_targets_min": 479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.7902735562310035,
|
||
|
|
"grad_norm": 0.4594251420598871,
|
||
|
|
"learning_rate": 3.539329169101424e-06,
|
||
|
|
"loss": 0.1679,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15365689992904663,
|
||
|
|
"step": 3810,
|
||
|
|
"valid_targets_mean": 5167.4,
|
||
|
|
"valid_targets_min": 892
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.797872340425532,
|
||
|
|
"grad_norm": 0.4557619581698914,
|
||
|
|
"learning_rate": 3.49639796595731e-06,
|
||
|
|
"loss": 0.164,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1744815707206726,
|
||
|
|
"step": 3815,
|
||
|
|
"valid_targets_mean": 5195.3,
|
||
|
|
"valid_targets_min": 634
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.805471124620061,
|
||
|
|
"grad_norm": 0.46537892238846895,
|
||
|
|
"learning_rate": 3.453703774033901e-06,
|
||
|
|
"loss": 0.1587,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14466017484664917,
|
||
|
|
"step": 3820,
|
||
|
|
"valid_targets_mean": 4648.0,
|
||
|
|
"valid_targets_min": 736
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.813069908814589,
|
||
|
|
"grad_norm": 0.5247551115664364,
|
||
|
|
"learning_rate": 3.4112472064702473e-06,
|
||
|
|
"loss": 0.1533,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16951002180576324,
|
||
|
|
"step": 3825,
|
||
|
|
"valid_targets_mean": 4100.8,
|
||
|
|
"valid_targets_min": 529
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.820668693009118,
|
||
|
|
"grad_norm": 0.4408079538837663,
|
||
|
|
"learning_rate": 3.369028872992792e-06,
|
||
|
|
"loss": 0.1589,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15143227577209473,
|
||
|
|
"step": 3830,
|
||
|
|
"valid_targets_mean": 5215.6,
|
||
|
|
"valid_targets_min": 1009
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.828267477203648,
|
||
|
|
"grad_norm": 0.5248938288172915,
|
||
|
|
"learning_rate": 3.327049379906695e-06,
|
||
|
|
"loss": 0.1633,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17994189262390137,
|
||
|
|
"step": 3835,
|
||
|
|
"valid_targets_mean": 4604.6,
|
||
|
|
"valid_targets_min": 570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.835866261398176,
|
||
|
|
"grad_norm": 0.5052356507109963,
|
||
|
|
"learning_rate": 3.2853093300870452e-06,
|
||
|
|
"loss": 0.1457,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1394600123167038,
|
||
|
|
"step": 3840,
|
||
|
|
"valid_targets_mean": 4017.8,
|
||
|
|
"valid_targets_min": 736
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.843465045592705,
|
||
|
|
"grad_norm": 0.47463044811940924,
|
||
|
|
"learning_rate": 3.2438093229702905e-06,
|
||
|
|
"loss": 0.1577,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15373006463050842,
|
||
|
|
"step": 3845,
|
||
|
|
"valid_targets_mean": 4134.0,
|
||
|
|
"valid_targets_min": 410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.851063829787234,
|
||
|
|
"grad_norm": 0.513814275937601,
|
||
|
|
"learning_rate": 3.202549954545533e-06,
|
||
|
|
"loss": 0.1712,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23078633844852448,
|
||
|
|
"step": 3850,
|
||
|
|
"valid_targets_mean": 5543.1,
|
||
|
|
"valid_targets_min": 655
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.858662613981763,
|
||
|
|
"grad_norm": 0.48073524570765763,
|
||
|
|
"learning_rate": 3.161531817346062e-06,
|
||
|
|
"loss": 0.1579,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17668002843856812,
|
||
|
|
"step": 3855,
|
||
|
|
"valid_targets_mean": 4446.9,
|
||
|
|
"valid_targets_min": 449
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.866261398176292,
|
||
|
|
"grad_norm": 0.4258287069585612,
|
||
|
|
"learning_rate": 3.120755500440762e-06,
|
||
|
|
"loss": 0.1546,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14281289279460907,
|
||
|
|
"step": 3860,
|
||
|
|
"valid_targets_mean": 5058.1,
|
||
|
|
"valid_targets_min": 1100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.87386018237082,
|
||
|
|
"grad_norm": 0.4556314380528612,
|
||
|
|
"learning_rate": 3.0802215894257336e-06,
|
||
|
|
"loss": 0.1565,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15817520022392273,
|
||
|
|
"step": 3865,
|
||
|
|
"valid_targets_mean": 4869.2,
|
||
|
|
"valid_targets_min": 2426
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.88145896656535,
|
||
|
|
"grad_norm": 0.632880711013422,
|
||
|
|
"learning_rate": 3.0399306664158e-06,
|
||
|
|
"loss": 0.1443,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1440334916114807,
|
||
|
|
"step": 3870,
|
||
|
|
"valid_targets_mean": 4294.8,
|
||
|
|
"valid_targets_min": 1072
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.8890577507598785,
|
||
|
|
"grad_norm": 0.40158185981708566,
|
||
|
|
"learning_rate": 2.9998833100362336e-06,
|
||
|
|
"loss": 0.1524,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1560600996017456,
|
||
|
|
"step": 3875,
|
||
|
|
"valid_targets_mean": 5631.6,
|
||
|
|
"valid_targets_min": 1728
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.896656534954407,
|
||
|
|
"grad_norm": 0.500623763450671,
|
||
|
|
"learning_rate": 2.9600800954143572e-06,
|
||
|
|
"loss": 0.138,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1587526947259903,
|
||
|
|
"step": 3880,
|
||
|
|
"valid_targets_mean": 4028.8,
|
||
|
|
"valid_targets_min": 666
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.904255319148936,
|
||
|
|
"grad_norm": 0.4944868321896879,
|
||
|
|
"learning_rate": 2.9205215941713704e-06,
|
||
|
|
"loss": 0.1535,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16634200513362885,
|
||
|
|
"step": 3885,
|
||
|
|
"valid_targets_mean": 4611.9,
|
||
|
|
"valid_targets_min": 732
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.911854103343465,
|
||
|
|
"grad_norm": 0.5115742550534301,
|
||
|
|
"learning_rate": 2.8812083744140616e-06,
|
||
|
|
"loss": 0.1515,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15021023154258728,
|
||
|
|
"step": 3890,
|
||
|
|
"valid_targets_mean": 4236.3,
|
||
|
|
"valid_targets_min": 1104
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.919452887537994,
|
||
|
|
"grad_norm": 0.48224861462921226,
|
||
|
|
"learning_rate": 2.842141000726726e-06,
|
||
|
|
"loss": 0.1537,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1501300185918808,
|
||
|
|
"step": 3895,
|
||
|
|
"valid_targets_mean": 4213.5,
|
||
|
|
"valid_targets_min": 774
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.927051671732523,
|
||
|
|
"grad_norm": 0.4030631035629361,
|
||
|
|
"learning_rate": 2.8033200341629886e-06,
|
||
|
|
"loss": 0.1519,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12463116645812988,
|
||
|
|
"step": 3900,
|
||
|
|
"valid_targets_mean": 5398.5,
|
||
|
|
"valid_targets_min": 1514
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.934650455927052,
|
||
|
|
"grad_norm": 0.45096444866968627,
|
||
|
|
"learning_rate": 2.7647460322377927e-06,
|
||
|
|
"loss": 0.1766,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17850109934806824,
|
||
|
|
"step": 3905,
|
||
|
|
"valid_targets_mean": 5562.8,
|
||
|
|
"valid_targets_min": 569
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.942249240121581,
|
||
|
|
"grad_norm": 0.41781952891112895,
|
||
|
|
"learning_rate": 2.72641954891937e-06,
|
||
|
|
"loss": 0.1637,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.145452082157135,
|
||
|
|
"step": 3910,
|
||
|
|
"valid_targets_mean": 5237.6,
|
||
|
|
"valid_targets_min": 787
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.949848024316109,
|
||
|
|
"grad_norm": 0.462920182118315,
|
||
|
|
"learning_rate": 2.688341134621295e-06,
|
||
|
|
"loss": 0.1494,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1589956283569336,
|
||
|
|
"step": 3915,
|
||
|
|
"valid_targets_mean": 5620.2,
|
||
|
|
"valid_targets_min": 797
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.957446808510638,
|
||
|
|
"grad_norm": 0.48846090610407683,
|
||
|
|
"learning_rate": 2.6505113361945833e-06,
|
||
|
|
"loss": 0.1552,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18880805373191833,
|
||
|
|
"step": 3920,
|
||
|
|
"valid_targets_mean": 4641.4,
|
||
|
|
"valid_targets_min": 879
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.9650455927051675,
|
||
|
|
"grad_norm": 0.440227878411004,
|
||
|
|
"learning_rate": 2.612930696919822e-06,
|
||
|
|
"loss": 0.1488,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13358448445796967,
|
||
|
|
"step": 3925,
|
||
|
|
"valid_targets_mean": 4184.0,
|
||
|
|
"valid_targets_min": 559
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.972644376899696,
|
||
|
|
"grad_norm": 0.4575207059455806,
|
||
|
|
"learning_rate": 2.5755997564993894e-06,
|
||
|
|
"loss": 0.1561,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14469373226165771,
|
||
|
|
"step": 3930,
|
||
|
|
"valid_targets_mean": 4649.6,
|
||
|
|
"valid_targets_min": 2261
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.980243161094225,
|
||
|
|
"grad_norm": 0.5005233906873359,
|
||
|
|
"learning_rate": 2.5385190510496858e-06,
|
||
|
|
"loss": 0.1618,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17244486510753632,
|
||
|
|
"step": 3935,
|
||
|
|
"valid_targets_mean": 4924.8,
|
||
|
|
"valid_targets_min": 171
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.9878419452887535,
|
||
|
|
"grad_norm": 0.4458252216107338,
|
||
|
|
"learning_rate": 2.5016891130934463e-06,
|
||
|
|
"loss": 0.1457,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13665539026260376,
|
||
|
|
"step": 3940,
|
||
|
|
"valid_targets_mean": 4990.1,
|
||
|
|
"valid_targets_min": 500
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.995440729483283,
|
||
|
|
"grad_norm": 0.4846406288628859,
|
||
|
|
"learning_rate": 2.465110471552086e-06,
|
||
|
|
"loss": 0.1664,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16603650152683258,
|
||
|
|
"step": 3945,
|
||
|
|
"valid_targets_mean": 4758.2,
|
||
|
|
"valid_targets_min": 599
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.003039513677812,
|
||
|
|
"grad_norm": 0.44214905635705837,
|
||
|
|
"learning_rate": 2.4287836517381113e-06,
|
||
|
|
"loss": 0.1349,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11986207216978073,
|
||
|
|
"step": 3950,
|
||
|
|
"valid_targets_mean": 4267.4,
|
||
|
|
"valid_targets_min": 335
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.01063829787234,
|
||
|
|
"grad_norm": 0.4887571085350468,
|
||
|
|
"learning_rate": 2.392709175347554e-06,
|
||
|
|
"loss": 0.1505,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14001381397247314,
|
||
|
|
"step": 3955,
|
||
|
|
"valid_targets_mean": 4841.9,
|
||
|
|
"valid_targets_min": 797
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.018237082066869,
|
||
|
|
"grad_norm": 0.5312808060591666,
|
||
|
|
"learning_rate": 2.356887560452528e-06,
|
||
|
|
"loss": 0.1583,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17127478122711182,
|
||
|
|
"step": 3960,
|
||
|
|
"valid_targets_mean": 3657.6,
|
||
|
|
"valid_targets_min": 713
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.025835866261398,
|
||
|
|
"grad_norm": 0.49420144944896355,
|
||
|
|
"learning_rate": 2.321319321493718e-06,
|
||
|
|
"loss": 0.1538,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15153075754642487,
|
||
|
|
"step": 3965,
|
||
|
|
"valid_targets_mean": 4141.9,
|
||
|
|
"valid_targets_min": 792
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.033434650455927,
|
||
|
|
"grad_norm": 0.43348847119788875,
|
||
|
|
"learning_rate": 2.2860049692730745e-06,
|
||
|
|
"loss": 0.1506,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15093687176704407,
|
||
|
|
"step": 3970,
|
||
|
|
"valid_targets_mean": 5298.1,
|
||
|
|
"valid_targets_min": 491
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.041033434650456,
|
||
|
|
"grad_norm": 0.4852668062537144,
|
||
|
|
"learning_rate": 2.2509450109463903e-06,
|
||
|
|
"loss": 0.1462,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15192578732967377,
|
||
|
|
"step": 3975,
|
||
|
|
"valid_targets_mean": 4195.6,
|
||
|
|
"valid_targets_min": 646
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.048632218844984,
|
||
|
|
"grad_norm": 0.4985600383895827,
|
||
|
|
"learning_rate": 2.2161399500161005e-06,
|
||
|
|
"loss": 0.1423,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12650343775749207,
|
||
|
|
"step": 3980,
|
||
|
|
"valid_targets_mean": 4298.5,
|
||
|
|
"valid_targets_min": 1887
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.056231003039514,
|
||
|
|
"grad_norm": 0.46036482703942144,
|
||
|
|
"learning_rate": 2.1815902863239826e-06,
|
||
|
|
"loss": 0.1459,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12870556116104126,
|
||
|
|
"step": 3985,
|
||
|
|
"valid_targets_mean": 4722.8,
|
||
|
|
"valid_targets_min": 1027
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.0638297872340425,
|
||
|
|
"grad_norm": 0.4724257952962997,
|
||
|
|
"learning_rate": 2.1472965160440307e-06,
|
||
|
|
"loss": 0.1548,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15059977769851685,
|
||
|
|
"step": 3990,
|
||
|
|
"valid_targets_mean": 4350.8,
|
||
|
|
"valid_targets_min": 1618
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.071428571428571,
|
||
|
|
"grad_norm": 0.4736941877158723,
|
||
|
|
"learning_rate": 2.1132591316752824e-06,
|
||
|
|
"loss": 0.1446,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12970831990242004,
|
||
|
|
"step": 3995,
|
||
|
|
"valid_targets_mean": 4671.1,
|
||
|
|
"valid_targets_min": 1136
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.079027355623101,
|
||
|
|
"grad_norm": 0.4790995642913263,
|
||
|
|
"learning_rate": 2.079478622034803e-06,
|
||
|
|
"loss": 0.1506,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1599026918411255,
|
||
|
|
"step": 4000,
|
||
|
|
"valid_targets_mean": 5553.9,
|
||
|
|
"valid_targets_min": 2157
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.086626139817629,
|
||
|
|
"grad_norm": 0.5750210601223926,
|
||
|
|
"learning_rate": 2.045955472250598e-06,
|
||
|
|
"loss": 0.1557,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1329852193593979,
|
||
|
|
"step": 4005,
|
||
|
|
"valid_targets_mean": 4740.1,
|
||
|
|
"valid_targets_min": 658
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.094224924012158,
|
||
|
|
"grad_norm": 0.5356962782262873,
|
||
|
|
"learning_rate": 2.012690163754716e-06,
|
||
|
|
"loss": 0.1506,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1583370566368103,
|
||
|
|
"step": 4010,
|
||
|
|
"valid_targets_mean": 3881.9,
|
||
|
|
"valid_targets_min": 833
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.101823708206687,
|
||
|
|
"grad_norm": 0.5430593717962985,
|
||
|
|
"learning_rate": 1.9796831742762658e-06,
|
||
|
|
"loss": 0.1503,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17519140243530273,
|
||
|
|
"step": 4015,
|
||
|
|
"valid_targets_mean": 4008.7,
|
||
|
|
"valid_targets_min": 513
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.109422492401216,
|
||
|
|
"grad_norm": 0.5395476454999809,
|
||
|
|
"learning_rate": 1.9469349778346223e-06,
|
||
|
|
"loss": 0.1416,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1673513501882553,
|
||
|
|
"step": 4020,
|
||
|
|
"valid_targets_mean": 3863.8,
|
||
|
|
"valid_targets_min": 862
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.117021276595745,
|
||
|
|
"grad_norm": 0.6041814504745721,
|
||
|
|
"learning_rate": 1.9144460447325564e-06,
|
||
|
|
"loss": 0.1433,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16433076560497284,
|
||
|
|
"step": 4025,
|
||
|
|
"valid_targets_mean": 4698.3,
|
||
|
|
"valid_targets_min": 821
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.124620060790273,
|
||
|
|
"grad_norm": 0.6094776194613557,
|
||
|
|
"learning_rate": 1.8822168415495422e-06,
|
||
|
|
"loss": 0.1453,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13730981945991516,
|
||
|
|
"step": 4030,
|
||
|
|
"valid_targets_mean": 4077.6,
|
||
|
|
"valid_targets_min": 901
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.132218844984802,
|
||
|
|
"grad_norm": 0.45063574077263974,
|
||
|
|
"learning_rate": 1.8502478311349947e-06,
|
||
|
|
"loss": 0.156,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16246050596237183,
|
||
|
|
"step": 4035,
|
||
|
|
"valid_targets_mean": 5466.6,
|
||
|
|
"valid_targets_min": 2424
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.139817629179332,
|
||
|
|
"grad_norm": 0.4816199109533305,
|
||
|
|
"learning_rate": 1.8185394726016791e-06,
|
||
|
|
"loss": 0.1604,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1370609700679779,
|
||
|
|
"step": 4040,
|
||
|
|
"valid_targets_mean": 4561.9,
|
||
|
|
"valid_targets_min": 743
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.14741641337386,
|
||
|
|
"grad_norm": 0.6732958197054807,
|
||
|
|
"learning_rate": 1.7870922213190755e-06,
|
||
|
|
"loss": 0.1581,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16209891438484192,
|
||
|
|
"step": 4045,
|
||
|
|
"valid_targets_mean": 3887.8,
|
||
|
|
"valid_targets_min": 550
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.155015197568389,
|
||
|
|
"grad_norm": 0.4601061012528339,
|
||
|
|
"learning_rate": 1.7559065289068633e-06,
|
||
|
|
"loss": 0.1574,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13671234250068665,
|
||
|
|
"step": 4050,
|
||
|
|
"valid_targets_mean": 5133.4,
|
||
|
|
"valid_targets_min": 479
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.1626139817629175,
|
||
|
|
"grad_norm": 0.43746420290219207,
|
||
|
|
"learning_rate": 1.72498284322842e-06,
|
||
|
|
"loss": 0.1468,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1267484724521637,
|
||
|
|
"step": 4055,
|
||
|
|
"valid_targets_mean": 5035.9,
|
||
|
|
"valid_targets_min": 1853
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.170212765957447,
|
||
|
|
"grad_norm": 0.4569139573946025,
|
||
|
|
"learning_rate": 1.694321608384406e-06,
|
||
|
|
"loss": 0.1461,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14077520370483398,
|
||
|
|
"step": 4060,
|
||
|
|
"valid_targets_mean": 5069.5,
|
||
|
|
"valid_targets_min": 2002
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.177811550151976,
|
||
|
|
"grad_norm": 0.43274533973618445,
|
||
|
|
"learning_rate": 1.663923264706373e-06,
|
||
|
|
"loss": 0.1329,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14554743468761444,
|
||
|
|
"step": 4065,
|
||
|
|
"valid_targets_mean": 5077.8,
|
||
|
|
"valid_targets_min": 519
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.185410334346504,
|
||
|
|
"grad_norm": 0.47582107432910764,
|
||
|
|
"learning_rate": 1.6337882487504452e-06,
|
||
|
|
"loss": 0.1473,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14857840538024902,
|
||
|
|
"step": 4070,
|
||
|
|
"valid_targets_mean": 4424.7,
|
||
|
|
"valid_targets_min": 674
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.193009118541034,
|
||
|
|
"grad_norm": 0.5274167558239128,
|
||
|
|
"learning_rate": 1.603916993291048e-06,
|
||
|
|
"loss": 0.1502,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14230819046497345,
|
||
|
|
"step": 4075,
|
||
|
|
"valid_targets_mean": 3961.1,
|
||
|
|
"valid_targets_min": 654
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.2006079027355625,
|
||
|
|
"grad_norm": 0.49913070570635226,
|
||
|
|
"learning_rate": 1.5743099273146967e-06,
|
||
|
|
"loss": 0.1504,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1440865695476532,
|
||
|
|
"step": 4080,
|
||
|
|
"valid_targets_mean": 5009.5,
|
||
|
|
"valid_targets_min": 444
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.208206686930091,
|
||
|
|
"grad_norm": 0.4571022258136094,
|
||
|
|
"learning_rate": 1.5449674760138344e-06,
|
||
|
|
"loss": 0.1355,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1474510282278061,
|
||
|
|
"step": 4085,
|
||
|
|
"valid_targets_mean": 4348.4,
|
||
|
|
"valid_targets_min": 770
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.21580547112462,
|
||
|
|
"grad_norm": 0.4714848047264726,
|
||
|
|
"learning_rate": 1.5158900607807248e-06,
|
||
|
|
"loss": 0.1574,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16296353936195374,
|
||
|
|
"step": 4090,
|
||
|
|
"valid_targets_mean": 4949.5,
|
||
|
|
"valid_targets_min": 694
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.223404255319149,
|
||
|
|
"grad_norm": 0.4322780542533103,
|
||
|
|
"learning_rate": 1.4870780992013956e-06,
|
||
|
|
"loss": 0.161,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13294166326522827,
|
||
|
|
"step": 4095,
|
||
|
|
"valid_targets_mean": 4950.1,
|
||
|
|
"valid_targets_min": 329
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.231003039513678,
|
||
|
|
"grad_norm": 0.5464674366866006,
|
||
|
|
"learning_rate": 1.4585320050496531e-06,
|
||
|
|
"loss": 0.1446,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15174424648284912,
|
||
|
|
"step": 4100,
|
||
|
|
"valid_targets_mean": 4007.2,
|
||
|
|
"valid_targets_min": 834
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.238601823708207,
|
||
|
|
"grad_norm": 0.42685774893365297,
|
||
|
|
"learning_rate": 1.4302521882811316e-06,
|
||
|
|
"loss": 0.1441,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1383514404296875,
|
||
|
|
"step": 4105,
|
||
|
|
"valid_targets_mean": 5403.9,
|
||
|
|
"valid_targets_min": 918
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.246200607902735,
|
||
|
|
"grad_norm": 0.5366963612937214,
|
||
|
|
"learning_rate": 1.4022390550274034e-06,
|
||
|
|
"loss": 0.1468,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15787436068058014,
|
||
|
|
"step": 4110,
|
||
|
|
"valid_targets_mean": 3990.0,
|
||
|
|
"valid_targets_min": 434
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.253799392097265,
|
||
|
|
"grad_norm": 0.46724324680744234,
|
||
|
|
"learning_rate": 1.3744930075901563e-06,
|
||
|
|
"loss": 0.1529,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14154699444770813,
|
||
|
|
"step": 4115,
|
||
|
|
"valid_targets_mean": 4727.9,
|
||
|
|
"valid_targets_min": 588
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.261398176291793,
|
||
|
|
"grad_norm": 0.41711994711720174,
|
||
|
|
"learning_rate": 1.3470144444354061e-06,
|
||
|
|
"loss": 0.1499,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1419452279806137,
|
||
|
|
"step": 4120,
|
||
|
|
"valid_targets_mean": 5510.2,
|
||
|
|
"valid_targets_min": 713
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.268996960486322,
|
||
|
|
"grad_norm": 0.5220642496809625,
|
||
|
|
"learning_rate": 1.3198037601877789e-06,
|
||
|
|
"loss": 0.1511,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1353391706943512,
|
||
|
|
"step": 4125,
|
||
|
|
"valid_targets_mean": 4115.4,
|
||
|
|
"valid_targets_min": 450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.276595744680851,
|
||
|
|
"grad_norm": 0.4939240759429581,
|
||
|
|
"learning_rate": 1.2928613456248473e-06,
|
||
|
|
"loss": 0.1614,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15414312481880188,
|
||
|
|
"step": 4130,
|
||
|
|
"valid_targets_mean": 5176.1,
|
||
|
|
"valid_targets_min": 571
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.28419452887538,
|
||
|
|
"grad_norm": 0.442808139834025,
|
||
|
|
"learning_rate": 1.266187587671508e-06,
|
||
|
|
"loss": 0.1484,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15302585065364838,
|
||
|
|
"step": 4135,
|
||
|
|
"valid_targets_mean": 5258.6,
|
||
|
|
"valid_targets_min": 2495
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.291793313069909,
|
||
|
|
"grad_norm": 0.4548322696937593,
|
||
|
|
"learning_rate": 1.2397828693944346e-06,
|
||
|
|
"loss": 0.1469,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14306071400642395,
|
||
|
|
"step": 4140,
|
||
|
|
"valid_targets_mean": 4863.1,
|
||
|
|
"valid_targets_min": 1948
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.2993920972644375,
|
||
|
|
"grad_norm": 0.46459072507225524,
|
||
|
|
"learning_rate": 1.2136475699965766e-06,
|
||
|
|
"loss": 0.1448,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1553143858909607,
|
||
|
|
"step": 4145,
|
||
|
|
"valid_targets_mean": 4614.7,
|
||
|
|
"valid_targets_min": 816
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.306990881458967,
|
||
|
|
"grad_norm": 0.4740383536049804,
|
||
|
|
"learning_rate": 1.1877820648117045e-06,
|
||
|
|
"loss": 0.1487,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13636600971221924,
|
||
|
|
"step": 4150,
|
||
|
|
"valid_targets_mean": 4426.7,
|
||
|
|
"valid_targets_min": 668
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.314589665653496,
|
||
|
|
"grad_norm": 0.6354900899788738,
|
||
|
|
"learning_rate": 1.162186725299026e-06,
|
||
|
|
"loss": 0.1596,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.155159130692482,
|
||
|
|
"step": 4155,
|
||
|
|
"valid_targets_mean": 3329.7,
|
||
|
|
"valid_targets_min": 585
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.322188449848024,
|
||
|
|
"grad_norm": 0.4835553520976796,
|
||
|
|
"learning_rate": 1.1368619190378527e-06,
|
||
|
|
"loss": 0.1577,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17658662796020508,
|
||
|
|
"step": 4160,
|
||
|
|
"valid_targets_mean": 4895.8,
|
||
|
|
"valid_targets_min": 1482
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.329787234042553,
|
||
|
|
"grad_norm": 0.4554986467408132,
|
||
|
|
"learning_rate": 1.1118080097223194e-06,
|
||
|
|
"loss": 0.1577,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15265491604804993,
|
||
|
|
"step": 4165,
|
||
|
|
"valid_targets_mean": 5167.2,
|
||
|
|
"valid_targets_min": 1292
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.3373860182370825,
|
||
|
|
"grad_norm": 0.41973694830585506,
|
||
|
|
"learning_rate": 1.0870253571561595e-06,
|
||
|
|
"loss": 0.1551,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12299495190382004,
|
||
|
|
"step": 4170,
|
||
|
|
"valid_targets_mean": 4780.7,
|
||
|
|
"valid_targets_min": 970
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.344984802431611,
|
||
|
|
"grad_norm": 0.5118900681918644,
|
||
|
|
"learning_rate": 1.0625143172475404e-06,
|
||
|
|
"loss": 0.1589,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15326988697052002,
|
||
|
|
"step": 4175,
|
||
|
|
"valid_targets_mean": 4208.6,
|
||
|
|
"valid_targets_min": 636
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.35258358662614,
|
||
|
|
"grad_norm": 0.49642090601129557,
|
||
|
|
"learning_rate": 1.0382752420039455e-06,
|
||
|
|
"loss": 0.1536,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15455183386802673,
|
||
|
|
"step": 4180,
|
||
|
|
"valid_targets_mean": 4102.3,
|
||
|
|
"valid_targets_min": 997
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.360182370820668,
|
||
|
|
"grad_norm": 0.45380918848581747,
|
||
|
|
"learning_rate": 1.0143084795271329e-06,
|
||
|
|
"loss": 0.1553,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15013962984085083,
|
||
|
|
"step": 4185,
|
||
|
|
"valid_targets_mean": 4628.6,
|
||
|
|
"valid_targets_min": 1572
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.367781155015198,
|
||
|
|
"grad_norm": 0.5198176682483998,
|
||
|
|
"learning_rate": 9.906143740081232e-07,
|
||
|
|
"loss": 0.1465,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15279626846313477,
|
||
|
|
"step": 4190,
|
||
|
|
"valid_targets_mean": 3914.1,
|
||
|
|
"valid_targets_min": 365
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.375379939209727,
|
||
|
|
"grad_norm": 0.4840441603322201,
|
||
|
|
"learning_rate": 9.671932657222593e-07,
|
||
|
|
"loss": 0.1497,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14245177805423737,
|
||
|
|
"step": 4195,
|
||
|
|
"valid_targets_mean": 4812.6,
|
||
|
|
"valid_targets_min": 370
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.382978723404255,
|
||
|
|
"grad_norm": 0.49127649884609786,
|
||
|
|
"learning_rate": 9.440454910243235e-07,
|
||
|
|
"loss": 0.1599,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1520758867263794,
|
||
|
|
"step": 4200,
|
||
|
|
"valid_targets_mean": 4315.1,
|
||
|
|
"valid_targets_min": 517
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.390577507598784,
|
||
|
|
"grad_norm": 0.4383379165078549,
|
||
|
|
"learning_rate": 9.211713823437063e-07,
|
||
|
|
"loss": 0.135,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1275199055671692,
|
||
|
|
"step": 4205,
|
||
|
|
"valid_targets_mean": 5704.6,
|
||
|
|
"valid_targets_min": 581
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.398176291793313,
|
||
|
|
"grad_norm": 0.5022657150694129,
|
||
|
|
"learning_rate": 8.985712681796288e-07,
|
||
|
|
"loss": 0.1528,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17914274334907532,
|
||
|
|
"step": 4210,
|
||
|
|
"valid_targets_mean": 4508.4,
|
||
|
|
"valid_targets_min": 640
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.405775075987842,
|
||
|
|
"grad_norm": 0.443057452645931,
|
||
|
|
"learning_rate": 8.762454730964265e-07,
|
||
|
|
"loss": 0.1503,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15875935554504395,
|
||
|
|
"step": 4215,
|
||
|
|
"valid_targets_mean": 5279.9,
|
||
|
|
"valid_targets_min": 972
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.413373860182371,
|
||
|
|
"grad_norm": 0.5449589653106505,
|
||
|
|
"learning_rate": 8.541943177188882e-07,
|
||
|
|
"loss": 0.1492,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14582566916942596,
|
||
|
|
"step": 4220,
|
||
|
|
"valid_targets_mean": 4343.2,
|
||
|
|
"valid_targets_min": 892
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.420972644376899,
|
||
|
|
"grad_norm": 0.4997679647771693,
|
||
|
|
"learning_rate": 8.324181187276581e-07,
|
||
|
|
"loss": 0.1643,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15510517358779907,
|
||
|
|
"step": 4225,
|
||
|
|
"valid_targets_mean": 4085.7,
|
||
|
|
"valid_targets_min": 912
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.428571428571429,
|
||
|
|
"grad_norm": 0.44678133251760943,
|
||
|
|
"learning_rate": 8.109171888546763e-07,
|
||
|
|
"loss": 0.1554,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15751153230667114,
|
||
|
|
"step": 4230,
|
||
|
|
"valid_targets_mean": 5778.9,
|
||
|
|
"valid_targets_min": 1954
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.4361702127659575,
|
||
|
|
"grad_norm": 0.46788177391921826,
|
||
|
|
"learning_rate": 7.896918368786921e-07,
|
||
|
|
"loss": 0.16,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15475589036941528,
|
||
|
|
"step": 4235,
|
||
|
|
"valid_targets_mean": 5169.8,
|
||
|
|
"valid_targets_min": 398
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.443768996960486,
|
||
|
|
"grad_norm": 0.5529538967645823,
|
||
|
|
"learning_rate": 7.687423676208361e-07,
|
||
|
|
"loss": 0.1455,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13819439709186554,
|
||
|
|
"step": 4240,
|
||
|
|
"valid_targets_mean": 3202.6,
|
||
|
|
"valid_targets_min": 528
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.451367781155016,
|
||
|
|
"grad_norm": 0.5420949591531663,
|
||
|
|
"learning_rate": 7.480690819402348e-07,
|
||
|
|
"loss": 0.1493,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1723976731300354,
|
||
|
|
"step": 4245,
|
||
|
|
"valid_targets_mean": 3846.0,
|
||
|
|
"valid_targets_min": 630
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.458966565349544,
|
||
|
|
"grad_norm": 0.46741861641823984,
|
||
|
|
"learning_rate": 7.276722767296873e-07,
|
||
|
|
"loss": 0.1677,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1849384605884552,
|
||
|
|
"step": 4250,
|
||
|
|
"valid_targets_mean": 5119.9,
|
||
|
|
"valid_targets_min": 883
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.466565349544073,
|
||
|
|
"grad_norm": 0.4582713436855528,
|
||
|
|
"learning_rate": 7.075522449114158e-07,
|
||
|
|
"loss": 0.1431,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15820877254009247,
|
||
|
|
"step": 4255,
|
||
|
|
"valid_targets_mean": 5030.8,
|
||
|
|
"valid_targets_min": 802
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.474164133738602,
|
||
|
|
"grad_norm": 0.5211860109572727,
|
||
|
|
"learning_rate": 6.877092754328419e-07,
|
||
|
|
"loss": 0.1582,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13718628883361816,
|
||
|
|
"step": 4260,
|
||
|
|
"valid_targets_mean": 3807.9,
|
||
|
|
"valid_targets_min": 845
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.481762917933131,
|
||
|
|
"grad_norm": 0.45451059770584273,
|
||
|
|
"learning_rate": 6.681436532624474e-07,
|
||
|
|
"loss": 0.1487,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1656309813261032,
|
||
|
|
"step": 4265,
|
||
|
|
"valid_targets_mean": 5183.2,
|
||
|
|
"valid_targets_min": 1002
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.48936170212766,
|
||
|
|
"grad_norm": 0.47843071748812777,
|
||
|
|
"learning_rate": 6.488556593856809e-07,
|
||
|
|
"loss": 0.1594,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.150562584400177,
|
||
|
|
"step": 4270,
|
||
|
|
"valid_targets_mean": 4543.6,
|
||
|
|
"valid_targets_min": 656
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.496960486322188,
|
||
|
|
"grad_norm": 0.5319539824885515,
|
||
|
|
"learning_rate": 6.298455708009176e-07,
|
||
|
|
"loss": 0.1497,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15512758493423462,
|
||
|
|
"step": 4275,
|
||
|
|
"valid_targets_mean": 3799.5,
|
||
|
|
"valid_targets_min": 395
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.504559270516717,
|
||
|
|
"grad_norm": 0.4705596941066166,
|
||
|
|
"learning_rate": 6.111136605154877e-07,
|
||
|
|
"loss": 0.1457,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16856907308101654,
|
||
|
|
"step": 4280,
|
||
|
|
"valid_targets_mean": 4642.4,
|
||
|
|
"valid_targets_min": 589
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.5121580547112465,
|
||
|
|
"grad_norm": 0.46141711207212177,
|
||
|
|
"learning_rate": 5.926601975417501e-07,
|
||
|
|
"loss": 0.1504,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17225798964500427,
|
||
|
|
"step": 4285,
|
||
|
|
"valid_targets_mean": 4939.4,
|
||
|
|
"valid_targets_min": 706
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.519756838905775,
|
||
|
|
"grad_norm": 0.4495104074555779,
|
||
|
|
"learning_rate": 5.744854468932315e-07,
|
||
|
|
"loss": 0.1541,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1445414125919342,
|
||
|
|
"step": 4290,
|
||
|
|
"valid_targets_mean": 4603.4,
|
||
|
|
"valid_targets_min": 810
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.527355623100304,
|
||
|
|
"grad_norm": 0.5733777706336946,
|
||
|
|
"learning_rate": 5.565896695808203e-07,
|
||
|
|
"loss": 0.1705,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16275358200073242,
|
||
|
|
"step": 4295,
|
||
|
|
"valid_targets_mean": 4785.0,
|
||
|
|
"valid_targets_min": 599
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.5349544072948325,
|
||
|
|
"grad_norm": 0.8991578623134154,
|
||
|
|
"learning_rate": 5.389731226090189e-07,
|
||
|
|
"loss": 0.144,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14771892130374908,
|
||
|
|
"step": 4300,
|
||
|
|
"valid_targets_mean": 4996.3,
|
||
|
|
"valid_targets_min": 981
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.542553191489362,
|
||
|
|
"grad_norm": 0.4730812664882942,
|
||
|
|
"learning_rate": 5.216360589722546e-07,
|
||
|
|
"loss": 0.134,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12634432315826416,
|
||
|
|
"step": 4305,
|
||
|
|
"valid_targets_mean": 4793.1,
|
||
|
|
"valid_targets_min": 689
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.550151975683891,
|
||
|
|
"grad_norm": 0.7912559796750507,
|
||
|
|
"learning_rate": 5.045787276512371e-07,
|
||
|
|
"loss": 0.1406,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14239290356636047,
|
||
|
|
"step": 4310,
|
||
|
|
"valid_targets_mean": 4268.8,
|
||
|
|
"valid_targets_min": 558
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.557750759878419,
|
||
|
|
"grad_norm": 0.5029546562969596,
|
||
|
|
"learning_rate": 4.878013736093979e-07,
|
||
|
|
"loss": 0.1436,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1558806449174881,
|
||
|
|
"step": 4315,
|
||
|
|
"valid_targets_mean": 4656.5,
|
||
|
|
"valid_targets_min": 767
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.565349544072948,
|
||
|
|
"grad_norm": 0.4604621873568329,
|
||
|
|
"learning_rate": 4.713042377893562e-07,
|
||
|
|
"loss": 0.1377,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1444740742444992,
|
||
|
|
"step": 4320,
|
||
|
|
"valid_targets_mean": 4422.7,
|
||
|
|
"valid_targets_min": 346
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.572948328267477,
|
||
|
|
"grad_norm": 0.4561980791707648,
|
||
|
|
"learning_rate": 4.550875571094726e-07,
|
||
|
|
"loss": 0.1712,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14359544217586517,
|
||
|
|
"step": 4325,
|
||
|
|
"valid_targets_mean": 4979.9,
|
||
|
|
"valid_targets_min": 2820
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.580547112462006,
|
||
|
|
"grad_norm": 0.47209358910428856,
|
||
|
|
"learning_rate": 4.391515644604383e-07,
|
||
|
|
"loss": 0.1502,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1348213255405426,
|
||
|
|
"step": 4330,
|
||
|
|
"valid_targets_mean": 4494.7,
|
||
|
|
"valid_targets_min": 589
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.588145896656535,
|
||
|
|
"grad_norm": 0.4994707929739602,
|
||
|
|
"learning_rate": 4.2349648870193103e-07,
|
||
|
|
"loss": 0.1461,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15195885300636292,
|
||
|
|
"step": 4335,
|
||
|
|
"valid_targets_mean": 4239.8,
|
||
|
|
"valid_targets_min": 307
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.595744680851064,
|
||
|
|
"grad_norm": 0.44042761045131223,
|
||
|
|
"learning_rate": 4.081225546593337e-07,
|
||
|
|
"loss": 0.1513,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.129265695810318,
|
||
|
|
"step": 4340,
|
||
|
|
"valid_targets_mean": 5283.0,
|
||
|
|
"valid_targets_min": 1159
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.603343465045593,
|
||
|
|
"grad_norm": 0.47257646281879734,
|
||
|
|
"learning_rate": 3.9302998312049865e-07,
|
||
|
|
"loss": 0.1543,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14243176579475403,
|
||
|
|
"step": 4345,
|
||
|
|
"valid_targets_mean": 5513.4,
|
||
|
|
"valid_targets_min": 1072
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.6109422492401215,
|
||
|
|
"grad_norm": 0.5058368728464357,
|
||
|
|
"learning_rate": 3.782189908325817e-07,
|
||
|
|
"loss": 0.1492,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14209382236003876,
|
||
|
|
"step": 4350,
|
||
|
|
"valid_targets_mean": 4845.4,
|
||
|
|
"valid_targets_min": 876
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.61854103343465,
|
||
|
|
"grad_norm": 0.7222155225557564,
|
||
|
|
"learning_rate": 3.636897904989312e-07,
|
||
|
|
"loss": 0.1581,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17290833592414856,
|
||
|
|
"step": 4355,
|
||
|
|
"valid_targets_mean": 5422.5,
|
||
|
|
"valid_targets_min": 833
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.62613981762918,
|
||
|
|
"grad_norm": 0.5233180981965705,
|
||
|
|
"learning_rate": 3.494425907760235e-07,
|
||
|
|
"loss": 0.1507,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1492675244808197,
|
||
|
|
"step": 4360,
|
||
|
|
"valid_targets_mean": 4823.8,
|
||
|
|
"valid_targets_min": 1224
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.633738601823708,
|
||
|
|
"grad_norm": 0.5428953839010765,
|
||
|
|
"learning_rate": 3.3547759627047927e-07,
|
||
|
|
"loss": 0.1585,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16122207045555115,
|
||
|
|
"step": 4365,
|
||
|
|
"valid_targets_mean": 3899.6,
|
||
|
|
"valid_targets_min": 753
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.641337386018237,
|
||
|
|
"grad_norm": 0.4419180172745714,
|
||
|
|
"learning_rate": 3.2179500753611423e-07,
|
||
|
|
"loss": 0.1436,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.135797381401062,
|
||
|
|
"step": 4370,
|
||
|
|
"valid_targets_mean": 5275.6,
|
||
|
|
"valid_targets_min": 861
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.648936170212766,
|
||
|
|
"grad_norm": 0.4922555679423693,
|
||
|
|
"learning_rate": 3.0839502107106625e-07,
|
||
|
|
"loss": 0.1535,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13909777998924255,
|
||
|
|
"step": 4375,
|
||
|
|
"valid_targets_mean": 3951.1,
|
||
|
|
"valid_targets_min": 702
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.656534954407295,
|
||
|
|
"grad_norm": 0.5087072512502506,
|
||
|
|
"learning_rate": 2.952778293149705e-07,
|
||
|
|
"loss": 0.1534,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16498416662216187,
|
||
|
|
"step": 4380,
|
||
|
|
"valid_targets_mean": 4312.6,
|
||
|
|
"valid_targets_min": 429
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.664133738601824,
|
||
|
|
"grad_norm": 0.4560459158918358,
|
||
|
|
"learning_rate": 2.8244362064619777e-07,
|
||
|
|
"loss": 0.1542,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13717833161354065,
|
||
|
|
"step": 4385,
|
||
|
|
"valid_targets_mean": 5210.4,
|
||
|
|
"valid_targets_min": 864
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.671732522796352,
|
||
|
|
"grad_norm": 0.5227653302420172,
|
||
|
|
"learning_rate": 2.698925793791407e-07,
|
||
|
|
"loss": 0.1476,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13961048424243927,
|
||
|
|
"step": 4390,
|
||
|
|
"valid_targets_mean": 4236.1,
|
||
|
|
"valid_targets_min": 534
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.679331306990882,
|
||
|
|
"grad_norm": 0.6213258302004528,
|
||
|
|
"learning_rate": 2.576248857615826e-07,
|
||
|
|
"loss": 0.148,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14259956777095795,
|
||
|
|
"step": 4395,
|
||
|
|
"valid_targets_mean": 4035.6,
|
||
|
|
"valid_targets_min": 752
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.686930091185411,
|
||
|
|
"grad_norm": 0.4688652989866426,
|
||
|
|
"learning_rate": 2.4564071597209304e-07,
|
||
|
|
"loss": 0.1579,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15681865811347961,
|
||
|
|
"step": 4400,
|
||
|
|
"valid_targets_mean": 4584.2,
|
||
|
|
"valid_targets_min": 869
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.694528875379939,
|
||
|
|
"grad_norm": 0.44005419382672806,
|
||
|
|
"learning_rate": 2.3394024211750964e-07,
|
||
|
|
"loss": 0.1589,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1523476541042328,
|
||
|
|
"step": 4405,
|
||
|
|
"valid_targets_mean": 4952.2,
|
||
|
|
"valid_targets_min": 646
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.702127659574468,
|
||
|
|
"grad_norm": 0.5205262877358279,
|
||
|
|
"learning_rate": 2.2252363223045358e-07,
|
||
|
|
"loss": 0.1474,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16646242141723633,
|
||
|
|
"step": 4410,
|
||
|
|
"valid_targets_mean": 4151.1,
|
||
|
|
"valid_targets_min": 820
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.7097264437689965,
|
||
|
|
"grad_norm": 0.40766981515095174,
|
||
|
|
"learning_rate": 2.1139105026693586e-07,
|
||
|
|
"loss": 0.139,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11196255683898926,
|
||
|
|
"step": 4415,
|
||
|
|
"valid_targets_mean": 5557.6,
|
||
|
|
"valid_targets_min": 2410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.717325227963526,
|
||
|
|
"grad_norm": 0.4673986831108265,
|
||
|
|
"learning_rate": 2.0054265610397916e-07,
|
||
|
|
"loss": 0.151,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1210668534040451,
|
||
|
|
"step": 4420,
|
||
|
|
"valid_targets_mean": 3992.8,
|
||
|
|
"valid_targets_min": 936
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.724924012158055,
|
||
|
|
"grad_norm": 0.5619671678982381,
|
||
|
|
"learning_rate": 1.8997860553733981e-07,
|
||
|
|
"loss": 0.155,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17317882180213928,
|
||
|
|
"step": 4425,
|
||
|
|
"valid_targets_mean": 3874.4,
|
||
|
|
"valid_targets_min": 446
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.732522796352583,
|
||
|
|
"grad_norm": 0.4280630319229206,
|
||
|
|
"learning_rate": 1.7969905027926504e-07,
|
||
|
|
"loss": 0.1758,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1904495656490326,
|
||
|
|
"step": 4430,
|
||
|
|
"valid_targets_mean": 6671.5,
|
||
|
|
"valid_targets_min": 907
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.740121580547113,
|
||
|
|
"grad_norm": 0.44122684787277994,
|
||
|
|
"learning_rate": 1.6970413795631025e-07,
|
||
|
|
"loss": 0.1528,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14559206366539001,
|
||
|
|
"step": 4435,
|
||
|
|
"valid_targets_mean": 5373.0,
|
||
|
|
"valid_targets_min": 595
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.7477203647416415,
|
||
|
|
"grad_norm": 0.5080820811124032,
|
||
|
|
"learning_rate": 1.5999401210722075e-07,
|
||
|
|
"loss": 0.1423,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1340736597776413,
|
||
|
|
"step": 4440,
|
||
|
|
"valid_targets_mean": 3668.2,
|
||
|
|
"valid_targets_min": 694
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.75531914893617,
|
||
|
|
"grad_norm": 0.5257059339394762,
|
||
|
|
"learning_rate": 1.5056881218088016e-07,
|
||
|
|
"loss": 0.1471,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15984541177749634,
|
||
|
|
"step": 4445,
|
||
|
|
"valid_targets_mean": 4337.5,
|
||
|
|
"valid_targets_min": 498
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.762917933130699,
|
||
|
|
"grad_norm": 0.43327223465331344,
|
||
|
|
"learning_rate": 1.4142867353428514e-07,
|
||
|
|
"loss": 0.1488,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.157816082239151,
|
||
|
|
"step": 4450,
|
||
|
|
"valid_targets_mean": 5491.9,
|
||
|
|
"valid_targets_min": 827
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.770516717325228,
|
||
|
|
"grad_norm": 0.50219861891989,
|
||
|
|
"learning_rate": 1.3257372743063157e-07,
|
||
|
|
"loss": 0.1447,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.11714717745780945,
|
||
|
|
"step": 4455,
|
||
|
|
"valid_targets_mean": 3812.8,
|
||
|
|
"valid_targets_min": 502
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.778115501519757,
|
||
|
|
"grad_norm": 0.45055666905824504,
|
||
|
|
"learning_rate": 1.2400410103740045e-07,
|
||
|
|
"loss": 0.136,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14584630727767944,
|
||
|
|
"step": 4460,
|
||
|
|
"valid_targets_mean": 5178.9,
|
||
|
|
"valid_targets_min": 516
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.785714285714286,
|
||
|
|
"grad_norm": 0.43952764279896794,
|
||
|
|
"learning_rate": 1.157199174245549e-07,
|
||
|
|
"loss": 0.1411,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13095200061798096,
|
||
|
|
"step": 4465,
|
||
|
|
"valid_targets_mean": 5314.9,
|
||
|
|
"valid_targets_min": 1217
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.793313069908814,
|
||
|
|
"grad_norm": 0.4738968512497199,
|
||
|
|
"learning_rate": 1.0772129556275268e-07,
|
||
|
|
"loss": 0.1442,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12924160063266754,
|
||
|
|
"step": 4470,
|
||
|
|
"valid_targets_mean": 4845.8,
|
||
|
|
"valid_targets_min": 843
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.800911854103344,
|
||
|
|
"grad_norm": 0.5060295374260998,
|
||
|
|
"learning_rate": 1.0000835032165645e-07,
|
||
|
|
"loss": 0.1536,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1658594310283661,
|
||
|
|
"step": 4475,
|
||
|
|
"valid_targets_mean": 4230.7,
|
||
|
|
"valid_targets_min": 915
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.808510638297872,
|
||
|
|
"grad_norm": 0.4466023053432,
|
||
|
|
"learning_rate": 9.258119246826625e-08,
|
||
|
|
"loss": 0.1447,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1489754319190979,
|
||
|
|
"step": 4480,
|
||
|
|
"valid_targets_mean": 5316.6,
|
||
|
|
"valid_targets_min": 2627
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.816109422492401,
|
||
|
|
"grad_norm": 0.46338292936158193,
|
||
|
|
"learning_rate": 8.543992866534734e-08,
|
||
|
|
"loss": 0.1511,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14392951130867004,
|
||
|
|
"step": 4485,
|
||
|
|
"valid_targets_mean": 4956.6,
|
||
|
|
"valid_targets_min": 409
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.823708206686931,
|
||
|
|
"grad_norm": 0.5158394667496263,
|
||
|
|
"learning_rate": 7.858466146988042e-08,
|
||
|
|
"loss": 0.1488,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16497617959976196,
|
||
|
|
"step": 4490,
|
||
|
|
"valid_targets_mean": 4279.7,
|
||
|
|
"valid_targets_min": 775
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.831306990881459,
|
||
|
|
"grad_norm": 0.42417696152695383,
|
||
|
|
"learning_rate": 7.201548933160275e-08,
|
||
|
|
"loss": 0.1478,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14127472043037415,
|
||
|
|
"step": 4495,
|
||
|
|
"valid_targets_mean": 5479.2,
|
||
|
|
"valid_targets_min": 2631
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.838905775075988,
|
||
|
|
"grad_norm": 0.5211912905058261,
|
||
|
|
"learning_rate": 6.573250659158481e-08,
|
||
|
|
"loss": 0.1543,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14878888428211212,
|
||
|
|
"step": 4500,
|
||
|
|
"valid_targets_mean": 4435.9,
|
||
|
|
"valid_targets_min": 2412
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.8465045592705165,
|
||
|
|
"grad_norm": 0.43147229360794226,
|
||
|
|
"learning_rate": 5.973580348088259e-08,
|
||
|
|
"loss": 0.154,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15211549401283264,
|
||
|
|
"step": 4505,
|
||
|
|
"valid_targets_mean": 5367.4,
|
||
|
|
"valid_targets_min": 2364
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.854103343465045,
|
||
|
|
"grad_norm": 0.4561025698955299,
|
||
|
|
"learning_rate": 5.4025466119234094e-08,
|
||
|
|
"loss": 0.152,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13766488432884216,
|
||
|
|
"step": 4510,
|
||
|
|
"valid_targets_mean": 5133.6,
|
||
|
|
"valid_targets_min": 655
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.861702127659575,
|
||
|
|
"grad_norm": 0.4670200688124053,
|
||
|
|
"learning_rate": 4.860157651383146e-08,
|
||
|
|
"loss": 0.1457,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12196432799100876,
|
||
|
|
"step": 4515,
|
||
|
|
"valid_targets_mean": 5354.1,
|
||
|
|
"valid_targets_min": 1941
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.869300911854103,
|
||
|
|
"grad_norm": 0.46811141014194185,
|
||
|
|
"learning_rate": 4.346421255813527e-08,
|
||
|
|
"loss": 0.1485,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1508735716342926,
|
||
|
|
"step": 4520,
|
||
|
|
"valid_targets_mean": 4876.9,
|
||
|
|
"valid_targets_min": 647
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.876899696048632,
|
||
|
|
"grad_norm": 0.5309409348069238,
|
||
|
|
"learning_rate": 3.8613448030759836e-08,
|
||
|
|
"loss": 0.1449,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1319991499185562,
|
||
|
|
"step": 4525,
|
||
|
|
"valid_targets_mean": 4153.0,
|
||
|
|
"valid_targets_min": 558
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.8844984802431615,
|
||
|
|
"grad_norm": 0.4583034610783906,
|
||
|
|
"learning_rate": 3.404935259441633e-08,
|
||
|
|
"loss": 0.1482,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1385440230369568,
|
||
|
|
"step": 4530,
|
||
|
|
"valid_targets_mean": 5040.1,
|
||
|
|
"valid_targets_min": 666
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.89209726443769,
|
||
|
|
"grad_norm": 0.5760708842558298,
|
||
|
|
"learning_rate": 2.977199179490686e-08,
|
||
|
|
"loss": 0.152,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16344650089740753,
|
||
|
|
"step": 4535,
|
||
|
|
"valid_targets_mean": 4789.9,
|
||
|
|
"valid_targets_min": 774
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.899696048632219,
|
||
|
|
"grad_norm": 0.5268446726565859,
|
||
|
|
"learning_rate": 2.5781427060183052e-08,
|
||
|
|
"loss": 0.1543,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1606959104537964,
|
||
|
|
"step": 4540,
|
||
|
|
"valid_targets_mean": 4283.8,
|
||
|
|
"valid_targets_min": 229
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.907294832826747,
|
||
|
|
"grad_norm": 0.4734646023589767,
|
||
|
|
"learning_rate": 2.2077715699468928e-08,
|
||
|
|
"loss": 0.1558,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14003373682498932,
|
||
|
|
"step": 4545,
|
||
|
|
"valid_targets_mean": 4974.8,
|
||
|
|
"valid_targets_min": 773
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.914893617021277,
|
||
|
|
"grad_norm": 0.4360796739556199,
|
||
|
|
"learning_rate": 1.8660910902434936e-08,
|
||
|
|
"loss": 0.1501,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12974536418914795,
|
||
|
|
"step": 4550,
|
||
|
|
"valid_targets_mean": 5910.8,
|
||
|
|
"valid_targets_min": 1114
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.922492401215806,
|
||
|
|
"grad_norm": 0.48684145305746557,
|
||
|
|
"learning_rate": 1.5531061738436327e-08,
|
||
|
|
"loss": 0.1561,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15198959410190582,
|
||
|
|
"step": 4555,
|
||
|
|
"valid_targets_mean": 3936.3,
|
||
|
|
"valid_targets_min": 1779
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.930091185410334,
|
||
|
|
"grad_norm": 0.5091294127835059,
|
||
|
|
"learning_rate": 1.2688213155802598e-08,
|
||
|
|
"loss": 0.1589,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1628214567899704,
|
||
|
|
"step": 4560,
|
||
|
|
"valid_targets_mean": 4214.5,
|
||
|
|
"valid_targets_min": 683
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.937689969604863,
|
||
|
|
"grad_norm": 0.43005172219863,
|
||
|
|
"learning_rate": 1.0132405981195804e-08,
|
||
|
|
"loss": 0.1287,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13487809896469116,
|
||
|
|
"step": 4565,
|
||
|
|
"valid_targets_mean": 5104.1,
|
||
|
|
"valid_targets_min": 2548
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.945288753799392,
|
||
|
|
"grad_norm": 0.5851616543398429,
|
||
|
|
"learning_rate": 7.863676919031005e-09,
|
||
|
|
"loss": 0.1459,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13807334005832672,
|
||
|
|
"step": 4570,
|
||
|
|
"valid_targets_mean": 4572.6,
|
||
|
|
"valid_targets_min": 792
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.952887537993921,
|
||
|
|
"grad_norm": 0.5007425137471639,
|
||
|
|
"learning_rate": 5.882058550932268e-09,
|
||
|
|
"loss": 0.1427,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1591353416442871,
|
||
|
|
"step": 4575,
|
||
|
|
"valid_targets_mean": 3956.2,
|
||
|
|
"valid_targets_min": 447
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.96048632218845,
|
||
|
|
"grad_norm": 0.5253135197553259,
|
||
|
|
"learning_rate": 4.187579335281911e-09,
|
||
|
|
"loss": 0.1618,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16565826535224915,
|
||
|
|
"step": 4580,
|
||
|
|
"valid_targets_mean": 3869.7,
|
||
|
|
"valid_targets_min": 1117
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.968085106382979,
|
||
|
|
"grad_norm": 0.5136974200238252,
|
||
|
|
"learning_rate": 2.780263606805278e-09,
|
||
|
|
"loss": 0.1449,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1372585892677307,
|
||
|
|
"step": 4585,
|
||
|
|
"valid_targets_mean": 4006.6,
|
||
|
|
"valid_targets_min": 538
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.975683890577508,
|
||
|
|
"grad_norm": 0.5478996499330996,
|
||
|
|
"learning_rate": 1.6601315762154735e-09,
|
||
|
|
"loss": 0.1487,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1610221564769745,
|
||
|
|
"step": 4590,
|
||
|
|
"valid_targets_mean": 4602.3,
|
||
|
|
"valid_targets_min": 741
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.9832826747720365,
|
||
|
|
"grad_norm": 0.48873947966074016,
|
||
|
|
"learning_rate": 8.271993299358017e-10,
|
||
|
|
"loss": 0.169,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16153308749198914,
|
||
|
|
"step": 4595,
|
||
|
|
"valid_targets_mean": 4922.5,
|
||
|
|
"valid_targets_min": 589
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.990881458966565,
|
||
|
|
"grad_norm": 0.484585895107589,
|
||
|
|
"learning_rate": 2.814788298532989e-10,
|
||
|
|
"loss": 0.1529,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14617104828357697,
|
||
|
|
"step": 4600,
|
||
|
|
"valid_targets_mean": 4203.9,
|
||
|
|
"valid_targets_min": 714
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.998480243161094,
|
||
|
|
"grad_norm": 0.4567401537531439,
|
||
|
|
"learning_rate": 2.2977913158861444e-11,
|
||
|
|
"loss": 0.1517,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15201044082641602,
|
||
|
|
"step": 4605,
|
||
|
|
"valid_targets_mean": 5189.9,
|
||
|
|
"valid_targets_min": 511
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 7.0,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1645904779434204,
|
||
|
|
"step": 4606,
|
||
|
|
"total_flos": 1702570888593408.0,
|
||
|
|
"train_loss": 0.19200077259858897,
|
||
|
|
"train_runtime": 28350.7795,
|
||
|
|
"train_samples_per_second": 2.597,
|
||
|
|
"train_steps_per_second": 0.162,
|
||
|
|
"valid_targets_mean": 3879.8,
|
||
|
|
"valid_targets_min": 728
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 5,
|
||
|
|
"max_steps": 4606,
|
||
|
|
"num_input_tokens_seen": 0,
|
||
|
|
"num_train_epochs": 7,
|
||
|
|
"save_steps": 1500,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": true,
|
||
|
|
"should_training_stop": true
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 1702570888593408.0,
|
||
|
|
"train_batch_size": 1,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|